/* Perform various loop optimizations, including strength reduction.
   Copyright (C) 1987, 1988, 1989, 1991, 1992, 1993, 1994, 1995,
   1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */
/* This is the loop optimization pass of the compiler.
   It finds invariant computations within loops and moves them
   to the beginning of the loop.  Then it identifies basic and
   general induction variables.

   Basic induction variables (BIVs) are pseudo registers that are set within
   a loop only by incrementing or decrementing their value.  General induction
   variables (GIVs) are pseudo registers whose value is a linear function
   of a basic induction variable.  BIVs are recognized by `basic_induction_var';
   GIVs by `general_induction_var'.

   Once induction variables are identified, strength reduction is applied to the
   general induction variables, and induction variable elimination is applied to
   the basic induction variables.

   It also finds cases where a register is set within the loop by
   zero-extending a narrower value and changes these to zero the entire
   register once before the loop and merely copy the low part within the loop.

   Most of the complexity is in heuristics to decide when it is worth
   while to do these things.  */
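
/* Editor's illustration, not part of the original sources: in a C loop
   such as

	for (i = 0; i < n; i++)
	  a[i] = 0;

   the counter `i' is a biv, since each iteration changes it only by
   adding a constant.  Assuming 4-byte array elements, the address
   computation `a + 4*i' is a giv: a linear function mult * biv + add
   of the biv, with mult == 4 and add == a.  Strength reduction replaces
   the multiplication by keeping a pointer that is bumped by 4 on every
   iteration.  */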
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tm_p.h"
#include "function.h"
#include "expr.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "insn-config.h"
#include "regs.h"
#include "recog.h"
#include "flags.h"
#include "real.h"
#include "cselib.h"
#include "except.h"
#include "toplev.h"
#include "predict.h"
#include "insn-flags.h"
#include "optabs.h"
#include "cfgloop.h"
#include "ggc.h"

/* Get the loop info pointer of a loop.  */
#define LOOP_INFO(LOOP) ((struct loop_info *) (LOOP)->aux)

/* Get a pointer to the loop movables structure.  */
#define LOOP_MOVABLES(LOOP) (&LOOP_INFO (LOOP)->movables)

/* Get a pointer to the loop registers structure.  */
#define LOOP_REGS(LOOP) (&LOOP_INFO (LOOP)->regs)

/* Get a pointer to the loop induction variables structure.  */
#define LOOP_IVS(LOOP) (&LOOP_INFO (LOOP)->ivs)
/* Get the luid of an insn.  Catch the error of trying to reference the LUID
   of an insn added during loop, since these don't have LUIDs.  */

#define INSN_LUID(INSN) \
  (INSN_UID (INSN) < max_uid_for_loop ? uid_luid[INSN_UID (INSN)] \
   : (abort (), -1))

#define REGNO_FIRST_LUID(REGNO) \
  (REGNO_FIRST_UID (REGNO) < max_uid_for_loop \
   ? uid_luid[REGNO_FIRST_UID (REGNO)] \
   : 0)
#define REGNO_LAST_LUID(REGNO) \
  (REGNO_LAST_UID (REGNO) < max_uid_for_loop \
   ? uid_luid[REGNO_LAST_UID (REGNO)] \
   : INT_MAX)
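
/* Editor's note, not in the original sources: the fallback values above are
   deliberately conservative.  A register whose first or last use lacks
   regscan info is given a first luid of 0 and a last luid of INT_MAX,
   i.e. it is treated as live everywhere the pass looks.  */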
/* A "basic induction variable" or biv is a pseudo reg that is set
   (within this loop) only by incrementing or decrementing it.  */
/* A "general induction variable" or giv is a pseudo reg whose
   value is a linear function of a biv.  */

/* Bivs are recognized by `basic_induction_var';
   Givs by `general_induction_var'.  */

/* An enum for the two different types of givs, those that are used
   as memory addresses and those that are calculated into registers.  */
enum g_types
{
  DEST_ADDR,
  DEST_REG
};

/* A `struct induction' is created for every instruction that sets
   an induction variable (either a biv or a giv).  */

struct induction
{
  rtx insn;			/* The insn that sets a biv or giv */
  rtx new_reg;			/* New register, containing strength reduced
				   version of this giv.  */
  rtx src_reg;			/* Biv from which this giv is computed.
				   (If this is a biv, then this is the biv.)  */
  enum g_types giv_type;	/* Indicate whether DEST_ADDR or DEST_REG */
  rtx dest_reg;			/* Destination register for insn: this is the
				   register which was the biv or giv.
				   For a biv, this equals src_reg.
				   For a DEST_ADDR type giv, this is 0.  */
  rtx *location;		/* Place in the insn where this giv occurs.
				   If GIV_TYPE is DEST_REG, this is 0.  */
				/* For a biv, this is the place where add_val
				   was found.  */
  enum machine_mode mode;	/* The mode of this biv or giv */
  rtx mem;			/* For DEST_ADDR, the memory object.  */
  rtx mult_val;			/* Multiplicative factor for src_reg.  */
  rtx add_val;			/* Additive constant for that product.  */
  int benefit;			/* Gain from eliminating this insn.  */
  rtx final_value;		/* If the giv is used outside the loop, and its
				   final value could be calculated, it is put
				   here, and the giv is made replaceable.  Set
				   the giv to this value before the loop.  */
  unsigned combined_with;	/* The number of givs this giv has been
				   combined with.  If nonzero, this giv
				   cannot combine with any other giv.  */
  unsigned replaceable : 1;	/* 1 if we can substitute the strength-reduced
				   variable for the original variable.
				   0 means they must be kept separate and the
				   new one must be copied into the old pseudo
				   reg each time the old one is set.  */
  unsigned not_replaceable : 1;	/* Used to prevent duplicating work.  This is
				   1 if we know that the giv definitely can
				   not be made replaceable, in which case we
				   don't bother checking the variable again
				   even if further info is available.
				   Both this and the above can be zero.  */
  unsigned ignore : 1;		/* 1 prohibits further processing of giv */
  unsigned always_computable : 1;/* 1 if this value is computable every
				    iteration.  */
  unsigned always_executed : 1; /* 1 if this set occurs each iteration.  */
  unsigned maybe_multiple : 1;	/* Only used for a biv and 1 if this biv
				   update may be done multiple times per
				   iteration.  */
  unsigned cant_derive : 1;	/* For giv's, 1 if this giv cannot derive
				   another giv.  This occurs in many cases
				   where a giv's lifetime spans an update to
				   a biv.  */
  unsigned maybe_dead : 1;	/* 1 if this giv might be dead.  In that case,
				   we won't use it to eliminate a biv, it
				   would probably lose.  */
  unsigned auto_inc_opt : 1;	/* 1 if this giv had its increment output next
				   to it to try to form an auto-inc address.  */
  unsigned shared : 1;
  unsigned no_const_addval : 1; /* 1 if add_val does not contain a const.  */
  int lifetime;			/* Length of life of this giv */
  rtx derive_adjustment;	/* If nonzero, is an adjustment to be
				   subtracted from add_val when this giv
				   derives another.  This occurs when the
				   giv spans a biv update by incrementation.  */
  rtx ext_dependent;		/* If nonzero, is a sign or zero extension
				   of a biv on which this giv is dependent.  */
  struct induction *next_iv;	/* For givs, links together all givs that are
				   based on the same biv.  For bivs, links
				   together all biv entries that refer to the
				   same biv register.  */
  struct induction *same;	/* For givs, if the giv has been combined with
				   another giv, this points to the base giv.
				   The base giv will have COMBINED_WITH nonzero.
				   For bivs, if the biv has the same LOCATION
				   as another biv, this points to the base
				   biv.  */
  struct induction *same_insn;	/* If there are multiple identical givs in
				   the same insn, then all but one have this
				   field set, and they all point to the giv
				   that doesn't have this field set.  */
  rtx last_use;			/* For a giv made from a biv increment, this is
				   a substitute for the lifetime information.  */
};

/* A `struct iv_class' is created for each biv.  */

struct iv_class
{
  unsigned int regno;		/* Pseudo reg which is the biv.  */
  int biv_count;		/* Number of insns setting this reg.  */
  struct induction *biv;	/* List of all insns that set this reg.  */
  int giv_count;		/* Number of DEST_REG givs computed from this
				   biv.  The resulting count is only used in
				   check_dbra_loop.  */
  struct induction *giv;	/* List of all insns that compute a giv
				   from this reg.  */
  int total_benefit;		/* Sum of BENEFITs of all those givs.  */
  rtx initial_value;		/* Value of reg at loop start.  */
  rtx initial_test;		/* Test performed on BIV before loop.  */
  rtx final_value;		/* Value of reg at loop end, if known.  */
  struct iv_class *next;	/* Links all class structures together.  */
  rtx init_insn;		/* insn which initializes biv, 0 if none.  */
  rtx init_set;			/* SET of INIT_INSN, if any.  */
  unsigned incremented : 1;	/* 1 if somewhere incremented/decremented */
  unsigned eliminable : 1;	/* 1 if plausible candidate for
				   elimination.  */
  unsigned nonneg : 1;		/* 1 if we added a REG_NONNEG note for
				   this.  */
  unsigned reversed : 1;	/* 1 if we reversed the loop that this
				   biv controls.  */
  unsigned all_reduced : 1;	/* 1 if all givs using this biv have
				   been reduced.  */
};

/* Definitions used by the basic induction variable discovery code.  */
enum iv_mode
{
  UNKNOWN_INDUCT,
  BASIC_INDUCT,
  NOT_BASIC_INDUCT,
  GENERAL_INDUCT
};

/* A `struct iv' is created for every register.  */

struct iv
{
  enum iv_mode type;
  union
  {
    struct iv_class *class;
    struct induction *info;
  } iv;
};

#define REG_IV_TYPE(ivs, n) ivs->regs[n].type
#define REG_IV_INFO(ivs, n) ivs->regs[n].iv.info
#define REG_IV_CLASS(ivs, n) ivs->regs[n].iv.class

struct loop_ivs
{
  /* Indexed by register number, contains pointer to `struct
     iv' if register is an induction variable.  */
  struct iv *regs;

  /* Size of regs array.  */
  unsigned int n_regs;

  /* The head of a list which links together (via the next field)
     every iv class for the current loop.  */
  struct iv_class *list;
};

typedef struct loop_mem_info
{
  rtx mem;			/* The MEM itself.  */
  rtx reg;			/* Corresponding pseudo, if any.  */
  int optimize;			/* Nonzero if we can optimize access to this MEM.  */
} loop_mem_info;

struct loop_reg
{
  /* Number of times the reg is set during the loop being scanned.
     During code motion, a negative value indicates a reg that has
     been made a candidate; in particular -2 means that it is a
     candidate that we know is equal to a constant and -1 means that
     it is a candidate not known equal to a constant.  After code
     motion, regs moved have 0 (which is accurate now) while the
     failed candidates have the original number of times set.

     Therefore, at all times, == 0 indicates an invariant register;
     < 0 a conditionally invariant one.  */
  int set_in_loop;

  /* Original value of set_in_loop; same except that this value
     is not set negative for a reg whose sets have been made candidates
     and not set to 0 for a reg that is moved.  */
  int n_times_set;

  /* Contains the insn in which a register was used if it was used
     exactly once; contains const0_rtx if it was used more than once.  */
  rtx single_usage;

  /* Nonzero indicates that the register cannot be moved or strength
     reduced.  */
  char may_not_optimize;

  /* Nonzero means reg N has already been moved out of one loop.
     This reduces the desire to move it out of another.  */
  char moved_once;
};

struct loop_regs
{
  int num;			/* Number of regs used in table.  */
  int size;			/* Size of table.  */
  struct loop_reg *array;	/* Register usage info array.  */
  int multiple_uses;		/* Nonzero if a reg has multiple uses.  */
};

struct loop_movables
{
  /* Head of movable chain.  */
  struct movable *head;
  /* Last movable in chain.  */
  struct movable *last;
};

/* Information pertaining to a loop.  */

struct loop_info
{
  /* Nonzero if there is a subroutine call in the current loop.  */
  int has_call;
  /* Nonzero if there is a libcall in the current loop.  */
  int has_libcall;
  /* Nonzero if there is a non-constant call in the current loop.  */
  int has_nonconst_call;
  /* Nonzero if there is a prefetch instruction in the current loop.  */
  int has_prefetch;
  /* Nonzero if there is a volatile memory reference in the current
     loop.  */
  int has_volatile;
  /* Nonzero if there is a tablejump in the current loop.  */
  int has_tablejump;
  /* Nonzero if there are ways to leave the loop other than falling
     off the end.  */
  int has_multiple_exit_targets;
  /* Nonzero if there is an indirect jump in the current function.  */
  int has_indirect_jump;
  /* Register or constant initial loop value.  */
  rtx initial_value;
  /* Register or constant value used for comparison test.  */
  rtx comparison_value;
  /* Register or constant approximate final value.  */
  rtx final_value;
  /* Register or constant initial loop value with term common to
     final_value removed.  */
  rtx initial_equiv_value;
  /* Register or constant final loop value with term common to
     initial_value removed.  */
  rtx final_equiv_value;
  /* Register corresponding to iteration variable.  */
  rtx iteration_var;
  /* Constant loop increment.  */
  rtx increment;
  enum rtx_code comparison_code;
  /* Holds the number of loop iterations.  It is zero if the number
     could not be calculated.  Must be unsigned since the number of
     iterations can be as high as 2^wordsize - 1.  For loops with a
     wider iterator, this number will be zero if the number of loop
     iterations is too large for an unsigned integer to hold.  */
  unsigned HOST_WIDE_INT n_iterations;
  int used_count_register;
  /* The loop iterator induction variable.  */
  struct iv_class *iv;
  /* List of MEMs that are stored in this loop.  */
  rtx store_mems;
  /* Array of MEMs that are used (read or written) in this loop, but
     cannot be aliased by anything in this loop, except perhaps
     themselves.  In other words, if mems[i] is altered during
     the loop, it is altered by an expression that is rtx_equal_p to
     it.  */
  loop_mem_info *mems;
  /* The index of the next available slot in MEMS.  */
  int mems_idx;
  /* The number of elements allocated in MEMS.  */
  int mems_allocated;
  /* Nonzero if we don't know what MEMs were changed in the current
     loop.  This happens if the loop contains a call (in which case
     `has_call' will also be set) or if we store into more than
     NUM_STORES MEMs.  */
  int unknown_address_altered;
  /* The above doesn't count any readonly memory locations that are
     stored.  This does.  */
  int unknown_constant_address_altered;
  /* Count of memory write instructions discovered in the loop.  */
  int num_mem_sets;
  /* The insn where the first of these was found.  */
  rtx first_loop_store_insn;
  /* The chain of movable insns in the loop.  */
  struct loop_movables movables;
  /* The registers used in the loop.  */
  struct loop_regs regs;
  /* The induction variable information in the loop.  */
  struct loop_ivs ivs;
  /* Nonzero if call is in pre_header extended basic block.  */
  int pre_header_has_call;
};

/* Not really meaningful values, but at least something.  */
#ifndef SIMULTANEOUS_PREFETCHES
#define SIMULTANEOUS_PREFETCHES 3
#endif
#ifndef PREFETCH_BLOCK
#define PREFETCH_BLOCK 32
#endif
#ifndef HAVE_prefetch
#define HAVE_prefetch 0
#define CODE_FOR_prefetch 0
#define gen_prefetch(a,b,c) (abort(), NULL_RTX)
#endif

/* Give up the prefetch optimizations once we exceed a given threshold.
   It is unlikely that we would be able to optimize something in a loop
   with so many detected prefetches.  */
#define MAX_PREFETCHES 100
/* The number of prefetch blocks that are beneficial to fetch at once before
   a loop with a known (and low) iteration count.  */
#define PREFETCH_BLOCKS_BEFORE_LOOP_MAX 6
/* For very tiny loops it is not worthwhile to prefetch even before the loop,
   since it is likely that the data are already in the cache.  */
#define PREFETCH_BLOCKS_BEFORE_LOOP_MIN 2

/* Parameterize some prefetch heuristics so they can be turned on and off
   easily for performance testing on new architectures.  These can be
   defined in target-dependent files.  */

/* Prefetch is worthwhile only when loads/stores are dense.  */
#ifndef PREFETCH_ONLY_DENSE_MEM
#define PREFETCH_ONLY_DENSE_MEM 1
#endif

/* Define what we mean by "dense" loads and stores; this value divided by 256
   is the minimum fraction of memory references for prefetching to be worth
   the trouble.  */
#ifndef PREFETCH_DENSE_MEM
#define PREFETCH_DENSE_MEM 220
#endif
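
/* Editor's note, not in the original sources: the default of 220 thus
   corresponds to a density threshold of 220/256, roughly 86%.  */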

/* Do not prefetch for a loop whose iteration count is known to be low.  */
#ifndef PREFETCH_NO_LOW_LOOPCNT
#define PREFETCH_NO_LOW_LOOPCNT 1
#endif

/* Define what we mean by a "low" iteration count.  */
#ifndef PREFETCH_LOW_LOOPCNT
#define PREFETCH_LOW_LOOPCNT 32
#endif

/* Do not prefetch for a loop that contains a function call; such a loop is
   probably not an internal loop.  */
#ifndef PREFETCH_NO_CALL
#define PREFETCH_NO_CALL 1
#endif

/* Do not prefetch accesses with an extreme stride.  */
#ifndef PREFETCH_NO_EXTREME_STRIDE
#define PREFETCH_NO_EXTREME_STRIDE 1
#endif

/* Define what we mean by an "extreme" stride.  */
#ifndef PREFETCH_EXTREME_STRIDE
#define PREFETCH_EXTREME_STRIDE 4096
#endif

/* Define a limit to how far apart indices can be and still be merged
   into a single prefetch.  */
#ifndef PREFETCH_EXTREME_DIFFERENCE
#define PREFETCH_EXTREME_DIFFERENCE 4096
#endif

/* Issue prefetch instructions before the loop to fetch data to be used
   in the first few loop iterations.  */
#ifndef PREFETCH_BEFORE_LOOP
#define PREFETCH_BEFORE_LOOP 1
#endif

/* Do not handle reversed order prefetches (negative stride).  */
#ifndef PREFETCH_NO_REVERSE_ORDER
#define PREFETCH_NO_REVERSE_ORDER 1
#endif

/* Prefetch even if the GIV is in conditional code.  */
#ifndef PREFETCH_CONDITIONAL
#define PREFETCH_CONDITIONAL 1
#endif

#define LOOP_REG_LIFETIME(LOOP, REGNO) \
  ((REGNO_LAST_LUID (REGNO) - REGNO_FIRST_LUID (REGNO)))

#define LOOP_REG_GLOBAL_P(LOOP, REGNO) \
  ((REGNO_LAST_LUID (REGNO) > INSN_LUID ((LOOP)->end) \
    || REGNO_FIRST_LUID (REGNO) < INSN_LUID ((LOOP)->start)))

#define LOOP_REGNO_NREGS(REGNO, SET_DEST) \
  ((REGNO) < FIRST_PSEUDO_REGISTER \
   ? (int) hard_regno_nregs[(REGNO)][GET_MODE (SET_DEST)] : 1)
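
/* Editor's note, not in the original sources: LOOP_REG_GLOBAL_P tests
   whether the register's live range, in luid order, extends before the
   loop's start or past its end, i.e. whether the register is live
   outside the loop.  */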

/* Vector mapping INSN_UIDs to luids.
   The luids are like uids but always increase monotonically.
   We use them to see whether a jump comes from outside a given loop.  */

static int *uid_luid;

/* Indexed by INSN_UID, contains a pointer to the (innermost) loop
   the insn is contained in.  */

static struct loop **uid_loop;

/* 1 + largest uid of any insn.  */

static int max_uid_for_loop;

/* Number of loops detected in current function.  Used as index to the
   next few tables.  */

static int max_loop_num;

/* Bound on pseudo register number before loop optimization.
   A pseudo has valid regscan info if its number is < max_reg_before_loop.  */
static unsigned int max_reg_before_loop;

/* The value to pass to the next call of reg_scan_update.  */
static int loop_max_reg;

/* During the analysis of a loop, a chain of `struct movable's
   is made to record all the movable insns found.
   Then the entire chain can be scanned to decide which to move.  */

struct movable
{
  rtx insn;			/* A movable insn */
  rtx set_src;			/* The expression this reg is set from.  */
  rtx set_dest;			/* The destination of this SET.  */
  rtx dependencies;		/* When INSN is libcall, this is an EXPR_LIST
				   of any registers used within the LIBCALL.  */
  int consec;			/* Number of consecutive following insns
				   that must be moved with this one.  */
  unsigned int regno;		/* The register it sets */
  short lifetime;		/* lifetime of that register;
				   may be adjusted when matching movables
				   that load the same value are found.  */
  short savings;		/* Number of insns we can move for this reg,
				   including other movables that force this
				   or match this one.  */
  ENUM_BITFIELD(machine_mode) savemode : 8;   /* Nonzero means it is a mode for
				   a low part that we should avoid changing when
				   clearing the rest of the reg.  */
  unsigned int cond : 1;	/* 1 if only conditionally movable */
  unsigned int force : 1;	/* 1 means MUST move this insn */
  unsigned int global : 1;	/* 1 means reg is live outside this loop */
		/* If PARTIAL is 1, GLOBAL means something different:
		   that the reg is live outside the range from where it is set
		   to the following label.  */
  unsigned int done : 1;	/* 1 inhibits further processing of this */

  unsigned int partial : 1;	/* 1 means this reg is used for zero-extending.
				   In particular, moving it does not make it
				   invariant.  */
  unsigned int move_insn : 1;	/* 1 means that we call emit_move_insn to
				   load SRC, rather than copying INSN.  */
  unsigned int move_insn_first:1;/* Same as above, if this is necessary for the
				    first insn of a consecutive sets group.  */
  unsigned int is_equiv : 1;	/* 1 means a REG_EQUIV is present on INSN.  */
  unsigned int insert_temp : 1; /* 1 means we copy to a new pseudo and replace
				   the original insn with a copy from that
				   pseudo, rather than deleting it.  */
  struct movable *match;	/* First entry for same value */
  struct movable *forces;	/* An insn that must be moved if this is */
  struct movable *next;
};

static FILE *loop_dump_stream;

/* Forward declarations.  */

static void invalidate_loops_containing_label (rtx);
static void find_and_verify_loops (rtx, struct loops *);
static void mark_loop_jump (rtx, struct loop *);
static void prescan_loop (struct loop *);
static int reg_in_basic_block_p (rtx, rtx);
static int consec_sets_invariant_p (const struct loop *, rtx, int, rtx);
static int labels_in_range_p (rtx, int);
static void count_one_set (struct loop_regs *, rtx, rtx, rtx *);
static void note_addr_stored (rtx, rtx, void *);
static void note_set_pseudo_multiple_uses (rtx, rtx, void *);
static int loop_reg_used_before_p (const struct loop *, rtx, rtx);
static rtx find_regs_nested (rtx, rtx);
static void scan_loop (struct loop*, int);
#if 0
static void replace_call_address (rtx, rtx, rtx);
#endif
static rtx skip_consec_insns (rtx, int);
static int libcall_benefit (rtx);
static rtx libcall_other_reg (rtx, rtx);
static void record_excess_regs (rtx, rtx, rtx *);
static void ignore_some_movables (struct loop_movables *);
static void force_movables (struct loop_movables *);
static void combine_movables (struct loop_movables *, struct loop_regs *);
static int num_unmoved_movables (const struct loop *);
static int regs_match_p (rtx, rtx, struct loop_movables *);
static int rtx_equal_for_loop_p (rtx, rtx, struct loop_movables *,
				 struct loop_regs *);
static void add_label_notes (rtx, rtx);
static void move_movables (struct loop *loop, struct loop_movables *, int,
			   int);
static void loop_movables_add (struct loop_movables *, struct movable *);
static void loop_movables_free (struct loop_movables *);
static int count_nonfixed_reads (const struct loop *, rtx);
static void loop_bivs_find (struct loop *);
static void loop_bivs_init_find (struct loop *);
static void loop_bivs_check (struct loop *);
static void loop_givs_find (struct loop *);
static void loop_givs_check (struct loop *);
static int loop_biv_eliminable_p (struct loop *, struct iv_class *, int, int);
static int loop_giv_reduce_benefit (struct loop *, struct iv_class *,
				    struct induction *, rtx);
static void loop_givs_dead_check (struct loop *, struct iv_class *);
static void loop_givs_reduce (struct loop *, struct iv_class *);
static void loop_givs_rescan (struct loop *, struct iv_class *, rtx *);
static void loop_ivs_free (struct loop *);
static void strength_reduce (struct loop *, int);
static void find_single_use_in_loop (struct loop_regs *, rtx, rtx);
static int valid_initial_value_p (rtx, rtx, int, rtx);
static void find_mem_givs (const struct loop *, rtx, rtx, int, int);
static void record_biv (struct loop *, struct induction *, rtx, rtx, rtx,
			rtx, rtx *, int, int);
static void check_final_value (const struct loop *, struct induction *);
static void loop_ivs_dump (const struct loop *, FILE *, int);
static void loop_iv_class_dump (const struct iv_class *, FILE *, int);
static void loop_biv_dump (const struct induction *, FILE *, int);
static void loop_giv_dump (const struct induction *, FILE *, int);
static void record_giv (const struct loop *, struct induction *, rtx, rtx,
			rtx, rtx, rtx, rtx, int, enum g_types, int, int,
			rtx *);
static void update_giv_derive (const struct loop *, rtx);
static HOST_WIDE_INT get_monotonic_increment (struct iv_class *);
static bool biased_biv_fits_mode_p (const struct loop *, struct iv_class *,
				    HOST_WIDE_INT, enum machine_mode,
				    unsigned HOST_WIDE_INT);
static bool biv_fits_mode_p (const struct loop *, struct iv_class *,
			     HOST_WIDE_INT, enum machine_mode, bool);
static bool extension_within_bounds_p (const struct loop *, struct iv_class *,
				       HOST_WIDE_INT, rtx);
static void check_ext_dependent_givs (const struct loop *, struct iv_class *);
static int basic_induction_var (const struct loop *, rtx, enum machine_mode,
				rtx, rtx, rtx *, rtx *, rtx **);
static rtx simplify_giv_expr (const struct loop *, rtx, rtx *, int *);
static int general_induction_var (const struct loop *loop, rtx, rtx *, rtx *,
				  rtx *, rtx *, int, int *, enum machine_mode);
static int consec_sets_giv (const struct loop *, int, rtx, rtx, rtx, rtx *,
			    rtx *, rtx *, rtx *);
static int check_dbra_loop (struct loop *, int);
static rtx express_from_1 (rtx, rtx, rtx);
static rtx combine_givs_p (struct induction *, struct induction *);
static int cmp_combine_givs_stats (const void *, const void *);
static void combine_givs (struct loop_regs *, struct iv_class *);
static int product_cheap_p (rtx, rtx);
static int maybe_eliminate_biv (const struct loop *, struct iv_class *, int,
				int, int);
static int maybe_eliminate_biv_1 (const struct loop *, rtx, rtx,
				  struct iv_class *, int, basic_block, rtx);
static int last_use_this_basic_block (rtx, rtx);
static void record_initial (rtx, rtx, void *);
static void update_reg_last_use (rtx, rtx);
static rtx next_insn_in_loop (const struct loop *, rtx);
static void loop_regs_scan (const struct loop *, int);
static int count_insns_in_loop (const struct loop *);
static int find_mem_in_note_1 (rtx *, void *);
static rtx find_mem_in_note (rtx);
static void load_mems (const struct loop *);
static int insert_loop_mem (rtx *, void *);
static int replace_loop_mem (rtx *, void *);
static void replace_loop_mems (rtx, rtx, rtx, int);
static int replace_loop_reg (rtx *, void *);
static void replace_loop_regs (rtx insn, rtx, rtx);
static void note_reg_stored (rtx, rtx, void *);
static void try_copy_prop (const struct loop *, rtx, unsigned int);
static void try_swap_copy_prop (const struct loop *, rtx, unsigned int);
static rtx check_insn_for_givs (struct loop *, rtx, int, int);
static rtx check_insn_for_bivs (struct loop *, rtx, int, int);
static rtx gen_add_mult (rtx, rtx, rtx, rtx);
static void loop_regs_update (const struct loop *, rtx);
static int iv_add_mult_cost (rtx, rtx, rtx, rtx);
static int loop_invariant_p (const struct loop *, rtx);
static rtx loop_insn_hoist (const struct loop *, rtx);
static void loop_iv_add_mult_emit_before (const struct loop *, rtx, rtx, rtx,
					  rtx, basic_block, rtx);
static rtx loop_insn_emit_before (const struct loop *, basic_block,
				  rtx, rtx);
static int loop_insn_first_p (rtx, rtx);
static rtx get_condition_for_loop (const struct loop *, rtx);
static void loop_iv_add_mult_sink (const struct loop *, rtx, rtx, rtx, rtx);
static void loop_iv_add_mult_hoist (const struct loop *, rtx, rtx, rtx, rtx);
static rtx extend_value_for_giv (struct induction *, rtx);
static rtx loop_insn_sink (const struct loop *, rtx);

static rtx loop_insn_emit_after (const struct loop *, basic_block, rtx, rtx);
static rtx loop_call_insn_emit_before (const struct loop *, basic_block,
				       rtx, rtx);
static rtx loop_call_insn_hoist (const struct loop *, rtx);
static rtx loop_insn_sink_or_swim (const struct loop *, rtx);

static void loop_dump_aux (const struct loop *, FILE *, int);
static void loop_delete_insns (rtx, rtx);
static HOST_WIDE_INT remove_constant_addition (rtx *);
static rtx gen_load_of_final_value (rtx, rtx);
void debug_ivs (const struct loop *);
void debug_iv_class (const struct iv_class *);
void debug_biv (const struct induction *);
void debug_giv (const struct induction *);
void debug_loop (const struct loop *);
void debug_loops (const struct loops *);

typedef struct loop_replace_args
{
  rtx match;
  rtx replacement;
  rtx insn;
} loop_replace_args;

/* Nonzero iff INSN is between START and END, inclusive.  */
#define INSN_IN_RANGE_P(INSN, START, END) \
  (INSN_UID (INSN) < max_uid_for_loop \
   && INSN_LUID (INSN) >= INSN_LUID (START) \
   && INSN_LUID (INSN) <= INSN_LUID (END))
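
/* Editor's note, not in the original sources: the uid bound makes this
   macro conservatively false for insns created by the loop pass itself,
   since such insns have uids >= max_uid_for_loop and hence no luids.  */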

/* Indirect_jump_in_function is computed once per function.  */
static int indirect_jump_in_function;
static int indirect_jump_in_function_p (rtx);

static int compute_luids (rtx, rtx, int);

static int biv_elimination_giv_has_0_offset (struct induction *,
					     struct induction *, rtx);

/* Benefit penalty, if a giv is not replaceable, i.e. must emit an insn to
   copy the value of the strength reduced giv to its original register.  */
static int copy_cost;

/* Cost of using a register, to normalize the benefits of a giv.  */
static int reg_address_cost;

void
init_loop (void)
{
  rtx reg = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);

  reg_address_cost = address_cost (reg, SImode);

  copy_cost = COSTS_N_INSNS (1);
}

/* Compute the mapping from uids to luids.
   LUIDs are numbers assigned to insns, like uids,
   except that luids increase monotonically through the code.
   Start at insn START and stop just before END.  Assign LUIDs
   starting with PREV_LUID + 1.  Return the last assigned LUID + 1.  */
static int
compute_luids (rtx start, rtx end, int prev_luid)
{
  int i;
  rtx insn;

  for (insn = start, i = prev_luid; insn != end; insn = NEXT_INSN (insn))
    {
      if (INSN_UID (insn) >= max_uid_for_loop)
	continue;
      /* Don't assign luids to line-number NOTEs, so that the distance in
	 luids between two insns is not affected by -g.  */
      if (!NOTE_P (insn)
	  || NOTE_LINE_NUMBER (insn) <= 0)
	uid_luid[INSN_UID (insn)] = ++i;
      else
	/* Give a line number note the same luid as preceding insn.  */
	uid_luid[INSN_UID (insn)] = i;
    }

  return i + 1;
}

/* Entry point of this file.  Perform loop optimization
   on the current function.  F is the first insn of the function
   and DUMPFILE is a stream for output of a trace of actions taken
   (or 0 if none should be output).  */

void
loop_optimize (rtx f, FILE *dumpfile, int flags)
{
  rtx insn;
  int i;
  struct loops loops_data;
  struct loops *loops = &loops_data;
  struct loop_info *loops_info;

  loop_dump_stream = dumpfile;

  init_recog_no_volatile ();

  max_reg_before_loop = max_reg_num ();
  loop_max_reg = max_reg_before_loop;

  regs_may_share = 0;

  /* Count the number of loops.  */

  max_loop_num = 0;
  for (insn = f; insn; insn = NEXT_INSN (insn))
    {
      if (NOTE_P (insn)
	  && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
	max_loop_num++;
    }

  /* Don't waste time if no loops.  */
  if (max_loop_num == 0)
    return;

  loops->num = max_loop_num;

  /* Get size to use for tables indexed by uids.
     Leave some space for labels allocated by find_and_verify_loops.  */
  max_uid_for_loop = get_max_uid () + 1 + max_loop_num * 32;

  uid_luid = xcalloc (max_uid_for_loop, sizeof (int));
  uid_loop = xcalloc (max_uid_for_loop, sizeof (struct loop *));

  /* Allocate storage for array of loops.  */
  loops->array = xcalloc (loops->num, sizeof (struct loop));

  /* Find and process each loop.
     First, find them, and record them in order of their beginnings.  */
  find_and_verify_loops (f, loops);

  /* Allocate and initialize auxiliary loop information.  */
  loops_info = xcalloc (loops->num, sizeof (struct loop_info));
  for (i = 0; i < (int) loops->num; i++)
    loops->array[i].aux = loops_info + i;

  /* Now find all register lifetimes.  This must be done after
     find_and_verify_loops, because it might reorder the insns in the
     function.  */
  reg_scan (f, max_reg_before_loop);

  /* This must occur after reg_scan so that registers created by gcse
     will have entries in the register tables.

     We could have added a call to reg_scan after gcse_main in toplev.c,
     but moving this call to init_alias_analysis is more efficient.  */
  init_alias_analysis ();

  /* See if we went too far.  Note that get_max_uid already returns
     one more than the maximum uid of all insns.  */
  if (get_max_uid () > max_uid_for_loop)
    abort ();
  /* Now reset it to the actual size we need.  See above.  */
  max_uid_for_loop = get_max_uid ();

  /* find_and_verify_loops has already called compute_luids, but it
     might have rearranged code afterwards, so we need to recompute
     the luids now.  */
  compute_luids (f, NULL_RTX, 0);

  /* Don't leave gaps in uid_luid for insns that have been
     deleted.  It is possible that the first or last insn
     using some register has been deleted by cross-jumping.
     Make sure that uid_luid for that former insn's uid
     points to the general area where that insn used to be.  */
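  /* Editor's note, not in the original sources: the first loop below parks
     the first nonzero luid in uid_luid[0], so that the second loop, which
     fills each remaining gap with the luid of the previous uid, never reads
     an undefined value at index i - 1.  */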
  for (i = 0; i < max_uid_for_loop; i++)
    {
      uid_luid[0] = uid_luid[i];
      if (uid_luid[0] != 0)
	break;
    }
  for (i = 0; i < max_uid_for_loop; i++)
    if (uid_luid[i] == 0)
      uid_luid[i] = uid_luid[i - 1];

  /* Determine if the function has an indirect jump.  On some systems
     this prevents low overhead loop instructions from being used.  */
  indirect_jump_in_function = indirect_jump_in_function_p (f);

  /* Now scan the loops, last ones first, since this means inner ones are done
     before outer ones.  */
  for (i = max_loop_num - 1; i >= 0; i--)
    {
      struct loop *loop = &loops->array[i];

      if (! loop->invalid && loop->end)
	{
	  scan_loop (loop, flags);
	  ggc_collect ();
	}
    }

  end_alias_analysis ();

  /* Clean up.  */
  for (i = 0; i < (int) loops->num; i++)
    free (loops_info[i].mems);

  free (uid_luid);
  free (uid_loop);
  free (loops_info);
  free (loops->array);
}

/* Returns the next insn, in execution order, after INSN.  START and
   END are the NOTE_INSN_LOOP_BEG and NOTE_INSN_LOOP_END for the loop,
   respectively.  LOOP->TOP, if non-NULL, is the top of the loop in the
   insn-stream; it is used with loops that are entered near the
   bottom.  */

static rtx
next_insn_in_loop (const struct loop *loop, rtx insn)
{
  insn = NEXT_INSN (insn);

  if (insn == loop->end)
    {
      if (loop->top)
	/* Go to the top of the loop, and continue there.  */
	insn = loop->top;
      else
	/* We're done.  */
	insn = NULL_RTX;
    }

  if (insn == loop->scan_start)
    /* We're done.  */
    insn = NULL_RTX;

  return insn;
}

/* Find any register references hidden inside X and add them to
   the dependency list DEPS.  This is used to look inside CLOBBER (MEM (...))
   expressions when checking whether a PARALLEL can be pulled out of a loop.  */

static rtx
find_regs_nested (rtx deps, rtx x)
{
  enum rtx_code code = GET_CODE (x);
  if (code == REG)
    deps = gen_rtx_EXPR_LIST (VOIDmode, x, deps);
  else
    {
      const char *fmt = GET_RTX_FORMAT (code);
      int i, j;
      for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
	{
	  if (fmt[i] == 'e')
	    deps = find_regs_nested (deps, XEXP (x, i));
	  else if (fmt[i] == 'E')
	    for (j = 0; j < XVECLEN (x, i); j++)
	      deps = find_regs_nested (deps, XVECEXP (x, i, j));
	}
    }
  return deps;
}

/* Optimize one loop described by LOOP.  */

/* ??? Could also move memory writes out of loops if the destination address
   is invariant, the source is invariant, the memory write is not volatile,
   and if we can prove that no read inside the loop can read this address
   before the write occurs.  If there is a read of this address after the
   write, then we can also mark the memory read as invariant.  */

static void
scan_loop (struct loop *loop, int flags)
{
  struct loop_info *loop_info = LOOP_INFO (loop);
  struct loop_regs *regs = LOOP_REGS (loop);
  int i;
  rtx loop_start = loop->start;
  rtx loop_end = loop->end;
  rtx p;
  /* 1 if we are scanning insns that could be executed zero times.  */
  int maybe_never = 0;
  /* 1 if we are scanning insns that might never be executed
     due to a subroutine call which might exit before they are reached.  */
  int call_passed = 0;
  /* Number of insns in the loop.  */
  int insn_count;
  int tem;
  rtx temp, update_start, update_end;
  /* The SET from an insn, if it is the only SET in the insn.  */
  rtx set, set1;
  /* Chain describing insns movable in current loop.  */
  struct loop_movables *movables = LOOP_MOVABLES (loop);
  /* Ratio of extra register life span we can justify
     for saving an instruction.  More if loop doesn't call subroutines
     since in that case saving an insn makes more difference
     and more registers are available.  */
  int threshold;
  int in_libcall;

  loop->top = 0;

  movables->head = 0;
  movables->last = 0;

  /* Determine whether this loop starts with a jump down to a test at
     the end.  This will occur for a small number of loops with a test
     that is too complex to duplicate in front of the loop.

     We search for the first insn or label in the loop, skipping NOTEs.
     However, we must be careful not to skip past a NOTE_INSN_LOOP_BEG
     (because we might have a loop executed only once that contains a
     loop which starts with a jump to its exit test) or a NOTE_INSN_LOOP_END
     (in case we have a degenerate loop).

     Note that if we mistakenly think that a loop is entered at the top
     when, in fact, it is entered at the exit test, the only effect will be
     slightly poorer optimization.  Making the opposite error can generate
     incorrect code.  Since very few loops now start with a jump to the
     exit test, the code here to detect that case is very conservative.  */

  for (p = NEXT_INSN (loop_start);
       p != loop_end
       && !LABEL_P (p) && ! INSN_P (p)
       && (!NOTE_P (p)
	   || (NOTE_LINE_NUMBER (p) != NOTE_INSN_LOOP_BEG
	       && NOTE_LINE_NUMBER (p) != NOTE_INSN_LOOP_END));
       p = NEXT_INSN (p))
    ;

  loop->scan_start = p;

  /* If loop end is the end of the current function, then emit a
     NOTE_INSN_DELETED after loop_end and set loop->sink to the dummy
     note insn.  This is the position we use when sinking insns out of
     the loop.  */
  if (NEXT_INSN (loop->end) != 0)
    loop->sink = NEXT_INSN (loop->end);
  else
    loop->sink = emit_note_after (NOTE_INSN_DELETED, loop->end);

  /* Set up variables describing this loop.  */
  prescan_loop (loop);
  threshold = (loop_info->has_call ? 1 : 2) * (1 + n_non_fixed_regs);

  /* If loop has a jump before the first label,
     the true entry is the target of that jump.
     Start scan from there.
     But record in LOOP->TOP the place where the end-test jumps
     back to so we can scan that after the end of the loop.  */
  if (JUMP_P (p)
      /* Loop entry must be unconditional jump (and not a RETURN)  */
      && any_uncondjump_p (p)
      && JUMP_LABEL (p) != 0
      /* Check to see whether the jump actually
	 jumps out of the loop (meaning it's no loop).
	 This case can happen for things like
	 do {..} while (0).  If this label was generated previously
	 by loop, we can't tell anything about it and have to reject
	 the loop.  */
      && INSN_IN_RANGE_P (JUMP_LABEL (p), loop_start, loop_end))
    {
      loop->top = next_label (loop->scan_start);
      loop->scan_start = JUMP_LABEL (p);
    }

  /* If LOOP->SCAN_START was an insn created by loop, we don't know its luid
     as required by loop_reg_used_before_p.  So skip such loops.  (This
     test may never be true, but it's best to play it safe.)

     Also, skip loops where we do not start scanning at a label.  This
     test also rejects loops starting with a JUMP_INSN that failed the
     test above.  */

  if (INSN_UID (loop->scan_start) >= max_uid_for_loop
      || !LABEL_P (loop->scan_start))
    {
      if (loop_dump_stream)
	fprintf (loop_dump_stream, "\nLoop from %d to %d is phony.\n\n",
		 INSN_UID (loop_start), INSN_UID (loop_end));
      return;
    }

  /* Allocate extra space for REGs that might be created by load_mems.
     We allocate a little extra slop as well, in the hopes that we
     won't have to reallocate the regs array.  */
  loop_regs_scan (loop, loop_info->mems_idx + 16);
  insn_count = count_insns_in_loop (loop);

  if (loop_dump_stream)
    fprintf (loop_dump_stream, "\nLoop from %d to %d: %d real insns.\n",
	     INSN_UID (loop_start), INSN_UID (loop_end), insn_count);

  /* Scan through the loop finding insns that are safe to move.
     Set REGS->ARRAY[I].SET_IN_LOOP negative for the reg I being set, so that
     this reg will be considered invariant for subsequent insns.
     We consider whether subsequent insns use the reg
     in deciding whether it is worth actually moving.

     MAYBE_NEVER is nonzero if we have passed a conditional jump insn
     and therefore it is possible that the insns we are scanning
     would never be executed.  At such times, we must make sure
     that it is safe to execute the insn once instead of zero times.
     When MAYBE_NEVER is 0, all insns will be executed at least once
     so that is not a problem.  */

  for (in_libcall = 0, p = next_insn_in_loop (loop, loop->scan_start);
       p != NULL_RTX;
       p = next_insn_in_loop (loop, p))
    {
      if (in_libcall && INSN_P (p) && find_reg_note (p, REG_RETVAL, NULL_RTX))
	in_libcall--;
      if (NONJUMP_INSN_P (p))
	{
	  /* Do not scan past an optimization barrier.  */
	  if (GET_CODE (PATTERN (p)) == ASM_INPUT)
	    break;
	  temp = find_reg_note (p, REG_LIBCALL, NULL_RTX);
	  if (temp)
	    in_libcall++;
	  if (! in_libcall
	      && (set = single_set (p))
	      && REG_P (SET_DEST (set))
#ifdef PIC_OFFSET_TABLE_REG_CALL_CLOBBERED
	      && SET_DEST (set) != pic_offset_table_rtx
#endif
	      && ! regs->array[REGNO (SET_DEST (set))].may_not_optimize)
	    {
	      int tem1 = 0;
	      int tem2 = 0;
	      int move_insn = 0;
	      int insert_temp = 0;
	      rtx src = SET_SRC (set);
	      rtx dependencies = 0;

	      /* Figure out what to use as a source of this insn.  If a
		 REG_EQUIV note is given or if a REG_EQUAL note with a
		 constant operand is specified, use it as the source and
		 mark that we should move this insn by calling
		 emit_move_insn rather than duplicating the insn.

		 Otherwise, only use the REG_EQUAL contents if a REG_RETVAL
		 note is present.  */
	      temp = find_reg_note (p, REG_EQUIV, NULL_RTX);
	      if (temp)
		src = XEXP (temp, 0), move_insn = 1;
	      else
		{
		  temp = find_reg_note (p, REG_EQUAL, NULL_RTX);
		  if (temp && CONSTANT_P (XEXP (temp, 0)))
		    src = XEXP (temp, 0), move_insn = 1;
		  if (temp && find_reg_note (p, REG_RETVAL, NULL_RTX))
		    {
		      src = XEXP (temp, 0);
		      /* A libcall block can use regs that don't appear in
			 the equivalent expression.  To move the libcall,
			 we must move those regs too.  */
		      dependencies = libcall_other_reg (p, src);
		    }
		}

	      /* For parallels, add any possible uses to the dependencies, as
		 we can't move the insn without resolving them first.
		 MEMs inside CLOBBERs may also reference registers; these
		 count as implicit uses.  */
	      if (GET_CODE (PATTERN (p)) == PARALLEL)
		{
		  for (i = 0; i < XVECLEN (PATTERN (p), 0); i++)
		    {
		      rtx x = XVECEXP (PATTERN (p), 0, i);
		      if (GET_CODE (x) == USE)
			dependencies
			  = gen_rtx_EXPR_LIST (VOIDmode, XEXP (x, 0),
					       dependencies);
		      else if (GET_CODE (x) == CLOBBER
			       && MEM_P (XEXP (x, 0)))
			dependencies = find_regs_nested (dependencies,
							 XEXP (XEXP (x, 0), 0));
		    }
		}

	      if (/* The register is used in basic blocks other
		     than the one where it is set (meaning that
		     something after this point in the loop might
		     depend on its value before the set).  */
		  ! reg_in_basic_block_p (p, SET_DEST (set))
		  /* And the set is not guaranteed to be executed once
		     the loop starts, or the value before the set is
		     needed before the set occurs...

		     ??? Note we have quadratic behavior here, mitigated
		     by the fact that the previous test will often fail for
		     large loops.  Rather than re-scanning the entire loop
		     each time for register usage, we should build tables
		     of the register usage and use them here instead.  */
		  && (maybe_never
		      || loop_reg_used_before_p (loop, set, p)))
		/* It is unsafe to move the set.  However, it may be OK to
		   move the source into a new pseudo, and substitute a
		   reg-to-reg copy for the original insn.

		   This code used to consider it OK to move a set of a variable
		   which was not created by the user and not used in an exit
		   test.
		   That behavior is incorrect and was removed.  */
		insert_temp = 1;

	      /* Don't try to optimize a MODE_CC set with a constant
		 source.  It probably will be combined with a conditional
		 jump.  */
	      if (GET_MODE_CLASS (GET_MODE (SET_DEST (set))) == MODE_CC
		  && CONSTANT_P (src))
		;
	      /* Don't try to optimize a register that was made
		 by loop-optimization for an inner loop.
		 We don't know its life-span, so we can't compute
		 the benefit.  */
	      else if (REGNO (SET_DEST (set)) >= max_reg_before_loop)
		;
	      /* Don't move the source and add a reg-to-reg copy:
		 - with -Os (this certainly increases size),
		 - if the mode doesn't support copy operations (obviously),
		 - if the source is already a reg (the motion will gain nothing),
		 - if the source is a legitimate constant (likewise).  */
	      else if (insert_temp
		       && (optimize_size
			   || ! can_copy_p (GET_MODE (SET_SRC (set)))
			   || REG_P (SET_SRC (set))
			   || (CONSTANT_P (SET_SRC (set))
			       && LEGITIMATE_CONSTANT_P (SET_SRC (set)))))
		;
	      else if ((tem = loop_invariant_p (loop, src))
		       && (dependencies == 0
			   || (tem2
			       = loop_invariant_p (loop, dependencies)) != 0)
		       && (regs->array[REGNO (SET_DEST (set))].set_in_loop == 1
			   || (tem1
			       = consec_sets_invariant_p
			       (loop, SET_DEST (set),
				regs->array[REGNO (SET_DEST (set))].set_in_loop,
				p)))
		       /* If the insn can cause a trap (such as divide by zero),
			  can't move it unless it's guaranteed to be executed
			  once loop is entered.  Even a function call might
			  prevent the trap insn from being reached
			  (since it might exit!)  */
		       && ! ((maybe_never || call_passed)
			     && may_trap_p (src)))
		{
		  struct movable *m;
		  int regno = REGNO (SET_DEST (set));

		  /* A potential lossage is where we have a case where two insns
		     can be combined as long as they are both in the loop, but
		     we move one of them outside the loop.  For large loops,
		     this can lose.  The most common case of this is the address
		     of a function being called.

		     Therefore, if this register is marked as being used
		     exactly once and we are in a loop with calls
		     (a "large loop"), see if we can replace the usage of
		     this register with the source of this SET.  If we can,
		     delete this insn.

		     Don't do this if P has a REG_RETVAL note or if we have
		     SMALL_REGISTER_CLASSES and SET_SRC is a hard register.  */

		  if (loop_info->has_call
		      && regs->array[regno].single_usage != 0
		      && regs->array[regno].single_usage != const0_rtx
		      && REGNO_FIRST_UID (regno) == INSN_UID (p)
		      && (REGNO_LAST_UID (regno)
			  == INSN_UID (regs->array[regno].single_usage))
		      && regs->array[regno].set_in_loop == 1
		      && GET_CODE (SET_SRC (set)) != ASM_OPERANDS
		      && ! side_effects_p (SET_SRC (set))
		      && ! find_reg_note (p, REG_RETVAL, NULL_RTX)
		      && (! SMALL_REGISTER_CLASSES
			  || (! (REG_P (SET_SRC (set))
				 && (REGNO (SET_SRC (set))
				     < FIRST_PSEUDO_REGISTER))))
		      && regno >= FIRST_PSEUDO_REGISTER
		      /* This test is not redundant; SET_SRC (set) might be
			 a call-clobbered register and the life of REGNO
			 might span a call.  */
		      && ! modified_between_p (SET_SRC (set), p,
					       regs->array[regno].single_usage)
		      && no_labels_between_p (p,
					      regs->array[regno].single_usage)
		      && validate_replace_rtx (SET_DEST (set), SET_SRC (set),
					       regs->array[regno].single_usage))
		    {
		      /* Replace any usage in a REG_EQUAL note.  Must copy
			 the new source, so that we don't get rtx sharing
			 between the SET_SOURCE and REG_NOTES of insn p.  */
		      REG_NOTES (regs->array[regno].single_usage)
			= (replace_rtx
			   (REG_NOTES (regs->array[regno].single_usage),
			    SET_DEST (set), copy_rtx (SET_SRC (set))));

		      delete_insn (p);
		      for (i = 0; i < LOOP_REGNO_NREGS (regno, SET_DEST (set));
			   i++)
			regs->array[regno+i].set_in_loop = 0;
		      continue;
		    }

		  m = xmalloc (sizeof (struct movable));
		  m->next = 0;
		  m->insn = p;
		  m->set_src = src;
		  m->dependencies = dependencies;
		  m->set_dest = SET_DEST (set);
		  m->force = 0;
		  m->consec
		    = regs->array[REGNO (SET_DEST (set))].set_in_loop - 1;
		  m->done = 0;
		  m->forces = 0;
		  m->partial = 0;
		  m->move_insn = move_insn;
		  m->move_insn_first = 0;
		  m->insert_temp = insert_temp;
		  m->is_equiv = (find_reg_note (p, REG_EQUIV, NULL_RTX) != 0);
		  m->savemode = VOIDmode;
		  m->regno = regno;
		  /* Set M->cond if either loop_invariant_p
		     or consec_sets_invariant_p returned 2
		     (only conditionally invariant).  */
		  m->cond = ((tem | tem1 | tem2) > 1);
		  m->global = LOOP_REG_GLOBAL_P (loop, regno);
		  m->match = 0;
		  m->lifetime = LOOP_REG_LIFETIME (loop, regno);
		  m->savings = regs->array[regno].n_times_set;
		  if (find_reg_note (p, REG_RETVAL, NULL_RTX))
		    m->savings += libcall_benefit (p);
		  for (i = 0; i < LOOP_REGNO_NREGS (regno, SET_DEST (set)); i++)
		    regs->array[regno+i].set_in_loop = move_insn ? -2 : -1;
		  /* Add M to the end of the chain MOVABLES.  */
		  loop_movables_add (movables, m);

		  if (m->consec > 0)
		    {
		      /* It is possible for the first instruction to have a
			 REG_EQUAL note but a non-invariant SET_SRC, so we must
			 remember the status of the first instruction in case
			 the last instruction doesn't have a REG_EQUAL note.  */
		      m->move_insn_first = m->move_insn;

		      /* Skip this insn, not checking REG_LIBCALL notes.  */
		      p = next_nonnote_insn (p);
		      /* Skip the consecutive insns, if there are any.  */
		      p = skip_consec_insns (p, m->consec);
		      /* Back up to the last insn of the consecutive group.  */
		      p = prev_nonnote_insn (p);

		      /* We must now reset m->move_insn, m->is_equiv, and
			 possibly m->set_src to correspond to the effects of
			 all the insns.  */
		      temp = find_reg_note (p, REG_EQUIV, NULL_RTX);
		      if (temp)
			m->set_src = XEXP (temp, 0), m->move_insn = 1;
		      else
			{
			  temp = find_reg_note (p, REG_EQUAL, NULL_RTX);
			  if (temp && CONSTANT_P (XEXP (temp, 0)))
			    m->set_src = XEXP (temp, 0), m->move_insn = 1;
			  else
			    m->move_insn = 0;
			}
		      m->is_equiv
			= (find_reg_note (p, REG_EQUIV, NULL_RTX) != 0);
		    }
		}
	      /* If this register is always set within a STRICT_LOW_PART
		 or set to zero, then its high bytes are constant.
		 So clear them outside the loop and within the loop
		 just load the low bytes.
		 We must check that the machine has an instruction to do so.
		 Also, if the value loaded into the register
		 depends on the same register, this cannot be done.  */
	      else if (SET_SRC (set) == const0_rtx
		       && NONJUMP_INSN_P (NEXT_INSN (p))
		       && (set1 = single_set (NEXT_INSN (p)))
		       && GET_CODE (set1) == SET
		       && (GET_CODE (SET_DEST (set1)) == STRICT_LOW_PART)
		       && (GET_CODE (XEXP (SET_DEST (set1), 0)) == SUBREG)
		       && (SUBREG_REG (XEXP (SET_DEST (set1), 0))
			   == SET_DEST (set))
		       && !reg_mentioned_p (SET_DEST (set), SET_SRC (set1)))
		{
		  int regno = REGNO (SET_DEST (set));
		  if (regs->array[regno].set_in_loop == 2)
		    {
		      struct movable *m;
		      m = xmalloc (sizeof (struct movable));
		      m->next = 0;
		      m->insn = p;
		      m->set_dest = SET_DEST (set);
		      m->dependencies = 0;
		      m->force = 0;
		      m->consec = 0;
		      m->done = 0;
		      m->forces = 0;
		      m->move_insn = 0;
		      m->move_insn_first = 0;
		      m->insert_temp = insert_temp;
		      m->partial = 1;
		      /* If the insn may not be executed on some cycles,
			 we can't clear the whole reg; clear just high part.
			 Not even if the reg is used only within this loop.
			 Consider this:
			 while (1)
			   while (s != t) {
			     if (foo ()) x = *s;
			     use (x);
			   }
			 Clearing x before the inner loop could clobber a value
			 being saved from the last time around the outer loop.
			 However, if the reg is not used outside this loop
			 and all uses of the register are in the same
			 basic block as the store, there is no problem.

			 If this insn was made by loop, we don't know its
			 INSN_LUID and hence must make a conservative
			 assumption.  */
		      m->global = (INSN_UID (p) >= max_uid_for_loop
				   || LOOP_REG_GLOBAL_P (loop, regno)
				   || (labels_in_range_p
				       (p, REGNO_FIRST_LUID (regno))));
		      if (maybe_never && m->global)
			m->savemode = GET_MODE (SET_SRC (set1));
		      else
			m->savemode = VOIDmode;
		      m->regno = regno;
		      m->cond = 0;
		      m->match = 0;
		      m->lifetime = LOOP_REG_LIFETIME (loop, regno);
		      m->savings = 1;
		      for (i = 0;
			   i < LOOP_REGNO_NREGS (regno, SET_DEST (set));
			   i++)
			regs->array[regno+i].set_in_loop = -1;
		      /* Add M to the end of the chain MOVABLES.  */
		      loop_movables_add (movables, m);
		    }
		}
	    }
	}
      /* Past a call insn, we get to insns which might not be executed
	 because the call might exit.  This matters for insns that trap.
	 Constant and pure call insns always return, so they don't count.  */
      else if (CALL_P (p) && ! CONST_OR_PURE_CALL_P (p))
	call_passed = 1;
      /* Past a label or a jump, we get to insns for which we
	 can't count on whether or how many times they will be
	 executed during each iteration.  Therefore, we can
	 only move out sets of trivial variables
	 (those not used after the loop).  */
      /* Similar code appears twice in strength_reduce.  */
      else if ((LABEL_P (p) || JUMP_P (p))
	       /* If we enter the loop in the middle, and scan around to the
		  beginning, don't set maybe_never for that.  This must be an
		  unconditional jump, otherwise the code at the top of the
		  loop might never be executed.  Unconditional jumps are
		  followed by a barrier then the loop_end.  */
	       && ! (JUMP_P (p) && JUMP_LABEL (p) == loop->top
		     && NEXT_INSN (NEXT_INSN (p)) == loop_end
		     && any_uncondjump_p (p)))
	maybe_never = 1;
    }
1489 /* If one movable subsumes another, ignore that other. */
1491 ignore_some_movables (movables);
1493 /* For each movable insn, see if the reg that it loads
1494 leads when it dies right into another conditionally movable insn.
1495 If so, record that the second insn "forces" the first one,
1496 since the second can be moved only if the first is. */
1498 force_movables (movables);
1500 /* See if there are multiple movable insns that load the same value.
1501 If there are, make all but the first point at the first one
1502 through the `match' field, and add all their priorities
1503 together as the priority of the first. */
1505 combine_movables (movables, regs);
1507 /* Now consider each movable insn to decide whether it is worth moving.
1508 Store 0 in regs->array[I].set_in_loop for each reg I that is moved.
1510 For machines with few registers, this increases code size, so do not
1511 move movables when optimizing for code size on such machines.
1512 (The 18 below is the value for i386.) */
1514 if (!optimize_size
1515 || (reg_class_size[GENERAL_REGS] > 18 && !loop_info->has_call))
1517 move_movables (loop, movables, threshold, insn_count);
1519 /* Recalculate regs->array if move_movables has created new
1520 registers. */
1521 if (max_reg_num () > regs->num)
1523 loop_regs_scan (loop, 0);
1524 for (update_start = loop_start;
1525 PREV_INSN (update_start)
1526 && !LABEL_P (PREV_INSN (update_start));
1527 update_start = PREV_INSN (update_start))
1528 ;
1529 update_end = NEXT_INSN (loop_end);
1531 reg_scan_update (update_start, update_end, loop_max_reg);
1532 loop_max_reg = max_reg_num ();
1536 /* Now candidates that still are negative are those not moved.
1537 Change regs->array[I].set_in_loop to indicate that those are not actually
1538 invariant. */
1539 for (i = 0; i < regs->num; i++)
1540 if (regs->array[i].set_in_loop < 0)
1541 regs->array[i].set_in_loop = regs->array[i].n_times_set;
1543 /* Now that we've moved some things out of the loop, we might be able to
1544 hoist even more memory references. */
1545 load_mems (loop);
1547 /* Recalculate regs->array if load_mems has created new registers. */
1548 if (max_reg_num () > regs->num)
1549 loop_regs_scan (loop, 0);
1551 for (update_start = loop_start;
1552 PREV_INSN (update_start)
1553 && !LABEL_P (PREV_INSN (update_start));
1554 update_start = PREV_INSN (update_start))
1555 ;
1556 update_end = NEXT_INSN (loop_end);
1558 reg_scan_update (update_start, update_end, loop_max_reg);
1559 loop_max_reg = max_reg_num ();
1561 if (flag_strength_reduce)
1563 if (update_end && LABEL_P (update_end))
1564 /* Ensure our label doesn't go away. */
1565 LABEL_NUSES (update_end)++;
1567 strength_reduce (loop, flags);
1569 reg_scan_update (update_start, update_end, loop_max_reg);
1570 loop_max_reg = max_reg_num ();
1572 if (update_end && LABEL_P (update_end)
1573 && --LABEL_NUSES (update_end) == 0)
1574 delete_related_insns (update_end);
1578 /* The movable information was needed by strength reduction above; it can be freed now. */
1579 loop_movables_free (movables);
1581 free (regs->array);
1582 regs->array = 0;
1583 regs->num = 0;
1586 /* Add elements to *OUTPUT to record all the pseudo-regs
1587 mentioned in IN_THIS but not mentioned in NOT_IN_THIS. */
1589 static void
1590 record_excess_regs (rtx in_this, rtx not_in_this, rtx *output)
1592 enum rtx_code code;
1593 const char *fmt;
1594 int i;
1596 code = GET_CODE (in_this);
1598 switch (code)
1600 case PC:
1601 case CC0:
1602 case CONST_INT:
1603 case CONST_DOUBLE:
1604 case CONST:
1605 case SYMBOL_REF:
1606 case LABEL_REF:
1607 return;
1609 case REG:
1610 if (REGNO (in_this) >= FIRST_PSEUDO_REGISTER
1611 && ! reg_mentioned_p (in_this, not_in_this))
1612 *output = gen_rtx_EXPR_LIST (VOIDmode, in_this, *output);
1613 return;
1615 default:
1616 break;
1619 fmt = GET_RTX_FORMAT (code);
1620 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1622 int j;
1624 switch (fmt[i])
1626 case 'E':
1627 for (j = 0; j < XVECLEN (in_this, i); j++)
1628 record_excess_regs (XVECEXP (in_this, i, j), not_in_this, output);
1629 break;
1631 case 'e':
1632 record_excess_regs (XEXP (in_this, i), not_in_this, output);
1633 break;
1638 /* Check what regs are referred to in the libcall block ending with INSN,
1639 aside from those mentioned in the equivalent value.
1640 If there are none, return 0.
1641 If there are one or more, return an EXPR_LIST containing all of them. */
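/* An illustrative sketch with invented insn numbers: a libcall block

	(insn 10 ...)	carries a REG_LIBCALL note pointing to insn 12
	(insn 11 ...)	loads an argument register
	(insn 12 ...)	carries the REG_RETVAL note pointing back to 10

   is walked from insn 10 up to but not including INSN (insn 12), and
   every pseudo mentioned there but absent from EQUIV is collected.  */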
1643 static rtx
1644 libcall_other_reg (rtx insn, rtx equiv)
1646 rtx note = find_reg_note (insn, REG_RETVAL, NULL_RTX);
1647 rtx p = XEXP (note, 0);
1648 rtx output = 0;
1650 /* First, find all the regs used in the libcall block
1651 that are not mentioned as inputs to the result. */
1653 while (p != insn)
1655 if (INSN_P (p))
1656 record_excess_regs (PATTERN (p), equiv, &output);
1657 p = NEXT_INSN (p);
1660 return output;
1663 /* Return 1 if all uses of REG
1664 are between INSN and the end of the basic block. */
1666 static int
1667 reg_in_basic_block_p (rtx insn, rtx reg)
1669 int regno = REGNO (reg);
1670 rtx p;
1672 if (REGNO_FIRST_UID (regno) != INSN_UID (insn))
1673 return 0;
1675 /* Search this basic block for the already recorded last use of the reg. */
1676 for (p = insn; p; p = NEXT_INSN (p))
1678 switch (GET_CODE (p))
1680 case NOTE:
1681 break;
1683 case INSN:
1684 case CALL_INSN:
1685 /* Ordinary insn: if this is the last use, we win. */
1686 if (REGNO_LAST_UID (regno) == INSN_UID (p))
1687 return 1;
1688 break;
1690 case JUMP_INSN:
1691 /* Jump insn: if this is the last use, we win. */
1692 if (REGNO_LAST_UID (regno) == INSN_UID (p))
1693 return 1;
1694 /* Otherwise, it's the end of the basic block, so we lose. */
1695 return 0;
1697 case CODE_LABEL:
1698 case BARRIER:
1699 /* It's the end of the basic block, so we lose. */
1700 return 0;
1702 default:
1703 break;
1707 /* The "last use" that was recorded can't be found after the first
1708 use. This can happen when the last use was deleted while
1709 processing an inner loop, this inner loop was then completely
1710 unrolled, and the outer loop is always exited after the inner loop,
1711 so that everything after the first use becomes a single basic block. */
1712 return 1;
1715 /* Compute the benefit of eliminating the insns in the block whose
1716 last insn is LAST. This may be a group of insns used to compute a
1717 value directly or can contain a library call. */
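/* A worked example: a block holding two argument-setup insns and one
   CALL_INSN ahead of LAST scores 1 + 1 + 10 = 12, since each ordinary
   insn (USEs and CLOBBERs aside) counts 1 and each call counts 10;
   LAST itself is not counted.  */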
1719 static int
1720 libcall_benefit (rtx last)
1722 rtx insn;
1723 int benefit = 0;
1725 for (insn = XEXP (find_reg_note (last, REG_RETVAL, NULL_RTX), 0);
1726 insn != last; insn = NEXT_INSN (insn))
1728 if (CALL_P (insn))
1729 benefit += 10; /* Assume at least this many insns in a library
1730 routine. */
1731 else if (NONJUMP_INSN_P (insn)
1732 && GET_CODE (PATTERN (insn)) != USE
1733 && GET_CODE (PATTERN (insn)) != CLOBBER)
1734 benefit++;
1737 return benefit;
1740 /* Skip COUNT insns from INSN, counting library calls as 1 insn. */
1742 static rtx
1743 skip_consec_insns (rtx insn, int count)
1745 for (; count > 0; count--)
1747 rtx temp;
1749 /* If first insn of libcall sequence, skip to end. */
1750 /* Do this at start of loop, since INSN is guaranteed to
1751 be an insn here. */
1752 if (!NOTE_P (insn)
1753 && (temp = find_reg_note (insn, REG_LIBCALL, NULL_RTX)))
1754 insn = XEXP (temp, 0);
1756 do
1757 insn = NEXT_INSN (insn);
1758 while (NOTE_P (insn));
1761 return insn;
1764 /* Ignore any movable whose insn falls within a libcall
1765 which is part of another movable.
1766 We make use of the fact that the movable for the libcall value
1767 was made later and so appears later on the chain. */
1769 static void
1770 ignore_some_movables (struct loop_movables *movables)
1772 struct movable *m, *m1;
1774 for (m = movables->head; m; m = m->next)
1776 /* Is this a movable for the value of a libcall? */
1777 rtx note = find_reg_note (m->insn, REG_RETVAL, NULL_RTX);
1778 if (note)
1780 rtx insn;
1781 /* Check for earlier movables inside that range,
1782 and mark them invalid. We cannot use LUIDs here because
1783 insns created by loop.c for prior loops don't have LUIDs.
1784 Rather than reject all such insns from movables, we just
1785 explicitly check each insn in the libcall (since invariant
1786 libcalls aren't that common). */
1787 for (insn = XEXP (note, 0); insn != m->insn; insn = NEXT_INSN (insn))
1788 for (m1 = movables->head; m1 != m; m1 = m1->next)
1789 if (m1->insn == insn)
1790 m1->done = 1;
1795 /* For each movable insn, see if the reg that it loads
1796 dies, at its last use, feeding right into another conditionally
1797 movable insn. If so, record that the second insn "forces" the
1798 first one, since the second can be moved only if the first is. */
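/* A hypothetical instance, with invented register numbers:

	(set (reg 100) (const_int 5))		    movable M1
	...
	(set (reg 101) (plus (reg 100) (reg 102)))  conditionally movable
						    M, last use of reg 100

   M can be moved only if M1 is, so M's `forces' field is pointed at M1,
   and every movable in M1's force chain absorbs M's lifetime and
   savings.  */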
1800 static void
1801 force_movables (struct loop_movables *movables)
1803 struct movable *m, *m1;
1805 for (m1 = movables->head; m1; m1 = m1->next)
1806 /* Omit this if moving just the (SET (REG) 0) of a zero-extend. */
1807 if (!m1->partial && !m1->done)
1809 int regno = m1->regno;
1810 for (m = m1->next; m; m = m->next)
1811 /* ??? Could this be a bug? What if CSE caused the
1812 register of M1 to be used after this insn?
1813 Since CSE does not update regno_last_uid,
1814 this insn M->insn might not be where it dies.
1815 But very likely this doesn't matter; what matters is
1816 that M's reg is computed from M1's reg. */
1817 if (INSN_UID (m->insn) == REGNO_LAST_UID (regno)
1818 && !m->done)
1819 break;
1820 if (m != 0 && m->set_src == m1->set_dest
1821 /* If m->consec, m->set_src isn't valid. */
1822 && m->consec == 0)
1823 m = 0;
1825 /* Increase the priority of moving the first insn
1826 since it permits the second to be moved as well.
1827 Likewise for insns already forced by the first insn. */
1828 if (m != 0)
1830 struct movable *m2;
1832 m->forces = m1;
1833 for (m2 = m1; m2; m2 = m2->forces)
1835 m2->lifetime += m->lifetime;
1836 m2->savings += m->savings;
1842 /* Find invariant expressions that are equal and can be combined into
1843 one register. */
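/* For illustration, two hypothetical movables loading the same value:

	(set (reg 103) (const_int 10))	  M, kept
	(set (reg 104) (const_int 10))	  M1, gets m1->match = M

   Only reg 103 is then actually loaded before the loop; move_movables
   later substitutes reg 103 for reg 104 throughout.  */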
1845 static void
1846 combine_movables (struct loop_movables *movables, struct loop_regs *regs)
1848 struct movable *m;
1849 char *matched_regs = xmalloc (regs->num);
1850 enum machine_mode mode;
1852 /* Regs that are set more than once are not allowed to match
1853 or be matched. I'm no longer sure why not. */
1854 /* Only pseudo registers are allowed to match or be matched,
1855 since move_movables does not validate the change. */
1856 /* Perhaps testing m->consec_sets would be more appropriate here? */
1858 for (m = movables->head; m; m = m->next)
1859 if (m->match == 0 && regs->array[m->regno].n_times_set == 1
1860 && m->regno >= FIRST_PSEUDO_REGISTER
1861 && !m->insert_temp
1862 && !m->partial)
1864 struct movable *m1;
1865 int regno = m->regno;
1867 memset (matched_regs, 0, regs->num);
1868 matched_regs[regno] = 1;
1870 /* We want later insns to match the first one. Don't make the first
1871 one match any later ones. So start this loop at m->next. */
1872 for (m1 = m->next; m1; m1 = m1->next)
1873 if (m != m1 && m1->match == 0
1874 && !m1->insert_temp
1875 && regs->array[m1->regno].n_times_set == 1
1876 && m1->regno >= FIRST_PSEUDO_REGISTER
1877 /* A reg used outside the loop mustn't be eliminated. */
1878 && !m1->global
1879 /* A reg used for zero-extending mustn't be eliminated. */
1880 && !m1->partial
1881 && (matched_regs[m1->regno]
1882 ||
1883 (
1884 /* Can combine regs with different modes loaded from the
1885 same constant only if the modes are the same or
1886 if both are integer modes with M wider or the same
1887 width as M1. The check for integer is redundant, but
1888 safe, since the only case of differing destination
1889 modes with equal sources is when both sources are
1890 VOIDmode, i.e., CONST_INT. */
1891 (GET_MODE (m->set_dest) == GET_MODE (m1->set_dest)
1892 || (GET_MODE_CLASS (GET_MODE (m->set_dest)) == MODE_INT
1893 && GET_MODE_CLASS (GET_MODE (m1->set_dest)) == MODE_INT
1894 && (GET_MODE_BITSIZE (GET_MODE (m->set_dest))
1895 >= GET_MODE_BITSIZE (GET_MODE (m1->set_dest)))))
1896 /* See if the source of M1 says it matches M. */
1897 && ((REG_P (m1->set_src)
1898 && matched_regs[REGNO (m1->set_src)])
1899 || rtx_equal_for_loop_p (m->set_src, m1->set_src,
1900 movables, regs))))
1901 && ((m->dependencies == m1->dependencies)
1902 || rtx_equal_p (m->dependencies, m1->dependencies)))
1904 m->lifetime += m1->lifetime;
1905 m->savings += m1->savings;
1906 m1->done = 1;
1907 m1->match = m;
1908 matched_regs[m1->regno] = 1;
1912 /* Now combine the regs used for zero-extension.
1913 This can be done for those not marked `global'
1914 provided their lives don't overlap. */
1916 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1917 mode = GET_MODE_WIDER_MODE (mode))
1919 struct movable *m0 = 0;
1921 /* Combine all the registers for extension from mode MODE.
1922 Don't combine any that are used outside this loop. */
1923 for (m = movables->head; m; m = m->next)
1924 if (m->partial && ! m->global
1925 && mode == GET_MODE (SET_SRC (PATTERN (NEXT_INSN (m->insn)))))
1927 struct movable *m1;
1929 int first = REGNO_FIRST_LUID (m->regno);
1930 int last = REGNO_LAST_LUID (m->regno);
1932 if (m0 == 0)
1934 /* First one: don't check for overlap, just record it. */
1935 m0 = m;
1936 continue;
1939 /* Make sure they extend to the same mode.
1940 (Almost always true.) */
1941 if (GET_MODE (m->set_dest) != GET_MODE (m0->set_dest))
1942 continue;
1944 /* We already have one: check for overlap with those
1945 already combined together. */
1946 for (m1 = movables->head; m1 != m; m1 = m1->next)
1947 if (m1 == m0 || (m1->partial && m1->match == m0))
1948 if (! (REGNO_FIRST_LUID (m1->regno) > last
1949 || REGNO_LAST_LUID (m1->regno) < first))
1950 goto overlap;
1952 /* No overlap: we can combine this with the others. */
1953 m0->lifetime += m->lifetime;
1954 m0->savings += m->savings;
1955 m->done = 1;
1956 m->match = m0;
1958 overlap:
1959 ;
1963 /* Clean up. */
1964 free (matched_regs);
1967 /* Returns the number of movable instructions in LOOP that were not
1968 moved outside the loop. */
1970 static int
1971 num_unmoved_movables (const struct loop *loop)
1973 int num = 0;
1974 struct movable *m;
1976 for (m = LOOP_MOVABLES (loop)->head; m; m = m->next)
1977 if (!m->done)
1978 ++num;
1980 return num;
1984 /* Return 1 if regs X and Y will become the same if moved. */
1986 static int
1987 regs_match_p (rtx x, rtx y, struct loop_movables *movables)
1989 unsigned int xn = REGNO (x);
1990 unsigned int yn = REGNO (y);
1991 struct movable *mx, *my;
1993 for (mx = movables->head; mx; mx = mx->next)
1994 if (mx->regno == xn)
1995 break;
1997 for (my = movables->head; my; my = my->next)
1998 if (my->regno == yn)
1999 break;
2001 return (mx && my
2002 && ((mx->match == my->match && mx->match != 0)
2003 || mx->match == my
2004 || mx == my->match));
2007 /* Return 1 if X and Y are identical-looking rtx's.
2008 This is the Lisp function EQUAL for rtx arguments.
2010 If two registers are matching movables or a movable register and an
2011 equivalent constant, consider them equal. */
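/* For example, with an invented register: if a movable scheduled for
   replacement by its source (m->move_insn set, so set_in_loop == -2)
   loads (const_int 4) into reg 105, then (reg 105) compares equal to
   (const_int 4) here even though the rtx codes differ.  */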
2013 static int
2014 rtx_equal_for_loop_p (rtx x, rtx y, struct loop_movables *movables,
2015 struct loop_regs *regs)
2017 int i;
2018 int j;
2019 struct movable *m;
2020 enum rtx_code code;
2021 const char *fmt;
2023 if (x == y)
2024 return 1;
2025 if (x == 0 || y == 0)
2026 return 0;
2028 code = GET_CODE (x);
2030 /* If we have a register and a constant, they may sometimes be
2031 equal. */
2032 if (REG_P (x) && regs->array[REGNO (x)].set_in_loop == -2
2033 && CONSTANT_P (y))
2035 for (m = movables->head; m; m = m->next)
2036 if (m->move_insn && m->regno == REGNO (x)
2037 && rtx_equal_p (m->set_src, y))
2038 return 1;
2040 else if (REG_P (y) && regs->array[REGNO (y)].set_in_loop == -2
2041 && CONSTANT_P (x))
2043 for (m = movables->head; m; m = m->next)
2044 if (m->move_insn && m->regno == REGNO (y)
2045 && rtx_equal_p (m->set_src, x))
2046 return 1;
2049 /* Otherwise, rtx's of different codes cannot be equal. */
2050 if (code != GET_CODE (y))
2051 return 0;
2053 /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent.
2054 (REG:SI x) and (REG:HI x) are NOT equivalent. */
2056 if (GET_MODE (x) != GET_MODE (y))
2057 return 0;
2059 /* These three types of rtx's can be compared nonrecursively. */
2060 if (code == REG)
2061 return (REGNO (x) == REGNO (y) || regs_match_p (x, y, movables));
2063 if (code == LABEL_REF)
2064 return XEXP (x, 0) == XEXP (y, 0);
2065 if (code == SYMBOL_REF)
2066 return XSTR (x, 0) == XSTR (y, 0);
2068 /* Compare the elements. If any pair of corresponding elements
2069 fails to match, return 0 for the whole thing. */
2071 fmt = GET_RTX_FORMAT (code);
2072 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2074 switch (fmt[i])
2076 case 'w':
2077 if (XWINT (x, i) != XWINT (y, i))
2078 return 0;
2079 break;
2081 case 'i':
2082 if (XINT (x, i) != XINT (y, i))
2083 return 0;
2084 break;
2086 case 'E':
2087 /* Two vectors must have the same length. */
2088 if (XVECLEN (x, i) != XVECLEN (y, i))
2089 return 0;
2091 /* And the corresponding elements must match. */
2092 for (j = 0; j < XVECLEN (x, i); j++)
2093 if (rtx_equal_for_loop_p (XVECEXP (x, i, j), XVECEXP (y, i, j),
2094 movables, regs) == 0)
2095 return 0;
2096 break;
2098 case 'e':
2099 if (rtx_equal_for_loop_p (XEXP (x, i), XEXP (y, i), movables, regs)
2100 == 0)
2101 return 0;
2102 break;
2104 case 's':
2105 if (strcmp (XSTR (x, i), XSTR (y, i)))
2106 return 0;
2107 break;
2109 case 'u':
2110 /* These are just backpointers, so they don't matter. */
2111 break;
2113 case '0':
2114 break;
2116 /* It is believed that rtx's at this level will never
2117 contain anything but integers and other rtx's,
2118 except within LABEL_REFs and SYMBOL_REFs. */
2119 default:
2120 abort ();
2123 return 1;
2126 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to all
2127 insns in INSNS which use the reference. LABEL_NUSES for CODE_LABEL
2128 references is incremented once for each added note. */
2130 static void
2131 add_label_notes (rtx x, rtx insns)
2133 enum rtx_code code = GET_CODE (x);
2134 int i, j;
2135 const char *fmt;
2136 rtx insn;
2138 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
2140 /* This code used to ignore labels that referred to dispatch tables to
2141 avoid flow generating (slightly) worse code.
2143 We no longer ignore such label references (see LABEL_REF handling in
2144 mark_jump_label for additional information). */
2145 for (insn = insns; insn; insn = NEXT_INSN (insn))
2146 if (reg_mentioned_p (XEXP (x, 0), insn))
2148 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, XEXP (x, 0),
2149 REG_NOTES (insn));
2150 if (LABEL_P (XEXP (x, 0)))
2151 LABEL_NUSES (XEXP (x, 0))++;
2155 fmt = GET_RTX_FORMAT (code);
2156 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2158 if (fmt[i] == 'e')
2159 add_label_notes (XEXP (x, i), insns);
2160 else if (fmt[i] == 'E')
2161 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
2162 add_label_notes (XVECEXP (x, i, j), insns);
2166 /* Scan MOVABLES, and move the insns that deserve to be moved.
2167 If two matching movables are combined, replace one reg with the
2168 other throughout. */
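/* In source terms the net effect is, for a hypothetical loop:

	before:				after:
	   while (n--)			   t = a + b;
	     x[n] = a + b;		   while (n--)
					     x[n] = t;

   where matching movables are funneled onto one register via REG_MAP
   at the bottom of this function.  */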
2170 static void
2171 move_movables (struct loop *loop, struct loop_movables *movables,
2172 int threshold, int insn_count)
2174 struct loop_regs *regs = LOOP_REGS (loop);
2175 int nregs = regs->num;
2176 rtx new_start = 0;
2177 struct movable *m;
2178 rtx p;
2179 rtx loop_start = loop->start;
2180 rtx loop_end = loop->end;
2181 /* Map of pseudo-register replacements to handle combining
2182 when we move several insns that load the same value
2183 into different pseudo-registers. */
2184 rtx *reg_map = xcalloc (nregs, sizeof (rtx));
2185 char *already_moved = xcalloc (nregs, sizeof (char));
2187 for (m = movables->head; m; m = m->next)
2189 /* Describe this movable insn. */
2191 if (loop_dump_stream)
2193 fprintf (loop_dump_stream, "Insn %d: regno %d (life %d), ",
2194 INSN_UID (m->insn), m->regno, m->lifetime);
2195 if (m->consec > 0)
2196 fprintf (loop_dump_stream, "consec %d, ", m->consec);
2197 if (m->cond)
2198 fprintf (loop_dump_stream, "cond ");
2199 if (m->force)
2200 fprintf (loop_dump_stream, "force ");
2201 if (m->global)
2202 fprintf (loop_dump_stream, "global ");
2203 if (m->done)
2204 fprintf (loop_dump_stream, "done ");
2205 if (m->move_insn)
2206 fprintf (loop_dump_stream, "move-insn ");
2207 if (m->match)
2208 fprintf (loop_dump_stream, "matches %d ",
2209 INSN_UID (m->match->insn));
2210 if (m->forces)
2211 fprintf (loop_dump_stream, "forces %d ",
2212 INSN_UID (m->forces->insn));
2215 /* Ignore the insn if it's already done (it matched something else).
2216 Otherwise, see if it is now safe to move. */
2218 if (!m->done
2219 && (! m->cond
2220 || (1 == loop_invariant_p (loop, m->set_src)
2221 && (m->dependencies == 0
2222 || 1 == loop_invariant_p (loop, m->dependencies))
2223 && (m->consec == 0
2224 || 1 == consec_sets_invariant_p (loop, m->set_dest,
2225 m->consec + 1,
2226 m->insn))))
2227 && (! m->forces || m->forces->done))
2229 int regno;
2230 rtx p;
2231 int savings = m->savings;
2233 /* We have an insn that is safe to move.
2234 Compute its desirability. */
2236 p = m->insn;
2237 regno = m->regno;
2239 if (loop_dump_stream)
2240 fprintf (loop_dump_stream, "savings %d ", savings);
2242 if (regs->array[regno].moved_once && loop_dump_stream)
2243 fprintf (loop_dump_stream, "halved since already moved ");
2245 /* An insn MUST be moved if we already moved something else
2246 which is safe only if this one is moved too: that is,
2247 if already_moved[REGNO] is nonzero. */
2249 /* An insn is desirable to move if the new lifetime of the
2250 register is no more than THRESHOLD times the old lifetime.
2251 If it's not desirable, it means the loop is so big
2252 that moving won't speed things up much,
2253 and it is liable to make register usage worse. */
2255 /* It is also desirable to move if it can be moved at no
2256 extra cost because something else was already moved. */
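/* A worked example with invented numbers: given THRESHOLD 6, savings 2
   and lifetime 10, the product is 6 * 2 * 10 = 120, so the insn is
   moved when the loop holds at most 120 insns; only 60 if the reg was
   already moved out of another loop, since that doubles the required
   count. THRESHOLD itself shrinks by 3 after each move, below.  */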
2258 if (already_moved[regno]
2259 || (threshold * savings * m->lifetime) >=
2260 (regs->array[regno].moved_once ? insn_count * 2 : insn_count)
2261 || (m->forces && m->forces->done
2262 && regs->array[m->forces->regno].n_times_set == 1))
2264 int count;
2265 struct movable *m1;
2266 rtx first = NULL_RTX;
2267 rtx newreg = NULL_RTX;
2269 if (m->insert_temp)
2270 newreg = gen_reg_rtx (GET_MODE (m->set_dest));
2272 /* Now move the insns that set the reg. */
2274 if (m->partial && m->match)
2276 rtx newpat, i1;
2277 rtx r1, r2;
2278 /* Find the end of this chain of matching regs.
2279 Thus, we load each reg in the chain from that one reg.
2280 And that reg is loaded with 0 directly,
2281 since it has ->match == 0. */
2282 for (m1 = m; m1->match; m1 = m1->match);
2283 newpat = gen_move_insn (SET_DEST (PATTERN (m->insn)),
2284 SET_DEST (PATTERN (m1->insn)));
2285 i1 = loop_insn_hoist (loop, newpat);
2287 /* Mark the moved, invariant reg as being allowed to
2288 share a hard reg with the other matching invariant. */
2289 REG_NOTES (i1) = REG_NOTES (m->insn);
2290 r1 = SET_DEST (PATTERN (m->insn));
2291 r2 = SET_DEST (PATTERN (m1->insn));
2292 regs_may_share
2293 = gen_rtx_EXPR_LIST (VOIDmode, r1,
2294 gen_rtx_EXPR_LIST (VOIDmode, r2,
2295 regs_may_share));
2296 delete_insn (m->insn);
2298 if (new_start == 0)
2299 new_start = i1;
2301 if (loop_dump_stream)
2302 fprintf (loop_dump_stream, " moved to %d", INSN_UID (i1));
2304 /* If we are to re-generate the item being moved with a
2305 new move insn, first delete what we have and then emit
2306 the move insn before the loop. */
2307 else if (m->move_insn)
2309 rtx i1, temp, seq;
2311 for (count = m->consec; count >= 0; count--)
2313 /* If this is the first insn of a library call sequence,
2314 something is very wrong. */
2315 if (!NOTE_P (p)
2316 && (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
2317 abort ();
2319 /* If this is the last insn of a libcall sequence, then
2320 delete every insn in the sequence except the last.
2321 The last insn is handled in the normal manner. */
2322 if (!NOTE_P (p)
2323 && (temp = find_reg_note (p, REG_RETVAL, NULL_RTX)))
2325 temp = XEXP (temp, 0);
2326 while (temp != p)
2327 temp = delete_insn (temp);
2330 temp = p;
2331 p = delete_insn (p);
2333 /* simplify_giv_expr expects that it can walk the insns
2334 at m->insn forwards and see this old sequence we are
2335 tossing here. delete_insn does preserve the next
2336 pointers, but when we skip over a NOTE we must fix
2337 it up. Otherwise that code walks into the non-deleted
2338 insn stream. */
2339 while (p && NOTE_P (p))
2340 p = NEXT_INSN (temp) = NEXT_INSN (p);
2342 if (m->insert_temp)
2344 /* Replace the original insn with a move from
2345 our newly created temp. */
2346 start_sequence ();
2347 emit_move_insn (m->set_dest, newreg);
2348 seq = get_insns ();
2349 end_sequence ();
2350 emit_insn_before (seq, p);
2354 start_sequence ();
2355 emit_move_insn (m->insert_temp ? newreg : m->set_dest,
2356 m->set_src);
2357 seq = get_insns ();
2358 end_sequence ();
2360 add_label_notes (m->set_src, seq);
2362 i1 = loop_insn_hoist (loop, seq);
2363 if (! find_reg_note (i1, REG_EQUAL, NULL_RTX))
2364 set_unique_reg_note (i1,
2365 m->is_equiv ? REG_EQUIV : REG_EQUAL,
2366 m->set_src);
2368 if (loop_dump_stream)
2369 fprintf (loop_dump_stream, " moved to %d", INSN_UID (i1));
2371 /* The more regs we move, the less we like moving them. */
2372 threshold -= 3;
2374 else
2376 for (count = m->consec; count >= 0; count--)
2378 rtx i1, temp;
2380 /* If first insn of libcall sequence, skip to end. */
2381 /* Do this at start of loop, since p is guaranteed to
2382 be an insn here. */
2383 if (!NOTE_P (p)
2384 && (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
2385 p = XEXP (temp, 0);
2387 /* If last insn of libcall sequence, move all
2388 insns except the last before the loop. The last
2389 insn is handled in the normal manner. */
2390 if (!NOTE_P (p)
2391 && (temp = find_reg_note (p, REG_RETVAL, NULL_RTX)))
2393 rtx fn_address = 0;
2394 rtx fn_reg = 0;
2395 rtx fn_address_insn = 0;
2397 first = 0;
2398 for (temp = XEXP (temp, 0); temp != p;
2399 temp = NEXT_INSN (temp))
2401 rtx body;
2402 rtx n;
2403 rtx next;
2405 if (NOTE_P (temp))
2406 continue;
2408 body = PATTERN (temp);
2410 /* Find the next insn after TEMP,
2411 not counting USE or NOTE insns. */
2412 for (next = NEXT_INSN (temp); next != p;
2413 next = NEXT_INSN (next))
2414 if (! (NONJUMP_INSN_P (next)
2415 && GET_CODE (PATTERN (next)) == USE)
2416 && !NOTE_P (next))
2417 break;
2419 /* If that is the call, this may be the insn
2420 that loads the function address.
2422 Extract the function address from the insn
2423 that loads it into a register.
2424 If this insn was cse'd, we get incorrect code.
2426 So emit a new move insn that copies the
2427 function address into the register that the
2428 call insn will use. flow.c will delete any
2429 redundant stores that we have created. */
2430 if (CALL_P (next)
2431 && GET_CODE (body) == SET
2432 && REG_P (SET_DEST (body))
2433 && (n = find_reg_note (temp, REG_EQUAL,
2434 NULL_RTX)))
2436 fn_reg = SET_SRC (body);
2437 if (!REG_P (fn_reg))
2438 fn_reg = SET_DEST (body);
2439 fn_address = XEXP (n, 0);
2440 fn_address_insn = temp;
2442 /* We have the call insn.
2443 If it uses the register we suspect it might,
2444 load it with the correct address directly. */
2445 if (CALL_P (temp)
2446 && fn_address != 0
2447 && reg_referenced_p (fn_reg, body))
2448 loop_insn_emit_after (loop, 0, fn_address_insn,
2449 gen_move_insn
2450 (fn_reg, fn_address));
2452 if (CALL_P (temp))
2454 i1 = loop_call_insn_hoist (loop, body);
2455 /* Because the USAGE information potentially
2456 contains objects other than hard registers,
2457 we need to copy it. */
2458 if (CALL_INSN_FUNCTION_USAGE (temp))
2459 CALL_INSN_FUNCTION_USAGE (i1)
2460 = copy_rtx (CALL_INSN_FUNCTION_USAGE (temp));
2462 else
2463 i1 = loop_insn_hoist (loop, body);
2464 if (first == 0)
2465 first = i1;
2466 if (temp == fn_address_insn)
2467 fn_address_insn = i1;
2468 REG_NOTES (i1) = REG_NOTES (temp);
2469 REG_NOTES (temp) = NULL;
2470 delete_insn (temp);
2472 if (new_start == 0)
2473 new_start = first;
2475 if (m->savemode != VOIDmode)
2477 /* P sets REG to zero; but we should clear only
2478 the bits that are not covered by the mode
2479 m->savemode. */
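/* For instance, were m->savemode QImode (8 bits on a typical target),
   the mask below would be (1 << 8) - 1 = 0xff, so the AND clears every
   bit of REG except the low byte that the loop body itself stores.  */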
2480 rtx reg = m->set_dest;
2481 rtx sequence;
2482 rtx tem;
2484 start_sequence ();
2485 tem = expand_simple_binop
2486 (GET_MODE (reg), AND, reg,
2487 GEN_INT ((((HOST_WIDE_INT) 1
2488 << GET_MODE_BITSIZE (m->savemode)))
2489 - 1),
2490 reg, 1, OPTAB_LIB_WIDEN);
2491 if (tem == 0)
2492 abort ();
2493 if (tem != reg)
2494 emit_move_insn (reg, tem);
2495 sequence = get_insns ();
2496 end_sequence ();
2497 i1 = loop_insn_hoist (loop, sequence);
2499 else if (CALL_P (p))
2501 i1 = loop_call_insn_hoist (loop, PATTERN (p));
2502 /* Because the USAGE information potentially
2503 contains objects other than hard registers,
2504 we need to copy it. */
2505 if (CALL_INSN_FUNCTION_USAGE (p))
2506 CALL_INSN_FUNCTION_USAGE (i1)
2507 = copy_rtx (CALL_INSN_FUNCTION_USAGE (p));
2509 else if (count == m->consec && m->move_insn_first)
2511 rtx seq;
2512 /* The SET_SRC might not be invariant, so we must
2513 use the REG_EQUAL note. */
2514 start_sequence ();
2515 emit_move_insn (m->insert_temp ? newreg : m->set_dest,
2516 m->set_src);
2517 seq = get_insns ();
2518 end_sequence ();
2520 add_label_notes (m->set_src, seq);
2522 i1 = loop_insn_hoist (loop, seq);
2523 if (! find_reg_note (i1, REG_EQUAL, NULL_RTX))
2524 set_unique_reg_note (i1, m->is_equiv ? REG_EQUIV
2525 : REG_EQUAL, m->set_src);
2527 else if (m->insert_temp)
2529 rtx *reg_map2 = xcalloc (REGNO (newreg),
2530 sizeof(rtx));
2531 reg_map2 [m->regno] = newreg;
2533 i1 = loop_insn_hoist (loop, copy_rtx (PATTERN (p)));
2534 replace_regs (i1, reg_map2, REGNO (newreg), 1);
2535 free (reg_map2);
2537 else
2538 i1 = loop_insn_hoist (loop, PATTERN (p));
2540 if (REG_NOTES (i1) == 0)
2542 REG_NOTES (i1) = REG_NOTES (p);
2543 REG_NOTES (p) = NULL;
2545 /* If there is a REG_EQUAL note present whose value
2546 is not loop invariant, then delete it, since it
2547 may cause problems with later optimization passes.
2548 It is possible for cse to create such notes
2549 like this as a result of record_jump_cond. */
2551 if ((temp = find_reg_note (i1, REG_EQUAL, NULL_RTX))
2552 && ! loop_invariant_p (loop, XEXP (temp, 0)))
2553 remove_note (i1, temp);
2556 if (new_start == 0)
2557 new_start = i1;
2559 if (loop_dump_stream)
2560 fprintf (loop_dump_stream, " moved to %d",
2561 INSN_UID (i1));
2563 /* If library call, now fix the REG_NOTES that contain
2564 insn pointers, namely REG_LIBCALL on FIRST
2565 and REG_RETVAL on I1. */
2566 if ((temp = find_reg_note (i1, REG_RETVAL, NULL_RTX)))
2568 XEXP (temp, 0) = first;
2569 temp = find_reg_note (first, REG_LIBCALL, NULL_RTX);
2570 XEXP (temp, 0) = i1;
2573 temp = p;
2574 delete_insn (p);
2575 p = NEXT_INSN (p);
2577 /* simplify_giv_expr expects that it can walk the insns
2578 at m->insn forwards and see this old sequence we are
2579 tossing here. delete_insn does preserve the next
2580 pointers, but when we skip over a NOTE we must fix
2581 it up. Otherwise that code walks into the non-deleted
2582 insn stream. */
2583 while (p && NOTE_P (p))
2584 p = NEXT_INSN (temp) = NEXT_INSN (p);
2586 if (m->insert_temp)
2588 rtx seq;
2589 /* Replace the original insn with a move from
2590 our newly created temp. */
2591 start_sequence ();
2592 emit_move_insn (m->set_dest, newreg);
2593 seq = get_insns ();
2594 end_sequence ();
2595 emit_insn_before (seq, p);
2599 /* The more regs we move, the less we like moving them. */
2600 threshold -= 3;
2603 m->done = 1;
2605 if (!m->insert_temp)
2607 /* Any other movable that loads the same register
2608 MUST be moved. */
2609 already_moved[regno] = 1;
2611 /* This reg has been moved out of one loop. */
2612 regs->array[regno].moved_once = 1;
2614 /* The reg set here is now invariant. */
2615 if (! m->partial)
2617 int i;
2618 for (i = 0; i < LOOP_REGNO_NREGS (regno, m->set_dest); i++)
2619 regs->array[regno+i].set_in_loop = 0;
2622 /* Change the length-of-life info for the register
2623 to say it lives at least the full length of this loop.
2624 This will help guide optimizations in outer loops. */
2626 if (REGNO_FIRST_LUID (regno) > INSN_LUID (loop_start))
2627 /* This is the old insn before all the moved insns.
2628 We can't use the moved insn because it is out of range
2629 in uid_luid. Only the old insns have luids. */
2630 REGNO_FIRST_UID (regno) = INSN_UID (loop_start);
2631 if (REGNO_LAST_LUID (regno) < INSN_LUID (loop_end))
2632 REGNO_LAST_UID (regno) = INSN_UID (loop_end);
2635 /* Combine with this moved insn any other matching movables. */
2637 if (! m->partial)
2638 for (m1 = movables->head; m1; m1 = m1->next)
2639 if (m1->match == m)
2641 rtx temp;
2643 /* Schedule the reg loaded by M1
2644 for replacement so that it shares the reg of M.
2645 If the modes differ (only possible in restricted
2646 circumstances), make a SUBREG.
2648 Note this assumes that the target dependent files
2649 treat REG and SUBREG equally, including within
2650 GO_IF_LEGITIMATE_ADDRESS and in all the
2651 predicates since we never verify that replacing the
2652 original register with a SUBREG results in a
2653 recognizable insn. */
2654 if (GET_MODE (m->set_dest) == GET_MODE (m1->set_dest))
2655 reg_map[m1->regno] = m->set_dest;
2656 else
2657 reg_map[m1->regno]
2658 = gen_lowpart_common (GET_MODE (m1->set_dest),
2659 m->set_dest);
2661 /* Get rid of the matching insn
2662 and prevent further processing of it. */
2663 m1->done = 1;
2665 /* If library call, delete all insns. */
2666 if ((temp = find_reg_note (m1->insn, REG_RETVAL,
2667 NULL_RTX)))
2668 delete_insn_chain (XEXP (temp, 0), m1->insn);
2669 else
2670 delete_insn (m1->insn);
2672 /* Any other movable that loads the same register
2673 MUST be moved. */
2674 already_moved[m1->regno] = 1;
2676 /* The reg merged here is now invariant,
2677 if the reg it matches is invariant. */
2678 if (! m->partial)
2680 int i;
2681 for (i = 0;
2682 i < LOOP_REGNO_NREGS (regno, m1->set_dest);
2683 i++)
2684 regs->array[m1->regno+i].set_in_loop = 0;
2688 else if (loop_dump_stream)
2689 fprintf (loop_dump_stream, "not desirable");
2691 else if (loop_dump_stream && !m->match)
2692 fprintf (loop_dump_stream, "not safe");
2694 if (loop_dump_stream)
2695 fprintf (loop_dump_stream, "\n");
2698 if (new_start == 0)
2699 new_start = loop_start;
2701 /* Go through all the instructions in the loop, making
2702 all the register substitutions scheduled in REG_MAP. */
2703 for (p = new_start; p != loop_end; p = NEXT_INSN (p))
2704 if (INSN_P (p))
2706 replace_regs (PATTERN (p), reg_map, nregs, 0);
2707 replace_regs (REG_NOTES (p), reg_map, nregs, 0);
2708 INSN_CODE (p) = -1;
2711 /* Clean up. */
2712 free (reg_map);
2713 free (already_moved);
2717 static void
2718 loop_movables_add (struct loop_movables *movables, struct movable *m)
2720 if (movables->head == 0)
2721 movables->head = m;
2722 else
2723 movables->last->next = m;
2724 movables->last = m;
2728 static void
2729 loop_movables_free (struct loop_movables *movables)
2731 struct movable *m;
2732 struct movable *m_next;
2734 for (m = movables->head; m; m = m_next)
2736 m_next = m->next;
2737 free (m);
2741 #if 0
2742 /* Scan X and replace the address of any MEM in it with ADDR.
2743 REG is the address that MEM should have before the replacement. */
2745 static void
2746 replace_call_address (rtx x, rtx reg, rtx addr)
2748 enum rtx_code code;
2749 int i;
2750 const char *fmt;
2752 if (x == 0)
2753 return;
2754 code = GET_CODE (x);
2755 switch (code)
2757 case PC:
2758 case CC0:
2759 case CONST_INT:
2760 case CONST_DOUBLE:
2761 case CONST:
2762 case SYMBOL_REF:
2763 case LABEL_REF:
2764 case REG:
2765 return;
2767 case SET:
2768 /* Short cut for very common case. */
2769 replace_call_address (XEXP (x, 1), reg, addr);
2770 return;
2772 case CALL:
2773 /* Short cut for very common case. */
2774 replace_call_address (XEXP (x, 0), reg, addr);
2775 return;
2777 case MEM:
2778 /* If this MEM uses a reg other than the one we expected,
2779 something is wrong. */
2780 if (XEXP (x, 0) != reg)
2781 abort ();
2782 XEXP (x, 0) = addr;
2783 return;
2785 default:
2786 break;
2789 fmt = GET_RTX_FORMAT (code);
2790 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2792 if (fmt[i] == 'e')
2793 replace_call_address (XEXP (x, i), reg, addr);
2794 else if (fmt[i] == 'E')
2796 int j;
2797 for (j = 0; j < XVECLEN (x, i); j++)
2798 replace_call_address (XVECEXP (x, i, j), reg, addr);
2802 #endif
2804 /* Return the number of memory refs to addresses that vary
2805 in the rtx X. */
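/* For example, with invented registers: (mem (reg 106)) counts 0 when
   reg 106 is invariant in LOOP, while (mem (plus (reg 106) (reg 107)))
   counts 1 when reg 107 is modified inside the loop.  */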
2807 static int
2808 count_nonfixed_reads (const struct loop *loop, rtx x)
2810 enum rtx_code code;
2811 int i;
2812 const char *fmt;
2813 int value;
2815 if (x == 0)
2816 return 0;
2818 code = GET_CODE (x);
2819 switch (code)
2821 case PC:
2822 case CC0:
2823 case CONST_INT:
2824 case CONST_DOUBLE:
2825 case CONST:
2826 case SYMBOL_REF:
2827 case LABEL_REF:
2828 case REG:
2829 return 0;
2831 case MEM:
2832 return ((loop_invariant_p (loop, XEXP (x, 0)) != 1)
2833 + count_nonfixed_reads (loop, XEXP (x, 0)));
2835 default:
2836 break;
2839 value = 0;
2840 fmt = GET_RTX_FORMAT (code);
2841 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2843 if (fmt[i] == 'e')
2844 value += count_nonfixed_reads (loop, XEXP (x, i));
2845 if (fmt[i] == 'E')
2847 int j;
2848 for (j = 0; j < XVECLEN (x, i); j++)
2849 value += count_nonfixed_reads (loop, XVECEXP (x, i, j));
2852 return value;
2855 /* Scan a loop setting the elements `loops_enclosed',
2856 `has_call', `has_nonconst_call', `has_volatile', `has_tablejump',
2857 `unknown_address_altered', `unknown_constant_address_altered', and
2858 `num_mem_sets' in LOOP. Also, fill in the array `mems' and the
2859 list `store_mems' in LOOP. */
2861 static void
2862 prescan_loop (struct loop *loop)
2864 int level = 1;
2865 rtx insn;
2866 struct loop_info *loop_info = LOOP_INFO (loop);
2867 rtx start = loop->start;
2868 rtx end = loop->end;
2869 /* The label after END. Jumping here is just like falling off the
2870 end of the loop. We use next_nonnote_insn instead of next_label
2871 as a hedge against the (pathological) case where some actual insn
2872 might end up between the two. */
2873 rtx exit_target = next_nonnote_insn (end);
2875 loop_info->has_indirect_jump = indirect_jump_in_function;
2876 loop_info->pre_header_has_call = 0;
2877 loop_info->has_call = 0;
2878 loop_info->has_nonconst_call = 0;
2879 loop_info->has_prefetch = 0;
2880 loop_info->has_volatile = 0;
2881 loop_info->has_tablejump = 0;
2882 loop_info->has_multiple_exit_targets = 0;
2883 loop->level = 1;
2885 loop_info->unknown_address_altered = 0;
2886 loop_info->unknown_constant_address_altered = 0;
2887 loop_info->store_mems = NULL_RTX;
2888 loop_info->first_loop_store_insn = NULL_RTX;
2889 loop_info->mems_idx = 0;
2890 loop_info->num_mem_sets = 0;
2892 for (insn = start; insn && !LABEL_P (insn);
2893 insn = PREV_INSN (insn))
2895 if (CALL_P (insn))
2897 loop_info->pre_header_has_call = 1;
2898 break;
2902 for (insn = NEXT_INSN (start); insn != NEXT_INSN (end);
2903 insn = NEXT_INSN (insn))
2905 switch (GET_CODE (insn))
2907 case NOTE:
2908 if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
2910 ++level;
2911 /* Count number of loops contained in this one. */
2912 loop->level++;
2914 else if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_END)
2915 --level;
2916 break;
2918 case CALL_INSN:
2919 if (! CONST_OR_PURE_CALL_P (insn))
2921 loop_info->unknown_address_altered = 1;
2922 loop_info->has_nonconst_call = 1;
2924 else if (pure_call_p (insn))
2925 loop_info->has_nonconst_call = 1;
2926 loop_info->has_call = 1;
2927 if (can_throw_internal (insn))
2928 loop_info->has_multiple_exit_targets = 1;
2929 break;
2931 case JUMP_INSN:
2932 if (! loop_info->has_multiple_exit_targets)
2934 rtx set = pc_set (insn);
2936 if (set)
2938 rtx src = SET_SRC (set);
2939 rtx label1, label2;
2941 if (GET_CODE (src) == IF_THEN_ELSE)
2943 label1 = XEXP (src, 1);
2944 label2 = XEXP (src, 2);
2946 else
2948 label1 = src;
2949 label2 = NULL_RTX;
2952 do
2954 if (label1 && label1 != pc_rtx)
2956 if (GET_CODE (label1) != LABEL_REF)
2958 /* Something tricky. */
2959 loop_info->has_multiple_exit_targets = 1;
2960 break;
2962 else if (XEXP (label1, 0) != exit_target
2963 && LABEL_OUTSIDE_LOOP_P (label1))
2965 /* A jump outside the current loop. */
2966 loop_info->has_multiple_exit_targets = 1;
2967 break;
2971 label1 = label2;
2972 label2 = NULL_RTX;
2974 while (label1);
2976 else
2978 /* A return, or something tricky. */
2979 loop_info->has_multiple_exit_targets = 1;
2982 /* Fall through. */
2984 case INSN:
2985 if (volatile_refs_p (PATTERN (insn)))
2986 loop_info->has_volatile = 1;
2988 if (JUMP_P (insn)
2989 && (GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
2990 || GET_CODE (PATTERN (insn)) == ADDR_VEC))
2991 loop_info->has_tablejump = 1;
2993 note_stores (PATTERN (insn), note_addr_stored, loop_info);
2994 if (! loop_info->first_loop_store_insn && loop_info->store_mems)
2995 loop_info->first_loop_store_insn = insn;
2997 if (flag_non_call_exceptions && can_throw_internal (insn))
2998 loop_info->has_multiple_exit_targets = 1;
2999 break;
3001 default:
3002 break;
3006 /* Now, rescan the loop, setting up the LOOP_MEMS array. */
3007 if (/* An exception thrown by a called function might land us
3008 anywhere. */
3009 ! loop_info->has_nonconst_call
3010 /* We don't want loads for MEMs moved to a location before the
3011 one at which their stack memory becomes allocated. (Note
3012 that this is not a problem for malloc, etc., since those
3013 require actual function calls.) */
3014 && ! current_function_calls_alloca
3015 /* There are ways to leave the loop other than falling off the
3016 end. */
3017 && ! loop_info->has_multiple_exit_targets)
3018 for (insn = NEXT_INSN (start); insn != NEXT_INSN (end);
3019 insn = NEXT_INSN (insn))
3020 for_each_rtx (&insn, insert_loop_mem, loop_info);
3022 /* BLKmode MEMs are added to LOOP_STORE_MEM as necessary so
3023 that loop_invariant_p and load_mems can use true_dependence
3024 to determine what is really clobbered. */
3025 if (loop_info->unknown_address_altered)
3027 rtx mem = gen_rtx_MEM (BLKmode, const0_rtx);
3029 loop_info->store_mems
3030 = gen_rtx_EXPR_LIST (VOIDmode, mem, loop_info->store_mems);
3032 if (loop_info->unknown_constant_address_altered)
3034 rtx mem = gen_rtx_MEM (BLKmode, const0_rtx);
3035 MEM_READONLY_P (mem) = 1;
3036 loop_info->store_mems
3037 = gen_rtx_EXPR_LIST (VOIDmode, mem, loop_info->store_mems);
3041 /* Invalidate all loops containing LABEL. */
3043 static void
3044 invalidate_loops_containing_label (rtx label)
3046 struct loop *loop;
3047 for (loop = uid_loop[INSN_UID (label)]; loop; loop = loop->outer)
3048 loop->invalid = 1;
3051 /* Scan the function looking for loops. Record the start and end of each loop.
3052 Also mark as invalid loops any loops that contain a setjmp or are branched
3053 to from outside the loop. */
3055 static void
3056 find_and_verify_loops (rtx f, struct loops *loops)
3058 rtx insn;
3059 rtx label;
3060 int num_loops;
3061 struct loop *current_loop;
3062 struct loop *next_loop;
3063 struct loop *loop;
3065 num_loops = loops->num;
3067 compute_luids (f, NULL_RTX, 0);
3069 /* If there are jumps to undefined labels,
3070 treat them as jumps out of any/all loops.
3071 This also avoids writing past end of tables when there are no loops. */
3072 uid_loop[0] = NULL;
3074 /* Find boundaries of loops, mark which loops are contained within
3075 loops, and invalidate loops that have setjmp. */
3077 num_loops = 0;
3078 current_loop = NULL;
3079 for (insn = f; insn; insn = NEXT_INSN (insn))
3081 if (NOTE_P (insn))
3082 switch (NOTE_LINE_NUMBER (insn))
3084 case NOTE_INSN_LOOP_BEG:
3085 next_loop = loops->array + num_loops;
3086 next_loop->num = num_loops;
3087 num_loops++;
3088 next_loop->start = insn;
3089 next_loop->outer = current_loop;
3090 current_loop = next_loop;
3091 break;
3093 case NOTE_INSN_LOOP_END:
3094 if (! current_loop)
3095 abort ();
3097 current_loop->end = insn;
3098 current_loop = current_loop->outer;
3099 break;
3101 default:
3102 break;
3105 if (CALL_P (insn)
3106 && find_reg_note (insn, REG_SETJMP, NULL))
3108 /* In this case, we must invalidate our current loop and any
3109 enclosing loop. */
3110 for (loop = current_loop; loop; loop = loop->outer)
3112 loop->invalid = 1;
3113 if (loop_dump_stream)
3114 fprintf (loop_dump_stream,
3115 "\nLoop at %d ignored due to setjmp.\n",
3116 INSN_UID (loop->start));
3120 /* Note that this will mark the NOTE_INSN_LOOP_END note as being in the
3121 enclosing loop, but this doesn't matter. */
3122 uid_loop[INSN_UID (insn)] = current_loop;
3125 /* Any loop containing a label used in an initializer must be invalidated,
3126 because it can be jumped into from anywhere. */
3127 for (label = forced_labels; label; label = XEXP (label, 1))
3128 invalidate_loops_containing_label (XEXP (label, 0));
3130 /* Any loop containing a label used for an exception handler must be
3131 invalidated, because it can be jumped into from anywhere. */
3132 for_each_eh_label (invalidate_loops_containing_label);
3134 /* Now scan all insn's in the function. If any JUMP_INSN branches into a
3135 loop that it is not contained within, that loop is marked invalid.
3136 If any INSN or CALL_INSN uses a label's address, then the loop containing
3137 that label is marked invalid, because it could be jumped into from
3138 anywhere.
3140 Also look for blocks of code ending in an unconditional branch that
3141 exits the loop. If such a block is surrounded by a conditional
3142 branch around the block, move the block elsewhere (see below) and
3143 invert the jump to point to the code block. This may eliminate a
3144 label in our loop and will simplify processing by both us and a
3145 possible second cse pass. */
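/* Schematically, with invented labels: a loop body of the form

	  if (cond) goto L1;
	  ...rarely executed block...
	  goto loop_exit;
	L1: ...

   becomes `if (! cond) goto L2;' followed directly by the code at L1,
   while the rarely executed block is re-emitted after a BARRIER outside
   the loop under the new label L2.  */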
3147 for (insn = f; insn; insn = NEXT_INSN (insn))
3148 if (INSN_P (insn))
3150 struct loop *this_loop = uid_loop[INSN_UID (insn)];
3152 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
3154 rtx note = find_reg_note (insn, REG_LABEL, NULL_RTX);
3155 if (note)
3156 invalidate_loops_containing_label (XEXP (note, 0));
3159 if (!JUMP_P (insn))
3160 continue;
3162 mark_loop_jump (PATTERN (insn), this_loop);
3164 /* See if this is an unconditional branch outside the loop. */
3165 if (this_loop
3166 && (GET_CODE (PATTERN (insn)) == RETURN
3167 || (any_uncondjump_p (insn)
3168 && onlyjump_p (insn)
3169 && (uid_loop[INSN_UID (JUMP_LABEL (insn))]
3170 != this_loop)))
3171 && get_max_uid () < max_uid_for_loop)
3173 rtx p;
3174 rtx our_next = next_real_insn (insn);
3175 rtx last_insn_to_move = NEXT_INSN (insn);
3176 struct loop *dest_loop;
3177 struct loop *outer_loop = NULL;
3179 /* Go backwards until we reach the start of the loop, a label,
3180 or a JUMP_INSN. */
3181 for (p = PREV_INSN (insn);
3182 !LABEL_P (p)
3183 && ! (NOTE_P (p)
3184 && NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_BEG)
3185 && !JUMP_P (p);
3186 p = PREV_INSN (p))
3187 ;
3189 /* Check for the case where we have a jump to an inner nested
3190 loop, and do not perform the optimization in that case. */
3192 if (JUMP_LABEL (insn))
3194 dest_loop = uid_loop[INSN_UID (JUMP_LABEL (insn))];
3195 if (dest_loop)
3197 for (outer_loop = dest_loop; outer_loop;
3198 outer_loop = outer_loop->outer)
3199 if (outer_loop == this_loop)
3200 break;
3204 /* Make sure that the target of P is within the current loop. */
3206 if (JUMP_P (p) && JUMP_LABEL (p)
3207 && uid_loop[INSN_UID (JUMP_LABEL (p))] != this_loop)
3208 outer_loop = this_loop;
3210 /* If we stopped on a JUMP_INSN to the next insn after INSN,
3211 we have a block of code to try to move.
3213 We look backward and then forward from the target of INSN
3214 to find a BARRIER at the same loop depth as the target.
3215 If we find such a BARRIER, we make a new label for the start
3216 of the block, invert the jump in P and point it to that label,
3217 and move the block of code to the spot we found. */
3219 if (! outer_loop
3220 && JUMP_P (p)
3221 && JUMP_LABEL (p) != 0
3222 /* Just ignore jumps to labels that were never emitted.
3223 These always indicate compilation errors. */
3224 && INSN_UID (JUMP_LABEL (p)) != 0
3225 && any_condjump_p (p) && onlyjump_p (p)
3226 && next_real_insn (JUMP_LABEL (p)) == our_next
3227 /* If it's not safe to move the sequence, then we
3228 mustn't try. */
3229 && insns_safe_to_move_p (p, NEXT_INSN (insn),
3230 &last_insn_to_move))
3232 rtx target
3233 = JUMP_LABEL (insn) ? JUMP_LABEL (insn) : get_last_insn ();
3234 struct loop *target_loop = uid_loop[INSN_UID (target)];
3235 rtx loc, loc2;
3236 rtx tmp;
3238 /* Search for possible garbage past the conditional jumps
3239 and look for the last barrier. */
3240 for (tmp = last_insn_to_move;
3241 tmp && !LABEL_P (tmp); tmp = NEXT_INSN (tmp))
3242 if (BARRIER_P (tmp))
3243 last_insn_to_move = tmp;
3245 for (loc = target; loc; loc = PREV_INSN (loc))
3246 if (BARRIER_P (loc)
3247 /* Don't move things inside a tablejump. */
3248 && ((loc2 = next_nonnote_insn (loc)) == 0
3249 || !LABEL_P (loc2)
3250 || (loc2 = next_nonnote_insn (loc2)) == 0
3251 || !JUMP_P (loc2)
3252 || (GET_CODE (PATTERN (loc2)) != ADDR_VEC
3253 && GET_CODE (PATTERN (loc2)) != ADDR_DIFF_VEC))
3254 && uid_loop[INSN_UID (loc)] == target_loop)
3255 break;
3257 if (loc == 0)
3258 for (loc = target; loc; loc = NEXT_INSN (loc))
3259 if (BARRIER_P (loc)
3260 /* Don't move things inside a tablejump. */
3261 && ((loc2 = next_nonnote_insn (loc)) == 0
3262 || !LABEL_P (loc2)
3263 || (loc2 = next_nonnote_insn (loc2)) == 0
3264 || !JUMP_P (loc2)
3265 || (GET_CODE (PATTERN (loc2)) != ADDR_VEC
3266 && GET_CODE (PATTERN (loc2)) != ADDR_DIFF_VEC))
3267 && uid_loop[INSN_UID (loc)] == target_loop)
3268 break;
3270 if (loc)
3272 rtx cond_label = JUMP_LABEL (p);
3273 rtx new_label = get_label_after (p);
3275 /* Ensure our label doesn't go away. */
3276 LABEL_NUSES (cond_label)++;
3278 /* Verify that uid_loop is large enough and that
3279 we can invert P. */
3280 if (invert_jump (p, new_label, 1))
3282 rtx q, r;
3284 /* If no suitable BARRIER was found, create a suitable
3285 one before TARGET. Since TARGET is a fall through
3286 path, we'll need to insert a jump around our block
3287 and add a BARRIER before TARGET.
3289 This creates an extra unconditional jump outside
3290 the loop. However, the benefits of removing rarely
3291 executed instructions from inside the loop usually
3292 outweigh the cost of the extra unconditional jump
3293 outside the loop. */
3294 if (loc == 0)
3296 rtx temp;
3298 temp = gen_jump (JUMP_LABEL (insn));
3299 temp = emit_jump_insn_before (temp, target);
3300 JUMP_LABEL (temp) = JUMP_LABEL (insn);
3301 LABEL_NUSES (JUMP_LABEL (insn))++;
3302 loc = emit_barrier_before (target);
3305 /* Include the BARRIER after INSN and copy the
3306 block after LOC. */
3307 if (squeeze_notes (&new_label, &last_insn_to_move))
3308 abort ();
3309 reorder_insns (new_label, last_insn_to_move, loc);
3311 /* All those insns are now in TARGET_LOOP. */
3312 for (q = new_label;
3313 q != NEXT_INSN (last_insn_to_move);
3314 q = NEXT_INSN (q))
3315 uid_loop[INSN_UID (q)] = target_loop;
3317 /* The label jumped to by INSN is no longer a loop
3318 exit. Unless INSN does not have a label (e.g.,
3319 it is a RETURN insn), search loop->exit_labels
3320 to find its label_ref, and remove it. Also turn
3321 off LABEL_OUTSIDE_LOOP_P bit. */
3322 if (JUMP_LABEL (insn))
3324 for (q = 0, r = this_loop->exit_labels;
3325 r;
3326 q = r, r = LABEL_NEXTREF (r))
3327 if (XEXP (r, 0) == JUMP_LABEL (insn))
3329 LABEL_OUTSIDE_LOOP_P (r) = 0;
3330 if (q)
3331 LABEL_NEXTREF (q) = LABEL_NEXTREF (r);
3332 else
3333 this_loop->exit_labels = LABEL_NEXTREF (r);
3334 break;
3337 for (loop = this_loop; loop && loop != target_loop;
3338 loop = loop->outer)
3339 loop->exit_count--;
3341 /* If we didn't find it, then something is
3342 wrong. */
3343 if (! r)
3344 abort ();
3347 /* P is now a jump outside the loop, so it must be put
3348 in loop->exit_labels, and marked as such.
3349 The easiest way to do this is to just call
3350 mark_loop_jump again for P. */
3351 mark_loop_jump (PATTERN (p), this_loop);
3353 /* If INSN now jumps to the insn after it,
3354 delete INSN. */
3355 if (JUMP_LABEL (insn) != 0
3356 && (next_real_insn (JUMP_LABEL (insn))
3357 == next_real_insn (insn)))
3358 delete_related_insns (insn);
3361 /* Continue the loop after where the conditional
3362 branch used to jump, since the only branch insn
3363 in the block (if it still remains) is an inter-loop
3364 branch and hence needs no processing. */
3365 insn = NEXT_INSN (cond_label);
3367 if (--LABEL_NUSES (cond_label) == 0)
3368 delete_related_insns (cond_label);
3370 /* This loop will be continued with NEXT_INSN (insn). */
3371 insn = PREV_INSN (insn);
3378 /* If any label in X jumps to a loop different from LOOP_NUM and any of the
3379 loops it is contained in, mark the target loop invalid.
3381 For speed, we assume that X is part of a pattern of a JUMP_INSN. */
3383 static void
3384 mark_loop_jump (rtx x, struct loop *loop)
3386 struct loop *dest_loop;
3387 struct loop *outer_loop;
3388 int i;
3390 switch (GET_CODE (x))
3392 case PC:
3393 case USE:
3394 case CLOBBER:
3395 case REG:
3396 case MEM:
3397 case CONST_INT:
3398 case CONST_DOUBLE:
3399 case RETURN:
3400 return;
3402 case CONST:
3403 /* There could be a label reference in here. */
3404 mark_loop_jump (XEXP (x, 0), loop);
3405 return;
3407 case PLUS:
3408 case MINUS:
3409 case MULT:
3410 mark_loop_jump (XEXP (x, 0), loop);
3411 mark_loop_jump (XEXP (x, 1), loop);
3412 return;
3414 case LO_SUM:
3415 /* This may refer to a LABEL_REF or SYMBOL_REF. */
3416 mark_loop_jump (XEXP (x, 1), loop);
3417 return;
3419 case SIGN_EXTEND:
3420 case ZERO_EXTEND:
3421 mark_loop_jump (XEXP (x, 0), loop);
3422 return;
3424 case LABEL_REF:
3425 dest_loop = uid_loop[INSN_UID (XEXP (x, 0))];
3427 /* Link together all labels that branch outside the loop. This
3428 is used by final_[bg]iv_value and the loop unrolling code. Also
3429 mark this LABEL_REF so we know that this branch should predict
3430 false. */
3432 /* A check to make sure the label is not in an inner nested loop,
3433 since this does not count as a loop exit. */
3434 if (dest_loop)
3436 for (outer_loop = dest_loop; outer_loop;
3437 outer_loop = outer_loop->outer)
3438 if (outer_loop == loop)
3439 break;
3441 else
3442 outer_loop = NULL;
3444 if (loop && ! outer_loop)
3446 LABEL_OUTSIDE_LOOP_P (x) = 1;
3447 LABEL_NEXTREF (x) = loop->exit_labels;
3448 loop->exit_labels = x;
3450 for (outer_loop = loop;
3451 outer_loop && outer_loop != dest_loop;
3452 outer_loop = outer_loop->outer)
3453 outer_loop->exit_count++;
3456 /* If this is inside a loop, but not in the current loop or one enclosed
3457 by it, it invalidates at least one loop. */
3459 if (! dest_loop)
3460 return;
3462 /* We must invalidate every nested loop containing the target of this
3463 label, except those that also contain the jump insn. */
3465 for (; dest_loop; dest_loop = dest_loop->outer)
3467 /* Stop when we reach a loop that also contains the jump insn. */
3468 for (outer_loop = loop; outer_loop; outer_loop = outer_loop->outer)
3469 if (dest_loop == outer_loop)
3470 return;
3472 /* If we get here, we know we need to invalidate a loop. */
3473 if (loop_dump_stream && ! dest_loop->invalid)
3474 fprintf (loop_dump_stream,
3475 "\nLoop at %d ignored due to multiple entry points.\n",
3476 INSN_UID (dest_loop->start));
3478 dest_loop->invalid = 1;
3480 return;
3482 case SET:
3483 /* If this is not setting pc, ignore. */
3484 if (SET_DEST (x) == pc_rtx)
3485 mark_loop_jump (SET_SRC (x), loop);
3486 return;
3488 case IF_THEN_ELSE:
3489 mark_loop_jump (XEXP (x, 1), loop);
3490 mark_loop_jump (XEXP (x, 2), loop);
3491 return;
3493 case PARALLEL:
3494 case ADDR_VEC:
3495 for (i = 0; i < XVECLEN (x, 0); i++)
3496 mark_loop_jump (XVECEXP (x, 0, i), loop);
3497 return;
3499 case ADDR_DIFF_VEC:
3500 for (i = 0; i < XVECLEN (x, 1); i++)
3501 mark_loop_jump (XVECEXP (x, 1, i), loop);
3502 return;
3504 default:
3505 /* Strictly speaking this is not a jump into the loop, only a possible
3506 jump out of the loop. However, we have no way to link the destination
3507 of this jump onto the list of exit labels. To be safe we mark this
3508 loop and any containing loops as invalid. */
3509 if (loop)
3511 for (outer_loop = loop; outer_loop; outer_loop = outer_loop->outer)
3513 if (loop_dump_stream && ! outer_loop->invalid)
3514 fprintf (loop_dump_stream,
3515 "\nLoop at %d ignored due to unknown exit jump.\n",
3516 INSN_UID (outer_loop->start));
3517 outer_loop->invalid = 1;
3520 return;
3524 /* Return nonzero if there is a label in the range from
3525 insn INSN to and including the insn whose luid is END.
3526 INSN must have an assigned luid (i.e., it must not have
3527 been previously created by loop.c). */
3529 static int
3530 labels_in_range_p (rtx insn, int end)
3532 while (insn && INSN_LUID (insn) <= end)
3534 if (LABEL_P (insn))
3535 return 1;
3536 insn = NEXT_INSN (insn);
3539 return 0;
3542 /* Record that a memory reference X is being set. */
3544 static void
3545 note_addr_stored (rtx x, rtx y ATTRIBUTE_UNUSED,
3546 void *data ATTRIBUTE_UNUSED)
3548 struct loop_info *loop_info = data;
3550 if (x == 0 || !MEM_P (x))
3551 return;
3553 /* Count number of memory writes.
3554 This affects heuristics in strength_reduce. */
3555 loop_info->num_mem_sets++;
3557 /* BLKmode MEM means all memory is clobbered. */
3558 if (GET_MODE (x) == BLKmode)
3560 if (MEM_READONLY_P (x))
3561 loop_info->unknown_constant_address_altered = 1;
3562 else
3563 loop_info->unknown_address_altered = 1;
3565 return;
3568 loop_info->store_mems = gen_rtx_EXPR_LIST (VOIDmode, x,
3569 loop_info->store_mems);
3572 /* X is a value modified by an INSN that references a biv inside a loop
3573 exit test (i.e., X is somehow related to the value of the biv). If X
3574 is a pseudo that is used more than once, then the biv is (effectively)
3575 used more than once. DATA is a pointer to a loop_regs structure. */
3577 static void
3578 note_set_pseudo_multiple_uses (rtx x, rtx y ATTRIBUTE_UNUSED, void *data)
3580 struct loop_regs *regs = (struct loop_regs *) data;
3582 if (x == 0)
3583 return;
3585 while (GET_CODE (x) == STRICT_LOW_PART
3586 || GET_CODE (x) == SIGN_EXTRACT
3587 || GET_CODE (x) == ZERO_EXTRACT
3588 || GET_CODE (x) == SUBREG)
3589 x = XEXP (x, 0);
3591 if (!REG_P (x) || REGNO (x) < FIRST_PSEUDO_REGISTER)
3592 return;
3594 /* If we do not have usage information, or if we know the register
3595 is used more than once, note that fact for check_dbra_loop. */
3596 if (REGNO (x) >= max_reg_before_loop
3597 || ! regs->array[REGNO (x)].single_usage
3598 || regs->array[REGNO (x)].single_usage == const0_rtx)
3599 regs->multiple_uses = 1;
3602 /* Return nonzero if the rtx X is invariant over the current loop.
3604 The value is 2 if we refer to something only conditionally invariant.
3606 A memory ref is invariant if it is not volatile and does not conflict
3607 with anything stored in `loop_info->store_mems'. */
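/* As an illustrative (hypothetical) example: in a loop whose body does

       if (flag)
	 t = a + b;

   the expression a + b is invariant provided none of a, b and flag is
   stored inside the loop, while a register set only by such
   conditionally executed code may be classified as no better than
   conditionally invariant, for which the value 2 is returned.  */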
3609 static int
3610 loop_invariant_p (const struct loop *loop, rtx x)
3612 struct loop_info *loop_info = LOOP_INFO (loop);
3613 struct loop_regs *regs = LOOP_REGS (loop);
3614 int i;
3615 enum rtx_code code;
3616 const char *fmt;
3617 int conditional = 0;
3618 rtx mem_list_entry;
3620 if (x == 0)
3621 return 1;
3622 code = GET_CODE (x);
3623 switch (code)
3625 case CONST_INT:
3626 case CONST_DOUBLE:
3627 case SYMBOL_REF:
3628 case CONST:
3629 return 1;
3631 case LABEL_REF:
3632 return 1;
3634 case PC:
3635 case CC0:
3636 case UNSPEC_VOLATILE:
3637 return 0;
3639 case REG:
3640 if ((x == frame_pointer_rtx || x == hard_frame_pointer_rtx
3641 || x == arg_pointer_rtx || x == pic_offset_table_rtx)
3642 && ! current_function_has_nonlocal_goto)
3643 return 1;
3645 if (LOOP_INFO (loop)->has_call
3646 && REGNO (x) < FIRST_PSEUDO_REGISTER && call_used_regs[REGNO (x)])
3647 return 0;
3649 /* Out-of-range regs can occur when we are called from unrolling.
3650 These registers created by the unroller are set in the loop,
3651 hence are never invariant.
3652 Other out-of-range regs can be generated by load_mems; those that
3653 are written to in the loop are not invariant, while those that are
3654 not written to are invariant. It would be easy for load_mems
3655 to set n_times_set correctly for these registers; however, there
3656 is no easy way to distinguish them from registers created by the
3657 unroller. */
3659 if (REGNO (x) >= (unsigned) regs->num)
3660 return 0;
3662 if (regs->array[REGNO (x)].set_in_loop < 0)
3663 return 2;
3665 return regs->array[REGNO (x)].set_in_loop == 0;
3667 case MEM:
3668 /* Volatile memory references must be rejected. Do this before
3669 checking for read-only items, so that volatile read-only items
3670 will be rejected also. */
3671 if (MEM_VOLATILE_P (x))
3672 return 0;
3674 /* See if there is any dependence between a store and this load. */
3675 mem_list_entry = loop_info->store_mems;
3676 while (mem_list_entry)
3678 if (true_dependence (XEXP (mem_list_entry, 0), VOIDmode,
3679 x, rtx_varies_p))
3680 return 0;
3682 mem_list_entry = XEXP (mem_list_entry, 1);
3685 /* It's not invalidated by a store in memory,
3686 but we must still verify that the address is invariant. */
3687 break;
3689 case ASM_OPERANDS:
3690 /* Don't mess with insns declared volatile. */
3691 if (MEM_VOLATILE_P (x))
3692 return 0;
3693 break;
3695 default:
3696 break;
3699 fmt = GET_RTX_FORMAT (code);
3700 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3702 if (fmt[i] == 'e')
3704 int tem = loop_invariant_p (loop, XEXP (x, i));
3705 if (tem == 0)
3706 return 0;
3707 if (tem == 2)
3708 conditional = 1;
3710 else if (fmt[i] == 'E')
3712 int j;
3713 for (j = 0; j < XVECLEN (x, i); j++)
3715 int tem = loop_invariant_p (loop, XVECEXP (x, i, j));
3716 if (tem == 0)
3717 return 0;
3718 if (tem == 2)
3719 conditional = 1;
3725 return 1 + conditional;
3728 /* Return nonzero if all the insns in the loop that set REG
3729 are INSN and the immediately following insns,
3730 and if each of those insns sets REG in an invariant way
3731 (not counting uses of REG in them).
3733 The value is 2 if some of these insns are only conditionally invariant.
3735 We assume that INSN itself is the first set of REG
3736 and that its source is invariant. */
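/* For instance (hypothetical RTL), the pair of consecutive sets

       (set (reg 70) (reg 60))
       (set (reg 70) (plus (reg 70) (reg 61)))

   counts as invariant when regs 60 and 61 are invariant: SET_IN_LOOP
   for reg 70 is temporarily zeroed below, so the use of reg 70 in the
   second set does not disqualify it.  */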
3738 static int
3739 consec_sets_invariant_p (const struct loop *loop, rtx reg, int n_sets,
3740 rtx insn)
3742 struct loop_regs *regs = LOOP_REGS (loop);
3743 rtx p = insn;
3744 unsigned int regno = REGNO (reg);
3745 rtx temp;
3746 /* Number of sets we have to insist on finding after INSN. */
3747 int count = n_sets - 1;
3748 int old = regs->array[regno].set_in_loop;
3749 int value = 0;
3750 int this;
3752 /* If N_SETS hit the limit, we can't rely on its value. */
3753 if (n_sets == 127)
3754 return 0;
3756 regs->array[regno].set_in_loop = 0;
3758 while (count > 0)
3760 enum rtx_code code;
3761 rtx set;
3763 p = NEXT_INSN (p);
3764 code = GET_CODE (p);
3766 /* If library call, skip to end of it. */
3767 if (code == INSN && (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
3768 p = XEXP (temp, 0);
3770 this = 0;
3771 if (code == INSN
3772 && (set = single_set (p))
3773 && REG_P (SET_DEST (set))
3774 && REGNO (SET_DEST (set)) == regno)
3776 this = loop_invariant_p (loop, SET_SRC (set));
3777 if (this != 0)
3778 value |= this;
3779 else if ((temp = find_reg_note (p, REG_EQUAL, NULL_RTX)))
3781 /* If this is a libcall, then any invariant REG_EQUAL note is OK.
3782 If this is an ordinary insn, then only CONSTANT_P REG_EQUAL
3783 notes are OK. */
3784 this = (CONSTANT_P (XEXP (temp, 0))
3785 || (find_reg_note (p, REG_RETVAL, NULL_RTX)
3786 && loop_invariant_p (loop, XEXP (temp, 0))));
3787 if (this != 0)
3788 value |= this;
3791 if (this != 0)
3792 count--;
3793 else if (code != NOTE)
3795 regs->array[regno].set_in_loop = old;
3796 return 0;
3800 regs->array[regno].set_in_loop = old;
3801 /* If loop_invariant_p ever returned 2, we return a value greater than 1, which tells the caller the sets are only conditionally invariant. */
3802 return 1 + (value & 2);
3805 /* Look at all uses (not sets) of registers in X. For each, if it is
3806 the single use, set USAGE[REGNO] to INSN; if there was a previous use in
3807 a different insn, set USAGE[REGNO] to const0_rtx. */
3809 static void
3810 find_single_use_in_loop (struct loop_regs *regs, rtx insn, rtx x)
3812 enum rtx_code code = GET_CODE (x);
3813 const char *fmt = GET_RTX_FORMAT (code);
3814 int i, j;
3816 if (code == REG)
3817 regs->array[REGNO (x)].single_usage
3818 = (regs->array[REGNO (x)].single_usage != 0
3819 && regs->array[REGNO (x)].single_usage != insn)
3820 ? const0_rtx : insn;
3822 else if (code == SET)
3824 /* Don't count SET_DEST if it is a REG; otherwise count things
3825 in SET_DEST, because if a register is only partially modified, it
3826 won't show up as a potential movable, so we don't care how USAGE
3827 is set for it. */
3828 if (!REG_P (SET_DEST (x)))
3829 find_single_use_in_loop (regs, insn, SET_DEST (x));
3830 find_single_use_in_loop (regs, insn, SET_SRC (x));
3832 else
3833 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3835 if (fmt[i] == 'e' && XEXP (x, i) != 0)
3836 find_single_use_in_loop (regs, insn, XEXP (x, i));
3837 else if (fmt[i] == 'E')
3838 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3839 find_single_use_in_loop (regs, insn, XVECEXP (x, i, j));
3843 /* Count and record any set in X which is contained in INSN. Update
3844 REGS->array[I].MAY_NOT_OPTIMIZE and LAST_SET for any register I set
3845 in X. */
3847 static void
3848 count_one_set (struct loop_regs *regs, rtx insn, rtx x, rtx *last_set)
3850 if (GET_CODE (x) == CLOBBER && REG_P (XEXP (x, 0)))
3851 /* Don't move a reg that has an explicit clobber.
3852 It's not worth the pain to try to do it correctly. */
3853 regs->array[REGNO (XEXP (x, 0))].may_not_optimize = 1;
3855 if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
3857 rtx dest = SET_DEST (x);
3858 while (GET_CODE (dest) == SUBREG
3859 || GET_CODE (dest) == ZERO_EXTRACT
3860 || GET_CODE (dest) == STRICT_LOW_PART)
3861 dest = XEXP (dest, 0);
3862 if (REG_P (dest))
3864 int i;
3865 int regno = REGNO (dest);
3866 for (i = 0; i < LOOP_REGNO_NREGS (regno, dest); i++)
3868 /* If this is the first setting of this reg
3869 in current basic block, and it was set before,
3870 it must be set in two basic blocks, so it cannot
3871 be moved out of the loop. */
3872 if (regs->array[regno].set_in_loop > 0
3873 && last_set[regno] == 0)
3874 regs->array[regno+i].may_not_optimize = 1;
3875 /* If this is not the first setting in the current basic block,
3876 see if the reg was used between the previous setting and this one.
3877 If so, neither one can be moved. */
3878 if (last_set[regno] != 0
3879 && reg_used_between_p (dest, last_set[regno], insn))
3880 regs->array[regno+i].may_not_optimize = 1;
3881 if (regs->array[regno+i].set_in_loop < 127)
3882 ++regs->array[regno+i].set_in_loop;
3883 last_set[regno+i] = insn;
3889 /* Given a loop that is bounded by LOOP->START and LOOP->END and that
3890 is entered at LOOP->SCAN_START, return 1 if the register set in SET
3891 contained in insn INSN is used by any insn that precedes INSN in
3892 cyclic order starting from the loop entry point.
3894 We don't want to use INSN_LUID here because if we restrict INSN to those
3895 that have a valid INSN_LUID, it means we cannot move an invariant out
3896 from an inner loop past two loops. */
3898 static int
3899 loop_reg_used_before_p (const struct loop *loop, rtx set, rtx insn)
3901 rtx reg = SET_DEST (set);
3902 rtx p;
3904 /* Scan forward checking for register usage. If we hit INSN, we
3905 are done. Otherwise, if we hit LOOP->END, wrap around to LOOP->START. */
3906 for (p = loop->scan_start; p != insn; p = NEXT_INSN (p))
3908 if (INSN_P (p) && reg_overlap_mentioned_p (reg, PATTERN (p)))
3909 return 1;
3911 if (p == loop->end)
3912 p = loop->start;
3915 return 0;
3919 /* Information we collect about arrays that we might want to prefetch. */
3920 struct prefetch_info
3922 struct iv_class *class; /* Class this prefetch is based on. */
3923 struct induction *giv; /* GIV this prefetch is based on. */
3924 rtx base_address; /* Start prefetching from this address plus
3925 index. */
3926 HOST_WIDE_INT index;
3927 HOST_WIDE_INT stride; /* Prefetch stride in bytes in each
3928 iteration. */
3929 unsigned int bytes_accessed; /* Sum of sizes of all accesses to this
3930 prefetch area in one iteration. */
3931 unsigned int total_bytes; /* Total bytes loop will access in this block.
3932 This is set only for loops with known
3933 iteration counts and is 0xffffffff
3934 otherwise. */
3935 int prefetch_in_loop; /* Number of prefetch insns in loop. */
3936 int prefetch_before_loop; /* Number of prefetch insns before loop. */
3937 unsigned int write : 1; /* 1 for read/write prefetches. */
3940 /* Data used by check_store function. */
3941 struct check_store_data
3943 rtx mem_address;
3944 int mem_write;
3947 static void check_store (rtx, rtx, void *);
3948 static void emit_prefetch_instructions (struct loop *);
3949 static int rtx_equal_for_prefetch_p (rtx, rtx);
3951 /* Set mem_write when mem_address is found. Used as callback to
3952 note_stores. */
3953 static void
3954 check_store (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
3956 struct check_store_data *d = (struct check_store_data *) data;
3958 if ((MEM_P (x)) && rtx_equal_p (d->mem_address, XEXP (x, 0)))
3959 d->mem_write = 1;
3962 /* Like rtx_equal_p, but attempts to swap commutative operands. This is
3963 important to get some addresses combined. Later more sophisticated
3964 transformations can be added when necessary.
3966 ??? The same trick of swapping operands is done at several other places.
3967 It would be nice to develop some common way to handle this. */
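/* For example, (plus (reg 60) (reg 61)) and (plus (reg 61) (reg 60))
   compare equal here, even though rtx_equal_p would say they differ.  */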
3969 static int
3970 rtx_equal_for_prefetch_p (rtx x, rtx y)
3972 int i;
3973 int j;
3974 enum rtx_code code = GET_CODE (x);
3975 const char *fmt;
3977 if (x == y)
3978 return 1;
3979 if (code != GET_CODE (y))
3980 return 0;
3982 if (COMMUTATIVE_ARITH_P (x))
3984 return ((rtx_equal_for_prefetch_p (XEXP (x, 0), XEXP (y, 0))
3985 && rtx_equal_for_prefetch_p (XEXP (x, 1), XEXP (y, 1)))
3986 || (rtx_equal_for_prefetch_p (XEXP (x, 0), XEXP (y, 1))
3987 && rtx_equal_for_prefetch_p (XEXP (x, 1), XEXP (y, 0))));
3990 /* Compare the elements. If any pair of corresponding elements fails to
3991 match, return 0 for the whole thing. */
3993 fmt = GET_RTX_FORMAT (code);
3994 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3996 switch (fmt[i])
3998 case 'w':
3999 if (XWINT (x, i) != XWINT (y, i))
4000 return 0;
4001 break;
4003 case 'i':
4004 if (XINT (x, i) != XINT (y, i))
4005 return 0;
4006 break;
4008 case 'E':
4009 /* Two vectors must have the same length. */
4010 if (XVECLEN (x, i) != XVECLEN (y, i))
4011 return 0;
4013 /* And the corresponding elements must match. */
4014 for (j = 0; j < XVECLEN (x, i); j++)
4015 if (rtx_equal_for_prefetch_p (XVECEXP (x, i, j),
4016 XVECEXP (y, i, j)) == 0)
4017 return 0;
4018 break;
4020 case 'e':
4021 if (rtx_equal_for_prefetch_p (XEXP (x, i), XEXP (y, i)) == 0)
4022 return 0;
4023 break;
4025 case 's':
4026 if (strcmp (XSTR (x, i), XSTR (y, i)))
4027 return 0;
4028 break;
4030 case 'u':
4031 /* These are just backpointers, so they don't matter. */
4032 break;
4034 case '0':
4035 break;
4037 /* It is believed that rtx's at this level will never
4038 contain anything but integers and other rtx's,
4039 except for within LABEL_REFs and SYMBOL_REFs. */
4040 default:
4041 abort ();
4044 return 1;
4047 /* Remove constant addition value from the expression X (when present)
4048 and return it. */
4050 static HOST_WIDE_INT
4051 remove_constant_addition (rtx *x)
4053 HOST_WIDE_INT addval = 0;
4054 rtx exp = *x;
4056 /* Avoid clobbering a shared CONST expression. */
4057 if (GET_CODE (exp) == CONST)
4059 if (GET_CODE (XEXP (exp, 0)) == PLUS
4060 && GET_CODE (XEXP (XEXP (exp, 0), 0)) == SYMBOL_REF
4061 && GET_CODE (XEXP (XEXP (exp, 0), 1)) == CONST_INT)
4063 *x = XEXP (XEXP (exp, 0), 0);
4064 return INTVAL (XEXP (XEXP (exp, 0), 1));
4066 return 0;
4069 if (GET_CODE (exp) == CONST_INT)
4071 addval = INTVAL (exp);
4072 *x = const0_rtx;
4075 /* For a PLUS expression, recurse on both operands. */
4076 else if (GET_CODE (exp) == PLUS)
4078 addval += remove_constant_addition (&XEXP (exp, 0));
4079 addval += remove_constant_addition (&XEXP (exp, 1));
4081 /* If an operand reduced to a constant, remove the resulting
4082 zero from the expression. */
4083 if (XEXP (exp, 0) == const0_rtx)
4084 *x = XEXP (exp, 1);
4085 else if (XEXP (exp, 1) == const0_rtx)
4086 *x = XEXP (exp, 0);
4089 return addval;
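/* For example (hypothetical RTL), called on

       (plus (reg 60) (const_int 12))

   this returns 12 and replaces *X with (reg 60); constants buried in
   nested PLUS expressions accumulate into the returned value the same
   way.  */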
4092 /* Attempt to identify accesses to arrays that are most likely to cause cache
4093 misses, and emit prefetch instructions a few prefetch blocks forward.
4095 To detect the arrays we use the GIV information that was collected by the
4096 strength reduction pass.
4098 The prefetch instructions are generated after the GIV information is done
4099 and before the strength reduction process. The new GIVs are injected into
4100 the strength reduction tables, so the prefetch addresses are optimized as
4101 well.
4103 GIVs are split into base address, stride, and constant addition values.
4104 GIVs with the same address, stride and close addition values are combined
4105 into a single prefetch. Writes to GIVs are also detected, so that
4106 prefetch-for-write instructions can be used for the blocks we write
4107 to, on machines that support write prefetches.
4109 Several heuristics are used to determine when to prefetch. They are
4110 controlled by defined symbols that can be overridden for each target. */
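/* As an illustrative (hypothetical) example: a giv that walks an int
   array as  base + 4*i + 8,  where the biv i steps by 1 per iteration,
   is recorded with that base address, stride 4 and index 8; a second
   giv at  base + 4*i + 12  then merges into the same prefetch, since
   the bases and strides match and the indexes differ by less than
   PREFETCH_EXTREME_DIFFERENCE.  */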
4112 static void
4113 emit_prefetch_instructions (struct loop *loop)
4115 int num_prefetches = 0;
4116 int num_real_prefetches = 0;
4117 int num_real_write_prefetches = 0;
4118 int num_prefetches_before = 0;
4119 int num_write_prefetches_before = 0;
4120 int ahead = 0;
4121 int i;
4122 struct iv_class *bl;
4123 struct induction *iv;
4124 struct prefetch_info info[MAX_PREFETCHES];
4125 struct loop_ivs *ivs = LOOP_IVS (loop);
4127 if (!HAVE_prefetch || PREFETCH_BLOCK == 0)
4128 return;
4130 /* Consider only loops without calls. When a call is made, the loop
4131 is probably slow enough that the memory latency is hidden anyway. */
4132 if (PREFETCH_NO_CALL && LOOP_INFO (loop)->has_call)
4134 if (loop_dump_stream)
4135 fprintf (loop_dump_stream, "Prefetch: ignoring loop: has call.\n");
4137 return;
4140 /* Don't prefetch in loops known to have few iterations. */
4141 if (PREFETCH_NO_LOW_LOOPCNT
4142 && LOOP_INFO (loop)->n_iterations
4143 && LOOP_INFO (loop)->n_iterations <= PREFETCH_LOW_LOOPCNT)
4145 if (loop_dump_stream)
4146 fprintf (loop_dump_stream,
4147 "Prefetch: ignoring loop: not enough iterations.\n");
4148 return;
4151 /* Search all induction variables and pick those interesting for the prefetch
4152 machinery. */
4153 for (bl = ivs->list; bl; bl = bl->next)
4155 struct induction *biv = bl->biv, *biv1;
4156 int basestride = 0;
4158 biv1 = biv;
4160 /* Expect all BIVs to be executed in each iteration. This makes our
4161 analysis more conservative. */
4162 while (biv1)
4164 /* Discard non-constant additions that we can't handle well yet, and
4165 BIVs that are executed multiple times; such BIVs ought to be
4166 handled in the nested loop. We accept not_every_iteration BIVs,
4167 since these only result in larger strides and make our
4168 heuristics more conservative. */
4169 if (GET_CODE (biv->add_val) != CONST_INT)
4171 if (loop_dump_stream)
4173 fprintf (loop_dump_stream,
4174 "Prefetch: ignoring biv %d: non-constant addition at insn %d:",
4175 REGNO (biv->src_reg), INSN_UID (biv->insn));
4176 print_rtl (loop_dump_stream, biv->add_val);
4177 fprintf (loop_dump_stream, "\n");
4179 break;
4182 if (biv->maybe_multiple)
4184 if (loop_dump_stream)
4186 fprintf (loop_dump_stream,
4187 "Prefetch: ignoring biv %d: maybe_multiple at insn %i:",
4188 REGNO (biv->src_reg), INSN_UID (biv->insn));
4189 print_rtl (loop_dump_stream, biv->add_val);
4190 fprintf (loop_dump_stream, "\n");
4192 break;
4195 basestride += INTVAL (biv1->add_val);
4196 biv1 = biv1->next_iv;
4199 if (biv1 || !basestride)
4200 continue;
4202 for (iv = bl->giv; iv; iv = iv->next_iv)
4204 rtx address;
4205 rtx temp;
4206 HOST_WIDE_INT index = 0;
4207 int add = 1;
4208 HOST_WIDE_INT stride = 0;
4209 int stride_sign = 1;
4210 struct check_store_data d;
4211 const char *ignore_reason = NULL;
4212 int size = GET_MODE_SIZE (GET_MODE (iv));
4214 /* See whether an induction variable is interesting to us and if
4215 not, report the reason. */
4216 if (iv->giv_type != DEST_ADDR)
4217 ignore_reason = "giv is not a destination address";
4219 /* We are interested only in constant stride memory references
4220 in order to be able to compute density easily. */
4221 else if (GET_CODE (iv->mult_val) != CONST_INT)
4222 ignore_reason = "stride is not constant";
4224 else
4226 stride = INTVAL (iv->mult_val) * basestride;
4227 if (stride < 0)
4229 stride = -stride;
4230 stride_sign = -1;
4233 /* On some targets, reversed order prefetches are not
4234 worthwhile. */
4235 if (PREFETCH_NO_REVERSE_ORDER && stride_sign < 0)
4236 ignore_reason = "reversed order stride";
4238 /* Prefetch of accesses with an extreme stride might not be
4239 worthwhile, either. */
4240 else if (PREFETCH_NO_EXTREME_STRIDE
4241 && stride > PREFETCH_EXTREME_STRIDE)
4242 ignore_reason = "extreme stride";
4244 /* Ignore GIVs with varying add values; we can't predict the
4245 value for the next iteration. */
4246 else if (!loop_invariant_p (loop, iv->add_val))
4247 ignore_reason = "giv has varying add value";
4249 /* Ignore GIVs in the nested loops; they ought to have been
4250 handled already. */
4251 else if (iv->maybe_multiple)
4252 ignore_reason = "giv is in nested loop";
4255 if (ignore_reason != NULL)
4257 if (loop_dump_stream)
4258 fprintf (loop_dump_stream,
4259 "Prefetch: ignoring giv at %d: %s.\n",
4260 INSN_UID (iv->insn), ignore_reason);
4261 continue;
4264 /* Determine the pointer to the basic array we are examining. It is
4265 the sum of the BIV's initial value and the GIV's add_val. */
4266 address = copy_rtx (iv->add_val);
4267 temp = copy_rtx (bl->initial_value);
4269 address = simplify_gen_binary (PLUS, Pmode, temp, address);
4270 index = remove_constant_addition (&address);
4272 d.mem_write = 0;
4273 d.mem_address = *iv->location;
4275 /* When the GIV is not always executed, we might be better off by
4276 not dirtying the cache pages. */
4277 if (PREFETCH_CONDITIONAL || iv->always_executed)
4278 note_stores (PATTERN (iv->insn), check_store, &d);
4279 else
4281 if (loop_dump_stream)
4282 fprintf (loop_dump_stream, "Prefetch: Ignoring giv at %d: %s\n",
4283 INSN_UID (iv->insn), "in conditional code.");
4284 continue;
4287 /* Attempt to find another prefetch to the same array and see if we
4288 can merge this one. */
4289 for (i = 0; i < num_prefetches; i++)
4290 if (rtx_equal_for_prefetch_p (address, info[i].base_address)
4291 && stride == info[i].stride)
4293 /* If both access the same array (the same location,
4294 just with a small difference in constant indexes), merge
4295 the prefetches. Just issue the later one; the earlier one
4296 will then be covered by the previous iteration's prefetch.
4297 The artificial threshold should not be too small,
4298 but also not bigger than the small portion of memory
4299 usually traversed by a single loop. */
4300 if (index >= info[i].index
4301 && index - info[i].index < PREFETCH_EXTREME_DIFFERENCE)
4303 info[i].write |= d.mem_write;
4304 info[i].bytes_accessed += size;
4305 info[i].index = index;
4306 info[i].giv = iv;
4307 info[i].class = bl;
4308 info[i].base_address = address;
4309 add = 0;
4310 break;
4313 if (index < info[i].index
4314 && info[i].index - index < PREFETCH_EXTREME_DIFFERENCE)
4316 info[i].write |= d.mem_write;
4317 info[i].bytes_accessed += size;
4318 add = 0;
4319 break;
4323 /* Merging failed. */
4324 if (add)
4326 info[num_prefetches].giv = iv;
4327 info[num_prefetches].class = bl;
4328 info[num_prefetches].index = index;
4329 info[num_prefetches].stride = stride;
4330 info[num_prefetches].base_address = address;
4331 info[num_prefetches].write = d.mem_write;
4332 info[num_prefetches].bytes_accessed = size;
4333 num_prefetches++;
4334 if (num_prefetches >= MAX_PREFETCHES)
4336 if (loop_dump_stream)
4337 fprintf (loop_dump_stream,
4338 "Maximal number of prefetches exceeded.\n");
4339 return;
4345 for (i = 0; i < num_prefetches; i++)
4347 int density;
4349 /* Attempt to calculate the total number of bytes fetched by all
4350 iterations of the loop. Avoid overflow. */
4351 if (LOOP_INFO (loop)->n_iterations
4352 && ((unsigned HOST_WIDE_INT) (0xffffffff / info[i].stride)
4353 >= LOOP_INFO (loop)->n_iterations))
4354 info[i].total_bytes = info[i].stride * LOOP_INFO (loop)->n_iterations;
4355 else
4356 info[i].total_bytes = 0xffffffff;
4358 density = info[i].bytes_accessed * 100 / info[i].stride;
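/* For example, a giv accessing 4 bytes per iteration with a stride
   of 16 bytes gets density 25, i.e. 25% of each stride is actually
   touched.  */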
4360 /* Prefetch might be worthwhile only when the loads/stores are dense. */
4361 if (PREFETCH_ONLY_DENSE_MEM)
4362 if (density * 256 > PREFETCH_DENSE_MEM * 100
4363 && (info[i].total_bytes / PREFETCH_BLOCK
4364 >= PREFETCH_BLOCKS_BEFORE_LOOP_MIN))
4366 info[i].prefetch_before_loop = 1;
4367 info[i].prefetch_in_loop
4368 = (info[i].total_bytes / PREFETCH_BLOCK
4369 > PREFETCH_BLOCKS_BEFORE_LOOP_MAX);
4371 else
4373 info[i].prefetch_in_loop = 0, info[i].prefetch_before_loop = 0;
4374 if (loop_dump_stream)
4375 fprintf (loop_dump_stream,
4376 "Prefetch: ignoring giv at %d: %d%% density is too low.\n",
4377 INSN_UID (info[i].giv->insn), density);
4379 else
4380 info[i].prefetch_in_loop = 1, info[i].prefetch_before_loop = 1;
4382 /* Find how many prefetch instructions we'll use within the loop. */
4383 if (info[i].prefetch_in_loop != 0)
4385 info[i].prefetch_in_loop = ((info[i].stride + PREFETCH_BLOCK - 1)
4386 / PREFETCH_BLOCK);
4387 num_real_prefetches += info[i].prefetch_in_loop;
4388 if (info[i].write)
4389 num_real_write_prefetches += info[i].prefetch_in_loop;
4393 /* Determine how many iterations ahead to prefetch within the loop, based
4394 on how many prefetches we currently expect to do within the loop. */
4395 if (num_real_prefetches != 0)
4397 if ((ahead = SIMULTANEOUS_PREFETCHES / num_real_prefetches) == 0)
4399 if (loop_dump_stream)
4400 fprintf (loop_dump_stream,
4401 "Prefetch: ignoring prefetches within loop: ahead is zero; %d < %d\n",
4402 SIMULTANEOUS_PREFETCHES, num_real_prefetches);
4403 num_real_prefetches = 0, num_real_write_prefetches = 0;
4406 /* We'll also use AHEAD to determine how many prefetch instructions to
4407 emit before a loop, so don't leave it zero. */
4408 if (ahead == 0)
4409 ahead = PREFETCH_BLOCKS_BEFORE_LOOP_MAX;
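/* For example (hypothetical numbers), with SIMULTANEOUS_PREFETCHES
   of 8 and 3 prefetch insns within the loop, AHEAD is 8 / 3 == 2, so
   each address is prefetched about two prefetch blocks before its
   use.  */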
4411 for (i = 0; i < num_prefetches; i++)
4413 /* Update if we've decided not to prefetch anything within the loop. */
4414 if (num_real_prefetches == 0)
4415 info[i].prefetch_in_loop = 0;
4417 /* Find how many prefetch instructions we'll use before the loop. */
4418 if (info[i].prefetch_before_loop != 0)
4420 int n = info[i].total_bytes / PREFETCH_BLOCK;
4421 if (n > ahead)
4422 n = ahead;
4423 info[i].prefetch_before_loop = n;
4424 num_prefetches_before += n;
4425 if (info[i].write)
4426 num_write_prefetches_before += n;
4429 if (loop_dump_stream)
4431 if (info[i].prefetch_in_loop == 0
4432 && info[i].prefetch_before_loop == 0)
4433 continue;
4434 fprintf (loop_dump_stream, "Prefetch insn: %d",
4435 INSN_UID (info[i].giv->insn));
4436 fprintf (loop_dump_stream,
4437 "; in loop: %d; before: %d; %s\n",
4438 info[i].prefetch_in_loop,
4439 info[i].prefetch_before_loop,
4440 info[i].write ? "read/write" : "read only");
4441 fprintf (loop_dump_stream,
4442 " density: %d%%; bytes_accessed: %u; total_bytes: %u\n",
4443 (int) (info[i].bytes_accessed * 100 / info[i].stride),
4444 info[i].bytes_accessed, info[i].total_bytes);
4445 fprintf (loop_dump_stream, " index: " HOST_WIDE_INT_PRINT_DEC
4446 "; stride: " HOST_WIDE_INT_PRINT_DEC "; address: ",
4447 info[i].index, info[i].stride);
4448 print_rtl (loop_dump_stream, info[i].base_address);
4449 fprintf (loop_dump_stream, "\n");
4453 if (num_real_prefetches + num_prefetches_before > 0)
4455 /* Record that this loop uses prefetch instructions. */
4456 LOOP_INFO (loop)->has_prefetch = 1;
4458 if (loop_dump_stream)
4460 fprintf (loop_dump_stream, "Real prefetches needed within loop: %d (write: %d)\n",
4461 num_real_prefetches, num_real_write_prefetches);
4462 fprintf (loop_dump_stream, "Real prefetches needed before loop: %d (write: %d)\n",
4463 num_prefetches_before, num_write_prefetches_before);
4467 for (i = 0; i < num_prefetches; i++)
4469 int y;
4471 for (y = 0; y < info[i].prefetch_in_loop; y++)
4473 rtx loc = copy_rtx (*info[i].giv->location);
4474 rtx insn;
4475 int bytes_ahead = PREFETCH_BLOCK * (ahead + y);
4476 rtx before_insn = info[i].giv->insn;
4477 rtx prev_insn = PREV_INSN (info[i].giv->insn);
4478 rtx seq;
4480 /* We can save some effort by offsetting the address on
4481 architectures with offsettable memory references. */
4482 if (offsettable_address_p (0, VOIDmode, loc))
4483 loc = plus_constant (loc, bytes_ahead);
4484 else
4486 rtx reg = gen_reg_rtx (Pmode);
4487 loop_iv_add_mult_emit_before (loop, loc, const1_rtx,
4488 GEN_INT (bytes_ahead), reg,
4489 0, before_insn);
4490 loc = reg;
4493 start_sequence ();
4494 /* Make sure the address operand is valid for prefetch. */
4495 if (! (*insn_data[(int)CODE_FOR_prefetch].operand[0].predicate)
4496 (loc, insn_data[(int)CODE_FOR_prefetch].operand[0].mode))
4497 loc = force_reg (Pmode, loc);
4498 emit_insn (gen_prefetch (loc, GEN_INT (info[i].write),
4499 GEN_INT (3)));
4500 seq = get_insns ();
4501 end_sequence ();
4502 emit_insn_before (seq, before_insn);
4504 /* Check all insns emitted and record the new GIV
4505 information. */
4506 insn = NEXT_INSN (prev_insn);
4507 while (insn != before_insn)
4509 insn = check_insn_for_givs (loop, insn,
4510 info[i].giv->always_executed,
4511 info[i].giv->maybe_multiple);
4512 insn = NEXT_INSN (insn);
4516 if (PREFETCH_BEFORE_LOOP)
4518 /* Emit insns before the loop to fetch the first cache lines or,
4519 if we're not prefetching within the loop, everything we expect
4520 to need. */
4521 for (y = 0; y < info[i].prefetch_before_loop; y++)
4523 rtx reg = gen_reg_rtx (Pmode);
4524 rtx loop_start = loop->start;
4525 rtx init_val = info[i].class->initial_value;
4526 rtx add_val = simplify_gen_binary (PLUS, Pmode,
4527 info[i].giv->add_val,
4528 GEN_INT (y * PREFETCH_BLOCK));
4530 /* Functions called by loop_iv_add_mult_emit_before expect a
4531 non-constant INIT_VAL to have the same mode as REG, which
4532 in this case we know to be Pmode. */
4533 if (GET_MODE (init_val) != Pmode && !CONSTANT_P (init_val))
4535 rtx seq;
4537 start_sequence ();
4538 init_val = convert_to_mode (Pmode, init_val, 0);
4539 seq = get_insns ();
4540 end_sequence ();
4541 loop_insn_emit_before (loop, 0, loop_start, seq);
4543 loop_iv_add_mult_emit_before (loop, init_val,
4544 info[i].giv->mult_val,
4545 add_val, reg, 0, loop_start);
4546 emit_insn_before (gen_prefetch (reg, GEN_INT (info[i].write),
4547 GEN_INT (3)),
4548 loop_start);
4553 return;
4556 /* Communication with routines called via `note_stores'. */
4558 static rtx note_insn;
4560 /* Dummy register to have nonzero DEST_REG for DEST_ADDR type givs. */
4562 static rtx addr_placeholder;
4564 /* ??? Unfinished optimizations, and possible future optimizations,
4565 for the strength reduction code. */
4567 /* ??? The interaction of biv elimination and recognition of 'constant'
4568 bivs may cause problems. */
4570 /* ??? Add heuristics so that DEST_ADDR strength reduction does not cause
4571 performance problems.
4573 Perhaps don't eliminate things that can be combined with an addressing
4574 mode. Find all givs that have the same biv, mult_val, and add_val;
4575 then for each giv, check to see if its only use dies in a following
4576 memory address. If so, generate a new memory address and check to see
4577 if it is valid. If it is valid, then store the modified memory address,
4578 otherwise, mark the giv as not done so that it will get its own iv. */
4580 /* ??? Could try to optimize branches when it is known that a biv is always
4581 positive. */
4583 /* ??? When replacing a biv in a compare insn, we should replace it with
4584 the closest giv so that an optimized branch can still be recognized
4585 by the combiner, e.g. the VAX acb insn. */
4587 /* ??? Many of the checks involving uid_luid could be simplified if regscan
4588 was rerun in loop_optimize whenever a register was added or moved.
4589 Also, some of the optimizations could be a little less conservative. */
4591 /* Searches the insns between INSN and LOOP->END. Returns 1 if there
4592 is a backward branch in that range that branches to somewhere between
4593 LOOP->START and INSN. Returns 0 otherwise. */
4595 /* ??? This is a quadratic algorithm. It could be rewritten to be
4596 linear. In practice this is not a problem, because the function is
4597 seldom called and uses a negligible amount of CPU time on average. */
4599 static int
4600 back_branch_in_range_p (const struct loop *loop, rtx insn)
4602 rtx p, q, target_insn;
4603 rtx loop_start = loop->start;
4604 rtx loop_end = loop->end;
4605 rtx orig_loop_end = loop->end;
4607 /* Stop before we get to the backward branch at the end of the loop. */
4608 loop_end = prev_nonnote_insn (loop_end);
4609 if (BARRIER_P (loop_end))
4610 loop_end = PREV_INSN (loop_end);
4612 /* In case INSN has been deleted, search forward for the first
4613 non-deleted insn following it. */
4614 while (INSN_DELETED_P (insn))
4615 insn = NEXT_INSN (insn);
4617 /* Check for the case where insn is the last insn in the loop. Deal
4618 with the case where INSN was a deleted loop test insn, in which case
4619 it will now be the NOTE_LOOP_END. */
4620 if (insn == loop_end || insn == orig_loop_end)
4621 return 0;
4623 for (p = NEXT_INSN (insn); p != loop_end; p = NEXT_INSN (p))
4625 if (JUMP_P (p))
4627 target_insn = JUMP_LABEL (p);
4629 /* Search from loop_start to insn, to see if one of them is
4630 the target_insn. We can't use INSN_LUID comparisons here,
4631 since insn may not have an LUID entry. */
4632 for (q = loop_start; q != insn; q = NEXT_INSN (q))
4633 if (q == target_insn)
4634 return 1;
4638 return 0;
4641 /* Scan the loop body and call FNCALL for each insn. In addition to the
4642 LOOP and INSN parameters, pass MAYBE_MULTIPLE and NOT_EVERY_ITERATION
4643 to the callback.
4645 NOT_EVERY_ITERATION is 1 if the current insn is not known to be executed
4646 at least once in every loop iteration, except for the last one.
4648 MAYBE_MULTIPLE is 1 if the current insn may be executed more than once
4649 in every loop iteration. */
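/* As an illustrative (hypothetical) example: in

       for (;;)
	 {
	   a[i] = 0;
	   if (p)
	     continue;
	   b[i] = 1;
	 }

   the insns storing to b[i] are scanned with NOT_EVERY_ITERATION set,
   and any insn inside a loop nested in this body would be scanned
   with MAYBE_MULTIPLE set.  */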
4651 typedef rtx (*loop_insn_callback) (struct loop *, rtx, int, int);
4652 static void
4653 for_each_insn_in_loop (struct loop *loop, loop_insn_callback fncall)
4655 int not_every_iteration = 0;
4656 int maybe_multiple = 0;
4657 int past_loop_latch = 0;
4658 bool exit_test_is_entry = false;
4659 rtx p;
4661 /* If loop_scan_start points to the loop exit test, the loop body
4662 cannot be counted on to run on every iteration, and we have to
4663 be wary of subversive use of gotos inside expression
4664 statements. */
4665 if (prev_nonnote_insn (loop->scan_start) != prev_nonnote_insn (loop->start))
4667 exit_test_is_entry = true;
4668 maybe_multiple = back_branch_in_range_p (loop, loop->scan_start);
4671 /* Scan through loop and update NOT_EVERY_ITERATION and MAYBE_MULTIPLE. */
4672 for (p = next_insn_in_loop (loop, loop->scan_start);
4673 p != NULL_RTX;
4674 p = next_insn_in_loop (loop, p))
4676 p = fncall (loop, p, not_every_iteration, maybe_multiple);
4678 /* Past CODE_LABEL, we get to insns that may be executed multiple
4679 times. The only way we can be sure that they can't is if every
4680 jump insn between here and the end of the loop either
4681 returns, exits the loop, is a jump to a location that is still
4682 behind the label, or is a jump to the loop start. */
4684 if (LABEL_P (p))
4686 rtx insn = p;
4688 maybe_multiple = 0;
4690 while (1)
4692 insn = NEXT_INSN (insn);
4693 if (insn == loop->scan_start)
4694 break;
4695 if (insn == loop->end)
4697 if (loop->top != 0)
4698 insn = loop->top;
4699 else
4700 break;
4701 if (insn == loop->scan_start)
4702 break;
4705 if (JUMP_P (insn)
4706 && GET_CODE (PATTERN (insn)) != RETURN
4707 && (!any_condjump_p (insn)
4708 || (JUMP_LABEL (insn) != 0
4709 && JUMP_LABEL (insn) != loop->scan_start
4710 && !loop_insn_first_p (p, JUMP_LABEL (insn)))))
4712 maybe_multiple = 1;
4713 break;
4718 /* Past a jump, we get to insns for which we can't count
4719 on whether they will be executed during each iteration. */
4720 /* This code appears twice in strength_reduce. There is also similar
4721 code in scan_loop. */
4722 if (JUMP_P (p)
4723 /* If we enter the loop in the middle, and scan around to the
4724 beginning, don't set not_every_iteration for that.
4725 This can be any kind of jump, since we want to know if insns
4726 will be executed if the loop is executed. */
4727 && (exit_test_is_entry
4728 || !(JUMP_LABEL (p) == loop->top
4729 && ((NEXT_INSN (NEXT_INSN (p)) == loop->end
4730 && any_uncondjump_p (p))
4731 || (NEXT_INSN (p) == loop->end
4732 && any_condjump_p (p))))))
4734 rtx label = 0;
4736 /* If this is a jump outside the loop, then it also doesn't
4737 matter. Check to see if the target of this branch is on the
4738 loop->exit_labels list. */
4740 for (label = loop->exit_labels; label; label = LABEL_NEXTREF (label))
4741 if (XEXP (label, 0) == JUMP_LABEL (p))
4742 break;
4744 if (!label)
4745 not_every_iteration = 1;
4748 /* Note if we pass a loop latch. If we do, then we cannot clear
4749 NOT_EVERY_ITERATION below when we pass the last CODE_LABEL in
4750 a loop since a jump before the last CODE_LABEL may have started
4751 a new loop iteration.
4753 Note that LOOP_TOP is only set for rotated loops and we need
4754 this check for all loops, so compare against the CODE_LABEL
4755 which immediately follows LOOP_START. */
4756 if (JUMP_P (p)
4757 && JUMP_LABEL (p) == NEXT_INSN (loop->start))
4758 past_loop_latch = 1;
4760 /* Unlike in the code motion pass where MAYBE_NEVER indicates that
4761 an insn may never be executed, NOT_EVERY_ITERATION indicates whether
4762 or not an insn is known to be executed each iteration of the
4763 loop, whether or not any iterations are known to occur.
4765 Therefore, if we have just passed a label and have no more labels
4766 between here and the test insn of the loop, and we have not passed
4767 a jump to the top of the loop, then we know these insns will be
4768 executed each iteration. */
4770 if (not_every_iteration
4771 && !past_loop_latch
4772 && LABEL_P (p)
4773 && no_labels_between_p (p, loop->end))
4774 not_every_iteration = 0;
4778 static void
4779 loop_bivs_find (struct loop *loop)
4781 struct loop_regs *regs = LOOP_REGS (loop);
4782 struct loop_ivs *ivs = LOOP_IVS (loop);
4783 /* Temporary list pointers for traversing ivs->list. */
4784 struct iv_class *bl, **backbl;
4786 ivs->list = 0;
4788 for_each_insn_in_loop (loop, check_insn_for_bivs);
4790 /* Scan ivs->list to remove all regs that proved not to be bivs.
4791 Make a sanity check against regs->n_times_set. */
4792 for (backbl = &ivs->list, bl = *backbl; bl; bl = bl->next)
4794 if (REG_IV_TYPE (ivs, bl->regno) != BASIC_INDUCT
4795 /* This happens if the register was modified by a subreg, etc. */
4796 /* Make sure it is not recognized as a basic induction var: */
4797 || regs->array[bl->regno].n_times_set != bl->biv_count
4798 /* If never incremented, it is invariant that we decided not to
4799 move. So leave it alone. */
4800 || ! bl->incremented)
4802 if (loop_dump_stream)
4803 fprintf (loop_dump_stream, "Biv %d: discarded, %s\n",
4804 bl->regno,
4805 (REG_IV_TYPE (ivs, bl->regno) != BASIC_INDUCT
4806 ? "not induction variable"
4807 : (! bl->incremented ? "never incremented"
4808 : "count error")));
4810 REG_IV_TYPE (ivs, bl->regno) = NOT_BASIC_INDUCT;
4811 *backbl = bl->next;
4813 else
4815 backbl = &bl->next;
4817 if (loop_dump_stream)
4818 fprintf (loop_dump_stream, "Biv %d: verified\n", bl->regno);
4824 /* Determine how BIVs are initialized by looking through the pre-header
4825 extended basic block. */
4826 static void
4827 loop_bivs_init_find (struct loop *loop)
4829 struct loop_ivs *ivs = LOOP_IVS (loop);
4830 /* Temporary list pointers for traversing ivs->list. */
4831 struct iv_class *bl;
4832 int call_seen;
4833 rtx p;
4835 /* Find initial value for each biv by searching backwards from loop_start,
4836 halting at first label. Also record any test condition. */
4838 call_seen = 0;
4839 for (p = loop->start; p && !LABEL_P (p); p = PREV_INSN (p))
4841 rtx test;
4843 note_insn = p;
4845 if (CALL_P (p))
4846 call_seen = 1;
4848 if (INSN_P (p))
4849 note_stores (PATTERN (p), record_initial, ivs);
4851 /* Record any test of a biv that branches around the loop if there is no
4852 store between it and the start of the loop. We only care about tests
4853 with constants and registers and only certain of those. */
4854 if (JUMP_P (p)
4855 && JUMP_LABEL (p) != 0
4856 && next_real_insn (JUMP_LABEL (p)) == next_real_insn (loop->end)
4857 && (test = get_condition_for_loop (loop, p)) != 0
4858 && REG_P (XEXP (test, 0))
4859 && REGNO (XEXP (test, 0)) < max_reg_before_loop
4860 && (bl = REG_IV_CLASS (ivs, REGNO (XEXP (test, 0)))) != 0
4861 && valid_initial_value_p (XEXP (test, 1), p, call_seen, loop->start)
4862 && bl->init_insn == 0)
4864 /* If an NE test, we have an initial value! */
4865 if (GET_CODE (test) == NE)
4867 bl->init_insn = p;
4868 bl->init_set = gen_rtx_SET (VOIDmode,
4869 XEXP (test, 0), XEXP (test, 1));
4871 else
4872 bl->initial_test = test;
4878 /* Look at each biv and see if we can say anything better about its
4879 initial value from any initializing insns set up above. (This is done
4880 in two passes to avoid missing SETs in a PARALLEL.) */
4881 static void
4882 loop_bivs_check (struct loop *loop)
4884 struct loop_ivs *ivs = LOOP_IVS (loop);
4885 /* Temporary list pointers for traversing ivs->list. */
4886 struct iv_class *bl;
4887 struct iv_class **backbl;
4889 for (backbl = &ivs->list; (bl = *backbl); backbl = &bl->next)
4891 rtx src;
4892 rtx note;
4894 if (! bl->init_insn)
4895 continue;
4897 /* If INIT_INSN has a REG_EQUAL or REG_EQUIV note and the value
4898 is a constant, use the value of that. */
4899 if (((note = find_reg_note (bl->init_insn, REG_EQUAL, 0)) != NULL
4900 && CONSTANT_P (XEXP (note, 0)))
4901 || ((note = find_reg_note (bl->init_insn, REG_EQUIV, 0)) != NULL
4902 && CONSTANT_P (XEXP (note, 0))))
4903 src = XEXP (note, 0);
4904 else
4905 src = SET_SRC (bl->init_set);
4907 if (loop_dump_stream)
4908 fprintf (loop_dump_stream,
4909 "Biv %d: initialized at insn %d: initial value ",
4910 bl->regno, INSN_UID (bl->init_insn));
4912 if ((GET_MODE (src) == GET_MODE (regno_reg_rtx[bl->regno])
4913 || GET_MODE (src) == VOIDmode)
4914 && valid_initial_value_p (src, bl->init_insn,
4915 LOOP_INFO (loop)->pre_header_has_call,
4916 loop->start))
4918 bl->initial_value = src;
4920 if (loop_dump_stream)
4922 print_simple_rtl (loop_dump_stream, src);
4923 fputc ('\n', loop_dump_stream);
4926 /* If we can't make it a giv,
4927 let the biv keep its initial value of "itself". */
4928 else if (loop_dump_stream)
4929 fprintf (loop_dump_stream, "is complex\n");
4934 /* Search the loop for general induction variables. */
4936 static void
4937 loop_givs_find (struct loop* loop)
4939 for_each_insn_in_loop (loop, check_insn_for_givs);
4943 /* For each giv for which we still don't know whether or not it is
4944 replaceable, check to see if it is replaceable because its final value
4945 can be calculated. */
4947 static void
4948 loop_givs_check (struct loop *loop)
4950 struct loop_ivs *ivs = LOOP_IVS (loop);
4951 struct iv_class *bl;
4953 for (bl = ivs->list; bl; bl = bl->next)
4955 struct induction *v;
4957 for (v = bl->giv; v; v = v->next_iv)
4958 if (! v->replaceable && ! v->not_replaceable)
4959 check_final_value (loop, v);
4963 /* Try to generate the simplest rtx for the expression
4964 (PLUS (MULT mult1 mult2) add1). This is used to calculate the initial
4965 value of giv's. */
4967 static rtx
4968 fold_rtx_mult_add (rtx mult1, rtx mult2, rtx add1, enum machine_mode mode)
4970 rtx temp, mult_res;
4971 rtx result;
4973 /* The modes must all be the same. This should always be true. For now,
4974 check to make sure. */
4975 if ((GET_MODE (mult1) != mode && GET_MODE (mult1) != VOIDmode)
4976 || (GET_MODE (mult2) != mode && GET_MODE (mult2) != VOIDmode)
4977 || (GET_MODE (add1) != mode && GET_MODE (add1) != VOIDmode))
4978 abort ();
4980 /* Ensure that if at least one of mult1/mult2 is constant, then mult2
4981 will be the constant. */
4982 if (GET_CODE (mult1) == CONST_INT)
4984 temp = mult2;
4985 mult2 = mult1;
4986 mult1 = temp;
4989 mult_res = simplify_binary_operation (MULT, mode, mult1, mult2);
4990 if (! mult_res)
4991 mult_res = gen_rtx_MULT (mode, mult1, mult2);
4993 /* Again, put the constant second. */
4994 if (GET_CODE (add1) == CONST_INT)
4996 temp = add1;
4997 add1 = mult_res;
4998 mult_res = temp;
5001 result = simplify_binary_operation (PLUS, mode, add1, mult_res);
5002 if (! result)
5003 result = gen_rtx_PLUS (mode, add1, mult_res);
5005 return result;
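/* For example (hypothetical RTL), fold_rtx_mult_add applied to
   (const_int 4), (reg 60) and (const_int 8) yields

       (plus (mult (reg 60) (const_int 4)) (const_int 8))

   with the constants canonicalized into the second operands.  */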
5008 /* Searches the list of induction structs for the biv BL, to try to calculate
5009 the total increment value for one iteration of the loop as a constant.
5011 Returns the increment value as an rtx, simplified as much as possible,
5012 if it can be calculated. Otherwise, returns 0. */
5014 static rtx
5015 biv_total_increment (const struct iv_class *bl)
5017 struct induction *v;
5018 rtx result;
5020 /* For the increment, we must check every instruction that sets it. Each
5021 instruction must be executed only once each time through the loop.
5022 To verify this, we check that the insn is always executed, and that
5023 there are no backward branches after the insn that branch to before it.
5024 Also, the insn must have a mult_val of one (to make sure it really is
5025 an increment). */
5027 result = const0_rtx;
5028 for (v = bl->biv; v; v = v->next_iv)
5030 if (v->always_computable && v->mult_val == const1_rtx
5031 && ! v->maybe_multiple
5032 && SCALAR_INT_MODE_P (v->mode))
5034 /* If we have already counted it, skip it. */
5035 if (v->same)
5036 continue;
5038 result = fold_rtx_mult_add (result, const1_rtx, v->add_val, v->mode);
5040 else
5041 return 0;
5044 return result;
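/* For example (hypothetical RTL), a biv incremented once per
   iteration by each of

       (set (reg 70) (plus (reg 70) (const_int 4)))
       (set (reg 70) (plus (reg 70) (const_int 8)))

   has a total increment of (const_int 12).  */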
5047 /* Try to prove that the register is dead after the loop exits. Trace every
5048 loop exit looking for an insn that will always be executed, which sets
5049 the register to some value and appears before the first use of the
5050 register. If successful, return 1; otherwise return 0. */
5052 /* ?? Could be made more intelligent in the handling of jumps, so that
5053 it can search past if statements and other similar structures. */
5055 static int
5056 reg_dead_after_loop (const struct loop *loop, rtx reg)
5058 rtx insn, label;
5059 int jump_count = 0;
5060 int label_count = 0;
5062 /* In addition to checking all exits of this loop, we must also check
5063 all exits of inner nested loops that would exit this loop. We don't
5064 have any way to identify those, so we just give up if there are any
5065 such inner loop exits. */
5067 for (label = loop->exit_labels; label; label = LABEL_NEXTREF (label))
5068 label_count++;
5070 if (label_count != loop->exit_count)
5071 return 0;
5073 /* HACK: Must also search the loop fall through exit, create a label_ref
5074 here which points to loop->end, and append the loop->exit_labels
5075 list to it. */
5076 label = gen_rtx_LABEL_REF (VOIDmode, loop->end);
5077 LABEL_NEXTREF (label) = loop->exit_labels;
5079 for (; label; label = LABEL_NEXTREF (label))
5081 /* Succeed if we find an insn that sets the register, or if we reach the
5082 end of the function. Fail if we find an insn that uses the register, or
5083 if we come to a conditional jump. */
5085 insn = NEXT_INSN (XEXP (label, 0));
5086 while (insn)
5088 if (INSN_P (insn))
5090 rtx set, note;
5092 if (reg_referenced_p (reg, PATTERN (insn)))
5093 return 0;
5095 note = find_reg_equal_equiv_note (insn);
5096 if (note && reg_overlap_mentioned_p (reg, XEXP (note, 0)))
5097 return 0;
5099 set = single_set (insn);
5100 if (set && rtx_equal_p (SET_DEST (set), reg))
5101 break;
5103 if (JUMP_P (insn))
5105 if (GET_CODE (PATTERN (insn)) == RETURN)
5106 break;
5107 else if (!any_uncondjump_p (insn)
5108 /* Prevent infinite loop following infinite loops. */
5109 || jump_count++ > 20)
5110 return 0;
5111 else
5112 insn = JUMP_LABEL (insn);
5116 insn = NEXT_INSN (insn);
5120 /* Success, the register is dead on all loop exits. */
5121 return 1;
5124 /* Try to calculate the final value of the biv, the value it will have at
5125 the end of the loop. If we can do it, return that value. */
5127 static rtx
5128 final_biv_value (const struct loop *loop, struct iv_class *bl)
5130 unsigned HOST_WIDE_INT n_iterations = LOOP_INFO (loop)->n_iterations;
5131 rtx increment, tem;
5133 /* ??? This only works for MODE_INT biv's. Reject all others for now. */
5135 if (GET_MODE_CLASS (bl->biv->mode) != MODE_INT)
5136 return 0;
5138 /* The final value for reversed bivs must be calculated differently than
5139 for ordinary bivs. In this case, there is already an insn after the
5140 loop which sets this biv's final value (if necessary), and there are
5141 no other loop exits, so we can return any value. */
5142 if (bl->reversed)
5144 if (loop_dump_stream)
5145 fprintf (loop_dump_stream,
5146 "Final biv value for %d, reversed biv.\n", bl->regno);
5148 return const0_rtx;
5151 /* Try to calculate the final value as initial value + (number of iterations
5152 * increment). For this to work, increment must be invariant, the only
5153 exit from the loop must be the fall through at the bottom (otherwise
5154 it may not have its final value when the loop exits), and the initial
5155 value of the biv must be invariant. */
5157 if (n_iterations != 0
5158 && ! loop->exit_count
5159 && loop_invariant_p (loop, bl->initial_value))
5161 increment = biv_total_increment (bl);
5163 if (increment && loop_invariant_p (loop, increment))
5165 /* We can calculate the loop exit value; emit insns after the loop
5166 end to calculate this value into a temporary register in
5167 case it is needed later. */
5169 tem = gen_reg_rtx (bl->biv->mode);
5170 record_base_value (REGNO (tem), bl->biv->add_val, 0);
5171 loop_iv_add_mult_sink (loop, increment, GEN_INT (n_iterations),
5172 bl->initial_value, tem);
5174 if (loop_dump_stream)
5175 fprintf (loop_dump_stream,
5176 "Final biv value for %d, calculated.\n", bl->regno);
5178 return tem;
5182 /* Check to see if the biv is dead at all loop exits. */
5183 if (reg_dead_after_loop (loop, bl->biv->src_reg))
5185 if (loop_dump_stream)
5186 fprintf (loop_dump_stream,
5187 "Final biv value for %d, biv dead after loop exit.\n",
5188 bl->regno);
5190 return const0_rtx;
5193 return 0;
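/* For example (hypothetical numbers), a biv with initial value 0,
   total increment 4 and a known iteration count of 10 has final
   value 0 + 4 * 10 == 40, computed into a fresh register after the
   loop end.  */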
5196 /* Return nonzero if it is possible to eliminate the biv BL provided
5197 all givs are reduced. This is possible if either the reg is not
5198 used outside the loop, or we can compute what its final value will
5199 be. */
5201 static int
5202 loop_biv_eliminable_p (struct loop *loop, struct iv_class *bl,
5203 int threshold, int insn_count)
5205 /* For architectures with a decrement_and_branch_until_zero insn,
5206 don't do this if we put a REG_NONNEG note on the endtest for this
5207 biv. */
5209 #ifdef HAVE_decrement_and_branch_until_zero
5210 if (bl->nonneg)
5212 if (loop_dump_stream)
5213 fprintf (loop_dump_stream,
5214 "Cannot eliminate nonneg biv %d.\n", bl->regno);
5215 return 0;
5217 #endif
5219 /* Check that the biv is not used outside the loop, or has a final value.
5220 Compare against bl->init_insn rather than loop->start. We aren't
5221 concerned with any uses of the biv between init_insn and
5222 loop->start since these won't be affected by the value of the biv
5223 elsewhere in the function, so long as init_insn doesn't use the
5224 biv itself. */
5226 if ((REGNO_LAST_LUID (bl->regno) < INSN_LUID (loop->end)
5227 && bl->init_insn
5228 && INSN_UID (bl->init_insn) < max_uid_for_loop
5229 && REGNO_FIRST_LUID (bl->regno) >= INSN_LUID (bl->init_insn)
5230 && ! reg_mentioned_p (bl->biv->dest_reg, SET_SRC (bl->init_set)))
5231 || (bl->final_value = final_biv_value (loop, bl)))
5232 return maybe_eliminate_biv (loop, bl, 0, threshold, insn_count);
5234 if (loop_dump_stream)
5236 fprintf (loop_dump_stream,
5237 "Cannot eliminate biv %d.\n",
5238 bl->regno);
5239 fprintf (loop_dump_stream,
5240 "First use: insn %d, last use: insn %d.\n",
5241 REGNO_FIRST_UID (bl->regno),
5242 REGNO_LAST_UID (bl->regno));
5244 return 0;
5248 /* Reduce each giv of BL that we have decided to reduce. */
5250 static void
5251 loop_givs_reduce (struct loop *loop, struct iv_class *bl)
5253 struct induction *v;
5255 for (v = bl->giv; v; v = v->next_iv)
5257 struct induction *tv;
5258 if (! v->ignore && v->same == 0)
5260 int auto_inc_opt = 0;
5262 /* If the code for derived givs immediately below has already
5263 allocated a new_reg, we must keep it. */
5264 if (! v->new_reg)
5265 v->new_reg = gen_reg_rtx (v->mode);
5267 #ifdef AUTO_INC_DEC
5268 /* If the target has auto-increment addressing modes, and
5269 this is an address giv, then try to put the increment
5270 immediately after its use, so that flow can create an
5271 auto-increment addressing mode. */
5272 /* Don't do this for loops entered at the bottom, to avoid
5273 this invalid transformation:
5274 jmp L; -> jmp L;
5275 TOP: TOP:
5276 use giv use giv
5277 L: inc giv
5278 inc biv L:
5279 test biv test giv
5280 cbr TOP cbr TOP */
5282 if (v->giv_type == DEST_ADDR && bl->biv_count == 1
5283 && bl->biv->always_executed && ! bl->biv->maybe_multiple
5284 /* We don't handle reversed biv's because bl->biv->insn
5285 does not have a valid INSN_LUID. */
5286 && ! bl->reversed
5287 && v->always_executed && ! v->maybe_multiple
5288 && INSN_UID (v->insn) < max_uid_for_loop
5289 && !loop->top)
5291 /* If other giv's have been combined with this one, then
5292 this will work only if all uses of the other giv's occur
5293 before this giv's insn. This is difficult to check.
5295 We simplify this by looking for the common case where
5296 there is one DEST_REG giv, and this giv's insn is the
5297 last use of the dest_reg of that DEST_REG giv. If the
5298 increment occurs after the address giv, then we can
5299 perform the optimization. (Otherwise, the increment
5300 would have to go before other_giv, and we would not be
5301 able to combine it with the address giv to get an
5302 auto-inc address.) */
5303 if (v->combined_with)
5305 struct induction *other_giv = 0;
5307 for (tv = bl->giv; tv; tv = tv->next_iv)
5308 if (tv->same == v)
5310 if (other_giv)
5311 break;
5312 else
5313 other_giv = tv;
5315 if (! tv && other_giv
5316 && REGNO (other_giv->dest_reg) < max_reg_before_loop
5317 && (REGNO_LAST_UID (REGNO (other_giv->dest_reg))
5318 == INSN_UID (v->insn))
5319 && INSN_LUID (v->insn) < INSN_LUID (bl->biv->insn))
5320 auto_inc_opt = 1;
5322 /* Check for case where increment is before the address
5323 giv. Do this test in "loop order". */
5324 else if ((INSN_LUID (v->insn) > INSN_LUID (bl->biv->insn)
5325 && (INSN_LUID (v->insn) < INSN_LUID (loop->scan_start)
5326 || (INSN_LUID (bl->biv->insn)
5327 > INSN_LUID (loop->scan_start))))
5328 || (INSN_LUID (v->insn) < INSN_LUID (loop->scan_start)
5329 && (INSN_LUID (loop->scan_start)
5330 < INSN_LUID (bl->biv->insn))))
5331 auto_inc_opt = -1;
5332 else
5333 auto_inc_opt = 1;
5335 #ifdef HAVE_cc0
5337 rtx prev;
5339 /* We can't put an insn immediately after one setting
5340 cc0, or immediately before one using cc0. */
5341 if ((auto_inc_opt == 1 && sets_cc0_p (PATTERN (v->insn)))
5342 || (auto_inc_opt == -1
5343 && (prev = prev_nonnote_insn (v->insn)) != 0
5344 && INSN_P (prev)
5345 && sets_cc0_p (PATTERN (prev))))
5346 auto_inc_opt = 0;
5348 #endif
5350 if (auto_inc_opt)
5351 v->auto_inc_opt = 1;
5353 #endif
5355 /* For each place where the biv is incremented, add an insn
5356 to increment the new, reduced reg for the giv. */
5357 for (tv = bl->biv; tv; tv = tv->next_iv)
5359 rtx insert_before;
5361 /* Skip if location is the same as a previous one. */
5362 if (tv->same)
5363 continue;
5364 if (! auto_inc_opt)
5365 insert_before = NEXT_INSN (tv->insn);
5366 else if (auto_inc_opt == 1)
5367 insert_before = NEXT_INSN (v->insn);
5368 else
5369 insert_before = v->insn;
5371 if (tv->mult_val == const1_rtx)
5372 loop_iv_add_mult_emit_before (loop, tv->add_val, v->mult_val,
5373 v->new_reg, v->new_reg,
5374 0, insert_before);
5375 else /* tv->mult_val == const0_rtx */
5376 /* A multiply is acceptable here
5377 since this is presumed to be seldom executed. */
5378 loop_iv_add_mult_emit_before (loop, tv->add_val, v->mult_val,
5379 v->add_val, v->new_reg,
5380 0, insert_before);
5383 /* Add code at loop start to initialize giv's reduced reg. */
5385 loop_iv_add_mult_hoist (loop,
5386 extend_value_for_giv (v, bl->initial_value),
5387 v->mult_val, v->add_val, v->new_reg);
5393 /* Check for givs whose first use is their definition and whose
5394 last use is the definition of another giv. If so, it is likely
5395 dead and should not be used to derive another giv nor to
5396 eliminate a biv. */
5398 static void
5399 loop_givs_dead_check (struct loop *loop ATTRIBUTE_UNUSED, struct iv_class *bl)
5401 struct induction *v;
5403 for (v = bl->giv; v; v = v->next_iv)
5405 if (v->ignore
5406 || (v->same && v->same->ignore))
5407 continue;
5409 if (v->giv_type == DEST_REG
5410 && REGNO_FIRST_UID (REGNO (v->dest_reg)) == INSN_UID (v->insn))
5412 struct induction *v1;
5414 for (v1 = bl->giv; v1; v1 = v1->next_iv)
5415 if (REGNO_LAST_UID (REGNO (v->dest_reg)) == INSN_UID (v1->insn))
5416 v->maybe_dead = 1;
5422 static void
5423 loop_givs_rescan (struct loop *loop, struct iv_class *bl, rtx *reg_map)
5425 struct induction *v;
5427 for (v = bl->giv; v; v = v->next_iv)
5429 if (v->same && v->same->ignore)
5430 v->ignore = 1;
5432 if (v->ignore)
5433 continue;
5435 /* Update expression if this was combined, in case other giv was
5436 replaced. */
5437 if (v->same)
5438 v->new_reg = replace_rtx (v->new_reg,
5439 v->same->dest_reg, v->same->new_reg);
5441 /* See if this register is known to be a pointer to something. If
5442 so, see if we can find the alignment. First see if there is a
5443 destination register that is a pointer. If so, this shares the
5444 alignment too. Next see if we can deduce anything from the
5445 computational information. If not, and this is a DEST_ADDR
5446 giv, at least we know that it's a pointer, though we don't know
5447 the alignment. */
5448 if (REG_P (v->new_reg)
5449 && v->giv_type == DEST_REG
5450 && REG_POINTER (v->dest_reg))
5451 mark_reg_pointer (v->new_reg,
5452 REGNO_POINTER_ALIGN (REGNO (v->dest_reg)));
5453 else if (REG_P (v->new_reg)
5454 && REG_POINTER (v->src_reg))
5456 unsigned int align = REGNO_POINTER_ALIGN (REGNO (v->src_reg));
5458 if (align == 0
5459 || GET_CODE (v->add_val) != CONST_INT
5460 || INTVAL (v->add_val) % (align / BITS_PER_UNIT) != 0)
5461 align = 0;
5463 mark_reg_pointer (v->new_reg, align);
5465 else if (REG_P (v->new_reg)
5466 && REG_P (v->add_val)
5467 && REG_POINTER (v->add_val))
5469 unsigned int align = REGNO_POINTER_ALIGN (REGNO (v->add_val));
5471 if (align == 0 || GET_CODE (v->mult_val) != CONST_INT
5472 || INTVAL (v->mult_val) % (align / BITS_PER_UNIT) != 0)
5473 align = 0;
5475 mark_reg_pointer (v->new_reg, align);
5477 else if (REG_P (v->new_reg) && v->giv_type == DEST_ADDR)
5478 mark_reg_pointer (v->new_reg, 0);
5480 if (v->giv_type == DEST_ADDR)
5482 /* Store reduced reg as the address in the memref where we found
5483 this giv. */
5484 if (!validate_change (v->insn, v->location, v->new_reg, 0))
5486 if (loop_dump_stream)
5487 fprintf (loop_dump_stream,
5488 "unable to reduce iv to register in insn %d\n",
5489 INSN_UID (v->insn));
5490 bl->all_reduced = 0;
5491 v->ignore = 1;
5492 continue;
5495 else if (v->replaceable)
5497 reg_map[REGNO (v->dest_reg)] = v->new_reg;
5499 else
5501 rtx original_insn = v->insn;
5502 rtx note;
5504 /* Not replaceable; emit an insn to set the original giv reg from
5505 the reduced giv, same as above. */
5506 v->insn = loop_insn_emit_after (loop, 0, original_insn,
5507 gen_move_insn (v->dest_reg,
5508 v->new_reg));
5510 /* The original insn may have a REG_EQUAL note. This note is
5511 now incorrect and may result in invalid substitutions later.
5512 The original insn is dead, but may be part of a libcall
5513 sequence, which doesn't seem worth the bother of handling. */
5514 note = find_reg_note (original_insn, REG_EQUAL, NULL_RTX);
5515 if (note)
5516 remove_note (original_insn, note);
5519 /* When a loop is reversed, givs which depend on the reversed
5520 biv, and which are live outside the loop, must be set to their
5521 correct final value. This insn is only needed if the giv is
5522 not replaceable. The correct final value is the same as the
5523 value that the giv starts the reversed loop with. */
5524 if (bl->reversed && ! v->replaceable)
5525 loop_iv_add_mult_sink (loop,
5526 extend_value_for_giv (v, bl->initial_value),
5527 v->mult_val, v->add_val, v->dest_reg);
5528 else if (v->final_value)
5529 loop_insn_sink_or_swim (loop,
5530 gen_load_of_final_value (v->dest_reg,
5531 v->final_value));
5533 if (loop_dump_stream)
5535 fprintf (loop_dump_stream, "giv at %d reduced to ",
5536 INSN_UID (v->insn));
5537 print_simple_rtl (loop_dump_stream, v->new_reg);
5538 fprintf (loop_dump_stream, "\n");
5544 static int
5545 loop_giv_reduce_benefit (struct loop *loop ATTRIBUTE_UNUSED,
5546 struct iv_class *bl, struct induction *v,
5547 rtx test_reg)
5549 int add_cost;
5550 int benefit;
5552 benefit = v->benefit;
5553 PUT_MODE (test_reg, v->mode);
5554 add_cost = iv_add_mult_cost (bl->biv->add_val, v->mult_val,
5555 test_reg, test_reg);
5557 /* Reduce benefit if not replaceable, since we will insert a
5558 move-insn to replace the insn that calculates this giv. Don't do
5559 this unless the giv is a user variable, since it will often be
5560 marked non-replaceable because of the duplication of the exit
5561 code outside the loop. In such a case, the copies we insert are
5562 dead and will be deleted. So they don't have a cost. Similar
5563 situations exist. */
5564 /* ??? The new final_[bg]iv_value code does a much better job of
5565 finding replaceable giv's, and hence this code may no longer be
5566 necessary. */
5567 if (! v->replaceable && ! bl->eliminable
5568 && REG_USERVAR_P (v->dest_reg))
5569 benefit -= copy_cost;
5571 /* Decrease the benefit to count the add-insns that we will insert
5572 to increment the reduced reg for the giv. ??? This can
5573 overestimate the run-time cost of the additional insns, e.g. if
5574 there are multiple basic blocks that increment the biv, but only
5575 one of these blocks is executed during each iteration. There is
5576 no good way to detect cases like this with the current structure
5577 of the loop optimizer. This code is more accurate for
5578 determining code size than run-time benefits. */
5579 benefit -= add_cost * bl->biv_count;
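/* Rough numbers, as a sketch: a giv with benefit 6 in a class with
   two biv updates (biv_count == 2) and add_cost 3 ends up with
   6 - 2*3 = 0, i.e. not worth reducing unless the auto-increment
   guess below wins some of that cost back.  */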
5581 /* Decide whether to strength-reduce this giv or to leave the code
5582 unchanged (recompute it from the biv each time it is used). This
5583 decision can be made independently for each giv. */
5585 #ifdef AUTO_INC_DEC
5586 /* Attempt to guess whether autoincrement will handle some of the
5587 new add insns; if so, increase BENEFIT (undo the subtraction of
5588 add_cost that was done above). */
5589 if (v->giv_type == DEST_ADDR
5590 /* Increasing the benefit is risky, since this is only a guess.
5591 Avoid increasing register pressure in cases where there would
5592 be no other benefit from reducing this giv. */
5593 && benefit > 0
5594 && GET_CODE (v->mult_val) == CONST_INT)
5596 int size = GET_MODE_SIZE (GET_MODE (v->mem));
5598 if (HAVE_POST_INCREMENT
5599 && INTVAL (v->mult_val) == size)
5600 benefit += add_cost * bl->biv_count;
5601 else if (HAVE_PRE_INCREMENT
5602 && INTVAL (v->mult_val) == size)
5603 benefit += add_cost * bl->biv_count;
5604 else if (HAVE_POST_DECREMENT
5605 && -INTVAL (v->mult_val) == size)
5606 benefit += add_cost * bl->biv_count;
5607 else if (HAVE_PRE_DECREMENT
5608 && -INTVAL (v->mult_val) == size)
5609 benefit += add_cost * bl->biv_count;
5611 #endif
5613 return benefit;
5617 /* Free IV structures for LOOP. */
5619 static void
5620 loop_ivs_free (struct loop *loop)
5622 struct loop_ivs *ivs = LOOP_IVS (loop);
5623 struct iv_class *iv = ivs->list;
5625 free (ivs->regs);
5627 while (iv)
5629 struct iv_class *next = iv->next;
5630 struct induction *induction;
5631 struct induction *next_induction;
5633 for (induction = iv->biv; induction; induction = next_induction)
5635 next_induction = induction->next_iv;
5636 free (induction);
5638 for (induction = iv->giv; induction; induction = next_induction)
5640 next_induction = induction->next_iv;
5641 free (induction);
5644 free (iv);
5645 iv = next;
5649 /* Look back before LOOP->START for the insn that sets REG and return
5650 the equivalent constant if there is a REG_EQUAL note otherwise just
5651 the SET_SRC of REG. */
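/* Illustration with a hypothetical pseudo r70: given a preheader
   containing

	r70 <- expr		; insn with REG_EQUAL (const_int 10)

   this returns (const_int 10); if the note is missing or not
   constant it falls back to the SET_SRC, and to REG itself when no
   suitable set is found or the value is modified before the loop
   starts.  */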
5653 static rtx
5654 loop_find_equiv_value (const struct loop *loop, rtx reg)
5656 rtx loop_start = loop->start;
5657 rtx insn, set;
5658 rtx ret;
5660 ret = reg;
5661 for (insn = PREV_INSN (loop_start); insn; insn = PREV_INSN (insn))
5663 if (LABEL_P (insn))
5664 break;
5666 else if (INSN_P (insn) && reg_set_p (reg, insn))
5668 /* We found the last insn before the loop that sets the register.
5669 If it sets the entire register, and has a REG_EQUAL note,
5670 then use the value of the REG_EQUAL note. */
5671 if ((set = single_set (insn))
5672 && (SET_DEST (set) == reg))
5674 rtx note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
5676 /* Only use the REG_EQUAL note if it is a constant.
5677 Other things, divide in particular, will cause
5678 problems later if we use them. */
5679 if (note && GET_CODE (XEXP (note, 0)) != EXPR_LIST
5680 && CONSTANT_P (XEXP (note, 0)))
5681 ret = XEXP (note, 0);
5682 else
5683 ret = SET_SRC (set);
5685 /* We cannot do this if it changes between the
5686 assignment and loop start though. */
5687 if (modified_between_p (ret, insn, loop_start))
5688 ret = reg;
5690 break;
5693 return ret;
5696 /* Find and return register term common to both expressions OP0 and
5697 OP1 or NULL_RTX if no such term exists. Each expression must be a
5698 REG or a PLUS of a REG. */
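/* Small sketch: for op0 = (plus r1 (const_int 4)) and op1 = r1 the
   common term is r1; for op0 = r1 and op1 = r2 there is none and
   NULL_RTX is returned.  */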
5700 static rtx
5701 find_common_reg_term (rtx op0, rtx op1)
5703 if ((REG_P (op0) || GET_CODE (op0) == PLUS)
5704 && (REG_P (op1) || GET_CODE (op1) == PLUS))
5706 rtx op00;
5707 rtx op01;
5708 rtx op10;
5709 rtx op11;
5711 if (GET_CODE (op0) == PLUS)
5712 op01 = XEXP (op0, 1), op00 = XEXP (op0, 0);
5713 else
5714 op01 = const0_rtx, op00 = op0;
5716 if (GET_CODE (op1) == PLUS)
5717 op11 = XEXP (op1, 1), op10 = XEXP (op1, 0);
5718 else
5719 op11 = const0_rtx, op10 = op1;
5721 /* Find and return common register term if present. */
5722 if (REG_P (op00) && (op00 == op10 || op00 == op11))
5723 return op00;
5724 else if (REG_P (op01) && (op01 == op10 || op01 == op11))
5725 return op01;
5728 /* No common register term found. */
5729 return NULL_RTX;
5732 /* Determine the loop iterator and calculate the number of loop
5733 iterations. Returns the exact number of loop iterations if it can
5734 be calculated, otherwise returns zero. */
5736 static unsigned HOST_WIDE_INT
5737 loop_iterations (struct loop *loop)
5739 struct loop_info *loop_info = LOOP_INFO (loop);
5740 struct loop_ivs *ivs = LOOP_IVS (loop);
5741 rtx comparison, comparison_value;
5742 rtx iteration_var, initial_value, increment, final_value;
5743 enum rtx_code comparison_code;
5744 HOST_WIDE_INT inc;
5745 unsigned HOST_WIDE_INT abs_inc;
5746 unsigned HOST_WIDE_INT abs_diff;
5747 int off_by_one;
5748 int increment_dir;
5749 int unsigned_p, compare_dir, final_larger;
5750 rtx last_loop_insn;
5751 struct iv_class *bl;
5753 loop_info->n_iterations = 0;
5754 loop_info->initial_value = 0;
5755 loop_info->initial_equiv_value = 0;
5756 loop_info->comparison_value = 0;
5757 loop_info->final_value = 0;
5758 loop_info->final_equiv_value = 0;
5759 loop_info->increment = 0;
5760 loop_info->iteration_var = 0;
5761 loop_info->iv = 0;
5763 /* We used to use prev_nonnote_insn here, but that fails because it might
5764 accidentally get the branch for a contained loop if the branch for this
5765 loop was deleted. We can only trust branches immediately before the
5766 loop_end. */
5767 last_loop_insn = PREV_INSN (loop->end);
5769 /* ??? We should probably try harder to find the jump insn
5770 at the end of the loop. The following code assumes that
5771 the last loop insn is a jump to the top of the loop. */
5772 if (!JUMP_P (last_loop_insn))
5774 if (loop_dump_stream)
5775 fprintf (loop_dump_stream,
5776 "Loop iterations: No final conditional branch found.\n");
5777 return 0;
5780 /* If there is more than a single jump to the top of the loop

5781 we cannot (easily) determine the iteration count. */
5782 if (LABEL_NUSES (JUMP_LABEL (last_loop_insn)) > 1)
5784 if (loop_dump_stream)
5785 fprintf (loop_dump_stream,
5786 "Loop iterations: Loop has multiple back edges.\n");
5787 return 0;
5790 /* Find the iteration variable. If the last insn is a conditional
5791 branch, and the insn before tests a register value, make that the
5792 iteration variable. */
5794 comparison = get_condition_for_loop (loop, last_loop_insn);
5795 if (comparison == 0)
5797 if (loop_dump_stream)
5798 fprintf (loop_dump_stream,
5799 "Loop iterations: No final comparison found.\n");
5800 return 0;
5803 /* ??? Get_condition may switch position of induction variable and
5804 invariant register when it canonicalizes the comparison. */
5806 comparison_code = GET_CODE (comparison);
5807 iteration_var = XEXP (comparison, 0);
5808 comparison_value = XEXP (comparison, 1);
5810 if (!REG_P (iteration_var))
5812 if (loop_dump_stream)
5813 fprintf (loop_dump_stream,
5814 "Loop iterations: Comparison not against register.\n");
5815 return 0;
5818 /* The only new registers that are created before loop iterations
5819 are givs made from biv increments or registers created by
5820 load_mems. In the latter case, it is possible that try_copy_prop
5821 will propagate a new pseudo into the old iteration register but
5822 this will be marked by having the REG_USERVAR_P bit set. */
5824 if ((unsigned) REGNO (iteration_var) >= ivs->n_regs
5825 && ! REG_USERVAR_P (iteration_var))
5826 abort ();
5828 /* Determine the initial value of the iteration variable, and the amount
5829 that it is incremented each loop. Use the tables constructed by
5830 the strength reduction pass to calculate these values. */
5832 /* Clear the result values, in case no answer can be found. */
5833 initial_value = 0;
5834 increment = 0;
5836 /* The iteration variable can be either a giv or a biv. Check to see
5837 which it is, and compute the variable's initial value, and increment
5838 value if possible. */
5840 /* If this is a new register, can't handle it since we don't have any
5841 reg_iv_type entry for it. */
5842 if ((unsigned) REGNO (iteration_var) >= ivs->n_regs)
5844 if (loop_dump_stream)
5845 fprintf (loop_dump_stream,
5846 "Loop iterations: No reg_iv_type entry for iteration var.\n");
5847 return 0;
5850 /* Reject iteration variables larger than the host wide int size, since they
5851 could result in a number of iterations greater than the range of our
5852 `unsigned HOST_WIDE_INT' variable loop_info->n_iterations. */
5853 else if ((GET_MODE_BITSIZE (GET_MODE (iteration_var))
5854 > HOST_BITS_PER_WIDE_INT))
5856 if (loop_dump_stream)
5857 fprintf (loop_dump_stream,
5858 "Loop iterations: Iteration var rejected because mode too large.\n");
5859 return 0;
5861 else if (GET_MODE_CLASS (GET_MODE (iteration_var)) != MODE_INT)
5863 if (loop_dump_stream)
5864 fprintf (loop_dump_stream,
5865 "Loop iterations: Iteration var not an integer.\n");
5866 return 0;
5869 /* Try swapping the comparison to identify a suitable iv. */
5870 if (REG_IV_TYPE (ivs, REGNO (iteration_var)) != BASIC_INDUCT
5871 && REG_IV_TYPE (ivs, REGNO (iteration_var)) != GENERAL_INDUCT
5872 && REG_P (comparison_value)
5873 && REGNO (comparison_value) < ivs->n_regs)
5875 rtx temp = comparison_value;
5876 comparison_code = swap_condition (comparison_code);
5877 comparison_value = iteration_var;
5878 iteration_var = temp;
5881 if (REG_IV_TYPE (ivs, REGNO (iteration_var)) == BASIC_INDUCT)
5883 if (REGNO (iteration_var) >= ivs->n_regs)
5884 abort ();
5886 /* Grab initial value, only useful if it is a constant. */
5887 bl = REG_IV_CLASS (ivs, REGNO (iteration_var));
5888 initial_value = bl->initial_value;
5889 if (!bl->biv->always_executed || bl->biv->maybe_multiple)
5891 if (loop_dump_stream)
5892 fprintf (loop_dump_stream,
5893 "Loop iterations: Basic induction var not set once in each iteration.\n");
5894 return 0;
5897 increment = biv_total_increment (bl);
5899 else if (REG_IV_TYPE (ivs, REGNO (iteration_var)) == GENERAL_INDUCT)
5901 HOST_WIDE_INT offset = 0;
5902 struct induction *v = REG_IV_INFO (ivs, REGNO (iteration_var));
5903 rtx biv_initial_value;
5905 if (REGNO (v->src_reg) >= ivs->n_regs)
5906 abort ();
5908 if (!v->always_executed || v->maybe_multiple)
5910 if (loop_dump_stream)
5911 fprintf (loop_dump_stream,
5912 "Loop iterations: General induction var not set once in each iteration.\n");
5913 return 0;
5916 bl = REG_IV_CLASS (ivs, REGNO (v->src_reg));
5918 /* Increment value is mult_val times the increment value of the biv. */
5920 increment = biv_total_increment (bl);
5921 if (increment)
5923 struct induction *biv_inc;
5925 increment = fold_rtx_mult_add (v->mult_val,
5926 extend_value_for_giv (v, increment),
5927 const0_rtx, v->mode);
5928 /* The caller assumes that one full increment has occurred at the
5929 first loop test. But that's not true when the biv is incremented
5930 after the giv is set (which is the usual case), e.g.:
5931 i = 6; do {;} while (i++ < 9).
5932 Therefore, we bias the initial value by subtracting the amount of
5933 the increment that occurs between the giv set and the giv test. */
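/* Sketch of the bias: in the do-while above the increment i++ falls
   between the giv set and the loop test, so offset becomes -1 and
   (for an iterator giv with mult_val 1 and add_val 0) the biased
   initial value is 6 - 1 = 5.  */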
5934 for (biv_inc = bl->biv; biv_inc; biv_inc = biv_inc->next_iv)
5936 if (loop_insn_first_p (v->insn, biv_inc->insn))
5938 if (REG_P (biv_inc->add_val))
5940 if (loop_dump_stream)
5941 fprintf (loop_dump_stream,
5942 "Loop iterations: Basic induction var add_val is REG %d.\n",
5943 REGNO (biv_inc->add_val));
5944 return 0;
5947 /* If we have already counted it, skip it. */
5948 if (biv_inc->same)
5949 continue;
5951 offset -= INTVAL (biv_inc->add_val);
5955 if (loop_dump_stream)
5956 fprintf (loop_dump_stream,
5957 "Loop iterations: Giv iterator, initial value bias %ld.\n",
5958 (long) offset);
5960 /* Initial value is mult_val times the biv's initial value plus
5961 add_val. Only useful if it is a constant. */
5962 biv_initial_value = extend_value_for_giv (v, bl->initial_value);
5963 initial_value
5964 = fold_rtx_mult_add (v->mult_val,
5965 plus_constant (biv_initial_value, offset),
5966 v->add_val, v->mode);
5968 else
5970 if (loop_dump_stream)
5971 fprintf (loop_dump_stream,
5972 "Loop iterations: Not basic or general induction var.\n");
5973 return 0;
5976 if (initial_value == 0)
5977 return 0;
5979 unsigned_p = 0;
5980 off_by_one = 0;
5981 switch (comparison_code)
5983 case LEU:
5984 unsigned_p = 1;
5985 case LE:
5986 compare_dir = 1;
5987 off_by_one = 1;
5988 break;
5989 case GEU:
5990 unsigned_p = 1;
5991 case GE:
5992 compare_dir = -1;
5993 off_by_one = -1;
5994 break;
5995 case EQ:
5996 /* Cannot determine loop iterations with this case. */
5997 compare_dir = 0;
5998 break;
5999 case LTU:
6000 unsigned_p = 1;
6001 case LT:
6002 compare_dir = 1;
6003 break;
6004 case GTU:
6005 unsigned_p = 1;
6006 case GT:
6007 compare_dir = -1;
6008 break;
6009 case NE:
6010 compare_dir = 0;
6011 break;
6012 default:
6013 abort ();
6016 /* If the comparison value is an invariant register, then try to find
6017 its value from the insns before the start of the loop. */
6019 final_value = comparison_value;
6020 if (REG_P (comparison_value)
6021 && loop_invariant_p (loop, comparison_value))
6023 final_value = loop_find_equiv_value (loop, comparison_value);
6025 /* If we don't get an invariant final value, we are better
6026 off with the original register. */
6027 if (! loop_invariant_p (loop, final_value))
6028 final_value = comparison_value;
6031 /* Calculate the approximate final value of the induction variable
6032 (on the last successful iteration). The exact final value
6033 depends on the branch operator, and increment sign. It will be
6034 wrong if the iteration variable is not incremented by one each
6035 time through the loop and (comparison_value + off_by_one -
6036 initial_value) % increment != 0.
6037 ??? Note that the final_value may overflow and thus final_larger
6038 will be bogus. A potentially infinite loop will be classified
6039 as immediate, e.g. for (i = 0x7ffffff0; i <= 0x7fffffff; i++) */
6040 if (off_by_one)
6041 final_value = plus_constant (final_value, off_by_one);
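/* For example (a sketch): with for (i = 0; i <= 9; i++) the
   comparison is LE, so off_by_one is 1 and the approximate final
   value becomes 9 + 1 = 10.  */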
6043 /* Save the calculated values describing this loop's bounds, in case
6044 precondition_loop_p will need them later. These values can not be
6045 recalculated inside precondition_loop_p because strength reduction
6046 optimizations may obscure the loop's structure.
6048 These values are only required by precondition_loop_p and insert_bct
6049 whenever the number of iterations cannot be computed at compile time.
6050 Only the difference between final_value and initial_value is
6051 important. Note that final_value is only approximate. */
6052 loop_info->initial_value = initial_value;
6053 loop_info->comparison_value = comparison_value;
6054 loop_info->final_value = plus_constant (comparison_value, off_by_one);
6055 loop_info->increment = increment;
6056 loop_info->iteration_var = iteration_var;
6057 loop_info->comparison_code = comparison_code;
6058 loop_info->iv = bl;
6060 /* Try to determine the iteration count for loops such
6061 as for (i = init; i < init + const; i++). When running the
6062 loop optimization twice, the first pass often converts simple
6063 loops into this form. */
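/* Sketch with hypothetical pseudos: initial_value = r80,
   final_value = (plus r81 (const_int 40)).  If the preheader shows
   r80 to be equivalent to r81 (or to r81 plus a constant), the
   difference, and hence the iteration count, becomes computable
   even though neither bound is constant by itself.  */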
6065 if (REG_P (initial_value))
6067 rtx reg1;
6068 rtx reg2;
6069 rtx const2;
6071 reg1 = initial_value;
6072 if (GET_CODE (final_value) == PLUS)
6073 reg2 = XEXP (final_value, 0), const2 = XEXP (final_value, 1);
6074 else
6075 reg2 = final_value, const2 = const0_rtx;
6077 /* Check for initial_value = reg1, final_value = reg2 + const2,
6078 where reg1 != reg2. */
6079 if (REG_P (reg2) && reg2 != reg1)
6081 rtx temp;
6083 /* Find what reg1 is equivalent to. Hopefully it will
6084 either be reg2 or reg2 plus a constant. */
6085 temp = loop_find_equiv_value (loop, reg1);
6087 if (find_common_reg_term (temp, reg2))
6088 initial_value = temp;
6089 else if (loop_invariant_p (loop, reg2))
6091 /* Find what reg2 is equivalent to. Hopefully it will
6092 either be reg1 or reg1 plus a constant. Let's ignore
6093 the latter case for now since it is not so common. */
6094 temp = loop_find_equiv_value (loop, reg2);
6096 if (temp == loop_info->iteration_var)
6097 temp = initial_value;
6098 if (temp == reg1)
6099 final_value = (const2 == const0_rtx)
6100 ? reg1 : gen_rtx_PLUS (GET_MODE (reg1), reg1, const2);
6105 loop_info->initial_equiv_value = initial_value;
6106 loop_info->final_equiv_value = final_value;
6108 /* For EQ comparison loops, we don't have a valid final value.
6109 Check this now so that we won't leave an invalid value if we
6110 return early for any other reason. */
6111 if (comparison_code == EQ)
6112 loop_info->final_equiv_value = loop_info->final_value = 0;
6114 if (increment == 0)
6116 if (loop_dump_stream)
6117 fprintf (loop_dump_stream,
6118 "Loop iterations: Increment value can't be calculated.\n");
6119 return 0;
6122 if (GET_CODE (increment) != CONST_INT)
6124 /* If we have a REG, check to see if REG holds a constant value. */
6125 /* ??? Other RTL, such as (neg (reg)) is possible here, but it isn't
6126 clear if it is worthwhile to try to handle such RTL. */
6127 if (REG_P (increment) || GET_CODE (increment) == SUBREG)
6128 increment = loop_find_equiv_value (loop, increment);
6130 if (GET_CODE (increment) != CONST_INT)
6132 if (loop_dump_stream)
6134 fprintf (loop_dump_stream,
6135 "Loop iterations: Increment value not constant ");
6136 print_simple_rtl (loop_dump_stream, increment);
6137 fprintf (loop_dump_stream, ".\n");
6139 return 0;
6141 loop_info->increment = increment;
6144 if (GET_CODE (initial_value) != CONST_INT)
6146 if (loop_dump_stream)
6148 fprintf (loop_dump_stream,
6149 "Loop iterations: Initial value not constant ");
6150 print_simple_rtl (loop_dump_stream, initial_value);
6151 fprintf (loop_dump_stream, ".\n");
6153 return 0;
6155 else if (GET_CODE (final_value) != CONST_INT)
6157 if (loop_dump_stream)
6159 fprintf (loop_dump_stream,
6160 "Loop iterations: Final value not constant ");
6161 print_simple_rtl (loop_dump_stream, final_value);
6162 fprintf (loop_dump_stream, ".\n");
6164 return 0;
6166 else if (comparison_code == EQ)
6168 rtx inc_once;
6170 if (loop_dump_stream)
6171 fprintf (loop_dump_stream, "Loop iterations: EQ comparison loop.\n");
6173 inc_once = gen_int_mode (INTVAL (initial_value) + INTVAL (increment),
6174 GET_MODE (iteration_var));
6176 if (inc_once == final_value)
6178 /* The iterator value once through the loop is equal to the
6179 comparison value. Either we have an infinite loop, or
6180 we'll loop twice. */
6181 if (increment == const0_rtx)
6182 return 0;
6183 loop_info->n_iterations = 2;
6185 else
6186 loop_info->n_iterations = 1;
6188 if (GET_CODE (loop_info->initial_value) == CONST_INT)
6189 loop_info->final_value
6190 = gen_int_mode ((INTVAL (loop_info->initial_value)
6191 + loop_info->n_iterations * INTVAL (increment)),
6192 GET_MODE (iteration_var));
6193 else
6194 loop_info->final_value
6195 = plus_constant (loop_info->initial_value,
6196 loop_info->n_iterations * INTVAL (increment));
6197 loop_info->final_equiv_value
6198 = gen_int_mode ((INTVAL (initial_value)
6199 + loop_info->n_iterations * INTVAL (increment)),
6200 GET_MODE (iteration_var));
6201 return loop_info->n_iterations;
6204 /* Final_larger is 1 if the final value is larger, 0 if they are equal, -1 otherwise. */
6205 if (unsigned_p)
6206 final_larger
6207 = ((unsigned HOST_WIDE_INT) INTVAL (final_value)
6208 > (unsigned HOST_WIDE_INT) INTVAL (initial_value))
6209 - ((unsigned HOST_WIDE_INT) INTVAL (final_value)
6210 < (unsigned HOST_WIDE_INT) INTVAL (initial_value));
6211 else
6212 final_larger = (INTVAL (final_value) > INTVAL (initial_value))
6213 - (INTVAL (final_value) < INTVAL (initial_value));
6215 if (INTVAL (increment) > 0)
6216 increment_dir = 1;
6217 else if (INTVAL (increment) == 0)
6218 increment_dir = 0;
6219 else
6220 increment_dir = -1;
6222 /* There are 27 different cases: compare_dir = -1, 0, 1;
6223 final_larger = -1, 0, 1; increment_dir = -1, 0, 1.
6224 There are 4 normal cases, 4 reverse cases (where the iteration variable
6225 will overflow before the loop exits), 4 infinite loop cases, and 15
6226 immediate exit (0 or 1 iteration depending on loop type) cases.
6227 Only try to optimize the normal cases. */
6229 /* (compare_dir/final_larger/increment_dir)
6230 Normal cases: (0/-1/-1), (0/1/1), (-1/-1/-1), (1/1/1)
6231 Reverse cases: (0/-1/1), (0/1/-1), (-1/-1/1), (1/1/-1)
6232 Infinite loops: (0/-1/0), (0/1/0), (-1/-1/0), (1/1/0)
6233 Immediate exit: (0/0/X), (-1/0/X), (-1/1/X), (1/0/X), (1/-1/X) */
6235 /* ?? If the meaning of reverse loops (where the iteration variable
6236 will overflow before the loop exits) is undefined, then we could
6237 eliminate all of these special checks, and just always assume
6238 the loops are normal/immediate/infinite. Note that this means
6239 the sign of increment_dir does not have to be known. Also,
6240 since it does not really hurt if immediate exit loops or infinite loops
6241 are optimized, those cases could be ignored as well, and hence all
6242 loops can be optimized.
6244 According to the ANSI C standard, the result in the reverse loop case is
6245 undefined, because the action on signed overflow is undefined.
6247 See also the special test for NE loops below. */
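/* Concrete classification sketch: for (i = 0; i < 10; i++) gives
   compare_dir = 1, final_larger = 1, increment_dir = 1, the normal
   case (1/1/1); for (i = 10; i < 0; i++) gives (1/-1/1), one of the
   immediate exit cases.  */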
6249 if (final_larger == increment_dir && final_larger != 0
6250 && (final_larger == compare_dir || compare_dir == 0))
6251 /* Normal case. */
6252 ;
6253 else
6255 if (loop_dump_stream)
6256 fprintf (loop_dump_stream, "Loop iterations: Not normal loop.\n");
6257 return 0;
6260 /* Calculate the number of iterations, final_value is only an approximation,
6261 so correct for that. Note that abs_diff and n_iterations are
6262 unsigned, because they can be as large as 2^n - 1. */
6264 inc = INTVAL (increment);
6265 if (inc > 0)
6267 abs_diff = INTVAL (final_value) - INTVAL (initial_value);
6268 abs_inc = inc;
6270 else if (inc < 0)
6272 abs_diff = INTVAL (initial_value) - INTVAL (final_value);
6273 abs_inc = -inc;
6275 else
6276 abort ();
6278 /* Given that iteration_var is going to iterate over its own mode,
6279 not HOST_WIDE_INT, disregard higher bits that might have come
6280 into the picture due to sign extension of initial and final
6281 values. */
6282 abs_diff &= ((unsigned HOST_WIDE_INT) 1
6283 << (GET_MODE_BITSIZE (GET_MODE (iteration_var)) - 1)
6284 << 1) - 1;
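/* E.g. (sketch): for a QImode iterator the mask is
   ((unsigned HOST_WIDE_INT) 1 << 7 << 1) - 1 == 0xff; the double
   shift avoids overflow when the iterator is as wide as
   HOST_WIDE_INT.  */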
6286 /* For NE tests, make sure that the iteration variable won't miss
6287 the final value. If abs_diff mod abs_incr is not zero, then the
6288 iteration variable will overflow before the loop exits, and we
6289 can not calculate the number of iterations. */
6290 if (compare_dir == 0 && (abs_diff % abs_inc) != 0)
6291 return 0;
6293 /* Note that the number of iterations could be calculated using
6294 (abs_diff + abs_inc - 1) / abs_inc, provided care was taken to
6295 handle potential overflow of the summation. */
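/* Worked sketch: initial value 0, final value 10, increment 3 give
   abs_diff = 10 and abs_inc = 3, hence 10/3 + (10 % 3 != 0)
   = 3 + 1 = 4 iterations.  */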
6296 loop_info->n_iterations = abs_diff / abs_inc + ((abs_diff % abs_inc) != 0);
6297 return loop_info->n_iterations;
6300 /* Perform strength reduction and induction variable elimination.
6302 Pseudo registers created during this function will be beyond the
6303 last valid index in several tables including
6304 REGS->ARRAY[I].N_TIMES_SET and REGNO_LAST_UID. This does not cause a
6305 problem here, because the added registers cannot be givs outside of
6306 their loop, and hence will never be reconsidered. But scan_loop
6307 must check regnos to make sure they are in bounds. */
6309 static void
6310 strength_reduce (struct loop *loop, int flags)
6312 struct loop_info *loop_info = LOOP_INFO (loop);
6313 struct loop_regs *regs = LOOP_REGS (loop);
6314 struct loop_ivs *ivs = LOOP_IVS (loop);
6315 rtx p;
6316 /* Temporary list pointer for traversing ivs->list. */
6317 struct iv_class *bl;
6318 /* Ratio of extra register life span we can justify
6319 for saving an instruction. More if loop doesn't call subroutines
6320 since in that case saving an insn makes more difference
6321 and more registers are available. */
6322 /* ??? could set this to last value of threshold in move_movables */
6323 int threshold = (loop_info->has_call ? 1 : 2) * (3 + n_non_fixed_regs);
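/* E.g. (sketch): with 29 non-fixed registers and no calls in the
   loop, threshold is 2 * (3 + 29) = 64 units of extra register
   life span per instruction saved.  */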
6324 /* Map of pseudo-register replacements. */
6325 rtx *reg_map = NULL;
6326 int reg_map_size;
6327 rtx test_reg = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
6328 int insn_count = count_insns_in_loop (loop);
6330 addr_placeholder = gen_reg_rtx (Pmode);
6332 ivs->n_regs = max_reg_before_loop;
6333 ivs->regs = xcalloc (ivs->n_regs, sizeof (struct iv));
6335 /* Find all BIVs in loop. */
6336 loop_bivs_find (loop);
6338 /* Exit if there are no bivs. */
6339 if (! ivs->list)
6341 loop_ivs_free (loop);
6342 return;
6345 /* Determine how BIVS are initialized by looking through pre-header
6346 extended basic block. */
6347 loop_bivs_init_find (loop);
6349 /* Look at each biv and see if we can say anything better about its
6350 initial value from any initializing insns set up above. */
6351 loop_bivs_check (loop);
6353 /* Search the loop for general induction variables. */
6354 loop_givs_find (loop);
6356 /* Try to calculate and save the number of loop iterations. This is
6357 set to zero if the actual number can not be calculated. This must
6358 be called after all giv's have been identified, since otherwise it may
6359 fail if the iteration variable is a giv. */
6360 loop_iterations (loop);
6362 #ifdef HAVE_prefetch
6363 if (flags & LOOP_PREFETCH)
6364 emit_prefetch_instructions (loop);
6365 #endif
6367 /* Now for each giv for which we still don't know whether or not it is
6368 replaceable, check to see if it is replaceable because its final value
6369 can be calculated. This must be done after loop_iterations is called,
6370 so that final_giv_value will work correctly. */
6371 loop_givs_check (loop);
6373 /* Try to prove that the loop counter variable (if any) is always
6374 nonnegative; if so, record that fact with a REG_NONNEG note
6375 so that "decrement and branch until zero" insn can be used. */
6376 check_dbra_loop (loop, insn_count);
6378 /* Create reg_map to hold substitutions for replaceable giv regs.
6379 Some givs might have been made from biv increments, so look at
6380 ivs->reg_iv_type for a suitable size. */
6381 reg_map_size = ivs->n_regs;
6382 reg_map = xcalloc (reg_map_size, sizeof (rtx));
6384 /* Examine each iv class for feasibility of strength reduction/induction
6385 variable elimination. */
6387 for (bl = ivs->list; bl; bl = bl->next)
6389 struct induction *v;
6390 int benefit;
6392 /* Test whether it will be possible to eliminate this biv
6393 provided all givs are reduced. */
6394 bl->eliminable = loop_biv_eliminable_p (loop, bl, threshold, insn_count);
6396 /* This will be true at the end, if all givs which depend on this
6397 biv have been strength reduced.
6398 We can't (currently) eliminate the biv unless this is so. */
6399 bl->all_reduced = 1;
6401 /* Check each extension dependent giv in this class to see if its
6402 root biv is safe from wrapping in the interior mode. */
6403 check_ext_dependent_givs (loop, bl);
6405 /* Combine all giv's for this iv_class. */
6406 combine_givs (regs, bl);
6408 for (v = bl->giv; v; v = v->next_iv)
6410 struct induction *tv;
6412 if (v->ignore || v->same)
6413 continue;
6415 benefit = loop_giv_reduce_benefit (loop, bl, v, test_reg);
6417 /* If an insn is not to be strength reduced, then set its ignore
6418 flag, and clear bl->all_reduced. */
6420 /* A giv that depends on a reversed biv must be reduced if it is
6421 used after the loop exit; otherwise, it would have the wrong
6422 value after the loop exit. To make it simple, just reduce all
6423 of such giv's whether or not we know they are used after the loop
6424 exit. */
6426 if (v->lifetime * threshold * benefit < insn_count
6427 && ! bl->reversed)
6429 if (loop_dump_stream)
6430 fprintf (loop_dump_stream,
6431 "giv of insn %d not worth while, %d vs %d.\n",
6432 INSN_UID (v->insn),
6433 v->lifetime * threshold * benefit, insn_count);
6434 v->ignore = 1;
6435 bl->all_reduced = 0;
6437 else
6439 /* Check that we can increment the reduced giv without a
6440 multiply insn. If not, reject it. */
6442 for (tv = bl->biv; tv; tv = tv->next_iv)
6443 if (tv->mult_val == const1_rtx
6444 && ! product_cheap_p (tv->add_val, v->mult_val))
6446 if (loop_dump_stream)
6447 fprintf (loop_dump_stream,
6448 "giv of insn %d: would need a multiply.\n",
6449 INSN_UID (v->insn));
6450 v->ignore = 1;
6451 bl->all_reduced = 0;
6452 break;
6457 /* Check for givs whose first use is their definition and whose
6458 last use is the definition of another giv. If so, it is likely
6459 dead and should not be used to derive another giv nor to
6460 eliminate a biv. */
6461 loop_givs_dead_check (loop, bl);
6463 /* Reduce each giv that we decided to reduce. */
6464 loop_givs_reduce (loop, bl);
6466 /* Rescan all givs. If a giv is the same as a giv not reduced, mark it
6467 as not reduced.
6469 For each giv register that can be reduced now: if replaceable,
6470 substitute reduced reg wherever the old giv occurs;
6471 else add new move insn "giv_reg = reduced_reg". */
6472 loop_givs_rescan (loop, bl, reg_map);
6474 /* All the givs based on the biv bl have been reduced if they
6475 merit it. */
6477 /* For each giv not marked as maybe dead that has been combined with a
6478 second giv, clear any "maybe dead" mark on that second giv.
6479 v->new_reg will either be or refer to the register of the giv it
6480 combined with.
6482 Doing this clearing avoids problems in biv elimination where
6483 a giv's new_reg is a complex value that can't be put in the
6484 insn but the giv combined with (with a reg as new_reg) is
6485 marked maybe_dead. Since the register will be used in either
6486 case, we'd prefer it be used from the simpler giv. */
6488 for (v = bl->giv; v; v = v->next_iv)
6489 if (! v->maybe_dead && v->same)
6490 v->same->maybe_dead = 0;
6492 /* Try to eliminate the biv, if it is a candidate.
6493 This won't work if ! bl->all_reduced,
6494 since the givs we planned to use might not have been reduced.
6496 We have to be careful that we didn't initially think we could
6497 eliminate this biv because of a giv that we now think may be
6498 dead and shouldn't be used as a biv replacement.
6500 Also, there is the possibility that we may have a giv that looks
6501 like it can be used to eliminate a biv, but the resulting insn
6502 isn't valid. This can happen, for example, on the 88k, where a
6503 JUMP_INSN can compare a register only with zero. Attempts to
6504 replace it with a compare with a constant will fail.
6506 Note that in cases where this call fails, we may have replaced some
6507 of the occurrences of the biv with a giv, but no harm was done in
6508 doing so in the rare cases where it can occur. */
6510 if (bl->all_reduced == 1 && bl->eliminable
6511 && maybe_eliminate_biv (loop, bl, 1, threshold, insn_count))
6513 /* ?? If we created a new test to bypass the loop entirely,
6514 or otherwise drop straight in, based on this test, then
6515 we might want to rewrite it also. This way some later
6516 pass has more hope of removing the initialization of this
6517 biv entirely. */
6519 /* If final_value != 0, then the biv may be used after loop end
6520 and we must emit an insn to set it just in case.
6522 Reversed bivs already have an insn after the loop setting their
6523 value, so we don't need another one. We can't calculate the
6524 proper final value for such a biv here anyways. */
6525 if (bl->final_value && ! bl->reversed)
6526 loop_insn_sink_or_swim (loop,
6527 gen_load_of_final_value (bl->biv->dest_reg,
6528 bl->final_value));
6530 if (loop_dump_stream)
6531 fprintf (loop_dump_stream, "Reg %d: biv eliminated\n",
6532 bl->regno);
6534 /* See above note wrt final_value. But since we couldn't eliminate
6535 the biv, we must set the value after the loop instead of before. */
6536 else if (bl->final_value && ! bl->reversed)
6537 loop_insn_sink (loop, gen_load_of_final_value (bl->biv->dest_reg,
6538 bl->final_value));
6541 /* Go through all the instructions in the loop, making all the
6542 register substitutions scheduled in REG_MAP. */
6544 for (p = loop->start; p != loop->end; p = NEXT_INSN (p))
6545 if (INSN_P (p))
6547 replace_regs (PATTERN (p), reg_map, reg_map_size, 0);
6548 replace_regs (REG_NOTES (p), reg_map, reg_map_size, 0);
6549 INSN_CODE (p) = -1;
6552 if (loop_dump_stream)
6553 fprintf (loop_dump_stream, "\n");
6555 loop_ivs_free (loop);
6556 if (reg_map)
6557 free (reg_map);
6560 /* Record all basic induction variables calculated in the insn. */
6561 static rtx
6562 check_insn_for_bivs (struct loop *loop, rtx p, int not_every_iteration,
6563 int maybe_multiple)
6565 struct loop_ivs *ivs = LOOP_IVS (loop);
6566 rtx set;
6567 rtx dest_reg;
6568 rtx inc_val;
6569 rtx mult_val;
6570 rtx *location;
6572 if (NONJUMP_INSN_P (p)
6573 && (set = single_set (p))
6574 && REG_P (SET_DEST (set)))
6576 dest_reg = SET_DEST (set);
6577 if (REGNO (dest_reg) < max_reg_before_loop
6578 && REGNO (dest_reg) >= FIRST_PSEUDO_REGISTER
6579 && REG_IV_TYPE (ivs, REGNO (dest_reg)) != NOT_BASIC_INDUCT)
6581 if (basic_induction_var (loop, SET_SRC (set),
6582 GET_MODE (SET_SRC (set)),
6583 dest_reg, p, &inc_val, &mult_val,
6584 &location))
6586 /* It is a possible basic induction variable.
6587 Create and initialize an induction structure for it. */
6589 struct induction *v = xmalloc (sizeof (struct induction));
6591 record_biv (loop, v, p, dest_reg, inc_val, mult_val, location,
6592 not_every_iteration, maybe_multiple);
6593 REG_IV_TYPE (ivs, REGNO (dest_reg)) = BASIC_INDUCT;
6595 else if (REGNO (dest_reg) < ivs->n_regs)
6596 REG_IV_TYPE (ivs, REGNO (dest_reg)) = NOT_BASIC_INDUCT;
6599 return p;
6602 /* Record all givs calculated in the insn.
6603 A register is a giv if: it is only set once, it is a function of a
6604 biv and a constant (or invariant), and it is not a biv. */
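/* For example (a sketch): given biv i, the set j = 4*i + 3 makes j
   a giv with src_reg i, mult_val (const_int 4) and add_val
   (const_int 3); j = i * k for a loop-invariant k also qualifies,
   with mult_val k.  */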
6605 static rtx
6606 check_insn_for_givs (struct loop *loop, rtx p, int not_every_iteration,
6607 int maybe_multiple)
6609 struct loop_regs *regs = LOOP_REGS (loop);
6611 rtx set;
6612 /* Look for a general induction variable in a register. */
6613 if (NONJUMP_INSN_P (p)
6614 && (set = single_set (p))
6615 && REG_P (SET_DEST (set))
6616 && ! regs->array[REGNO (SET_DEST (set))].may_not_optimize)
6618 rtx src_reg;
6619 rtx dest_reg;
6620 rtx add_val;
6621 rtx mult_val;
6622 rtx ext_val;
6623 int benefit;
6624 rtx regnote = 0;
6625 rtx last_consec_insn;
6627 dest_reg = SET_DEST (set);
6628 if (REGNO (dest_reg) < FIRST_PSEUDO_REGISTER)
6629 return p;
6631 if (/* SET_SRC is a giv. */
6632 (general_induction_var (loop, SET_SRC (set), &src_reg, &add_val,
6633 &mult_val, &ext_val, 0, &benefit, VOIDmode)
6634 /* Equivalent expression is a giv. */
6635 || ((regnote = find_reg_note (p, REG_EQUAL, NULL_RTX))
6636 && general_induction_var (loop, XEXP (regnote, 0), &src_reg,
6637 &add_val, &mult_val, &ext_val, 0,
6638 &benefit, VOIDmode)))
6639 /* Don't try to handle any regs made by loop optimization.
6640 We have nothing on them in regno_first_uid, etc. */
6641 && REGNO (dest_reg) < max_reg_before_loop
6642 /* Don't recognize a BASIC_INDUCT_VAR here. */
6643 && dest_reg != src_reg
6644 /* This must be the only place where the register is set. */
6645 && (regs->array[REGNO (dest_reg)].n_times_set == 1
6646 /* or all sets must be consecutive and make a giv. */
6647 || (benefit = consec_sets_giv (loop, benefit, p,
6648 src_reg, dest_reg,
6649 &add_val, &mult_val, &ext_val,
6650 &last_consec_insn))))
6652 struct induction *v = xmalloc (sizeof (struct induction));
6654 /* If this is a library call, increase benefit. */
6655 if (find_reg_note (p, REG_RETVAL, NULL_RTX))
6656 benefit += libcall_benefit (p);
6658 /* Skip the consecutive insns, if there are any. */
6659 if (regs->array[REGNO (dest_reg)].n_times_set != 1)
6660 p = last_consec_insn;
6662 record_giv (loop, v, p, src_reg, dest_reg, mult_val, add_val,
6663 ext_val, benefit, DEST_REG, not_every_iteration,
6664 maybe_multiple, (rtx*) 0);
6669 /* Look for givs which are memory addresses. */
6670 if (NONJUMP_INSN_P (p))
6671 find_mem_givs (loop, PATTERN (p), p, not_every_iteration,
6672 maybe_multiple);
6674 /* Update the status of whether giv can derive other givs. This can
6675 change when we pass a label or an insn that updates a biv. */
6676 if (INSN_P (p) || LABEL_P (p))
6677 update_giv_derive (loop, p);
6678 return p;
6681 /* Return 1 if X is a valid source for an initial value (or as value being
6682 compared against in an initial test).
6684 X must be either a register or constant and must not be clobbered between
6685 the current insn and the start of the loop.
6687 INSN is the insn containing X. */
6689 static int
6690 valid_initial_value_p (rtx x, rtx insn, int call_seen, rtx loop_start)
6692 if (CONSTANT_P (x))
6693 return 1;
6695 /* Only consider pseudos we know about, initialized in insns whose
6696 luids we know. */
6697 if (!REG_P (x)
6698 || REGNO (x) >= max_reg_before_loop)
6699 return 0;
6701 /* Don't use call-clobbered registers across a call which clobbers it. On
6702 some machines, don't use any hard registers at all. */
6703 if (REGNO (x) < FIRST_PSEUDO_REGISTER
6704 && (SMALL_REGISTER_CLASSES
6705 || (call_used_regs[REGNO (x)] && call_seen)))
6706 return 0;
6708 /* Don't use registers that have been clobbered before the start of the
6709 loop. */
6710 if (reg_set_between_p (x, insn, loop_start))
6711 return 0;
6713 return 1;
6716 /* Scan X for memory refs and check each memory address
6717 as a possible giv. INSN is the insn whose pattern X comes from.
6718 NOT_EVERY_ITERATION is 1 if the insn might not be executed during
6719 every loop iteration. MAYBE_MULTIPLE is 1 if the insn might be executed
6720 more than once in each loop iteration. */
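/* Sketch: for an insn containing (mem (plus (reg i) (reg base)))
   the address is a DEST_ADDR giv candidate; record_giv is passed
   &XEXP (x, 0) so the reduced register can later be substituted for
   the address in place.  */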
6722 static void
6723 find_mem_givs (const struct loop *loop, rtx x, rtx insn,
6724 int not_every_iteration, int maybe_multiple)
6726 int i, j;
6727 enum rtx_code code;
6728 const char *fmt;
6730 if (x == 0)
6731 return;
6733 code = GET_CODE (x);
6734 switch (code)
6736 case REG:
6737 case CONST_INT:
6738 case CONST:
6739 case CONST_DOUBLE:
6740 case SYMBOL_REF:
6741 case LABEL_REF:
6742 case PC:
6743 case CC0:
6744 case ADDR_VEC:
6745 case ADDR_DIFF_VEC:
6746 case USE:
6747 case CLOBBER:
6748 return;
6750 case MEM:
6752 rtx src_reg;
6753 rtx add_val;
6754 rtx mult_val;
6755 rtx ext_val;
6756 int benefit;
6758 /* This code used to disable creating GIVs with mult_val == 1 and
6759 add_val == 0. However, this leads to lost optimizations when
6760 it comes time to combine a set of related DEST_ADDR GIVs, since
6761 this one would not be seen. */
6763 if (general_induction_var (loop, XEXP (x, 0), &src_reg, &add_val,
6764 &mult_val, &ext_val, 1, &benefit,
6765 GET_MODE (x)))
6767 /* Found one; record it. */
6768 struct induction *v = xmalloc (sizeof (struct induction));
6770 record_giv (loop, v, insn, src_reg, addr_placeholder, mult_val,
6771 add_val, ext_val, benefit, DEST_ADDR,
6772 not_every_iteration, maybe_multiple, &XEXP (x, 0));
6774 v->mem = x;
6777 return;
6779 default:
6780 break;
6783 /* Recursively scan the subexpressions for other mem refs. */
6785 fmt = GET_RTX_FORMAT (code);
6786 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
6787 if (fmt[i] == 'e')
6788 find_mem_givs (loop, XEXP (x, i), insn, not_every_iteration,
6789 maybe_multiple);
6790 else if (fmt[i] == 'E')
6791 for (j = 0; j < XVECLEN (x, i); j++)
6792 find_mem_givs (loop, XVECEXP (x, i, j), insn, not_every_iteration,
6793 maybe_multiple);
6796 /* Fill in the data about one biv update.
6797 V is the `struct induction' in which we record the biv. (It is
6798 allocated by the caller, with xmalloc.)
6799 INSN is the insn that sets it.
6800 DEST_REG is the biv's reg.
6802 MULT_VAL is const1_rtx if the biv is being incremented here, in which case
6803 INC_VAL is the increment. Otherwise, MULT_VAL is const0_rtx and the biv is
6804 being set to INC_VAL.
6806 NOT_EVERY_ITERATION is nonzero if this biv update is not known to be
6807 executed every iteration; MAYBE_MULTIPLE is nonzero if this biv update
6808 can be executed more than once per iteration. If MAYBE_MULTIPLE
6809 and NOT_EVERY_ITERATION are both zero, we know that the biv update is
6810 executed exactly once per iteration. */
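/* Two update shapes, as a sketch: i = i + 4 is recorded with
   MULT_VAL == const1_rtx and INC_VAL == (const_int 4), while an
   outright set i = 17 arrives with MULT_VAL == const0_rtx and
   INC_VAL == (const_int 17).  */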
6812 static void
6813 record_biv (struct loop *loop, struct induction *v, rtx insn, rtx dest_reg,
6814 rtx inc_val, rtx mult_val, rtx *location,
6815 int not_every_iteration, int maybe_multiple)
6817 struct loop_ivs *ivs = LOOP_IVS (loop);
6818 struct iv_class *bl;
6820 v->insn = insn;
6821 v->src_reg = dest_reg;
6822 v->dest_reg = dest_reg;
6823 v->mult_val = mult_val;
6824 v->add_val = inc_val;
6825 v->ext_dependent = NULL_RTX;
6826 v->location = location;
6827 v->mode = GET_MODE (dest_reg);
6828 v->always_computable = ! not_every_iteration;
6829 v->always_executed = ! not_every_iteration;
6830 v->maybe_multiple = maybe_multiple;
6831 v->same = 0;
6833 /* Add this to the reg's iv_class, creating a class
6834 if this is the first incrementation of the reg. */
6836 bl = REG_IV_CLASS (ivs, REGNO (dest_reg));
6837 if (bl == 0)
6839 /* Create and initialize new iv_class. */
6841 bl = xmalloc (sizeof (struct iv_class));
6843 bl->regno = REGNO (dest_reg);
6844 bl->biv = 0;
6845 bl->giv = 0;
6846 bl->biv_count = 0;
6847 bl->giv_count = 0;
6849 /* Set initial value to the reg itself. */
6850 bl->initial_value = dest_reg;
6851 bl->final_value = 0;
6852 /* We haven't seen the initializing insn yet. */
6853 bl->init_insn = 0;
6854 bl->init_set = 0;
6855 bl->initial_test = 0;
6856 bl->incremented = 0;
6857 bl->eliminable = 0;
6858 bl->nonneg = 0;
6859 bl->reversed = 0;
6860 bl->total_benefit = 0;
6862 /* Add this class to ivs->list. */
6863 bl->next = ivs->list;
6864 ivs->list = bl;
6866 /* Put it in the array of biv register classes. */
6867 REG_IV_CLASS (ivs, REGNO (dest_reg)) = bl;
6869 else
6871 /* Check if location is the same as a previous one. */
6872 struct induction *induction;
6873 for (induction = bl->biv; induction; induction = induction->next_iv)
6874 if (location == induction->location)
6876 v->same = induction;
6877 break;
6881 /* Update IV_CLASS entry for this biv. */
6882 v->next_iv = bl->biv;
6883 bl->biv = v;
6884 bl->biv_count++;
6885 if (mult_val == const1_rtx)
6886 bl->incremented = 1;
6888 if (loop_dump_stream)
6889 loop_biv_dump (v, loop_dump_stream, 0);
6892 /* Fill in the data about one giv.
6893 V is the `struct induction' in which we record the giv. (It is
6894 allocated by the caller, with xmalloc.)
6895 INSN is the insn that sets it.
6896 BENEFIT estimates the savings from deleting this insn.
6897 TYPE is DEST_REG or DEST_ADDR; it says whether the giv is computed
6898 into a register or is used as a memory address.
6900 SRC_REG is the biv reg which the giv is computed from.
6901 DEST_REG is the giv's reg (if the giv is stored in a reg).
6902 MULT_VAL and ADD_VAL are the coefficients used to compute the giv.
6903 LOCATION points to the place where this giv's value appears in INSN. */
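/* Continuing the sketch used for check_insn_for_givs: for
   j = 4*i + 3 computed from biv i, SRC_REG is i, DEST_REG is j,
   MULT_VAL is (const_int 4), ADD_VAL is (const_int 3), and TYPE is
   DEST_REG since the value lands in a register rather than in a
   memory address.  */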
6905 static void
6906 record_giv (const struct loop *loop, struct induction *v, rtx insn,
6907 rtx src_reg, rtx dest_reg, rtx mult_val, rtx add_val,
6908 rtx ext_val, int benefit, enum g_types type,
6909 int not_every_iteration, int maybe_multiple, rtx *location)
6911 struct loop_ivs *ivs = LOOP_IVS (loop);
6912 struct induction *b;
6913 struct iv_class *bl;
6914 rtx set = single_set (insn);
6915 rtx temp;
6917 /* Attempt to prove constantness of the values. Don't let simplify_rtx
6918 undo the MULT canonicalization that we performed earlier. */
6919 temp = simplify_rtx (add_val);
6920 if (temp
6921 && ! (GET_CODE (add_val) == MULT
6922 && GET_CODE (temp) == ASHIFT))
6923 add_val = temp;
6925 v->insn = insn;
6926 v->src_reg = src_reg;
6927 v->giv_type = type;
6928 v->dest_reg = dest_reg;
6929 v->mult_val = mult_val;
6930 v->add_val = add_val;
6931 v->ext_dependent = ext_val;
6932 v->benefit = benefit;
6933 v->location = location;
6934 v->cant_derive = 0;
6935 v->combined_with = 0;
6936 v->maybe_multiple = maybe_multiple;
6937 v->maybe_dead = 0;
6938 v->derive_adjustment = 0;
6939 v->same = 0;
6940 v->ignore = 0;
6941 v->new_reg = 0;
6942 v->final_value = 0;
6943 v->same_insn = 0;
6944 v->auto_inc_opt = 0;
6945 v->shared = 0;
6947 /* The v->always_computable field is used in update_giv_derive, to
6948 determine whether a giv can be used to derive another giv. For a
6949 DEST_REG giv, INSN computes a new value for the giv, so its value
6950 isn't computable if INSN isn't executed every iteration.
6951 However, for a DEST_ADDR giv, INSN merely uses the value of the giv;
6952 it does not compute a new value. Hence the value is always computable
6953 regardless of whether INSN is executed each iteration. */
6955 if (type == DEST_ADDR)
6956 v->always_computable = 1;
6957 else
6958 v->always_computable = ! not_every_iteration;
6960 v->always_executed = ! not_every_iteration;
6962 if (type == DEST_ADDR)
6964 v->mode = GET_MODE (*location);
6965 v->lifetime = 1;
6967 else /* type == DEST_REG */
6969 v->mode = GET_MODE (SET_DEST (set));
6971 v->lifetime = LOOP_REG_LIFETIME (loop, REGNO (dest_reg));
6973 /* If the lifetime is zero, it means that this register is
6974 really a dead store. So mark this as a giv that can be
6975 ignored. This will not prevent the biv from being eliminated. */
6976 if (v->lifetime == 0)
6977 v->ignore = 1;
6979 REG_IV_TYPE (ivs, REGNO (dest_reg)) = GENERAL_INDUCT;
6980 REG_IV_INFO (ivs, REGNO (dest_reg)) = v;
6983 /* Add the giv to the class of givs computed from one biv. */
6985 bl = REG_IV_CLASS (ivs, REGNO (src_reg));
6986 if (bl)
6988 v->next_iv = bl->giv;
6989 bl->giv = v;
6990 /* Don't count DEST_ADDR. This is supposed to count the number of
6991 insns that calculate givs. */
6992 if (type == DEST_REG)
6993 bl->giv_count++;
6994 bl->total_benefit += benefit;
6996 else
6997 /* Fatal error, biv missing for this giv? */
6998 abort ();
7000 if (type == DEST_ADDR)
7002 v->replaceable = 1;
7003 v->not_replaceable = 0;
7005 else
7007 /* The giv can be replaced outright by the reduced register only if all
7008 of the following conditions are true:
7009 - the insn that sets the giv is always executed on any iteration
7010 on which the giv is used at all
7011 (there are two ways to deduce this:
7012 either the insn is executed on every iteration,
7013 or all uses follow that insn in the same basic block),
7014 - the giv is not used outside the loop
7015 - no assignments to the biv occur during the giv's lifetime. */
7017 if (REGNO_FIRST_UID (REGNO (dest_reg)) == INSN_UID (insn)
7018 /* Previous line always fails if INSN was moved by loop opt. */
7019 && REGNO_LAST_LUID (REGNO (dest_reg))
7020 < INSN_LUID (loop->end)
7021 && (! not_every_iteration
7022 || last_use_this_basic_block (dest_reg, insn)))
7024 /* Now check that there are no assignments to the biv within the
7025 giv's lifetime. This requires two separate checks. */
7027 /* Check each biv update, and fail if any are between the first
7028 and last use of the giv.
7030 If this loop contains an inner loop that was unrolled, then
7031 the insn modifying the biv may have been emitted by the loop
7032 unrolling code, and hence does not have a valid luid. Just
7033 mark the biv as not replaceable in this case. It is not very
7034 useful as a biv, because it is used in two different loops.
7035 It is very unlikely that we would be able to optimize the giv
7036 using this biv anyway. */
7038 v->replaceable = 1;
7039 v->not_replaceable = 0;
7040 for (b = bl->biv; b; b = b->next_iv)
7042 if (INSN_UID (b->insn) >= max_uid_for_loop
7043 || ((INSN_LUID (b->insn)
7044 >= REGNO_FIRST_LUID (REGNO (dest_reg)))
7045 && (INSN_LUID (b->insn)
7046 <= REGNO_LAST_LUID (REGNO (dest_reg)))))
7048 v->replaceable = 0;
7049 v->not_replaceable = 1;
7050 break;
7054 /* If there are any backwards branches that go from after the
7055 biv update to before it, then this giv is not replaceable. */
7056 if (v->replaceable)
7057 for (b = bl->biv; b; b = b->next_iv)
7058 if (back_branch_in_range_p (loop, b->insn))
7060 v->replaceable = 0;
7061 v->not_replaceable = 1;
7062 break;
7065 else
7067 /* May still be replaceable, we don't have enough info here to
7068 decide. */
7069 v->replaceable = 0;
7070 v->not_replaceable = 0;
7074 /* Record whether the add_val contains a const_int, for later use by
7075 combine_givs. */
7077 rtx tem = add_val;
7079 v->no_const_addval = 1;
7080 if (tem == const0_rtx)
7081 ;
7082 else if (CONSTANT_P (add_val))
7083 v->no_const_addval = 0;
7084 if (GET_CODE (tem) == PLUS)
7086 while (1)
7088 if (GET_CODE (XEXP (tem, 0)) == PLUS)
7089 tem = XEXP (tem, 0);
7090 else if (GET_CODE (XEXP (tem, 1)) == PLUS)
7091 tem = XEXP (tem, 1);
7092 else
7093 break;
7095 if (CONSTANT_P (XEXP (tem, 1)))
7096 v->no_const_addval = 0;
7100 if (loop_dump_stream)
7101 loop_giv_dump (v, loop_dump_stream, 0);
7104 /* Try to calculate the final value of the giv, the value it will have at
7105 the end of the loop. If we can do it, return that value. */
7107 static rtx
7108 final_giv_value (const struct loop *loop, struct induction *v)
7110 struct loop_ivs *ivs = LOOP_IVS (loop);
7111 struct iv_class *bl;
7112 rtx insn;
7113 rtx increment, tem;
7114 rtx seq;
7115 rtx loop_end = loop->end;
7116 unsigned HOST_WIDE_INT n_iterations = LOOP_INFO (loop)->n_iterations;
7118 bl = REG_IV_CLASS (ivs, REGNO (v->src_reg));
7120 /* The final value for givs which depend on reversed bivs must be calculated
7121 differently than for ordinary givs. In this case, there is already an
7122 insn after the loop which sets this giv's final value (if necessary),
7123 and there are no other loop exits, so we can return any value. */
7124 if (bl->reversed)
7126 if (loop_dump_stream)
7127 fprintf (loop_dump_stream,
7128 "Final giv value for %d, depends on reversed biv\n",
7129 REGNO (v->dest_reg));
7130 return const0_rtx;
7133 /* Try to calculate the final value as a function of the biv it depends
7134 upon. The only exit from the loop must be the fall through at the bottom
7135 and the insn that sets the giv must be executed on every iteration
7136 (otherwise the giv may not have its final value when the loop exits). */
7138 /* ??? Can calculate the final giv value by subtracting off the
7139 extra biv increments times the giv's mult_val. The loop must have
7140 only one exit for this to work, but the number of loop iterations
7141 does not need to be known. */
7143 if (n_iterations != 0
7144 && ! loop->exit_count
7145 && v->always_executed)
7147 /* ?? It is tempting to use the biv's value here since these insns will
7148 be put after the loop, and hence the biv will have its final value
7149 then. However, this fails if the biv is subsequently eliminated.
7150 Perhaps determine whether biv's are eliminable before trying to
7151 determine whether giv's are replaceable so that we can use the
7152 biv value here if it is not eliminable. */
7154 /* We are emitting code after the end of the loop, so we must make
7155 sure that bl->initial_value is still valid then. It will still
7156 be valid if it is invariant. */
7158 increment = biv_total_increment (bl);
7160 if (increment && loop_invariant_p (loop, increment)
7161 && loop_invariant_p (loop, bl->initial_value))
7163 /* Can calculate the loop exit value of its biv as
7164 (n_iterations * increment) + initial_value */
7166 /* The loop exit value of the giv is then
7167 (final_biv_value - extra increments) * mult_val + add_val.
7168 The extra increments are any increments to the biv which
7169 occur in the loop after the giv's value is calculated.
7170 We must search from the insn that sets the giv to the end
7171 of the loop to calculate this value. */
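/* Numeric sketch: a biv running from 0 by 2 for 10 iterations exits
   at 20; for a giv 3*i + 1 set before one remaining biv increment,
   that increment (2) is subtracted, giving (20 - 2) * 3 + 1 = 55 as
   the giv's value at loop exit.  */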
7173 /* Put the final biv value in tem. */
7174 tem = gen_reg_rtx (v->mode);
7175 record_base_value (REGNO (tem), bl->biv->add_val, 0);
7176 loop_iv_add_mult_sink (loop, extend_value_for_giv (v, increment),
7177 GEN_INT (n_iterations),
7178 extend_value_for_giv (v, bl->initial_value),
7179 tem);
7181 /* Subtract off extra increments as we find them. */
7182 for (insn = NEXT_INSN (v->insn); insn != loop_end;
7183 insn = NEXT_INSN (insn))
7185 struct induction *biv;
7187 for (biv = bl->biv; biv; biv = biv->next_iv)
7188 if (biv->insn == insn)
7190 start_sequence ();
7191 tem = expand_simple_binop (GET_MODE (tem), MINUS, tem,
7192 biv->add_val, NULL_RTX, 0,
7193 OPTAB_LIB_WIDEN);
7194 seq = get_insns ();
7195 end_sequence ();
7196 loop_insn_sink (loop, seq);
7200 /* Now calculate the giv's final value. */
7201 loop_iv_add_mult_sink (loop, tem, v->mult_val, v->add_val, tem);
7203 if (loop_dump_stream)
7204 fprintf (loop_dump_stream,
7205 "Final giv value for %d, calc from biv's value.\n",
7206 REGNO (v->dest_reg));
7208 return tem;
7212 /* Replaceable giv's should never reach here. */
7213 if (v->replaceable)
7214 abort ();
7216 /* Check to see if the biv is dead at all loop exits. */
7217 if (reg_dead_after_loop (loop, v->dest_reg))
7219 if (loop_dump_stream)
7220 fprintf (loop_dump_stream,
7221 "Final giv value for %d, giv dead after loop exit.\n",
7222 REGNO (v->dest_reg));
7224 return const0_rtx;
7227 return 0;
7230 /* All this does is determine whether a giv can be made replaceable because
7231 its final value can be calculated. This code can not be part of record_giv
7232 above, because final_giv_value requires that the number of loop iterations
7233 be known, and that can not be accurately calculated until after all givs
7234 have been identified. */
7236 static void
7237 check_final_value (const struct loop *loop, struct induction *v)
7239 rtx final_value = 0;
7241 /* DEST_ADDR givs will never reach here, because they are always marked
7242 replaceable above in record_giv. */
7244 /* The giv can be replaced outright by the reduced register only if all
7245 of the following conditions are true:
7246 - the insn that sets the giv is always executed on any iteration
7247 on which the giv is used at all
7248 (there are two ways to deduce this:
7249 either the insn is executed on every iteration,
7250 or all uses follow that insn in the same basic block),
7251 - its final value can be calculated (this condition is different
7252 than the one above in record_giv)
7253 - it is not used before it is set
7254 - no assignments to the biv occur during the giv's lifetime. */
7256 #if 0
7257 /* This is only called now when replaceable is known to be false. */
7258 /* Clear replaceable, so that it won't confuse final_giv_value. */
7259 v->replaceable = 0;
7260 #endif
7262 if ((final_value = final_giv_value (loop, v))
7263 && (v->always_executed
7264 || last_use_this_basic_block (v->dest_reg, v->insn)))
7266 int biv_increment_seen = 0, before_giv_insn = 0;
7267 rtx p = v->insn;
7268 rtx last_giv_use;
7270 v->replaceable = 1;
7271 v->not_replaceable = 0;
7273 /* When trying to determine whether or not a biv increment occurs
7274 during the lifetime of the giv, we can ignore uses of the variable
7275 outside the loop because final_value is true. Hence we can not
7276 use regno_last_uid and regno_first_uid as above in record_giv. */
7278 /* Search the loop to determine whether any assignments to the
7279 biv occur during the giv's lifetime. Start with the insn
7280 that sets the giv, and search around the loop until we come
7281 back to that insn again.
7283 Also fail if there is a jump within the giv's lifetime that jumps
7284 to somewhere outside the lifetime but still within the loop. This
7285 catches spaghetti code where the execution order is not linear, and
7286 hence the above test fails. Here we assume that the giv lifetime
7287 does not extend from one iteration of the loop to the next, so as
7288 to make the test easier. Since the lifetime isn't known yet,
7289 this requires two loops. See also record_giv above. */
7291 last_giv_use = v->insn;
7293 while (1)
7295 p = NEXT_INSN (p);
7296 if (p == loop->end)
7298 before_giv_insn = 1;
7299 p = NEXT_INSN (loop->start);
7301 if (p == v->insn)
7302 break;
7304 if (INSN_P (p))
7306 /* It is possible for the BIV increment to use the GIV if we
7307 have a cycle. Thus we must be sure to check each insn for
7308 both BIV and GIV uses, and we must check for BIV uses
7309 first. */
7311 if (! biv_increment_seen
7312 && reg_set_p (v->src_reg, PATTERN (p)))
7313 biv_increment_seen = 1;
7315 if (reg_mentioned_p (v->dest_reg, PATTERN (p)))
7317 if (biv_increment_seen || before_giv_insn)
7319 v->replaceable = 0;
7320 v->not_replaceable = 1;
7321 break;
7323 last_giv_use = p;
7328 /* Now that the lifetime of the giv is known, check for branches
7329 from within the lifetime to outside the lifetime if it is still
7330 replaceable. */
7332 if (v->replaceable)
7334 p = v->insn;
7335 while (1)
7337 p = NEXT_INSN (p);
7338 if (p == loop->end)
7339 p = NEXT_INSN (loop->start);
7340 if (p == last_giv_use)
7341 break;
7343 if (JUMP_P (p) && JUMP_LABEL (p)
7344 && LABEL_NAME (JUMP_LABEL (p))
7345 && ((loop_insn_first_p (JUMP_LABEL (p), v->insn)
7346 && loop_insn_first_p (loop->start, JUMP_LABEL (p)))
7347 || (loop_insn_first_p (last_giv_use, JUMP_LABEL (p))
7348 && loop_insn_first_p (JUMP_LABEL (p), loop->end))))
7350 v->replaceable = 0;
7351 v->not_replaceable = 1;
7353 if (loop_dump_stream)
7354 fprintf (loop_dump_stream,
7355 "Found branch outside giv lifetime.\n");
7357 break;
7362 /* If it is replaceable, then save the final value. */
7363 if (v->replaceable)
7364 v->final_value = final_value;
7367 if (loop_dump_stream && v->replaceable)
7368 fprintf (loop_dump_stream, "Insn %d: giv reg %d final_value replaceable\n",
7369 INSN_UID (v->insn), REGNO (v->dest_reg));
7372 /* Update the status of whether a giv can derive other givs.
7374 We need to do something special if there is or may be an update to the biv
7375 between the time the giv is defined and the time it is used to derive
7376 another giv.
7378 In addition, a giv that is only conditionally set is not allowed to
7379 derive another giv once a label has been passed.
7381 The cases we look at are when a label or an update to a biv is passed. */
7383 static void
7384 update_giv_derive (const struct loop *loop, rtx p)
7386 struct loop_ivs *ivs = LOOP_IVS (loop);
7387 struct iv_class *bl;
7388 struct induction *biv, *giv;
7389 rtx tem;
7390 int dummy;
7392 /* Search all IV classes, then all bivs, and finally all givs.
7394 There are three cases we are concerned with. First we have the situation
7395 of a giv that is only updated conditionally. In that case, it may not
7396 derive any givs after a label is passed.
7398 The second case is when a biv update occurs, or may occur, after the
7399 definition of a giv. For certain biv updates (see below) that are
7400 known to occur between the giv definition and use, we can adjust the
7401 giv definition. For others, or when the biv update is conditional,
7402 we must prevent the giv from deriving any other givs. There are two
7403 sub-cases within this case.
7405 If this is a label, we are concerned with any biv update that is done
7406 conditionally, since it may be done after the giv is defined followed by
7407 a branch here (actually, we need to pass both a jump and a label, but
7408 this extra tracking doesn't seem worth it).
7410 If this is a jump, we are concerned about any biv update that may be
7411 executed multiple times. We are actually only concerned about
7412 backward jumps, but it is probably not worth performing the test
7413 on the jump again here.
7415 If this is a biv update, we must adjust the giv status to show that a
7416 subsequent biv update was performed. If this adjustment cannot be done,
7417 the giv cannot derive further givs. */
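/* A worked example with made-up numbers, purely for illustration:
   given a giv g = b * 3 + 1 followed by a biv update b = b + 2 inside
   g's lifetime, a giv later derived from g must be compensated by
   2 * 3 = 6, since g was computed from the old value of b.  That
   product is what accumulates in derive_adjustment below; when it
   cannot be formed, cant_derive is set instead.  */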
7419 for (bl = ivs->list; bl; bl = bl->next)
7420 for (biv = bl->biv; biv; biv = biv->next_iv)
7421 if (LABEL_P (p) || JUMP_P (p)
7422 || biv->insn == p)
7424 /* Skip if location is the same as a previous one. */
7425 if (biv->same)
7426 continue;
7428 for (giv = bl->giv; giv; giv = giv->next_iv)
7430 /* If cant_derive is already true, there is no point in
7431 checking all of these conditions again. */
7432 if (giv->cant_derive)
7433 continue;
7435 /* If this giv is conditionally set and we have passed a label,
7436 it cannot derive anything. */
7437 if (LABEL_P (p) && ! giv->always_computable)
7438 giv->cant_derive = 1;
7440 /* Skip givs that have mult_val == 0, since
7441 they are really invariants. Also skip those that are
7442 replaceable, since we know their lifetime doesn't contain
7443 any biv update. */
7444 else if (giv->mult_val == const0_rtx || giv->replaceable)
7445 continue;
7447 /* The only way we can allow this giv to derive another
7448 is if this is a biv increment and we can form the product
7449 of biv->add_val and giv->mult_val. In this case, we will
7450 be able to compute a compensation. */
7451 else if (biv->insn == p)
7453 rtx ext_val_dummy;
7455 tem = 0;
7456 if (biv->mult_val == const1_rtx)
7457 tem = simplify_giv_expr (loop,
7458 gen_rtx_MULT (giv->mode,
7459 biv->add_val,
7460 giv->mult_val),
7461 &ext_val_dummy, &dummy);
7463 if (tem && giv->derive_adjustment)
7464 tem = simplify_giv_expr
7465 (loop,
7466 gen_rtx_PLUS (giv->mode, tem, giv->derive_adjustment),
7467 &ext_val_dummy, &dummy);
7469 if (tem)
7470 giv->derive_adjustment = tem;
7471 else
7472 giv->cant_derive = 1;
7474 else if ((LABEL_P (p) && ! biv->always_computable)
7475 || (JUMP_P (p) && biv->maybe_multiple))
7476 giv->cant_derive = 1;
7481 /* Check whether an insn is an increment legitimate for a basic induction var.
7482 X is the source of insn P, or a part of it.
7483 MODE is the mode in which X should be interpreted.
7485 DEST_REG is the putative biv, also the destination of the insn.
7486 We accept patterns of these forms:
7487 REG = REG + INVARIANT (includes REG = REG - CONSTANT)
7488 REG = INVARIANT + REG
7490 If X is suitable, we return 1, set *MULT_VAL to CONST1_RTX,
7491 store the additive term into *INC_VAL, and store the place where
7492 we found the additive term into *LOCATION.
7494 If X is an assignment of an invariant into DEST_REG, we set
7495 *MULT_VAL to CONST0_RTX, and store the invariant into *INC_VAL.
7497 We also want to detect a BIV when it corresponds to a variable
7498 whose mode was promoted. In that case, an increment
7499 of the variable may be a PLUS that adds a SUBREG of that variable to
7500 an invariant and then sign- or zero-extends the result of the PLUS
7501 into the variable.
7503 Most GIVs in such cases will be in the promoted mode, since that is
7504 probably the natural computation mode (and almost certainly the mode
7505 used for addresses) on the machine. So we view the pseudo-reg containing
7506 the variable as the BIV, as if it were simply incremented.
7508 Note that treating the entire pseudo as a BIV will result in making
7509 simple increments to any GIVs based on it. However, if the variable
7510 overflows in its declared mode but not its promoted mode, the result will
7511 be incorrect. This is acceptable if the variable is signed, since
7512 overflows in such cases are undefined, but not if it is unsigned, since
7513 those overflows are defined. So we only check for SIGN_EXTEND and
7514 not ZERO_EXTEND.
7516 If we cannot find a biv, we return 0. */
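/* An illustrative case, with a made-up register number: for an insn
   whose pattern is (set (reg 100) (plus (reg 100) (const_int 4))),
   a call with DEST_REG = (reg 100) returns 1 and sets
   *MULT_VAL = const1_rtx, *INC_VAL = (const_int 4) and *LOCATION
   pointing at the addend.  */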
7518 static int
7519 basic_induction_var (const struct loop *loop, rtx x, enum machine_mode mode,
7520 rtx dest_reg, rtx p, rtx *inc_val, rtx *mult_val,
7521 rtx **location)
7523 enum rtx_code code;
7524 rtx *argp, arg;
7525 rtx insn, set = 0, last, inc;
7527 code = GET_CODE (x);
7528 *location = NULL;
7529 switch (code)
7531 case PLUS:
7532 if (rtx_equal_p (XEXP (x, 0), dest_reg)
7533 || (GET_CODE (XEXP (x, 0)) == SUBREG
7534 && SUBREG_PROMOTED_VAR_P (XEXP (x, 0))
7535 && SUBREG_REG (XEXP (x, 0)) == dest_reg))
7537 argp = &XEXP (x, 1);
7539 else if (rtx_equal_p (XEXP (x, 1), dest_reg)
7540 || (GET_CODE (XEXP (x, 1)) == SUBREG
7541 && SUBREG_PROMOTED_VAR_P (XEXP (x, 1))
7542 && SUBREG_REG (XEXP (x, 1)) == dest_reg))
7544 argp = &XEXP (x, 0);
7546 else
7547 return 0;
7549 arg = *argp;
7550 if (loop_invariant_p (loop, arg) != 1)
7551 return 0;
7553 /* convert_modes can emit new instructions, e.g. when arg is a loop
7554 invariant MEM and dest_reg has a different mode.
7555 These instructions would be emitted after the end of the function
7556 and then *inc_val would be an uninitialized pseudo.
7557 Detect this and bail in this case.
7558 Other ways to solve this would be to introduce a convert_modes
7559 variant which is allowed to fail but not allowed to emit new
7560 instructions, to emit these instructions before the loop start and
7561 let them be garbage collected if *inc_val is never used, or to save
7562 the *inc_val initialization sequence generated here and emit it at
7563 some suitable place when *inc_val is actually used. */
7564 last = get_last_insn ();
7565 inc = convert_modes (GET_MODE (dest_reg), GET_MODE (x), arg, 0);
7566 if (get_last_insn () != last)
7568 delete_insns_since (last);
7569 return 0;
7572 *inc_val = inc;
7573 *mult_val = const1_rtx;
7574 *location = argp;
7575 return 1;
7577 case SUBREG:
7578 /* If what's inside the SUBREG is a BIV, then so is the SUBREG. This
7579 will handle addition of promoted variables.
7580 ??? The comment at the start of this function is wrong: promoted
7581 variable increments don't look like it says they do. */
7582 return basic_induction_var (loop, SUBREG_REG (x),
7583 GET_MODE (SUBREG_REG (x)),
7584 dest_reg, p, inc_val, mult_val, location);
7586 case REG:
7587 /* If this register is assigned in a previous insn, look at its
7588 source, but don't go outside the loop or past a label. */
7590 /* If this sets a register to itself, we would repeat any previous
7591 biv increment if we applied this strategy blindly. */
7592 if (rtx_equal_p (dest_reg, x))
7593 return 0;
7595 insn = p;
7596 while (1)
7598 rtx dest;
7600 do
7602 insn = PREV_INSN (insn);
7603 while (insn && NOTE_P (insn)
7604 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_BEG);
7606 if (!insn)
7607 break;
7608 set = single_set (insn);
7609 if (set == 0)
7610 break;
7611 dest = SET_DEST (set);
7612 if (dest == x
7613 || (GET_CODE (dest) == SUBREG
7614 && (GET_MODE_SIZE (GET_MODE (dest)) <= UNITS_PER_WORD)
7615 && (GET_MODE_CLASS (GET_MODE (dest)) == MODE_INT)
7616 && SUBREG_REG (dest) == x))
7617 return basic_induction_var (loop, SET_SRC (set),
7618 (GET_MODE (SET_SRC (set)) == VOIDmode
7619 ? GET_MODE (x)
7620 : GET_MODE (SET_SRC (set))),
7621 dest_reg, insn,
7622 inc_val, mult_val, location);
7624 while (GET_CODE (dest) == SUBREG
7625 || GET_CODE (dest) == ZERO_EXTRACT
7626 || GET_CODE (dest) == STRICT_LOW_PART)
7627 dest = XEXP (dest, 0);
7628 if (dest == x)
7629 break;
7631 /* Fall through. */
7633 /* Can accept constant setting of biv only when inside the innermost loop.
7634 Otherwise, a biv of an inner loop may be incorrectly recognized
7635 as a biv of the outer loop,
7636 causing code to be moved INTO the inner loop. */
7637 case MEM:
7638 if (loop_invariant_p (loop, x) != 1)
7639 return 0;
7640 case CONST_INT:
7641 case SYMBOL_REF:
7642 case CONST:
7643 /* convert_modes aborts if we try to convert to or from CCmode, so just
7644 exclude that case. It is very unlikely that a condition code value
7645 would be a useful iterator anyway. convert_modes also aborts if we
7646 try to convert a float mode to non-float or vice versa. */
7647 if (loop->level == 1
7648 && GET_MODE_CLASS (mode) == GET_MODE_CLASS (GET_MODE (dest_reg))
7649 && GET_MODE_CLASS (mode) != MODE_CC)
7651 /* Possible bug here? Perhaps we don't know the mode of X. */
7652 last = get_last_insn ();
7653 inc = convert_modes (GET_MODE (dest_reg), mode, x, 0);
7654 if (get_last_insn () != last)
7656 delete_insns_since (last);
7657 return 0;
7660 *inc_val = inc;
7661 *mult_val = const0_rtx;
7662 return 1;
7664 else
7665 return 0;
7667 case SIGN_EXTEND:
7668 /* Ignore this BIV if signed arithmetic overflow is defined. */
7669 if (flag_wrapv)
7670 return 0;
7671 return basic_induction_var (loop, XEXP (x, 0), GET_MODE (XEXP (x, 0)),
7672 dest_reg, p, inc_val, mult_val, location);
7674 case ASHIFTRT:
7675 /* Similar, since this can be a sign extension. */
7676 for (insn = PREV_INSN (p);
7677 (insn && NOTE_P (insn)
7678 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_BEG);
7679 insn = PREV_INSN (insn))
7680 ;
7682 if (insn)
7683 set = single_set (insn);
7685 if (! rtx_equal_p (dest_reg, XEXP (x, 0))
7686 && set && SET_DEST (set) == XEXP (x, 0)
7687 && GET_CODE (XEXP (x, 1)) == CONST_INT
7688 && INTVAL (XEXP (x, 1)) >= 0
7689 && GET_CODE (SET_SRC (set)) == ASHIFT
7690 && XEXP (x, 1) == XEXP (SET_SRC (set), 1))
7691 return basic_induction_var (loop, XEXP (SET_SRC (set), 0),
7692 GET_MODE (XEXP (x, 0)),
7693 dest_reg, insn, inc_val, mult_val,
7694 location);
7695 return 0;
7697 default:
7698 return 0;
7702 /* A general induction variable (giv) is any quantity that is a linear
7703 function of a basic induction variable,
7704 i.e. giv = biv * mult_val + add_val.
7705 The coefficients can be any loop invariant quantity.
7706 A giv need not be computed directly from the biv;
7707 it can be computed by way of other givs. */
7709 /* Determine whether X computes a giv.
7710 If it does, return a nonzero value
7711 which is the benefit from eliminating the computation of X;
7712 set *SRC_REG to the register of the biv that it is computed from;
7713 set *ADD_VAL and *MULT_VAL to the coefficients,
7714 such that the value of X is biv * mult + add; */
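/* An illustrative case: if X simplifies to
   (plus (mult (reg B) (const_int 3)) (const_int 7)) where B is a biv,
   we return nonzero and set *SRC_REG = (reg B),
   *MULT_VAL = (const_int 3) and *ADD_VAL = (const_int 7),
   i.e. X = B * 3 + 7.  */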
7716 static int
7717 general_induction_var (const struct loop *loop, rtx x, rtx *src_reg,
7718 rtx *add_val, rtx *mult_val, rtx *ext_val,
7719 int is_addr, int *pbenefit,
7720 enum machine_mode addr_mode)
7722 struct loop_ivs *ivs = LOOP_IVS (loop);
7723 rtx orig_x = x;
7725 /* If this is an invariant, forget it, it isn't a giv. */
7726 if (loop_invariant_p (loop, x) == 1)
7727 return 0;
7729 *pbenefit = 0;
7730 *ext_val = NULL_RTX;
7731 x = simplify_giv_expr (loop, x, ext_val, pbenefit);
7732 if (x == 0)
7733 return 0;
7735 switch (GET_CODE (x))
7737 case USE:
7738 case CONST_INT:
7739 /* Since this is now an invariant and wasn't before, it must be a giv
7740 with MULT_VAL == 0. It doesn't matter which BIV we associate this
7741 with. */
7742 *src_reg = ivs->list->biv->dest_reg;
7743 *mult_val = const0_rtx;
7744 *add_val = x;
7745 break;
7747 case REG:
7748 /* This is equivalent to a BIV. */
7749 *src_reg = x;
7750 *mult_val = const1_rtx;
7751 *add_val = const0_rtx;
7752 break;
7754 case PLUS:
7755 /* Either (plus (biv) (invar)) or
7756 (plus (mult (biv) (invar_1)) (invar_2)). */
7757 if (GET_CODE (XEXP (x, 0)) == MULT)
7759 *src_reg = XEXP (XEXP (x, 0), 0);
7760 *mult_val = XEXP (XEXP (x, 0), 1);
7762 else
7764 *src_reg = XEXP (x, 0);
7765 *mult_val = const1_rtx;
7767 *add_val = XEXP (x, 1);
7768 break;
7770 case MULT:
7771 /* ADD_VAL is zero. */
7772 *src_reg = XEXP (x, 0);
7773 *mult_val = XEXP (x, 1);
7774 *add_val = const0_rtx;
7775 break;
7777 default:
7778 abort ();
7781 /* Remove any enclosing USE from ADD_VAL and MULT_VAL (there will be
7782 one unless they are CONST_INTs). */
7783 if (GET_CODE (*add_val) == USE)
7784 *add_val = XEXP (*add_val, 0);
7785 if (GET_CODE (*mult_val) == USE)
7786 *mult_val = XEXP (*mult_val, 0);
7788 if (is_addr)
7789 *pbenefit += address_cost (orig_x, addr_mode) - reg_address_cost;
7790 else
7791 *pbenefit += rtx_cost (orig_x, SET);
7793 /* Always return true if this is a giv so it will be detected as such,
7794 even if the benefit is zero or negative. This allows elimination
7795 of bivs that might otherwise not be eliminated. */
7796 return 1;
7799 /* Given an expression, X, try to form it as a linear function of a biv.
7800 We will canonicalize it to be of the form
7801 (plus (mult (BIV) (invar_1))
7802 (invar_2))
7803 with possible degeneracies.
7805 The invariant expressions must each be of a form that can be used as a
7806 machine operand. We surround them with a USE rtx (a hack, but localized
7807 and certainly unambiguous!) if not a CONST_INT for simplicity in this
7808 routine; it is the caller's responsibility to strip them.
7810 If no such canonicalization is possible (i.e., two biv's are used or an
7811 expression that is neither invariant nor a biv or giv), this routine
7812 returns 0.
7814 For a nonzero return, the result will have a code of CONST_INT, USE,
7815 REG (for a BIV), PLUS, or MULT. No other codes will occur.
7817 *BENEFIT will be incremented by the benefit of any sub-giv encountered. */
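/* An illustrative case: with biv B, the expression
   (mult (plus (reg B) (const_int 2)) (const_int 3)) is distributed and
   canonicalized to (plus (mult (reg B) (const_int 3)) (const_int 6)),
   i.e. (B + 2) * 3 = B * 3 + 6.  */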
7819 static rtx sge_plus (enum machine_mode, rtx, rtx);
7820 static rtx sge_plus_constant (rtx, rtx);
7822 static rtx
7823 simplify_giv_expr (const struct loop *loop, rtx x, rtx *ext_val, int *benefit)
7825 struct loop_ivs *ivs = LOOP_IVS (loop);
7826 struct loop_regs *regs = LOOP_REGS (loop);
7827 enum machine_mode mode = GET_MODE (x);
7828 rtx arg0, arg1;
7829 rtx tem;
7831 /* If this is not an integer mode, or if we cannot do arithmetic in this
7832 mode, this can't be a giv. */
7833 if (mode != VOIDmode
7834 && (GET_MODE_CLASS (mode) != MODE_INT
7835 || GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT))
7836 return NULL_RTX;
7838 switch (GET_CODE (x))
7840 case PLUS:
7841 arg0 = simplify_giv_expr (loop, XEXP (x, 0), ext_val, benefit);
7842 arg1 = simplify_giv_expr (loop, XEXP (x, 1), ext_val, benefit);
7843 if (arg0 == 0 || arg1 == 0)
7844 return NULL_RTX;
7846 /* Put constant last, CONST_INT last if both constant. */
7847 if ((GET_CODE (arg0) == USE
7848 || GET_CODE (arg0) == CONST_INT)
7849 && ! ((GET_CODE (arg0) == USE
7850 && GET_CODE (arg1) == USE)
7851 || GET_CODE (arg1) == CONST_INT))
7852 tem = arg0, arg0 = arg1, arg1 = tem;
7854 /* Handle addition of zero, then addition of an invariant. */
7855 if (arg1 == const0_rtx)
7856 return arg0;
7857 else if (GET_CODE (arg1) == CONST_INT || GET_CODE (arg1) == USE)
7858 switch (GET_CODE (arg0))
7860 case CONST_INT:
7861 case USE:
7862 /* Adding two invariants must result in an invariant, so enclose
7863 addition operation inside a USE and return it. */
7864 if (GET_CODE (arg0) == USE)
7865 arg0 = XEXP (arg0, 0);
7866 if (GET_CODE (arg1) == USE)
7867 arg1 = XEXP (arg1, 0);
7869 if (GET_CODE (arg0) == CONST_INT)
7870 tem = arg0, arg0 = arg1, arg1 = tem;
7871 if (GET_CODE (arg1) == CONST_INT)
7872 tem = sge_plus_constant (arg0, arg1);
7873 else
7874 tem = sge_plus (mode, arg0, arg1);
7876 if (GET_CODE (tem) != CONST_INT)
7877 tem = gen_rtx_USE (mode, tem);
7878 return tem;
7880 case REG:
7881 case MULT:
7882 /* biv + invar or mult + invar. Return sum. */
7883 return gen_rtx_PLUS (mode, arg0, arg1);
7885 case PLUS:
7886 /* (a + invar_1) + invar_2. Associate. */
7887 return
7888 simplify_giv_expr (loop,
7889 gen_rtx_PLUS (mode,
7890 XEXP (arg0, 0),
7891 gen_rtx_PLUS (mode,
7892 XEXP (arg0, 1),
7893 arg1)),
7894 ext_val, benefit);
7896 default:
7897 abort ();
7900 /* Each argument must be either REG, PLUS, or MULT. Convert REG to
7901 MULT to reduce cases. */
7902 if (REG_P (arg0))
7903 arg0 = gen_rtx_MULT (mode, arg0, const1_rtx);
7904 if (REG_P (arg1))
7905 arg1 = gen_rtx_MULT (mode, arg1, const1_rtx);
7907 /* Now have PLUS + PLUS, PLUS + MULT, MULT + PLUS, or MULT + MULT.
7908 Put a MULT first, leaving PLUS + PLUS, MULT + PLUS, or MULT + MULT.
7909 Recurse to associate the second PLUS. */
7910 if (GET_CODE (arg1) == MULT)
7911 tem = arg0, arg0 = arg1, arg1 = tem;
7913 if (GET_CODE (arg1) == PLUS)
7914 return
7915 simplify_giv_expr (loop,
7916 gen_rtx_PLUS (mode,
7917 gen_rtx_PLUS (mode, arg0,
7918 XEXP (arg1, 0)),
7919 XEXP (arg1, 1)),
7920 ext_val, benefit);
7922 /* Now must have MULT + MULT. Distribute if same biv, else not giv. */
7923 if (GET_CODE (arg0) != MULT || GET_CODE (arg1) != MULT)
7924 return NULL_RTX;
7926 if (!rtx_equal_p (XEXP (arg0, 0), XEXP (arg1, 0)))
7927 return NULL_RTX;
7929 return simplify_giv_expr (loop,
7930 gen_rtx_MULT (mode,
7931 XEXP (arg0, 0),
7932 gen_rtx_PLUS (mode,
7933 XEXP (arg0, 1),
7934 XEXP (arg1, 1))),
7935 ext_val, benefit);
7937 case MINUS:
7938 /* Handle "a - b" as "a + b * (-1)". */
7939 return simplify_giv_expr (loop,
7940 gen_rtx_PLUS (mode,
7941 XEXP (x, 0),
7942 gen_rtx_MULT (mode,
7943 XEXP (x, 1),
7944 constm1_rtx)),
7945 ext_val, benefit);
7947 case MULT:
7948 arg0 = simplify_giv_expr (loop, XEXP (x, 0), ext_val, benefit);
7949 arg1 = simplify_giv_expr (loop, XEXP (x, 1), ext_val, benefit);
7950 if (arg0 == 0 || arg1 == 0)
7951 return NULL_RTX;
7953 /* Put constant last, CONST_INT last if both constant. */
7954 if ((GET_CODE (arg0) == USE || GET_CODE (arg0) == CONST_INT)
7955 && GET_CODE (arg1) != CONST_INT)
7956 tem = arg0, arg0 = arg1, arg1 = tem;
7958 /* If second argument is not now constant, not giv. */
7959 if (GET_CODE (arg1) != USE && GET_CODE (arg1) != CONST_INT)
7960 return NULL_RTX;
7962 /* Handle multiply by 0 or 1. */
7963 if (arg1 == const0_rtx)
7964 return const0_rtx;
7966 else if (arg1 == const1_rtx)
7967 return arg0;
7969 switch (GET_CODE (arg0))
7971 case REG:
7972 /* biv * invar. Done. */
7973 return gen_rtx_MULT (mode, arg0, arg1);
7975 case CONST_INT:
7976 /* Product of two constants. */
7977 return GEN_INT (INTVAL (arg0) * INTVAL (arg1));
7979 case USE:
7980 /* invar * invar is a giv, but attempt to simplify it somehow. */
7981 if (GET_CODE (arg1) != CONST_INT)
7982 return NULL_RTX;
7984 arg0 = XEXP (arg0, 0);
7985 if (GET_CODE (arg0) == MULT)
7987 /* (invar_0 * invar_1) * invar_2. Associate. */
7988 return simplify_giv_expr (loop,
7989 gen_rtx_MULT (mode,
7990 XEXP (arg0, 0),
7991 gen_rtx_MULT (mode,
7992 XEXP (arg0, 1),
7994 arg1)),
7995 ext_val, benefit);
7997 /* Propagate the MULT expressions to the innermost nodes. */
7998 else if (GET_CODE (arg0) == PLUS)
8000 /* (invar_0 + invar_1) * invar_2. Distribute. */
8001 return simplify_giv_expr (loop,
8002 gen_rtx_PLUS (mode,
8003 gen_rtx_MULT (mode,
8004 XEXP (arg0, 0),
8006 arg1),
8007 gen_rtx_MULT (mode,
8008 XEXP (arg0, 1),
8010 arg1)),
8011 ext_val, benefit);
8013 return gen_rtx_USE (mode, gen_rtx_MULT (mode, arg0, arg1));
8015 case MULT:
8016 /* (a * invar_1) * invar_2. Associate. */
8017 return simplify_giv_expr (loop,
8018 gen_rtx_MULT (mode,
8019 XEXP (arg0, 0),
8020 gen_rtx_MULT (mode,
8021 XEXP (arg0, 1),
8022 arg1)),
8023 ext_val, benefit);
8025 case PLUS:
8026 /* (a + invar_1) * invar_2. Distribute. */
8027 return simplify_giv_expr (loop,
8028 gen_rtx_PLUS (mode,
8029 gen_rtx_MULT (mode,
8030 XEXP (arg0, 0),
8031 arg1),
8032 gen_rtx_MULT (mode,
8033 XEXP (arg0, 1),
8034 arg1)),
8035 ext_val, benefit);
8037 default:
8038 abort ();
8041 case ASHIFT:
8042 /* Shift by constant is multiply by power of two. */
8043 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
8044 return 0;
8046 return
8047 simplify_giv_expr (loop,
8048 gen_rtx_MULT (mode,
8049 XEXP (x, 0),
8050 GEN_INT ((HOST_WIDE_INT) 1
8051 << INTVAL (XEXP (x, 1)))),
8052 ext_val, benefit);
8054 case NEG:
8055 /* "-a" is "a * (-1)" */
8056 return simplify_giv_expr (loop,
8057 gen_rtx_MULT (mode, XEXP (x, 0), constm1_rtx),
8058 ext_val, benefit);
8060 case NOT:
8061 /* "~a" is "-a - 1". Silly, but easy. */
8062 return simplify_giv_expr (loop,
8063 gen_rtx_MINUS (mode,
8064 gen_rtx_NEG (mode, XEXP (x, 0)),
8065 const1_rtx),
8066 ext_val, benefit);
8068 case USE:
8069 /* Already in proper form for invariant. */
8070 return x;
8072 case SIGN_EXTEND:
8073 case ZERO_EXTEND:
8074 case TRUNCATE:
8075 /* Conditionally recognize extensions of simple IVs. After we've
8076 computed loop traversal counts and verified the range of the
8077 source IV, we'll reevaluate this as a GIV. */
8078 if (*ext_val == NULL_RTX)
8080 arg0 = simplify_giv_expr (loop, XEXP (x, 0), ext_val, benefit);
8081 if (arg0 && *ext_val == NULL_RTX && REG_P (arg0))
8083 *ext_val = gen_rtx_fmt_e (GET_CODE (x), mode, arg0);
8084 return arg0;
8087 goto do_default;
8089 case REG:
8090 /* If this is a new register, we can't deal with it. */
8091 if (REGNO (x) >= max_reg_before_loop)
8092 return 0;
8094 /* Check for biv or giv. */
8095 switch (REG_IV_TYPE (ivs, REGNO (x)))
8097 case BASIC_INDUCT:
8098 return x;
8099 case GENERAL_INDUCT:
8101 struct induction *v = REG_IV_INFO (ivs, REGNO (x));
8103 /* Form expression from giv and add benefit. Ensure this giv
8104 can derive another and subtract any needed adjustment if so. */
8106 /* Increasing the benefit here is risky. The only case in which it
8107 is arguably correct is if this is the only use of V. In other
8108 cases, this will artificially inflate the benefit of the current
8109 giv, and lead to suboptimal code. Thus, it is disabled, since
8110 potentially not reducing an only marginally beneficial giv is
8111 less harmful than reducing many givs that are not really
8112 beneficial. */
8114 rtx single_use = regs->array[REGNO (x)].single_usage;
8115 if (single_use && single_use != const0_rtx)
8116 *benefit += v->benefit;
8119 if (v->cant_derive)
8120 return 0;
8122 tem = gen_rtx_PLUS (mode, gen_rtx_MULT (mode,
8123 v->src_reg, v->mult_val),
8124 v->add_val);
8126 if (v->derive_adjustment)
8127 tem = gen_rtx_MINUS (mode, tem, v->derive_adjustment);
8128 arg0 = simplify_giv_expr (loop, tem, ext_val, benefit);
8129 if (*ext_val)
8131 if (!v->ext_dependent)
8132 return arg0;
8134 else
8136 *ext_val = v->ext_dependent;
8137 return arg0;
8139 return 0;
8142 default:
8143 do_default:
8144 /* If it isn't an induction variable, and it is invariant, we
8145 may be able to simplify things further by looking through
8146 the bits we just moved outside the loop. */
8147 if (loop_invariant_p (loop, x) == 1)
8149 struct movable *m;
8150 struct loop_movables *movables = LOOP_MOVABLES (loop);
8152 for (m = movables->head; m; m = m->next)
8153 if (rtx_equal_p (x, m->set_dest))
8155 /* Ok, we found a match. Substitute and simplify. */
8157 /* If we match another movable, we must use that, as
8158 this one is going away. */
8159 if (m->match)
8160 return simplify_giv_expr (loop, m->match->set_dest,
8161 ext_val, benefit);
8163 /* If consec is nonzero, this is a member of a group of
8164 instructions that were moved together. We handle this
8165 case only to the point of seeking to the last insn and
8166 looking for a REG_EQUAL. Fail if we don't find one. */
8167 if (m->consec != 0)
8169 int i = m->consec;
8170 tem = m->insn;
8171 do
8173 tem = NEXT_INSN (tem);
8175 while (--i > 0);
8177 tem = find_reg_note (tem, REG_EQUAL, NULL_RTX);
8178 if (tem)
8179 tem = XEXP (tem, 0);
8181 else
8183 tem = single_set (m->insn);
8184 if (tem)
8185 tem = SET_SRC (tem);
8188 if (tem)
8190 /* What we are most interested in is pointer
8191 arithmetic on invariants -- only take
8192 patterns we may be able to do something with. */
8193 if (GET_CODE (tem) == PLUS
8194 || GET_CODE (tem) == MULT
8195 || GET_CODE (tem) == ASHIFT
8196 || GET_CODE (tem) == CONST_INT
8197 || GET_CODE (tem) == SYMBOL_REF)
8199 tem = simplify_giv_expr (loop, tem, ext_val,
8200 benefit);
8201 if (tem)
8202 return tem;
8204 else if (GET_CODE (tem) == CONST
8205 && GET_CODE (XEXP (tem, 0)) == PLUS
8206 && GET_CODE (XEXP (XEXP (tem, 0), 0)) == SYMBOL_REF
8207 && GET_CODE (XEXP (XEXP (tem, 0), 1)) == CONST_INT)
8209 tem = simplify_giv_expr (loop, XEXP (tem, 0),
8210 ext_val, benefit);
8211 if (tem)
8212 return tem;
8215 break;
8218 break;
8221 /* Fall through to general case. */
8222 default:
8223 /* If invariant, return as USE (unless CONST_INT).
8224 Otherwise, not giv. */
8225 if (GET_CODE (x) == USE)
8226 x = XEXP (x, 0);
8228 if (loop_invariant_p (loop, x) == 1)
8230 if (GET_CODE (x) == CONST_INT)
8231 return x;
8232 if (GET_CODE (x) == CONST
8233 && GET_CODE (XEXP (x, 0)) == PLUS
8234 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
8235 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
8236 x = XEXP (x, 0);
8237 return gen_rtx_USE (mode, x);
8239 else
8240 return 0;
8244 /* This routine folds invariants such that there is only ever one
8245 CONST_INT in the summation. It is only used by simplify_giv_expr. */
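/* An illustrative case, with a made-up symbol: folding (const_int 3)
   into (plus (symbol_ref X) (const_int 4)) yields
   (plus (symbol_ref X) (const_int 7)), keeping a single CONST_INT at
   this level rather than nesting another PLUS.  */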
8247 static rtx
8248 sge_plus_constant (rtx x, rtx c)
8250 if (GET_CODE (x) == CONST_INT)
8251 return GEN_INT (INTVAL (x) + INTVAL (c));
8252 else if (GET_CODE (x) != PLUS)
8253 return gen_rtx_PLUS (GET_MODE (x), x, c);
8254 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8256 return gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
8257 GEN_INT (INTVAL (XEXP (x, 1)) + INTVAL (c)));
8259 else if (GET_CODE (XEXP (x, 0)) == PLUS
8260 || GET_CODE (XEXP (x, 1)) != PLUS)
8262 return gen_rtx_PLUS (GET_MODE (x),
8263 sge_plus_constant (XEXP (x, 0), c), XEXP (x, 1));
8265 else
8267 return gen_rtx_PLUS (GET_MODE (x),
8268 sge_plus_constant (XEXP (x, 1), c), XEXP (x, 0));
8272 static rtx
8273 sge_plus (enum machine_mode mode, rtx x, rtx y)
8275 while (GET_CODE (y) == PLUS)
8277 rtx a = XEXP (y, 0);
8278 if (GET_CODE (a) == CONST_INT)
8279 x = sge_plus_constant (x, a);
8280 else
8281 x = gen_rtx_PLUS (mode, x, a);
8282 y = XEXP (y, 1);
8284 if (GET_CODE (y) == CONST_INT)
8285 x = sge_plus_constant (x, y);
8286 else
8287 x = gen_rtx_PLUS (mode, x, y);
8288 return x;
8291 /* Help detect a giv that is calculated by several consecutive insns;
8292 for example,
8293 giv = biv * M
8294 giv = giv + A
8295 The caller has already identified the first insn P as having a giv as dest;
8296 we check that all other insns that set the same register follow
8297 immediately after P, that they alter nothing else,
8298 and that the result of the last is still a giv.
8300 The value is 0 if the reg set in P is not really a giv.
8301 Otherwise, the value is the amount gained by eliminating
8302 all the consecutive insns that compute the value.
8304 FIRST_BENEFIT is the amount gained by eliminating the first insn, P.
8305 SRC_REG is the reg of the biv; DEST_REG is the reg of the giv.
8307 The coefficients of the ultimate giv value are stored in
8308 *MULT_VAL and *ADD_VAL. */
8310 static int
8311 consec_sets_giv (const struct loop *loop, int first_benefit, rtx p,
8312 rtx src_reg, rtx dest_reg, rtx *add_val, rtx *mult_val,
8313 rtx *ext_val, rtx *last_consec_insn)
8315 struct loop_ivs *ivs = LOOP_IVS (loop);
8316 struct loop_regs *regs = LOOP_REGS (loop);
8317 int count;
8318 enum rtx_code code;
8319 int benefit;
8320 rtx temp;
8321 rtx set;
8323 /* Indicate that this is a giv so that we can update the value produced in
8324 each insn of the multi-insn sequence.
8326 This induction structure will be used only by the call to
8327 general_induction_var below, so we can allocate it on our stack.
8328 If this is a giv, our caller will replace the induct var entry with
8329 a new induction structure. */
8330 struct induction *v;
8332 if (REG_IV_TYPE (ivs, REGNO (dest_reg)) != UNKNOWN_INDUCT)
8333 return 0;
8335 v = alloca (sizeof (struct induction));
8336 v->src_reg = src_reg;
8337 v->mult_val = *mult_val;
8338 v->add_val = *add_val;
8339 v->benefit = first_benefit;
8340 v->cant_derive = 0;
8341 v->derive_adjustment = 0;
8342 v->ext_dependent = NULL_RTX;
8344 REG_IV_TYPE (ivs, REGNO (dest_reg)) = GENERAL_INDUCT;
8345 REG_IV_INFO (ivs, REGNO (dest_reg)) = v;
8347 count = regs->array[REGNO (dest_reg)].n_times_set - 1;
8349 while (count > 0)
8351 p = NEXT_INSN (p);
8352 code = GET_CODE (p);
8354 /* If libcall, skip to end of call sequence. */
8355 if (code == INSN && (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
8356 p = XEXP (temp, 0);
8358 if (code == INSN
8359 && (set = single_set (p))
8360 && REG_P (SET_DEST (set))
8361 && SET_DEST (set) == dest_reg
8362 && (general_induction_var (loop, SET_SRC (set), &src_reg,
8363 add_val, mult_val, ext_val, 0,
8364 &benefit, VOIDmode)
8365 /* Giv created by equivalent expression. */
8366 || ((temp = find_reg_note (p, REG_EQUAL, NULL_RTX))
8367 && general_induction_var (loop, XEXP (temp, 0), &src_reg,
8368 add_val, mult_val, ext_val, 0,
8369 &benefit, VOIDmode)))
8370 && src_reg == v->src_reg)
8372 if (find_reg_note (p, REG_RETVAL, NULL_RTX))
8373 benefit += libcall_benefit (p);
8375 count--;
8376 v->mult_val = *mult_val;
8377 v->add_val = *add_val;
8378 v->benefit += benefit;
8380 else if (code != NOTE)
8382 /* Allow insns that set something other than this giv to a
8383 constant. Such insns are needed on machines which cannot
8384 include long constants and should not disqualify a giv. */
8385 if (code == INSN
8386 && (set = single_set (p))
8387 && SET_DEST (set) != dest_reg
8388 && CONSTANT_P (SET_SRC (set)))
8389 continue;
8391 REG_IV_TYPE (ivs, REGNO (dest_reg)) = UNKNOWN_INDUCT;
8392 return 0;
8396 REG_IV_TYPE (ivs, REGNO (dest_reg)) = UNKNOWN_INDUCT;
8397 *last_consec_insn = p;
8398 return v->benefit;
8401 /* Return an rtx, if any, that expresses giv G2 as a function of the register
8402 represented by G1. If no such expression can be found, or it is clear that
8403 it cannot possibly be a valid address, 0 is returned.
8405 To perform the computation, we note that
8406 G1 = x * v + a and
8407 G2 = y * v + b
8408 where `v' is the biv.
8410 So G2 = (y/x) * G1 + (b - a*y/x).
8412 Note that MULT = y/x.
8414 Update: A and B are now allowed to be additive expressions such that
8415 B contains all variables in A. That is, computing B-A will not require
8416 subtracting variables. */
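/* A worked example with made-up coefficients, purely for illustration:
   with G1 = 2 * v + 1 and G2 = 6 * v + 7 we get MULT = 6/2 = 3 and
   G2 = 3 * G1 + 4, since 3 * (2 * v + 1) + 4 = 6 * v + 7.  */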
8418 static rtx
8419 express_from_1 (rtx a, rtx b, rtx mult)
8421 /* If MULT is zero, then A*MULT is zero, and our expression is B. */
8423 if (mult == const0_rtx)
8424 return b;
8426 /* If MULT is not 1, we cannot handle A with non-constants, since we
8427 would then be required to subtract multiples of the registers in A.
8428 This is theoretically possible, and may even apply to some Fortran
8429 constructs, but it is a lot of work and we do not attempt it here. */
8431 if (mult != const1_rtx && GET_CODE (a) != CONST_INT)
8432 return NULL_RTX;
8434 /* In general these structures are sorted top to bottom (down the PLUS
8435 chain), but not left to right across the PLUS. If B is a higher
8436 order giv than A, we can strip one level and recurse. If A is higher
8437 order, we'll eventually bail out, but won't know that until the end.
8438 If they are the same, we'll strip one level around this loop. */
8440 while (GET_CODE (a) == PLUS && GET_CODE (b) == PLUS)
8442 rtx ra, rb, oa, ob, tmp;
8444 ra = XEXP (a, 0), oa = XEXP (a, 1);
8445 if (GET_CODE (ra) == PLUS)
8446 tmp = ra, ra = oa, oa = tmp;
8448 rb = XEXP (b, 0), ob = XEXP (b, 1);
8449 if (GET_CODE (rb) == PLUS)
8450 tmp = rb, rb = ob, ob = tmp;
8452 if (rtx_equal_p (ra, rb))
8453 /* We matched: remove one reg completely. */
8454 a = oa, b = ob;
8455 else if (GET_CODE (ob) != PLUS && rtx_equal_p (ra, ob))
8456 /* An alternate match. */
8457 a = oa, b = rb;
8458 else if (GET_CODE (oa) != PLUS && rtx_equal_p (oa, rb))
8459 /* An alternate match. */
8460 a = ra, b = ob;
8461 else
8463 /* Indicates an extra register in B. Strip one level from B and
8464 recurse, hoping B was the higher order expression. */
8465 ob = express_from_1 (a, ob, mult);
8466 if (ob == NULL_RTX)
8467 return NULL_RTX;
8468 return gen_rtx_PLUS (GET_MODE (b), rb, ob);
8472 /* Here we are at the last level of A, go through the cases hoping to
8473 get rid of everything but a constant. */
8475 if (GET_CODE (a) == PLUS)
8477 rtx ra, oa;
8479 ra = XEXP (a, 0), oa = XEXP (a, 1);
8480 if (rtx_equal_p (oa, b))
8481 oa = ra;
8482 else if (!rtx_equal_p (ra, b))
8483 return NULL_RTX;
8485 if (GET_CODE (oa) != CONST_INT)
8486 return NULL_RTX;
8488 return GEN_INT (-INTVAL (oa) * INTVAL (mult));
8490 else if (GET_CODE (a) == CONST_INT)
8492 return plus_constant (b, -INTVAL (a) * INTVAL (mult));
8494 else if (CONSTANT_P (a))
8496 enum machine_mode mode_a = GET_MODE (a);
8497 enum machine_mode mode_b = GET_MODE (b);
8498 enum machine_mode mode = mode_b == VOIDmode ? mode_a : mode_b;
8499 return simplify_gen_binary (MINUS, mode, b, a);
8501 else if (GET_CODE (b) == PLUS)
8503 if (rtx_equal_p (a, XEXP (b, 0)))
8504 return XEXP (b, 1);
8505 else if (rtx_equal_p (a, XEXP (b, 1)))
8506 return XEXP (b, 0);
8507 else
8508 return NULL_RTX;
8510 else if (rtx_equal_p (a, b))
8511 return const0_rtx;
8513 return NULL_RTX;
8516 static rtx
8517 express_from (struct induction *g1, struct induction *g2)
8519 rtx mult, add;
8521 /* The value that G1 will be multiplied by must be a constant integer. Also,
8522 the only chance we have of getting a valid address is if y/x (see above
8523 for notation) is also an integer. */
8524 if (GET_CODE (g1->mult_val) == CONST_INT
8525 && GET_CODE (g2->mult_val) == CONST_INT)
8527 if (g1->mult_val == const0_rtx
8528 || (g1->mult_val == constm1_rtx
8529 && INTVAL (g2->mult_val)
8530 == (HOST_WIDE_INT) 1 << (HOST_BITS_PER_WIDE_INT - 1))
8531 || INTVAL (g2->mult_val) % INTVAL (g1->mult_val) != 0)
8532 return NULL_RTX;
8533 mult = GEN_INT (INTVAL (g2->mult_val) / INTVAL (g1->mult_val));
8535 else if (rtx_equal_p (g1->mult_val, g2->mult_val))
8536 mult = const1_rtx;
8537 else
8539 /* ??? Find out whether one is a multiple of the other? */
8540 return NULL_RTX;
8543 add = express_from_1 (g1->add_val, g2->add_val, mult);
8544 if (add == NULL_RTX)
8546 /* Failed. If we've got a multiplication factor between G1 and G2,
8547 scale G1's addend and try again. */
8548 if (INTVAL (mult) > 1)
8550 rtx g1_add_val = g1->add_val;
8551 if (GET_CODE (g1_add_val) == MULT
8552 && GET_CODE (XEXP (g1_add_val, 1)) == CONST_INT)
8554 HOST_WIDE_INT m;
8555 m = INTVAL (mult) * INTVAL (XEXP (g1_add_val, 1));
8556 g1_add_val = gen_rtx_MULT (GET_MODE (g1_add_val),
8557 XEXP (g1_add_val, 0), GEN_INT (m));
8559 else
8561 g1_add_val = gen_rtx_MULT (GET_MODE (g1_add_val), g1_add_val,
8562 mult);
8565 add = express_from_1 (g1_add_val, g2->add_val, const1_rtx);
8568 if (add == NULL_RTX)
8569 return NULL_RTX;
8571 /* Form simplified final result. */
8572 if (mult == const0_rtx)
8573 return add;
8574 else if (mult == const1_rtx)
8575 mult = g1->dest_reg;
8576 else
8577 mult = gen_rtx_MULT (g2->mode, g1->dest_reg, mult);
8579 if (add == const0_rtx)
8580 return mult;
8581 else
8583 if (GET_CODE (add) == PLUS
8584 && CONSTANT_P (XEXP (add, 1)))
8586 rtx tem = XEXP (add, 1);
8587 mult = gen_rtx_PLUS (g2->mode, mult, XEXP (add, 0));
8588 add = tem;
8591 return gen_rtx_PLUS (g2->mode, mult, add);
8595 /* Return an rtx, if any, that expresses giv G2 as a function of the register
8596 represented by G1. This indicates that G2 should be combined with G1 and
8597 that G2 can use (either directly or via an address expression) a register
8598 used to represent G1. */
8600 static rtx
8601 combine_givs_p (struct induction *g1, struct induction *g2)
8603 rtx comb, ret;
8605 /* With the introduction of ext dependent givs, we must be careful about modes.
8606 G2 must not use a wider mode than G1. */
8607 if (GET_MODE_SIZE (g1->mode) < GET_MODE_SIZE (g2->mode))
8608 return NULL_RTX;
8610 ret = comb = express_from (g1, g2);
8611 if (comb == NULL_RTX)
8612 return NULL_RTX;
8613 if (g1->mode != g2->mode)
8614 ret = gen_lowpart (g2->mode, comb);
8616 /* If these givs are identical, they can be combined. We use the results
8617 of express_from because the addends are not in a canonical form, so
8618 rtx_equal_p is a weaker test. */
8619 /* But don't combine a DEST_REG giv with a DEST_ADDR giv; we want the
8620 combination to be the other way round. */
8621 if (comb == g1->dest_reg
8622 && (g1->giv_type == DEST_REG || g2->giv_type == DEST_ADDR))
8624 return ret;
8627 /* If G2 can be expressed as a function of G1 and that function is valid
8628 as an address and no more expensive than using a register for G2,
8629 the expression of G2 in terms of G1 can be used. */
8630 if (ret != NULL_RTX
8631 && g2->giv_type == DEST_ADDR
8632 && memory_address_p (GET_MODE (g2->mem), ret))
8633 return ret;
8635 return NULL_RTX;
8638 /* See if BL is monotonic and has a constant per-iteration increment.
8639 Return the increment if so, otherwise return 0. */
8641 static HOST_WIDE_INT
8642 get_monotonic_increment (struct iv_class *bl)
8644 struct induction *v;
8645 rtx incr;
8647 /* Get the total increment and check that it is constant. */
8648 incr = biv_total_increment (bl);
8649 if (incr == 0 || GET_CODE (incr) != CONST_INT)
8650 return 0;
8652 for (v = bl->biv; v != 0; v = v->next_iv)
8654 if (GET_CODE (v->add_val) != CONST_INT)
8655 return 0;
8657 if (INTVAL (v->add_val) < 0 && INTVAL (incr) >= 0)
8658 return 0;
8660 if (INTVAL (v->add_val) > 0 && INTVAL (incr) <= 0)
8661 return 0;
8663 return INTVAL (incr);
8667 /* Subroutine of biv_fits_mode_p. Return true if biv BL, when biased by
8668 BIAS, will never exceed the unsigned range of MODE. LOOP is the loop
8669 to which the biv belongs and INCR is its per-iteration increment. */
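/* A worked example with made-up numbers, purely for illustration: for
   QImode (mask 255), a biased initial value of 10, INCR = 5 and 40
   iterations, we get SPAN = 256 - 10 = 246 and DELTA = 5; since
   246 / 40 = 6 >= 5, the biased biv stays within the mode's unsigned
   range.  */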
8671 static bool
8672 biased_biv_fits_mode_p (const struct loop *loop, struct iv_class *bl,
8673 HOST_WIDE_INT incr, enum machine_mode mode,
8674 unsigned HOST_WIDE_INT bias)
8676 unsigned HOST_WIDE_INT initial, maximum, span, delta;
8678 /* We need to be able to manipulate MODE-size constants. */
8679 if (HOST_BITS_PER_WIDE_INT < GET_MODE_BITSIZE (mode))
8680 return false;
8682 /* The number of loop iterations must be constant. */
8683 if (LOOP_INFO (loop)->n_iterations == 0)
8684 return false;
8686 /* So must the biv's initial value. */
8687 if (bl->initial_value == 0 || GET_CODE (bl->initial_value) != CONST_INT)
8688 return false;
8690 initial = bias + INTVAL (bl->initial_value);
8691 maximum = GET_MODE_MASK (mode);
8693 /* Make sure that the initial value is within range. */
8694 if (initial > maximum)
8695 return false;
8697 /* Set up DELTA and SPAN such that the number of iterations * DELTA
8698 (calculated to arbitrary precision) must be <= SPAN. */
8699 if (incr < 0)
8701 delta = -incr;
8702 span = initial;
8704 else
8706 delta = incr;
8707 /* Handle the special case in which MAXIMUM is the largest
8708 unsigned HOST_WIDE_INT and INITIAL is 0. */
8709 if (maximum + 1 == initial)
8710 span = LOOP_INFO (loop)->n_iterations * delta;
8711 else
8712 span = maximum + 1 - initial;
8714 return (span / LOOP_INFO (loop)->n_iterations >= delta);
8718 /* Return true if biv BL will never exceed the bounds of MODE. LOOP is
8719 the loop to which BL belongs and INCR is its per-iteration increment.
8720 UNSIGNEDP is true if the biv should be treated as unsigned. */
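/* A note on the bias, for illustration: for a signed HImode biv the
   bias computed below is 0x8000, which maps the signed range
   [-32768, 32767] onto the unsigned range [0, 65535], so that
   biased_biv_fits_mode_p only ever has to reason about unsigned
   overflow.  */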
8722 static bool
8723 biv_fits_mode_p (const struct loop *loop, struct iv_class *bl,
8724 HOST_WIDE_INT incr, enum machine_mode mode, bool unsignedp)
8726 struct loop_info *loop_info;
8727 unsigned HOST_WIDE_INT bias;
8729 /* A biv's value will always be limited to its natural mode.
8730 Larger modes will observe the same wrap-around. */
8731 if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (bl->biv->src_reg)))
8732 mode = GET_MODE (bl->biv->src_reg);
8734 loop_info = LOOP_INFO (loop);
8736 bias = (unsignedp ? 0 : (GET_MODE_MASK (mode) >> 1) + 1);
8737 if (biased_biv_fits_mode_p (loop, bl, incr, mode, bias))
8738 return true;
8740 if (mode == GET_MODE (bl->biv->src_reg)
8741 && bl->biv->src_reg == loop_info->iteration_var
8742 && loop_info->comparison_value
8743 && loop_invariant_p (loop, loop_info->comparison_value))
8745 /* If the increment is +1, and the exit test is a <, the BIV
8746 cannot overflow. (For <=, we have the problematic case that
8747 the comparison value might be the maximum value of the range.) */
8748 if (incr == 1)
8750 if (loop_info->comparison_code == LT)
8751 return true;
8752 if (loop_info->comparison_code == LTU && unsignedp)
8753 return true;
8756 /* Likewise for increment -1 and exit test >. */
8757 if (incr == -1)
8759 if (loop_info->comparison_code == GT)
8760 return true;
8761 if (loop_info->comparison_code == GTU && unsignedp)
8762 return true;
8765 return false;
8769 /* Given that X is an extension or truncation of BL, return true
8770 if it is unaffected by overflow. LOOP is the loop to which
8771 BL belongs and INCR is its per-iteration increment. */
8773 static bool
8774 extension_within_bounds_p (const struct loop *loop, struct iv_class *bl,
8775 HOST_WIDE_INT incr, rtx x)
8777 enum machine_mode mode;
8778 bool signedp, unsignedp;
8780 switch (GET_CODE (x))
8782 case SIGN_EXTEND:
8783 case ZERO_EXTEND:
8784 mode = GET_MODE (XEXP (x, 0));
8785 signedp = (GET_CODE (x) == SIGN_EXTEND);
8786 unsignedp = (GET_CODE (x) == ZERO_EXTEND);
8787 break;
8789 case TRUNCATE:
8790 /* We don't know whether this value is being used as signed
8791 or unsigned, so check the conditions for both. */
8792 mode = GET_MODE (x);
8793 signedp = unsignedp = true;
8794 break;
8796 default:
8797 abort ();
8800 return ((!signedp || biv_fits_mode_p (loop, bl, incr, mode, false))
8801 && (!unsignedp || biv_fits_mode_p (loop, bl, incr, mode, true)));
8805 /* Check each extension dependent giv in this class to see if its
8806 root biv is safe from wrapping in the interior mode, which would
8807 make the giv illegal. */
8809 static void
8810 check_ext_dependent_givs (const struct loop *loop, struct iv_class *bl)
8812 struct induction *v;
8813 HOST_WIDE_INT incr;
8815 incr = get_monotonic_increment (bl);
8817 /* Invalidate givs that fail the tests. */
8818 for (v = bl->giv; v; v = v->next_iv)
8819 if (v->ext_dependent)
8821 if (incr != 0
8822 && extension_within_bounds_p (loop, bl, incr, v->ext_dependent))
8824 if (loop_dump_stream)
8825 fprintf (loop_dump_stream,
8826 "Verified ext dependent giv at %d of reg %d\n",
8827 INSN_UID (v->insn), bl->regno);
8829 else
8831 if (loop_dump_stream)
8832 fprintf (loop_dump_stream,
8833 "Failed ext dependent giv at %d\n",
8834 INSN_UID (v->insn));
8836 v->ignore = 1;
8837 bl->all_reduced = 0;
8842 /* Generate a version of VALUE in a mode appropriate for initializing V. */
8844 static rtx
8845 extend_value_for_giv (struct induction *v, rtx value)
8847 rtx ext_dep = v->ext_dependent;
8849 if (! ext_dep)
8850 return value;
8852 /* Recall that check_ext_dependent_givs verified that the known bounds
8853 of a biv did not overflow or wrap with respect to the extension for
8854 the giv. Therefore, constants need no additional adjustment. */
8855 if (CONSTANT_P (value) && GET_MODE (value) == VOIDmode)
8856 return value;
8858 /* Otherwise, we must adjust the value to compensate for the
8859 differing modes of the biv and the giv. */
8860 return gen_rtx_fmt_e (GET_CODE (ext_dep), GET_MODE (ext_dep), value);
8863 struct combine_givs_stats
8865 int giv_number;
8866 int total_benefit;
8869 static int
8870 cmp_combine_givs_stats (const void *xp, const void *yp)
8872 const struct combine_givs_stats * const x =
8873 (const struct combine_givs_stats *) xp;
8874 const struct combine_givs_stats * const y =
8875 (const struct combine_givs_stats *) yp;
8876 int d;
8877 d = y->total_benefit - x->total_benefit;
8878 /* Stabilize the sort. */
8879 if (!d)
8880 d = x->giv_number - y->giv_number;
8881 return d;
8884 /* Check all pairs of givs for iv_class BL and see if any can be combined with
8885 any other. If so, point SAME to the giv combined with and set NEW_REG to
8886 be an expression (in terms of the other giv's DEST_REG) equivalent to the
8887 giv. Also, update BENEFIT and related fields for cost/benefit analysis. */
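/* A note on the data used below: CAN_COMBINE acts as a giv_count x
   giv_count matrix in which can_combine[i * giv_count + j] holds the
   rtx (if any) expressing giv J in terms of giv I, and STATS records
   each giv's accumulated benefit so that the most profitable candidate
   is tried first after each qsort.  */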
8889 static void
8890 combine_givs (struct loop_regs *regs, struct iv_class *bl)
8892 /* Additional benefit to add for being combined multiple times. */
8893 const int extra_benefit = 3;
8895 struct induction *g1, *g2, **giv_array;
8896 int i, j, k, giv_count;
8897 struct combine_givs_stats *stats;
8898 rtx *can_combine;
8900 /* Count givs, because bl->giv_count is incorrect here. */
8901 giv_count = 0;
8902 for (g1 = bl->giv; g1; g1 = g1->next_iv)
8903 if (!g1->ignore)
8904 giv_count++;
8906 giv_array = alloca (giv_count * sizeof (struct induction *));
8907 i = 0;
8908 for (g1 = bl->giv; g1; g1 = g1->next_iv)
8909 if (!g1->ignore)
8910 giv_array[i++] = g1;
8912 stats = xcalloc (giv_count, sizeof (*stats));
8913 can_combine = xcalloc (giv_count, giv_count * sizeof (rtx));
8915 for (i = 0; i < giv_count; i++)
8917 int this_benefit;
8918 rtx single_use;
8920 g1 = giv_array[i];
8921 stats[i].giv_number = i;
8923 /* If a DEST_REG GIV is used only once, do not allow it to combine
8924 with anything, for in doing so we will gain nothing that cannot
8925 be had by simply letting the GIV with which we would have combined
8926 be reduced on its own. The lossage shows up in particular with
8927 DEST_ADDR targets on hosts with reg+reg addressing, though it can
8928 be seen elsewhere as well. */
8929 if (g1->giv_type == DEST_REG
8930 && (single_use = regs->array[REGNO (g1->dest_reg)].single_usage)
8931 && single_use != const0_rtx)
8932 continue;
8934 this_benefit = g1->benefit;
8935 /* Add an additional weight for zero addends. */
8936 if (g1->no_const_addval)
8937 this_benefit += 1;
8939 for (j = 0; j < giv_count; j++)
8941 rtx this_combine;
8943 g2 = giv_array[j];
8944 if (g1 != g2
8945 && (this_combine = combine_givs_p (g1, g2)) != NULL_RTX)
8947 can_combine[i * giv_count + j] = this_combine;
8948 this_benefit += g2->benefit + extra_benefit;
8951 stats[i].total_benefit = this_benefit;
8954 /* Iterate, combining until we can't. */
8955 restart:
8956 qsort (stats, giv_count, sizeof (*stats), cmp_combine_givs_stats);
8958 if (loop_dump_stream)
8960 fprintf (loop_dump_stream, "Sorted combine statistics:\n");
8961 for (k = 0; k < giv_count; k++)
8963 g1 = giv_array[stats[k].giv_number];
8964 if (!g1->combined_with && !g1->same)
8965 fprintf (loop_dump_stream, " {%d, %d}",
8966 INSN_UID (giv_array[stats[k].giv_number]->insn),
8967 stats[k].total_benefit);
8969 putc ('\n', loop_dump_stream);
8972 for (k = 0; k < giv_count; k++)
8974 int g1_add_benefit = 0;
8976 i = stats[k].giv_number;
8977 g1 = giv_array[i];
8979 /* If it has already been combined, skip. */
8980 if (g1->combined_with || g1->same)
8981 continue;
8983 for (j = 0; j < giv_count; j++)
8985 g2 = giv_array[j];
8986 if (g1 != g2 && can_combine[i * giv_count + j]
8987 /* If it has already been combined, skip. */
8988 && ! g2->same && ! g2->combined_with)
8990 int l;
8992 g2->new_reg = can_combine[i * giv_count + j];
8993 g2->same = g1;
8994 /* For the destination, we may now substitute a mem expression
8995 for a register. This changes the costs considerably, so add the
8996 compensation. */
8997 if (g2->giv_type == DEST_ADDR)
8998 g2->benefit = (g2->benefit + reg_address_cost
8999 - address_cost (g2->new_reg,
9000 GET_MODE (g2->mem)));
9001 g1->combined_with++;
9002 g1->lifetime += g2->lifetime;
9004 g1_add_benefit += g2->benefit;
9006 /* ??? The new final_[bg]iv_value code does a much better job
9007 of finding replaceable giv's, and hence this code may no
9008 longer be necessary. */
9009 if (! g2->replaceable && REG_USERVAR_P (g2->dest_reg))
9010 g1_add_benefit -= copy_cost;
9012 /* To help optimize the next set of combinations, remove
9013 this giv from the benefits of other potential mates. */
9014 for (l = 0; l < giv_count; ++l)
9016 int m = stats[l].giv_number;
9017 if (can_combine[m * giv_count + j])
9018 stats[l].total_benefit -= g2->benefit + extra_benefit;
9021 if (loop_dump_stream)
9022 fprintf (loop_dump_stream,
9023 "giv at %d combined with giv at %d; new benefit %d + %d, lifetime %d\n",
9024 INSN_UID (g2->insn), INSN_UID (g1->insn),
9025 g1->benefit, g1_add_benefit, g1->lifetime);
9029 /* To help optimize the next set of combinations, remove
9030 this giv from the benefits of other potential mates. */
9031 if (g1->combined_with)
9033 for (j = 0; j < giv_count; ++j)
9035 int m = stats[j].giv_number;
9036 if (can_combine[m * giv_count + i])
9037 stats[j].total_benefit -= g1->benefit + extra_benefit;
9040 g1->benefit += g1_add_benefit;
9042 /* We've finished with this giv, and everything it touched.
9043 Restart the combination so that proper weights for the
9044 rest of the givs are properly taken into account. */
9045 /* ??? Ideally we would compact the arrays at this point, so
9046 as to not cover old ground. But sanely compacting
9047 can_combine is tricky. */
9048 goto restart;
9052 /* Clean up. */
9053 free (stats);
9054 free (can_combine);
9057 /* Generate sequence for REG = B * M + A. B is the initial value of
9058 the basic induction variable, M a multiplicative constant, A an
9059 additive constant and REG the destination register. */
9061 static rtx
9062 gen_add_mult (rtx b, rtx m, rtx a, rtx reg)
9064 rtx seq;
9065 rtx result;
9067 start_sequence ();
9068 /* Use unsigned arithmetic. */
9069 result = expand_mult_add (b, reg, m, a, GET_MODE (reg), 1);
9070 if (reg != result)
9071 emit_move_insn (reg, result);
9072 seq = get_insns ();
9073 end_sequence ();
9075 return seq;
9079 /* Update registers created in insn sequence SEQ. */
9081 static void
9082 loop_regs_update (const struct loop *loop ATTRIBUTE_UNUSED, rtx seq)
9084 rtx insn;
9086 /* Update register info for alias analysis. */
9088 insn = seq;
9089 while (insn != NULL_RTX)
9091 rtx set = single_set (insn);
9093 if (set && REG_P (SET_DEST (set)))
9094 record_base_value (REGNO (SET_DEST (set)), SET_SRC (set), 0);
9096 insn = NEXT_INSN (insn);
9101 /* EMIT code before BEFORE_BB/BEFORE_INSN to set REG = B * M + A. B
9102 is the initial value of the basic induction variable, M a
9103 multiplicative constant, A an additive constant and REG the
9104 destination register. */
9106 static void
9107 loop_iv_add_mult_emit_before (const struct loop *loop, rtx b, rtx m, rtx a,
9108 rtx reg, basic_block before_bb, rtx before_insn)
9110 rtx seq;
9112 if (! before_insn)
9114 loop_iv_add_mult_hoist (loop, b, m, a, reg);
9115 return;
9118 /* Use copy_rtx to prevent unexpected sharing of these rtx. */
9119 seq = gen_add_mult (copy_rtx (b), copy_rtx (m), copy_rtx (a), reg);
9121 /* Increase the lifetime of any invariants moved further in code. */
9122 update_reg_last_use (a, before_insn);
9123 update_reg_last_use (b, before_insn);
9124 update_reg_last_use (m, before_insn);
9126 /* It is possible that the expansion created lots of new registers.
9127 Iterate over the sequence we just created and record them all. We
9128 must do this before inserting the sequence. */
9129 loop_regs_update (loop, seq);
9131 loop_insn_emit_before (loop, before_bb, before_insn, seq);
9135 /* Emit insns at the loop's sink (after the loop end) to set
9136 REG = B * M + A. B is the initial value of the basic induction
9137 variable, M a multiplicative constant, A an additive constant
9138 and REG the destination register. */
9140 static void
9141 loop_iv_add_mult_sink (const struct loop *loop, rtx b, rtx m, rtx a, rtx reg)
9143 rtx seq;
9145 /* Use copy_rtx to prevent unexpected sharing of these rtx. */
9146 seq = gen_add_mult (copy_rtx (b), copy_rtx (m), copy_rtx (a), reg);
9148 /* Increase the lifetime of any invariants moved further in code.
9149 ???? Is this really necessary? */
9150 update_reg_last_use (a, loop->sink);
9151 update_reg_last_use (b, loop->sink);
9152 update_reg_last_use (m, loop->sink);
9154 /* It is possible that the expansion created lots of new registers.
9155 Iterate over the sequence we just created and record them all. We
9156 must do this before inserting the sequence. */
9157 loop_regs_update (loop, seq);
9159 loop_insn_sink (loop, seq);
9163 /* Emit insns in the loop pre-header to set REG = B * M + A. B is the
9164 initial value of the basic induction variable, M a multiplicative
9165 constant, A an additive constant and REG the destination register. */
9167 static void
9168 loop_iv_add_mult_hoist (const struct loop *loop, rtx b, rtx m, rtx a, rtx reg)
9170 rtx seq;
9172 /* Use copy_rtx to prevent unexpected sharing of these rtx. */
9173 seq = gen_add_mult (copy_rtx (b), copy_rtx (m), copy_rtx (a), reg);
9175 /* It is possible that the expansion created lots of new registers.
9176 Iterate over the sequence we just created and record them all. We
9177 must do this before inserting the sequence. */
9178 loop_regs_update (loop, seq);
9180 loop_insn_hoist (loop, seq);
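/* Editorial note: the three wrappers above differ only in where the
   generated sequence lands -- loop_iv_add_mult_emit_before places it
   before a given insn, loop_iv_add_mult_sink at the loop's sink
   (after the loop end), and loop_iv_add_mult_hoist in the loop
   pre-header.  All three copy their operands, record new registers
   via loop_regs_update, and then insert the sequence.  */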
9185 /* Similar to gen_add_mult, but compute cost rather than generating
9186 sequence. */
9188 static int
9189 iv_add_mult_cost (rtx b, rtx m, rtx a, rtx reg)
9191 int cost = 0;
9192 rtx last, result;
9194 start_sequence ();
9195 result = expand_mult_add (b, reg, m, a, GET_MODE (reg), 1);
9196 if (reg != result)
9197 emit_move_insn (reg, result);
9198 last = get_last_insn ();
9199 while (last)
9201 rtx t = single_set (last);
9202 if (t)
9203 cost += rtx_cost (SET_SRC (t), SET);
9204 last = PREV_INSN (last);
9206 end_sequence ();
9207 return cost;
9210 /* Test whether A * B can be computed without
9211 an actual multiply insn. Value is 1 if so.
9213 ??? This function stinks because it generates a ton of wasted RTL
9214 ??? and as a result fragments GC memory to no end. There are other
9215 ??? places in the compiler which are invoked a lot and do the same
9216 ??? thing, generate wasted RTL just to see if something is possible. */
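/* Editorial example, not from the original source: on a typical
   target, x * 5 expands to roughly

       t = x << 2;
       t = t + x;

   two cheap insns and no MULT, so the product is considered cheap;
   a constant with no short shift-and-add expansion leaves a MULT
   (or more than three insns) in the sequence and the function
   returns 0.  */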
9218 static int
9219 product_cheap_p (rtx a, rtx b)
9221 rtx tmp;
9222 int win, n_insns;
9224 /* If only one is constant, make it B. */
9225 if (GET_CODE (a) == CONST_INT)
9226 tmp = a, a = b, b = tmp;
9228 /* If A is still constant, both operands were constant, so no multiply is needed. */
9229 if (GET_CODE (a) == CONST_INT)
9230 return 1;
9232 /* If B is not constant, neither operand is, so a multiply would be needed. */
9233 if (GET_CODE (b) != CONST_INT)
9234 return 0;
9236 /* One operand is constant, so we might not need a multiply insn. Generate
9237 the code for the multiply and see whether a call, a multiply insn, or a
9238 long sequence of insns is generated. */
9240 start_sequence ();
9241 expand_mult (GET_MODE (a), a, b, NULL_RTX, 1);
9242 tmp = get_insns ();
9243 end_sequence ();
9245 win = 1;
9246 if (tmp == NULL_RTX)
9248 else if (INSN_P (tmp))
9250 n_insns = 0;
9251 while (tmp != NULL_RTX)
9253 rtx next = NEXT_INSN (tmp);
9255 if (++n_insns > 3
9256 || !NONJUMP_INSN_P (tmp)
9257 || (GET_CODE (PATTERN (tmp)) == SET
9258 && GET_CODE (SET_SRC (PATTERN (tmp))) == MULT)
9259 || (GET_CODE (PATTERN (tmp)) == PARALLEL
9260 && GET_CODE (XVECEXP (PATTERN (tmp), 0, 0)) == SET
9261 && GET_CODE (SET_SRC (XVECEXP (PATTERN (tmp), 0, 0))) == MULT))
9263 win = 0;
9264 break;
9267 tmp = next;
9270 else if (GET_CODE (tmp) == SET
9271 && GET_CODE (SET_SRC (tmp)) == MULT)
9272 win = 0;
9273 else if (GET_CODE (tmp) == PARALLEL
9274 && GET_CODE (XVECEXP (tmp, 0, 0)) == SET
9275 && GET_CODE (SET_SRC (XVECEXP (tmp, 0, 0))) == MULT)
9276 win = 0;
9278 return win;
9281 /* Check to see if the loop can be terminated by a "decrement and branch
9282 until zero" instruction. If so, add a REG_NONNEG note to the branch insn.
9283 Also try reversing an increment loop to a decrement loop
9284 to see if the optimization can be performed.
9285 Value is nonzero if optimization was performed. */
9287 /* This is useful even if the architecture doesn't have such an insn,
9288 because it might change a loop which increments from 0 to n to a loop
9289 which decrements from n to 0. A loop that decrements to zero is usually
9290 faster than one that increments from zero. */
9292 /* ??? This could be rewritten to use some of the loop unrolling procedures,
9293 such as approx_final_value, biv_total_increment, loop_iterations, and
9294 final_[bg]iv_value. */
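/* Editorial illustration, not part of the original source: at the
   source level, the transformation attempted here turns

       for (i = 0; i < n; i++)
         body ();

   where i is used for nothing but counting, into

       for (i = n; i > 0; i--)
         body ();

   so that the exit test becomes a comparison against zero, or a
   single decrement-and-branch insn on targets that have one.  */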
9296 static int
9297 check_dbra_loop (struct loop *loop, int insn_count)
9299 struct loop_info *loop_info = LOOP_INFO (loop);
9300 struct loop_regs *regs = LOOP_REGS (loop);
9301 struct loop_ivs *ivs = LOOP_IVS (loop);
9302 struct iv_class *bl;
9303 rtx reg;
9304 enum machine_mode mode;
9305 rtx jump_label;
9306 rtx final_value;
9307 rtx start_value;
9308 rtx new_add_val;
9309 rtx comparison;
9310 rtx before_comparison;
9311 rtx p;
9312 rtx jump;
9313 rtx first_compare;
9314 int compare_and_branch;
9315 rtx loop_start = loop->start;
9316 rtx loop_end = loop->end;
9318 /* If the last insn is a conditional branch, and the insn before tests a
9319 register value, try to optimize it. Otherwise, we can't do anything. */
9321 jump = PREV_INSN (loop_end);
9322 comparison = get_condition_for_loop (loop, jump);
9323 if (comparison == 0)
9324 return 0;
9325 if (!onlyjump_p (jump))
9326 return 0;
9328 /* Try to compute whether the compare/branch at the loop end is one or
9329 two instructions. */
9330 get_condition (jump, &first_compare, false, true);
9331 if (first_compare == jump)
9332 compare_and_branch = 1;
9333 else if (first_compare == prev_nonnote_insn (jump))
9334 compare_and_branch = 2;
9335 else
9336 return 0;
9339 /* If more than one condition is present to control the loop, then
9340 do not proceed, as this function does not know how to rewrite
9341 loop tests with more than one condition.
9343 Look backwards from the first insn in the last comparison
9344 sequence and see if we've got another comparison sequence. */
9346 rtx jump1;
9347 if ((jump1 = prev_nonnote_insn (first_compare))
9348 && JUMP_P (jump1))
9349 return 0;
9352 /* Check all of the bivs to see if the compare uses one of them.
9353 Skip biv's set more than once because we can't guarantee that
9354 it will be zero on the last iteration. Also skip if the biv is
9355 used between its update and the test insn. */
9357 for (bl = ivs->list; bl; bl = bl->next)
9359 if (bl->biv_count == 1
9360 && ! bl->biv->maybe_multiple
9361 && bl->biv->dest_reg == XEXP (comparison, 0)
9362 && ! reg_used_between_p (regno_reg_rtx[bl->regno], bl->biv->insn,
9363 first_compare))
9364 break;
9367 /* Try swapping the comparison to identify a suitable biv. */
9368 if (!bl)
9369 for (bl = ivs->list; bl; bl = bl->next)
9370 if (bl->biv_count == 1
9371 && ! bl->biv->maybe_multiple
9372 && bl->biv->dest_reg == XEXP (comparison, 1)
9373 && ! reg_used_between_p (regno_reg_rtx[bl->regno], bl->biv->insn,
9374 first_compare))
9376 comparison = gen_rtx_fmt_ee (swap_condition (GET_CODE (comparison)),
9377 VOIDmode,
9378 XEXP (comparison, 1),
9379 XEXP (comparison, 0));
9380 break;
9383 if (! bl)
9384 return 0;
9386 /* Look for the case where the basic induction variable is always
9387 nonnegative, and equals zero on the last iteration.
9388 In this case, add a reg_note REG_NONNEG, which allows the
9389 m68k DBRA instruction to be used. */
9391 if (((GET_CODE (comparison) == GT && XEXP (comparison, 1) == constm1_rtx)
9392 || (GET_CODE (comparison) == NE && XEXP (comparison, 1) == const0_rtx))
9393 && GET_CODE (bl->biv->add_val) == CONST_INT
9394 && INTVAL (bl->biv->add_val) < 0)
9396 /* The initial value must be greater than 0, and
9397 init_val % -dec_value == 0, to ensure that the biv equals zero on
9398 the last iteration. */
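/* Editorial example: a biv initialized to 12 with add_val -4
   satisfies both tests -- it steps through 12, 8, 4, 0, staying
   nonnegative and reaching exactly zero on the last iteration.  */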
9400 if (GET_CODE (bl->initial_value) == CONST_INT
9401 && INTVAL (bl->initial_value) > 0
9402 && (INTVAL (bl->initial_value)
9403 % (-INTVAL (bl->biv->add_val))) == 0)
9405 /* Register always nonnegative, add REG_NOTE to branch. */
9406 if (! find_reg_note (jump, REG_NONNEG, NULL_RTX))
9407 REG_NOTES (jump)
9408 = gen_rtx_EXPR_LIST (REG_NONNEG, bl->biv->dest_reg,
9409 REG_NOTES (jump));
9410 bl->nonneg = 1;
9412 return 1;
9415 /* If the decrement is 1 and the value was tested as >= 0 before
9416 the loop, then we can safely optimize. */
9417 for (p = loop_start; p; p = PREV_INSN (p))
9419 if (LABEL_P (p))
9420 break;
9421 if (!JUMP_P (p))
9422 continue;
9424 before_comparison = get_condition_for_loop (loop, p);
9425 if (before_comparison
9426 && XEXP (before_comparison, 0) == bl->biv->dest_reg
9427 && (GET_CODE (before_comparison) == LT
9428 || GET_CODE (before_comparison) == LTU)
9429 && XEXP (before_comparison, 1) == const0_rtx
9430 && ! reg_set_between_p (bl->biv->dest_reg, p, loop_start)
9431 && INTVAL (bl->biv->add_val) == -1)
9433 if (! find_reg_note (jump, REG_NONNEG, NULL_RTX))
9434 REG_NOTES (jump)
9435 = gen_rtx_EXPR_LIST (REG_NONNEG, bl->biv->dest_reg,
9436 REG_NOTES (jump));
9437 bl->nonneg = 1;
9439 return 1;
9443 else if (GET_CODE (bl->biv->add_val) == CONST_INT
9444 && INTVAL (bl->biv->add_val) > 0)
9446 /* Try to change inc to dec, so can apply above optimization. */
9447 /* Can do this if:
9448 all registers modified are induction variables or invariant,
9449 all memory references have non-overlapping addresses
9450 (obviously true if only one write)
9451 allow 2 insns for the compare/jump at the end of the loop. */
9452 /* Also, we must avoid any instructions which use both the reversed
9453 biv and another biv. Such instructions will fail if the loop is
9454 reversed. We meet this condition by requiring that either
9455 no_use_except_counting is true, or else that there is only
9456 one biv. */
9457 int num_nonfixed_reads = 0;
9458 /* 1 if the iteration var is used only to count iterations. */
9459 int no_use_except_counting = 0;
9460 /* 1 if the loop has no memory store, or it has a single memory store
9461 which is reversible. */
9462 int reversible_mem_store = 1;
9464 if (bl->giv_count == 0
9465 && !loop->exit_count
9466 && !loop_info->has_multiple_exit_targets)
9468 rtx bivreg = regno_reg_rtx[bl->regno];
9469 struct iv_class *blt;
9471 /* If there are no givs for this biv, and the only exit is the
9472 fall through at the end of the loop, then
9473 see if perhaps there are no uses except to count. */
9474 no_use_except_counting = 1;
9475 for (p = loop_start; p != loop_end; p = NEXT_INSN (p))
9476 if (INSN_P (p))
9478 rtx set = single_set (p);
9480 if (set && REG_P (SET_DEST (set))
9481 && REGNO (SET_DEST (set)) == bl->regno)
9482 /* An insn that sets the biv is okay. */
9484 else if (!reg_mentioned_p (bivreg, PATTERN (p)))
9485 /* An insn that doesn't mention the biv is okay. */
9487 else if (p == prev_nonnote_insn (prev_nonnote_insn (loop_end))
9488 || p == prev_nonnote_insn (loop_end))
9490 /* If either of these insns uses the biv and sets a pseudo
9491 that has more than one usage, then the biv has uses
9492 other than counting since it's used to derive a value
9493 that is used more than one time. */
9494 note_stores (PATTERN (p), note_set_pseudo_multiple_uses,
9495 regs);
9496 if (regs->multiple_uses)
9498 no_use_except_counting = 0;
9499 break;
9502 else
9504 no_use_except_counting = 0;
9505 break;
9509 /* A biv has uses besides counting if it is used to set
9510 another biv. */
9511 for (blt = ivs->list; blt; blt = blt->next)
9512 if (blt->init_set
9513 && reg_mentioned_p (bivreg, SET_SRC (blt->init_set)))
9515 no_use_except_counting = 0;
9516 break;
9520 if (no_use_except_counting)
9521 /* No need to worry about MEMs. */
9523 else if (loop_info->num_mem_sets <= 1)
9525 for (p = loop_start; p != loop_end; p = NEXT_INSN (p))
9526 if (INSN_P (p))
9527 num_nonfixed_reads += count_nonfixed_reads (loop, PATTERN (p));
9529 /* If the loop has a single store, and the destination address is
9530 invariant, then we can't reverse the loop, because this address
9531 might then have the wrong value at loop exit.
9532 This would work if the source was invariant also, however, in that
9533 case, the insn should have been moved out of the loop. */
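/* Editorial example: a single store through an invariant address,

       for (i = 0; i < n; i++)
         *p = i;

   leaves *p == n-1 on exit; the reversed loop would leave
   *p == 0, so such a store blocks the reversal.  */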
9535 if (loop_info->num_mem_sets == 1)
9537 struct induction *v;
9539 /* If we could prove that each of the memory locations
9540 written to was different, then we could reverse the
9541 store -- but we don't presently have any way of
9542 knowing that. */
9543 reversible_mem_store = 0;
9545 /* If the store depends on a register that is set after the
9546 store, it depends on the initial value, and is thus not
9547 reversible. */
9548 for (v = bl->giv; reversible_mem_store && v; v = v->next_iv)
9550 if (v->giv_type == DEST_REG
9551 && reg_mentioned_p (v->dest_reg,
9552 PATTERN (loop_info->first_loop_store_insn))
9553 && loop_insn_first_p (loop_info->first_loop_store_insn,
9554 v->insn))
9555 reversible_mem_store = 0;
9559 else
9560 return 0;
9562 /* This code only acts for innermost loops. It also simplifies
9563 the memory address check by only reversing loops with
9564 zero or one memory access.
9565 Two memory accesses could involve parts of the same array,
9566 and that can't be reversed.
9567 If the biv is used only for counting, then we don't need to worry
9568 about all these things. */
9570 if ((num_nonfixed_reads <= 1
9571 && ! loop_info->has_nonconst_call
9572 && ! loop_info->has_prefetch
9573 && ! loop_info->has_volatile
9574 && reversible_mem_store
9575 && (bl->giv_count + bl->biv_count + loop_info->num_mem_sets
9576 + num_unmoved_movables (loop) + compare_and_branch == insn_count)
9577 && (bl == ivs->list && bl->next == 0))
9578 || (no_use_except_counting && ! loop_info->has_prefetch))
9580 rtx tem;
9582 /* Loop can be reversed. */
9583 if (loop_dump_stream)
9584 fprintf (loop_dump_stream, "Can reverse loop\n");
9586 /* Now check other conditions:
9588 The increment must be a constant, as must the initial value,
9589 and the comparison code must be LT.
9591 This test can probably be improved since +/- 1 in the constant
9592 can be obtained by changing LT to LE and vice versa; this is
9593 confusing. */
9595 if (comparison
9596 /* for constants, LE gets turned into LT */
9597 && (GET_CODE (comparison) == LT
9598 || (GET_CODE (comparison) == LE
9599 && no_use_except_counting)
9600 || GET_CODE (comparison) == LTU))
9602 HOST_WIDE_INT add_val, add_adjust, comparison_val = 0;
9603 rtx initial_value, comparison_value;
9604 int nonneg = 0;
9605 enum rtx_code cmp_code;
9606 int comparison_const_width;
9607 unsigned HOST_WIDE_INT comparison_sign_mask;
9608 bool keep_first_compare;
9610 add_val = INTVAL (bl->biv->add_val);
9611 comparison_value = XEXP (comparison, 1);
9612 if (GET_MODE (comparison_value) == VOIDmode)
9613 comparison_const_width
9614 = GET_MODE_BITSIZE (GET_MODE (XEXP (comparison, 0)));
9615 else
9616 comparison_const_width
9617 = GET_MODE_BITSIZE (GET_MODE (comparison_value));
9618 if (comparison_const_width > HOST_BITS_PER_WIDE_INT)
9619 comparison_const_width = HOST_BITS_PER_WIDE_INT;
9620 comparison_sign_mask
9621 = (unsigned HOST_WIDE_INT) 1 << (comparison_const_width - 1);
9623 /* If the comparison value is not a loop invariant, then we
9624 cannot reverse this loop.
9626 ??? If the insns which initialize the comparison value as
9627 a whole compute an invariant result, then we could move
9628 them out of the loop and proceed with loop reversal. */
9629 if (! loop_invariant_p (loop, comparison_value))
9630 return 0;
9632 if (GET_CODE (comparison_value) == CONST_INT)
9633 comparison_val = INTVAL (comparison_value);
9634 initial_value = bl->initial_value;
9636 /* Normalize the initial value if it is an integer and
9637 has no other use except as a counter. This will allow
9638 a few more loops to be reversed. */
9639 if (no_use_except_counting
9640 && GET_CODE (comparison_value) == CONST_INT
9641 && GET_CODE (initial_value) == CONST_INT)
9643 comparison_val = comparison_val - INTVAL (bl->initial_value);
9644 /* The code below requires comparison_val to be a multiple
9645 of add_val in order to do the loop reversal, so
9646 round up comparison_val to a multiple of add_val.
9647 Since comparison_value is constant, we know that the
9648 current comparison code is LT. */
9649 comparison_val = comparison_val + add_val - 1;
9650 comparison_val
9651 -= (unsigned HOST_WIDE_INT) comparison_val % add_val;
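/* Editorial example: with initial value 3, comparison value 27 and
   add_val 4, comparison_val becomes 24 and the loop is treated as
   running from 0 to 24 in steps of 4 -- six iterations, the same
   trip count as the original 3 to 27 by 4.  */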
9652 /* We postpone overflow checks for COMPARISON_VAL here;
9653 even if there is an overflow, we might still be able to
9654 reverse the loop, if converting the loop exit test to
9655 NE is possible. */
9656 initial_value = const0_rtx;
9659 /* First check if we can do a vanilla loop reversal. */
9660 if (initial_value == const0_rtx
9661 && GET_CODE (comparison_value) == CONST_INT
9662 /* Now do postponed overflow checks on COMPARISON_VAL. */
9663 && ! (((comparison_val - add_val) ^ INTVAL (comparison_value))
9664 & comparison_sign_mask))
9666 /* Register will always be nonnegative, with value
9667 0 on last iteration */
9668 add_adjust = add_val;
9669 nonneg = 1;
9670 cmp_code = GE;
9672 else
9673 return 0;
9675 if (GET_CODE (comparison) == LE)
9676 add_adjust -= add_val;
9678 /* If the initial value is not zero, or if the comparison
9679 value is not an exact multiple of the increment, then we
9680 cannot reverse this loop. */
9681 if (initial_value == const0_rtx
9682 && GET_CODE (comparison_value) == CONST_INT)
9684 if (((unsigned HOST_WIDE_INT) comparison_val % add_val) != 0)
9685 return 0;
9687 else
9689 if (! no_use_except_counting || add_val != 1)
9690 return 0;
9693 final_value = comparison_value;
9695 /* Reset these in case we normalized the initial value
9696 and comparison value above. */
9697 if (GET_CODE (comparison_value) == CONST_INT
9698 && GET_CODE (initial_value) == CONST_INT)
9700 comparison_value = GEN_INT (comparison_val);
9701 final_value
9702 = GEN_INT (comparison_val + INTVAL (bl->initial_value));
9704 bl->initial_value = initial_value;
9706 /* Save some info needed to produce the new insns. */
9707 reg = bl->biv->dest_reg;
9708 mode = GET_MODE (reg);
9709 jump_label = condjump_label (PREV_INSN (loop_end));
9710 new_add_val = GEN_INT (-INTVAL (bl->biv->add_val));
9712 /* Set start_value; if this is not a CONST_INT, we need
9713 to generate a SUB.
9714 Initialize biv to start_value before loop start.
9715 The old initializing insn will be deleted as a
9716 dead store by flow.c. */
9717 if (initial_value == const0_rtx
9718 && GET_CODE (comparison_value) == CONST_INT)
9720 start_value
9721 = gen_int_mode (comparison_val - add_adjust, mode);
9722 loop_insn_hoist (loop, gen_move_insn (reg, start_value));
9724 else if (GET_CODE (initial_value) == CONST_INT)
9726 rtx offset = GEN_INT (-INTVAL (initial_value) - add_adjust);
9727 rtx add_insn = gen_add3_insn (reg, comparison_value, offset);
9729 if (add_insn == 0)
9730 return 0;
9732 start_value
9733 = gen_rtx_PLUS (mode, comparison_value, offset);
9734 loop_insn_hoist (loop, add_insn);
9735 if (GET_CODE (comparison) == LE)
9736 final_value = gen_rtx_PLUS (mode, comparison_value,
9737 GEN_INT (add_val));
9739 else if (! add_adjust)
9741 rtx sub_insn = gen_sub3_insn (reg, comparison_value,
9742 initial_value);
9744 if (sub_insn == 0)
9745 return 0;
9746 start_value
9747 = gen_rtx_MINUS (mode, comparison_value, initial_value);
9748 loop_insn_hoist (loop, sub_insn);
9750 else
9751 /* We could handle the other cases too, but it'll be
9752 better to have a testcase first. */
9753 return 0;
9755 /* We may not have a single insn which can increment a reg, so
9756 create a sequence to hold all the insns from expand_inc. */
9757 start_sequence ();
9758 expand_inc (reg, new_add_val);
9759 tem = get_insns ();
9760 end_sequence ();
9762 p = loop_insn_emit_before (loop, 0, bl->biv->insn, tem);
9763 delete_insn (bl->biv->insn);
9765 /* Update biv info to reflect its new status. */
9766 bl->biv->insn = p;
9767 bl->initial_value = start_value;
9768 bl->biv->add_val = new_add_val;
9770 /* Update loop info. */
9771 loop_info->initial_value = reg;
9772 loop_info->initial_equiv_value = reg;
9773 loop_info->final_value = const0_rtx;
9774 loop_info->final_equiv_value = const0_rtx;
9775 loop_info->comparison_value = const0_rtx;
9776 loop_info->comparison_code = cmp_code;
9777 loop_info->increment = new_add_val;
9779 /* Inc LABEL_NUSES so that delete_insn will
9780 not delete the label. */
9781 LABEL_NUSES (XEXP (jump_label, 0))++;
9783 /* If we have a separate comparison insn that does more
9784 than just set cc0, the result of the comparison might
9785 be used outside the loop. */
9786 keep_first_compare = (compare_and_branch == 2
9787 #ifdef HAVE_cc0
9788 && sets_cc0_p (first_compare) <= 0
9789 #endif
9792 /* Emit an insn after the end of the loop to set the biv's
9793 proper exit value if it is used anywhere outside the loop. */
9794 if (keep_first_compare
9795 || (REGNO_LAST_UID (bl->regno) != INSN_UID (first_compare))
9796 || ! bl->init_insn
9797 || REGNO_FIRST_UID (bl->regno) != INSN_UID (bl->init_insn))
9798 loop_insn_sink (loop, gen_load_of_final_value (reg, final_value));
9800 if (keep_first_compare)
9801 loop_insn_sink (loop, PATTERN (first_compare));
9803 /* Delete compare/branch at end of loop. */
9804 delete_related_insns (PREV_INSN (loop_end));
9805 if (compare_and_branch == 2)
9806 delete_related_insns (first_compare);
9808 /* Add new compare/branch insn at end of loop. */
9809 start_sequence ();
9810 emit_cmp_and_jump_insns (reg, const0_rtx, cmp_code, NULL_RTX,
9811 mode, 0,
9812 XEXP (jump_label, 0));
9813 tem = get_insns ();
9814 end_sequence ();
9815 emit_jump_insn_before (tem, loop_end);
9817 for (tem = PREV_INSN (loop_end);
9818 tem && !JUMP_P (tem);
9819 tem = PREV_INSN (tem))
9822 if (tem)
9823 JUMP_LABEL (tem) = XEXP (jump_label, 0);
9825 if (nonneg)
9827 if (tem)
9829 /* Increment of LABEL_NUSES done above. */
9830 /* Register is now always nonnegative,
9831 so add REG_NONNEG note to the branch. */
9832 REG_NOTES (tem) = gen_rtx_EXPR_LIST (REG_NONNEG, reg,
9833 REG_NOTES (tem));
9835 bl->nonneg = 1;
9838 /* No insn may reference both the reversed biv and another biv, or
9839 it will fail (see the comment near the top of the loop reversal
9840 code).
9841 Earlier on, we have verified that the biv has no use except
9842 counting, or it is the only biv in this function.
9843 However, the code that computes no_use_except_counting does
9844 not verify reg notes. It's possible to have an insn that
9845 references another biv, and has a REG_EQUAL note with an
9846 expression based on the reversed biv. To avoid this case,
9847 remove all REG_EQUAL notes based on the reversed biv
9848 here. */
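/* Editorial example: an insn carrying REG_EQUAL (plus (reg i)
   (const_int 4)), where i is the reversed biv, describes the
   upward-counting value of i and becomes wrong once i counts
   down, so the note must be removed.  */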
9849 for (p = loop_start; p != loop_end; p = NEXT_INSN (p))
9850 if (INSN_P (p))
9852 rtx *pnote;
9853 rtx set = single_set (p);
9854 /* If this is a set of a GIV based on the reversed biv, any
9855 REG_EQUAL notes should still be correct. */
9856 if (! set
9857 || !REG_P (SET_DEST (set))
9858 || (size_t) REGNO (SET_DEST (set)) >= ivs->n_regs
9859 || REG_IV_TYPE (ivs, REGNO (SET_DEST (set))) != GENERAL_INDUCT
9860 || REG_IV_INFO (ivs, REGNO (SET_DEST (set)))->src_reg != bl->biv->src_reg)
9861 for (pnote = &REG_NOTES (p); *pnote;)
9863 if (REG_NOTE_KIND (*pnote) == REG_EQUAL
9864 && reg_mentioned_p (regno_reg_rtx[bl->regno],
9865 XEXP (*pnote, 0)))
9866 *pnote = XEXP (*pnote, 1);
9867 else
9868 pnote = &XEXP (*pnote, 1);
9872 /* Mark that this biv has been reversed. Each giv which depends
9873 on this biv, and which is also live past the end of the loop
9874 will have to be fixed up. */
9876 bl->reversed = 1;
9878 if (loop_dump_stream)
9880 fprintf (loop_dump_stream, "Reversed loop");
9881 if (bl->nonneg)
9882 fprintf (loop_dump_stream, " and added reg_nonneg\n");
9883 else
9884 fprintf (loop_dump_stream, "\n");
9887 return 1;
9892 return 0;
9895 /* Verify whether the biv BL appears to be eliminable,
9896 based on the insns in the loop that refer to it.
9898 If ELIMINATE_P is nonzero, actually do the elimination.
9900 THRESHOLD and INSN_COUNT are from loop_optimize and are used to
9901 determine whether invariant insns should be placed inside or at the
9902 start of the loop. */
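/* Editorial illustration, not part of the original source: the
   classic eliminable case is a counter used only in the exit test.
   If p = a + 4*i is a reduced giv of the biv i, the test i < n can
   be rewritten as p < a + 4*n, after which i itself is dead and
   its increment can be deleted.  */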
9904 static int
9905 maybe_eliminate_biv (const struct loop *loop, struct iv_class *bl,
9906 int eliminate_p, int threshold, int insn_count)
9908 struct loop_ivs *ivs = LOOP_IVS (loop);
9909 rtx reg = bl->biv->dest_reg;
9910 rtx p;
9912 /* Scan all insns in the loop, stopping if we find one that uses the
9913 biv in a way that we cannot eliminate. */
9915 for (p = loop->start; p != loop->end; p = NEXT_INSN (p))
9917 enum rtx_code code = GET_CODE (p);
9918 basic_block where_bb = 0;
9919 rtx where_insn = threshold >= insn_count ? 0 : p;
9920 rtx note;
9922 /* If this is a libcall that sets a giv, skip ahead to its end. */
9923 if (INSN_P (p))
9925 note = find_reg_note (p, REG_LIBCALL, NULL_RTX);
9927 if (note)
9929 rtx last = XEXP (note, 0);
9930 rtx set = single_set (last);
9932 if (set && REG_P (SET_DEST (set)))
9934 unsigned int regno = REGNO (SET_DEST (set));
9936 if (regno < ivs->n_regs
9937 && REG_IV_TYPE (ivs, regno) == GENERAL_INDUCT
9938 && REG_IV_INFO (ivs, regno)->src_reg == bl->biv->src_reg)
9939 p = last;
9944 /* Closely examine the insn if the biv is mentioned. */
9945 if ((code == INSN || code == JUMP_INSN || code == CALL_INSN)
9946 && reg_mentioned_p (reg, PATTERN (p))
9947 && ! maybe_eliminate_biv_1 (loop, PATTERN (p), p, bl,
9948 eliminate_p, where_bb, where_insn))
9950 if (loop_dump_stream)
9951 fprintf (loop_dump_stream,
9952 "Cannot eliminate biv %d: biv used in insn %d.\n",
9953 bl->regno, INSN_UID (p));
9954 break;
9957 /* If we are eliminating, kill REG_EQUAL notes mentioning the biv. */
9958 if (eliminate_p
9959 && (note = find_reg_note (p, REG_EQUAL, NULL_RTX)) != NULL_RTX
9960 && reg_mentioned_p (reg, XEXP (note, 0)))
9961 remove_note (p, note);
9964 if (p == loop->end)
9966 if (loop_dump_stream)
9967 fprintf (loop_dump_stream, "biv %d %s eliminated.\n",
9968 bl->regno, eliminate_p ? "was" : "can be");
9969 return 1;
9972 return 0;
9975 /* INSN and REFERENCE are instructions in the same insn chain.
9976 Return nonzero if INSN is first. */
9978 static int
9979 loop_insn_first_p (rtx insn, rtx reference)
9981 rtx p, q;
9983 for (p = insn, q = reference;;)
9985 /* Start with test for not first so that INSN == REFERENCE yields not
9986 first. */
9987 if (q == insn || ! p)
9988 return 0;
9989 if (p == reference || ! q)
9990 return 1;
9992 /* Either of P or Q might be a NOTE. Notes have the same LUID as the
9993 previous insn, hence the <= comparison below does not work if
9994 P is a note. */
9995 if (INSN_UID (p) < max_uid_for_loop
9996 && INSN_UID (q) < max_uid_for_loop
9997 && !NOTE_P (p))
9998 return INSN_LUID (p) <= INSN_LUID (q);
10000 if (INSN_UID (p) >= max_uid_for_loop
10001 || NOTE_P (p))
10002 p = NEXT_INSN (p);
10003 if (INSN_UID (q) >= max_uid_for_loop)
10004 q = NEXT_INSN (q);
10008 /* We are trying to eliminate BIV in INSN using GIV. Return nonzero if
10009 the offset that we have to take into account due to auto-increment /
10010 giv derivation is zero. */
10011 static int
10012 biv_elimination_giv_has_0_offset (struct induction *biv,
10013 struct induction *giv, rtx insn)
10015 /* If the giv V had the auto-inc address optimization applied
10016 to it, and INSN occurs between the giv insn and the biv
10017 insn, then we'd have to adjust the value used here.
10018 This is rare, so we don't bother to make this possible. */
10019 if (giv->auto_inc_opt
10020 && ((loop_insn_first_p (giv->insn, insn)
10021 && loop_insn_first_p (insn, biv->insn))
10022 || (loop_insn_first_p (biv->insn, insn)
10023 && loop_insn_first_p (insn, giv->insn))))
10024 return 0;
10026 return 1;
10029 /* If BL appears in X (part of the pattern of INSN), see if we can
10030 eliminate its use. If so, return 1. If not, return 0.
10032 If BIV does not appear in X, return 1.
10034 If ELIMINATE_P is nonzero, actually do the elimination.
10035 WHERE_INSN/WHERE_BB indicate where extra insns should be added.
10036 Depending on how many items have been moved out of the loop, it
10037 will either be before INSN (when WHERE_INSN is nonzero) or at the
10038 start of the loop (when WHERE_INSN is zero). */
10040 static int
10041 maybe_eliminate_biv_1 (const struct loop *loop, rtx x, rtx insn,
10042 struct iv_class *bl, int eliminate_p,
10043 basic_block where_bb, rtx where_insn)
10045 enum rtx_code code = GET_CODE (x);
10046 rtx reg = bl->biv->dest_reg;
10047 enum machine_mode mode = GET_MODE (reg);
10048 struct induction *v;
10049 rtx arg, tem;
10050 #ifdef HAVE_cc0
10051 rtx new;
10052 #endif
10053 int arg_operand;
10054 const char *fmt;
10055 int i, j;
10057 switch (code)
10059 case REG:
10060 /* If we haven't already been able to do something with this BIV,
10061 we can't eliminate it. */
10062 if (x == reg)
10063 return 0;
10064 return 1;
10066 case SET:
10067 /* If this sets the BIV, it is not a problem. */
10068 if (SET_DEST (x) == reg)
10069 return 1;
10071 /* If this is an insn that defines a giv, it is also ok because
10072 it will go away when the giv is reduced. */
10073 for (v = bl->giv; v; v = v->next_iv)
10074 if (v->giv_type == DEST_REG && SET_DEST (x) == v->dest_reg)
10075 return 1;
10077 #ifdef HAVE_cc0
10078 if (SET_DEST (x) == cc0_rtx && SET_SRC (x) == reg)
10080 /* Can replace with any giv that was reduced and
10081 that has (MULT_VAL != 0) and (ADD_VAL == 0).
10082 Require a constant for MULT_VAL, so we know it's nonzero.
10083 ??? We disable this optimization to avoid potential
10084 overflows. */
10086 for (v = bl->giv; v; v = v->next_iv)
10087 if (GET_CODE (v->mult_val) == CONST_INT && v->mult_val != const0_rtx
10088 && v->add_val == const0_rtx
10089 && ! v->ignore && ! v->maybe_dead && v->always_computable
10090 && v->mode == mode
10091 && 0)
10093 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
10094 continue;
10096 if (! eliminate_p)
10097 return 1;
10099 /* If the giv has the opposite direction of change,
10100 then reverse the comparison. */
10101 if (INTVAL (v->mult_val) < 0)
10102 new = gen_rtx_COMPARE (GET_MODE (v->new_reg),
10103 const0_rtx, v->new_reg);
10104 else
10105 new = v->new_reg;
10107 /* We can probably test that giv's reduced reg. */
10108 if (validate_change (insn, &SET_SRC (x), new, 0))
10109 return 1;
10112 /* Look for a giv with (MULT_VAL != 0) and (ADD_VAL != 0);
10113 replace test insn with a compare insn (cmp REDUCED_GIV ADD_VAL).
10114 Require a constant for MULT_VAL, so we know it's nonzero.
10115 ??? Do this only if ADD_VAL is a pointer to avoid a potential
10116 overflow problem. */
10118 for (v = bl->giv; v; v = v->next_iv)
10119 if (GET_CODE (v->mult_val) == CONST_INT
10120 && v->mult_val != const0_rtx
10121 && ! v->ignore && ! v->maybe_dead && v->always_computable
10122 && v->mode == mode
10123 && (GET_CODE (v->add_val) == SYMBOL_REF
10124 || GET_CODE (v->add_val) == LABEL_REF
10125 || GET_CODE (v->add_val) == CONST
10126 || (REG_P (v->add_val)
10127 && REG_POINTER (v->add_val))))
10129 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
10130 continue;
10132 if (! eliminate_p)
10133 return 1;
10135 /* If the giv has the opposite direction of change,
10136 then reverse the comparison. */
10137 if (INTVAL (v->mult_val) < 0)
10138 new = gen_rtx_COMPARE (VOIDmode, copy_rtx (v->add_val),
10139 v->new_reg);
10140 else
10141 new = gen_rtx_COMPARE (VOIDmode, v->new_reg,
10142 copy_rtx (v->add_val));
10144 /* Replace biv with the giv's reduced register. */
10145 update_reg_last_use (v->add_val, insn);
10146 if (validate_change (insn, &SET_SRC (PATTERN (insn)), new, 0))
10147 return 1;
10149 /* The insn doesn't support that constant or invariant. Copy it
10150 into a register (it will be a loop invariant). */
10151 tem = gen_reg_rtx (GET_MODE (v->new_reg));
10153 loop_insn_emit_before (loop, 0, where_insn,
10154 gen_move_insn (tem,
10155 copy_rtx (v->add_val)));
10157 /* Substitute the new register for its invariant value in
10158 the compare expression. */
10159 XEXP (new, (INTVAL (v->mult_val) < 0) ? 0 : 1) = tem;
10160 if (validate_change (insn, &SET_SRC (PATTERN (insn)), new, 0))
10161 return 1;
10164 #endif
10165 break;
10167 case COMPARE:
10168 case EQ: case NE:
10169 case GT: case GE: case GTU: case GEU:
10170 case LT: case LE: case LTU: case LEU:
10171 /* See if either argument is the biv. */
10172 if (XEXP (x, 0) == reg)
10173 arg = XEXP (x, 1), arg_operand = 1;
10174 else if (XEXP (x, 1) == reg)
10175 arg = XEXP (x, 0), arg_operand = 0;
10176 else
10177 break;
10179 if (CONSTANT_P (arg))
10181 /* First try to replace with any giv that has constant positive
10182 mult_val and constant add_val. We might be able to support
10183 negative mult_val, but it seems complex to do it in general. */
10185 for (v = bl->giv; v; v = v->next_iv)
10186 if (GET_CODE (v->mult_val) == CONST_INT
10187 && INTVAL (v->mult_val) > 0
10188 && (GET_CODE (v->add_val) == SYMBOL_REF
10189 || GET_CODE (v->add_val) == LABEL_REF
10190 || GET_CODE (v->add_val) == CONST
10191 || (REG_P (v->add_val)
10192 && REG_POINTER (v->add_val)))
10193 && ! v->ignore && ! v->maybe_dead && v->always_computable
10194 && v->mode == mode)
10196 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
10197 continue;
10199 /* Don't eliminate if the linear combination that makes up
10200 the giv overflows when it is applied to ARG. */
10201 if (GET_CODE (arg) == CONST_INT)
10203 rtx add_val;
10205 if (GET_CODE (v->add_val) == CONST_INT)
10206 add_val = v->add_val;
10207 else
10208 add_val = const0_rtx;
10210 if (const_mult_add_overflow_p (arg, v->mult_val,
10211 add_val, mode, 1))
10212 continue;
10215 if (! eliminate_p)
10216 return 1;
10218 /* Replace biv with the giv's reduced reg. */
10219 validate_change (insn, &XEXP (x, 1 - arg_operand), v->new_reg, 1);
10221 /* If all constants are actually constant integers and
10222 the derived constant can be directly placed in the COMPARE,
10223 do so. */
10224 if (GET_CODE (arg) == CONST_INT
10225 && GET_CODE (v->add_val) == CONST_INT)
10227 tem = expand_mult_add (arg, NULL_RTX, v->mult_val,
10228 v->add_val, mode, 1);
10230 else
10232 /* Otherwise, load it into a register. */
10233 tem = gen_reg_rtx (mode);
10234 loop_iv_add_mult_emit_before (loop, arg,
10235 v->mult_val, v->add_val,
10236 tem, where_bb, where_insn);
10239 validate_change (insn, &XEXP (x, arg_operand), tem, 1);
10241 if (apply_change_group ())
10242 return 1;
10245 /* Look for giv with positive constant mult_val and nonconst add_val.
10246 Insert insns to calculate new compare value.
10247 ??? Turn this off due to possible overflow. */
10249 for (v = bl->giv; v; v = v->next_iv)
10250 if (GET_CODE (v->mult_val) == CONST_INT
10251 && INTVAL (v->mult_val) > 0
10252 && ! v->ignore && ! v->maybe_dead && v->always_computable
10253 && v->mode == mode
10254 && 0)
10256 rtx tem;
10258 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
10259 continue;
10261 if (! eliminate_p)
10262 return 1;
10264 tem = gen_reg_rtx (mode);
10266 /* Replace biv with giv's reduced register. */
10267 validate_change (insn, &XEXP (x, 1 - arg_operand),
10268 v->new_reg, 1);
10270 /* Compute value to compare against. */
10271 loop_iv_add_mult_emit_before (loop, arg,
10272 v->mult_val, v->add_val,
10273 tem, where_bb, where_insn);
10274 /* Use it in this insn. */
10275 validate_change (insn, &XEXP (x, arg_operand), tem, 1);
10276 if (apply_change_group ())
10277 return 1;
10280 else if (REG_P (arg) || MEM_P (arg))
10282 if (loop_invariant_p (loop, arg) == 1)
10284 /* Look for giv with constant positive mult_val and nonconst
10285 add_val. Insert insns to compute new compare value.
10286 ??? Turn this off due to possible overflow. */
10288 for (v = bl->giv; v; v = v->next_iv)
10289 if (GET_CODE (v->mult_val) == CONST_INT && INTVAL (v->mult_val) > 0
10290 && ! v->ignore && ! v->maybe_dead && v->always_computable
10291 && v->mode == mode
10292 && 0)
10294 rtx tem;
10296 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
10297 continue;
10299 if (! eliminate_p)
10300 return 1;
10302 tem = gen_reg_rtx (mode);
10304 /* Replace biv with giv's reduced register. */
10305 validate_change (insn, &XEXP (x, 1 - arg_operand),
10306 v->new_reg, 1);
10308 /* Compute value to compare against. */
10309 loop_iv_add_mult_emit_before (loop, arg,
10310 v->mult_val, v->add_val,
10311 tem, where_bb, where_insn);
10312 validate_change (insn, &XEXP (x, arg_operand), tem, 1);
10313 if (apply_change_group ())
10314 return 1;
10318 /* This code has problems. Basically, when deciding whether we
10319 will eliminate BL, we can't know whether a particular giv
10320 of ARG will be reduced. If it isn't going to be reduced,
10321 we can't eliminate BL. We can try forcing it to be reduced,
10322 but that can generate poor code.
10324 The problem is that the benefit of reducing TV, below, should
10325 be increased if BL can actually be eliminated, but this means
10326 we might have to do a topological sort of the order in which
10327 we try to process bivs. It doesn't seem worthwhile to do
10328 this sort of thing now. */
10330 #if 0
10331 /* Otherwise the reg compared with had better be a biv. */
10332 if (!REG_P (arg)
10333 || REG_IV_TYPE (ivs, REGNO (arg)) != BASIC_INDUCT)
10334 return 0;
10336 /* Look for a pair of givs, one for each biv,
10337 with identical coefficients. */
10338 for (v = bl->giv; v; v = v->next_iv)
10340 struct induction *tv;
10342 if (v->ignore || v->maybe_dead || v->mode != mode)
10343 continue;
10345 for (tv = REG_IV_CLASS (ivs, REGNO (arg))->giv; tv;
10346 tv = tv->next_iv)
10347 if (! tv->ignore && ! tv->maybe_dead
10348 && rtx_equal_p (tv->mult_val, v->mult_val)
10349 && rtx_equal_p (tv->add_val, v->add_val)
10350 && tv->mode == mode)
10352 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
10353 continue;
10355 if (! eliminate_p)
10356 return 1;
10358 /* Replace biv with its giv's reduced reg. */
10359 XEXP (x, 1 - arg_operand) = v->new_reg;
10360 /* Replace other operand with the other giv's
10361 reduced reg. */
10362 XEXP (x, arg_operand) = tv->new_reg;
10363 return 1;
10366 #endif
10369 /* If we get here, the biv can't be eliminated. */
10370 return 0;
10372 case MEM:
10373 /* If this address is a DEST_ADDR giv, it doesn't matter if the
10374 biv is used in it, since it will be replaced. */
10375 for (v = bl->giv; v; v = v->next_iv)
10376 if (v->giv_type == DEST_ADDR && v->location == &XEXP (x, 0))
10377 return 1;
10378 break;
10380 default:
10381 break;
10384 /* See if any subexpression fails elimination. */
10385 fmt = GET_RTX_FORMAT (code);
10386 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
10388 switch (fmt[i])
10390 case 'e':
10391 if (! maybe_eliminate_biv_1 (loop, XEXP (x, i), insn, bl,
10392 eliminate_p, where_bb, where_insn))
10393 return 0;
10394 break;
10396 case 'E':
10397 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10398 if (! maybe_eliminate_biv_1 (loop, XVECEXP (x, i, j), insn, bl,
10399 eliminate_p, where_bb, where_insn))
10400 return 0;
10401 break;
10405 return 1;
10408 /* Return nonzero if the last use of REG
10409 is in an insn following INSN in the same basic block. */
10411 static int
10412 last_use_this_basic_block (rtx reg, rtx insn)
10414 rtx n;
10415 for (n = insn;
10416 n && !LABEL_P (n) && !JUMP_P (n);
10417 n = NEXT_INSN (n))
10419 if (REGNO_LAST_UID (REGNO (reg)) == INSN_UID (n))
10420 return 1;
10422 return 0;
10425 /* Called via `note_stores' to record the initial value of a biv. Here we
10426 just record the location of the set and process it later. */
10428 static void
10429 record_initial (rtx dest, rtx set, void *data ATTRIBUTE_UNUSED)
10431 struct loop_ivs *ivs = (struct loop_ivs *) data;
10432 struct iv_class *bl;
10434 if (!REG_P (dest)
10435 || REGNO (dest) >= ivs->n_regs
10436 || REG_IV_TYPE (ivs, REGNO (dest)) != BASIC_INDUCT)
10437 return;
10439 bl = REG_IV_CLASS (ivs, REGNO (dest));
10441 /* If this is the first set found, record it. */
10442 if (bl->init_insn == 0)
10444 bl->init_insn = note_insn;
10445 bl->init_set = set;
10449 /* If any of the registers in X are "old" and currently have a last use earlier
10450 than INSN, update them to have a last use of INSN. Their actual last use
10451 will be the previous insn but it will not have a valid uid_luid so we can't
10452 use it. X must be a source expression only. */
10454 static void
10455 update_reg_last_use (rtx x, rtx insn)
10457 /* Check for the case where INSN does not have a valid luid. In this case,
10458 there is no need to modify the regno_last_uid, as this can only happen
10459 when code is inserted after the loop_end to set a pseudo's final value,
10460 and hence this insn will never be the last use of x.
10461 ???? This comment is not correct. See for example loop_givs_reduce.
10462 This may insert an insn before another new insn. */
10463 if (REG_P (x) && REGNO (x) < max_reg_before_loop
10464 && INSN_UID (insn) < max_uid_for_loop
10465 && REGNO_LAST_LUID (REGNO (x)) < INSN_LUID (insn))
10467 REGNO_LAST_UID (REGNO (x)) = INSN_UID (insn);
10469 else
10471 int i, j;
10472 const char *fmt = GET_RTX_FORMAT (GET_CODE (x));
10473 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10475 if (fmt[i] == 'e')
10476 update_reg_last_use (XEXP (x, i), insn);
10477 else if (fmt[i] == 'E')
10478 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10479 update_reg_last_use (XVECEXP (x, i, j), insn);
10484 /* Similar to rtlanal.c:get_condition, except that we also put an
10485 invariant last unless both operands are invariants. */
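/* Editorial example: given (lt (reg n) (reg i)) where n is loop
   invariant and i is not, this returns (gt (reg i) (reg n)), so
   callers may assume that when exactly one operand is invariant
   it appears second.  */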
10487 static rtx
10488 get_condition_for_loop (const struct loop *loop, rtx x)
10490 rtx comparison = get_condition (x, (rtx*) 0, false, true);
10492 if (comparison == 0
10493 || ! loop_invariant_p (loop, XEXP (comparison, 0))
10494 || loop_invariant_p (loop, XEXP (comparison, 1)))
10495 return comparison;
10497 return gen_rtx_fmt_ee (swap_condition (GET_CODE (comparison)), VOIDmode,
10498 XEXP (comparison, 1), XEXP (comparison, 0));
10501 /* Scan the function and determine whether it has indirect (computed) jumps.
10503 This is taken mostly from flow.c; similar code exists elsewhere
10504 in the compiler. It may be useful to put this into rtlanal.c. */
10505 static int
10506 indirect_jump_in_function_p (rtx start)
10508 rtx insn;
10510 for (insn = start; insn; insn = NEXT_INSN (insn))
10511 if (computed_jump_p (insn))
10512 return 1;
10514 return 0;
10517 /* Add MEM to the LOOP_MEMS array, if appropriate. See the
10518 documentation for LOOP_MEMS for the definition of `appropriate'.
10519 This function is called from prescan_loop via for_each_rtx. */
10521 static int
10522 insert_loop_mem (rtx *mem, void *data ATTRIBUTE_UNUSED)
10524 struct loop_info *loop_info = data;
10525 int i;
10526 rtx m = *mem;
10528 if (m == NULL_RTX)
10529 return 0;
10531 switch (GET_CODE (m))
10533 case MEM:
10534 break;
10536 case CLOBBER:
10537 /* We're not interested in MEMs that are only clobbered. */
10538 return -1;
10540 case CONST_DOUBLE:
10541 /* We're not interested in the MEM associated with a
10542 CONST_DOUBLE, so there's no need to traverse into this. */
10543 return -1;
10545 case EXPR_LIST:
10546 /* We're not interested in any MEMs that only appear in notes. */
10547 return -1;
10549 default:
10550 /* This is not a MEM. */
10551 return 0;
10554 /* See if we've already seen this MEM. */
10555 for (i = 0; i < loop_info->mems_idx; ++i)
10556 if (rtx_equal_p (m, loop_info->mems[i].mem))
10558 if (MEM_VOLATILE_P (m) && !MEM_VOLATILE_P (loop_info->mems[i].mem))
10559 loop_info->mems[i].mem = m;
10560 if (GET_MODE (m) != GET_MODE (loop_info->mems[i].mem))
10561 /* The modes of the two memory accesses are different. If
10562 this happens, something tricky is going on, and we just
10563 don't optimize accesses to this MEM. */
10564 loop_info->mems[i].optimize = 0;
10566 return 0;
10569 /* Resize the array, if necessary. */
10570 if (loop_info->mems_idx == loop_info->mems_allocated)
10572 if (loop_info->mems_allocated != 0)
10573 loop_info->mems_allocated *= 2;
10574 else
10575 loop_info->mems_allocated = 32;
10577 loop_info->mems = xrealloc (loop_info->mems,
10578 loop_info->mems_allocated * sizeof (loop_mem_info));
10581 /* Actually insert the MEM. */
10582 loop_info->mems[loop_info->mems_idx].mem = m;
10583 /* We can't hoist this MEM out of the loop if it's a BLKmode MEM
10584 because we can't put it in a register. We still store it in the
10585 table, though, so that if we see the same address later, but in a
10586 non-BLK mode, we'll not think we can optimize it at that point. */
10587 loop_info->mems[loop_info->mems_idx].optimize = (GET_MODE (m) != BLKmode);
10588 loop_info->mems[loop_info->mems_idx].reg = NULL_RTX;
10589 ++loop_info->mems_idx;
10591 return 0;
10595 /* Allocate REGS->ARRAY or reallocate it if it is too small.
10597 Increment REGS->ARRAY[I].SET_IN_LOOP at the index I of each
10598 register that is modified by an insn between FROM and TO. If the
10599 value of an element of REGS->array[I].SET_IN_LOOP becomes 127 or
10600 more, stop incrementing it, to avoid overflow.
10602 Store in REGS->ARRAY[I].SINGLE_USAGE the single insn in which
10603 register I is used, if it is only used once. Otherwise, it is set
10604 to 0 (for no uses) or const0_rtx for more than one use. This
10605 parameter may be zero, in which case this processing is not done.
10607 Set REGS->ARRAY[I].MAY_NOT_OPTIMIZE nonzero if we should not
10608 optimize register I. */
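/* Editorial example: after the scan, a register with no use inside
   the loop has .single_usage == 0, a register with exactly one use
   records that insn there, and one with several uses records
   const0_rtx; a register set 200 times reports set_in_loop == 127,
   since the count saturates there.  */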
10610 static void
10611 loop_regs_scan (const struct loop *loop, int extra_size)
10613 struct loop_regs *regs = LOOP_REGS (loop);
10614 int old_nregs;
10615 /* last_set[n] is nonzero iff reg n has been set in the current
10616 basic block. In that case, it is the insn that last set reg n. */
10617 rtx *last_set;
10618 rtx insn;
10619 int i;
10621 old_nregs = regs->num;
10622 regs->num = max_reg_num ();
10624 /* Grow the regs array if not allocated or too small. */
10625 if (regs->num >= regs->size)
10627 regs->size = regs->num + extra_size;
10629 regs->array = xrealloc (regs->array, regs->size * sizeof (*regs->array));
10631 /* Zero the new elements. */
10632 memset (regs->array + old_nregs, 0,
10633 (regs->size - old_nregs) * sizeof (*regs->array));
10636 /* Clear previously scanned fields but do not clear n_times_set. */
10637 for (i = 0; i < old_nregs; i++)
10639 regs->array[i].set_in_loop = 0;
10640 regs->array[i].may_not_optimize = 0;
10641 regs->array[i].single_usage = NULL_RTX;
10644 last_set = xcalloc (regs->num, sizeof (rtx));
10646 /* Scan the loop, recording register usage. */
10647 for (insn = loop->top ? loop->top : loop->start; insn != loop->end;
10648 insn = NEXT_INSN (insn))
10650 if (INSN_P (insn))
10652 /* Record registers that have exactly one use. */
10653 find_single_use_in_loop (regs, insn, PATTERN (insn));
10655 /* Include uses in REG_EQUAL notes. */
10656 if (REG_NOTES (insn))
10657 find_single_use_in_loop (regs, insn, REG_NOTES (insn));
10659 if (GET_CODE (PATTERN (insn)) == SET
10660 || GET_CODE (PATTERN (insn)) == CLOBBER)
10661 count_one_set (regs, insn, PATTERN (insn), last_set);
10662 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
10664 int i;
10665 for (i = XVECLEN (PATTERN (insn), 0) - 1; i >= 0; i--)
10666 count_one_set (regs, insn, XVECEXP (PATTERN (insn), 0, i),
10667 last_set);
10671 if (LABEL_P (insn) || JUMP_P (insn))
10672 memset (last_set, 0, regs->num * sizeof (rtx));
10674 /* Invalidate all registers used for function argument passing.
10675 We check rtx_varies_p for the same reason as below, to allow
10676 optimizing PIC calculations. */
10677 if (CALL_P (insn))
10679 rtx link;
10680 for (link = CALL_INSN_FUNCTION_USAGE (insn);
10681 link;
10682 link = XEXP (link, 1))
10684 rtx op, reg;
10686 if (GET_CODE (op = XEXP (link, 0)) == USE
10687 && REG_P (reg = XEXP (op, 0))
10688 && rtx_varies_p (reg, 1))
10689 regs->array[REGNO (reg)].may_not_optimize = 1;
10694 /* Invalidate all hard registers clobbered by calls. With one exception:
10695 a call-clobbered PIC register is still function-invariant for our
10696 purposes, since we can hoist any PIC calculations out of the loop.
10697 Thus the call to rtx_varies_p. */
10698 if (LOOP_INFO (loop)->has_call)
10699 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
10700 if (TEST_HARD_REG_BIT (regs_invalidated_by_call, i)
10701 && rtx_varies_p (regno_reg_rtx[i], 1))
10703 regs->array[i].may_not_optimize = 1;
10704 regs->array[i].set_in_loop = 1;
10707 #ifdef AVOID_CCMODE_COPIES
10708 /* Don't try to move insns which set CC registers if we should not
10709 create CCmode register copies. */
10710 for (i = regs->num - 1; i >= FIRST_PSEUDO_REGISTER; i--)
10711 if (GET_MODE_CLASS (GET_MODE (regno_reg_rtx[i])) == MODE_CC)
10712 regs->array[i].may_not_optimize = 1;
10713 #endif
10715 /* Set regs->array[I].n_times_set for the new registers. */
10716 for (i = old_nregs; i < regs->num; i++)
10717 regs->array[i].n_times_set = regs->array[i].set_in_loop;
10719 free (last_set);
10722 /* Returns the number of real INSNs in the LOOP. */
10724 static int
10725 count_insns_in_loop (const struct loop *loop)
10727 int count = 0;
10728 rtx insn;
10730 for (insn = loop->top ? loop->top : loop->start; insn != loop->end;
10731 insn = NEXT_INSN (insn))
10732 if (INSN_P (insn))
10733 ++count;
10735 return count;
10738 /* Move MEMs into registers for the duration of the loop. */
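/* Editorial illustration, not part of the original source: for a
   loop that reads through an invariant address, e.g.

       for (i = 0; i < n; i++)
         sum += *p;

   the MEM *p is shadowed by a new pseudo REG: REG is loaded from
   *p (or from a known constant or register equivalent) just before
   the loop, references inside the loop are rewritten to use REG,
   and if the MEM is also written, REG is stored back to *p after
   the loop end.  */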
10740 static void
10741 load_mems (const struct loop *loop)
10743 struct loop_info *loop_info = LOOP_INFO (loop);
10744 struct loop_regs *regs = LOOP_REGS (loop);
10745 int maybe_never = 0;
10746 int i;
10747 rtx p, prev_ebb_head;
10748 rtx label = NULL_RTX;
10749 rtx end_label;
10750 /* Nonzero if the next instruction may never be executed. */
10751 int next_maybe_never = 0;
10752 unsigned int last_max_reg = max_reg_num ();
10754 if (loop_info->mems_idx == 0)
10755 return;
10757 /* We cannot use next_label here because it skips over normal insns. */
10758 end_label = next_nonnote_insn (loop->end);
10759 if (end_label && !LABEL_P (end_label))
10760 end_label = NULL_RTX;
10762 /* Check to see if it's possible that some instructions in the loop are
10763 never executed. Also check if there is a goto out of the loop to some
10764 place other than right after the end of the loop. */
10765 for (p = next_insn_in_loop (loop, loop->scan_start);
10766 p != NULL_RTX;
10767 p = next_insn_in_loop (loop, p))
10769 if (LABEL_P (p))
10770 maybe_never = 1;
10771 else if (JUMP_P (p)
10772 /* If we enter the loop in the middle, and scan
10773 around to the beginning, don't set maybe_never
10774 for that. This must be an unconditional jump,
10775 otherwise the code at the top of the loop might
10776 never be executed. Unconditional jumps are
10777 followed by a barrier and then the loop end. */
10778 && ! (JUMP_P (p)
10779 && JUMP_LABEL (p) == loop->top
10780 && NEXT_INSN (NEXT_INSN (p)) == loop->end
10781 && any_uncondjump_p (p)))
10783 /* If this is a jump outside of the loop but not right
10784 after the end of the loop, we would have to emit new fixup
10785 sequences for each such label. */
10786 if (/* If we can't tell where control might go when this
10787 JUMP_INSN is executed, we must be conservative. */
10788 !JUMP_LABEL (p)
10789 || (JUMP_LABEL (p) != end_label
10790 && (INSN_UID (JUMP_LABEL (p)) >= max_uid_for_loop
10791 || INSN_LUID (JUMP_LABEL (p)) < INSN_LUID (loop->start)
10792 || INSN_LUID (JUMP_LABEL (p)) > INSN_LUID (loop->end))))
10793 return;
10795 if (!any_condjump_p (p))
10796 /* Something complicated. */
10797 maybe_never = 1;
10798 else
10799 /* If there are any more instructions in the loop, they
10800 might not be reached. */
10801 next_maybe_never = 1;
10803 else if (next_maybe_never)
10804 maybe_never = 1;
10807 /* Find start of the extended basic block that enters the loop. */
10808 for (p = loop->start;
10809 PREV_INSN (p) && !LABEL_P (p);
10810 p = PREV_INSN (p))
10812 prev_ebb_head = p;
10814 cselib_init (true);
10816 /* Build table of mems that get set to constant values before the
10817 loop. */
10818 for (; p != loop->start; p = NEXT_INSN (p))
10819 cselib_process_insn (p);
10821 /* Actually move the MEMs. */
10822 for (i = 0; i < loop_info->mems_idx; ++i)
10824 regset_head load_copies;
10825 regset_head store_copies;
10826 int written = 0;
10827 rtx reg;
10828 rtx mem = loop_info->mems[i].mem;
10829 rtx mem_list_entry;
10831 if (MEM_VOLATILE_P (mem)
10832 || loop_invariant_p (loop, XEXP (mem, 0)) != 1)
10833 /* There's no telling whether or not MEM is modified. */
        loop_info->mems[i].optimize = 0;

      /* Go through the MEMs written to in the loop to see if this
         one is aliased by one of them.  */
      mem_list_entry = loop_info->store_mems;
      while (mem_list_entry)
        {
          if (rtx_equal_p (mem, XEXP (mem_list_entry, 0)))
            written = 1;
          else if (true_dependence (XEXP (mem_list_entry, 0), VOIDmode,
                                    mem, rtx_varies_p))
            {
              /* MEM is indeed aliased by this store.  */
              loop_info->mems[i].optimize = 0;
              break;
            }
          mem_list_entry = XEXP (mem_list_entry, 1);
        }

      if (flag_float_store && written
          && GET_MODE_CLASS (GET_MODE (mem)) == MODE_FLOAT)
        loop_info->mems[i].optimize = 0;

      /* If this MEM is written to, we must be sure that there
         are no reads from another MEM that aliases this one.  */
      if (loop_info->mems[i].optimize && written)
        {
          int j;

          for (j = 0; j < loop_info->mems_idx; ++j)
            {
              if (j == i)
                continue;
              else if (true_dependence (mem,
                                        VOIDmode,
                                        loop_info->mems[j].mem,
                                        rtx_varies_p))
                {
                  /* It's not safe to hoist loop_info->mems[i] out of
                     the loop because writes to it might not be
                     seen by reads from loop_info->mems[j].  */
                  loop_info->mems[i].optimize = 0;
                  break;
                }
            }
        }

      if (maybe_never && may_trap_p (mem))
        /* We can't access the MEM outside the loop; it might
           cause a trap that wouldn't have happened otherwise.  */
        loop_info->mems[i].optimize = 0;

      if (!loop_info->mems[i].optimize)
        /* We thought we were going to lift this MEM out of the
           loop, but later discovered that we could not.  */
        continue;

      INIT_REG_SET (&load_copies);
      INIT_REG_SET (&store_copies);

      /* Allocate a pseudo for this MEM.  We set REG_USERVAR_P in
         order to keep scan_loop from moving stores to this MEM
         out of the loop just because this REG is neither a
         user-variable nor used in the loop test.  */
      reg = gen_reg_rtx (GET_MODE (mem));
      REG_USERVAR_P (reg) = 1;
      loop_info->mems[i].reg = reg;

      /* Now, replace all references to the MEM with the
         corresponding pseudos.  */
      maybe_never = 0;
      for (p = next_insn_in_loop (loop, loop->scan_start);
           p != NULL_RTX;
           p = next_insn_in_loop (loop, p))
        {
          if (INSN_P (p))
            {
              rtx set;

              set = single_set (p);

              /* See if this copies the mem into a register that isn't
                 modified afterwards.  We'll try to do copy propagation
                 a little further on.  */
              if (set
                  /* @@@ This test is _way_ too conservative.  */
                  && ! maybe_never
                  && REG_P (SET_DEST (set))
                  && REGNO (SET_DEST (set)) >= FIRST_PSEUDO_REGISTER
                  && REGNO (SET_DEST (set)) < last_max_reg
                  && regs->array[REGNO (SET_DEST (set))].n_times_set == 1
                  && rtx_equal_p (SET_SRC (set), mem))
                SET_REGNO_REG_SET (&load_copies, REGNO (SET_DEST (set)));

              /* See if this copies the mem from a register that isn't
                 modified afterwards.  We'll try to remove the
                 redundant copy later on by doing a little register
                 renaming and copy propagation.  This will help
                 to untangle things for the BIV detection code.  */
              if (set
                  && ! maybe_never
                  && REG_P (SET_SRC (set))
                  && REGNO (SET_SRC (set)) >= FIRST_PSEUDO_REGISTER
                  && REGNO (SET_SRC (set)) < last_max_reg
                  && regs->array[REGNO (SET_SRC (set))].n_times_set == 1
                  && rtx_equal_p (SET_DEST (set), mem))
                SET_REGNO_REG_SET (&store_copies, REGNO (SET_SRC (set)));

              /* If this is a call which uses / clobbers this memory
                 location, we must not change the interface here.  */
              if (CALL_P (p)
                  && reg_mentioned_p (loop_info->mems[i].mem,
                                      CALL_INSN_FUNCTION_USAGE (p)))
                {
                  cancel_changes (0);
                  loop_info->mems[i].optimize = 0;
                  break;
                }
              else
                /* Replace the memory reference with the shadow register.  */
                replace_loop_mems (p, loop_info->mems[i].mem,
                                   loop_info->mems[i].reg, written);
            }

          if (LABEL_P (p)
              || JUMP_P (p))
            maybe_never = 1;
        }

      if (! loop_info->mems[i].optimize)
        ; /* We found we couldn't do the replacement, so do nothing.  */
      else if (! apply_change_group ())
        /* We couldn't replace all occurrences of the MEM.  */
        loop_info->mems[i].optimize = 0;
      else
        {
          /* Load the memory immediately before LOOP->START, which is
             the NOTE_LOOP_BEG.  */
          cselib_val *e = cselib_lookup (mem, VOIDmode, 0);
          rtx set;
          rtx best = mem;
          unsigned j;
          struct elt_loc_list *const_equiv = 0;
          reg_set_iterator rsi;
          if (e)
            {
              struct elt_loc_list *equiv;
              struct elt_loc_list *best_equiv = 0;
              for (equiv = e->locs; equiv; equiv = equiv->next)
                {
                  if (CONSTANT_P (equiv->loc))
                    const_equiv = equiv;
                  else if (REG_P (equiv->loc)
                           /* Extending hard register lifetimes causes
                              crashes on SRC targets.  Doing so on other
                              targets is probably not a good idea either,
                              since we most likely have a pseudo register
                              equivalence as well.  */
                           && REGNO (equiv->loc) >= FIRST_PSEUDO_REGISTER)
                    best_equiv = equiv;
                }

              /* Use the constant equivalence if that is cheap enough.  */
              if (! best_equiv)
                best_equiv = const_equiv;
              else if (const_equiv
                       && (rtx_cost (const_equiv->loc, SET)
                           <= rtx_cost (best_equiv->loc, SET)))
                {
                  best_equiv = const_equiv;
                  const_equiv = 0;
                }

              /* If best_equiv is nonzero, we know that MEM is set to a
                 constant or register before the loop.  We will use this
                 knowledge to initialize the shadow register with that
                 constant or reg rather than by loading from MEM.  */
              if (best_equiv)
                best = copy_rtx (best_equiv->loc);
            }
          set = gen_move_insn (reg, best);
          set = loop_insn_hoist (loop, set);
          if (REG_P (best))
            {
              for (p = prev_ebb_head; p != loop->start; p = NEXT_INSN (p))
                if (REGNO_LAST_UID (REGNO (best)) == INSN_UID (p))
                  {
                    REGNO_LAST_UID (REGNO (best)) = INSN_UID (set);
                    break;
                  }
            }

          if (const_equiv)
            set_unique_reg_note (set, REG_EQUAL, copy_rtx (const_equiv->loc));

          if (written)
            {
              if (label == NULL_RTX)
                {
                  label = gen_label_rtx ();
                  emit_label_after (label, loop->end);
                }

              /* Store the memory immediately after END, which is
                 the NOTE_LOOP_END.  */
              set = gen_move_insn (copy_rtx (mem), reg);
              loop_insn_emit_after (loop, 0, label, set);
            }

          if (loop_dump_stream)
            {
              fprintf (loop_dump_stream, "Hoisted regno %d %s from ",
                       REGNO (reg), (written ? "r/w" : "r/o"));
              print_rtl (loop_dump_stream, mem);
              fputc ('\n', loop_dump_stream);
            }

          /* Attempt a bit of copy propagation.  This helps untangle the
             data flow, and enables {basic,general}_induction_var to find
             more bivs/givs.  */
          EXECUTE_IF_SET_IN_REG_SET
            (&load_copies, FIRST_PSEUDO_REGISTER, j, rsi)
            {
              try_copy_prop (loop, reg, j);
            }
          CLEAR_REG_SET (&load_copies);

          EXECUTE_IF_SET_IN_REG_SET
            (&store_copies, FIRST_PSEUDO_REGISTER, j, rsi)
            {
              try_swap_copy_prop (loop, reg, j);
            }
          CLEAR_REG_SET (&store_copies);
        }
    }

  /* Now, we need to replace all references to the previous exit
     label with the new one.  */
  if (label != NULL_RTX && end_label != NULL_RTX)
    for (p = loop->start; p != loop->end; p = NEXT_INSN (p))
      if (JUMP_P (p) && JUMP_LABEL (p) == end_label)
        redirect_jump (p, label, false);

  cselib_finish ();
}
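
/* As an illustration of the transformation above (all register and insn
   numbers are invented for this sketch), a loop that reads and writes a
   loop-invariant address

       (set (reg 70) (mem (reg 60)))        ;; inside the loop
       (set (mem (reg 60)) (reg 71))        ;; inside the loop

   ends up using a shadow pseudo, say (reg 105):

       (set (reg 105) (mem (reg 60)))       ;; hoisted before the loop
       (set (reg 70) (reg 105))             ;; inside the loop
       (set (reg 105) (reg 71))             ;; inside the loop
       (set (mem (reg 60)) (reg 105))       ;; emitted after the loop

   where the final store is emitted only because the MEM was written.  */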
/* For communication between note_reg_stored and its caller.  */
struct note_reg_stored_arg
{
  int set_seen;
  rtx reg;
};

/* Called via note_stores, record in SET_SEEN whether X, which is written,
   is equal to ARG.  */
static void
note_reg_stored (rtx x, rtx setter ATTRIBUTE_UNUSED, void *arg)
{
  struct note_reg_stored_arg *t = (struct note_reg_stored_arg *) arg;
  if (t->reg == x)
    t->set_seen = 1;
}
/* Try to replace every occurrence of pseudo REGNO with REPLACEMENT.
   There must be exactly one insn that sets this pseudo; it will be
   deleted if all replacements succeed and we can prove that the register
   is not used after the loop.  */
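
/* For instance (an illustrative sketch; the register and insn numbers
   are invented), with REGNO = 150 and REPLACEMENT = (reg 105):

       (set (reg 150) (reg 105))                      ;; the init insn
       (set (reg 151) (plus (reg 150) (const_int 4)))

   becomes

       (set (reg 151) (plus (reg 105) (const_int 4)))

   and the init insn is deleted once the first- and last-use checks
   below show that reg 150 has no remaining uses.  */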
static void
try_copy_prop (const struct loop *loop, rtx replacement, unsigned int regno)
{
  /* This is the reg that we are copying from.  */
  rtx reg_rtx = regno_reg_rtx[regno];
  rtx init_insn = 0;
  rtx insn;
  /* These help keep track of whether we replaced all uses of the reg.  */
  int replaced_last = 0;
  int store_is_first = 0;

  for (insn = next_insn_in_loop (loop, loop->scan_start);
       insn != NULL_RTX;
       insn = next_insn_in_loop (loop, insn))
    {
      rtx set;

      /* Only substitute within one extended basic block from the
         initializing insn.  */
      if (LABEL_P (insn) && init_insn)
        break;

      if (! INSN_P (insn))
        continue;

      /* Is this the initializing insn?  */
      set = single_set (insn);
      if (set
          && REG_P (SET_DEST (set))
          && REGNO (SET_DEST (set)) == regno)
        {
          if (init_insn)
            abort ();

          init_insn = insn;
          if (REGNO_FIRST_UID (regno) == INSN_UID (insn))
            store_is_first = 1;
        }

      /* Only substitute after seeing the initializing insn.  */
      if (init_insn && insn != init_insn)
        {
          struct note_reg_stored_arg arg;

          replace_loop_regs (insn, reg_rtx, replacement);
          if (REGNO_LAST_UID (regno) == INSN_UID (insn))
            replaced_last = 1;

          /* Stop replacing when REPLACEMENT is modified.  */
          arg.reg = replacement;
          arg.set_seen = 0;
          note_stores (PATTERN (insn), note_reg_stored, &arg);
          if (arg.set_seen)
            {
              rtx note = find_reg_note (insn, REG_EQUAL, NULL);
              /* We may have turned a previously valid REG_EQUAL note
                 into an invalid one: REGNO has been changed to
                 REPLACEMENT, and unlike REGNO, REPLACEMENT is modified
                 here, so the note may now describe a different value.  */
              if (note && reg_mentioned_p (replacement, XEXP (note, 0)))
                remove_note (insn, note);
              break;
            }
        }
    }
  if (! init_insn)
    abort ();
  if (apply_change_group ())
    {
      if (loop_dump_stream)
        fprintf (loop_dump_stream, "  Replaced reg %d", regno);
      if (store_is_first && replaced_last)
        {
          rtx first;
          rtx retval_note;

          /* Assume we're just deleting INIT_INSN.  */
          first = init_insn;
          /* Look for REG_RETVAL note.  If we're deleting the end of
             the libcall sequence, the whole sequence can go.  */
          retval_note = find_reg_note (init_insn, REG_RETVAL, NULL_RTX);
          /* If we found a REG_RETVAL note, find the first instruction
             in the sequence.  */
          if (retval_note)
            first = XEXP (retval_note, 0);

          /* Delete the instructions.  */
          loop_delete_insns (first, init_insn);
        }
      if (loop_dump_stream)
        fprintf (loop_dump_stream, ".\n");
    }
}
/* Replace all the instructions from FIRST up to and including LAST
   with NOTE_INSN_DELETED notes.  */

static void
loop_delete_insns (rtx first, rtx last)
{
  while (1)
    {
      if (loop_dump_stream)
        fprintf (loop_dump_stream, ", deleting init_insn (%d)",
                 INSN_UID (first));
      delete_insn (first);

      /* If this was the LAST instruction we're supposed to delete,
         we're done.  */
      if (first == last)
        break;

      first = NEXT_INSN (first);
    }
}
/* Try to replace occurrences of pseudo REGNO with REPLACEMENT within
   loop LOOP if the order of the sets of these registers can be
   swapped.  There must be exactly one insn within the loop that sets
   this pseudo, followed immediately by a move insn that copies REGNO
   into REPLACEMENT.  */
static void
try_swap_copy_prop (const struct loop *loop, rtx replacement,
                    unsigned int regno)
{
  rtx insn;
  rtx set = NULL_RTX;
  unsigned int new_regno;

  new_regno = REGNO (replacement);

  for (insn = next_insn_in_loop (loop, loop->scan_start);
       insn != NULL_RTX;
       insn = next_insn_in_loop (loop, insn))
    {
      /* Search for the insn that copies REGNO to NEW_REGNO.  */
      if (INSN_P (insn)
          && (set = single_set (insn))
          && REG_P (SET_DEST (set))
          && REGNO (SET_DEST (set)) == new_regno
          && REG_P (SET_SRC (set))
          && REGNO (SET_SRC (set)) == regno)
        break;
    }
  if (insn != NULL_RTX)
    {
      rtx prev_insn;
      rtx prev_set;

      /* Some DEF-USE info would come in handy here to make this
         function more general.  For now, just check the previous insn
         which is the most likely candidate for setting REGNO.  */

      prev_insn = PREV_INSN (insn);

      if (INSN_P (insn)
          && (prev_set = single_set (prev_insn))
          && REG_P (SET_DEST (prev_set))
          && REGNO (SET_DEST (prev_set)) == regno)
        {
          /* We have:
             (set (reg regno) (expr))
             (set (reg new_regno) (reg regno))

             so try converting this to:
             (set (reg new_regno) (expr))
             (set (reg regno) (reg new_regno))

             The former construct is often generated when a global
             variable used for an induction variable is shadowed by a
             register (NEW_REGNO).  The latter construct improves the
             chances of GIV replacement and BIV elimination.  */
          validate_change (prev_insn, &SET_DEST (prev_set),
                           replacement, 1);
          validate_change (insn, &SET_DEST (set),
                           SET_SRC (set), 1);
          validate_change (insn, &SET_SRC (set),
                           replacement, 1);

          if (apply_change_group ())
            {
              if (loop_dump_stream)
                fprintf (loop_dump_stream,
                         "  Swapped set of reg %d at %d with reg %d at %d.\n",
                         regno, INSN_UID (insn),
                         new_regno, INSN_UID (prev_insn));

              /* Update first use of REGNO.  */
              if (REGNO_FIRST_UID (regno) == INSN_UID (prev_insn))
                REGNO_FIRST_UID (regno) = INSN_UID (insn);

              /* Now perform copy propagation to hopefully
                 remove all uses of REGNO within the loop.  */
              try_copy_prop (loop, replacement, regno);
            }
        }
    }
}
/* Worker function for find_mem_in_note, called via for_each_rtx.  */

static int
find_mem_in_note_1 (rtx *x, void *data)
{
  if (*x != NULL_RTX && MEM_P (*x))
    {
      rtx *res = (rtx *) data;
      *res = *x;
      return 1;
    }
  return 0;
}

/* Returns the first MEM found in NOTE by depth-first search.  */

static rtx
find_mem_in_note (rtx note)
{
  if (note && for_each_rtx (&note, find_mem_in_note_1, &note))
    return note;
  return NULL_RTX;
}
/* Replace MEM with its associated pseudo register.  This function is
   called from load_mems via for_each_rtx.  DATA is actually a pointer
   to a structure describing the instruction currently being scanned
   and the MEM we are currently replacing.  */

static int
replace_loop_mem (rtx *mem, void *data)
{
  loop_replace_args *args = (loop_replace_args *) data;
  rtx m = *mem;

  if (m == NULL_RTX)
    return 0;

  switch (GET_CODE (m))
    {
    case MEM:
      break;

    case CONST_DOUBLE:
      /* We're not interested in the MEM associated with a
         CONST_DOUBLE, so there's no need to traverse into one.  */
      return -1;

    default:
      /* This is not a MEM.  */
      return 0;
    }

  if (!rtx_equal_p (args->match, m))
    /* This is not the MEM we are currently replacing.  */
    return 0;

  /* Actually replace the MEM.  */
  validate_change (args->insn, mem, args->replacement, 1);

  return 0;
}
static void
replace_loop_mems (rtx insn, rtx mem, rtx reg, int written)
{
  loop_replace_args args;

  args.insn = insn;
  args.match = mem;
  args.replacement = reg;

  for_each_rtx (&insn, replace_loop_mem, &args);

  /* If we hoist a mem write out of the loop, then REG_EQUAL
     notes referring to the mem are no longer valid.  */
  if (written)
    {
      rtx note, sub;
      rtx *link;

      for (link = &REG_NOTES (insn); (note = *link); link = &XEXP (note, 1))
        {
          if (REG_NOTE_KIND (note) == REG_EQUAL
              && (sub = find_mem_in_note (note))
              && true_dependence (mem, VOIDmode, sub, rtx_varies_p))
            {
              /* Remove the note.  */
              validate_change (NULL_RTX, link, XEXP (note, 1), 1);
              break;
            }
        }
    }
}
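
/* For example (an illustrative case; the RTL is invented), if INSN
   carries a note

       (expr_list:REG_EQUAL (plus (mem (reg 60)) (const_int 1)) ...)

   and we are hoisting a write to a MEM that may alias (mem (reg 60)),
   the loop body will keep the running value in the shadow register
   rather than in memory, so the note would describe a stale value and
   is deleted by the code above.  */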
/* Replace one register with another.  Called through for_each_rtx; PX
   points to the rtx being scanned.  DATA is actually a pointer to
   a structure of arguments.  */

static int
replace_loop_reg (rtx *px, void *data)
{
  rtx x = *px;
  loop_replace_args *args = (loop_replace_args *) data;

  if (x == NULL_RTX)
    return 0;

  if (x == args->match)
    validate_change (args->insn, px, args->replacement, 1);

  return 0;
}

static void
replace_loop_regs (rtx insn, rtx reg, rtx replacement)
{
  loop_replace_args args;

  args.insn = insn;
  args.match = reg;
  args.replacement = replacement;

  for_each_rtx (&insn, replace_loop_reg, &args);
}
/* Emit insn for PATTERN after WHERE_INSN in basic block WHERE_BB
   (ignored in the interim).  */

static rtx
loop_insn_emit_after (const struct loop *loop ATTRIBUTE_UNUSED,
                      basic_block where_bb ATTRIBUTE_UNUSED, rtx where_insn,
                      rtx pattern)
{
  return emit_insn_after (pattern, where_insn);
}

/* If WHERE_INSN is nonzero, emit insn for PATTERN before WHERE_INSN
   in basic block WHERE_BB (ignored in the interim) within the loop;
   otherwise, hoist PATTERN into the loop pre-header.  */

static rtx
loop_insn_emit_before (const struct loop *loop,
                       basic_block where_bb ATTRIBUTE_UNUSED,
                       rtx where_insn, rtx pattern)
{
  if (! where_insn)
    return loop_insn_hoist (loop, pattern);
  return emit_insn_before (pattern, where_insn);
}

/* Emit call insn for PATTERN before WHERE_INSN in basic block
   WHERE_BB (ignored in the interim) within the loop.  */

static rtx
loop_call_insn_emit_before (const struct loop *loop ATTRIBUTE_UNUSED,
                            basic_block where_bb ATTRIBUTE_UNUSED,
                            rtx where_insn, rtx pattern)
{
  return emit_call_insn_before (pattern, where_insn);
}

/* Hoist insn for PATTERN into the loop pre-header.  */

static rtx
loop_insn_hoist (const struct loop *loop, rtx pattern)
{
  return loop_insn_emit_before (loop, 0, loop->start, pattern);
}

/* Hoist call insn for PATTERN into the loop pre-header.  */

static rtx
loop_call_insn_hoist (const struct loop *loop, rtx pattern)
{
  return loop_call_insn_emit_before (loop, 0, loop->start, pattern);
}

/* Sink insn for PATTERN after the loop end.  */

static rtx
loop_insn_sink (const struct loop *loop, rtx pattern)
{
  return loop_insn_emit_before (loop, 0, loop->sink, pattern);
}
/* bl->final_value can be either a general_operand or a PLUS of a
   general_operand and a constant.  Emit a sequence of instructions to
   load it into REG.  */
static rtx
gen_load_of_final_value (rtx reg, rtx final_value)
{
  rtx seq;
  start_sequence ();
  final_value = force_operand (final_value, reg);
  if (final_value != reg)
    emit_move_insn (reg, final_value);
  seq = get_insns ();
  end_sequence ();
  return seq;
}
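
/* A sketch of what this produces (illustrative; operand numbers are
   invented).  For FINAL_VALUE = (plus (reg 100) (const_int 12)) and
   REG = (reg 200), force_operand typically computes the addition
   directly into REG, so the returned sequence is just

       (set (reg 200) (plus (reg 100) (const_int 12)))

   while a FINAL_VALUE that is already a general_operand reduces to a
   single move into REG.  */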
/* If the loop has multiple exits, emit insn for PATTERN before the
   loop to ensure that it will always be executed no matter how the
   loop exits.  Otherwise, emit the insn for PATTERN after the loop,
   since this is slightly more efficient.  */

static rtx
loop_insn_sink_or_swim (const struct loop *loop, rtx pattern)
{
  if (loop->exit_count)
    return loop_insn_hoist (loop, pattern);
  else
    return loop_insn_sink (loop, pattern);
}
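
/* A plausible use, in the style of callers in this file (the variable
   names here are illustrative): materialize a biv's final value where
   it is guaranteed to execute, letting the helper choose between the
   pre-header and the loop exit:

       loop_insn_sink_or_swim (loop,
                               gen_load_of_final_value (reg, final_value));
*/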
static void
loop_ivs_dump (const struct loop *loop, FILE *file, int verbose)
{
  struct iv_class *bl;
  int iv_num = 0;

  if (! loop || ! file)
    return;

  for (bl = LOOP_IVS (loop)->list; bl; bl = bl->next)
    iv_num++;

  fprintf (file, "Loop %d: %d IV classes\n", loop->num, iv_num);

  for (bl = LOOP_IVS (loop)->list; bl; bl = bl->next)
    {
      loop_iv_class_dump (bl, file, verbose);
      fputc ('\n', file);
    }
}
static void
loop_iv_class_dump (const struct iv_class *bl, FILE *file,
                    int verbose ATTRIBUTE_UNUSED)
{
  struct induction *v;
  rtx incr;
  int i;

  if (! bl || ! file)
    return;

  fprintf (file, "IV class for reg %d, benefit %d\n",
           bl->regno, bl->total_benefit);

  fprintf (file, " Init insn %d", INSN_UID (bl->init_insn));
  if (bl->initial_value)
    {
      fprintf (file, ", init val: ");
      print_simple_rtl (file, bl->initial_value);
    }
  if (bl->initial_test)
    {
      fprintf (file, ", init test: ");
      print_simple_rtl (file, bl->initial_test);
    }
  fputc ('\n', file);

  if (bl->final_value)
    {
      fprintf (file, " Final val: ");
      print_simple_rtl (file, bl->final_value);
      fputc ('\n', file);
    }

  if ((incr = biv_total_increment (bl)))
    {
      fprintf (file, " Total increment: ");
      print_simple_rtl (file, incr);
      fputc ('\n', file);
    }

  /* List the increments.  */
  for (i = 0, v = bl->biv; v; v = v->next_iv, i++)
    {
      fprintf (file, " Inc%d: insn %d, incr: ", i, INSN_UID (v->insn));
      print_simple_rtl (file, v->add_val);
      fputc ('\n', file);
    }

  /* List the givs.  */
  for (i = 0, v = bl->giv; v; v = v->next_iv, i++)
    {
      fprintf (file, " Giv%d: insn %d, benefit %d, ",
               i, INSN_UID (v->insn), v->benefit);
      if (v->giv_type == DEST_ADDR)
        print_simple_rtl (file, v->mem);
      else
        print_simple_rtl (file, single_set (v->insn));
      fputc ('\n', file);
    }
}
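
/* On invented values, the output assembled from the fprintf calls
   above looks roughly like:

       IV class for reg 64, benefit 12
        Init insn 10, init val: (const_int 0)
        Total increment: (const_int 4)
        Inc0: insn 25, incr: (const_int 4)
        Giv0: insn 30, benefit 6, (mem:SI (reg 64))

   (a sketch, not a dump captured from a real compilation).  */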
static void
loop_biv_dump (const struct induction *v, FILE *file, int verbose)
{
  if (! v || ! file)
    return;

  fprintf (file,
           "Biv %d: insn %d",
           REGNO (v->dest_reg), INSN_UID (v->insn));
  fprintf (file, " const ");
  print_simple_rtl (file, v->add_val);

  if (verbose && v->final_value)
    {
      fputc ('\n', file);
      fprintf (file, " final ");
      print_simple_rtl (file, v->final_value);
    }

  fputc ('\n', file);
}
static void
loop_giv_dump (const struct induction *v, FILE *file, int verbose)
{
  if (! v || ! file)
    return;

  if (v->giv_type == DEST_REG)
    fprintf (file, "Giv %d: insn %d",
             REGNO (v->dest_reg), INSN_UID (v->insn));
  else
    fprintf (file, "Dest address: insn %d",
             INSN_UID (v->insn));

  fprintf (file, " src reg %d benefit %d",
           REGNO (v->src_reg), v->benefit);
  fprintf (file, " lifetime %d",
           v->lifetime);

  if (v->replaceable)
    fprintf (file, " replaceable");

  if (v->no_const_addval)
    fprintf (file, " ncav");

  if (v->ext_dependent)
    {
      switch (GET_CODE (v->ext_dependent))
        {
        case SIGN_EXTEND:
          fprintf (file, " ext se");
          break;
        case ZERO_EXTEND:
          fprintf (file, " ext ze");
          break;
        case TRUNCATE:
          fprintf (file, " ext tr");
          break;
        default:
          abort ();
        }
    }

  fputc ('\n', file);
  fprintf (file, " mult ");
  print_simple_rtl (file, v->mult_val);

  fputc ('\n', file);
  fprintf (file, " add ");
  print_simple_rtl (file, v->add_val);

  if (verbose && v->final_value)
    {
      fputc ('\n', file);
      fprintf (file, " final ");
      print_simple_rtl (file, v->final_value);
    }

  fputc ('\n', file);
}
void
debug_ivs (const struct loop *loop)
{
  loop_ivs_dump (loop, stderr, 1);
}

void
debug_iv_class (const struct iv_class *bl)
{
  loop_iv_class_dump (bl, stderr, 1);
}

void
debug_biv (const struct induction *v)
{
  loop_biv_dump (v, stderr, 1);
}

void
debug_giv (const struct induction *v)
{
  loop_giv_dump (v, stderr, 1);
}
#define LOOP_BLOCK_NUM_1(INSN) \
((INSN) ? (BLOCK_FOR_INSN (INSN) ? BLOCK_NUM (INSN) : -1) : -1)

/* The notes do not have an assigned block, so look at the next insn.  */
#define LOOP_BLOCK_NUM(INSN) \
((INSN) ? (NOTE_P (INSN) \
           ? LOOP_BLOCK_NUM_1 (next_nonnote_insn (INSN)) \
           : LOOP_BLOCK_NUM_1 (INSN)) \
 : -1)
#define LOOP_INSN_UID(INSN) ((INSN) ? INSN_UID (INSN) : -1)

static void
loop_dump_aux (const struct loop *loop, FILE *file,
               int verbose ATTRIBUTE_UNUSED)
{
  rtx label;

  if (! loop || ! file || !BB_HEAD (loop->first))
    return;

  /* Print diagnostics to compare our concept of a loop with
     what the loop notes say.  */
  if (! PREV_INSN (BB_HEAD (loop->first))
      || !NOTE_P (PREV_INSN (BB_HEAD (loop->first)))
      || NOTE_LINE_NUMBER (PREV_INSN (BB_HEAD (loop->first)))
         != NOTE_INSN_LOOP_BEG)
    fprintf (file, ";; No NOTE_INSN_LOOP_BEG at %d\n",
             INSN_UID (PREV_INSN (BB_HEAD (loop->first))));
  if (! NEXT_INSN (BB_END (loop->last))
      || !NOTE_P (NEXT_INSN (BB_END (loop->last)))
      || NOTE_LINE_NUMBER (NEXT_INSN (BB_END (loop->last)))
         != NOTE_INSN_LOOP_END)
    fprintf (file, ";; No NOTE_INSN_LOOP_END at %d\n",
             INSN_UID (NEXT_INSN (BB_END (loop->last))));

  if (loop->start)
    {
      fprintf (file,
               ";; start %d (%d), end %d (%d)\n",
               LOOP_BLOCK_NUM (loop->start),
               LOOP_INSN_UID (loop->start),
               LOOP_BLOCK_NUM (loop->end),
               LOOP_INSN_UID (loop->end));
      fprintf (file, ";; top %d (%d), scan start %d (%d)\n",
               LOOP_BLOCK_NUM (loop->top),
               LOOP_INSN_UID (loop->top),
               LOOP_BLOCK_NUM (loop->scan_start),
               LOOP_INSN_UID (loop->scan_start));
      fprintf (file, ";; exit_count %d", loop->exit_count);
      if (loop->exit_count)
        {
          fputs (", labels:", file);
          for (label = loop->exit_labels; label; label = LABEL_NEXTREF (label))
            {
              fprintf (file, " %d ",
                       LOOP_INSN_UID (XEXP (label, 0)));
            }
        }
      fputs ("\n", file);
    }
}
/* Call this function from the debugger to dump LOOP.  */

void
debug_loop (const struct loop *loop)
{
  flow_loop_dump (loop, stderr, loop_dump_aux, 1);
}

/* Call this function from the debugger to dump LOOPS.  */

void
debug_loops (const struct loops *loops)
{
  flow_loops_dump (loops, stderr, loop_dump_aux, 1);
}
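
/* For example, from a gdb session stopped inside this pass (a
   hypothetical session; nothing here is taken from a real trace):

       (gdb) call debug_loop (loop)
       (gdb) call debug_ivs (loop)

   Both write to stderr with full verbosity, combining the dump
   routines defined above.  */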