1 /* Perform various loop optimizations, including strength reduction.
2 Copyright (C) 1987, 1988, 1989, 1991, 1992, 1993, 1994, 1995, 1996, 1997,
3 1998, 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 /* This is the loop optimization pass of the compiler.
23 It finds invariant computations within loops and moves them
24 to the beginning of the loop. Then it identifies basic and
25 general induction variables.
27    Basic induction variables (BIVs) are pseudo registers that are set within
28    the loop only by incrementing or decrementing their value.  General induction
29    variables (GIVs) are pseudo registers whose value is a linear function
30    of a basic induction variable.  BIVs are recognized by `basic_induction_var';
31    GIVs by `general_induction_var'.
33 Once induction variables are identified, strength reduction is applied to the
34 general induction variables, and induction variable elimination is applied to
35 the basic induction variables.
37 It also finds cases where
38 a register is set within the loop by zero-extending a narrower value
39 and changes these to zero the entire register once before the loop
40 and merely copy the low part within the loop.
42    Most of the complexity is in heuristics to decide when it is
43    worthwhile to do these things.  */
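/* As an illustrative sketch (not taken from this pass; the variables and the
   element size are hypothetical), consider a source loop such as:

       int i;
       for (i = 0; i < n; i++)
         a[i] = 0;

   Inside the loop `i' is a BIV: it changes only by the constant increment 1.
   The address of `a[i]', essentially (char *) a + i * 4 for a 4-byte element,
   is a GIV, since it is a linear function of the BIV.  Strength reduction
   keeps a pointer that is advanced by 4 each iteration instead of redoing the
   multiply, and induction variable elimination may then rewrite the exit test
   in terms of that pointer so that `i' itself can be removed.  */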
45 #include "config.h"
46 #include "system.h"
47 #include "coretypes.h"
48 #include "tm.h"
49 #include "rtl.h"
50 #include "tm_p.h"
51 #include "function.h"
52 #include "expr.h"
53 #include "hard-reg-set.h"
54 #include "basic-block.h"
55 #include "insn-config.h"
56 #include "regs.h"
57 #include "recog.h"
58 #include "flags.h"
59 #include "real.h"
60 #include "loop.h"
61 #include "cselib.h"
62 #include "except.h"
63 #include "toplev.h"
64 #include "predict.h"
65 #include "insn-flags.h"
66 #include "optabs.h"
67 #include "cfgloop.h"
68 #include "ggc.h"
70 /* Not really meaningful values, but at least something. */
71 #ifndef SIMULTANEOUS_PREFETCHES
72 #define SIMULTANEOUS_PREFETCHES 3
73 #endif
74 #ifndef PREFETCH_BLOCK
75 #define PREFETCH_BLOCK 32
76 #endif
77 #ifndef HAVE_prefetch
78 #define HAVE_prefetch 0
79 #define CODE_FOR_prefetch 0
80 #define gen_prefetch(a,b,c) (abort(), NULL_RTX)
81 #endif
83 /* Give up the prefetch optimizations once we exceed a given threshold.
84 It is unlikely that we would be able to optimize something in a loop
85 with so many detected prefetches. */
86 #define MAX_PREFETCHES 100
87 /* The number of prefetch blocks that are beneficial to fetch at once before
88 a loop with a known (and low) iteration count. */
89 #define PREFETCH_BLOCKS_BEFORE_LOOP_MAX 6
90 /* For very tiny loops it is not worthwhile to prefetch even before the loop,
91 since it is likely that the data are already in the cache. */
92 #define PREFETCH_BLOCKS_BEFORE_LOOP_MIN 2
94 /* Parameterize some prefetch heuristics so they can be turned on and off
95 easily for performance testing on new architectures. These can be
96 defined in target-dependent files. */
98 /* Prefetch is worthwhile only when loads/stores are dense. */
99 #ifndef PREFETCH_ONLY_DENSE_MEM
100 #define PREFETCH_ONLY_DENSE_MEM 1
101 #endif
103 /* Define what we mean by "dense" loads and stores; this value divided by 256
104    is the minimum fraction of memory references worth prefetching (the default of 220 is 220/256, roughly 86%).  */
105 #ifndef PREFETCH_DENSE_MEM
106 #define PREFETCH_DENSE_MEM 220
107 #endif
109 /* Do not prefetch for a loop whose iteration count is known to be low. */
110 #ifndef PREFETCH_NO_LOW_LOOPCNT
111 #define PREFETCH_NO_LOW_LOOPCNT 1
112 #endif
114 /* Define what we mean by a "low" iteration count. */
115 #ifndef PREFETCH_LOW_LOOPCNT
116 #define PREFETCH_LOW_LOOPCNT 32
117 #endif
119 /* Do not prefetch for a loop that contains a function call; such a loop is
120 probably not an internal loop. */
121 #ifndef PREFETCH_NO_CALL
122 #define PREFETCH_NO_CALL 1
123 #endif
125 /* Do not prefetch accesses with an extreme stride. */
126 #ifndef PREFETCH_NO_EXTREME_STRIDE
127 #define PREFETCH_NO_EXTREME_STRIDE 1
128 #endif
130 /* Define what we mean by an "extreme" stride. */
131 #ifndef PREFETCH_EXTREME_STRIDE
132 #define PREFETCH_EXTREME_STRIDE 4096
133 #endif
135 /* Define a limit to how far apart indices can be and still be merged
136 into a single prefetch. */
137 #ifndef PREFETCH_EXTREME_DIFFERENCE
138 #define PREFETCH_EXTREME_DIFFERENCE 4096
139 #endif
141 /* Issue prefetch instructions before the loop to fetch data to be used
142 in the first few loop iterations. */
143 #ifndef PREFETCH_BEFORE_LOOP
144 #define PREFETCH_BEFORE_LOOP 1
145 #endif
147 /* Do not handle reversed order prefetches (negative stride). */
148 #ifndef PREFETCH_NO_REVERSE_ORDER
149 #define PREFETCH_NO_REVERSE_ORDER 1
150 #endif
152 /* Prefetch even if the GIV is in conditional code. */
153 #ifndef PREFETCH_CONDITIONAL
154 #define PREFETCH_CONDITIONAL 1
155 #endif
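/* A hedged sketch of how a port might tune these heuristics.  The macro names
   are the ones defined above, but the values, and the idea of putting them in
   the target's own header, are illustrative assumptions only:

       #define SIMULTANEOUS_PREFETCHES 8
       #define PREFETCH_BLOCK 64
       #define PREFETCH_CONDITIONAL 0

   Any macro a target leaves undefined keeps the default given above.  */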
157 #define LOOP_REG_LIFETIME(LOOP, REGNO) \
158 ((REGNO_LAST_LUID (REGNO) - REGNO_FIRST_LUID (REGNO)))
160 #define LOOP_REG_GLOBAL_P(LOOP, REGNO) \
161 ((REGNO_LAST_LUID (REGNO) > INSN_LUID ((LOOP)->end) \
162 || REGNO_FIRST_LUID (REGNO) < INSN_LUID ((LOOP)->start)))
164 #define LOOP_REGNO_NREGS(REGNO, SET_DEST) \
165 ((REGNO) < FIRST_PSEUDO_REGISTER \
166 ? (int) hard_regno_nregs[(REGNO)][GET_MODE (SET_DEST)] : 1)
169 /* Vector mapping INSN_UIDs to luids.
170    The luids are like uids but always increase monotonically.
171 We use them to see whether a jump comes from outside a given loop. */
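/* For example (illustrative numbers only): if insns with uids 40, 12 and 57
   appear in that order in the insn stream, they are assigned luids 1, 2 and 3.
   Luid order therefore always matches stream order, even though uid order
   need not.  */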
173 int *uid_luid;
175 /* Indexed by INSN_UID, contains the ordinal giving the (innermost) loop
176 number the insn is contained in. */
178 struct loop **uid_loop;
180 /* 1 + largest uid of any insn. */
182 int max_uid_for_loop;
184 /* Number of loops detected in current function. Used as index to the
185 next few tables. */
187 static int max_loop_num;
189 /* Bound on pseudo register number before loop optimization.
190 A pseudo has valid regscan info if its number is < max_reg_before_loop. */
191 unsigned int max_reg_before_loop;
193 /* The value to pass to the next call of reg_scan_update. */
194 static int loop_max_reg;
196 /* During the analysis of a loop, a chain of `struct movable's
197 is made to record all the movable insns found.
198 Then the entire chain can be scanned to decide which to move. */
200 struct movable
201 {
202 rtx insn; /* A movable insn */
203 rtx set_src; /* The expression this reg is set from. */
204 rtx set_dest; /* The destination of this SET. */
205 rtx dependencies; /* When INSN is libcall, this is an EXPR_LIST
206 of any registers used within the LIBCALL. */
207 int consec; /* Number of consecutive following insns
208 that must be moved with this one. */
209 unsigned int regno; /* The register it sets */
210 short lifetime; /* lifetime of that register;
211 may be adjusted when matching movables
212 that load the same value are found. */
213 short savings; /* Number of insns we can move for this reg,
214 including other movables that force this
215 or match this one. */
216 ENUM_BITFIELD(machine_mode) savemode : 8; /* Nonzero means it is a mode for
217 a low part that we should avoid changing when
218 clearing the rest of the reg. */
219 unsigned int cond : 1; /* 1 if only conditionally movable */
220 unsigned int force : 1; /* 1 means MUST move this insn */
221 unsigned int global : 1; /* 1 means reg is live outside this loop */
222 /* If PARTIAL is 1, GLOBAL means something different:
223 that the reg is live outside the range from where it is set
224 to the following label. */
225 unsigned int done : 1; /* 1 inhibits further processing of this */
227 unsigned int partial : 1; /* 1 means this reg is used for zero-extending.
228 In particular, moving it does not make it
229 invariant. */
230 unsigned int move_insn : 1; /* 1 means that we call emit_move_insn to
231 load SRC, rather than copying INSN. */
232 unsigned int move_insn_first:1;/* Same as above, if this is necessary for the
233 first insn of a consecutive sets group. */
234 unsigned int is_equiv : 1; /* 1 means a REG_EQUIV is present on INSN. */
235 unsigned int insert_temp : 1; /* 1 means we copy to a new pseudo and replace
236 the original insn with a copy from that
237 pseudo, rather than deleting it. */
238 struct movable *match; /* First entry for same value */
239 struct movable *forces; /* An insn that must be moved if this is */
240   struct movable *next;
241 };
244 FILE *loop_dump_stream;
246 /* Forward declarations. */
248 static void invalidate_loops_containing_label (rtx);
249 static void find_and_verify_loops (rtx, struct loops *);
250 static void mark_loop_jump (rtx, struct loop *);
251 static void prescan_loop (struct loop *);
252 static int reg_in_basic_block_p (rtx, rtx);
253 static int consec_sets_invariant_p (const struct loop *, rtx, int, rtx);
254 static int labels_in_range_p (rtx, int);
255 static void count_one_set (struct loop_regs *, rtx, rtx, rtx *);
256 static void note_addr_stored (rtx, rtx, void *);
257 static void note_set_pseudo_multiple_uses (rtx, rtx, void *);
258 static int loop_reg_used_before_p (const struct loop *, rtx, rtx);
259 static rtx find_regs_nested (rtx, rtx);
260 static void scan_loop (struct loop*, int);
261 #if 0
262 static void replace_call_address (rtx, rtx, rtx);
263 #endif
264 static rtx skip_consec_insns (rtx, int);
265 static int libcall_benefit (rtx);
266 static rtx libcall_other_reg (rtx, rtx);
267 static void record_excess_regs (rtx, rtx, rtx *);
268 static void ignore_some_movables (struct loop_movables *);
269 static void force_movables (struct loop_movables *);
270 static void combine_movables (struct loop_movables *, struct loop_regs *);
271 static int num_unmoved_movables (const struct loop *);
272 static int regs_match_p (rtx, rtx, struct loop_movables *);
273 static int rtx_equal_for_loop_p (rtx, rtx, struct loop_movables *,
274 struct loop_regs *);
275 static void add_label_notes (rtx, rtx);
276 static void move_movables (struct loop *loop, struct loop_movables *, int,
277 int);
278 static void loop_movables_add (struct loop_movables *, struct movable *);
279 static void loop_movables_free (struct loop_movables *);
280 static int count_nonfixed_reads (const struct loop *, rtx);
281 static void loop_bivs_find (struct loop *);
282 static void loop_bivs_init_find (struct loop *);
283 static void loop_bivs_check (struct loop *);
284 static void loop_givs_find (struct loop *);
285 static void loop_givs_check (struct loop *);
286 static int loop_biv_eliminable_p (struct loop *, struct iv_class *, int, int);
287 static int loop_giv_reduce_benefit (struct loop *, struct iv_class *,
288 struct induction *, rtx);
289 static void loop_givs_dead_check (struct loop *, struct iv_class *);
290 static void loop_givs_reduce (struct loop *, struct iv_class *);
291 static void loop_givs_rescan (struct loop *, struct iv_class *, rtx *);
292 static void loop_ivs_free (struct loop *);
293 static void strength_reduce (struct loop *, int);
294 static void find_single_use_in_loop (struct loop_regs *, rtx, rtx);
295 static int valid_initial_value_p (rtx, rtx, int, rtx);
296 static void find_mem_givs (const struct loop *, rtx, rtx, int, int);
297 static void record_biv (struct loop *, struct induction *, rtx, rtx, rtx,
298 rtx, rtx *, int, int);
299 static void check_final_value (const struct loop *, struct induction *);
300 static void loop_ivs_dump (const struct loop *, FILE *, int);
301 static void loop_iv_class_dump (const struct iv_class *, FILE *, int);
302 static void loop_biv_dump (const struct induction *, FILE *, int);
303 static void loop_giv_dump (const struct induction *, FILE *, int);
304 static void record_giv (const struct loop *, struct induction *, rtx, rtx,
305 rtx, rtx, rtx, rtx, int, enum g_types, int, int,
306 rtx *);
307 static void update_giv_derive (const struct loop *, rtx);
308 static void check_ext_dependent_givs (const struct loop *, struct iv_class *);
309 static int basic_induction_var (const struct loop *, rtx, enum machine_mode,
310 rtx, rtx, rtx *, rtx *, rtx **);
311 static rtx simplify_giv_expr (const struct loop *, rtx, rtx *, int *);
312 static int general_induction_var (const struct loop *loop, rtx, rtx *, rtx *,
313 rtx *, rtx *, int, int *, enum machine_mode);
314 static int consec_sets_giv (const struct loop *, int, rtx, rtx, rtx, rtx *,
315 rtx *, rtx *, rtx *);
316 static int check_dbra_loop (struct loop *, int);
317 static rtx express_from_1 (rtx, rtx, rtx);
318 static rtx combine_givs_p (struct induction *, struct induction *);
319 static int cmp_combine_givs_stats (const void *, const void *);
320 static void combine_givs (struct loop_regs *, struct iv_class *);
321 static int product_cheap_p (rtx, rtx);
322 static int maybe_eliminate_biv (const struct loop *, struct iv_class *, int,
323 int, int);
324 static int maybe_eliminate_biv_1 (const struct loop *, rtx, rtx,
325 struct iv_class *, int, basic_block, rtx);
326 static int last_use_this_basic_block (rtx, rtx);
327 static void record_initial (rtx, rtx, void *);
328 static void update_reg_last_use (rtx, rtx);
329 static rtx next_insn_in_loop (const struct loop *, rtx);
330 static void loop_regs_scan (const struct loop *, int);
331 static int count_insns_in_loop (const struct loop *);
332 static int find_mem_in_note_1 (rtx *, void *);
333 static rtx find_mem_in_note (rtx);
334 static void load_mems (const struct loop *);
335 static int insert_loop_mem (rtx *, void *);
336 static int replace_loop_mem (rtx *, void *);
337 static void replace_loop_mems (rtx, rtx, rtx, int);
338 static int replace_loop_reg (rtx *, void *);
339 static void replace_loop_regs (rtx insn, rtx, rtx);
340 static void note_reg_stored (rtx, rtx, void *);
341 static void try_copy_prop (const struct loop *, rtx, unsigned int);
342 static void try_swap_copy_prop (const struct loop *, rtx, unsigned int);
343 static rtx check_insn_for_givs (struct loop *, rtx, int, int);
344 static rtx check_insn_for_bivs (struct loop *, rtx, int, int);
345 static rtx gen_add_mult (rtx, rtx, rtx, rtx);
346 static void loop_regs_update (const struct loop *, rtx);
347 static int iv_add_mult_cost (rtx, rtx, rtx, rtx);
349 static rtx loop_insn_emit_after (const struct loop *, basic_block, rtx, rtx);
350 static rtx loop_call_insn_emit_before (const struct loop *, basic_block,
351 rtx, rtx);
352 static rtx loop_call_insn_hoist (const struct loop *, rtx);
353 static rtx loop_insn_sink_or_swim (const struct loop *, rtx);
355 static void loop_dump_aux (const struct loop *, FILE *, int);
356 static void loop_delete_insns (rtx, rtx);
357 static HOST_WIDE_INT remove_constant_addition (rtx *);
358 static rtx gen_load_of_final_value (rtx, rtx);
359 void debug_ivs (const struct loop *);
360 void debug_iv_class (const struct iv_class *);
361 void debug_biv (const struct induction *);
362 void debug_giv (const struct induction *);
363 void debug_loop (const struct loop *);
364 void debug_loops (const struct loops *);
366 typedef struct loop_replace_args
367 {
368 rtx match;
369 rtx replacement;
370 rtx insn;
371 } loop_replace_args;
373 /* Nonzero iff INSN is between START and END, inclusive. */
374 #define INSN_IN_RANGE_P(INSN, START, END) \
375 (INSN_UID (INSN) < max_uid_for_loop \
376 && INSN_LUID (INSN) >= INSN_LUID (START) \
377 && INSN_LUID (INSN) <= INSN_LUID (END))
379 /* Indirect_jump_in_function is computed once per function. */
380 static int indirect_jump_in_function;
381 static int indirect_jump_in_function_p (rtx);
383 static int compute_luids (rtx, rtx, int);
385 static int biv_elimination_giv_has_0_offset (struct induction *,
386 struct induction *, rtx);
388 /* Benefit penalty, if a giv is not replaceable, i.e. must emit an insn to
389 copy the value of the strength reduced giv to its original register. */
390 static int copy_cost;
392 /* Cost of using a register, to normalize the benefits of a giv. */
393 static int reg_address_cost;
395 void
396 init_loop (void)
398 rtx reg = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
400 reg_address_cost = address_cost (reg, SImode);
402 copy_cost = COSTS_N_INSNS (1);
405 /* Compute the mapping from uids to luids.
406 LUIDs are numbers assigned to insns, like uids,
407 except that luids increase monotonically through the code.
408 Start at insn START and stop just before END. Assign LUIDs
409 starting with PREV_LUID + 1. Return the last assigned LUID + 1. */
410 static int
411 compute_luids (rtx start, rtx end, int prev_luid)
413 int i;
414 rtx insn;
416 for (insn = start, i = prev_luid; insn != end; insn = NEXT_INSN (insn))
418 if (INSN_UID (insn) >= max_uid_for_loop)
419 continue;
420 /* Don't assign luids to line-number NOTEs, so that the distance in
421 luids between two insns is not affected by -g. */
422 if (GET_CODE (insn) != NOTE
423 || NOTE_LINE_NUMBER (insn) <= 0)
424 uid_luid[INSN_UID (insn)] = ++i;
425 else
426 /* Give a line number note the same luid as preceding insn. */
427 uid_luid[INSN_UID (insn)] = i;
429 return i + 1;
432 /* Entry point of this file. Perform loop optimization
433 on the current function. F is the first insn of the function
434 and DUMPFILE is a stream for output of a trace of actions taken
435 (or 0 if none should be output). */
437 void
438 loop_optimize (rtx f, FILE *dumpfile, int flags)
440 rtx insn;
441 int i;
442 struct loops loops_data;
443 struct loops *loops = &loops_data;
444 struct loop_info *loops_info;
446 loop_dump_stream = dumpfile;
448 init_recog_no_volatile ();
450 max_reg_before_loop = max_reg_num ();
451 loop_max_reg = max_reg_before_loop;
453 regs_may_share = 0;
455 /* Count the number of loops. */
457 max_loop_num = 0;
458 for (insn = f; insn; insn = NEXT_INSN (insn))
460 if (GET_CODE (insn) == NOTE
461 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
462 max_loop_num++;
465 /* Don't waste time if no loops. */
466 if (max_loop_num == 0)
467 return;
469 loops->num = max_loop_num;
471 /* Get size to use for tables indexed by uids.
472 Leave some space for labels allocated by find_and_verify_loops. */
473 max_uid_for_loop = get_max_uid () + 1 + max_loop_num * 32;
475 uid_luid = xcalloc (max_uid_for_loop, sizeof (int));
476 uid_loop = xcalloc (max_uid_for_loop, sizeof (struct loop *));
478 /* Allocate storage for array of loops. */
479 loops->array = xcalloc (loops->num, sizeof (struct loop));
481 /* Find and process each loop.
482 First, find them, and record them in order of their beginnings. */
483 find_and_verify_loops (f, loops);
485 /* Allocate and initialize auxiliary loop information. */
486 loops_info = xcalloc (loops->num, sizeof (struct loop_info));
487 for (i = 0; i < (int) loops->num; i++)
488 loops->array[i].aux = loops_info + i;
490 /* Now find all register lifetimes. This must be done after
491 find_and_verify_loops, because it might reorder the insns in the
492 function. */
493 reg_scan (f, max_reg_before_loop, 1);
495 /* This must occur after reg_scan so that registers created by gcse
496 will have entries in the register tables.
498 We could have added a call to reg_scan after gcse_main in toplev.c,
499 but moving this call to init_alias_analysis is more efficient. */
500 init_alias_analysis ();
502 /* See if we went too far. Note that get_max_uid already returns
503    one more than the maximum uid of all insns.  */
504 if (get_max_uid () > max_uid_for_loop)
505 abort ();
506 /* Now reset it to the actual size we need. See above. */
507 max_uid_for_loop = get_max_uid ();
509 /* find_and_verify_loops has already called compute_luids, but it
510 might have rearranged code afterwards, so we need to recompute
511 the luids now. */
512 compute_luids (f, NULL_RTX, 0);
514 /* Don't leave gaps in uid_luid for insns that have been
515 deleted. It is possible that the first or last insn
516 using some register has been deleted by cross-jumping.
517 Make sure that uid_luid for that former insn's uid
518 points to the general area where that insn used to be. */
519 for (i = 0; i < max_uid_for_loop; i++)
521 uid_luid[0] = uid_luid[i];
522 if (uid_luid[0] != 0)
523 break;
525 for (i = 0; i < max_uid_for_loop; i++)
526 if (uid_luid[i] == 0)
527 uid_luid[i] = uid_luid[i - 1];
529   /* Determine if the function has an indirect jump.  On some systems
530 this prevents low overhead loop instructions from being used. */
531 indirect_jump_in_function = indirect_jump_in_function_p (f);
533 /* Now scan the loops, last ones first, since this means inner ones are done
534 before outer ones. */
535 for (i = max_loop_num - 1; i >= 0; i--)
537 struct loop *loop = &loops->array[i];
539 if (! loop->invalid && loop->end)
541 scan_loop (loop, flags);
542 ggc_collect ();
546 end_alias_analysis ();
548 /* Clean up. */
549 for (i = 0; i < (int) loops->num; i++)
550 free (loops_info[i].mems);
552 free (uid_luid);
553 free (uid_loop);
554 free (loops_info);
555 free (loops->array);
558 /* Returns the next insn, in execution order, after INSN. START and
559 END are the NOTE_INSN_LOOP_BEG and NOTE_INSN_LOOP_END for the loop,
560 respectively. LOOP->TOP, if non-NULL, is the top of the loop in the
561 insn-stream; it is used with loops that are entered near the
562 bottom. */
564 static rtx
565 next_insn_in_loop (const struct loop *loop, rtx insn)
567 insn = NEXT_INSN (insn);
569 if (insn == loop->end)
571 if (loop->top)
572 /* Go to the top of the loop, and continue there. */
573 insn = loop->top;
574 else
575 /* We're done. */
576 insn = NULL_RTX;
579 if (insn == loop->scan_start)
580 /* We're done. */
581 insn = NULL_RTX;
583 return insn;
586 /* Find any register references hidden inside X and add them to
587    the dependency list DEPS.  This is used to look inside a CLOBBER of a MEM
588    when checking whether a PARALLEL can be pulled out of a loop.  */
590 static rtx
591 find_regs_nested (rtx deps, rtx x)
593 enum rtx_code code = GET_CODE (x);
594 if (code == REG)
595 deps = gen_rtx_EXPR_LIST (VOIDmode, x, deps);
596 else
598 const char *fmt = GET_RTX_FORMAT (code);
599 int i, j;
600 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
602 if (fmt[i] == 'e')
603 deps = find_regs_nested (deps, XEXP (x, i));
604 else if (fmt[i] == 'E')
605 for (j = 0; j < XVECLEN (x, i); j++)
606 deps = find_regs_nested (deps, XVECEXP (x, i, j));
609 return deps;
612 /* Optimize one loop described by LOOP. */
614 /* ??? Could also move memory writes out of loops if the destination address
615 is invariant, the source is invariant, the memory write is not volatile,
616 and if we can prove that no read inside the loop can read this address
617 before the write occurs. If there is a read of this address after the
618 write, then we can also mark the memory read as invariant. */
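/* A sketch of the situation described above (the names are hypothetical):

       for (i = 0; i < n; i++)
         {
           *flag = 1;      invariant address and source, no earlier read
           sum += *flag;   a read after the write, which could then be
                           treated as invariant as well
         }
 */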
620 static void
621 scan_loop (struct loop *loop, int flags)
623 struct loop_info *loop_info = LOOP_INFO (loop);
624 struct loop_regs *regs = LOOP_REGS (loop);
625 int i;
626 rtx loop_start = loop->start;
627 rtx loop_end = loop->end;
628 rtx p;
629 /* 1 if we are scanning insns that could be executed zero times. */
630 int maybe_never = 0;
631 /* 1 if we are scanning insns that might never be executed
632 due to a subroutine call which might exit before they are reached. */
633 int call_passed = 0;
634 /* Number of insns in the loop. */
635 int insn_count;
636 int tem;
637 rtx temp, update_start, update_end;
638 /* The SET from an insn, if it is the only SET in the insn. */
639 rtx set, set1;
640 /* Chain describing insns movable in current loop. */
641 struct loop_movables *movables = LOOP_MOVABLES (loop);
642 /* Ratio of extra register life span we can justify
643 for saving an instruction. More if loop doesn't call subroutines
644 since in that case saving an insn makes more difference
645 and more registers are available. */
646 int threshold;
647 /* Nonzero if we are scanning instructions in a sub-loop. */
648 int loop_depth = 0;
649 int in_libcall;
651 loop->top = 0;
653 movables->head = 0;
654 movables->last = 0;
656 /* Determine whether this loop starts with a jump down to a test at
657 the end. This will occur for a small number of loops with a test
658 that is too complex to duplicate in front of the loop.
660 We search for the first insn or label in the loop, skipping NOTEs.
661 However, we must be careful not to skip past a NOTE_INSN_LOOP_BEG
662 (because we might have a loop executed only once that contains a
663 loop which starts with a jump to its exit test) or a NOTE_INSN_LOOP_END
664 (in case we have a degenerate loop).
666 Note that if we mistakenly think that a loop is entered at the top
667 when, in fact, it is entered at the exit test, the only effect will be
668 slightly poorer optimization. Making the opposite error can generate
669 incorrect code. Since very few loops now start with a jump to the
670 exit test, the code here to detect that case is very conservative. */
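  /* A sketch of the shape being detected here (labels are hypothetical):

         NOTE_INSN_LOOP_BEG
         jump L2                unconditional jump to the exit test
       L1:                      LOOP->TOP: the body starts here
         ... loop body ...
       L2:                      LOOP->SCAN_START: the exit test
         conditional jump to L1
         NOTE_INSN_LOOP_END

     This layout arises for a loop whose exit test was not duplicated in front
     of the loop; the code below then records L2 as the true entry point.  */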
672 for (p = NEXT_INSN (loop_start);
673 p != loop_end
674 && GET_CODE (p) != CODE_LABEL && ! INSN_P (p)
675 && (GET_CODE (p) != NOTE
676 || (NOTE_LINE_NUMBER (p) != NOTE_INSN_LOOP_BEG
677 && NOTE_LINE_NUMBER (p) != NOTE_INSN_LOOP_END));
678 p = NEXT_INSN (p))
681 loop->scan_start = p;
683 /* If loop end is the end of the current function, then emit a
684 NOTE_INSN_DELETED after loop_end and set loop->sink to the dummy
685 note insn. This is the position we use when sinking insns out of
686 the loop. */
687 if (NEXT_INSN (loop->end) != 0)
688 loop->sink = NEXT_INSN (loop->end);
689 else
690 loop->sink = emit_note_after (NOTE_INSN_DELETED, loop->end);
692 /* Set up variables describing this loop. */
693 prescan_loop (loop);
694 threshold = (loop_info->has_call ? 1 : 2) * (1 + n_non_fixed_regs);
696 /* If loop has a jump before the first label,
697 the true entry is the target of that jump.
698 Start scan from there.
699 But record in LOOP->TOP the place where the end-test jumps
700 back to so we can scan that after the end of the loop. */
701 if (GET_CODE (p) == JUMP_INSN
702 /* Loop entry must be unconditional jump (and not a RETURN) */
703 && any_uncondjump_p (p)
704 && JUMP_LABEL (p) != 0
705 /* Check to see whether the jump actually
706 jumps out of the loop (meaning it's no loop).
707 This case can happen for things like
708 do {..} while (0). If this label was generated previously
709 by loop, we can't tell anything about it and have to reject
710 the loop. */
711 && INSN_IN_RANGE_P (JUMP_LABEL (p), loop_start, loop_end))
713 loop->top = next_label (loop->scan_start);
714 loop->scan_start = JUMP_LABEL (p);
717 /* If LOOP->SCAN_START was an insn created by loop, we don't know its luid
718 as required by loop_reg_used_before_p. So skip such loops. (This
719 test may never be true, but it's best to play it safe.)
721 Also, skip loops where we do not start scanning at a label. This
722 test also rejects loops starting with a JUMP_INSN that failed the
723 test above. */
725 if (INSN_UID (loop->scan_start) >= max_uid_for_loop
726 || GET_CODE (loop->scan_start) != CODE_LABEL)
728 if (loop_dump_stream)
729 fprintf (loop_dump_stream, "\nLoop from %d to %d is phony.\n\n",
730 INSN_UID (loop_start), INSN_UID (loop_end));
731 return;
734 /* Allocate extra space for REGs that might be created by load_mems.
735 We allocate a little extra slop as well, in the hopes that we
736 won't have to reallocate the regs array. */
737 loop_regs_scan (loop, loop_info->mems_idx + 16);
738 insn_count = count_insns_in_loop (loop);
740 if (loop_dump_stream)
742 fprintf (loop_dump_stream, "\nLoop from %d to %d: %d real insns.\n",
743 INSN_UID (loop_start), INSN_UID (loop_end), insn_count);
744 if (loop->cont)
745 fprintf (loop_dump_stream, "Continue at insn %d.\n",
746 INSN_UID (loop->cont));
749 /* Scan through the loop finding insns that are safe to move.
750 Set REGS->ARRAY[I].SET_IN_LOOP negative for the reg I being set, so that
751 this reg will be considered invariant for subsequent insns.
752 We consider whether subsequent insns use the reg
753 in deciding whether it is worth actually moving.
755 MAYBE_NEVER is nonzero if we have passed a conditional jump insn
756 and therefore it is possible that the insns we are scanning
757 would never be executed. At such times, we must make sure
758 that it is safe to execute the insn once instead of zero times.
759 When MAYBE_NEVER is 0, all insns will be executed at least once
760 so that is not a problem. */
762 for (in_libcall = 0, p = next_insn_in_loop (loop, loop->scan_start);
763 p != NULL_RTX;
764 p = next_insn_in_loop (loop, p))
766 if (in_libcall && INSN_P (p) && find_reg_note (p, REG_RETVAL, NULL_RTX))
767 in_libcall--;
768 if (GET_CODE (p) == INSN)
770 temp = find_reg_note (p, REG_LIBCALL, NULL_RTX);
771 if (temp)
772 in_libcall++;
773 if (! in_libcall
774 && (set = single_set (p))
775 && REG_P (SET_DEST (set))
776 #ifdef PIC_OFFSET_TABLE_REG_CALL_CLOBBERED
777 && SET_DEST (set) != pic_offset_table_rtx
778 #endif
779 && ! regs->array[REGNO (SET_DEST (set))].may_not_optimize)
781 int tem1 = 0;
782 int tem2 = 0;
783 int move_insn = 0;
784 int insert_temp = 0;
785 rtx src = SET_SRC (set);
786 rtx dependencies = 0;
788 /* Figure out what to use as a source of this insn. If a
789 REG_EQUIV note is given or if a REG_EQUAL note with a
790 constant operand is specified, use it as the source and
791 mark that we should move this insn by calling
792 emit_move_insn rather that duplicating the insn.
794 Otherwise, only use the REG_EQUAL contents if a REG_RETVAL
795 note is present. */
796 temp = find_reg_note (p, REG_EQUIV, NULL_RTX);
797 if (temp)
798 src = XEXP (temp, 0), move_insn = 1;
799 else
801 temp = find_reg_note (p, REG_EQUAL, NULL_RTX);
802 if (temp && CONSTANT_P (XEXP (temp, 0)))
803 src = XEXP (temp, 0), move_insn = 1;
804 if (temp && find_reg_note (p, REG_RETVAL, NULL_RTX))
806 src = XEXP (temp, 0);
807 /* A libcall block can use regs that don't appear in
808 the equivalent expression. To move the libcall,
809 we must move those regs too. */
810 dependencies = libcall_other_reg (p, src);
814 /* For parallels, add any possible uses to the dependencies, as
815 we can't move the insn without resolving them first.
816 MEMs inside CLOBBERs may also reference registers; these
817 count as implicit uses. */
818 if (GET_CODE (PATTERN (p)) == PARALLEL)
820 for (i = 0; i < XVECLEN (PATTERN (p), 0); i++)
822 rtx x = XVECEXP (PATTERN (p), 0, i);
823 if (GET_CODE (x) == USE)
824 dependencies
825 = gen_rtx_EXPR_LIST (VOIDmode, XEXP (x, 0),
826 dependencies);
827 else if (GET_CODE (x) == CLOBBER
828 && MEM_P (XEXP (x, 0)))
829 dependencies = find_regs_nested (dependencies,
830 XEXP (XEXP (x, 0), 0));
834 if (/* The register is used in basic blocks other
835 than the one where it is set (meaning that
836 something after this point in the loop might
837 depend on its value before the set). */
838 ! reg_in_basic_block_p (p, SET_DEST (set))
839 /* And the set is not guaranteed to be executed once
840 the loop starts, or the value before the set is
841 needed before the set occurs...
843 ??? Note we have quadratic behavior here, mitigated
844 by the fact that the previous test will often fail for
845 large loops. Rather than re-scanning the entire loop
846 each time for register usage, we should build tables
847 of the register usage and use them here instead. */
848 && (maybe_never
849 || loop_reg_used_before_p (loop, set, p)))
850 /* It is unsafe to move the set. However, it may be OK to
851 move the source into a new pseudo, and substitute a
852 reg-to-reg copy for the original insn.
854 This code used to consider it OK to move a set of a variable
855 which was not created by the user and not used in an exit
856 test.
857 That behavior is incorrect and was removed. */
858 insert_temp = 1;
860 /* Don't try to optimize a MODE_CC set with a constant
861 source. It probably will be combined with a conditional
862 jump. */
863 if (GET_MODE_CLASS (GET_MODE (SET_DEST (set))) == MODE_CC
864 && CONSTANT_P (src))
866 /* Don't try to optimize a register that was made
867 by loop-optimization for an inner loop.
868 We don't know its life-span, so we can't compute
869 the benefit. */
870 else if (REGNO (SET_DEST (set)) >= max_reg_before_loop)
872 /* Don't move the source and add a reg-to-reg copy:
873 - with -Os (this certainly increases size),
874 - if the mode doesn't support copy operations (obviously),
875 - if the source is already a reg (the motion will gain nothing),
876 - if the source is a legitimate constant (likewise). */
877 else if (insert_temp
878 && (optimize_size
879 || ! can_copy_p (GET_MODE (SET_SRC (set)))
880 || REG_P (SET_SRC (set))
881 || (CONSTANT_P (SET_SRC (set))
882 && LEGITIMATE_CONSTANT_P (SET_SRC (set)))))
884 else if ((tem = loop_invariant_p (loop, src))
885 && (dependencies == 0
886 || (tem2
887 = loop_invariant_p (loop, dependencies)) != 0)
888 && (regs->array[REGNO (SET_DEST (set))].set_in_loop == 1
889 || (tem1
890 = consec_sets_invariant_p
891 (loop, SET_DEST (set),
892 regs->array[REGNO (SET_DEST (set))].set_in_loop,
893 p)))
894 /* If the insn can cause a trap (such as divide by zero),
895 can't move it unless it's guaranteed to be executed
896 once loop is entered. Even a function call might
897 prevent the trap insn from being reached
898 (since it might exit!) */
899 && ! ((maybe_never || call_passed)
900 && may_trap_p (src)))
902 struct movable *m;
903 int regno = REGNO (SET_DEST (set));
905 /* A potential lossage is where we have a case where two insns
906 can be combined as long as they are both in the loop, but
907 we move one of them outside the loop. For large loops,
908 this can lose. The most common case of this is the address
909 of a function being called.
911 Therefore, if this register is marked as being used
912 exactly once if we are in a loop with calls
913 (a "large loop"), see if we can replace the usage of
914 this register with the source of this SET. If we can,
915 delete this insn.
917 Don't do this if P has a REG_RETVAL note or if we have
918 SMALL_REGISTER_CLASSES and SET_SRC is a hard register. */
920 if (loop_info->has_call
921 && regs->array[regno].single_usage != 0
922 && regs->array[regno].single_usage != const0_rtx
923 && REGNO_FIRST_UID (regno) == INSN_UID (p)
924 && (REGNO_LAST_UID (regno)
925 == INSN_UID (regs->array[regno].single_usage))
926 && regs->array[regno].set_in_loop == 1
927 && GET_CODE (SET_SRC (set)) != ASM_OPERANDS
928 && ! side_effects_p (SET_SRC (set))
929 && ! find_reg_note (p, REG_RETVAL, NULL_RTX)
930 && (! SMALL_REGISTER_CLASSES
931 || (! (REG_P (SET_SRC (set))
932 && (REGNO (SET_SRC (set))
933 < FIRST_PSEUDO_REGISTER))))
934 && regno >= FIRST_PSEUDO_REGISTER
935 /* This test is not redundant; SET_SRC (set) might be
936 a call-clobbered register and the life of REGNO
937 might span a call. */
938 && ! modified_between_p (SET_SRC (set), p,
939 regs->array[regno].single_usage)
940 && no_labels_between_p (p,
941 regs->array[regno].single_usage)
942 && validate_replace_rtx (SET_DEST (set), SET_SRC (set),
943 regs->array[regno].single_usage))
945 /* Replace any usage in a REG_EQUAL note. Must copy
946 the new source, so that we don't get rtx sharing
947 between the SET_SOURCE and REG_NOTES of insn p. */
948 REG_NOTES (regs->array[regno].single_usage)
949 = (replace_rtx
950 (REG_NOTES (regs->array[regno].single_usage),
951 SET_DEST (set), copy_rtx (SET_SRC (set))));
953 delete_insn (p);
954 for (i = 0; i < LOOP_REGNO_NREGS (regno, SET_DEST (set));
955 i++)
956 regs->array[regno+i].set_in_loop = 0;
957 continue;
960 m = xmalloc (sizeof (struct movable));
961 m->next = 0;
962 m->insn = p;
963 m->set_src = src;
964 m->dependencies = dependencies;
965 m->set_dest = SET_DEST (set);
966 m->force = 0;
967 m->consec
968 = regs->array[REGNO (SET_DEST (set))].set_in_loop - 1;
969 m->done = 0;
970 m->forces = 0;
971 m->partial = 0;
972 m->move_insn = move_insn;
973 m->move_insn_first = 0;
974 m->insert_temp = insert_temp;
975 m->is_equiv = (find_reg_note (p, REG_EQUIV, NULL_RTX) != 0);
976 m->savemode = VOIDmode;
977 m->regno = regno;
978 /* Set M->cond if either loop_invariant_p
979 or consec_sets_invariant_p returned 2
980 (only conditionally invariant). */
981 m->cond = ((tem | tem1 | tem2) > 1);
982 m->global = LOOP_REG_GLOBAL_P (loop, regno);
983 m->match = 0;
984 m->lifetime = LOOP_REG_LIFETIME (loop, regno);
985 m->savings = regs->array[regno].n_times_set;
986 if (find_reg_note (p, REG_RETVAL, NULL_RTX))
987 m->savings += libcall_benefit (p);
988 for (i = 0; i < LOOP_REGNO_NREGS (regno, SET_DEST (set)); i++)
989 regs->array[regno+i].set_in_loop = move_insn ? -2 : -1;
990 /* Add M to the end of the chain MOVABLES. */
991 loop_movables_add (movables, m);
993 if (m->consec > 0)
995 /* It is possible for the first instruction to have a
996 REG_EQUAL note but a non-invariant SET_SRC, so we must
997 remember the status of the first instruction in case
998 the last instruction doesn't have a REG_EQUAL note. */
999 m->move_insn_first = m->move_insn;
1001 /* Skip this insn, not checking REG_LIBCALL notes. */
1002 p = next_nonnote_insn (p);
1003 /* Skip the consecutive insns, if there are any. */
1004 p = skip_consec_insns (p, m->consec);
1005 /* Back up to the last insn of the consecutive group. */
1006 p = prev_nonnote_insn (p);
1008 /* We must now reset m->move_insn, m->is_equiv, and
1009 possibly m->set_src to correspond to the effects of
1010 all the insns. */
1011 temp = find_reg_note (p, REG_EQUIV, NULL_RTX);
1012 if (temp)
1013 m->set_src = XEXP (temp, 0), m->move_insn = 1;
1014 else
1016 temp = find_reg_note (p, REG_EQUAL, NULL_RTX);
1017 if (temp && CONSTANT_P (XEXP (temp, 0)))
1018 m->set_src = XEXP (temp, 0), m->move_insn = 1;
1019 else
1020 m->move_insn = 0;
1023 m->is_equiv
1024 = (find_reg_note (p, REG_EQUIV, NULL_RTX) != 0);
1027 /* If this register is always set within a STRICT_LOW_PART
1028 or set to zero, then its high bytes are constant.
1029 So clear them outside the loop and within the loop
1030 just load the low bytes.
1031 We must check that the machine has an instruction to do so.
1032 Also, if the value loaded into the register
1033 depends on the same register, this cannot be done. */
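	  /* A sketch of the RTL shape matched just below (register number and
	     modes are hypothetical):

	         (set (reg:SI 117) (const_int 0))
	         (set (strict_low_part (subreg:HI (reg:SI 117) 0)) (mem:HI ...))

	     The register is zeroed and then only its low part is rewritten, so
	     the zeroing can be hoisted out of the loop while the narrow load
	     stays inside.  */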
1034 else if (SET_SRC (set) == const0_rtx
1035 && GET_CODE (NEXT_INSN (p)) == INSN
1036 && (set1 = single_set (NEXT_INSN (p)))
1037 && GET_CODE (set1) == SET
1038 && (GET_CODE (SET_DEST (set1)) == STRICT_LOW_PART)
1039 && (GET_CODE (XEXP (SET_DEST (set1), 0)) == SUBREG)
1040 && (SUBREG_REG (XEXP (SET_DEST (set1), 0))
1041 == SET_DEST (set))
1042 && !reg_mentioned_p (SET_DEST (set), SET_SRC (set1)))
1044 int regno = REGNO (SET_DEST (set));
1045 if (regs->array[regno].set_in_loop == 2)
1047 struct movable *m;
1048 m = xmalloc (sizeof (struct movable));
1049 m->next = 0;
1050 m->insn = p;
1051 m->set_dest = SET_DEST (set);
1052 m->dependencies = 0;
1053 m->force = 0;
1054 m->consec = 0;
1055 m->done = 0;
1056 m->forces = 0;
1057 m->move_insn = 0;
1058 m->move_insn_first = 0;
1059 m->insert_temp = insert_temp;
1060 m->partial = 1;
1061 /* If the insn may not be executed on some cycles,
1062 we can't clear the whole reg; clear just high part.
1063 Not even if the reg is used only within this loop.
1064 Consider this:
1065 while (1)
1066 while (s != t) {
1067 if (foo ()) x = *s;
1068 use (x);
1070 Clearing x before the inner loop could clobber a value
1071 being saved from the last time around the outer loop.
1072 However, if the reg is not used outside this loop
1073 and all uses of the register are in the same
1074 basic block as the store, there is no problem.
1076 If this insn was made by loop, we don't know its
1077 INSN_LUID and hence must make a conservative
1078 assumption. */
1079 m->global = (INSN_UID (p) >= max_uid_for_loop
1080 || LOOP_REG_GLOBAL_P (loop, regno)
1081 || (labels_in_range_p
1082 (p, REGNO_FIRST_LUID (regno))));
1083 if (maybe_never && m->global)
1084 m->savemode = GET_MODE (SET_SRC (set1));
1085 else
1086 m->savemode = VOIDmode;
1087 m->regno = regno;
1088 m->cond = 0;
1089 m->match = 0;
1090 m->lifetime = LOOP_REG_LIFETIME (loop, regno);
1091 m->savings = 1;
1092 for (i = 0;
1093 i < LOOP_REGNO_NREGS (regno, SET_DEST (set));
1094 i++)
1095 regs->array[regno+i].set_in_loop = -1;
1096 /* Add M to the end of the chain MOVABLES. */
1097 loop_movables_add (movables, m);
1102 /* Past a call insn, we get to insns which might not be executed
1103 because the call might exit. This matters for insns that trap.
1104 Constant and pure call insns always return, so they don't count. */
1105 else if (GET_CODE (p) == CALL_INSN && ! CONST_OR_PURE_CALL_P (p))
1106 call_passed = 1;
1107 /* Past a label or a jump, we get to insns for which we
1108 can't count on whether or how many times they will be
1109 executed during each iteration. Therefore, we can
1110 only move out sets of trivial variables
1111 (those not used after the loop). */
1112 /* Similar code appears twice in strength_reduce. */
1113 else if ((GET_CODE (p) == CODE_LABEL || GET_CODE (p) == JUMP_INSN)
1114 /* If we enter the loop in the middle, and scan around to the
1115 beginning, don't set maybe_never for that. This must be an
1116 unconditional jump, otherwise the code at the top of the
1117 loop might never be executed. Unconditional jumps are
1118 followed by a barrier then the loop_end. */
1119 && ! (GET_CODE (p) == JUMP_INSN && JUMP_LABEL (p) == loop->top
1120 && NEXT_INSN (NEXT_INSN (p)) == loop_end
1121 && any_uncondjump_p (p)))
1122 maybe_never = 1;
1123 else if (GET_CODE (p) == NOTE)
1125 /* At the virtual top of a converted loop, insns are again known to
1126 be executed: logically, the loop begins here even though the exit
1127 code has been duplicated. */
1128 if (NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_VTOP && loop_depth == 0)
1129 maybe_never = call_passed = 0;
1130 else if (NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_BEG)
1131 loop_depth++;
1132 else if (NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_END)
1133 loop_depth--;
1137 /* If one movable subsumes another, ignore that other. */
1139 ignore_some_movables (movables);
1141 /* For each movable insn, see if the reg that it loads
1142    dies right into another conditionally movable insn (i.e. its last use is there).
1143 If so, record that the second insn "forces" the first one,
1144 since the second can be moved only if the first is. */
1146 force_movables (movables);
1148 /* See if there are multiple movable insns that load the same value.
1149 If there are, make all but the first point at the first one
1150 through the `match' field, and add the priorities of them
1151 all together as the priority of the first. */
1153 combine_movables (movables, regs);
1155 /* Now consider each movable insn to decide whether it is worth moving.
1156 Store 0 in regs->array[I].set_in_loop for each reg I that is moved.
1158 For machines with few registers this increases code size, so do not
1159      move movables when optimizing for code size on such machines.
1160 (The 18 below is the value for i386.) */
1162 if (!optimize_size
1163 || (reg_class_size[GENERAL_REGS] > 18 && !loop_info->has_call))
1165 move_movables (loop, movables, threshold, insn_count);
1167 /* Recalculate regs->array if move_movables has created new
1168 registers. */
1169 if (max_reg_num () > regs->num)
1171 loop_regs_scan (loop, 0);
1172 for (update_start = loop_start;
1173 PREV_INSN (update_start)
1174 && GET_CODE (PREV_INSN (update_start)) != CODE_LABEL;
1175 update_start = PREV_INSN (update_start))
1177 update_end = NEXT_INSN (loop_end);
1179 reg_scan_update (update_start, update_end, loop_max_reg);
1180 loop_max_reg = max_reg_num ();
1184 /* Now candidates that still are negative are those not moved.
1185 Change regs->array[I].set_in_loop to indicate that those are not actually
1186 invariant. */
1187 for (i = 0; i < regs->num; i++)
1188 if (regs->array[i].set_in_loop < 0)
1189 regs->array[i].set_in_loop = regs->array[i].n_times_set;
1191 /* Now that we've moved some things out of the loop, we might be able to
1192 hoist even more memory references. */
1193 load_mems (loop);
1195 /* Recalculate regs->array if load_mems has created new registers. */
1196 if (max_reg_num () > regs->num)
1197 loop_regs_scan (loop, 0);
1199 for (update_start = loop_start;
1200 PREV_INSN (update_start)
1201 && GET_CODE (PREV_INSN (update_start)) != CODE_LABEL;
1202 update_start = PREV_INSN (update_start))
1204 update_end = NEXT_INSN (loop_end);
1206 reg_scan_update (update_start, update_end, loop_max_reg);
1207 loop_max_reg = max_reg_num ();
1209 if (flag_strength_reduce)
1211 if (update_end && GET_CODE (update_end) == CODE_LABEL)
1212 /* Ensure our label doesn't go away. */
1213 LABEL_NUSES (update_end)++;
1215 strength_reduce (loop, flags);
1217 reg_scan_update (update_start, update_end, loop_max_reg);
1218 loop_max_reg = max_reg_num ();
1220 if (update_end && GET_CODE (update_end) == CODE_LABEL
1221 && --LABEL_NUSES (update_end) == 0)
1222 delete_related_insns (update_end);
1226   /* The movable information was required for strength reduction; free it now.  */
1227 loop_movables_free (movables);
1229 free (regs->array);
1230 regs->array = 0;
1231 regs->num = 0;
1234 /* Add elements to *OUTPUT to record all the pseudo-regs
1235 mentioned in IN_THIS but not mentioned in NOT_IN_THIS. */
1237 static void
1238 record_excess_regs (rtx in_this, rtx not_in_this, rtx *output)
1240 enum rtx_code code;
1241 const char *fmt;
1242 int i;
1244 code = GET_CODE (in_this);
1246 switch (code)
1248 case PC:
1249 case CC0:
1250 case CONST_INT:
1251 case CONST_DOUBLE:
1252 case CONST:
1253 case SYMBOL_REF:
1254 case LABEL_REF:
1255 return;
1257 case REG:
1258 if (REGNO (in_this) >= FIRST_PSEUDO_REGISTER
1259 && ! reg_mentioned_p (in_this, not_in_this))
1260 *output = gen_rtx_EXPR_LIST (VOIDmode, in_this, *output);
1261 return;
1263 default:
1264 break;
1267 fmt = GET_RTX_FORMAT (code);
1268 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1270 int j;
1272 switch (fmt[i])
1274 case 'E':
1275 for (j = 0; j < XVECLEN (in_this, i); j++)
1276 record_excess_regs (XVECEXP (in_this, i, j), not_in_this, output);
1277 break;
1279 case 'e':
1280 record_excess_regs (XEXP (in_this, i), not_in_this, output);
1281 break;
1286 /* Check what regs are referred to in the libcall block ending with INSN,
1287 aside from those mentioned in the equivalent value.
1288 If there are none, return 0.
1289 If there are one or more, return an EXPR_LIST containing all of them. */
1291 static rtx
1292 libcall_other_reg (rtx insn, rtx equiv)
1294 rtx note = find_reg_note (insn, REG_RETVAL, NULL_RTX);
1295 rtx p = XEXP (note, 0);
1296 rtx output = 0;
1298 /* First, find all the regs used in the libcall block
1299 that are not mentioned as inputs to the result. */
1301 while (p != insn)
1303 if (GET_CODE (p) == INSN || GET_CODE (p) == JUMP_INSN
1304 || GET_CODE (p) == CALL_INSN)
1305 record_excess_regs (PATTERN (p), equiv, &output);
1306 p = NEXT_INSN (p);
1309 return output;
1312 /* Return 1 if all uses of REG
1313 are between INSN and the end of the basic block. */
1315 static int
1316 reg_in_basic_block_p (rtx insn, rtx reg)
1318 int regno = REGNO (reg);
1319 rtx p;
1321 if (REGNO_FIRST_UID (regno) != INSN_UID (insn))
1322 return 0;
1324 /* Search this basic block for the already recorded last use of the reg. */
1325 for (p = insn; p; p = NEXT_INSN (p))
1327 switch (GET_CODE (p))
1329 case NOTE:
1330 break;
1332 case INSN:
1333 case CALL_INSN:
1334 /* Ordinary insn: if this is the last use, we win. */
1335 if (REGNO_LAST_UID (regno) == INSN_UID (p))
1336 return 1;
1337 break;
1339 case JUMP_INSN:
1340 /* Jump insn: if this is the last use, we win. */
1341 if (REGNO_LAST_UID (regno) == INSN_UID (p))
1342 return 1;
1343 /* Otherwise, it's the end of the basic block, so we lose. */
1344 return 0;
1346 case CODE_LABEL:
1347 case BARRIER:
1348 /* It's the end of the basic block, so we lose. */
1349 return 0;
1351 default:
1352 break;
1356 /* The "last use" that was recorded can't be found after the first
1357 use. This can happen when the last use was deleted while
1358 processing an inner loop, this inner loop was then completely
1359 unrolled, and the outer loop is always exited after the inner loop,
1360 so that everything after the first use becomes a single basic block. */
1361 return 1;
1364 /* Compute the benefit of eliminating the insns in the block whose
1365 last insn is LAST. This may be a group of insns used to compute a
1366 value directly or can contain a library call. */
1368 static int
1369 libcall_benefit (rtx last)
1371 rtx insn;
1372 int benefit = 0;
1374 for (insn = XEXP (find_reg_note (last, REG_RETVAL, NULL_RTX), 0);
1375 insn != last; insn = NEXT_INSN (insn))
1377 if (GET_CODE (insn) == CALL_INSN)
1378 benefit += 10; /* Assume at least this many insns in a library
1379 routine. */
1380 else if (GET_CODE (insn) == INSN
1381 && GET_CODE (PATTERN (insn)) != USE
1382 && GET_CODE (PATTERN (insn)) != CLOBBER)
1383 benefit++;
1386 return benefit;
1389 /* Skip COUNT insns from INSN, counting library calls as 1 insn. */
1391 static rtx
1392 skip_consec_insns (rtx insn, int count)
1394 for (; count > 0; count--)
1396 rtx temp;
1398 /* If first insn of libcall sequence, skip to end. */
1399 /* Do this at start of loop, since INSN is guaranteed to
1400 be an insn here. */
1401 if (GET_CODE (insn) != NOTE
1402 && (temp = find_reg_note (insn, REG_LIBCALL, NULL_RTX)))
1403 insn = XEXP (temp, 0);
1406 insn = NEXT_INSN (insn);
1407 while (GET_CODE (insn) == NOTE);
1410 return insn;
1413 /* Ignore any movable whose insn falls within a libcall
1414 which is part of another movable.
1415 We make use of the fact that the movable for the libcall value
1416 was made later and so appears later on the chain. */
1418 static void
1419 ignore_some_movables (struct loop_movables *movables)
1421 struct movable *m, *m1;
1423 for (m = movables->head; m; m = m->next)
1425 /* Is this a movable for the value of a libcall? */
1426 rtx note = find_reg_note (m->insn, REG_RETVAL, NULL_RTX);
1427 if (note)
1429 rtx insn;
1430 /* Check for earlier movables inside that range,
1431 and mark them invalid. We cannot use LUIDs here because
1432 insns created by loop.c for prior loops don't have LUIDs.
1433 Rather than reject all such insns from movables, we just
1434 explicitly check each insn in the libcall (since invariant
1435 libcalls aren't that common). */
1436 for (insn = XEXP (note, 0); insn != m->insn; insn = NEXT_INSN (insn))
1437 for (m1 = movables->head; m1 != m; m1 = m1->next)
1438 if (m1->insn == insn)
1439 m1->done = 1;
1444 /* For each movable insn, see if the reg that it loads
1445    dies right into another conditionally movable insn (i.e. its last use is there).
1446 If so, record that the second insn "forces" the first one,
1447 since the second can be moved only if the first is. */
1449 static void
1450 force_movables (struct loop_movables *movables)
1452 struct movable *m, *m1;
1454 for (m1 = movables->head; m1; m1 = m1->next)
1455 /* Omit this if moving just the (SET (REG) 0) of a zero-extend. */
1456 if (!m1->partial && !m1->done)
1458 int regno = m1->regno;
1459 for (m = m1->next; m; m = m->next)
1460 /* ??? Could this be a bug? What if CSE caused the
1461 register of M1 to be used after this insn?
1462 Since CSE does not update regno_last_uid,
1463 this insn M->insn might not be where it dies.
1464 But very likely this doesn't matter; what matters is
1465 that M's reg is computed from M1's reg. */
1466 if (INSN_UID (m->insn) == REGNO_LAST_UID (regno)
1467 && !m->done)
1468 break;
1469 if (m != 0 && m->set_src == m1->set_dest
1470 /* If m->consec, m->set_src isn't valid. */
1471 && m->consec == 0)
1472 m = 0;
1474       /* Increase the priority of moving the first insn
1475 since it permits the second to be moved as well.
1476 Likewise for insns already forced by the first insn. */
1477 if (m != 0)
1479 struct movable *m2;
1481 m->forces = m1;
1482 for (m2 = m1; m2; m2 = m2->forces)
1484 m2->lifetime += m->lifetime;
1485 m2->savings += m->savings;
1491 /* Find invariant expressions that are equal and can be combined into
1492 one register. */
1494 static void
1495 combine_movables (struct loop_movables *movables, struct loop_regs *regs)
1497 struct movable *m;
1498 char *matched_regs = xmalloc (regs->num);
1499 enum machine_mode mode;
1501 /* Regs that are set more than once are not allowed to match
1502 or be matched. I'm no longer sure why not. */
1503 /* Only pseudo registers are allowed to match or be matched,
1504 since move_movables does not validate the change. */
1505 /* Perhaps testing m->consec_sets would be more appropriate here? */
1507 for (m = movables->head; m; m = m->next)
1508 if (m->match == 0 && regs->array[m->regno].n_times_set == 1
1509 && m->regno >= FIRST_PSEUDO_REGISTER
1510 && !m->insert_temp
1511 && !m->partial)
1513 struct movable *m1;
1514 int regno = m->regno;
1516 memset (matched_regs, 0, regs->num);
1517 matched_regs[regno] = 1;
1519 /* We want later insns to match the first one. Don't make the first
1520 one match any later ones. So start this loop at m->next. */
1521 for (m1 = m->next; m1; m1 = m1->next)
1522 if (m != m1 && m1->match == 0
1523 && !m1->insert_temp
1524 && regs->array[m1->regno].n_times_set == 1
1525 && m1->regno >= FIRST_PSEUDO_REGISTER
1526 /* A reg used outside the loop mustn't be eliminated. */
1527 && !m1->global
1528 /* A reg used for zero-extending mustn't be eliminated. */
1529 && !m1->partial
1530 && (matched_regs[m1->regno]
1533 /* Can combine regs with different modes loaded from the
1534 same constant only if the modes are the same or
1535 if both are integer modes with M wider or the same
1536 width as M1. The check for integer is redundant, but
1537 safe, since the only case of differing destination
1538 modes with equal sources is when both sources are
1539 VOIDmode, i.e., CONST_INT. */
1540 (GET_MODE (m->set_dest) == GET_MODE (m1->set_dest)
1541 || (GET_MODE_CLASS (GET_MODE (m->set_dest)) == MODE_INT
1542 && GET_MODE_CLASS (GET_MODE (m1->set_dest)) == MODE_INT
1543 && (GET_MODE_BITSIZE (GET_MODE (m->set_dest))
1544 >= GET_MODE_BITSIZE (GET_MODE (m1->set_dest)))))
1545 /* See if the source of M1 says it matches M. */
1546 && ((REG_P (m1->set_src)
1547 && matched_regs[REGNO (m1->set_src)])
1548 || rtx_equal_for_loop_p (m->set_src, m1->set_src,
1549 movables, regs))))
1550 && ((m->dependencies == m1->dependencies)
1551 || rtx_equal_p (m->dependencies, m1->dependencies)))
1553 m->lifetime += m1->lifetime;
1554 m->savings += m1->savings;
1555 m1->done = 1;
1556 m1->match = m;
1557 matched_regs[m1->regno] = 1;
1561 /* Now combine the regs used for zero-extension.
1562 This can be done for those not marked `global'
1563 provided their lives don't overlap. */
1565 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1566 mode = GET_MODE_WIDER_MODE (mode))
1568 struct movable *m0 = 0;
1570 /* Combine all the registers for extension from mode MODE.
1571 Don't combine any that are used outside this loop. */
1572 for (m = movables->head; m; m = m->next)
1573 if (m->partial && ! m->global
1574 && mode == GET_MODE (SET_SRC (PATTERN (NEXT_INSN (m->insn)))))
1576 struct movable *m1;
1578 int first = REGNO_FIRST_LUID (m->regno);
1579 int last = REGNO_LAST_LUID (m->regno);
1581 if (m0 == 0)
1583 /* First one: don't check for overlap, just record it. */
1584 m0 = m;
1585 continue;
1588 /* Make sure they extend to the same mode.
1589 (Almost always true.) */
1590 if (GET_MODE (m->set_dest) != GET_MODE (m0->set_dest))
1591 continue;
1593 /* We already have one: check for overlap with those
1594 already combined together. */
1595 for (m1 = movables->head; m1 != m; m1 = m1->next)
1596 if (m1 == m0 || (m1->partial && m1->match == m0))
1597 if (! (REGNO_FIRST_LUID (m1->regno) > last
1598 || REGNO_LAST_LUID (m1->regno) < first))
1599 goto overlap;
1601 /* No overlap: we can combine this with the others. */
1602 m0->lifetime += m->lifetime;
1603 m0->savings += m->savings;
1604 m->done = 1;
1605 m->match = m0;
1607 overlap:
1612 /* Clean up. */
1613 free (matched_regs);
1616 /* Returns the number of movable instructions in LOOP that were not
1617 moved outside the loop. */
1619 static int
1620 num_unmoved_movables (const struct loop *loop)
1622 int num = 0;
1623 struct movable *m;
1625 for (m = LOOP_MOVABLES (loop)->head; m; m = m->next)
1626 if (!m->done)
1627 ++num;
1629 return num;
1633 /* Return 1 if regs X and Y will become the same if moved. */
1635 static int
1636 regs_match_p (rtx x, rtx y, struct loop_movables *movables)
1638 unsigned int xn = REGNO (x);
1639 unsigned int yn = REGNO (y);
1640 struct movable *mx, *my;
1642 for (mx = movables->head; mx; mx = mx->next)
1643 if (mx->regno == xn)
1644 break;
1646 for (my = movables->head; my; my = my->next)
1647 if (my->regno == yn)
1648 break;
1650 return (mx && my
1651 && ((mx->match == my->match && mx->match != 0)
1652 || mx->match == my
1653 || mx == my->match));
1656 /* Return 1 if X and Y are identical-looking rtx's.
1657 This is the Lisp function EQUAL for rtx arguments.
1659 If two registers are matching movables or a movable register and an
1660 equivalent constant, consider them equal. */
1662 static int
1663 rtx_equal_for_loop_p (rtx x, rtx y, struct loop_movables *movables,
1664 struct loop_regs *regs)
1666 int i;
1667 int j;
1668 struct movable *m;
1669 enum rtx_code code;
1670 const char *fmt;
1672 if (x == y)
1673 return 1;
1674 if (x == 0 || y == 0)
1675 return 0;
1677 code = GET_CODE (x);
1679 /* If we have a register and a constant, they may sometimes be
1680 equal. */
1681 if (REG_P (x) && regs->array[REGNO (x)].set_in_loop == -2
1682 && CONSTANT_P (y))
1684 for (m = movables->head; m; m = m->next)
1685 if (m->move_insn && m->regno == REGNO (x)
1686 && rtx_equal_p (m->set_src, y))
1687 return 1;
1689 else if (REG_P (y) && regs->array[REGNO (y)].set_in_loop == -2
1690 && CONSTANT_P (x))
1692 for (m = movables->head; m; m = m->next)
1693 if (m->move_insn && m->regno == REGNO (y)
1694 && rtx_equal_p (m->set_src, x))
1695 return 1;
1698 /* Otherwise, rtx's of different codes cannot be equal. */
1699 if (code != GET_CODE (y))
1700 return 0;
1702 /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent.
1703 (REG:SI x) and (REG:HI x) are NOT equivalent. */
1705 if (GET_MODE (x) != GET_MODE (y))
1706 return 0;
1708 /* These three types of rtx's can be compared nonrecursively. */
1709 if (code == REG)
1710 return (REGNO (x) == REGNO (y) || regs_match_p (x, y, movables));
1712 if (code == LABEL_REF)
1713 return XEXP (x, 0) == XEXP (y, 0);
1714 if (code == SYMBOL_REF)
1715 return XSTR (x, 0) == XSTR (y, 0);
1717 /* Compare the elements. If any pair of corresponding elements
1718 fails to match, return 0 for the whole thing. */
1720 fmt = GET_RTX_FORMAT (code);
1721 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1723 switch (fmt[i])
1725 case 'w':
1726 if (XWINT (x, i) != XWINT (y, i))
1727 return 0;
1728 break;
1730 case 'i':
1731 if (XINT (x, i) != XINT (y, i))
1732 return 0;
1733 break;
1735 case 'E':
1736 /* Two vectors must have the same length. */
1737 if (XVECLEN (x, i) != XVECLEN (y, i))
1738 return 0;
1740 /* And the corresponding elements must match. */
1741 for (j = 0; j < XVECLEN (x, i); j++)
1742 if (rtx_equal_for_loop_p (XVECEXP (x, i, j), XVECEXP (y, i, j),
1743 movables, regs) == 0)
1744 return 0;
1745 break;
1747 case 'e':
1748 if (rtx_equal_for_loop_p (XEXP (x, i), XEXP (y, i), movables, regs)
1749 == 0)
1750 return 0;
1751 break;
1753 case 's':
1754 if (strcmp (XSTR (x, i), XSTR (y, i)))
1755 return 0;
1756 break;
1758 case 'u':
1759 /* These are just backpointers, so they don't matter. */
1760 break;
1762 case '0':
1763 break;
1765 /* It is believed that rtx's at this level will never
1766 contain anything but integers and other rtx's,
1767 except for within LABEL_REFs and SYMBOL_REFs. */
1768 default:
1769 abort ();
1772 return 1;
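/* Illustrative example (register numbers hypothetical): if the movables
   for pseudos 117 and 123 match each other, then
   (plus:SI (reg:SI 117) (const_int 4)) and
   (plus:SI (reg:SI 123) (const_int 4)) are considered equal here even
   though plain rtx_equal_p would reject them.  */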
1775 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to all
1776 insns in INSNS which use the reference. LABEL_NUSES for CODE_LABEL
1777 references is incremented once for each added note. */
1779 static void
1780 add_label_notes (rtx x, rtx insns)
1782 enum rtx_code code = GET_CODE (x);
1783 int i, j;
1784 const char *fmt;
1785 rtx insn;
1787 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
1789 /* This code used to ignore labels that referred to dispatch tables to
1790 avoid flow generating (slightly) worse code.
1792 We no longer ignore such label references (see LABEL_REF handling in
1793 mark_jump_label for additional information). */
1794 for (insn = insns; insn; insn = NEXT_INSN (insn))
1795 if (reg_mentioned_p (XEXP (x, 0), insn))
1797 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, XEXP (x, 0),
1798 REG_NOTES (insn));
1799 if (LABEL_P (XEXP (x, 0)))
1800 LABEL_NUSES (XEXP (x, 0))++;
1804 fmt = GET_RTX_FORMAT (code);
1805 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1807 if (fmt[i] == 'e')
1808 add_label_notes (XEXP (x, i), insns);
1809 else if (fmt[i] == 'E')
1810 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
1811 add_label_notes (XVECEXP (x, i, j), insns);
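/* Illustrative example (label name hypothetical): if a hoisted sequence
   contains (set (reg:SI 130) (label_ref L42)), the walk above attaches a
   REG_LABEL note for L42 to every insn in INSNS that mentions it and bumps
   LABEL_NUSES (L42) so the label is not deleted as unused.  */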
1815 /* Scan MOVABLES, and move the insns that deserve to be moved.
1816 If two matching movables are combined, replace one reg with the
1817 other throughout. */
1819 static void
1820 move_movables (struct loop *loop, struct loop_movables *movables,
1821 int threshold, int insn_count)
1823 struct loop_regs *regs = LOOP_REGS (loop);
1824 int nregs = regs->num;
1825 rtx new_start = 0;
1826 struct movable *m;
1827 rtx p;
1828 rtx loop_start = loop->start;
1829 rtx loop_end = loop->end;
1830 /* Map of pseudo-register replacements to handle combining
1831 when we move several insns that load the same value
1832 into different pseudo-registers. */
1833 rtx *reg_map = xcalloc (nregs, sizeof (rtx));
1834 char *already_moved = xcalloc (nregs, sizeof (char));
1836 for (m = movables->head; m; m = m->next)
1838 /* Describe this movable insn. */
1840 if (loop_dump_stream)
1842 fprintf (loop_dump_stream, "Insn %d: regno %d (life %d), ",
1843 INSN_UID (m->insn), m->regno, m->lifetime);
1844 if (m->consec > 0)
1845 fprintf (loop_dump_stream, "consec %d, ", m->consec);
1846 if (m->cond)
1847 fprintf (loop_dump_stream, "cond ");
1848 if (m->force)
1849 fprintf (loop_dump_stream, "force ");
1850 if (m->global)
1851 fprintf (loop_dump_stream, "global ");
1852 if (m->done)
1853 fprintf (loop_dump_stream, "done ");
1854 if (m->move_insn)
1855 fprintf (loop_dump_stream, "move-insn ");
1856 if (m->match)
1857 fprintf (loop_dump_stream, "matches %d ",
1858 INSN_UID (m->match->insn));
1859 if (m->forces)
1860 fprintf (loop_dump_stream, "forces %d ",
1861 INSN_UID (m->forces->insn));
1864 /* Ignore the insn if it's already done (it matched something else).
1865 Otherwise, see if it is now safe to move. */
1867 if (!m->done
1868 && (! m->cond
1869 || (1 == loop_invariant_p (loop, m->set_src)
1870 && (m->dependencies == 0
1871 || 1 == loop_invariant_p (loop, m->dependencies))
1872 && (m->consec == 0
1873 || 1 == consec_sets_invariant_p (loop, m->set_dest,
1874 m->consec + 1,
1875 m->insn))))
1876 && (! m->forces || m->forces->done))
1878 int regno;
1879 rtx p;
1880 int savings = m->savings;
1882 /* We have an insn that is safe to move.
1883 Compute its desirability. */
1885 p = m->insn;
1886 regno = m->regno;
1888 if (loop_dump_stream)
1889 fprintf (loop_dump_stream, "savings %d ", savings);
1891 if (regs->array[regno].moved_once && loop_dump_stream)
1892 fprintf (loop_dump_stream, "halved since already moved ");
1894 /* An insn MUST be moved if we already moved something else
1895 which is safe only if this one is moved too: that is,
1896 if already_moved[REGNO] is nonzero. */
1898 /* An insn is desirable to move if the new lifetime of the
1899 register is no more than THRESHOLD times the old lifetime.
1900 If it's not desirable, it means the loop is so big
1901 that moving won't speed things up much,
1902 and it is liable to make register usage worse. */
1904 /* It is also desirable to move if it can be moved at no
1905 extra cost because something else was already moved. */
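/* A purely illustrative reading of the test below (numbers hypothetical):
   with a threshold of 6, an insn whose register lives for 10 insns and
   whose savings are 1 scores 6 * 1 * 10 = 60, so it is moved whenever the
   loop contains at most 60 insns, or at most 30 insns if this register was
   already moved out of some loop, since the benefit then counts as
   halved.  */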
1907 if (already_moved[regno]
1908 || flag_move_all_movables
1909 || (threshold * savings * m->lifetime) >=
1910 (regs->array[regno].moved_once ? insn_count * 2 : insn_count)
1911 || (m->forces && m->forces->done
1912 && regs->array[m->forces->regno].n_times_set == 1))
1914 int count;
1915 struct movable *m1;
1916 rtx first = NULL_RTX;
1917 rtx newreg = NULL_RTX;
1919 if (m->insert_temp)
1920 newreg = gen_reg_rtx (GET_MODE (m->set_dest));
1922 /* Now move the insns that set the reg. */
1924 if (m->partial && m->match)
1926 rtx newpat, i1;
1927 rtx r1, r2;
1928 /* Find the end of this chain of matching regs.
1929 Thus, we load each reg in the chain from that one reg.
1930 And that reg is loaded with 0 directly,
1931 since it has ->match == 0. */
1932 for (m1 = m; m1->match; m1 = m1->match);
1933 newpat = gen_move_insn (SET_DEST (PATTERN (m->insn)),
1934 SET_DEST (PATTERN (m1->insn)));
1935 i1 = loop_insn_hoist (loop, newpat);
1937 /* Mark the moved, invariant reg as being allowed to
1938 share a hard reg with the other matching invariant. */
1939 REG_NOTES (i1) = REG_NOTES (m->insn);
1940 r1 = SET_DEST (PATTERN (m->insn));
1941 r2 = SET_DEST (PATTERN (m1->insn));
1942 regs_may_share
1943 = gen_rtx_EXPR_LIST (VOIDmode, r1,
1944 gen_rtx_EXPR_LIST (VOIDmode, r2,
1945 regs_may_share));
1946 delete_insn (m->insn);
1948 if (new_start == 0)
1949 new_start = i1;
1951 if (loop_dump_stream)
1952 fprintf (loop_dump_stream, " moved to %d", INSN_UID (i1));
1954 /* If we are to re-generate the item being moved with a
1955 new move insn, first delete what we have and then emit
1956 the move insn before the loop. */
1957 else if (m->move_insn)
1959 rtx i1, temp, seq;
1961 for (count = m->consec; count >= 0; count--)
1963 /* If this is the first insn of a library call sequence,
1964 something is very wrong. */
1965 if (GET_CODE (p) != NOTE
1966 && (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
1967 abort ();
1969 /* If this is the last insn of a libcall sequence, then
1970 delete every insn in the sequence except the last.
1971 The last insn is handled in the normal manner. */
1972 if (GET_CODE (p) != NOTE
1973 && (temp = find_reg_note (p, REG_RETVAL, NULL_RTX)))
1975 temp = XEXP (temp, 0);
1976 while (temp != p)
1977 temp = delete_insn (temp);
1980 temp = p;
1981 p = delete_insn (p);
1983 /* simplify_giv_expr expects that it can walk the insns
1984 at m->insn forwards and see this old sequence we are
1985 tossing here. delete_insn does preserve the next
1986 pointers, but when we skip over a NOTE we must fix
1987 it up. Otherwise that code walks into the non-deleted
1988 insn stream. */
1989 while (p && GET_CODE (p) == NOTE)
1990 p = NEXT_INSN (temp) = NEXT_INSN (p);
1992 if (m->insert_temp)
1994 /* Replace the original insn with a move from
1995 our newly created temp. */
1996 start_sequence ();
1997 emit_move_insn (m->set_dest, newreg);
1998 seq = get_insns ();
1999 end_sequence ();
2000 emit_insn_before (seq, p);
2004 start_sequence ();
2005 emit_move_insn (m->insert_temp ? newreg : m->set_dest,
2006 m->set_src);
2007 seq = get_insns ();
2008 end_sequence ();
2010 add_label_notes (m->set_src, seq);
2012 i1 = loop_insn_hoist (loop, seq);
2013 if (! find_reg_note (i1, REG_EQUAL, NULL_RTX))
2014 set_unique_reg_note (i1,
2015 m->is_equiv ? REG_EQUIV : REG_EQUAL,
2016 m->set_src);
2018 if (loop_dump_stream)
2019 fprintf (loop_dump_stream, " moved to %d", INSN_UID (i1));
2021 /* The more regs we move, the less we like moving them. */
2022 threshold -= 3;
2024 else
2026 for (count = m->consec; count >= 0; count--)
2028 rtx i1, temp;
2030 /* If first insn of libcall sequence, skip to end. */
2031 /* Do this at start of loop, since p is guaranteed to
2032 be an insn here. */
2033 if (GET_CODE (p) != NOTE
2034 && (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
2035 p = XEXP (temp, 0);
2037 /* If last insn of libcall sequence, move all
2038 insns except the last before the loop. The last
2039 insn is handled in the normal manner. */
2040 if (GET_CODE (p) != NOTE
2041 && (temp = find_reg_note (p, REG_RETVAL, NULL_RTX)))
2043 rtx fn_address = 0;
2044 rtx fn_reg = 0;
2045 rtx fn_address_insn = 0;
2047 first = 0;
2048 for (temp = XEXP (temp, 0); temp != p;
2049 temp = NEXT_INSN (temp))
2051 rtx body;
2052 rtx n;
2053 rtx next;
2055 if (GET_CODE (temp) == NOTE)
2056 continue;
2058 body = PATTERN (temp);
2060 /* Find the next insn after TEMP,
2061 not counting USE or NOTE insns. */
2062 for (next = NEXT_INSN (temp); next != p;
2063 next = NEXT_INSN (next))
2064 if (! (GET_CODE (next) == INSN
2065 && GET_CODE (PATTERN (next)) == USE)
2066 && GET_CODE (next) != NOTE)
2067 break;
2069 /* If that is the call, this may be the insn
2070 that loads the function address.
2072 Extract the function address from the insn
2073 that loads it into a register.
2074 If this insn was cse'd, we get incorrect code.
2076 So emit a new move insn that copies the
2077 function address into the register that the
2078 call insn will use. flow.c will delete any
2079 redundant stores that we have created. */
2080 if (GET_CODE (next) == CALL_INSN
2081 && GET_CODE (body) == SET
2082 && REG_P (SET_DEST (body))
2083 && (n = find_reg_note (temp, REG_EQUAL,
2084 NULL_RTX)))
2086 fn_reg = SET_SRC (body);
2087 if (!REG_P (fn_reg))
2088 fn_reg = SET_DEST (body);
2089 fn_address = XEXP (n, 0);
2090 fn_address_insn = temp;
2092 /* We have the call insn.
2093 If it uses the register we suspect it might,
2094 load it with the correct address directly. */
2095 if (GET_CODE (temp) == CALL_INSN
2096 && fn_address != 0
2097 && reg_referenced_p (fn_reg, body))
2098 loop_insn_emit_after (loop, 0, fn_address_insn,
2099 gen_move_insn
2100 (fn_reg, fn_address));
2102 if (GET_CODE (temp) == CALL_INSN)
2104 i1 = loop_call_insn_hoist (loop, body);
2105 /* Because the USAGE information potentially
2106 contains objects other than hard registers
2107 we need to copy it. */
2108 if (CALL_INSN_FUNCTION_USAGE (temp))
2109 CALL_INSN_FUNCTION_USAGE (i1)
2110 = copy_rtx (CALL_INSN_FUNCTION_USAGE (temp));
2112 else
2113 i1 = loop_insn_hoist (loop, body);
2114 if (first == 0)
2115 first = i1;
2116 if (temp == fn_address_insn)
2117 fn_address_insn = i1;
2118 REG_NOTES (i1) = REG_NOTES (temp);
2119 REG_NOTES (temp) = NULL;
2120 delete_insn (temp);
2122 if (new_start == 0)
2123 new_start = first;
2125 if (m->savemode != VOIDmode)
2127 /* P sets REG to zero; but we should clear only
2128 the bits that are not covered by the mode
2129 m->savemode. */
2130 rtx reg = m->set_dest;
2131 rtx sequence;
2132 rtx tem;
2134 start_sequence ();
2135 tem = expand_simple_binop
2136 (GET_MODE (reg), AND, reg,
2137 GEN_INT ((((HOST_WIDE_INT) 1
2138 << GET_MODE_BITSIZE (m->savemode)))
2139 - 1),
2140 reg, 1, OPTAB_LIB_WIDEN);
2141 if (tem == 0)
2142 abort ();
2143 if (tem != reg)
2144 emit_move_insn (reg, tem);
2145 sequence = get_insns ();
2146 end_sequence ();
2147 i1 = loop_insn_hoist (loop, sequence);
2149 else if (GET_CODE (p) == CALL_INSN)
2151 i1 = loop_call_insn_hoist (loop, PATTERN (p));
2152 /* Because the USAGE information potentially
2153 contains objects other than hard registers
2154 we need to copy it. */
2155 if (CALL_INSN_FUNCTION_USAGE (p))
2156 CALL_INSN_FUNCTION_USAGE (i1)
2157 = copy_rtx (CALL_INSN_FUNCTION_USAGE (p));
2159 else if (count == m->consec && m->move_insn_first)
2161 rtx seq;
2162 /* The SET_SRC might not be invariant, so we must
2163 use the REG_EQUAL note. */
2164 start_sequence ();
2165 emit_move_insn (m->insert_temp ? newreg : m->set_dest,
2166 m->set_src);
2167 seq = get_insns ();
2168 end_sequence ();
2170 add_label_notes (m->set_src, seq);
2172 i1 = loop_insn_hoist (loop, seq);
2173 if (! find_reg_note (i1, REG_EQUAL, NULL_RTX))
2174 set_unique_reg_note (i1, m->is_equiv ? REG_EQUIV
2175 : REG_EQUAL, m->set_src);
2177 else if (m->insert_temp)
2179 rtx *reg_map2 = xcalloc (REGNO (newreg),
2180 sizeof(rtx));
2181 reg_map2 [m->regno] = newreg;
2183 i1 = loop_insn_hoist (loop, copy_rtx (PATTERN (p)));
2184 replace_regs (i1, reg_map2, REGNO (newreg), 1);
2185 free (reg_map2);
2187 else
2188 i1 = loop_insn_hoist (loop, PATTERN (p));
2190 if (REG_NOTES (i1) == 0)
2192 REG_NOTES (i1) = REG_NOTES (p);
2193 REG_NOTES (p) = NULL;
2195 /* If there is a REG_EQUAL note present whose value
2196 is not loop invariant, then delete it, since it
2197 may cause problems with later optimization passes.
2198 It is possible for cse to create such notes
2199 like this as a result of record_jump_cond. */
2201 if ((temp = find_reg_note (i1, REG_EQUAL, NULL_RTX))
2202 && ! loop_invariant_p (loop, XEXP (temp, 0)))
2203 remove_note (i1, temp);
2206 if (new_start == 0)
2207 new_start = i1;
2209 if (loop_dump_stream)
2210 fprintf (loop_dump_stream, " moved to %d",
2211 INSN_UID (i1));
2213 /* If library call, now fix the REG_NOTES that contain
2214 insn pointers, namely REG_LIBCALL on FIRST
2215 and REG_RETVAL on I1. */
2216 if ((temp = find_reg_note (i1, REG_RETVAL, NULL_RTX)))
2218 XEXP (temp, 0) = first;
2219 temp = find_reg_note (first, REG_LIBCALL, NULL_RTX);
2220 XEXP (temp, 0) = i1;
2223 temp = p;
2224 delete_insn (p);
2225 p = NEXT_INSN (p);
2227 /* simplify_giv_expr expects that it can walk the insns
2228 at m->insn forwards and see this old sequence we are
2229 tossing here. delete_insn does preserve the next
2230 pointers, but when we skip over a NOTE we must fix
2231 it up. Otherwise that code walks into the non-deleted
2232 insn stream. */
2233 while (p && GET_CODE (p) == NOTE)
2234 p = NEXT_INSN (temp) = NEXT_INSN (p);
2236 if (m->insert_temp)
2238 rtx seq;
2239 /* Replace the original insn with a move from
2240 our newly created temp. */
2241 start_sequence ();
2242 emit_move_insn (m->set_dest, newreg);
2243 seq = get_insns ();
2244 end_sequence ();
2245 emit_insn_before (seq, p);
2249 /* The more regs we move, the less we like moving them. */
2250 threshold -= 3;
2253 m->done = 1;
2255 if (!m->insert_temp)
2257 /* Any other movable that loads the same register
2258 MUST be moved. */
2259 already_moved[regno] = 1;
2261 /* This reg has been moved out of one loop. */
2262 regs->array[regno].moved_once = 1;
2264 /* The reg set here is now invariant. */
2265 if (! m->partial)
2267 int i;
2268 for (i = 0; i < LOOP_REGNO_NREGS (regno, m->set_dest); i++)
2269 regs->array[regno+i].set_in_loop = 0;
2272 /* Change the length-of-life info for the register
2273 to say it lives at least the full length of this loop.
2274 This will help guide optimizations in outer loops. */
2276 if (REGNO_FIRST_LUID (regno) > INSN_LUID (loop_start))
2277 /* This is the old insn before all the moved insns.
2278 We can't use the moved insn because it is out of range
2279 in uid_luid. Only the old insns have luids. */
2280 REGNO_FIRST_UID (regno) = INSN_UID (loop_start);
2281 if (REGNO_LAST_LUID (regno) < INSN_LUID (loop_end))
2282 REGNO_LAST_UID (regno) = INSN_UID (loop_end);
2285 /* Combine with this moved insn any other matching movables. */
2287 if (! m->partial)
2288 for (m1 = movables->head; m1; m1 = m1->next)
2289 if (m1->match == m)
2291 rtx temp;
2293 /* Schedule the reg loaded by M1
2294 for replacement so that it shares the reg of M.
2295 If the modes differ (only possible in restricted
2296 circumstances), make a SUBREG.
2298 Note this assumes that the target dependent files
2299 treat REG and SUBREG equally, including within
2300 GO_IF_LEGITIMATE_ADDRESS and in all the
2301 predicates since we never verify that replacing the
2302 original register with a SUBREG results in a
2303 recognizable insn. */
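/* For illustration (register numbers hypothetical): if M sets (reg:SI 100)
   and M1 sets (reg:HI 101) from the same source, reg_map would map pseudo
   101 to something like (subreg:HI (reg:SI 100) 0); the exact subreg byte
   offset depends on the target's endianness, which is what
   gen_lowpart_common computes below.  */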
2304 if (GET_MODE (m->set_dest) == GET_MODE (m1->set_dest))
2305 reg_map[m1->regno] = m->set_dest;
2306 else
2307 reg_map[m1->regno]
2308 = gen_lowpart_common (GET_MODE (m1->set_dest),
2309 m->set_dest);
2311 /* Get rid of the matching insn
2312 and prevent further processing of it. */
2313 m1->done = 1;
2315 /* If library call, delete all insns. */
2316 if ((temp = find_reg_note (m1->insn, REG_RETVAL,
2317 NULL_RTX)))
2318 delete_insn_chain (XEXP (temp, 0), m1->insn);
2319 else
2320 delete_insn (m1->insn);
2322 /* Any other movable that loads the same register
2323 MUST be moved. */
2324 already_moved[m1->regno] = 1;
2326 /* The reg merged here is now invariant,
2327 if the reg it matches is invariant. */
2328 if (! m->partial)
2330 int i;
2331 for (i = 0;
2332 i < LOOP_REGNO_NREGS (regno, m1->set_dest);
2333 i++)
2334 regs->array[m1->regno+i].set_in_loop = 0;
2338 else if (loop_dump_stream)
2339 fprintf (loop_dump_stream, "not desirable");
2341 else if (loop_dump_stream && !m->match)
2342 fprintf (loop_dump_stream, "not safe");
2344 if (loop_dump_stream)
2345 fprintf (loop_dump_stream, "\n");
2348 if (new_start == 0)
2349 new_start = loop_start;
2351 /* Go through all the instructions in the loop, making
2352 all the register substitutions scheduled in REG_MAP. */
2353 for (p = new_start; p != loop_end; p = NEXT_INSN (p))
2354 if (GET_CODE (p) == INSN || GET_CODE (p) == JUMP_INSN
2355 || GET_CODE (p) == CALL_INSN)
2357 replace_regs (PATTERN (p), reg_map, nregs, 0);
2358 replace_regs (REG_NOTES (p), reg_map, nregs, 0);
2359 INSN_CODE (p) = -1;
2362 /* Clean up. */
2363 free (reg_map);
2364 free (already_moved);
2368 static void
2369 loop_movables_add (struct loop_movables *movables, struct movable *m)
2371 if (movables->head == 0)
2372 movables->head = m;
2373 else
2374 movables->last->next = m;
2375 movables->last = m;
2379 static void
2380 loop_movables_free (struct loop_movables *movables)
2382 struct movable *m;
2383 struct movable *m_next;
2385 for (m = movables->head; m; m = m_next)
2387 m_next = m->next;
2388 free (m);
2392 #if 0
2393 /* Scan X and replace the address of any MEM in it with ADDR.
2394 REG is the address that MEM should have before the replacement. */
2396 static void
2397 replace_call_address (rtx x, rtx reg, rtx addr)
2399 enum rtx_code code;
2400 int i;
2401 const char *fmt;
2403 if (x == 0)
2404 return;
2405 code = GET_CODE (x);
2406 switch (code)
2408 case PC:
2409 case CC0:
2410 case CONST_INT:
2411 case CONST_DOUBLE:
2412 case CONST:
2413 case SYMBOL_REF:
2414 case LABEL_REF:
2415 case REG:
2416 return;
2418 case SET:
2419 /* Short cut for very common case. */
2420 replace_call_address (XEXP (x, 1), reg, addr);
2421 return;
2423 case CALL:
2424 /* Short cut for very common case. */
2425 replace_call_address (XEXP (x, 0), reg, addr);
2426 return;
2428 case MEM:
2429 /* If this MEM uses a reg other than the one we expected,
2430 something is wrong. */
2431 if (XEXP (x, 0) != reg)
2432 abort ();
2433 XEXP (x, 0) = addr;
2434 return;
2436 default:
2437 break;
2440 fmt = GET_RTX_FORMAT (code);
2441 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2443 if (fmt[i] == 'e')
2444 replace_call_address (XEXP (x, i), reg, addr);
2445 else if (fmt[i] == 'E')
2447 int j;
2448 for (j = 0; j < XVECLEN (x, i); j++)
2449 replace_call_address (XVECEXP (x, i, j), reg, addr);
2453 #endif
2455 /* Return the number of memory refs to addresses that vary
2456 in the rtx X. */
2458 static int
2459 count_nonfixed_reads (const struct loop *loop, rtx x)
2461 enum rtx_code code;
2462 int i;
2463 const char *fmt;
2464 int value;
2466 if (x == 0)
2467 return 0;
2469 code = GET_CODE (x);
2470 switch (code)
2472 case PC:
2473 case CC0:
2474 case CONST_INT:
2475 case CONST_DOUBLE:
2476 case CONST:
2477 case SYMBOL_REF:
2478 case LABEL_REF:
2479 case REG:
2480 return 0;
2482 case MEM:
2483 return ((loop_invariant_p (loop, XEXP (x, 0)) != 1)
2484 + count_nonfixed_reads (loop, XEXP (x, 0)));
2486 default:
2487 break;
2490 value = 0;
2491 fmt = GET_RTX_FORMAT (code);
2492 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2494 if (fmt[i] == 'e')
2495 value += count_nonfixed_reads (loop, XEXP (x, i));
2496 if (fmt[i] == 'E')
2498 int j;
2499 for (j = 0; j < XVECLEN (x, i); j++)
2500 value += count_nonfixed_reads (loop, XVECEXP (x, i, j));
2503 return value;
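/* Worked example (register number hypothetical): for
   (mem:SI (plus:SI (reg:SI 140) (const_int 4))) where reg 140 is modified
   inside the loop, the MEM itself counts as one nonfixed read and the
   recursion into its varying address adds nothing further, so the result
   is 1; if the address were loop invariant the result would be 0.  */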
2506 /* Scan a loop setting the elements `cont', `vtop', `loops_enclosed',
2507 `has_call', `has_nonconst_call', `has_volatile', `has_tablejump',
2508 `unknown_address_altered', `unknown_constant_address_altered', and
2509 `num_mem_sets' in LOOP. Also, fill in the array `mems' and the
2510 list `store_mems' in LOOP. */
2512 static void
2513 prescan_loop (struct loop *loop)
2515 int level = 1;
2516 rtx insn;
2517 struct loop_info *loop_info = LOOP_INFO (loop);
2518 rtx start = loop->start;
2519 rtx end = loop->end;
2520 /* The label after END. Jumping here is just like falling off the
2521 end of the loop. We use next_nonnote_insn instead of next_label
2522 as a hedge against the (pathological) case where some actual insn
2523 might end up between the two. */
2524 rtx exit_target = next_nonnote_insn (end);
2526 loop_info->has_indirect_jump = indirect_jump_in_function;
2527 loop_info->pre_header_has_call = 0;
2528 loop_info->has_call = 0;
2529 loop_info->has_nonconst_call = 0;
2530 loop_info->has_prefetch = 0;
2531 loop_info->has_volatile = 0;
2532 loop_info->has_tablejump = 0;
2533 loop_info->has_multiple_exit_targets = 0;
2534 loop->level = 1;
2536 loop_info->unknown_address_altered = 0;
2537 loop_info->unknown_constant_address_altered = 0;
2538 loop_info->store_mems = NULL_RTX;
2539 loop_info->first_loop_store_insn = NULL_RTX;
2540 loop_info->mems_idx = 0;
2541 loop_info->num_mem_sets = 0;
2542 /* If loop opts run twice, this was set on 1st pass for 2nd. */
2543 loop_info->preconditioned = NOTE_PRECONDITIONED (end);
2545 for (insn = start; insn && GET_CODE (insn) != CODE_LABEL;
2546 insn = PREV_INSN (insn))
2548 if (GET_CODE (insn) == CALL_INSN)
2550 loop_info->pre_header_has_call = 1;
2551 break;
2555 for (insn = NEXT_INSN (start); insn != NEXT_INSN (end);
2556 insn = NEXT_INSN (insn))
2558 switch (GET_CODE (insn))
2560 case NOTE:
2561 if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
2563 ++level;
2564 /* Count number of loops contained in this one. */
2565 loop->level++;
2567 else if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_END)
2568 --level;
2569 break;
2571 case CALL_INSN:
2572 if (! CONST_OR_PURE_CALL_P (insn))
2574 loop_info->unknown_address_altered = 1;
2575 loop_info->has_nonconst_call = 1;
2577 else if (pure_call_p (insn))
2578 loop_info->has_nonconst_call = 1;
2579 loop_info->has_call = 1;
2580 if (can_throw_internal (insn))
2581 loop_info->has_multiple_exit_targets = 1;
2583 /* Calls initializing constant objects have CLOBBER of MEM /u in the
2584 attached FUNCTION_USAGE expression list, not accounted for by the
2585 code above. We should note these to avoid missing dependencies in
2586 later references. */
2588 rtx fusage_entry;
2590 for (fusage_entry = CALL_INSN_FUNCTION_USAGE (insn);
2591 fusage_entry; fusage_entry = XEXP (fusage_entry, 1))
2593 rtx fusage = XEXP (fusage_entry, 0);
2595 if (GET_CODE (fusage) == CLOBBER
2596 && MEM_P (XEXP (fusage, 0))
2597 && RTX_UNCHANGING_P (XEXP (fusage, 0)))
2599 note_stores (fusage, note_addr_stored, loop_info);
2600 if (! loop_info->first_loop_store_insn
2601 && loop_info->store_mems)
2602 loop_info->first_loop_store_insn = insn;
2606 break;
2608 case JUMP_INSN:
2609 if (! loop_info->has_multiple_exit_targets)
2611 rtx set = pc_set (insn);
2613 if (set)
2615 rtx src = SET_SRC (set);
2616 rtx label1, label2;
2618 if (GET_CODE (src) == IF_THEN_ELSE)
2620 label1 = XEXP (src, 1);
2621 label2 = XEXP (src, 2);
2623 else
2625 label1 = src;
2626 label2 = NULL_RTX;
2631 if (label1 && label1 != pc_rtx)
2633 if (GET_CODE (label1) != LABEL_REF)
2635 /* Something tricky. */
2636 loop_info->has_multiple_exit_targets = 1;
2637 break;
2639 else if (XEXP (label1, 0) != exit_target
2640 && LABEL_OUTSIDE_LOOP_P (label1))
2642 /* A jump outside the current loop. */
2643 loop_info->has_multiple_exit_targets = 1;
2644 break;
2648 label1 = label2;
2649 label2 = NULL_RTX;
2651 while (label1);
2653 else
2655 /* A return, or something tricky. */
2656 loop_info->has_multiple_exit_targets = 1;
2659 /* Fall through. */
2661 case INSN:
2662 if (volatile_refs_p (PATTERN (insn)))
2663 loop_info->has_volatile = 1;
2665 if (GET_CODE (insn) == JUMP_INSN
2666 && (GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
2667 || GET_CODE (PATTERN (insn)) == ADDR_VEC))
2668 loop_info->has_tablejump = 1;
2670 note_stores (PATTERN (insn), note_addr_stored, loop_info);
2671 if (! loop_info->first_loop_store_insn && loop_info->store_mems)
2672 loop_info->first_loop_store_insn = insn;
2674 if (flag_non_call_exceptions && can_throw_internal (insn))
2675 loop_info->has_multiple_exit_targets = 1;
2676 break;
2678 default:
2679 break;
2683 /* Now, rescan the loop, setting up the LOOP_MEMS array. */
2684 if (/* An exception thrown by a called function might land us
2685 anywhere. */
2686 ! loop_info->has_nonconst_call
2687 /* We don't want loads for MEMs moved to a location before the
2688 one at which their stack memory becomes allocated. (Note
2689 that this is not a problem for malloc, etc., since those
2690 require actual function calls.) */
2691 && ! current_function_calls_alloca
2692 /* There are ways to leave the loop other than falling off the
2693 end. */
2694 && ! loop_info->has_multiple_exit_targets)
2695 for (insn = NEXT_INSN (start); insn != NEXT_INSN (end);
2696 insn = NEXT_INSN (insn))
2697 for_each_rtx (&insn, insert_loop_mem, loop_info);
2699 /* BLKmode MEMs are added to LOOP_STORE_MEM as necessary so
2700 that loop_invariant_p and load_mems can use true_dependence
2701 to determine what is really clobbered. */
2702 if (loop_info->unknown_address_altered)
2704 rtx mem = gen_rtx_MEM (BLKmode, const0_rtx);
2706 loop_info->store_mems
2707 = gen_rtx_EXPR_LIST (VOIDmode, mem, loop_info->store_mems);
2709 if (loop_info->unknown_constant_address_altered)
2711 rtx mem = gen_rtx_MEM (BLKmode, const0_rtx);
2713 RTX_UNCHANGING_P (mem) = 1;
2714 loop_info->store_mems
2715 = gen_rtx_EXPR_LIST (VOIDmode, mem, loop_info->store_mems);
2719 /* Invalidate all loops containing LABEL. */
2721 static void
2722 invalidate_loops_containing_label (rtx label)
2724 struct loop *loop;
2725 for (loop = uid_loop[INSN_UID (label)]; loop; loop = loop->outer)
2726 loop->invalid = 1;
2729 /* Scan the function looking for loops. Record the start and end of each loop.
2730 Also mark as invalid loops any loops that contain a setjmp or are branched
2731 to from outside the loop. */
2733 static void
2734 find_and_verify_loops (rtx f, struct loops *loops)
2736 rtx insn;
2737 rtx label;
2738 int num_loops;
2739 struct loop *current_loop;
2740 struct loop *next_loop;
2741 struct loop *loop;
2743 num_loops = loops->num;
2745 compute_luids (f, NULL_RTX, 0);
2747 /* If there are jumps to undefined labels,
2748 treat them as jumps out of any/all loops.
2749 This also avoids writing past end of tables when there are no loops. */
2750 uid_loop[0] = NULL;
2752 /* Find boundaries of loops, mark which loops are contained within
2753 loops, and invalidate loops that have setjmp. */
2755 num_loops = 0;
2756 current_loop = NULL;
2757 for (insn = f; insn; insn = NEXT_INSN (insn))
2759 if (GET_CODE (insn) == NOTE)
2760 switch (NOTE_LINE_NUMBER (insn))
2762 case NOTE_INSN_LOOP_BEG:
2763 next_loop = loops->array + num_loops;
2764 next_loop->num = num_loops;
2765 num_loops++;
2766 next_loop->start = insn;
2767 next_loop->outer = current_loop;
2768 current_loop = next_loop;
2769 break;
2771 case NOTE_INSN_LOOP_CONT:
2772 current_loop->cont = insn;
2773 break;
2775 case NOTE_INSN_LOOP_VTOP:
2776 current_loop->vtop = insn;
2777 break;
2779 case NOTE_INSN_LOOP_END:
2780 if (! current_loop)
2781 abort ();
2783 current_loop->end = insn;
2784 current_loop = current_loop->outer;
2785 break;
2787 default:
2788 break;
2791 if (GET_CODE (insn) == CALL_INSN
2792 && find_reg_note (insn, REG_SETJMP, NULL))
2794 /* In this case, we must invalidate our current loop and any
2795 enclosing loop. */
2796 for (loop = current_loop; loop; loop = loop->outer)
2798 loop->invalid = 1;
2799 if (loop_dump_stream)
2800 fprintf (loop_dump_stream,
2801 "\nLoop at %d ignored due to setjmp.\n",
2802 INSN_UID (loop->start));
2806 /* Note that this will mark the NOTE_INSN_LOOP_END note as being in the
2807 enclosing loop, but this doesn't matter. */
2808 uid_loop[INSN_UID (insn)] = current_loop;
2811 /* Any loop containing a label used in an initializer must be invalidated,
2812 because it can be jumped into from anywhere. */
2813 for (label = forced_labels; label; label = XEXP (label, 1))
2814 invalidate_loops_containing_label (XEXP (label, 0));
2816 /* Any loop containing a label used for an exception handler must be
2817 invalidated, because it can be jumped into from anywhere. */
2818 for_each_eh_label (invalidate_loops_containing_label);
2820 /* Now scan all insn's in the function. If any JUMP_INSN branches into a
2821 loop that it is not contained within, that loop is marked invalid.
2822 If any INSN or CALL_INSN uses a label's address, then the loop containing
2823 that label is marked invalid, because it could be jumped into from
2824 anywhere.
2826 Also look for blocks of code ending in an unconditional branch that
2827 exits the loop. If such a block is surrounded by a conditional
2828 branch around the block, move the block elsewhere (see below) and
2829 invert the jump to point to the code block. This may eliminate a
2830 label in our loop and will simplify processing by both us and a
2831 possible second cse pass. */
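/* A minimal sketch of that transformation (labels hypothetical).  Inside
   the loop we may find:
       (jump_insn)  if !cond goto L1
       (jump_insn)  goto exit_label     ;; unconditional exit from the loop
       L1:          ...rest of loop body...
   The conditional jump is inverted to target a fresh label L2 and the exit
   block is reordered next to a BARRIER outside the loop:
       (jump_insn)  if cond goto L2
       ...rest of loop body...
       ...
       L2:          goto exit_label     ;; now outside the loop
   so L1 may become unused and be deleted from the loop body.  */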
2833 for (insn = f; insn; insn = NEXT_INSN (insn))
2834 if (INSN_P (insn))
2836 struct loop *this_loop = uid_loop[INSN_UID (insn)];
2838 if (GET_CODE (insn) == INSN || GET_CODE (insn) == CALL_INSN)
2840 rtx note = find_reg_note (insn, REG_LABEL, NULL_RTX);
2841 if (note)
2842 invalidate_loops_containing_label (XEXP (note, 0));
2845 if (GET_CODE (insn) != JUMP_INSN)
2846 continue;
2848 mark_loop_jump (PATTERN (insn), this_loop);
2850 /* See if this is an unconditional branch outside the loop. */
2851 if (this_loop
2852 && (GET_CODE (PATTERN (insn)) == RETURN
2853 || (any_uncondjump_p (insn)
2854 && onlyjump_p (insn)
2855 && (uid_loop[INSN_UID (JUMP_LABEL (insn))]
2856 != this_loop)))
2857 && get_max_uid () < max_uid_for_loop)
2859 rtx p;
2860 rtx our_next = next_real_insn (insn);
2861 rtx last_insn_to_move = NEXT_INSN (insn);
2862 struct loop *dest_loop;
2863 struct loop *outer_loop = NULL;
2865 /* Go backwards until we reach the start of the loop, a label,
2866 or a JUMP_INSN. */
2867 for (p = PREV_INSN (insn);
2868 GET_CODE (p) != CODE_LABEL
2869 && ! (GET_CODE (p) == NOTE
2870 && NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_BEG)
2871 && GET_CODE (p) != JUMP_INSN;
2872 p = PREV_INSN (p))
2875 /* Check for the case where we have a jump to an inner nested
2876 loop, and do not perform the optimization in that case. */
2878 if (JUMP_LABEL (insn))
2880 dest_loop = uid_loop[INSN_UID (JUMP_LABEL (insn))];
2881 if (dest_loop)
2883 for (outer_loop = dest_loop; outer_loop;
2884 outer_loop = outer_loop->outer)
2885 if (outer_loop == this_loop)
2886 break;
2890 /* Make sure that the target of P is within the current loop. */
2892 if (GET_CODE (p) == JUMP_INSN && JUMP_LABEL (p)
2893 && uid_loop[INSN_UID (JUMP_LABEL (p))] != this_loop)
2894 outer_loop = this_loop;
2896 /* If we stopped on a JUMP_INSN to the next insn after INSN,
2897 we have a block of code to try to move.
2899 We look backward and then forward from the target of INSN
2900 to find a BARRIER at the same loop depth as the target.
2901 If we find such a BARRIER, we make a new label for the start
2902 of the block, invert the jump in P and point it to that label,
2903 and move the block of code to the spot we found. */
2905 if (! outer_loop
2906 && GET_CODE (p) == JUMP_INSN
2907 && JUMP_LABEL (p) != 0
2908 /* Just ignore jumps to labels that were never emitted.
2909 These always indicate compilation errors. */
2910 && INSN_UID (JUMP_LABEL (p)) != 0
2911 && any_condjump_p (p) && onlyjump_p (p)
2912 && next_real_insn (JUMP_LABEL (p)) == our_next
2913 /* If it's not safe to move the sequence, then we
2914 mustn't try. */
2915 && insns_safe_to_move_p (p, NEXT_INSN (insn),
2916 &last_insn_to_move))
2918 rtx target
2919 = JUMP_LABEL (insn) ? JUMP_LABEL (insn) : get_last_insn ();
2920 struct loop *target_loop = uid_loop[INSN_UID (target)];
2921 rtx loc, loc2;
2922 rtx tmp;
2924 /* Search for possible garbage past the conditional jumps
2925 and look for the last barrier. */
2926 for (tmp = last_insn_to_move;
2927 tmp && GET_CODE (tmp) != CODE_LABEL; tmp = NEXT_INSN (tmp))
2928 if (GET_CODE (tmp) == BARRIER)
2929 last_insn_to_move = tmp;
2931 for (loc = target; loc; loc = PREV_INSN (loc))
2932 if (GET_CODE (loc) == BARRIER
2933 /* Don't move things inside a tablejump. */
2934 && ((loc2 = next_nonnote_insn (loc)) == 0
2935 || GET_CODE (loc2) != CODE_LABEL
2936 || (loc2 = next_nonnote_insn (loc2)) == 0
2937 || GET_CODE (loc2) != JUMP_INSN
2938 || (GET_CODE (PATTERN (loc2)) != ADDR_VEC
2939 && GET_CODE (PATTERN (loc2)) != ADDR_DIFF_VEC))
2940 && uid_loop[INSN_UID (loc)] == target_loop)
2941 break;
2943 if (loc == 0)
2944 for (loc = target; loc; loc = NEXT_INSN (loc))
2945 if (GET_CODE (loc) == BARRIER
2946 /* Don't move things inside a tablejump. */
2947 && ((loc2 = next_nonnote_insn (loc)) == 0
2948 || GET_CODE (loc2) != CODE_LABEL
2949 || (loc2 = next_nonnote_insn (loc2)) == 0
2950 || GET_CODE (loc2) != JUMP_INSN
2951 || (GET_CODE (PATTERN (loc2)) != ADDR_VEC
2952 && GET_CODE (PATTERN (loc2)) != ADDR_DIFF_VEC))
2953 && uid_loop[INSN_UID (loc)] == target_loop)
2954 break;
2956 if (loc)
2958 rtx cond_label = JUMP_LABEL (p);
2959 rtx new_label = get_label_after (p);
2961 /* Ensure our label doesn't go away. */
2962 LABEL_NUSES (cond_label)++;
2964 /* Verify that uid_loop is large enough and that
2965 we can invert P. */
2966 if (invert_jump (p, new_label, 1))
2968 rtx q, r;
2970 /* If no suitable BARRIER was found, create a suitable
2971 one before TARGET. Since TARGET is a fall through
2972 path, we'll need to insert a jump around our block
2973 and add a BARRIER before TARGET.
2975 This creates an extra unconditional jump outside
2976 the loop. However, the benefits of removing rarely
2977 executed instructions from inside the loop usually
2978 outweighs the cost of the extra unconditional jump
2979 outside the loop. */
2980 if (loc == 0)
2982 rtx temp;
2984 temp = gen_jump (JUMP_LABEL (insn));
2985 temp = emit_jump_insn_before (temp, target);
2986 JUMP_LABEL (temp) = JUMP_LABEL (insn);
2987 LABEL_NUSES (JUMP_LABEL (insn))++;
2988 loc = emit_barrier_before (target);
2991 /* Include the BARRIER after INSN and copy the
2992 block after LOC. */
2993 if (squeeze_notes (&new_label, &last_insn_to_move))
2994 abort ();
2995 reorder_insns (new_label, last_insn_to_move, loc);
2997 /* All those insns are now in TARGET_LOOP. */
2998 for (q = new_label;
2999 q != NEXT_INSN (last_insn_to_move);
3000 q = NEXT_INSN (q))
3001 uid_loop[INSN_UID (q)] = target_loop;
3003 /* The label jumped to by INSN is no longer a loop
3004 exit. Unless INSN does not have a label (e.g.,
3005 it is a RETURN insn), search loop->exit_labels
3006 to find its label_ref, and remove it. Also turn
3007 off LABEL_OUTSIDE_LOOP_P bit. */
3008 if (JUMP_LABEL (insn))
3010 for (q = 0, r = this_loop->exit_labels;
3012 q = r, r = LABEL_NEXTREF (r))
3013 if (XEXP (r, 0) == JUMP_LABEL (insn))
3015 LABEL_OUTSIDE_LOOP_P (r) = 0;
3016 if (q)
3017 LABEL_NEXTREF (q) = LABEL_NEXTREF (r);
3018 else
3019 this_loop->exit_labels = LABEL_NEXTREF (r);
3020 break;
3023 for (loop = this_loop; loop && loop != target_loop;
3024 loop = loop->outer)
3025 loop->exit_count--;
3027 /* If we didn't find it, then something is
3028 wrong. */
3029 if (! r)
3030 abort ();
3033 /* P is now a jump outside the loop, so it must be put
3034 in loop->exit_labels, and marked as such.
3035 The easiest way to do this is to just call
3036 mark_loop_jump again for P. */
3037 mark_loop_jump (PATTERN (p), this_loop);
3039 /* If INSN now jumps to the insn after it,
3040 delete INSN. */
3041 if (JUMP_LABEL (insn) != 0
3042 && (next_real_insn (JUMP_LABEL (insn))
3043 == next_real_insn (insn)))
3044 delete_related_insns (insn);
3047 /* Continue the loop after where the conditional
3048 branch used to jump, since the only branch insn
3049 in the block (if it still remains) is an inter-loop
3050 branch and hence needs no processing. */
3051 insn = NEXT_INSN (cond_label);
3053 if (--LABEL_NUSES (cond_label) == 0)
3054 delete_related_insns (cond_label);
3056 /* This loop will be continued with NEXT_INSN (insn). */
3057 insn = PREV_INSN (insn);
3064 /* If any label in X jumps to a loop different from LOOP_NUM and any of the
3065 loops it is contained in, mark the target loop invalid.
3067 For speed, we assume that X is part of a pattern of a JUMP_INSN. */
3069 static void
3070 mark_loop_jump (rtx x, struct loop *loop)
3072 struct loop *dest_loop;
3073 struct loop *outer_loop;
3074 int i;
3076 switch (GET_CODE (x))
3078 case PC:
3079 case USE:
3080 case CLOBBER:
3081 case REG:
3082 case MEM:
3083 case CONST_INT:
3084 case CONST_DOUBLE:
3085 case RETURN:
3086 return;
3088 case CONST:
3089 /* There could be a label reference in here. */
3090 mark_loop_jump (XEXP (x, 0), loop);
3091 return;
3093 case PLUS:
3094 case MINUS:
3095 case MULT:
3096 mark_loop_jump (XEXP (x, 0), loop);
3097 mark_loop_jump (XEXP (x, 1), loop);
3098 return;
3100 case LO_SUM:
3101 /* This may refer to a LABEL_REF or SYMBOL_REF. */
3102 mark_loop_jump (XEXP (x, 1), loop);
3103 return;
3105 case SIGN_EXTEND:
3106 case ZERO_EXTEND:
3107 mark_loop_jump (XEXP (x, 0), loop);
3108 return;
3110 case LABEL_REF:
3111 dest_loop = uid_loop[INSN_UID (XEXP (x, 0))];
3113 /* Link together all labels that branch outside the loop. This
3114 is used by final_[bg]iv_value and the loop unrolling code. Also
3115 mark this LABEL_REF so we know that this branch should predict
3116 false. */
3118 /* A check to make sure the label is not in an inner nested loop,
3119 since this does not count as a loop exit. */
3120 if (dest_loop)
3122 for (outer_loop = dest_loop; outer_loop;
3123 outer_loop = outer_loop->outer)
3124 if (outer_loop == loop)
3125 break;
3127 else
3128 outer_loop = NULL;
3130 if (loop && ! outer_loop)
3132 LABEL_OUTSIDE_LOOP_P (x) = 1;
3133 LABEL_NEXTREF (x) = loop->exit_labels;
3134 loop->exit_labels = x;
3136 for (outer_loop = loop;
3137 outer_loop && outer_loop != dest_loop;
3138 outer_loop = outer_loop->outer)
3139 outer_loop->exit_count++;
3142 /* If this is inside a loop, but not in the current loop or one enclosed
3143 by it, it invalidates at least one loop. */
3145 if (! dest_loop)
3146 return;
3148 /* We must invalidate every nested loop containing the target of this
3149 label, except those that also contain the jump insn. */
3151 for (; dest_loop; dest_loop = dest_loop->outer)
3153 /* Stop when we reach a loop that also contains the jump insn. */
3154 for (outer_loop = loop; outer_loop; outer_loop = outer_loop->outer)
3155 if (dest_loop == outer_loop)
3156 return;
3158 /* If we get here, we know we need to invalidate a loop. */
3159 if (loop_dump_stream && ! dest_loop->invalid)
3160 fprintf (loop_dump_stream,
3161 "\nLoop at %d ignored due to multiple entry points.\n",
3162 INSN_UID (dest_loop->start));
3164 dest_loop->invalid = 1;
3166 return;
3168 case SET:
3169 /* If this is not setting pc, ignore. */
3170 if (SET_DEST (x) == pc_rtx)
3171 mark_loop_jump (SET_SRC (x), loop);
3172 return;
3174 case IF_THEN_ELSE:
3175 mark_loop_jump (XEXP (x, 1), loop);
3176 mark_loop_jump (XEXP (x, 2), loop);
3177 return;
3179 case PARALLEL:
3180 case ADDR_VEC:
3181 for (i = 0; i < XVECLEN (x, 0); i++)
3182 mark_loop_jump (XVECEXP (x, 0, i), loop);
3183 return;
3185 case ADDR_DIFF_VEC:
3186 for (i = 0; i < XVECLEN (x, 1); i++)
3187 mark_loop_jump (XVECEXP (x, 1, i), loop);
3188 return;
3190 default:
3191 /* Strictly speaking this is not a jump into the loop, only a possible
3192 jump out of the loop. However, we have no way to link the destination
3193 of this jump onto the list of exit labels. To be safe we mark this
3194 loop and any containing loops as invalid. */
3195 if (loop)
3197 for (outer_loop = loop; outer_loop; outer_loop = outer_loop->outer)
3199 if (loop_dump_stream && ! outer_loop->invalid)
3200 fprintf (loop_dump_stream,
3201 "\nLoop at %d ignored due to unknown exit jump.\n",
3202 INSN_UID (outer_loop->start));
3203 outer_loop->invalid = 1;
3206 return;
3210 /* Return nonzero if there is a label in the range from
3211 insn INSN to and including the insn whose luid is END.
3212 INSN must have an assigned luid (i.e., it must not have
3213 been previously created by loop.c). */
3215 static int
3216 labels_in_range_p (rtx insn, int end)
3218 while (insn && INSN_LUID (insn) <= end)
3220 if (GET_CODE (insn) == CODE_LABEL)
3221 return 1;
3222 insn = NEXT_INSN (insn);
3225 return 0;
3228 /* Record that a memory reference X is being set. */
3230 static void
3231 note_addr_stored (rtx x, rtx y ATTRIBUTE_UNUSED,
3232 void *data ATTRIBUTE_UNUSED)
3234 struct loop_info *loop_info = data;
3236 if (x == 0 || !MEM_P (x))
3237 return;
3239 /* Count number of memory writes.
3240 This affects heuristics in strength_reduce. */
3241 loop_info->num_mem_sets++;
3243 /* BLKmode MEM means all memory is clobbered. */
3244 if (GET_MODE (x) == BLKmode)
3246 if (RTX_UNCHANGING_P (x))
3247 loop_info->unknown_constant_address_altered = 1;
3248 else
3249 loop_info->unknown_address_altered = 1;
3251 return;
3254 loop_info->store_mems = gen_rtx_EXPR_LIST (VOIDmode, x,
3255 loop_info->store_mems);
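/* A minimal usage sketch, as in prescan_loop above:

     note_stores (PATTERN (insn), note_addr_stored, loop_info);

   Every MEM destination that note_stores reports is then either prepended
   to loop_info->store_mems or, if it is a BLKmode MEM, folded into one of
   the "unknown ... address_altered" flags instead.  */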
3258 /* X is a value modified by an INSN that references a biv inside a loop
3259 exit test (i.e., X is somehow related to the value of the biv). If X
3260 is a pseudo that is used more than once, then the biv is (effectively)
3261 used more than once. DATA is a pointer to a loop_regs structure. */
3263 static void
3264 note_set_pseudo_multiple_uses (rtx x, rtx y ATTRIBUTE_UNUSED, void *data)
3266 struct loop_regs *regs = (struct loop_regs *) data;
3268 if (x == 0)
3269 return;
3271 while (GET_CODE (x) == STRICT_LOW_PART
3272 || GET_CODE (x) == SIGN_EXTRACT
3273 || GET_CODE (x) == ZERO_EXTRACT
3274 || GET_CODE (x) == SUBREG)
3275 x = XEXP (x, 0);
3277 if (!REG_P (x) || REGNO (x) < FIRST_PSEUDO_REGISTER)
3278 return;
3280 /* If we do not have usage information, or if we know the register
3281 is used more than once, note that fact for check_dbra_loop. */
3282 if (REGNO (x) >= max_reg_before_loop
3283 || ! regs->array[REGNO (x)].single_usage
3284 || regs->array[REGNO (x)].single_usage == const0_rtx)
3285 regs->multiple_uses = 1;
3288 /* Return nonzero if the rtx X is invariant over the current loop.
3290 The value is 2 if we refer to something only conditionally invariant.
3292 A memory ref is invariant if it is not volatile and does not conflict
3293 with anything stored in `loop_info->store_mems'. */
3295 int
3296 loop_invariant_p (const struct loop *loop, rtx x)
3298 struct loop_info *loop_info = LOOP_INFO (loop);
3299 struct loop_regs *regs = LOOP_REGS (loop);
3300 int i;
3301 enum rtx_code code;
3302 const char *fmt;
3303 int conditional = 0;
3304 rtx mem_list_entry;
3306 if (x == 0)
3307 return 1;
3308 code = GET_CODE (x);
3309 switch (code)
3311 case CONST_INT:
3312 case CONST_DOUBLE:
3313 case SYMBOL_REF:
3314 case CONST:
3315 return 1;
3317 case LABEL_REF:
3318 /* A LABEL_REF is normally invariant, however, if we are unrolling
3319 loops, and this label is inside the loop, then it isn't invariant.
3320 This is because each unrolled copy of the loop body will have
3321 a copy of this label. If this was invariant, then an insn loading
3322 the address of this label into a register might get moved outside
3323 the loop, and then each loop body would end up using the same label.
3325 We don't know the loop bounds here though, so just fail for all
3326 labels. */
3327 if (flag_old_unroll_loops)
3328 return 0;
3329 else
3330 return 1;
3332 case PC:
3333 case CC0:
3334 case UNSPEC_VOLATILE:
3335 return 0;
3337 case REG:
3338 /* We used to check RTX_UNCHANGING_P (x) here, but that is invalid
3339 since the reg might be set by initialization within the loop. */
3341 if ((x == frame_pointer_rtx || x == hard_frame_pointer_rtx
3342 || x == arg_pointer_rtx || x == pic_offset_table_rtx)
3343 && ! current_function_has_nonlocal_goto)
3344 return 1;
3346 if (LOOP_INFO (loop)->has_call
3347 && REGNO (x) < FIRST_PSEUDO_REGISTER && call_used_regs[REGNO (x)])
3348 return 0;
3350 /* Out-of-range regs can occur when we are called from unrolling.
3351 These registers created by the unroller are set in the loop,
3352 hence are never invariant.
3353 Other out-of-range regs can be generated by load_mems; those that
3354 are written to in the loop are not invariant, while those that are
3355 not written to are invariant. It would be easy for load_mems
3356 to set n_times_set correctly for these registers, however, there
3357 is no easy way to distinguish them from registers created by the
3358 unroller. */
3360 if (REGNO (x) >= (unsigned) regs->num)
3361 return 0;
3363 if (regs->array[REGNO (x)].set_in_loop < 0)
3364 return 2;
3366 return regs->array[REGNO (x)].set_in_loop == 0;
3368 case MEM:
3369 /* Volatile memory references must be rejected. Do this before
3370 checking for read-only items, so that volatile read-only items
3371 will be rejected also. */
3372 if (MEM_VOLATILE_P (x))
3373 return 0;
3375 /* See if there is any dependence between a store and this load. */
3376 mem_list_entry = loop_info->store_mems;
3377 while (mem_list_entry)
3379 if (true_dependence (XEXP (mem_list_entry, 0), VOIDmode,
3380 x, rtx_varies_p))
3381 return 0;
3383 mem_list_entry = XEXP (mem_list_entry, 1);
3386 /* It's not invalidated by a store in memory
3387 but we must still verify the address is invariant. */
3388 break;
3390 case ASM_OPERANDS:
3391 /* Don't mess with insns declared volatile. */
3392 if (MEM_VOLATILE_P (x))
3393 return 0;
3394 break;
3396 default:
3397 break;
3400 fmt = GET_RTX_FORMAT (code);
3401 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3403 if (fmt[i] == 'e')
3405 int tem = loop_invariant_p (loop, XEXP (x, i));
3406 if (tem == 0)
3407 return 0;
3408 if (tem == 2)
3409 conditional = 1;
3411 else if (fmt[i] == 'E')
3413 int j;
3414 for (j = 0; j < XVECLEN (x, i); j++)
3416 int tem = loop_invariant_p (loop, XVECEXP (x, i, j));
3417 if (tem == 0)
3418 return 0;
3419 if (tem == 2)
3420 conditional = 1;
3426 return 1 + conditional;
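/* Worked example (register number hypothetical): for
   x = (plus:SI (reg:SI 105) (const_int 8)) the result is 1 if pseudo 105
   is never set inside the loop, 2 if its set_in_loop count is negative
   (only conditionally invariant), and 0 if it is set inside the loop or
   if x instead read memory that may conflict with a recorded store.  */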
3429 /* Return nonzero if all the insns in the loop that set REG
3430 are INSN and the immediately following insns,
3431 and if each of those insns sets REG in an invariant way
3432 (not counting uses of REG in them).
3434 The value is 2 if some of these insns are only conditionally invariant.
3436 We assume that INSN itself is the first set of REG
3437 and that its source is invariant. */
3439 static int
3440 consec_sets_invariant_p (const struct loop *loop, rtx reg, int n_sets,
3441 rtx insn)
3443 struct loop_regs *regs = LOOP_REGS (loop);
3444 rtx p = insn;
3445 unsigned int regno = REGNO (reg);
3446 rtx temp;
3447 /* Number of sets we have to insist on finding after INSN. */
3448 int count = n_sets - 1;
3449 int old = regs->array[regno].set_in_loop;
3450 int value = 0;
3451 int this;
3453 /* If N_SETS hit the limit, we can't rely on its value. */
3454 if (n_sets == 127)
3455 return 0;
3457 regs->array[regno].set_in_loop = 0;
3459 while (count > 0)
3461 enum rtx_code code;
3462 rtx set;
3464 p = NEXT_INSN (p);
3465 code = GET_CODE (p);
3467 /* If library call, skip to end of it. */
3468 if (code == INSN && (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
3469 p = XEXP (temp, 0);
3471 this = 0;
3472 if (code == INSN
3473 && (set = single_set (p))
3474 && REG_P (SET_DEST (set))
3475 && REGNO (SET_DEST (set)) == regno)
3477 this = loop_invariant_p (loop, SET_SRC (set));
3478 if (this != 0)
3479 value |= this;
3480 else if ((temp = find_reg_note (p, REG_EQUAL, NULL_RTX)))
3482 /* If this is a libcall, then any invariant REG_EQUAL note is OK.
3483 If this is an ordinary insn, then only CONSTANT_P REG_EQUAL
3484 notes are OK. */
3485 this = (CONSTANT_P (XEXP (temp, 0))
3486 || (find_reg_note (p, REG_RETVAL, NULL_RTX)
3487 && loop_invariant_p (loop, XEXP (temp, 0))));
3488 if (this != 0)
3489 value |= this;
3492 if (this != 0)
3493 count--;
3494 else if (code != NOTE)
3496 regs->array[regno].set_in_loop = old;
3497 return 0;
3501 regs->array[regno].set_in_loop = old;
3502 /* If loop_invariant_p ever returned 2, we return 2. */
3503 return 1 + (value & 2);
3506 /* Look at all uses (not sets) of registers in X. For each, if it is
3507 the single use, set USAGE[REGNO] to INSN; if there was a previous use in
3508 a different insn, set USAGE[REGNO] to const0_rtx. */
3510 static void
3511 find_single_use_in_loop (struct loop_regs *regs, rtx insn, rtx x)
3513 enum rtx_code code = GET_CODE (x);
3514 const char *fmt = GET_RTX_FORMAT (code);
3515 int i, j;
3517 if (code == REG)
3518 regs->array[REGNO (x)].single_usage
3519 = (regs->array[REGNO (x)].single_usage != 0
3520 && regs->array[REGNO (x)].single_usage != insn)
3521 ? const0_rtx : insn;
3523 else if (code == SET)
3525 /* Don't count SET_DEST if it is a REG; otherwise count things
3526 in SET_DEST because if a register is partially modified, it won't
3527 show up as a potential movable so we don't care how USAGE is set
3528 for it. */
3529 if (!REG_P (SET_DEST (x)))
3530 find_single_use_in_loop (regs, insn, SET_DEST (x));
3531 find_single_use_in_loop (regs, insn, SET_SRC (x));
3533 else
3534 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3536 if (fmt[i] == 'e' && XEXP (x, i) != 0)
3537 find_single_use_in_loop (regs, insn, XEXP (x, i));
3538 else if (fmt[i] == 'E')
3539 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3540 find_single_use_in_loop (regs, insn, XVECEXP (x, i, j));
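/* For illustration (register number hypothetical): the first use of pseudo
   150 that is seen records that insn in single_usage; a use in any other
   insn then demotes the entry to const0_rtx, while repeated uses within
   the same insn leave it unchanged.  A later test of single_usage against
   const0_rtx therefore asks "was this reg used in more than one insn?".  */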
3544 /* Count and record any set in X which is contained in INSN. Update
3545 REGS->array[I].MAY_NOT_OPTIMIZE and LAST_SET for any register I set
3546 in X. */
3548 static void
3549 count_one_set (struct loop_regs *regs, rtx insn, rtx x, rtx *last_set)
3551 if (GET_CODE (x) == CLOBBER && REG_P (XEXP (x, 0)))
3552 /* Don't move a reg that has an explicit clobber.
3553 It's not worth the pain to try to do it correctly. */
3554 regs->array[REGNO (XEXP (x, 0))].may_not_optimize = 1;
3556 if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
3558 rtx dest = SET_DEST (x);
3559 while (GET_CODE (dest) == SUBREG
3560 || GET_CODE (dest) == ZERO_EXTRACT
3561 || GET_CODE (dest) == SIGN_EXTRACT
3562 || GET_CODE (dest) == STRICT_LOW_PART)
3563 dest = XEXP (dest, 0);
3564 if (REG_P (dest))
3566 int i;
3567 int regno = REGNO (dest);
3568 for (i = 0; i < LOOP_REGNO_NREGS (regno, dest); i++)
3570 /* If this is the first setting of this reg
3571 in the current basic block, and it was set before,
3572 it must be set in two basic blocks, so it cannot
3573 be moved out of the loop. */
3574 if (regs->array[regno].set_in_loop > 0
3575 && last_set[regno] == 0)
3576 regs->array[regno+i].may_not_optimize = 1;
3577 /* If this is not the first setting in the current basic block,
3578 see if the reg was used between the previous one and this.
3579 If so, neither one can be moved. */
3580 if (last_set[regno] != 0
3581 && reg_used_between_p (dest, last_set[regno], insn))
3582 regs->array[regno+i].may_not_optimize = 1;
3583 if (regs->array[regno+i].set_in_loop < 127)
3584 ++regs->array[regno+i].set_in_loop;
3585 last_set[regno+i] = insn;
3591 /* Given a loop that is bounded by LOOP->START and LOOP->END and that
3592 is entered at LOOP->SCAN_START, return 1 if the register set in SET
3593 contained in insn INSN is used by any insn that precedes INSN in
3594 cyclic order starting from the loop entry point.
3596 We don't want to use INSN_LUID here because if we restrict INSN to those
3597 that have a valid INSN_LUID, it means we cannot move an invariant out
3598 from an inner loop past two loops. */
3600 static int
3601 loop_reg_used_before_p (const struct loop *loop, rtx set, rtx insn)
3603 rtx reg = SET_DEST (set);
3604 rtx p;
3606 /* Scan forward checking for register usage. If we hit INSN, we
3607 are done. Otherwise, if we hit LOOP->END, wrap around to LOOP->START. */
3608 for (p = loop->scan_start; p != insn; p = NEXT_INSN (p))
3610 if (INSN_P (p) && reg_overlap_mentioned_p (reg, PATTERN (p)))
3611 return 1;
3613 if (p == loop->end)
3614 p = loop->start;
3617 return 0;
3621 /* Information we collect about arrays that we might want to prefetch. */
3622 struct prefetch_info
3624 struct iv_class *class; /* Class this prefetch is based on. */
3625 struct induction *giv; /* GIV this prefetch is based on. */
3626 rtx base_address; /* Start prefetching from this address plus
3627 index. */
3628 HOST_WIDE_INT index;
3629 HOST_WIDE_INT stride; /* Prefetch stride in bytes in each
3630 iteration. */
3631 unsigned int bytes_accessed; /* Sum of sizes of all accesses to this
3632 prefetch area in one iteration. */
3633 unsigned int total_bytes; /* Total bytes loop will access in this block.
3634 This is set only for loops with known
3635 iteration counts and is 0xffffffff
3636 otherwise. */
3637 int prefetch_in_loop; /* Number of prefetch insns in loop. */
3638 int prefetch_before_loop; /* Number of prefetch insns before loop. */
3639 unsigned int write : 1; /* 1 for read/write prefetches. */
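/* Purely illustrative values (array and counts hypothetical): a loop that
   reads one 4-byte element of a[i] per iteration with a known count of
   1000 would be summarized roughly as base_address = &a[0], index = 0,
   stride = 4, bytes_accessed = 4 and total_bytes = 4000; total_bytes stays
   0xffffffff when the iteration count is unknown.  */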
3642 /* Data used by check_store function. */
3643 struct check_store_data
3645 rtx mem_address;
3646 int mem_write;
3649 static void check_store (rtx, rtx, void *);
3650 static void emit_prefetch_instructions (struct loop *);
3651 static int rtx_equal_for_prefetch_p (rtx, rtx);
3653 /* Set mem_write when mem_address is found. Used as callback to
3654 note_stores. */
3655 static void
3656 check_store (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
3658 struct check_store_data *d = (struct check_store_data *) data;
3660 if ((MEM_P (x)) && rtx_equal_p (d->mem_address, XEXP (x, 0)))
3661 d->mem_write = 1;
3664 /* Like rtx_equal_p, but attempts to swap commutative operands. This is
3665 important to get some addresses combined. Later more sophisticated
3666 transformations can be added when necessary.
3668 ??? The same trick of swapping operands is done in several other places.
3669 It would be nice to develop a common way to handle this. */
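/* A brief illustration with hypothetical RTL: (plus (reg 100) (reg 101))
   and (plus (reg 101) (reg 100)) compare equal here, even though a plain
   rtx_equal_p comparison would treat them as different, so prefetch
   addresses that differ only in operand order can still be merged.  */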
3671 static int
3672 rtx_equal_for_prefetch_p (rtx x, rtx y)
3674 int i;
3675 int j;
3676 enum rtx_code code = GET_CODE (x);
3677 const char *fmt;
3679 if (x == y)
3680 return 1;
3681 if (code != GET_CODE (y))
3682 return 0;
3684 if (COMMUTATIVE_ARITH_P (x))
3686 return ((rtx_equal_for_prefetch_p (XEXP (x, 0), XEXP (y, 0))
3687 && rtx_equal_for_prefetch_p (XEXP (x, 1), XEXP (y, 1)))
3688 || (rtx_equal_for_prefetch_p (XEXP (x, 0), XEXP (y, 1))
3689 && rtx_equal_for_prefetch_p (XEXP (x, 1), XEXP (y, 0))));
3692 /* Compare the elements. If any pair of corresponding elements fails to
3693 match, return 0 for the whole thing. */
3695 fmt = GET_RTX_FORMAT (code);
3696 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3698 switch (fmt[i])
3700 case 'w':
3701 if (XWINT (x, i) != XWINT (y, i))
3702 return 0;
3703 break;
3705 case 'i':
3706 if (XINT (x, i) != XINT (y, i))
3707 return 0;
3708 break;
3710 case 'E':
3711 /* Two vectors must have the same length. */
3712 if (XVECLEN (x, i) != XVECLEN (y, i))
3713 return 0;
3715 /* And the corresponding elements must match. */
3716 for (j = 0; j < XVECLEN (x, i); j++)
3717 if (rtx_equal_for_prefetch_p (XVECEXP (x, i, j),
3718 XVECEXP (y, i, j)) == 0)
3719 return 0;
3720 break;
3722 case 'e':
3723 if (rtx_equal_for_prefetch_p (XEXP (x, i), XEXP (y, i)) == 0)
3724 return 0;
3725 break;
3727 case 's':
3728 if (strcmp (XSTR (x, i), XSTR (y, i)))
3729 return 0;
3730 break;
3732 case 'u':
3733 /* These are just backpointers, so they don't matter. */
3734 break;
3736 case '0':
3737 break;
3739 /* It is believed that rtx's at this level will never
3740 contain anything but integers and other rtx's,
3741 except within LABEL_REFs and SYMBOL_REFs. */
3742 default:
3743 abort ();
3746 return 1;
3749 /* Remove constant addition value from the expression X (when present)
3750 and return it. */
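/* A rough sketch of the intended behavior, with hypothetical RTL:

       *X = (plus (reg 100) (const_int 16))
	    => returns 16, *X = (reg 100)
       *X = (const (plus (symbol_ref "a") (const_int 8)))
	    => returns 8, *X = (symbol_ref "a")
       *X = (const_int 4)
	    => returns 4, *X = const0_rtx

   The returned constant becomes the INDEX of a prefetch_info entry below,
   while the stripped expression becomes its BASE_ADDRESS.  */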
3752 static HOST_WIDE_INT
3753 remove_constant_addition (rtx *x)
3755 HOST_WIDE_INT addval = 0;
3756 rtx exp = *x;
3758 /* Avoid clobbering a shared CONST expression. */
3759 if (GET_CODE (exp) == CONST)
3761 if (GET_CODE (XEXP (exp, 0)) == PLUS
3762 && GET_CODE (XEXP (XEXP (exp, 0), 0)) == SYMBOL_REF
3763 && GET_CODE (XEXP (XEXP (exp, 0), 1)) == CONST_INT)
3765 *x = XEXP (XEXP (exp, 0), 0);
3766 return INTVAL (XEXP (XEXP (exp, 0), 1));
3768 return 0;
3771 if (GET_CODE (exp) == CONST_INT)
3773 addval = INTVAL (exp);
3774 *x = const0_rtx;
3777 /* For a PLUS expression, recurse on both operands. */
3778 else if (GET_CODE (exp) == PLUS)
3780 addval += remove_constant_addition (&XEXP (exp, 0));
3781 addval += remove_constant_addition (&XEXP (exp, 1));
3783 /* If either operand turned out to be constant, remove the resulting
3784 zero from the expression. */
3785 if (XEXP (exp, 0) == const0_rtx)
3786 *x = XEXP (exp, 1);
3787 else if (XEXP (exp, 1) == const0_rtx)
3788 *x = XEXP (exp, 0);
3791 return addval;
3794 /* Attempt to identify accesses to arrays that are most likely to cause cache
3795 misses, and emit prefetch instructions a few prefetch blocks forward.
3797 To detect the arrays we use the GIV information that was collected by the
3798 strength reduction pass.
3800 The prefetch instructions are generated after the GIV information is done
3801 and before the strength reduction process. The new GIVs are injected into
3802 the strength reduction tables, so the prefetch addresses are optimized as
3803 well.
3805 GIVs are split into base address, stride, and constant addition values.
3806 GIVs with the same address, stride and close addition values are combined
3807 into a single prefetch. Also writes to GIVs are detected, so that prefetch
3808 for write instructions can be used for the block we write to, on machines
3809 that support write prefetches.
3811 Several heuristics are used to determine when to prefetch. They are
3812 controlled by defined symbols that can be overridden for each target. */
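/* As a rough illustration (hypothetical source), in

       for (i = 0; i < n; i++)
	 s += a[i + 2] + a[i + 7];

   both memory references reduce to the same base address &a[0] and the same
   stride sizeof (*a), differing only in the constant index, so the merging
   step below keeps a single prefetch for the pair.  */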
3814 static void
3815 emit_prefetch_instructions (struct loop *loop)
3817 int num_prefetches = 0;
3818 int num_real_prefetches = 0;
3819 int num_real_write_prefetches = 0;
3820 int num_prefetches_before = 0;
3821 int num_write_prefetches_before = 0;
3822 int ahead = 0;
3823 int i;
3824 struct iv_class *bl;
3825 struct induction *iv;
3826 struct prefetch_info info[MAX_PREFETCHES];
3827 struct loop_ivs *ivs = LOOP_IVS (loop);
3829 if (!HAVE_prefetch)
3830 return;
3832 /* Consider only loops without calls. When a call is made, the loop is
3833 probably slow enough to hide the memory latency anyway. */
3834 if (PREFETCH_NO_CALL && LOOP_INFO (loop)->has_call)
3836 if (loop_dump_stream)
3837 fprintf (loop_dump_stream, "Prefetch: ignoring loop: has call.\n");
3839 return;
3842 /* Don't prefetch in loops known to have few iterations. */
3843 if (PREFETCH_NO_LOW_LOOPCNT
3844 && LOOP_INFO (loop)->n_iterations
3845 && LOOP_INFO (loop)->n_iterations <= PREFETCH_LOW_LOOPCNT)
3847 if (loop_dump_stream)
3848 fprintf (loop_dump_stream,
3849 "Prefetch: ignoring loop: not enough iterations.\n");
3850 return;
3853 /* Search all induction variables and pick those interesting for the prefetch
3854 machinery. */
3855 for (bl = ivs->list; bl; bl = bl->next)
3857 struct induction *biv = bl->biv, *biv1;
3858 int basestride = 0;
3860 biv1 = biv;
3862 /* Expect all BIVs to be executed in each iteration. This makes our
3863 analysis more conservative. */
3864 while (biv1)
3866 /* Discard non-constant additions that we can't handle well yet, and
3867 BIVs that are executed multiple times; such BIVs ought to be
3868 handled in the nested loop. We accept not_every_iteration BIVs,
3869 since these only result in larger strides and make our
3870 heuristics more conservative. */
3871 if (GET_CODE (biv->add_val) != CONST_INT)
3873 if (loop_dump_stream)
3875 fprintf (loop_dump_stream,
3876 "Prefetch: ignoring biv %d: non-constant addition at insn %d:",
3877 REGNO (biv->src_reg), INSN_UID (biv->insn));
3878 print_rtl (loop_dump_stream, biv->add_val);
3879 fprintf (loop_dump_stream, "\n");
3881 break;
3884 if (biv->maybe_multiple)
3886 if (loop_dump_stream)
3888 fprintf (loop_dump_stream,
3889 "Prefetch: ignoring biv %d: maybe_multiple at insn %i:",
3890 REGNO (biv->src_reg), INSN_UID (biv->insn));
3891 print_rtl (loop_dump_stream, biv->add_val);
3892 fprintf (loop_dump_stream, "\n");
3894 break;
3897 basestride += INTVAL (biv1->add_val);
3898 biv1 = biv1->next_iv;
3901 if (biv1 || !basestride)
3902 continue;
3904 for (iv = bl->giv; iv; iv = iv->next_iv)
3906 rtx address;
3907 rtx temp;
3908 HOST_WIDE_INT index = 0;
3909 int add = 1;
3910 HOST_WIDE_INT stride = 0;
3911 int stride_sign = 1;
3912 struct check_store_data d;
3913 const char *ignore_reason = NULL;
3914 int size = GET_MODE_SIZE (GET_MODE (iv));
3916 /* See whether an induction variable is interesting to us and if
3917 not, report the reason. */
3918 if (iv->giv_type != DEST_ADDR)
3919 ignore_reason = "giv is not a destination address";
3921 /* We are interested only in constant stride memory references
3922 in order to be able to compute density easily. */
3923 else if (GET_CODE (iv->mult_val) != CONST_INT)
3924 ignore_reason = "stride is not constant";
3926 else
3928 stride = INTVAL (iv->mult_val) * basestride;
3929 if (stride < 0)
3931 stride = -stride;
3932 stride_sign = -1;
3935 /* On some targets, reversed order prefetches are not
3936 worthwhile. */
3937 if (PREFETCH_NO_REVERSE_ORDER && stride_sign < 0)
3938 ignore_reason = "reversed order stride";
3940 /* Prefetch of accesses with an extreme stride might not be
3941 worthwhile, either. */
3942 else if (PREFETCH_NO_EXTREME_STRIDE
3943 && stride > PREFETCH_EXTREME_STRIDE)
3944 ignore_reason = "extreme stride";
3946 /* Ignore GIVs with varying add values; we can't predict the
3947 value for the next iteration. */
3948 else if (!loop_invariant_p (loop, iv->add_val))
3949 ignore_reason = "giv has varying add value";
3951 /* Ignore GIVs in the nested loops; they ought to have been
3952 handled already. */
3953 else if (iv->maybe_multiple)
3954 ignore_reason = "giv is in nested loop";
3957 if (ignore_reason != NULL)
3959 if (loop_dump_stream)
3960 fprintf (loop_dump_stream,
3961 "Prefetch: ignoring giv at %d: %s.\n",
3962 INSN_UID (iv->insn), ignore_reason);
3963 continue;
3966 /* Determine the pointer to the basic array we are examining. It is
3967 the sum of the BIV's initial value and the GIV's add_val. */
3968 address = copy_rtx (iv->add_val);
3969 temp = copy_rtx (bl->initial_value);
3971 address = simplify_gen_binary (PLUS, Pmode, temp, address);
3972 index = remove_constant_addition (&address);
3974 d.mem_write = 0;
3975 d.mem_address = *iv->location;
3977 /* When the GIV is not always executed, we might be better off by
3978 not dirtying the cache pages. */
3979 if (PREFETCH_CONDITIONAL || iv->always_executed)
3980 note_stores (PATTERN (iv->insn), check_store, &d);
3981 else
3983 if (loop_dump_stream)
3984 fprintf (loop_dump_stream, "Prefetch: Ignoring giv at %d: %s\n",
3985 INSN_UID (iv->insn), "in conditional code.");
3986 continue;
3989 /* Attempt to find another prefetch to the same array and see if we
3990 can merge this one. */
3991 for (i = 0; i < num_prefetches; i++)
3992 if (rtx_equal_for_prefetch_p (address, info[i].base_address)
3993 && stride == info[i].stride)
3995 /* If both access the same array (the same location, differing
3996 only by a small constant index), merge the prefetches. Keep
3997 only the later one; the earlier area will already have been
3998 fetched by the previous iteration.
3999 The artificial threshold should not be too small, but also
4000 not bigger than the small portion of memory usually traversed
4001 by a single loop. */
4002 if (index >= info[i].index
4003 && index - info[i].index < PREFETCH_EXTREME_DIFFERENCE)
4005 info[i].write |= d.mem_write;
4006 info[i].bytes_accessed += size;
4007 info[i].index = index;
4008 info[i].giv = iv;
4009 info[i].class = bl;
4010 info[num_prefetches].base_address = address;
4011 add = 0;
4012 break;
4015 if (index < info[i].index
4016 && info[i].index - index < PREFETCH_EXTREME_DIFFERENCE)
4018 info[i].write |= d.mem_write;
4019 info[i].bytes_accessed += size;
4020 add = 0;
4021 break;
4025 /* Merging failed. */
4026 if (add)
4028 info[num_prefetches].giv = iv;
4029 info[num_prefetches].class = bl;
4030 info[num_prefetches].index = index;
4031 info[num_prefetches].stride = stride;
4032 info[num_prefetches].base_address = address;
4033 info[num_prefetches].write = d.mem_write;
4034 info[num_prefetches].bytes_accessed = size;
4035 num_prefetches++;
4036 if (num_prefetches >= MAX_PREFETCHES)
4038 if (loop_dump_stream)
4039 fprintf (loop_dump_stream,
4040 "Maximal number of prefetches exceeded.\n");
4041 return;
4047 for (i = 0; i < num_prefetches; i++)
4049 int density;
4051 /* Attempt to calculate the total number of bytes fetched by all
4052 iterations of the loop. Avoid overflow. */
4053 if (LOOP_INFO (loop)->n_iterations
4054 && ((unsigned HOST_WIDE_INT) (0xffffffff / info[i].stride)
4055 >= LOOP_INFO (loop)->n_iterations))
4056 info[i].total_bytes = info[i].stride * LOOP_INFO (loop)->n_iterations;
4057 else
4058 info[i].total_bytes = 0xffffffff;
4060 density = info[i].bytes_accessed * 100 / info[i].stride;
4062 /* Prefetch might be worthwhile only when the loads/stores are dense. */
4063 if (PREFETCH_ONLY_DENSE_MEM)
4064 if (density * 256 > PREFETCH_DENSE_MEM * 100
4065 && (info[i].total_bytes / PREFETCH_BLOCK
4066 >= PREFETCH_BLOCKS_BEFORE_LOOP_MIN))
4068 info[i].prefetch_before_loop = 1;
4069 info[i].prefetch_in_loop
4070 = (info[i].total_bytes / PREFETCH_BLOCK
4071 > PREFETCH_BLOCKS_BEFORE_LOOP_MAX);
4073 else
4075 info[i].prefetch_in_loop = 0, info[i].prefetch_before_loop = 0;
4076 if (loop_dump_stream)
4077 fprintf (loop_dump_stream,
4078 "Prefetch: ignoring giv at %d: %d%% density is too low.\n",
4079 INSN_UID (info[i].giv->insn), density);
4081 else
4082 info[i].prefetch_in_loop = 1, info[i].prefetch_before_loop = 1;
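      /* A worked example with assumed numbers: accessing one 4-byte value
	 per iteration with a stride of 16 gives density == 25, i.e. 25% of
	 each window is touched; with PREFETCH_DENSE_MEM left at a high
	 fraction such as 220 (out of 256), 25 * 256 < 220 * 100, so the
	 density test above rejects the giv.  */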
4084 /* Find how many prefetch instructions we'll use within the loop. */
4085 if (info[i].prefetch_in_loop != 0)
4087 info[i].prefetch_in_loop = ((info[i].stride + PREFETCH_BLOCK - 1)
4088 / PREFETCH_BLOCK);
4089 num_real_prefetches += info[i].prefetch_in_loop;
4090 if (info[i].write)
4091 num_real_write_prefetches += info[i].prefetch_in_loop;
4095 /* Determine how many iterations ahead to prefetch within the loop, based
4096 on how many prefetches we currently expect to do within the loop. */
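  /* For instance, with the fall-back SIMULTANEOUS_PREFETCHES of 3 and two
     prefetch insns needed per iteration, AHEAD becomes 1; with four or more
     prefetch insns it would be 0 and in-loop prefetching is abandoned in
     the branch below.  */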
4097 if (num_real_prefetches != 0)
4099 if ((ahead = SIMULTANEOUS_PREFETCHES / num_real_prefetches) == 0)
4101 if (loop_dump_stream)
4102 fprintf (loop_dump_stream,
4103 "Prefetch: ignoring prefetches within loop: ahead is zero; %d < %d\n",
4104 SIMULTANEOUS_PREFETCHES, num_real_prefetches);
4105 num_real_prefetches = 0, num_real_write_prefetches = 0;
4108 /* We'll also use AHEAD to determine how many prefetch instructions to
4109 emit before a loop, so don't leave it zero. */
4110 if (ahead == 0)
4111 ahead = PREFETCH_BLOCKS_BEFORE_LOOP_MAX;
4113 for (i = 0; i < num_prefetches; i++)
4115 /* Update if we've decided not to prefetch anything within the loop. */
4116 if (num_real_prefetches == 0)
4117 info[i].prefetch_in_loop = 0;
4119 /* Find how many prefetch instructions we'll use before the loop. */
4120 if (info[i].prefetch_before_loop != 0)
4122 int n = info[i].total_bytes / PREFETCH_BLOCK;
4123 if (n > ahead)
4124 n = ahead;
4125 info[i].prefetch_before_loop = n;
4126 num_prefetches_before += n;
4127 if (info[i].write)
4128 num_write_prefetches_before += n;
4131 if (loop_dump_stream)
4133 if (info[i].prefetch_in_loop == 0
4134 && info[i].prefetch_before_loop == 0)
4135 continue;
4136 fprintf (loop_dump_stream, "Prefetch insn: %d",
4137 INSN_UID (info[i].giv->insn));
4138 fprintf (loop_dump_stream,
4139 "; in loop: %d; before: %d; %s\n",
4140 info[i].prefetch_in_loop,
4141 info[i].prefetch_before_loop,
4142 info[i].write ? "read/write" : "read only");
4143 fprintf (loop_dump_stream,
4144 " density: %d%%; bytes_accessed: %u; total_bytes: %u\n",
4145 (int) (info[i].bytes_accessed * 100 / info[i].stride),
4146 info[i].bytes_accessed, info[i].total_bytes);
4147 fprintf (loop_dump_stream, " index: " HOST_WIDE_INT_PRINT_DEC
4148 "; stride: " HOST_WIDE_INT_PRINT_DEC "; address: ",
4149 info[i].index, info[i].stride);
4150 print_rtl (loop_dump_stream, info[i].base_address);
4151 fprintf (loop_dump_stream, "\n");
4155 if (num_real_prefetches + num_prefetches_before > 0)
4157 /* Record that this loop uses prefetch instructions. */
4158 LOOP_INFO (loop)->has_prefetch = 1;
4160 if (loop_dump_stream)
4162 fprintf (loop_dump_stream, "Real prefetches needed within loop: %d (write: %d)\n",
4163 num_real_prefetches, num_real_write_prefetches);
4164 fprintf (loop_dump_stream, "Real prefetches needed before loop: %d (write: %d)\n",
4165 num_prefetches_before, num_write_prefetches_before);
4169 for (i = 0; i < num_prefetches; i++)
4171 int y;
4173 for (y = 0; y < info[i].prefetch_in_loop; y++)
4175 rtx loc = copy_rtx (*info[i].giv->location);
4176 rtx insn;
4177 int bytes_ahead = PREFETCH_BLOCK * (ahead + y);
4178 rtx before_insn = info[i].giv->insn;
4179 rtx prev_insn = PREV_INSN (info[i].giv->insn);
4180 rtx seq;
4182 /* We can save some effort by offsetting the address on
4183 architectures with offsettable memory references. */
4184 if (offsettable_address_p (0, VOIDmode, loc))
4185 loc = plus_constant (loc, bytes_ahead);
4186 else
4188 rtx reg = gen_reg_rtx (Pmode);
4189 loop_iv_add_mult_emit_before (loop, loc, const1_rtx,
4190 GEN_INT (bytes_ahead), reg,
4191 0, before_insn);
4192 loc = reg;
4195 start_sequence ();
4196 /* Make sure the address operand is valid for prefetch. */
4197 if (! (*insn_data[(int)CODE_FOR_prefetch].operand[0].predicate)
4198 (loc, insn_data[(int)CODE_FOR_prefetch].operand[0].mode))
4199 loc = force_reg (Pmode, loc);
4200 emit_insn (gen_prefetch (loc, GEN_INT (info[i].write),
4201 GEN_INT (3)));
4202 seq = get_insns ();
4203 end_sequence ();
4204 emit_insn_before (seq, before_insn);
4206 /* Check all insns emitted and record the new GIV
4207 information. */
4208 insn = NEXT_INSN (prev_insn);
4209 while (insn != before_insn)
4211 insn = check_insn_for_givs (loop, insn,
4212 info[i].giv->always_executed,
4213 info[i].giv->maybe_multiple);
4214 insn = NEXT_INSN (insn);
4218 if (PREFETCH_BEFORE_LOOP)
4220 /* Emit insns before the loop to fetch the first cache lines or,
4221 if we're not prefetching within the loop, everything we expect
4222 to need. */
4223 for (y = 0; y < info[i].prefetch_before_loop; y++)
4225 rtx reg = gen_reg_rtx (Pmode);
4226 rtx loop_start = loop->start;
4227 rtx init_val = info[i].class->initial_value;
4228 rtx add_val = simplify_gen_binary (PLUS, Pmode,
4229 info[i].giv->add_val,
4230 GEN_INT (y * PREFETCH_BLOCK));
4232 /* Functions called by LOOP_IV_ADD_EMIT_BEFORE expect a
4233 non-constant INIT_VAL to have the same mode as REG, which
4234 in this case we know to be Pmode. */
4235 if (GET_MODE (init_val) != Pmode && !CONSTANT_P (init_val))
4237 rtx seq;
4239 start_sequence ();
4240 init_val = convert_to_mode (Pmode, init_val, 0);
4241 seq = get_insns ();
4242 end_sequence ();
4243 loop_insn_emit_before (loop, 0, loop_start, seq);
4245 loop_iv_add_mult_emit_before (loop, init_val,
4246 info[i].giv->mult_val,
4247 add_val, reg, 0, loop_start);
4248 emit_insn_before (gen_prefetch (reg, GEN_INT (info[i].write),
4249 GEN_INT (3)),
4250 loop_start);
4255 return;
4258 /* Communication with routines called via `note_stores'. */
4260 static rtx note_insn;
4262 /* Dummy register to have nonzero DEST_REG for DEST_ADDR type givs. */
4264 static rtx addr_placeholder;
4266 /* ??? Unfinished optimizations, and possible future optimizations,
4267 for the strength reduction code. */
4269 /* ??? The interaction of biv elimination, and recognition of 'constant'
4270 bivs, may cause problems. */
4272 /* ??? Add heuristics so that DEST_ADDR strength reduction does not cause
4273 performance problems.
4275 Perhaps don't eliminate things that can be combined with an addressing
4276 mode. Find all givs that have the same biv, mult_val, and add_val;
4277 then for each giv, check to see if its only use dies in a following
4278 memory address. If so, generate a new memory address and check to see
4279 if it is valid. If it is valid, then store the modified memory address,
4280 otherwise, mark the giv as not done so that it will get its own iv. */
4282 /* ??? Could try to optimize branches when it is known that a biv is always
4283 positive. */
4285 /* ??? When replacing a biv in a compare insn, we should replace it with
4286 the closest giv so that an optimized branch can still be recognized by
4287 the combiner, e.g. the VAX acb insn. */
4289 /* ??? Many of the checks involving uid_luid could be simplified if regscan
4290 was rerun in loop_optimize whenever a register was added or moved.
4291 Also, some of the optimizations could be a little less conservative. */
4293 /* Scan the loop body and call FNCALL for each insn. In addition to the
4294 LOOP and INSN parameters pass MAYBE_MULTIPLE and NOT_EVERY_ITERATION to the
4295 callback.
4297 NOT_EVERY_ITERATION is 1 if current insn is not known to be executed at
4298 least once for every loop iteration except for the last one.
4300 MAYBE_MULTIPLE is 1 if current insn may be executed more than once for every
4301 loop iteration.
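   A minimal sketch of a conforming callback (hypothetical; the callbacks
   actually used below are check_insn_for_bivs and check_insn_for_givs):

       static rtx
       dump_loop_insn (struct loop *loop ATTRIBUTE_UNUSED, rtx insn,
		       int not_every_iteration, int maybe_multiple)
       {
	 if (loop_dump_stream)
	   fprintf (loop_dump_stream, "insn %d: nei=%d mm=%d\n",
		    INSN_UID (insn), not_every_iteration, maybe_multiple);
	 return insn;
       }

       for_each_insn_in_loop (loop, dump_loop_insn);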
4303 void
4304 for_each_insn_in_loop (struct loop *loop, loop_insn_callback fncall)
4306 int not_every_iteration = 0;
4307 int maybe_multiple = 0;
4308 int past_loop_latch = 0;
4309 int loop_depth = 0;
4310 rtx p;
4312 /* If loop_scan_start points to the loop exit test, we have to be wary of
4313 subversive use of gotos inside expression statements. */
4314 if (prev_nonnote_insn (loop->scan_start) != prev_nonnote_insn (loop->start))
4315 maybe_multiple = back_branch_in_range_p (loop, loop->scan_start);
4317 /* Scan through loop and update NOT_EVERY_ITERATION and MAYBE_MULTIPLE. */
4318 for (p = next_insn_in_loop (loop, loop->scan_start);
4319 p != NULL_RTX;
4320 p = next_insn_in_loop (loop, p))
4322 p = fncall (loop, p, not_every_iteration, maybe_multiple);
4324 /* Past CODE_LABEL, we get to insns that may be executed multiple
4325 times. The only way we can be sure that they can't is if every
4326 jump insn between here and the end of the loop either
4327 returns, exits the loop, is a jump to a location that is still
4328 behind the label, or is a jump to the loop start. */
4330 if (GET_CODE (p) == CODE_LABEL)
4332 rtx insn = p;
4334 maybe_multiple = 0;
4336 while (1)
4338 insn = NEXT_INSN (insn);
4339 if (insn == loop->scan_start)
4340 break;
4341 if (insn == loop->end)
4343 if (loop->top != 0)
4344 insn = loop->top;
4345 else
4346 break;
4347 if (insn == loop->scan_start)
4348 break;
4351 if (GET_CODE (insn) == JUMP_INSN
4352 && GET_CODE (PATTERN (insn)) != RETURN
4353 && (!any_condjump_p (insn)
4354 || (JUMP_LABEL (insn) != 0
4355 && JUMP_LABEL (insn) != loop->scan_start
4356 && !loop_insn_first_p (p, JUMP_LABEL (insn)))))
4358 maybe_multiple = 1;
4359 break;
4364 /* Past a jump, we get to insns for which we can't count
4365 on whether they will be executed during each iteration. */
4366 /* This code appears twice in strength_reduce. There is also similar
4367 code in scan_loop. */
4368 if (GET_CODE (p) == JUMP_INSN
4369 /* If we enter the loop in the middle, and scan around to the
4370 beginning, don't set not_every_iteration for that.
4371 This can be any kind of jump, since we want to know if insns
4372 will be executed if the loop is executed. */
4373 && !(JUMP_LABEL (p) == loop->top
4374 && ((NEXT_INSN (NEXT_INSN (p)) == loop->end
4375 && any_uncondjump_p (p))
4376 || (NEXT_INSN (p) == loop->end && any_condjump_p (p)))))
4378 rtx label = 0;
4380 /* If this is a jump outside the loop, then it also doesn't
4381 matter. Check to see if the target of this branch is on the
4382 loop->exit_labels list. */
4384 for (label = loop->exit_labels; label; label = LABEL_NEXTREF (label))
4385 if (XEXP (label, 0) == JUMP_LABEL (p))
4386 break;
4388 if (!label)
4389 not_every_iteration = 1;
4392 else if (GET_CODE (p) == NOTE)
4394 /* At the virtual top of a converted loop, insns are again known to
4395 be executed each iteration: logically, the loop begins here
4396 even though the exit code has been duplicated.
4398 Insns are also again known to be executed each iteration at
4399 the LOOP_CONT note. */
4400 if ((NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_VTOP
4401 || NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_CONT)
4402 && loop_depth == 0)
4403 not_every_iteration = 0;
4404 else if (NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_BEG)
4405 loop_depth++;
4406 else if (NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_END)
4407 loop_depth--;
4410 /* Note if we pass a loop latch. If we do, then we can not clear
4411 NOT_EVERY_ITERATION below when we pass the last CODE_LABEL in
4412 a loop since a jump before the last CODE_LABEL may have started
4413 a new loop iteration.
4415 Note that LOOP_TOP is only set for rotated loops and we need
4416 this check for all loops, so compare against the CODE_LABEL
4417 which immediately follows LOOP_START. */
4418 if (GET_CODE (p) == JUMP_INSN
4419 && JUMP_LABEL (p) == NEXT_INSN (loop->start))
4420 past_loop_latch = 1;
4422 /* Unlike in the code motion pass where MAYBE_NEVER indicates that
4423 an insn may never be executed, NOT_EVERY_ITERATION indicates whether
4424 or not an insn is known to be executed each iteration of the
4425 loop, whether or not any iterations are known to occur.
4427 Therefore, if we have just passed a label and have no more labels
4428 between here and the test insn of the loop, and we have not passed
4429 a jump to the top of the loop, then we know these insns will be
4430 executed each iteration. */
4432 if (not_every_iteration
4433 && !past_loop_latch
4434 && GET_CODE (p) == CODE_LABEL
4435 && no_labels_between_p (p, loop->end)
4436 && loop_insn_first_p (p, loop->cont))
4437 not_every_iteration = 0;
4441 static void
4442 loop_bivs_find (struct loop *loop)
4444 struct loop_regs *regs = LOOP_REGS (loop);
4445 struct loop_ivs *ivs = LOOP_IVS (loop);
4446 /* Temporary list pointers for traversing ivs->list. */
4447 struct iv_class *bl, **backbl;
4449 ivs->list = 0;
4451 for_each_insn_in_loop (loop, check_insn_for_bivs);
4453 /* Scan ivs->list to remove all regs that proved not to be bivs.
4454 Make a sanity check against regs->n_times_set. */
4455 for (backbl = &ivs->list, bl = *backbl; bl; bl = bl->next)
4457 if (REG_IV_TYPE (ivs, bl->regno) != BASIC_INDUCT
4458 /* Above happens if register modified by subreg, etc. */
4459 /* Make sure it is not recognized as a basic induction var: */
4460 || regs->array[bl->regno].n_times_set != bl->biv_count
4461 /* If never incremented, it is an invariant that we decided not to
4462 move, so leave it alone. */
4463 || ! bl->incremented)
4465 if (loop_dump_stream)
4466 fprintf (loop_dump_stream, "Biv %d: discarded, %s\n",
4467 bl->regno,
4468 (REG_IV_TYPE (ivs, bl->regno) != BASIC_INDUCT
4469 ? "not induction variable"
4470 : (! bl->incremented ? "never incremented"
4471 : "count error")));
4473 REG_IV_TYPE (ivs, bl->regno) = NOT_BASIC_INDUCT;
4474 *backbl = bl->next;
4476 else
4478 backbl = &bl->next;
4480 if (loop_dump_stream)
4481 fprintf (loop_dump_stream, "Biv %d: verified\n", bl->regno);
4487 /* Determine how BIVS are initialized by looking through pre-header
4488 extended basic block. */
4489 static void
4490 loop_bivs_init_find (struct loop *loop)
4492 struct loop_ivs *ivs = LOOP_IVS (loop);
4493 /* Temporary list pointers for traversing ivs->list. */
4494 struct iv_class *bl;
4495 int call_seen;
4496 rtx p;
4498 /* Find initial value for each biv by searching backwards from loop_start,
4499 halting at first label. Also record any test condition. */
4501 call_seen = 0;
4502 for (p = loop->start; p && GET_CODE (p) != CODE_LABEL; p = PREV_INSN (p))
4504 rtx test;
4506 note_insn = p;
4508 if (GET_CODE (p) == CALL_INSN)
4509 call_seen = 1;
4511 if (INSN_P (p))
4512 note_stores (PATTERN (p), record_initial, ivs);
4514 /* Record any test of a biv that branches around the loop if there is
4515 no store between it and the start of the loop. We only care about
4516 tests with constants and registers, and only certain of those. */
4517 if (GET_CODE (p) == JUMP_INSN
4518 && JUMP_LABEL (p) != 0
4519 && next_real_insn (JUMP_LABEL (p)) == next_real_insn (loop->end)
4520 && (test = get_condition_for_loop (loop, p)) != 0
4521 && REG_P (XEXP (test, 0))
4522 && REGNO (XEXP (test, 0)) < max_reg_before_loop
4523 && (bl = REG_IV_CLASS (ivs, REGNO (XEXP (test, 0)))) != 0
4524 && valid_initial_value_p (XEXP (test, 1), p, call_seen, loop->start)
4525 && bl->init_insn == 0)
4527 /* If an NE test, we have an initial value! */
4528 if (GET_CODE (test) == NE)
4530 bl->init_insn = p;
4531 bl->init_set = gen_rtx_SET (VOIDmode,
4532 XEXP (test, 0), XEXP (test, 1));
4534 else
4535 bl->initial_test = test;
4541 /* Look at each biv and see if we can say anything better about its
4542 initial value from any initializing insns set up above. (This is done
4543 in two passes to avoid missing SETs in a PARALLEL.) */
4544 static void
4545 loop_bivs_check (struct loop *loop)
4547 struct loop_ivs *ivs = LOOP_IVS (loop);
4548 /* Temporary list pointers for traversing ivs->list. */
4549 struct iv_class *bl;
4550 struct iv_class **backbl;
4552 for (backbl = &ivs->list; (bl = *backbl); backbl = &bl->next)
4554 rtx src;
4555 rtx note;
4557 if (! bl->init_insn)
4558 continue;
4560 /* If INIT_INSN has a REG_EQUAL or REG_EQUIV note and the value
4561 is a constant, use the value of that. */
4562 if (((note = find_reg_note (bl->init_insn, REG_EQUAL, 0)) != NULL
4563 && CONSTANT_P (XEXP (note, 0)))
4564 || ((note = find_reg_note (bl->init_insn, REG_EQUIV, 0)) != NULL
4565 && CONSTANT_P (XEXP (note, 0))))
4566 src = XEXP (note, 0);
4567 else
4568 src = SET_SRC (bl->init_set);
4570 if (loop_dump_stream)
4571 fprintf (loop_dump_stream,
4572 "Biv %d: initialized at insn %d: initial value ",
4573 bl->regno, INSN_UID (bl->init_insn));
4575 if ((GET_MODE (src) == GET_MODE (regno_reg_rtx[bl->regno])
4576 || GET_MODE (src) == VOIDmode)
4577 && valid_initial_value_p (src, bl->init_insn,
4578 LOOP_INFO (loop)->pre_header_has_call,
4579 loop->start))
4581 bl->initial_value = src;
4583 if (loop_dump_stream)
4585 print_simple_rtl (loop_dump_stream, src);
4586 fputc ('\n', loop_dump_stream);
4589 /* If we can't make it a giv,
4590 let biv keep initial value of "itself". */
4591 else if (loop_dump_stream)
4592 fprintf (loop_dump_stream, "is complex\n");
4597 /* Search the loop for general induction variables. */
4599 static void
4600 loop_givs_find (struct loop* loop)
4602 for_each_insn_in_loop (loop, check_insn_for_givs);
4606 /* For each giv for which we still don't know whether or not it is
4607 replaceable, check to see if it is replaceable because its final value
4608 can be calculated. */
4610 static void
4611 loop_givs_check (struct loop *loop)
4613 struct loop_ivs *ivs = LOOP_IVS (loop);
4614 struct iv_class *bl;
4616 for (bl = ivs->list; bl; bl = bl->next)
4618 struct induction *v;
4620 for (v = bl->giv; v; v = v->next_iv)
4621 if (! v->replaceable && ! v->not_replaceable)
4622 check_final_value (loop, v);
4627 /* Return nonzero if it is possible to eliminate the biv BL provided
4628 all givs are reduced. This is possible if either the reg is not
4629 used outside the loop, or we can compute what its final value will
4630 be. */
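/* For instance, the counter of  for (i = 0; i < n; i++) a[i] = 0;  can
   usually be eliminated: once the store address has been strength-reduced,
   I is either unused after the loop or its final value can be computed from
   the iteration count, so the exit test can be rewritten in terms of the
   reduced giv.  */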
4632 static int
4633 loop_biv_eliminable_p (struct loop *loop, struct iv_class *bl,
4634 int threshold, int insn_count)
4636 /* For architectures with a decrement_and_branch_until_zero insn,
4637 don't do this if we put a REG_NONNEG note on the endtest for this
4638 biv. */
4640 #ifdef HAVE_decrement_and_branch_until_zero
4641 if (bl->nonneg)
4643 if (loop_dump_stream)
4644 fprintf (loop_dump_stream,
4645 "Cannot eliminate nonneg biv %d.\n", bl->regno);
4646 return 0;
4648 #endif
4650 /* Check that the biv is not used outside the loop, or that it has a computable
4651 final value. Compare against bl->init_insn rather than loop->start. We aren't
4652 concerned with any uses of the biv between init_insn and
4653 loop->start since these won't be affected by the value of the biv
4654 elsewhere in the function, so long as init_insn doesn't use the
4655 biv itself. */
4657 if ((REGNO_LAST_LUID (bl->regno) < INSN_LUID (loop->end)
4658 && bl->init_insn
4659 && INSN_UID (bl->init_insn) < max_uid_for_loop
4660 && REGNO_FIRST_LUID (bl->regno) >= INSN_LUID (bl->init_insn)
4661 && ! reg_mentioned_p (bl->biv->dest_reg, SET_SRC (bl->init_set)))
4662 || (bl->final_value = final_biv_value (loop, bl)))
4663 return maybe_eliminate_biv (loop, bl, 0, threshold, insn_count);
4665 if (loop_dump_stream)
4667 fprintf (loop_dump_stream,
4668 "Cannot eliminate biv %d.\n",
4669 bl->regno);
4670 fprintf (loop_dump_stream,
4671 "First use: insn %d, last use: insn %d.\n",
4672 REGNO_FIRST_UID (bl->regno),
4673 REGNO_LAST_UID (bl->regno));
4675 return 0;
4679 /* Reduce each giv of BL that we have decided to reduce. */
4681 static void
4682 loop_givs_reduce (struct loop *loop, struct iv_class *bl)
4684 struct induction *v;
4686 for (v = bl->giv; v; v = v->next_iv)
4688 struct induction *tv;
4689 if (! v->ignore && v->same == 0)
4691 int auto_inc_opt = 0;
4693 /* If the code for derived givs immediately below has already
4694 allocated a new_reg, we must keep it. */
4695 if (! v->new_reg)
4696 v->new_reg = gen_reg_rtx (v->mode);
4698 #ifdef AUTO_INC_DEC
4699 /* If the target has auto-increment addressing modes, and
4700 this is an address giv, then try to put the increment
4701 immediately after its use, so that flow can create an
4702 auto-increment addressing mode. */
4703 /* Don't do this for loops entered at the bottom, to avoid
4704 this invalid transformation:
4705 jmp L; -> jmp L;
4706 TOP: TOP:
4707 use giv use giv
4708 L: inc giv
4709 inc biv L:
4710 test biv test giv
4711 cbr TOP cbr TOP
4713 if (v->giv_type == DEST_ADDR && bl->biv_count == 1
4714 && bl->biv->always_executed && ! bl->biv->maybe_multiple
4715 /* We don't handle reversed biv's because bl->biv->insn
4716 does not have a valid INSN_LUID. */
4717 && ! bl->reversed
4718 && v->always_executed && ! v->maybe_multiple
4719 && INSN_UID (v->insn) < max_uid_for_loop
4720 && !loop->top)
4722 /* If other giv's have been combined with this one, then
4723 this will work only if all uses of the other giv's occur
4724 before this giv's insn. This is difficult to check.
4726 We simplify this by looking for the common case where
4727 there is one DEST_REG giv, and this giv's insn is the
4728 last use of the dest_reg of that DEST_REG giv. If the
4729 increment occurs after the address giv, then we can
4730 perform the optimization. (Otherwise, the increment
4731 would have to go before other_giv, and we would not be
4732 able to combine it with the address giv to get an
4733 auto-inc address.) */
4734 if (v->combined_with)
4736 struct induction *other_giv = 0;
4738 for (tv = bl->giv; tv; tv = tv->next_iv)
4739 if (tv->same == v)
4741 if (other_giv)
4742 break;
4743 else
4744 other_giv = tv;
4746 if (! tv && other_giv
4747 && REGNO (other_giv->dest_reg) < max_reg_before_loop
4748 && (REGNO_LAST_UID (REGNO (other_giv->dest_reg))
4749 == INSN_UID (v->insn))
4750 && INSN_LUID (v->insn) < INSN_LUID (bl->biv->insn))
4751 auto_inc_opt = 1;
4753 /* Check for case where increment is before the address
4754 giv. Do this test in "loop order". */
4755 else if ((INSN_LUID (v->insn) > INSN_LUID (bl->biv->insn)
4756 && (INSN_LUID (v->insn) < INSN_LUID (loop->scan_start)
4757 || (INSN_LUID (bl->biv->insn)
4758 > INSN_LUID (loop->scan_start))))
4759 || (INSN_LUID (v->insn) < INSN_LUID (loop->scan_start)
4760 && (INSN_LUID (loop->scan_start)
4761 < INSN_LUID (bl->biv->insn))))
4762 auto_inc_opt = -1;
4763 else
4764 auto_inc_opt = 1;
4766 #ifdef HAVE_cc0
4768 rtx prev;
4770 /* We can't put an insn immediately after one setting
4771 cc0, or immediately before one using cc0. */
4772 if ((auto_inc_opt == 1 && sets_cc0_p (PATTERN (v->insn)))
4773 || (auto_inc_opt == -1
4774 && (prev = prev_nonnote_insn (v->insn)) != 0
4775 && INSN_P (prev)
4776 && sets_cc0_p (PATTERN (prev))))
4777 auto_inc_opt = 0;
4779 #endif
4781 if (auto_inc_opt)
4782 v->auto_inc_opt = 1;
4784 #endif
4786 /* For each place where the biv is incremented, add an insn
4787 to increment the new, reduced reg for the giv. */
4788 for (tv = bl->biv; tv; tv = tv->next_iv)
4790 rtx insert_before;
4792 /* Skip if location is the same as a previous one. */
4793 if (tv->same)
4794 continue;
4795 if (! auto_inc_opt)
4796 insert_before = NEXT_INSN (tv->insn);
4797 else if (auto_inc_opt == 1)
4798 insert_before = NEXT_INSN (v->insn);
4799 else
4800 insert_before = v->insn;
4802 if (tv->mult_val == const1_rtx)
4803 loop_iv_add_mult_emit_before (loop, tv->add_val, v->mult_val,
4804 v->new_reg, v->new_reg,
4805 0, insert_before);
4806 else /* tv->mult_val == const0_rtx */
4807 /* A multiply is acceptable here
4808 since this is presumed to be seldom executed. */
4809 loop_iv_add_mult_emit_before (loop, tv->add_val, v->mult_val,
4810 v->add_val, v->new_reg,
4811 0, insert_before);
4814 /* Add code at loop start to initialize giv's reduced reg. */
4816 loop_iv_add_mult_hoist (loop,
4817 extend_value_for_giv (v, bl->initial_value),
4818 v->mult_val, v->add_val, v->new_reg);
4824 /* Check for givs whose first use is their definition and whose
4825 last use is the definition of another giv. If so, it is likely
4826 dead and should not be used to derive another giv nor to
4827 eliminate a biv. */
4829 static void
4830 loop_givs_dead_check (struct loop *loop ATTRIBUTE_UNUSED, struct iv_class *bl)
4832 struct induction *v;
4834 for (v = bl->giv; v; v = v->next_iv)
4836 if (v->ignore
4837 || (v->same && v->same->ignore))
4838 continue;
4840 if (v->giv_type == DEST_REG
4841 && REGNO_FIRST_UID (REGNO (v->dest_reg)) == INSN_UID (v->insn))
4843 struct induction *v1;
4845 for (v1 = bl->giv; v1; v1 = v1->next_iv)
4846 if (REGNO_LAST_UID (REGNO (v->dest_reg)) == INSN_UID (v1->insn))
4847 v->maybe_dead = 1;
4853 static void
4854 loop_givs_rescan (struct loop *loop, struct iv_class *bl, rtx *reg_map)
4856 struct induction *v;
4858 for (v = bl->giv; v; v = v->next_iv)
4860 if (v->same && v->same->ignore)
4861 v->ignore = 1;
4863 if (v->ignore)
4864 continue;
4866 /* Update expression if this was combined, in case other giv was
4867 replaced. */
4868 if (v->same)
4869 v->new_reg = replace_rtx (v->new_reg,
4870 v->same->dest_reg, v->same->new_reg);
4872 /* See if this register is known to be a pointer to something. If
4873 so, see if we can find the alignment. First see if there is a
4874 destination register that is a pointer. If so, this shares the
4875 alignment too. Next see if we can deduce anything from the
4876 computational information. If not, and this is a DEST_ADDR
4877 giv, at least we know that it's a pointer, though we don't know
4878 the alignment. */
4879 if (REG_P (v->new_reg)
4880 && v->giv_type == DEST_REG
4881 && REG_POINTER (v->dest_reg))
4882 mark_reg_pointer (v->new_reg,
4883 REGNO_POINTER_ALIGN (REGNO (v->dest_reg)));
4884 else if (REG_P (v->new_reg)
4885 && REG_POINTER (v->src_reg))
4887 unsigned int align = REGNO_POINTER_ALIGN (REGNO (v->src_reg));
4889 if (align == 0
4890 || GET_CODE (v->add_val) != CONST_INT
4891 || INTVAL (v->add_val) % (align / BITS_PER_UNIT) != 0)
4892 align = 0;
4894 mark_reg_pointer (v->new_reg, align);
4896 else if (REG_P (v->new_reg)
4897 && REG_P (v->add_val)
4898 && REG_POINTER (v->add_val))
4900 unsigned int align = REGNO_POINTER_ALIGN (REGNO (v->add_val));
4902 if (align == 0 || GET_CODE (v->mult_val) != CONST_INT
4903 || INTVAL (v->mult_val) % (align / BITS_PER_UNIT) != 0)
4904 align = 0;
4906 mark_reg_pointer (v->new_reg, align);
4908 else if (REG_P (v->new_reg) && v->giv_type == DEST_ADDR)
4909 mark_reg_pointer (v->new_reg, 0);
4911 if (v->giv_type == DEST_ADDR)
4912 /* Store reduced reg as the address in the memref where we found
4913 this giv. */
4914 validate_change (v->insn, v->location, v->new_reg, 0);
4915 else if (v->replaceable)
4917 reg_map[REGNO (v->dest_reg)] = v->new_reg;
4919 else
4921 rtx original_insn = v->insn;
4922 rtx note;
4924 /* Not replaceable; emit an insn to set the original giv reg from
4925 the reduced giv, same as above. */
4926 v->insn = loop_insn_emit_after (loop, 0, original_insn,
4927 gen_move_insn (v->dest_reg,
4928 v->new_reg));
4930 /* The original insn may have a REG_EQUAL note. This note is
4931 now incorrect and may result in invalid substitutions later.
4932 The original insn is dead, but may be part of a libcall
4933 sequence, which doesn't seem worth the bother of handling. */
4934 note = find_reg_note (original_insn, REG_EQUAL, NULL_RTX);
4935 if (note)
4936 remove_note (original_insn, note);
4939 /* When a loop is reversed, givs which depend on the reversed
4940 biv, and which are live outside the loop, must be set to their
4941 correct final value. This insn is only needed if the giv is
4942 not replaceable. The correct final value is the same as the
4943 value that the giv starts the reversed loop with. */
4944 if (bl->reversed && ! v->replaceable)
4945 loop_iv_add_mult_sink (loop,
4946 extend_value_for_giv (v, bl->initial_value),
4947 v->mult_val, v->add_val, v->dest_reg);
4948 else if (v->final_value)
4949 loop_insn_sink_or_swim (loop,
4950 gen_load_of_final_value (v->dest_reg,
4951 v->final_value));
4953 if (loop_dump_stream)
4955 fprintf (loop_dump_stream, "giv at %d reduced to ",
4956 INSN_UID (v->insn));
4957 print_simple_rtl (loop_dump_stream, v->new_reg);
4958 fprintf (loop_dump_stream, "\n");
4964 static int
4965 loop_giv_reduce_benefit (struct loop *loop ATTRIBUTE_UNUSED,
4966 struct iv_class *bl, struct induction *v,
4967 rtx test_reg)
4969 int add_cost;
4970 int benefit;
4972 benefit = v->benefit;
4973 PUT_MODE (test_reg, v->mode);
4974 add_cost = iv_add_mult_cost (bl->biv->add_val, v->mult_val,
4975 test_reg, test_reg);
4977 /* Reduce benefit if not replaceable, since we will insert a
4978 move-insn to replace the insn that calculates this giv. Don't do
4979 this unless the giv is a user variable, since it will often be
4980 marked non-replaceable because of the duplication of the exit
4981 code outside the loop. In such a case, the copies we insert are
4982 dead and will be deleted. So they don't have a cost. Similar
4983 situations exist. */
4984 /* ??? The new final_[bg]iv_value code does a much better job of
4985 finding replaceable giv's, and hence this code may no longer be
4986 necessary. */
4987 if (! v->replaceable && ! bl->eliminable
4988 && REG_USERVAR_P (v->dest_reg))
4989 benefit -= copy_cost;
4991 /* Decrease the benefit to count the add-insns that we will insert
4992 to increment the reduced reg for the giv. ??? This can
4993 overestimate the run-time cost of the additional insns, e.g. if
4994 there are multiple basic blocks that increment the biv, but only
4995 one of these blocks is executed during each iteration. There is
4996 no good way to detect cases like this with the current structure
4997 of the loop optimizer. This code is more accurate for
4998 determining code size than run-time benefits. */
4999 benefit -= add_cost * bl->biv_count;
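      /* A rough numeric sketch with assumed costs: for a giv with
	 v->benefit == 6, add_cost == 1 and a single biv increment, a
	 replaceable giv keeps benefit 6 - 1 = 5; a non-replaceable user
	 variable would additionally lose copy_cost for the extra move
	 inserted to set the original register.  */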
5001 /* Decide whether to strength-reduce this giv or to leave the code
5002 unchanged (recompute it from the biv each time it is used). This
5003 decision can be made independently for each giv. */
5005 #ifdef AUTO_INC_DEC
5006 /* Attempt to guess whether autoincrement will handle some of the
5007 new add insns; if so, increase BENEFIT (undo the subtraction of
5008 add_cost that was done above). */
5009 if (v->giv_type == DEST_ADDR
5010 /* Increasing the benefit is risky, since this is only a guess.
5011 Avoid increasing register pressure in cases where there would
5012 be no other benefit from reducing this giv. */
5013 && benefit > 0
5014 && GET_CODE (v->mult_val) == CONST_INT)
5016 int size = GET_MODE_SIZE (GET_MODE (v->mem));
5018 if (HAVE_POST_INCREMENT
5019 && INTVAL (v->mult_val) == size)
5020 benefit += add_cost * bl->biv_count;
5021 else if (HAVE_PRE_INCREMENT
5022 && INTVAL (v->mult_val) == size)
5023 benefit += add_cost * bl->biv_count;
5024 else if (HAVE_POST_DECREMENT
5025 && -INTVAL (v->mult_val) == size)
5026 benefit += add_cost * bl->biv_count;
5027 else if (HAVE_PRE_DECREMENT
5028 && -INTVAL (v->mult_val) == size)
5029 benefit += add_cost * bl->biv_count;
5031 #endif
5033 return benefit;
5037 /* Free IV structures for LOOP. */
5039 static void
5040 loop_ivs_free (struct loop *loop)
5042 struct loop_ivs *ivs = LOOP_IVS (loop);
5043 struct iv_class *iv = ivs->list;
5045 free (ivs->regs);
5047 while (iv)
5049 struct iv_class *next = iv->next;
5050 struct induction *induction;
5051 struct induction *next_induction;
5053 for (induction = iv->biv; induction; induction = next_induction)
5055 next_induction = induction->next_iv;
5056 free (induction);
5058 for (induction = iv->giv; induction; induction = next_induction)
5060 next_induction = induction->next_iv;
5061 free (induction);
5064 free (iv);
5065 iv = next;
5070 /* Perform strength reduction and induction variable elimination.
5072 Pseudo registers created during this function will be beyond the
5073 last valid index in several tables including
5074 REGS->ARRAY[I].N_TIMES_SET and REGNO_LAST_UID. This does not cause a
5075 problem here, because the added registers cannot be givs outside of
5076 their loop, and hence will never be reconsidered. But scan_loop
5077 must check regnos to make sure they are in bounds. */
5079 static void
5080 strength_reduce (struct loop *loop, int flags)
5082 struct loop_info *loop_info = LOOP_INFO (loop);
5083 struct loop_regs *regs = LOOP_REGS (loop);
5084 struct loop_ivs *ivs = LOOP_IVS (loop);
5085 rtx p;
5086 /* Temporary list pointer for traversing ivs->list. */
5087 struct iv_class *bl;
5088 /* Ratio of extra register life span we can justify
5089 for saving an instruction. More if loop doesn't call subroutines
5090 since in that case saving an insn makes more difference
5091 and more registers are available. */
5092 /* ??? could set this to last value of threshold in move_movables */
5093 int threshold = (loop_info->has_call ? 1 : 2) * (3 + n_non_fixed_regs);
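  /* E.g. with 29 non-fixed registers and no call in the loop this gives
     threshold = 2 * (3 + 29) = 64, halved to 32 if the loop contains a
     call; the register count is target-dependent, so these numbers are
     only illustrative.  */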
5094 /* Map of pseudo-register replacements. */
5095 rtx *reg_map = NULL;
5096 int reg_map_size;
5097 int unrolled_insn_copies = 0;
5098 rtx test_reg = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
5099 int insn_count = count_insns_in_loop (loop);
5101 addr_placeholder = gen_reg_rtx (Pmode);
5103 ivs->n_regs = max_reg_before_loop;
5104 ivs->regs = xcalloc (ivs->n_regs, sizeof (struct iv));
5106 /* Find all BIVs in loop. */
5107 loop_bivs_find (loop);
5109 /* Exit if there are no bivs. */
5110 if (! ivs->list)
5112 /* We can still unroll the loop anyway, but indicate that there is no
5113 strength reduction info available. */
5114 if (flags & LOOP_UNROLL)
5115 unroll_loop (loop, insn_count, 0);
5117 loop_ivs_free (loop);
5118 return;
5121 /* Determine how BIVS are initialized by looking through pre-header
5122 extended basic block. */
5123 loop_bivs_init_find (loop);
5125 /* Look at each biv and see if we can say anything better about its
5126 initial value from any initializing insns set up above. */
5127 loop_bivs_check (loop);
5129 /* Search the loop for general induction variables. */
5130 loop_givs_find (loop);
5132 /* Try to calculate and save the number of loop iterations. This is
5133 set to zero if the actual number can not be calculated. This must
5134 be called after all giv's have been identified, since otherwise it may
5135 fail if the iteration variable is a giv. */
5136 loop_iterations (loop);
5138 #ifdef HAVE_prefetch
5139 if (flags & LOOP_PREFETCH)
5140 emit_prefetch_instructions (loop);
5141 #endif
5143 /* Now for each giv for which we still don't know whether or not it is
5144 replaceable, check to see if it is replaceable because its final value
5145 can be calculated. This must be done after loop_iterations is called,
5146 so that final_giv_value will work correctly. */
5147 loop_givs_check (loop);
5149 /* Try to prove that the loop counter variable (if any) is always
5150 nonnegative; if so, record that fact with a REG_NONNEG note
5151 so that "decrement and branch until zero" insn can be used. */
5152 check_dbra_loop (loop, insn_count);
5154 /* Create reg_map to hold substitutions for replaceable giv regs.
5155 Some givs might have been made from biv increments, so look at
5156 ivs->reg_iv_type for a suitable size. */
5157 reg_map_size = ivs->n_regs;
5158 reg_map = xcalloc (reg_map_size, sizeof (rtx));
5160 /* Examine each iv class for feasibility of strength reduction/induction
5161 variable elimination. */
5163 for (bl = ivs->list; bl; bl = bl->next)
5165 struct induction *v;
5166 int benefit;
5168 /* Test whether it will be possible to eliminate this biv
5169 provided all givs are reduced. */
5170 bl->eliminable = loop_biv_eliminable_p (loop, bl, threshold, insn_count);
5172 /* This will be true at the end, if all givs which depend on this
5173 biv have been strength reduced.
5174 We can't (currently) eliminate the biv unless this is so. */
5175 bl->all_reduced = 1;
5177 /* Check each extension dependent giv in this class to see if its
5178 root biv is safe from wrapping in the interior mode. */
5179 check_ext_dependent_givs (loop, bl);
5181 /* Combine all giv's for this iv_class. */
5182 combine_givs (regs, bl);
5184 for (v = bl->giv; v; v = v->next_iv)
5186 struct induction *tv;
5188 if (v->ignore || v->same)
5189 continue;
5191 benefit = loop_giv_reduce_benefit (loop, bl, v, test_reg);
5193 /* If an insn is not to be strength reduced, then set its ignore
5194 flag, and clear bl->all_reduced. */
5196 /* A giv that depends on a reversed biv must be reduced if it is
5197 used after the loop exit; otherwise, it would have the wrong
5198 value after the loop exit. To make it simple, just reduce all
5199 such givs, whether or not we know they are used after the loop
5200 exit. */
5202 if (! flag_reduce_all_givs
5203 && v->lifetime * threshold * benefit < insn_count
5204 && ! bl->reversed)
5206 if (loop_dump_stream)
5207 fprintf (loop_dump_stream,
5208 "giv of insn %d not worth while, %d vs %d.\n",
5209 INSN_UID (v->insn),
5210 v->lifetime * threshold * benefit, insn_count);
5211 v->ignore = 1;
5212 bl->all_reduced = 0;
5214 else
5216 /* Check that we can increment the reduced giv without a
5217 multiply insn. If not, reject it. */
5219 for (tv = bl->biv; tv; tv = tv->next_iv)
5220 if (tv->mult_val == const1_rtx
5221 && ! product_cheap_p (tv->add_val, v->mult_val))
5223 if (loop_dump_stream)
5224 fprintf (loop_dump_stream,
5225 "giv of insn %d: would need a multiply.\n",
5226 INSN_UID (v->insn));
5227 v->ignore = 1;
5228 bl->all_reduced = 0;
5229 break;
5234 /* Check for givs whose first use is their definition and whose
5235 last use is the definition of another giv. If so, it is likely
5236 dead and should not be used to derive another giv nor to
5237 eliminate a biv. */
5238 loop_givs_dead_check (loop, bl);
5240 /* Reduce each giv that we decided to reduce. */
5241 loop_givs_reduce (loop, bl);
5243 /* Rescan all givs. If a giv is the same as a giv not reduced, mark it
5244 as not reduced.
5246 For each giv register that can be reduced now: if replaceable,
5247 substitute reduced reg wherever the old giv occurs;
5248 else add new move insn "giv_reg = reduced_reg". */
5249 loop_givs_rescan (loop, bl, reg_map);
5251 /* All the givs based on the biv bl have been reduced if they
5252 merit it. */
5254 /* For each giv not marked as maybe dead that has been combined with a
5255 second giv, clear any "maybe dead" mark on that second giv.
5256 v->new_reg will either be or refer to the register of the giv it
5257 combined with.
5259 Doing this clearing avoids problems in biv elimination where
5260 a giv's new_reg is a complex value that can't be put in the
5261 insn but the giv combined with (with a reg as new_reg) is
5262 marked maybe_dead. Since the register will be used in either
5263 case, we'd prefer it be used from the simpler giv. */
5265 for (v = bl->giv; v; v = v->next_iv)
5266 if (! v->maybe_dead && v->same)
5267 v->same->maybe_dead = 0;
5269 /* Try to eliminate the biv, if it is a candidate.
5270 This won't work if ! bl->all_reduced,
5271 since the givs we planned to use might not have been reduced.
5273 We have to be careful that we didn't initially think we could
5274 eliminate this biv because of a giv that we now think may be
5275 dead and shouldn't be used as a biv replacement.
5277 Also, there is the possibility that we may have a giv that looks
5278 like it can be used to eliminate a biv, but the resulting insn
5279 isn't valid. This can happen, for example, on the 88k, where a
5280 JUMP_INSN can compare a register only with zero. Attempts to
5281 replace it with a compare with a constant will fail.
5283 Note that in cases where this call fails, we may have replaced some
5284 of the occurrences of the biv with a giv, but no harm was done in
5285 doing so in the rare cases where it can occur. */
5287 if (bl->all_reduced == 1 && bl->eliminable
5288 && maybe_eliminate_biv (loop, bl, 1, threshold, insn_count))
5290 /* ?? If we created a new test to bypass the loop entirely,
5291 or otherwise drop straight in, based on this test, then
5292 we might want to rewrite it also. This way some later
5293 pass has more hope of removing the initialization of this
5294 biv entirely. */
5296 /* If final_value != 0, then the biv may be used after loop end
5297 and we must emit an insn to set it just in case.
5299 Reversed bivs already have an insn after the loop setting their
5300 value, so we don't need another one. We can't calculate the
5301 proper final value for such a biv here anyway. */
5302 if (bl->final_value && ! bl->reversed)
5303 loop_insn_sink_or_swim (loop,
5304 gen_load_of_final_value (bl->biv->dest_reg,
5305 bl->final_value));
5307 if (loop_dump_stream)
5308 fprintf (loop_dump_stream, "Reg %d: biv eliminated\n",
5309 bl->regno);
5311 /* See above note wrt final_value. But since we couldn't eliminate
5312 the biv, we must set the value after the loop instead of before. */
5313 else if (bl->final_value && ! bl->reversed)
5314 loop_insn_sink (loop, gen_load_of_final_value (bl->biv->dest_reg,
5315 bl->final_value));
5318 /* Go through all the instructions in the loop, making all the
5319 register substitutions scheduled in REG_MAP. */
5321 for (p = loop->start; p != loop->end; p = NEXT_INSN (p))
5322 if (GET_CODE (p) == INSN || GET_CODE (p) == JUMP_INSN
5323 || GET_CODE (p) == CALL_INSN)
5325 replace_regs (PATTERN (p), reg_map, reg_map_size, 0);
5326 replace_regs (REG_NOTES (p), reg_map, reg_map_size, 0);
5327 INSN_CODE (p) = -1;
5330 if (loop_info->n_iterations > 0)
5332 /* When we completely unroll a loop we will likely not need the increment
5333 of the loop BIV and we will not need the conditional branch at the
5334 end of the loop. */
5335 unrolled_insn_copies = insn_count - 2;
5337 #ifdef HAVE_cc0
5338 /* When we completely unroll a loop on a HAVE_cc0 machine we will not
5339 need the comparison before the conditional branch at the end of the
5340 loop. */
5341 unrolled_insn_copies -= 1;
5342 #endif
5344 /* We'll need one copy for each loop iteration. */
5345 unrolled_insn_copies *= loop_info->n_iterations;
5347 /* A little slop to account for the ability to remove initialization
5348 code, better CSE, and other secondary benefits of completely
5349 unrolling some loops. */
5350 unrolled_insn_copies -= 1;
5352 /* Clamp the value. */
5353 if (unrolled_insn_copies < 0)
5354 unrolled_insn_copies = 0;
5357 /* Unroll loops from within strength reduction so that we can use the
5358 induction variable information that strength_reduce has already
5359 collected. Always unroll loops that would be as small or smaller
5360 unrolled than when rolled. */
5361 if ((flags & LOOP_UNROLL)
5362 || ((flags & LOOP_AUTO_UNROLL)
5363 && loop_info->n_iterations > 0
5364 && unrolled_insn_copies <= insn_count))
5365 unroll_loop (loop, insn_count, 1);
5367 if (loop_dump_stream)
5368 fprintf (loop_dump_stream, "\n");
5370 loop_ivs_free (loop);
5371 if (reg_map)
5372 free (reg_map);
5375 /* Record all basic induction variables calculated in the insn. */
5376 static rtx
5377 check_insn_for_bivs (struct loop *loop, rtx p, int not_every_iteration,
5378 int maybe_multiple)
5380 struct loop_ivs *ivs = LOOP_IVS (loop);
5381 rtx set;
5382 rtx dest_reg;
5383 rtx inc_val;
5384 rtx mult_val;
5385 rtx *location;
5387 if (GET_CODE (p) == INSN
5388 && (set = single_set (p))
5389 && REG_P (SET_DEST (set)))
5391 dest_reg = SET_DEST (set);
5392 if (REGNO (dest_reg) < max_reg_before_loop
5393 && REGNO (dest_reg) >= FIRST_PSEUDO_REGISTER
5394 && REG_IV_TYPE (ivs, REGNO (dest_reg)) != NOT_BASIC_INDUCT)
5396 if (basic_induction_var (loop, SET_SRC (set),
5397 GET_MODE (SET_SRC (set)),
5398 dest_reg, p, &inc_val, &mult_val,
5399 &location))
5401 /* It is a possible basic induction variable.
5402 Create and initialize an induction structure for it. */
5404 struct induction *v = xmalloc (sizeof (struct induction));
5406 record_biv (loop, v, p, dest_reg, inc_val, mult_val, location,
5407 not_every_iteration, maybe_multiple);
5408 REG_IV_TYPE (ivs, REGNO (dest_reg)) = BASIC_INDUCT;
5410 else if (REGNO (dest_reg) < ivs->n_regs)
5411 REG_IV_TYPE (ivs, REGNO (dest_reg)) = NOT_BASIC_INDUCT;
5414 return p;
5417 /* Record all givs calculated in the insn.
5418 A register is a giv if: it is only set once, it is a function of a
5419 biv and a constant (or invariant), and it is not a biv. */
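/* For example (hypothetical source), in

       for (i = 0; i < n; i++)
	 q[i] = p[3 * i + 2];

   a register holding 3 * i + 2 would be a DEST_REG giv of the biv I with
   mult_val 3 and add_val 2, while the addresses of q[i] and p[3 * i + 2]
   would be DEST_ADDR givs found by find_mem_givs below.  */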
5420 static rtx
5421 check_insn_for_givs (struct loop *loop, rtx p, int not_every_iteration,
5422 int maybe_multiple)
5424 struct loop_regs *regs = LOOP_REGS (loop);
5426 rtx set;
5427 /* Look for a general induction variable in a register. */
5428 if (GET_CODE (p) == INSN
5429 && (set = single_set (p))
5430 && REG_P (SET_DEST (set))
5431 && ! regs->array[REGNO (SET_DEST (set))].may_not_optimize)
5433 rtx src_reg;
5434 rtx dest_reg;
5435 rtx add_val;
5436 rtx mult_val;
5437 rtx ext_val;
5438 int benefit;
5439 rtx regnote = 0;
5440 rtx last_consec_insn;
5442 dest_reg = SET_DEST (set);
5443 if (REGNO (dest_reg) < FIRST_PSEUDO_REGISTER)
5444 return p;
5446 if (/* SET_SRC is a giv. */
5447 (general_induction_var (loop, SET_SRC (set), &src_reg, &add_val,
5448 &mult_val, &ext_val, 0, &benefit, VOIDmode)
5449 /* Equivalent expression is a giv. */
5450 || ((regnote = find_reg_note (p, REG_EQUAL, NULL_RTX))
5451 && general_induction_var (loop, XEXP (regnote, 0), &src_reg,
5452 &add_val, &mult_val, &ext_val, 0,
5453 &benefit, VOIDmode)))
5454 /* Don't try to handle any regs made by loop optimization.
5455 We have nothing on them in regno_first_uid, etc. */
5456 && REGNO (dest_reg) < max_reg_before_loop
5457 /* Don't recognize a BASIC_INDUCT_VAR here. */
5458 && dest_reg != src_reg
5459 /* This must be the only place where the register is set. */
5460 && (regs->array[REGNO (dest_reg)].n_times_set == 1
5461 /* or all sets must be consecutive and make a giv. */
5462 || (benefit = consec_sets_giv (loop, benefit, p,
5463 src_reg, dest_reg,
5464 &add_val, &mult_val, &ext_val,
5465 &last_consec_insn))))
5467 struct induction *v = xmalloc (sizeof (struct induction));
5469 /* If this is a library call, increase benefit. */
5470 if (find_reg_note (p, REG_RETVAL, NULL_RTX))
5471 benefit += libcall_benefit (p);
5473 /* Skip the consecutive insns, if there are any. */
5474 if (regs->array[REGNO (dest_reg)].n_times_set != 1)
5475 p = last_consec_insn;
5477 record_giv (loop, v, p, src_reg, dest_reg, mult_val, add_val,
5478 ext_val, benefit, DEST_REG, not_every_iteration,
5479 maybe_multiple, (rtx*) 0);
5484 /* Look for givs which are memory addresses. */
5485 if (GET_CODE (p) == INSN)
5486 find_mem_givs (loop, PATTERN (p), p, not_every_iteration,
5487 maybe_multiple);
5489 /* Update the status of whether giv can derive other givs. This can
5490 change when we pass a label or an insn that updates a biv. */
5491 if (GET_CODE (p) == INSN || GET_CODE (p) == JUMP_INSN
5492 || GET_CODE (p) == CODE_LABEL)
5493 update_giv_derive (loop, p);
5494 return p;
5497 /* Return 1 if X is a valid source for an initial value (or as value being
5498 compared against in an initial test).
5500 X must be either a register or constant and must not be clobbered between
5501 the current insn and the start of the loop.
5503 INSN is the insn containing X. */
5505 static int
5506 valid_initial_value_p (rtx x, rtx insn, int call_seen, rtx loop_start)
5508 if (CONSTANT_P (x))
5509 return 1;
5511 /* Only consider pseudos we know about initialized in insns whose luids
5512 we know. */
5513 if (!REG_P (x)
5514 || REGNO (x) >= max_reg_before_loop)
5515 return 0;
5517 /* Don't use a call-clobbered register across a call which clobbers it. On
5518 some machines, don't use any hard registers at all. */
5519 if (REGNO (x) < FIRST_PSEUDO_REGISTER
5520 && (SMALL_REGISTER_CLASSES
5521 || (call_used_regs[REGNO (x)] && call_seen)))
5522 return 0;
5524 /* Don't use registers that have been clobbered before the start of the
5525 loop. */
5526 if (reg_set_between_p (x, insn, loop_start))
5527 return 0;
5529 return 1;
5532 /* Scan X for memory refs and check each memory address
5533 as a possible giv. INSN is the insn whose pattern X comes from.
5534 NOT_EVERY_ITERATION is 1 if the insn might not be executed during
5535 every loop iteration. MAYBE_MULTIPLE is 1 if the insn might be executed
5536 more than once in each loop iteration. */
5538 static void
5539 find_mem_givs (const struct loop *loop, rtx x, rtx insn,
5540 int not_every_iteration, int maybe_multiple)
5542 int i, j;
5543 enum rtx_code code;
5544 const char *fmt;
5546 if (x == 0)
5547 return;
5549 code = GET_CODE (x);
5550 switch (code)
5552 case REG:
5553 case CONST_INT:
5554 case CONST:
5555 case CONST_DOUBLE:
5556 case SYMBOL_REF:
5557 case LABEL_REF:
5558 case PC:
5559 case CC0:
5560 case ADDR_VEC:
5561 case ADDR_DIFF_VEC:
5562 case USE:
5563 case CLOBBER:
5564 return;
5566 case MEM:
5568 rtx src_reg;
5569 rtx add_val;
5570 rtx mult_val;
5571 rtx ext_val;
5572 int benefit;
5574 /* This code used to disable creating GIVs with mult_val == 1 and
5575 add_val == 0. However, this leads to lost optimizations when
5576 it comes time to combine a set of related DEST_ADDR GIVs, since
5577 this one would not be seen. */
5579 if (general_induction_var (loop, XEXP (x, 0), &src_reg, &add_val,
5580 &mult_val, &ext_val, 1, &benefit,
5581 GET_MODE (x)))
5583 /* Found one; record it. */
5584 struct induction *v = xmalloc (sizeof (struct induction));
5586 record_giv (loop, v, insn, src_reg, addr_placeholder, mult_val,
5587 add_val, ext_val, benefit, DEST_ADDR,
5588 not_every_iteration, maybe_multiple, &XEXP (x, 0));
5590 v->mem = x;
5593 return;
5595 default:
5596 break;
5599 /* Recursively scan the subexpressions for other mem refs. */
5601 fmt = GET_RTX_FORMAT (code);
5602 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5603 if (fmt[i] == 'e')
5604 find_mem_givs (loop, XEXP (x, i), insn, not_every_iteration,
5605 maybe_multiple);
5606 else if (fmt[i] == 'E')
5607 for (j = 0; j < XVECLEN (x, i); j++)
5608 find_mem_givs (loop, XVECEXP (x, i, j), insn, not_every_iteration,
5609 maybe_multiple);
5612 /* Fill in the data about one biv update.
5613 V is the `struct induction' in which we record the biv. (It is
5614 allocated by the caller, with xmalloc.)
5615 INSN is the insn that sets it.
5616 DEST_REG is the biv's reg.
5618 MULT_VAL is const1_rtx if the biv is being incremented here, in which case
5619 INC_VAL is the increment. Otherwise, MULT_VAL is const0_rtx and the biv is
5620 being set to INC_VAL.
5622 NOT_EVERY_ITERATION is nonzero if this biv update is not known to be
5623 executed every iteration; MAYBE_MULTIPLE is nonzero if this biv update
5624 can be executed more than once per iteration. If MAYBE_MULTIPLE
5625 and NOT_EVERY_ITERATION are both zero, we know that the biv update is
5626 executed exactly once per iteration. */
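/* A schematic illustration of this convention (the names are made up):
   a source-level update such as  i = i + 4  reaches record_biv with
   MULT_VAL == const1_rtx and INC_VAL == (const_int 4), while a plain
   reinitialization such as  i = n  with N loop-invariant reaches it with
   MULT_VAL == const0_rtx and INC_VAL == the rtx for N.  */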
5628 static void
5629 record_biv (struct loop *loop, struct induction *v, rtx insn, rtx dest_reg,
5630 rtx inc_val, rtx mult_val, rtx *location,
5631 int not_every_iteration, int maybe_multiple)
5633 struct loop_ivs *ivs = LOOP_IVS (loop);
5634 struct iv_class *bl;
5636 v->insn = insn;
5637 v->src_reg = dest_reg;
5638 v->dest_reg = dest_reg;
5639 v->mult_val = mult_val;
5640 v->add_val = inc_val;
5641 v->ext_dependent = NULL_RTX;
5642 v->location = location;
5643 v->mode = GET_MODE (dest_reg);
5644 v->always_computable = ! not_every_iteration;
5645 v->always_executed = ! not_every_iteration;
5646 v->maybe_multiple = maybe_multiple;
5647 v->same = 0;
5649 /* Add this to the reg's iv_class, creating a class
5650 if this is the first incrementation of the reg. */
5652 bl = REG_IV_CLASS (ivs, REGNO (dest_reg));
5653 if (bl == 0)
5655 /* Create and initialize new iv_class. */
5657 bl = xmalloc (sizeof (struct iv_class));
5659 bl->regno = REGNO (dest_reg);
5660 bl->biv = 0;
5661 bl->giv = 0;
5662 bl->biv_count = 0;
5663 bl->giv_count = 0;
5665 /* Set initial value to the reg itself. */
5666 bl->initial_value = dest_reg;
5667 bl->final_value = 0;
5668 /* We haven't seen the initializing insn yet. */
5669 bl->init_insn = 0;
5670 bl->init_set = 0;
5671 bl->initial_test = 0;
5672 bl->incremented = 0;
5673 bl->eliminable = 0;
5674 bl->nonneg = 0;
5675 bl->reversed = 0;
5676 bl->total_benefit = 0;
5678 /* Add this class to ivs->list. */
5679 bl->next = ivs->list;
5680 ivs->list = bl;
5682 /* Put it in the array of biv register classes. */
5683 REG_IV_CLASS (ivs, REGNO (dest_reg)) = bl;
5685 else
5687 /* Check if location is the same as a previous one. */
5688 struct induction *induction;
5689 for (induction = bl->biv; induction; induction = induction->next_iv)
5690 if (location == induction->location)
5692 v->same = induction;
5693 break;
5697 /* Update IV_CLASS entry for this biv. */
5698 v->next_iv = bl->biv;
5699 bl->biv = v;
5700 bl->biv_count++;
5701 if (mult_val == const1_rtx)
5702 bl->incremented = 1;
5704 if (loop_dump_stream)
5705 loop_biv_dump (v, loop_dump_stream, 0);
5708 /* Fill in the data about one giv.
5709 V is the `struct induction' in which we record the giv. (It is
5710 allocated by the caller, with xmalloc.)
5711 INSN is the insn that sets it.
5712 BENEFIT estimates the savings from deleting this insn.
5713 TYPE is DEST_REG or DEST_ADDR; it says whether the giv is computed
5714 into a register or is used as a memory address.
5716 SRC_REG is the biv reg which the giv is computed from.
5717 DEST_REG is the giv's reg (if the giv is stored in a reg).
5718 MULT_VAL and ADD_VAL are the coefficients used to compute the giv.
5719 LOCATION points to the place where this giv's value appears in INSN. */
5721 static void
5722 record_giv (const struct loop *loop, struct induction *v, rtx insn,
5723 rtx src_reg, rtx dest_reg, rtx mult_val, rtx add_val,
5724 rtx ext_val, int benefit, enum g_types type,
5725 int not_every_iteration, int maybe_multiple, rtx *location)
5727 struct loop_ivs *ivs = LOOP_IVS (loop);
5728 struct induction *b;
5729 struct iv_class *bl;
5730 rtx set = single_set (insn);
5731 rtx temp;
5733 /* Attempt to prove constantness of the values. Don't let simplify_rtx
5734 undo the MULT canonicalization that we performed earlier. */
5735 temp = simplify_rtx (add_val);
5736 if (temp
5737 && ! (GET_CODE (add_val) == MULT
5738 && GET_CODE (temp) == ASHIFT))
5739 add_val = temp;
5741 v->insn = insn;
5742 v->src_reg = src_reg;
5743 v->giv_type = type;
5744 v->dest_reg = dest_reg;
5745 v->mult_val = mult_val;
5746 v->add_val = add_val;
5747 v->ext_dependent = ext_val;
5748 v->benefit = benefit;
5749 v->location = location;
5750 v->cant_derive = 0;
5751 v->combined_with = 0;
5752 v->maybe_multiple = maybe_multiple;
5753 v->maybe_dead = 0;
5754 v->derive_adjustment = 0;
5755 v->same = 0;
5756 v->ignore = 0;
5757 v->new_reg = 0;
5758 v->final_value = 0;
5759 v->same_insn = 0;
5760 v->auto_inc_opt = 0;
5761 v->unrolled = 0;
5762 v->shared = 0;
5764 /* The v->always_computable field is used in update_giv_derive, to
5765 determine whether a giv can be used to derive another giv. For a
5766 DEST_REG giv, INSN computes a new value for the giv, so its value
5767 isn't computable if INSN isn't executed every iteration.
5768 However, for a DEST_ADDR giv, INSN merely uses the value of the giv;
5769 it does not compute a new value. Hence the value is always computable
5770 regardless of whether INSN is executed each iteration. */
5772 if (type == DEST_ADDR)
5773 v->always_computable = 1;
5774 else
5775 v->always_computable = ! not_every_iteration;
5777 v->always_executed = ! not_every_iteration;
5779 if (type == DEST_ADDR)
5781 v->mode = GET_MODE (*location);
5782 v->lifetime = 1;
5784 else /* type == DEST_REG */
5786 v->mode = GET_MODE (SET_DEST (set));
5788 v->lifetime = LOOP_REG_LIFETIME (loop, REGNO (dest_reg));
5790 /* If the lifetime is zero, it means that this register is
5791 really a dead store. So mark this as a giv that can be
5792 ignored. This will not prevent the biv from being eliminated. */
5793 if (v->lifetime == 0)
5794 v->ignore = 1;
5796 REG_IV_TYPE (ivs, REGNO (dest_reg)) = GENERAL_INDUCT;
5797 REG_IV_INFO (ivs, REGNO (dest_reg)) = v;
5800 /* Add the giv to the class of givs computed from one biv. */
5802 bl = REG_IV_CLASS (ivs, REGNO (src_reg));
5803 if (bl)
5805 v->next_iv = bl->giv;
5806 bl->giv = v;
5807 /* Don't count DEST_ADDR. This is supposed to count the number of
5808 insns that calculate givs. */
5809 if (type == DEST_REG)
5810 bl->giv_count++;
5811 bl->total_benefit += benefit;
5813 else
5814 /* Fatal error, biv missing for this giv? */
5815 abort ();
5817 if (type == DEST_ADDR)
5819 v->replaceable = 1;
5820 v->not_replaceable = 0;
5822 else
5824 /* The giv can be replaced outright by the reduced register only if all
5825 of the following conditions are true:
5826 - the insn that sets the giv is always executed on any iteration
5827 on which the giv is used at all
5828 (there are two ways to deduce this:
5829 either the insn is executed on every iteration,
5830 or all uses follow that insn in the same basic block),
5831 - the giv is not used outside the loop
5832 - no assignments to the biv occur during the giv's lifetime. */
5834 if (REGNO_FIRST_UID (REGNO (dest_reg)) == INSN_UID (insn)
5835 /* Previous line always fails if INSN was moved by loop opt. */
5836 && REGNO_LAST_LUID (REGNO (dest_reg))
5837 < INSN_LUID (loop->end)
5838 && (! not_every_iteration
5839 || last_use_this_basic_block (dest_reg, insn)))
5841 /* Now check that there are no assignments to the biv within the
5842 giv's lifetime. This requires two separate checks. */
5844 /* Check each biv update, and fail if any are between the first
5845 and last use of the giv.
5847 If this loop contains an inner loop that was unrolled, then
5848 the insn modifying the biv may have been emitted by the loop
5849 unrolling code, and hence does not have a valid luid. Just
5850 mark the biv as not replaceable in this case. It is not very
5851 useful as a biv, because it is used in two different loops.
5852 It is very unlikely that we would be able to optimize the giv
5853 using this biv anyways. */
5855 v->replaceable = 1;
5856 v->not_replaceable = 0;
5857 for (b = bl->biv; b; b = b->next_iv)
5859 if (INSN_UID (b->insn) >= max_uid_for_loop
5860 || ((INSN_LUID (b->insn)
5861 >= REGNO_FIRST_LUID (REGNO (dest_reg)))
5862 && (INSN_LUID (b->insn)
5863 <= REGNO_LAST_LUID (REGNO (dest_reg)))))
5865 v->replaceable = 0;
5866 v->not_replaceable = 1;
5867 break;
5871 /* If there are any backwards branches that go from after the
5872 biv update to before it, then this giv is not replaceable. */
5873 if (v->replaceable)
5874 for (b = bl->biv; b; b = b->next_iv)
5875 if (back_branch_in_range_p (loop, b->insn))
5877 v->replaceable = 0;
5878 v->not_replaceable = 1;
5879 break;
5882 else
5884 /* May still be replaceable, we don't have enough info here to
5885 decide. */
5886 v->replaceable = 0;
5887 v->not_replaceable = 0;
5891 /* Record whether the add_val contains a const_int, for later use by
5892 combine_givs. */
5894 rtx tem = add_val;
5896 v->no_const_addval = 1;
5897 if (tem == const0_rtx)
5899 else if (CONSTANT_P (add_val))
5900 v->no_const_addval = 0;
5901 if (GET_CODE (tem) == PLUS)
5903 while (1)
5905 if (GET_CODE (XEXP (tem, 0)) == PLUS)
5906 tem = XEXP (tem, 0);
5907 else if (GET_CODE (XEXP (tem, 1)) == PLUS)
5908 tem = XEXP (tem, 1);
5909 else
5910 break;
5912 if (CONSTANT_P (XEXP (tem, 1)))
5913 v->no_const_addval = 0;
5917 if (loop_dump_stream)
5918 loop_giv_dump (v, loop_dump_stream, 0);
5921 /* All this does is determine whether a giv can be made replaceable because
5922 its final value can be calculated. This code can not be part of record_giv
5923 above, because final_giv_value requires that the number of loop iterations
5924 be known, and that can not be accurately calculated until after all givs
5925 have been identified. */
5927 static void
5928 check_final_value (const struct loop *loop, struct induction *v)
5930 rtx final_value = 0;
5932 /* DEST_ADDR givs will never reach here, because they are always marked
5933 replaceable above in record_giv. */
5935 /* The giv can be replaced outright by the reduced register only if all
5936 of the following conditions are true:
5937 - the insn that sets the giv is always executed on any iteration
5938 on which the giv is used at all
5939 (there are two ways to deduce this:
5940 either the insn is executed on every iteration,
5941 or all uses follow that insn in the same basic block),
5942 - its final value can be calculated (this condition is different
5943 than the one above in record_giv)
5944 - it's not used before it's set
5945 - no assignments to the biv occur during the giv's lifetime. */
5947 #if 0
5948 /* This is only called now when replaceable is known to be false. */
5949 /* Clear replaceable, so that it won't confuse final_giv_value. */
5950 v->replaceable = 0;
5951 #endif
5953 if ((final_value = final_giv_value (loop, v))
5954 && (v->always_executed
5955 || last_use_this_basic_block (v->dest_reg, v->insn)))
5957 int biv_increment_seen = 0, before_giv_insn = 0;
5958 rtx p = v->insn;
5959 rtx last_giv_use;
5961 v->replaceable = 1;
5962 v->not_replaceable = 0;
5964 /* When trying to determine whether or not a biv increment occurs
5965 during the lifetime of the giv, we can ignore uses of the variable
5966 outside the loop because final_value is true. Hence we can not
5967 use regno_last_uid and regno_first_uid as above in record_giv. */
5969 /* Search the loop to determine whether any assignments to the
5970 biv occur during the giv's lifetime. Start with the insn
5971 that sets the giv, and search around the loop until we come
5972 back to that insn again.
5974 Also fail if there is a jump within the giv's lifetime that jumps
5975 to somewhere outside the lifetime but still within the loop. This
5976 catches spaghetti code where the execution order is not linear, and
5977 hence the above test fails. Here we assume that the giv lifetime
5978 does not extend from one iteration of the loop to the next, so as
5979 to make the test easier. Since the lifetime isn't known yet,
5980 this requires two loops. See also record_giv above. */
5982 last_giv_use = v->insn;
5984 while (1)
5986 p = NEXT_INSN (p);
5987 if (p == loop->end)
5989 before_giv_insn = 1;
5990 p = NEXT_INSN (loop->start);
5992 if (p == v->insn)
5993 break;
5995 if (GET_CODE (p) == INSN || GET_CODE (p) == JUMP_INSN
5996 || GET_CODE (p) == CALL_INSN)
5998 /* It is possible for the BIV increment to use the GIV if we
5999 have a cycle. Thus we must be sure to check each insn for
6000 both BIV and GIV uses, and we must check for BIV uses
6001 first. */
6003 if (! biv_increment_seen
6004 && reg_set_p (v->src_reg, PATTERN (p)))
6005 biv_increment_seen = 1;
6007 if (reg_mentioned_p (v->dest_reg, PATTERN (p)))
6009 if (biv_increment_seen || before_giv_insn)
6011 v->replaceable = 0;
6012 v->not_replaceable = 1;
6013 break;
6015 last_giv_use = p;
6020 /* Now that the lifetime of the giv is known, check for branches
6021 from within the lifetime to outside the lifetime if it is still
6022 replaceable. */
6024 if (v->replaceable)
6026 p = v->insn;
6027 while (1)
6029 p = NEXT_INSN (p);
6030 if (p == loop->end)
6031 p = NEXT_INSN (loop->start);
6032 if (p == last_giv_use)
6033 break;
6035 if (GET_CODE (p) == JUMP_INSN && JUMP_LABEL (p)
6036 && LABEL_NAME (JUMP_LABEL (p))
6037 && ((loop_insn_first_p (JUMP_LABEL (p), v->insn)
6038 && loop_insn_first_p (loop->start, JUMP_LABEL (p)))
6039 || (loop_insn_first_p (last_giv_use, JUMP_LABEL (p))
6040 && loop_insn_first_p (JUMP_LABEL (p), loop->end))))
6042 v->replaceable = 0;
6043 v->not_replaceable = 1;
6045 if (loop_dump_stream)
6046 fprintf (loop_dump_stream,
6047 "Found branch outside giv lifetime.\n");
6049 break;
6054 /* If it is replaceable, then save the final value. */
6055 if (v->replaceable)
6056 v->final_value = final_value;
6059 if (loop_dump_stream && v->replaceable)
6060 fprintf (loop_dump_stream, "Insn %d: giv reg %d final_value replaceable\n",
6061 INSN_UID (v->insn), REGNO (v->dest_reg));
6064 /* Update the status of whether a giv can derive other givs.
6066 We need to do something special if there is or may be an update to the biv
6067 between the time the giv is defined and the time it is used to derive
6068 another giv.
6070 In addition, a giv that is only conditionally set is not allowed to
6071 derive another giv once a label has been passed.
6073 The cases we look at are when a label or an update to a biv is passed. */
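/* A worked example of the adjustment (illustrative, hypothetical values):
   suppose a giv G was recorded as G = B * 3 + 1 and we then pass the biv
   update B = B + 2.  In terms of the updated B the recorded value is
   3 * (B - 2) + 1, i.e. (B * 3 + 1) - 6, so the product
   biv->add_val * giv->mult_val = 2 * 3 = 6 is stored as derive_adjustment
   and later subtracted by simplify_giv_expr when G is used to derive
   another giv.  */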
6075 static void
6076 update_giv_derive (const struct loop *loop, rtx p)
6078 struct loop_ivs *ivs = LOOP_IVS (loop);
6079 struct iv_class *bl;
6080 struct induction *biv, *giv;
6081 rtx tem;
6082 int dummy;
6084 /* Search all IV classes, then all bivs, and finally all givs.
6086 There are three cases we are concerned with. First we have the situation
6087 of a giv that is only updated conditionally. In that case, it may not
6088 derive any givs after a label is passed.
6090 The second case is when a biv update occurs, or may occur, after the
6091 definition of a giv. For certain biv updates (see below) that are
6092 known to occur between the giv definition and use, we can adjust the
6093 giv definition. For others, or when the biv update is conditional,
6094 we must prevent the giv from deriving any other givs. There are two
6095 sub-cases within this case.
6097 If this is a label, we are concerned with any biv update that is done
6098 conditionally, since it may be done after the giv is defined followed by
6099 a branch here (actually, we need to pass both a jump and a label, but
6100 this extra tracking doesn't seem worth it).
6102 If this is a jump, we are concerned about any biv update that may be
6103 executed multiple times. We are actually only concerned about
6104 backward jumps, but it is probably not worth performing the test
6105 on the jump again here.
6107 If this is a biv update, we must adjust the giv status to show that a
6108 subsequent biv update was performed. If this adjustment cannot be done,
6109 the giv cannot derive further givs. */
6111 for (bl = ivs->list; bl; bl = bl->next)
6112 for (biv = bl->biv; biv; biv = biv->next_iv)
6113 if (GET_CODE (p) == CODE_LABEL || GET_CODE (p) == JUMP_INSN
6114 || biv->insn == p)
6116 /* Skip if location is the same as a previous one. */
6117 if (biv->same)
6118 continue;
6120 for (giv = bl->giv; giv; giv = giv->next_iv)
6122 /* If cant_derive is already true, there is no point in
6123 checking all of these conditions again. */
6124 if (giv->cant_derive)
6125 continue;
6127 /* If this giv is conditionally set and we have passed a label,
6128 it cannot derive anything. */
6129 if (GET_CODE (p) == CODE_LABEL && ! giv->always_computable)
6130 giv->cant_derive = 1;
6132 /* Skip givs that have mult_val == 0, since
6133 they are really invariants. Also skip those that are
6134 replaceable, since we know their lifetime doesn't contain
6135 any biv update. */
6136 else if (giv->mult_val == const0_rtx || giv->replaceable)
6137 continue;
6139 /* The only way we can allow this giv to derive another
6140 is if this is a biv increment and we can form the product
6141 of biv->add_val and giv->mult_val. In this case, we will
6142 be able to compute a compensation. */
6143 else if (biv->insn == p)
6145 rtx ext_val_dummy;
6147 tem = 0;
6148 if (biv->mult_val == const1_rtx)
6149 tem = simplify_giv_expr (loop,
6150 gen_rtx_MULT (giv->mode,
6151 biv->add_val,
6152 giv->mult_val),
6153 &ext_val_dummy, &dummy);
6155 if (tem && giv->derive_adjustment)
6156 tem = simplify_giv_expr
6157 (loop,
6158 gen_rtx_PLUS (giv->mode, tem, giv->derive_adjustment),
6159 &ext_val_dummy, &dummy);
6161 if (tem)
6162 giv->derive_adjustment = tem;
6163 else
6164 giv->cant_derive = 1;
6166 else if ((GET_CODE (p) == CODE_LABEL && ! biv->always_computable)
6167 || (GET_CODE (p) == JUMP_INSN && biv->maybe_multiple))
6168 giv->cant_derive = 1;
6173 /* Check whether an insn is an increment legitimate for a basic induction var.
6174 X is the source of insn P, or a part of it.
6175 MODE is the mode in which X should be interpreted.
6177 DEST_REG is the putative biv, also the destination of the insn.
6178 We accept patterns of these forms:
6179 REG = REG + INVARIANT (includes REG = REG - CONSTANT)
6180 REG = INVARIANT + REG
6182 If X is suitable, we return 1, set *MULT_VAL to CONST1_RTX,
6183 store the additive term into *INC_VAL, and store the place where
6184 we found the additive term into *LOCATION.
6186 If X is an assignment of an invariant into DEST_REG, we set
6187 *MULT_VAL to CONST0_RTX, and store the invariant into *INC_VAL.
6189 We also want to detect a BIV when it corresponds to a variable
6190 whose mode was promoted. In that case, an increment
6191 of the variable may be a PLUS that adds a SUBREG of that variable to
6192 an invariant and then sign- or zero-extends the result of the PLUS
6193 into the variable.
6195 Most GIVs in such cases will be in the promoted mode, since that is
6196 probably the natural computation mode (and almost certainly the mode
6197 used for addresses) on the machine. So we view the pseudo-reg containing
6198 the variable as the BIV, as if it were simply incremented.
6200 Note that treating the entire pseudo as a BIV will result in making
6201 simple increments to any GIVs based on it. However, if the variable
6202 overflows in its declared mode but not its promoted mode, the result will
6203 be incorrect. This is acceptable if the variable is signed, since
6204 overflows in such cases are undefined, but not if it is unsigned, since
6205 those overflows are defined. So we only check for SIGN_EXTEND and
6206 not ZERO_EXTEND.
6208 If we cannot find a biv, we return 0. */
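/* A sketch of the interface (hypothetical patterns): for an insn whose
   source is (plus (reg i) (const_int 4)) with (reg i) the putative biv,
   the PLUS case below returns 1 with *MULT_VAL = const1_rtx,
   *INC_VAL = (const_int 4) and *LOCATION pointing at the second operand
   of the PLUS; for a source of (reg n) with N loop-invariant and the
   loop innermost, the fall-through constant case returns 1 with
   *MULT_VAL = const0_rtx and *INC_VAL derived from (reg n).  */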
6210 static int
6211 basic_induction_var (const struct loop *loop, rtx x, enum machine_mode mode,
6212 rtx dest_reg, rtx p, rtx *inc_val, rtx *mult_val,
6213 rtx **location)
6215 enum rtx_code code;
6216 rtx *argp, arg;
6217 rtx insn, set = 0, last, inc;
6219 code = GET_CODE (x);
6220 *location = NULL;
6221 switch (code)
6223 case PLUS:
6224 if (rtx_equal_p (XEXP (x, 0), dest_reg)
6225 || (GET_CODE (XEXP (x, 0)) == SUBREG
6226 && SUBREG_PROMOTED_VAR_P (XEXP (x, 0))
6227 && SUBREG_REG (XEXP (x, 0)) == dest_reg))
6229 argp = &XEXP (x, 1);
6231 else if (rtx_equal_p (XEXP (x, 1), dest_reg)
6232 || (GET_CODE (XEXP (x, 1)) == SUBREG
6233 && SUBREG_PROMOTED_VAR_P (XEXP (x, 1))
6234 && SUBREG_REG (XEXP (x, 1)) == dest_reg))
6236 argp = &XEXP (x, 0);
6238 else
6239 return 0;
6241 arg = *argp;
6242 if (loop_invariant_p (loop, arg) != 1)
6243 return 0;
6245 /* convert_modes can emit new instructions, e.g. when arg is a loop
6246 invariant MEM and dest_reg has a different mode.
6247 These instructions would be emitted after the end of the function
6248 and then *inc_val would be an uninitialized pseudo.
6249 Detect this and bail in this case.
6250 Other ways to solve this would be to introduce a convert_modes
6251 variant which is allowed to fail but not allowed to emit new
6252 instructions, to emit these instructions before the loop start and
6253 let them be garbage collected if *inc_val is never used, or to save
6254 the *inc_val initialization sequence generated here and emit it at
6255 some suitable place when *inc_val is actually used. */
6256 last = get_last_insn ();
6257 inc = convert_modes (GET_MODE (dest_reg), GET_MODE (x), arg, 0);
6258 if (get_last_insn () != last)
6260 delete_insns_since (last);
6261 return 0;
6264 *inc_val = inc;
6265 *mult_val = const1_rtx;
6266 *location = argp;
6267 return 1;
6269 case SUBREG:
6270 /* If what's inside the SUBREG is a BIV, then so is the SUBREG. This will
6271 handle addition of promoted variables.
6272 ??? The comment at the start of this function is wrong: promoted
6273 variable increments don't look like it says they do. */
6274 return basic_induction_var (loop, SUBREG_REG (x),
6275 GET_MODE (SUBREG_REG (x)),
6276 dest_reg, p, inc_val, mult_val, location);
6278 case REG:
6279 /* If this register is assigned in a previous insn, look at its
6280 source, but don't go outside the loop or past a label. */
6282 /* If this sets a register to itself, we would repeat any previous
6283 biv increment if we applied this strategy blindly. */
6284 if (rtx_equal_p (dest_reg, x))
6285 return 0;
6287 insn = p;
6288 while (1)
6290 rtx dest;
6293 insn = PREV_INSN (insn);
6295 while (insn && GET_CODE (insn) == NOTE
6296 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_BEG);
6298 if (!insn)
6299 break;
6300 set = single_set (insn);
6301 if (set == 0)
6302 break;
6303 dest = SET_DEST (set);
6304 if (dest == x
6305 || (GET_CODE (dest) == SUBREG
6306 && (GET_MODE_SIZE (GET_MODE (dest)) <= UNITS_PER_WORD)
6307 && (GET_MODE_CLASS (GET_MODE (dest)) == MODE_INT)
6308 && SUBREG_REG (dest) == x))
6309 return basic_induction_var (loop, SET_SRC (set),
6310 (GET_MODE (SET_SRC (set)) == VOIDmode
6311 ? GET_MODE (x)
6312 : GET_MODE (SET_SRC (set))),
6313 dest_reg, insn,
6314 inc_val, mult_val, location);
6316 while (GET_CODE (dest) == SIGN_EXTRACT
6317 || GET_CODE (dest) == ZERO_EXTRACT
6318 || GET_CODE (dest) == SUBREG
6319 || GET_CODE (dest) == STRICT_LOW_PART)
6320 dest = XEXP (dest, 0);
6321 if (dest == x)
6322 break;
6324 /* Fall through. */
6326 /* Can accept constant setting of biv only when inside the innermost loop.
6327 Otherwise, a biv of an inner loop may be incorrectly recognized
6328 as a biv of the outer loop,
6329 causing code to be moved INTO the inner loop. */
6330 case MEM:
6331 if (loop_invariant_p (loop, x) != 1)
6332 return 0;
6333 case CONST_INT:
6334 case SYMBOL_REF:
6335 case CONST:
6336 /* convert_modes aborts if we try to convert to or from CCmode, so just
6337 exclude that case. It is very unlikely that a condition code value
6338 would be a useful iterator anyways. convert_modes aborts if we try to
6339 convert a float mode to non-float or vice versa too. */
6340 if (loop->level == 1
6341 && GET_MODE_CLASS (mode) == GET_MODE_CLASS (GET_MODE (dest_reg))
6342 && GET_MODE_CLASS (mode) != MODE_CC)
6344 /* Possible bug here? Perhaps we don't know the mode of X. */
6345 last = get_last_insn ();
6346 inc = convert_modes (GET_MODE (dest_reg), mode, x, 0);
6347 if (get_last_insn () != last)
6349 delete_insns_since (last);
6350 return 0;
6353 *inc_val = inc;
6354 *mult_val = const0_rtx;
6355 return 1;
6357 else
6358 return 0;
6360 case SIGN_EXTEND:
6361 /* Ignore this BIV if signed arithmetic overflow is defined. */
6362 if (flag_wrapv)
6363 return 0;
6364 return basic_induction_var (loop, XEXP (x, 0), GET_MODE (XEXP (x, 0)),
6365 dest_reg, p, inc_val, mult_val, location);
6367 case ASHIFTRT:
6368 /* Similar, since this can be a sign extension. */
6369 for (insn = PREV_INSN (p);
6370 (insn && GET_CODE (insn) == NOTE
6371 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_BEG);
6372 insn = PREV_INSN (insn))
6375 if (insn)
6376 set = single_set (insn);
6378 if (! rtx_equal_p (dest_reg, XEXP (x, 0))
6379 && set && SET_DEST (set) == XEXP (x, 0)
6380 && GET_CODE (XEXP (x, 1)) == CONST_INT
6381 && INTVAL (XEXP (x, 1)) >= 0
6382 && GET_CODE (SET_SRC (set)) == ASHIFT
6383 && XEXP (x, 1) == XEXP (SET_SRC (set), 1))
6384 return basic_induction_var (loop, XEXP (SET_SRC (set), 0),
6385 GET_MODE (XEXP (x, 0)),
6386 dest_reg, insn, inc_val, mult_val,
6387 location);
6388 return 0;
6390 default:
6391 return 0;
6395 /* A general induction variable (giv) is any quantity that is a linear
6396 function of a basic induction variable,
6397 i.e. giv = biv * mult_val + add_val.
6398 The coefficients can be any loop invariant quantity.
6399 A giv need not be computed directly from the biv;
6400 it can be computed by way of other givs. */
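/* A schematic source-level example (purely illustrative): in

       for (i = 0; i < n; i++)
         use (i * 4 + 2);

   the counter I is a biv, and the value i * 4 + 2 is a giv of that biv
   with mult_val == 4 and add_val == 2.  Givs arising from memory
   addresses are handled the same way, with the invariant base address
   folded into add_val.  */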
6402 /* Determine whether X computes a giv.
6403 If it does, return a nonzero value
6404 which is the benefit from eliminating the computation of X;
6405 set *SRC_REG to the register of the biv that it is computed from;
6406 set *ADD_VAL and *MULT_VAL to the coefficients,
6407 such that the value of X is biv * mult + add; */
6409 static int
6410 general_induction_var (const struct loop *loop, rtx x, rtx *src_reg,
6411 rtx *add_val, rtx *mult_val, rtx *ext_val,
6412 int is_addr, int *pbenefit,
6413 enum machine_mode addr_mode)
6415 struct loop_ivs *ivs = LOOP_IVS (loop);
6416 rtx orig_x = x;
6418 /* If this is an invariant, forget it, it isn't a giv. */
6419 if (loop_invariant_p (loop, x) == 1)
6420 return 0;
6422 *pbenefit = 0;
6423 *ext_val = NULL_RTX;
6424 x = simplify_giv_expr (loop, x, ext_val, pbenefit);
6425 if (x == 0)
6426 return 0;
6428 switch (GET_CODE (x))
6430 case USE:
6431 case CONST_INT:
6432 /* Since this is now an invariant and wasn't before, it must be a giv
6433 with MULT_VAL == 0. It doesn't matter which BIV we associate this
6434 with. */
6435 *src_reg = ivs->list->biv->dest_reg;
6436 *mult_val = const0_rtx;
6437 *add_val = x;
6438 break;
6440 case REG:
6441 /* This is equivalent to a BIV. */
6442 *src_reg = x;
6443 *mult_val = const1_rtx;
6444 *add_val = const0_rtx;
6445 break;
6447 case PLUS:
6448 /* Either (plus (biv) (invar)) or
6449 (plus (mult (biv) (invar_1)) (invar_2)). */
6450 if (GET_CODE (XEXP (x, 0)) == MULT)
6452 *src_reg = XEXP (XEXP (x, 0), 0);
6453 *mult_val = XEXP (XEXP (x, 0), 1);
6455 else
6457 *src_reg = XEXP (x, 0);
6458 *mult_val = const1_rtx;
6460 *add_val = XEXP (x, 1);
6461 break;
6463 case MULT:
6464 /* ADD_VAL is zero. */
6465 *src_reg = XEXP (x, 0);
6466 *mult_val = XEXP (x, 1);
6467 *add_val = const0_rtx;
6468 break;
6470 default:
6471 abort ();
6474 /* Remove any enclosing USE from ADD_VAL and MULT_VAL (there will be one
6475 unless they are CONST_INT). */
6476 if (GET_CODE (*add_val) == USE)
6477 *add_val = XEXP (*add_val, 0);
6478 if (GET_CODE (*mult_val) == USE)
6479 *mult_val = XEXP (*mult_val, 0);
6481 if (is_addr)
6482 *pbenefit += address_cost (orig_x, addr_mode) - reg_address_cost;
6483 else
6484 *pbenefit += rtx_cost (orig_x, SET);
6486 /* Always return true if this is a giv so it will be detected as such,
6487 even if the benefit is zero or negative. This allows elimination
6488 of bivs that might otherwise not be eliminated. */
6489 return 1;
6492 /* Given an expression, X, try to form it as a linear function of a biv.
6493 We will canonicalize it to be of the form
6494 (plus (mult (BIV) (invar_1))
6495 (invar_2))
6496 with possible degeneracies.
6498 The invariant expressions must each be of a form that can be used as a
6499 machine operand. We surround them with a USE rtx (a hack, but localized
6500 and certainly unambiguous!) if not a CONST_INT for simplicity in this
6501 routine; it is the caller's responsibility to strip them.
6503 If no such canonicalization is possible (i.e., two biv's are used or an
6504 expression that is neither invariant nor a biv or giv), this routine
6505 returns 0.
6507 For a nonzero return, the result will have a code of CONST_INT, USE,
6508 REG (for a BIV), PLUS, or MULT. No other codes will occur.
6510 *BENEFIT will be incremented by the benefit of any sub-giv encountered. */
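/* A worked example of the canonicalization (hypothetical regs): with B a
   biv and C loop-invariant, the expression

       (mult (plus B C) (const_int 2))

   is rewritten by the code below into

       (plus (mult B (const_int 2)) (use (mult C (const_int 2))))

   i.e. invar_1 is 2 and invar_2 is C * 2, with the invariant part
   wrapped in a USE as described above.  */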
6512 static rtx sge_plus (enum machine_mode, rtx, rtx);
6513 static rtx sge_plus_constant (rtx, rtx);
6515 static rtx
6516 simplify_giv_expr (const struct loop *loop, rtx x, rtx *ext_val, int *benefit)
6518 struct loop_ivs *ivs = LOOP_IVS (loop);
6519 struct loop_regs *regs = LOOP_REGS (loop);
6520 enum machine_mode mode = GET_MODE (x);
6521 rtx arg0, arg1;
6522 rtx tem;
6524 /* If this is not an integer mode, or if we cannot do arithmetic in this
6525 mode, this can't be a giv. */
6526 if (mode != VOIDmode
6527 && (GET_MODE_CLASS (mode) != MODE_INT
6528 || GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT))
6529 return NULL_RTX;
6531 switch (GET_CODE (x))
6533 case PLUS:
6534 arg0 = simplify_giv_expr (loop, XEXP (x, 0), ext_val, benefit);
6535 arg1 = simplify_giv_expr (loop, XEXP (x, 1), ext_val, benefit);
6536 if (arg0 == 0 || arg1 == 0)
6537 return NULL_RTX;
6539 /* Put constant last, CONST_INT last if both constant. */
6540 if ((GET_CODE (arg0) == USE
6541 || GET_CODE (arg0) == CONST_INT)
6542 && ! ((GET_CODE (arg0) == USE
6543 && GET_CODE (arg1) == USE)
6544 || GET_CODE (arg1) == CONST_INT))
6545 tem = arg0, arg0 = arg1, arg1 = tem;
6547 /* Handle addition of zero, then addition of an invariant. */
6548 if (arg1 == const0_rtx)
6549 return arg0;
6550 else if (GET_CODE (arg1) == CONST_INT || GET_CODE (arg1) == USE)
6551 switch (GET_CODE (arg0))
6553 case CONST_INT:
6554 case USE:
6555 /* Adding two invariants must result in an invariant, so enclose
6556 addition operation inside a USE and return it. */
6557 if (GET_CODE (arg0) == USE)
6558 arg0 = XEXP (arg0, 0);
6559 if (GET_CODE (arg1) == USE)
6560 arg1 = XEXP (arg1, 0);
6562 if (GET_CODE (arg0) == CONST_INT)
6563 tem = arg0, arg0 = arg1, arg1 = tem;
6564 if (GET_CODE (arg1) == CONST_INT)
6565 tem = sge_plus_constant (arg0, arg1);
6566 else
6567 tem = sge_plus (mode, arg0, arg1);
6569 if (GET_CODE (tem) != CONST_INT)
6570 tem = gen_rtx_USE (mode, tem);
6571 return tem;
6573 case REG:
6574 case MULT:
6575 /* biv + invar or mult + invar. Return sum. */
6576 return gen_rtx_PLUS (mode, arg0, arg1);
6578 case PLUS:
6579 /* (a + invar_1) + invar_2. Associate. */
6580 return
6581 simplify_giv_expr (loop,
6582 gen_rtx_PLUS (mode,
6583 XEXP (arg0, 0),
6584 gen_rtx_PLUS (mode,
6585 XEXP (arg0, 1),
6586 arg1)),
6587 ext_val, benefit);
6589 default:
6590 abort ();
6593 /* Each argument must be either REG, PLUS, or MULT. Convert REG to
6594 MULT to reduce cases. */
6595 if (REG_P (arg0))
6596 arg0 = gen_rtx_MULT (mode, arg0, const1_rtx);
6597 if (REG_P (arg1))
6598 arg1 = gen_rtx_MULT (mode, arg1, const1_rtx);
6600 /* Now have PLUS + PLUS, PLUS + MULT, MULT + PLUS, or MULT + MULT.
6601 Put a MULT first, leaving PLUS + PLUS, MULT + PLUS, or MULT + MULT.
6602 Recurse to associate the second PLUS. */
6603 if (GET_CODE (arg1) == MULT)
6604 tem = arg0, arg0 = arg1, arg1 = tem;
6606 if (GET_CODE (arg1) == PLUS)
6607 return
6608 simplify_giv_expr (loop,
6609 gen_rtx_PLUS (mode,
6610 gen_rtx_PLUS (mode, arg0,
6611 XEXP (arg1, 0)),
6612 XEXP (arg1, 1)),
6613 ext_val, benefit);
6615 /* Now must have MULT + MULT. Distribute if same biv, else not giv. */
6616 if (GET_CODE (arg0) != MULT || GET_CODE (arg1) != MULT)
6617 return NULL_RTX;
6619 if (!rtx_equal_p (arg0, arg1))
6620 return NULL_RTX;
6622 return simplify_giv_expr (loop,
6623 gen_rtx_MULT (mode,
6624 XEXP (arg0, 0),
6625 gen_rtx_PLUS (mode,
6626 XEXP (arg0, 1),
6627 XEXP (arg1, 1))),
6628 ext_val, benefit);
6630 case MINUS:
6631 /* Handle "a - b" as "a + b * (-1)". */
6632 return simplify_giv_expr (loop,
6633 gen_rtx_PLUS (mode,
6634 XEXP (x, 0),
6635 gen_rtx_MULT (mode,
6636 XEXP (x, 1),
6637 constm1_rtx)),
6638 ext_val, benefit);
6640 case MULT:
6641 arg0 = simplify_giv_expr (loop, XEXP (x, 0), ext_val, benefit);
6642 arg1 = simplify_giv_expr (loop, XEXP (x, 1), ext_val, benefit);
6643 if (arg0 == 0 || arg1 == 0)
6644 return NULL_RTX;
6646 /* Put constant last, CONST_INT last if both constant. */
6647 if ((GET_CODE (arg0) == USE || GET_CODE (arg0) == CONST_INT)
6648 && GET_CODE (arg1) != CONST_INT)
6649 tem = arg0, arg0 = arg1, arg1 = tem;
6651 /* If second argument is not now constant, not giv. */
6652 if (GET_CODE (arg1) != USE && GET_CODE (arg1) != CONST_INT)
6653 return NULL_RTX;
6655 /* Handle multiply by 0 or 1. */
6656 if (arg1 == const0_rtx)
6657 return const0_rtx;
6659 else if (arg1 == const1_rtx)
6660 return arg0;
6662 switch (GET_CODE (arg0))
6664 case REG:
6665 /* biv * invar. Done. */
6666 return gen_rtx_MULT (mode, arg0, arg1);
6668 case CONST_INT:
6669 /* Product of two constants. */
6670 return GEN_INT (INTVAL (arg0) * INTVAL (arg1));
6672 case USE:
6673 /* invar * invar is a giv, but attempt to simplify it somehow. */
6674 if (GET_CODE (arg1) != CONST_INT)
6675 return NULL_RTX;
6677 arg0 = XEXP (arg0, 0);
6678 if (GET_CODE (arg0) == MULT)
6680 /* (invar_0 * invar_1) * invar_2. Associate. */
6681 return simplify_giv_expr (loop,
6682 gen_rtx_MULT (mode,
6683 XEXP (arg0, 0),
6684 gen_rtx_MULT (mode,
6685 XEXP (arg0,
6686 1),
6687 arg1)),
6688 ext_val, benefit);
6690 /* Propagate the MULT expressions to the innermost nodes. */
6691 else if (GET_CODE (arg0) == PLUS)
6693 /* (invar_0 + invar_1) * invar_2. Distribute. */
6694 return simplify_giv_expr (loop,
6695 gen_rtx_PLUS (mode,
6696 gen_rtx_MULT (mode,
6697 XEXP (arg0,
6698 0),
6699 arg1),
6700 gen_rtx_MULT (mode,
6701 XEXP (arg0,
6702 1),
6703 arg1)),
6704 ext_val, benefit);
6706 return gen_rtx_USE (mode, gen_rtx_MULT (mode, arg0, arg1));
6708 case MULT:
6709 /* (a * invar_1) * invar_2. Associate. */
6710 return simplify_giv_expr (loop,
6711 gen_rtx_MULT (mode,
6712 XEXP (arg0, 0),
6713 gen_rtx_MULT (mode,
6714 XEXP (arg0, 1),
6715 arg1)),
6716 ext_val, benefit);
6718 case PLUS:
6719 /* (a + invar_1) * invar_2. Distribute. */
6720 return simplify_giv_expr (loop,
6721 gen_rtx_PLUS (mode,
6722 gen_rtx_MULT (mode,
6723 XEXP (arg0, 0),
6724 arg1),
6725 gen_rtx_MULT (mode,
6726 XEXP (arg0, 1),
6727 arg1)),
6728 ext_val, benefit);
6730 default:
6731 abort ();
6734 case ASHIFT:
6735 /* Shift by constant is multiply by power of two. */
6736 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6737 return 0;
6739 return
6740 simplify_giv_expr (loop,
6741 gen_rtx_MULT (mode,
6742 XEXP (x, 0),
6743 GEN_INT ((HOST_WIDE_INT) 1
6744 << INTVAL (XEXP (x, 1)))),
6745 ext_val, benefit);
6747 case NEG:
6748 /* "-a" is "a * (-1)" */
6749 return simplify_giv_expr (loop,
6750 gen_rtx_MULT (mode, XEXP (x, 0), constm1_rtx),
6751 ext_val, benefit);
6753 case NOT:
6754 /* "~a" is "-a - 1". Silly, but easy. */
6755 return simplify_giv_expr (loop,
6756 gen_rtx_MINUS (mode,
6757 gen_rtx_NEG (mode, XEXP (x, 0)),
6758 const1_rtx),
6759 ext_val, benefit);
6761 case USE:
6762 /* Already in proper form for invariant. */
6763 return x;
6765 case SIGN_EXTEND:
6766 case ZERO_EXTEND:
6767 case TRUNCATE:
6768 /* Conditionally recognize extensions of simple IVs. After we've
6769 computed loop traversal counts and verified the range of the
6770 source IV, we'll reevaluate this as a GIV. */
6771 if (*ext_val == NULL_RTX)
6773 arg0 = simplify_giv_expr (loop, XEXP (x, 0), ext_val, benefit);
6774 if (arg0 && *ext_val == NULL_RTX && REG_P (arg0))
6776 *ext_val = gen_rtx_fmt_e (GET_CODE (x), mode, arg0);
6777 return arg0;
6780 goto do_default;
6782 case REG:
6783 /* If this is a new register, we can't deal with it. */
6784 if (REGNO (x) >= max_reg_before_loop)
6785 return 0;
6787 /* Check for biv or giv. */
6788 switch (REG_IV_TYPE (ivs, REGNO (x)))
6790 case BASIC_INDUCT:
6791 return x;
6792 case GENERAL_INDUCT:
6794 struct induction *v = REG_IV_INFO (ivs, REGNO (x));
6796 /* Form expression from giv and add benefit. Ensure this giv
6797 can derive another and subtract any needed adjustment if so. */
6799 /* Increasing the benefit here is risky. The only case in which it
6800 is arguably correct is if this is the only use of V. In other
6801 cases, this will artificially inflate the benefit of the current
6802 giv, and lead to suboptimal code. Thus, it is disabled, since
6803 potentially not reducing an only marginally beneficial giv is
6804 less harmful than reducing many givs that are not really
6805 beneficial. */
6807 rtx single_use = regs->array[REGNO (x)].single_usage;
6808 if (single_use && single_use != const0_rtx)
6809 *benefit += v->benefit;
6812 if (v->cant_derive)
6813 return 0;
6815 tem = gen_rtx_PLUS (mode, gen_rtx_MULT (mode,
6816 v->src_reg, v->mult_val),
6817 v->add_val);
6819 if (v->derive_adjustment)
6820 tem = gen_rtx_MINUS (mode, tem, v->derive_adjustment);
6821 arg0 = simplify_giv_expr (loop, tem, ext_val, benefit);
6822 if (*ext_val)
6824 if (!v->ext_dependent)
6825 return arg0;
6827 else
6829 *ext_val = v->ext_dependent;
6830 return arg0;
6832 return 0;
6835 default:
6836 do_default:
6837 /* If it isn't an induction variable, and it is invariant, we
6838 may be able to simplify things further by looking through
6839 the bits we just moved outside the loop. */
6840 if (loop_invariant_p (loop, x) == 1)
6842 struct movable *m;
6843 struct loop_movables *movables = LOOP_MOVABLES (loop);
6845 for (m = movables->head; m; m = m->next)
6846 if (rtx_equal_p (x, m->set_dest))
6848 /* Ok, we found a match. Substitute and simplify. */
6850 /* If we match another movable, we must use that, as
6851 this one is going away. */
6852 if (m->match)
6853 return simplify_giv_expr (loop, m->match->set_dest,
6854 ext_val, benefit);
6856 /* If consec is nonzero, this is a member of a group of
6857 instructions that were moved together. We handle this
6858 case only to the point of seeking to the last insn and
6859 looking for a REG_EQUAL. Fail if we don't find one. */
6860 if (m->consec != 0)
6862 int i = m->consec;
6863 tem = m->insn;
6866 tem = NEXT_INSN (tem);
6868 while (--i > 0);
6870 tem = find_reg_note (tem, REG_EQUAL, NULL_RTX);
6871 if (tem)
6872 tem = XEXP (tem, 0);
6874 else
6876 tem = single_set (m->insn);
6877 if (tem)
6878 tem = SET_SRC (tem);
6881 if (tem)
6883 /* What we are most interested in is pointer
6884 arithmetic on invariants -- only take
6885 patterns we may be able to do something with. */
6886 if (GET_CODE (tem) == PLUS
6887 || GET_CODE (tem) == MULT
6888 || GET_CODE (tem) == ASHIFT
6889 || GET_CODE (tem) == CONST_INT
6890 || GET_CODE (tem) == SYMBOL_REF)
6892 tem = simplify_giv_expr (loop, tem, ext_val,
6893 benefit);
6894 if (tem)
6895 return tem;
6897 else if (GET_CODE (tem) == CONST
6898 && GET_CODE (XEXP (tem, 0)) == PLUS
6899 && GET_CODE (XEXP (XEXP (tem, 0), 0)) == SYMBOL_REF
6900 && GET_CODE (XEXP (XEXP (tem, 0), 1)) == CONST_INT)
6902 tem = simplify_giv_expr (loop, XEXP (tem, 0),
6903 ext_val, benefit);
6904 if (tem)
6905 return tem;
6908 break;
6911 break;
6914 /* Fall through to general case. */
6915 default:
6916 /* If invariant, return as USE (unless CONST_INT).
6917 Otherwise, not giv. */
6918 if (GET_CODE (x) == USE)
6919 x = XEXP (x, 0);
6921 if (loop_invariant_p (loop, x) == 1)
6923 if (GET_CODE (x) == CONST_INT)
6924 return x;
6925 if (GET_CODE (x) == CONST
6926 && GET_CODE (XEXP (x, 0)) == PLUS
6927 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
6928 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
6929 x = XEXP (x, 0);
6930 return gen_rtx_USE (mode, x);
6932 else
6933 return 0;
6937 /* This routine folds invariants such that there is only ever one
6938 CONST_INT in the summation. It is only used by simplify_giv_expr. */
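/* For example (illustrative): folding (const_int 3) into the sum
   ((A + (const_int 4)) + B) yields ((A + (const_int 7)) + B), keeping a
   single CONST_INT in the summation.  */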
6940 static rtx
6941 sge_plus_constant (rtx x, rtx c)
6943 if (GET_CODE (x) == CONST_INT)
6944 return GEN_INT (INTVAL (x) + INTVAL (c));
6945 else if (GET_CODE (x) != PLUS)
6946 return gen_rtx_PLUS (GET_MODE (x), x, c);
6947 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6949 return gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
6950 GEN_INT (INTVAL (XEXP (x, 1)) + INTVAL (c)));
6952 else if (GET_CODE (XEXP (x, 0)) == PLUS
6953 || GET_CODE (XEXP (x, 1)) != PLUS)
6955 return gen_rtx_PLUS (GET_MODE (x),
6956 sge_plus_constant (XEXP (x, 0), c), XEXP (x, 1));
6958 else
6960 return gen_rtx_PLUS (GET_MODE (x),
6961 sge_plus_constant (XEXP (x, 1), c), XEXP (x, 0));
6965 static rtx
6966 sge_plus (enum machine_mode mode, rtx x, rtx y)
6968 while (GET_CODE (y) == PLUS)
6970 rtx a = XEXP (y, 0);
6971 if (GET_CODE (a) == CONST_INT)
6972 x = sge_plus_constant (x, a);
6973 else
6974 x = gen_rtx_PLUS (mode, x, a);
6975 y = XEXP (y, 1);
6977 if (GET_CODE (y) == CONST_INT)
6978 x = sge_plus_constant (x, y);
6979 else
6980 x = gen_rtx_PLUS (mode, x, y);
6981 return x;
6984 /* Help detect a giv that is calculated by several consecutive insns;
6985 for example,
6986 giv = biv * M
6987 giv = giv + A
6988 The caller has already identified the first insn P as having a giv as dest;
6989 we check that all other insns that set the same register follow
6990 immediately after P, that they alter nothing else,
6991 and that the result of the last is still a giv.
6993 The value is 0 if the reg set in P is not really a giv.
6994 Otherwise, the value is the amount gained by eliminating
6995 all the consecutive insns that compute the value.
6997 FIRST_BENEFIT is the amount gained by eliminating the first insn, P.
6998 SRC_REG is the reg of the biv; DEST_REG is the reg of the giv.
7000 The coefficients of the ultimate giv value are stored in
7001 *MULT_VAL and *ADD_VAL. */
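/* A worked instance of the pattern above (hypothetical regs): for the
   consecutive insns

       v = i * 4
       v = v + 8

   the first insn is recognized with *MULT_VAL == 4 and *ADD_VAL == 0;
   while scanning the second insn, general_induction_var expands the
   temporary entry for V and yields *MULT_VAL == 4, *ADD_VAL == 8, so the
   combined giv is i * 4 + 8 and the returned benefit accounts for both
   insns.  */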
7003 static int
7004 consec_sets_giv (const struct loop *loop, int first_benefit, rtx p,
7005 rtx src_reg, rtx dest_reg, rtx *add_val, rtx *mult_val,
7006 rtx *ext_val, rtx *last_consec_insn)
7008 struct loop_ivs *ivs = LOOP_IVS (loop);
7009 struct loop_regs *regs = LOOP_REGS (loop);
7010 int count;
7011 enum rtx_code code;
7012 int benefit;
7013 rtx temp;
7014 rtx set;
7016 /* Indicate that this is a giv so that we can update the value produced in
7017 each insn of the multi-insn sequence.
7019 This induction structure will be used only by the call to
7020 general_induction_var below, so we can allocate it on our stack.
7021 If this is a giv, our caller will replace the induct var entry with
7022 a new induction structure. */
7023 struct induction *v;
7025 if (REG_IV_TYPE (ivs, REGNO (dest_reg)) != UNKNOWN_INDUCT)
7026 return 0;
7028 v = alloca (sizeof (struct induction));
7029 v->src_reg = src_reg;
7030 v->mult_val = *mult_val;
7031 v->add_val = *add_val;
7032 v->benefit = first_benefit;
7033 v->cant_derive = 0;
7034 v->derive_adjustment = 0;
7035 v->ext_dependent = NULL_RTX;
7037 REG_IV_TYPE (ivs, REGNO (dest_reg)) = GENERAL_INDUCT;
7038 REG_IV_INFO (ivs, REGNO (dest_reg)) = v;
7040 count = regs->array[REGNO (dest_reg)].n_times_set - 1;
7042 while (count > 0)
7044 p = NEXT_INSN (p);
7045 code = GET_CODE (p);
7047 /* If libcall, skip to end of call sequence. */
7048 if (code == INSN && (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
7049 p = XEXP (temp, 0);
7051 if (code == INSN
7052 && (set = single_set (p))
7053 && REG_P (SET_DEST (set))
7054 && SET_DEST (set) == dest_reg
7055 && (general_induction_var (loop, SET_SRC (set), &src_reg,
7056 add_val, mult_val, ext_val, 0,
7057 &benefit, VOIDmode)
7058 /* Giv created by equivalent expression. */
7059 || ((temp = find_reg_note (p, REG_EQUAL, NULL_RTX))
7060 && general_induction_var (loop, XEXP (temp, 0), &src_reg,
7061 add_val, mult_val, ext_val, 0,
7062 &benefit, VOIDmode)))
7063 && src_reg == v->src_reg)
7065 if (find_reg_note (p, REG_RETVAL, NULL_RTX))
7066 benefit += libcall_benefit (p);
7068 count--;
7069 v->mult_val = *mult_val;
7070 v->add_val = *add_val;
7071 v->benefit += benefit;
7073 else if (code != NOTE)
7075 /* Allow insns that set something other than this giv to a
7076 constant. Such insns are needed on machines which cannot
7077 include long constants and should not disqualify a giv. */
7078 if (code == INSN
7079 && (set = single_set (p))
7080 && SET_DEST (set) != dest_reg
7081 && CONSTANT_P (SET_SRC (set)))
7082 continue;
7084 REG_IV_TYPE (ivs, REGNO (dest_reg)) = UNKNOWN_INDUCT;
7085 return 0;
7089 REG_IV_TYPE (ivs, REGNO (dest_reg)) = UNKNOWN_INDUCT;
7090 *last_consec_insn = p;
7091 return v->benefit;
7094 /* Return an rtx, if any, that expresses giv G2 as a function of the register
7095 represented by G1. If no such expression can be found, or it is clear that
7096 it cannot possibly be a valid address, 0 is returned.
7098 To perform the computation, we note that
7099 G1 = x * v + a and
7100 G2 = y * v + b
7101 where `v' is the biv.
7103 So G2 = (y/x) * G1 + (b - a*y/x).
7105 Note that MULT = y/x.
7107 Update: A and B are now allowed to be additive expressions such that
7108 B contains all variables in A. That is, computing B-A will not require
7109 subtracting variables. */
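/* A small worked example (illustrative numbers): with G1 = 4*v + 8 and
   G2 = 12*v + 20, MULT = 12/4 = 3 and the additive term is
   20 - 8*3 = -4, so G2 can be rewritten as 3 * G1 - 4.  */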
7111 static rtx
7112 express_from_1 (rtx a, rtx b, rtx mult)
7114 /* If MULT is zero, then A*MULT is zero, and our expression is B. */
7116 if (mult == const0_rtx)
7117 return b;
7119 /* If MULT is not 1, we cannot handle A with non-constants, since we
7120 would then be required to subtract multiples of the registers in A.
7121 This is theoretically possible, and may even apply to some Fortran
7122 constructs, but it is a lot of work and we do not attempt it here. */
7124 if (mult != const1_rtx && GET_CODE (a) != CONST_INT)
7125 return NULL_RTX;
7127 /* In general these structures are sorted top to bottom (down the PLUS
7128 chain), but not left to right across the PLUS. If B is a higher
7129 order giv than A, we can strip one level and recurse. If A is higher
7130 order, we'll eventually bail out, but won't know that until the end.
7131 If they are the same, we'll strip one level around this loop. */
7133 while (GET_CODE (a) == PLUS && GET_CODE (b) == PLUS)
7135 rtx ra, rb, oa, ob, tmp;
7137 ra = XEXP (a, 0), oa = XEXP (a, 1);
7138 if (GET_CODE (ra) == PLUS)
7139 tmp = ra, ra = oa, oa = tmp;
7141 rb = XEXP (b, 0), ob = XEXP (b, 1);
7142 if (GET_CODE (rb) == PLUS)
7143 tmp = rb, rb = ob, ob = tmp;
7145 if (rtx_equal_p (ra, rb))
7146 /* We matched: remove one reg completely. */
7147 a = oa, b = ob;
7148 else if (GET_CODE (ob) != PLUS && rtx_equal_p (ra, ob))
7149 /* An alternate match. */
7150 a = oa, b = rb;
7151 else if (GET_CODE (oa) != PLUS && rtx_equal_p (oa, rb))
7152 /* An alternate match. */
7153 a = ra, b = ob;
7154 else
7156 /* Indicates an extra register in B. Strip one level from B and
7157 recurse, hoping B was the higher order expression. */
7158 ob = express_from_1 (a, ob, mult);
7159 if (ob == NULL_RTX)
7160 return NULL_RTX;
7161 return gen_rtx_PLUS (GET_MODE (b), rb, ob);
7165 /* Here we are at the last level of A, go through the cases hoping to
7166 get rid of everything but a constant. */
7168 if (GET_CODE (a) == PLUS)
7170 rtx ra, oa;
7172 ra = XEXP (a, 0), oa = XEXP (a, 1);
7173 if (rtx_equal_p (oa, b))
7174 oa = ra;
7175 else if (!rtx_equal_p (ra, b))
7176 return NULL_RTX;
7178 if (GET_CODE (oa) != CONST_INT)
7179 return NULL_RTX;
7181 return GEN_INT (-INTVAL (oa) * INTVAL (mult));
7183 else if (GET_CODE (a) == CONST_INT)
7185 return plus_constant (b, -INTVAL (a) * INTVAL (mult));
7187 else if (CONSTANT_P (a))
7189 enum machine_mode mode_a = GET_MODE (a);
7190 enum machine_mode mode_b = GET_MODE (b);
7191 enum machine_mode mode = mode_b == VOIDmode ? mode_a : mode_b;
7192 return simplify_gen_binary (MINUS, mode, b, a);
7194 else if (GET_CODE (b) == PLUS)
7196 if (rtx_equal_p (a, XEXP (b, 0)))
7197 return XEXP (b, 1);
7198 else if (rtx_equal_p (a, XEXP (b, 1)))
7199 return XEXP (b, 0);
7200 else
7201 return NULL_RTX;
7203 else if (rtx_equal_p (a, b))
7204 return const0_rtx;
7206 return NULL_RTX;
7209 static rtx
7210 express_from (struct induction *g1, struct induction *g2)
7212 rtx mult, add;
7214 /* The value that G1 will be multiplied by must be a constant integer. Also,
7215 the only chance we have of getting a valid address is if y/x (see above
7216 for notation) is also an integer. */
7217 if (GET_CODE (g1->mult_val) == CONST_INT
7218 && GET_CODE (g2->mult_val) == CONST_INT)
7220 if (g1->mult_val == const0_rtx
7221 || (g1->mult_val == constm1_rtx
7222 && INTVAL (g2->mult_val)
7223 == (HOST_WIDE_INT) 1 << (HOST_BITS_PER_WIDE_INT - 1))
7224 || INTVAL (g2->mult_val) % INTVAL (g1->mult_val) != 0)
7225 return NULL_RTX;
7226 mult = GEN_INT (INTVAL (g2->mult_val) / INTVAL (g1->mult_val));
7228 else if (rtx_equal_p (g1->mult_val, g2->mult_val))
7229 mult = const1_rtx;
7230 else
7232 /* ??? Find out if the one is a multiple of the other? */
7233 return NULL_RTX;
7236 add = express_from_1 (g1->add_val, g2->add_val, mult);
7237 if (add == NULL_RTX)
7239 /* Failed. If we've got a multiplication factor between G1 and G2,
7240 scale G1's addend and try again. */
7241 if (INTVAL (mult) > 1)
7243 rtx g1_add_val = g1->add_val;
7244 if (GET_CODE (g1_add_val) == MULT
7245 && GET_CODE (XEXP (g1_add_val, 1)) == CONST_INT)
7247 HOST_WIDE_INT m;
7248 m = INTVAL (mult) * INTVAL (XEXP (g1_add_val, 1));
7249 g1_add_val = gen_rtx_MULT (GET_MODE (g1_add_val),
7250 XEXP (g1_add_val, 0), GEN_INT (m));
7252 else
7254 g1_add_val = gen_rtx_MULT (GET_MODE (g1_add_val), g1_add_val,
7255 mult);
7258 add = express_from_1 (g1_add_val, g2->add_val, const1_rtx);
7261 if (add == NULL_RTX)
7262 return NULL_RTX;
7264 /* Form simplified final result. */
7265 if (mult == const0_rtx)
7266 return add;
7267 else if (mult == const1_rtx)
7268 mult = g1->dest_reg;
7269 else
7270 mult = gen_rtx_MULT (g2->mode, g1->dest_reg, mult);
7272 if (add == const0_rtx)
7273 return mult;
7274 else
7276 if (GET_CODE (add) == PLUS
7277 && CONSTANT_P (XEXP (add, 1)))
7279 rtx tem = XEXP (add, 1);
7280 mult = gen_rtx_PLUS (g2->mode, mult, XEXP (add, 0));
7281 add = tem;
7284 return gen_rtx_PLUS (g2->mode, mult, add);
7288 /* Return an rtx, if any, that expresses giv G2 as a function of the register
7289 represented by G1. This indicates that G2 should be combined with G1 and
7290 that G2 can use (either directly or via an address expression) a register
7291 used to represent G1. */
7293 static rtx
7294 combine_givs_p (struct induction *g1, struct induction *g2)
7296 rtx comb, ret;
7298 /* With the introduction of ext dependent givs, we must care for modes.
7299 G2 must not use a wider mode than G1. */
7300 if (GET_MODE_SIZE (g1->mode) < GET_MODE_SIZE (g2->mode))
7301 return NULL_RTX;
7303 ret = comb = express_from (g1, g2);
7304 if (comb == NULL_RTX)
7305 return NULL_RTX;
7306 if (g1->mode != g2->mode)
7307 ret = gen_lowpart (g2->mode, comb);
7309 /* If these givs are identical, they can be combined. We use the results
7310 of express_from because the addends are not in a canonical form, so
7311 rtx_equal_p is a weaker test. */
7312 /* But don't combine a DEST_REG giv with a DEST_ADDR giv; we want the
7313 combination to be the other way round. */
7314 if (comb == g1->dest_reg
7315 && (g1->giv_type == DEST_REG || g2->giv_type == DEST_ADDR))
7317 return ret;
7320 /* If G2 can be expressed as a function of G1 and that function is valid
7321 as an address and no more expensive than using a register for G2,
7322 the expression of G2 in terms of G1 can be used. */
7323 if (ret != NULL_RTX
7324 && g2->giv_type == DEST_ADDR
7325 && memory_address_p (GET_MODE (g2->mem), ret))
7326 return ret;
7328 return NULL_RTX;
7331 /* Check each extension dependent giv in this class to see if its
7332 root biv is safe from wrapping in the interior mode, which would
7333 make the giv illegal. */
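/* For instance (figures chosen only for illustration): a QImode biv that
   starts at 0, steps by 1 and runs for 100 iterations ends at 100, which
   fits both the unsigned range [0, 255] and the signed range [-128, 127],
   so both ze_ok and se_ok can be set below; with 200 iterations the signed
   range is exceeded and only zero extension remains provably safe.  */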
7335 static void
7336 check_ext_dependent_givs (const struct loop *loop, struct iv_class *bl)
7338 struct loop_info *loop_info = LOOP_INFO (loop);
7339 int ze_ok = 0, se_ok = 0, info_ok = 0;
7340 enum machine_mode biv_mode = GET_MODE (bl->biv->src_reg);
7341 HOST_WIDE_INT start_val;
7342 unsigned HOST_WIDE_INT u_end_val = 0;
7343 unsigned HOST_WIDE_INT u_start_val = 0;
7344 rtx incr = pc_rtx;
7345 struct induction *v;
7347 /* Make sure the iteration data is available. We must have
7348 constants in order to be certain of no overflow. */
7349 if (loop_info->n_iterations > 0
7350 && bl->initial_value
7351 && GET_CODE (bl->initial_value) == CONST_INT
7352 && (incr = biv_total_increment (bl))
7353 && GET_CODE (incr) == CONST_INT
7354 /* Make sure the host can represent the arithmetic. */
7355 && HOST_BITS_PER_WIDE_INT >= GET_MODE_BITSIZE (biv_mode))
7357 unsigned HOST_WIDE_INT abs_incr, total_incr;
7358 HOST_WIDE_INT s_end_val;
7359 int neg_incr;
7361 info_ok = 1;
7362 start_val = INTVAL (bl->initial_value);
7363 u_start_val = start_val;
7365 neg_incr = 0, abs_incr = INTVAL (incr);
7366 if (INTVAL (incr) < 0)
7367 neg_incr = 1, abs_incr = -abs_incr;
7368 total_incr = abs_incr * loop_info->n_iterations;
7370 /* Check for host arithmetic overflow. */
7371 if (total_incr / loop_info->n_iterations == abs_incr)
7373 unsigned HOST_WIDE_INT u_max;
7374 HOST_WIDE_INT s_max;
7376 u_end_val = start_val + (neg_incr ? -total_incr : total_incr);
7377 s_end_val = u_end_val;
7378 u_max = GET_MODE_MASK (biv_mode);
7379 s_max = u_max >> 1;
7381 /* Check zero extension of biv ok. */
7382 if (start_val >= 0
7383 /* Check for host arithmetic overflow. */
7384 && (neg_incr
7385 ? u_end_val < u_start_val
7386 : u_end_val > u_start_val)
7387 /* Check for target arithmetic overflow. */
7388 && (neg_incr
7389 ? 1 /* taken care of with host overflow */
7390 : u_end_val <= u_max))
7392 ze_ok = 1;
7395 /* Check sign extension of biv ok. */
7396 /* ??? While it is true that overflow with signed and pointer
7397 arithmetic is undefined, I fear too many programmers don't
7398 keep this fact in mind -- myself included on occasion.
7399 So leave the signed overflow optimizations alone. */
7400 if (start_val >= -s_max - 1
7401 /* Check for host arithmetic overflow. */
7402 && (neg_incr
7403 ? s_end_val < start_val
7404 : s_end_val > start_val)
7405 /* Check for target arithmetic overflow. */
7406 && (neg_incr
7407 ? s_end_val >= -s_max - 1
7408 : s_end_val <= s_max))
7410 se_ok = 1;
7415 /* If we know the BIV is compared at run-time against an
7416 invariant value, and the increment is +/- 1, we may also
7417 be able to prove that the BIV cannot overflow. */
7418 else if (bl->biv->src_reg == loop_info->iteration_var
7419 && loop_info->comparison_value
7420 && loop_invariant_p (loop, loop_info->comparison_value)
7421 && (incr = biv_total_increment (bl))
7422 && GET_CODE (incr) == CONST_INT)
7424 /* If the increment is +1, and the exit test is a <,
7425 the BIV cannot overflow. (For <=, we have the
7426 problematic case that the comparison value might
7427 be the maximum value of the range.) */
7428 if (INTVAL (incr) == 1)
7430 if (loop_info->comparison_code == LT)
7431 se_ok = ze_ok = 1;
7432 else if (loop_info->comparison_code == LTU)
7433 ze_ok = 1;
7436 /* Likewise for increment -1 and exit test >. */
7437 if (INTVAL (incr) == -1)
7439 if (loop_info->comparison_code == GT)
7440 se_ok = ze_ok = 1;
7441 else if (loop_info->comparison_code == GTU)
7442 ze_ok = 1;
7446 /* Invalidate givs that fail the tests. */
7447 for (v = bl->giv; v; v = v->next_iv)
7448 if (v->ext_dependent)
7450 enum rtx_code code = GET_CODE (v->ext_dependent);
7451 int ok = 0;
7453 switch (code)
7455 case SIGN_EXTEND:
7456 ok = se_ok;
7457 break;
7458 case ZERO_EXTEND:
7459 ok = ze_ok;
7460 break;
7462 case TRUNCATE:
7463 /* We don't know whether this value is being used as either
7464 signed or unsigned, so to safely truncate we must satisfy
7465 both. The initial check here verifies the BIV itself;
7466 once that is successful we may check its range wrt the
7467 derived GIV. This works only if we were able to determine
7468 constant start and end values above. */
7469 if (se_ok && ze_ok && info_ok)
7471 enum machine_mode outer_mode = GET_MODE (v->ext_dependent);
7472 unsigned HOST_WIDE_INT max = GET_MODE_MASK (outer_mode) >> 1;
7474 /* We know from the above that both endpoints are nonnegative,
7475 and that there is no wrapping. Verify that both endpoints
7476 are within the (signed) range of the outer mode. */
7477 if (u_start_val <= max && u_end_val <= max)
7478 ok = 1;
7480 break;
7482 default:
7483 abort ();
7486 if (ok)
7488 if (loop_dump_stream)
7490 fprintf (loop_dump_stream,
7491 "Verified ext dependent giv at %d of reg %d\n",
7492 INSN_UID (v->insn), bl->regno);
7495 else
7497 if (loop_dump_stream)
7499 const char *why;
7501 if (info_ok)
7502 why = "biv iteration values overflowed";
7503 else
7505 if (incr == pc_rtx)
7506 incr = biv_total_increment (bl);
7507 if (incr == const1_rtx)
7508 why = "biv iteration info incomplete; incr by 1";
7509 else
7510 why = "biv iteration info incomplete";
7513 fprintf (loop_dump_stream,
7514 "Failed ext dependent giv at %d, %s\n",
7515 INSN_UID (v->insn), why);
7517 v->ignore = 1;
7518 bl->all_reduced = 0;
7523 /* Generate a version of VALUE in a mode appropriate for initializing V. */
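/* A small illustration (the exact rtl is target dependent): if
   V->ext_dependent is (sign_extend:DI (reg:SI 61)) and VALUE is a
   non-constant SImode expression, the result is simply
   (sign_extend:DI VALUE); VOIDmode constants are returned unchanged
   because check_ext_dependent_givs already proved they cannot wrap.  */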
7526 extend_value_for_giv (struct induction *v, rtx value)
7528 rtx ext_dep = v->ext_dependent;
7530 if (! ext_dep)
7531 return value;
7533 /* Recall that check_ext_dependent_givs verified that the known bounds
7534 of a biv did not overflow or wrap with respect to the extension for
7535 the giv. Therefore, constants need no additional adjustment. */
7536 if (CONSTANT_P (value) && GET_MODE (value) == VOIDmode)
7537 return value;
7539 /* Otherwise, we must adjust the value to compensate for the
7540 differing modes of the biv and the giv. */
7541 return gen_rtx_fmt_e (GET_CODE (ext_dep), GET_MODE (ext_dep), value);
7544 struct combine_givs_stats
7546 int giv_number;
7547 int total_benefit;
7550 static int
7551 cmp_combine_givs_stats (const void *xp, const void *yp)
7553 const struct combine_givs_stats * const x =
7554 (const struct combine_givs_stats *) xp;
7555 const struct combine_givs_stats * const y =
7556 (const struct combine_givs_stats *) yp;
7557 int d;
7558 d = y->total_benefit - x->total_benefit;
7559 /* Stabilize the sort. */
7560 if (!d)
7561 d = x->giv_number - y->giv_number;
7562 return d;
7565 /* Check all pairs of givs for iv_class BL and see if any can be combined with
7566 any other. If so, point SAME to the giv combined with and set NEW_REG to
7567 be an expression (in terms of the other giv's DEST_REG) equivalent to the
7568 giv. Also, update BENEFIT and related fields for cost/benefit analysis. */
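/* A rough picture of the intended effect (addresses invented for the
   example): if one giv computes p = base + 4*i and another computes
   q = base + 4*i + 4, combine_givs_p can express Q as P + 4, so only P
   needs to be strength reduced and Q's uses become addresses of the
   form (plus P (const_int 4)).  */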
7570 static void
7571 combine_givs (struct loop_regs *regs, struct iv_class *bl)
7573 /* Additional benefit to add for being combined multiple times. */
7574 const int extra_benefit = 3;
7576 struct induction *g1, *g2, **giv_array;
7577 int i, j, k, giv_count;
7578 struct combine_givs_stats *stats;
7579 rtx *can_combine;
7581 /* Count givs, because bl->giv_count is incorrect here. */
7582 giv_count = 0;
7583 for (g1 = bl->giv; g1; g1 = g1->next_iv)
7584 if (!g1->ignore)
7585 giv_count++;
7587 giv_array = alloca (giv_count * sizeof (struct induction *));
7588 i = 0;
7589 for (g1 = bl->giv; g1; g1 = g1->next_iv)
7590 if (!g1->ignore)
7591 giv_array[i++] = g1;
7593 stats = xcalloc (giv_count, sizeof (*stats));
7594 can_combine = xcalloc (giv_count, giv_count * sizeof (rtx));
7596 for (i = 0; i < giv_count; i++)
7598 int this_benefit;
7599 rtx single_use;
7601 g1 = giv_array[i];
7602 stats[i].giv_number = i;
7604 /* If a DEST_REG GIV is used only once, do not allow it to combine
7605 with anything, for in doing so we will gain nothing that cannot
7606 be had by simply letting the GIV with which we would have combined
7607 be reduced on its own. The lossage shows up in particular with
7608 DEST_ADDR targets on hosts with reg+reg addressing, though it can
7609 be seen elsewhere as well. */
7610 if (g1->giv_type == DEST_REG
7611 && (single_use = regs->array[REGNO (g1->dest_reg)].single_usage)
7612 && single_use != const0_rtx)
7613 continue;
7615 this_benefit = g1->benefit;
7616 /* Add an additional weight for zero addends. */
7617 if (g1->no_const_addval)
7618 this_benefit += 1;
7620 for (j = 0; j < giv_count; j++)
7622 rtx this_combine;
7624 g2 = giv_array[j];
7625 if (g1 != g2
7626 && (this_combine = combine_givs_p (g1, g2)) != NULL_RTX)
7628 can_combine[i * giv_count + j] = this_combine;
7629 this_benefit += g2->benefit + extra_benefit;
7632 stats[i].total_benefit = this_benefit;
7635 /* Iterate, combining until we can't. */
7636 restart:
7637 qsort (stats, giv_count, sizeof (*stats), cmp_combine_givs_stats);
7639 if (loop_dump_stream)
7641 fprintf (loop_dump_stream, "Sorted combine statistics:\n");
7642 for (k = 0; k < giv_count; k++)
7644 g1 = giv_array[stats[k].giv_number];
7645 if (!g1->combined_with && !g1->same)
7646 fprintf (loop_dump_stream, " {%d, %d}",
7647 INSN_UID (giv_array[stats[k].giv_number]->insn),
7648 stats[k].total_benefit);
7650 putc ('\n', loop_dump_stream);
7653 for (k = 0; k < giv_count; k++)
7655 int g1_add_benefit = 0;
7657 i = stats[k].giv_number;
7658 g1 = giv_array[i];
7660 /* If it has already been combined, skip. */
7661 if (g1->combined_with || g1->same)
7662 continue;
7664 for (j = 0; j < giv_count; j++)
7666 g2 = giv_array[j];
7667 if (g1 != g2 && can_combine[i * giv_count + j]
7668 /* If it has already been combined, skip. */
7669 && ! g2->same && ! g2->combined_with)
7671 int l;
7673 g2->new_reg = can_combine[i * giv_count + j];
7674 g2->same = g1;
7675 /* For the destination, we may now replace a register by a mem
7676 expression. This changes the costs considerably, so add the
7677 compensation. */
7678 if (g2->giv_type == DEST_ADDR)
7679 g2->benefit = (g2->benefit + reg_address_cost
7680 - address_cost (g2->new_reg,
7681 GET_MODE (g2->mem)));
7682 g1->combined_with++;
7683 g1->lifetime += g2->lifetime;
7685 g1_add_benefit += g2->benefit;
7687 /* ??? The new final_[bg]iv_value code does a much better job
7688 of finding replaceable giv's, and hence this code may no
7689 longer be necessary. */
7690 if (! g2->replaceable && REG_USERVAR_P (g2->dest_reg))
7691 g1_add_benefit -= copy_cost;
7693 /* To help optimize the next set of combinations, remove
7694 this giv from the benefits of other potential mates. */
7695 for (l = 0; l < giv_count; ++l)
7697 int m = stats[l].giv_number;
7698 if (can_combine[m * giv_count + j])
7699 stats[l].total_benefit -= g2->benefit + extra_benefit;
7702 if (loop_dump_stream)
7703 fprintf (loop_dump_stream,
7704 "giv at %d combined with giv at %d; new benefit %d + %d, lifetime %d\n",
7705 INSN_UID (g2->insn), INSN_UID (g1->insn),
7706 g1->benefit, g1_add_benefit, g1->lifetime);
7710 /* To help optimize the next set of combinations, remove
7711 this giv from the benefits of other potential mates. */
7712 if (g1->combined_with)
7714 for (j = 0; j < giv_count; ++j)
7716 int m = stats[j].giv_number;
7717 if (can_combine[m * giv_count + i])
7718 stats[j].total_benefit -= g1->benefit + extra_benefit;
7721 g1->benefit += g1_add_benefit;
7723 /* We've finished with this giv, and everything it touched.
7724 Restart the combination so that proper weights for the
7725 rest of the givs are properly taken into account. */
7726 /* ??? Ideally we would compact the arrays at this point, so
7727 as to not cover old ground. But sanely compacting
7728 can_combine is tricky. */
7729 goto restart;
7733 /* Clean up. */
7734 free (stats);
7735 free (can_combine);
7738 /* Generate sequence for REG = B * M + A. B is the initial value of
7739 the basic induction variable, M a multiplicative constant, A an
7740 additive constant and REG the destination register. */
7742 static rtx
7743 gen_add_mult (rtx b, rtx m, rtx a, rtx reg)
7745 rtx seq;
7746 rtx result;
7748 start_sequence ();
7749 /* Use unsigned arithmetic. */
7750 result = expand_mult_add (b, reg, m, a, GET_MODE (reg), 1);
7751 if (reg != result)
7752 emit_move_insn (reg, result);
7753 seq = get_insns ();
7754 end_sequence ();
7756 return seq;
7760 /* Update registers created in insn sequence SEQ. */
7762 static void
7763 loop_regs_update (const struct loop *loop ATTRIBUTE_UNUSED, rtx seq)
7765 rtx insn;
7767 /* Update register info for alias analysis. */
7769 insn = seq;
7770 while (insn != NULL_RTX)
7772 rtx set = single_set (insn);
7774 if (set && REG_P (SET_DEST (set)))
7775 record_base_value (REGNO (SET_DEST (set)), SET_SRC (set), 0);
7777 insn = NEXT_INSN (insn);
7782 /* EMIT code before BEFORE_BB/BEFORE_INSN to set REG = B * M + A. B
7783 is the initial value of the basic induction variable, M a
7784 multiplicative constant, A an additive constant and REG the
7785 destination register. */
7787 void
7788 loop_iv_add_mult_emit_before (const struct loop *loop, rtx b, rtx m, rtx a,
7789 rtx reg, basic_block before_bb, rtx before_insn)
7791 rtx seq;
7793 if (! before_insn)
7795 loop_iv_add_mult_hoist (loop, b, m, a, reg);
7796 return;
7799 /* Use copy_rtx to prevent unexpected sharing of these rtx. */
7800 seq = gen_add_mult (copy_rtx (b), copy_rtx (m), copy_rtx (a), reg);
7802 /* Increase the lifetime of any invariants moved further in code. */
7803 update_reg_last_use (a, before_insn);
7804 update_reg_last_use (b, before_insn);
7805 update_reg_last_use (m, before_insn);
7807 /* It is possible that the expansion created lots of new registers.
7808 Iterate over the sequence we just created and record them all. We
7809 must do this before inserting the sequence. */
7810 loop_regs_update (loop, seq);
7812 loop_insn_emit_before (loop, before_bb, before_insn, seq);
7816 /* Emit insns in loop pre-header to set REG = B * M + A. B is the
7817 initial value of the basic induction variable, M a multiplicative
7818 constant, A an additive constant and REG the destination
7819 register. */
7821 void
7822 loop_iv_add_mult_sink (const struct loop *loop, rtx b, rtx m, rtx a, rtx reg)
7824 rtx seq;
7826 /* Use copy_rtx to prevent unexpected sharing of these rtx. */
7827 seq = gen_add_mult (copy_rtx (b), copy_rtx (m), copy_rtx (a), reg);
7829 /* Increase the lifetime of any invariants moved further in code.
7830 ???? Is this really necessary? */
7831 update_reg_last_use (a, loop->sink);
7832 update_reg_last_use (b, loop->sink);
7833 update_reg_last_use (m, loop->sink);
7835 /* It is possible that the expansion created lots of new registers.
7836 Iterate over the sequence we just created and record them all. We
7837 must do this before inserting the sequence. */
7838 loop_regs_update (loop, seq);
7840 loop_insn_sink (loop, seq);
7844 /* Emit insns after loop to set REG = B * M + A. B is the initial
7845 value of the basic induction variable, M a multiplicative constant,
7846 A an additive constant and REG the destination register. */
7848 void
7849 loop_iv_add_mult_hoist (const struct loop *loop, rtx b, rtx m, rtx a, rtx reg)
7851 rtx seq;
7853 /* Use copy_rtx to prevent unexpected sharing of these rtx. */
7854 seq = gen_add_mult (copy_rtx (b), copy_rtx (m), copy_rtx (a), reg);
7856 /* It is possible that the expansion created lots of new registers.
7857 Iterate over the sequence we just created and record them all. We
7858 must do this before inserting the sequence. */
7859 loop_regs_update (loop, seq);
7861 loop_insn_hoist (loop, seq);
7866 /* Similar to gen_add_mult, but compute cost rather than generating
7867 sequence. */
7869 static int
7870 iv_add_mult_cost (rtx b, rtx m, rtx a, rtx reg)
7872 int cost = 0;
7873 rtx last, result;
7875 start_sequence ();
7876 result = expand_mult_add (b, reg, m, a, GET_MODE (reg), 1);
7877 if (reg != result)
7878 emit_move_insn (reg, result);
7879 last = get_last_insn ();
7880 while (last)
7882 rtx t = single_set (last);
7883 if (t)
7884 cost += rtx_cost (SET_SRC (t), SET);
7885 last = PREV_INSN (last);
7887 end_sequence ();
7888 return cost;
7891 /* Test whether A * B can be computed without
7892 an actual multiply insn. Value is 1 if so.
7894 ??? This function stinks because it generates a ton of wasted RTL
7895 ??? and as a result fragments GC memory to no end. There are other
7896 ??? places in the compiler which are invoked a lot and do the same
7897 ??? thing, generate wasted RTL just to see if something is possible. */
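/* Purely illustrative: on most targets something like X * 5 expands to a
   shift and an add (x + (x << 2)) and therefore counts as cheap here,
   whereas a constant with no convenient shift/add decomposition expands
   to a real mult insn (or a libcall) and does not.  */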
7899 static int
7900 product_cheap_p (rtx a, rtx b)
7902 rtx tmp;
7903 int win, n_insns;
7905 /* If only one is constant, make it B. */
7906 if (GET_CODE (a) == CONST_INT)
7907 tmp = a, a = b, b = tmp;
7909 /* If first constant, both constant, so don't need multiply. */
7910 if (GET_CODE (a) == CONST_INT)
7911 return 1;
7913 /* If second not constant, neither is constant, so would need multiply. */
7914 if (GET_CODE (b) != CONST_INT)
7915 return 0;
7917 /* One operand is constant, so might not need multiply insn. Generate the
7918 code for the multiply and see if a call, a multiply, or a long sequence
7919 of insns is generated. */
7921 start_sequence ();
7922 expand_mult (GET_MODE (a), a, b, NULL_RTX, 1);
7923 tmp = get_insns ();
7924 end_sequence ();
7926 win = 1;
7927 if (INSN_P (tmp))
7929 n_insns = 0;
7930 while (tmp != NULL_RTX)
7932 rtx next = NEXT_INSN (tmp);
7934 if (++n_insns > 3
7935 || GET_CODE (tmp) != INSN
7936 || (GET_CODE (PATTERN (tmp)) == SET
7937 && GET_CODE (SET_SRC (PATTERN (tmp))) == MULT)
7938 || (GET_CODE (PATTERN (tmp)) == PARALLEL
7939 && GET_CODE (XVECEXP (PATTERN (tmp), 0, 0)) == SET
7940 && GET_CODE (SET_SRC (XVECEXP (PATTERN (tmp), 0, 0))) == MULT))
7942 win = 0;
7943 break;
7946 tmp = next;
7949 else if (GET_CODE (tmp) == SET
7950 && GET_CODE (SET_SRC (tmp)) == MULT)
7951 win = 0;
7952 else if (GET_CODE (tmp) == PARALLEL
7953 && GET_CODE (XVECEXP (tmp, 0, 0)) == SET
7954 && GET_CODE (SET_SRC (XVECEXP (tmp, 0, 0))) == MULT)
7955 win = 0;
7957 return win;
7960 /* Check to see if loop can be terminated by a "decrement and branch until
7961 zero" instruction. If so, add a REG_NONNEG note to the branch insn if so.
7962 Also try reversing an increment loop to a decrement loop
7963 to see if the optimization can be performed.
7964 Value is nonzero if optimization was performed. */
7966 /* This is useful even if the architecture doesn't have such an insn,
7967 because it might change a loop which increments from 0 to n to a loop
7968 which decrements from n to 0. A loop that decrements to zero is usually
7969 faster than one that increments from zero. */
7971 /* ??? This could be rewritten to use some of the loop unrolling procedures,
7972 such as approx_final_value, biv_total_increment, loop_iterations, and
7973 final_[bg]iv_value. */
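/* Sketched at the source level (the pass itself works on the rtl): when
   the biv I has no use except counting, a loop like

       for (i = 0; i < n; i++)
         body ();

   is rewritten roughly as

       for (i = n; i > 0; i--)
         body ();

   so that the exit test is a comparison against zero, or a single
   decrement-and-branch-until-zero insn on targets that have one.  */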
7975 static int
7976 check_dbra_loop (struct loop *loop, int insn_count)
7978 struct loop_info *loop_info = LOOP_INFO (loop);
7979 struct loop_regs *regs = LOOP_REGS (loop);
7980 struct loop_ivs *ivs = LOOP_IVS (loop);
7981 struct iv_class *bl;
7982 rtx reg;
7983 enum machine_mode mode;
7984 rtx jump_label;
7985 rtx final_value;
7986 rtx start_value;
7987 rtx new_add_val;
7988 rtx comparison;
7989 rtx before_comparison;
7990 rtx p;
7991 rtx jump;
7992 rtx first_compare;
7993 int compare_and_branch;
7994 rtx loop_start = loop->start;
7995 rtx loop_end = loop->end;
7997 /* If last insn is a conditional branch, and the insn before tests a
7998 register value, try to optimize it. Otherwise, we can't do anything. */
8000 jump = PREV_INSN (loop_end);
8001 comparison = get_condition_for_loop (loop, jump);
8002 if (comparison == 0)
8003 return 0;
8004 if (!onlyjump_p (jump))
8005 return 0;
8007 /* Try to compute whether the compare/branch at the loop end is one or
8008 two instructions. */
8009 get_condition (jump, &first_compare, false);
8010 if (first_compare == jump)
8011 compare_and_branch = 1;
8012 else if (first_compare == prev_nonnote_insn (jump))
8013 compare_and_branch = 2;
8014 else
8015 return 0;
8018 /* If more than one condition is present to control the loop, then
8019 do not proceed, as this function does not know how to rewrite
8020 loop tests with more than one condition.
8022 Look backwards from the first insn in the last comparison
8023 sequence and see if we've got another comparison sequence. */
8025 rtx jump1;
8026 if ((jump1 = prev_nonnote_insn (first_compare)) != loop->cont)
8027 if (GET_CODE (jump1) == JUMP_INSN)
8028 return 0;
8031 /* Check all of the bivs to see if the compare uses one of them.
8032 Skip biv's set more than once because we can't guarantee that
8033 it will be zero on the last iteration. Also skip if the biv is
8034 used between its update and the test insn. */
8036 for (bl = ivs->list; bl; bl = bl->next)
8038 if (bl->biv_count == 1
8039 && ! bl->biv->maybe_multiple
8040 && bl->biv->dest_reg == XEXP (comparison, 0)
8041 && ! reg_used_between_p (regno_reg_rtx[bl->regno], bl->biv->insn,
8042 first_compare))
8043 break;
8046 /* Try swapping the comparison to identify a suitable biv. */
8047 if (!bl)
8048 for (bl = ivs->list; bl; bl = bl->next)
8049 if (bl->biv_count == 1
8050 && ! bl->biv->maybe_multiple
8051 && bl->biv->dest_reg == XEXP (comparison, 1)
8052 && ! reg_used_between_p (regno_reg_rtx[bl->regno], bl->biv->insn,
8053 first_compare))
8055 comparison = gen_rtx_fmt_ee (swap_condition (GET_CODE (comparison)),
8056 VOIDmode,
8057 XEXP (comparison, 1),
8058 XEXP (comparison, 0));
8059 break;
8062 if (! bl)
8063 return 0;
8065 /* Look for the case where the basic induction variable is always
8066 nonnegative, and equals zero on the last iteration.
8067 In this case, add a reg_note REG_NONNEG, which allows the
8068 m68k DBRA instruction to be used. */
8070 if (((GET_CODE (comparison) == GT && XEXP (comparison, 1) == constm1_rtx)
8071 || (GET_CODE (comparison) == NE && XEXP (comparison, 1) == const0_rtx))
8072 && GET_CODE (bl->biv->add_val) == CONST_INT
8073 && INTVAL (bl->biv->add_val) < 0)
8075 /* Initial value must be greater than 0,
8076 and init_val % -dec_value == 0 to ensure that it equals zero on
8077 the last iteration. */
8079 if (GET_CODE (bl->initial_value) == CONST_INT
8080 && INTVAL (bl->initial_value) > 0
8081 && (INTVAL (bl->initial_value)
8082 % (-INTVAL (bl->biv->add_val))) == 0)
8084 /* Register always nonnegative, add REG_NOTE to branch. */
8085 if (! find_reg_note (jump, REG_NONNEG, NULL_RTX))
8086 REG_NOTES (jump)
8087 = gen_rtx_EXPR_LIST (REG_NONNEG, bl->biv->dest_reg,
8088 REG_NOTES (jump));
8089 bl->nonneg = 1;
8091 return 1;
8094 /* If the decrement is 1 and the value was tested as >= 0 before
8095 the loop, then we can safely optimize. */
8096 for (p = loop_start; p; p = PREV_INSN (p))
8098 if (GET_CODE (p) == CODE_LABEL)
8099 break;
8100 if (GET_CODE (p) != JUMP_INSN)
8101 continue;
8103 before_comparison = get_condition_for_loop (loop, p);
8104 if (before_comparison
8105 && XEXP (before_comparison, 0) == bl->biv->dest_reg
8106 && (GET_CODE (before_comparison) == LT
8107 || GET_CODE (before_comparison) == LTU)
8108 && XEXP (before_comparison, 1) == const0_rtx
8109 && ! reg_set_between_p (bl->biv->dest_reg, p, loop_start)
8110 && INTVAL (bl->biv->add_val) == -1)
8112 if (! find_reg_note (jump, REG_NONNEG, NULL_RTX))
8113 REG_NOTES (jump)
8114 = gen_rtx_EXPR_LIST (REG_NONNEG, bl->biv->dest_reg,
8115 REG_NOTES (jump));
8116 bl->nonneg = 1;
8118 return 1;
8122 else if (GET_CODE (bl->biv->add_val) == CONST_INT
8123 && INTVAL (bl->biv->add_val) > 0)
8125 /* Try to change inc to dec, so can apply above optimization. */
8126 /* Can do this if:
8127 all registers modified are induction variables or invariant,
8128 all memory references have non-overlapping addresses
8129 (obviously true if only one write)
8130 allow 2 insns for the compare/jump at the end of the loop. */
8131 /* Also, we must avoid any instructions which use both the reversed
8132 biv and another biv. Such instructions will fail if the loop is
8133 reversed. We meet this condition by requiring that either
8134 no_use_except_counting is true, or else that there is only
8135 one biv. */
8136 int num_nonfixed_reads = 0;
8137 /* 1 if the iteration var is used only to count iterations. */
8138 int no_use_except_counting = 0;
8139 /* 1 if the loop has no memory store, or it has a single memory store
8140 which is reversible. */
8141 int reversible_mem_store = 1;
8143 if (bl->giv_count == 0
8144 && !loop->exit_count
8145 && !loop_info->has_multiple_exit_targets)
8147 rtx bivreg = regno_reg_rtx[bl->regno];
8148 struct iv_class *blt;
8150 /* If there are no givs for this biv, and the only exit is the
8151 fall through at the end of the loop, then
8152 see if perhaps there are no uses except to count. */
8153 no_use_except_counting = 1;
8154 for (p = loop_start; p != loop_end; p = NEXT_INSN (p))
8155 if (INSN_P (p))
8157 rtx set = single_set (p);
8159 if (set && REG_P (SET_DEST (set))
8160 && REGNO (SET_DEST (set)) == bl->regno)
8161 /* An insn that sets the biv is okay. */
8163 else if (!reg_mentioned_p (bivreg, PATTERN (p)))
8164 /* An insn that doesn't mention the biv is okay. */
8166 else if (p == prev_nonnote_insn (prev_nonnote_insn (loop_end))
8167 || p == prev_nonnote_insn (loop_end))
8169 /* If either of these insns uses the biv and sets a pseudo
8170 that has more than one usage, then the biv has uses
8171 other than counting since it's used to derive a value
8172 that is used more than one time. */
8173 note_stores (PATTERN (p), note_set_pseudo_multiple_uses,
8174 regs);
8175 if (regs->multiple_uses)
8177 no_use_except_counting = 0;
8178 break;
8181 else
8183 no_use_except_counting = 0;
8184 break;
8188 /* A biv has uses besides counting if it is used to set
8189 another biv. */
8190 for (blt = ivs->list; blt; blt = blt->next)
8191 if (blt->init_set
8192 && reg_mentioned_p (bivreg, SET_SRC (blt->init_set)))
8194 no_use_except_counting = 0;
8195 break;
8199 if (no_use_except_counting)
8200 /* No need to worry about MEMs. */
8202 else if (loop_info->num_mem_sets <= 1)
8204 for (p = loop_start; p != loop_end; p = NEXT_INSN (p))
8205 if (INSN_P (p))
8206 num_nonfixed_reads += count_nonfixed_reads (loop, PATTERN (p));
8208 /* If the loop has a single store, and the destination address is
8209 invariant, then we can't reverse the loop, because this address
8210 might then have the wrong value at loop exit.
8211 This would work if the source was invariant also, however, in that
8212 case, the insn should have been moved out of the loop. */
8214 if (loop_info->num_mem_sets == 1)
8216 struct induction *v;
8218 /* If we could prove that each of the memory locations
8219 written to was different, then we could reverse the
8220 store -- but we don't presently have any way of
8221 knowing that. */
8222 reversible_mem_store = 0;
8224 /* If the store depends on a register that is set after the
8225 store, it depends on the initial value, and is thus not
8226 reversible. */
8227 for (v = bl->giv; reversible_mem_store && v; v = v->next_iv)
8229 if (v->giv_type == DEST_REG
8230 && reg_mentioned_p (v->dest_reg,
8231 PATTERN (loop_info->first_loop_store_insn))
8232 && loop_insn_first_p (loop_info->first_loop_store_insn,
8233 v->insn))
8234 reversible_mem_store = 0;
8238 else
8239 return 0;
8241 /* This code only acts for innermost loops. Also it simplifies
8242 the memory address check by only reversing loops with
8243 zero or one memory access.
8244 Two memory accesses could involve parts of the same array,
8245 and that can't be reversed.
8246 If the biv is used only for counting, then we don't need to worry
8247 about all these things. */
8249 if ((num_nonfixed_reads <= 1
8250 && ! loop_info->has_nonconst_call
8251 && ! loop_info->has_prefetch
8252 && ! loop_info->has_volatile
8253 && reversible_mem_store
8254 && (bl->giv_count + bl->biv_count + loop_info->num_mem_sets
8255 + num_unmoved_movables (loop) + compare_and_branch == insn_count)
8256 && (bl == ivs->list && bl->next == 0))
8257 || (no_use_except_counting && ! loop_info->has_prefetch))
8259 rtx tem;
8261 /* Loop can be reversed. */
8262 if (loop_dump_stream)
8263 fprintf (loop_dump_stream, "Can reverse loop\n");
8265 /* Now check other conditions:
8267 The increment must be a constant, as must the initial value,
8268 and the comparison code must be LT.
8270 This test can probably be improved since +/- 1 in the constant
8271 can be obtained by changing LT to LE and vice versa; this is
8272 confusing. */
8274 if (comparison
8275 /* for constants, LE gets turned into LT */
8276 && (GET_CODE (comparison) == LT
8277 || (GET_CODE (comparison) == LE
8278 && no_use_except_counting)
8279 || GET_CODE (comparison) == LTU))
8281 HOST_WIDE_INT add_val, add_adjust, comparison_val = 0;
8282 rtx initial_value, comparison_value;
8283 int nonneg = 0;
8284 enum rtx_code cmp_code;
8285 int comparison_const_width;
8286 unsigned HOST_WIDE_INT comparison_sign_mask;
8287 bool keep_first_compare;
8289 add_val = INTVAL (bl->biv->add_val);
8290 comparison_value = XEXP (comparison, 1);
8291 if (GET_MODE (comparison_value) == VOIDmode)
8292 comparison_const_width
8293 = GET_MODE_BITSIZE (GET_MODE (XEXP (comparison, 0)));
8294 else
8295 comparison_const_width
8296 = GET_MODE_BITSIZE (GET_MODE (comparison_value));
8297 if (comparison_const_width > HOST_BITS_PER_WIDE_INT)
8298 comparison_const_width = HOST_BITS_PER_WIDE_INT;
8299 comparison_sign_mask
8300 = (unsigned HOST_WIDE_INT) 1 << (comparison_const_width - 1);
8302 /* If the comparison value is not a loop invariant, then we
8303 can not reverse this loop.
8305 ??? If the insns which initialize the comparison value as
8306 a whole compute an invariant result, then we could move
8307 them out of the loop and proceed with loop reversal. */
8308 if (! loop_invariant_p (loop, comparison_value))
8309 return 0;
8311 if (GET_CODE (comparison_value) == CONST_INT)
8312 comparison_val = INTVAL (comparison_value);
8313 initial_value = bl->initial_value;
8315 /* Normalize the initial value if it is an integer and
8316 has no other use except as a counter. This will allow
8317 a few more loops to be reversed. */
8318 if (no_use_except_counting
8319 && GET_CODE (comparison_value) == CONST_INT
8320 && GET_CODE (initial_value) == CONST_INT)
8322 comparison_val = comparison_val - INTVAL (bl->initial_value);
8323 /* The code below requires comparison_val to be a multiple
8324 of add_val in order to do the loop reversal, so
8325 round up comparison_val to a multiple of add_val.
8326 Since comparison_value is constant, we know that the
8327 current comparison code is LT. */
8328 comparison_val = comparison_val + add_val - 1;
8329 comparison_val
8330 -= (unsigned HOST_WIDE_INT) comparison_val % add_val;
8331 /* We postpone overflow checks for COMPARISON_VAL here;
8332 even if there is an overflow, we might still be able to
8333 reverse the loop, if converting the loop exit test to
8334 NE is possible. */
8335 initial_value = const0_rtx;
8338 /* First check if we can do a vanilla loop reversal. */
8339 if (initial_value == const0_rtx
8340 /* If we have a decrement_and_branch_on_count,
8341 prefer the NE test, since this will allow that
8342 instruction to be generated. Note that we must
8343 use a vanilla loop reversal if the biv is used to
8344 calculate a giv or has a non-counting use. */
8345 #if ! defined (HAVE_decrement_and_branch_until_zero) \
8346 && defined (HAVE_decrement_and_branch_on_count)
8347 && (! (add_val == 1 && loop->vtop
8348 && (bl->biv_count == 0
8349 || no_use_except_counting)))
8350 #endif
8351 && GET_CODE (comparison_value) == CONST_INT
8352 /* Now do postponed overflow checks on COMPARISON_VAL. */
8353 && ! (((comparison_val - add_val) ^ INTVAL (comparison_value))
8354 & comparison_sign_mask))
8356 /* Register will always be nonnegative, with value
8357 0 on last iteration */
8358 add_adjust = add_val;
8359 nonneg = 1;
8360 cmp_code = GE;
8362 else if (add_val == 1 && loop->vtop
8363 && (bl->biv_count == 0
8364 || no_use_except_counting))
8366 add_adjust = 0;
8367 cmp_code = NE;
8369 else
8370 return 0;
8372 if (GET_CODE (comparison) == LE)
8373 add_adjust -= add_val;
8375 /* If the initial value is not zero, or if the comparison
8376 value is not an exact multiple of the increment, then we
8377 can not reverse this loop. */
8378 if (initial_value == const0_rtx
8379 && GET_CODE (comparison_value) == CONST_INT)
8381 if (((unsigned HOST_WIDE_INT) comparison_val % add_val) != 0)
8382 return 0;
8384 else
8386 if (! no_use_except_counting || add_val != 1)
8387 return 0;
8390 final_value = comparison_value;
8392 /* Reset these in case we normalized the initial value
8393 and comparison value above. */
8394 if (GET_CODE (comparison_value) == CONST_INT
8395 && GET_CODE (initial_value) == CONST_INT)
8397 comparison_value = GEN_INT (comparison_val);
8398 final_value
8399 = GEN_INT (comparison_val + INTVAL (bl->initial_value));
8401 bl->initial_value = initial_value;
8403 /* Save some info needed to produce the new insns. */
8404 reg = bl->biv->dest_reg;
8405 mode = GET_MODE (reg);
8406 jump_label = condjump_label (PREV_INSN (loop_end));
8407 new_add_val = GEN_INT (-INTVAL (bl->biv->add_val));
8409 /* Set start_value; if this is not a CONST_INT, we need
8410 to generate a SUB.
8411 Initialize biv to start_value before loop start.
8412 The old initializing insn will be deleted as a
8413 dead store by flow.c. */
8414 if (initial_value == const0_rtx
8415 && GET_CODE (comparison_value) == CONST_INT)
8417 start_value
8418 = gen_int_mode (comparison_val - add_adjust, mode);
8419 loop_insn_hoist (loop, gen_move_insn (reg, start_value));
8421 else if (GET_CODE (initial_value) == CONST_INT)
8423 rtx offset = GEN_INT (-INTVAL (initial_value) - add_adjust);
8424 rtx add_insn = gen_add3_insn (reg, comparison_value, offset);
8426 if (add_insn == 0)
8427 return 0;
8429 start_value
8430 = gen_rtx_PLUS (mode, comparison_value, offset);
8431 loop_insn_hoist (loop, add_insn);
8432 if (GET_CODE (comparison) == LE)
8433 final_value = gen_rtx_PLUS (mode, comparison_value,
8434 GEN_INT (add_val));
8436 else if (! add_adjust)
8438 rtx sub_insn = gen_sub3_insn (reg, comparison_value,
8439 initial_value);
8441 if (sub_insn == 0)
8442 return 0;
8443 start_value
8444 = gen_rtx_MINUS (mode, comparison_value, initial_value);
8445 loop_insn_hoist (loop, sub_insn);
8447 else
8448 /* We could handle the other cases too, but it'll be
8449 better to have a testcase first. */
8450 return 0;
8452 /* We may not have a single insn which can increment a reg, so
8453 create a sequence to hold all the insns from expand_inc. */
8454 start_sequence ();
8455 expand_inc (reg, new_add_val);
8456 tem = get_insns ();
8457 end_sequence ();
8459 p = loop_insn_emit_before (loop, 0, bl->biv->insn, tem);
8460 delete_insn (bl->biv->insn);
8462 /* Update biv info to reflect its new status. */
8463 bl->biv->insn = p;
8464 bl->initial_value = start_value;
8465 bl->biv->add_val = new_add_val;
8467 /* Update loop info. */
8468 loop_info->initial_value = reg;
8469 loop_info->initial_equiv_value = reg;
8470 loop_info->final_value = const0_rtx;
8471 loop_info->final_equiv_value = const0_rtx;
8472 loop_info->comparison_value = const0_rtx;
8473 loop_info->comparison_code = cmp_code;
8474 loop_info->increment = new_add_val;
8476 /* Inc LABEL_NUSES so that delete_insn will
8477 not delete the label. */
8478 LABEL_NUSES (XEXP (jump_label, 0))++;
8480 /* If we have a separate comparison insn that does more
8481 than just set cc0, the result of the comparison might
8482 be used outside the loop. */
8483 keep_first_compare = (compare_and_branch == 2
8484 #ifdef HAVE_CC0
8485 && sets_cc0_p (first_compare) <= 0
8486 #endif
8489 /* Emit an insn after the end of the loop to set the biv's
8490 proper exit value if it is used anywhere outside the loop. */
8491 if (keep_first_compare
8492 || (REGNO_LAST_UID (bl->regno) != INSN_UID (first_compare))
8493 || ! bl->init_insn
8494 || REGNO_FIRST_UID (bl->regno) != INSN_UID (bl->init_insn))
8495 loop_insn_sink (loop, gen_load_of_final_value (reg, final_value));
8497 if (keep_first_compare)
8498 loop_insn_sink (loop, PATTERN (first_compare));
8500 /* Delete compare/branch at end of loop. */
8501 delete_related_insns (PREV_INSN (loop_end));
8502 if (compare_and_branch == 2)
8503 delete_related_insns (first_compare);
8505 /* Add new compare/branch insn at end of loop. */
8506 start_sequence ();
8507 emit_cmp_and_jump_insns (reg, const0_rtx, cmp_code, NULL_RTX,
8508 mode, 0,
8509 XEXP (jump_label, 0));
8510 tem = get_insns ();
8511 end_sequence ();
8512 emit_jump_insn_before (tem, loop_end);
8514 for (tem = PREV_INSN (loop_end);
8515 tem && GET_CODE (tem) != JUMP_INSN;
8516 tem = PREV_INSN (tem))
8519 if (tem)
8520 JUMP_LABEL (tem) = XEXP (jump_label, 0);
8522 if (nonneg)
8524 if (tem)
8526 /* Increment of LABEL_NUSES done above. */
8527 /* Register is now always nonnegative,
8528 so add REG_NONNEG note to the branch. */
8529 REG_NOTES (tem) = gen_rtx_EXPR_LIST (REG_NONNEG, reg,
8530 REG_NOTES (tem));
8532 bl->nonneg = 1;
8535 /* No insn may reference both the reversed and another biv or it
8536 will fail (see comment near the top of the loop reversal
8537 code).
8538 Earlier on, we have verified that the biv has no use except
8539 counting, or it is the only biv in this function.
8540 However, the code that computes no_use_except_counting does
8541 not verify reg notes. It's possible to have an insn that
8542 references another biv, and has a REG_EQUAL note with an
8543 expression based on the reversed biv. To avoid this case,
8544 remove all REG_EQUAL notes based on the reversed biv
8545 here. */
8546 for (p = loop_start; p != loop_end; p = NEXT_INSN (p))
8547 if (INSN_P (p))
8549 rtx *pnote;
8550 rtx set = single_set (p);
8551 /* If this is a set of a GIV based on the reversed biv, any
8552 REG_EQUAL notes should still be correct. */
8553 if (! set
8554 || !REG_P (SET_DEST (set))
8555 || (size_t) REGNO (SET_DEST (set)) >= ivs->n_regs
8556 || REG_IV_TYPE (ivs, REGNO (SET_DEST (set))) != GENERAL_INDUCT
8557 || REG_IV_INFO (ivs, REGNO (SET_DEST (set)))->src_reg != bl->biv->src_reg)
8558 for (pnote = &REG_NOTES (p); *pnote;)
8560 if (REG_NOTE_KIND (*pnote) == REG_EQUAL
8561 && reg_mentioned_p (regno_reg_rtx[bl->regno],
8562 XEXP (*pnote, 0)))
8563 *pnote = XEXP (*pnote, 1);
8564 else
8565 pnote = &XEXP (*pnote, 1);
8569 /* Mark that this biv has been reversed. Each giv which depends
8570 on this biv, and which is also live past the end of the loop
8571 will have to be fixed up. */
8573 bl->reversed = 1;
8575 if (loop_dump_stream)
8577 fprintf (loop_dump_stream, "Reversed loop");
8578 if (bl->nonneg)
8579 fprintf (loop_dump_stream, " and added reg_nonneg\n");
8580 else
8581 fprintf (loop_dump_stream, "\n");
8584 return 1;
8589 return 0;
8592 /* Verify whether the biv BL appears to be eliminable,
8593 based on the insns in the loop that refer to it.
8595 If ELIMINATE_P is nonzero, actually do the elimination.
8597 THRESHOLD and INSN_COUNT are from loop_optimize and are used to
8598 determine whether invariant insns should be placed inside or at the
8599 start of the loop. */
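/* As a rough example (names invented): if the only remaining use of the
   biv I inside the loop is the exit test i < n, and the giv
   p = base + 4*i has been strength reduced, the test can be rewritten as
   p < base + 4*n, after which I itself is dead and its increment can be
   deleted.  */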
8601 static int
8602 maybe_eliminate_biv (const struct loop *loop, struct iv_class *bl,
8603 int eliminate_p, int threshold, int insn_count)
8605 struct loop_ivs *ivs = LOOP_IVS (loop);
8606 rtx reg = bl->biv->dest_reg;
8607 rtx p;
8609 /* Scan all insns in the loop, stopping if we find one that uses the
8610 biv in a way that we cannot eliminate. */
8612 for (p = loop->start; p != loop->end; p = NEXT_INSN (p))
8614 enum rtx_code code = GET_CODE (p);
8615 basic_block where_bb = 0;
8616 rtx where_insn = threshold >= insn_count ? 0 : p;
8617 rtx note;
8619 /* If this is a libcall that sets a giv, skip ahead to its end. */
8620 if (INSN_P (p))
8622 note = find_reg_note (p, REG_LIBCALL, NULL_RTX);
8624 if (note)
8626 rtx last = XEXP (note, 0);
8627 rtx set = single_set (last);
8629 if (set && REG_P (SET_DEST (set)))
8631 unsigned int regno = REGNO (SET_DEST (set));
8633 if (regno < ivs->n_regs
8634 && REG_IV_TYPE (ivs, regno) == GENERAL_INDUCT
8635 && REG_IV_INFO (ivs, regno)->src_reg == bl->biv->src_reg)
8636 p = last;
8641 /* Closely examine the insn if the biv is mentioned. */
8642 if ((code == INSN || code == JUMP_INSN || code == CALL_INSN)
8643 && reg_mentioned_p (reg, PATTERN (p))
8644 && ! maybe_eliminate_biv_1 (loop, PATTERN (p), p, bl,
8645 eliminate_p, where_bb, where_insn))
8647 if (loop_dump_stream)
8648 fprintf (loop_dump_stream,
8649 "Cannot eliminate biv %d: biv used in insn %d.\n",
8650 bl->regno, INSN_UID (p));
8651 break;
8654 /* If we are eliminating, kill REG_EQUAL notes mentioning the biv. */
8655 if (eliminate_p
8656 && (note = find_reg_note (p, REG_EQUAL, NULL_RTX)) != NULL_RTX
8657 && reg_mentioned_p (reg, XEXP (note, 0)))
8658 remove_note (p, note);
8661 if (p == loop->end)
8663 if (loop_dump_stream)
8664 fprintf (loop_dump_stream, "biv %d %s eliminated.\n",
8665 bl->regno, eliminate_p ? "was" : "can be");
8666 return 1;
8669 return 0;
8672 /* INSN and REFERENCE are instructions in the same insn chain.
8673 Return nonzero if INSN is first. */
8676 loop_insn_first_p (rtx insn, rtx reference)
8678 rtx p, q;
8680 for (p = insn, q = reference;;)
8682 /* Start with test for not first so that INSN == REFERENCE yields not
8683 first. */
8684 if (q == insn || ! p)
8685 return 0;
8686 if (p == reference || ! q)
8687 return 1;
8689 /* Either of P or Q might be a NOTE. Notes have the same LUID as the
8690 previous insn, hence the <= comparison below does not work if
8691 P is a note. */
8692 if (INSN_UID (p) < max_uid_for_loop
8693 && INSN_UID (q) < max_uid_for_loop
8694 && GET_CODE (p) != NOTE)
8695 return INSN_LUID (p) <= INSN_LUID (q);
8697 if (INSN_UID (p) >= max_uid_for_loop
8698 || GET_CODE (p) == NOTE)
8699 p = NEXT_INSN (p);
8700 if (INSN_UID (q) >= max_uid_for_loop)
8701 q = NEXT_INSN (q);
8705 /* We are trying to eliminate BIV in INSN using GIV. Return nonzero if
8706 the offset that we have to take into account due to auto-increment /
8707 giv derivation is zero. */
8708 static int
8709 biv_elimination_giv_has_0_offset (struct induction *biv,
8710 struct induction *giv, rtx insn)
8712 /* If the giv V had the auto-inc address optimization applied
8713 to it, and INSN occurs between the giv insn and the biv
8714 insn, then we'd have to adjust the value used here.
8715 This is rare, so we don't bother to make this possible. */
8716 if (giv->auto_inc_opt
8717 && ((loop_insn_first_p (giv->insn, insn)
8718 && loop_insn_first_p (insn, biv->insn))
8719 || (loop_insn_first_p (biv->insn, insn)
8720 && loop_insn_first_p (insn, giv->insn))))
8721 return 0;
8723 return 1;
8726 /* If BL appears in X (part of the pattern of INSN), see if we can
8727 eliminate its use. If so, return 1. If not, return 0.
8729 If BIV does not appear in X, return 1.
8731 If ELIMINATE_P is nonzero, actually do the elimination.
8732 WHERE_INSN/WHERE_BB indicate where extra insns should be added.
8733 Depending on how many items have been moved out of the loop, it
8734 will either be before INSN (when WHERE_INSN is nonzero) or at the
8735 start of the loop (when WHERE_INSN is zero). */
8737 static int
8738 maybe_eliminate_biv_1 (const struct loop *loop, rtx x, rtx insn,
8739 struct iv_class *bl, int eliminate_p,
8740 basic_block where_bb, rtx where_insn)
8742 enum rtx_code code = GET_CODE (x);
8743 rtx reg = bl->biv->dest_reg;
8744 enum machine_mode mode = GET_MODE (reg);
8745 struct induction *v;
8746 rtx arg, tem;
8747 #ifdef HAVE_cc0
8748 rtx new;
8749 #endif
8750 int arg_operand;
8751 const char *fmt;
8752 int i, j;
8754 switch (code)
8756 case REG:
8757 /* If we haven't already been able to do something with this BIV,
8758 we can't eliminate it. */
8759 if (x == reg)
8760 return 0;
8761 return 1;
8763 case SET:
8764 /* If this sets the BIV, it is not a problem. */
8765 if (SET_DEST (x) == reg)
8766 return 1;
8768 /* If this is an insn that defines a giv, it is also ok because
8769 it will go away when the giv is reduced. */
8770 for (v = bl->giv; v; v = v->next_iv)
8771 if (v->giv_type == DEST_REG && SET_DEST (x) == v->dest_reg)
8772 return 1;
8774 #ifdef HAVE_cc0
8775 if (SET_DEST (x) == cc0_rtx && SET_SRC (x) == reg)
8777 /* Can replace with any giv that was reduced and
8778 that has (MULT_VAL != 0) and (ADD_VAL == 0).
8779 Require a constant for MULT_VAL, so we know it's nonzero.
8780 ??? We disable this optimization to avoid potential
8781 overflows. */
8783 for (v = bl->giv; v; v = v->next_iv)
8784 if (GET_CODE (v->mult_val) == CONST_INT && v->mult_val != const0_rtx
8785 && v->add_val == const0_rtx
8786 && ! v->ignore && ! v->maybe_dead && v->always_computable
8787 && v->mode == mode
8788 && 0)
8790 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
8791 continue;
8793 if (! eliminate_p)
8794 return 1;
8796 /* If the giv has the opposite direction of change,
8797 then reverse the comparison. */
8798 if (INTVAL (v->mult_val) < 0)
8799 new = gen_rtx_COMPARE (GET_MODE (v->new_reg),
8800 const0_rtx, v->new_reg);
8801 else
8802 new = v->new_reg;
8804 /* We can probably test that giv's reduced reg. */
8805 if (validate_change (insn, &SET_SRC (x), new, 0))
8806 return 1;
8809 /* Look for a giv with (MULT_VAL != 0) and (ADD_VAL != 0);
8810 replace test insn with a compare insn (cmp REDUCED_GIV ADD_VAL).
8811 Require a constant for MULT_VAL, so we know it's nonzero.
8812 ??? Do this only if ADD_VAL is a pointer to avoid a potential
8813 overflow problem. */
8815 for (v = bl->giv; v; v = v->next_iv)
8816 if (GET_CODE (v->mult_val) == CONST_INT
8817 && v->mult_val != const0_rtx
8818 && ! v->ignore && ! v->maybe_dead && v->always_computable
8819 && v->mode == mode
8820 && (GET_CODE (v->add_val) == SYMBOL_REF
8821 || GET_CODE (v->add_val) == LABEL_REF
8822 || GET_CODE (v->add_val) == CONST
8823 || (REG_P (v->add_val)
8824 && REG_POINTER (v->add_val))))
8826 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
8827 continue;
8829 if (! eliminate_p)
8830 return 1;
8832 /* If the giv has the opposite direction of change,
8833 then reverse the comparison. */
8834 if (INTVAL (v->mult_val) < 0)
8835 new = gen_rtx_COMPARE (VOIDmode, copy_rtx (v->add_val),
8836 v->new_reg);
8837 else
8838 new = gen_rtx_COMPARE (VOIDmode, v->new_reg,
8839 copy_rtx (v->add_val));
8841 /* Replace biv with the giv's reduced register. */
8842 update_reg_last_use (v->add_val, insn);
8843 if (validate_change (insn, &SET_SRC (PATTERN (insn)), new, 0))
8844 return 1;
8846 /* Insn doesn't support that constant or invariant. Copy it
8847 into a register (it will be a loop invariant). */
8848 tem = gen_reg_rtx (GET_MODE (v->new_reg));
8850 loop_insn_emit_before (loop, 0, where_insn,
8851 gen_move_insn (tem,
8852 copy_rtx (v->add_val)));
8854 /* Substitute the new register for its invariant value in
8855 the compare expression. */
8856 XEXP (new, (INTVAL (v->mult_val) < 0) ? 0 : 1) = tem;
8857 if (validate_change (insn, &SET_SRC (PATTERN (insn)), new, 0))
8858 return 1;
8861 #endif
8862 break;
8864 case COMPARE:
8865 case EQ: case NE:
8866 case GT: case GE: case GTU: case GEU:
8867 case LT: case LE: case LTU: case LEU:
8868 /* See if either argument is the biv. */
8869 if (XEXP (x, 0) == reg)
8870 arg = XEXP (x, 1), arg_operand = 1;
8871 else if (XEXP (x, 1) == reg)
8872 arg = XEXP (x, 0), arg_operand = 0;
8873 else
8874 break;
8876 if (CONSTANT_P (arg))
8878 /* First try to replace with any giv that has constant positive
8879 mult_val and constant add_val. We might be able to support
8880 negative mult_val, but it seems complex to do it in general. */
8882 for (v = bl->giv; v; v = v->next_iv)
8883 if (GET_CODE (v->mult_val) == CONST_INT
8884 && INTVAL (v->mult_val) > 0
8885 && (GET_CODE (v->add_val) == SYMBOL_REF
8886 || GET_CODE (v->add_val) == LABEL_REF
8887 || GET_CODE (v->add_val) == CONST
8888 || (REG_P (v->add_val)
8889 && REG_POINTER (v->add_val)))
8890 && ! v->ignore && ! v->maybe_dead && v->always_computable
8891 && v->mode == mode)
8893 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
8894 continue;
8896 /* Don't eliminate if the linear combination that makes up
8897 the giv overflows when it is applied to ARG. */
8898 if (GET_CODE (arg) == CONST_INT)
8900 rtx add_val;
8902 if (GET_CODE (v->add_val) == CONST_INT)
8903 add_val = v->add_val;
8904 else
8905 add_val = const0_rtx;
8907 if (const_mult_add_overflow_p (arg, v->mult_val,
8908 add_val, mode, 1))
8909 continue;
8912 if (! eliminate_p)
8913 return 1;
8915 /* Replace biv with the giv's reduced reg. */
8916 validate_change (insn, &XEXP (x, 1 - arg_operand), v->new_reg, 1);
8918 /* If all constants are actually constant integers and
8919 the derived constant can be directly placed in the COMPARE,
8920 do so. */
8921 if (GET_CODE (arg) == CONST_INT
8922 && GET_CODE (v->add_val) == CONST_INT)
8924 tem = expand_mult_add (arg, NULL_RTX, v->mult_val,
8925 v->add_val, mode, 1);
8927 else
8929 /* Otherwise, load it into a register. */
8930 tem = gen_reg_rtx (mode);
8931 loop_iv_add_mult_emit_before (loop, arg,
8932 v->mult_val, v->add_val,
8933 tem, where_bb, where_insn);
8936 validate_change (insn, &XEXP (x, arg_operand), tem, 1);
8938 if (apply_change_group ())
8939 return 1;
8942 /* Look for giv with positive constant mult_val and nonconst add_val.
8943 Insert insns to calculate new compare value.
8944 ??? Turn this off due to possible overflow. */
8946 for (v = bl->giv; v; v = v->next_iv)
8947 if (GET_CODE (v->mult_val) == CONST_INT
8948 && INTVAL (v->mult_val) > 0
8949 && ! v->ignore && ! v->maybe_dead && v->always_computable
8950 && v->mode == mode
8951 && 0)
8953 rtx tem;
8955 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
8956 continue;
8958 if (! eliminate_p)
8959 return 1;
8961 tem = gen_reg_rtx (mode);
8963 /* Replace biv with giv's reduced register. */
8964 validate_change (insn, &XEXP (x, 1 - arg_operand),
8965 v->new_reg, 1);
8967 /* Compute value to compare against. */
8968 loop_iv_add_mult_emit_before (loop, arg,
8969 v->mult_val, v->add_val,
8970 tem, where_bb, where_insn);
8971 /* Use it in this insn. */
8972 validate_change (insn, &XEXP (x, arg_operand), tem, 1);
8973 if (apply_change_group ())
8974 return 1;
8977 else if (REG_P (arg) || MEM_P (arg))
8979 if (loop_invariant_p (loop, arg) == 1)
8981 /* Look for giv with constant positive mult_val and nonconst
8982 add_val. Insert insns to compute new compare value.
8983 ??? Turn this off due to possible overflow. */
8985 for (v = bl->giv; v; v = v->next_iv)
8986 if (GET_CODE (v->mult_val) == CONST_INT && INTVAL (v->mult_val) > 0
8987 && ! v->ignore && ! v->maybe_dead && v->always_computable
8988 && v->mode == mode
8989 && 0)
8991 rtx tem;
8993 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
8994 continue;
8996 if (! eliminate_p)
8997 return 1;
8999 tem = gen_reg_rtx (mode);
9001 /* Replace biv with giv's reduced register. */
9002 validate_change (insn, &XEXP (x, 1 - arg_operand),
9003 v->new_reg, 1);
9005 /* Compute value to compare against. */
9006 loop_iv_add_mult_emit_before (loop, arg,
9007 v->mult_val, v->add_val,
9008 tem, where_bb, where_insn);
9009 validate_change (insn, &XEXP (x, arg_operand), tem, 1);
9010 if (apply_change_group ())
9011 return 1;
9015 /* This code has problems. Basically, when deciding whether we
9016 will eliminate BL, you can't know whether a particular giv
9017 of ARG will be reduced. If it isn't going to be reduced,
9018 we can't eliminate BL. We can try forcing it to be reduced,
9019 but that can generate poor code.
9021 The problem is that the benefit of reducing TV, below should
9022 be increased if BL can actually be eliminated, but this means
9023 we might have to do a topological sort of the order in which
9024 we try to process bivs. It doesn't seem worthwhile to do
9025 this sort of thing now. */
9027 #if 0
9028 /* Otherwise the reg compared with had better be a biv. */
9029 if (!REG_P (arg)
9030 || REG_IV_TYPE (ivs, REGNO (arg)) != BASIC_INDUCT)
9031 return 0;
9033 /* Look for a pair of givs, one for each biv,
9034 with identical coefficients. */
9035 for (v = bl->giv; v; v = v->next_iv)
9037 struct induction *tv;
9039 if (v->ignore || v->maybe_dead || v->mode != mode)
9040 continue;
9042 for (tv = REG_IV_CLASS (ivs, REGNO (arg))->giv; tv;
9043 tv = tv->next_iv)
9044 if (! tv->ignore && ! tv->maybe_dead
9045 && rtx_equal_p (tv->mult_val, v->mult_val)
9046 && rtx_equal_p (tv->add_val, v->add_val)
9047 && tv->mode == mode)
9049 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
9050 continue;
9052 if (! eliminate_p)
9053 return 1;
9055 /* Replace biv with its giv's reduced reg. */
9056 XEXP (x, 1 - arg_operand) = v->new_reg;
9057 /* Replace other operand with the other giv's
9058 reduced reg. */
9059 XEXP (x, arg_operand) = tv->new_reg;
9060 return 1;
9063 #endif
9066 /* If we get here, the biv can't be eliminated. */
9067 return 0;
9069 case MEM:
9070 /* If this address is a DEST_ADDR giv, it doesn't matter if the
9071 biv is used in it, since it will be replaced. */
9072 for (v = bl->giv; v; v = v->next_iv)
9073 if (v->giv_type == DEST_ADDR && v->location == &XEXP (x, 0))
9074 return 1;
9075 break;
9077 default:
9078 break;
9081 /* See if any subexpression fails elimination. */
9082 fmt = GET_RTX_FORMAT (code);
9083 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
9085 switch (fmt[i])
9087 case 'e':
9088 if (! maybe_eliminate_biv_1 (loop, XEXP (x, i), insn, bl,
9089 eliminate_p, where_bb, where_insn))
9090 return 0;
9091 break;
9093 case 'E':
9094 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9095 if (! maybe_eliminate_biv_1 (loop, XVECEXP (x, i, j), insn, bl,
9096 eliminate_p, where_bb, where_insn))
9097 return 0;
9098 break;
9102 return 1;
9105 /* Return nonzero if the last use of REG
9106 is in an insn following INSN in the same basic block. */
9108 static int
9109 last_use_this_basic_block (rtx reg, rtx insn)
9111 rtx n;
9112 for (n = insn;
9113 n && GET_CODE (n) != CODE_LABEL && GET_CODE (n) != JUMP_INSN;
9114 n = NEXT_INSN (n))
9116 if (REGNO_LAST_UID (REGNO (reg)) == INSN_UID (n))
9117 return 1;
9119 return 0;
9122 /* Called via `note_stores' to record the initial value of a biv. Here we
9123 just record the location of the set and process it later. */
9125 static void
9126 record_initial (rtx dest, rtx set, void *data ATTRIBUTE_UNUSED)
9128 struct loop_ivs *ivs = (struct loop_ivs *) data;
9129 struct iv_class *bl;
9131 if (!REG_P (dest)
9132 || REGNO (dest) >= ivs->n_regs
9133 || REG_IV_TYPE (ivs, REGNO (dest)) != BASIC_INDUCT)
9134 return;
9136 bl = REG_IV_CLASS (ivs, REGNO (dest));
9138 /* If this is the first set found, record it. */
9139 if (bl->init_insn == 0)
9141 bl->init_insn = note_insn;
9142 bl->init_set = set;
9146 /* If any of the registers in X are "old" and currently have a last use earlier
9147 than INSN, update them to have a last use of INSN. Their actual last use
9148 will be the previous insn but it will not have a valid uid_luid so we can't
9149 use it. X must be a source expression only. */
9151 static void
9152 update_reg_last_use (rtx x, rtx insn)
9154 /* Check for the case where INSN does not have a valid luid. In this case,
9155 there is no need to modify the regno_last_uid, as this can only happen
9156 when code is inserted after the loop_end to set a pseudo's final value,
9157 and hence this insn will never be the last use of x.
9158 ???? This comment is not correct. See for example loop_givs_reduce.
9159 This may insert an insn before another new insn. */
9160 if (REG_P (x) && REGNO (x) < max_reg_before_loop
9161 && INSN_UID (insn) < max_uid_for_loop
9162 && REGNO_LAST_LUID (REGNO (x)) < INSN_LUID (insn))
9164 REGNO_LAST_UID (REGNO (x)) = INSN_UID (insn);
9166 else
9168 int i, j;
9169 const char *fmt = GET_RTX_FORMAT (GET_CODE (x));
9170 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9172 if (fmt[i] == 'e')
9173 update_reg_last_use (XEXP (x, i), insn);
9174 else if (fmt[i] == 'E')
9175 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9176 update_reg_last_use (XVECEXP (x, i, j), insn);
9181 /* Given an insn INSN and condition COND, return the condition in a
9182 canonical form to simplify testing by callers. Specifically:
9184 (1) The code will always be a comparison operation (EQ, NE, GT, etc.).
9185 (2) Both operands will be machine operands; (cc0) will have been replaced.
9186 (3) If an operand is a constant, it will be the second operand.
9187 (4) (LE x const) will be replaced with (LT x <const+1>) and similarly
9188 for GE, GEU, and LEU.
9190 If the condition cannot be understood, or is an inequality floating-point
9191 comparison which needs to be reversed, 0 will be returned.
9193 If REVERSE is nonzero, then reverse the condition prior to canonicalizing it.
9195 If EARLIEST is nonzero, it is a pointer to a place where the earliest
9196 insn used in locating the condition was found. If a replacement test
9197 of the condition is desired, it should be placed in front of that
9198 insn and we will be sure that the inputs are still valid.
9200 If WANT_REG is nonzero, we wish the condition to be relative to that
9201 register, if possible. Therefore, do not canonicalize the condition
9202 further. If ALLOW_CC_MODE is nonzero, allow the condition returned
9203 to be a compare to a CC mode register. */
9205 rtx
9206 canonicalize_condition (rtx insn, rtx cond, int reverse, rtx *earliest,
9207 rtx want_reg, int allow_cc_mode)
9209 enum rtx_code code;
9210 rtx prev = insn;
9211 rtx set;
9212 rtx tem;
9213 rtx op0, op1;
9214 int reverse_code = 0;
9215 enum machine_mode mode;
9217 code = GET_CODE (cond);
9218 mode = GET_MODE (cond);
9219 op0 = XEXP (cond, 0);
9220 op1 = XEXP (cond, 1);
9222 if (reverse)
9223 code = reversed_comparison_code (cond, insn);
9224 if (code == UNKNOWN)
9225 return 0;
9227 if (earliest)
9228 *earliest = insn;
9230 /* If we are comparing a register with zero, see if the register is set
9231 in the previous insn to a COMPARE or a comparison operation. Perform
9232 the same tests as a function of STORE_FLAG_VALUE as find_comparison_args
9233 in cse.c */
9235 while ((GET_RTX_CLASS (code) == RTX_COMPARE
9236 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
9237 && op1 == CONST0_RTX (GET_MODE (op0))
9238 && op0 != want_reg)
9240 /* Set nonzero when we find something of interest. */
9241 rtx x = 0;
9243 #ifdef HAVE_cc0
9244 /* If comparison with cc0, import actual comparison from compare
9245 insn. */
9246 if (op0 == cc0_rtx)
9248 if ((prev = prev_nonnote_insn (prev)) == 0
9249 || GET_CODE (prev) != INSN
9250 || (set = single_set (prev)) == 0
9251 || SET_DEST (set) != cc0_rtx)
9252 return 0;
9254 op0 = SET_SRC (set);
9255 op1 = CONST0_RTX (GET_MODE (op0));
9256 if (earliest)
9257 *earliest = prev;
9259 #endif
9261 /* If this is a COMPARE, pick up the two things being compared. */
9262 if (GET_CODE (op0) == COMPARE)
9264 op1 = XEXP (op0, 1);
9265 op0 = XEXP (op0, 0);
9266 continue;
9268 else if (!REG_P (op0))
9269 break;
9271 /* Go back to the previous insn. Stop if it is not an INSN. We also
9272 stop if it isn't a single set or if it has a REG_INC note because
9273 we don't want to bother dealing with it. */
9275 if ((prev = prev_nonnote_insn (prev)) == 0
9276 || GET_CODE (prev) != INSN
9277 || FIND_REG_INC_NOTE (prev, NULL_RTX))
9278 break;
9280 set = set_of (op0, prev);
9282 if (set
9283 && (GET_CODE (set) != SET
9284 || !rtx_equal_p (SET_DEST (set), op0)))
9285 break;
9287 /* If this is setting OP0, get what it sets it to if it looks
9288 relevant. */
9289 if (set)
9291 enum machine_mode inner_mode = GET_MODE (SET_DEST (set));
9292 #ifdef FLOAT_STORE_FLAG_VALUE
9293 REAL_VALUE_TYPE fsfv;
9294 #endif
9296 /* ??? We may not combine comparisons done in a CCmode with
9297 comparisons not done in a CCmode. This is to aid targets
9298 like Alpha that have an IEEE compliant EQ instruction, and
9299 a non-IEEE compliant BEQ instruction. The use of CCmode is
9300 actually artificial, simply to prevent the combination, but
9301 should not affect other platforms.
9303 However, we must allow VOIDmode comparisons to match either
9304 CCmode or non-CCmode comparison, because some ports have
9305 modeless comparisons inside branch patterns.
9307 ??? This mode check should perhaps look more like the mode check
9308 in simplify_comparison in combine. */
9310 if ((GET_CODE (SET_SRC (set)) == COMPARE
9311 || (((code == NE
9312 || (code == LT
9313 && GET_MODE_CLASS (inner_mode) == MODE_INT
9314 && (GET_MODE_BITSIZE (inner_mode)
9315 <= HOST_BITS_PER_WIDE_INT)
9316 && (STORE_FLAG_VALUE
9317 & ((HOST_WIDE_INT) 1
9318 << (GET_MODE_BITSIZE (inner_mode) - 1))))
9319 #ifdef FLOAT_STORE_FLAG_VALUE
9320 || (code == LT
9321 && GET_MODE_CLASS (inner_mode) == MODE_FLOAT
9322 && (fsfv = FLOAT_STORE_FLAG_VALUE (inner_mode),
9323 REAL_VALUE_NEGATIVE (fsfv)))
9324 #endif
9326 && COMPARISON_P (SET_SRC (set))))
9327 && (((GET_MODE_CLASS (mode) == MODE_CC)
9328 == (GET_MODE_CLASS (inner_mode) == MODE_CC))
9329 || mode == VOIDmode || inner_mode == VOIDmode))
9330 x = SET_SRC (set);
9331 else if (((code == EQ
9332 || (code == GE
9333 && (GET_MODE_BITSIZE (inner_mode)
9334 <= HOST_BITS_PER_WIDE_INT)
9335 && GET_MODE_CLASS (inner_mode) == MODE_INT
9336 && (STORE_FLAG_VALUE
9337 & ((HOST_WIDE_INT) 1
9338 << (GET_MODE_BITSIZE (inner_mode) - 1))))
9339 #ifdef FLOAT_STORE_FLAG_VALUE
9340 || (code == GE
9341 && GET_MODE_CLASS (inner_mode) == MODE_FLOAT
9342 && (fsfv = FLOAT_STORE_FLAG_VALUE (inner_mode),
9343 REAL_VALUE_NEGATIVE (fsfv)))
9344 #endif
9346 && COMPARISON_P (SET_SRC (set))
9347 && (((GET_MODE_CLASS (mode) == MODE_CC)
9348 == (GET_MODE_CLASS (inner_mode) == MODE_CC))
9349 || mode == VOIDmode || inner_mode == VOIDmode))
9352 reverse_code = 1;
9353 x = SET_SRC (set);
9355 else
9356 break;
9359 else if (reg_set_p (op0, prev))
9360 /* If this sets OP0, but not directly, we have to give up. */
9361 break;
9363 if (x)
9365 if (COMPARISON_P (x))
9366 code = GET_CODE (x);
9367 if (reverse_code)
9369 code = reversed_comparison_code (x, prev);
9370 if (code == UNKNOWN)
9371 return 0;
9372 reverse_code = 0;
9375 op0 = XEXP (x, 0), op1 = XEXP (x, 1);
9376 if (earliest)
9377 *earliest = prev;
9381 /* If constant is first, put it last. */
9382 if (CONSTANT_P (op0))
9383 code = swap_condition (code), tem = op0, op0 = op1, op1 = tem;
9385 /* If OP0 is the result of a comparison, we weren't able to find what
9386 was really being compared, so fail. */
9387 if (!allow_cc_mode
9388 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
9389 return 0;
9391 /* Canonicalize any ordered comparison with integers involving equality
9392 if we can do computations in the relevant mode and we do not
9393 overflow. */
9395 if (GET_MODE_CLASS (GET_MODE (op0)) != MODE_CC
9396 && GET_CODE (op1) == CONST_INT
9397 && GET_MODE (op0) != VOIDmode
9398 && GET_MODE_BITSIZE (GET_MODE (op0)) <= HOST_BITS_PER_WIDE_INT)
9400 HOST_WIDE_INT const_val = INTVAL (op1);
9401 unsigned HOST_WIDE_INT uconst_val = const_val;
9402 unsigned HOST_WIDE_INT max_val
9403 = (unsigned HOST_WIDE_INT) GET_MODE_MASK (GET_MODE (op0));
9405 switch (code)
9407 case LE:
9408 if ((unsigned HOST_WIDE_INT) const_val != max_val >> 1)
9409 code = LT, op1 = gen_int_mode (const_val + 1, GET_MODE (op0));
9410 break;
9412 /* When cross-compiling, const_val might be sign-extended from
9413 BITS_PER_WORD to HOST_BITS_PER_WIDE_INT */
9414 case GE:
9415 if ((HOST_WIDE_INT) (const_val & max_val)
9416 != (((HOST_WIDE_INT) 1
9417 << (GET_MODE_BITSIZE (GET_MODE (op0)) - 1))))
9418 code = GT, op1 = gen_int_mode (const_val - 1, GET_MODE (op0));
9419 break;
9421 case LEU:
9422 if (uconst_val < max_val)
9423 code = LTU, op1 = gen_int_mode (uconst_val + 1, GET_MODE (op0));
9424 break;
9426 case GEU:
9427 if (uconst_val != 0)
9428 code = GTU, op1 = gen_int_mode (uconst_val - 1, GET_MODE (op0));
9429 break;
9431 default:
9432 break;
9436 /* Never return CC0; return zero instead. */
9437 if (CC0_P (op0))
9438 return 0;
9440 return gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
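/* Hypothetical standalone sketch (not part of this pass; the names are
   invented for illustration): the constant adjustment of rule (4) in the
   comment before canonicalize_condition, shown for a 32-bit mode.
   (LE x C) becomes (LT x C+1) unless C is already the largest signed
   value, and (LEU x C) becomes (LTU x C+1) unless C is the mode mask.  */

static int
sketch_canonicalize_le (int *code_is_lt, long long *c)
{
  const long long max_signed = 0x7fffffffLL;	/* Largest signed value of the mode.  */

  if (*c != max_signed)
    {
      *code_is_lt = 1;		/* LE turns into LT ...  */
      (*c)++;			/* ... with the constant bumped by one.  */
      return 1;
    }
  return 0;			/* Already canonical; leave LE alone.  */
}

static int
sketch_canonicalize_leu (int *code_is_ltu, unsigned long long *c)
{
  const unsigned long long mode_mask = 0xffffffffULL;	/* GET_MODE_MASK of the mode.  */

  if (*c < mode_mask)
    {
      *code_is_ltu = 1;		/* LEU turns into LTU ...  */
      (*c)++;			/* ... again with the constant bumped by one.  */
      return 1;
    }
  return 0;
}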
9443 /* Given a jump insn JUMP, return the condition that will cause it to branch
9444 to its JUMP_LABEL. If the condition cannot be understood, or is an
9445 inequality floating-point comparison which needs to be reversed, 0 will
9446 be returned.
9448 If EARLIEST is nonzero, it is a pointer to a place where the earliest
9449 insn used in locating the condition was found. If a replacement test
9450 of the condition is desired, it should be placed in front of that
9451 insn and we will be sure that the inputs are still valid.
9453 If ALLOW_CC_MODE is nonzero, allow the condition returned to be a
9454 compare to a CC mode register. */
9456 rtx
9457 get_condition (rtx jump, rtx *earliest, int allow_cc_mode)
9459 rtx cond;
9460 int reverse;
9461 rtx set;
9463 /* If this is not a standard conditional jump, we can't parse it. */
9464 if (GET_CODE (jump) != JUMP_INSN
9465 || ! any_condjump_p (jump))
9466 return 0;
9467 set = pc_set (jump);
9469 cond = XEXP (SET_SRC (set), 0);
9471 /* If this branches to JUMP_LABEL when the condition is false, reverse
9472 the condition. */
9473 reverse
9474 = GET_CODE (XEXP (SET_SRC (set), 2)) == LABEL_REF
9475 && XEXP (XEXP (SET_SRC (set), 2), 0) == JUMP_LABEL (jump);
9477 return canonicalize_condition (jump, cond, reverse, earliest, NULL_RTX,
9478 allow_cc_mode);
9481 /* Similar to above routine, except that we also put an invariant last
9482 unless both operands are invariants. */
9484 rtx
9485 get_condition_for_loop (const struct loop *loop, rtx x)
9487 rtx comparison = get_condition (x, (rtx*) 0, false);
9489 if (comparison == 0
9490 || ! loop_invariant_p (loop, XEXP (comparison, 0))
9491 || loop_invariant_p (loop, XEXP (comparison, 1)))
9492 return comparison;
9494 return gen_rtx_fmt_ee (swap_condition (GET_CODE (comparison)), VOIDmode,
9495 XEXP (comparison, 1), XEXP (comparison, 0));
9498 /* Scan the function and determine whether it has indirect (computed) jumps.
9500 This is taken mostly from flow.c; similar code exists elsewhere
9501 in the compiler. It may be useful to put this into rtlanal.c. */
9502 static int
9503 indirect_jump_in_function_p (rtx start)
9505 rtx insn;
9507 for (insn = start; insn; insn = NEXT_INSN (insn))
9508 if (computed_jump_p (insn))
9509 return 1;
9511 return 0;
9514 /* Add MEM to the LOOP_MEMS array, if appropriate. See the
9515 documentation for LOOP_MEMS for the definition of `appropriate'.
9516 This function is called from prescan_loop via for_each_rtx. */
9518 static int
9519 insert_loop_mem (rtx *mem, void *data ATTRIBUTE_UNUSED)
9521 struct loop_info *loop_info = data;
9522 int i;
9523 rtx m = *mem;
9525 if (m == NULL_RTX)
9526 return 0;
9528 switch (GET_CODE (m))
9530 case MEM:
9531 break;
9533 case CLOBBER:
9534 /* We're not interested in MEMs that are only clobbered. */
9535 return -1;
9537 case CONST_DOUBLE:
9538 /* We're not interested in the MEM associated with a
9539 CONST_DOUBLE, so there's no need to traverse into this. */
9540 return -1;
9542 case EXPR_LIST:
9543 /* We're not interested in any MEMs that only appear in notes. */
9544 return -1;
9546 default:
9547 /* This is not a MEM. */
9548 return 0;
9551 /* See if we've already seen this MEM. */
9552 for (i = 0; i < loop_info->mems_idx; ++i)
9553 if (rtx_equal_p (m, loop_info->mems[i].mem))
9555 if (MEM_VOLATILE_P (m) && !MEM_VOLATILE_P (loop_info->mems[i].mem))
9556 loop_info->mems[i].mem = m;
9557 if (GET_MODE (m) != GET_MODE (loop_info->mems[i].mem))
9558 /* The modes of the two memory accesses are different. If
9559 this happens, something tricky is going on, and we just
9560 don't optimize accesses to this MEM. */
9561 loop_info->mems[i].optimize = 0;
9563 return 0;
9566 /* Resize the array, if necessary. */
9567 if (loop_info->mems_idx == loop_info->mems_allocated)
9569 if (loop_info->mems_allocated != 0)
9570 loop_info->mems_allocated *= 2;
9571 else
9572 loop_info->mems_allocated = 32;
9574 loop_info->mems = xrealloc (loop_info->mems,
9575 loop_info->mems_allocated * sizeof (loop_mem_info));
9578 /* Actually insert the MEM. */
9579 loop_info->mems[loop_info->mems_idx].mem = m;
9580 /* We can't hoist this MEM out of the loop if it's a BLKmode MEM
9581 because we can't put it in a register. We still store it in the
9582 table, though, so that if we see the same address later, but in a
9583 non-BLK mode, we'll not think we can optimize it at that point. */
9584 loop_info->mems[loop_info->mems_idx].optimize = (GET_MODE (m) != BLKmode);
9585 loop_info->mems[loop_info->mems_idx].reg = NULL_RTX;
9586 ++loop_info->mems_idx;
9588 return 0;
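/* Minimal hypothetical sketch of the growth policy used for
   loop_info->mems just above: start with room for 32 entries and double
   the allocation whenever the array fills up.  The helper below is
   invented for illustration and is not part of the pass.  */

#include <stdlib.h>

static void *
sketch_grow_array (void *array, int *allocated, int used, size_t elt_size)
{
  if (used < *allocated)
    return array;			/* Still room; nothing to do.  */

  *allocated = *allocated ? *allocated * 2 : 32;
  return realloc (array, *allocated * elt_size);
}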
9592 /* Allocate REGS->ARRAY or reallocate it if it is too small.
9594 Increment REGS->ARRAY[I].SET_IN_LOOP at the index I of each
9595 register that is modified by an insn between FROM and TO. If the
9596 value of an element of REGS->array[I].SET_IN_LOOP becomes 127 or
9597 more, stop incrementing it, to avoid overflow.
9599 Store in REGS->ARRAY[I].SINGLE_USAGE the single insn in which
9600 register I is used, if it is only used once. Otherwise, it is set
9601 to 0 (for no uses) or const0_rtx for more than one use. This
9602 parameter may be zero, in which case this processing is not done.
9604 Set REGS->ARRAY[I].MAY_NOT_OPTIMIZE nonzero if we should not
9605 optimize register I. */
9607 static void
9608 loop_regs_scan (const struct loop *loop, int extra_size)
9610 struct loop_regs *regs = LOOP_REGS (loop);
9611 int old_nregs;
9612 /* last_set[n] is nonzero iff reg n has been set in the current
9613 basic block. In that case, it is the insn that last set reg n. */
9614 rtx *last_set;
9615 rtx insn;
9616 int i;
9618 old_nregs = regs->num;
9619 regs->num = max_reg_num ();
9621 /* Grow the regs array if not allocated or too small. */
9622 if (regs->num >= regs->size)
9624 regs->size = regs->num + extra_size;
9626 regs->array = xrealloc (regs->array, regs->size * sizeof (*regs->array));
9628 /* Zero the new elements. */
9629 memset (regs->array + old_nregs, 0,
9630 (regs->size - old_nregs) * sizeof (*regs->array));
9633 /* Clear previously scanned fields but do not clear n_times_set. */
9634 for (i = 0; i < old_nregs; i++)
9636 regs->array[i].set_in_loop = 0;
9637 regs->array[i].may_not_optimize = 0;
9638 regs->array[i].single_usage = NULL_RTX;
9641 last_set = xcalloc (regs->num, sizeof (rtx));
9643 /* Scan the loop, recording register usage. */
9644 for (insn = loop->top ? loop->top : loop->start; insn != loop->end;
9645 insn = NEXT_INSN (insn))
9647 if (INSN_P (insn))
9649 /* Record registers that have exactly one use. */
9650 find_single_use_in_loop (regs, insn, PATTERN (insn));
9652 /* Include uses in REG_EQUAL notes. */
9653 if (REG_NOTES (insn))
9654 find_single_use_in_loop (regs, insn, REG_NOTES (insn));
9656 if (GET_CODE (PATTERN (insn)) == SET
9657 || GET_CODE (PATTERN (insn)) == CLOBBER)
9658 count_one_set (regs, insn, PATTERN (insn), last_set);
9659 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
9661 int i;
9662 for (i = XVECLEN (PATTERN (insn), 0) - 1; i >= 0; i--)
9663 count_one_set (regs, insn, XVECEXP (PATTERN (insn), 0, i),
9664 last_set);
9668 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
9669 memset (last_set, 0, regs->num * sizeof (rtx));
9671 /* Invalidate all registers used for function argument passing.
9672 We check rtx_varies_p for the same reason as below, to allow
9673 optimizing PIC calculations. */
9674 if (GET_CODE (insn) == CALL_INSN)
9676 rtx link;
9677 for (link = CALL_INSN_FUNCTION_USAGE (insn);
9678 link;
9679 link = XEXP (link, 1))
9681 rtx op, reg;
9683 if (GET_CODE (op = XEXP (link, 0)) == USE
9684 && REG_P (reg = XEXP (op, 0))
9685 && rtx_varies_p (reg, 1))
9686 regs->array[REGNO (reg)].may_not_optimize = 1;
9691 /* Invalidate all hard registers clobbered by calls. With one exception:
9692 a call-clobbered PIC register is still function-invariant for our
9693 purposes, since we can hoist any PIC calculations out of the loop.
9694 Thus the call to rtx_varies_p. */
9695 if (LOOP_INFO (loop)->has_call)
9696 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
9697 if (TEST_HARD_REG_BIT (regs_invalidated_by_call, i)
9698 && rtx_varies_p (regno_reg_rtx[i], 1))
9700 regs->array[i].may_not_optimize = 1;
9701 regs->array[i].set_in_loop = 1;
9704 #ifdef AVOID_CCMODE_COPIES
9705 /* Don't try to move insns which set CC registers if we should not
9706 create CCmode register copies. */
9707 for (i = regs->num - 1; i >= FIRST_PSEUDO_REGISTER; i--)
9708 if (GET_MODE_CLASS (GET_MODE (regno_reg_rtx[i])) == MODE_CC)
9709 regs->array[i].may_not_optimize = 1;
9710 #endif
9712 /* Set regs->array[I].n_times_set for the new registers. */
9713 for (i = old_nregs; i < regs->num; i++)
9714 regs->array[i].n_times_set = regs->array[i].set_in_loop;
9716 free (last_set);
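/* Hypothetical standalone sketch (names invented for illustration) of the
   two bookkeeping conventions described before loop_regs_scan:
   set_in_loop stops counting once it reaches 127 to avoid overflow, and
   single_usage is a tri-state: null for "no use seen", the using insn for
   "exactly one use", and a sentinel for "more than one use".  */

static void
sketch_count_one_set (int *set_in_loop)
{
  if (*set_in_loop < 127)	/* Saturate at 127, as documented above.  */
    (*set_in_loop)++;
}

static void
sketch_record_single_usage (void **single_usage, void *insn, void *many)
{
  if (*single_usage == 0)
    *single_usage = insn;	/* First use: remember the insn itself.  */
  else if (*single_usage != many)
    *single_usage = many;	/* Second use: collapse to the sentinel.  */
}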
9719 /* Returns the number of real INSNs in the LOOP. */
9721 static int
9722 count_insns_in_loop (const struct loop *loop)
9724 int count = 0;
9725 rtx insn;
9727 for (insn = loop->top ? loop->top : loop->start; insn != loop->end;
9728 insn = NEXT_INSN (insn))
9729 if (INSN_P (insn))
9730 ++count;
9732 return count;
9735 /* Move MEMs into registers for the duration of the loop. */
9737 static void
9738 load_mems (const struct loop *loop)
9740 struct loop_info *loop_info = LOOP_INFO (loop);
9741 struct loop_regs *regs = LOOP_REGS (loop);
9742 int maybe_never = 0;
9743 int i;
9744 rtx p, prev_ebb_head;
9745 rtx label = NULL_RTX;
9746 rtx end_label;
9747 /* Nonzero if the next instruction may never be executed. */
9748 int next_maybe_never = 0;
9749 unsigned int last_max_reg = max_reg_num ();
9751 if (loop_info->mems_idx == 0)
9752 return;
9754 /* We cannot use next_label here because it skips over normal insns. */
9755 end_label = next_nonnote_insn (loop->end);
9756 if (end_label && GET_CODE (end_label) != CODE_LABEL)
9757 end_label = NULL_RTX;
9759 /* Check to see if it's possible that some instructions in the loop are
9760 never executed. Also check if there is a goto out of the loop other
9761 than right after the end of the loop. */
9762 for (p = next_insn_in_loop (loop, loop->scan_start);
9763 p != NULL_RTX;
9764 p = next_insn_in_loop (loop, p))
9766 if (GET_CODE (p) == CODE_LABEL)
9767 maybe_never = 1;
9768 else if (GET_CODE (p) == JUMP_INSN
9769 /* If we enter the loop in the middle, and scan
9770 around to the beginning, don't set maybe_never
9771 for that. This must be an unconditional jump,
9772 otherwise the code at the top of the loop might
9773 never be executed. Unconditional jumps are
9774 followed by a barrier and then the loop end. */
9775 && ! (GET_CODE (p) == JUMP_INSN
9776 && JUMP_LABEL (p) == loop->top
9777 && NEXT_INSN (NEXT_INSN (p)) == loop->end
9778 && any_uncondjump_p (p)))
9780 /* If this is a jump outside of the loop but not right
9781 after the end of the loop, we would have to emit new fixup
9782 sequences for each such label. */
9783 if (/* If we can't tell where control might go when this
9784 JUMP_INSN is executed, we must be conservative. */
9785 !JUMP_LABEL (p)
9786 || (JUMP_LABEL (p) != end_label
9787 && (INSN_UID (JUMP_LABEL (p)) >= max_uid_for_loop
9788 || INSN_LUID (JUMP_LABEL (p)) < INSN_LUID (loop->start)
9789 || INSN_LUID (JUMP_LABEL (p)) > INSN_LUID (loop->end))))
9790 return;
9792 if (!any_condjump_p (p))
9793 /* Something complicated. */
9794 maybe_never = 1;
9795 else
9796 /* If there are any more instructions in the loop, they
9797 might not be reached. */
9798 next_maybe_never = 1;
9800 else if (next_maybe_never)
9801 maybe_never = 1;
9804 /* Find start of the extended basic block that enters the loop. */
9805 for (p = loop->start;
9806 PREV_INSN (p) && GET_CODE (p) != CODE_LABEL;
9807 p = PREV_INSN (p))
9809 prev_ebb_head = p;
9811 cselib_init (true);
9813 /* Build table of mems that get set to constant values before the
9814 loop. */
9815 for (; p != loop->start; p = NEXT_INSN (p))
9816 cselib_process_insn (p);
9818 /* Actually move the MEMs. */
9819 for (i = 0; i < loop_info->mems_idx; ++i)
9821 regset_head load_copies;
9822 regset_head store_copies;
9823 int written = 0;
9824 rtx reg;
9825 rtx mem = loop_info->mems[i].mem;
9826 rtx mem_list_entry;
9828 if (MEM_VOLATILE_P (mem)
9829 || loop_invariant_p (loop, XEXP (mem, 0)) != 1)
9830 /* There's no telling whether or not MEM is modified. */
9831 loop_info->mems[i].optimize = 0;
9833 /* Go through the MEMs written to in the loop to see if this
9834 one is aliased by one of them. */
9835 mem_list_entry = loop_info->store_mems;
9836 while (mem_list_entry)
9838 if (rtx_equal_p (mem, XEXP (mem_list_entry, 0)))
9839 written = 1;
9840 else if (true_dependence (XEXP (mem_list_entry, 0), VOIDmode,
9841 mem, rtx_varies_p))
9843 /* MEM is indeed aliased by this store. */
9844 loop_info->mems[i].optimize = 0;
9845 break;
9847 mem_list_entry = XEXP (mem_list_entry, 1);
9850 if (flag_float_store && written
9851 && GET_MODE_CLASS (GET_MODE (mem)) == MODE_FLOAT)
9852 loop_info->mems[i].optimize = 0;
9854 /* If this MEM is written to, we must be sure that there
9855 are no reads from another MEM that aliases this one. */
9856 if (loop_info->mems[i].optimize && written)
9858 int j;
9860 for (j = 0; j < loop_info->mems_idx; ++j)
9862 if (j == i)
9863 continue;
9864 else if (true_dependence (mem,
9865 VOIDmode,
9866 loop_info->mems[j].mem,
9867 rtx_varies_p))
9869 /* It's not safe to hoist loop_info->mems[i] out of
9870 the loop because writes to it might not be
9871 seen by reads from loop_info->mems[j]. */
9872 loop_info->mems[i].optimize = 0;
9873 break;
9878 if (maybe_never && may_trap_p (mem))
9879 /* We can't access the MEM outside the loop; it might
9880 cause a trap that wouldn't have happened otherwise. */
9881 loop_info->mems[i].optimize = 0;
9883 if (!loop_info->mems[i].optimize)
9884 /* We thought we were going to lift this MEM out of the
9885 loop, but later discovered that we could not. */
9886 continue;
9888 INIT_REG_SET (&load_copies);
9889 INIT_REG_SET (&store_copies);
9891 /* Allocate a pseudo for this MEM. We set REG_USERVAR_P in
9892 order to keep scan_loop from moving stores to this MEM
9893 out of the loop just because this REG is neither a
9894 user-variable nor used in the loop test. */
9895 reg = gen_reg_rtx (GET_MODE (mem));
9896 REG_USERVAR_P (reg) = 1;
9897 loop_info->mems[i].reg = reg;
9899 /* Now, replace all references to the MEM with the
9900 corresponding pseudos. */
9901 maybe_never = 0;
9902 for (p = next_insn_in_loop (loop, loop->scan_start);
9903 p != NULL_RTX;
9904 p = next_insn_in_loop (loop, p))
9906 if (INSN_P (p))
9908 rtx set;
9910 set = single_set (p);
9912 /* See if this copies the mem into a register that isn't
9913 modified afterwards. We'll try to do copy propagation
9914 a little further on. */
9915 if (set
9916 /* @@@ This test is _way_ too conservative. */
9917 && ! maybe_never
9918 && REG_P (SET_DEST (set))
9919 && REGNO (SET_DEST (set)) >= FIRST_PSEUDO_REGISTER
9920 && REGNO (SET_DEST (set)) < last_max_reg
9921 && regs->array[REGNO (SET_DEST (set))].n_times_set == 1
9922 && rtx_equal_p (SET_SRC (set), mem))
9923 SET_REGNO_REG_SET (&load_copies, REGNO (SET_DEST (set)));
9925 /* See if this copies the mem from a register that isn't
9926 modified afterwards. We'll try to remove the
9927 redundant copy later on by doing a little register
9928 renaming and copy propagation. This will help
9929 to untangle things for the BIV detection code. */
9930 if (set
9931 && ! maybe_never
9932 && REG_P (SET_SRC (set))
9933 && REGNO (SET_SRC (set)) >= FIRST_PSEUDO_REGISTER
9934 && REGNO (SET_SRC (set)) < last_max_reg
9935 && regs->array[REGNO (SET_SRC (set))].n_times_set == 1
9936 && rtx_equal_p (SET_DEST (set), mem))
9937 SET_REGNO_REG_SET (&store_copies, REGNO (SET_SRC (set)));
9939 /* If this is a call which uses / clobbers this memory
9940 location, we must not change the interface here. */
9941 if (GET_CODE (p) == CALL_INSN
9942 && reg_mentioned_p (loop_info->mems[i].mem,
9943 CALL_INSN_FUNCTION_USAGE (p)))
9945 cancel_changes (0);
9946 loop_info->mems[i].optimize = 0;
9947 break;
9949 else
9950 /* Replace the memory reference with the shadow register. */
9951 replace_loop_mems (p, loop_info->mems[i].mem,
9952 loop_info->mems[i].reg, written);
9955 if (GET_CODE (p) == CODE_LABEL
9956 || GET_CODE (p) == JUMP_INSN)
9957 maybe_never = 1;
9960 if (! loop_info->mems[i].optimize)
9961 ; /* We found we couldn't do the replacement, so do nothing. */
9962 else if (! apply_change_group ())
9963 /* We couldn't replace all occurrences of the MEM. */
9964 loop_info->mems[i].optimize = 0;
9965 else
9967 /* Load the memory immediately before LOOP->START, which is
9968 the NOTE_LOOP_BEG. */
9969 cselib_val *e = cselib_lookup (mem, VOIDmode, 0);
9970 rtx set;
9971 rtx best = mem;
9972 int j;
9973 struct elt_loc_list *const_equiv = 0;
9975 if (e)
9977 struct elt_loc_list *equiv;
9978 struct elt_loc_list *best_equiv = 0;
9979 for (equiv = e->locs; equiv; equiv = equiv->next)
9981 if (CONSTANT_P (equiv->loc))
9982 const_equiv = equiv;
9983 else if (REG_P (equiv->loc)
9984 /* Extending hard register lifetimes causes a crash
9985 on SRC targets. Doing so on non-SRC targets is
9986 probably also not a good idea, since we most
9987 probably have pseudoregister equivalence as
9988 well. */
9989 && REGNO (equiv->loc) >= FIRST_PSEUDO_REGISTER)
9990 best_equiv = equiv;
9992 /* Use the constant equivalence if that is cheap enough. */
9993 if (! best_equiv)
9994 best_equiv = const_equiv;
9995 else if (const_equiv
9996 && (rtx_cost (const_equiv->loc, SET)
9997 <= rtx_cost (best_equiv->loc, SET)))
9999 best_equiv = const_equiv;
10000 const_equiv = 0;
10003 /* If best_equiv is nonzero, we know that MEM is set to a
10004 constant or register before the loop. We will use this
10005 knowledge to initialize the shadow register with that
10006 constant or reg rather than by loading from MEM. */
10007 if (best_equiv)
10008 best = copy_rtx (best_equiv->loc);
10011 set = gen_move_insn (reg, best);
10012 set = loop_insn_hoist (loop, set);
10013 if (REG_P (best))
10015 for (p = prev_ebb_head; p != loop->start; p = NEXT_INSN (p))
10016 if (REGNO_LAST_UID (REGNO (best)) == INSN_UID (p))
10018 REGNO_LAST_UID (REGNO (best)) = INSN_UID (set);
10019 break;
10023 if (const_equiv)
10024 set_unique_reg_note (set, REG_EQUAL, copy_rtx (const_equiv->loc));
10026 if (written)
10028 if (label == NULL_RTX)
10030 label = gen_label_rtx ();
10031 emit_label_after (label, loop->end);
10034 /* Store the memory immediately after END, which is
10035 the NOTE_LOOP_END. */
10036 set = gen_move_insn (copy_rtx (mem), reg);
10037 loop_insn_emit_after (loop, 0, label, set);
10040 if (loop_dump_stream)
10042 fprintf (loop_dump_stream, "Hoisted regno %d %s from ",
10043 REGNO (reg), (written ? "r/w" : "r/o"));
10044 print_rtl (loop_dump_stream, mem);
10045 fputc ('\n', loop_dump_stream);
10048 /* Attempt a bit of copy propagation. This helps untangle the
10049 data flow, and enables {basic,general}_induction_var to find
10050 more bivs/givs. */
10051 EXECUTE_IF_SET_IN_REG_SET
10052 (&load_copies, FIRST_PSEUDO_REGISTER, j,
10054 try_copy_prop (loop, reg, j);
10056 CLEAR_REG_SET (&load_copies);
10058 EXECUTE_IF_SET_IN_REG_SET
10059 (&store_copies, FIRST_PSEUDO_REGISTER, j,
10061 try_swap_copy_prop (loop, reg, j);
10063 CLEAR_REG_SET (&store_copies);
10067 /* Now, we need to replace all references to the previous exit
10068 label with the new one. */
10069 if (label != NULL_RTX && end_label != NULL_RTX)
10070 for (p = loop->start; p != loop->end; p = NEXT_INSN (p))
10071 if (GET_CODE (p) == JUMP_INSN && JUMP_LABEL (p) == end_label)
10072 redirect_jump (p, label, false);
10074 cselib_finish ();
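/* Hypothetical C-level analogue (not taken from any test case) of the
   transformation performed by load_mems for a read/write MEM whose
   address is loop-invariant: the memory slot is shadowed by a pseudo,
   loaded once before the loop and stored back once after it.  */

static void
sketch_before_load_mems (int *p, int n)
{
  int i;
  for (i = 0; i < n; i++)
    *p += i;			/* Every iteration reads and writes *p.  */
}

static void
sketch_after_load_mems (int *p, int n)
{
  int shadow = *p;		/* Load hoisted in front of NOTE_LOOP_BEG.  */
  int i;
  for (i = 0; i < n; i++)
    shadow += i;		/* The loop body now works on a register.  */
  *p = shadow;			/* Store sunk after NOTE_LOOP_END (r/w case).  */
}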
10077 /* For communication between note_reg_stored and its caller. */
10078 struct note_reg_stored_arg
10080 int set_seen;
10081 rtx reg;
10084 /* Called via note_stores, record in SET_SEEN whether X, which is written,
10085 is equal to ARG. */
10086 static void
10087 note_reg_stored (rtx x, rtx setter ATTRIBUTE_UNUSED, void *arg)
10089 struct note_reg_stored_arg *t = (struct note_reg_stored_arg *) arg;
10090 if (t->reg == x)
10091 t->set_seen = 1;
10094 /* Try to replace every occurrence of pseudo REGNO with REPLACEMENT.
10095 There must be exactly one insn that sets this pseudo; it will be
10096 deleted if all replacements succeed and we can prove that the register
10097 is not used after the loop. */
10099 static void
10100 try_copy_prop (const struct loop *loop, rtx replacement, unsigned int regno)
10102 /* This is the reg that we are copying from. */
10103 rtx reg_rtx = regno_reg_rtx[regno];
10104 rtx init_insn = 0;
10105 rtx insn;
10106 /* These help keep track of whether we replaced all uses of the reg. */
10107 int replaced_last = 0;
10108 int store_is_first = 0;
10110 for (insn = next_insn_in_loop (loop, loop->scan_start);
10111 insn != NULL_RTX;
10112 insn = next_insn_in_loop (loop, insn))
10114 rtx set;
10116 /* Only substitute within one extended basic block from the initializing
10117 insn. */
10118 if (GET_CODE (insn) == CODE_LABEL && init_insn)
10119 break;
10121 if (! INSN_P (insn))
10122 continue;
10124 /* Is this the initializing insn? */
10125 set = single_set (insn);
10126 if (set
10127 && REG_P (SET_DEST (set))
10128 && REGNO (SET_DEST (set)) == regno)
10130 if (init_insn)
10131 abort ();
10133 init_insn = insn;
10134 if (REGNO_FIRST_UID (regno) == INSN_UID (insn))
10135 store_is_first = 1;
10138 /* Only substitute after seeing the initializing insn. */
10139 if (init_insn && insn != init_insn)
10141 struct note_reg_stored_arg arg;
10143 replace_loop_regs (insn, reg_rtx, replacement);
10144 if (REGNO_LAST_UID (regno) == INSN_UID (insn))
10145 replaced_last = 1;
10147 /* Stop replacing when REPLACEMENT is modified. */
10148 arg.reg = replacement;
10149 arg.set_seen = 0;
10150 note_stores (PATTERN (insn), note_reg_stored, &arg);
10151 if (arg.set_seen)
10153 rtx note = find_reg_note (insn, REG_EQUAL, NULL);
10155 /* It is possible that we've turned a previously valid REG_EQUAL note
10156 into an invalid one: we changed REGNO to REPLACEMENT and, unlike
10157 REGNO, REPLACEMENT is modified here, so the note no longer holds. */
10158 if (note && reg_mentioned_p (replacement, XEXP (note, 0)))
10159 remove_note (insn, note);
10160 break;
10164 if (! init_insn)
10165 abort ();
10166 if (apply_change_group ())
10168 if (loop_dump_stream)
10169 fprintf (loop_dump_stream, " Replaced reg %d", regno);
10170 if (store_is_first && replaced_last)
10172 rtx first;
10173 rtx retval_note;
10175 /* Assume we're just deleting INIT_INSN. */
10176 first = init_insn;
10177 /* Look for REG_RETVAL note. If we're deleting the end of
10178 the libcall sequence, the whole sequence can go. */
10179 retval_note = find_reg_note (init_insn, REG_RETVAL, NULL_RTX);
10180 /* If we found a REG_RETVAL note, find the first instruction
10181 in the sequence. */
10182 if (retval_note)
10183 first = XEXP (retval_note, 0);
10185 /* Delete the instructions. */
10186 loop_delete_insns (first, init_insn);
10188 if (loop_dump_stream)
10189 fprintf (loop_dump_stream, ".\n");
10193 /* Replace all the instructions from FIRST up to and including LAST
10194 with NOTE_INSN_DELETED notes. */
10196 static void
10197 loop_delete_insns (rtx first, rtx last)
10199 while (1)
10201 if (loop_dump_stream)
10202 fprintf (loop_dump_stream, ", deleting init_insn (%d)",
10203 INSN_UID (first));
10204 delete_insn (first);
10206 /* If this was the LAST instruction we're supposed to delete,
10207 we're done. */
10208 if (first == last)
10209 break;
10211 first = NEXT_INSN (first);
10215 /* Try to replace occurrences of pseudo REGNO with REPLACEMENT within
10216 loop LOOP if the order of the sets of these registers can be
10217 swapped. There must be exactly one insn within the loop that sets
10218 this pseudo followed immediately by a move insn that sets
10219 REPLACEMENT with REGNO. */
10220 static void
10221 try_swap_copy_prop (const struct loop *loop, rtx replacement,
10222 unsigned int regno)
10224 rtx insn;
10225 rtx set = NULL_RTX;
10226 unsigned int new_regno;
10228 new_regno = REGNO (replacement);
10230 for (insn = next_insn_in_loop (loop, loop->scan_start);
10231 insn != NULL_RTX;
10232 insn = next_insn_in_loop (loop, insn))
10234 /* Search for the insn that copies REGNO to NEW_REGNO. */
10235 if (INSN_P (insn)
10236 && (set = single_set (insn))
10237 && REG_P (SET_DEST (set))
10238 && REGNO (SET_DEST (set)) == new_regno
10239 && REG_P (SET_SRC (set))
10240 && REGNO (SET_SRC (set)) == regno)
10241 break;
10244 if (insn != NULL_RTX)
10246 rtx prev_insn;
10247 rtx prev_set;
10249 /* Some DEF-USE info would come in handy here to make this
10250 function more general. For now, just check the previous insn
10251 which is the most likely candidate for setting REGNO. */
10253 prev_insn = PREV_INSN (insn);
10255 if (INSN_P (insn)
10256 && (prev_set = single_set (prev_insn))
10257 && REG_P (SET_DEST (prev_set))
10258 && REGNO (SET_DEST (prev_set)) == regno)
10260 /* We have:
10261 (set (reg regno) (expr))
10262 (set (reg new_regno) (reg regno))
10264 so try converting this to:
10265 (set (reg new_regno) (expr))
10266 (set (reg regno) (reg new_regno))
10268 The former construct is often generated when a global
10269 variable used for an induction variable is shadowed by a
10270 register (NEW_REGNO). The latter construct improves the
10271 chances of GIV replacement and BIV elimination. */
10273 validate_change (prev_insn, &SET_DEST (prev_set),
10274 replacement, 1);
10275 validate_change (insn, &SET_DEST (set),
10276 SET_SRC (set), 1);
10277 validate_change (insn, &SET_SRC (set),
10278 replacement, 1);
10280 if (apply_change_group ())
10282 if (loop_dump_stream)
10283 fprintf (loop_dump_stream,
10284 " Swapped set of reg %d at %d with reg %d at %d.\n",
10285 regno, INSN_UID (insn),
10286 new_regno, INSN_UID (prev_insn));
10288 /* Update first use of REGNO. */
10289 if (REGNO_FIRST_UID (regno) == INSN_UID (prev_insn))
10290 REGNO_FIRST_UID (regno) = INSN_UID (insn);
10292 /* Now perform copy propagation to hopefully
10293 remove all uses of REGNO within the loop. */
10294 try_copy_prop (loop, replacement, regno);
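/* Hypothetical source-level pattern (invented for illustration) that
   gives rise to the RTL shape handled by try_swap_copy_prop: a global
   used as the induction variable is shadowed by a register.  Before the
   swap each iteration sets the global and then copies it into the
   shadow; after the swap the shadow is updated first and copied back,
   which exposes the shadow to GIV replacement and BIV elimination.  */

static int sketch_global_iv;	/* Stands in for a global induction variable.  */

static int
sketch_shadowed_global (int n, const int *a)
{
  int sum = 0;
  int shadow;
  int i;

  for (i = 0; i < n; i++)
    {
      sketch_global_iv = sketch_global_iv + 1;	/* (set (reg regno) (expr))  */
      shadow = sketch_global_iv;		/* (set (reg new_regno) (reg regno))  */
      sum += a[i] + shadow;
    }
  return sum;
}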
10300 /* Worker function for find_mem_in_note, called via for_each_rtx. */
10302 static int
10303 find_mem_in_note_1 (rtx *x, void *data)
10305 if (*x != NULL_RTX && MEM_P (*x))
10307 rtx *res = (rtx *) data;
10308 *res = *x;
10309 return 1;
10311 return 0;
10314 /* Returns the first MEM found in NOTE by depth-first search. */
10316 static rtx
10317 find_mem_in_note (rtx note)
10319 if (note && for_each_rtx (&note, find_mem_in_note_1, &note))
10320 return note;
10321 return NULL_RTX;
10324 /* Replace MEM with its associated pseudo register. This function is
10325 called, on behalf of load_mems, from replace_loop_mems via for_each_rtx. DATA is actually a pointer
10326 to a structure describing the instruction currently being scanned
10327 and the MEM we are currently replacing. */
10329 static int
10330 replace_loop_mem (rtx *mem, void *data)
10332 loop_replace_args *args = (loop_replace_args *) data;
10333 rtx m = *mem;
10335 if (m == NULL_RTX)
10336 return 0;
10338 switch (GET_CODE (m))
10340 case MEM:
10341 break;
10343 case CONST_DOUBLE:
10344 /* We're not interested in the MEM associated with a
10345 CONST_DOUBLE, so there's no need to traverse into one. */
10346 return -1;
10348 default:
10349 /* This is not a MEM. */
10350 return 0;
10353 if (!rtx_equal_p (args->match, m))
10354 /* This is not the MEM we are currently replacing. */
10355 return 0;
10357 /* Actually replace the MEM. */
10358 validate_change (args->insn, mem, args->replacement, 1);
10360 return 0;
10363 static void
10364 replace_loop_mems (rtx insn, rtx mem, rtx reg, int written)
10366 loop_replace_args args;
10368 args.insn = insn;
10369 args.match = mem;
10370 args.replacement = reg;
10372 for_each_rtx (&insn, replace_loop_mem, &args);
10374 /* If we hoist a mem write out of the loop, then REG_EQUAL
10375 notes referring to the mem are no longer valid. */
10376 if (written)
10378 rtx note, sub;
10379 rtx *link;
10381 for (link = &REG_NOTES (insn); (note = *link); link = &XEXP (note, 1))
10383 if (REG_NOTE_KIND (note) == REG_EQUAL
10384 && (sub = find_mem_in_note (note))
10385 && true_dependence (mem, VOIDmode, sub, rtx_varies_p))
10387 /* Remove the note. */
10388 validate_change (NULL_RTX, link, XEXP (note, 1), 1);
10389 break;
10395 /* Replace one register with another. Called through for_each_rtx; PX points
10396 to the rtx being scanned. DATA is actually a pointer to
10397 a structure of arguments. */
10399 static int
10400 replace_loop_reg (rtx *px, void *data)
10402 rtx x = *px;
10403 loop_replace_args *args = (loop_replace_args *) data;
10405 if (x == NULL_RTX)
10406 return 0;
10408 if (x == args->match)
10409 validate_change (args->insn, px, args->replacement, 1);
10411 return 0;
10414 static void
10415 replace_loop_regs (rtx insn, rtx reg, rtx replacement)
10417 loop_replace_args args;
10419 args.insn = insn;
10420 args.match = reg;
10421 args.replacement = replacement;
10423 for_each_rtx (&insn, replace_loop_reg, &args);
10426 /* Emit insn for PATTERN after WHERE_INSN in basic block WHERE_BB
10427 (ignored in the interim). */
10429 static rtx
10430 loop_insn_emit_after (const struct loop *loop ATTRIBUTE_UNUSED,
10431 basic_block where_bb ATTRIBUTE_UNUSED, rtx where_insn,
10432 rtx pattern)
10434 return emit_insn_after (pattern, where_insn);
10438 /* If WHERE_INSN is nonzero emit insn for PATTERN before WHERE_INSN
10439 in basic block WHERE_BB (ignored in the interim) within the loop;
10440 otherwise hoist PATTERN into the loop pre-header. */
10442 rtx
10443 loop_insn_emit_before (const struct loop *loop,
10444 basic_block where_bb ATTRIBUTE_UNUSED,
10445 rtx where_insn, rtx pattern)
10447 if (! where_insn)
10448 return loop_insn_hoist (loop, pattern);
10449 return emit_insn_before (pattern, where_insn);
10453 /* Emit call insn for PATTERN before WHERE_INSN in basic block
10454 WHERE_BB (ignored in the interim) within the loop. */
10456 static rtx
10457 loop_call_insn_emit_before (const struct loop *loop ATTRIBUTE_UNUSED,
10458 basic_block where_bb ATTRIBUTE_UNUSED,
10459 rtx where_insn, rtx pattern)
10461 return emit_call_insn_before (pattern, where_insn);
10465 /* Hoist insn for PATTERN into the loop pre-header. */
10467 rtx
10468 loop_insn_hoist (const struct loop *loop, rtx pattern)
10470 return loop_insn_emit_before (loop, 0, loop->start, pattern);
10474 /* Hoist call insn for PATTERN into the loop pre-header. */
10476 static rtx
10477 loop_call_insn_hoist (const struct loop *loop, rtx pattern)
10479 return loop_call_insn_emit_before (loop, 0, loop->start, pattern);
10483 /* Sink insn for PATTERN after the loop end. */
10485 rtx
10486 loop_insn_sink (const struct loop *loop, rtx pattern)
10488 return loop_insn_emit_before (loop, 0, loop->sink, pattern);
10491 /* bl->final_value can be either general_operand or PLUS of general_operand
10492 and constant. Emit sequence of instructions to load it into REG. */
10493 static rtx
10494 gen_load_of_final_value (rtx reg, rtx final_value)
10496 rtx seq;
10497 start_sequence ();
10498 final_value = force_operand (final_value, reg);
10499 if (final_value != reg)
10500 emit_move_insn (reg, final_value);
10501 seq = get_insns ();
10502 end_sequence ();
10503 return seq;
10506 /* If the loop has multiple exits, emit insn for PATTERN before the
10507 loop to ensure that it will always be executed no matter how the
10508 loop exits. Otherwise, emit the insn for PATTERN after the loop,
10509 since this is slightly more efficient. */
10511 static rtx
10512 loop_insn_sink_or_swim (const struct loop *loop, rtx pattern)
10514 if (loop->exit_count)
10515 return loop_insn_hoist (loop, pattern);
10516 else
10517 return loop_insn_sink (loop, pattern);
10520 static void
10521 loop_ivs_dump (const struct loop *loop, FILE *file, int verbose)
10523 struct iv_class *bl;
10524 int iv_num = 0;
10526 if (! loop || ! file)
10527 return;
10529 for (bl = LOOP_IVS (loop)->list; bl; bl = bl->next)
10530 iv_num++;
10532 fprintf (file, "Loop %d: %d IV classes\n", loop->num, iv_num);
10534 for (bl = LOOP_IVS (loop)->list; bl; bl = bl->next)
10536 loop_iv_class_dump (bl, file, verbose);
10537 fputc ('\n', file);
10542 static void
10543 loop_iv_class_dump (const struct iv_class *bl, FILE *file,
10544 int verbose ATTRIBUTE_UNUSED)
10546 struct induction *v;
10547 rtx incr;
10548 int i;
10550 if (! bl || ! file)
10551 return;
10553 fprintf (file, "IV class for reg %d, benefit %d\n",
10554 bl->regno, bl->total_benefit);
10556 fprintf (file, " Init insn %d", INSN_UID (bl->init_insn));
10557 if (bl->initial_value)
10559 fprintf (file, ", init val: ");
10560 print_simple_rtl (file, bl->initial_value);
10562 if (bl->initial_test)
10564 fprintf (file, ", init test: ");
10565 print_simple_rtl (file, bl->initial_test);
10567 fputc ('\n', file);
10569 if (bl->final_value)
10571 fprintf (file, " Final val: ");
10572 print_simple_rtl (file, bl->final_value);
10573 fputc ('\n', file);
10576 if ((incr = biv_total_increment (bl)))
10578 fprintf (file, " Total increment: ");
10579 print_simple_rtl (file, incr);
10580 fputc ('\n', file);
10583 /* List the increments. */
10584 for (i = 0, v = bl->biv; v; v = v->next_iv, i++)
10586 fprintf (file, " Inc%d: insn %d, incr: ", i, INSN_UID (v->insn));
10587 print_simple_rtl (file, v->add_val);
10588 fputc ('\n', file);
10591 /* List the givs. */
10592 for (i = 0, v = bl->giv; v; v = v->next_iv, i++)
10594 fprintf (file, " Giv%d: insn %d, benefit %d, ",
10595 i, INSN_UID (v->insn), v->benefit);
10596 if (v->giv_type == DEST_ADDR)
10597 print_simple_rtl (file, v->mem);
10598 else
10599 print_simple_rtl (file, single_set (v->insn));
10600 fputc ('\n', file);
10605 static void
10606 loop_biv_dump (const struct induction *v, FILE *file, int verbose)
10608 if (! v || ! file)
10609 return;
10611 fprintf (file,
10612 "Biv %d: insn %d",
10613 REGNO (v->dest_reg), INSN_UID (v->insn));
10614 fprintf (file, " const ");
10615 print_simple_rtl (file, v->add_val);
10617 if (verbose && v->final_value)
10619 fputc ('\n', file);
10620 fprintf (file, " final ");
10621 print_simple_rtl (file, v->final_value);
10624 fputc ('\n', file);
10628 static void
10629 loop_giv_dump (const struct induction *v, FILE *file, int verbose)
10631 if (! v || ! file)
10632 return;
10634 if (v->giv_type == DEST_REG)
10635 fprintf (file, "Giv %d: insn %d",
10636 REGNO (v->dest_reg), INSN_UID (v->insn));
10637 else
10638 fprintf (file, "Dest address: insn %d",
10639 INSN_UID (v->insn));
10641 fprintf (file, " src reg %d benefit %d",
10642 REGNO (v->src_reg), v->benefit);
10643 fprintf (file, " lifetime %d",
10644 v->lifetime);
10646 if (v->replaceable)
10647 fprintf (file, " replaceable");
10649 if (v->no_const_addval)
10650 fprintf (file, " ncav");
10652 if (v->ext_dependent)
10654 switch (GET_CODE (v->ext_dependent))
10656 case SIGN_EXTEND:
10657 fprintf (file, " ext se");
10658 break;
10659 case ZERO_EXTEND:
10660 fprintf (file, " ext ze");
10661 break;
10662 case TRUNCATE:
10663 fprintf (file, " ext tr");
10664 break;
10665 default:
10666 abort ();
10670 fputc ('\n', file);
10671 fprintf (file, " mult ");
10672 print_simple_rtl (file, v->mult_val);
10674 fputc ('\n', file);
10675 fprintf (file, " add ");
10676 print_simple_rtl (file, v->add_val);
10678 if (verbose && v->final_value)
10680 fputc ('\n', file);
10681 fprintf (file, " final ");
10682 print_simple_rtl (file, v->final_value);
10685 fputc ('\n', file);
10689 void
10690 debug_ivs (const struct loop *loop)
10692 loop_ivs_dump (loop, stderr, 1);
10696 void
10697 debug_iv_class (const struct iv_class *bl)
10699 loop_iv_class_dump (bl, stderr, 1);
10703 void
10704 debug_biv (const struct induction *v)
10706 loop_biv_dump (v, stderr, 1);
10710 void
10711 debug_giv (const struct induction *v)
10713 loop_giv_dump (v, stderr, 1);
10717 #define LOOP_BLOCK_NUM_1(INSN) \
10718 ((INSN) ? (BLOCK_FOR_INSN (INSN) ? BLOCK_NUM (INSN) : - 1) : -1)
10720 /* The notes do not have an assigned block, so look at the next insn. */
10721 #define LOOP_BLOCK_NUM(INSN) \
10722 ((INSN) ? (GET_CODE (INSN) == NOTE \
10723 ? LOOP_BLOCK_NUM_1 (next_nonnote_insn (INSN)) \
10724 : LOOP_BLOCK_NUM_1 (INSN)) \
10725 : -1)
10727 #define LOOP_INSN_UID(INSN) ((INSN) ? INSN_UID (INSN) : -1)
10729 static void
10730 loop_dump_aux (const struct loop *loop, FILE *file,
10731 int verbose ATTRIBUTE_UNUSED)
10733 rtx label;
10735 if (! loop || ! file)
10736 return;
10738 /* Print diagnostics to compare our concept of a loop with
10739 what the loop notes say. */
10740 if (! PREV_INSN (BB_HEAD (loop->first))
10741 || GET_CODE (PREV_INSN (BB_HEAD (loop->first))) != NOTE
10742 || NOTE_LINE_NUMBER (PREV_INSN (BB_HEAD (loop->first)))
10743 != NOTE_INSN_LOOP_BEG)
10744 fprintf (file, ";; No NOTE_INSN_LOOP_BEG at %d\n",
10745 INSN_UID (PREV_INSN (BB_HEAD (loop->first))));
10746 if (! NEXT_INSN (BB_END (loop->last))
10747 || GET_CODE (NEXT_INSN (BB_END (loop->last))) != NOTE
10748 || NOTE_LINE_NUMBER (NEXT_INSN (BB_END (loop->last)))
10749 != NOTE_INSN_LOOP_END)
10750 fprintf (file, ";; No NOTE_INSN_LOOP_END at %d\n",
10751 INSN_UID (NEXT_INSN (BB_END (loop->last))));
10753 if (loop->start)
10755 fprintf (file,
10756 ";; start %d (%d), cont dom %d (%d), cont %d (%d), vtop %d (%d), end %d (%d)\n",
10757 LOOP_BLOCK_NUM (loop->start),
10758 LOOP_INSN_UID (loop->start),
10759 LOOP_BLOCK_NUM (loop->cont),
10760 LOOP_INSN_UID (loop->cont),
10761 LOOP_BLOCK_NUM (loop->cont),
10762 LOOP_INSN_UID (loop->cont),
10763 LOOP_BLOCK_NUM (loop->vtop),
10764 LOOP_INSN_UID (loop->vtop),
10765 LOOP_BLOCK_NUM (loop->end),
10766 LOOP_INSN_UID (loop->end));
10767 fprintf (file, ";; top %d (%d), scan start %d (%d)\n",
10768 LOOP_BLOCK_NUM (loop->top),
10769 LOOP_INSN_UID (loop->top),
10770 LOOP_BLOCK_NUM (loop->scan_start),
10771 LOOP_INSN_UID (loop->scan_start));
10772 fprintf (file, ";; exit_count %d", loop->exit_count);
10773 if (loop->exit_count)
10775 fputs (", labels:", file);
10776 for (label = loop->exit_labels; label; label = LABEL_NEXTREF (label))
10778 fprintf (file, " %d ",
10779 LOOP_INSN_UID (XEXP (label, 0)));
10782 fputs ("\n", file);
10784 /* This can happen when a marked loop appears as two nested loops,
10785 say from while (a || b) {}. The inner loop won't match
10786 the loop markers but the outer one will. */
10787 if (LOOP_BLOCK_NUM (loop->cont) != loop->latch->index)
10788 fprintf (file, ";; NOTE_INSN_LOOP_CONT not in loop latch\n");
10792 /* Call this function from the debugger to dump LOOP. */
10794 void
10795 debug_loop (const struct loop *loop)
10797 flow_loop_dump (loop, stderr, loop_dump_aux, 1);
10800 /* Call this function from the debugger to dump LOOPS. */
10802 void
10803 debug_loops (const struct loops *loops)
10805 flow_loops_dump (loops, stderr, loop_dump_aux, 1);