gcc/loop.c
1 /* Perform various loop optimizations, including strength reduction.
2 Copyright (C) 1987, 1988, 1989, 1991, 1992, 1993, 1994, 1995, 1996, 1997,
3 1998, 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 /* This is the loop optimization pass of the compiler.
23 It finds invariant computations within loops and moves them
24 to the beginning of the loop. Then it identifies basic and
25 general induction variables.
27 Basic induction variables (BIVs) are pseudo registers which are set within
28 a loop only by incrementing or decrementing their value. General induction
29 variables (GIVs) are pseudo registers with a value which is a linear function
30 of a basic induction variable. BIVs are recognized by `basic_induction_var';
31 GIVs by `general_induction_var'.
33 Once induction variables are identified, strength reduction is applied to the
34 general induction variables, and induction variable elimination is applied to
35 the basic induction variables.
37 It also finds cases where
38 a register is set within the loop by zero-extending a narrower value
39 and changes these to zero the entire register once before the loop
40 and merely copy the low part within the loop.
42 Most of the complexity is in heuristics to decide when it is worth
43 while to do these things. */
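/* Illustrative sketch (editorial addition, not part of the original source;
   the variable names are hypothetical): a loop showing what the terms above
   mean.  Here `i' is a BIV, since it changes only by the increment `i++',
   and the address expression `a + i * sizeof (int)' behind `a[i]' is a GIV,
   since it is a linear function of the BIV:

       for (i = 0; i < n; i++)
         a[i] = 0;

   Strength reduction replaces the multiplication implied by the GIV with an
   addition on a new pseudo that is stepped along with the BIV, roughly as if
   the source had been:

       for (i = 0, p = a; i < n; i++, p++)
         *p = 0;
*/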
45 #include "config.h"
46 #include "system.h"
47 #include "coretypes.h"
48 #include "tm.h"
49 #include "rtl.h"
50 #include "tm_p.h"
51 #include "function.h"
52 #include "expr.h"
53 #include "hard-reg-set.h"
54 #include "basic-block.h"
55 #include "insn-config.h"
56 #include "regs.h"
57 #include "recog.h"
58 #include "flags.h"
59 #include "real.h"
60 #include "loop.h"
61 #include "cselib.h"
62 #include "except.h"
63 #include "toplev.h"
64 #include "predict.h"
65 #include "insn-flags.h"
66 #include "optabs.h"
67 #include "cfgloop.h"
69 /* Not really meaningful values, but at least something. */
70 #ifndef SIMULTANEOUS_PREFETCHES
71 #define SIMULTANEOUS_PREFETCHES 3
72 #endif
73 #ifndef PREFETCH_BLOCK
74 #define PREFETCH_BLOCK 32
75 #endif
76 #ifndef HAVE_prefetch
77 #define HAVE_prefetch 0
78 #define CODE_FOR_prefetch 0
79 #define gen_prefetch(a,b,c) (abort(), NULL_RTX)
80 #endif
82 /* Give up the prefetch optimizations once we exceed a given threshold.
83 It is unlikely that we would be able to optimize something in a loop
84 with so many detected prefetches. */
85 #define MAX_PREFETCHES 100
86 /* The number of prefetch blocks that are beneficial to fetch at once before
87 a loop with a known (and low) iteration count. */
88 #define PREFETCH_BLOCKS_BEFORE_LOOP_MAX 6
89 /* For very tiny loops it is not worthwhile to prefetch even before the loop,
90 since it is likely that the data are already in the cache. */
91 #define PREFETCH_BLOCKS_BEFORE_LOOP_MIN 2
93 /* Parameterize some prefetch heuristics so they can be turned on and off
94 easily for performance testing on new architectures. These can be
95 defined in target-dependent files. */
97 /* Prefetch is worthwhile only when loads/stores are dense. */
98 #ifndef PREFETCH_ONLY_DENSE_MEM
99 #define PREFETCH_ONLY_DENSE_MEM 1
100 #endif
102 /* Define what we mean by "dense" loads and stores; this value divided by 256
103 is the minimum fraction of memory references that makes prefetching worthwhile (220/256 is about 86%). */
104 #ifndef PREFETCH_DENSE_MEM
105 #define PREFETCH_DENSE_MEM 220
106 #endif
108 /* Do not prefetch for a loop whose iteration count is known to be low. */
109 #ifndef PREFETCH_NO_LOW_LOOPCNT
110 #define PREFETCH_NO_LOW_LOOPCNT 1
111 #endif
113 /* Define what we mean by a "low" iteration count. */
114 #ifndef PREFETCH_LOW_LOOPCNT
115 #define PREFETCH_LOW_LOOPCNT 32
116 #endif
118 /* Do not prefetch for a loop that contains a function call; such a loop is
119 probably not an internal loop. */
120 #ifndef PREFETCH_NO_CALL
121 #define PREFETCH_NO_CALL 1
122 #endif
124 /* Do not prefetch accesses with an extreme stride. */
125 #ifndef PREFETCH_NO_EXTREME_STRIDE
126 #define PREFETCH_NO_EXTREME_STRIDE 1
127 #endif
129 /* Define what we mean by an "extreme" stride. */
130 #ifndef PREFETCH_EXTREME_STRIDE
131 #define PREFETCH_EXTREME_STRIDE 4096
132 #endif
134 /* Define a limit to how far apart indices can be and still be merged
135 into a single prefetch. */
136 #ifndef PREFETCH_EXTREME_DIFFERENCE
137 #define PREFETCH_EXTREME_DIFFERENCE 4096
138 #endif
140 /* Issue prefetch instructions before the loop to fetch data to be used
141 in the first few loop iterations. */
142 #ifndef PREFETCH_BEFORE_LOOP
143 #define PREFETCH_BEFORE_LOOP 1
144 #endif
146 /* Do not handle reversed order prefetches (negative stride). */
147 #ifndef PREFETCH_NO_REVERSE_ORDER
148 #define PREFETCH_NO_REVERSE_ORDER 1
149 #endif
151 /* Prefetch even if the GIV is in conditional code. */
152 #ifndef PREFETCH_CONDITIONAL
153 #define PREFETCH_CONDITIONAL 1
154 #endif
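/* Illustrative sketch (editorial addition, not part of the original source):
   as noted above, these are only defaults, and a port can override them from
   its target-dependent files instead of editing this file.  A hypothetical
   target header might contain, for example:

       #define PREFETCH_LOW_LOOPCNT 16     (treat fewer than 16 iterations as "low")
       #define PREFETCH_CONDITIONAL 0      (do not prefetch GIVs in conditional code)
*/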
156 #define LOOP_REG_LIFETIME(LOOP, REGNO) \
157 ((REGNO_LAST_LUID (REGNO) - REGNO_FIRST_LUID (REGNO)))
159 #define LOOP_REG_GLOBAL_P(LOOP, REGNO) \
160 ((REGNO_LAST_LUID (REGNO) > INSN_LUID ((LOOP)->end) \
161 || REGNO_FIRST_LUID (REGNO) < INSN_LUID ((LOOP)->start)))
163 #define LOOP_REGNO_NREGS(REGNO, SET_DEST) \
164 ((REGNO) < FIRST_PSEUDO_REGISTER \
165 ? (int) HARD_REGNO_NREGS ((REGNO), GET_MODE (SET_DEST)) : 1)
168 /* Vector mapping INSN_UIDs to luids.
169 The luids are like uids but always increase monotonically.
170 We use them to see whether a jump comes from outside a given loop. */
172 int *uid_luid;
174 /* Indexed by INSN_UID, contains a pointer to the (innermost) loop
175 that the insn is contained in. */
177 struct loop **uid_loop;
179 /* 1 + largest uid of any insn. */
181 int max_uid_for_loop;
183 /* Number of loops detected in current function. Used as index to the
184 next few tables. */
186 static int max_loop_num;
188 /* Bound on pseudo register number before loop optimization.
189 A pseudo has valid regscan info if its number is < max_reg_before_loop. */
190 unsigned int max_reg_before_loop;
192 /* The value to pass to the next call of reg_scan_update. */
193 static int loop_max_reg;
195 /* During the analysis of a loop, a chain of `struct movable's
196 is made to record all the movable insns found.
197 Then the entire chain can be scanned to decide which to move. */
199 struct movable
201 rtx insn; /* A movable insn */
202 rtx set_src; /* The expression this reg is set from. */
203 rtx set_dest; /* The destination of this SET. */
204 rtx dependencies; /* When INSN is libcall, this is an EXPR_LIST
205 of any registers used within the LIBCALL. */
206 int consec; /* Number of consecutive following insns
207 that must be moved with this one. */
208 unsigned int regno; /* The register it sets */
209 short lifetime; /* lifetime of that register;
210 may be adjusted when matching movables
211 that load the same value are found. */
212 short savings; /* Number of insns we can move for this reg,
213 including other movables that force this
214 or match this one. */
215 ENUM_BITFIELD(machine_mode) savemode : 8; /* Nonzero means it is a mode for
216 a low part that we should avoid changing when
217 clearing the rest of the reg. */
218 unsigned int cond : 1; /* 1 if only conditionally movable */
219 unsigned int force : 1; /* 1 means MUST move this insn */
220 unsigned int global : 1; /* 1 means reg is live outside this loop */
221 /* If PARTIAL is 1, GLOBAL means something different:
222 that the reg is live outside the range from where it is set
223 to the following label. */
224 unsigned int done : 1; /* 1 inhibits further processing of this */
226 unsigned int partial : 1; /* 1 means this reg is used for zero-extending.
227 In particular, moving it does not make it
228 invariant. */
229 unsigned int move_insn : 1; /* 1 means that we call emit_move_insn to
230 load SRC, rather than copying INSN. */
231 unsigned int move_insn_first:1;/* Same as above, if this is necessary for the
232 first insn of a consecutive sets group. */
233 unsigned int is_equiv : 1; /* 1 means a REG_EQUIV is present on INSN. */
234 unsigned int insert_temp : 1; /* 1 means we copy to a new pseudo and replace
235 the original insn with a copy from that
236 pseudo, rather than deleting it. */
237 struct movable *match; /* First entry for same value */
238 struct movable *forces; /* An insn that must be moved if this is */
239 struct movable *next;
243 FILE *loop_dump_stream;
245 /* Forward declarations. */
247 static void invalidate_loops_containing_label (rtx);
248 static void find_and_verify_loops (rtx, struct loops *);
249 static void mark_loop_jump (rtx, struct loop *);
250 static void prescan_loop (struct loop *);
251 static int reg_in_basic_block_p (rtx, rtx);
252 static int consec_sets_invariant_p (const struct loop *, rtx, int, rtx);
253 static int labels_in_range_p (rtx, int);
254 static void count_one_set (struct loop_regs *, rtx, rtx, rtx *);
255 static void note_addr_stored (rtx, rtx, void *);
256 static void note_set_pseudo_multiple_uses (rtx, rtx, void *);
257 static int loop_reg_used_before_p (const struct loop *, rtx, rtx);
258 static rtx find_regs_nested (rtx, rtx);
259 static void scan_loop (struct loop*, int);
260 #if 0
261 static void replace_call_address (rtx, rtx, rtx);
262 #endif
263 static rtx skip_consec_insns (rtx, int);
264 static int libcall_benefit (rtx);
265 static void ignore_some_movables (struct loop_movables *);
266 static void force_movables (struct loop_movables *);
267 static void combine_movables (struct loop_movables *, struct loop_regs *);
268 static int num_unmoved_movables (const struct loop *);
269 static int regs_match_p (rtx, rtx, struct loop_movables *);
270 static int rtx_equal_for_loop_p (rtx, rtx, struct loop_movables *,
271 struct loop_regs *);
272 static void add_label_notes (rtx, rtx);
273 static void move_movables (struct loop *loop, struct loop_movables *, int,
274 int);
275 static void loop_movables_add (struct loop_movables *, struct movable *);
276 static void loop_movables_free (struct loop_movables *);
277 static int count_nonfixed_reads (const struct loop *, rtx);
278 static void loop_bivs_find (struct loop *);
279 static void loop_bivs_init_find (struct loop *);
280 static void loop_bivs_check (struct loop *);
281 static void loop_givs_find (struct loop *);
282 static void loop_givs_check (struct loop *);
283 static int loop_biv_eliminable_p (struct loop *, struct iv_class *, int, int);
284 static int loop_giv_reduce_benefit (struct loop *, struct iv_class *,
285 struct induction *, rtx);
286 static void loop_givs_dead_check (struct loop *, struct iv_class *);
287 static void loop_givs_reduce (struct loop *, struct iv_class *);
288 static void loop_givs_rescan (struct loop *, struct iv_class *, rtx *);
289 static void loop_ivs_free (struct loop *);
290 static void strength_reduce (struct loop *, int);
291 static void find_single_use_in_loop (struct loop_regs *, rtx, rtx);
292 static int valid_initial_value_p (rtx, rtx, int, rtx);
293 static void find_mem_givs (const struct loop *, rtx, rtx, int, int);
294 static void record_biv (struct loop *, struct induction *, rtx, rtx, rtx,
295 rtx, rtx *, int, int);
296 static void check_final_value (const struct loop *, struct induction *);
297 static void loop_ivs_dump (const struct loop *, FILE *, int);
298 static void loop_iv_class_dump (const struct iv_class *, FILE *, int);
299 static void loop_biv_dump (const struct induction *, FILE *, int);
300 static void loop_giv_dump (const struct induction *, FILE *, int);
301 static void record_giv (const struct loop *, struct induction *, rtx, rtx,
302 rtx, rtx, rtx, rtx, int, enum g_types, int, int,
303 rtx *);
304 static void update_giv_derive (const struct loop *, rtx);
305 static void check_ext_dependent_givs (const struct loop *, struct iv_class *);
306 static int basic_induction_var (const struct loop *, rtx, enum machine_mode,
307 rtx, rtx, rtx *, rtx *, rtx **);
308 static rtx simplify_giv_expr (const struct loop *, rtx, rtx *, int *);
309 static int general_induction_var (const struct loop *loop, rtx, rtx *, rtx *,
310 rtx *, rtx *, int, int *, enum machine_mode);
311 static int consec_sets_giv (const struct loop *, int, rtx, rtx, rtx, rtx *,
312 rtx *, rtx *, rtx *);
313 static int check_dbra_loop (struct loop *, int);
314 static rtx express_from_1 (rtx, rtx, rtx);
315 static rtx combine_givs_p (struct induction *, struct induction *);
316 static int cmp_combine_givs_stats (const void *, const void *);
317 static void combine_givs (struct loop_regs *, struct iv_class *);
318 static int product_cheap_p (rtx, rtx);
319 static int maybe_eliminate_biv (const struct loop *, struct iv_class *, int,
320 int, int);
321 static int maybe_eliminate_biv_1 (const struct loop *, rtx, rtx,
322 struct iv_class *, int, basic_block, rtx);
323 static int last_use_this_basic_block (rtx, rtx);
324 static void record_initial (rtx, rtx, void *);
325 static void update_reg_last_use (rtx, rtx);
326 static rtx next_insn_in_loop (const struct loop *, rtx);
327 static void loop_regs_scan (const struct loop *, int);
328 static int count_insns_in_loop (const struct loop *);
329 static int find_mem_in_note_1 (rtx *, void *);
330 static rtx find_mem_in_note (rtx);
331 static void load_mems (const struct loop *);
332 static int insert_loop_mem (rtx *, void *);
333 static int replace_loop_mem (rtx *, void *);
334 static void replace_loop_mems (rtx, rtx, rtx, int);
335 static int replace_loop_reg (rtx *, void *);
336 static void replace_loop_regs (rtx insn, rtx, rtx);
337 static void note_reg_stored (rtx, rtx, void *);
338 static void try_copy_prop (const struct loop *, rtx, unsigned int);
339 static void try_swap_copy_prop (const struct loop *, rtx, unsigned int);
340 static rtx check_insn_for_givs (struct loop *, rtx, int, int);
341 static rtx check_insn_for_bivs (struct loop *, rtx, int, int);
342 static rtx gen_add_mult (rtx, rtx, rtx, rtx);
343 static void loop_regs_update (const struct loop *, rtx);
344 static int iv_add_mult_cost (rtx, rtx, rtx, rtx);
346 static rtx loop_insn_emit_after (const struct loop *, basic_block, rtx, rtx);
347 static rtx loop_call_insn_emit_before (const struct loop *, basic_block,
348 rtx, rtx);
349 static rtx loop_call_insn_hoist (const struct loop *, rtx);
350 static rtx loop_insn_sink_or_swim (const struct loop *, rtx);
352 static void loop_dump_aux (const struct loop *, FILE *, int);
353 static void loop_delete_insns (rtx, rtx);
354 static HOST_WIDE_INT remove_constant_addition (rtx *);
355 static rtx gen_load_of_final_value (rtx, rtx);
356 void debug_ivs (const struct loop *);
357 void debug_iv_class (const struct iv_class *);
358 void debug_biv (const struct induction *);
359 void debug_giv (const struct induction *);
360 void debug_loop (const struct loop *);
361 void debug_loops (const struct loops *);
363 typedef struct loop_replace_args
365 rtx match;
366 rtx replacement;
367 rtx insn;
368 } loop_replace_args;
370 /* Nonzero iff INSN is between START and END, inclusive. */
371 #define INSN_IN_RANGE_P(INSN, START, END) \
372 (INSN_UID (INSN) < max_uid_for_loop \
373 && INSN_LUID (INSN) >= INSN_LUID (START) \
374 && INSN_LUID (INSN) <= INSN_LUID (END))
376 /* Indirect_jump_in_function is computed once per function. */
377 static int indirect_jump_in_function;
378 static int indirect_jump_in_function_p (rtx);
380 static int compute_luids (rtx, rtx, int);
382 static int biv_elimination_giv_has_0_offset (struct induction *,
383 struct induction *, rtx);
385 /* Benefit penalty if a giv is not replaceable, i.e. if we must emit an insn
386 to copy the value of the strength-reduced giv to its original register. */
387 static int copy_cost;
389 /* Cost of using a register, to normalize the benefits of a giv. */
390 static int reg_address_cost;
392 void
393 init_loop (void)
395 rtx reg = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
397 reg_address_cost = address_cost (reg, SImode);
399 copy_cost = COSTS_N_INSNS (1);
402 /* Compute the mapping from uids to luids.
403 LUIDs are numbers assigned to insns, like uids,
404 except that luids increase monotonically through the code.
405 Start at insn START and stop just before END. Assign LUIDs
406 starting with PREV_LUID + 1. Return the last assigned LUID + 1. */
407 static int
408 compute_luids (rtx start, rtx end, int prev_luid)
410 int i;
411 rtx insn;
413 for (insn = start, i = prev_luid; insn != end; insn = NEXT_INSN (insn))
415 if (INSN_UID (insn) >= max_uid_for_loop)
416 continue;
417 /* Don't assign luids to line-number NOTEs, so that the distance in
418 luids between two insns is not affected by -g. */
419 if (GET_CODE (insn) != NOTE
420 || NOTE_LINE_NUMBER (insn) <= 0)
421 uid_luid[INSN_UID (insn)] = ++i;
422 else
423 /* Give a line number note the same luid as preceding insn. */
424 uid_luid[INSN_UID (insn)] = i;
426 return i + 1;
429 /* Entry point of this file. Perform loop optimization
430 on the current function. F is the first insn of the function
431 and DUMPFILE is a stream for output of a trace of actions taken
432 (or 0 if none should be output). */
434 void
435 loop_optimize (rtx f, FILE *dumpfile, int flags)
437 rtx insn;
438 int i;
439 struct loops loops_data;
440 struct loops *loops = &loops_data;
441 struct loop_info *loops_info;
443 loop_dump_stream = dumpfile;
445 init_recog_no_volatile ();
447 max_reg_before_loop = max_reg_num ();
448 loop_max_reg = max_reg_before_loop;
450 regs_may_share = 0;
452 /* Count the number of loops. */
454 max_loop_num = 0;
455 for (insn = f; insn; insn = NEXT_INSN (insn))
457 if (GET_CODE (insn) == NOTE
458 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
459 max_loop_num++;
462 /* Don't waste time if no loops. */
463 if (max_loop_num == 0)
464 return;
466 loops->num = max_loop_num;
468 /* Get size to use for tables indexed by uids.
469 Leave some space for labels allocated by find_and_verify_loops. */
470 max_uid_for_loop = get_max_uid () + 1 + max_loop_num * 32;
472 uid_luid = xcalloc (max_uid_for_loop, sizeof (int));
473 uid_loop = xcalloc (max_uid_for_loop, sizeof (struct loop *));
475 /* Allocate storage for array of loops. */
476 loops->array = xcalloc (loops->num, sizeof (struct loop));
478 /* Find and process each loop.
479 First, find them, and record them in order of their beginnings. */
480 find_and_verify_loops (f, loops);
482 /* Allocate and initialize auxiliary loop information. */
483 loops_info = xcalloc (loops->num, sizeof (struct loop_info));
484 for (i = 0; i < (int) loops->num; i++)
485 loops->array[i].aux = loops_info + i;
487 /* Now find all register lifetimes. This must be done after
488 find_and_verify_loops, because it might reorder the insns in the
489 function. */
490 reg_scan (f, max_reg_before_loop, 1);
492 /* This must occur after reg_scan so that registers created by gcse
493 will have entries in the register tables.
495 We could have added a call to reg_scan after gcse_main in toplev.c,
496 but moving this call to init_alias_analysis is more efficient. */
497 init_alias_analysis ();
499 /* See if we went too far. Note that get_max_uid already returns
500 one more than the maximum uid of all insns. */
501 if (get_max_uid () > max_uid_for_loop)
502 abort ();
503 /* Now reset it to the actual size we need. See above. */
504 max_uid_for_loop = get_max_uid ();
506 /* find_and_verify_loops has already called compute_luids, but it
507 might have rearranged code afterwards, so we need to recompute
508 the luids now. */
509 compute_luids (f, NULL_RTX, 0);
511 /* Don't leave gaps in uid_luid for insns that have been
512 deleted. It is possible that the first or last insn
513 using some register has been deleted by cross-jumping.
514 Make sure that uid_luid for that former insn's uid
515 points to the general area where that insn used to be. */
516 for (i = 0; i < max_uid_for_loop; i++)
518 uid_luid[0] = uid_luid[i];
519 if (uid_luid[0] != 0)
520 break;
522 for (i = 0; i < max_uid_for_loop; i++)
523 if (uid_luid[i] == 0)
524 uid_luid[i] = uid_luid[i - 1];
526 /* Determine if the function has an indirect jump. On some systems
527 this prevents low overhead loop instructions from being used. */
528 indirect_jump_in_function = indirect_jump_in_function_p (f);
530 /* Now scan the loops, last ones first, since this means inner ones are done
531 before outer ones. */
532 for (i = max_loop_num - 1; i >= 0; i--)
534 struct loop *loop = &loops->array[i];
536 if (! loop->invalid && loop->end)
537 scan_loop (loop, flags);
540 end_alias_analysis ();
542 /* Clean up. */
543 for (i = 0; i < (int) loops->num; i++)
544 free (loops_info[i].mems);
546 free (uid_luid);
547 free (uid_loop);
548 free (loops_info);
549 free (loops->array);
552 /* Returns the next insn, in execution order, after INSN. START and
553 END are the NOTE_INSN_LOOP_BEG and NOTE_INSN_LOOP_END for the loop,
554 respectively. LOOP->TOP, if non-NULL, is the top of the loop in the
555 insn-stream; it is used with loops that are entered near the
556 bottom. */
558 static rtx
559 next_insn_in_loop (const struct loop *loop, rtx insn)
561 insn = NEXT_INSN (insn);
563 if (insn == loop->end)
565 if (loop->top)
566 /* Go to the top of the loop, and continue there. */
567 insn = loop->top;
568 else
569 /* We're done. */
570 insn = NULL_RTX;
573 if (insn == loop->scan_start)
574 /* We're done. */
575 insn = NULL_RTX;
577 return insn;
580 /* Find any register references hidden inside X and add them to
581 the dependency list DEPS. This is used to look inside CLOBBER (MEM
582 when checking whether a PARALLEL can be pulled out of a loop. */
584 static rtx
585 find_regs_nested (rtx deps, rtx x)
587 enum rtx_code code = GET_CODE (x);
588 if (code == REG)
589 deps = gen_rtx_EXPR_LIST (VOIDmode, x, deps);
590 else
592 const char *fmt = GET_RTX_FORMAT (code);
593 int i, j;
594 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
596 if (fmt[i] == 'e')
597 deps = find_regs_nested (deps, XEXP (x, i));
598 else if (fmt[i] == 'E')
599 for (j = 0; j < XVECLEN (x, i); j++)
600 deps = find_regs_nested (deps, XVECEXP (x, i, j));
603 return deps;
606 /* Optimize one loop described by LOOP. */
608 /* ??? Could also move memory writes out of loops if the destination address
609 is invariant, the source is invariant, the memory write is not volatile,
610 and if we can prove that no read inside the loop can read this address
611 before the write occurs. If there is a read of this address after the
612 write, then we can also mark the memory read as invariant. */
614 static void
615 scan_loop (struct loop *loop, int flags)
617 struct loop_info *loop_info = LOOP_INFO (loop);
618 struct loop_regs *regs = LOOP_REGS (loop);
619 int i;
620 rtx loop_start = loop->start;
621 rtx loop_end = loop->end;
622 rtx p;
623 /* 1 if we are scanning insns that could be executed zero times. */
624 int maybe_never = 0;
625 /* 1 if we are scanning insns that might never be executed
626 due to a subroutine call which might exit before they are reached. */
627 int call_passed = 0;
628 /* Number of insns in the loop. */
629 int insn_count;
630 int tem;
631 rtx temp, update_start, update_end;
632 /* The SET from an insn, if it is the only SET in the insn. */
633 rtx set, set1;
634 /* Chain describing insns movable in current loop. */
635 struct loop_movables *movables = LOOP_MOVABLES (loop);
636 /* Ratio of extra register life span we can justify
637 for saving an instruction. More if loop doesn't call subroutines
638 since in that case saving an insn makes more difference
639 and more registers are available. */
640 int threshold;
641 /* Nonzero if we are scanning instructions in a sub-loop. */
642 int loop_depth = 0;
643 int in_libcall;
645 loop->top = 0;
647 movables->head = 0;
648 movables->last = 0;
650 /* Determine whether this loop starts with a jump down to a test at
651 the end. This will occur for a small number of loops with a test
652 that is too complex to duplicate in front of the loop.
654 We search for the first insn or label in the loop, skipping NOTEs.
655 However, we must be careful not to skip past a NOTE_INSN_LOOP_BEG
656 (because we might have a loop executed only once that contains a
657 loop which starts with a jump to its exit test) or a NOTE_INSN_LOOP_END
658 (in case we have a degenerate loop).
660 Note that if we mistakenly think that a loop is entered at the top
661 when, in fact, it is entered at the exit test, the only effect will be
662 slightly poorer optimization. Making the opposite error can generate
663 incorrect code. Since very few loops now start with a jump to the
664 exit test, the code here to detect that case is very conservative. */
666 for (p = NEXT_INSN (loop_start);
667 p != loop_end
668 && GET_CODE (p) != CODE_LABEL && ! INSN_P (p)
669 && (GET_CODE (p) != NOTE
670 || (NOTE_LINE_NUMBER (p) != NOTE_INSN_LOOP_BEG
671 && NOTE_LINE_NUMBER (p) != NOTE_INSN_LOOP_END));
672 p = NEXT_INSN (p))
675 loop->scan_start = p;
677 /* If loop end is the end of the current function, then emit a
678 NOTE_INSN_DELETED after loop_end and set loop->sink to the dummy
679 note insn. This is the position we use when sinking insns out of
680 the loop. */
681 if (NEXT_INSN (loop->end) != 0)
682 loop->sink = NEXT_INSN (loop->end);
683 else
684 loop->sink = emit_note_after (NOTE_INSN_DELETED, loop->end);
686 /* Set up variables describing this loop. */
687 prescan_loop (loop);
688 threshold = (loop_info->has_call ? 1 : 2) * (1 + n_non_fixed_regs);
690 /* If loop has a jump before the first label,
691 the true entry is the target of that jump.
692 Start scan from there.
693 But record in LOOP->TOP the place where the end-test jumps
694 back to so we can scan that after the end of the loop. */
695 if (GET_CODE (p) == JUMP_INSN
696 /* Loop entry must be unconditional jump (and not a RETURN) */
697 && any_uncondjump_p (p)
698 && JUMP_LABEL (p) != 0
699 /* Check to see whether the jump actually
700 jumps out of the loop (meaning it's no loop).
701 This case can happen for things like
702 do {..} while (0). If this label was generated previously
703 by loop, we can't tell anything about it and have to reject
704 the loop. */
705 && INSN_IN_RANGE_P (JUMP_LABEL (p), loop_start, loop_end))
707 loop->top = next_label (loop->scan_start);
708 loop->scan_start = JUMP_LABEL (p);
711 /* If LOOP->SCAN_START was an insn created by loop, we don't know its luid
712 as required by loop_reg_used_before_p. So skip such loops. (This
713 test may never be true, but it's best to play it safe.)
715 Also, skip loops where we do not start scanning at a label. This
716 test also rejects loops starting with a JUMP_INSN that failed the
717 test above. */
719 if (INSN_UID (loop->scan_start) >= max_uid_for_loop
720 || GET_CODE (loop->scan_start) != CODE_LABEL)
722 if (loop_dump_stream)
723 fprintf (loop_dump_stream, "\nLoop from %d to %d is phony.\n\n",
724 INSN_UID (loop_start), INSN_UID (loop_end));
725 return;
728 /* Allocate extra space for REGs that might be created by load_mems.
729 We allocate a little extra slop as well, in the hopes that we
730 won't have to reallocate the regs array. */
731 loop_regs_scan (loop, loop_info->mems_idx + 16);
732 insn_count = count_insns_in_loop (loop);
734 if (loop_dump_stream)
736 fprintf (loop_dump_stream, "\nLoop from %d to %d: %d real insns.\n",
737 INSN_UID (loop_start), INSN_UID (loop_end), insn_count);
738 if (loop->cont)
739 fprintf (loop_dump_stream, "Continue at insn %d.\n",
740 INSN_UID (loop->cont));
743 /* Scan through the loop finding insns that are safe to move.
744 Set REGS->ARRAY[I].SET_IN_LOOP negative for the reg I being set, so that
745 this reg will be considered invariant for subsequent insns.
746 We consider whether subsequent insns use the reg
747 in deciding whether it is worth actually moving.
749 MAYBE_NEVER is nonzero if we have passed a conditional jump insn
750 and therefore it is possible that the insns we are scanning
751 would never be executed. At such times, we must make sure
752 that it is safe to execute the insn once instead of zero times.
753 When MAYBE_NEVER is 0, all insns will be executed at least once
754 so that is not a problem. */
756 for (in_libcall = 0, p = next_insn_in_loop (loop, loop->scan_start);
757 p != NULL_RTX;
758 p = next_insn_in_loop (loop, p))
760 if (in_libcall && INSN_P (p) && find_reg_note (p, REG_RETVAL, NULL_RTX))
761 in_libcall--;
762 if (GET_CODE (p) == INSN)
764 temp = find_reg_note (p, REG_LIBCALL, NULL_RTX);
765 if (temp)
766 in_libcall++;
767 if (! in_libcall
768 && (set = single_set (p))
769 && GET_CODE (SET_DEST (set)) == REG
770 #ifdef PIC_OFFSET_TABLE_REG_CALL_CLOBBERED
771 && SET_DEST (set) != pic_offset_table_rtx
772 #endif
773 && ! regs->array[REGNO (SET_DEST (set))].may_not_optimize)
775 int tem1 = 0;
776 int tem2 = 0;
777 int move_insn = 0;
778 int insert_temp = 0;
779 rtx src = SET_SRC (set);
780 rtx dependencies = 0;
782 /* Figure out what to use as a source of this insn. If a
783 REG_EQUIV note is given or if a REG_EQUAL note with a
784 constant operand is specified, use it as the source and
785 mark that we should move this insn by calling
786 emit_move_insn rather than duplicating the insn.
788 Otherwise, only use the REG_EQUAL contents if a REG_RETVAL
789 note is present. */
790 temp = find_reg_note (p, REG_EQUIV, NULL_RTX);
791 if (temp)
792 src = XEXP (temp, 0), move_insn = 1;
793 else
795 temp = find_reg_note (p, REG_EQUAL, NULL_RTX);
796 if (temp && CONSTANT_P (XEXP (temp, 0)))
797 src = XEXP (temp, 0), move_insn = 1;
798 if (temp && find_reg_note (p, REG_RETVAL, NULL_RTX))
800 src = XEXP (temp, 0);
801 /* A libcall block can use regs that don't appear in
802 the equivalent expression. To move the libcall,
803 we must move those regs too. */
804 dependencies = libcall_other_reg (p, src);
808 /* For parallels, add any possible uses to the dependencies, as
809 we can't move the insn without resolving them first.
810 MEMs inside CLOBBERs may also reference registers; these
811 count as implicit uses. */
812 if (GET_CODE (PATTERN (p)) == PARALLEL)
814 for (i = 0; i < XVECLEN (PATTERN (p), 0); i++)
816 rtx x = XVECEXP (PATTERN (p), 0, i);
817 if (GET_CODE (x) == USE)
818 dependencies
819 = gen_rtx_EXPR_LIST (VOIDmode, XEXP (x, 0),
820 dependencies);
821 else if (GET_CODE (x) == CLOBBER
822 && GET_CODE (XEXP (x, 0)) == MEM)
823 dependencies = find_regs_nested (dependencies,
824 XEXP (XEXP (x, 0), 0));
828 if (/* The register is used in basic blocks other
829 than the one where it is set (meaning that
830 something after this point in the loop might
831 depend on its value before the set). */
832 ! reg_in_basic_block_p (p, SET_DEST (set))
833 /* And the set is not guaranteed to be executed once
834 the loop starts, or the value before the set is
835 needed before the set occurs...
837 ??? Note we have quadratic behavior here, mitigated
838 by the fact that the previous test will often fail for
839 large loops. Rather than re-scanning the entire loop
840 each time for register usage, we should build tables
841 of the register usage and use them here instead. */
842 && (maybe_never
843 || loop_reg_used_before_p (loop, set, p)))
844 /* It is unsafe to move the set. However, it may be OK to
845 move the source into a new pseudo, and substitute a
846 reg-to-reg copy for the original insn.
848 This code used to consider it OK to move a set of a variable
849 which was not created by the user and not used in an exit
850 test.
851 That behavior is incorrect and was removed. */
852 insert_temp = 1;
854 /* Don't try to optimize a MODE_CC set with a constant
855 source. It probably will be combined with a conditional
856 jump. */
857 if (GET_MODE_CLASS (GET_MODE (SET_DEST (set))) == MODE_CC
858 && CONSTANT_P (src))
860 /* Don't try to optimize a register that was made
861 by loop-optimization for an inner loop.
862 We don't know its life-span, so we can't compute
863 the benefit. */
864 else if (REGNO (SET_DEST (set)) >= max_reg_before_loop)
866 /* Don't move the source and add a reg-to-reg copy:
867 - with -Os (this certainly increases size),
868 - if the mode doesn't support copy operations (obviously),
869 - if the source is already a reg (the motion will gain nothing),
870 - if the source is a legitimate constant (likewise). */
871 else if (insert_temp
872 && (optimize_size
873 || ! can_copy_p (GET_MODE (SET_SRC (set)))
874 || GET_CODE (SET_SRC (set)) == REG
875 || (CONSTANT_P (SET_SRC (set))
876 && LEGITIMATE_CONSTANT_P (SET_SRC (set)))))
878 else if ((tem = loop_invariant_p (loop, src))
879 && (dependencies == 0
880 || (tem2
881 = loop_invariant_p (loop, dependencies)) != 0)
882 && (regs->array[REGNO (SET_DEST (set))].set_in_loop == 1
883 || (tem1
884 = consec_sets_invariant_p
885 (loop, SET_DEST (set),
886 regs->array[REGNO (SET_DEST (set))].set_in_loop,
887 p)))
888 /* If the insn can cause a trap (such as divide by zero),
889 can't move it unless it's guaranteed to be executed
890 once loop is entered. Even a function call might
891 prevent the trap insn from being reached
892 (since it might exit!) */
893 && ! ((maybe_never || call_passed)
894 && may_trap_p (src)))
896 struct movable *m;
897 int regno = REGNO (SET_DEST (set));
899 /* A potential lossage is a case where two insns
900 can be combined as long as they are both in the loop, but
901 we move one of them outside the loop. For large loops,
902 this can lose. The most common case of this is the address
903 of a function being called.
905 Therefore, if this register is marked as being used
906 exactly once, and we are in a loop with calls
907 (a "large loop"), see if we can replace the usage of
908 this register with the source of this SET. If we can,
909 delete this insn.
911 Don't do this if P has a REG_RETVAL note or if we have
912 SMALL_REGISTER_CLASSES and SET_SRC is a hard register. */
914 if (loop_info->has_call
915 && regs->array[regno].single_usage != 0
916 && regs->array[regno].single_usage != const0_rtx
917 && REGNO_FIRST_UID (regno) == INSN_UID (p)
918 && (REGNO_LAST_UID (regno)
919 == INSN_UID (regs->array[regno].single_usage))
920 && regs->array[regno].set_in_loop == 1
921 && GET_CODE (SET_SRC (set)) != ASM_OPERANDS
922 && ! side_effects_p (SET_SRC (set))
923 && ! find_reg_note (p, REG_RETVAL, NULL_RTX)
924 && (! SMALL_REGISTER_CLASSES
925 || (! (GET_CODE (SET_SRC (set)) == REG
926 && (REGNO (SET_SRC (set))
927 < FIRST_PSEUDO_REGISTER))))
928 /* This test is not redundant; SET_SRC (set) might be
929 a call-clobbered register and the life of REGNO
930 might span a call. */
931 && ! modified_between_p (SET_SRC (set), p,
932 regs->array[regno].single_usage)
933 && no_labels_between_p (p,
934 regs->array[regno].single_usage)
935 && validate_replace_rtx (SET_DEST (set), SET_SRC (set),
936 regs->array[regno].single_usage))
938 /* Replace any usage in a REG_EQUAL note. Must copy
939 the new source, so that we don't get rtx sharing
940 between the SET_SOURCE and REG_NOTES of insn p. */
941 REG_NOTES (regs->array[regno].single_usage)
942 = (replace_rtx
943 (REG_NOTES (regs->array[regno].single_usage),
944 SET_DEST (set), copy_rtx (SET_SRC (set))));
946 delete_insn (p);
947 for (i = 0; i < LOOP_REGNO_NREGS (regno, SET_DEST (set));
948 i++)
949 regs->array[regno+i].set_in_loop = 0;
950 continue;
953 m = xmalloc (sizeof (struct movable));
954 m->next = 0;
955 m->insn = p;
956 m->set_src = src;
957 m->dependencies = dependencies;
958 m->set_dest = SET_DEST (set);
959 m->force = 0;
960 m->consec
961 = regs->array[REGNO (SET_DEST (set))].set_in_loop - 1;
962 m->done = 0;
963 m->forces = 0;
964 m->partial = 0;
965 m->move_insn = move_insn;
966 m->move_insn_first = 0;
967 m->insert_temp = insert_temp;
968 m->is_equiv = (find_reg_note (p, REG_EQUIV, NULL_RTX) != 0);
969 m->savemode = VOIDmode;
970 m->regno = regno;
971 /* Set M->cond if either loop_invariant_p
972 or consec_sets_invariant_p returned 2
973 (only conditionally invariant). */
974 m->cond = ((tem | tem1 | tem2) > 1);
975 m->global = LOOP_REG_GLOBAL_P (loop, regno);
976 m->match = 0;
977 m->lifetime = LOOP_REG_LIFETIME (loop, regno);
978 m->savings = regs->array[regno].n_times_set;
979 if (find_reg_note (p, REG_RETVAL, NULL_RTX))
980 m->savings += libcall_benefit (p);
981 for (i = 0; i < LOOP_REGNO_NREGS (regno, SET_DEST (set)); i++)
982 regs->array[regno+i].set_in_loop = move_insn ? -2 : -1;
983 /* Add M to the end of the chain MOVABLES. */
984 loop_movables_add (movables, m);
986 if (m->consec > 0)
988 /* It is possible for the first instruction to have a
989 REG_EQUAL note but a non-invariant SET_SRC, so we must
990 remember the status of the first instruction in case
991 the last instruction doesn't have a REG_EQUAL note. */
992 m->move_insn_first = m->move_insn;
994 /* Skip this insn, not checking REG_LIBCALL notes. */
995 p = next_nonnote_insn (p);
996 /* Skip the consecutive insns, if there are any. */
997 p = skip_consec_insns (p, m->consec);
998 /* Back up to the last insn of the consecutive group. */
999 p = prev_nonnote_insn (p);
1001 /* We must now reset m->move_insn, m->is_equiv, and
1002 possibly m->set_src to correspond to the effects of
1003 all the insns. */
1004 temp = find_reg_note (p, REG_EQUIV, NULL_RTX);
1005 if (temp)
1006 m->set_src = XEXP (temp, 0), m->move_insn = 1;
1007 else
1009 temp = find_reg_note (p, REG_EQUAL, NULL_RTX);
1010 if (temp && CONSTANT_P (XEXP (temp, 0)))
1011 m->set_src = XEXP (temp, 0), m->move_insn = 1;
1012 else
1013 m->move_insn = 0;
1016 m->is_equiv
1017 = (find_reg_note (p, REG_EQUIV, NULL_RTX) != 0);
1020 /* If this register is always set within a STRICT_LOW_PART
1021 or set to zero, then its high bytes are constant.
1022 So clear them outside the loop and within the loop
1023 just load the low bytes.
1024 We must check that the machine has an instruction to do so.
1025 Also, if the value loaded into the register
1026 depends on the same register, this cannot be done. */
1027 else if (SET_SRC (set) == const0_rtx
1028 && GET_CODE (NEXT_INSN (p)) == INSN
1029 && (set1 = single_set (NEXT_INSN (p)))
1030 && GET_CODE (set1) == SET
1031 && (GET_CODE (SET_DEST (set1)) == STRICT_LOW_PART)
1032 && (GET_CODE (XEXP (SET_DEST (set1), 0)) == SUBREG)
1033 && (SUBREG_REG (XEXP (SET_DEST (set1), 0))
1034 == SET_DEST (set))
1035 && !reg_mentioned_p (SET_DEST (set), SET_SRC (set1)))
1037 int regno = REGNO (SET_DEST (set));
1038 if (regs->array[regno].set_in_loop == 2)
1040 struct movable *m;
1041 m = xmalloc (sizeof (struct movable));
1042 m->next = 0;
1043 m->insn = p;
1044 m->set_dest = SET_DEST (set);
1045 m->dependencies = 0;
1046 m->force = 0;
1047 m->consec = 0;
1048 m->done = 0;
1049 m->forces = 0;
1050 m->move_insn = 0;
1051 m->move_insn_first = 0;
1052 m->insert_temp = insert_temp;
1053 m->partial = 1;
1054 /* If the insn may not be executed on some cycles,
1055 we can't clear the whole reg; clear just high part.
1056 Not even if the reg is used only within this loop.
1057 Consider this:
1058 while (1)
1059 while (s != t) {
1060 if (foo ()) x = *s;
1061 use (x);
1063 Clearing x before the inner loop could clobber a value
1064 being saved from the last time around the outer loop.
1065 However, if the reg is not used outside this loop
1066 and all uses of the register are in the same
1067 basic block as the store, there is no problem.
1069 If this insn was made by loop, we don't know its
1070 INSN_LUID and hence must make a conservative
1071 assumption. */
1072 m->global = (INSN_UID (p) >= max_uid_for_loop
1073 || LOOP_REG_GLOBAL_P (loop, regno)
1074 || (labels_in_range_p
1075 (p, REGNO_FIRST_LUID (regno))));
1076 if (maybe_never && m->global)
1077 m->savemode = GET_MODE (SET_SRC (set1));
1078 else
1079 m->savemode = VOIDmode;
1080 m->regno = regno;
1081 m->cond = 0;
1082 m->match = 0;
1083 m->lifetime = LOOP_REG_LIFETIME (loop, regno);
1084 m->savings = 1;
1085 for (i = 0;
1086 i < LOOP_REGNO_NREGS (regno, SET_DEST (set));
1087 i++)
1088 regs->array[regno+i].set_in_loop = -1;
1089 /* Add M to the end of the chain MOVABLES. */
1090 loop_movables_add (movables, m);
1095 /* Past a call insn, we get to insns which might not be executed
1096 because the call might exit. This matters for insns that trap.
1097 Constant and pure call insns always return, so they don't count. */
1098 else if (GET_CODE (p) == CALL_INSN && ! CONST_OR_PURE_CALL_P (p))
1099 call_passed = 1;
1100 /* Past a label or a jump, we get to insns for which we
1101 can't count on whether or how many times they will be
1102 executed during each iteration. Therefore, we can
1103 only move out sets of trivial variables
1104 (those not used after the loop). */
1105 /* Similar code appears twice in strength_reduce. */
1106 else if ((GET_CODE (p) == CODE_LABEL || GET_CODE (p) == JUMP_INSN)
1107 /* If we enter the loop in the middle, and scan around to the
1108 beginning, don't set maybe_never for that. This must be an
1109 unconditional jump, otherwise the code at the top of the
1110 loop might never be executed. Unconditional jumps are
1111 followed by a barrier then the loop_end. */
1112 && ! (GET_CODE (p) == JUMP_INSN && JUMP_LABEL (p) == loop->top
1113 && NEXT_INSN (NEXT_INSN (p)) == loop_end
1114 && any_uncondjump_p (p)))
1115 maybe_never = 1;
1116 else if (GET_CODE (p) == NOTE)
1118 /* At the virtual top of a converted loop, insns are again known to
1119 be executed: logically, the loop begins here even though the exit
1120 code has been duplicated. */
1121 if (NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_VTOP && loop_depth == 0)
1122 maybe_never = call_passed = 0;
1123 else if (NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_BEG)
1124 loop_depth++;
1125 else if (NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_END)
1126 loop_depth--;
1130 /* If one movable subsumes another, ignore that other. */
1132 ignore_some_movables (movables);
1134 /* For each movable insn, see if the reg that it loads
1135 is used for the last time in another conditionally movable insn.
1136 If so, record that the second insn "forces" the first one,
1137 since the second can be moved only if the first is. */
1139 force_movables (movables);
1141 /* See if there are multiple movable insns that load the same value.
1142 If there are, make all but the first point at the first one
1143 through the `match' field, and add the priorities of them
1144 all together as the priority of the first. */
1146 combine_movables (movables, regs);
1148 /* Now consider each movable insn to decide whether it is worth moving.
1149 Store 0 in regs->array[I].set_in_loop for each reg I that is moved.
1151 For machines with few registers this increases code size, so do not
1152 move movables when optimizing for code size on such machines.
1153 (The 18 below is the value for i386.) */
1155 if (!optimize_size
1156 || (reg_class_size[GENERAL_REGS] > 18 && !loop_info->has_call))
1158 move_movables (loop, movables, threshold, insn_count);
1160 /* Recalculate regs->array if move_movables has created new
1161 registers. */
1162 if (max_reg_num () > regs->num)
1164 loop_regs_scan (loop, 0);
1165 for (update_start = loop_start;
1166 PREV_INSN (update_start)
1167 && GET_CODE (PREV_INSN (update_start)) != CODE_LABEL;
1168 update_start = PREV_INSN (update_start))
1170 update_end = NEXT_INSN (loop_end);
1172 reg_scan_update (update_start, update_end, loop_max_reg);
1173 loop_max_reg = max_reg_num ();
1177 /* Now candidates that still are negative are those not moved.
1178 Change regs->array[I].set_in_loop to indicate that those are not actually
1179 invariant. */
1180 for (i = 0; i < regs->num; i++)
1181 if (regs->array[i].set_in_loop < 0)
1182 regs->array[i].set_in_loop = regs->array[i].n_times_set;
1184 /* Now that we've moved some things out of the loop, we might be able to
1185 hoist even more memory references. */
1186 load_mems (loop);
1188 /* Recalculate regs->array if load_mems has created new registers. */
1189 if (max_reg_num () > regs->num)
1190 loop_regs_scan (loop, 0);
1192 for (update_start = loop_start;
1193 PREV_INSN (update_start)
1194 && GET_CODE (PREV_INSN (update_start)) != CODE_LABEL;
1195 update_start = PREV_INSN (update_start))
1197 update_end = NEXT_INSN (loop_end);
1199 reg_scan_update (update_start, update_end, loop_max_reg);
1200 loop_max_reg = max_reg_num ();
1202 if (flag_strength_reduce)
1204 if (update_end && GET_CODE (update_end) == CODE_LABEL)
1205 /* Ensure our label doesn't go away. */
1206 LABEL_NUSES (update_end)++;
1208 strength_reduce (loop, flags);
1210 reg_scan_update (update_start, update_end, loop_max_reg);
1211 loop_max_reg = max_reg_num ();
1213 if (update_end && GET_CODE (update_end) == CODE_LABEL
1214 && --LABEL_NUSES (update_end) == 0)
1215 delete_related_insns (update_end);
1219 /* The movable information is required for strength reduction. */
1220 loop_movables_free (movables);
1222 free (regs->array);
1223 regs->array = 0;
1224 regs->num = 0;
1227 /* Add elements to *OUTPUT to record all the pseudo-regs
1228 mentioned in IN_THIS but not mentioned in NOT_IN_THIS. */
1230 void
1231 record_excess_regs (rtx in_this, rtx not_in_this, rtx *output)
1233 enum rtx_code code;
1234 const char *fmt;
1235 int i;
1237 code = GET_CODE (in_this);
1239 switch (code)
1241 case PC:
1242 case CC0:
1243 case CONST_INT:
1244 case CONST_DOUBLE:
1245 case CONST:
1246 case SYMBOL_REF:
1247 case LABEL_REF:
1248 return;
1250 case REG:
1251 if (REGNO (in_this) >= FIRST_PSEUDO_REGISTER
1252 && ! reg_mentioned_p (in_this, not_in_this))
1253 *output = gen_rtx_EXPR_LIST (VOIDmode, in_this, *output);
1254 return;
1256 default:
1257 break;
1260 fmt = GET_RTX_FORMAT (code);
1261 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1263 int j;
1265 switch (fmt[i])
1267 case 'E':
1268 for (j = 0; j < XVECLEN (in_this, i); j++)
1269 record_excess_regs (XVECEXP (in_this, i, j), not_in_this, output);
1270 break;
1272 case 'e':
1273 record_excess_regs (XEXP (in_this, i), not_in_this, output);
1274 break;
1279 /* Check what regs are referred to in the libcall block ending with INSN,
1280 aside from those mentioned in the equivalent value.
1281 If there are none, return 0.
1282 If there are one or more, return an EXPR_LIST containing all of them. */
1285 libcall_other_reg (rtx insn, rtx equiv)
1287 rtx note = find_reg_note (insn, REG_RETVAL, NULL_RTX);
1288 rtx p = XEXP (note, 0);
1289 rtx output = 0;
1291 /* First, find all the regs used in the libcall block
1292 that are not mentioned as inputs to the result. */
1294 while (p != insn)
1296 if (GET_CODE (p) == INSN || GET_CODE (p) == JUMP_INSN
1297 || GET_CODE (p) == CALL_INSN)
1298 record_excess_regs (PATTERN (p), equiv, &output);
1299 p = NEXT_INSN (p);
1302 return output;
1305 /* Return 1 if all uses of REG
1306 are between INSN and the end of the basic block. */
1308 static int
1309 reg_in_basic_block_p (rtx insn, rtx reg)
1311 int regno = REGNO (reg);
1312 rtx p;
1314 if (REGNO_FIRST_UID (regno) != INSN_UID (insn))
1315 return 0;
1317 /* Search this basic block for the already recorded last use of the reg. */
1318 for (p = insn; p; p = NEXT_INSN (p))
1320 switch (GET_CODE (p))
1322 case NOTE:
1323 break;
1325 case INSN:
1326 case CALL_INSN:
1327 /* Ordinary insn: if this is the last use, we win. */
1328 if (REGNO_LAST_UID (regno) == INSN_UID (p))
1329 return 1;
1330 break;
1332 case JUMP_INSN:
1333 /* Jump insn: if this is the last use, we win. */
1334 if (REGNO_LAST_UID (regno) == INSN_UID (p))
1335 return 1;
1336 /* Otherwise, it's the end of the basic block, so we lose. */
1337 return 0;
1339 case CODE_LABEL:
1340 case BARRIER:
1341 /* It's the end of the basic block, so we lose. */
1342 return 0;
1344 default:
1345 break;
1349 /* The "last use" that was recorded can't be found after the first
1350 use. This can happen when the last use was deleted while
1351 processing an inner loop, this inner loop was then completely
1352 unrolled, and the outer loop is always exited after the inner loop,
1353 so that everything after the first use becomes a single basic block. */
1354 return 1;
1357 /* Compute the benefit of eliminating the insns in the block whose
1358 last insn is LAST. This may be a group of insns used to compute a
1359 value directly or can contain a library call. */
1361 static int
1362 libcall_benefit (rtx last)
1364 rtx insn;
1365 int benefit = 0;
1367 for (insn = XEXP (find_reg_note (last, REG_RETVAL, NULL_RTX), 0);
1368 insn != last; insn = NEXT_INSN (insn))
1370 if (GET_CODE (insn) == CALL_INSN)
1371 benefit += 10; /* Assume at least this many insns in a library
1372 routine. */
1373 else if (GET_CODE (insn) == INSN
1374 && GET_CODE (PATTERN (insn)) != USE
1375 && GET_CODE (PATTERN (insn)) != CLOBBER)
1376 benefit++;
1379 return benefit;
1382 /* Skip COUNT insns from INSN, counting library calls as 1 insn. */
1384 static rtx
1385 skip_consec_insns (rtx insn, int count)
1387 for (; count > 0; count--)
1389 rtx temp;
1391 /* If first insn of libcall sequence, skip to end. */
1392 /* Do this at start of loop, since INSN is guaranteed to
1393 be an insn here. */
1394 if (GET_CODE (insn) != NOTE
1395 && (temp = find_reg_note (insn, REG_LIBCALL, NULL_RTX)))
1396 insn = XEXP (temp, 0);
1399 insn = NEXT_INSN (insn);
1400 while (GET_CODE (insn) == NOTE);
1403 return insn;
1406 /* Ignore any movable whose insn falls within a libcall
1407 which is part of another movable.
1408 We make use of the fact that the movable for the libcall value
1409 was made later and so appears later on the chain. */
1411 static void
1412 ignore_some_movables (struct loop_movables *movables)
1414 struct movable *m, *m1;
1416 for (m = movables->head; m; m = m->next)
1418 /* Is this a movable for the value of a libcall? */
1419 rtx note = find_reg_note (m->insn, REG_RETVAL, NULL_RTX);
1420 if (note)
1422 rtx insn;
1423 /* Check for earlier movables inside that range,
1424 and mark them invalid. We cannot use LUIDs here because
1425 insns created by loop.c for prior loops don't have LUIDs.
1426 Rather than reject all such insns from movables, we just
1427 explicitly check each insn in the libcall (since invariant
1428 libcalls aren't that common). */
1429 for (insn = XEXP (note, 0); insn != m->insn; insn = NEXT_INSN (insn))
1430 for (m1 = movables->head; m1 != m; m1 = m1->next)
1431 if (m1->insn == insn)
1432 m1->done = 1;
1437 /* For each movable insn, see if the reg that it loads
1438 is used for the last time in another conditionally movable insn.
1439 If so, record that the second insn "forces" the first one,
1440 since the second can be moved only if the first is. */
1442 static void
1443 force_movables (struct loop_movables *movables)
1445 struct movable *m, *m1;
1447 for (m1 = movables->head; m1; m1 = m1->next)
1448 /* Omit this if moving just the (SET (REG) 0) of a zero-extend. */
1449 if (!m1->partial && !m1->done)
1451 int regno = m1->regno;
1452 for (m = m1->next; m; m = m->next)
1453 /* ??? Could this be a bug? What if CSE caused the
1454 register of M1 to be used after this insn?
1455 Since CSE does not update regno_last_uid,
1456 this insn M->insn might not be where it dies.
1457 But very likely this doesn't matter; what matters is
1458 that M's reg is computed from M1's reg. */
1459 if (INSN_UID (m->insn) == REGNO_LAST_UID (regno)
1460 && !m->done)
1461 break;
1462 if (m != 0 && m->set_src == m1->set_dest
1463 /* If m->consec, m->set_src isn't valid. */
1464 && m->consec == 0)
1465 m = 0;
1467 /* Increase the priority of moving the first insn
1468 since it permits the second to be moved as well. */
1469 if (m != 0)
1471 m->forces = m1;
1472 m1->lifetime += m->lifetime;
1473 m1->savings += m->savings;
1478 /* Find invariant expressions that are equal and can be combined into
1479 one register. */
1481 static void
1482 combine_movables (struct loop_movables *movables, struct loop_regs *regs)
1484 struct movable *m;
1485 char *matched_regs = xmalloc (regs->num);
1486 enum machine_mode mode;
1488 /* Regs that are set more than once are not allowed to match
1489 or be matched. I'm no longer sure why not. */
1490 /* Only pseudo registers are allowed to match or be matched,
1491 since move_movables does not validate the change. */
1492 /* Perhaps testing m->consec_sets would be more appropriate here? */
1494 for (m = movables->head; m; m = m->next)
1495 if (m->match == 0 && regs->array[m->regno].n_times_set == 1
1496 && m->regno >= FIRST_PSEUDO_REGISTER
1497 && !m->insert_temp
1498 && !m->partial)
1500 struct movable *m1;
1501 int regno = m->regno;
1503 memset (matched_regs, 0, regs->num);
1504 matched_regs[regno] = 1;
1506 /* We want later insns to match the first one. Don't make the first
1507 one match any later ones. So start this loop at m->next. */
1508 for (m1 = m->next; m1; m1 = m1->next)
1509 if (m != m1 && m1->match == 0
1510 && !m1->insert_temp
1511 && regs->array[m1->regno].n_times_set == 1
1512 && m1->regno >= FIRST_PSEUDO_REGISTER
1513 /* A reg used outside the loop mustn't be eliminated. */
1514 && !m1->global
1515 /* A reg used for zero-extending mustn't be eliminated. */
1516 && !m1->partial
1517 && (matched_regs[m1->regno]
1520 /* Can combine regs with different modes loaded from the
1521 same constant only if the modes are the same or
1522 if both are integer modes with M wider or the same
1523 width as M1. The check for integer is redundant, but
1524 safe, since the only case of differing destination
1525 modes with equal sources is when both sources are
1526 VOIDmode, i.e., CONST_INT. */
1527 (GET_MODE (m->set_dest) == GET_MODE (m1->set_dest)
1528 || (GET_MODE_CLASS (GET_MODE (m->set_dest)) == MODE_INT
1529 && GET_MODE_CLASS (GET_MODE (m1->set_dest)) == MODE_INT
1530 && (GET_MODE_BITSIZE (GET_MODE (m->set_dest))
1531 >= GET_MODE_BITSIZE (GET_MODE (m1->set_dest)))))
1532 /* See if the source of M1 says it matches M. */
1533 && ((GET_CODE (m1->set_src) == REG
1534 && matched_regs[REGNO (m1->set_src)])
1535 || rtx_equal_for_loop_p (m->set_src, m1->set_src,
1536 movables, regs))))
1537 && ((m->dependencies == m1->dependencies)
1538 || rtx_equal_p (m->dependencies, m1->dependencies)))
1540 m->lifetime += m1->lifetime;
1541 m->savings += m1->savings;
1542 m1->done = 1;
1543 m1->match = m;
1544 matched_regs[m1->regno] = 1;
1548 /* Now combine the regs used for zero-extension.
1549 This can be done for those not marked `global'
1550 provided their lives don't overlap. */
1552 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1553 mode = GET_MODE_WIDER_MODE (mode))
1555 struct movable *m0 = 0;
1557 /* Combine all the registers for extension from mode MODE.
1558 Don't combine any that are used outside this loop. */
1559 for (m = movables->head; m; m = m->next)
1560 if (m->partial && ! m->global
1561 && mode == GET_MODE (SET_SRC (PATTERN (NEXT_INSN (m->insn)))))
1563 struct movable *m1;
1565 int first = REGNO_FIRST_LUID (m->regno);
1566 int last = REGNO_LAST_LUID (m->regno);
1568 if (m0 == 0)
1570 /* First one: don't check for overlap, just record it. */
1571 m0 = m;
1572 continue;
1575 /* Make sure they extend to the same mode.
1576 (Almost always true.) */
1577 if (GET_MODE (m->set_dest) != GET_MODE (m0->set_dest))
1578 continue;
1580 /* We already have one: check for overlap with those
1581 already combined together. */
1582 for (m1 = movables->head; m1 != m; m1 = m1->next)
1583 if (m1 == m0 || (m1->partial && m1->match == m0))
1584 if (! (REGNO_FIRST_LUID (m1->regno) > last
1585 || REGNO_LAST_LUID (m1->regno) < first))
1586 goto overlap;
1588 /* No overlap: we can combine this with the others. */
1589 m0->lifetime += m->lifetime;
1590 m0->savings += m->savings;
1591 m->done = 1;
1592 m->match = m0;
1594 overlap:
1599 /* Clean up. */
1600 free (matched_regs);
1603 /* Returns the number of movable instructions in LOOP that were not
1604 moved outside the loop. */
1606 static int
1607 num_unmoved_movables (const struct loop *loop)
1609 int num = 0;
1610 struct movable *m;
1612 for (m = LOOP_MOVABLES (loop)->head; m; m = m->next)
1613 if (!m->done)
1614 ++num;
1616 return num;
1620 /* Return 1 if regs X and Y will become the same if moved. */
1622 static int
1623 regs_match_p (rtx x, rtx y, struct loop_movables *movables)
1625 unsigned int xn = REGNO (x);
1626 unsigned int yn = REGNO (y);
1627 struct movable *mx, *my;
1629 for (mx = movables->head; mx; mx = mx->next)
1630 if (mx->regno == xn)
1631 break;
1633 for (my = movables->head; my; my = my->next)
1634 if (my->regno == yn)
1635 break;
1637 return (mx && my
1638 && ((mx->match == my->match && mx->match != 0)
1639 || mx->match == my
1640 || mx == my->match));
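/* Example: if the movables for (reg 100) and (reg 101) were both
   matched against the same movable for (reg 102), then after
   move_movables both registers will be replaced by (reg 102), so
   regs_match_p reports them as the same register.  The register
   numbers here are hypothetical.  */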
1643 /* Return 1 if X and Y are identical-looking rtx's.
1644 This is the Lisp function EQUAL for rtx arguments.
1646 If two registers are matching movables or a movable register and an
1647 equivalent constant, consider them equal. */
1649 static int
1650 rtx_equal_for_loop_p (rtx x, rtx y, struct loop_movables *movables,
1651 struct loop_regs *regs)
1653 int i;
1654 int j;
1655 struct movable *m;
1656 enum rtx_code code;
1657 const char *fmt;
1659 if (x == y)
1660 return 1;
1661 if (x == 0 || y == 0)
1662 return 0;
1664 code = GET_CODE (x);
1666 /* If we have a register and a constant, they may sometimes be
1667 equal. */
1668 if (GET_CODE (x) == REG && regs->array[REGNO (x)].set_in_loop == -2
1669 && CONSTANT_P (y))
1671 for (m = movables->head; m; m = m->next)
1672 if (m->move_insn && m->regno == REGNO (x)
1673 && rtx_equal_p (m->set_src, y))
1674 return 1;
1676 else if (GET_CODE (y) == REG && regs->array[REGNO (y)].set_in_loop == -2
1677 && CONSTANT_P (x))
1679 for (m = movables->head; m; m = m->next)
1680 if (m->move_insn && m->regno == REGNO (y)
1681 && rtx_equal_p (m->set_src, x))
1682 return 1;
1685 /* Otherwise, rtx's of different codes cannot be equal. */
1686 if (code != GET_CODE (y))
1687 return 0;
1689 /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent.
1690 (REG:SI x) and (REG:HI x) are NOT equivalent. */
1692 if (GET_MODE (x) != GET_MODE (y))
1693 return 0;
1695 /* These three types of rtx's can be compared nonrecursively. */
1696 if (code == REG)
1697 return (REGNO (x) == REGNO (y) || regs_match_p (x, y, movables));
1699 if (code == LABEL_REF)
1700 return XEXP (x, 0) == XEXP (y, 0);
1701 if (code == SYMBOL_REF)
1702 return XSTR (x, 0) == XSTR (y, 0);
1704 /* Compare the elements. If any pair of corresponding elements
1705 fail to match, return 0 for the whole thing. */
1707 fmt = GET_RTX_FORMAT (code);
1708 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1710 switch (fmt[i])
1712 case 'w':
1713 if (XWINT (x, i) != XWINT (y, i))
1714 return 0;
1715 break;
1717 case 'i':
1718 if (XINT (x, i) != XINT (y, i))
1719 return 0;
1720 break;
1722 case 'E':
1723 /* Two vectors must have the same length. */
1724 if (XVECLEN (x, i) != XVECLEN (y, i))
1725 return 0;
1727 /* And the corresponding elements must match. */
1728 for (j = 0; j < XVECLEN (x, i); j++)
1729 if (rtx_equal_for_loop_p (XVECEXP (x, i, j), XVECEXP (y, i, j),
1730 movables, regs) == 0)
1731 return 0;
1732 break;
1734 case 'e':
1735 if (rtx_equal_for_loop_p (XEXP (x, i), XEXP (y, i), movables, regs)
1736 == 0)
1737 return 0;
1738 break;
1740 case 's':
1741 if (strcmp (XSTR (x, i), XSTR (y, i)))
1742 return 0;
1743 break;
1745 case 'u':
1746 /* These are just backpointers, so they don't matter. */
1747 break;
1749 case '0':
1750 break;
1752 /* It is believed that rtx's at this level will never
1753 contain anything but integers and other rtx's,
1754 except for within LABEL_REFs and SYMBOL_REFs. */
1755 default:
1756 abort ();
1759 return 1;
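/* Example: if (reg 100) belongs to a `move_insn' movable whose
   set_src is (const_int 4), its set_in_loop count is -2, so this
   function treats (reg 100) and (const_int 4) as equal.  The register
   number is hypothetical.  */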
1762 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to all
1763 insns in INSNS which use the reference. LABEL_NUSES for CODE_LABEL
1764 references is incremented once for each added note. */
1766 static void
1767 add_label_notes (rtx x, rtx insns)
1769 enum rtx_code code = GET_CODE (x);
1770 int i, j;
1771 const char *fmt;
1772 rtx insn;
1774 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
1776 /* This code used to ignore labels that referred to dispatch tables to
1777 avoid flow generating (slightly) worse code.
1779 We no longer ignore such label references (see LABEL_REF handling in
1780 mark_jump_label for additional information). */
1781 for (insn = insns; insn; insn = NEXT_INSN (insn))
1782 if (reg_mentioned_p (XEXP (x, 0), insn))
1784 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, XEXP (x, 0),
1785 REG_NOTES (insn));
1786 if (LABEL_P (XEXP (x, 0)))
1787 LABEL_NUSES (XEXP (x, 0))++;
1791 fmt = GET_RTX_FORMAT (code);
1792 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1794 if (fmt[i] == 'e')
1795 add_label_notes (XEXP (x, i), insns);
1796 else if (fmt[i] == 'E')
1797 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
1798 add_label_notes (XVECEXP (x, i, j), insns);
1802 /* Scan MOVABLES, and move the insns that deserve to be moved.
1803 If two matching movables are combined, replace one reg with the
1804 other throughout. */
1806 static void
1807 move_movables (struct loop *loop, struct loop_movables *movables,
1808 int threshold, int insn_count)
1810 struct loop_regs *regs = LOOP_REGS (loop);
1811 int nregs = regs->num;
1812 rtx new_start = 0;
1813 struct movable *m;
1814 rtx p;
1815 rtx loop_start = loop->start;
1816 rtx loop_end = loop->end;
1817 /* Map of pseudo-register replacements to handle combining
1818 when we move several insns that load the same value
1819 into different pseudo-registers. */
1820 rtx *reg_map = xcalloc (nregs, sizeof (rtx));
1821 char *already_moved = xcalloc (nregs, sizeof (char));
1823 for (m = movables->head; m; m = m->next)
1825 /* Describe this movable insn. */
1827 if (loop_dump_stream)
1829 fprintf (loop_dump_stream, "Insn %d: regno %d (life %d), ",
1830 INSN_UID (m->insn), m->regno, m->lifetime);
1831 if (m->consec > 0)
1832 fprintf (loop_dump_stream, "consec %d, ", m->consec);
1833 if (m->cond)
1834 fprintf (loop_dump_stream, "cond ");
1835 if (m->force)
1836 fprintf (loop_dump_stream, "force ");
1837 if (m->global)
1838 fprintf (loop_dump_stream, "global ");
1839 if (m->done)
1840 fprintf (loop_dump_stream, "done ");
1841 if (m->move_insn)
1842 fprintf (loop_dump_stream, "move-insn ");
1843 if (m->match)
1844 fprintf (loop_dump_stream, "matches %d ",
1845 INSN_UID (m->match->insn));
1846 if (m->forces)
1847 fprintf (loop_dump_stream, "forces %d ",
1848 INSN_UID (m->forces->insn));
1851 /* Ignore the insn if it's already done (it matched something else).
1852 Otherwise, see if it is now safe to move. */
1854 if (!m->done
1855 && (! m->cond
1856 || (1 == loop_invariant_p (loop, m->set_src)
1857 && (m->dependencies == 0
1858 || 1 == loop_invariant_p (loop, m->dependencies))
1859 && (m->consec == 0
1860 || 1 == consec_sets_invariant_p (loop, m->set_dest,
1861 m->consec + 1,
1862 m->insn))))
1863 && (! m->forces || m->forces->done))
1865 int regno;
1866 rtx p;
1867 int savings = m->savings;
1869 /* We have an insn that is safe to move.
1870 Compute its desirability. */
1872 p = m->insn;
1873 regno = m->regno;
1875 if (loop_dump_stream)
1876 fprintf (loop_dump_stream, "savings %d ", savings);
1878 if (regs->array[regno].moved_once && loop_dump_stream)
1879 fprintf (loop_dump_stream, "halved since already moved ");
1881 /* An insn MUST be moved if we already moved something else
1882 which is safe only if this one is moved too: that is,
1883 if already_moved[REGNO] is nonzero. */
1885 /* An insn is desirable to move if the new lifetime of the
1886 register is no more than THRESHOLD times the old lifetime.
1887 If it's not desirable, it means the loop is so big
1888 that moving won't speed things up much,
1889 and it is liable to make register usage worse. */
1891 /* It is also desirable to move if it can be moved at no
1892 extra cost because something else was already moved. */
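/* Illustrative numbers only: with THRESHOLD == 6, SAVINGS == 2 and a
   register LIFETIME of 10 insns, the product 6 * 2 * 10 == 120 is
   compared against INSN_COUNT (or 2 * INSN_COUNT when the register
   was already moved out of another loop), so a 100-insn loop would
   accept the move while a 300-insn loop would reject it.  */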
1894 if (already_moved[regno]
1895 || flag_move_all_movables
1896 || (threshold * savings * m->lifetime) >=
1897 (regs->array[regno].moved_once ? insn_count * 2 : insn_count)
1898 || (m->forces && m->forces->done
1899 && regs->array[m->forces->regno].n_times_set == 1))
1901 int count;
1902 struct movable *m1;
1903 rtx first = NULL_RTX;
1904 rtx newreg = NULL_RTX;
1906 if (m->insert_temp)
1907 newreg = gen_reg_rtx (GET_MODE (m->set_dest));
1909 /* Now move the insns that set the reg. */
1911 if (m->partial && m->match)
1913 rtx newpat, i1;
1914 rtx r1, r2;
1915 /* Find the end of this chain of matching regs.
1916 Thus, we load each reg in the chain from that one reg.
1917 And that reg is loaded with 0 directly,
1918 since it has ->match == 0. */
1919 for (m1 = m; m1->match; m1 = m1->match);
1920 newpat = gen_move_insn (SET_DEST (PATTERN (m->insn)),
1921 SET_DEST (PATTERN (m1->insn)));
1922 i1 = loop_insn_hoist (loop, newpat);
1924 /* Mark the moved, invariant reg as being allowed to
1925 share a hard reg with the other matching invariant. */
1926 REG_NOTES (i1) = REG_NOTES (m->insn);
1927 r1 = SET_DEST (PATTERN (m->insn));
1928 r2 = SET_DEST (PATTERN (m1->insn));
1929 regs_may_share
1930 = gen_rtx_EXPR_LIST (VOIDmode, r1,
1931 gen_rtx_EXPR_LIST (VOIDmode, r2,
1932 regs_may_share));
1933 delete_insn (m->insn);
1935 if (new_start == 0)
1936 new_start = i1;
1938 if (loop_dump_stream)
1939 fprintf (loop_dump_stream, " moved to %d", INSN_UID (i1));
1941 /* If we are to re-generate the item being moved with a
1942 new move insn, first delete what we have and then emit
1943 the move insn before the loop. */
1944 else if (m->move_insn)
1946 rtx i1, temp, seq;
1948 for (count = m->consec; count >= 0; count--)
1950 /* If this is the first insn of a library call sequence,
1951 something is very wrong. */
1952 if (GET_CODE (p) != NOTE
1953 && (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
1954 abort ();
1956 /* If this is the last insn of a libcall sequence, then
1957 delete every insn in the sequence except the last.
1958 The last insn is handled in the normal manner. */
1959 if (GET_CODE (p) != NOTE
1960 && (temp = find_reg_note (p, REG_RETVAL, NULL_RTX)))
1962 temp = XEXP (temp, 0);
1963 while (temp != p)
1964 temp = delete_insn (temp);
1967 temp = p;
1968 p = delete_insn (p);
1970 /* simplify_giv_expr expects that it can walk the insns
1971 at m->insn forwards and see this old sequence we are
1972 tossing here. delete_insn does preserve the next
1973 pointers, but when we skip over a NOTE we must fix
1974 it up. Otherwise that code walks into the non-deleted
1975 insn stream. */
1976 while (p && GET_CODE (p) == NOTE)
1977 p = NEXT_INSN (temp) = NEXT_INSN (p);
1979 if (m->insert_temp)
1981 /* Replace the original insn with a move from
1982 our newly created temp. */
1983 start_sequence ();
1984 emit_move_insn (m->set_dest, newreg);
1985 seq = get_insns ();
1986 end_sequence ();
1987 emit_insn_before (seq, p);
1991 start_sequence ();
1992 emit_move_insn (m->insert_temp ? newreg : m->set_dest,
1993 m->set_src);
1994 seq = get_insns ();
1995 end_sequence ();
1997 add_label_notes (m->set_src, seq);
1999 i1 = loop_insn_hoist (loop, seq);
2000 if (! find_reg_note (i1, REG_EQUAL, NULL_RTX))
2001 set_unique_reg_note (i1,
2002 m->is_equiv ? REG_EQUIV : REG_EQUAL,
2003 m->set_src);
2005 if (loop_dump_stream)
2006 fprintf (loop_dump_stream, " moved to %d", INSN_UID (i1));
2008 /* The more regs we move, the less we like moving them. */
2009 threshold -= 3;
2011 else
2013 for (count = m->consec; count >= 0; count--)
2015 rtx i1, temp;
2017 /* If first insn of libcall sequence, skip to end. */
2018 /* Do this at start of loop, since p is guaranteed to
2019 be an insn here. */
2020 if (GET_CODE (p) != NOTE
2021 && (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
2022 p = XEXP (temp, 0);
2024 /* If last insn of libcall sequence, move all
2025 insns except the last before the loop. The last
2026 insn is handled in the normal manner. */
2027 if (GET_CODE (p) != NOTE
2028 && (temp = find_reg_note (p, REG_RETVAL, NULL_RTX)))
2030 rtx fn_address = 0;
2031 rtx fn_reg = 0;
2032 rtx fn_address_insn = 0;
2034 first = 0;
2035 for (temp = XEXP (temp, 0); temp != p;
2036 temp = NEXT_INSN (temp))
2038 rtx body;
2039 rtx n;
2040 rtx next;
2042 if (GET_CODE (temp) == NOTE)
2043 continue;
2045 body = PATTERN (temp);
2047 /* Find the next insn after TEMP,
2048 not counting USE or NOTE insns. */
2049 for (next = NEXT_INSN (temp); next != p;
2050 next = NEXT_INSN (next))
2051 if (! (GET_CODE (next) == INSN
2052 && GET_CODE (PATTERN (next)) == USE)
2053 && GET_CODE (next) != NOTE)
2054 break;
2056 /* If that is the call, this may be the insn
2057 that loads the function address.
2059 Extract the function address from the insn
2060 that loads it into a register.
2061 If this insn was cse'd, we get incorrect code.
2063 So emit a new move insn that copies the
2064 function address into the register that the
2065 call insn will use. flow.c will delete any
2066 redundant stores that we have created. */
2067 if (GET_CODE (next) == CALL_INSN
2068 && GET_CODE (body) == SET
2069 && GET_CODE (SET_DEST (body)) == REG
2070 && (n = find_reg_note (temp, REG_EQUAL,
2071 NULL_RTX)))
2073 fn_reg = SET_SRC (body);
2074 if (GET_CODE (fn_reg) != REG)
2075 fn_reg = SET_DEST (body);
2076 fn_address = XEXP (n, 0);
2077 fn_address_insn = temp;
2079 /* We have the call insn.
2080 If it uses the register we suspect it might,
2081 load it with the correct address directly. */
2082 if (GET_CODE (temp) == CALL_INSN
2083 && fn_address != 0
2084 && reg_referenced_p (fn_reg, body))
2085 loop_insn_emit_after (loop, 0, fn_address_insn,
2086 gen_move_insn
2087 (fn_reg, fn_address));
2089 if (GET_CODE (temp) == CALL_INSN)
2091 i1 = loop_call_insn_hoist (loop, body);
2092 /* Because the USAGE information potentially
2093 contains objects other than hard registers
2094 we need to copy it. */
2095 if (CALL_INSN_FUNCTION_USAGE (temp))
2096 CALL_INSN_FUNCTION_USAGE (i1)
2097 = copy_rtx (CALL_INSN_FUNCTION_USAGE (temp));
2099 else
2100 i1 = loop_insn_hoist (loop, body);
2101 if (first == 0)
2102 first = i1;
2103 if (temp == fn_address_insn)
2104 fn_address_insn = i1;
2105 REG_NOTES (i1) = REG_NOTES (temp);
2106 REG_NOTES (temp) = NULL;
2107 delete_insn (temp);
2109 if (new_start == 0)
2110 new_start = first;
2112 if (m->savemode != VOIDmode)
2114 /* P sets REG to zero; but we should clear only
2115 the bits that are not covered by the mode
2116 m->savemode. */
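/* For example, if m->savemode is QImode (8 bits wide), the mask
   computed below is (1 << 8) - 1 == 0xff, so the AND keeps only the
   low byte supplied by the zero-extension and clears the rest of the
   register.  */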
2117 rtx reg = m->set_dest;
2118 rtx sequence;
2119 rtx tem;
2121 start_sequence ();
2122 tem = expand_simple_binop
2123 (GET_MODE (reg), AND, reg,
2124 GEN_INT ((((HOST_WIDE_INT) 1
2125 << GET_MODE_BITSIZE (m->savemode)))
2126 - 1),
2127 reg, 1, OPTAB_LIB_WIDEN);
2128 if (tem == 0)
2129 abort ();
2130 if (tem != reg)
2131 emit_move_insn (reg, tem);
2132 sequence = get_insns ();
2133 end_sequence ();
2134 i1 = loop_insn_hoist (loop, sequence);
2136 else if (GET_CODE (p) == CALL_INSN)
2138 i1 = loop_call_insn_hoist (loop, PATTERN (p));
2139 /* Because the USAGE information potentially
2140 contains objects other than hard registers
2141 we need to copy it. */
2142 if (CALL_INSN_FUNCTION_USAGE (p))
2143 CALL_INSN_FUNCTION_USAGE (i1)
2144 = copy_rtx (CALL_INSN_FUNCTION_USAGE (p));
2146 else if (count == m->consec && m->move_insn_first)
2148 rtx seq;
2149 /* The SET_SRC might not be invariant, so we must
2150 use the REG_EQUAL note. */
2151 start_sequence ();
2152 emit_move_insn (m->insert_temp ? newreg : m->set_dest,
2153 m->set_src);
2154 seq = get_insns ();
2155 end_sequence ();
2157 add_label_notes (m->set_src, seq);
2159 i1 = loop_insn_hoist (loop, seq);
2160 if (! find_reg_note (i1, REG_EQUAL, NULL_RTX))
2161 set_unique_reg_note (i1, m->is_equiv ? REG_EQUIV
2162 : REG_EQUAL, m->set_src);
2164 else if (m->insert_temp)
2166 rtx *reg_map2 = xcalloc (REGNO (newreg),
2167 sizeof(rtx));
2168 reg_map2 [m->regno] = newreg;
2170 i1 = loop_insn_hoist (loop, copy_rtx (PATTERN (p)));
2171 replace_regs (i1, reg_map2, REGNO (newreg), 1);
2172 free (reg_map2);
2174 else
2175 i1 = loop_insn_hoist (loop, PATTERN (p));
2177 if (REG_NOTES (i1) == 0)
2179 REG_NOTES (i1) = REG_NOTES (p);
2180 REG_NOTES (p) = NULL;
2182 /* If there is a REG_EQUAL note present whose value
2183 is not loop invariant, then delete it, since it
2184 may cause problems with later optimization passes.
2185 It is possible for cse to create such notes
2186 as a result of record_jump_cond. */
2188 if ((temp = find_reg_note (i1, REG_EQUAL, NULL_RTX))
2189 && ! loop_invariant_p (loop, XEXP (temp, 0)))
2190 remove_note (i1, temp);
2193 if (new_start == 0)
2194 new_start = i1;
2196 if (loop_dump_stream)
2197 fprintf (loop_dump_stream, " moved to %d",
2198 INSN_UID (i1));
2200 /* If library call, now fix the REG_NOTES that contain
2201 insn pointers, namely REG_LIBCALL on FIRST
2202 and REG_RETVAL on I1. */
2203 if ((temp = find_reg_note (i1, REG_RETVAL, NULL_RTX)))
2205 XEXP (temp, 0) = first;
2206 temp = find_reg_note (first, REG_LIBCALL, NULL_RTX);
2207 XEXP (temp, 0) = i1;
2210 temp = p;
2211 delete_insn (p);
2212 p = NEXT_INSN (p);
2214 /* simplify_giv_expr expects that it can walk the insns
2215 at m->insn forwards and see this old sequence we are
2216 tossing here. delete_insn does preserve the next
2217 pointers, but when we skip over a NOTE we must fix
2218 it up. Otherwise that code walks into the non-deleted
2219 insn stream. */
2220 while (p && GET_CODE (p) == NOTE)
2221 p = NEXT_INSN (temp) = NEXT_INSN (p);
2223 if (m->insert_temp)
2225 rtx seq;
2226 /* Replace the original insn with a move from
2227 our newly created temp. */
2228 start_sequence ();
2229 emit_move_insn (m->set_dest, newreg);
2230 seq = get_insns ();
2231 end_sequence ();
2232 emit_insn_before (seq, p);
2236 /* The more regs we move, the less we like moving them. */
2237 threshold -= 3;
2240 m->done = 1;
2242 if (!m->insert_temp)
2244 /* Any other movable that loads the same register
2245 MUST be moved. */
2246 already_moved[regno] = 1;
2248 /* This reg has been moved out of one loop. */
2249 regs->array[regno].moved_once = 1;
2251 /* The reg set here is now invariant. */
2252 if (! m->partial)
2254 int i;
2255 for (i = 0; i < LOOP_REGNO_NREGS (regno, m->set_dest); i++)
2256 regs->array[regno+i].set_in_loop = 0;
2259 /* Change the length-of-life info for the register
2260 to say it lives at least the full length of this loop.
2261 This will help guide optimizations in outer loops. */
2263 if (REGNO_FIRST_LUID (regno) > INSN_LUID (loop_start))
2264 /* This is the old insn before all the moved insns.
2265 We can't use the moved insn because it is out of range
2266 in uid_luid. Only the old insns have luids. */
2267 REGNO_FIRST_UID (regno) = INSN_UID (loop_start);
2268 if (REGNO_LAST_LUID (regno) < INSN_LUID (loop_end))
2269 REGNO_LAST_UID (regno) = INSN_UID (loop_end);
2272 /* Combine with this moved insn any other matching movables. */
2274 if (! m->partial)
2275 for (m1 = movables->head; m1; m1 = m1->next)
2276 if (m1->match == m)
2278 rtx temp;
2280 /* Schedule the reg loaded by M1
2281 for replacement so that it shares the reg of M.
2282 If the modes differ (only possible in restricted
2283 circumstances), make a SUBREG.
2285 Note this assumes that the target dependent files
2286 treat REG and SUBREG equally, including within
2287 GO_IF_LEGITIMATE_ADDRESS and in all the
2288 predicates since we never verify that replacing the
2289 original register with a SUBREG results in a
2290 recognizable insn. */
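/* Example: if M sets (reg:SI 100) and M1 sets (reg:HI 101), both
   loaded from the same CONST_INT, reg 101 is scheduled for replacement
   by a lowpart SUBREG of (reg:SI 100) obtained from gen_lowpart_common.
   The register numbers are hypothetical.  */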
2291 if (GET_MODE (m->set_dest) == GET_MODE (m1->set_dest))
2292 reg_map[m1->regno] = m->set_dest;
2293 else
2294 reg_map[m1->regno]
2295 = gen_lowpart_common (GET_MODE (m1->set_dest),
2296 m->set_dest);
2298 /* Get rid of the matching insn
2299 and prevent further processing of it. */
2300 m1->done = 1;
2302 /* If library call, delete all insns. */
2303 if ((temp = find_reg_note (m1->insn, REG_RETVAL,
2304 NULL_RTX)))
2305 delete_insn_chain (XEXP (temp, 0), m1->insn);
2306 else
2307 delete_insn (m1->insn);
2309 /* Any other movable that loads the same register
2310 MUST be moved. */
2311 already_moved[m1->regno] = 1;
2313 /* The reg merged here is now invariant,
2314 if the reg it matches is invariant. */
2315 if (! m->partial)
2317 int i;
2318 for (i = 0;
2319 i < LOOP_REGNO_NREGS (regno, m1->set_dest);
2320 i++)
2321 regs->array[m1->regno+i].set_in_loop = 0;
2325 else if (loop_dump_stream)
2326 fprintf (loop_dump_stream, "not desirable");
2328 else if (loop_dump_stream && !m->match)
2329 fprintf (loop_dump_stream, "not safe");
2331 if (loop_dump_stream)
2332 fprintf (loop_dump_stream, "\n");
2335 if (new_start == 0)
2336 new_start = loop_start;
2338 /* Go through all the instructions in the loop, making
2339 all the register substitutions scheduled in REG_MAP. */
2340 for (p = new_start; p != loop_end; p = NEXT_INSN (p))
2341 if (GET_CODE (p) == INSN || GET_CODE (p) == JUMP_INSN
2342 || GET_CODE (p) == CALL_INSN)
2344 replace_regs (PATTERN (p), reg_map, nregs, 0);
2345 replace_regs (REG_NOTES (p), reg_map, nregs, 0);
2346 INSN_CODE (p) = -1;
2349 /* Clean up. */
2350 free (reg_map);
2351 free (already_moved);
2355 static void
2356 loop_movables_add (struct loop_movables *movables, struct movable *m)
2358 if (movables->head == 0)
2359 movables->head = m;
2360 else
2361 movables->last->next = m;
2362 movables->last = m;
2366 static void
2367 loop_movables_free (struct loop_movables *movables)
2369 struct movable *m;
2370 struct movable *m_next;
2372 for (m = movables->head; m; m = m_next)
2374 m_next = m->next;
2375 free (m);
2379 #if 0
2380 /* Scan X and replace the address of any MEM in it with ADDR.
2381 REG is the address that MEM should have before the replacement. */
2383 static void
2384 replace_call_address (rtx x, rtx reg, rtx addr)
2386 enum rtx_code code;
2387 int i;
2388 const char *fmt;
2390 if (x == 0)
2391 return;
2392 code = GET_CODE (x);
2393 switch (code)
2395 case PC:
2396 case CC0:
2397 case CONST_INT:
2398 case CONST_DOUBLE:
2399 case CONST:
2400 case SYMBOL_REF:
2401 case LABEL_REF:
2402 case REG:
2403 return;
2405 case SET:
2406 /* Short cut for very common case. */
2407 replace_call_address (XEXP (x, 1), reg, addr);
2408 return;
2410 case CALL:
2411 /* Short cut for very common case. */
2412 replace_call_address (XEXP (x, 0), reg, addr);
2413 return;
2415 case MEM:
2416 /* If this MEM uses a reg other than the one we expected,
2417 something is wrong. */
2418 if (XEXP (x, 0) != reg)
2419 abort ();
2420 XEXP (x, 0) = addr;
2421 return;
2423 default:
2424 break;
2427 fmt = GET_RTX_FORMAT (code);
2428 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2430 if (fmt[i] == 'e')
2431 replace_call_address (XEXP (x, i), reg, addr);
2432 else if (fmt[i] == 'E')
2434 int j;
2435 for (j = 0; j < XVECLEN (x, i); j++)
2436 replace_call_address (XVECEXP (x, i, j), reg, addr);
2440 #endif
2442 /* Return the number of memory refs to addresses that vary
2443 in the rtx X. */
2445 static int
2446 count_nonfixed_reads (const struct loop *loop, rtx x)
2448 enum rtx_code code;
2449 int i;
2450 const char *fmt;
2451 int value;
2453 if (x == 0)
2454 return 0;
2456 code = GET_CODE (x);
2457 switch (code)
2459 case PC:
2460 case CC0:
2461 case CONST_INT:
2462 case CONST_DOUBLE:
2463 case CONST:
2464 case SYMBOL_REF:
2465 case LABEL_REF:
2466 case REG:
2467 return 0;
2469 case MEM:
2470 return ((loop_invariant_p (loop, XEXP (x, 0)) != 1)
2471 + count_nonfixed_reads (loop, XEXP (x, 0)));
2473 default:
2474 break;
2477 value = 0;
2478 fmt = GET_RTX_FORMAT (code);
2479 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2481 if (fmt[i] == 'e')
2482 value += count_nonfixed_reads (loop, XEXP (x, i));
2483 if (fmt[i] == 'E')
2485 int j;
2486 for (j = 0; j < XVECLEN (x, i); j++)
2487 value += count_nonfixed_reads (loop, XVECEXP (x, i, j));
2490 return value;
2493 /* Scan a loop setting the elements `cont', `vtop', `loops_enclosed',
2494 `has_call', `has_nonconst_call', `has_volatile', `has_tablejump',
2495 `unknown_address_altered', `unknown_constant_address_altered', and
2496 `num_mem_sets' in LOOP. Also, fill in the array `mems' and the
2497 list `store_mems' in LOOP. */
2499 static void
2500 prescan_loop (struct loop *loop)
2502 int level = 1;
2503 rtx insn;
2504 struct loop_info *loop_info = LOOP_INFO (loop);
2505 rtx start = loop->start;
2506 rtx end = loop->end;
2507 /* The label after END. Jumping here is just like falling off the
2508 end of the loop. We use next_nonnote_insn instead of next_label
2509 as a hedge against the (pathological) case where some actual insn
2510 might end up between the two. */
2511 rtx exit_target = next_nonnote_insn (end);
2513 loop_info->has_indirect_jump = indirect_jump_in_function;
2514 loop_info->pre_header_has_call = 0;
2515 loop_info->has_call = 0;
2516 loop_info->has_nonconst_call = 0;
2517 loop_info->has_prefetch = 0;
2518 loop_info->has_volatile = 0;
2519 loop_info->has_tablejump = 0;
2520 loop_info->has_multiple_exit_targets = 0;
2521 loop->level = 1;
2523 loop_info->unknown_address_altered = 0;
2524 loop_info->unknown_constant_address_altered = 0;
2525 loop_info->store_mems = NULL_RTX;
2526 loop_info->first_loop_store_insn = NULL_RTX;
2527 loop_info->mems_idx = 0;
2528 loop_info->num_mem_sets = 0;
2529 /* If loop opts run twice, this was set on 1st pass for 2nd. */
2530 loop_info->preconditioned = NOTE_PRECONDITIONED (end);
2532 for (insn = start; insn && GET_CODE (insn) != CODE_LABEL;
2533 insn = PREV_INSN (insn))
2535 if (GET_CODE (insn) == CALL_INSN)
2537 loop_info->pre_header_has_call = 1;
2538 break;
2542 for (insn = NEXT_INSN (start); insn != NEXT_INSN (end);
2543 insn = NEXT_INSN (insn))
2545 switch (GET_CODE (insn))
2547 case NOTE:
2548 if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
2550 ++level;
2551 /* Count number of loops contained in this one. */
2552 loop->level++;
2554 else if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_END)
2555 --level;
2556 break;
2558 case CALL_INSN:
2559 if (! CONST_OR_PURE_CALL_P (insn))
2561 loop_info->unknown_address_altered = 1;
2562 loop_info->has_nonconst_call = 1;
2564 else if (pure_call_p (insn))
2565 loop_info->has_nonconst_call = 1;
2566 loop_info->has_call = 1;
2567 if (can_throw_internal (insn))
2568 loop_info->has_multiple_exit_targets = 1;
2570 /* Calls initializing constant objects have CLOBBER of MEM /u in the
2571 attached FUNCTION_USAGE expression list, not accounted for by the
2572 code above. We should note these to avoid missing dependencies in
2573 later references. */
2575 rtx fusage_entry;
2577 for (fusage_entry = CALL_INSN_FUNCTION_USAGE (insn);
2578 fusage_entry; fusage_entry = XEXP (fusage_entry, 1))
2580 rtx fusage = XEXP (fusage_entry, 0);
2582 if (GET_CODE (fusage) == CLOBBER
2583 && GET_CODE (XEXP (fusage, 0)) == MEM
2584 && RTX_UNCHANGING_P (XEXP (fusage, 0)))
2586 note_stores (fusage, note_addr_stored, loop_info);
2587 if (! loop_info->first_loop_store_insn
2588 && loop_info->store_mems)
2589 loop_info->first_loop_store_insn = insn;
2593 break;
2595 case JUMP_INSN:
2596 if (! loop_info->has_multiple_exit_targets)
2598 rtx set = pc_set (insn);
2600 if (set)
2602 rtx src = SET_SRC (set);
2603 rtx label1, label2;
2605 if (GET_CODE (src) == IF_THEN_ELSE)
2607 label1 = XEXP (src, 1);
2608 label2 = XEXP (src, 2);
2610 else
2612 label1 = src;
2613 label2 = NULL_RTX;
2618 if (label1 && label1 != pc_rtx)
2620 if (GET_CODE (label1) != LABEL_REF)
2622 /* Something tricky. */
2623 loop_info->has_multiple_exit_targets = 1;
2624 break;
2626 else if (XEXP (label1, 0) != exit_target
2627 && LABEL_OUTSIDE_LOOP_P (label1))
2629 /* A jump outside the current loop. */
2630 loop_info->has_multiple_exit_targets = 1;
2631 break;
2635 label1 = label2;
2636 label2 = NULL_RTX;
2638 while (label1);
2640 else
2642 /* A return, or something tricky. */
2643 loop_info->has_multiple_exit_targets = 1;
2646 /* Fall through. */
2648 case INSN:
2649 if (volatile_refs_p (PATTERN (insn)))
2650 loop_info->has_volatile = 1;
2652 if (GET_CODE (insn) == JUMP_INSN
2653 && (GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
2654 || GET_CODE (PATTERN (insn)) == ADDR_VEC))
2655 loop_info->has_tablejump = 1;
2657 note_stores (PATTERN (insn), note_addr_stored, loop_info);
2658 if (! loop_info->first_loop_store_insn && loop_info->store_mems)
2659 loop_info->first_loop_store_insn = insn;
2661 if (flag_non_call_exceptions && can_throw_internal (insn))
2662 loop_info->has_multiple_exit_targets = 1;
2663 break;
2665 default:
2666 break;
2670 /* Now, rescan the loop, setting up the LOOP_MEMS array. */
2671 if (/* An exception thrown by a called function might land us
2672 anywhere. */
2673 ! loop_info->has_nonconst_call
2674 /* We don't want loads for MEMs moved to a location before the
2675 one at which their stack memory becomes allocated. (Note
2676 that this is not a problem for malloc, etc., since those
2677 require actual function calls.) */
2678 && ! current_function_calls_alloca
2679 /* There are ways to leave the loop other than falling off the
2680 end. */
2681 && ! loop_info->has_multiple_exit_targets)
2682 for (insn = NEXT_INSN (start); insn != NEXT_INSN (end);
2683 insn = NEXT_INSN (insn))
2684 for_each_rtx (&insn, insert_loop_mem, loop_info);
2686 /* BLKmode MEMs are added to LOOP_STORE_MEM as necessary so
2687 that loop_invariant_p and load_mems can use true_dependence
2688 to determine what is really clobbered. */
2689 if (loop_info->unknown_address_altered)
2691 rtx mem = gen_rtx_MEM (BLKmode, const0_rtx);
2693 loop_info->store_mems
2694 = gen_rtx_EXPR_LIST (VOIDmode, mem, loop_info->store_mems);
2696 if (loop_info->unknown_constant_address_altered)
2698 rtx mem = gen_rtx_MEM (BLKmode, const0_rtx);
2700 RTX_UNCHANGING_P (mem) = 1;
2701 loop_info->store_mems
2702 = gen_rtx_EXPR_LIST (VOIDmode, mem, loop_info->store_mems);
2706 /* Invalidate all loops containing LABEL. */
2708 static void
2709 invalidate_loops_containing_label (rtx label)
2711 struct loop *loop;
2712 for (loop = uid_loop[INSN_UID (label)]; loop; loop = loop->outer)
2713 loop->invalid = 1;
2716 /* Scan the function looking for loops. Record the start and end of each loop.
2717 Also mark as invalid loops any loops that contain a setjmp or are branched
2718 to from outside the loop. */
2720 static void
2721 find_and_verify_loops (rtx f, struct loops *loops)
2723 rtx insn;
2724 rtx label;
2725 int num_loops;
2726 struct loop *current_loop;
2727 struct loop *next_loop;
2728 struct loop *loop;
2730 num_loops = loops->num;
2732 compute_luids (f, NULL_RTX, 0);
2734 /* If there are jumps to undefined labels,
2735 treat them as jumps out of any/all loops.
2736 This also avoids writing past end of tables when there are no loops. */
2737 uid_loop[0] = NULL;
2739 /* Find boundaries of loops, mark which loops are contained within
2740 loops, and invalidate loops that have setjmp. */
2742 num_loops = 0;
2743 current_loop = NULL;
2744 for (insn = f; insn; insn = NEXT_INSN (insn))
2746 if (GET_CODE (insn) == NOTE)
2747 switch (NOTE_LINE_NUMBER (insn))
2749 case NOTE_INSN_LOOP_BEG:
2750 next_loop = loops->array + num_loops;
2751 next_loop->num = num_loops;
2752 num_loops++;
2753 next_loop->start = insn;
2754 next_loop->outer = current_loop;
2755 current_loop = next_loop;
2756 break;
2758 case NOTE_INSN_LOOP_CONT:
2759 current_loop->cont = insn;
2760 break;
2762 case NOTE_INSN_LOOP_VTOP:
2763 current_loop->vtop = insn;
2764 break;
2766 case NOTE_INSN_LOOP_END:
2767 if (! current_loop)
2768 abort ();
2770 current_loop->end = insn;
2771 current_loop = current_loop->outer;
2772 break;
2774 default:
2775 break;
2778 if (GET_CODE (insn) == CALL_INSN
2779 && find_reg_note (insn, REG_SETJMP, NULL))
2781 /* In this case, we must invalidate our current loop and any
2782 enclosing loop. */
2783 for (loop = current_loop; loop; loop = loop->outer)
2785 loop->invalid = 1;
2786 if (loop_dump_stream)
2787 fprintf (loop_dump_stream,
2788 "\nLoop at %d ignored due to setjmp.\n",
2789 INSN_UID (loop->start));
2793 /* Note that this will mark the NOTE_INSN_LOOP_END note as being in the
2794 enclosing loop, but this doesn't matter. */
2795 uid_loop[INSN_UID (insn)] = current_loop;
2798 /* Any loop containing a label used in an initializer must be invalidated,
2799 because it can be jumped into from anywhere. */
2800 for (label = forced_labels; label; label = XEXP (label, 1))
2801 invalidate_loops_containing_label (XEXP (label, 0));
2803 /* Any loop containing a label used for an exception handler must be
2804 invalidated, because it can be jumped into from anywhere. */
2805 for_each_eh_label (invalidate_loops_containing_label);
2807 /* Now scan all insn's in the function. If any JUMP_INSN branches into a
2808 loop that it is not contained within, that loop is marked invalid.
2809 If any INSN or CALL_INSN uses a label's address, then the loop containing
2810 that label is marked invalid, because it could be jumped into from
2811 anywhere.
2813 Also look for blocks of code ending in an unconditional branch that
2814 exits the loop. If such a block is surrounded by a conditional
2815 branch around the block, move the block elsewhere (see below) and
2816 invert the jump to point to the code block. This may eliminate a
2817 label in our loop and will simplify processing by both us and a
2818 possible second cse pass. */
2820 for (insn = f; insn; insn = NEXT_INSN (insn))
2821 if (INSN_P (insn))
2823 struct loop *this_loop = uid_loop[INSN_UID (insn)];
2825 if (GET_CODE (insn) == INSN || GET_CODE (insn) == CALL_INSN)
2827 rtx note = find_reg_note (insn, REG_LABEL, NULL_RTX);
2828 if (note)
2829 invalidate_loops_containing_label (XEXP (note, 0));
2832 if (GET_CODE (insn) != JUMP_INSN)
2833 continue;
2835 mark_loop_jump (PATTERN (insn), this_loop);
2837 /* See if this is an unconditional branch outside the loop. */
2838 if (this_loop
2839 && (GET_CODE (PATTERN (insn)) == RETURN
2840 || (any_uncondjump_p (insn)
2841 && onlyjump_p (insn)
2842 && (uid_loop[INSN_UID (JUMP_LABEL (insn))]
2843 != this_loop)))
2844 && get_max_uid () < max_uid_for_loop)
2846 rtx p;
2847 rtx our_next = next_real_insn (insn);
2848 rtx last_insn_to_move = NEXT_INSN (insn);
2849 struct loop *dest_loop;
2850 struct loop *outer_loop = NULL;
2852 /* Go backwards until we reach the start of the loop, a label,
2853 or a JUMP_INSN. */
2854 for (p = PREV_INSN (insn);
2855 GET_CODE (p) != CODE_LABEL
2856 && ! (GET_CODE (p) == NOTE
2857 && NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_BEG)
2858 && GET_CODE (p) != JUMP_INSN;
2859 p = PREV_INSN (p))
2862 /* Check for the case where we have a jump to an inner nested
2863 loop, and do not perform the optimization in that case. */
2865 if (JUMP_LABEL (insn))
2867 dest_loop = uid_loop[INSN_UID (JUMP_LABEL (insn))];
2868 if (dest_loop)
2870 for (outer_loop = dest_loop; outer_loop;
2871 outer_loop = outer_loop->outer)
2872 if (outer_loop == this_loop)
2873 break;
2877 /* Make sure that the target of P is within the current loop. */
2879 if (GET_CODE (p) == JUMP_INSN && JUMP_LABEL (p)
2880 && uid_loop[INSN_UID (JUMP_LABEL (p))] != this_loop)
2881 outer_loop = this_loop;
2883 /* If we stopped on a JUMP_INSN to the next insn after INSN,
2884 we have a block of code to try to move.
2886 We look backward and then forward from the target of INSN
2887 to find a BARRIER at the same loop depth as the target.
2888 If we find such a BARRIER, we make a new label for the start
2889 of the block, invert the jump in P and point it to that label,
2890 and move the block of code to the spot we found. */
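/* Schematically, with hypothetical labels:

   before:   P:    if (cond) goto L1
                   <block>
             INSN: goto EXIT
             L1:   ...

   after:    P:    if (! cond) goto NEW
             L1:   ...
             ...
             (outside the loop, after a BARRIER)
             NEW:  <block>
             INSN: goto EXIT

   so the rarely executed exit block no longer sits in the middle of
   the loop body.  */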
2892 if (! outer_loop
2893 && GET_CODE (p) == JUMP_INSN
2894 && JUMP_LABEL (p) != 0
2895 /* Just ignore jumps to labels that were never emitted.
2896 These always indicate compilation errors. */
2897 && INSN_UID (JUMP_LABEL (p)) != 0
2898 && any_condjump_p (p) && onlyjump_p (p)
2899 && next_real_insn (JUMP_LABEL (p)) == our_next
2900 /* If it's not safe to move the sequence, then we
2901 mustn't try. */
2902 && insns_safe_to_move_p (p, NEXT_INSN (insn),
2903 &last_insn_to_move))
2905 rtx target
2906 = JUMP_LABEL (insn) ? JUMP_LABEL (insn) : get_last_insn ();
2907 struct loop *target_loop = uid_loop[INSN_UID (target)];
2908 rtx loc, loc2;
2909 rtx tmp;
2911 /* Search for possible garbage past the conditional jumps
2912 and look for the last barrier. */
2913 for (tmp = last_insn_to_move;
2914 tmp && GET_CODE (tmp) != CODE_LABEL; tmp = NEXT_INSN (tmp))
2915 if (GET_CODE (tmp) == BARRIER)
2916 last_insn_to_move = tmp;
2918 for (loc = target; loc; loc = PREV_INSN (loc))
2919 if (GET_CODE (loc) == BARRIER
2920 /* Don't move things inside a tablejump. */
2921 && ((loc2 = next_nonnote_insn (loc)) == 0
2922 || GET_CODE (loc2) != CODE_LABEL
2923 || (loc2 = next_nonnote_insn (loc2)) == 0
2924 || GET_CODE (loc2) != JUMP_INSN
2925 || (GET_CODE (PATTERN (loc2)) != ADDR_VEC
2926 && GET_CODE (PATTERN (loc2)) != ADDR_DIFF_VEC))
2927 && uid_loop[INSN_UID (loc)] == target_loop)
2928 break;
2930 if (loc == 0)
2931 for (loc = target; loc; loc = NEXT_INSN (loc))
2932 if (GET_CODE (loc) == BARRIER
2933 /* Don't move things inside a tablejump. */
2934 && ((loc2 = next_nonnote_insn (loc)) == 0
2935 || GET_CODE (loc2) != CODE_LABEL
2936 || (loc2 = next_nonnote_insn (loc2)) == 0
2937 || GET_CODE (loc2) != JUMP_INSN
2938 || (GET_CODE (PATTERN (loc2)) != ADDR_VEC
2939 && GET_CODE (PATTERN (loc2)) != ADDR_DIFF_VEC))
2940 && uid_loop[INSN_UID (loc)] == target_loop)
2941 break;
2943 if (loc)
2945 rtx cond_label = JUMP_LABEL (p);
2946 rtx new_label = get_label_after (p);
2948 /* Ensure our label doesn't go away. */
2949 LABEL_NUSES (cond_label)++;
2951 /* Verify that uid_loop is large enough and that
2952 we can invert P. */
2953 if (invert_jump (p, new_label, 1))
2955 rtx q, r;
2957 /* If no suitable BARRIER was found, create a suitable
2958 one before TARGET. Since TARGET is a fall through
2959 path, we'll need to insert a jump around our block
2960 and add a BARRIER before TARGET.
2962 This creates an extra unconditional jump outside
2963 the loop. However, the benefits of removing rarely
2964 executed instructions from inside the loop usually
2965 outweighs the cost of the extra unconditional jump
2966 outside the loop. */
2967 if (loc == 0)
2969 rtx temp;
2971 temp = gen_jump (JUMP_LABEL (insn));
2972 temp = emit_jump_insn_before (temp, target);
2973 JUMP_LABEL (temp) = JUMP_LABEL (insn);
2974 LABEL_NUSES (JUMP_LABEL (insn))++;
2975 loc = emit_barrier_before (target);
2978 /* Include the BARRIER after INSN and copy the
2979 block after LOC. */
2980 if (squeeze_notes (&new_label, &last_insn_to_move))
2981 abort ();
2982 reorder_insns (new_label, last_insn_to_move, loc);
2984 /* All those insns are now in TARGET_LOOP. */
2985 for (q = new_label;
2986 q != NEXT_INSN (last_insn_to_move);
2987 q = NEXT_INSN (q))
2988 uid_loop[INSN_UID (q)] = target_loop;
2990 /* The label jumped to by INSN is no longer a loop
2991 exit. Unless INSN does not have a label (e.g.,
2992 it is a RETURN insn), search loop->exit_labels
2993 to find its label_ref, and remove it. Also turn
2994 off LABEL_OUTSIDE_LOOP_P bit. */
2995 if (JUMP_LABEL (insn))
2997 for (q = 0, r = this_loop->exit_labels;
2998 r;
2999 q = r, r = LABEL_NEXTREF (r))
3000 if (XEXP (r, 0) == JUMP_LABEL (insn))
3002 LABEL_OUTSIDE_LOOP_P (r) = 0;
3003 if (q)
3004 LABEL_NEXTREF (q) = LABEL_NEXTREF (r);
3005 else
3006 this_loop->exit_labels = LABEL_NEXTREF (r);
3007 break;
3010 for (loop = this_loop; loop && loop != target_loop;
3011 loop = loop->outer)
3012 loop->exit_count--;
3014 /* If we didn't find it, then something is
3015 wrong. */
3016 if (! r)
3017 abort ();
3020 /* P is now a jump outside the loop, so it must be put
3021 in loop->exit_labels, and marked as such.
3022 The easiest way to do this is to just call
3023 mark_loop_jump again for P. */
3024 mark_loop_jump (PATTERN (p), this_loop);
3026 /* If INSN now jumps to the insn after it,
3027 delete INSN. */
3028 if (JUMP_LABEL (insn) != 0
3029 && (next_real_insn (JUMP_LABEL (insn))
3030 == next_real_insn (insn)))
3031 delete_related_insns (insn);
3034 /* Continue the loop after where the conditional
3035 branch used to jump, since the only branch insn
3036 in the block (if it still remains) is an inter-loop
3037 branch and hence needs no processing. */
3038 insn = NEXT_INSN (cond_label);
3040 if (--LABEL_NUSES (cond_label) == 0)
3041 delete_related_insns (cond_label);
3043 /* This loop will be continued with NEXT_INSN (insn). */
3044 insn = PREV_INSN (insn);
3051 /* If any label in X jumps to a loop different from LOOP and any of the
3052 loops it is contained in, mark the target loop invalid.
3054 For speed, we assume that X is part of a pattern of a JUMP_INSN. */
3056 static void
3057 mark_loop_jump (rtx x, struct loop *loop)
3059 struct loop *dest_loop;
3060 struct loop *outer_loop;
3061 int i;
3063 switch (GET_CODE (x))
3065 case PC:
3066 case USE:
3067 case CLOBBER:
3068 case REG:
3069 case MEM:
3070 case CONST_INT:
3071 case CONST_DOUBLE:
3072 case RETURN:
3073 return;
3075 case CONST:
3076 /* There could be a label reference in here. */
3077 mark_loop_jump (XEXP (x, 0), loop);
3078 return;
3080 case PLUS:
3081 case MINUS:
3082 case MULT:
3083 mark_loop_jump (XEXP (x, 0), loop);
3084 mark_loop_jump (XEXP (x, 1), loop);
3085 return;
3087 case LO_SUM:
3088 /* This may refer to a LABEL_REF or SYMBOL_REF. */
3089 mark_loop_jump (XEXP (x, 1), loop);
3090 return;
3092 case SIGN_EXTEND:
3093 case ZERO_EXTEND:
3094 mark_loop_jump (XEXP (x, 0), loop);
3095 return;
3097 case LABEL_REF:
3098 dest_loop = uid_loop[INSN_UID (XEXP (x, 0))];
3100 /* Link together all labels that branch outside the loop. This
3101 is used by final_[bg]iv_value and the loop unrolling code. Also
3102 mark this LABEL_REF so we know that this branch should predict
3103 false. */
3105 /* A check to make sure the label is not in an inner nested loop,
3106 since this does not count as a loop exit. */
3107 if (dest_loop)
3109 for (outer_loop = dest_loop; outer_loop;
3110 outer_loop = outer_loop->outer)
3111 if (outer_loop == loop)
3112 break;
3114 else
3115 outer_loop = NULL;
3117 if (loop && ! outer_loop)
3119 LABEL_OUTSIDE_LOOP_P (x) = 1;
3120 LABEL_NEXTREF (x) = loop->exit_labels;
3121 loop->exit_labels = x;
3123 for (outer_loop = loop;
3124 outer_loop && outer_loop != dest_loop;
3125 outer_loop = outer_loop->outer)
3126 outer_loop->exit_count++;
3129 /* If this is inside a loop, but not in the current loop or one enclosed
3130 by it, it invalidates at least one loop. */
3132 if (! dest_loop)
3133 return;
3135 /* We must invalidate every nested loop containing the target of this
3136 label, except those that also contain the jump insn. */
3138 for (; dest_loop; dest_loop = dest_loop->outer)
3140 /* Stop when we reach a loop that also contains the jump insn. */
3141 for (outer_loop = loop; outer_loop; outer_loop = outer_loop->outer)
3142 if (dest_loop == outer_loop)
3143 return;
3145 /* If we get here, we know we need to invalidate a loop. */
3146 if (loop_dump_stream && ! dest_loop->invalid)
3147 fprintf (loop_dump_stream,
3148 "\nLoop at %d ignored due to multiple entry points.\n",
3149 INSN_UID (dest_loop->start));
3151 dest_loop->invalid = 1;
3153 return;
3155 case SET:
3156 /* If this is not setting pc, ignore. */
3157 if (SET_DEST (x) == pc_rtx)
3158 mark_loop_jump (SET_SRC (x), loop);
3159 return;
3161 case IF_THEN_ELSE:
3162 mark_loop_jump (XEXP (x, 1), loop);
3163 mark_loop_jump (XEXP (x, 2), loop);
3164 return;
3166 case PARALLEL:
3167 case ADDR_VEC:
3168 for (i = 0; i < XVECLEN (x, 0); i++)
3169 mark_loop_jump (XVECEXP (x, 0, i), loop);
3170 return;
3172 case ADDR_DIFF_VEC:
3173 for (i = 0; i < XVECLEN (x, 1); i++)
3174 mark_loop_jump (XVECEXP (x, 1, i), loop);
3175 return;
3177 default:
3178 /* Strictly speaking this is not a jump into the loop, only a possible
3179 jump out of the loop. However, we have no way to link the destination
3180 of this jump onto the list of exit labels. To be safe we mark this
3181 loop and any containing loops as invalid. */
3182 if (loop)
3184 for (outer_loop = loop; outer_loop; outer_loop = outer_loop->outer)
3186 if (loop_dump_stream && ! outer_loop->invalid)
3187 fprintf (loop_dump_stream,
3188 "\nLoop at %d ignored due to unknown exit jump.\n",
3189 INSN_UID (outer_loop->start));
3190 outer_loop->invalid = 1;
3193 return;
3197 /* Return nonzero if there is a label in the range from
3198 insn INSN to and including the insn whose luid is END.
3199 INSN must have an assigned luid (i.e., it must not have
3200 been previously created by loop.c). */
3202 static int
3203 labels_in_range_p (rtx insn, int end)
3205 while (insn && INSN_LUID (insn) <= end)
3207 if (GET_CODE (insn) == CODE_LABEL)
3208 return 1;
3209 insn = NEXT_INSN (insn);
3212 return 0;
3215 /* Record that a memory reference X is being set. */
3217 static void
3218 note_addr_stored (rtx x, rtx y ATTRIBUTE_UNUSED,
3219 void *data ATTRIBUTE_UNUSED)
3221 struct loop_info *loop_info = data;
3223 if (x == 0 || GET_CODE (x) != MEM)
3224 return;
3226 /* Count number of memory writes.
3227 This affects heuristics in strength_reduce. */
3228 loop_info->num_mem_sets++;
3230 /* BLKmode MEM means all memory is clobbered. */
3231 if (GET_MODE (x) == BLKmode)
3233 if (RTX_UNCHANGING_P (x))
3234 loop_info->unknown_constant_address_altered = 1;
3235 else
3236 loop_info->unknown_address_altered = 1;
3238 return;
3241 loop_info->store_mems = gen_rtx_EXPR_LIST (VOIDmode, x,
3242 loop_info->store_mems);
3245 /* X is a value modified by an INSN that references a biv inside a loop
3246 exit test (i.e., X is somehow related to the value of the biv). If X
3247 is a pseudo that is used more than once, then the biv is (effectively)
3248 used more than once. DATA is a pointer to a loop_regs structure. */
3250 static void
3251 note_set_pseudo_multiple_uses (rtx x, rtx y ATTRIBUTE_UNUSED, void *data)
3253 struct loop_regs *regs = (struct loop_regs *) data;
3255 if (x == 0)
3256 return;
3258 while (GET_CODE (x) == STRICT_LOW_PART
3259 || GET_CODE (x) == SIGN_EXTRACT
3260 || GET_CODE (x) == ZERO_EXTRACT
3261 || GET_CODE (x) == SUBREG)
3262 x = XEXP (x, 0);
3264 if (GET_CODE (x) != REG || REGNO (x) < FIRST_PSEUDO_REGISTER)
3265 return;
3267 /* If we do not have usage information, or if we know the register
3268 is used more than once, note that fact for check_dbra_loop. */
3269 if (REGNO (x) >= max_reg_before_loop
3270 || ! regs->array[REGNO (x)].single_usage
3271 || regs->array[REGNO (x)].single_usage == const0_rtx)
3272 regs->multiple_uses = 1;
3275 /* Return nonzero if the rtx X is invariant over the current loop.
3277 The value is 2 if we refer to something only conditionally invariant.
3279 A memory ref is invariant if it is not volatile and does not conflict
3280 with anything stored in `loop_info->store_mems'. */
3282 int
3283 loop_invariant_p (const struct loop *loop, rtx x)
3285 struct loop_info *loop_info = LOOP_INFO (loop);
3286 struct loop_regs *regs = LOOP_REGS (loop);
3287 int i;
3288 enum rtx_code code;
3289 const char *fmt;
3290 int conditional = 0;
3291 rtx mem_list_entry;
3293 if (x == 0)
3294 return 1;
3295 code = GET_CODE (x);
3296 switch (code)
3298 case CONST_INT:
3299 case CONST_DOUBLE:
3300 case SYMBOL_REF:
3301 case CONST:
3302 return 1;
3304 case LABEL_REF:
3305 /* A LABEL_REF is normally invariant, however, if we are unrolling
3306 loops, and this label is inside the loop, then it isn't invariant.
3307 This is because each unrolled copy of the loop body will have
3308 a copy of this label. If this was invariant, then an insn loading
3309 the address of this label into a register might get moved outside
3310 the loop, and then each loop body would end up using the same label.
3312 We don't know the loop bounds here though, so just fail for all
3313 labels. */
3314 if (flag_old_unroll_loops)
3315 return 0;
3316 else
3317 return 1;
3319 case PC:
3320 case CC0:
3321 case UNSPEC_VOLATILE:
3322 return 0;
3324 case REG:
3325 /* We used to check RTX_UNCHANGING_P (x) here, but that is invalid
3326 since the reg might be set by initialization within the loop. */
3328 if ((x == frame_pointer_rtx || x == hard_frame_pointer_rtx
3329 || x == arg_pointer_rtx || x == pic_offset_table_rtx)
3330 && ! current_function_has_nonlocal_goto)
3331 return 1;
3333 if (LOOP_INFO (loop)->has_call
3334 && REGNO (x) < FIRST_PSEUDO_REGISTER && call_used_regs[REGNO (x)])
3335 return 0;
3337 /* Out-of-range regs can occur when we are called from unrolling.
3338 These registers created by the unroller are set in the loop,
3339 hence are never invariant.
3340 Other out-of-range regs can be generated by load_mems; those that
3341 are written to in the loop are not invariant, while those that are
3342 not written to are invariant. It would be easy for load_mems
3343 to set n_times_set correctly for these registers, however, there
3344 is no easy way to distinguish them from registers created by the
3345 unroller. */
3347 if (REGNO (x) >= (unsigned) regs->num)
3348 return 0;
3350 if (regs->array[REGNO (x)].set_in_loop < 0)
3351 return 2;
3353 return regs->array[REGNO (x)].set_in_loop == 0;
3355 case MEM:
3356 /* Volatile memory references must be rejected. Do this before
3357 checking for read-only items, so that volatile read-only items
3358 will be rejected also. */
3359 if (MEM_VOLATILE_P (x))
3360 return 0;
3362 /* See if there is any dependence between a store and this load. */
3363 mem_list_entry = loop_info->store_mems;
3364 while (mem_list_entry)
3366 if (true_dependence (XEXP (mem_list_entry, 0), VOIDmode,
3367 x, rtx_varies_p))
3368 return 0;
3370 mem_list_entry = XEXP (mem_list_entry, 1);
3373 /* It's not invalidated by a store in memory
3374 but we must still verify the address is invariant. */
3375 break;
3377 case ASM_OPERANDS:
3378 /* Don't mess with insns declared volatile. */
3379 if (MEM_VOLATILE_P (x))
3380 return 0;
3381 break;
3383 default:
3384 break;
3387 fmt = GET_RTX_FORMAT (code);
3388 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3390 if (fmt[i] == 'e')
3392 int tem = loop_invariant_p (loop, XEXP (x, i));
3393 if (tem == 0)
3394 return 0;
3395 if (tem == 2)
3396 conditional = 1;
3398 else if (fmt[i] == 'E')
3400 int j;
3401 for (j = 0; j < XVECLEN (x, i); j++)
3403 int tem = loop_invariant_p (loop, XVECEXP (x, i, j));
3404 if (tem == 0)
3405 return 0;
3406 if (tem == 2)
3407 conditional = 1;
3413 return 1 + conditional;
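/* Example: (plus (reg 100) (const_int 4)) is reported as 2 (only
   conditionally invariant) when the only sets of reg 100 in the loop
   belong to a candidate movable (set_in_loop < 0), and as 1 when
   reg 100 is not set in the loop at all.  The register number is
   hypothetical.  */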
3416 /* Return nonzero if all the insns in the loop that set REG
3417 are INSN and the immediately following insns,
3418 and if each of those insns sets REG in an invariant way
3419 (not counting uses of REG in them).
3421 The value is 2 if some of these insns are only conditionally invariant.
3423 We assume that INSN itself is the first set of REG
3424 and that its source is invariant. */
3426 static int
3427 consec_sets_invariant_p (const struct loop *loop, rtx reg, int n_sets,
3428 rtx insn)
3430 struct loop_regs *regs = LOOP_REGS (loop);
3431 rtx p = insn;
3432 unsigned int regno = REGNO (reg);
3433 rtx temp;
3434 /* Number of sets we have to insist on finding after INSN. */
3435 int count = n_sets - 1;
3436 int old = regs->array[regno].set_in_loop;
3437 int value = 0;
3438 int this;
3440 /* If N_SETS hit the limit, we can't rely on its value. */
3441 if (n_sets == 127)
3442 return 0;
3444 regs->array[regno].set_in_loop = 0;
3446 while (count > 0)
3448 enum rtx_code code;
3449 rtx set;
3451 p = NEXT_INSN (p);
3452 code = GET_CODE (p);
3454 /* If library call, skip to end of it. */
3455 if (code == INSN && (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
3456 p = XEXP (temp, 0);
3458 this = 0;
3459 if (code == INSN
3460 && (set = single_set (p))
3461 && GET_CODE (SET_DEST (set)) == REG
3462 && REGNO (SET_DEST (set)) == regno)
3464 this = loop_invariant_p (loop, SET_SRC (set));
3465 if (this != 0)
3466 value |= this;
3467 else if ((temp = find_reg_note (p, REG_EQUAL, NULL_RTX)))
3469 /* If this is a libcall, then any invariant REG_EQUAL note is OK.
3470 If this is an ordinary insn, then only CONSTANT_P REG_EQUAL
3471 notes are OK. */
3472 this = (CONSTANT_P (XEXP (temp, 0))
3473 || (find_reg_note (p, REG_RETVAL, NULL_RTX)
3474 && loop_invariant_p (loop, XEXP (temp, 0))));
3475 if (this != 0)
3476 value |= this;
3479 if (this != 0)
3480 count--;
3481 else if (code != NOTE)
3483 regs->array[regno].set_in_loop = old;
3484 return 0;
3488 regs->array[regno].set_in_loop = old;
3489 /* If loop_invariant_p ever returned 2, we return 2. */
3490 return 1 + (value & 2);
3493 #if 0
3494 /* I don't think this condition is sufficient to allow INSN
3495 to be moved, so we no longer test it. */
3497 /* Return 1 if all insns in the basic block of INSN and following INSN
3498 that set REG are invariant according to TABLE. */
3500 static int
3501 all_sets_invariant_p (rtx reg, rtx insn, short *table)
3503 rtx p = insn;
3504 int regno = REGNO (reg);
3506 while (1)
3508 enum rtx_code code;
3509 p = NEXT_INSN (p);
3510 code = GET_CODE (p);
3511 if (code == CODE_LABEL || code == JUMP_INSN)
3512 return 1;
3513 if (code == INSN && GET_CODE (PATTERN (p)) == SET
3514 && GET_CODE (SET_DEST (PATTERN (p))) == REG
3515 && REGNO (SET_DEST (PATTERN (p))) == regno)
3517 if (! loop_invariant_p (loop, SET_SRC (PATTERN (p)), table))
3518 return 0;
3522 #endif /* 0 */
3524 /* Look at all uses (not sets) of registers in X. For each, if it is
3525 the single use, set USAGE[REGNO] to INSN; if there was a previous use in
3526 a different insn, set USAGE[REGNO] to const0_rtx. */
3528 static void
3529 find_single_use_in_loop (struct loop_regs *regs, rtx insn, rtx x)
3531 enum rtx_code code = GET_CODE (x);
3532 const char *fmt = GET_RTX_FORMAT (code);
3533 int i, j;
3535 if (code == REG)
3536 regs->array[REGNO (x)].single_usage
3537 = (regs->array[REGNO (x)].single_usage != 0
3538 && regs->array[REGNO (x)].single_usage != insn)
3539 ? const0_rtx : insn;
3541 else if (code == SET)
3543 /* Don't count SET_DEST if it is a REG; otherwise count things
3544 in SET_DEST because if a register is partially modified, it won't
3545 show up as a potential movable so we don't care how USAGE is set
3546 for it. */
3547 if (GET_CODE (SET_DEST (x)) != REG)
3548 find_single_use_in_loop (regs, insn, SET_DEST (x));
3549 find_single_use_in_loop (regs, insn, SET_SRC (x));
3551 else
3552 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3554 if (fmt[i] == 'e' && XEXP (x, i) != 0)
3555 find_single_use_in_loop (regs, insn, XEXP (x, i));
3556 else if (fmt[i] == 'E')
3557 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3558 find_single_use_in_loop (regs, insn, XVECEXP (x, i, j));
3562 /* Count and record any set in X which is contained in INSN. Update
3563 REGS->array[I].MAY_NOT_OPTIMIZE and LAST_SET for any register I set
3564 in X. */
3566 static void
3567 count_one_set (struct loop_regs *regs, rtx insn, rtx x, rtx *last_set)
3569 if (GET_CODE (x) == CLOBBER && GET_CODE (XEXP (x, 0)) == REG)
3570 /* Don't move a reg that has an explicit clobber.
3571 It's not worth the pain to try to do it correctly. */
3572 regs->array[REGNO (XEXP (x, 0))].may_not_optimize = 1;
3574 if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
3576 rtx dest = SET_DEST (x);
3577 while (GET_CODE (dest) == SUBREG
3578 || GET_CODE (dest) == ZERO_EXTRACT
3579 || GET_CODE (dest) == SIGN_EXTRACT
3580 || GET_CODE (dest) == STRICT_LOW_PART)
3581 dest = XEXP (dest, 0);
3582 if (GET_CODE (dest) == REG)
3584 int i;
3585 int regno = REGNO (dest);
3586 for (i = 0; i < LOOP_REGNO_NREGS (regno, dest); i++)
3588 /* If this is the first setting of this reg
3589 in current basic block, and it was set before,
3590 it must be set in two basic blocks, so it cannot
3591 be moved out of the loop. */
3592 if (regs->array[regno].set_in_loop > 0
3593 && last_set[regno] == 0)
3594 regs->array[regno+i].may_not_optimize = 1;
3595 /* If this is not first setting in current basic block,
3596 see if reg was used in between previous one and this.
3597 If so, neither one can be moved. */
3598 if (last_set[regno] != 0
3599 && reg_used_between_p (dest, last_set[regno], insn))
3600 regs->array[regno+i].may_not_optimize = 1;
3601 if (regs->array[regno+i].set_in_loop < 127)
3602 ++regs->array[regno+i].set_in_loop;
3603 last_set[regno+i] = insn;
3609 /* Given a loop that is bounded by LOOP->START and LOOP->END and that
3610 is entered at LOOP->SCAN_START, return 1 if the register set in SET
3611 contained in insn INSN is used by any insn that precedes INSN in
3612 cyclic order starting from the loop entry point.
3614 We don't want to use INSN_LUID here because if we restrict INSN to those
3615 that have a valid INSN_LUID, it means we cannot move an invariant out
3616 from an inner loop past two loops. */
3618 static int
3619 loop_reg_used_before_p (const struct loop *loop, rtx set, rtx insn)
3621 rtx reg = SET_DEST (set);
3622 rtx p;
3624 /* Scan forward checking for register usage. If we hit INSN, we
3625 are done. Otherwise, if we hit LOOP->END, wrap around to LOOP->START. */
3626 for (p = loop->scan_start; p != insn; p = NEXT_INSN (p))
3628 if (INSN_P (p) && reg_overlap_mentioned_p (reg, PATTERN (p)))
3629 return 1;
3631 if (p == loop->end)
3632 p = loop->start;
3635 return 0;
3639 /* Information we collect about arrays that we might want to prefetch. */
3640 struct prefetch_info
3642 struct iv_class *class; /* Class this prefetch is based on. */
3643 struct induction *giv; /* GIV this prefetch is based on. */
3644 rtx base_address; /* Start prefetching from this address plus
3645 index. */
3646 HOST_WIDE_INT index;
3647 HOST_WIDE_INT stride; /* Prefetch stride in bytes in each
3648 iteration. */
3649 unsigned int bytes_accessed; /* Sum of sizes of all accesses to this
3650 prefetch area in one iteration. */
3651 unsigned int total_bytes; /* Total bytes loop will access in this block.
3652 This is computed only for loops with known
3653 iteration counts and is 0xffffffff
3654 otherwise. */
3655 int prefetch_in_loop; /* Number of prefetch insns in loop. */
3656 int prefetch_before_loop; /* Number of prefetch insns before loop. */
3657 unsigned int write : 1; /* 1 for read/write prefetches. */
3660 /* Data used by check_store function. */
3661 struct check_store_data
3663 rtx mem_address;
3664 int mem_write;
3667 static void check_store (rtx, rtx, void *);
3668 static void emit_prefetch_instructions (struct loop *);
3669 static int rtx_equal_for_prefetch_p (rtx, rtx);
3671 /* Set mem_write when a store to mem_address is found. Used as a callback
3672 to note_stores. */
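/* For illustration, the way this callback is meant to be driven (this
   mirrors its use in emit_prefetch_instructions below; INSN and ADDR
   are placeholders for the example):

     struct check_store_data d;

     d.mem_address = addr;
     d.mem_write = 0;
     note_stores (PATTERN (insn), check_store, &d);

   Afterwards d.mem_write is nonzero exactly when the pattern of INSN
   contains a store whose address is rtx_equal_p to ADDR.  */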
3673 static void
3674 check_store (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
3676 struct check_store_data *d = (struct check_store_data *) data;
3678 if ((GET_CODE (x) == MEM) && rtx_equal_p (d->mem_address, XEXP (x, 0)))
3679 d->mem_write = 1;
3682 /* Like rtx_equal_p, but attempts to swap commutative operands. This is
3683 important to get some addresses combined. Later more sophisticated
3684 transformations can be added when necessary.
3686 ??? The same trick of swapping operands is done at several other places.
3687 It would be nice to develop some common way to handle this. */
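/* For instance (register numbers invented for the example), the two addresses

     (plus:SI (reg:SI 60) (reg:SI 61))
     (plus:SI (reg:SI 61) (reg:SI 60))

   are not rtx_equal_p, but PLUS is commutative, so this function treats
   them as equal; that is enough to let two GIVs that walk the same array
   via differently ordered address arithmetic share one prefetch stream.  */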
3689 static int
3690 rtx_equal_for_prefetch_p (rtx x, rtx y)
3692 int i;
3693 int j;
3694 enum rtx_code code = GET_CODE (x);
3695 const char *fmt;
3697 if (x == y)
3698 return 1;
3699 if (code != GET_CODE (y))
3700 return 0;
3702 code = GET_CODE (x);
3704 if (GET_RTX_CLASS (code) == 'c')
3706 return ((rtx_equal_for_prefetch_p (XEXP (x, 0), XEXP (y, 0))
3707 && rtx_equal_for_prefetch_p (XEXP (x, 1), XEXP (y, 1)))
3708 || (rtx_equal_for_prefetch_p (XEXP (x, 0), XEXP (y, 1))
3709 && rtx_equal_for_prefetch_p (XEXP (x, 1), XEXP (y, 0))));
3711 /* Compare the elements. If any pair of corresponding elements fails to
3712 match, return 0 for the whole thing. */
3714 fmt = GET_RTX_FORMAT (code);
3715 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3717 switch (fmt[i])
3719 case 'w':
3720 if (XWINT (x, i) != XWINT (y, i))
3721 return 0;
3722 break;
3724 case 'i':
3725 if (XINT (x, i) != XINT (y, i))
3726 return 0;
3727 break;
3729 case 'E':
3730 /* Two vectors must have the same length. */
3731 if (XVECLEN (x, i) != XVECLEN (y, i))
3732 return 0;
3734 /* And the corresponding elements must match. */
3735 for (j = 0; j < XVECLEN (x, i); j++)
3736 if (rtx_equal_for_prefetch_p (XVECEXP (x, i, j),
3737 XVECEXP (y, i, j)) == 0)
3738 return 0;
3739 break;
3741 case 'e':
3742 if (rtx_equal_for_prefetch_p (XEXP (x, i), XEXP (y, i)) == 0)
3743 return 0;
3744 break;
3746 case 's':
3747 if (strcmp (XSTR (x, i), XSTR (y, i)))
3748 return 0;
3749 break;
3751 case 'u':
3752 /* These are just backpointers, so they don't matter. */
3753 break;
3755 case '0':
3756 break;
3758 /* It is believed that rtx's at this level will never
3759 contain anything but integers and other rtx's,
3760 except for within LABEL_REFs and SYMBOL_REFs. */
3761 default:
3762 abort ();
3765 return 1;
3768 /* Remove constant addition value from the expression X (when present)
3769 and return it. */
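/* Two examples of the intended effect (register number invented; the
   forms correspond to the cases handled below):

     *x = (plus:SI (reg:SI 100) (const_int 12))
         becomes  *x = (reg:SI 100),              returning 12

     *x = (const:SI (plus:SI (symbol_ref:SI ("a")) (const_int 8)))
         becomes  *x = (symbol_ref:SI ("a")),     returning 8

   The prefetch code below uses the returned constant as a prefetch
   block's INDEX and what is left in *X as its BASE_ADDRESS.  */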
3771 static HOST_WIDE_INT
3772 remove_constant_addition (rtx *x)
3774 HOST_WIDE_INT addval = 0;
3775 rtx exp = *x;
3777 /* Avoid clobbering a shared CONST expression. */
3778 if (GET_CODE (exp) == CONST)
3780 if (GET_CODE (XEXP (exp, 0)) == PLUS
3781 && GET_CODE (XEXP (XEXP (exp, 0), 0)) == SYMBOL_REF
3782 && GET_CODE (XEXP (XEXP (exp, 0), 1)) == CONST_INT)
3784 *x = XEXP (XEXP (exp, 0), 0);
3785 return INTVAL (XEXP (XEXP (exp, 0), 1));
3787 return 0;
3790 if (GET_CODE (exp) == CONST_INT)
3792 addval = INTVAL (exp);
3793 *x = const0_rtx;
3796 /* For a PLUS expression, recurse on both operands. */
3797 else if (GET_CODE (exp) == PLUS)
3799 addval += remove_constant_addition (&XEXP (exp, 0));
3800 addval += remove_constant_addition (&XEXP (exp, 1));
3802 /* If either operand was a constant, it has been replaced by const0_rtx
3803 above; remove that extra zero from the expression. */
3804 if (XEXP (exp, 0) == const0_rtx)
3805 *x = XEXP (exp, 1);
3806 else if (XEXP (exp, 1) == const0_rtx)
3807 *x = XEXP (exp, 0);
3810 return addval;
3813 /* Attempt to identify accesses to arrays that are most likely to cause cache
3814 misses, and emit prefetch instructions a few prefetch blocks forward.
3816 To detect the arrays we use the GIV information that was collected by the
3817 strength reduction pass.
3819 The prefetch instructions are generated after the GIV information has been collected
3820 and before the strength reduction process. The new GIVs are injected into
3821 the strength reduction tables, so the prefetch addresses are optimized as
3822 well.
3824 GIVs are split into base address, stride, and constant addition values.
3825 GIVs with the same address, stride and close addition values are combined
3826 into a single prefetch. Also writes to GIVs are detected, so that prefetch
3827 for write instructions can be used for the block we write to, on machines
3828 that support write prefetches.
3830 Several heuristics are used to determine when to prefetch. They are
3831 controlled by defined symbols that can be overridden for each target. */
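/* A sketch of the kind of loop these heuristics target (the function,
   array names and element type are invented for the illustration):

     void
     saxpy (float *x, float *y, float a, int n)
     {
       int i;

       for (i = 0; i < n; i++)
         y[i] = y[i] + a * x[i];
     }

   Here `i' is the BIV.  The address GIVs `&x[i]' and `&y[i]' decompose
   into base addresses `x' and `y', a stride of 4 bytes per iteration
   and a constant index of 0, giving two prefetch_info entries: a
   read-only one for `x' and a read/write one for `y', since check_store
   notices the store through `&y[i]'.  With a known trip count N,
   total_bytes for each entry is 4 * N.  */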
3833 static void
3834 emit_prefetch_instructions (struct loop *loop)
3836 int num_prefetches = 0;
3837 int num_real_prefetches = 0;
3838 int num_real_write_prefetches = 0;
3839 int num_prefetches_before = 0;
3840 int num_write_prefetches_before = 0;
3841 int ahead = 0;
3842 int i;
3843 struct iv_class *bl;
3844 struct induction *iv;
3845 struct prefetch_info info[MAX_PREFETCHES];
3846 struct loop_ivs *ivs = LOOP_IVS (loop);
3848 if (!HAVE_prefetch)
3849 return;
3851 /* Consider only loops w/o calls. When the loop contains a call, it is probably
3852 slow enough already that the memory latency is hidden. */
3853 if (PREFETCH_NO_CALL && LOOP_INFO (loop)->has_call)
3855 if (loop_dump_stream)
3856 fprintf (loop_dump_stream, "Prefetch: ignoring loop: has call.\n");
3858 return;
3861 /* Don't prefetch in loops known to have few iterations. */
3862 if (PREFETCH_NO_LOW_LOOPCNT
3863 && LOOP_INFO (loop)->n_iterations
3864 && LOOP_INFO (loop)->n_iterations <= PREFETCH_LOW_LOOPCNT)
3866 if (loop_dump_stream)
3867 fprintf (loop_dump_stream,
3868 "Prefetch: ignoring loop: not enough iterations.\n");
3869 return;
3872 /* Search all induction variables and pick those interesting for the prefetch
3873 machinery. */
3874 for (bl = ivs->list; bl; bl = bl->next)
3876 struct induction *biv = bl->biv, *biv1;
3877 int basestride = 0;
3879 biv1 = biv;
3881 /* Expect all BIVs to be executed in each iteration. This makes our
3882 analysis more conservative. */
3883 while (biv1)
3885 /* Discard non-constant additions that we can't handle well yet, and
3886 BIVs that are executed multiple times; such BIVs ought to be
3887 handled in the nested loop. We accept not_every_iteration BIVs,
3888 since these only result in larger strides and make our
3889 heuristics more conservative. */
3890 if (GET_CODE (biv->add_val) != CONST_INT)
3892 if (loop_dump_stream)
3894 fprintf (loop_dump_stream,
3895 "Prefetch: ignoring biv %d: non-constant addition at insn %d:",
3896 REGNO (biv->src_reg), INSN_UID (biv->insn));
3897 print_rtl (loop_dump_stream, biv->add_val);
3898 fprintf (loop_dump_stream, "\n");
3900 break;
3903 if (biv->maybe_multiple)
3905 if (loop_dump_stream)
3907 fprintf (loop_dump_stream,
3908 "Prefetch: ignoring biv %d: maybe_multiple at insn %i:",
3909 REGNO (biv->src_reg), INSN_UID (biv->insn));
3910 print_rtl (loop_dump_stream, biv->add_val);
3911 fprintf (loop_dump_stream, "\n");
3913 break;
3916 basestride += INTVAL (biv1->add_val);
3917 biv1 = biv1->next_iv;
3920 if (biv1 || !basestride)
3921 continue;
3923 for (iv = bl->giv; iv; iv = iv->next_iv)
3925 rtx address;
3926 rtx temp;
3927 HOST_WIDE_INT index = 0;
3928 int add = 1;
3929 HOST_WIDE_INT stride = 0;
3930 int stride_sign = 1;
3931 struct check_store_data d;
3932 const char *ignore_reason = NULL;
3933 int size = GET_MODE_SIZE (GET_MODE (iv));
3935 /* See whether an induction variable is interesting to us and if
3936 not, report the reason. */
3937 if (iv->giv_type != DEST_ADDR)
3938 ignore_reason = "giv is not a destination address";
3940 /* We are interested only in constant stride memory references
3941 in order to be able to compute density easily. */
3942 else if (GET_CODE (iv->mult_val) != CONST_INT)
3943 ignore_reason = "stride is not constant";
3945 else
3947 stride = INTVAL (iv->mult_val) * basestride;
3948 if (stride < 0)
3950 stride = -stride;
3951 stride_sign = -1;
3954 /* On some targets, reversed order prefetches are not
3955 worthwhile. */
3956 if (PREFETCH_NO_REVERSE_ORDER && stride_sign < 0)
3957 ignore_reason = "reversed order stride";
3959 /* Prefetch of accesses with an extreme stride might not be
3960 worthwhile, either. */
3961 else if (PREFETCH_NO_EXTREME_STRIDE
3962 && stride > PREFETCH_EXTREME_STRIDE)
3963 ignore_reason = "extreme stride";
3965 /* Ignore GIVs with varying add values; we can't predict the
3966 value for the next iteration. */
3967 else if (!loop_invariant_p (loop, iv->add_val))
3968 ignore_reason = "giv has varying add value";
3970 /* Ignore GIVs in the nested loops; they ought to have been
3971 handled already. */
3972 else if (iv->maybe_multiple)
3973 ignore_reason = "giv is in nested loop";
3976 if (ignore_reason != NULL)
3978 if (loop_dump_stream)
3979 fprintf (loop_dump_stream,
3980 "Prefetch: ignoring giv at %d: %s.\n",
3981 INSN_UID (iv->insn), ignore_reason);
3982 continue;
3985 /* Determine the pointer to the basic array we are examining. It is
3986 the sum of the BIV's initial value and the GIV's add_val. */
3987 address = copy_rtx (iv->add_val);
3988 temp = copy_rtx (bl->initial_value);
3990 address = simplify_gen_binary (PLUS, Pmode, temp, address);
3991 index = remove_constant_addition (&address);
3993 d.mem_write = 0;
3994 d.mem_address = *iv->location;
3996 /* When the GIV is not always executed, we might be better off by
3997 not dirtying the cache pages. */
3998 if (PREFETCH_CONDITIONAL || iv->always_executed)
3999 note_stores (PATTERN (iv->insn), check_store, &d);
4000 else
4002 if (loop_dump_stream)
4003 fprintf (loop_dump_stream, "Prefetch: Ignoring giv at %d: %s\n",
4004 INSN_UID (iv->insn), "in conditional code.");
4005 continue;
4008 /* Attempt to find another prefetch to the same array and see if we
4009 can merge this one. */
4010 for (i = 0; i < num_prefetches; i++)
4011 if (rtx_equal_for_prefetch_p (address, info[i].base_address)
4012 && stride == info[i].stride)
4014 /* If both access the same array (the same location,
4015 differing only by a small constant index), merge
4016 the prefetches. Just keep the later one; the earlier one will
4017 get prefetched from the previous iteration.
4018 The artificial threshold should not be too small,
4019 but also not bigger than the small portion of memory usually
4020 traversed by a single loop. */
4021 if (index >= info[i].index
4022 && index - info[i].index < PREFETCH_EXTREME_DIFFERENCE)
4024 info[i].write |= d.mem_write;
4025 info[i].bytes_accessed += size;
4026 info[i].index = index;
4027 info[i].giv = iv;
4028 info[i].class = bl;
4029 info[num_prefetches].base_address = address;
4030 add = 0;
4031 break;
4034 if (index < info[i].index
4035 && info[i].index - index < PREFETCH_EXTREME_DIFFERENCE)
4037 info[i].write |= d.mem_write;
4038 info[i].bytes_accessed += size;
4039 add = 0;
4040 break;
4044 /* Merging failed. */
4045 if (add)
4047 info[num_prefetches].giv = iv;
4048 info[num_prefetches].class = bl;
4049 info[num_prefetches].index = index;
4050 info[num_prefetches].stride = stride;
4051 info[num_prefetches].base_address = address;
4052 info[num_prefetches].write = d.mem_write;
4053 info[num_prefetches].bytes_accessed = size;
4054 num_prefetches++;
4055 if (num_prefetches >= MAX_PREFETCHES)
4057 if (loop_dump_stream)
4058 fprintf (loop_dump_stream,
4059 "Maximal number of prefetches exceeded.\n");
4060 return;
4066 for (i = 0; i < num_prefetches; i++)
4068 int density;
4070 /* Attempt to calculate the total number of bytes fetched by all
4071 iterations of the loop. Avoid overflow. */
4072 if (LOOP_INFO (loop)->n_iterations
4073 && ((unsigned HOST_WIDE_INT) (0xffffffff / info[i].stride)
4074 >= LOOP_INFO (loop)->n_iterations))
4075 info[i].total_bytes = info[i].stride * LOOP_INFO (loop)->n_iterations;
4076 else
4077 info[i].total_bytes = 0xffffffff;
4079 density = info[i].bytes_accessed * 100 / info[i].stride;
4081 /* Prefetch might be worthwhile only when the loads/stores are dense. */
4082 if (PREFETCH_ONLY_DENSE_MEM)
4083 if (density * 256 > PREFETCH_DENSE_MEM * 100
4084 && (info[i].total_bytes / PREFETCH_BLOCK
4085 >= PREFETCH_BLOCKS_BEFORE_LOOP_MIN))
4087 info[i].prefetch_before_loop = 1;
4088 info[i].prefetch_in_loop
4089 = (info[i].total_bytes / PREFETCH_BLOCK
4090 > PREFETCH_BLOCKS_BEFORE_LOOP_MAX);
4092 else
4094 info[i].prefetch_in_loop = 0, info[i].prefetch_before_loop = 0;
4095 if (loop_dump_stream)
4096 fprintf (loop_dump_stream,
4097 "Prefetch: ignoring giv at %d: %d%% density is too low.\n",
4098 INSN_UID (info[i].giv->insn), density);
4100 else
4101 info[i].prefetch_in_loop = 1, info[i].prefetch_before_loop = 1;
4103 /* Find how many prefetch instructions we'll use within the loop. */
4104 if (info[i].prefetch_in_loop != 0)
4106 info[i].prefetch_in_loop = ((info[i].stride + PREFETCH_BLOCK - 1)
4107 / PREFETCH_BLOCK);
4108 num_real_prefetches += info[i].prefetch_in_loop;
4109 if (info[i].write)
4110 num_real_write_prefetches += info[i].prefetch_in_loop;
4114 /* Determine how many iterations ahead to prefetch within the loop, based
4115 on how many prefetches we currently expect to do within the loop. */
4116 if (num_real_prefetches != 0)
4118 if ((ahead = SIMULTANEOUS_PREFETCHES / num_real_prefetches) == 0)
4120 if (loop_dump_stream)
4121 fprintf (loop_dump_stream,
4122 "Prefetch: ignoring prefetches within loop: ahead is zero; %d < %d\n",
4123 SIMULTANEOUS_PREFETCHES, num_real_prefetches);
4124 num_real_prefetches = 0, num_real_write_prefetches = 0;
4127 /* We'll also use AHEAD to determine how many prefetch instructions to
4128 emit before a loop, so don't leave it zero. */
4129 if (ahead == 0)
4130 ahead = PREFETCH_BLOCKS_BEFORE_LOOP_MAX;
4132 for (i = 0; i < num_prefetches; i++)
4134 /* Update if we've decided not to prefetch anything within the loop. */
4135 if (num_real_prefetches == 0)
4136 info[i].prefetch_in_loop = 0;
4138 /* Find how many prefetch instructions we'll use before the loop. */
4139 if (info[i].prefetch_before_loop != 0)
4141 int n = info[i].total_bytes / PREFETCH_BLOCK;
4142 if (n > ahead)
4143 n = ahead;
4144 info[i].prefetch_before_loop = n;
4145 num_prefetches_before += n;
4146 if (info[i].write)
4147 num_write_prefetches_before += n;
4150 if (loop_dump_stream)
4152 if (info[i].prefetch_in_loop == 0
4153 && info[i].prefetch_before_loop == 0)
4154 continue;
4155 fprintf (loop_dump_stream, "Prefetch insn: %d",
4156 INSN_UID (info[i].giv->insn));
4157 fprintf (loop_dump_stream,
4158 "; in loop: %d; before: %d; %s\n",
4159 info[i].prefetch_in_loop,
4160 info[i].prefetch_before_loop,
4161 info[i].write ? "read/write" : "read only");
4162 fprintf (loop_dump_stream,
4163 " density: %d%%; bytes_accessed: %u; total_bytes: %u\n",
4164 (int) (info[i].bytes_accessed * 100 / info[i].stride),
4165 info[i].bytes_accessed, info[i].total_bytes);
4166 fprintf (loop_dump_stream, " index: " HOST_WIDE_INT_PRINT_DEC
4167 "; stride: " HOST_WIDE_INT_PRINT_DEC "; address: ",
4168 info[i].index, info[i].stride);
4169 print_rtl (loop_dump_stream, info[i].base_address);
4170 fprintf (loop_dump_stream, "\n");
4174 if (num_real_prefetches + num_prefetches_before > 0)
4176 /* Record that this loop uses prefetch instructions. */
4177 LOOP_INFO (loop)->has_prefetch = 1;
4179 if (loop_dump_stream)
4181 fprintf (loop_dump_stream, "Real prefetches needed within loop: %d (write: %d)\n",
4182 num_real_prefetches, num_real_write_prefetches);
4183 fprintf (loop_dump_stream, "Real prefetches needed before loop: %d (write: %d)\n",
4184 num_prefetches_before, num_write_prefetches_before);
4188 for (i = 0; i < num_prefetches; i++)
4190 int y;
4192 for (y = 0; y < info[i].prefetch_in_loop; y++)
4194 rtx loc = copy_rtx (*info[i].giv->location);
4195 rtx insn;
4196 int bytes_ahead = PREFETCH_BLOCK * (ahead + y);
4197 rtx before_insn = info[i].giv->insn;
4198 rtx prev_insn = PREV_INSN (info[i].giv->insn);
4199 rtx seq;
4201 /* We can save some effort by offsetting the address on
4202 architectures with offsettable memory references. */
4203 if (offsettable_address_p (0, VOIDmode, loc))
4204 loc = plus_constant (loc, bytes_ahead);
4205 else
4207 rtx reg = gen_reg_rtx (Pmode);
4208 loop_iv_add_mult_emit_before (loop, loc, const1_rtx,
4209 GEN_INT (bytes_ahead), reg,
4210 0, before_insn);
4211 loc = reg;
4214 start_sequence ();
4215 /* Make sure the address operand is valid for prefetch. */
4216 if (! (*insn_data[(int)CODE_FOR_prefetch].operand[0].predicate)
4217 (loc, insn_data[(int)CODE_FOR_prefetch].operand[0].mode))
4218 loc = force_reg (Pmode, loc);
4219 emit_insn (gen_prefetch (loc, GEN_INT (info[i].write),
4220 GEN_INT (3)));
4221 seq = get_insns ();
4222 end_sequence ();
4223 emit_insn_before (seq, before_insn);
4225 /* Check all insns emitted and record the new GIV
4226 information. */
4227 insn = NEXT_INSN (prev_insn);
4228 while (insn != before_insn)
4230 insn = check_insn_for_givs (loop, insn,
4231 info[i].giv->always_executed,
4232 info[i].giv->maybe_multiple);
4233 insn = NEXT_INSN (insn);
4237 if (PREFETCH_BEFORE_LOOP)
4239 /* Emit insns before the loop to fetch the first cache lines or,
4240 if we're not prefetching within the loop, everything we expect
4241 to need. */
4242 for (y = 0; y < info[i].prefetch_before_loop; y++)
4244 rtx reg = gen_reg_rtx (Pmode);
4245 rtx loop_start = loop->start;
4246 rtx init_val = info[i].class->initial_value;
4247 rtx add_val = simplify_gen_binary (PLUS, Pmode,
4248 info[i].giv->add_val,
4249 GEN_INT (y * PREFETCH_BLOCK));
4251 /* Functions called by LOOP_IV_ADD_EMIT_BEFORE expect a
4252 non-constant INIT_VAL to have the same mode as REG, which
4253 in this case we know to be Pmode. */
4254 if (GET_MODE (init_val) != Pmode && !CONSTANT_P (init_val))
4256 rtx seq;
4258 start_sequence ();
4259 init_val = convert_to_mode (Pmode, init_val, 0);
4260 seq = get_insns ();
4261 end_sequence ();
4262 loop_insn_emit_before (loop, 0, loop_start, seq);
4264 loop_iv_add_mult_emit_before (loop, init_val,
4265 info[i].giv->mult_val,
4266 add_val, reg, 0, loop_start);
4267 emit_insn_before (gen_prefetch (reg, GEN_INT (info[i].write),
4268 GEN_INT (3)),
4269 loop_start);
4274 return;
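/* A worked illustration of the bookkeeping above, assuming a target with
   32-byte prefetch blocks and 3 simultaneous prefetches (the stream
   shapes are invented; only the arithmetic matters):

     Two dense streams, each accessing 8 bytes per iteration with an
     8-byte stride, in a loop with an unknown trip count:

       density             = 8 * 100 / 8          = 100 (percent)
       prefetch_in_loop    = (8 + 32 - 1) / 32    = 1 per stream
       num_real_prefetches                        = 2
       ahead               = 3 / 2                = 1

   Each stream therefore gets one prefetch insn in the loop body,
   fetching 32 * (ahead + 0) = 32 bytes beyond the current access, and
   (when PREFETCH_BEFORE_LOOP allows it) up to `ahead' prefetch insns
   in front of the loop to cover the first blocks.  */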
4277 /* Communication with routines called via `note_stores'. */
4279 static rtx note_insn;
4281 /* Dummy register to have nonzero DEST_REG for DEST_ADDR type givs. */
4283 static rtx addr_placeholder;
4285 /* ??? Unfinished optimizations, and possible future optimizations,
4286 for the strength reduction code. */
4288 /* ??? The interaction of biv elimination, and recognition of 'constant'
4289 bivs, may cause problems. */
4291 /* ??? Add heuristics so that DEST_ADDR strength reduction does not cause
4292 performance problems.
4294 Perhaps don't eliminate things that can be combined with an addressing
4295 mode. Find all givs that have the same biv, mult_val, and add_val;
4296 then for each giv, check to see if its only use dies in a following
4297 memory address. If so, generate a new memory address and check to see
4298 if it is valid. If it is valid, then store the modified memory address,
4299 otherwise, mark the giv as not done so that it will get its own iv. */
4301 /* ??? Could try to optimize branches when it is known that a biv is always
4302 positive. */
4304 /* ??? When replacing a biv in a compare insn, we should replace it with the closest
4305 giv so that an optimized branch can still be recognized by the combiner,
4306 e.g. the VAX acb insn. */
4308 /* ??? Many of the checks involving uid_luid could be simplified if regscan
4309 was rerun in loop_optimize whenever a register was added or moved.
4310 Also, some of the optimizations could be a little less conservative. */
4312 /* Scan the loop body and call FNCALL for each insn. In addition to the
4313 LOOP and INSN parameters pass MAYBE_MULTIPLE and NOT_EVERY_ITERATION to the
4314 callback.
4316 NOT_EVERY_ITERATION is 1 if current insn is not known to be executed at
4317 least once for every loop iteration except for the last one.
4319 MAYBE_MULTIPLE is 1 if current insn may be executed more than once for every
4320 loop iteration.
4322 void
4323 for_each_insn_in_loop (struct loop *loop, loop_insn_callback fncall)
4325 int not_every_iteration = 0;
4326 int maybe_multiple = 0;
4327 int past_loop_latch = 0;
4328 int loop_depth = 0;
4329 rtx p;
4331 /* If loop_scan_start points to the loop exit test, we have to be wary of
4332 subversive use of gotos inside expression statements. */
4333 if (prev_nonnote_insn (loop->scan_start) != prev_nonnote_insn (loop->start))
4334 maybe_multiple = back_branch_in_range_p (loop, loop->scan_start);
4336 /* Scan through loop and update NOT_EVERY_ITERATION and MAYBE_MULTIPLE. */
4337 for (p = next_insn_in_loop (loop, loop->scan_start);
4338 p != NULL_RTX;
4339 p = next_insn_in_loop (loop, p))
4341 p = fncall (loop, p, not_every_iteration, maybe_multiple);
4343 /* Past CODE_LABEL, we get to insns that may be executed multiple
4344 times. The only way we can be sure that they can't is if every
4345 jump insn between here and the end of the loop either
4346 returns, exits the loop, is a jump to a location that is still
4347 behind the label, or is a jump to the loop start. */
4349 if (GET_CODE (p) == CODE_LABEL)
4351 rtx insn = p;
4353 maybe_multiple = 0;
4355 while (1)
4357 insn = NEXT_INSN (insn);
4358 if (insn == loop->scan_start)
4359 break;
4360 if (insn == loop->end)
4362 if (loop->top != 0)
4363 insn = loop->top;
4364 else
4365 break;
4366 if (insn == loop->scan_start)
4367 break;
4370 if (GET_CODE (insn) == JUMP_INSN
4371 && GET_CODE (PATTERN (insn)) != RETURN
4372 && (!any_condjump_p (insn)
4373 || (JUMP_LABEL (insn) != 0
4374 && JUMP_LABEL (insn) != loop->scan_start
4375 && !loop_insn_first_p (p, JUMP_LABEL (insn)))))
4377 maybe_multiple = 1;
4378 break;
4383 /* Past a jump, we get to insns for which we can't count
4384 on whether they will be executed during each iteration. */
4385 /* This code appears twice in strength_reduce. There is also similar
4386 code in scan_loop. */
4387 if (GET_CODE (p) == JUMP_INSN
4388 /* If we enter the loop in the middle, and scan around to the
4389 beginning, don't set not_every_iteration for that.
4390 This can be any kind of jump, since we want to know if insns
4391 will be executed if the loop is executed. */
4392 && !(JUMP_LABEL (p) == loop->top
4393 && ((NEXT_INSN (NEXT_INSN (p)) == loop->end
4394 && any_uncondjump_p (p))
4395 || (NEXT_INSN (p) == loop->end && any_condjump_p (p)))))
4397 rtx label = 0;
4399 /* If this is a jump outside the loop, then it also doesn't
4400 matter. Check to see if the target of this branch is on the
4401 loop->exits_labels list. */
4403 for (label = loop->exit_labels; label; label = LABEL_NEXTREF (label))
4404 if (XEXP (label, 0) == JUMP_LABEL (p))
4405 break;
4407 if (!label)
4408 not_every_iteration = 1;
4411 else if (GET_CODE (p) == NOTE)
4413 /* At the virtual top of a converted loop, insns are again known to
4414 be executed each iteration: logically, the loop begins here
4415 even though the exit code has been duplicated.
4417 Insns are also again known to be executed each iteration at
4418 the LOOP_CONT note. */
4419 if ((NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_VTOP
4420 || NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_CONT)
4421 && loop_depth == 0)
4422 not_every_iteration = 0;
4423 else if (NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_BEG)
4424 loop_depth++;
4425 else if (NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_END)
4426 loop_depth--;
4429 /* Note if we pass a loop latch. If we do, then we cannot clear
4430 NOT_EVERY_ITERATION below when we pass the last CODE_LABEL in
4431 a loop since a jump before the last CODE_LABEL may have started
4432 a new loop iteration.
4434 Note that LOOP_TOP is only set for rotated loops and we need
4435 this check for all loops, so compare against the CODE_LABEL
4436 which immediately follows LOOP_START. */
4437 if (GET_CODE (p) == JUMP_INSN
4438 && JUMP_LABEL (p) == NEXT_INSN (loop->start))
4439 past_loop_latch = 1;
4441 /* Unlike in the code motion pass where MAYBE_NEVER indicates that
4442 an insn may never be executed, NOT_EVERY_ITERATION indicates whether
4443 or not an insn is known to be executed each iteration of the
4444 loop, whether or not any iterations are known to occur.
4446 Therefore, if we have just passed a label and have no more labels
4447 between here and the test insn of the loop, and we have not passed
4448 a jump to the top of the loop, then we know these insns will be
4449 executed each iteration. */
4451 if (not_every_iteration
4452 && !past_loop_latch
4453 && GET_CODE (p) == CODE_LABEL
4454 && no_labels_between_p (p, loop->end)
4455 && loop_insn_first_p (p, loop->cont))
4456 not_every_iteration = 0;
4460 static void
4461 loop_bivs_find (struct loop *loop)
4463 struct loop_regs *regs = LOOP_REGS (loop);
4464 struct loop_ivs *ivs = LOOP_IVS (loop);
4465 /* Temporary list pointers for traversing ivs->list. */
4466 struct iv_class *bl, **backbl;
4468 ivs->list = 0;
4470 for_each_insn_in_loop (loop, check_insn_for_bivs);
4472 /* Scan ivs->list to remove all regs that proved not to be bivs.
4473 Make a sanity check against regs->n_times_set. */
4474 for (backbl = &ivs->list, bl = *backbl; bl; bl = bl->next)
4476 if (REG_IV_TYPE (ivs, bl->regno) != BASIC_INDUCT
4477 /* Above happens if register modified by subreg, etc. */
4478 /* Make sure it is not recognized as a basic induction var: */
4479 || regs->array[bl->regno].n_times_set != bl->biv_count
4480 /* If never incremented, it is invariant that we decided not to
4481 move. So leave it alone. */
4482 || ! bl->incremented)
4484 if (loop_dump_stream)
4485 fprintf (loop_dump_stream, "Biv %d: discarded, %s\n",
4486 bl->regno,
4487 (REG_IV_TYPE (ivs, bl->regno) != BASIC_INDUCT
4488 ? "not induction variable"
4489 : (! bl->incremented ? "never incremented"
4490 : "count error")));
4492 REG_IV_TYPE (ivs, bl->regno) = NOT_BASIC_INDUCT;
4493 *backbl = bl->next;
4495 else
4497 backbl = &bl->next;
4499 if (loop_dump_stream)
4500 fprintf (loop_dump_stream, "Biv %d: verified\n", bl->regno);
4506 /* Determine how BIVS are initialized by looking through pre-header
4507 extended basic block. */
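/* For example (register number invented), if the pre-header contains

     (set (reg:SI 62) (const_int 0))

   the note_stores walk with record_initial records that insn as the
   biv's initializing insn, and loop_bivs_check below can then treat 0
   as the initial value of biv 62.  A conditional jump around the loop
   whose condition is an NE test on the biv is used the same way (the
   other operand supplies the value); any other comparison is only
   remembered as the biv's initial_test.  */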
4508 static void
4509 loop_bivs_init_find (struct loop *loop)
4511 struct loop_ivs *ivs = LOOP_IVS (loop);
4512 /* Temporary list pointers for traversing ivs->list. */
4513 struct iv_class *bl;
4514 int call_seen;
4515 rtx p;
4517 /* Find initial value for each biv by searching backwards from loop_start,
4518 halting at first label. Also record any test condition. */
4520 call_seen = 0;
4521 for (p = loop->start; p && GET_CODE (p) != CODE_LABEL; p = PREV_INSN (p))
4523 rtx test;
4525 note_insn = p;
4527 if (GET_CODE (p) == CALL_INSN)
4528 call_seen = 1;
4530 if (INSN_P (p))
4531 note_stores (PATTERN (p), record_initial, ivs);
4533 /* Record any test of a biv that branches around the loop if there is no store
4534 between it and the start of the loop. We only care about tests with
4535 constants and registers and only certain of those. */
4536 if (GET_CODE (p) == JUMP_INSN
4537 && JUMP_LABEL (p) != 0
4538 && next_real_insn (JUMP_LABEL (p)) == next_real_insn (loop->end)
4539 && (test = get_condition_for_loop (loop, p)) != 0
4540 && GET_CODE (XEXP (test, 0)) == REG
4541 && REGNO (XEXP (test, 0)) < max_reg_before_loop
4542 && (bl = REG_IV_CLASS (ivs, REGNO (XEXP (test, 0)))) != 0
4543 && valid_initial_value_p (XEXP (test, 1), p, call_seen, loop->start)
4544 && bl->init_insn == 0)
4546 /* If an NE test, we have an initial value! */
4547 if (GET_CODE (test) == NE)
4549 bl->init_insn = p;
4550 bl->init_set = gen_rtx_SET (VOIDmode,
4551 XEXP (test, 0), XEXP (test, 1));
4553 else
4554 bl->initial_test = test;
4560 /* Look at each biv and see if we can say anything better about its
4561 initial value from any initializing insns set up above. (This is done
4562 in two passes to avoid missing SETs in a PARALLEL.) */
4563 static void
4564 loop_bivs_check (struct loop *loop)
4566 struct loop_ivs *ivs = LOOP_IVS (loop);
4567 /* Temporary list pointers for traversing ivs->list. */
4568 struct iv_class *bl;
4569 struct iv_class **backbl;
4571 for (backbl = &ivs->list; (bl = *backbl); backbl = &bl->next)
4573 rtx src;
4574 rtx note;
4576 if (! bl->init_insn)
4577 continue;
4579 /* If INIT_INSN has a REG_EQUAL or REG_EQUIV note and the value
4580 is a constant, use the value of that. */
4581 if (((note = find_reg_note (bl->init_insn, REG_EQUAL, 0)) != NULL
4582 && CONSTANT_P (XEXP (note, 0)))
4583 || ((note = find_reg_note (bl->init_insn, REG_EQUIV, 0)) != NULL
4584 && CONSTANT_P (XEXP (note, 0))))
4585 src = XEXP (note, 0);
4586 else
4587 src = SET_SRC (bl->init_set);
4589 if (loop_dump_stream)
4590 fprintf (loop_dump_stream,
4591 "Biv %d: initialized at insn %d: initial value ",
4592 bl->regno, INSN_UID (bl->init_insn));
4594 if ((GET_MODE (src) == GET_MODE (regno_reg_rtx[bl->regno])
4595 || GET_MODE (src) == VOIDmode)
4596 && valid_initial_value_p (src, bl->init_insn,
4597 LOOP_INFO (loop)->pre_header_has_call,
4598 loop->start))
4600 bl->initial_value = src;
4602 if (loop_dump_stream)
4604 print_simple_rtl (loop_dump_stream, src);
4605 fputc ('\n', loop_dump_stream);
4608 /* If we can't make it a giv,
4609 let biv keep initial value of "itself". */
4610 else if (loop_dump_stream)
4611 fprintf (loop_dump_stream, "is complex\n");
4616 /* Search the loop for general induction variables. */
4618 static void
4619 loop_givs_find (struct loop* loop)
4621 for_each_insn_in_loop (loop, check_insn_for_givs);
4625 /* For each giv for which we still don't know whether or not it is
4626 replaceable, check to see if it is replaceable because its final value
4627 can be calculated. */
4629 static void
4630 loop_givs_check (struct loop *loop)
4632 struct loop_ivs *ivs = LOOP_IVS (loop);
4633 struct iv_class *bl;
4635 for (bl = ivs->list; bl; bl = bl->next)
4637 struct induction *v;
4639 for (v = bl->giv; v; v = v->next_iv)
4640 if (! v->replaceable && ! v->not_replaceable)
4641 check_final_value (loop, v);
4646 /* Return nonzero if it is possible to eliminate the biv BL provided
4647 all givs are reduced. This is possible if either the reg is not
4648 used outside the loop, or we can compute what its final value will
4649 be. */
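/* A source-level illustration (array and bound invented for the example):

     for (i = 0; i < 100; i++)
       a[i] = 0;

   Once the address giv `&a[i]' has been reduced to its own pointer
   register, `i' is only needed for the exit test.  If `i' is not live
   after the loop, or if its final value (100 here) can be computed,
   maybe_eliminate_biv can rewrite that test in terms of a reduced giv
   and the biv drops out of the loop entirely.  */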
4651 static int
4652 loop_biv_eliminable_p (struct loop *loop, struct iv_class *bl,
4653 int threshold, int insn_count)
4655 /* For architectures with a decrement_and_branch_until_zero insn,
4656 don't do this if we put a REG_NONNEG note on the endtest for this
4657 biv. */
4659 #ifdef HAVE_decrement_and_branch_until_zero
4660 if (bl->nonneg)
4662 if (loop_dump_stream)
4663 fprintf (loop_dump_stream,
4664 "Cannot eliminate nonneg biv %d.\n", bl->regno);
4665 return 0;
4667 #endif
4669 /* Check that the biv is not used outside the loop, or that it has a computable final value.
4670 Compare against bl->init_insn rather than loop->start. We aren't
4671 concerned with any uses of the biv between init_insn and
4672 loop->start since these won't be affected by the value of the biv
4673 elsewhere in the function, so long as init_insn doesn't use the
4674 biv itself. */
4676 if ((REGNO_LAST_LUID (bl->regno) < INSN_LUID (loop->end)
4677 && bl->init_insn
4678 && INSN_UID (bl->init_insn) < max_uid_for_loop
4679 && REGNO_FIRST_LUID (bl->regno) >= INSN_LUID (bl->init_insn)
4680 && ! reg_mentioned_p (bl->biv->dest_reg, SET_SRC (bl->init_set)))
4681 || (bl->final_value = final_biv_value (loop, bl)))
4682 return maybe_eliminate_biv (loop, bl, 0, threshold, insn_count);
4684 if (loop_dump_stream)
4686 fprintf (loop_dump_stream,
4687 "Cannot eliminate biv %d.\n",
4688 bl->regno);
4689 fprintf (loop_dump_stream,
4690 "First use: insn %d, last use: insn %d.\n",
4691 REGNO_FIRST_UID (bl->regno),
4692 REGNO_LAST_UID (bl->regno));
4694 return 0;
4698 /* Reduce each giv of BL that we have decided to reduce. */
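/* A rough before/after sketch of what reducing an address giv means at
   the source level (names invented; the actual transformation is on
   RTL, not on C):

     before:

       int i;
       for (i = 0; i < n; i++)
         a[i] = 0;                -- address a + i*4 recomputed each time

     after (conceptually, assuming 4-byte elements):

       int i, *p;
       for (i = 0, p = a; i < n; i++, p++)
         *p = 0;                  -- reduced register bumped by 4 bytes
                                     at the same place i is incremented

   The loop below emits exactly those bump insns: one add of the giv's
   scaled increment next to every increment of the underlying biv, plus
   initialization of the new register hoisted to the loop start.  */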
4700 static void
4701 loop_givs_reduce (struct loop *loop, struct iv_class *bl)
4703 struct induction *v;
4705 for (v = bl->giv; v; v = v->next_iv)
4707 struct induction *tv;
4708 if (! v->ignore && v->same == 0)
4710 int auto_inc_opt = 0;
4712 /* If the code for derived givs immediately below has already
4713 allocated a new_reg, we must keep it. */
4714 if (! v->new_reg)
4715 v->new_reg = gen_reg_rtx (v->mode);
4717 #ifdef AUTO_INC_DEC
4718 /* If the target has auto-increment addressing modes, and
4719 this is an address giv, then try to put the increment
4720 immediately after its use, so that flow can create an
4721 auto-increment addressing mode. */
4722 if (v->giv_type == DEST_ADDR && bl->biv_count == 1
4723 && bl->biv->always_executed && ! bl->biv->maybe_multiple
4724 /* We don't handle reversed biv's because bl->biv->insn
4725 does not have a valid INSN_LUID. */
4726 && ! bl->reversed
4727 && v->always_executed && ! v->maybe_multiple
4728 && INSN_UID (v->insn) < max_uid_for_loop)
4730 /* If other giv's have been combined with this one, then
4731 this will work only if all uses of the other giv's occur
4732 before this giv's insn. This is difficult to check.
4734 We simplify this by looking for the common case where
4735 there is one DEST_REG giv, and this giv's insn is the
4736 last use of the dest_reg of that DEST_REG giv. If the
4737 increment occurs after the address giv, then we can
4738 perform the optimization. (Otherwise, the increment
4739 would have to go before other_giv, and we would not be
4740 able to combine it with the address giv to get an
4741 auto-inc address.) */
4742 if (v->combined_with)
4744 struct induction *other_giv = 0;
4746 for (tv = bl->giv; tv; tv = tv->next_iv)
4747 if (tv->same == v)
4749 if (other_giv)
4750 break;
4751 else
4752 other_giv = tv;
4754 if (! tv && other_giv
4755 && REGNO (other_giv->dest_reg) < max_reg_before_loop
4756 && (REGNO_LAST_UID (REGNO (other_giv->dest_reg))
4757 == INSN_UID (v->insn))
4758 && INSN_LUID (v->insn) < INSN_LUID (bl->biv->insn))
4759 auto_inc_opt = 1;
4761 /* Check for case where increment is before the address
4762 giv. Do this test in "loop order". */
4763 else if ((INSN_LUID (v->insn) > INSN_LUID (bl->biv->insn)
4764 && (INSN_LUID (v->insn) < INSN_LUID (loop->scan_start)
4765 || (INSN_LUID (bl->biv->insn)
4766 > INSN_LUID (loop->scan_start))))
4767 || (INSN_LUID (v->insn) < INSN_LUID (loop->scan_start)
4768 && (INSN_LUID (loop->scan_start)
4769 < INSN_LUID (bl->biv->insn))))
4770 auto_inc_opt = -1;
4771 else
4772 auto_inc_opt = 1;
4774 #ifdef HAVE_cc0
4776 rtx prev;
4778 /* We can't put an insn immediately after one setting
4779 cc0, or immediately before one using cc0. */
4780 if ((auto_inc_opt == 1 && sets_cc0_p (PATTERN (v->insn)))
4781 || (auto_inc_opt == -1
4782 && (prev = prev_nonnote_insn (v->insn)) != 0
4783 && INSN_P (prev)
4784 && sets_cc0_p (PATTERN (prev))))
4785 auto_inc_opt = 0;
4787 #endif
4789 if (auto_inc_opt)
4790 v->auto_inc_opt = 1;
4792 #endif
4794 /* For each place where the biv is incremented, add an insn
4795 to increment the new, reduced reg for the giv. */
4796 for (tv = bl->biv; tv; tv = tv->next_iv)
4798 rtx insert_before;
4800 /* Skip if location is the same as a previous one. */
4801 if (tv->same)
4802 continue;
4803 if (! auto_inc_opt)
4804 insert_before = NEXT_INSN (tv->insn);
4805 else if (auto_inc_opt == 1)
4806 insert_before = NEXT_INSN (v->insn);
4807 else
4808 insert_before = v->insn;
4810 if (tv->mult_val == const1_rtx)
4811 loop_iv_add_mult_emit_before (loop, tv->add_val, v->mult_val,
4812 v->new_reg, v->new_reg,
4813 0, insert_before);
4814 else /* tv->mult_val == const0_rtx */
4815 /* A multiply is acceptable here
4816 since this is presumed to be seldom executed. */
4817 loop_iv_add_mult_emit_before (loop, tv->add_val, v->mult_val,
4818 v->add_val, v->new_reg,
4819 0, insert_before);
4822 /* Add code at loop start to initialize giv's reduced reg. */
4824 loop_iv_add_mult_hoist (loop,
4825 extend_value_for_giv (v, bl->initial_value),
4826 v->mult_val, v->add_val, v->new_reg);
4832 /* Check for givs whose first use is their definition and whose
4833 last use is the definition of another giv. If so, it is likely
4834 dead and should not be used to derive another giv nor to
4835 eliminate a biv. */
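/* For instance (pseudo names invented), with a biv `i':

     g1 = i * 4        -- first use of g1 is its own definition
     g2 = g1 + base    -- last use of g1 defines another giv, g2

   g1 exists only to feed g2, so it is probably dead once g2 is reduced;
   marking it maybe_dead here keeps it from being used to derive further
   givs or to eliminate the biv.  */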
4837 static void
4838 loop_givs_dead_check (struct loop *loop ATTRIBUTE_UNUSED, struct iv_class *bl)
4840 struct induction *v;
4842 for (v = bl->giv; v; v = v->next_iv)
4844 if (v->ignore
4845 || (v->same && v->same->ignore))
4846 continue;
4848 if (v->giv_type == DEST_REG
4849 && REGNO_FIRST_UID (REGNO (v->dest_reg)) == INSN_UID (v->insn))
4851 struct induction *v1;
4853 for (v1 = bl->giv; v1; v1 = v1->next_iv)
4854 if (REGNO_LAST_UID (REGNO (v->dest_reg)) == INSN_UID (v1->insn))
4855 v->maybe_dead = 1;
4861 static void
4862 loop_givs_rescan (struct loop *loop, struct iv_class *bl, rtx *reg_map)
4864 struct induction *v;
4866 for (v = bl->giv; v; v = v->next_iv)
4868 if (v->same && v->same->ignore)
4869 v->ignore = 1;
4871 if (v->ignore)
4872 continue;
4874 /* Update expression if this was combined, in case other giv was
4875 replaced. */
4876 if (v->same)
4877 v->new_reg = replace_rtx (v->new_reg,
4878 v->same->dest_reg, v->same->new_reg);
4880 /* See if this register is known to be a pointer to something. If
4881 so, see if we can find the alignment. First see if there is a
4882 destination register that is a pointer. If so, this shares the
4883 alignment too. Next see if we can deduce anything from the
4884 computational information. If not, and this is a DEST_ADDR
4885 giv, at least we know that it's a pointer, though we don't know
4886 the alignment. */
4887 if (GET_CODE (v->new_reg) == REG
4888 && v->giv_type == DEST_REG
4889 && REG_POINTER (v->dest_reg))
4890 mark_reg_pointer (v->new_reg,
4891 REGNO_POINTER_ALIGN (REGNO (v->dest_reg)));
4892 else if (GET_CODE (v->new_reg) == REG
4893 && REG_POINTER (v->src_reg))
4895 unsigned int align = REGNO_POINTER_ALIGN (REGNO (v->src_reg));
4897 if (align == 0
4898 || GET_CODE (v->add_val) != CONST_INT
4899 || INTVAL (v->add_val) % (align / BITS_PER_UNIT) != 0)
4900 align = 0;
4902 mark_reg_pointer (v->new_reg, align);
4904 else if (GET_CODE (v->new_reg) == REG
4905 && GET_CODE (v->add_val) == REG
4906 && REG_POINTER (v->add_val))
4908 unsigned int align = REGNO_POINTER_ALIGN (REGNO (v->add_val));
4910 if (align == 0 || GET_CODE (v->mult_val) != CONST_INT
4911 || INTVAL (v->mult_val) % (align / BITS_PER_UNIT) != 0)
4912 align = 0;
4914 mark_reg_pointer (v->new_reg, align);
4916 else if (GET_CODE (v->new_reg) == REG && v->giv_type == DEST_ADDR)
4917 mark_reg_pointer (v->new_reg, 0);
4919 if (v->giv_type == DEST_ADDR)
4920 /* Store reduced reg as the address in the memref where we found
4921 this giv. */
4922 validate_change (v->insn, v->location, v->new_reg, 0);
4923 else if (v->replaceable)
4925 reg_map[REGNO (v->dest_reg)] = v->new_reg;
4927 else
4929 rtx original_insn = v->insn;
4930 rtx note;
4932 /* Not replaceable; emit an insn to set the original giv reg from
4933 the reduced giv, same as above. */
4934 v->insn = loop_insn_emit_after (loop, 0, original_insn,
4935 gen_move_insn (v->dest_reg,
4936 v->new_reg));
4938 /* The original insn may have a REG_EQUAL note. This note is
4939 now incorrect and may result in invalid substitutions later.
4940 The original insn is dead, but may be part of a libcall
4941 sequence, which doesn't seem worth the bother of handling. */
4942 note = find_reg_note (original_insn, REG_EQUAL, NULL_RTX);
4943 if (note)
4944 remove_note (original_insn, note);
4947 /* When a loop is reversed, givs which depend on the reversed
4948 biv, and which are live outside the loop, must be set to their
4949 correct final value. This insn is only needed if the giv is
4950 not replaceable. The correct final value is the same as the
4951 value that the giv starts the reversed loop with. */
4952 if (bl->reversed && ! v->replaceable)
4953 loop_iv_add_mult_sink (loop,
4954 extend_value_for_giv (v, bl->initial_value),
4955 v->mult_val, v->add_val, v->dest_reg);
4956 else if (v->final_value)
4957 loop_insn_sink_or_swim (loop,
4958 gen_load_of_final_value (v->dest_reg,
4959 v->final_value));
4961 if (loop_dump_stream)
4963 fprintf (loop_dump_stream, "giv at %d reduced to ",
4964 INSN_UID (v->insn));
4965 print_simple_rtl (loop_dump_stream, v->new_reg);
4966 fprintf (loop_dump_stream, "\n");
4972 static int
4973 loop_giv_reduce_benefit (struct loop *loop ATTRIBUTE_UNUSED,
4974 struct iv_class *bl, struct induction *v,
4975 rtx test_reg)
4977 int add_cost;
4978 int benefit;
4980 benefit = v->benefit;
4981 PUT_MODE (test_reg, v->mode);
4982 add_cost = iv_add_mult_cost (bl->biv->add_val, v->mult_val,
4983 test_reg, test_reg);
4985 /* Reduce benefit if not replaceable, since we will insert a
4986 move-insn to replace the insn that calculates this giv. Don't do
4987 this unless the giv is a user variable, since it will often be
4988 marked non-replaceable because of the duplication of the exit
4989 code outside the loop. In such a case, the copies we insert are
4990 dead and will be deleted. So they don't have a cost. Similar
4991 situations exist. */
4992 /* ??? The new final_[bg]iv_value code does a much better job of
4993 finding replaceable giv's, and hence this code may no longer be
4994 necessary. */
4995 if (! v->replaceable && ! bl->eliminable
4996 && REG_USERVAR_P (v->dest_reg))
4997 benefit -= copy_cost;
4999 /* Decrease the benefit to count the add-insns that we will insert
5000 to increment the reduced reg for the giv. ??? This can
5001 overestimate the run-time cost of the additional insns, e.g. if
5002 there are multiple basic blocks that increment the biv, but only
5003 one of these blocks is executed during each iteration. There is
5004 no good way to detect cases like this with the current structure
5005 of the loop optimizer. This code is more accurate for
5006 determining code size than run-time benefits. */
5007 benefit -= add_cost * bl->biv_count;
5009 /* Decide whether to strength-reduce this giv or to leave the code
5010 unchanged (recompute it from the biv each time it is used). This
5011 decision can be made independently for each giv. */
5013 #ifdef AUTO_INC_DEC
5014 /* Attempt to guess whether autoincrement will handle some of the
5015 new add insns; if so, increase BENEFIT (undo the subtraction of
5016 add_cost that was done above). */
5017 if (v->giv_type == DEST_ADDR
5018 /* Increasing the benefit is risky, since this is only a guess.
5019 Avoid increasing register pressure in cases where there would
5020 be no other benefit from reducing this giv. */
5021 && benefit > 0
5022 && GET_CODE (v->mult_val) == CONST_INT)
5024 int size = GET_MODE_SIZE (GET_MODE (v->mem));
5026 if (HAVE_POST_INCREMENT
5027 && INTVAL (v->mult_val) == size)
5028 benefit += add_cost * bl->biv_count;
5029 else if (HAVE_PRE_INCREMENT
5030 && INTVAL (v->mult_val) == size)
5031 benefit += add_cost * bl->biv_count;
5032 else if (HAVE_POST_DECREMENT
5033 && -INTVAL (v->mult_val) == size)
5034 benefit += add_cost * bl->biv_count;
5035 else if (HAVE_PRE_DECREMENT
5036 && -INTVAL (v->mult_val) == size)
5037 benefit += add_cost * bl->biv_count;
5039 #endif
5041 return benefit;
5045 /* Free IV structures for LOOP. */
5047 static void
5048 loop_ivs_free (struct loop *loop)
5050 struct loop_ivs *ivs = LOOP_IVS (loop);
5051 struct iv_class *iv = ivs->list;
5053 free (ivs->regs);
5055 while (iv)
5057 struct iv_class *next = iv->next;
5058 struct induction *induction;
5059 struct induction *next_induction;
5061 for (induction = iv->biv; induction; induction = next_induction)
5063 next_induction = induction->next_iv;
5064 free (induction);
5066 for (induction = iv->giv; induction; induction = next_induction)
5068 next_induction = induction->next_iv;
5069 free (induction);
5072 free (iv);
5073 iv = next;
5078 /* Perform strength reduction and induction variable elimination.
5080 Pseudo registers created during this function will be beyond the
5081 last valid index in several tables including
5082 REGS->ARRAY[I].N_TIMES_SET and REGNO_LAST_UID. This does not cause a
5083 problem here, because the added registers cannot be givs outside of
5084 their loop, and hence will never be reconsidered. But scan_loop
5085 must check regnos to make sure they are in bounds. */
5087 static void
5088 strength_reduce (struct loop *loop, int flags)
5090 struct loop_info *loop_info = LOOP_INFO (loop);
5091 struct loop_regs *regs = LOOP_REGS (loop);
5092 struct loop_ivs *ivs = LOOP_IVS (loop);
5093 rtx p;
5094 /* Temporary list pointer for traversing ivs->list. */
5095 struct iv_class *bl;
5096 /* Ratio of extra register life span we can justify
5097 for saving an instruction. More if loop doesn't call subroutines
5098 since in that case saving an insn makes more difference
5099 and more registers are available. */
5100 /* ??? could set this to last value of threshold in move_movables */
5101 int threshold = (loop_info->has_call ? 1 : 2) * (3 + n_non_fixed_regs);
5102 /* Map of pseudo-register replacements. */
5103 rtx *reg_map = NULL;
5104 int reg_map_size;
5105 int unrolled_insn_copies = 0;
5106 rtx test_reg = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
5107 int insn_count = count_insns_in_loop (loop);
5109 addr_placeholder = gen_reg_rtx (Pmode);
5111 ivs->n_regs = max_reg_before_loop;
5112 ivs->regs = xcalloc (ivs->n_regs, sizeof (struct iv));
5114 /* Find all BIVs in loop. */
5115 loop_bivs_find (loop);
5117 /* Exit if there are no bivs. */
5118 if (! ivs->list)
5120 /* Can still unroll the loop anyway, but indicate that there is no
5121 strength reduction info available. */
5122 if (flags & LOOP_UNROLL)
5123 unroll_loop (loop, insn_count, 0);
5125 loop_ivs_free (loop);
5126 return;
5129 /* Determine how BIVS are initialized by looking through pre-header
5130 extended basic block. */
5131 loop_bivs_init_find (loop);
5133 /* Look at each biv and see if we can say anything better about its
5134 initial value from any initializing insns set up above. */
5135 loop_bivs_check (loop);
5137 /* Search the loop for general induction variables. */
5138 loop_givs_find (loop);
5140 /* Try to calculate and save the number of loop iterations. This is
5141 set to zero if the actual number cannot be calculated. This must
5142 be called after all giv's have been identified, since otherwise it may
5143 fail if the iteration variable is a giv. */
5144 loop_iterations (loop);
5146 #ifdef HAVE_prefetch
5147 if (flags & LOOP_PREFETCH)
5148 emit_prefetch_instructions (loop);
5149 #endif
5151 /* Now for each giv for which we still don't know whether or not it is
5152 replaceable, check to see if it is replaceable because its final value
5153 can be calculated. This must be done after loop_iterations is called,
5154 so that final_giv_value will work correctly. */
5155 loop_givs_check (loop);
5157 /* Try to prove that the loop counter variable (if any) is always
5158 nonnegative; if so, record that fact with a REG_NONNEG note
5159 so that "decrement and branch until zero" insn can be used. */
5160 check_dbra_loop (loop, insn_count);
5162 /* Create reg_map to hold substitutions for replaceable giv regs.
5163 Some givs might have been made from biv increments, so look at
5164 ivs->reg_iv_type for a suitable size. */
5165 reg_map_size = ivs->n_regs;
5166 reg_map = xcalloc (reg_map_size, sizeof (rtx));
5168 /* Examine each iv class for feasibility of strength reduction/induction
5169 variable elimination. */
5171 for (bl = ivs->list; bl; bl = bl->next)
5173 struct induction *v;
5174 int benefit;
5176 /* Test whether it will be possible to eliminate this biv
5177 provided all givs are reduced. */
5178 bl->eliminable = loop_biv_eliminable_p (loop, bl, threshold, insn_count);
5180 /* This will be true at the end, if all givs which depend on this
5181 biv have been strength reduced.
5182 We can't (currently) eliminate the biv unless this is so. */
5183 bl->all_reduced = 1;
5185 /* Check each extension dependent giv in this class to see if its
5186 root biv is safe from wrapping in the interior mode. */
5187 check_ext_dependent_givs (loop, bl);
5189 /* Combine all giv's for this iv_class. */
5190 combine_givs (regs, bl);
5192 for (v = bl->giv; v; v = v->next_iv)
5194 struct induction *tv;
5196 if (v->ignore || v->same)
5197 continue;
5199 benefit = loop_giv_reduce_benefit (loop, bl, v, test_reg);
5201 /* If an insn is not to be strength reduced, then set its ignore
5202 flag, and clear bl->all_reduced. */
5204 /* A giv that depends on a reversed biv must be reduced if it is
5205 used after the loop exit, otherwise, it would have the wrong
5206 value after the loop exit. To make it simple, just reduce all
5207 of such giv's whether or not we know they are used after the loop
5208 exit. */
5210 if (! flag_reduce_all_givs
5211 && v->lifetime * threshold * benefit < insn_count
5212 && ! bl->reversed)
5214 if (loop_dump_stream)
5215 fprintf (loop_dump_stream,
5216 "giv of insn %d not worth while, %d vs %d.\n",
5217 INSN_UID (v->insn),
5218 v->lifetime * threshold * benefit, insn_count);
5219 v->ignore = 1;
5220 bl->all_reduced = 0;
5222 else
5224 /* Check that we can increment the reduced giv without a
5225 multiply insn. If not, reject it. */
5227 for (tv = bl->biv; tv; tv = tv->next_iv)
5228 if (tv->mult_val == const1_rtx
5229 && ! product_cheap_p (tv->add_val, v->mult_val))
5231 if (loop_dump_stream)
5232 fprintf (loop_dump_stream,
5233 "giv of insn %d: would need a multiply.\n",
5234 INSN_UID (v->insn));
5235 v->ignore = 1;
5236 bl->all_reduced = 0;
5237 break;
5242 /* Check for givs whose first use is their definition and whose
5243 last use is the definition of another giv. If so, it is likely
5244 dead and should not be used to derive another giv nor to
5245 eliminate a biv. */
5246 loop_givs_dead_check (loop, bl);
5248 /* Reduce each giv that we decided to reduce. */
5249 loop_givs_reduce (loop, bl);
5251 /* Rescan all givs. If a giv is the same as a giv not reduced, mark it
5252 as not reduced.
5254 For each giv register that can be reduced now: if replaceable,
5255 substitute reduced reg wherever the old giv occurs;
5256 else add new move insn "giv_reg = reduced_reg". */
5257 loop_givs_rescan (loop, bl, reg_map);
5259 /* All the givs based on the biv bl have been reduced if they
5260 merit it. */
5262 /* For each giv not marked as maybe dead that has been combined with a
5263 second giv, clear any "maybe dead" mark on that second giv.
5264 v->new_reg will either be or refer to the register of the giv it
5265 combined with.
5267 Doing this clearing avoids problems in biv elimination where
5268 a giv's new_reg is a complex value that can't be put in the
5269 insn but the giv combined with (with a reg as new_reg) is
5270 marked maybe_dead. Since the register will be used in either
5271 case, we'd prefer it be used from the simpler giv. */
5273 for (v = bl->giv; v; v = v->next_iv)
5274 if (! v->maybe_dead && v->same)
5275 v->same->maybe_dead = 0;
5277 /* Try to eliminate the biv, if it is a candidate.
5278 This won't work if ! bl->all_reduced,
5279 since the givs we planned to use might not have been reduced.
5281 We have to be careful that we didn't initially think we could
5282 eliminate this biv because of a giv that we now think may be
5283 dead and shouldn't be used as a biv replacement.
5285 Also, there is the possibility that we may have a giv that looks
5286 like it can be used to eliminate a biv, but the resulting insn
5287 isn't valid. This can happen, for example, on the 88k, where a
5288 JUMP_INSN can compare a register only with zero. Attempts to
5289 replace it with a compare with a constant will fail.
5291 Note that in cases where this call fails, we may have replaced some
5292 of the occurrences of the biv with a giv, but no harm was done in
5293 doing so in the rare cases where it can occur. */
5295 if (bl->all_reduced == 1 && bl->eliminable
5296 && maybe_eliminate_biv (loop, bl, 1, threshold, insn_count))
5298 /* ?? If we created a new test to bypass the loop entirely,
5299 or otherwise drop straight in, based on this test, then
5300 we might want to rewrite it also. This way some later
5301 pass has more hope of removing the initialization of this
5302 biv entirely. */
5304 /* If final_value != 0, then the biv may be used after loop end
5305 and we must emit an insn to set it just in case.
5307 Reversed bivs already have an insn after the loop setting their
5308 value, so we don't need another one. We can't calculate the
5309 proper final value for such a biv here anyway. */
5310 if (bl->final_value && ! bl->reversed)
5311 loop_insn_sink_or_swim (loop,
5312 gen_load_of_final_value (bl->biv->dest_reg,
5313 bl->final_value));
5315 if (loop_dump_stream)
5316 fprintf (loop_dump_stream, "Reg %d: biv eliminated\n",
5317 bl->regno);
5319 /* See above note wrt final_value. But since we couldn't eliminate
5320 the biv, we must set the value after the loop instead of before. */
5321 else if (bl->final_value && ! bl->reversed)
5322 loop_insn_sink (loop, gen_load_of_final_value (bl->biv->dest_reg,
5323 bl->final_value));
5326 /* Go through all the instructions in the loop, making all the
5327 register substitutions scheduled in REG_MAP. */
5329 for (p = loop->start; p != loop->end; p = NEXT_INSN (p))
5330 if (GET_CODE (p) == INSN || GET_CODE (p) == JUMP_INSN
5331 || GET_CODE (p) == CALL_INSN)
5333 replace_regs (PATTERN (p), reg_map, reg_map_size, 0);
5334 replace_regs (REG_NOTES (p), reg_map, reg_map_size, 0);
5335 INSN_CODE (p) = -1;
5338 if (loop_info->n_iterations > 0)
5340 /* When we completely unroll a loop we will likely not need the increment
5341 of the loop BIV and we will not need the conditional branch at the
5342 end of the loop. */
5343 unrolled_insn_copies = insn_count - 2;
5345 #ifdef HAVE_cc0
5346 /* When we completely unroll a loop on a HAVE_cc0 machine we will not
5347 need the comparison before the conditional branch at the end of the
5348 loop. */
5349 unrolled_insn_copies -= 1;
5350 #endif
5352 /* We'll need one copy for each loop iteration. */
5353 unrolled_insn_copies *= loop_info->n_iterations;
5355 /* A little slop to account for the ability to remove initialization
5356 code, better CSE, and other secondary benefits of completely
5357 unrolling some loops. */
5358 unrolled_insn_copies -= 1;
5360 /* Clamp the value. */
5361 if (unrolled_insn_copies < 0)
5362 unrolled_insn_copies = 0;
5365 /* Unroll loops from within strength reduction so that we can use the
5366 induction variable information that strength_reduce has already
5367 collected. Always unroll loops that would be as small or smaller
5368 unrolled than when rolled. */
5369 if ((flags & LOOP_UNROLL)
5370 || ((flags & LOOP_AUTO_UNROLL)
5371 && loop_info->n_iterations > 0
5372 && unrolled_insn_copies <= insn_count))
5373 unroll_loop (loop, insn_count, 1);
5375 #ifdef HAVE_doloop_end
5376 if (HAVE_doloop_end && (flags & LOOP_BCT) && flag_branch_on_count_reg)
5377 doloop_optimize (loop);
5378 #endif /* HAVE_doloop_end */
5380 /* In case number of iterations is known, drop branch prediction note
5381 in the branch. Do that only in second loop pass, as loop unrolling
5382 may change the number of iterations performed. */
5383 if (flags & LOOP_BCT)
5385 unsigned HOST_WIDE_INT n
5386 = loop_info->n_iterations / loop_info->unroll_number;
5387 if (n > 1)
5388 predict_insn (prev_nonnote_insn (loop->end), PRED_LOOP_ITERATIONS,
5389 REG_BR_PROB_BASE - REG_BR_PROB_BASE / n);
5392 if (loop_dump_stream)
5393 fprintf (loop_dump_stream, "\n");
5395 loop_ivs_free (loop);
5396 if (reg_map)
5397 free (reg_map);
5400 /* Record all basic induction variables calculated in the insn. */
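/* For example (register number invented), an insn whose single set is

     (set (reg:SI 62) (plus:SI (reg:SI 62) (const_int 4)))

   makes pseudo 62 a candidate biv: basic_induction_var sees the register
   being incremented by a loop-invariant amount and returns inc_val
   (const_int 4), which record_biv then files in an induction entry.  */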
5401 static rtx
5402 check_insn_for_bivs (struct loop *loop, rtx p, int not_every_iteration,
5403 int maybe_multiple)
5405 struct loop_ivs *ivs = LOOP_IVS (loop);
5406 rtx set;
5407 rtx dest_reg;
5408 rtx inc_val;
5409 rtx mult_val;
5410 rtx *location;
5412 if (GET_CODE (p) == INSN
5413 && (set = single_set (p))
5414 && GET_CODE (SET_DEST (set)) == REG)
5416 dest_reg = SET_DEST (set);
5417 if (REGNO (dest_reg) < max_reg_before_loop
5418 && REGNO (dest_reg) >= FIRST_PSEUDO_REGISTER
5419 && REG_IV_TYPE (ivs, REGNO (dest_reg)) != NOT_BASIC_INDUCT)
5421 if (basic_induction_var (loop, SET_SRC (set),
5422 GET_MODE (SET_SRC (set)),
5423 dest_reg, p, &inc_val, &mult_val,
5424 &location))
5426 /* It is a possible basic induction variable.
5427 Create and initialize an induction structure for it. */
5429 struct induction *v = xmalloc (sizeof (struct induction));
5431 record_biv (loop, v, p, dest_reg, inc_val, mult_val, location,
5432 not_every_iteration, maybe_multiple);
5433 REG_IV_TYPE (ivs, REGNO (dest_reg)) = BASIC_INDUCT;
5435 else if (REGNO (dest_reg) < ivs->n_regs)
5436 REG_IV_TYPE (ivs, REGNO (dest_reg)) = NOT_BASIC_INDUCT;
5439 return p;
5442 /* Record all givs calculated in the insn.
5443 A register is a giv if: it is only set once, it is a function of a
5444 biv and a constant (or invariant), and it is not a biv. */
5445 static rtx
5446 check_insn_for_givs (struct loop *loop, rtx p, int not_every_iteration,
5447 int maybe_multiple)
5449 struct loop_regs *regs = LOOP_REGS (loop);
5451 rtx set;
5452 /* Look for a general induction variable in a register. */
5453 if (GET_CODE (p) == INSN
5454 && (set = single_set (p))
5455 && GET_CODE (SET_DEST (set)) == REG
5456 && ! regs->array[REGNO (SET_DEST (set))].may_not_optimize)
5458 rtx src_reg;
5459 rtx dest_reg;
5460 rtx add_val;
5461 rtx mult_val;
5462 rtx ext_val;
5463 int benefit;
5464 rtx regnote = 0;
5465 rtx last_consec_insn;
5467 dest_reg = SET_DEST (set);
5468 if (REGNO (dest_reg) < FIRST_PSEUDO_REGISTER)
5469 return p;
5471 if (/* SET_SRC is a giv. */
5472 (general_induction_var (loop, SET_SRC (set), &src_reg, &add_val,
5473 &mult_val, &ext_val, 0, &benefit, VOIDmode)
5474 /* Equivalent expression is a giv. */
5475 || ((regnote = find_reg_note (p, REG_EQUAL, NULL_RTX))
5476 && general_induction_var (loop, XEXP (regnote, 0), &src_reg,
5477 &add_val, &mult_val, &ext_val, 0,
5478 &benefit, VOIDmode)))
5479 /* Don't try to handle any regs made by loop optimization.
5480 We have nothing on them in regno_first_uid, etc. */
5481 && REGNO (dest_reg) < max_reg_before_loop
5482 /* Don't recognize a BASIC_INDUCT_VAR here. */
5483 && dest_reg != src_reg
5484 /* This must be the only place where the register is set. */
5485 && (regs->array[REGNO (dest_reg)].n_times_set == 1
5486 /* or all sets must be consecutive and make a giv. */
5487 || (benefit = consec_sets_giv (loop, benefit, p,
5488 src_reg, dest_reg,
5489 &add_val, &mult_val, &ext_val,
5490 &last_consec_insn))))
5492 struct induction *v = xmalloc (sizeof (struct induction));
5494 /* If this is a library call, increase benefit. */
5495 if (find_reg_note (p, REG_RETVAL, NULL_RTX))
5496 benefit += libcall_benefit (p);
5498 /* Skip the consecutive insns, if there are any. */
5499 if (regs->array[REGNO (dest_reg)].n_times_set != 1)
5500 p = last_consec_insn;
5502 record_giv (loop, v, p, src_reg, dest_reg, mult_val, add_val,
5503 ext_val, benefit, DEST_REG, not_every_iteration,
5504 maybe_multiple, (rtx*) 0);
5509 /* Look for givs which are memory addresses. */
5510 if (GET_CODE (p) == INSN)
5511 find_mem_givs (loop, PATTERN (p), p, not_every_iteration,
5512 maybe_multiple);
5514 /* Update the status of whether giv can derive other givs. This can
5515 change when we pass a label or an insn that updates a biv. */
5516 if (GET_CODE (p) == INSN || GET_CODE (p) == JUMP_INSN
5517 || GET_CODE (p) == CODE_LABEL)
5518 update_giv_derive (loop, p);
5519 return p;
5522 /* Return 1 if X is a valid source for an initial value (or as value being
5523 compared against in an initial test).
5525 X must be either a register or constant and must not be clobbered between
5526 the current insn and the start of the loop.
5528 INSN is the insn containing X. */
5530 static int
5531 valid_initial_value_p (rtx x, rtx insn, int call_seen, rtx loop_start)
5533 if (CONSTANT_P (x))
5534 return 1;
5536 /* Only consider pseudos we know about, initialized in insns whose luids
5537 we know. */
5538 if (GET_CODE (x) != REG
5539 || REGNO (x) >= max_reg_before_loop)
5540 return 0;
5542 /* Don't use a call-clobbered register across a call which clobbers it. On
5543 some machines, don't use any hard registers at all. */
5544 if (REGNO (x) < FIRST_PSEUDO_REGISTER
5545 && (SMALL_REGISTER_CLASSES
5546 || (call_used_regs[REGNO (x)] && call_seen)))
5547 return 0;
5549 /* Don't use registers that have been clobbered before the start of the
5550 loop. */
5551 if (reg_set_between_p (x, insn, loop_start))
5552 return 0;
5554 return 1;
5557 /* Scan X for memory refs and check each memory address
5558 as a possible giv. INSN is the insn whose pattern X comes from.
5559 NOT_EVERY_ITERATION is 1 if the insn might not be executed during
5560 every loop iteration. MAYBE_MULTIPLE is 1 if the insn might be executed
5561 more than once in each loop iteration. */
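/* An illustrative (hypothetical) case: for a reference like a[i] in the
   loop body, where `i' is a biv, the MEM found here has an address such
   as (plus (mult (reg i) (const_int 4)) (symbol_ref a)).
   general_induction_var recognizes that address and the giv is recorded
   as DEST_ADDR, with addr_placeholder standing in for the destination
   register.  */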
5563 static void
5564 find_mem_givs (const struct loop *loop, rtx x, rtx insn,
5565 int not_every_iteration, int maybe_multiple)
5567 int i, j;
5568 enum rtx_code code;
5569 const char *fmt;
5571 if (x == 0)
5572 return;
5574 code = GET_CODE (x);
5575 switch (code)
5577 case REG:
5578 case CONST_INT:
5579 case CONST:
5580 case CONST_DOUBLE:
5581 case SYMBOL_REF:
5582 case LABEL_REF:
5583 case PC:
5584 case CC0:
5585 case ADDR_VEC:
5586 case ADDR_DIFF_VEC:
5587 case USE:
5588 case CLOBBER:
5589 return;
5591 case MEM:
5593 rtx src_reg;
5594 rtx add_val;
5595 rtx mult_val;
5596 rtx ext_val;
5597 int benefit;
5599 /* This code used to disable creating GIVs with mult_val == 1 and
5600 add_val == 0. However, this leads to lost optimizations when
5601 it comes time to combine a set of related DEST_ADDR GIVs, since
5602 this one would not be seen. */
5604 if (general_induction_var (loop, XEXP (x, 0), &src_reg, &add_val,
5605 &mult_val, &ext_val, 1, &benefit,
5606 GET_MODE (x)))
5608 /* Found one; record it. */
5609 struct induction *v = xmalloc (sizeof (struct induction));
5611 record_giv (loop, v, insn, src_reg, addr_placeholder, mult_val,
5612 add_val, ext_val, benefit, DEST_ADDR,
5613 not_every_iteration, maybe_multiple, &XEXP (x, 0));
5615 v->mem = x;
5618 return;
5620 default:
5621 break;
5624 /* Recursively scan the subexpressions for other mem refs. */
5626 fmt = GET_RTX_FORMAT (code);
5627 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5628 if (fmt[i] == 'e')
5629 find_mem_givs (loop, XEXP (x, i), insn, not_every_iteration,
5630 maybe_multiple);
5631 else if (fmt[i] == 'E')
5632 for (j = 0; j < XVECLEN (x, i); j++)
5633 find_mem_givs (loop, XVECEXP (x, i, j), insn, not_every_iteration,
5634 maybe_multiple);
5637 /* Fill in the data about one biv update.
5638 V is the `struct induction' in which we record the biv. (It is
5639 allocated by the caller.)
5640 INSN is the insn that sets it.
5641 DEST_REG is the biv's reg.
5643 MULT_VAL is const1_rtx if the biv is being incremented here, in which case
5644 INC_VAL is the increment. Otherwise, MULT_VAL is const0_rtx and the biv is
5645 being set to INC_VAL.
5647 NOT_EVERY_ITERATION is nonzero if this biv update is not known to be
5648 executed every iteration; MAYBE_MULTIPLE is nonzero if this biv update
5649 can be executed more than once per iteration. If MAYBE_MULTIPLE
5650 and NOT_EVERY_ITERATION are both zero, we know that the biv update is
5651 executed exactly once per iteration. */
5653 static void
5654 record_biv (struct loop *loop, struct induction *v, rtx insn, rtx dest_reg,
5655 rtx inc_val, rtx mult_val, rtx *location,
5656 int not_every_iteration, int maybe_multiple)
5658 struct loop_ivs *ivs = LOOP_IVS (loop);
5659 struct iv_class *bl;
5661 v->insn = insn;
5662 v->src_reg = dest_reg;
5663 v->dest_reg = dest_reg;
5664 v->mult_val = mult_val;
5665 v->add_val = inc_val;
5666 v->ext_dependent = NULL_RTX;
5667 v->location = location;
5668 v->mode = GET_MODE (dest_reg);
5669 v->always_computable = ! not_every_iteration;
5670 v->always_executed = ! not_every_iteration;
5671 v->maybe_multiple = maybe_multiple;
5672 v->same = 0;
5674 /* Add this to the reg's iv_class, creating a class
5675 if this is the first incrementation of the reg. */
5677 bl = REG_IV_CLASS (ivs, REGNO (dest_reg));
5678 if (bl == 0)
5680 /* Create and initialize new iv_class. */
5682 bl = xmalloc (sizeof (struct iv_class));
5684 bl->regno = REGNO (dest_reg);
5685 bl->biv = 0;
5686 bl->giv = 0;
5687 bl->biv_count = 0;
5688 bl->giv_count = 0;
5690 /* Set initial value to the reg itself. */
5691 bl->initial_value = dest_reg;
5692 bl->final_value = 0;
5693 /* We haven't seen the initializing insn yet. */
5694 bl->init_insn = 0;
5695 bl->init_set = 0;
5696 bl->initial_test = 0;
5697 bl->incremented = 0;
5698 bl->eliminable = 0;
5699 bl->nonneg = 0;
5700 bl->reversed = 0;
5701 bl->total_benefit = 0;
5703 /* Add this class to ivs->list. */
5704 bl->next = ivs->list;
5705 ivs->list = bl;
5707 /* Put it in the array of biv register classes. */
5708 REG_IV_CLASS (ivs, REGNO (dest_reg)) = bl;
5710 else
5712 /* Check if location is the same as a previous one. */
5713 struct induction *induction;
5714 for (induction = bl->biv; induction; induction = induction->next_iv)
5715 if (location == induction->location)
5717 v->same = induction;
5718 break;
5722 /* Update IV_CLASS entry for this biv. */
5723 v->next_iv = bl->biv;
5724 bl->biv = v;
5725 bl->biv_count++;
5726 if (mult_val == const1_rtx)
5727 bl->incremented = 1;
5729 if (loop_dump_stream)
5730 loop_biv_dump (v, loop_dump_stream, 0);
5733 /* Fill in the data about one giv.
5734 V is the `struct induction' in which we record the giv. (It is
5735 allocated by the caller.)
5736 INSN is the insn that sets it.
5737 BENEFIT estimates the savings from deleting this insn.
5738 TYPE is DEST_REG or DEST_ADDR; it says whether the giv is computed
5739 into a register or is used as a memory address.
5741 SRC_REG is the biv reg which the giv is computed from.
5742 DEST_REG is the giv's reg (if the giv is stored in a reg).
5743 MULT_VAL and ADD_VAL are the coefficients used to compute the giv.
5744 LOCATION points to the place where this giv's value appears in INSN. */
5746 static void
5747 record_giv (const struct loop *loop, struct induction *v, rtx insn,
5748 rtx src_reg, rtx dest_reg, rtx mult_val, rtx add_val,
5749 rtx ext_val, int benefit, enum g_types type,
5750 int not_every_iteration, int maybe_multiple, rtx *location)
5752 struct loop_ivs *ivs = LOOP_IVS (loop);
5753 struct induction *b;
5754 struct iv_class *bl;
5755 rtx set = single_set (insn);
5756 rtx temp;
5758 /* Attempt to prove constantness of the values. Don't let simplify_rtx
5759 undo the MULT canonicalization that we performed earlier. */
5760 temp = simplify_rtx (add_val);
5761 if (temp
5762 && ! (GET_CODE (add_val) == MULT
5763 && GET_CODE (temp) == ASHIFT))
5764 add_val = temp;
5766 v->insn = insn;
5767 v->src_reg = src_reg;
5768 v->giv_type = type;
5769 v->dest_reg = dest_reg;
5770 v->mult_val = mult_val;
5771 v->add_val = add_val;
5772 v->ext_dependent = ext_val;
5773 v->benefit = benefit;
5774 v->location = location;
5775 v->cant_derive = 0;
5776 v->combined_with = 0;
5777 v->maybe_multiple = maybe_multiple;
5778 v->maybe_dead = 0;
5779 v->derive_adjustment = 0;
5780 v->same = 0;
5781 v->ignore = 0;
5782 v->new_reg = 0;
5783 v->final_value = 0;
5784 v->same_insn = 0;
5785 v->auto_inc_opt = 0;
5786 v->unrolled = 0;
5787 v->shared = 0;
5789 /* The v->always_computable field is used in update_giv_derive, to
5790 determine whether a giv can be used to derive another giv. For a
5791 DEST_REG giv, INSN computes a new value for the giv, so its value
5792 isn't computable if INSN isn't executed every iteration.
5793 However, for a DEST_ADDR giv, INSN merely uses the value of the giv;
5794 it does not compute a new value. Hence the value is always computable
5795 regardless of whether INSN is executed each iteration. */
5797 if (type == DEST_ADDR)
5798 v->always_computable = 1;
5799 else
5800 v->always_computable = ! not_every_iteration;
5802 v->always_executed = ! not_every_iteration;
5804 if (type == DEST_ADDR)
5806 v->mode = GET_MODE (*location);
5807 v->lifetime = 1;
5809 else /* type == DEST_REG */
5811 v->mode = GET_MODE (SET_DEST (set));
5813 v->lifetime = LOOP_REG_LIFETIME (loop, REGNO (dest_reg));
5815 /* If the lifetime is zero, it means that this register is
5816 really a dead store. So mark this as a giv that can be
5817 ignored. This will not prevent the biv from being eliminated. */
5818 if (v->lifetime == 0)
5819 v->ignore = 1;
5821 REG_IV_TYPE (ivs, REGNO (dest_reg)) = GENERAL_INDUCT;
5822 REG_IV_INFO (ivs, REGNO (dest_reg)) = v;
5825 /* Add the giv to the class of givs computed from one biv. */
5827 bl = REG_IV_CLASS (ivs, REGNO (src_reg));
5828 if (bl)
5830 v->next_iv = bl->giv;
5831 bl->giv = v;
5832 /* Don't count DEST_ADDR. This is supposed to count the number of
5833 insns that calculate givs. */
5834 if (type == DEST_REG)
5835 bl->giv_count++;
5836 bl->total_benefit += benefit;
5838 else
5839 /* Fatal error, biv missing for this giv? */
5840 abort ();
5842 if (type == DEST_ADDR)
5844 v->replaceable = 1;
5845 v->not_replaceable = 0;
5847 else
5849 /* The giv can be replaced outright by the reduced register only if all
5850 of the following conditions are true:
5851 - the insn that sets the giv is always executed on any iteration
5852 on which the giv is used at all
5853 (there are two ways to deduce this:
5854 either the insn is executed on every iteration,
5855 or all uses follow that insn in the same basic block),
5856 - the giv is not used outside the loop
5857 - no assignments to the biv occur during the giv's lifetime. */
5859 if (REGNO_FIRST_UID (REGNO (dest_reg)) == INSN_UID (insn)
5860 /* Previous line always fails if INSN was moved by loop opt. */
5861 && REGNO_LAST_LUID (REGNO (dest_reg))
5862 < INSN_LUID (loop->end)
5863 && (! not_every_iteration
5864 || last_use_this_basic_block (dest_reg, insn)))
5866 /* Now check that there are no assignments to the biv within the
5867 giv's lifetime. This requires two separate checks. */
5869 /* Check each biv update, and fail if any are between the first
5870 and last use of the giv.
5872 If this loop contains an inner loop that was unrolled, then
5873 the insn modifying the biv may have been emitted by the loop
5874 unrolling code, and hence does not have a valid luid. Just
5875 mark the biv as not replaceable in this case. It is not very
5876 useful as a biv, because it is used in two different loops.
5877 It is very unlikely that we would be able to optimize the giv
5878 using this biv anyways. */
5880 v->replaceable = 1;
5881 v->not_replaceable = 0;
5882 for (b = bl->biv; b; b = b->next_iv)
5884 if (INSN_UID (b->insn) >= max_uid_for_loop
5885 || ((INSN_LUID (b->insn)
5886 >= REGNO_FIRST_LUID (REGNO (dest_reg)))
5887 && (INSN_LUID (b->insn)
5888 <= REGNO_LAST_LUID (REGNO (dest_reg)))))
5890 v->replaceable = 0;
5891 v->not_replaceable = 1;
5892 break;
5896 /* If there are any backwards branches that go from after the
5897 biv update to before it, then this giv is not replaceable. */
5898 if (v->replaceable)
5899 for (b = bl->biv; b; b = b->next_iv)
5900 if (back_branch_in_range_p (loop, b->insn))
5902 v->replaceable = 0;
5903 v->not_replaceable = 1;
5904 break;
5907 else
5909 /* May still be replaceable; we don't have enough info here to
5910 decide. */
5911 v->replaceable = 0;
5912 v->not_replaceable = 0;
5916 /* Record whether the add_val contains a const_int, for later use by
5917 combine_givs. */
5919 rtx tem = add_val;
5921 v->no_const_addval = 1;
5922 if (tem == const0_rtx)
5924 else if (CONSTANT_P (add_val))
5925 v->no_const_addval = 0;
5926 if (GET_CODE (tem) == PLUS)
5928 while (1)
5930 if (GET_CODE (XEXP (tem, 0)) == PLUS)
5931 tem = XEXP (tem, 0);
5932 else if (GET_CODE (XEXP (tem, 1)) == PLUS)
5933 tem = XEXP (tem, 1);
5934 else
5935 break;
5937 if (CONSTANT_P (XEXP (tem, 1)))
5938 v->no_const_addval = 0;
5942 if (loop_dump_stream)
5943 loop_giv_dump (v, loop_dump_stream, 0);
5946 /* All this does is determine whether a giv can be made replaceable because
5947 its final value can be calculated. This code can not be part of record_giv
5948 above, because final_giv_value requires that the number of loop iterations
5949 be known, and that can not be accurately calculated until after all givs
5950 have been identified. */
5952 static void
5953 check_final_value (const struct loop *loop, struct induction *v)
5955 rtx final_value = 0;
5957 /* DEST_ADDR givs will never reach here, because they are always marked
5958 replaceable above in record_giv. */
5960 /* The giv can be replaced outright by the reduced register only if all
5961 of the following conditions are true:
5962 - the insn that sets the giv is always executed on any iteration
5963 on which the giv is used at all
5964 (there are two ways to deduce this:
5965 either the insn is executed on every iteration,
5966 or all uses follow that insn in the same basic block),
5967 - its final value can be calculated (this condition is different
5968 than the one above in record_giv)
5969 - it's not used before it's set
5970 - no assignments to the biv occur during the giv's lifetime. */
5972 #if 0
5973 /* This is only called now when replaceable is known to be false. */
5974 /* Clear replaceable, so that it won't confuse final_giv_value. */
5975 v->replaceable = 0;
5976 #endif
5978 if ((final_value = final_giv_value (loop, v))
5979 && (v->always_executed
5980 || last_use_this_basic_block (v->dest_reg, v->insn)))
5982 int biv_increment_seen = 0, before_giv_insn = 0;
5983 rtx p = v->insn;
5984 rtx last_giv_use;
5986 v->replaceable = 1;
5987 v->not_replaceable = 0;
5989 /* When trying to determine whether or not a biv increment occurs
5990 during the lifetime of the giv, we can ignore uses of the variable
5991 outside the loop because final_value is true. Hence we can not
5992 use regno_last_uid and regno_first_uid as above in record_giv. */
5994 /* Search the loop to determine whether any assignments to the
5995 biv occur during the giv's lifetime. Start with the insn
5996 that sets the giv, and search around the loop until we come
5997 back to that insn again.
5999 Also fail if there is a jump within the giv's lifetime that jumps
6000 to somewhere outside the lifetime but still within the loop. This
6001 catches spaghetti code where the execution order is not linear, and
6002 hence the above test fails. Here we assume that the giv lifetime
6003 does not extend from one iteration of the loop to the next, so as
6004 to make the test easier. Since the lifetime isn't known yet,
6005 this requires two loops. See also record_giv above. */
6007 last_giv_use = v->insn;
6009 while (1)
6011 p = NEXT_INSN (p);
6012 if (p == loop->end)
6014 before_giv_insn = 1;
6015 p = NEXT_INSN (loop->start);
6017 if (p == v->insn)
6018 break;
6020 if (GET_CODE (p) == INSN || GET_CODE (p) == JUMP_INSN
6021 || GET_CODE (p) == CALL_INSN)
6023 /* It is possible for the BIV increment to use the GIV if we
6024 have a cycle. Thus we must be sure to check each insn for
6025 both BIV and GIV uses, and we must check for BIV uses
6026 first. */
6028 if (! biv_increment_seen
6029 && reg_set_p (v->src_reg, PATTERN (p)))
6030 biv_increment_seen = 1;
6032 if (reg_mentioned_p (v->dest_reg, PATTERN (p)))
6034 if (biv_increment_seen || before_giv_insn)
6036 v->replaceable = 0;
6037 v->not_replaceable = 1;
6038 break;
6040 last_giv_use = p;
6045 /* Now that the lifetime of the giv is known, check for branches
6046 from within the lifetime to outside the lifetime if it is still
6047 replaceable. */
6049 if (v->replaceable)
6051 p = v->insn;
6052 while (1)
6054 p = NEXT_INSN (p);
6055 if (p == loop->end)
6056 p = NEXT_INSN (loop->start);
6057 if (p == last_giv_use)
6058 break;
6060 if (GET_CODE (p) == JUMP_INSN && JUMP_LABEL (p)
6061 && LABEL_NAME (JUMP_LABEL (p))
6062 && ((loop_insn_first_p (JUMP_LABEL (p), v->insn)
6063 && loop_insn_first_p (loop->start, JUMP_LABEL (p)))
6064 || (loop_insn_first_p (last_giv_use, JUMP_LABEL (p))
6065 && loop_insn_first_p (JUMP_LABEL (p), loop->end))))
6067 v->replaceable = 0;
6068 v->not_replaceable = 1;
6070 if (loop_dump_stream)
6071 fprintf (loop_dump_stream,
6072 "Found branch outside giv lifetime.\n");
6074 break;
6079 /* If it is replaceable, then save the final value. */
6080 if (v->replaceable)
6081 v->final_value = final_value;
6084 if (loop_dump_stream && v->replaceable)
6085 fprintf (loop_dump_stream, "Insn %d: giv reg %d final_value replaceable\n",
6086 INSN_UID (v->insn), REGNO (v->dest_reg));
6089 /* Update the status of whether a giv can derive other givs.
6091 We need to do something special if there is or may be an update to the biv
6092 between the time the giv is defined and the time it is used to derive
6093 another giv.
6095 In addition, a giv that is only conditionally set is not allowed to
6096 derive another giv once a label has been passed.
6098 The cases we look at are when a label or an update to a biv is passed. */
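/* A worked (hypothetical) instance of the derive_adjustment computed
   below: suppose giv G is set from biv B as G = 4*B + 10 and the biv
   update B = B + 2 is then passed.  In terms of the updated B, the
   value held by G is 4*(B - 2) + 10 = 4*B + 10 - 8, so an adjustment
   of biv->add_val * giv->mult_val = 2 * 4 = 8 is recorded and givs
   later derived from G subtract it.  */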
6100 static void
6101 update_giv_derive (const struct loop *loop, rtx p)
6103 struct loop_ivs *ivs = LOOP_IVS (loop);
6104 struct iv_class *bl;
6105 struct induction *biv, *giv;
6106 rtx tem;
6107 int dummy;
6109 /* Search all IV classes, then all bivs, and finally all givs.
6111 There are three cases we are concerned with. First we have the situation
6112 of a giv that is only updated conditionally. In that case, it may not
6113 derive any givs after a label is passed.
6115 The second case is when a biv update occurs, or may occur, after the
6116 definition of a giv. For certain biv updates (see below) that are
6117 known to occur between the giv definition and use, we can adjust the
6118 giv definition. For others, or when the biv update is conditional,
6119 we must prevent the giv from deriving any other givs. There are two
6120 sub-cases within this case.
6122 If this is a label, we are concerned with any biv update that is done
6123 conditionally, since it may be done after the giv is defined followed by
6124 a branch here (actually, we need to pass both a jump and a label, but
6125 this extra tracking doesn't seem worth it).
6127 If this is a jump, we are concerned about any biv update that may be
6128 executed multiple times. We are actually only concerned about
6129 backward jumps, but it is probably not worth performing the test
6130 on the jump again here.
6132 If this is a biv update, we must adjust the giv status to show that a
6133 subsequent biv update was performed. If this adjustment cannot be done,
6134 the giv cannot derive further givs. */
6136 for (bl = ivs->list; bl; bl = bl->next)
6137 for (biv = bl->biv; biv; biv = biv->next_iv)
6138 if (GET_CODE (p) == CODE_LABEL || GET_CODE (p) == JUMP_INSN
6139 || biv->insn == p)
6141 /* Skip if location is the same as a previous one. */
6142 if (biv->same)
6143 continue;
6145 for (giv = bl->giv; giv; giv = giv->next_iv)
6147 /* If cant_derive is already true, there is no point in
6148 checking all of these conditions again. */
6149 if (giv->cant_derive)
6150 continue;
6152 /* If this giv is conditionally set and we have passed a label,
6153 it cannot derive anything. */
6154 if (GET_CODE (p) == CODE_LABEL && ! giv->always_computable)
6155 giv->cant_derive = 1;
6157 /* Skip givs that have mult_val == 0, since
6158 they are really invariants. Also skip those that are
6159 replaceable, since we know their lifetime doesn't contain
6160 any biv update. */
6161 else if (giv->mult_val == const0_rtx || giv->replaceable)
6162 continue;
6164 /* The only way we can allow this giv to derive another
6165 is if this is a biv increment and we can form the product
6166 of biv->add_val and giv->mult_val. In this case, we will
6167 be able to compute a compensation. */
6168 else if (biv->insn == p)
6170 rtx ext_val_dummy;
6172 tem = 0;
6173 if (biv->mult_val == const1_rtx)
6174 tem = simplify_giv_expr (loop,
6175 gen_rtx_MULT (giv->mode,
6176 biv->add_val,
6177 giv->mult_val),
6178 &ext_val_dummy, &dummy);
6180 if (tem && giv->derive_adjustment)
6181 tem = simplify_giv_expr
6182 (loop,
6183 gen_rtx_PLUS (giv->mode, tem, giv->derive_adjustment),
6184 &ext_val_dummy, &dummy);
6186 if (tem)
6187 giv->derive_adjustment = tem;
6188 else
6189 giv->cant_derive = 1;
6191 else if ((GET_CODE (p) == CODE_LABEL && ! biv->always_computable)
6192 || (GET_CODE (p) == JUMP_INSN && biv->maybe_multiple))
6193 giv->cant_derive = 1;
6198 /* Check whether an insn is an increment legitimate for a basic induction var.
6199 X is the source of insn P, or a part of it.
6200 MODE is the mode in which X should be interpreted.
6202 DEST_REG is the putative biv, also the destination of the insn.
6203 We accept patterns of these forms:
6204 REG = REG + INVARIANT (includes REG = REG - CONSTANT)
6205 REG = INVARIANT + REG
6207 If X is suitable, we return 1, set *MULT_VAL to CONST1_RTX,
6208 store the additive term into *INC_VAL, and store the place where
6209 we found the additive term into *LOCATION.
6211 If X is an assignment of an invariant into DEST_REG, we set
6212 *MULT_VAL to CONST0_RTX, and store the invariant into *INC_VAL.
6214 We also want to detect a BIV when it corresponds to a variable
6215 whose mode was promoted via PROMOTED_MODE. In that case, an increment
6216 of the variable may be a PLUS that adds a SUBREG of that variable to
6217 an invariant and then sign- or zero-extends the result of the PLUS
6218 into the variable.
6220 Most GIVs in such cases will be in the promoted mode, since that is
6221 probably the natural computation mode (and almost certainly the mode
6222 used for addresses) on the machine. So we view the pseudo-reg containing
6223 the variable as the BIV, as if it were simply incremented.
6225 Note that treating the entire pseudo as a BIV will result in making
6226 simple increments to any GIVs based on it. However, if the variable
6227 overflows in its declared mode but not its promoted mode, the result will
6228 be incorrect. This is acceptable if the variable is signed, since
6229 overflows in such cases are undefined, but not if it is unsigned, since
6230 those overflows are defined. So we only check for SIGN_EXTEND and
6231 not ZERO_EXTEND.
6233 If we cannot find a biv, we return 0. */
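/* Illustrative (hypothetical) outcomes for the accepted patterns: an
   insn setting the biv from (plus (reg i) (const_int 4)) yields
   *MULT_VAL = const1_rtx and *INC_VAL = (const_int 4); an insn in the
   innermost loop setting it from an invariant such as (const_int 100)
   yields *MULT_VAL = const0_rtx and *INC_VAL = (const_int 100).  */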
6235 static int
6236 basic_induction_var (const struct loop *loop, rtx x, enum machine_mode mode,
6237 rtx dest_reg, rtx p, rtx *inc_val, rtx *mult_val,
6238 rtx **location)
6240 enum rtx_code code;
6241 rtx *argp, arg;
6242 rtx insn, set = 0, last, inc;
6244 code = GET_CODE (x);
6245 *location = NULL;
6246 switch (code)
6248 case PLUS:
6249 if (rtx_equal_p (XEXP (x, 0), dest_reg)
6250 || (GET_CODE (XEXP (x, 0)) == SUBREG
6251 && SUBREG_PROMOTED_VAR_P (XEXP (x, 0))
6252 && SUBREG_REG (XEXP (x, 0)) == dest_reg))
6254 argp = &XEXP (x, 1);
6256 else if (rtx_equal_p (XEXP (x, 1), dest_reg)
6257 || (GET_CODE (XEXP (x, 1)) == SUBREG
6258 && SUBREG_PROMOTED_VAR_P (XEXP (x, 1))
6259 && SUBREG_REG (XEXP (x, 1)) == dest_reg))
6261 argp = &XEXP (x, 0);
6263 else
6264 return 0;
6266 arg = *argp;
6267 if (loop_invariant_p (loop, arg) != 1)
6268 return 0;
6270 /* convert_modes can emit new instructions, e.g. when arg is a loop
6271 invariant MEM and dest_reg has a different mode.
6272 These instructions would be emitted after the end of the function
6273 and then *inc_val would be an uninitialized pseudo.
6274 Detect this and bail in this case.
6275 Other alternatives to solve this can be introducing a convert_modes
6276 variant which is allowed to fail but not allowed to emit new
6277 instructions, emit these instructions before loop start and let
6278 it be garbage collected if *inc_val is never used or saving the
6279 *inc_val initialization sequence generated here and when *inc_val
6280 is going to be actually used, emit it at some suitable place. */
6281 last = get_last_insn ();
6282 inc = convert_modes (GET_MODE (dest_reg), GET_MODE (x), arg, 0);
6283 if (get_last_insn () != last)
6285 delete_insns_since (last);
6286 return 0;
6289 *inc_val = inc;
6290 *mult_val = const1_rtx;
6291 *location = argp;
6292 return 1;
6294 case SUBREG:
6295 /* If what's inside the SUBREG is a BIV, then so is the SUBREG. This will
6296 handle addition of promoted variables.
6297 ??? The comment at the start of this function is wrong: promoted
6298 variable increments don't look like it says they do. */
6299 return basic_induction_var (loop, SUBREG_REG (x),
6300 GET_MODE (SUBREG_REG (x)),
6301 dest_reg, p, inc_val, mult_val, location);
6303 case REG:
6304 /* If this register is assigned in a previous insn, look at its
6305 source, but don't go outside the loop or past a label. */
6307 /* If this sets a register to itself, we would repeat any previous
6308 biv increment if we applied this strategy blindly. */
6309 if (rtx_equal_p (dest_reg, x))
6310 return 0;
6312 insn = p;
6313 while (1)
6315 rtx dest;
6316 do
6318 insn = PREV_INSN (insn);
6320 while (insn && GET_CODE (insn) == NOTE
6321 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_BEG);
6323 if (!insn)
6324 break;
6325 set = single_set (insn);
6326 if (set == 0)
6327 break;
6328 dest = SET_DEST (set);
6329 if (dest == x
6330 || (GET_CODE (dest) == SUBREG
6331 && (GET_MODE_SIZE (GET_MODE (dest)) <= UNITS_PER_WORD)
6332 && (GET_MODE_CLASS (GET_MODE (dest)) == MODE_INT)
6333 && SUBREG_REG (dest) == x))
6334 return basic_induction_var (loop, SET_SRC (set),
6335 (GET_MODE (SET_SRC (set)) == VOIDmode
6336 ? GET_MODE (x)
6337 : GET_MODE (SET_SRC (set))),
6338 dest_reg, insn,
6339 inc_val, mult_val, location);
6341 while (GET_CODE (dest) == SIGN_EXTRACT
6342 || GET_CODE (dest) == ZERO_EXTRACT
6343 || GET_CODE (dest) == SUBREG
6344 || GET_CODE (dest) == STRICT_LOW_PART)
6345 dest = XEXP (dest, 0);
6346 if (dest == x)
6347 break;
6349 /* Fall through. */
6351 /* Can accept constant setting of biv only when inside innermost loop.
6352 Otherwise, a biv of an inner loop may be incorrectly recognized
6353 as a biv of the outer loop,
6354 causing code to be moved INTO the inner loop. */
6355 case MEM:
6356 if (loop_invariant_p (loop, x) != 1)
6357 return 0;
6358 case CONST_INT:
6359 case SYMBOL_REF:
6360 case CONST:
6361 /* convert_modes aborts if we try to convert to or from CCmode, so just
6362 exclude that case. It is very unlikely that a condition code value
6363 would be a useful iterator anyways. convert_modes aborts if we try to
6364 convert a float mode to non-float or vice versa too. */
6365 if (loop->level == 1
6366 && GET_MODE_CLASS (mode) == GET_MODE_CLASS (GET_MODE (dest_reg))
6367 && GET_MODE_CLASS (mode) != MODE_CC)
6369 /* Possible bug here? Perhaps we don't know the mode of X. */
6370 last = get_last_insn ();
6371 inc = convert_modes (GET_MODE (dest_reg), mode, x, 0);
6372 if (get_last_insn () != last)
6374 delete_insns_since (last);
6375 return 0;
6378 *inc_val = inc;
6379 *mult_val = const0_rtx;
6380 return 1;
6382 else
6383 return 0;
6385 case SIGN_EXTEND:
6386 /* Ignore this BIV if signed arithmetic overflow is defined. */
6387 if (flag_wrapv)
6388 return 0;
6389 return basic_induction_var (loop, XEXP (x, 0), GET_MODE (XEXP (x, 0)),
6390 dest_reg, p, inc_val, mult_val, location);
6392 case ASHIFTRT:
6393 /* Similar, since this can be a sign extension. */
6394 for (insn = PREV_INSN (p);
6395 (insn && GET_CODE (insn) == NOTE
6396 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_BEG);
6397 insn = PREV_INSN (insn))
6400 if (insn)
6401 set = single_set (insn);
6403 if (! rtx_equal_p (dest_reg, XEXP (x, 0))
6404 && set && SET_DEST (set) == XEXP (x, 0)
6405 && GET_CODE (XEXP (x, 1)) == CONST_INT
6406 && INTVAL (XEXP (x, 1)) >= 0
6407 && GET_CODE (SET_SRC (set)) == ASHIFT
6408 && XEXP (x, 1) == XEXP (SET_SRC (set), 1))
6409 return basic_induction_var (loop, XEXP (SET_SRC (set), 0),
6410 GET_MODE (XEXP (x, 0)),
6411 dest_reg, insn, inc_val, mult_val,
6412 location);
6413 return 0;
6415 default:
6416 return 0;
6420 /* A general induction variable (giv) is any quantity that is a linear
6421 function of a basic induction variable,
6422 i.e. giv = biv * mult_val + add_val.
6423 The coefficients can be any loop invariant quantity.
6424 A giv need not be computed directly from the biv;
6425 it can be computed by way of other givs. */
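/* For instance (hypothetical source):

       for (i = 0; i < n; i++)
         j = i * 4 + 10;

   makes `i' a biv and `j' a giv with mult_val 4 and add_val 10; an
   address formed from `j' would then be a giv computed by way of `j'
   rather than directly from `i'.  */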
6427 /* Determine whether X computes a giv.
6428 If it does, return a nonzero value
6429 which is the benefit from eliminating the computation of X;
6430 set *SRC_REG to the register of the biv that it is computed from;
6431 set *ADD_VAL and *MULT_VAL to the coefficients,
6432 such that the value of X is biv * mult + add; */
6434 static int
6435 general_induction_var (const struct loop *loop, rtx x, rtx *src_reg,
6436 rtx *add_val, rtx *mult_val, rtx *ext_val,
6437 int is_addr, int *pbenefit,
6438 enum machine_mode addr_mode)
6440 struct loop_ivs *ivs = LOOP_IVS (loop);
6441 rtx orig_x = x;
6443 /* If this is an invariant, forget it, it isn't a giv. */
6444 if (loop_invariant_p (loop, x) == 1)
6445 return 0;
6447 *pbenefit = 0;
6448 *ext_val = NULL_RTX;
6449 x = simplify_giv_expr (loop, x, ext_val, pbenefit);
6450 if (x == 0)
6451 return 0;
6453 switch (GET_CODE (x))
6455 case USE:
6456 case CONST_INT:
6457 /* Since this is now an invariant and wasn't before, it must be a giv
6458 with MULT_VAL == 0. It doesn't matter which BIV we associate this
6459 with. */
6460 *src_reg = ivs->list->biv->dest_reg;
6461 *mult_val = const0_rtx;
6462 *add_val = x;
6463 break;
6465 case REG:
6466 /* This is equivalent to a BIV. */
6467 *src_reg = x;
6468 *mult_val = const1_rtx;
6469 *add_val = const0_rtx;
6470 break;
6472 case PLUS:
6473 /* Either (plus (biv) (invar)) or
6474 (plus (mult (biv) (invar_1)) (invar_2)). */
6475 if (GET_CODE (XEXP (x, 0)) == MULT)
6477 *src_reg = XEXP (XEXP (x, 0), 0);
6478 *mult_val = XEXP (XEXP (x, 0), 1);
6480 else
6482 *src_reg = XEXP (x, 0);
6483 *mult_val = const1_rtx;
6485 *add_val = XEXP (x, 1);
6486 break;
6488 case MULT:
6489 /* ADD_VAL is zero. */
6490 *src_reg = XEXP (x, 0);
6491 *mult_val = XEXP (x, 1);
6492 *add_val = const0_rtx;
6493 break;
6495 default:
6496 abort ();
6499 /* Remove any enclosing USE from ADD_VAL and MULT_VAL (there will be one
6500 unless they are CONST_INT). */
6501 if (GET_CODE (*add_val) == USE)
6502 *add_val = XEXP (*add_val, 0);
6503 if (GET_CODE (*mult_val) == USE)
6504 *mult_val = XEXP (*mult_val, 0);
6506 if (is_addr)
6507 *pbenefit += address_cost (orig_x, addr_mode) - reg_address_cost;
6508 else
6509 *pbenefit += rtx_cost (orig_x, SET);
6511 /* Always return true if this is a giv so it will be detected as such,
6512 even if the benefit is zero or negative. This allows elimination
6513 of bivs that might otherwise not be eliminated. */
6514 return 1;
6517 /* Given an expression, X, try to form it as a linear function of a biv.
6518 We will canonicalize it to be of the form
6519 (plus (mult (BIV) (invar_1))
6520 (invar_2))
6521 with possible degeneracies.
6523 The invariant expressions must each be of a form that can be used as a
6524 machine operand. We surround them with a USE rtx (a hack, but localized
6525 and certainly unambiguous!) if not a CONST_INT for simplicity in this
6526 routine; it is the caller's responsibility to strip them.
6528 If no such canonicalization is possible (i.e., two biv's are used or an
6529 expression that is neither invariant nor a biv or giv), this routine
6530 returns 0.
6532 For a nonzero return, the result will have a code of CONST_INT, USE,
6533 REG (for a BIV), PLUS, or MULT. No other codes will occur.
6535 *BENEFIT will be incremented by the benefit of any sub-giv encountered. */
6537 static rtx sge_plus (enum machine_mode, rtx, rtx);
6538 static rtx sge_plus_constant (rtx, rtx);
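/* A small (hypothetical) canonicalization example: with `b' a biv, the
   expression (mult (plus (reg b) (const_int 2)) (const_int 4)) is
   distributed and returned as

       (plus (mult (reg b) (const_int 4)) (const_int 8));

   invariant operands that are not CONST_INT would be wrapped in USE.  */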
6540 static rtx
6541 simplify_giv_expr (const struct loop *loop, rtx x, rtx *ext_val, int *benefit)
6543 struct loop_ivs *ivs = LOOP_IVS (loop);
6544 struct loop_regs *regs = LOOP_REGS (loop);
6545 enum machine_mode mode = GET_MODE (x);
6546 rtx arg0, arg1;
6547 rtx tem;
6549 /* If this is not an integer mode, or if we cannot do arithmetic in this
6550 mode, this can't be a giv. */
6551 if (mode != VOIDmode
6552 && (GET_MODE_CLASS (mode) != MODE_INT
6553 || GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT))
6554 return NULL_RTX;
6556 switch (GET_CODE (x))
6558 case PLUS:
6559 arg0 = simplify_giv_expr (loop, XEXP (x, 0), ext_val, benefit);
6560 arg1 = simplify_giv_expr (loop, XEXP (x, 1), ext_val, benefit);
6561 if (arg0 == 0 || arg1 == 0)
6562 return NULL_RTX;
6564 /* Put constant last, CONST_INT last if both constant. */
6565 if ((GET_CODE (arg0) == USE
6566 || GET_CODE (arg0) == CONST_INT)
6567 && ! ((GET_CODE (arg0) == USE
6568 && GET_CODE (arg1) == USE)
6569 || GET_CODE (arg1) == CONST_INT))
6570 tem = arg0, arg0 = arg1, arg1 = tem;
6572 /* Handle addition of zero, then addition of an invariant. */
6573 if (arg1 == const0_rtx)
6574 return arg0;
6575 else if (GET_CODE (arg1) == CONST_INT || GET_CODE (arg1) == USE)
6576 switch (GET_CODE (arg0))
6578 case CONST_INT:
6579 case USE:
6580 /* Adding two invariants must result in an invariant, so enclose
6581 addition operation inside a USE and return it. */
6582 if (GET_CODE (arg0) == USE)
6583 arg0 = XEXP (arg0, 0);
6584 if (GET_CODE (arg1) == USE)
6585 arg1 = XEXP (arg1, 0);
6587 if (GET_CODE (arg0) == CONST_INT)
6588 tem = arg0, arg0 = arg1, arg1 = tem;
6589 if (GET_CODE (arg1) == CONST_INT)
6590 tem = sge_plus_constant (arg0, arg1);
6591 else
6592 tem = sge_plus (mode, arg0, arg1);
6594 if (GET_CODE (tem) != CONST_INT)
6595 tem = gen_rtx_USE (mode, tem);
6596 return tem;
6598 case REG:
6599 case MULT:
6600 /* biv + invar or mult + invar. Return sum. */
6601 return gen_rtx_PLUS (mode, arg0, arg1);
6603 case PLUS:
6604 /* (a + invar_1) + invar_2. Associate. */
6605 return
6606 simplify_giv_expr (loop,
6607 gen_rtx_PLUS (mode,
6608 XEXP (arg0, 0),
6609 gen_rtx_PLUS (mode,
6610 XEXP (arg0, 1),
6611 arg1)),
6612 ext_val, benefit);
6614 default:
6615 abort ();
6618 /* Each argument must be either REG, PLUS, or MULT. Convert REG to
6619 MULT to reduce cases. */
6620 if (GET_CODE (arg0) == REG)
6621 arg0 = gen_rtx_MULT (mode, arg0, const1_rtx);
6622 if (GET_CODE (arg1) == REG)
6623 arg1 = gen_rtx_MULT (mode, arg1, const1_rtx);
6625 /* Now have PLUS + PLUS, PLUS + MULT, MULT + PLUS, or MULT + MULT.
6626 Put a MULT first, leaving PLUS + PLUS, MULT + PLUS, or MULT + MULT.
6627 Recurse to associate the second PLUS. */
6628 if (GET_CODE (arg1) == MULT)
6629 tem = arg0, arg0 = arg1, arg1 = tem;
6631 if (GET_CODE (arg1) == PLUS)
6632 return
6633 simplify_giv_expr (loop,
6634 gen_rtx_PLUS (mode,
6635 gen_rtx_PLUS (mode, arg0,
6636 XEXP (arg1, 0)),
6637 XEXP (arg1, 1)),
6638 ext_val, benefit);
6640 /* Now must have MULT + MULT. Distribute if same biv, else not giv. */
6641 if (GET_CODE (arg0) != MULT || GET_CODE (arg1) != MULT)
6642 return NULL_RTX;
6644 if (!rtx_equal_p (arg0, arg1))
6645 return NULL_RTX;
6647 return simplify_giv_expr (loop,
6648 gen_rtx_MULT (mode,
6649 XEXP (arg0, 0),
6650 gen_rtx_PLUS (mode,
6651 XEXP (arg0, 1),
6652 XEXP (arg1, 1))),
6653 ext_val, benefit);
6655 case MINUS:
6656 /* Handle "a - b" as "a + b * (-1)". */
6657 return simplify_giv_expr (loop,
6658 gen_rtx_PLUS (mode,
6659 XEXP (x, 0),
6660 gen_rtx_MULT (mode,
6661 XEXP (x, 1),
6662 constm1_rtx)),
6663 ext_val, benefit);
6665 case MULT:
6666 arg0 = simplify_giv_expr (loop, XEXP (x, 0), ext_val, benefit);
6667 arg1 = simplify_giv_expr (loop, XEXP (x, 1), ext_val, benefit);
6668 if (arg0 == 0 || arg1 == 0)
6669 return NULL_RTX;
6671 /* Put constant last, CONST_INT last if both constant. */
6672 if ((GET_CODE (arg0) == USE || GET_CODE (arg0) == CONST_INT)
6673 && GET_CODE (arg1) != CONST_INT)
6674 tem = arg0, arg0 = arg1, arg1 = tem;
6676 /* If second argument is not now constant, not giv. */
6677 if (GET_CODE (arg1) != USE && GET_CODE (arg1) != CONST_INT)
6678 return NULL_RTX;
6680 /* Handle multiply by 0 or 1. */
6681 if (arg1 == const0_rtx)
6682 return const0_rtx;
6684 else if (arg1 == const1_rtx)
6685 return arg0;
6687 switch (GET_CODE (arg0))
6689 case REG:
6690 /* biv * invar. Done. */
6691 return gen_rtx_MULT (mode, arg0, arg1);
6693 case CONST_INT:
6694 /* Product of two constants. */
6695 return GEN_INT (INTVAL (arg0) * INTVAL (arg1));
6697 case USE:
6698 /* invar * invar is a giv, but attempt to simplify it somehow. */
6699 if (GET_CODE (arg1) != CONST_INT)
6700 return NULL_RTX;
6702 arg0 = XEXP (arg0, 0);
6703 if (GET_CODE (arg0) == MULT)
6705 /* (invar_0 * invar_1) * invar_2. Associate. */
6706 return simplify_giv_expr (loop,
6707 gen_rtx_MULT (mode,
6708 XEXP (arg0, 0),
6709 gen_rtx_MULT (mode,
6710 XEXP (arg0,
6711 1),
6712 arg1)),
6713 ext_val, benefit);
6715 /* Propagate the MULT expressions to the innermost nodes. */
6716 else if (GET_CODE (arg0) == PLUS)
6718 /* (invar_0 + invar_1) * invar_2. Distribute. */
6719 return simplify_giv_expr (loop,
6720 gen_rtx_PLUS (mode,
6721 gen_rtx_MULT (mode,
6722 XEXP (arg0,
6723 0),
6724 arg1),
6725 gen_rtx_MULT (mode,
6726 XEXP (arg0,
6727 1),
6728 arg1)),
6729 ext_val, benefit);
6731 return gen_rtx_USE (mode, gen_rtx_MULT (mode, arg0, arg1));
6733 case MULT:
6734 /* (a * invar_1) * invar_2. Associate. */
6735 return simplify_giv_expr (loop,
6736 gen_rtx_MULT (mode,
6737 XEXP (arg0, 0),
6738 gen_rtx_MULT (mode,
6739 XEXP (arg0, 1),
6740 arg1)),
6741 ext_val, benefit);
6743 case PLUS:
6744 /* (a + invar_1) * invar_2. Distribute. */
6745 return simplify_giv_expr (loop,
6746 gen_rtx_PLUS (mode,
6747 gen_rtx_MULT (mode,
6748 XEXP (arg0, 0),
6749 arg1),
6750 gen_rtx_MULT (mode,
6751 XEXP (arg0, 1),
6752 arg1)),
6753 ext_val, benefit);
6755 default:
6756 abort ();
6759 case ASHIFT:
6760 /* Shift by constant is multiply by power of two. */
6761 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6762 return 0;
6764 return
6765 simplify_giv_expr (loop,
6766 gen_rtx_MULT (mode,
6767 XEXP (x, 0),
6768 GEN_INT ((HOST_WIDE_INT) 1
6769 << INTVAL (XEXP (x, 1)))),
6770 ext_val, benefit);
6772 case NEG:
6773 /* "-a" is "a * (-1)" */
6774 return simplify_giv_expr (loop,
6775 gen_rtx_MULT (mode, XEXP (x, 0), constm1_rtx),
6776 ext_val, benefit);
6778 case NOT:
6779 /* "~a" is "-a - 1". Silly, but easy. */
6780 return simplify_giv_expr (loop,
6781 gen_rtx_MINUS (mode,
6782 gen_rtx_NEG (mode, XEXP (x, 0)),
6783 const1_rtx),
6784 ext_val, benefit);
6786 case USE:
6787 /* Already in proper form for invariant. */
6788 return x;
6790 case SIGN_EXTEND:
6791 case ZERO_EXTEND:
6792 case TRUNCATE:
6793 /* Conditionally recognize extensions of simple IVs. After we've
6794 computed loop traversal counts and verified the range of the
6795 source IV, we'll reevaluate this as a GIV. */
6796 if (*ext_val == NULL_RTX)
6798 arg0 = simplify_giv_expr (loop, XEXP (x, 0), ext_val, benefit);
6799 if (arg0 && *ext_val == NULL_RTX && GET_CODE (arg0) == REG)
6801 *ext_val = gen_rtx_fmt_e (GET_CODE (x), mode, arg0);
6802 return arg0;
6805 goto do_default;
6807 case REG:
6808 /* If this is a new register, we can't deal with it. */
6809 if (REGNO (x) >= max_reg_before_loop)
6810 return 0;
6812 /* Check for biv or giv. */
6813 switch (REG_IV_TYPE (ivs, REGNO (x)))
6815 case BASIC_INDUCT:
6816 return x;
6817 case GENERAL_INDUCT:
6819 struct induction *v = REG_IV_INFO (ivs, REGNO (x));
6821 /* Form expression from giv and add benefit. Ensure this giv
6822 can derive another and subtract any needed adjustment if so. */
6824 /* Increasing the benefit here is risky. The only case in which it
6825 is arguably correct is if this is the only use of V. In other
6826 cases, this will artificially inflate the benefit of the current
6827 giv, and lead to suboptimal code. Thus, it is disabled, since
6828 potentially not reducing an only marginally beneficial giv is
6829 less harmful than reducing many givs that are not really
6830 beneficial. */
6832 rtx single_use = regs->array[REGNO (x)].single_usage;
6833 if (single_use && single_use != const0_rtx)
6834 *benefit += v->benefit;
6837 if (v->cant_derive)
6838 return 0;
6840 tem = gen_rtx_PLUS (mode, gen_rtx_MULT (mode,
6841 v->src_reg, v->mult_val),
6842 v->add_val);
6844 if (v->derive_adjustment)
6845 tem = gen_rtx_MINUS (mode, tem, v->derive_adjustment);
6846 arg0 = simplify_giv_expr (loop, tem, ext_val, benefit);
6847 if (*ext_val)
6849 if (!v->ext_dependent)
6850 return arg0;
6852 else
6854 *ext_val = v->ext_dependent;
6855 return arg0;
6857 return 0;
6860 default:
6861 do_default:
6862 /* If it isn't an induction variable, and it is invariant, we
6863 may be able to simplify things further by looking through
6864 the bits we just moved outside the loop. */
6865 if (loop_invariant_p (loop, x) == 1)
6867 struct movable *m;
6868 struct loop_movables *movables = LOOP_MOVABLES (loop);
6870 for (m = movables->head; m; m = m->next)
6871 if (rtx_equal_p (x, m->set_dest))
6873 /* Ok, we found a match. Substitute and simplify. */
6875 /* If we match another movable, we must use that, as
6876 this one is going away. */
6877 if (m->match)
6878 return simplify_giv_expr (loop, m->match->set_dest,
6879 ext_val, benefit);
6881 /* If consec is nonzero, this is a member of a group of
6882 instructions that were moved together. We handle this
6883 case only to the point of seeking to the last insn and
6884 looking for a REG_EQUAL. Fail if we don't find one. */
6885 if (m->consec != 0)
6887 int i = m->consec;
6888 tem = m->insn;
6889 do
6891 tem = NEXT_INSN (tem);
6893 while (--i > 0);
6895 tem = find_reg_note (tem, REG_EQUAL, NULL_RTX);
6896 if (tem)
6897 tem = XEXP (tem, 0);
6899 else
6901 tem = single_set (m->insn);
6902 if (tem)
6903 tem = SET_SRC (tem);
6906 if (tem)
6908 /* What we are most interested in is pointer
6909 arithmetic on invariants -- only take
6910 patterns we may be able to do something with. */
6911 if (GET_CODE (tem) == PLUS
6912 || GET_CODE (tem) == MULT
6913 || GET_CODE (tem) == ASHIFT
6914 || GET_CODE (tem) == CONST_INT
6915 || GET_CODE (tem) == SYMBOL_REF)
6917 tem = simplify_giv_expr (loop, tem, ext_val,
6918 benefit);
6919 if (tem)
6920 return tem;
6922 else if (GET_CODE (tem) == CONST
6923 && GET_CODE (XEXP (tem, 0)) == PLUS
6924 && GET_CODE (XEXP (XEXP (tem, 0), 0)) == SYMBOL_REF
6925 && GET_CODE (XEXP (XEXP (tem, 0), 1)) == CONST_INT)
6927 tem = simplify_giv_expr (loop, XEXP (tem, 0),
6928 ext_val, benefit);
6929 if (tem)
6930 return tem;
6933 break;
6936 break;
6939 /* Fall through to general case. */
6940 default:
6941 /* If invariant, return as USE (unless CONST_INT).
6942 Otherwise, not giv. */
6943 if (GET_CODE (x) == USE)
6944 x = XEXP (x, 0);
6946 if (loop_invariant_p (loop, x) == 1)
6948 if (GET_CODE (x) == CONST_INT)
6949 return x;
6950 if (GET_CODE (x) == CONST
6951 && GET_CODE (XEXP (x, 0)) == PLUS
6952 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
6953 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
6954 x = XEXP (x, 0);
6955 return gen_rtx_USE (mode, x);
6957 else
6958 return 0;
6962 /* This routine folds invariants such that there is only ever one
6963 CONST_INT in the summation. It is only used by simplify_giv_expr. */
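/* For example (hypothetical operands): folding the constant 5 into
   (plus (reg a) (const_int 3)) yields (plus (reg a) (const_int 8)), so
   at most one CONST_INT survives in the sum.  */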
6965 static rtx
6966 sge_plus_constant (rtx x, rtx c)
6968 if (GET_CODE (x) == CONST_INT)
6969 return GEN_INT (INTVAL (x) + INTVAL (c));
6970 else if (GET_CODE (x) != PLUS)
6971 return gen_rtx_PLUS (GET_MODE (x), x, c);
6972 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6974 return gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
6975 GEN_INT (INTVAL (XEXP (x, 1)) + INTVAL (c)));
6977 else if (GET_CODE (XEXP (x, 0)) == PLUS
6978 || GET_CODE (XEXP (x, 1)) != PLUS)
6980 return gen_rtx_PLUS (GET_MODE (x),
6981 sge_plus_constant (XEXP (x, 0), c), XEXP (x, 1));
6983 else
6985 return gen_rtx_PLUS (GET_MODE (x),
6986 sge_plus_constant (XEXP (x, 1), c), XEXP (x, 0));
6990 static rtx
6991 sge_plus (enum machine_mode mode, rtx x, rtx y)
6993 while (GET_CODE (y) == PLUS)
6995 rtx a = XEXP (y, 0);
6996 if (GET_CODE (a) == CONST_INT)
6997 x = sge_plus_constant (x, a);
6998 else
6999 x = gen_rtx_PLUS (mode, x, a);
7000 y = XEXP (y, 1);
7002 if (GET_CODE (y) == CONST_INT)
7003 x = sge_plus_constant (x, y);
7004 else
7005 x = gen_rtx_PLUS (mode, x, y);
7006 return x;
7009 /* Help detect a giv that is calculated by several consecutive insns;
7010 for example,
7011 giv = biv * M
7012 giv = giv + A
7013 The caller has already identified the first insn P as having a giv as dest;
7014 we check that all other insns that set the same register follow
7015 immediately after P, that they alter nothing else,
7016 and that the result of the last is still a giv.
7018 The value is 0 if the reg set in P is not really a giv.
7019 Otherwise, the value is the amount gained by eliminating
7020 all the consecutive insns that compute the value.
7022 FIRST_BENEFIT is the amount gained by eliminating the first insn, P.
7023 SRC_REG is the reg of the biv; DEST_REG is the reg of the giv.
7025 The coefficients of the ultimate giv value are stored in
7026 *MULT_VAL and *ADD_VAL. */
7028 static int
7029 consec_sets_giv (const struct loop *loop, int first_benefit, rtx p,
7030 rtx src_reg, rtx dest_reg, rtx *add_val, rtx *mult_val,
7031 rtx *ext_val, rtx *last_consec_insn)
7033 struct loop_ivs *ivs = LOOP_IVS (loop);
7034 struct loop_regs *regs = LOOP_REGS (loop);
7035 int count;
7036 enum rtx_code code;
7037 int benefit;
7038 rtx temp;
7039 rtx set;
7041 /* Indicate that this is a giv so that we can update the value produced in
7042 each insn of the multi-insn sequence.
7044 This induction structure will be used only by the call to
7045 general_induction_var below, so we can allocate it on our stack.
7046 If this is a giv, our caller will replace the induct var entry with
7047 a new induction structure. */
7048 struct induction *v;
7050 if (REG_IV_TYPE (ivs, REGNO (dest_reg)) != UNKNOWN_INDUCT)
7051 return 0;
7053 v = alloca (sizeof (struct induction));
7054 v->src_reg = src_reg;
7055 v->mult_val = *mult_val;
7056 v->add_val = *add_val;
7057 v->benefit = first_benefit;
7058 v->cant_derive = 0;
7059 v->derive_adjustment = 0;
7060 v->ext_dependent = NULL_RTX;
7062 REG_IV_TYPE (ivs, REGNO (dest_reg)) = GENERAL_INDUCT;
7063 REG_IV_INFO (ivs, REGNO (dest_reg)) = v;
7065 count = regs->array[REGNO (dest_reg)].n_times_set - 1;
7067 while (count > 0)
7069 p = NEXT_INSN (p);
7070 code = GET_CODE (p);
7072 /* If libcall, skip to end of call sequence. */
7073 if (code == INSN && (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
7074 p = XEXP (temp, 0);
7076 if (code == INSN
7077 && (set = single_set (p))
7078 && GET_CODE (SET_DEST (set)) == REG
7079 && SET_DEST (set) == dest_reg
7080 && (general_induction_var (loop, SET_SRC (set), &src_reg,
7081 add_val, mult_val, ext_val, 0,
7082 &benefit, VOIDmode)
7083 /* Giv created by equivalent expression. */
7084 || ((temp = find_reg_note (p, REG_EQUAL, NULL_RTX))
7085 && general_induction_var (loop, XEXP (temp, 0), &src_reg,
7086 add_val, mult_val, ext_val, 0,
7087 &benefit, VOIDmode)))
7088 && src_reg == v->src_reg)
7090 if (find_reg_note (p, REG_RETVAL, NULL_RTX))
7091 benefit += libcall_benefit (p);
7093 count--;
7094 v->mult_val = *mult_val;
7095 v->add_val = *add_val;
7096 v->benefit += benefit;
7098 else if (code != NOTE)
7100 /* Allow insns that set something other than this giv to a
7101 constant. Such insns are needed on machines which cannot
7102 include long constants and should not disqualify a giv. */
7103 if (code == INSN
7104 && (set = single_set (p))
7105 && SET_DEST (set) != dest_reg
7106 && CONSTANT_P (SET_SRC (set)))
7107 continue;
7109 REG_IV_TYPE (ivs, REGNO (dest_reg)) = UNKNOWN_INDUCT;
7110 return 0;
7114 REG_IV_TYPE (ivs, REGNO (dest_reg)) = UNKNOWN_INDUCT;
7115 *last_consec_insn = p;
7116 return v->benefit;
7119 /* Return an rtx, if any, that expresses giv G2 as a function of the register
7120 represented by G1. If no such expression can be found, or it is clear that
7121 it cannot possibly be a valid address, 0 is returned.
7123 To perform the computation, we note that
7124 G1 = x * v + a and
7125 G2 = y * v + b
7126 where `v' is the biv.
7128 So G2 = (y/x) * G1 + (b - a*y/x).
7130 Note that MULT = y/x.
7132 Update: A and B are now allowed to be additive expressions such that
7133 B contains all variables in A. That is, computing B-A will not require
7134 subtracting variables. */
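/* Worked (hypothetical) numbers: if G1 = 2*v + 4 and G2 = 6*v + 16,
   then MULT = 6/2 = 3 and the additive part is 16 - 4*3 = 4, so
   G2 = 3*G1 + 4.  */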
7136 static rtx
7137 express_from_1 (rtx a, rtx b, rtx mult)
7139 /* If MULT is zero, then A*MULT is zero, and our expression is B. */
7141 if (mult == const0_rtx)
7142 return b;
7144 /* If MULT is not 1, we cannot handle A with non-constants, since we
7145 would then be required to subtract multiples of the registers in A.
7146 This is theoretically possible, and may even apply to some Fortran
7147 constructs, but it is a lot of work and we do not attempt it here. */
7149 if (mult != const1_rtx && GET_CODE (a) != CONST_INT)
7150 return NULL_RTX;
7152 /* In general these structures are sorted top to bottom (down the PLUS
7153 chain), but not left to right across the PLUS. If B is a higher
7154 order giv than A, we can strip one level and recurse. If A is higher
7155 order, we'll eventually bail out, but won't know that until the end.
7156 If they are the same, we'll strip one level around this loop. */
7158 while (GET_CODE (a) == PLUS && GET_CODE (b) == PLUS)
7160 rtx ra, rb, oa, ob, tmp;
7162 ra = XEXP (a, 0), oa = XEXP (a, 1);
7163 if (GET_CODE (ra) == PLUS)
7164 tmp = ra, ra = oa, oa = tmp;
7166 rb = XEXP (b, 0), ob = XEXP (b, 1);
7167 if (GET_CODE (rb) == PLUS)
7168 tmp = rb, rb = ob, ob = tmp;
7170 if (rtx_equal_p (ra, rb))
7171 /* We matched: remove one reg completely. */
7172 a = oa, b = ob;
7173 else if (GET_CODE (ob) != PLUS && rtx_equal_p (ra, ob))
7174 /* An alternate match. */
7175 a = oa, b = rb;
7176 else if (GET_CODE (oa) != PLUS && rtx_equal_p (oa, rb))
7177 /* An alternate match. */
7178 a = ra, b = ob;
7179 else
7181 /* Indicates an extra register in B. Strip one level from B and
7182 recurse, hoping B was the higher order expression. */
7183 ob = express_from_1 (a, ob, mult);
7184 if (ob == NULL_RTX)
7185 return NULL_RTX;
7186 return gen_rtx_PLUS (GET_MODE (b), rb, ob);
7190 /* Here we are at the last level of A, go through the cases hoping to
7191 get rid of everything but a constant. */
7193 if (GET_CODE (a) == PLUS)
7195 rtx ra, oa;
7197 ra = XEXP (a, 0), oa = XEXP (a, 1);
7198 if (rtx_equal_p (oa, b))
7199 oa = ra;
7200 else if (!rtx_equal_p (ra, b))
7201 return NULL_RTX;
7203 if (GET_CODE (oa) != CONST_INT)
7204 return NULL_RTX;
7206 return GEN_INT (-INTVAL (oa) * INTVAL (mult));
7208 else if (GET_CODE (a) == CONST_INT)
7210 return plus_constant (b, -INTVAL (a) * INTVAL (mult));
7212 else if (CONSTANT_P (a))
7214 enum machine_mode mode_a = GET_MODE (a);
7215 enum machine_mode mode_b = GET_MODE (b);
7216 enum machine_mode mode = mode_b == VOIDmode ? mode_a : mode_b;
7217 return simplify_gen_binary (MINUS, mode, b, a);
7219 else if (GET_CODE (b) == PLUS)
7221 if (rtx_equal_p (a, XEXP (b, 0)))
7222 return XEXP (b, 1);
7223 else if (rtx_equal_p (a, XEXP (b, 1)))
7224 return XEXP (b, 0);
7225 else
7226 return NULL_RTX;
7228 else if (rtx_equal_p (a, b))
7229 return const0_rtx;
7231 return NULL_RTX;
7234 static rtx
7235 express_from (struct induction *g1, struct induction *g2)
7237 rtx mult, add;
7239 /* The value that G1 will be multiplied by must be a constant integer. Also,
7240 the only chance we have of getting a valid address is if y/x (see above
7241 for notation) is also an integer. */
7242 if (GET_CODE (g1->mult_val) == CONST_INT
7243 && GET_CODE (g2->mult_val) == CONST_INT)
7245 if (g1->mult_val == const0_rtx
7246 || (g1->mult_val == constm1_rtx
7247 && INTVAL (g2->mult_val)
7248 == (HOST_WIDE_INT) 1 << (HOST_BITS_PER_WIDE_INT - 1))
7249 || INTVAL (g2->mult_val) % INTVAL (g1->mult_val) != 0)
7250 return NULL_RTX;
7251 mult = GEN_INT (INTVAL (g2->mult_val) / INTVAL (g1->mult_val));
7253 else if (rtx_equal_p (g1->mult_val, g2->mult_val))
7254 mult = const1_rtx;
7255 else
7257 /* ??? Find out if the one is a multiple of the other? */
7258 return NULL_RTX;
7261 add = express_from_1 (g1->add_val, g2->add_val, mult);
7262 if (add == NULL_RTX)
7264 /* Failed. If we've got a multiplication factor between G1 and G2,
7265 scale G1's addend and try again. */
7266 if (INTVAL (mult) > 1)
7268 rtx g1_add_val = g1->add_val;
7269 if (GET_CODE (g1_add_val) == MULT
7270 && GET_CODE (XEXP (g1_add_val, 1)) == CONST_INT)
7272 HOST_WIDE_INT m;
7273 m = INTVAL (mult) * INTVAL (XEXP (g1_add_val, 1));
7274 g1_add_val = gen_rtx_MULT (GET_MODE (g1_add_val),
7275 XEXP (g1_add_val, 0), GEN_INT (m));
7277 else
7279 g1_add_val = gen_rtx_MULT (GET_MODE (g1_add_val), g1_add_val,
7280 mult);
7283 add = express_from_1 (g1_add_val, g2->add_val, const1_rtx);
7286 if (add == NULL_RTX)
7287 return NULL_RTX;
7289 /* Form simplified final result. */
7290 if (mult == const0_rtx)
7291 return add;
7292 else if (mult == const1_rtx)
7293 mult = g1->dest_reg;
7294 else
7295 mult = gen_rtx_MULT (g2->mode, g1->dest_reg, mult);
7297 if (add == const0_rtx)
7298 return mult;
7299 else
7301 if (GET_CODE (add) == PLUS
7302 && CONSTANT_P (XEXP (add, 1)))
7304 rtx tem = XEXP (add, 1);
7305 mult = gen_rtx_PLUS (g2->mode, mult, XEXP (add, 0));
7306 add = tem;
7309 return gen_rtx_PLUS (g2->mode, mult, add);
7313 /* Return an rtx, if any, that expresses giv G2 as a function of the register
7314 represented by G1. This indicates that G2 should be combined with G1 and
7315 that G2 can use (either directly or via an address expression) a register
7316 used to represent G1. */
7318 static rtx
7319 combine_givs_p (struct induction *g1, struct induction *g2)
7321 rtx comb, ret;
7323 /* With the introduction of ext dependent givs, we must be careful about
7324 modes: G2 must not use a wider mode than G1.
7325 if (GET_MODE_SIZE (g1->mode) < GET_MODE_SIZE (g2->mode))
7326 return NULL_RTX;
7328 ret = comb = express_from (g1, g2);
7329 if (comb == NULL_RTX)
7330 return NULL_RTX;
7331 if (g1->mode != g2->mode)
7332 ret = gen_lowpart (g2->mode, comb);
7334 /* If these givs are identical, they can be combined. We use the results
7335 of express_from because the addends are not in a canonical form, so
7336 rtx_equal_p is a weaker test. */
7337 /* But don't combine a DEST_REG giv with a DEST_ADDR giv; we want the
7338 combination to be the other way round. */
7339 if (comb == g1->dest_reg
7340 && (g1->giv_type == DEST_REG || g2->giv_type == DEST_ADDR))
7342 return ret;
7345 /* If G2 can be expressed as a function of G1 and that function is valid
7346 as an address and no more expensive than using a register for G2,
7347 the expression of G2 in terms of G1 can be used. */
7348 if (ret != NULL_RTX
7349 && g2->giv_type == DEST_ADDR
7350 && memory_address_p (GET_MODE (g2->mem), ret))
7351 return ret;
7353 return NULL_RTX;
7356 /* Check each extension dependent giv in this class to see if its
7357 root biv is safe from wrapping in the interior mode, which would
7358 make the giv illegal. */
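/* Illustrative note (hypothetical numbers): for an SImode biv that starts
   at 0, is incremented by 1 and runs a known 100 iterations, a giv such as
   (zero_extend:DI (reg biv)) is safe because the biv stays within [0, 100]
   and never wraps SImode, so ze_ok (and se_ok) are set below.  If no such
   bound can be established, the ext dependent givs of the class are
   invalidated instead.  */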
7360 static void
7361 check_ext_dependent_givs (const struct loop *loop, struct iv_class *bl)
7363 struct loop_info *loop_info = LOOP_INFO (loop);
7364 int ze_ok = 0, se_ok = 0, info_ok = 0;
7365 enum machine_mode biv_mode = GET_MODE (bl->biv->src_reg);
7366 HOST_WIDE_INT start_val;
7367 unsigned HOST_WIDE_INT u_end_val = 0;
7368 unsigned HOST_WIDE_INT u_start_val = 0;
7369 rtx incr = pc_rtx;
7370 struct induction *v;
7372 /* Make sure the iteration data is available. We must have
7373 constants in order to be certain of no overflow. */
7374 if (loop_info->n_iterations > 0
7375 && bl->initial_value
7376 && GET_CODE (bl->initial_value) == CONST_INT
7377 && (incr = biv_total_increment (bl))
7378 && GET_CODE (incr) == CONST_INT
7379 /* Make sure the host can represent the arithmetic. */
7380 && HOST_BITS_PER_WIDE_INT >= GET_MODE_BITSIZE (biv_mode))
7382 unsigned HOST_WIDE_INT abs_incr, total_incr;
7383 HOST_WIDE_INT s_end_val;
7384 int neg_incr;
7386 info_ok = 1;
7387 start_val = INTVAL (bl->initial_value);
7388 u_start_val = start_val;
7390 neg_incr = 0, abs_incr = INTVAL (incr);
7391 if (INTVAL (incr) < 0)
7392 neg_incr = 1, abs_incr = -abs_incr;
7393 total_incr = abs_incr * loop_info->n_iterations;
7395 /* Check for host arithmetic overflow. */
7396 if (total_incr / loop_info->n_iterations == abs_incr)
7398 unsigned HOST_WIDE_INT u_max;
7399 HOST_WIDE_INT s_max;
7401 u_end_val = start_val + (neg_incr ? -total_incr : total_incr);
7402 s_end_val = u_end_val;
7403 u_max = GET_MODE_MASK (biv_mode);
7404 s_max = u_max >> 1;
7406 /* Check zero extension of biv ok. */
7407 if (start_val >= 0
7408 /* Check for host arithmetic overflow. */
7409 && (neg_incr
7410 ? u_end_val < u_start_val
7411 : u_end_val > u_start_val)
7412 /* Check for target arithmetic overflow. */
7413 && (neg_incr
7414 ? 1 /* taken care of with host overflow */
7415 : u_end_val <= u_max))
7417 ze_ok = 1;
7420 /* Check sign extension of biv ok. */
7421 /* ??? While it is true that overflow with signed and pointer
7422 arithmetic is undefined, I fear too many programmers don't
7423 keep this fact in mind -- myself included on occasion.
7424 So leave the signed overflow optimizations alone. */
7425 if (start_val >= -s_max - 1
7426 /* Check for host arithmetic overflow. */
7427 && (neg_incr
7428 ? s_end_val < start_val
7429 : s_end_val > start_val)
7430 /* Check for target arithmetic overflow. */
7431 && (neg_incr
7432 ? s_end_val >= -s_max - 1
7433 : s_end_val <= s_max))
7435 se_ok = 1;
7440 /* If we know the BIV is compared at run-time against an
7441 invariant value, and the increment is +/- 1, we may also
7442 be able to prove that the BIV cannot overflow. */
7443 else if (bl->biv->src_reg == loop_info->iteration_var
7444 && loop_info->comparison_value
7445 && loop_invariant_p (loop, loop_info->comparison_value)
7446 && (incr = biv_total_increment (bl))
7447 && GET_CODE (incr) == CONST_INT)
7449 /* If the increment is +1, and the exit test is a <,
7450 the BIV cannot overflow. (For <=, we have the
7451 problematic case that the comparison value might
7452 be the maximum value of the range.) */
7453 if (INTVAL (incr) == 1)
7455 if (loop_info->comparison_code == LT)
7456 se_ok = ze_ok = 1;
7457 else if (loop_info->comparison_code == LTU)
7458 ze_ok = 1;
7461 /* Likewise for increment -1 and exit test >. */
7462 if (INTVAL (incr) == -1)
7464 if (loop_info->comparison_code == GT)
7465 se_ok = ze_ok = 1;
7466 else if (loop_info->comparison_code == GTU)
7467 ze_ok = 1;
7471 /* Invalidate givs that fail the tests. */
7472 for (v = bl->giv; v; v = v->next_iv)
7473 if (v->ext_dependent)
7475 enum rtx_code code = GET_CODE (v->ext_dependent);
7476 int ok = 0;
7478 switch (code)
7480 case SIGN_EXTEND:
7481 ok = se_ok;
7482 break;
7483 case ZERO_EXTEND:
7484 ok = ze_ok;
7485 break;
7487 case TRUNCATE:
7488 /* We don't know whether this value is being used as either
7489 signed or unsigned, so to safely truncate we must satisfy
7490 both. The initial check here verifies the BIV itself;
7491 once that is successful we may check its range wrt the
7492 derived GIV. This works only if we were able to determine
7493 constant start and end values above. */
7494 if (se_ok && ze_ok && info_ok)
7496 enum machine_mode outer_mode = GET_MODE (v->ext_dependent);
7497 unsigned HOST_WIDE_INT max = GET_MODE_MASK (outer_mode) >> 1;
7499 /* We know from the above that both endpoints are nonnegative,
7500 and that there is no wrapping. Verify that both endpoints
7501 are within the (signed) range of the outer mode. */
7502 if (u_start_val <= max && u_end_val <= max)
7503 ok = 1;
7505 break;
7507 default:
7508 abort ();
7511 if (ok)
7513 if (loop_dump_stream)
7515 fprintf (loop_dump_stream,
7516 "Verified ext dependent giv at %d of reg %d\n",
7517 INSN_UID (v->insn), bl->regno);
7520 else
7522 if (loop_dump_stream)
7524 const char *why;
7526 if (info_ok)
7527 why = "biv iteration values overflowed";
7528 else
7530 if (incr == pc_rtx)
7531 incr = biv_total_increment (bl);
7532 if (incr == const1_rtx)
7533 why = "biv iteration info incomplete; incr by 1";
7534 else
7535 why = "biv iteration info incomplete";
7538 fprintf (loop_dump_stream,
7539 "Failed ext dependent giv at %d, %s\n",
7540 INSN_UID (v->insn), why);
7542 v->ignore = 1;
7543 bl->all_reduced = 0;
7548 /* Generate a version of VALUE in a mode appropriate for initializing V. */
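/* Illustrative note (a sketch, not from any particular testcase): if
   V->ext_dependent is (sign_extend:DI (reg:SI i)) and VALUE is some SImode
   register, the result is (sign_extend:DI value); a VOIDmode CONST_INT is
   returned unchanged because check_ext_dependent_givs has already shown
   that it fits.  */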
7551 extend_value_for_giv (struct induction *v, rtx value)
7553 rtx ext_dep = v->ext_dependent;
7555 if (! ext_dep)
7556 return value;
7558 /* Recall that check_ext_dependent_givs verified that the known bounds
7559 of a biv did not overflow or wrap with respect to the extension for
7560 the giv. Therefore, constants need no additional adjustment. */
7561 if (CONSTANT_P (value) && GET_MODE (value) == VOIDmode)
7562 return value;
7564 /* Otherwise, we must adjust the value to compensate for the
7565 differing modes of the biv and the giv. */
7566 return gen_rtx_fmt_e (GET_CODE (ext_dep), GET_MODE (ext_dep), value);
7569 struct combine_givs_stats
7571 int giv_number;
7572 int total_benefit;
7575 static int
7576 cmp_combine_givs_stats (const void *xp, const void *yp)
7578 const struct combine_givs_stats * const x =
7579 (const struct combine_givs_stats *) xp;
7580 const struct combine_givs_stats * const y =
7581 (const struct combine_givs_stats *) yp;
7582 int d;
7583 d = y->total_benefit - x->total_benefit;
7584 /* Stabilize the sort. */
7585 if (!d)
7586 d = x->giv_number - y->giv_number;
7587 return d;
7590 /* Check all pairs of givs for iv_class BL and see if any can be combined with
7591 any other. If so, point SAME to the giv combined with and set NEW_REG to
7592 be an expression (in terms of the other giv's DEST_REG) equivalent to the
7593 giv. Also, update BENEFIT and related fields for cost/benefit analysis. */
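/* Illustrative note (hypothetical givs): given two DEST_ADDR givs
   g1 = biv*4 and g2 = biv*4 + 32, combine_givs_p can express g2 as
   (plus g1_reg (const_int 32)); g2->same is then set to g1, g2->new_reg to
   that expression, and only one reduced register has to be maintained in
   the loop, with g2's memory references using a reg+32 address.  The sort
   below simply tries the most profitable combinations first.  */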
7595 static void
7596 combine_givs (struct loop_regs *regs, struct iv_class *bl)
7598 /* Additional benefit to add for being combined multiple times. */
7599 const int extra_benefit = 3;
7601 struct induction *g1, *g2, **giv_array;
7602 int i, j, k, giv_count;
7603 struct combine_givs_stats *stats;
7604 rtx *can_combine;
7606 /* Count givs, because bl->giv_count is incorrect here. */
7607 giv_count = 0;
7608 for (g1 = bl->giv; g1; g1 = g1->next_iv)
7609 if (!g1->ignore)
7610 giv_count++;
7612 giv_array = alloca (giv_count * sizeof (struct induction *));
7613 i = 0;
7614 for (g1 = bl->giv; g1; g1 = g1->next_iv)
7615 if (!g1->ignore)
7616 giv_array[i++] = g1;
7618 stats = xcalloc (giv_count, sizeof (*stats));
7619 can_combine = xcalloc (giv_count, giv_count * sizeof (rtx));
7621 for (i = 0; i < giv_count; i++)
7623 int this_benefit;
7624 rtx single_use;
7626 g1 = giv_array[i];
7627 stats[i].giv_number = i;
7629 /* If a DEST_REG GIV is used only once, do not allow it to combine
7630 with anything, for in doing so we will gain nothing that cannot
7631 be had by simply letting the GIV with which we would have combined
7632 to be reduced on its own. The lossage shows up in particular with
7633 DEST_ADDR targets on hosts with reg+reg addressing, though it can
7634 be seen elsewhere as well. */
7635 if (g1->giv_type == DEST_REG
7636 && (single_use = regs->array[REGNO (g1->dest_reg)].single_usage)
7637 && single_use != const0_rtx)
7638 continue;
7640 this_benefit = g1->benefit;
7641 /* Add an additional weight for zero addends. */
7642 if (g1->no_const_addval)
7643 this_benefit += 1;
7645 for (j = 0; j < giv_count; j++)
7647 rtx this_combine;
7649 g2 = giv_array[j];
7650 if (g1 != g2
7651 && (this_combine = combine_givs_p (g1, g2)) != NULL_RTX)
7653 can_combine[i * giv_count + j] = this_combine;
7654 this_benefit += g2->benefit + extra_benefit;
7657 stats[i].total_benefit = this_benefit;
7660 /* Iterate, combining until we can't. */
7661 restart:
7662 qsort (stats, giv_count, sizeof (*stats), cmp_combine_givs_stats);
7664 if (loop_dump_stream)
7666 fprintf (loop_dump_stream, "Sorted combine statistics:\n");
7667 for (k = 0; k < giv_count; k++)
7669 g1 = giv_array[stats[k].giv_number];
7670 if (!g1->combined_with && !g1->same)
7671 fprintf (loop_dump_stream, " {%d, %d}",
7672 INSN_UID (giv_array[stats[k].giv_number]->insn),
7673 stats[k].total_benefit);
7675 putc ('\n', loop_dump_stream);
7678 for (k = 0; k < giv_count; k++)
7680 int g1_add_benefit = 0;
7682 i = stats[k].giv_number;
7683 g1 = giv_array[i];
7685 /* If it has already been combined, skip. */
7686 if (g1->combined_with || g1->same)
7687 continue;
7689 for (j = 0; j < giv_count; j++)
7691 g2 = giv_array[j];
7692 if (g1 != g2 && can_combine[i * giv_count + j]
7693 /* If it has already been combined, skip. */
7694 && ! g2->same && ! g2->combined_with)
7696 int l;
7698 g2->new_reg = can_combine[i * giv_count + j];
7699 g2->same = g1;
7700 /* For the destination, we may now replace the register by a mem
7701 expression. This changes the costs considerably, so add the
7702 compensation. */
7703 if (g2->giv_type == DEST_ADDR)
7704 g2->benefit = (g2->benefit + reg_address_cost
7705 - address_cost (g2->new_reg,
7706 GET_MODE (g2->mem)));
7707 g1->combined_with++;
7708 g1->lifetime += g2->lifetime;
7710 g1_add_benefit += g2->benefit;
7712 /* ??? The new final_[bg]iv_value code does a much better job
7713 of finding replaceable giv's, and hence this code may no
7714 longer be necessary. */
7715 if (! g2->replaceable && REG_USERVAR_P (g2->dest_reg))
7716 g1_add_benefit -= copy_cost;
7718 /* To help optimize the next set of combinations, remove
7719 this giv from the benefits of other potential mates. */
7720 for (l = 0; l < giv_count; ++l)
7722 int m = stats[l].giv_number;
7723 if (can_combine[m * giv_count + j])
7724 stats[l].total_benefit -= g2->benefit + extra_benefit;
7727 if (loop_dump_stream)
7728 fprintf (loop_dump_stream,
7729 "giv at %d combined with giv at %d; new benefit %d + %d, lifetime %d\n",
7730 INSN_UID (g2->insn), INSN_UID (g1->insn),
7731 g1->benefit, g1_add_benefit, g1->lifetime);
7735 /* To help optimize the next set of combinations, remove
7736 this giv from the benefits of other potential mates. */
7737 if (g1->combined_with)
7739 for (j = 0; j < giv_count; ++j)
7741 int m = stats[j].giv_number;
7742 if (can_combine[m * giv_count + i])
7743 stats[j].total_benefit -= g1->benefit + extra_benefit;
7746 g1->benefit += g1_add_benefit;
7748 /* We've finished with this giv, and everything it touched.
7749 Restart the combination so that proper weights for the
7750 rest of the givs are properly taken into account. */
7751 /* ??? Ideally we would compact the arrays at this point, so
7752 as to not cover old ground. But sanely compacting
7753 can_combine is tricky. */
7754 goto restart;
7758 /* Clean up. */
7759 free (stats);
7760 free (can_combine);
7763 /* Generate sequence for REG = B * M + A. B is the initial value of
7764 the basic induction variable, M a multiplicative constant, A an
7765 additive constant and REG the destination register. */
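/* Illustrative note: this is how, for example, a strength-reduced giv's new
   register gets its initial value.  For a hypothetical giv of the form
   biv*4 + 16 whose biv starts at B, the sequence built below computes
   REG = B*4 + 16 before the loop; expand_mult_add may use shifts and adds
   rather than an actual multiply.  */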
7767 static rtx
7768 gen_add_mult (rtx b, rtx m, rtx a, rtx reg)
7770 rtx seq;
7771 rtx result;
7773 start_sequence ();
7774 /* Use unsigned arithmetic. */
7775 result = expand_mult_add (b, reg, m, a, GET_MODE (reg), 1);
7776 if (reg != result)
7777 emit_move_insn (reg, result);
7778 seq = get_insns ();
7779 end_sequence ();
7781 return seq;
7785 /* Update registers created in insn sequence SEQ. */
7787 static void
7788 loop_regs_update (const struct loop *loop ATTRIBUTE_UNUSED, rtx seq)
7790 rtx insn;
7792 /* Update register info for alias analysis. */
7794 insn = seq;
7795 while (insn != NULL_RTX)
7797 rtx set = single_set (insn);
7799 if (set && GET_CODE (SET_DEST (set)) == REG)
7800 record_base_value (REGNO (SET_DEST (set)), SET_SRC (set), 0);
7802 insn = NEXT_INSN (insn);
7807 /* EMIT code before BEFORE_BB/BEFORE_INSN to set REG = B * M + A. B
7808 is the initial value of the basic induction variable, M a
7809 multiplicative constant, A an additive constant and REG the
7810 destination register. */
7812 void
7813 loop_iv_add_mult_emit_before (const struct loop *loop, rtx b, rtx m, rtx a,
7814 rtx reg, basic_block before_bb, rtx before_insn)
7816 rtx seq;
7818 if (! before_insn)
7820 loop_iv_add_mult_hoist (loop, b, m, a, reg);
7821 return;
7824 /* Use copy_rtx to prevent unexpected sharing of these rtx. */
7825 seq = gen_add_mult (copy_rtx (b), copy_rtx (m), copy_rtx (a), reg);
7827 /* Increase the lifetime of any invariants moved further in code. */
7828 update_reg_last_use (a, before_insn);
7829 update_reg_last_use (b, before_insn);
7830 update_reg_last_use (m, before_insn);
7832 /* It is possible that the expansion created lots of new registers.
7833 Iterate over the sequence we just created and record them all. We
7834 must do this before inserting the sequence. */
7835 loop_regs_update (loop, seq);
7837 loop_insn_emit_before (loop, before_bb, before_insn, seq);
7841 /* Emit insns in loop pre-header to set REG = B * M + A. B is the
7842 initial value of the basic induction variable, M a multiplicative
7843 constant, A an additive constant and REG the destination
7844 register. */
7846 void
7847 loop_iv_add_mult_sink (const struct loop *loop, rtx b, rtx m, rtx a, rtx reg)
7849 rtx seq;
7851 /* Use copy_rtx to prevent unexpected sharing of these rtx. */
7852 seq = gen_add_mult (copy_rtx (b), copy_rtx (m), copy_rtx (a), reg);
7854 /* Increase the lifetime of any invariants moved further in code.
7855 ???? Is this really necessary? */
7856 update_reg_last_use (a, loop->sink);
7857 update_reg_last_use (b, loop->sink);
7858 update_reg_last_use (m, loop->sink);
7860 /* It is possible that the expansion created lots of new registers.
7861 Iterate over the sequence we just created and record them all. We
7862 must do this before inserting the sequence. */
7863 loop_regs_update (loop, seq);
7865 loop_insn_sink (loop, seq);
7869 /* Emit insns after loop to set REG = B * M + A. B is the initial
7870 value of the basic induction variable, M a multiplicative constant,
7871 A an additive constant and REG the destination register. */
7873 void
7874 loop_iv_add_mult_hoist (const struct loop *loop, rtx b, rtx m, rtx a, rtx reg)
7876 rtx seq;
7878 /* Use copy_rtx to prevent unexpected sharing of these rtx. */
7879 seq = gen_add_mult (copy_rtx (b), copy_rtx (m), copy_rtx (a), reg);
7881 /* It is possible that the expansion created lots of new registers.
7882 Iterate over the sequence we just created and record them all. We
7883 must do this before inserting the sequence. */
7884 loop_regs_update (loop, seq);
7886 loop_insn_hoist (loop, seq);
7891 /* Similar to gen_add_mult, but compute cost rather than generating
7892 sequence. */
7894 static int
7895 iv_add_mult_cost (rtx b, rtx m, rtx a, rtx reg)
7897 int cost = 0;
7898 rtx last, result;
7900 start_sequence ();
7901 result = expand_mult_add (b, reg, m, a, GET_MODE (reg), 1);
7902 if (reg != result)
7903 emit_move_insn (reg, result);
7904 last = get_last_insn ();
7905 while (last)
7907 rtx t = single_set (last);
7908 if (t)
7909 cost += rtx_cost (SET_SRC (t), SET);
7910 last = PREV_INSN (last);
7912 end_sequence ();
7913 return cost;
7916 /* Test whether A * B can be computed without
7917 an actual multiply insn. Value is 1 if so.
7919 ??? This function stinks because it generates a ton of wasted RTL
7920 ??? and as a result fragments GC memory to no end. There are other
7921 ??? places in the compiler which are invoked a lot and do the same
7922 ??? thing, generate wasted RTL just to see if something is possible. */
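/* Illustrative note (target dependent, so only a sketch): multiplying a
   register by (const_int 5) usually expands to a shift and an add -- a
   short sequence containing no MULT -- so the product counts as cheap;
   a constant that forces a real multiply insn or a libcall does not.  */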
7924 static int
7925 product_cheap_p (rtx a, rtx b)
7927 rtx tmp;
7928 int win, n_insns;
7930 /* If only one is constant, make it B. */
7931 if (GET_CODE (a) == CONST_INT)
7932 tmp = a, a = b, b = tmp;
7934 /* If first constant, both constant, so don't need multiply. */
7935 if (GET_CODE (a) == CONST_INT)
7936 return 1;
7938 /* If second not constant, neither is constant, so would need multiply. */
7939 if (GET_CODE (b) != CONST_INT)
7940 return 0;
7942 /* One operand is constant, so might not need multiply insn. Generate the
7943 code for the multiply and see if a call or multiply, or long sequence
7944 of insns is generated. */
7946 start_sequence ();
7947 expand_mult (GET_MODE (a), a, b, NULL_RTX, 1);
7948 tmp = get_insns ();
7949 end_sequence ();
7951 win = 1;
7952 if (INSN_P (tmp))
7954 n_insns = 0;
7955 while (tmp != NULL_RTX)
7957 rtx next = NEXT_INSN (tmp);
7959 if (++n_insns > 3
7960 || GET_CODE (tmp) != INSN
7961 || (GET_CODE (PATTERN (tmp)) == SET
7962 && GET_CODE (SET_SRC (PATTERN (tmp))) == MULT)
7963 || (GET_CODE (PATTERN (tmp)) == PARALLEL
7964 && GET_CODE (XVECEXP (PATTERN (tmp), 0, 0)) == SET
7965 && GET_CODE (SET_SRC (XVECEXP (PATTERN (tmp), 0, 0))) == MULT))
7967 win = 0;
7968 break;
7971 tmp = next;
7974 else if (GET_CODE (tmp) == SET
7975 && GET_CODE (SET_SRC (tmp)) == MULT)
7976 win = 0;
7977 else if (GET_CODE (tmp) == PARALLEL
7978 && GET_CODE (XVECEXP (tmp, 0, 0)) == SET
7979 && GET_CODE (SET_SRC (XVECEXP (tmp, 0, 0))) == MULT)
7980 win = 0;
7982 return win;
7985 /* Check to see if loop can be terminated by a "decrement and branch until
7986 zero" instruction. If so, add a REG_NONNEG note to the branch insn.
7987 Also try reversing an increment loop to a decrement loop
7988 to see if the optimization can be performed.
7989 Value is nonzero if optimization was performed. */
7991 /* This is useful even if the architecture doesn't have such an insn,
7992 because it might change a loop which increments from 0 to n to a loop
7993 which decrements from n to 0. A loop that decrements to zero is usually
7994 faster than one that increments from zero. */
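/* Illustrative note (hypothetical source): after strength reduction, the
   counter of a loop such as

       for (i = 0; i < n; i++)
         *p++ = 0;

   is typically used only in the exit test, so the loop can be rewritten to
   count down from N to zero, the exit test becoming a compare against zero
   (or a decrement-and-branch insn where the target has one).  */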
7996 /* ??? This could be rewritten to use some of the loop unrolling procedures,
7997 such as approx_final_value, biv_total_increment, loop_iterations, and
7998 final_[bg]iv_value. */
8000 static int
8001 check_dbra_loop (struct loop *loop, int insn_count)
8003 struct loop_info *loop_info = LOOP_INFO (loop);
8004 struct loop_regs *regs = LOOP_REGS (loop);
8005 struct loop_ivs *ivs = LOOP_IVS (loop);
8006 struct iv_class *bl;
8007 rtx reg;
8008 rtx jump_label;
8009 rtx final_value;
8010 rtx start_value;
8011 rtx new_add_val;
8012 rtx comparison;
8013 rtx before_comparison;
8014 rtx p;
8015 rtx jump;
8016 rtx first_compare;
8017 int compare_and_branch;
8018 rtx loop_start = loop->start;
8019 rtx loop_end = loop->end;
8021 /* If last insn is a conditional branch, and the insn before tests a
8022 register value, try to optimize it. Otherwise, we can't do anything. */
8024 jump = PREV_INSN (loop_end);
8025 comparison = get_condition_for_loop (loop, jump);
8026 if (comparison == 0)
8027 return 0;
8028 if (!onlyjump_p (jump))
8029 return 0;
8031 /* Try to compute whether the compare/branch at the loop end is one or
8032 two instructions. */
8033 get_condition (jump, &first_compare, false);
8034 if (first_compare == jump)
8035 compare_and_branch = 1;
8036 else if (first_compare == prev_nonnote_insn (jump))
8037 compare_and_branch = 2;
8038 else
8039 return 0;
8042 /* If more than one condition is present to control the loop, then
8043 do not proceed, as this function does not know how to rewrite
8044 loop tests with more than one condition.
8046 Look backwards from the first insn in the last comparison
8047 sequence and see if we've got another comparison sequence. */
8049 rtx jump1;
8050 if ((jump1 = prev_nonnote_insn (first_compare)) != loop->cont)
8051 if (GET_CODE (jump1) == JUMP_INSN)
8052 return 0;
8055 /* Check all of the bivs to see if the compare uses one of them.
8056 Skip biv's set more than once because we can't guarantee that
8057 it will be zero on the last iteration. Also skip if the biv is
8058 used between its update and the test insn. */
8060 for (bl = ivs->list; bl; bl = bl->next)
8062 if (bl->biv_count == 1
8063 && ! bl->biv->maybe_multiple
8064 && bl->biv->dest_reg == XEXP (comparison, 0)
8065 && ! reg_used_between_p (regno_reg_rtx[bl->regno], bl->biv->insn,
8066 first_compare))
8067 break;
8070 /* Try swapping the comparison to identify a suitable biv. */
8071 if (!bl)
8072 for (bl = ivs->list; bl; bl = bl->next)
8073 if (bl->biv_count == 1
8074 && ! bl->biv->maybe_multiple
8075 && bl->biv->dest_reg == XEXP (comparison, 1)
8076 && ! reg_used_between_p (regno_reg_rtx[bl->regno], bl->biv->insn,
8077 first_compare))
8079 comparison = gen_rtx_fmt_ee (swap_condition (GET_CODE (comparison)),
8080 VOIDmode,
8081 XEXP (comparison, 1),
8082 XEXP (comparison, 0));
8083 break;
8086 if (! bl)
8087 return 0;
8089 /* Look for the case where the basic induction variable is always
8090 nonnegative, and equals zero on the last iteration.
8091 In this case, add a reg_note REG_NONNEG, which allows the
8092 m68k DBRA instruction to be used. */
8094 if (((GET_CODE (comparison) == GT && XEXP (comparison, 1) == constm1_rtx)
8095 || (GET_CODE (comparison) == NE && XEXP (comparison, 1) == const0_rtx))
8096 && GET_CODE (bl->biv->add_val) == CONST_INT
8097 && INTVAL (bl->biv->add_val) < 0)
8099 /* Initial value must be greater than 0,
8100 and init_val % -dec_value == 0 to ensure that it equals zero on
8101 the last iteration. */
8103 if (GET_CODE (bl->initial_value) == CONST_INT
8104 && INTVAL (bl->initial_value) > 0
8105 && (INTVAL (bl->initial_value)
8106 % (-INTVAL (bl->biv->add_val))) == 0)
8108 /* register always nonnegative, add REG_NOTE to branch */
8109 if (! find_reg_note (jump, REG_NONNEG, NULL_RTX))
8110 REG_NOTES (jump)
8111 = gen_rtx_EXPR_LIST (REG_NONNEG, bl->biv->dest_reg,
8112 REG_NOTES (jump));
8113 bl->nonneg = 1;
8115 return 1;
8118 /* If the decrement is 1 and the value was tested as >= 0 before
8119 the loop, then we can safely optimize. */
8120 for (p = loop_start; p; p = PREV_INSN (p))
8122 if (GET_CODE (p) == CODE_LABEL)
8123 break;
8124 if (GET_CODE (p) != JUMP_INSN)
8125 continue;
8127 before_comparison = get_condition_for_loop (loop, p);
8128 if (before_comparison
8129 && XEXP (before_comparison, 0) == bl->biv->dest_reg
8130 && (GET_CODE (before_comparison) == LT
8131 || GET_CODE (before_comparison) == LTU)
8132 && XEXP (before_comparison, 1) == const0_rtx
8133 && ! reg_set_between_p (bl->biv->dest_reg, p, loop_start)
8134 && INTVAL (bl->biv->add_val) == -1)
8136 if (! find_reg_note (jump, REG_NONNEG, NULL_RTX))
8137 REG_NOTES (jump)
8138 = gen_rtx_EXPR_LIST (REG_NONNEG, bl->biv->dest_reg,
8139 REG_NOTES (jump));
8140 bl->nonneg = 1;
8142 return 1;
8146 else if (GET_CODE (bl->biv->add_val) == CONST_INT
8147 && INTVAL (bl->biv->add_val) > 0)
8149 /* Try to change inc to dec, so can apply above optimization. */
8150 /* Can do this if:
8151 all registers modified are induction variables or invariant,
8152 all memory references have non-overlapping addresses
8153 (obviously true if only one write)
8154 allow 2 insns for the compare/jump at the end of the loop. */
8155 /* Also, we must avoid any instructions which use both the reversed
8156 biv and another biv. Such instructions will fail if the loop is
8157 reversed. We meet this condition by requiring that either
8158 no_use_except_counting is true, or else that there is only
8159 one biv. */
8160 int num_nonfixed_reads = 0;
8161 /* 1 if the iteration var is used only to count iterations. */
8162 int no_use_except_counting = 0;
8163 /* 1 if the loop has no memory store, or it has a single memory store
8164 which is reversible. */
8165 int reversible_mem_store = 1;
8167 if (bl->giv_count == 0
8168 && !loop->exit_count
8169 && !loop_info->has_multiple_exit_targets)
8171 rtx bivreg = regno_reg_rtx[bl->regno];
8172 struct iv_class *blt;
8174 /* If there are no givs for this biv, and the only exit is the
8175 fall through at the end of the loop, then
8176 see if perhaps there are no uses except to count. */
8177 no_use_except_counting = 1;
8178 for (p = loop_start; p != loop_end; p = NEXT_INSN (p))
8179 if (INSN_P (p))
8181 rtx set = single_set (p);
8183 if (set && GET_CODE (SET_DEST (set)) == REG
8184 && REGNO (SET_DEST (set)) == bl->regno)
8185 /* An insn that sets the biv is okay. */
8187 else if (!reg_mentioned_p (bivreg, PATTERN (p)))
8188 /* An insn that doesn't mention the biv is okay. */
8190 else if (p == prev_nonnote_insn (prev_nonnote_insn (loop_end))
8191 || p == prev_nonnote_insn (loop_end))
8193 /* If either of these insns uses the biv and sets a pseudo
8194 that has more than one usage, then the biv has uses
8195 other than counting since it's used to derive a value
8196 that is used more than one time. */
8197 note_stores (PATTERN (p), note_set_pseudo_multiple_uses,
8198 regs);
8199 if (regs->multiple_uses)
8201 no_use_except_counting = 0;
8202 break;
8205 else
8207 no_use_except_counting = 0;
8208 break;
8212 /* A biv has uses besides counting if it is used to set
8213 another biv. */
8214 for (blt = ivs->list; blt; blt = blt->next)
8215 if (blt->init_set
8216 && reg_mentioned_p (bivreg, SET_SRC (blt->init_set)))
8218 no_use_except_counting = 0;
8219 break;
8223 if (no_use_except_counting)
8224 /* No need to worry about MEMs. */
8226 else if (loop_info->num_mem_sets <= 1)
8228 for (p = loop_start; p != loop_end; p = NEXT_INSN (p))
8229 if (INSN_P (p))
8230 num_nonfixed_reads += count_nonfixed_reads (loop, PATTERN (p));
8232 /* If the loop has a single store, and the destination address is
8233 invariant, then we can't reverse the loop, because this address
8234 might then have the wrong value at loop exit.
8235 This would work if the source was invariant also, however, in that
8236 case, the insn should have been moved out of the loop. */
8238 if (loop_info->num_mem_sets == 1)
8240 struct induction *v;
8242 /* If we could prove that each of the memory locations
8243 written to was different, then we could reverse the
8244 store -- but we don't presently have any way of
8245 knowing that. */
8246 reversible_mem_store = 0;
8248 /* If the store depends on a register that is set after the
8249 store, it depends on the initial value, and is thus not
8250 reversible. */
8251 for (v = bl->giv; reversible_mem_store && v; v = v->next_iv)
8253 if (v->giv_type == DEST_REG
8254 && reg_mentioned_p (v->dest_reg,
8255 PATTERN (loop_info->first_loop_store_insn))
8256 && loop_insn_first_p (loop_info->first_loop_store_insn,
8257 v->insn))
8258 reversible_mem_store = 0;
8262 else
8263 return 0;
8265 /* This code only acts for innermost loops. Also it simplifies
8266 the memory address check by only reversing loops with
8267 zero or one memory access.
8268 Two memory accesses could involve parts of the same array,
8269 and that can't be reversed.
8270 If the biv is used only for counting, then we don't need to worry
8271 about all these things. */
8273 if ((num_nonfixed_reads <= 1
8274 && ! loop_info->has_nonconst_call
8275 && ! loop_info->has_prefetch
8276 && ! loop_info->has_volatile
8277 && reversible_mem_store
8278 && (bl->giv_count + bl->biv_count + loop_info->num_mem_sets
8279 + num_unmoved_movables (loop) + compare_and_branch == insn_count)
8280 && (bl == ivs->list && bl->next == 0))
8281 || (no_use_except_counting && ! loop_info->has_prefetch))
8283 rtx tem;
8285 /* Loop can be reversed. */
8286 if (loop_dump_stream)
8287 fprintf (loop_dump_stream, "Can reverse loop\n");
8289 /* Now check other conditions:
8291 The increment must be a constant, as must the initial value,
8292 and the comparison code must be LT.
8294 This test can probably be improved since +/- 1 in the constant
8295 can be obtained by changing LT to LE and vice versa; this is
8296 confusing. */
8298 if (comparison
8299 /* for constants, LE gets turned into LT */
8300 && (GET_CODE (comparison) == LT
8301 || (GET_CODE (comparison) == LE
8302 && no_use_except_counting)
8303 || GET_CODE (comparison) == LTU))
8305 HOST_WIDE_INT add_val, add_adjust, comparison_val = 0;
8306 rtx initial_value, comparison_value;
8307 int nonneg = 0;
8308 enum rtx_code cmp_code;
8309 int comparison_const_width;
8310 unsigned HOST_WIDE_INT comparison_sign_mask;
8312 add_val = INTVAL (bl->biv->add_val);
8313 comparison_value = XEXP (comparison, 1);
8314 if (GET_MODE (comparison_value) == VOIDmode)
8315 comparison_const_width
8316 = GET_MODE_BITSIZE (GET_MODE (XEXP (comparison, 0)));
8317 else
8318 comparison_const_width
8319 = GET_MODE_BITSIZE (GET_MODE (comparison_value));
8320 if (comparison_const_width > HOST_BITS_PER_WIDE_INT)
8321 comparison_const_width = HOST_BITS_PER_WIDE_INT;
8322 comparison_sign_mask
8323 = (unsigned HOST_WIDE_INT) 1 << (comparison_const_width - 1);
8325 /* If the comparison value is not a loop invariant, then we
8326 can not reverse this loop.
8328 ??? If the insns which initialize the comparison value as
8329 a whole compute an invariant result, then we could move
8330 them out of the loop and proceed with loop reversal. */
8331 if (! loop_invariant_p (loop, comparison_value))
8332 return 0;
8334 if (GET_CODE (comparison_value) == CONST_INT)
8335 comparison_val = INTVAL (comparison_value);
8336 initial_value = bl->initial_value;
8338 /* Normalize the initial value if it is an integer and
8339 has no other use except as a counter. This will allow
8340 a few more loops to be reversed. */
8341 if (no_use_except_counting
8342 && GET_CODE (comparison_value) == CONST_INT
8343 && GET_CODE (initial_value) == CONST_INT)
8345 comparison_val = comparison_val - INTVAL (bl->initial_value);
8346 /* The code below requires comparison_val to be a multiple
8347 of add_val in order to do the loop reversal, so
8348 round up comparison_val to a multiple of add_val.
8349 Since comparison_value is constant, we know that the
8350 current comparison code is LT. */
8351 comparison_val = comparison_val + add_val - 1;
8352 comparison_val
8353 -= (unsigned HOST_WIDE_INT) comparison_val % add_val;
8354 /* We postpone overflow checks for COMPARISON_VAL here;
8355 even if there is an overflow, we might still be able to
8356 reverse the loop, if converting the loop exit test to
8357 NE is possible. */
8358 initial_value = const0_rtx;
8361 /* First check if we can do a vanilla loop reversal. */
8362 if (initial_value == const0_rtx
8363 /* If we have a decrement_and_branch_on_count,
8364 prefer the NE test, since this will allow that
8365 instruction to be generated. Note that we must
8366 use a vanilla loop reversal if the biv is used to
8367 calculate a giv or has a non-counting use. */
8368 #if ! defined (HAVE_decrement_and_branch_until_zero) \
8369 && defined (HAVE_decrement_and_branch_on_count)
8370 && (! (add_val == 1 && loop->vtop
8371 && (bl->biv_count == 0
8372 || no_use_except_counting)))
8373 #endif
8374 && GET_CODE (comparison_value) == CONST_INT
8375 /* Now do postponed overflow checks on COMPARISON_VAL. */
8376 && ! (((comparison_val - add_val) ^ INTVAL (comparison_value))
8377 & comparison_sign_mask))
8379 /* Register will always be nonnegative, with value
8380 0 on last iteration */
8381 add_adjust = add_val;
8382 nonneg = 1;
8383 cmp_code = GE;
8385 else if (add_val == 1 && loop->vtop
8386 && (bl->biv_count == 0
8387 || no_use_except_counting))
8389 add_adjust = 0;
8390 cmp_code = NE;
8392 else
8393 return 0;
8395 if (GET_CODE (comparison) == LE)
8396 add_adjust -= add_val;
8398 /* If the initial value is not zero, or if the comparison
8399 value is not an exact multiple of the increment, then we
8400 can not reverse this loop. */
8401 if (initial_value == const0_rtx
8402 && GET_CODE (comparison_value) == CONST_INT)
8404 if (((unsigned HOST_WIDE_INT) comparison_val % add_val) != 0)
8405 return 0;
8407 else
8409 if (! no_use_except_counting || add_val != 1)
8410 return 0;
8413 final_value = comparison_value;
8415 /* Reset these in case we normalized the initial value
8416 and comparison value above. */
8417 if (GET_CODE (comparison_value) == CONST_INT
8418 && GET_CODE (initial_value) == CONST_INT)
8420 comparison_value = GEN_INT (comparison_val);
8421 final_value
8422 = GEN_INT (comparison_val + INTVAL (bl->initial_value));
8424 bl->initial_value = initial_value;
8426 /* Save some info needed to produce the new insns. */
8427 reg = bl->biv->dest_reg;
8428 jump_label = condjump_label (PREV_INSN (loop_end));
8429 new_add_val = GEN_INT (-INTVAL (bl->biv->add_val));
8431 /* Set start_value; if this is not a CONST_INT, we need
8432 to generate a SUB.
8433 Initialize biv to start_value before loop start.
8434 The old initializing insn will be deleted as a
8435 dead store by flow.c. */
8436 if (initial_value == const0_rtx
8437 && GET_CODE (comparison_value) == CONST_INT)
8439 start_value = GEN_INT (comparison_val - add_adjust);
8440 loop_insn_hoist (loop, gen_move_insn (reg, start_value));
8442 else if (GET_CODE (initial_value) == CONST_INT)
8444 enum machine_mode mode = GET_MODE (reg);
8445 rtx offset = GEN_INT (-INTVAL (initial_value) - add_adjust);
8446 rtx add_insn = gen_add3_insn (reg, comparison_value, offset);
8448 if (add_insn == 0)
8449 return 0;
8451 start_value
8452 = gen_rtx_PLUS (mode, comparison_value, offset);
8453 loop_insn_hoist (loop, add_insn);
8454 if (GET_CODE (comparison) == LE)
8455 final_value = gen_rtx_PLUS (mode, comparison_value,
8456 GEN_INT (add_val));
8458 else if (! add_adjust)
8460 enum machine_mode mode = GET_MODE (reg);
8461 rtx sub_insn = gen_sub3_insn (reg, comparison_value,
8462 initial_value);
8464 if (sub_insn == 0)
8465 return 0;
8466 start_value
8467 = gen_rtx_MINUS (mode, comparison_value, initial_value);
8468 loop_insn_hoist (loop, sub_insn);
8470 else
8471 /* We could handle the other cases too, but it'll be
8472 better to have a testcase first. */
8473 return 0;
8475 /* We may not have a single insn which can increment a reg, so
8476 create a sequence to hold all the insns from expand_inc. */
8477 start_sequence ();
8478 expand_inc (reg, new_add_val);
8479 tem = get_insns ();
8480 end_sequence ();
8482 p = loop_insn_emit_before (loop, 0, bl->biv->insn, tem);
8483 delete_insn (bl->biv->insn);
8485 /* Update biv info to reflect its new status. */
8486 bl->biv->insn = p;
8487 bl->initial_value = start_value;
8488 bl->biv->add_val = new_add_val;
8490 /* Update loop info. */
8491 loop_info->initial_value = reg;
8492 loop_info->initial_equiv_value = reg;
8493 loop_info->final_value = const0_rtx;
8494 loop_info->final_equiv_value = const0_rtx;
8495 loop_info->comparison_value = const0_rtx;
8496 loop_info->comparison_code = cmp_code;
8497 loop_info->increment = new_add_val;
8499 /* Inc LABEL_NUSES so that delete_insn will
8500 not delete the label. */
8501 LABEL_NUSES (XEXP (jump_label, 0))++;
8503 /* Emit an insn after the end of the loop to set the biv's
8504 proper exit value if it is used anywhere outside the loop. */
8505 if ((REGNO_LAST_UID (bl->regno) != INSN_UID (first_compare))
8506 || ! bl->init_insn
8507 || REGNO_FIRST_UID (bl->regno) != INSN_UID (bl->init_insn))
8508 loop_insn_sink (loop, gen_load_of_final_value (reg, final_value));
8510 /* Delete compare/branch at end of loop. */
8511 delete_related_insns (PREV_INSN (loop_end));
8512 if (compare_and_branch == 2)
8513 delete_related_insns (first_compare);
8515 /* Add new compare/branch insn at end of loop. */
8516 start_sequence ();
8517 emit_cmp_and_jump_insns (reg, const0_rtx, cmp_code, NULL_RTX,
8518 GET_MODE (reg), 0,
8519 XEXP (jump_label, 0));
8520 tem = get_insns ();
8521 end_sequence ();
8522 emit_jump_insn_before (tem, loop_end);
8524 for (tem = PREV_INSN (loop_end);
8525 tem && GET_CODE (tem) != JUMP_INSN;
8526 tem = PREV_INSN (tem))
8529 if (tem)
8530 JUMP_LABEL (tem) = XEXP (jump_label, 0);
8532 if (nonneg)
8534 if (tem)
8536 /* Increment of LABEL_NUSES done above. */
8537 /* Register is now always nonnegative,
8538 so add REG_NONNEG note to the branch. */
8539 REG_NOTES (tem) = gen_rtx_EXPR_LIST (REG_NONNEG, reg,
8540 REG_NOTES (tem));
8542 bl->nonneg = 1;
8545 /* No insn may reference both the reversed and another biv or it
8546 will fail (see comment near the top of the loop reversal
8547 code).
8548 Earlier on, we have verified that the biv has no use except
8549 counting, or it is the only biv in this function.
8550 However, the code that computes no_use_except_counting does
8551 not verify reg notes. It's possible to have an insn that
8552 references another biv, and has a REG_EQUAL note with an
8553 expression based on the reversed biv. To avoid this case,
8554 remove all REG_EQUAL notes based on the reversed biv
8555 here. */
8556 for (p = loop_start; p != loop_end; p = NEXT_INSN (p))
8557 if (INSN_P (p))
8559 rtx *pnote;
8560 rtx set = single_set (p);
8561 /* If this is a set of a GIV based on the reversed biv, any
8562 REG_EQUAL notes should still be correct. */
8563 if (! set
8564 || GET_CODE (SET_DEST (set)) != REG
8565 || (size_t) REGNO (SET_DEST (set)) >= ivs->n_regs
8566 || REG_IV_TYPE (ivs, REGNO (SET_DEST (set))) != GENERAL_INDUCT
8567 || REG_IV_INFO (ivs, REGNO (SET_DEST (set)))->src_reg != bl->biv->src_reg)
8568 for (pnote = &REG_NOTES (p); *pnote;)
8570 if (REG_NOTE_KIND (*pnote) == REG_EQUAL
8571 && reg_mentioned_p (regno_reg_rtx[bl->regno],
8572 XEXP (*pnote, 0)))
8573 *pnote = XEXP (*pnote, 1);
8574 else
8575 pnote = &XEXP (*pnote, 1);
8579 /* Mark that this biv has been reversed. Each giv which depends
8580 on this biv, and which is also live past the end of the loop
8581 will have to be fixed up. */
8583 bl->reversed = 1;
8585 if (loop_dump_stream)
8587 fprintf (loop_dump_stream, "Reversed loop");
8588 if (bl->nonneg)
8589 fprintf (loop_dump_stream, " and added reg_nonneg\n");
8590 else
8591 fprintf (loop_dump_stream, "\n");
8594 return 1;
8599 return 0;
8602 /* Verify whether the biv BL appears to be eliminable,
8603 based on the insns in the loop that refer to it.
8605 If ELIMINATE_P is nonzero, actually do the elimination.
8607 THRESHOLD and INSN_COUNT are from loop_optimize and are used to
8608 determine whether invariant insns should be placed inside or at the
8609 start of the loop. */
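/* Illustrative note (hypothetical loop): if the only remaining use of the
   biv I is an exit test such as (lt (reg i) (const_int 100)), and a giv
   P = base + I*4 has been reduced to a register, the test can be rewritten
   as a compare of P against base + 400 (computed just once), after which
   the insns maintaining I become dead and I can be eliminated.  */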
8611 static int
8612 maybe_eliminate_biv (const struct loop *loop, struct iv_class *bl,
8613 int eliminate_p, int threshold, int insn_count)
8615 struct loop_ivs *ivs = LOOP_IVS (loop);
8616 rtx reg = bl->biv->dest_reg;
8617 rtx p;
8619 /* Scan all insns in the loop, stopping if we find one that uses the
8620 biv in a way that we cannot eliminate. */
8622 for (p = loop->start; p != loop->end; p = NEXT_INSN (p))
8624 enum rtx_code code = GET_CODE (p);
8625 basic_block where_bb = 0;
8626 rtx where_insn = threshold >= insn_count ? 0 : p;
8627 rtx note;
8629 /* If this is a libcall that sets a giv, skip ahead to its end. */
8630 if (GET_RTX_CLASS (code) == 'i')
8632 note = find_reg_note (p, REG_LIBCALL, NULL_RTX);
8634 if (note)
8636 rtx last = XEXP (note, 0);
8637 rtx set = single_set (last);
8639 if (set && GET_CODE (SET_DEST (set)) == REG)
8641 unsigned int regno = REGNO (SET_DEST (set));
8643 if (regno < ivs->n_regs
8644 && REG_IV_TYPE (ivs, regno) == GENERAL_INDUCT
8645 && REG_IV_INFO (ivs, regno)->src_reg == bl->biv->src_reg)
8646 p = last;
8651 /* Closely examine the insn if the biv is mentioned. */
8652 if ((code == INSN || code == JUMP_INSN || code == CALL_INSN)
8653 && reg_mentioned_p (reg, PATTERN (p))
8654 && ! maybe_eliminate_biv_1 (loop, PATTERN (p), p, bl,
8655 eliminate_p, where_bb, where_insn))
8657 if (loop_dump_stream)
8658 fprintf (loop_dump_stream,
8659 "Cannot eliminate biv %d: biv used in insn %d.\n",
8660 bl->regno, INSN_UID (p));
8661 break;
8664 /* If we are eliminating, kill REG_EQUAL notes mentioning the biv. */
8665 if (eliminate_p
8666 && (note = find_reg_note (p, REG_EQUAL, NULL_RTX)) != NULL_RTX
8667 && reg_mentioned_p (reg, XEXP (note, 0)))
8668 remove_note (p, note);
8671 if (p == loop->end)
8673 if (loop_dump_stream)
8674 fprintf (loop_dump_stream, "biv %d %s eliminated.\n",
8675 bl->regno, eliminate_p ? "was" : "can be");
8676 return 1;
8679 return 0;
8682 /* INSN and REFERENCE are instructions in the same insn chain.
8683 Return nonzero if INSN is first. */
8686 loop_insn_first_p (rtx insn, rtx reference)
8688 rtx p, q;
8690 for (p = insn, q = reference;;)
8692 /* Start with test for not first so that INSN == REFERENCE yields not
8693 first. */
8694 if (q == insn || ! p)
8695 return 0;
8696 if (p == reference || ! q)
8697 return 1;
8699 /* Either of P or Q might be a NOTE. Notes have the same LUID as the
8700 previous insn, hence the <= comparison below does not work if
8701 P is a note. */
8702 if (INSN_UID (p) < max_uid_for_loop
8703 && INSN_UID (q) < max_uid_for_loop
8704 && GET_CODE (p) != NOTE)
8705 return INSN_LUID (p) <= INSN_LUID (q);
8707 if (INSN_UID (p) >= max_uid_for_loop
8708 || GET_CODE (p) == NOTE)
8709 p = NEXT_INSN (p);
8710 if (INSN_UID (q) >= max_uid_for_loop)
8711 q = NEXT_INSN (q);
8715 /* We are trying to eliminate BIV in INSN using GIV. Return nonzero if
8716 the offset that we have to take into account due to auto-increment /
8717 giv derivation is zero. */
8718 static int
8719 biv_elimination_giv_has_0_offset (struct induction *biv,
8720 struct induction *giv, rtx insn)
8722 /* If the giv V had the auto-inc address optimization applied
8723 to it, and INSN occurs between the giv insn and the biv
8724 insn, then we'd have to adjust the value used here.
8725 This is rare, so we don't bother to make this possible. */
8726 if (giv->auto_inc_opt
8727 && ((loop_insn_first_p (giv->insn, insn)
8728 && loop_insn_first_p (insn, biv->insn))
8729 || (loop_insn_first_p (biv->insn, insn)
8730 && loop_insn_first_p (insn, giv->insn))))
8731 return 0;
8733 return 1;
8736 /* If BL appears in X (part of the pattern of INSN), see if we can
8737 eliminate its use. If so, return 1. If not, return 0.
8739 If BIV does not appear in X, return 1.
8741 If ELIMINATE_P is nonzero, actually do the elimination.
8742 WHERE_INSN/WHERE_BB indicate where extra insns should be added.
8743 Depending on how many items have been moved out of the loop, it
8744 will either be before INSN (when WHERE_INSN is nonzero) or at the
8745 start of the loop (when WHERE_INSN is zero). */
8747 static int
8748 maybe_eliminate_biv_1 (const struct loop *loop, rtx x, rtx insn,
8749 struct iv_class *bl, int eliminate_p,
8750 basic_block where_bb, rtx where_insn)
8752 enum rtx_code code = GET_CODE (x);
8753 rtx reg = bl->biv->dest_reg;
8754 enum machine_mode mode = GET_MODE (reg);
8755 struct induction *v;
8756 rtx arg, tem;
8757 #ifdef HAVE_cc0
8758 rtx new;
8759 #endif
8760 int arg_operand;
8761 const char *fmt;
8762 int i, j;
8764 switch (code)
8766 case REG:
8767 /* If we haven't already been able to do something with this BIV,
8768 we can't eliminate it. */
8769 if (x == reg)
8770 return 0;
8771 return 1;
8773 case SET:
8774 /* If this sets the BIV, it is not a problem. */
8775 if (SET_DEST (x) == reg)
8776 return 1;
8778 /* If this is an insn that defines a giv, it is also ok because
8779 it will go away when the giv is reduced. */
8780 for (v = bl->giv; v; v = v->next_iv)
8781 if (v->giv_type == DEST_REG && SET_DEST (x) == v->dest_reg)
8782 return 1;
8784 #ifdef HAVE_cc0
8785 if (SET_DEST (x) == cc0_rtx && SET_SRC (x) == reg)
8787 /* Can replace with any giv that was reduced and
8788 that has (MULT_VAL != 0) and (ADD_VAL == 0).
8789 Require a constant for MULT_VAL, so we know it's nonzero.
8790 ??? We disable this optimization to avoid potential
8791 overflows. */
8793 for (v = bl->giv; v; v = v->next_iv)
8794 if (GET_CODE (v->mult_val) == CONST_INT && v->mult_val != const0_rtx
8795 && v->add_val == const0_rtx
8796 && ! v->ignore && ! v->maybe_dead && v->always_computable
8797 && v->mode == mode
8798 && 0)
8800 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
8801 continue;
8803 if (! eliminate_p)
8804 return 1;
8806 /* If the giv has the opposite direction of change,
8807 then reverse the comparison. */
8808 if (INTVAL (v->mult_val) < 0)
8809 new = gen_rtx_COMPARE (GET_MODE (v->new_reg),
8810 const0_rtx, v->new_reg);
8811 else
8812 new = v->new_reg;
8814 /* We can probably test that giv's reduced reg. */
8815 if (validate_change (insn, &SET_SRC (x), new, 0))
8816 return 1;
8819 /* Look for a giv with (MULT_VAL != 0) and (ADD_VAL != 0);
8820 replace test insn with a compare insn (cmp REDUCED_GIV ADD_VAL).
8821 Require a constant for MULT_VAL, so we know it's nonzero.
8822 ??? Do this only if ADD_VAL is a pointer to avoid a potential
8823 overflow problem. */
8825 for (v = bl->giv; v; v = v->next_iv)
8826 if (GET_CODE (v->mult_val) == CONST_INT
8827 && v->mult_val != const0_rtx
8828 && ! v->ignore && ! v->maybe_dead && v->always_computable
8829 && v->mode == mode
8830 && (GET_CODE (v->add_val) == SYMBOL_REF
8831 || GET_CODE (v->add_val) == LABEL_REF
8832 || GET_CODE (v->add_val) == CONST
8833 || (GET_CODE (v->add_val) == REG
8834 && REG_POINTER (v->add_val))))
8836 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
8837 continue;
8839 if (! eliminate_p)
8840 return 1;
8842 /* If the giv has the opposite direction of change,
8843 then reverse the comparison. */
8844 if (INTVAL (v->mult_val) < 0)
8845 new = gen_rtx_COMPARE (VOIDmode, copy_rtx (v->add_val),
8846 v->new_reg);
8847 else
8848 new = gen_rtx_COMPARE (VOIDmode, v->new_reg,
8849 copy_rtx (v->add_val));
8851 /* Replace biv with the giv's reduced register. */
8852 update_reg_last_use (v->add_val, insn);
8853 if (validate_change (insn, &SET_SRC (PATTERN (insn)), new, 0))
8854 return 1;
8856 /* Insn doesn't support that constant or invariant. Copy it
8857 into a register (it will be a loop invariant.) */
8858 tem = gen_reg_rtx (GET_MODE (v->new_reg));
8860 loop_insn_emit_before (loop, 0, where_insn,
8861 gen_move_insn (tem,
8862 copy_rtx (v->add_val)));
8864 /* Substitute the new register for its invariant value in
8865 the compare expression. */
8866 XEXP (new, (INTVAL (v->mult_val) < 0) ? 0 : 1) = tem;
8867 if (validate_change (insn, &SET_SRC (PATTERN (insn)), new, 0))
8868 return 1;
8871 #endif
8872 break;
8874 case COMPARE:
8875 case EQ: case NE:
8876 case GT: case GE: case GTU: case GEU:
8877 case LT: case LE: case LTU: case LEU:
8878 /* See if either argument is the biv. */
8879 if (XEXP (x, 0) == reg)
8880 arg = XEXP (x, 1), arg_operand = 1;
8881 else if (XEXP (x, 1) == reg)
8882 arg = XEXP (x, 0), arg_operand = 0;
8883 else
8884 break;
8886 if (CONSTANT_P (arg))
8888 /* First try to replace with any giv that has constant positive
8889 mult_val and constant add_val. We might be able to support
8890 negative mult_val, but it seems complex to do it in general. */
8892 for (v = bl->giv; v; v = v->next_iv)
8893 if (GET_CODE (v->mult_val) == CONST_INT
8894 && INTVAL (v->mult_val) > 0
8895 && (GET_CODE (v->add_val) == SYMBOL_REF
8896 || GET_CODE (v->add_val) == LABEL_REF
8897 || GET_CODE (v->add_val) == CONST
8898 || (GET_CODE (v->add_val) == REG
8899 && REG_POINTER (v->add_val)))
8900 && ! v->ignore && ! v->maybe_dead && v->always_computable
8901 && v->mode == mode)
8903 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
8904 continue;
8906 /* Don't eliminate if the linear combination that makes up
8907 the giv overflows when it is applied to ARG. */
8908 if (GET_CODE (arg) == CONST_INT)
8910 rtx add_val;
8912 if (GET_CODE (v->add_val) == CONST_INT)
8913 add_val = v->add_val;
8914 else
8915 add_val = const0_rtx;
8917 if (const_mult_add_overflow_p (arg, v->mult_val,
8918 add_val, mode, 1))
8919 continue;
8922 if (! eliminate_p)
8923 return 1;
8925 /* Replace biv with the giv's reduced reg. */
8926 validate_change (insn, &XEXP (x, 1 - arg_operand), v->new_reg, 1);
8928 /* If all constants are actually constant integers and
8929 the derived constant can be directly placed in the COMPARE,
8930 do so. */
8931 if (GET_CODE (arg) == CONST_INT
8932 && GET_CODE (v->add_val) == CONST_INT)
8934 tem = expand_mult_add (arg, NULL_RTX, v->mult_val,
8935 v->add_val, mode, 1);
8937 else
8939 /* Otherwise, load it into a register. */
8940 tem = gen_reg_rtx (mode);
8941 loop_iv_add_mult_emit_before (loop, arg,
8942 v->mult_val, v->add_val,
8943 tem, where_bb, where_insn);
8946 validate_change (insn, &XEXP (x, arg_operand), tem, 1);
8948 if (apply_change_group ())
8949 return 1;
8952 /* Look for giv with positive constant mult_val and nonconst add_val.
8953 Insert insns to calculate new compare value.
8954 ??? Turn this off due to possible overflow. */
8956 for (v = bl->giv; v; v = v->next_iv)
8957 if (GET_CODE (v->mult_val) == CONST_INT
8958 && INTVAL (v->mult_val) > 0
8959 && ! v->ignore && ! v->maybe_dead && v->always_computable
8960 && v->mode == mode
8961 && 0)
8963 rtx tem;
8965 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
8966 continue;
8968 if (! eliminate_p)
8969 return 1;
8971 tem = gen_reg_rtx (mode);
8973 /* Replace biv with giv's reduced register. */
8974 validate_change (insn, &XEXP (x, 1 - arg_operand),
8975 v->new_reg, 1);
8977 /* Compute value to compare against. */
8978 loop_iv_add_mult_emit_before (loop, arg,
8979 v->mult_val, v->add_val,
8980 tem, where_bb, where_insn);
8981 /* Use it in this insn. */
8982 validate_change (insn, &XEXP (x, arg_operand), tem, 1);
8983 if (apply_change_group ())
8984 return 1;
8987 else if (GET_CODE (arg) == REG || GET_CODE (arg) == MEM)
8989 if (loop_invariant_p (loop, arg) == 1)
8991 /* Look for giv with constant positive mult_val and nonconst
8992 add_val. Insert insns to compute new compare value.
8993 ??? Turn this off due to possible overflow. */
8995 for (v = bl->giv; v; v = v->next_iv)
8996 if (GET_CODE (v->mult_val) == CONST_INT && INTVAL (v->mult_val) > 0
8997 && ! v->ignore && ! v->maybe_dead && v->always_computable
8998 && v->mode == mode
8999 && 0)
9001 rtx tem;
9003 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
9004 continue;
9006 if (! eliminate_p)
9007 return 1;
9009 tem = gen_reg_rtx (mode);
9011 /* Replace biv with giv's reduced register. */
9012 validate_change (insn, &XEXP (x, 1 - arg_operand),
9013 v->new_reg, 1);
9015 /* Compute value to compare against. */
9016 loop_iv_add_mult_emit_before (loop, arg,
9017 v->mult_val, v->add_val,
9018 tem, where_bb, where_insn);
9019 validate_change (insn, &XEXP (x, arg_operand), tem, 1);
9020 if (apply_change_group ())
9021 return 1;
9025 /* This code has problems. Basically, you can't know when
9026 seeing if we will eliminate BL, whether a particular giv
9027 of ARG will be reduced. If it isn't going to be reduced,
9028 we can't eliminate BL. We can try forcing it to be reduced,
9029 but that can generate poor code.
9031 The problem is that the benefit of reducing TV, below should
9032 be increased if BL can actually be eliminated, but this means
9033 we might have to do a topological sort of the order in which
9034 we try to process biv. It doesn't seem worthwhile to do
9035 this sort of thing now. */
9037 #if 0
9038 /* Otherwise the reg compared with had better be a biv. */
9039 if (GET_CODE (arg) != REG
9040 || REG_IV_TYPE (ivs, REGNO (arg)) != BASIC_INDUCT)
9041 return 0;
9043 /* Look for a pair of givs, one for each biv,
9044 with identical coefficients. */
9045 for (v = bl->giv; v; v = v->next_iv)
9047 struct induction *tv;
9049 if (v->ignore || v->maybe_dead || v->mode != mode)
9050 continue;
9052 for (tv = REG_IV_CLASS (ivs, REGNO (arg))->giv; tv;
9053 tv = tv->next_iv)
9054 if (! tv->ignore && ! tv->maybe_dead
9055 && rtx_equal_p (tv->mult_val, v->mult_val)
9056 && rtx_equal_p (tv->add_val, v->add_val)
9057 && tv->mode == mode)
9059 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
9060 continue;
9062 if (! eliminate_p)
9063 return 1;
9065 /* Replace biv with its giv's reduced reg. */
9066 XEXP (x, 1 - arg_operand) = v->new_reg;
9067 /* Replace other operand with the other giv's
9068 reduced reg. */
9069 XEXP (x, arg_operand) = tv->new_reg;
9070 return 1;
9073 #endif
9076 /* If we get here, the biv can't be eliminated. */
9077 return 0;
9079 case MEM:
9080 /* If this address is a DEST_ADDR giv, it doesn't matter if the
9081 biv is used in it, since it will be replaced. */
9082 for (v = bl->giv; v; v = v->next_iv)
9083 if (v->giv_type == DEST_ADDR && v->location == &XEXP (x, 0))
9084 return 1;
9085 break;
9087 default:
9088 break;
9091 /* See if any subexpression fails elimination. */
9092 fmt = GET_RTX_FORMAT (code);
9093 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
9095 switch (fmt[i])
9097 case 'e':
9098 if (! maybe_eliminate_biv_1 (loop, XEXP (x, i), insn, bl,
9099 eliminate_p, where_bb, where_insn))
9100 return 0;
9101 break;
9103 case 'E':
9104 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9105 if (! maybe_eliminate_biv_1 (loop, XVECEXP (x, i, j), insn, bl,
9106 eliminate_p, where_bb, where_insn))
9107 return 0;
9108 break;
9112 return 1;
9115 /* Return nonzero if the last use of REG
9116 is in an insn following INSN in the same basic block. */
9118 static int
9119 last_use_this_basic_block (rtx reg, rtx insn)
9121 rtx n;
9122 for (n = insn;
9123 n && GET_CODE (n) != CODE_LABEL && GET_CODE (n) != JUMP_INSN;
9124 n = NEXT_INSN (n))
9126 if (REGNO_LAST_UID (REGNO (reg)) == INSN_UID (n))
9127 return 1;
9129 return 0;
9132 /* Called via `note_stores' to record the initial value of a biv. Here we
9133 just record the location of the set and process it later. */
9135 static void
9136 record_initial (rtx dest, rtx set, void *data ATTRIBUTE_UNUSED)
9138 struct loop_ivs *ivs = (struct loop_ivs *) data;
9139 struct iv_class *bl;
9141 if (GET_CODE (dest) != REG
9142 || REGNO (dest) >= ivs->n_regs
9143 || REG_IV_TYPE (ivs, REGNO (dest)) != BASIC_INDUCT)
9144 return;
9146 bl = REG_IV_CLASS (ivs, REGNO (dest));
9148 /* If this is the first set found, record it. */
9149 if (bl->init_insn == 0)
9151 bl->init_insn = note_insn;
9152 bl->init_set = set;
9156 /* If any of the registers in X are "old" and currently have a last use earlier
9157 than INSN, update them to have a last use of INSN. Their actual last use
9158 will be the previous insn but it will not have a valid uid_luid so we can't
9159 use it. X must be a source expression only. */
9161 static void
9162 update_reg_last_use (rtx x, rtx insn)
9164 /* Check for the case where INSN does not have a valid luid. In this case,
9165 there is no need to modify the regno_last_uid, as this can only happen
9166 when code is inserted after the loop_end to set a pseudo's final value,
9167 and hence this insn will never be the last use of x.
9168 ???? This comment is not correct. See for example loop_givs_reduce.
9169 This may insert an insn before another new insn. */
9170 if (GET_CODE (x) == REG && REGNO (x) < max_reg_before_loop
9171 && INSN_UID (insn) < max_uid_for_loop
9172 && REGNO_LAST_LUID (REGNO (x)) < INSN_LUID (insn))
9174 REGNO_LAST_UID (REGNO (x)) = INSN_UID (insn);
9176 else
9178 int i, j;
9179 const char *fmt = GET_RTX_FORMAT (GET_CODE (x));
9180 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9182 if (fmt[i] == 'e')
9183 update_reg_last_use (XEXP (x, i), insn);
9184 else if (fmt[i] == 'E')
9185 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9186 update_reg_last_use (XVECEXP (x, i, j), insn);
9191 /* Given an insn INSN and condition COND, return the condition in a
9192 canonical form to simplify testing by callers. Specifically:
9194 (1) The code will always be a comparison operation (EQ, NE, GT, etc.).
9195 (2) Both operands will be machine operands; (cc0) will have been replaced.
9196 (3) If an operand is a constant, it will be the second operand.
9197 (4) (LE x const) will be replaced with (LT x <const+1>) and similarly
9198 for GE, GEU, and LEU.
9200 If the condition cannot be understood, or is an inequality floating-point
9201 comparison which needs to be reversed, 0 will be returned.
 9203 If REVERSE is nonzero, then reverse the condition prior to canonicalizing it.
9205 If EARLIEST is nonzero, it is a pointer to a place where the earliest
9206 insn used in locating the condition was found. If a replacement test
9207 of the condition is desired, it should be placed in front of that
9208 insn and we will be sure that the inputs are still valid.
9210 If WANT_REG is nonzero, we wish the condition to be relative to that
9211 register, if possible. Therefore, do not canonicalize the condition
9212 further. If ALLOW_CC_MODE is nonzero, allow the condition returned
9213 to be a compare to a CC mode register. */
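/* Illustrative example (editorial, invented register numbers): given the
   condition (leu (reg:SI 60) (const_int 4)), rule (4) rewrites it to
   (ltu (reg:SI 60) (const_int 5)); given (gt (const_int 0) (reg:SI 60)),
   rule (3) swaps it to (lt (reg:SI 60) (const_int 0)); and with REVERSE
   nonzero, an LE condition is first reversed to GT, to which rule (4) no
   longer applies.  */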
9216 canonicalize_condition (rtx insn, rtx cond, int reverse, rtx *earliest,
9217 rtx want_reg, int allow_cc_mode)
9219 enum rtx_code code;
9220 rtx prev = insn;
9221 rtx set;
9222 rtx tem;
9223 rtx op0, op1;
9224 int reverse_code = 0;
9225 enum machine_mode mode;
9227 code = GET_CODE (cond);
9228 mode = GET_MODE (cond);
9229 op0 = XEXP (cond, 0);
9230 op1 = XEXP (cond, 1);
9232 if (reverse)
9233 code = reversed_comparison_code (cond, insn);
9234 if (code == UNKNOWN)
9235 return 0;
9237 if (earliest)
9238 *earliest = insn;
9240 /* If we are comparing a register with zero, see if the register is set
9241 in the previous insn to a COMPARE or a comparison operation. Perform
9242 the same tests as a function of STORE_FLAG_VALUE as find_comparison_args
9243 in cse.c */
9245 while (GET_RTX_CLASS (code) == '<'
9246 && op1 == CONST0_RTX (GET_MODE (op0))
9247 && op0 != want_reg)
9249 /* Set nonzero when we find something of interest. */
9250 rtx x = 0;
9252 #ifdef HAVE_cc0
9253 /* If comparison with cc0, import actual comparison from compare
9254 insn. */
9255 if (op0 == cc0_rtx)
9257 if ((prev = prev_nonnote_insn (prev)) == 0
9258 || GET_CODE (prev) != INSN
9259 || (set = single_set (prev)) == 0
9260 || SET_DEST (set) != cc0_rtx)
9261 return 0;
9263 op0 = SET_SRC (set);
9264 op1 = CONST0_RTX (GET_MODE (op0));
9265 if (earliest)
9266 *earliest = prev;
9268 #endif
9270 /* If this is a COMPARE, pick up the two things being compared. */
9271 if (GET_CODE (op0) == COMPARE)
9273 op1 = XEXP (op0, 1);
9274 op0 = XEXP (op0, 0);
9275 continue;
9277 else if (GET_CODE (op0) != REG)
9278 break;
9280 /* Go back to the previous insn. Stop if it is not an INSN. We also
9281 stop if it isn't a single set or if it has a REG_INC note because
9282 we don't want to bother dealing with it. */
9284 if ((prev = prev_nonnote_insn (prev)) == 0
9285 || GET_CODE (prev) != INSN
9286 || FIND_REG_INC_NOTE (prev, NULL_RTX))
9287 break;
9289 set = set_of (op0, prev);
9291 if (set
9292 && (GET_CODE (set) != SET
9293 || !rtx_equal_p (SET_DEST (set), op0)))
9294 break;
9296 /* If this is setting OP0, get what it sets it to if it looks
9297 relevant. */
9298 if (set)
9300 enum machine_mode inner_mode = GET_MODE (SET_DEST (set));
9301 #ifdef FLOAT_STORE_FLAG_VALUE
9302 REAL_VALUE_TYPE fsfv;
9303 #endif
9305 /* ??? We may not combine comparisons done in a CCmode with
9306 comparisons not done in a CCmode. This is to aid targets
9307 like Alpha that have an IEEE compliant EQ instruction, and
9308 a non-IEEE compliant BEQ instruction. The use of CCmode is
9309 actually artificial, simply to prevent the combination, but
9310 should not affect other platforms.
9312 However, we must allow VOIDmode comparisons to match either
9313 CCmode or non-CCmode comparison, because some ports have
9314 modeless comparisons inside branch patterns.
9316 ??? This mode check should perhaps look more like the mode check
9317 in simplify_comparison in combine. */
9319 if ((GET_CODE (SET_SRC (set)) == COMPARE
9320 || (((code == NE
9321 || (code == LT
9322 && GET_MODE_CLASS (inner_mode) == MODE_INT
9323 && (GET_MODE_BITSIZE (inner_mode)
9324 <= HOST_BITS_PER_WIDE_INT)
9325 && (STORE_FLAG_VALUE
9326 & ((HOST_WIDE_INT) 1
9327 << (GET_MODE_BITSIZE (inner_mode) - 1))))
9328 #ifdef FLOAT_STORE_FLAG_VALUE
9329 || (code == LT
9330 && GET_MODE_CLASS (inner_mode) == MODE_FLOAT
9331 && (fsfv = FLOAT_STORE_FLAG_VALUE (inner_mode),
9332 REAL_VALUE_NEGATIVE (fsfv)))
9333 #endif
9335 && GET_RTX_CLASS (GET_CODE (SET_SRC (set))) == '<'))
9336 && (((GET_MODE_CLASS (mode) == MODE_CC)
9337 == (GET_MODE_CLASS (inner_mode) == MODE_CC))
9338 || mode == VOIDmode || inner_mode == VOIDmode))
9339 x = SET_SRC (set);
9340 else if (((code == EQ
9341 || (code == GE
9342 && (GET_MODE_BITSIZE (inner_mode)
9343 <= HOST_BITS_PER_WIDE_INT)
9344 && GET_MODE_CLASS (inner_mode) == MODE_INT
9345 && (STORE_FLAG_VALUE
9346 & ((HOST_WIDE_INT) 1
9347 << (GET_MODE_BITSIZE (inner_mode) - 1))))
9348 #ifdef FLOAT_STORE_FLAG_VALUE
9349 || (code == GE
9350 && GET_MODE_CLASS (inner_mode) == MODE_FLOAT
9351 && (fsfv = FLOAT_STORE_FLAG_VALUE (inner_mode),
9352 REAL_VALUE_NEGATIVE (fsfv)))
9353 #endif
9355 && GET_RTX_CLASS (GET_CODE (SET_SRC (set))) == '<'
9356 && (((GET_MODE_CLASS (mode) == MODE_CC)
9357 == (GET_MODE_CLASS (inner_mode) == MODE_CC))
9358 || mode == VOIDmode || inner_mode == VOIDmode))
9361 reverse_code = 1;
9362 x = SET_SRC (set);
9364 else
9365 break;
9368 else if (reg_set_p (op0, prev))
9369 /* If this sets OP0, but not directly, we have to give up. */
9370 break;
9372 if (x)
9374 if (GET_RTX_CLASS (GET_CODE (x)) == '<')
9375 code = GET_CODE (x);
9376 if (reverse_code)
9378 code = reversed_comparison_code (x, prev);
9379 if (code == UNKNOWN)
9380 return 0;
9381 reverse_code = 0;
9384 op0 = XEXP (x, 0), op1 = XEXP (x, 1);
9385 if (earliest)
9386 *earliest = prev;
9390 /* If constant is first, put it last. */
9391 if (CONSTANT_P (op0))
9392 code = swap_condition (code), tem = op0, op0 = op1, op1 = tem;
9394 /* If OP0 is the result of a comparison, we weren't able to find what
9395 was really being compared, so fail. */
9396 if (!allow_cc_mode
9397 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
9398 return 0;
 9400 /* Canonicalize ordered comparisons against an integer constant so that
 9401 they no longer involve equality (LE -> LT, GE -> GT, LEU -> LTU, GEU -> GTU),
 9402 provided we can do the computation in the relevant mode without overflow. */
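  /* Editorial note: the guards in the switch below keep the rewrite from
     overflowing.  E.g. in SImode, (le x 0x7fffffff) must not become
     (lt x 0x80000000), and (geu x 0) must not become (gtu x 0xffffffff);
     in such boundary cases the comparison is left in its original form.  */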
9404 if (GET_MODE_CLASS (GET_MODE (op0)) != MODE_CC
9405 && GET_CODE (op1) == CONST_INT
9406 && GET_MODE (op0) != VOIDmode
9407 && GET_MODE_BITSIZE (GET_MODE (op0)) <= HOST_BITS_PER_WIDE_INT)
9409 HOST_WIDE_INT const_val = INTVAL (op1);
9410 unsigned HOST_WIDE_INT uconst_val = const_val;
9411 unsigned HOST_WIDE_INT max_val
9412 = (unsigned HOST_WIDE_INT) GET_MODE_MASK (GET_MODE (op0));
9414 switch (code)
9416 case LE:
9417 if ((unsigned HOST_WIDE_INT) const_val != max_val >> 1)
9418 code = LT, op1 = gen_int_mode (const_val + 1, GET_MODE (op0));
9419 break;
9421 /* When cross-compiling, const_val might be sign-extended from
9422 BITS_PER_WORD to HOST_BITS_PER_WIDE_INT */
9423 case GE:
9424 if ((HOST_WIDE_INT) (const_val & max_val)
9425 != (((HOST_WIDE_INT) 1
9426 << (GET_MODE_BITSIZE (GET_MODE (op0)) - 1))))
9427 code = GT, op1 = gen_int_mode (const_val - 1, GET_MODE (op0));
9428 break;
9430 case LEU:
9431 if (uconst_val < max_val)
9432 code = LTU, op1 = gen_int_mode (uconst_val + 1, GET_MODE (op0));
9433 break;
9435 case GEU:
9436 if (uconst_val != 0)
9437 code = GTU, op1 = gen_int_mode (uconst_val - 1, GET_MODE (op0));
9438 break;
9440 default:
9441 break;
9445 /* Never return CC0; return zero instead. */
9446 if (CC0_P (op0))
9447 return 0;
9449 return gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
9452 /* Given a jump insn JUMP, return the condition that will cause it to branch
9453 to its JUMP_LABEL. If the condition cannot be understood, or is an
9454 inequality floating-point comparison which needs to be reversed, 0 will
9455 be returned.
9457 If EARLIEST is nonzero, it is a pointer to a place where the earliest
9458 insn used in locating the condition was found. If a replacement test
9459 of the condition is desired, it should be placed in front of that
9460 insn and we will be sure that the inputs are still valid.
9462 If ALLOW_CC_MODE is nonzero, allow the condition returned to be a
 9463 compare to a CC mode register. */
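/* Illustrative example (editorial, invented uids and register numbers): for
   a conditional jump whose pattern is
     (set (pc) (if_then_else (eq (reg:SI 60) (const_int 0))
                             (label_ref 23) (pc)))
   the EQ itself is handed to canonicalize_condition with REVERSE zero; if
   the LABEL_REF to JUMP_LABEL appears in the "else" arm instead, REVERSE is
   set so that the returned condition describes when the branch is taken.  */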
9466 get_condition (rtx jump, rtx *earliest, int allow_cc_mode)
9468 rtx cond;
9469 int reverse;
9470 rtx set;
9472 /* If this is not a standard conditional jump, we can't parse it. */
9473 if (GET_CODE (jump) != JUMP_INSN
9474 || ! any_condjump_p (jump))
9475 return 0;
9476 set = pc_set (jump);
9478 cond = XEXP (SET_SRC (set), 0);
9480 /* If this branches to JUMP_LABEL when the condition is false, reverse
9481 the condition. */
9482 reverse
9483 = GET_CODE (XEXP (SET_SRC (set), 2)) == LABEL_REF
9484 && XEXP (XEXP (SET_SRC (set), 2), 0) == JUMP_LABEL (jump);
9486 return canonicalize_condition (jump, cond, reverse, earliest, NULL_RTX,
9487 allow_cc_mode);
9490 /* Similar to above routine, except that we also put an invariant last
9491 unless both operands are invariants. */
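/* Illustrative example (editorial, invented register numbers): if the exit
   test is (ge (reg:SI 70) (reg:SI 60)), where reg 70 is loop-invariant and
   reg 60 is not, the comparison is returned swapped as
   (le (reg:SI 60) (reg:SI 70)), so the invariant operand ends up second.  */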
9494 get_condition_for_loop (const struct loop *loop, rtx x)
9496 rtx comparison = get_condition (x, (rtx*) 0, false);
9498 if (comparison == 0
9499 || ! loop_invariant_p (loop, XEXP (comparison, 0))
9500 || loop_invariant_p (loop, XEXP (comparison, 1)))
9501 return comparison;
9503 return gen_rtx_fmt_ee (swap_condition (GET_CODE (comparison)), VOIDmode,
9504 XEXP (comparison, 1), XEXP (comparison, 0));
9507 /* Scan the function and determine whether it has indirect (computed) jumps.
9509 This is taken mostly from flow.c; similar code exists elsewhere
9510 in the compiler. It may be useful to put this into rtlanal.c. */
9511 static int
9512 indirect_jump_in_function_p (rtx start)
9514 rtx insn;
9516 for (insn = start; insn; insn = NEXT_INSN (insn))
9517 if (computed_jump_p (insn))
9518 return 1;
9520 return 0;
9523 /* Add MEM to the LOOP_MEMS array, if appropriate. See the
9524 documentation for LOOP_MEMS for the definition of `appropriate'.
9525 This function is called from prescan_loop via for_each_rtx. */
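/* Editorial note on the for_each_rtx callback protocol used by this and the
   other callbacks below: returning 0 lets the traversal continue into
   subexpressions, returning -1 skips the subexpressions of the current rtx
   but continues elsewhere, and any other nonzero value stops the traversal
   and is returned from for_each_rtx itself.  */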
9527 static int
9528 insert_loop_mem (rtx *mem, void *data ATTRIBUTE_UNUSED)
9530 struct loop_info *loop_info = data;
9531 int i;
9532 rtx m = *mem;
9534 if (m == NULL_RTX)
9535 return 0;
9537 switch (GET_CODE (m))
9539 case MEM:
9540 break;
9542 case CLOBBER:
9543 /* We're not interested in MEMs that are only clobbered. */
9544 return -1;
9546 case CONST_DOUBLE:
9547 /* We're not interested in the MEM associated with a
9548 CONST_DOUBLE, so there's no need to traverse into this. */
9549 return -1;
9551 case EXPR_LIST:
9552 /* We're not interested in any MEMs that only appear in notes. */
9553 return -1;
9555 default:
9556 /* This is not a MEM. */
9557 return 0;
9560 /* See if we've already seen this MEM. */
9561 for (i = 0; i < loop_info->mems_idx; ++i)
9562 if (rtx_equal_p (m, loop_info->mems[i].mem))
9564 if (GET_MODE (m) != GET_MODE (loop_info->mems[i].mem))
9565 /* The modes of the two memory accesses are different. If
9566 this happens, something tricky is going on, and we just
9567 don't optimize accesses to this MEM. */
9568 loop_info->mems[i].optimize = 0;
9570 return 0;
9573 /* Resize the array, if necessary. */
9574 if (loop_info->mems_idx == loop_info->mems_allocated)
9576 if (loop_info->mems_allocated != 0)
9577 loop_info->mems_allocated *= 2;
9578 else
9579 loop_info->mems_allocated = 32;
9581 loop_info->mems = xrealloc (loop_info->mems,
9582 loop_info->mems_allocated * sizeof (loop_mem_info));
9585 /* Actually insert the MEM. */
9586 loop_info->mems[loop_info->mems_idx].mem = m;
9587 /* We can't hoist this MEM out of the loop if it's a BLKmode MEM
9588 because we can't put it in a register. We still store it in the
9589 table, though, so that if we see the same address later, but in a
9590 non-BLK mode, we'll not think we can optimize it at that point. */
9591 loop_info->mems[loop_info->mems_idx].optimize = (GET_MODE (m) != BLKmode);
9592 loop_info->mems[loop_info->mems_idx].reg = NULL_RTX;
9593 ++loop_info->mems_idx;
9595 return 0;
9599 /* Allocate REGS->ARRAY or reallocate it if it is too small.
9601 Increment REGS->ARRAY[I].SET_IN_LOOP at the index I of each
 9602 register that is modified by an insn within LOOP. If the
9603 value of an element of REGS->array[I].SET_IN_LOOP becomes 127 or
9604 more, stop incrementing it, to avoid overflow.
9606 Store in REGS->ARRAY[I].SINGLE_USAGE the single insn in which
9607 register I is used, if it is only used once. Otherwise, it is set
 9608 to 0 (for no uses) or to const0_rtx (for more than one use).
9611 Set REGS->ARRAY[I].MAY_NOT_OPTIMIZE nonzero if we should not
9612 optimize register I. */
9614 static void
9615 loop_regs_scan (const struct loop *loop, int extra_size)
9617 struct loop_regs *regs = LOOP_REGS (loop);
9618 int old_nregs;
9619 /* last_set[n] is nonzero iff reg n has been set in the current
9620 basic block. In that case, it is the insn that last set reg n. */
9621 rtx *last_set;
9622 rtx insn;
9623 int i;
9625 old_nregs = regs->num;
9626 regs->num = max_reg_num ();
9628 /* Grow the regs array if not allocated or too small. */
9629 if (regs->num >= regs->size)
9631 regs->size = regs->num + extra_size;
9633 regs->array = xrealloc (regs->array, regs->size * sizeof (*regs->array));
9635 /* Zero the new elements. */
9636 memset (regs->array + old_nregs, 0,
9637 (regs->size - old_nregs) * sizeof (*regs->array));
9640 /* Clear previously scanned fields but do not clear n_times_set. */
9641 for (i = 0; i < old_nregs; i++)
9643 regs->array[i].set_in_loop = 0;
9644 regs->array[i].may_not_optimize = 0;
9645 regs->array[i].single_usage = NULL_RTX;
9648 last_set = xcalloc (regs->num, sizeof (rtx));
9650 /* Scan the loop, recording register usage. */
9651 for (insn = loop->top ? loop->top : loop->start; insn != loop->end;
9652 insn = NEXT_INSN (insn))
9654 if (INSN_P (insn))
9656 /* Record registers that have exactly one use. */
9657 find_single_use_in_loop (regs, insn, PATTERN (insn));
9659 /* Include uses in REG_EQUAL notes. */
9660 if (REG_NOTES (insn))
9661 find_single_use_in_loop (regs, insn, REG_NOTES (insn));
9663 if (GET_CODE (PATTERN (insn)) == SET
9664 || GET_CODE (PATTERN (insn)) == CLOBBER)
9665 count_one_set (regs, insn, PATTERN (insn), last_set);
9666 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
9668 int i;
9669 for (i = XVECLEN (PATTERN (insn), 0) - 1; i >= 0; i--)
9670 count_one_set (regs, insn, XVECEXP (PATTERN (insn), 0, i),
9671 last_set);
9675 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
9676 memset (last_set, 0, regs->num * sizeof (rtx));
9678 /* Invalidate all registers used for function argument passing.
9679 We check rtx_varies_p for the same reason as below, to allow
9680 optimizing PIC calculations. */
9681 if (GET_CODE (insn) == CALL_INSN)
9683 rtx link;
9684 for (link = CALL_INSN_FUNCTION_USAGE (insn);
9685 link;
9686 link = XEXP (link, 1))
9688 rtx op, reg;
9690 if (GET_CODE (op = XEXP (link, 0)) == USE
9691 && GET_CODE (reg = XEXP (op, 0)) == REG
9692 && rtx_varies_p (reg, 1))
9693 regs->array[REGNO (reg)].may_not_optimize = 1;
9698 /* Invalidate all hard registers clobbered by calls. With one exception:
9699 a call-clobbered PIC register is still function-invariant for our
9700 purposes, since we can hoist any PIC calculations out of the loop.
9701 Thus the call to rtx_varies_p. */
9702 if (LOOP_INFO (loop)->has_call)
9703 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
9704 if (TEST_HARD_REG_BIT (regs_invalidated_by_call, i)
9705 && rtx_varies_p (regno_reg_rtx[i], 1))
9707 regs->array[i].may_not_optimize = 1;
9708 regs->array[i].set_in_loop = 1;
9711 #ifdef AVOID_CCMODE_COPIES
9712 /* Don't try to move insns which set CC registers if we should not
9713 create CCmode register copies. */
9714 for (i = regs->num - 1; i >= FIRST_PSEUDO_REGISTER; i--)
9715 if (GET_MODE_CLASS (GET_MODE (regno_reg_rtx[i])) == MODE_CC)
9716 regs->array[i].may_not_optimize = 1;
9717 #endif
9719 /* Set regs->array[I].n_times_set for the new registers. */
9720 for (i = old_nregs; i < regs->num; i++)
9721 regs->array[i].n_times_set = regs->array[i].set_in_loop;
9723 free (last_set);
9726 /* Returns the number of real INSNs in the LOOP. */
9728 static int
9729 count_insns_in_loop (const struct loop *loop)
9731 int count = 0;
9732 rtx insn;
9734 for (insn = loop->top ? loop->top : loop->start; insn != loop->end;
9735 insn = NEXT_INSN (insn))
9736 if (INSN_P (insn))
9737 ++count;
9739 return count;
9742 /* Move MEMs into registers for the duration of the loop. */
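/* Illustrative sketch (editorial), in source-level terms: a loop such as

       for (i = 0; i < n; i++)
         sum += *p;

   where the address of *p is loop-invariant, is rewritten roughly as

       tmp = *p;
       for (i = 0; i < n; i++)
         sum += tmp;

   with a trailing "*p = tmp;" emitted after the loop only when the loop
   also stores into *p.  Here "tmp" merely stands for the shadow
   pseudo-register allocated below; the name is purely for illustration.  */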
9744 static void
9745 load_mems (const struct loop *loop)
9747 struct loop_info *loop_info = LOOP_INFO (loop);
9748 struct loop_regs *regs = LOOP_REGS (loop);
9749 int maybe_never = 0;
9750 int i;
9751 rtx p, prev_ebb_head;
9752 rtx label = NULL_RTX;
9753 rtx end_label;
9754 /* Nonzero if the next instruction may never be executed. */
9755 int next_maybe_never = 0;
9756 unsigned int last_max_reg = max_reg_num ();
9758 if (loop_info->mems_idx == 0)
9759 return;
9761 /* We cannot use next_label here because it skips over normal insns. */
9762 end_label = next_nonnote_insn (loop->end);
9763 if (end_label && GET_CODE (end_label) != CODE_LABEL)
9764 end_label = NULL_RTX;
9766 /* Check to see if it's possible that some instructions in the loop are
9767 never executed. Also check if there is a goto out of the loop other
9768 than right after the end of the loop. */
9769 for (p = next_insn_in_loop (loop, loop->scan_start);
9770 p != NULL_RTX;
9771 p = next_insn_in_loop (loop, p))
9773 if (GET_CODE (p) == CODE_LABEL)
9774 maybe_never = 1;
9775 else if (GET_CODE (p) == JUMP_INSN
9776 /* If we enter the loop in the middle, and scan
9777 around to the beginning, don't set maybe_never
9778 for that. This must be an unconditional jump,
9779 otherwise the code at the top of the loop might
 9780 never be executed. Unconditional jumps are
 9781 followed by a barrier and then the loop end. */
9782 && ! (GET_CODE (p) == JUMP_INSN
9783 && JUMP_LABEL (p) == loop->top
9784 && NEXT_INSN (NEXT_INSN (p)) == loop->end
9785 && any_uncondjump_p (p)))
9787 /* If this is a jump outside of the loop but not right
9788 after the end of the loop, we would have to emit new fixup
9789 sequences for each such label. */
9790 if (/* If we can't tell where control might go when this
9791 JUMP_INSN is executed, we must be conservative. */
9792 !JUMP_LABEL (p)
9793 || (JUMP_LABEL (p) != end_label
9794 && (INSN_UID (JUMP_LABEL (p)) >= max_uid_for_loop
9795 || INSN_LUID (JUMP_LABEL (p)) < INSN_LUID (loop->start)
9796 || INSN_LUID (JUMP_LABEL (p)) > INSN_LUID (loop->end))))
9797 return;
9799 if (!any_condjump_p (p))
9800 /* Something complicated. */
9801 maybe_never = 1;
9802 else
9803 /* If there are any more instructions in the loop, they
9804 might not be reached. */
9805 next_maybe_never = 1;
9807 else if (next_maybe_never)
9808 maybe_never = 1;
9811 /* Find start of the extended basic block that enters the loop. */
9812 for (p = loop->start;
9813 PREV_INSN (p) && GET_CODE (p) != CODE_LABEL;
9814 p = PREV_INSN (p))
9816 prev_ebb_head = p;
9818 cselib_init ();
9820 /* Build table of mems that get set to constant values before the
9821 loop. */
9822 for (; p != loop->start; p = NEXT_INSN (p))
9823 cselib_process_insn (p);
9825 /* Actually move the MEMs. */
9826 for (i = 0; i < loop_info->mems_idx; ++i)
9828 regset_head load_copies;
9829 regset_head store_copies;
9830 int written = 0;
9831 rtx reg;
9832 rtx mem = loop_info->mems[i].mem;
9833 rtx mem_list_entry;
9835 if (MEM_VOLATILE_P (mem)
9836 || loop_invariant_p (loop, XEXP (mem, 0)) != 1)
9837 /* There's no telling whether or not MEM is modified. */
9838 loop_info->mems[i].optimize = 0;
9840 /* Go through the MEMs written to in the loop to see if this
9841 one is aliased by one of them. */
9842 mem_list_entry = loop_info->store_mems;
9843 while (mem_list_entry)
9845 if (rtx_equal_p (mem, XEXP (mem_list_entry, 0)))
9846 written = 1;
9847 else if (true_dependence (XEXP (mem_list_entry, 0), VOIDmode,
9848 mem, rtx_varies_p))
9850 /* MEM is indeed aliased by this store. */
9851 loop_info->mems[i].optimize = 0;
9852 break;
9854 mem_list_entry = XEXP (mem_list_entry, 1);
9857 if (flag_float_store && written
9858 && GET_MODE_CLASS (GET_MODE (mem)) == MODE_FLOAT)
9859 loop_info->mems[i].optimize = 0;
9861 /* If this MEM is written to, we must be sure that there
9862 are no reads from another MEM that aliases this one. */
9863 if (loop_info->mems[i].optimize && written)
9865 int j;
9867 for (j = 0; j < loop_info->mems_idx; ++j)
9869 if (j == i)
9870 continue;
9871 else if (true_dependence (mem,
9872 VOIDmode,
9873 loop_info->mems[j].mem,
9874 rtx_varies_p))
9876 /* It's not safe to hoist loop_info->mems[i] out of
9877 the loop because writes to it might not be
9878 seen by reads from loop_info->mems[j]. */
9879 loop_info->mems[i].optimize = 0;
9880 break;
9885 if (maybe_never && may_trap_p (mem))
9886 /* We can't access the MEM outside the loop; it might
9887 cause a trap that wouldn't have happened otherwise. */
9888 loop_info->mems[i].optimize = 0;
9890 if (!loop_info->mems[i].optimize)
9891 /* We thought we were going to lift this MEM out of the
9892 loop, but later discovered that we could not. */
9893 continue;
9895 INIT_REG_SET (&load_copies);
9896 INIT_REG_SET (&store_copies);
9898 /* Allocate a pseudo for this MEM. We set REG_USERVAR_P in
9899 order to keep scan_loop from moving stores to this MEM
9900 out of the loop just because this REG is neither a
9901 user-variable nor used in the loop test. */
9902 reg = gen_reg_rtx (GET_MODE (mem));
9903 REG_USERVAR_P (reg) = 1;
9904 loop_info->mems[i].reg = reg;
9906 /* Now, replace all references to the MEM with the
9907 corresponding pseudos. */
9908 maybe_never = 0;
9909 for (p = next_insn_in_loop (loop, loop->scan_start);
9910 p != NULL_RTX;
9911 p = next_insn_in_loop (loop, p))
9913 if (INSN_P (p))
9915 rtx set;
9917 set = single_set (p);
9919 /* See if this copies the mem into a register that isn't
9920 modified afterwards. We'll try to do copy propagation
9921 a little further on. */
9922 if (set
9923 /* @@@ This test is _way_ too conservative. */
9924 && ! maybe_never
9925 && GET_CODE (SET_DEST (set)) == REG
9926 && REGNO (SET_DEST (set)) >= FIRST_PSEUDO_REGISTER
9927 && REGNO (SET_DEST (set)) < last_max_reg
9928 && regs->array[REGNO (SET_DEST (set))].n_times_set == 1
9929 && rtx_equal_p (SET_SRC (set), mem))
9930 SET_REGNO_REG_SET (&load_copies, REGNO (SET_DEST (set)));
9932 /* See if this copies the mem from a register that isn't
9933 modified afterwards. We'll try to remove the
9934 redundant copy later on by doing a little register
9935 renaming and copy propagation. This will help
9936 to untangle things for the BIV detection code. */
9937 if (set
9938 && ! maybe_never
9939 && GET_CODE (SET_SRC (set)) == REG
9940 && REGNO (SET_SRC (set)) >= FIRST_PSEUDO_REGISTER
9941 && REGNO (SET_SRC (set)) < last_max_reg
9942 && regs->array[REGNO (SET_SRC (set))].n_times_set == 1
9943 && rtx_equal_p (SET_DEST (set), mem))
9944 SET_REGNO_REG_SET (&store_copies, REGNO (SET_SRC (set)));
9946 /* If this is a call which uses / clobbers this memory
9947 location, we must not change the interface here. */
9948 if (GET_CODE (p) == CALL_INSN
9949 && reg_mentioned_p (loop_info->mems[i].mem,
9950 CALL_INSN_FUNCTION_USAGE (p)))
9952 cancel_changes (0);
9953 loop_info->mems[i].optimize = 0;
9954 break;
9956 else
9957 /* Replace the memory reference with the shadow register. */
9958 replace_loop_mems (p, loop_info->mems[i].mem,
9959 loop_info->mems[i].reg, written);
9962 if (GET_CODE (p) == CODE_LABEL
9963 || GET_CODE (p) == JUMP_INSN)
9964 maybe_never = 1;
9967 if (! loop_info->mems[i].optimize)
9968 ; /* We found we couldn't do the replacement, so do nothing. */
9969 else if (! apply_change_group ())
9970 /* We couldn't replace all occurrences of the MEM. */
9971 loop_info->mems[i].optimize = 0;
9972 else
9974 /* Load the memory immediately before LOOP->START, which is
 9975 the NOTE_INSN_LOOP_BEG note. */
9976 cselib_val *e = cselib_lookup (mem, VOIDmode, 0);
9977 rtx set;
9978 rtx best = mem;
9979 int j;
9980 struct elt_loc_list *const_equiv = 0;
9982 if (e)
9984 struct elt_loc_list *equiv;
9985 struct elt_loc_list *best_equiv = 0;
9986 for (equiv = e->locs; equiv; equiv = equiv->next)
9988 if (CONSTANT_P (equiv->loc))
9989 const_equiv = equiv;
9990 else if (GET_CODE (equiv->loc) == REG
 9991 /* Extending hard register lifetimes causes crashes
 9992 on SRC targets. Doing so on non-SRC targets is
 9993 probably not a good idea either, since we most
 9994 probably have a pseudoregister equivalence as
 9995 well. */
9996 && REGNO (equiv->loc) >= FIRST_PSEUDO_REGISTER)
9997 best_equiv = equiv;
9999 /* Use the constant equivalence if that is cheap enough. */
10000 if (! best_equiv)
10001 best_equiv = const_equiv;
10002 else if (const_equiv
10003 && (rtx_cost (const_equiv->loc, SET)
10004 <= rtx_cost (best_equiv->loc, SET)))
10006 best_equiv = const_equiv;
10007 const_equiv = 0;
10010 /* If best_equiv is nonzero, we know that MEM is set to a
10011 constant or register before the loop. We will use this
10012 knowledge to initialize the shadow register with that
10013 constant or reg rather than by loading from MEM. */
10014 if (best_equiv)
10015 best = copy_rtx (best_equiv->loc);
10018 set = gen_move_insn (reg, best);
10019 set = loop_insn_hoist (loop, set);
10020 if (REG_P (best))
10022 for (p = prev_ebb_head; p != loop->start; p = NEXT_INSN (p))
10023 if (REGNO_LAST_UID (REGNO (best)) == INSN_UID (p))
10025 REGNO_LAST_UID (REGNO (best)) = INSN_UID (set);
10026 break;
10030 if (const_equiv)
10031 set_unique_reg_note (set, REG_EQUAL, copy_rtx (const_equiv->loc));
10033 if (written)
10035 if (label == NULL_RTX)
10037 label = gen_label_rtx ();
10038 emit_label_after (label, loop->end);
10041 /* Store the memory immediately after END, which is
 10042 the NOTE_INSN_LOOP_END note. */
10043 set = gen_move_insn (copy_rtx (mem), reg);
10044 loop_insn_emit_after (loop, 0, label, set);
10047 if (loop_dump_stream)
10049 fprintf (loop_dump_stream, "Hoisted regno %d %s from ",
10050 REGNO (reg), (written ? "r/w" : "r/o"));
10051 print_rtl (loop_dump_stream, mem);
10052 fputc ('\n', loop_dump_stream);
10055 /* Attempt a bit of copy propagation. This helps untangle the
10056 data flow, and enables {basic,general}_induction_var to find
10057 more bivs/givs. */
10058 EXECUTE_IF_SET_IN_REG_SET
10059 (&load_copies, FIRST_PSEUDO_REGISTER, j,
10061 try_copy_prop (loop, reg, j);
10063 CLEAR_REG_SET (&load_copies);
10065 EXECUTE_IF_SET_IN_REG_SET
10066 (&store_copies, FIRST_PSEUDO_REGISTER, j,
10068 try_swap_copy_prop (loop, reg, j);
10070 CLEAR_REG_SET (&store_copies);
10074 /* Now, we need to replace all references to the previous exit
10075 label with the new one. */
10076 if (label != NULL_RTX && end_label != NULL_RTX)
10077 for (p = loop->start; p != loop->end; p = NEXT_INSN (p))
10078 if (GET_CODE (p) == JUMP_INSN && JUMP_LABEL (p) == end_label)
10079 redirect_jump (p, label, false);
10081 cselib_finish ();
10084 /* For communication between note_reg_stored and its caller. */
10085 struct note_reg_stored_arg
10087 int set_seen;
10088 rtx reg;
 10091 /* Called via note_stores; record in ARG->set_seen whether X, which is
 10092 written to, is equal to ARG->reg. */
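/* Editorial note: note_stores invokes the callback once for each destination
   that the pattern stores to or clobbers, passing the destination, the SET
   or CLOBBER rtx performing the store, and the caller-supplied DATA
   pointer.  */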
10093 static void
10094 note_reg_stored (rtx x, rtx setter ATTRIBUTE_UNUSED, void *arg)
10096 struct note_reg_stored_arg *t = (struct note_reg_stored_arg *) arg;
10097 if (t->reg == x)
10098 t->set_seen = 1;
10101 /* Try to replace every occurrence of pseudo REGNO with REPLACEMENT.
10102 There must be exactly one insn that sets this pseudo; it will be
10103 deleted if all replacements succeed and we can prove that the register
10104 is not used after the loop. */
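/* Illustrative example (editorial, invented register numbers): if the only
   set of pseudo 70 inside the loop is
     (set (reg 70) (reg 90))
   and every later use of (reg 70) in the same extended basic block can be
   rewritten to use (reg 90) directly, the copy insn itself becomes dead and
   is deleted below.  */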
10106 static void
10107 try_copy_prop (const struct loop *loop, rtx replacement, unsigned int regno)
10109 /* This is the reg that we are copying from. */
10110 rtx reg_rtx = regno_reg_rtx[regno];
10111 rtx init_insn = 0;
10112 rtx insn;
10113 /* These help keep track of whether we replaced all uses of the reg. */
10114 int replaced_last = 0;
10115 int store_is_first = 0;
10117 for (insn = next_insn_in_loop (loop, loop->scan_start);
10118 insn != NULL_RTX;
10119 insn = next_insn_in_loop (loop, insn))
10121 rtx set;
10123 /* Only substitute within one extended basic block from the initializing
10124 insn. */
10125 if (GET_CODE (insn) == CODE_LABEL && init_insn)
10126 break;
10128 if (! INSN_P (insn))
10129 continue;
10131 /* Is this the initializing insn? */
10132 set = single_set (insn);
10133 if (set
10134 && GET_CODE (SET_DEST (set)) == REG
10135 && REGNO (SET_DEST (set)) == regno)
10137 if (init_insn)
10138 abort ();
10140 init_insn = insn;
10141 if (REGNO_FIRST_UID (regno) == INSN_UID (insn))
10142 store_is_first = 1;
10145 /* Only substitute after seeing the initializing insn. */
10146 if (init_insn && insn != init_insn)
10148 struct note_reg_stored_arg arg;
10150 replace_loop_regs (insn, reg_rtx, replacement);
10151 if (REGNO_LAST_UID (regno) == INSN_UID (insn))
10152 replaced_last = 1;
10154 /* Stop replacing when REPLACEMENT is modified. */
10155 arg.reg = replacement;
10156 arg.set_seen = 0;
10157 note_stores (PATTERN (insn), note_reg_stored, &arg);
10158 if (arg.set_seen)
10160 rtx note = find_reg_note (insn, REG_EQUAL, NULL);
 10162 /* It is possible that we've turned a previously valid REG_EQUAL note
 10163 into an invalid one: we changed REGNO to REPLACEMENT and, unlike REGNO,
 10164 REPLACEMENT is modified here, so the note could now mean something different. */
10165 if (note && reg_mentioned_p (replacement, XEXP (note, 0)))
10166 remove_note (insn, note);
10167 break;
10171 if (! init_insn)
10172 abort ();
10173 if (apply_change_group ())
10175 if (loop_dump_stream)
10176 fprintf (loop_dump_stream, " Replaced reg %d", regno);
10177 if (store_is_first && replaced_last)
10179 rtx first;
10180 rtx retval_note;
10182 /* Assume we're just deleting INIT_INSN. */
10183 first = init_insn;
10184 /* Look for REG_RETVAL note. If we're deleting the end of
10185 the libcall sequence, the whole sequence can go. */
10186 retval_note = find_reg_note (init_insn, REG_RETVAL, NULL_RTX);
10187 /* If we found a REG_RETVAL note, find the first instruction
10188 in the sequence. */
10189 if (retval_note)
10190 first = XEXP (retval_note, 0);
10192 /* Delete the instructions. */
10193 loop_delete_insns (first, init_insn);
10195 if (loop_dump_stream)
10196 fprintf (loop_dump_stream, ".\n");
10200 /* Replace all the instructions from FIRST up to and including LAST
10201 with NOTE_INSN_DELETED notes. */
10203 static void
10204 loop_delete_insns (rtx first, rtx last)
10206 while (1)
10208 if (loop_dump_stream)
10209 fprintf (loop_dump_stream, ", deleting init_insn (%d)",
10210 INSN_UID (first));
10211 delete_insn (first);
 10213 /* If this was the LAST instruction we're supposed to delete,
10214 we're done. */
10215 if (first == last)
10216 break;
10218 first = NEXT_INSN (first);
10222 /* Try to replace occurrences of pseudo REGNO with REPLACEMENT within
10223 loop LOOP if the order of the sets of these registers can be
10224 swapped. There must be exactly one insn within the loop that sets
10225 this pseudo followed immediately by a move insn that sets
 10226 REPLACEMENT from REGNO. */
10227 static void
10228 try_swap_copy_prop (const struct loop *loop, rtx replacement,
10229 unsigned int regno)
10231 rtx insn;
10232 rtx set = NULL_RTX;
10233 unsigned int new_regno;
10235 new_regno = REGNO (replacement);
10237 for (insn = next_insn_in_loop (loop, loop->scan_start);
10238 insn != NULL_RTX;
10239 insn = next_insn_in_loop (loop, insn))
 10241 /* Search for the insn that copies REGNO to NEW_REGNO. */
10242 if (INSN_P (insn)
10243 && (set = single_set (insn))
10244 && GET_CODE (SET_DEST (set)) == REG
10245 && REGNO (SET_DEST (set)) == new_regno
10246 && GET_CODE (SET_SRC (set)) == REG
10247 && REGNO (SET_SRC (set)) == regno)
10248 break;
10251 if (insn != NULL_RTX)
10253 rtx prev_insn;
10254 rtx prev_set;
10256 /* Some DEF-USE info would come in handy here to make this
10257 function more general. For now, just check the previous insn
10258 which is the most likely candidate for setting REGNO. */
10260 prev_insn = PREV_INSN (insn);
 10262 if (INSN_P (prev_insn)
10263 && (prev_set = single_set (prev_insn))
10264 && GET_CODE (SET_DEST (prev_set)) == REG
10265 && REGNO (SET_DEST (prev_set)) == regno)
10267 /* We have:
10268 (set (reg regno) (expr))
10269 (set (reg new_regno) (reg regno))
10271 so try converting this to:
10272 (set (reg new_regno) (expr))
10273 (set (reg regno) (reg new_regno))
10275 The former construct is often generated when a global
10276 variable used for an induction variable is shadowed by a
10277 register (NEW_REGNO). The latter construct improves the
10278 chances of GIV replacement and BIV elimination. */
10280 validate_change (prev_insn, &SET_DEST (prev_set),
10281 replacement, 1);
10282 validate_change (insn, &SET_DEST (set),
10283 SET_SRC (set), 1);
10284 validate_change (insn, &SET_SRC (set),
10285 replacement, 1);
10287 if (apply_change_group ())
10289 if (loop_dump_stream)
10290 fprintf (loop_dump_stream,
10291 " Swapped set of reg %d at %d with reg %d at %d.\n",
10292 regno, INSN_UID (insn),
10293 new_regno, INSN_UID (prev_insn));
10295 /* Update first use of REGNO. */
10296 if (REGNO_FIRST_UID (regno) == INSN_UID (prev_insn))
10297 REGNO_FIRST_UID (regno) = INSN_UID (insn);
10299 /* Now perform copy propagation to hopefully
10300 remove all uses of REGNO within the loop. */
10301 try_copy_prop (loop, replacement, regno);
10307 /* Worker function for find_mem_in_note, called via for_each_rtx. */
10309 static int
10310 find_mem_in_note_1 (rtx *x, void *data)
10312 if (*x != NULL_RTX && GET_CODE (*x) == MEM)
10314 rtx *res = (rtx *) data;
10315 *res = *x;
10316 return 1;
10318 return 0;
10321 /* Returns the first MEM found in NOTE by depth-first search. */
10323 static rtx
10324 find_mem_in_note (rtx note)
10326 if (note && for_each_rtx (&note, find_mem_in_note_1, &note))
10327 return note;
10328 return NULL_RTX;
10331 /* Replace MEM with its associated pseudo register. This function is
10332 called from load_mems via for_each_rtx. DATA is actually a pointer
10333 to a structure describing the instruction currently being scanned
10334 and the MEM we are currently replacing. */
10336 static int
10337 replace_loop_mem (rtx *mem, void *data)
10339 loop_replace_args *args = (loop_replace_args *) data;
10340 rtx m = *mem;
10342 if (m == NULL_RTX)
10343 return 0;
10345 switch (GET_CODE (m))
10347 case MEM:
10348 break;
10350 case CONST_DOUBLE:
10351 /* We're not interested in the MEM associated with a
10352 CONST_DOUBLE, so there's no need to traverse into one. */
10353 return -1;
10355 default:
10356 /* This is not a MEM. */
10357 return 0;
10360 if (!rtx_equal_p (args->match, m))
10361 /* This is not the MEM we are currently replacing. */
10362 return 0;
10364 /* Actually replace the MEM. */
10365 validate_change (args->insn, mem, args->replacement, 1);
10367 return 0;
10370 static void
10371 replace_loop_mems (rtx insn, rtx mem, rtx reg, int written)
10373 loop_replace_args args;
10375 args.insn = insn;
10376 args.match = mem;
10377 args.replacement = reg;
10379 for_each_rtx (&insn, replace_loop_mem, &args);
10381 /* If we hoist a mem write out of the loop, then REG_EQUAL
10382 notes referring to the mem are no longer valid. */
10383 if (written)
10385 rtx note, sub;
10386 rtx *link;
10388 for (link = &REG_NOTES (insn); (note = *link); link = &XEXP (note, 1))
10390 if (REG_NOTE_KIND (note) == REG_EQUAL
10391 && (sub = find_mem_in_note (note))
10392 && true_dependence (mem, VOIDmode, sub, rtx_varies_p))
10394 /* Remove the note. */
10395 validate_change (NULL_RTX, link, XEXP (note, 1), 1);
10396 break;
10402 /* Replace one register with another. Called through for_each_rtx; PX points
10403 to the rtx being scanned. DATA is actually a pointer to
10404 a structure of arguments. */
10406 static int
10407 replace_loop_reg (rtx *px, void *data)
10409 rtx x = *px;
10410 loop_replace_args *args = (loop_replace_args *) data;
10412 if (x == NULL_RTX)
10413 return 0;
10415 if (x == args->match)
10416 validate_change (args->insn, px, args->replacement, 1);
10418 return 0;
10421 static void
10422 replace_loop_regs (rtx insn, rtx reg, rtx replacement)
10424 loop_replace_args args;
10426 args.insn = insn;
10427 args.match = reg;
10428 args.replacement = replacement;
10430 for_each_rtx (&insn, replace_loop_reg, &args);
10433 /* Emit insn for PATTERN after WHERE_INSN in basic block WHERE_BB
10434 (ignored in the interim). */
10436 static rtx
10437 loop_insn_emit_after (const struct loop *loop ATTRIBUTE_UNUSED,
10438 basic_block where_bb ATTRIBUTE_UNUSED, rtx where_insn,
10439 rtx pattern)
10441 return emit_insn_after (pattern, where_insn);
 10445 /* If WHERE_INSN is nonzero, emit insn for PATTERN before WHERE_INSN
 10446 in basic block WHERE_BB (ignored in the interim) within the loop;
 10447 otherwise, hoist PATTERN into the loop pre-header. */
10450 loop_insn_emit_before (const struct loop *loop,
10451 basic_block where_bb ATTRIBUTE_UNUSED,
10452 rtx where_insn, rtx pattern)
10454 if (! where_insn)
10455 return loop_insn_hoist (loop, pattern);
10456 return emit_insn_before (pattern, where_insn);
10460 /* Emit call insn for PATTERN before WHERE_INSN in basic block
10461 WHERE_BB (ignored in the interim) within the loop. */
10463 static rtx
10464 loop_call_insn_emit_before (const struct loop *loop ATTRIBUTE_UNUSED,
10465 basic_block where_bb ATTRIBUTE_UNUSED,
10466 rtx where_insn, rtx pattern)
10468 return emit_call_insn_before (pattern, where_insn);
10472 /* Hoist insn for PATTERN into the loop pre-header. */
10475 loop_insn_hoist (const struct loop *loop, rtx pattern)
10477 return loop_insn_emit_before (loop, 0, loop->start, pattern);
10481 /* Hoist call insn for PATTERN into the loop pre-header. */
10483 static rtx
10484 loop_call_insn_hoist (const struct loop *loop, rtx pattern)
10486 return loop_call_insn_emit_before (loop, 0, loop->start, pattern);
10490 /* Sink insn for PATTERN after the loop end. */
10493 loop_insn_sink (const struct loop *loop, rtx pattern)
10495 return loop_insn_emit_before (loop, 0, loop->sink, pattern);
 10498 /* bl->final_value can be either a general_operand or a PLUS of a general_operand
 10499 and a constant. Emit a sequence of instructions to load it into REG. */
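/* Illustrative example (editorial, invented register numbers): with
   FINAL_VALUE equal to (plus (reg 80) (const_int 4)), force_operand emits
   the addition into the sequence and, if its result is not already REG, the
   final move copies it there, so the returned sequence computes roughly
   REG = reg 80 + 4; for a plain register or constant FINAL_VALUE the
   sequence is a single move.  */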
10500 static rtx
10501 gen_load_of_final_value (rtx reg, rtx final_value)
10503 rtx seq;
10504 start_sequence ();
10505 final_value = force_operand (final_value, reg);
10506 if (final_value != reg)
10507 emit_move_insn (reg, final_value);
10508 seq = get_insns ();
10509 end_sequence ();
10510 return seq;
10513 /* If the loop has multiple exits, emit insn for PATTERN before the
10514 loop to ensure that it will always be executed no matter how the
10515 loop exits. Otherwise, emit the insn for PATTERN after the loop,
10516 since this is slightly more efficient. */
10518 static rtx
10519 loop_insn_sink_or_swim (const struct loop *loop, rtx pattern)
10521 if (loop->exit_count)
10522 return loop_insn_hoist (loop, pattern);
10523 else
10524 return loop_insn_sink (loop, pattern);
10527 static void
10528 loop_ivs_dump (const struct loop *loop, FILE *file, int verbose)
10530 struct iv_class *bl;
10531 int iv_num = 0;
10533 if (! loop || ! file)
10534 return;
10536 for (bl = LOOP_IVS (loop)->list; bl; bl = bl->next)
10537 iv_num++;
10539 fprintf (file, "Loop %d: %d IV classes\n", loop->num, iv_num);
10541 for (bl = LOOP_IVS (loop)->list; bl; bl = bl->next)
10543 loop_iv_class_dump (bl, file, verbose);
10544 fputc ('\n', file);
10549 static void
10550 loop_iv_class_dump (const struct iv_class *bl, FILE *file,
10551 int verbose ATTRIBUTE_UNUSED)
10553 struct induction *v;
10554 rtx incr;
10555 int i;
10557 if (! bl || ! file)
10558 return;
10560 fprintf (file, "IV class for reg %d, benefit %d\n",
10561 bl->regno, bl->total_benefit);
10563 fprintf (file, " Init insn %d", INSN_UID (bl->init_insn));
10564 if (bl->initial_value)
10566 fprintf (file, ", init val: ");
10567 print_simple_rtl (file, bl->initial_value);
10569 if (bl->initial_test)
10571 fprintf (file, ", init test: ");
10572 print_simple_rtl (file, bl->initial_test);
10574 fputc ('\n', file);
10576 if (bl->final_value)
10578 fprintf (file, " Final val: ");
10579 print_simple_rtl (file, bl->final_value);
10580 fputc ('\n', file);
10583 if ((incr = biv_total_increment (bl)))
10585 fprintf (file, " Total increment: ");
10586 print_simple_rtl (file, incr);
10587 fputc ('\n', file);
10590 /* List the increments. */
10591 for (i = 0, v = bl->biv; v; v = v->next_iv, i++)
10593 fprintf (file, " Inc%d: insn %d, incr: ", i, INSN_UID (v->insn));
10594 print_simple_rtl (file, v->add_val);
10595 fputc ('\n', file);
10598 /* List the givs. */
10599 for (i = 0, v = bl->giv; v; v = v->next_iv, i++)
10601 fprintf (file, " Giv%d: insn %d, benefit %d, ",
10602 i, INSN_UID (v->insn), v->benefit);
10603 if (v->giv_type == DEST_ADDR)
10604 print_simple_rtl (file, v->mem);
10605 else
10606 print_simple_rtl (file, single_set (v->insn));
10607 fputc ('\n', file);
10612 static void
10613 loop_biv_dump (const struct induction *v, FILE *file, int verbose)
10615 if (! v || ! file)
10616 return;
10618 fprintf (file,
10619 "Biv %d: insn %d",
10620 REGNO (v->dest_reg), INSN_UID (v->insn));
10621 fprintf (file, " const ");
10622 print_simple_rtl (file, v->add_val);
10624 if (verbose && v->final_value)
10626 fputc ('\n', file);
10627 fprintf (file, " final ");
10628 print_simple_rtl (file, v->final_value);
10631 fputc ('\n', file);
10635 static void
10636 loop_giv_dump (const struct induction *v, FILE *file, int verbose)
10638 if (! v || ! file)
10639 return;
10641 if (v->giv_type == DEST_REG)
10642 fprintf (file, "Giv %d: insn %d",
10643 REGNO (v->dest_reg), INSN_UID (v->insn));
10644 else
10645 fprintf (file, "Dest address: insn %d",
10646 INSN_UID (v->insn));
10648 fprintf (file, " src reg %d benefit %d",
10649 REGNO (v->src_reg), v->benefit);
10650 fprintf (file, " lifetime %d",
10651 v->lifetime);
10653 if (v->replaceable)
10654 fprintf (file, " replaceable");
10656 if (v->no_const_addval)
10657 fprintf (file, " ncav");
10659 if (v->ext_dependent)
10661 switch (GET_CODE (v->ext_dependent))
10663 case SIGN_EXTEND:
10664 fprintf (file, " ext se");
10665 break;
10666 case ZERO_EXTEND:
10667 fprintf (file, " ext ze");
10668 break;
10669 case TRUNCATE:
10670 fprintf (file, " ext tr");
10671 break;
10672 default:
10673 abort ();
10677 fputc ('\n', file);
10678 fprintf (file, " mult ");
10679 print_simple_rtl (file, v->mult_val);
10681 fputc ('\n', file);
10682 fprintf (file, " add ");
10683 print_simple_rtl (file, v->add_val);
10685 if (verbose && v->final_value)
10687 fputc ('\n', file);
10688 fprintf (file, " final ");
10689 print_simple_rtl (file, v->final_value);
10692 fputc ('\n', file);
10696 void
10697 debug_ivs (const struct loop *loop)
10699 loop_ivs_dump (loop, stderr, 1);
10703 void
10704 debug_iv_class (const struct iv_class *bl)
10706 loop_iv_class_dump (bl, stderr, 1);
10710 void
10711 debug_biv (const struct induction *v)
10713 loop_biv_dump (v, stderr, 1);
10717 void
10718 debug_giv (const struct induction *v)
10720 loop_giv_dump (v, stderr, 1);
10724 #define LOOP_BLOCK_NUM_1(INSN) \
10725 ((INSN) ? (BLOCK_FOR_INSN (INSN) ? BLOCK_NUM (INSN) : - 1) : -1)
10727 /* The notes do not have an assigned block, so look at the next insn. */
10728 #define LOOP_BLOCK_NUM(INSN) \
10729 ((INSN) ? (GET_CODE (INSN) == NOTE \
10730 ? LOOP_BLOCK_NUM_1 (next_nonnote_insn (INSN)) \
10731 : LOOP_BLOCK_NUM_1 (INSN)) \
10732 : -1)
10734 #define LOOP_INSN_UID(INSN) ((INSN) ? INSN_UID (INSN) : -1)
10736 static void
10737 loop_dump_aux (const struct loop *loop, FILE *file,
10738 int verbose ATTRIBUTE_UNUSED)
10740 rtx label;
10742 if (! loop || ! file)
10743 return;
10745 /* Print diagnostics to compare our concept of a loop with
10746 what the loop notes say. */
10747 if (! PREV_INSN (BB_HEAD (loop->first))
10748 || GET_CODE (PREV_INSN (BB_HEAD (loop->first))) != NOTE
10749 || NOTE_LINE_NUMBER (PREV_INSN (BB_HEAD (loop->first)))
10750 != NOTE_INSN_LOOP_BEG)
10751 fprintf (file, ";; No NOTE_INSN_LOOP_BEG at %d\n",
10752 INSN_UID (PREV_INSN (BB_HEAD (loop->first))));
10753 if (! NEXT_INSN (BB_END (loop->last))
10754 || GET_CODE (NEXT_INSN (BB_END (loop->last))) != NOTE
10755 || NOTE_LINE_NUMBER (NEXT_INSN (BB_END (loop->last)))
10756 != NOTE_INSN_LOOP_END)
10757 fprintf (file, ";; No NOTE_INSN_LOOP_END at %d\n",
10758 INSN_UID (NEXT_INSN (BB_END (loop->last))));
10760 if (loop->start)
10762 fprintf (file,
10763 ";; start %d (%d), cont dom %d (%d), cont %d (%d), vtop %d (%d), end %d (%d)\n",
10764 LOOP_BLOCK_NUM (loop->start),
10765 LOOP_INSN_UID (loop->start),
10766 LOOP_BLOCK_NUM (loop->cont),
10767 LOOP_INSN_UID (loop->cont),
10768 LOOP_BLOCK_NUM (loop->cont),
10769 LOOP_INSN_UID (loop->cont),
10770 LOOP_BLOCK_NUM (loop->vtop),
10771 LOOP_INSN_UID (loop->vtop),
10772 LOOP_BLOCK_NUM (loop->end),
10773 LOOP_INSN_UID (loop->end));
10774 fprintf (file, ";; top %d (%d), scan start %d (%d)\n",
10775 LOOP_BLOCK_NUM (loop->top),
10776 LOOP_INSN_UID (loop->top),
10777 LOOP_BLOCK_NUM (loop->scan_start),
10778 LOOP_INSN_UID (loop->scan_start));
10779 fprintf (file, ";; exit_count %d", loop->exit_count);
10780 if (loop->exit_count)
10782 fputs (", labels:", file);
10783 for (label = loop->exit_labels; label; label = LABEL_NEXTREF (label))
10785 fprintf (file, " %d ",
10786 LOOP_INSN_UID (XEXP (label, 0)));
10789 fputs ("\n", file);
10791 /* This can happen when a marked loop appears as two nested loops,
10792 say from while (a || b) {}. The inner loop won't match
10793 the loop markers but the outer one will. */
10794 if (LOOP_BLOCK_NUM (loop->cont) != loop->latch->index)
10795 fprintf (file, ";; NOTE_INSN_LOOP_CONT not in loop latch\n");
10799 /* Call this function from the debugger to dump LOOP. */
10801 void
10802 debug_loop (const struct loop *loop)
10804 flow_loop_dump (loop, stderr, loop_dump_aux, 1);
10807 /* Call this function from the debugger to dump LOOPS. */
10809 void
10810 debug_loops (const struct loops *loops)
10812 flow_loops_dump (loops, stderr, loop_dump_aux, 1);