PR optimization/9090
[official-gcc.git] / gcc / loop.c
blobc7165e54ed70bc205ef8d6b9e18b4be6ebb0bb87
1 /* Perform various loop optimizations, including strength reduction.
2 Copyright (C) 1987, 1988, 1989, 1991, 1992, 1993, 1994, 1995, 1996, 1997,
3 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 /* This is the loop optimization pass of the compiler.
23 It finds invariant computations within loops and moves them
24 to the beginning of the loop. Then it identifies basic and
25 general induction variables. Strength reduction is applied to the general
26 induction variables, and induction variable elimination is applied to
27 the basic induction variables.
29 It also finds cases where
30 a register is set within the loop by zero-extending a narrower value
31 and changes these to zero the entire register once before the loop
32 and merely copy the low part within the loop.
34 Most of the complexity is in heuristics to decide when it is worth
35 while to do these things. */
37 #include "config.h"
38 #include "system.h"
39 #include "coretypes.h"
40 #include "tm.h"
41 #include "rtl.h"
42 #include "tm_p.h"
43 #include "function.h"
44 #include "expr.h"
45 #include "hard-reg-set.h"
46 #include "basic-block.h"
47 #include "insn-config.h"
48 #include "regs.h"
49 #include "recog.h"
50 #include "flags.h"
51 #include "real.h"
52 #include "loop.h"
53 #include "cselib.h"
54 #include "except.h"
55 #include "toplev.h"
56 #include "predict.h"
57 #include "insn-flags.h"
58 #include "optabs.h"
59 #include "cfgloop.h"
61 /* Not really meaningful values, but at least something. */
62 #ifndef SIMULTANEOUS_PREFETCHES
63 #define SIMULTANEOUS_PREFETCHES 3
64 #endif
65 #ifndef PREFETCH_BLOCK
66 #define PREFETCH_BLOCK 32
67 #endif
68 #ifndef HAVE_prefetch
69 #define HAVE_prefetch 0
70 #define CODE_FOR_prefetch 0
71 #define gen_prefetch(a,b,c) (abort(), NULL_RTX)
72 #endif
74 /* Give up the prefetch optimizations once we exceed a given threshhold.
75 It is unlikely that we would be able to optimize something in a loop
76 with so many detected prefetches. */
77 #define MAX_PREFETCHES 100
78 /* The number of prefetch blocks that are beneficial to fetch at once before
79 a loop with a known (and low) iteration count. */
80 #define PREFETCH_BLOCKS_BEFORE_LOOP_MAX 6
81 /* For very tiny loops it is not worthwhile to prefetch even before the loop,
82 since it is likely that the data are already in the cache. */
83 #define PREFETCH_BLOCKS_BEFORE_LOOP_MIN 2
85 /* Parameterize some prefetch heuristics so they can be turned on and off
86 easily for performance testing on new architectures. These can be
87 defined in target-dependent files. */
89 /* Prefetch is worthwhile only when loads/stores are dense. */
90 #ifndef PREFETCH_ONLY_DENSE_MEM
91 #define PREFETCH_ONLY_DENSE_MEM 1
92 #endif
94 /* Define what we mean by "dense" loads and stores; This value divided by 256
95 is the minimum percentage of memory references that worth prefetching. */
96 #ifndef PREFETCH_DENSE_MEM
97 #define PREFETCH_DENSE_MEM 220
98 #endif
100 /* Do not prefetch for a loop whose iteration count is known to be low. */
101 #ifndef PREFETCH_NO_LOW_LOOPCNT
102 #define PREFETCH_NO_LOW_LOOPCNT 1
103 #endif
105 /* Define what we mean by a "low" iteration count. */
106 #ifndef PREFETCH_LOW_LOOPCNT
107 #define PREFETCH_LOW_LOOPCNT 32
108 #endif
110 /* Do not prefetch for a loop that contains a function call; such a loop is
111 probably not an internal loop. */
112 #ifndef PREFETCH_NO_CALL
113 #define PREFETCH_NO_CALL 1
114 #endif
116 /* Do not prefetch accesses with an extreme stride. */
117 #ifndef PREFETCH_NO_EXTREME_STRIDE
118 #define PREFETCH_NO_EXTREME_STRIDE 1
119 #endif
121 /* Define what we mean by an "extreme" stride. */
122 #ifndef PREFETCH_EXTREME_STRIDE
123 #define PREFETCH_EXTREME_STRIDE 4096
124 #endif
126 /* Define a limit to how far apart indices can be and still be merged
127 into a single prefetch. */
128 #ifndef PREFETCH_EXTREME_DIFFERENCE
129 #define PREFETCH_EXTREME_DIFFERENCE 4096
130 #endif
132 /* Issue prefetch instructions before the loop to fetch data to be used
133 in the first few loop iterations. */
134 #ifndef PREFETCH_BEFORE_LOOP
135 #define PREFETCH_BEFORE_LOOP 1
136 #endif
138 /* Do not handle reversed order prefetches (negative stride). */
139 #ifndef PREFETCH_NO_REVERSE_ORDER
140 #define PREFETCH_NO_REVERSE_ORDER 1
141 #endif
143 /* Prefetch even if the GIV is in conditional code. */
144 #ifndef PREFETCH_CONDITIONAL
145 #define PREFETCH_CONDITIONAL 1
146 #endif
148 #define LOOP_REG_LIFETIME(LOOP, REGNO) \
149 ((REGNO_LAST_LUID (REGNO) - REGNO_FIRST_LUID (REGNO)))
151 #define LOOP_REG_GLOBAL_P(LOOP, REGNO) \
152 ((REGNO_LAST_LUID (REGNO) > INSN_LUID ((LOOP)->end) \
153 || REGNO_FIRST_LUID (REGNO) < INSN_LUID ((LOOP)->start)))
155 #define LOOP_REGNO_NREGS(REGNO, SET_DEST) \
156 ((REGNO) < FIRST_PSEUDO_REGISTER \
157 ? (int) HARD_REGNO_NREGS ((REGNO), GET_MODE (SET_DEST)) : 1)
160 /* Vector mapping INSN_UIDs to luids.
161 The luids are like uids but increase monotonically always.
162 We use them to see whether a jump comes from outside a given loop. */
164 int *uid_luid;
166 /* Indexed by INSN_UID, contains the ordinal giving the (innermost) loop
167 number the insn is contained in. */
169 struct loop **uid_loop;
171 /* 1 + largest uid of any insn. */
173 int max_uid_for_loop;
175 /* Number of loops detected in current function. Used as index to the
176 next few tables. */
178 static int max_loop_num;
180 /* Bound on pseudo register number before loop optimization.
181 A pseudo has valid regscan info if its number is < max_reg_before_loop. */
182 unsigned int max_reg_before_loop;
184 /* The value to pass to the next call of reg_scan_update. */
185 static int loop_max_reg;
187 /* During the analysis of a loop, a chain of `struct movable's
188 is made to record all the movable insns found.
189 Then the entire chain can be scanned to decide which to move. */
191 struct movable
193 rtx insn; /* A movable insn */
194 rtx set_src; /* The expression this reg is set from. */
195 rtx set_dest; /* The destination of this SET. */
196 rtx dependencies; /* When INSN is libcall, this is an EXPR_LIST
197 of any registers used within the LIBCALL. */
198 int consec; /* Number of consecutive following insns
199 that must be moved with this one. */
200 unsigned int regno; /* The register it sets */
201 short lifetime; /* lifetime of that register;
202 may be adjusted when matching movables
203 that load the same value are found. */
204 short savings; /* Number of insns we can move for this reg,
205 including other movables that force this
206 or match this one. */
207 unsigned int cond : 1; /* 1 if only conditionally movable */
208 unsigned int force : 1; /* 1 means MUST move this insn */
209 unsigned int global : 1; /* 1 means reg is live outside this loop */
210 /* If PARTIAL is 1, GLOBAL means something different:
211 that the reg is live outside the range from where it is set
212 to the following label. */
213 unsigned int done : 1; /* 1 inhibits further processing of this */
215 unsigned int partial : 1; /* 1 means this reg is used for zero-extending.
216 In particular, moving it does not make it
217 invariant. */
218 unsigned int move_insn : 1; /* 1 means that we call emit_move_insn to
219 load SRC, rather than copying INSN. */
220 unsigned int move_insn_first:1;/* Same as above, if this is necessary for the
221 first insn of a consecutive sets group. */
222 unsigned int is_equiv : 1; /* 1 means a REG_EQUIV is present on INSN. */
223 enum machine_mode savemode; /* Nonzero means it is a mode for a low part
224 that we should avoid changing when clearing
225 the rest of the reg. */
226 struct movable *match; /* First entry for same value */
227 struct movable *forces; /* An insn that must be moved if this is */
228 struct movable *next;
232 FILE *loop_dump_stream;
234 /* Forward declarations. */
236 static void invalidate_loops_containing_label PARAMS ((rtx));
237 static void find_and_verify_loops PARAMS ((rtx, struct loops *));
238 static void mark_loop_jump PARAMS ((rtx, struct loop *));
239 static void prescan_loop PARAMS ((struct loop *));
240 static int reg_in_basic_block_p PARAMS ((rtx, rtx));
241 static int consec_sets_invariant_p PARAMS ((const struct loop *,
242 rtx, int, rtx));
243 static int labels_in_range_p PARAMS ((rtx, int));
244 static void count_one_set PARAMS ((struct loop_regs *, rtx, rtx, rtx *));
245 static void note_addr_stored PARAMS ((rtx, rtx, void *));
246 static void note_set_pseudo_multiple_uses PARAMS ((rtx, rtx, void *));
247 static int loop_reg_used_before_p PARAMS ((const struct loop *, rtx, rtx));
248 static void scan_loop PARAMS ((struct loop*, int));
249 #if 0
250 static void replace_call_address PARAMS ((rtx, rtx, rtx));
251 #endif
252 static rtx skip_consec_insns PARAMS ((rtx, int));
253 static int libcall_benefit PARAMS ((rtx));
254 static void ignore_some_movables PARAMS ((struct loop_movables *));
255 static void force_movables PARAMS ((struct loop_movables *));
256 static void combine_movables PARAMS ((struct loop_movables *,
257 struct loop_regs *));
258 static int num_unmoved_movables PARAMS ((const struct loop *));
259 static int regs_match_p PARAMS ((rtx, rtx, struct loop_movables *));
260 static int rtx_equal_for_loop_p PARAMS ((rtx, rtx, struct loop_movables *,
261 struct loop_regs *));
262 static void add_label_notes PARAMS ((rtx, rtx));
263 static void move_movables PARAMS ((struct loop *loop, struct loop_movables *,
264 int, int));
265 static void loop_movables_add PARAMS((struct loop_movables *,
266 struct movable *));
267 static void loop_movables_free PARAMS((struct loop_movables *));
268 static int count_nonfixed_reads PARAMS ((const struct loop *, rtx));
269 static void loop_bivs_find PARAMS((struct loop *));
270 static void loop_bivs_init_find PARAMS((struct loop *));
271 static void loop_bivs_check PARAMS((struct loop *));
272 static void loop_givs_find PARAMS((struct loop *));
273 static void loop_givs_check PARAMS((struct loop *));
274 static int loop_biv_eliminable_p PARAMS((struct loop *, struct iv_class *,
275 int, int));
276 static int loop_giv_reduce_benefit PARAMS((struct loop *, struct iv_class *,
277 struct induction *, rtx));
278 static void loop_givs_dead_check PARAMS((struct loop *, struct iv_class *));
279 static void loop_givs_reduce PARAMS((struct loop *, struct iv_class *));
280 static void loop_givs_rescan PARAMS((struct loop *, struct iv_class *,
281 rtx *));
282 static void loop_ivs_free PARAMS((struct loop *));
283 static void strength_reduce PARAMS ((struct loop *, int));
284 static void find_single_use_in_loop PARAMS ((struct loop_regs *, rtx, rtx));
285 static int valid_initial_value_p PARAMS ((rtx, rtx, int, rtx));
286 static void find_mem_givs PARAMS ((const struct loop *, rtx, rtx, int, int));
287 static void record_biv PARAMS ((struct loop *, struct induction *,
288 rtx, rtx, rtx, rtx, rtx *,
289 int, int));
290 static void check_final_value PARAMS ((const struct loop *,
291 struct induction *));
292 static void loop_ivs_dump PARAMS((const struct loop *, FILE *, int));
293 static void loop_iv_class_dump PARAMS((const struct iv_class *, FILE *, int));
294 static void loop_biv_dump PARAMS((const struct induction *, FILE *, int));
295 static void loop_giv_dump PARAMS((const struct induction *, FILE *, int));
296 static void record_giv PARAMS ((const struct loop *, struct induction *,
297 rtx, rtx, rtx, rtx, rtx, rtx, int,
298 enum g_types, int, int, rtx *));
299 static void update_giv_derive PARAMS ((const struct loop *, rtx));
300 static void check_ext_dependent_givs PARAMS ((struct iv_class *,
301 struct loop_info *));
302 static int basic_induction_var PARAMS ((const struct loop *, rtx,
303 enum machine_mode, rtx, rtx,
304 rtx *, rtx *, rtx **));
305 static rtx simplify_giv_expr PARAMS ((const struct loop *, rtx, rtx *, int *));
306 static int general_induction_var PARAMS ((const struct loop *loop, rtx, rtx *,
307 rtx *, rtx *, rtx *, int, int *,
308 enum machine_mode));
309 static int consec_sets_giv PARAMS ((const struct loop *, int, rtx,
310 rtx, rtx, rtx *, rtx *, rtx *, rtx *));
311 static int check_dbra_loop PARAMS ((struct loop *, int));
312 static rtx express_from_1 PARAMS ((rtx, rtx, rtx));
313 static rtx combine_givs_p PARAMS ((struct induction *, struct induction *));
314 static int cmp_combine_givs_stats PARAMS ((const PTR, const PTR));
315 static void combine_givs PARAMS ((struct loop_regs *, struct iv_class *));
316 static int product_cheap_p PARAMS ((rtx, rtx));
317 static int maybe_eliminate_biv PARAMS ((const struct loop *, struct iv_class *,
318 int, int, int));
319 static int maybe_eliminate_biv_1 PARAMS ((const struct loop *, rtx, rtx,
320 struct iv_class *, int,
321 basic_block, rtx));
322 static int last_use_this_basic_block PARAMS ((rtx, rtx));
323 static void record_initial PARAMS ((rtx, rtx, void *));
324 static void update_reg_last_use PARAMS ((rtx, rtx));
325 static rtx next_insn_in_loop PARAMS ((const struct loop *, rtx));
326 static void loop_regs_scan PARAMS ((const struct loop *, int));
327 static int count_insns_in_loop PARAMS ((const struct loop *));
328 static void load_mems PARAMS ((const struct loop *));
329 static int insert_loop_mem PARAMS ((rtx *, void *));
330 static int replace_loop_mem PARAMS ((rtx *, void *));
331 static void replace_loop_mems PARAMS ((rtx, rtx, rtx));
332 static int replace_loop_reg PARAMS ((rtx *, void *));
333 static void replace_loop_regs PARAMS ((rtx insn, rtx, rtx));
334 static void note_reg_stored PARAMS ((rtx, rtx, void *));
335 static void try_copy_prop PARAMS ((const struct loop *, rtx, unsigned int));
336 static void try_swap_copy_prop PARAMS ((const struct loop *, rtx,
337 unsigned int));
338 static int replace_label PARAMS ((rtx *, void *));
339 static rtx check_insn_for_givs PARAMS((struct loop *, rtx, int, int));
340 static rtx check_insn_for_bivs PARAMS((struct loop *, rtx, int, int));
341 static rtx gen_add_mult PARAMS ((rtx, rtx, rtx, rtx));
342 static void loop_regs_update PARAMS ((const struct loop *, rtx));
343 static int iv_add_mult_cost PARAMS ((rtx, rtx, rtx, rtx));
345 static rtx loop_insn_emit_after PARAMS((const struct loop *, basic_block,
346 rtx, rtx));
347 static rtx loop_call_insn_emit_before PARAMS((const struct loop *,
348 basic_block, rtx, rtx));
349 static rtx loop_call_insn_hoist PARAMS((const struct loop *, rtx));
350 static rtx loop_insn_sink_or_swim PARAMS((const struct loop *, rtx));
352 static void loop_dump_aux PARAMS ((const struct loop *, FILE *, int));
353 static void loop_delete_insns PARAMS ((rtx, rtx));
354 static HOST_WIDE_INT remove_constant_addition PARAMS ((rtx *));
355 static rtx gen_load_of_final_value PARAMS ((rtx, rtx));
356 void debug_ivs PARAMS ((const struct loop *));
357 void debug_iv_class PARAMS ((const struct iv_class *));
358 void debug_biv PARAMS ((const struct induction *));
359 void debug_giv PARAMS ((const struct induction *));
360 void debug_loop PARAMS ((const struct loop *));
361 void debug_loops PARAMS ((const struct loops *));
363 typedef struct rtx_pair
365 rtx r1;
366 rtx r2;
367 } rtx_pair;
369 typedef struct loop_replace_args
371 rtx match;
372 rtx replacement;
373 rtx insn;
374 } loop_replace_args;
376 /* Nonzero iff INSN is between START and END, inclusive. */
377 #define INSN_IN_RANGE_P(INSN, START, END) \
378 (INSN_UID (INSN) < max_uid_for_loop \
379 && INSN_LUID (INSN) >= INSN_LUID (START) \
380 && INSN_LUID (INSN) <= INSN_LUID (END))
382 /* Indirect_jump_in_function is computed once per function. */
383 static int indirect_jump_in_function;
384 static int indirect_jump_in_function_p PARAMS ((rtx));
386 static int compute_luids PARAMS ((rtx, rtx, int));
388 static int biv_elimination_giv_has_0_offset PARAMS ((struct induction *,
389 struct induction *,
390 rtx));
392 /* Benefit penalty, if a giv is not replaceable, i.e. must emit an insn to
393 copy the value of the strength reduced giv to its original register. */
394 static int copy_cost;
396 /* Cost of using a register, to normalize the benefits of a giv. */
397 static int reg_address_cost;
399 void
400 init_loop ()
402 rtx reg = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
404 reg_address_cost = address_cost (reg, SImode);
406 copy_cost = COSTS_N_INSNS (1);
409 /* Compute the mapping from uids to luids.
410 LUIDs are numbers assigned to insns, like uids,
411 except that luids increase monotonically through the code.
412 Start at insn START and stop just before END. Assign LUIDs
413 starting with PREV_LUID + 1. Return the last assigned LUID + 1. */
414 static int
415 compute_luids (start, end, prev_luid)
416 rtx start, end;
417 int prev_luid;
419 int i;
420 rtx insn;
422 for (insn = start, i = prev_luid; insn != end; insn = NEXT_INSN (insn))
424 if (INSN_UID (insn) >= max_uid_for_loop)
425 continue;
426 /* Don't assign luids to line-number NOTEs, so that the distance in
427 luids between two insns is not affected by -g. */
428 if (GET_CODE (insn) != NOTE
429 || NOTE_LINE_NUMBER (insn) <= 0)
430 uid_luid[INSN_UID (insn)] = ++i;
431 else
432 /* Give a line number note the same luid as preceding insn. */
433 uid_luid[INSN_UID (insn)] = i;
435 return i + 1;
438 /* Entry point of this file. Perform loop optimization
439 on the current function. F is the first insn of the function
440 and DUMPFILE is a stream for output of a trace of actions taken
441 (or 0 if none should be output). */
443 void
444 loop_optimize (f, dumpfile, flags)
445 /* f is the first instruction of a chain of insns for one function */
446 rtx f;
447 FILE *dumpfile;
448 int flags;
450 rtx insn;
451 int i;
452 struct loops loops_data;
453 struct loops *loops = &loops_data;
454 struct loop_info *loops_info;
456 loop_dump_stream = dumpfile;
458 init_recog_no_volatile ();
460 max_reg_before_loop = max_reg_num ();
461 loop_max_reg = max_reg_before_loop;
463 regs_may_share = 0;
465 /* Count the number of loops. */
467 max_loop_num = 0;
468 for (insn = f; insn; insn = NEXT_INSN (insn))
470 if (GET_CODE (insn) == NOTE
471 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
472 max_loop_num++;
475 /* Don't waste time if no loops. */
476 if (max_loop_num == 0)
477 return;
479 loops->num = max_loop_num;
481 /* Get size to use for tables indexed by uids.
482 Leave some space for labels allocated by find_and_verify_loops. */
483 max_uid_for_loop = get_max_uid () + 1 + max_loop_num * 32;
485 uid_luid = (int *) xcalloc (max_uid_for_loop, sizeof (int));
486 uid_loop = (struct loop **) xcalloc (max_uid_for_loop,
487 sizeof (struct loop *));
489 /* Allocate storage for array of loops. */
490 loops->array = (struct loop *)
491 xcalloc (loops->num, sizeof (struct loop));
493 /* Find and process each loop.
494 First, find them, and record them in order of their beginnings. */
495 find_and_verify_loops (f, loops);
497 /* Allocate and initialize auxiliary loop information. */
498 loops_info = xcalloc (loops->num, sizeof (struct loop_info));
499 for (i = 0; i < (int) loops->num; i++)
500 loops->array[i].aux = loops_info + i;
502 /* Now find all register lifetimes. This must be done after
503 find_and_verify_loops, because it might reorder the insns in the
504 function. */
505 reg_scan (f, max_reg_before_loop, 1);
507 /* This must occur after reg_scan so that registers created by gcse
508 will have entries in the register tables.
510 We could have added a call to reg_scan after gcse_main in toplev.c,
511 but moving this call to init_alias_analysis is more efficient. */
512 init_alias_analysis ();
514 /* See if we went too far. Note that get_max_uid already returns
515 one more that the maximum uid of all insn. */
516 if (get_max_uid () > max_uid_for_loop)
517 abort ();
518 /* Now reset it to the actual size we need. See above. */
519 max_uid_for_loop = get_max_uid ();
521 /* find_and_verify_loops has already called compute_luids, but it
522 might have rearranged code afterwards, so we need to recompute
523 the luids now. */
524 compute_luids (f, NULL_RTX, 0);
526 /* Don't leave gaps in uid_luid for insns that have been
527 deleted. It is possible that the first or last insn
528 using some register has been deleted by cross-jumping.
529 Make sure that uid_luid for that former insn's uid
530 points to the general area where that insn used to be. */
531 for (i = 0; i < max_uid_for_loop; i++)
533 uid_luid[0] = uid_luid[i];
534 if (uid_luid[0] != 0)
535 break;
537 for (i = 0; i < max_uid_for_loop; i++)
538 if (uid_luid[i] == 0)
539 uid_luid[i] = uid_luid[i - 1];
541 /* Determine if the function has indirect jump. On some systems
542 this prevents low overhead loop instructions from being used. */
543 indirect_jump_in_function = indirect_jump_in_function_p (f);
545 /* Now scan the loops, last ones first, since this means inner ones are done
546 before outer ones. */
547 for (i = max_loop_num - 1; i >= 0; i--)
549 struct loop *loop = &loops->array[i];
551 if (! loop->invalid && loop->end)
552 scan_loop (loop, flags);
555 end_alias_analysis ();
557 /* Clean up. */
558 free (uid_luid);
559 free (uid_loop);
560 free (loops_info);
561 free (loops->array);
564 /* Returns the next insn, in execution order, after INSN. START and
565 END are the NOTE_INSN_LOOP_BEG and NOTE_INSN_LOOP_END for the loop,
566 respectively. LOOP->TOP, if non-NULL, is the top of the loop in the
567 insn-stream; it is used with loops that are entered near the
568 bottom. */
570 static rtx
571 next_insn_in_loop (loop, insn)
572 const struct loop *loop;
573 rtx insn;
575 insn = NEXT_INSN (insn);
577 if (insn == loop->end)
579 if (loop->top)
580 /* Go to the top of the loop, and continue there. */
581 insn = loop->top;
582 else
583 /* We're done. */
584 insn = NULL_RTX;
587 if (insn == loop->scan_start)
588 /* We're done. */
589 insn = NULL_RTX;
591 return insn;
594 /* Optimize one loop described by LOOP. */
596 /* ??? Could also move memory writes out of loops if the destination address
597 is invariant, the source is invariant, the memory write is not volatile,
598 and if we can prove that no read inside the loop can read this address
599 before the write occurs. If there is a read of this address after the
600 write, then we can also mark the memory read as invariant. */
602 static void
603 scan_loop (loop, flags)
604 struct loop *loop;
605 int flags;
607 struct loop_info *loop_info = LOOP_INFO (loop);
608 struct loop_regs *regs = LOOP_REGS (loop);
609 int i;
610 rtx loop_start = loop->start;
611 rtx loop_end = loop->end;
612 rtx p;
613 /* 1 if we are scanning insns that could be executed zero times. */
614 int maybe_never = 0;
615 /* 1 if we are scanning insns that might never be executed
616 due to a subroutine call which might exit before they are reached. */
617 int call_passed = 0;
618 /* Number of insns in the loop. */
619 int insn_count;
620 int tem;
621 rtx temp, update_start, update_end;
622 /* The SET from an insn, if it is the only SET in the insn. */
623 rtx set, set1;
624 /* Chain describing insns movable in current loop. */
625 struct loop_movables *movables = LOOP_MOVABLES (loop);
626 /* Ratio of extra register life span we can justify
627 for saving an instruction. More if loop doesn't call subroutines
628 since in that case saving an insn makes more difference
629 and more registers are available. */
630 int threshold;
631 /* Nonzero if we are scanning instructions in a sub-loop. */
632 int loop_depth = 0;
633 int in_libcall;
635 loop->top = 0;
637 movables->head = 0;
638 movables->last = 0;
640 /* Determine whether this loop starts with a jump down to a test at
641 the end. This will occur for a small number of loops with a test
642 that is too complex to duplicate in front of the loop.
644 We search for the first insn or label in the loop, skipping NOTEs.
645 However, we must be careful not to skip past a NOTE_INSN_LOOP_BEG
646 (because we might have a loop executed only once that contains a
647 loop which starts with a jump to its exit test) or a NOTE_INSN_LOOP_END
648 (in case we have a degenerate loop).
650 Note that if we mistakenly think that a loop is entered at the top
651 when, in fact, it is entered at the exit test, the only effect will be
652 slightly poorer optimization. Making the opposite error can generate
653 incorrect code. Since very few loops now start with a jump to the
654 exit test, the code here to detect that case is very conservative. */
656 for (p = NEXT_INSN (loop_start);
657 p != loop_end
658 && GET_CODE (p) != CODE_LABEL && ! INSN_P (p)
659 && (GET_CODE (p) != NOTE
660 || (NOTE_LINE_NUMBER (p) != NOTE_INSN_LOOP_BEG
661 && NOTE_LINE_NUMBER (p) != NOTE_INSN_LOOP_END));
662 p = NEXT_INSN (p))
665 loop->scan_start = p;
667 /* If loop end is the end of the current function, then emit a
668 NOTE_INSN_DELETED after loop_end and set loop->sink to the dummy
669 note insn. This is the position we use when sinking insns out of
670 the loop. */
671 if (NEXT_INSN (loop->end) != 0)
672 loop->sink = NEXT_INSN (loop->end);
673 else
674 loop->sink = emit_note_after (NOTE_INSN_DELETED, loop->end);
676 /* Set up variables describing this loop. */
677 prescan_loop (loop);
678 threshold = (loop_info->has_call ? 1 : 2) * (1 + n_non_fixed_regs);
680 /* If loop has a jump before the first label,
681 the true entry is the target of that jump.
682 Start scan from there.
683 But record in LOOP->TOP the place where the end-test jumps
684 back to so we can scan that after the end of the loop. */
685 if (GET_CODE (p) == JUMP_INSN
686 /* Loop entry must be unconditional jump (and not a RETURN) */
687 && any_uncondjump_p (p)
688 && JUMP_LABEL (p) != 0
689 /* Check to see whether the jump actually
690 jumps out of the loop (meaning it's no loop).
691 This case can happen for things like
692 do {..} while (0). If this label was generated previously
693 by loop, we can't tell anything about it and have to reject
694 the loop. */
695 && INSN_IN_RANGE_P (JUMP_LABEL (p), loop_start, loop_end))
697 loop->top = next_label (loop->scan_start);
698 loop->scan_start = JUMP_LABEL (p);
701 /* If LOOP->SCAN_START was an insn created by loop, we don't know its luid
702 as required by loop_reg_used_before_p. So skip such loops. (This
703 test may never be true, but it's best to play it safe.)
705 Also, skip loops where we do not start scanning at a label. This
706 test also rejects loops starting with a JUMP_INSN that failed the
707 test above. */
709 if (INSN_UID (loop->scan_start) >= max_uid_for_loop
710 || GET_CODE (loop->scan_start) != CODE_LABEL)
712 if (loop_dump_stream)
713 fprintf (loop_dump_stream, "\nLoop from %d to %d is phony.\n\n",
714 INSN_UID (loop_start), INSN_UID (loop_end));
715 return;
718 /* Allocate extra space for REGs that might be created by load_mems.
719 We allocate a little extra slop as well, in the hopes that we
720 won't have to reallocate the regs array. */
721 loop_regs_scan (loop, loop_info->mems_idx + 16);
722 insn_count = count_insns_in_loop (loop);
724 if (loop_dump_stream)
726 fprintf (loop_dump_stream, "\nLoop from %d to %d: %d real insns.\n",
727 INSN_UID (loop_start), INSN_UID (loop_end), insn_count);
728 if (loop->cont)
729 fprintf (loop_dump_stream, "Continue at insn %d.\n",
730 INSN_UID (loop->cont));
733 /* Scan through the loop finding insns that are safe to move.
734 Set REGS->ARRAY[I].SET_IN_LOOP negative for the reg I being set, so that
735 this reg will be considered invariant for subsequent insns.
736 We consider whether subsequent insns use the reg
737 in deciding whether it is worth actually moving.
739 MAYBE_NEVER is nonzero if we have passed a conditional jump insn
740 and therefore it is possible that the insns we are scanning
741 would never be executed. At such times, we must make sure
742 that it is safe to execute the insn once instead of zero times.
743 When MAYBE_NEVER is 0, all insns will be executed at least once
744 so that is not a problem. */
746 for (in_libcall = 0, p = next_insn_in_loop (loop, loop->scan_start);
747 p != NULL_RTX;
748 p = next_insn_in_loop (loop, p))
750 if (in_libcall && INSN_P (p) && find_reg_note (p, REG_RETVAL, NULL_RTX))
751 in_libcall--;
752 if (GET_CODE (p) == INSN)
754 temp = find_reg_note (p, REG_LIBCALL, NULL_RTX);
755 if (temp)
756 in_libcall++;
757 if (! in_libcall
758 && (set = single_set (p))
759 && GET_CODE (SET_DEST (set)) == REG
760 #ifdef PIC_OFFSET_TABLE_REG_CALL_CLOBBERED
761 && SET_DEST (set) != pic_offset_table_rtx
762 #endif
763 && ! regs->array[REGNO (SET_DEST (set))].may_not_optimize)
765 int tem1 = 0;
766 int tem2 = 0;
767 int move_insn = 0;
768 rtx src = SET_SRC (set);
769 rtx dependencies = 0;
771 /* Figure out what to use as a source of this insn. If a
772 REG_EQUIV note is given or if a REG_EQUAL note with a
773 constant operand is specified, use it as the source and
774 mark that we should move this insn by calling
775 emit_move_insn rather that duplicating the insn.
777 Otherwise, only use the REG_EQUAL contents if a REG_RETVAL
778 note is present. */
779 temp = find_reg_note (p, REG_EQUIV, NULL_RTX);
780 if (temp)
781 src = XEXP (temp, 0), move_insn = 1;
782 else
784 temp = find_reg_note (p, REG_EQUAL, NULL_RTX);
785 if (temp && CONSTANT_P (XEXP (temp, 0)))
786 src = XEXP (temp, 0), move_insn = 1;
787 if (temp && find_reg_note (p, REG_RETVAL, NULL_RTX))
789 src = XEXP (temp, 0);
790 /* A libcall block can use regs that don't appear in
791 the equivalent expression. To move the libcall,
792 we must move those regs too. */
793 dependencies = libcall_other_reg (p, src);
797 /* For parallels, add any possible uses to the dependencies, as
798 we can't move the insn without resolving them first. */
799 if (GET_CODE (PATTERN (p)) == PARALLEL)
801 for (i = 0; i < XVECLEN (PATTERN (p), 0); i++)
803 rtx x = XVECEXP (PATTERN (p), 0, i);
804 if (GET_CODE (x) == USE)
805 dependencies
806 = gen_rtx_EXPR_LIST (VOIDmode, XEXP (x, 0),
807 dependencies);
811 /* Don't try to optimize a register that was made
812 by loop-optimization for an inner loop.
813 We don't know its life-span, so we can't compute
814 the benefit. */
815 if (REGNO (SET_DEST (set)) >= max_reg_before_loop)
817 else if (/* The register is used in basic blocks other
818 than the one where it is set (meaning that
819 something after this point in the loop might
820 depend on its value before the set). */
821 ! reg_in_basic_block_p (p, SET_DEST (set))
822 /* And the set is not guaranteed to be executed once
823 the loop starts, or the value before the set is
824 needed before the set occurs...
826 ??? Note we have quadratic behavior here, mitigated
827 by the fact that the previous test will often fail for
828 large loops. Rather than re-scanning the entire loop
829 each time for register usage, we should build tables
830 of the register usage and use them here instead. */
831 && (maybe_never
832 || loop_reg_used_before_p (loop, set, p)))
833 /* It is unsafe to move the set.
835 This code used to consider it OK to move a set of a variable
836 which was not created by the user and not used in an exit
837 test.
838 That behavior is incorrect and was removed. */
840 else if ((tem = loop_invariant_p (loop, src))
841 && (dependencies == 0
842 || (tem2
843 = loop_invariant_p (loop, dependencies)) != 0)
844 && (regs->array[REGNO (SET_DEST (set))].set_in_loop == 1
845 || (tem1
846 = consec_sets_invariant_p
847 (loop, SET_DEST (set),
848 regs->array[REGNO (SET_DEST (set))].set_in_loop,
849 p)))
850 /* If the insn can cause a trap (such as divide by zero),
851 can't move it unless it's guaranteed to be executed
852 once loop is entered. Even a function call might
853 prevent the trap insn from being reached
854 (since it might exit!) */
855 && ! ((maybe_never || call_passed)
856 && may_trap_p (src)))
858 struct movable *m;
859 int regno = REGNO (SET_DEST (set));
861 /* A potential lossage is where we have a case where two insns
862 can be combined as long as they are both in the loop, but
863 we move one of them outside the loop. For large loops,
864 this can lose. The most common case of this is the address
865 of a function being called.
867 Therefore, if this register is marked as being used
868 exactly once if we are in a loop with calls
869 (a "large loop"), see if we can replace the usage of
870 this register with the source of this SET. If we can,
871 delete this insn.
873 Don't do this if P has a REG_RETVAL note or if we have
874 SMALL_REGISTER_CLASSES and SET_SRC is a hard register. */
876 if (loop_info->has_call
877 && regs->array[regno].single_usage != 0
878 && regs->array[regno].single_usage != const0_rtx
879 && REGNO_FIRST_UID (regno) == INSN_UID (p)
880 && (REGNO_LAST_UID (regno)
881 == INSN_UID (regs->array[regno].single_usage))
882 && regs->array[regno].set_in_loop == 1
883 && GET_CODE (SET_SRC (set)) != ASM_OPERANDS
884 && ! side_effects_p (SET_SRC (set))
885 && ! find_reg_note (p, REG_RETVAL, NULL_RTX)
886 && (! SMALL_REGISTER_CLASSES
887 || (! (GET_CODE (SET_SRC (set)) == REG
888 && (REGNO (SET_SRC (set))
889 < FIRST_PSEUDO_REGISTER))))
890 /* This test is not redundant; SET_SRC (set) might be
891 a call-clobbered register and the life of REGNO
892 might span a call. */
893 && ! modified_between_p (SET_SRC (set), p,
894 regs->array[regno].single_usage)
895 && no_labels_between_p (p,
896 regs->array[regno].single_usage)
897 && validate_replace_rtx (SET_DEST (set), SET_SRC (set),
898 regs->array[regno].single_usage))
900 /* Replace any usage in a REG_EQUAL note. Must copy
901 the new source, so that we don't get rtx sharing
902 between the SET_SOURCE and REG_NOTES of insn p. */
903 REG_NOTES (regs->array[regno].single_usage)
904 = (replace_rtx
905 (REG_NOTES (regs->array[regno].single_usage),
906 SET_DEST (set), copy_rtx (SET_SRC (set))));
908 delete_insn (p);
909 for (i = 0; i < LOOP_REGNO_NREGS (regno, SET_DEST (set));
910 i++)
911 regs->array[regno+i].set_in_loop = 0;
912 continue;
915 m = (struct movable *) xmalloc (sizeof (struct movable));
916 m->next = 0;
917 m->insn = p;
918 m->set_src = src;
919 m->dependencies = dependencies;
920 m->set_dest = SET_DEST (set);
921 m->force = 0;
922 m->consec
923 = regs->array[REGNO (SET_DEST (set))].set_in_loop - 1;
924 m->done = 0;
925 m->forces = 0;
926 m->partial = 0;
927 m->move_insn = move_insn;
928 m->move_insn_first = 0;
929 m->is_equiv = (find_reg_note (p, REG_EQUIV, NULL_RTX) != 0);
930 m->savemode = VOIDmode;
931 m->regno = regno;
932 /* Set M->cond if either loop_invariant_p
933 or consec_sets_invariant_p returned 2
934 (only conditionally invariant). */
935 m->cond = ((tem | tem1 | tem2) > 1);
936 m->global = LOOP_REG_GLOBAL_P (loop, regno);
937 m->match = 0;
938 m->lifetime = LOOP_REG_LIFETIME (loop, regno);
939 m->savings = regs->array[regno].n_times_set;
940 if (find_reg_note (p, REG_RETVAL, NULL_RTX))
941 m->savings += libcall_benefit (p);
942 for (i = 0; i < LOOP_REGNO_NREGS (regno, SET_DEST (set)); i++)
943 regs->array[regno+i].set_in_loop = move_insn ? -2 : -1;
944 /* Add M to the end of the chain MOVABLES. */
945 loop_movables_add (movables, m);
947 if (m->consec > 0)
949 /* It is possible for the first instruction to have a
950 REG_EQUAL note but a non-invariant SET_SRC, so we must
951 remember the status of the first instruction in case
952 the last instruction doesn't have a REG_EQUAL note. */
953 m->move_insn_first = m->move_insn;
955 /* Skip this insn, not checking REG_LIBCALL notes. */
956 p = next_nonnote_insn (p);
957 /* Skip the consecutive insns, if there are any. */
958 p = skip_consec_insns (p, m->consec);
959 /* Back up to the last insn of the consecutive group. */
960 p = prev_nonnote_insn (p);
962 /* We must now reset m->move_insn, m->is_equiv, and
963 possibly m->set_src to correspond to the effects of
964 all the insns. */
965 temp = find_reg_note (p, REG_EQUIV, NULL_RTX);
966 if (temp)
967 m->set_src = XEXP (temp, 0), m->move_insn = 1;
968 else
970 temp = find_reg_note (p, REG_EQUAL, NULL_RTX);
971 if (temp && CONSTANT_P (XEXP (temp, 0)))
972 m->set_src = XEXP (temp, 0), m->move_insn = 1;
973 else
974 m->move_insn = 0;
977 m->is_equiv
978 = (find_reg_note (p, REG_EQUIV, NULL_RTX) != 0);
981 /* If this register is always set within a STRICT_LOW_PART
982 or set to zero, then its high bytes are constant.
983 So clear them outside the loop and within the loop
984 just load the low bytes.
985 We must check that the machine has an instruction to do so.
986 Also, if the value loaded into the register
987 depends on the same register, this cannot be done. */
988 else if (SET_SRC (set) == const0_rtx
989 && GET_CODE (NEXT_INSN (p)) == INSN
990 && (set1 = single_set (NEXT_INSN (p)))
991 && GET_CODE (set1) == SET
992 && (GET_CODE (SET_DEST (set1)) == STRICT_LOW_PART)
993 && (GET_CODE (XEXP (SET_DEST (set1), 0)) == SUBREG)
994 && (SUBREG_REG (XEXP (SET_DEST (set1), 0))
995 == SET_DEST (set))
996 && !reg_mentioned_p (SET_DEST (set), SET_SRC (set1)))
998 int regno = REGNO (SET_DEST (set));
999 if (regs->array[regno].set_in_loop == 2)
1001 struct movable *m;
1002 m = (struct movable *) xmalloc (sizeof (struct movable));
1003 m->next = 0;
1004 m->insn = p;
1005 m->set_dest = SET_DEST (set);
1006 m->dependencies = 0;
1007 m->force = 0;
1008 m->consec = 0;
1009 m->done = 0;
1010 m->forces = 0;
1011 m->move_insn = 0;
1012 m->move_insn_first = 0;
1013 m->partial = 1;
1014 /* If the insn may not be executed on some cycles,
1015 we can't clear the whole reg; clear just high part.
1016 Not even if the reg is used only within this loop.
1017 Consider this:
1018 while (1)
1019 while (s != t) {
1020 if (foo ()) x = *s;
1021 use (x);
1023 Clearing x before the inner loop could clobber a value
1024 being saved from the last time around the outer loop.
1025 However, if the reg is not used outside this loop
1026 and all uses of the register are in the same
1027 basic block as the store, there is no problem.
1029 If this insn was made by loop, we don't know its
1030 INSN_LUID and hence must make a conservative
1031 assumption. */
1032 m->global = (INSN_UID (p) >= max_uid_for_loop
1033 || LOOP_REG_GLOBAL_P (loop, regno)
1034 || (labels_in_range_p
1035 (p, REGNO_FIRST_LUID (regno))));
1036 if (maybe_never && m->global)
1037 m->savemode = GET_MODE (SET_SRC (set1));
1038 else
1039 m->savemode = VOIDmode;
1040 m->regno = regno;
1041 m->cond = 0;
1042 m->match = 0;
1043 m->lifetime = LOOP_REG_LIFETIME (loop, regno);
1044 m->savings = 1;
1045 for (i = 0;
1046 i < LOOP_REGNO_NREGS (regno, SET_DEST (set));
1047 i++)
1048 regs->array[regno+i].set_in_loop = -1;
1049 /* Add M to the end of the chain MOVABLES. */
1050 loop_movables_add (movables, m);
1055 /* Past a call insn, we get to insns which might not be executed
1056 because the call might exit. This matters for insns that trap.
1057 Constant and pure call insns always return, so they don't count. */
1058 else if (GET_CODE (p) == CALL_INSN && ! CONST_OR_PURE_CALL_P (p))
1059 call_passed = 1;
1060 /* Past a label or a jump, we get to insns for which we
1061 can't count on whether or how many times they will be
1062 executed during each iteration. Therefore, we can
1063 only move out sets of trivial variables
1064 (those not used after the loop). */
1065 /* Similar code appears twice in strength_reduce. */
1066 else if ((GET_CODE (p) == CODE_LABEL || GET_CODE (p) == JUMP_INSN)
1067 /* If we enter the loop in the middle, and scan around to the
1068 beginning, don't set maybe_never for that. This must be an
1069 unconditional jump, otherwise the code at the top of the
1070 loop might never be executed. Unconditional jumps are
1071 followed by a barrier then the loop_end. */
1072 && ! (GET_CODE (p) == JUMP_INSN && JUMP_LABEL (p) == loop->top
1073 && NEXT_INSN (NEXT_INSN (p)) == loop_end
1074 && any_uncondjump_p (p)))
1075 maybe_never = 1;
1076 else if (GET_CODE (p) == NOTE)
1078 /* At the virtual top of a converted loop, insns are again known to
1079 be executed: logically, the loop begins here even though the exit
1080 code has been duplicated. */
1081 if (NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_VTOP && loop_depth == 0)
1082 maybe_never = call_passed = 0;
1083 else if (NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_BEG)
1084 loop_depth++;
1085 else if (NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_END)
1086 loop_depth--;
1090 /* If one movable subsumes another, ignore that other. */
1092 ignore_some_movables (movables);
1094 /* For each movable insn, see if the reg that it loads
1095 leads when it dies right into another conditionally movable insn.
1096 If so, record that the second insn "forces" the first one,
1097 since the second can be moved only if the first is. */
1099 force_movables (movables);
1101 /* See if there are multiple movable insns that load the same value.
1102 If there are, make all but the first point at the first one
1103 through the `match' field, and add the priorities of them
1104 all together as the priority of the first. */
1106 combine_movables (movables, regs);
1108 /* Now consider each movable insn to decide whether it is worth moving.
1109 Store 0 in regs->array[I].set_in_loop for each reg I that is moved.
1111 Generally this increases code size, so do not move moveables when
1112 optimizing for code size. */
1114 if (! optimize_size)
1116 move_movables (loop, movables, threshold, insn_count);
1118 /* Recalculate regs->array if move_movables has created new
1119 registers. */
1120 if (max_reg_num () > regs->num)
1122 loop_regs_scan (loop, 0);
1123 for (update_start = loop_start;
1124 PREV_INSN (update_start)
1125 && GET_CODE (PREV_INSN (update_start)) != CODE_LABEL;
1126 update_start = PREV_INSN (update_start))
1128 update_end = NEXT_INSN (loop_end);
1130 reg_scan_update (update_start, update_end, loop_max_reg);
1131 loop_max_reg = max_reg_num ();
1135 /* Now candidates that still are negative are those not moved.
1136 Change regs->array[I].set_in_loop to indicate that those are not actually
1137 invariant. */
1138 for (i = 0; i < regs->num; i++)
1139 if (regs->array[i].set_in_loop < 0)
1140 regs->array[i].set_in_loop = regs->array[i].n_times_set;
1142 /* Now that we've moved some things out of the loop, we might be able to
1143 hoist even more memory references. */
1144 load_mems (loop);
1146 /* Recalculate regs->array if load_mems has created new registers. */
1147 if (max_reg_num () > regs->num)
1148 loop_regs_scan (loop, 0);
1150 for (update_start = loop_start;
1151 PREV_INSN (update_start)
1152 && GET_CODE (PREV_INSN (update_start)) != CODE_LABEL;
1153 update_start = PREV_INSN (update_start))
1155 update_end = NEXT_INSN (loop_end);
1157 reg_scan_update (update_start, update_end, loop_max_reg);
1158 loop_max_reg = max_reg_num ();
1160 if (flag_strength_reduce)
1162 if (update_end && GET_CODE (update_end) == CODE_LABEL)
1163 /* Ensure our label doesn't go away. */
1164 LABEL_NUSES (update_end)++;
1166 strength_reduce (loop, flags);
1168 reg_scan_update (update_start, update_end, loop_max_reg);
1169 loop_max_reg = max_reg_num ();
1171 if (update_end && GET_CODE (update_end) == CODE_LABEL
1172 && --LABEL_NUSES (update_end) == 0)
1173 delete_related_insns (update_end);
1177 /* The movable information is required for strength reduction. */
1178 loop_movables_free (movables);
1180 free (regs->array);
1181 regs->array = 0;
1182 regs->num = 0;
1185 /* Add elements to *OUTPUT to record all the pseudo-regs
1186 mentioned in IN_THIS but not mentioned in NOT_IN_THIS. */
1188 void
1189 record_excess_regs (in_this, not_in_this, output)
1190 rtx in_this, not_in_this;
1191 rtx *output;
1193 enum rtx_code code;
1194 const char *fmt;
1195 int i;
1197 code = GET_CODE (in_this);
1199 switch (code)
1201 case PC:
1202 case CC0:
1203 case CONST_INT:
1204 case CONST_DOUBLE:
1205 case CONST:
1206 case SYMBOL_REF:
1207 case LABEL_REF:
1208 return;
1210 case REG:
1211 if (REGNO (in_this) >= FIRST_PSEUDO_REGISTER
1212 && ! reg_mentioned_p (in_this, not_in_this))
1213 *output = gen_rtx_EXPR_LIST (VOIDmode, in_this, *output);
1214 return;
1216 default:
1217 break;
1220 fmt = GET_RTX_FORMAT (code);
1221 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1223 int j;
1225 switch (fmt[i])
1227 case 'E':
1228 for (j = 0; j < XVECLEN (in_this, i); j++)
1229 record_excess_regs (XVECEXP (in_this, i, j), not_in_this, output);
1230 break;
1232 case 'e':
1233 record_excess_regs (XEXP (in_this, i), not_in_this, output);
1234 break;
1239 /* Check what regs are referred to in the libcall block ending with INSN,
1240 aside from those mentioned in the equivalent value.
1241 If there are none, return 0.
1242 If there are one or more, return an EXPR_LIST containing all of them. */
1245 libcall_other_reg (insn, equiv)
1246 rtx insn, equiv;
1248 rtx note = find_reg_note (insn, REG_RETVAL, NULL_RTX);
1249 rtx p = XEXP (note, 0);
1250 rtx output = 0;
1252 /* First, find all the regs used in the libcall block
1253 that are not mentioned as inputs to the result. */
1255 while (p != insn)
1257 if (GET_CODE (p) == INSN || GET_CODE (p) == JUMP_INSN
1258 || GET_CODE (p) == CALL_INSN)
1259 record_excess_regs (PATTERN (p), equiv, &output);
1260 p = NEXT_INSN (p);
1263 return output;
1266 /* Return 1 if all uses of REG
1267 are between INSN and the end of the basic block. */
1269 static int
1270 reg_in_basic_block_p (insn, reg)
1271 rtx insn, reg;
1273 int regno = REGNO (reg);
1274 rtx p;
1276 if (REGNO_FIRST_UID (regno) != INSN_UID (insn))
1277 return 0;
1279 /* Search this basic block for the already recorded last use of the reg. */
1280 for (p = insn; p; p = NEXT_INSN (p))
1282 switch (GET_CODE (p))
1284 case NOTE:
1285 break;
1287 case INSN:
1288 case CALL_INSN:
1289 /* Ordinary insn: if this is the last use, we win. */
1290 if (REGNO_LAST_UID (regno) == INSN_UID (p))
1291 return 1;
1292 break;
1294 case JUMP_INSN:
1295 /* Jump insn: if this is the last use, we win. */
1296 if (REGNO_LAST_UID (regno) == INSN_UID (p))
1297 return 1;
1298 /* Otherwise, it's the end of the basic block, so we lose. */
1299 return 0;
1301 case CODE_LABEL:
1302 case BARRIER:
1303 /* It's the end of the basic block, so we lose. */
1304 return 0;
1306 default:
1307 break;
1311 /* The "last use" that was recorded can't be found after the first
1312 use. This can happen when the last use was deleted while
1313 processing an inner loop, this inner loop was then completely
1314 unrolled, and the outer loop is always exited after the inner loop,
1315 so that everything after the first use becomes a single basic block. */
1316 return 1;
1319 /* Compute the benefit of eliminating the insns in the block whose
1320 last insn is LAST. This may be a group of insns used to compute a
1321 value directly or can contain a library call. */
1323 static int
1324 libcall_benefit (last)
1325 rtx last;
1327 rtx insn;
1328 int benefit = 0;
1330 for (insn = XEXP (find_reg_note (last, REG_RETVAL, NULL_RTX), 0);
1331 insn != last; insn = NEXT_INSN (insn))
1333 if (GET_CODE (insn) == CALL_INSN)
1334 benefit += 10; /* Assume at least this many insns in a library
1335 routine. */
1336 else if (GET_CODE (insn) == INSN
1337 && GET_CODE (PATTERN (insn)) != USE
1338 && GET_CODE (PATTERN (insn)) != CLOBBER)
1339 benefit++;
1342 return benefit;
1345 /* Skip COUNT insns from INSN, counting library calls as 1 insn. */
1347 static rtx
1348 skip_consec_insns (insn, count)
1349 rtx insn;
1350 int count;
1352 for (; count > 0; count--)
1354 rtx temp;
1356 /* If first insn of libcall sequence, skip to end. */
1357 /* Do this at start of loop, since INSN is guaranteed to
1358 be an insn here. */
1359 if (GET_CODE (insn) != NOTE
1360 && (temp = find_reg_note (insn, REG_LIBCALL, NULL_RTX)))
1361 insn = XEXP (temp, 0);
1364 insn = NEXT_INSN (insn);
1365 while (GET_CODE (insn) == NOTE);
1368 return insn;
1371 /* Ignore any movable whose insn falls within a libcall
1372 which is part of another movable.
1373 We make use of the fact that the movable for the libcall value
1374 was made later and so appears later on the chain. */
1376 static void
1377 ignore_some_movables (movables)
1378 struct loop_movables *movables;
1380 struct movable *m, *m1;
1382 for (m = movables->head; m; m = m->next)
1384 /* Is this a movable for the value of a libcall? */
1385 rtx note = find_reg_note (m->insn, REG_RETVAL, NULL_RTX);
1386 if (note)
1388 rtx insn;
1389 /* Check for earlier movables inside that range,
1390 and mark them invalid. We cannot use LUIDs here because
1391 insns created by loop.c for prior loops don't have LUIDs.
1392 Rather than reject all such insns from movables, we just
1393 explicitly check each insn in the libcall (since invariant
1394 libcalls aren't that common). */
1395 for (insn = XEXP (note, 0); insn != m->insn; insn = NEXT_INSN (insn))
1396 for (m1 = movables->head; m1 != m; m1 = m1->next)
1397 if (m1->insn == insn)
1398 m1->done = 1;
1403 /* For each movable insn, see if the reg that it loads
1404 leads when it dies right into another conditionally movable insn.
1405 If so, record that the second insn "forces" the first one,
1406 since the second can be moved only if the first is. */
1408 static void
1409 force_movables (movables)
1410 struct loop_movables *movables;
1412 struct movable *m, *m1;
1414 for (m1 = movables->head; m1; m1 = m1->next)
1415 /* Omit this if moving just the (SET (REG) 0) of a zero-extend. */
1416 if (!m1->partial && !m1->done)
1418 int regno = m1->regno;
1419 for (m = m1->next; m; m = m->next)
1420 /* ??? Could this be a bug? What if CSE caused the
1421 register of M1 to be used after this insn?
1422 Since CSE does not update regno_last_uid,
1423 this insn M->insn might not be where it dies.
1424 But very likely this doesn't matter; what matters is
1425 that M's reg is computed from M1's reg. */
1426 if (INSN_UID (m->insn) == REGNO_LAST_UID (regno)
1427 && !m->done)
1428 break;
1429 if (m != 0 && m->set_src == m1->set_dest
1430 /* If m->consec, m->set_src isn't valid. */
1431 && m->consec == 0)
1432 m = 0;
1434 /* Increase the priority of the moving the first insn
1435 since it permits the second to be moved as well. */
1436 if (m != 0)
1438 m->forces = m1;
1439 m1->lifetime += m->lifetime;
1440 m1->savings += m->savings;
1445 /* Find invariant expressions that are equal and can be combined into
1446 one register. */
1448 static void
1449 combine_movables (movables, regs)
1450 struct loop_movables *movables;
1451 struct loop_regs *regs;
1453 struct movable *m;
1454 char *matched_regs = (char *) xmalloc (regs->num);
1455 enum machine_mode mode;
1457 /* Regs that are set more than once are not allowed to match
1458 or be matched. I'm no longer sure why not. */
1459 /* Only pseudo registers are allowed to match or be matched,
1460 since move_movables does not validate the change. */
1461 /* Perhaps testing m->consec_sets would be more appropriate here? */
1463 for (m = movables->head; m; m = m->next)
1464 if (m->match == 0 && regs->array[m->regno].n_times_set == 1
1465 && m->regno >= FIRST_PSEUDO_REGISTER
1466 && !m->partial)
1468 struct movable *m1;
1469 int regno = m->regno;
1471 memset (matched_regs, 0, regs->num);
1472 matched_regs[regno] = 1;
1474 /* We want later insns to match the first one. Don't make the first
1475 one match any later ones. So start this loop at m->next. */
1476 for (m1 = m->next; m1; m1 = m1->next)
1477 if (m != m1 && m1->match == 0
1478 && regs->array[m1->regno].n_times_set == 1
1479 && m1->regno >= FIRST_PSEUDO_REGISTER
1480 /* A reg used outside the loop mustn't be eliminated. */
1481 && !m1->global
1482 /* A reg used for zero-extending mustn't be eliminated. */
1483 && !m1->partial
1484 && (matched_regs[m1->regno]
1487 /* Can combine regs with different modes loaded from the
1488 same constant only if the modes are the same or
1489 if both are integer modes with M wider or the same
1490 width as M1. The check for integer is redundant, but
1491 safe, since the only case of differing destination
1492 modes with equal sources is when both sources are
1493 VOIDmode, i.e., CONST_INT. */
1494 (GET_MODE (m->set_dest) == GET_MODE (m1->set_dest)
1495 || (GET_MODE_CLASS (GET_MODE (m->set_dest)) == MODE_INT
1496 && GET_MODE_CLASS (GET_MODE (m1->set_dest)) == MODE_INT
1497 && (GET_MODE_BITSIZE (GET_MODE (m->set_dest))
1498 >= GET_MODE_BITSIZE (GET_MODE (m1->set_dest)))))
1499 /* See if the source of M1 says it matches M. */
1500 && ((GET_CODE (m1->set_src) == REG
1501 && matched_regs[REGNO (m1->set_src)])
1502 || rtx_equal_for_loop_p (m->set_src, m1->set_src,
1503 movables, regs))))
1504 && ((m->dependencies == m1->dependencies)
1505 || rtx_equal_p (m->dependencies, m1->dependencies)))
1507 m->lifetime += m1->lifetime;
1508 m->savings += m1->savings;
1509 m1->done = 1;
1510 m1->match = m;
1511 matched_regs[m1->regno] = 1;
1515 /* Now combine the regs used for zero-extension.
1516 This can be done for those not marked `global'
1517 provided their lives don't overlap. */
1519 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1520 mode = GET_MODE_WIDER_MODE (mode))
1522 struct movable *m0 = 0;
1524 /* Combine all the registers for extension from mode MODE.
1525 Don't combine any that are used outside this loop. */
1526 for (m = movables->head; m; m = m->next)
1527 if (m->partial && ! m->global
1528 && mode == GET_MODE (SET_SRC (PATTERN (NEXT_INSN (m->insn)))))
1530 struct movable *m1;
1532 int first = REGNO_FIRST_LUID (m->regno);
1533 int last = REGNO_LAST_LUID (m->regno);
1535 if (m0 == 0)
1537 /* First one: don't check for overlap, just record it. */
1538 m0 = m;
1539 continue;
1542 /* Make sure they extend to the same mode.
1543 (Almost always true.) */
1544 if (GET_MODE (m->set_dest) != GET_MODE (m0->set_dest))
1545 continue;
1547 /* We already have one: check for overlap with those
1548 already combined together. */
1549 for (m1 = movables->head; m1 != m; m1 = m1->next)
1550 if (m1 == m0 || (m1->partial && m1->match == m0))
1551 if (! (REGNO_FIRST_LUID (m1->regno) > last
1552 || REGNO_LAST_LUID (m1->regno) < first))
1553 goto overlap;
1555 /* No overlap: we can combine this with the others. */
1556 m0->lifetime += m->lifetime;
1557 m0->savings += m->savings;
1558 m->done = 1;
1559 m->match = m0;
1561 overlap:
1566 /* Clean up. */
1567 free (matched_regs);
1570 /* Returns the number of movable instructions in LOOP that were not
1571 moved outside the loop. */
1573 static int
1574 num_unmoved_movables (loop)
1575 const struct loop *loop;
1577 int num = 0;
1578 struct movable *m;
1580 for (m = LOOP_MOVABLES (loop)->head; m; m = m->next)
1581 if (!m->done)
1582 ++num;
1584 return num;
1588 /* Return 1 if regs X and Y will become the same if moved. */
1590 static int
1591 regs_match_p (x, y, movables)
1592 rtx x, y;
1593 struct loop_movables *movables;
1595 unsigned int xn = REGNO (x);
1596 unsigned int yn = REGNO (y);
1597 struct movable *mx, *my;
1599 for (mx = movables->head; mx; mx = mx->next)
1600 if (mx->regno == xn)
1601 break;
1603 for (my = movables->head; my; my = my->next)
1604 if (my->regno == yn)
1605 break;
1607 return (mx && my
1608 && ((mx->match == my->match && mx->match != 0)
1609 || mx->match == my
1610 || mx == my->match));
1613 /* Return 1 if X and Y are identical-looking rtx's.
1614 This is the Lisp function EQUAL for rtx arguments.
1616 If two registers are matching movables or a movable register and an
1617 equivalent constant, consider them equal. */
1619 static int
1620 rtx_equal_for_loop_p (x, y, movables, regs)
1621 rtx x, y;
1622 struct loop_movables *movables;
1623 struct loop_regs *regs;
1625 int i;
1626 int j;
1627 struct movable *m;
1628 enum rtx_code code;
1629 const char *fmt;
1631 if (x == y)
1632 return 1;
1633 if (x == 0 || y == 0)
1634 return 0;
1636 code = GET_CODE (x);
1638 /* If we have a register and a constant, they may sometimes be
1639 equal. */
1640 if (GET_CODE (x) == REG && regs->array[REGNO (x)].set_in_loop == -2
1641 && CONSTANT_P (y))
1643 for (m = movables->head; m; m = m->next)
1644 if (m->move_insn && m->regno == REGNO (x)
1645 && rtx_equal_p (m->set_src, y))
1646 return 1;
1648 else if (GET_CODE (y) == REG && regs->array[REGNO (y)].set_in_loop == -2
1649 && CONSTANT_P (x))
1651 for (m = movables->head; m; m = m->next)
1652 if (m->move_insn && m->regno == REGNO (y)
1653 && rtx_equal_p (m->set_src, x))
1654 return 1;
1657 /* Otherwise, rtx's of different codes cannot be equal. */
1658 if (code != GET_CODE (y))
1659 return 0;
1661 /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent.
1662 (REG:SI x) and (REG:HI x) are NOT equivalent. */
1664 if (GET_MODE (x) != GET_MODE (y))
1665 return 0;
1667 /* These three types of rtx's can be compared nonrecursively. */
1668 if (code == REG)
1669 return (REGNO (x) == REGNO (y) || regs_match_p (x, y, movables));
1671 if (code == LABEL_REF)
1672 return XEXP (x, 0) == XEXP (y, 0);
1673 if (code == SYMBOL_REF)
1674 return XSTR (x, 0) == XSTR (y, 0);
1676 /* Compare the elements. If any pair of corresponding elements
1677 fail to match, return 0 for the whole things. */
1679 fmt = GET_RTX_FORMAT (code);
1680 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1682 switch (fmt[i])
1684 case 'w':
1685 if (XWINT (x, i) != XWINT (y, i))
1686 return 0;
1687 break;
1689 case 'i':
1690 if (XINT (x, i) != XINT (y, i))
1691 return 0;
1692 break;
1694 case 'E':
1695 /* Two vectors must have the same length. */
1696 if (XVECLEN (x, i) != XVECLEN (y, i))
1697 return 0;
1699 /* And the corresponding elements must match. */
1700 for (j = 0; j < XVECLEN (x, i); j++)
1701 if (rtx_equal_for_loop_p (XVECEXP (x, i, j), XVECEXP (y, i, j),
1702 movables, regs) == 0)
1703 return 0;
1704 break;
1706 case 'e':
1707 if (rtx_equal_for_loop_p (XEXP (x, i), XEXP (y, i), movables, regs)
1708 == 0)
1709 return 0;
1710 break;
1712 case 's':
1713 if (strcmp (XSTR (x, i), XSTR (y, i)))
1714 return 0;
1715 break;
1717 case 'u':
1718 /* These are just backpointers, so they don't matter. */
1719 break;
1721 case '0':
1722 break;
1724 /* It is believed that rtx's at this level will never
1725 contain anything but integers and other rtx's,
1726 except for within LABEL_REFs and SYMBOL_REFs. */
1727 default:
1728 abort ();
1731 return 1;
1734 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to all
1735 insns in INSNS which use the reference. LABEL_NUSES for CODE_LABEL
1736 references is incremented once for each added note. */
1738 static void
1739 add_label_notes (x, insns)
1740 rtx x;
1741 rtx insns;
1743 enum rtx_code code = GET_CODE (x);
1744 int i, j;
1745 const char *fmt;
1746 rtx insn;
1748 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
1750 /* This code used to ignore labels that referred to dispatch tables to
1751 avoid flow generating (slighly) worse code.
1753 We no longer ignore such label references (see LABEL_REF handling in
1754 mark_jump_label for additional information). */
1755 for (insn = insns; insn; insn = NEXT_INSN (insn))
1756 if (reg_mentioned_p (XEXP (x, 0), insn))
1758 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, XEXP (x, 0),
1759 REG_NOTES (insn));
1760 if (LABEL_P (XEXP (x, 0)))
1761 LABEL_NUSES (XEXP (x, 0))++;
1765 fmt = GET_RTX_FORMAT (code);
1766 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1768 if (fmt[i] == 'e')
1769 add_label_notes (XEXP (x, i), insns);
1770 else if (fmt[i] == 'E')
1771 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
1772 add_label_notes (XVECEXP (x, i, j), insns);
1776 /* Scan MOVABLES, and move the insns that deserve to be moved.
1777 If two matching movables are combined, replace one reg with the
1778 other throughout. */
1780 static void
1781 move_movables (loop, movables, threshold, insn_count)
1782 struct loop *loop;
1783 struct loop_movables *movables;
1784 int threshold;
1785 int insn_count;
1787 struct loop_regs *regs = LOOP_REGS (loop);
1788 int nregs = regs->num;
1789 rtx new_start = 0;
1790 struct movable *m;
1791 rtx p;
1792 rtx loop_start = loop->start;
1793 rtx loop_end = loop->end;
1794 /* Map of pseudo-register replacements to handle combining
1795 when we move several insns that load the same value
1796 into different pseudo-registers. */
1797 rtx *reg_map = (rtx *) xcalloc (nregs, sizeof (rtx));
1798 char *already_moved = (char *) xcalloc (nregs, sizeof (char));
1800 for (m = movables->head; m; m = m->next)
1802 /* Describe this movable insn. */
1804 if (loop_dump_stream)
1806 fprintf (loop_dump_stream, "Insn %d: regno %d (life %d), ",
1807 INSN_UID (m->insn), m->regno, m->lifetime);
1808 if (m->consec > 0)
1809 fprintf (loop_dump_stream, "consec %d, ", m->consec);
1810 if (m->cond)
1811 fprintf (loop_dump_stream, "cond ");
1812 if (m->force)
1813 fprintf (loop_dump_stream, "force ");
1814 if (m->global)
1815 fprintf (loop_dump_stream, "global ");
1816 if (m->done)
1817 fprintf (loop_dump_stream, "done ");
1818 if (m->move_insn)
1819 fprintf (loop_dump_stream, "move-insn ");
1820 if (m->match)
1821 fprintf (loop_dump_stream, "matches %d ",
1822 INSN_UID (m->match->insn));
1823 if (m->forces)
1824 fprintf (loop_dump_stream, "forces %d ",
1825 INSN_UID (m->forces->insn));
1828 /* Ignore the insn if it's already done (it matched something else).
1829 Otherwise, see if it is now safe to move. */
1831 if (!m->done
1832 && (! m->cond
1833 || (1 == loop_invariant_p (loop, m->set_src)
1834 && (m->dependencies == 0
1835 || 1 == loop_invariant_p (loop, m->dependencies))
1836 && (m->consec == 0
1837 || 1 == consec_sets_invariant_p (loop, m->set_dest,
1838 m->consec + 1,
1839 m->insn))))
1840 && (! m->forces || m->forces->done))
1842 int regno;
1843 rtx p;
1844 int savings = m->savings;
1846 /* We have an insn that is safe to move.
1847 Compute its desirability. */
1849 p = m->insn;
1850 regno = m->regno;
1852 if (loop_dump_stream)
1853 fprintf (loop_dump_stream, "savings %d ", savings);
1855 if (regs->array[regno].moved_once && loop_dump_stream)
1856 fprintf (loop_dump_stream, "halved since already moved ");
1858 /* An insn MUST be moved if we already moved something else
1859 which is safe only if this one is moved too: that is,
1860 if already_moved[REGNO] is nonzero. */
1862 /* An insn is desirable to move if the new lifetime of the
1863 register is no more than THRESHOLD times the old lifetime.
1864 If it's not desirable, it means the loop is so big
1865 that moving won't speed things up much,
1866 and it is liable to make register usage worse. */
1868 /* It is also desirable to move if it can be moved at no
1869 extra cost because something else was already moved. */
1871 if (already_moved[regno]
1872 || flag_move_all_movables
1873 || (threshold * savings * m->lifetime) >=
1874 (regs->array[regno].moved_once ? insn_count * 2 : insn_count)
1875 || (m->forces && m->forces->done
1876 && regs->array[m->forces->regno].n_times_set == 1))
1878 int count;
1879 struct movable *m1;
1880 rtx first = NULL_RTX;
1882 /* Now move the insns that set the reg. */
1884 if (m->partial && m->match)
1886 rtx newpat, i1;
1887 rtx r1, r2;
1888 /* Find the end of this chain of matching regs.
1889 Thus, we load each reg in the chain from that one reg.
1890 And that reg is loaded with 0 directly,
1891 since it has ->match == 0. */
1892 for (m1 = m; m1->match; m1 = m1->match);
1893 newpat = gen_move_insn (SET_DEST (PATTERN (m->insn)),
1894 SET_DEST (PATTERN (m1->insn)));
1895 i1 = loop_insn_hoist (loop, newpat);
1897 /* Mark the moved, invariant reg as being allowed to
1898 share a hard reg with the other matching invariant. */
1899 REG_NOTES (i1) = REG_NOTES (m->insn);
1900 r1 = SET_DEST (PATTERN (m->insn));
1901 r2 = SET_DEST (PATTERN (m1->insn));
1902 regs_may_share
1903 = gen_rtx_EXPR_LIST (VOIDmode, r1,
1904 gen_rtx_EXPR_LIST (VOIDmode, r2,
1905 regs_may_share));
1906 delete_insn (m->insn);
1908 if (new_start == 0)
1909 new_start = i1;
1911 if (loop_dump_stream)
1912 fprintf (loop_dump_stream, " moved to %d", INSN_UID (i1));
1914 /* If we are to re-generate the item being moved with a
1915 new move insn, first delete what we have and then emit
1916 the move insn before the loop. */
1917 else if (m->move_insn)
1919 rtx i1, temp, seq;
1921 for (count = m->consec; count >= 0; count--)
1923 /* If this is the first insn of a library call sequence,
1924 something is very wrong. */
1925 if (GET_CODE (p) != NOTE
1926 && (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
1927 abort ();
1929 /* If this is the last insn of a libcall sequence, then
1930 delete every insn in the sequence except the last.
1931 The last insn is handled in the normal manner. */
1932 if (GET_CODE (p) != NOTE
1933 && (temp = find_reg_note (p, REG_RETVAL, NULL_RTX)))
1935 temp = XEXP (temp, 0);
1936 while (temp != p)
1937 temp = delete_insn (temp);
1940 temp = p;
1941 p = delete_insn (p);
1943 /* simplify_giv_expr expects that it can walk the insns
1944 at m->insn forwards and see this old sequence we are
1945 tossing here. delete_insn does preserve the next
1946 pointers, but when we skip over a NOTE we must fix
1947 it up. Otherwise that code walks into the non-deleted
1948 insn stream. */
1949 while (p && GET_CODE (p) == NOTE)
1950 p = NEXT_INSN (temp) = NEXT_INSN (p);
1953 start_sequence ();
1954 emit_move_insn (m->set_dest, m->set_src);
1955 seq = get_insns ();
1956 end_sequence ();
1958 add_label_notes (m->set_src, seq);
1960 i1 = loop_insn_hoist (loop, seq);
1961 if (! find_reg_note (i1, REG_EQUAL, NULL_RTX))
1962 set_unique_reg_note (i1,
1963 m->is_equiv ? REG_EQUIV : REG_EQUAL,
1964 m->set_src);
1966 if (loop_dump_stream)
1967 fprintf (loop_dump_stream, " moved to %d", INSN_UID (i1));
1969 /* The more regs we move, the less we like moving them. */
1970 threshold -= 3;
1972 else
1974 for (count = m->consec; count >= 0; count--)
1976 rtx i1, temp;
1978 /* If first insn of libcall sequence, skip to end. */
1979 /* Do this at start of loop, since p is guaranteed to
1980 be an insn here. */
1981 if (GET_CODE (p) != NOTE
1982 && (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
1983 p = XEXP (temp, 0);
1985 /* If last insn of libcall sequence, move all
1986 insns except the last before the loop. The last
1987 insn is handled in the normal manner. */
1988 if (GET_CODE (p) != NOTE
1989 && (temp = find_reg_note (p, REG_RETVAL, NULL_RTX)))
1991 rtx fn_address = 0;
1992 rtx fn_reg = 0;
1993 rtx fn_address_insn = 0;
1995 first = 0;
1996 for (temp = XEXP (temp, 0); temp != p;
1997 temp = NEXT_INSN (temp))
1999 rtx body;
2000 rtx n;
2001 rtx next;
2003 if (GET_CODE (temp) == NOTE)
2004 continue;
2006 body = PATTERN (temp);
2008 /* Find the next insn after TEMP,
2009 not counting USE or NOTE insns. */
2010 for (next = NEXT_INSN (temp); next != p;
2011 next = NEXT_INSN (next))
2012 if (! (GET_CODE (next) == INSN
2013 && GET_CODE (PATTERN (next)) == USE)
2014 && GET_CODE (next) != NOTE)
2015 break;
2017 /* If that is the call, this may be the insn
2018 that loads the function address.
2020 Extract the function address from the insn
2021 that loads it into a register.
2022 If this insn was cse'd, we get incorrect code.
2024 So emit a new move insn that copies the
2025 function address into the register that the
2026 call insn will use. flow.c will delete any
2027 redundant stores that we have created. */
2028 if (GET_CODE (next) == CALL_INSN
2029 && GET_CODE (body) == SET
2030 && GET_CODE (SET_DEST (body)) == REG
2031 && (n = find_reg_note (temp, REG_EQUAL,
2032 NULL_RTX)))
2034 fn_reg = SET_SRC (body);
2035 if (GET_CODE (fn_reg) != REG)
2036 fn_reg = SET_DEST (body);
2037 fn_address = XEXP (n, 0);
2038 fn_address_insn = temp;
2040 /* We have the call insn.
2041 If it uses the register we suspect it might,
2042 load it with the correct address directly. */
2043 if (GET_CODE (temp) == CALL_INSN
2044 && fn_address != 0
2045 && reg_referenced_p (fn_reg, body))
2046 loop_insn_emit_after (loop, 0, fn_address_insn,
2047 gen_move_insn
2048 (fn_reg, fn_address));
2050 if (GET_CODE (temp) == CALL_INSN)
2052 i1 = loop_call_insn_hoist (loop, body);
2053 /* Because the USAGE information potentially
2054 contains objects other than hard registers
2055 we need to copy it. */
2056 if (CALL_INSN_FUNCTION_USAGE (temp))
2057 CALL_INSN_FUNCTION_USAGE (i1)
2058 = copy_rtx (CALL_INSN_FUNCTION_USAGE (temp));
2060 else
2061 i1 = loop_insn_hoist (loop, body);
2062 if (first == 0)
2063 first = i1;
2064 if (temp == fn_address_insn)
2065 fn_address_insn = i1;
2066 REG_NOTES (i1) = REG_NOTES (temp);
2067 REG_NOTES (temp) = NULL;
2068 delete_insn (temp);
2070 if (new_start == 0)
2071 new_start = first;
2073 if (m->savemode != VOIDmode)
2075 /* P sets REG to zero; but we should clear only
2076 the bits that are not covered by the mode
2077 m->savemode. */
2078 rtx reg = m->set_dest;
2079 rtx sequence;
2080 rtx tem;
2082 start_sequence ();
2083 tem = expand_simple_binop
2084 (GET_MODE (reg), AND, reg,
2085 GEN_INT ((((HOST_WIDE_INT) 1
2086 << GET_MODE_BITSIZE (m->savemode)))
2087 - 1),
2088 reg, 1, OPTAB_LIB_WIDEN);
2089 if (tem == 0)
2090 abort ();
2091 if (tem != reg)
2092 emit_move_insn (reg, tem);
2093 sequence = get_insns ();
2094 end_sequence ();
2095 i1 = loop_insn_hoist (loop, sequence);
2097 else if (GET_CODE (p) == CALL_INSN)
2099 i1 = loop_call_insn_hoist (loop, PATTERN (p));
2100 /* Because the USAGE information potentially
2101 contains objects other than hard registers
2102 we need to copy it. */
2103 if (CALL_INSN_FUNCTION_USAGE (p))
2104 CALL_INSN_FUNCTION_USAGE (i1)
2105 = copy_rtx (CALL_INSN_FUNCTION_USAGE (p));
2107 else if (count == m->consec && m->move_insn_first)
2109 rtx seq;
2110 /* The SET_SRC might not be invariant, so we must
2111 use the REG_EQUAL note. */
2112 start_sequence ();
2113 emit_move_insn (m->set_dest, m->set_src);
2114 seq = get_insns ();
2115 end_sequence ();
2117 add_label_notes (m->set_src, seq);
2119 i1 = loop_insn_hoist (loop, seq);
2120 if (! find_reg_note (i1, REG_EQUAL, NULL_RTX))
2121 set_unique_reg_note (i1, m->is_equiv ? REG_EQUIV
2122 : REG_EQUAL, m->set_src);
2124 else
2125 i1 = loop_insn_hoist (loop, PATTERN (p));
2127 if (REG_NOTES (i1) == 0)
2129 REG_NOTES (i1) = REG_NOTES (p);
2130 REG_NOTES (p) = NULL;
2132 /* If there is a REG_EQUAL note present whose value
2133 is not loop invariant, then delete it, since it
2134 may cause problems with later optimization passes.
2135 It is possible for cse to create such notes
2136 like this as a result of record_jump_cond. */
2138 if ((temp = find_reg_note (i1, REG_EQUAL, NULL_RTX))
2139 && ! loop_invariant_p (loop, XEXP (temp, 0)))
2140 remove_note (i1, temp);
2143 if (new_start == 0)
2144 new_start = i1;
2146 if (loop_dump_stream)
2147 fprintf (loop_dump_stream, " moved to %d",
2148 INSN_UID (i1));
2150 /* If library call, now fix the REG_NOTES that contain
2151 insn pointers, namely REG_LIBCALL on FIRST
2152 and REG_RETVAL on I1. */
2153 if ((temp = find_reg_note (i1, REG_RETVAL, NULL_RTX)))
2155 XEXP (temp, 0) = first;
2156 temp = find_reg_note (first, REG_LIBCALL, NULL_RTX);
2157 XEXP (temp, 0) = i1;
2160 temp = p;
2161 delete_insn (p);
2162 p = NEXT_INSN (p);
2164 /* simplify_giv_expr expects that it can walk the insns
2165 at m->insn forwards and see this old sequence we are
2166 tossing here. delete_insn does preserve the next
2167 pointers, but when we skip over a NOTE we must fix
2168 it up. Otherwise that code walks into the non-deleted
2169 insn stream. */
2170 while (p && GET_CODE (p) == NOTE)
2171 p = NEXT_INSN (temp) = NEXT_INSN (p);
2174 /* The more regs we move, the less we like moving them. */
2175 threshold -= 3;
2178 /* Any other movable that loads the same register
2179 MUST be moved. */
2180 already_moved[regno] = 1;
2182 /* This reg has been moved out of one loop. */
2183 regs->array[regno].moved_once = 1;
2185 /* The reg set here is now invariant. */
2186 if (! m->partial)
2188 int i;
2189 for (i = 0; i < LOOP_REGNO_NREGS (regno, m->set_dest); i++)
2190 regs->array[regno+i].set_in_loop = 0;
2193 m->done = 1;
2195 /* Change the length-of-life info for the register
2196 to say it lives at least the full length of this loop.
2197 This will help guide optimizations in outer loops. */
2199 if (REGNO_FIRST_LUID (regno) > INSN_LUID (loop_start))
2200 /* This is the old insn before all the moved insns.
2201 We can't use the moved insn because it is out of range
2202 in uid_luid. Only the old insns have luids. */
2203 REGNO_FIRST_UID (regno) = INSN_UID (loop_start);
2204 if (REGNO_LAST_LUID (regno) < INSN_LUID (loop_end))
2205 REGNO_LAST_UID (regno) = INSN_UID (loop_end);
2207 /* Combine with this moved insn any other matching movables. */
2209 if (! m->partial)
2210 for (m1 = movables->head; m1; m1 = m1->next)
2211 if (m1->match == m)
2213 rtx temp;
2215 /* Schedule the reg loaded by M1
2216 for replacement so that shares the reg of M.
2217 If the modes differ (only possible in restricted
2218 circumstances, make a SUBREG.
2220 Note this assumes that the target dependent files
2221 treat REG and SUBREG equally, including within
2222 GO_IF_LEGITIMATE_ADDRESS and in all the
2223 predicates since we never verify that replacing the
2224 original register with a SUBREG results in a
2225 recognizable insn. */
2226 if (GET_MODE (m->set_dest) == GET_MODE (m1->set_dest))
2227 reg_map[m1->regno] = m->set_dest;
2228 else
2229 reg_map[m1->regno]
2230 = gen_lowpart_common (GET_MODE (m1->set_dest),
2231 m->set_dest);
2233 /* Get rid of the matching insn
2234 and prevent further processing of it. */
2235 m1->done = 1;
2237 /* if library call, delete all insns. */
2238 if ((temp = find_reg_note (m1->insn, REG_RETVAL,
2239 NULL_RTX)))
2240 delete_insn_chain (XEXP (temp, 0), m1->insn);
2241 else
2242 delete_insn (m1->insn);
2244 /* Any other movable that loads the same register
2245 MUST be moved. */
2246 already_moved[m1->regno] = 1;
2248 /* The reg merged here is now invariant,
2249 if the reg it matches is invariant. */
2250 if (! m->partial)
2252 int i;
2253 for (i = 0;
2254 i < LOOP_REGNO_NREGS (regno, m1->set_dest);
2255 i++)
2256 regs->array[m1->regno+i].set_in_loop = 0;
2260 else if (loop_dump_stream)
2261 fprintf (loop_dump_stream, "not desirable");
2263 else if (loop_dump_stream && !m->match)
2264 fprintf (loop_dump_stream, "not safe");
2266 if (loop_dump_stream)
2267 fprintf (loop_dump_stream, "\n");
2270 if (new_start == 0)
2271 new_start = loop_start;
2273 /* Go through all the instructions in the loop, making
2274 all the register substitutions scheduled in REG_MAP. */
2275 for (p = new_start; p != loop_end; p = NEXT_INSN (p))
2276 if (GET_CODE (p) == INSN || GET_CODE (p) == JUMP_INSN
2277 || GET_CODE (p) == CALL_INSN)
2279 replace_regs (PATTERN (p), reg_map, nregs, 0);
2280 replace_regs (REG_NOTES (p), reg_map, nregs, 0);
2281 INSN_CODE (p) = -1;
2284 /* Clean up. */
2285 free (reg_map);
2286 free (already_moved);
2290 static void
2291 loop_movables_add (movables, m)
2292 struct loop_movables *movables;
2293 struct movable *m;
2295 if (movables->head == 0)
2296 movables->head = m;
2297 else
2298 movables->last->next = m;
2299 movables->last = m;
2303 static void
2304 loop_movables_free (movables)
2305 struct loop_movables *movables;
2307 struct movable *m;
2308 struct movable *m_next;
2310 for (m = movables->head; m; m = m_next)
2312 m_next = m->next;
2313 free (m);
2317 #if 0
2318 /* Scan X and replace the address of any MEM in it with ADDR.
2319 REG is the address that MEM should have before the replacement. */
2321 static void
2322 replace_call_address (x, reg, addr)
2323 rtx x, reg, addr;
2325 enum rtx_code code;
2326 int i;
2327 const char *fmt;
2329 if (x == 0)
2330 return;
2331 code = GET_CODE (x);
2332 switch (code)
2334 case PC:
2335 case CC0:
2336 case CONST_INT:
2337 case CONST_DOUBLE:
2338 case CONST:
2339 case SYMBOL_REF:
2340 case LABEL_REF:
2341 case REG:
2342 return;
2344 case SET:
2345 /* Short cut for very common case. */
2346 replace_call_address (XEXP (x, 1), reg, addr);
2347 return;
2349 case CALL:
2350 /* Short cut for very common case. */
2351 replace_call_address (XEXP (x, 0), reg, addr);
2352 return;
2354 case MEM:
2355 /* If this MEM uses a reg other than the one we expected,
2356 something is wrong. */
2357 if (XEXP (x, 0) != reg)
2358 abort ();
2359 XEXP (x, 0) = addr;
2360 return;
2362 default:
2363 break;
2366 fmt = GET_RTX_FORMAT (code);
2367 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2369 if (fmt[i] == 'e')
2370 replace_call_address (XEXP (x, i), reg, addr);
2371 else if (fmt[i] == 'E')
2373 int j;
2374 for (j = 0; j < XVECLEN (x, i); j++)
2375 replace_call_address (XVECEXP (x, i, j), reg, addr);
2379 #endif
2381 /* Return the number of memory refs to addresses that vary
2382 in the rtx X. */
2384 static int
2385 count_nonfixed_reads (loop, x)
2386 const struct loop *loop;
2387 rtx x;
2389 enum rtx_code code;
2390 int i;
2391 const char *fmt;
2392 int value;
2394 if (x == 0)
2395 return 0;
2397 code = GET_CODE (x);
2398 switch (code)
2400 case PC:
2401 case CC0:
2402 case CONST_INT:
2403 case CONST_DOUBLE:
2404 case CONST:
2405 case SYMBOL_REF:
2406 case LABEL_REF:
2407 case REG:
2408 return 0;
2410 case MEM:
2411 return ((loop_invariant_p (loop, XEXP (x, 0)) != 1)
2412 + count_nonfixed_reads (loop, XEXP (x, 0)));
2414 default:
2415 break;
2418 value = 0;
2419 fmt = GET_RTX_FORMAT (code);
2420 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2422 if (fmt[i] == 'e')
2423 value += count_nonfixed_reads (loop, XEXP (x, i));
2424 if (fmt[i] == 'E')
2426 int j;
2427 for (j = 0; j < XVECLEN (x, i); j++)
2428 value += count_nonfixed_reads (loop, XVECEXP (x, i, j));
2431 return value;
2434 /* Scan a loop setting the elements `cont', `vtop', `loops_enclosed',
2435 `has_call', `has_nonconst_call', `has_volatile', `has_tablejump',
2436 `unknown_address_altered', `unknown_constant_address_altered', and
2437 `num_mem_sets' in LOOP. Also, fill in the array `mems' and the
2438 list `store_mems' in LOOP. */
2440 static void
2441 prescan_loop (loop)
2442 struct loop *loop;
2444 int level = 1;
2445 rtx insn;
2446 struct loop_info *loop_info = LOOP_INFO (loop);
2447 rtx start = loop->start;
2448 rtx end = loop->end;
2449 /* The label after END. Jumping here is just like falling off the
2450 end of the loop. We use next_nonnote_insn instead of next_label
2451 as a hedge against the (pathological) case where some actual insn
2452 might end up between the two. */
2453 rtx exit_target = next_nonnote_insn (end);
2455 loop_info->has_indirect_jump = indirect_jump_in_function;
2456 loop_info->pre_header_has_call = 0;
2457 loop_info->has_call = 0;
2458 loop_info->has_nonconst_call = 0;
2459 loop_info->has_prefetch = 0;
2460 loop_info->has_volatile = 0;
2461 loop_info->has_tablejump = 0;
2462 loop_info->has_multiple_exit_targets = 0;
2463 loop->level = 1;
2465 loop_info->unknown_address_altered = 0;
2466 loop_info->unknown_constant_address_altered = 0;
2467 loop_info->store_mems = NULL_RTX;
2468 loop_info->first_loop_store_insn = NULL_RTX;
2469 loop_info->mems_idx = 0;
2470 loop_info->num_mem_sets = 0;
2471 /* If loop opts run twice, this was set on 1st pass for 2nd. */
2472 loop_info->preconditioned = NOTE_PRECONDITIONED (end);
2474 for (insn = start; insn && GET_CODE (insn) != CODE_LABEL;
2475 insn = PREV_INSN (insn))
2477 if (GET_CODE (insn) == CALL_INSN)
2479 loop_info->pre_header_has_call = 1;
2480 break;
2484 for (insn = NEXT_INSN (start); insn != NEXT_INSN (end);
2485 insn = NEXT_INSN (insn))
2487 switch (GET_CODE (insn))
2489 case NOTE:
2490 if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
2492 ++level;
2493 /* Count number of loops contained in this one. */
2494 loop->level++;
2496 else if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_END)
2497 --level;
2498 break;
2500 case CALL_INSN:
2501 if (! CONST_OR_PURE_CALL_P (insn))
2503 loop_info->unknown_address_altered = 1;
2504 loop_info->has_nonconst_call = 1;
2506 else if (pure_call_p (insn))
2507 loop_info->has_nonconst_call = 1;
2508 loop_info->has_call = 1;
2509 if (can_throw_internal (insn))
2510 loop_info->has_multiple_exit_targets = 1;
2511 break;
2513 case JUMP_INSN:
2514 if (! loop_info->has_multiple_exit_targets)
2516 rtx set = pc_set (insn);
2518 if (set)
2520 rtx src = SET_SRC (set);
2521 rtx label1, label2;
2523 if (GET_CODE (src) == IF_THEN_ELSE)
2525 label1 = XEXP (src, 1);
2526 label2 = XEXP (src, 2);
2528 else
2530 label1 = src;
2531 label2 = NULL_RTX;
2536 if (label1 && label1 != pc_rtx)
2538 if (GET_CODE (label1) != LABEL_REF)
2540 /* Something tricky. */
2541 loop_info->has_multiple_exit_targets = 1;
2542 break;
2544 else if (XEXP (label1, 0) != exit_target
2545 && LABEL_OUTSIDE_LOOP_P (label1))
2547 /* A jump outside the current loop. */
2548 loop_info->has_multiple_exit_targets = 1;
2549 break;
2553 label1 = label2;
2554 label2 = NULL_RTX;
2556 while (label1);
2558 else
2560 /* A return, or something tricky. */
2561 loop_info->has_multiple_exit_targets = 1;
2564 /* FALLTHRU */
2566 case INSN:
2567 if (volatile_refs_p (PATTERN (insn)))
2568 loop_info->has_volatile = 1;
2570 if (GET_CODE (insn) == JUMP_INSN
2571 && (GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
2572 || GET_CODE (PATTERN (insn)) == ADDR_VEC))
2573 loop_info->has_tablejump = 1;
2575 note_stores (PATTERN (insn), note_addr_stored, loop_info);
2576 if (! loop_info->first_loop_store_insn && loop_info->store_mems)
2577 loop_info->first_loop_store_insn = insn;
2579 if (flag_non_call_exceptions && can_throw_internal (insn))
2580 loop_info->has_multiple_exit_targets = 1;
2581 break;
2583 default:
2584 break;
2588 /* Now, rescan the loop, setting up the LOOP_MEMS array. */
2589 if (/* An exception thrown by a called function might land us
2590 anywhere. */
2591 ! loop_info->has_nonconst_call
2592 /* We don't want loads for MEMs moved to a location before the
2593 one at which their stack memory becomes allocated. (Note
2594 that this is not a problem for malloc, etc., since those
2595 require actual function calls. */
2596 && ! current_function_calls_alloca
2597 /* There are ways to leave the loop other than falling off the
2598 end. */
2599 && ! loop_info->has_multiple_exit_targets)
2600 for (insn = NEXT_INSN (start); insn != NEXT_INSN (end);
2601 insn = NEXT_INSN (insn))
2602 for_each_rtx (&insn, insert_loop_mem, loop_info);
2604 /* BLKmode MEMs are added to LOOP_STORE_MEM as necessary so
2605 that loop_invariant_p and load_mems can use true_dependence
2606 to determine what is really clobbered. */
2607 if (loop_info->unknown_address_altered)
2609 rtx mem = gen_rtx_MEM (BLKmode, const0_rtx);
2611 loop_info->store_mems
2612 = gen_rtx_EXPR_LIST (VOIDmode, mem, loop_info->store_mems);
2614 if (loop_info->unknown_constant_address_altered)
2616 rtx mem = gen_rtx_MEM (BLKmode, const0_rtx);
2618 RTX_UNCHANGING_P (mem) = 1;
2619 loop_info->store_mems
2620 = gen_rtx_EXPR_LIST (VOIDmode, mem, loop_info->store_mems);
2624 /* Invalidate all loops containing LABEL. */
2626 static void
2627 invalidate_loops_containing_label (label)
2628 rtx label;
2630 struct loop *loop;
2631 for (loop = uid_loop[INSN_UID (label)]; loop; loop = loop->outer)
2632 loop->invalid = 1;
2635 /* Scan the function looking for loops. Record the start and end of each loop.
2636 Also mark as invalid loops any loops that contain a setjmp or are branched
2637 to from outside the loop. */
2639 static void
2640 find_and_verify_loops (f, loops)
2641 rtx f;
2642 struct loops *loops;
2644 rtx insn;
2645 rtx label;
2646 int num_loops;
2647 struct loop *current_loop;
2648 struct loop *next_loop;
2649 struct loop *loop;
2651 num_loops = loops->num;
2653 compute_luids (f, NULL_RTX, 0);
2655 /* If there are jumps to undefined labels,
2656 treat them as jumps out of any/all loops.
2657 This also avoids writing past end of tables when there are no loops. */
2658 uid_loop[0] = NULL;
2660 /* Find boundaries of loops, mark which loops are contained within
2661 loops, and invalidate loops that have setjmp. */
2663 num_loops = 0;
2664 current_loop = NULL;
2665 for (insn = f; insn; insn = NEXT_INSN (insn))
2667 if (GET_CODE (insn) == NOTE)
2668 switch (NOTE_LINE_NUMBER (insn))
2670 case NOTE_INSN_LOOP_BEG:
2671 next_loop = loops->array + num_loops;
2672 next_loop->num = num_loops;
2673 num_loops++;
2674 next_loop->start = insn;
2675 next_loop->outer = current_loop;
2676 current_loop = next_loop;
2677 break;
2679 case NOTE_INSN_LOOP_CONT:
2680 current_loop->cont = insn;
2681 break;
2683 case NOTE_INSN_LOOP_VTOP:
2684 current_loop->vtop = insn;
2685 break;
2687 case NOTE_INSN_LOOP_END:
2688 if (! current_loop)
2689 abort ();
2691 current_loop->end = insn;
2692 current_loop = current_loop->outer;
2693 break;
2695 default:
2696 break;
2699 if (GET_CODE (insn) == CALL_INSN
2700 && find_reg_note (insn, REG_SETJMP, NULL))
2702 /* In this case, we must invalidate our current loop and any
2703 enclosing loop. */
2704 for (loop = current_loop; loop; loop = loop->outer)
2706 loop->invalid = 1;
2707 if (loop_dump_stream)
2708 fprintf (loop_dump_stream,
2709 "\nLoop at %d ignored due to setjmp.\n",
2710 INSN_UID (loop->start));
2714 /* Note that this will mark the NOTE_INSN_LOOP_END note as being in the
2715 enclosing loop, but this doesn't matter. */
2716 uid_loop[INSN_UID (insn)] = current_loop;
2719 /* Any loop containing a label used in an initializer must be invalidated,
2720 because it can be jumped into from anywhere. */
2721 for (label = forced_labels; label; label = XEXP (label, 1))
2722 invalidate_loops_containing_label (XEXP (label, 0));
2724 /* Any loop containing a label used for an exception handler must be
2725 invalidated, because it can be jumped into from anywhere. */
2726 for_each_eh_label (invalidate_loops_containing_label);
2728 /* Now scan all insn's in the function. If any JUMP_INSN branches into a
2729 loop that it is not contained within, that loop is marked invalid.
2730 If any INSN or CALL_INSN uses a label's address, then the loop containing
2731 that label is marked invalid, because it could be jumped into from
2732 anywhere.
2734 Also look for blocks of code ending in an unconditional branch that
2735 exits the loop. If such a block is surrounded by a conditional
2736 branch around the block, move the block elsewhere (see below) and
2737 invert the jump to point to the code block. This may eliminate a
2738 label in our loop and will simplify processing by both us and a
2739 possible second cse pass. */
2741 for (insn = f; insn; insn = NEXT_INSN (insn))
2742 if (INSN_P (insn))
2744 struct loop *this_loop = uid_loop[INSN_UID (insn)];
2746 if (GET_CODE (insn) == INSN || GET_CODE (insn) == CALL_INSN)
2748 rtx note = find_reg_note (insn, REG_LABEL, NULL_RTX);
2749 if (note)
2750 invalidate_loops_containing_label (XEXP (note, 0));
2753 if (GET_CODE (insn) != JUMP_INSN)
2754 continue;
2756 mark_loop_jump (PATTERN (insn), this_loop);
2758 /* See if this is an unconditional branch outside the loop. */
2759 if (this_loop
2760 && (GET_CODE (PATTERN (insn)) == RETURN
2761 || (any_uncondjump_p (insn)
2762 && onlyjump_p (insn)
2763 && (uid_loop[INSN_UID (JUMP_LABEL (insn))]
2764 != this_loop)))
2765 && get_max_uid () < max_uid_for_loop)
2767 rtx p;
2768 rtx our_next = next_real_insn (insn);
2769 rtx last_insn_to_move = NEXT_INSN (insn);
2770 struct loop *dest_loop;
2771 struct loop *outer_loop = NULL;
2773 /* Go backwards until we reach the start of the loop, a label,
2774 or a JUMP_INSN. */
2775 for (p = PREV_INSN (insn);
2776 GET_CODE (p) != CODE_LABEL
2777 && ! (GET_CODE (p) == NOTE
2778 && NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_BEG)
2779 && GET_CODE (p) != JUMP_INSN;
2780 p = PREV_INSN (p))
2783 /* Check for the case where we have a jump to an inner nested
2784 loop, and do not perform the optimization in that case. */
2786 if (JUMP_LABEL (insn))
2788 dest_loop = uid_loop[INSN_UID (JUMP_LABEL (insn))];
2789 if (dest_loop)
2791 for (outer_loop = dest_loop; outer_loop;
2792 outer_loop = outer_loop->outer)
2793 if (outer_loop == this_loop)
2794 break;
2798 /* Make sure that the target of P is within the current loop. */
2800 if (GET_CODE (p) == JUMP_INSN && JUMP_LABEL (p)
2801 && uid_loop[INSN_UID (JUMP_LABEL (p))] != this_loop)
2802 outer_loop = this_loop;
2804 /* If we stopped on a JUMP_INSN to the next insn after INSN,
2805 we have a block of code to try to move.
2807 We look backward and then forward from the target of INSN
2808 to find a BARRIER at the same loop depth as the target.
2809 If we find such a BARRIER, we make a new label for the start
2810 of the block, invert the jump in P and point it to that label,
2811 and move the block of code to the spot we found. */
2813 if (! outer_loop
2814 && GET_CODE (p) == JUMP_INSN
2815 && JUMP_LABEL (p) != 0
2816 /* Just ignore jumps to labels that were never emitted.
2817 These always indicate compilation errors. */
2818 && INSN_UID (JUMP_LABEL (p)) != 0
2819 && any_condjump_p (p) && onlyjump_p (p)
2820 && next_real_insn (JUMP_LABEL (p)) == our_next
2821 /* If it's not safe to move the sequence, then we
2822 mustn't try. */
2823 && insns_safe_to_move_p (p, NEXT_INSN (insn),
2824 &last_insn_to_move))
2826 rtx target
2827 = JUMP_LABEL (insn) ? JUMP_LABEL (insn) : get_last_insn ();
2828 struct loop *target_loop = uid_loop[INSN_UID (target)];
2829 rtx loc, loc2;
2830 rtx tmp;
2832 /* Search for possible garbage past the conditional jumps
2833 and look for the last barrier. */
2834 for (tmp = last_insn_to_move;
2835 tmp && GET_CODE (tmp) != CODE_LABEL; tmp = NEXT_INSN (tmp))
2836 if (GET_CODE (tmp) == BARRIER)
2837 last_insn_to_move = tmp;
2839 for (loc = target; loc; loc = PREV_INSN (loc))
2840 if (GET_CODE (loc) == BARRIER
2841 /* Don't move things inside a tablejump. */
2842 && ((loc2 = next_nonnote_insn (loc)) == 0
2843 || GET_CODE (loc2) != CODE_LABEL
2844 || (loc2 = next_nonnote_insn (loc2)) == 0
2845 || GET_CODE (loc2) != JUMP_INSN
2846 || (GET_CODE (PATTERN (loc2)) != ADDR_VEC
2847 && GET_CODE (PATTERN (loc2)) != ADDR_DIFF_VEC))
2848 && uid_loop[INSN_UID (loc)] == target_loop)
2849 break;
2851 if (loc == 0)
2852 for (loc = target; loc; loc = NEXT_INSN (loc))
2853 if (GET_CODE (loc) == BARRIER
2854 /* Don't move things inside a tablejump. */
2855 && ((loc2 = next_nonnote_insn (loc)) == 0
2856 || GET_CODE (loc2) != CODE_LABEL
2857 || (loc2 = next_nonnote_insn (loc2)) == 0
2858 || GET_CODE (loc2) != JUMP_INSN
2859 || (GET_CODE (PATTERN (loc2)) != ADDR_VEC
2860 && GET_CODE (PATTERN (loc2)) != ADDR_DIFF_VEC))
2861 && uid_loop[INSN_UID (loc)] == target_loop)
2862 break;
2864 if (loc)
2866 rtx cond_label = JUMP_LABEL (p);
2867 rtx new_label = get_label_after (p);
2869 /* Ensure our label doesn't go away. */
2870 LABEL_NUSES (cond_label)++;
2872 /* Verify that uid_loop is large enough and that
2873 we can invert P. */
2874 if (invert_jump (p, new_label, 1))
2876 rtx q, r;
2878 /* If no suitable BARRIER was found, create a suitable
2879 one before TARGET. Since TARGET is a fall through
2880 path, we'll need to insert a jump around our block
2881 and add a BARRIER before TARGET.
2883 This creates an extra unconditional jump outside
2884 the loop. However, the benefits of removing rarely
2885 executed instructions from inside the loop usually
2886 outweighs the cost of the extra unconditional jump
2887 outside the loop. */
2888 if (loc == 0)
2890 rtx temp;
2892 temp = gen_jump (JUMP_LABEL (insn));
2893 temp = emit_jump_insn_before (temp, target);
2894 JUMP_LABEL (temp) = JUMP_LABEL (insn);
2895 LABEL_NUSES (JUMP_LABEL (insn))++;
2896 loc = emit_barrier_before (target);
2899 /* Include the BARRIER after INSN and copy the
2900 block after LOC. */
2901 if (squeeze_notes (&new_label, &last_insn_to_move))
2902 abort ();
2903 reorder_insns (new_label, last_insn_to_move, loc);
2905 /* All those insns are now in TARGET_LOOP. */
2906 for (q = new_label;
2907 q != NEXT_INSN (last_insn_to_move);
2908 q = NEXT_INSN (q))
2909 uid_loop[INSN_UID (q)] = target_loop;
2911 /* The label jumped to by INSN is no longer a loop
2912 exit. Unless INSN does not have a label (e.g.,
2913 it is a RETURN insn), search loop->exit_labels
2914 to find its label_ref, and remove it. Also turn
2915 off LABEL_OUTSIDE_LOOP_P bit. */
2916 if (JUMP_LABEL (insn))
2918 for (q = 0, r = this_loop->exit_labels;
2920 q = r, r = LABEL_NEXTREF (r))
2921 if (XEXP (r, 0) == JUMP_LABEL (insn))
2923 LABEL_OUTSIDE_LOOP_P (r) = 0;
2924 if (q)
2925 LABEL_NEXTREF (q) = LABEL_NEXTREF (r);
2926 else
2927 this_loop->exit_labels = LABEL_NEXTREF (r);
2928 break;
2931 for (loop = this_loop; loop && loop != target_loop;
2932 loop = loop->outer)
2933 loop->exit_count--;
2935 /* If we didn't find it, then something is
2936 wrong. */
2937 if (! r)
2938 abort ();
2941 /* P is now a jump outside the loop, so it must be put
2942 in loop->exit_labels, and marked as such.
2943 The easiest way to do this is to just call
2944 mark_loop_jump again for P. */
2945 mark_loop_jump (PATTERN (p), this_loop);
2947 /* If INSN now jumps to the insn after it,
2948 delete INSN. */
2949 if (JUMP_LABEL (insn) != 0
2950 && (next_real_insn (JUMP_LABEL (insn))
2951 == next_real_insn (insn)))
2952 delete_related_insns (insn);
2955 /* Continue the loop after where the conditional
2956 branch used to jump, since the only branch insn
2957 in the block (if it still remains) is an inter-loop
2958 branch and hence needs no processing. */
2959 insn = NEXT_INSN (cond_label);
2961 if (--LABEL_NUSES (cond_label) == 0)
2962 delete_related_insns (cond_label);
2964 /* This loop will be continued with NEXT_INSN (insn). */
2965 insn = PREV_INSN (insn);
2972 /* If any label in X jumps to a loop different from LOOP_NUM and any of the
2973 loops it is contained in, mark the target loop invalid.
2975 For speed, we assume that X is part of a pattern of a JUMP_INSN. */
2977 static void
2978 mark_loop_jump (x, loop)
2979 rtx x;
2980 struct loop *loop;
2982 struct loop *dest_loop;
2983 struct loop *outer_loop;
2984 int i;
2986 switch (GET_CODE (x))
2988 case PC:
2989 case USE:
2990 case CLOBBER:
2991 case REG:
2992 case MEM:
2993 case CONST_INT:
2994 case CONST_DOUBLE:
2995 case RETURN:
2996 return;
2998 case CONST:
2999 /* There could be a label reference in here. */
3000 mark_loop_jump (XEXP (x, 0), loop);
3001 return;
3003 case PLUS:
3004 case MINUS:
3005 case MULT:
3006 mark_loop_jump (XEXP (x, 0), loop);
3007 mark_loop_jump (XEXP (x, 1), loop);
3008 return;
3010 case LO_SUM:
3011 /* This may refer to a LABEL_REF or SYMBOL_REF. */
3012 mark_loop_jump (XEXP (x, 1), loop);
3013 return;
3015 case SIGN_EXTEND:
3016 case ZERO_EXTEND:
3017 mark_loop_jump (XEXP (x, 0), loop);
3018 return;
3020 case LABEL_REF:
3021 dest_loop = uid_loop[INSN_UID (XEXP (x, 0))];
3023 /* Link together all labels that branch outside the loop. This
3024 is used by final_[bg]iv_value and the loop unrolling code. Also
3025 mark this LABEL_REF so we know that this branch should predict
3026 false. */
3028 /* A check to make sure the label is not in an inner nested loop,
3029 since this does not count as a loop exit. */
3030 if (dest_loop)
3032 for (outer_loop = dest_loop; outer_loop;
3033 outer_loop = outer_loop->outer)
3034 if (outer_loop == loop)
3035 break;
3037 else
3038 outer_loop = NULL;
3040 if (loop && ! outer_loop)
3042 LABEL_OUTSIDE_LOOP_P (x) = 1;
3043 LABEL_NEXTREF (x) = loop->exit_labels;
3044 loop->exit_labels = x;
3046 for (outer_loop = loop;
3047 outer_loop && outer_loop != dest_loop;
3048 outer_loop = outer_loop->outer)
3049 outer_loop->exit_count++;
3052 /* If this is inside a loop, but not in the current loop or one enclosed
3053 by it, it invalidates at least one loop. */
3055 if (! dest_loop)
3056 return;
3058 /* We must invalidate every nested loop containing the target of this
3059 label, except those that also contain the jump insn. */
3061 for (; dest_loop; dest_loop = dest_loop->outer)
3063 /* Stop when we reach a loop that also contains the jump insn. */
3064 for (outer_loop = loop; outer_loop; outer_loop = outer_loop->outer)
3065 if (dest_loop == outer_loop)
3066 return;
3068 /* If we get here, we know we need to invalidate a loop. */
3069 if (loop_dump_stream && ! dest_loop->invalid)
3070 fprintf (loop_dump_stream,
3071 "\nLoop at %d ignored due to multiple entry points.\n",
3072 INSN_UID (dest_loop->start));
3074 dest_loop->invalid = 1;
3076 return;
3078 case SET:
3079 /* If this is not setting pc, ignore. */
3080 if (SET_DEST (x) == pc_rtx)
3081 mark_loop_jump (SET_SRC (x), loop);
3082 return;
3084 case IF_THEN_ELSE:
3085 mark_loop_jump (XEXP (x, 1), loop);
3086 mark_loop_jump (XEXP (x, 2), loop);
3087 return;
3089 case PARALLEL:
3090 case ADDR_VEC:
3091 for (i = 0; i < XVECLEN (x, 0); i++)
3092 mark_loop_jump (XVECEXP (x, 0, i), loop);
3093 return;
3095 case ADDR_DIFF_VEC:
3096 for (i = 0; i < XVECLEN (x, 1); i++)
3097 mark_loop_jump (XVECEXP (x, 1, i), loop);
3098 return;
3100 default:
3101 /* Strictly speaking this is not a jump into the loop, only a possible
3102 jump out of the loop. However, we have no way to link the destination
3103 of this jump onto the list of exit labels. To be safe we mark this
3104 loop and any containing loops as invalid. */
3105 if (loop)
3107 for (outer_loop = loop; outer_loop; outer_loop = outer_loop->outer)
3109 if (loop_dump_stream && ! outer_loop->invalid)
3110 fprintf (loop_dump_stream,
3111 "\nLoop at %d ignored due to unknown exit jump.\n",
3112 INSN_UID (outer_loop->start));
3113 outer_loop->invalid = 1;
3116 return;
3120 /* Return nonzero if there is a label in the range from
3121 insn INSN to and including the insn whose luid is END
3122 INSN must have an assigned luid (i.e., it must not have
3123 been previously created by loop.c). */
3125 static int
3126 labels_in_range_p (insn, end)
3127 rtx insn;
3128 int end;
3130 while (insn && INSN_LUID (insn) <= end)
3132 if (GET_CODE (insn) == CODE_LABEL)
3133 return 1;
3134 insn = NEXT_INSN (insn);
3137 return 0;
3140 /* Record that a memory reference X is being set. */
3142 static void
3143 note_addr_stored (x, y, data)
3144 rtx x;
3145 rtx y ATTRIBUTE_UNUSED;
3146 void *data ATTRIBUTE_UNUSED;
3148 struct loop_info *loop_info = data;
3150 if (x == 0 || GET_CODE (x) != MEM)
3151 return;
3153 /* Count number of memory writes.
3154 This affects heuristics in strength_reduce. */
3155 loop_info->num_mem_sets++;
3157 /* BLKmode MEM means all memory is clobbered. */
3158 if (GET_MODE (x) == BLKmode)
3160 if (RTX_UNCHANGING_P (x))
3161 loop_info->unknown_constant_address_altered = 1;
3162 else
3163 loop_info->unknown_address_altered = 1;
3165 return;
3168 loop_info->store_mems = gen_rtx_EXPR_LIST (VOIDmode, x,
3169 loop_info->store_mems);
3172 /* X is a value modified by an INSN that references a biv inside a loop
3173 exit test (ie, X is somehow related to the value of the biv). If X
3174 is a pseudo that is used more than once, then the biv is (effectively)
3175 used more than once. DATA is a pointer to a loop_regs structure. */
3177 static void
3178 note_set_pseudo_multiple_uses (x, y, data)
3179 rtx x;
3180 rtx y ATTRIBUTE_UNUSED;
3181 void *data;
3183 struct loop_regs *regs = (struct loop_regs *) data;
3185 if (x == 0)
3186 return;
3188 while (GET_CODE (x) == STRICT_LOW_PART
3189 || GET_CODE (x) == SIGN_EXTRACT
3190 || GET_CODE (x) == ZERO_EXTRACT
3191 || GET_CODE (x) == SUBREG)
3192 x = XEXP (x, 0);
3194 if (GET_CODE (x) != REG || REGNO (x) < FIRST_PSEUDO_REGISTER)
3195 return;
3197 /* If we do not have usage information, or if we know the register
3198 is used more than once, note that fact for check_dbra_loop. */
3199 if (REGNO (x) >= max_reg_before_loop
3200 || ! regs->array[REGNO (x)].single_usage
3201 || regs->array[REGNO (x)].single_usage == const0_rtx)
3202 regs->multiple_uses = 1;
3205 /* Return nonzero if the rtx X is invariant over the current loop.
3207 The value is 2 if we refer to something only conditionally invariant.
3209 A memory ref is invariant if it is not volatile and does not conflict
3210 with anything stored in `loop_info->store_mems'. */
3213 loop_invariant_p (loop, x)
3214 const struct loop *loop;
3215 rtx x;
3217 struct loop_info *loop_info = LOOP_INFO (loop);
3218 struct loop_regs *regs = LOOP_REGS (loop);
3219 int i;
3220 enum rtx_code code;
3221 const char *fmt;
3222 int conditional = 0;
3223 rtx mem_list_entry;
3225 if (x == 0)
3226 return 1;
3227 code = GET_CODE (x);
3228 switch (code)
3230 case CONST_INT:
3231 case CONST_DOUBLE:
3232 case SYMBOL_REF:
3233 case CONST:
3234 return 1;
3236 case LABEL_REF:
3237 /* A LABEL_REF is normally invariant, however, if we are unrolling
3238 loops, and this label is inside the loop, then it isn't invariant.
3239 This is because each unrolled copy of the loop body will have
3240 a copy of this label. If this was invariant, then an insn loading
3241 the address of this label into a register might get moved outside
3242 the loop, and then each loop body would end up using the same label.
3244 We don't know the loop bounds here though, so just fail for all
3245 labels. */
3246 if (flag_unroll_loops)
3247 return 0;
3248 else
3249 return 1;
3251 case PC:
3252 case CC0:
3253 case UNSPEC_VOLATILE:
3254 return 0;
3256 case REG:
3257 /* We used to check RTX_UNCHANGING_P (x) here, but that is invalid
3258 since the reg might be set by initialization within the loop. */
3260 if ((x == frame_pointer_rtx || x == hard_frame_pointer_rtx
3261 || x == arg_pointer_rtx || x == pic_offset_table_rtx)
3262 && ! current_function_has_nonlocal_goto)
3263 return 1;
3265 if (LOOP_INFO (loop)->has_call
3266 && REGNO (x) < FIRST_PSEUDO_REGISTER && call_used_regs[REGNO (x)])
3267 return 0;
3269 /* Out-of-range regs can occur when we are called from unrolling.
3270 These have always been created by the unroller and are set in
3271 the loop, hence are never invariant. */
3273 if (REGNO (x) >= (unsigned) regs->num)
3274 return 0;
3276 if (regs->array[REGNO (x)].set_in_loop < 0)
3277 return 2;
3279 return regs->array[REGNO (x)].set_in_loop == 0;
3281 case MEM:
3282 /* Volatile memory references must be rejected. Do this before
3283 checking for read-only items, so that volatile read-only items
3284 will be rejected also. */
3285 if (MEM_VOLATILE_P (x))
3286 return 0;
3288 /* See if there is any dependence between a store and this load. */
3289 mem_list_entry = loop_info->store_mems;
3290 while (mem_list_entry)
3292 if (true_dependence (XEXP (mem_list_entry, 0), VOIDmode,
3293 x, rtx_varies_p))
3294 return 0;
3296 mem_list_entry = XEXP (mem_list_entry, 1);
3299 /* It's not invalidated by a store in memory
3300 but we must still verify the address is invariant. */
3301 break;
3303 case ASM_OPERANDS:
3304 /* Don't mess with insns declared volatile. */
3305 if (MEM_VOLATILE_P (x))
3306 return 0;
3307 break;
3309 default:
3310 break;
3313 fmt = GET_RTX_FORMAT (code);
3314 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3316 if (fmt[i] == 'e')
3318 int tem = loop_invariant_p (loop, XEXP (x, i));
3319 if (tem == 0)
3320 return 0;
3321 if (tem == 2)
3322 conditional = 1;
3324 else if (fmt[i] == 'E')
3326 int j;
3327 for (j = 0; j < XVECLEN (x, i); j++)
3329 int tem = loop_invariant_p (loop, XVECEXP (x, i, j));
3330 if (tem == 0)
3331 return 0;
3332 if (tem == 2)
3333 conditional = 1;
3339 return 1 + conditional;
3342 /* Return nonzero if all the insns in the loop that set REG
3343 are INSN and the immediately following insns,
3344 and if each of those insns sets REG in an invariant way
3345 (not counting uses of REG in them).
3347 The value is 2 if some of these insns are only conditionally invariant.
3349 We assume that INSN itself is the first set of REG
3350 and that its source is invariant. */
3352 static int
3353 consec_sets_invariant_p (loop, reg, n_sets, insn)
3354 const struct loop *loop;
3355 int n_sets;
3356 rtx reg, insn;
3358 struct loop_regs *regs = LOOP_REGS (loop);
3359 rtx p = insn;
3360 unsigned int regno = REGNO (reg);
3361 rtx temp;
3362 /* Number of sets we have to insist on finding after INSN. */
3363 int count = n_sets - 1;
3364 int old = regs->array[regno].set_in_loop;
3365 int value = 0;
3366 int this;
3368 /* If N_SETS hit the limit, we can't rely on its value. */
3369 if (n_sets == 127)
3370 return 0;
3372 regs->array[regno].set_in_loop = 0;
3374 while (count > 0)
3376 enum rtx_code code;
3377 rtx set;
3379 p = NEXT_INSN (p);
3380 code = GET_CODE (p);
3382 /* If library call, skip to end of it. */
3383 if (code == INSN && (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
3384 p = XEXP (temp, 0);
3386 this = 0;
3387 if (code == INSN
3388 && (set = single_set (p))
3389 && GET_CODE (SET_DEST (set)) == REG
3390 && REGNO (SET_DEST (set)) == regno)
3392 this = loop_invariant_p (loop, SET_SRC (set));
3393 if (this != 0)
3394 value |= this;
3395 else if ((temp = find_reg_note (p, REG_EQUAL, NULL_RTX)))
3397 /* If this is a libcall, then any invariant REG_EQUAL note is OK.
3398 If this is an ordinary insn, then only CONSTANT_P REG_EQUAL
3399 notes are OK. */
3400 this = (CONSTANT_P (XEXP (temp, 0))
3401 || (find_reg_note (p, REG_RETVAL, NULL_RTX)
3402 && loop_invariant_p (loop, XEXP (temp, 0))));
3403 if (this != 0)
3404 value |= this;
3407 if (this != 0)
3408 count--;
3409 else if (code != NOTE)
3411 regs->array[regno].set_in_loop = old;
3412 return 0;
3416 regs->array[regno].set_in_loop = old;
3417 /* If loop_invariant_p ever returned 2, we return 2. */
3418 return 1 + (value & 2);
3421 #if 0
3422 /* I don't think this condition is sufficient to allow INSN
3423 to be moved, so we no longer test it. */
3425 /* Return 1 if all insns in the basic block of INSN and following INSN
3426 that set REG are invariant according to TABLE. */
3428 static int
3429 all_sets_invariant_p (reg, insn, table)
3430 rtx reg, insn;
3431 short *table;
3433 rtx p = insn;
3434 int regno = REGNO (reg);
3436 while (1)
3438 enum rtx_code code;
3439 p = NEXT_INSN (p);
3440 code = GET_CODE (p);
3441 if (code == CODE_LABEL || code == JUMP_INSN)
3442 return 1;
3443 if (code == INSN && GET_CODE (PATTERN (p)) == SET
3444 && GET_CODE (SET_DEST (PATTERN (p))) == REG
3445 && REGNO (SET_DEST (PATTERN (p))) == regno)
3447 if (! loop_invariant_p (loop, SET_SRC (PATTERN (p)), table))
3448 return 0;
3452 #endif /* 0 */
3454 /* Look at all uses (not sets) of registers in X. For each, if it is
3455 the single use, set USAGE[REGNO] to INSN; if there was a previous use in
3456 a different insn, set USAGE[REGNO] to const0_rtx. */
3458 static void
3459 find_single_use_in_loop (regs, insn, x)
3460 struct loop_regs *regs;
3461 rtx insn;
3462 rtx x;
3464 enum rtx_code code = GET_CODE (x);
3465 const char *fmt = GET_RTX_FORMAT (code);
3466 int i, j;
3468 if (code == REG)
3469 regs->array[REGNO (x)].single_usage
3470 = (regs->array[REGNO (x)].single_usage != 0
3471 && regs->array[REGNO (x)].single_usage != insn)
3472 ? const0_rtx : insn;
3474 else if (code == SET)
3476 /* Don't count SET_DEST if it is a REG; otherwise count things
3477 in SET_DEST because if a register is partially modified, it won't
3478 show up as a potential movable so we don't care how USAGE is set
3479 for it. */
3480 if (GET_CODE (SET_DEST (x)) != REG)
3481 find_single_use_in_loop (regs, insn, SET_DEST (x));
3482 find_single_use_in_loop (regs, insn, SET_SRC (x));
3484 else
3485 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3487 if (fmt[i] == 'e' && XEXP (x, i) != 0)
3488 find_single_use_in_loop (regs, insn, XEXP (x, i));
3489 else if (fmt[i] == 'E')
3490 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3491 find_single_use_in_loop (regs, insn, XVECEXP (x, i, j));
3495 /* Count and record any set in X which is contained in INSN. Update
3496 REGS->array[I].MAY_NOT_OPTIMIZE and LAST_SET for any register I set
3497 in X. */
3499 static void
3500 count_one_set (regs, insn, x, last_set)
3501 struct loop_regs *regs;
3502 rtx insn, x;
3503 rtx *last_set;
3505 if (GET_CODE (x) == CLOBBER && GET_CODE (XEXP (x, 0)) == REG)
3506 /* Don't move a reg that has an explicit clobber.
3507 It's not worth the pain to try to do it correctly. */
3508 regs->array[REGNO (XEXP (x, 0))].may_not_optimize = 1;
3510 if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
3512 rtx dest = SET_DEST (x);
3513 while (GET_CODE (dest) == SUBREG
3514 || GET_CODE (dest) == ZERO_EXTRACT
3515 || GET_CODE (dest) == SIGN_EXTRACT
3516 || GET_CODE (dest) == STRICT_LOW_PART)
3517 dest = XEXP (dest, 0);
3518 if (GET_CODE (dest) == REG)
3520 int i;
3521 int regno = REGNO (dest);
3522 for (i = 0; i < LOOP_REGNO_NREGS (regno, dest); i++)
3524 /* If this is the first setting of this reg
3525 in current basic block, and it was set before,
3526 it must be set in two basic blocks, so it cannot
3527 be moved out of the loop. */
3528 if (regs->array[regno].set_in_loop > 0
3529 && last_set == 0)
3530 regs->array[regno+i].may_not_optimize = 1;
3531 /* If this is not first setting in current basic block,
3532 see if reg was used in between previous one and this.
3533 If so, neither one can be moved. */
3534 if (last_set[regno] != 0
3535 && reg_used_between_p (dest, last_set[regno], insn))
3536 regs->array[regno+i].may_not_optimize = 1;
3537 if (regs->array[regno+i].set_in_loop < 127)
3538 ++regs->array[regno+i].set_in_loop;
3539 last_set[regno+i] = insn;
3545 /* Given a loop that is bounded by LOOP->START and LOOP->END and that
3546 is entered at LOOP->SCAN_START, return 1 if the register set in SET
3547 contained in insn INSN is used by any insn that precedes INSN in
3548 cyclic order starting from the loop entry point.
3550 We don't want to use INSN_LUID here because if we restrict INSN to those
3551 that have a valid INSN_LUID, it means we cannot move an invariant out
3552 from an inner loop past two loops. */
3554 static int
3555 loop_reg_used_before_p (loop, set, insn)
3556 const struct loop *loop;
3557 rtx set, insn;
3559 rtx reg = SET_DEST (set);
3560 rtx p;
3562 /* Scan forward checking for register usage. If we hit INSN, we
3563 are done. Otherwise, if we hit LOOP->END, wrap around to LOOP->START. */
3564 for (p = loop->scan_start; p != insn; p = NEXT_INSN (p))
3566 if (INSN_P (p) && reg_overlap_mentioned_p (reg, PATTERN (p)))
3567 return 1;
3569 if (p == loop->end)
3570 p = loop->start;
3573 return 0;
3577 /* Information we collect about arrays that we might want to prefetch. */
3578 struct prefetch_info
3580 struct iv_class *class; /* Class this prefetch is based on. */
3581 struct induction *giv; /* GIV this prefetch is based on. */
3582 rtx base_address; /* Start prefetching from this address plus
3583 index. */
3584 HOST_WIDE_INT index;
3585 HOST_WIDE_INT stride; /* Prefetch stride in bytes in each
3586 iteration. */
3587 unsigned int bytes_accessed; /* Sum of sizes of all accesses to this
3588 prefetch area in one iteration. */
3589 unsigned int total_bytes; /* Total bytes loop will access in this block.
3590 This is set only for loops with known
3591 iteration counts and is 0xffffffff
3592 otherwise. */
3593 int prefetch_in_loop; /* Number of prefetch insns in loop. */
3594 int prefetch_before_loop; /* Number of prefetch insns before loop. */
3595 unsigned int write : 1; /* 1 for read/write prefetches. */
3598 /* Data used by check_store function. */
3599 struct check_store_data
3601 rtx mem_address;
3602 int mem_write;
3605 static void check_store PARAMS ((rtx, rtx, void *));
3606 static void emit_prefetch_instructions PARAMS ((struct loop *));
3607 static int rtx_equal_for_prefetch_p PARAMS ((rtx, rtx));
3609 /* Set mem_write when mem_address is found. Used as callback to
3610 note_stores. */
3611 static void
3612 check_store (x, pat, data)
3613 rtx x, pat ATTRIBUTE_UNUSED;
3614 void *data;
3616 struct check_store_data *d = (struct check_store_data *) data;
3618 if ((GET_CODE (x) == MEM) && rtx_equal_p (d->mem_address, XEXP (x, 0)))
3619 d->mem_write = 1;
3622 /* Like rtx_equal_p, but attempts to swap commutative operands. This is
3623 important to get some addresses combined. Later more sophisticated
3624 transformations can be added when necessary.
3626 ??? Same trick with swapping operand is done at several other places.
3627 It can be nice to develop some common way to handle this. */
3629 static int
3630 rtx_equal_for_prefetch_p (x, y)
3631 rtx x, y;
3633 int i;
3634 int j;
3635 enum rtx_code code = GET_CODE (x);
3636 const char *fmt;
3638 if (x == y)
3639 return 1;
3640 if (code != GET_CODE (y))
3641 return 0;
3643 code = GET_CODE (x);
3645 if (GET_RTX_CLASS (code) == 'c')
3647 return ((rtx_equal_for_prefetch_p (XEXP (x, 0), XEXP (y, 0))
3648 && rtx_equal_for_prefetch_p (XEXP (x, 1), XEXP (y, 1)))
3649 || (rtx_equal_for_prefetch_p (XEXP (x, 0), XEXP (y, 1))
3650 && rtx_equal_for_prefetch_p (XEXP (x, 1), XEXP (y, 0))));
3652 /* Compare the elements. If any pair of corresponding elements fails to
3653 match, return 0 for the whole thing. */
3655 fmt = GET_RTX_FORMAT (code);
3656 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3658 switch (fmt[i])
3660 case 'w':
3661 if (XWINT (x, i) != XWINT (y, i))
3662 return 0;
3663 break;
3665 case 'i':
3666 if (XINT (x, i) != XINT (y, i))
3667 return 0;
3668 break;
3670 case 'E':
3671 /* Two vectors must have the same length. */
3672 if (XVECLEN (x, i) != XVECLEN (y, i))
3673 return 0;
3675 /* And the corresponding elements must match. */
3676 for (j = 0; j < XVECLEN (x, i); j++)
3677 if (rtx_equal_for_prefetch_p (XVECEXP (x, i, j),
3678 XVECEXP (y, i, j)) == 0)
3679 return 0;
3680 break;
3682 case 'e':
3683 if (rtx_equal_for_prefetch_p (XEXP (x, i), XEXP (y, i)) == 0)
3684 return 0;
3685 break;
3687 case 's':
3688 if (strcmp (XSTR (x, i), XSTR (y, i)))
3689 return 0;
3690 break;
3692 case 'u':
3693 /* These are just backpointers, so they don't matter. */
3694 break;
3696 case '0':
3697 break;
3699 /* It is believed that rtx's at this level will never
3700 contain anything but integers and other rtx's,
3701 except for within LABEL_REFs and SYMBOL_REFs. */
3702 default:
3703 abort ();
3706 return 1;
3709 /* Remove constant addition value from the expression X (when present)
3710 and return it. */
3712 static HOST_WIDE_INT
3713 remove_constant_addition (x)
3714 rtx *x;
3716 HOST_WIDE_INT addval = 0;
3717 rtx exp = *x;
3719 /* Avoid clobbering a shared CONST expression. */
3720 if (GET_CODE (exp) == CONST)
3722 if (GET_CODE (XEXP (exp, 0)) == PLUS
3723 && GET_CODE (XEXP (XEXP (exp, 0), 0)) == SYMBOL_REF
3724 && GET_CODE (XEXP (XEXP (exp, 0), 1)) == CONST_INT)
3726 *x = XEXP (XEXP (exp, 0), 0);
3727 return INTVAL (XEXP (XEXP (exp, 0), 1));
3729 return 0;
3732 if (GET_CODE (exp) == CONST_INT)
3734 addval = INTVAL (exp);
3735 *x = const0_rtx;
3738 /* For plus expression recurse on ourself. */
3739 else if (GET_CODE (exp) == PLUS)
3741 addval += remove_constant_addition (&XEXP (exp, 0));
3742 addval += remove_constant_addition (&XEXP (exp, 1));
3744 /* In case our parameter was constant, remove extra zero from the
3745 expression. */
3746 if (XEXP (exp, 0) == const0_rtx)
3747 *x = XEXP (exp, 1);
3748 else if (XEXP (exp, 1) == const0_rtx)
3749 *x = XEXP (exp, 0);
3752 return addval;
3755 /* Attempt to identify accesses to arrays that are most likely to cause cache
3756 misses, and emit prefetch instructions a few prefetch blocks forward.
3758 To detect the arrays we use the GIV information that was collected by the
3759 strength reduction pass.
3761 The prefetch instructions are generated after the GIV information is done
3762 and before the strength reduction process. The new GIVs are injected into
3763 the strength reduction tables, so the prefetch addresses are optimized as
3764 well.
3766 GIVs are split into base address, stride, and constant addition values.
3767 GIVs with the same address, stride and close addition values are combined
3768 into a single prefetch. Also writes to GIVs are detected, so that prefetch
3769 for write instructions can be used for the block we write to, on machines
3770 that support write prefetches.
3772 Several heuristics are used to determine when to prefetch. They are
3773 controlled by defined symbols that can be overridden for each target. */
3775 static void
3776 emit_prefetch_instructions (loop)
3777 struct loop *loop;
3779 int num_prefetches = 0;
3780 int num_real_prefetches = 0;
3781 int num_real_write_prefetches = 0;
3782 int num_prefetches_before = 0;
3783 int num_write_prefetches_before = 0;
3784 int ahead = 0;
3785 int i;
3786 struct iv_class *bl;
3787 struct induction *iv;
3788 struct prefetch_info info[MAX_PREFETCHES];
3789 struct loop_ivs *ivs = LOOP_IVS (loop);
3791 if (!HAVE_prefetch)
3792 return;
3794 /* Consider only loops w/o calls. When a call is done, the loop is probably
3795 slow enough to read the memory. */
3796 if (PREFETCH_NO_CALL && LOOP_INFO (loop)->has_call)
3798 if (loop_dump_stream)
3799 fprintf (loop_dump_stream, "Prefetch: ignoring loop: has call.\n");
3801 return;
3804 /* Don't prefetch in loops known to have few iterations. */
3805 if (PREFETCH_NO_LOW_LOOPCNT
3806 && LOOP_INFO (loop)->n_iterations
3807 && LOOP_INFO (loop)->n_iterations <= PREFETCH_LOW_LOOPCNT)
3809 if (loop_dump_stream)
3810 fprintf (loop_dump_stream,
3811 "Prefetch: ignoring loop: not enough iterations.\n");
3812 return;
3815 /* Search all induction variables and pick those interesting for the prefetch
3816 machinery. */
3817 for (bl = ivs->list; bl; bl = bl->next)
3819 struct induction *biv = bl->biv, *biv1;
3820 int basestride = 0;
3822 biv1 = biv;
3824 /* Expect all BIVs to be executed in each iteration. This makes our
3825 analysis more conservative. */
3826 while (biv1)
3828 /* Discard non-constant additions that we can't handle well yet, and
3829 BIVs that are executed multiple times; such BIVs ought to be
3830 handled in the nested loop. We accept not_every_iteration BIVs,
3831 since these only result in larger strides and make our
3832 heuristics more conservative. */
3833 if (GET_CODE (biv->add_val) != CONST_INT)
3835 if (loop_dump_stream)
3837 fprintf (loop_dump_stream,
3838 "Prefetch: ignoring biv %d: non-constant addition at insn %d:",
3839 REGNO (biv->src_reg), INSN_UID (biv->insn));
3840 print_rtl (loop_dump_stream, biv->add_val);
3841 fprintf (loop_dump_stream, "\n");
3843 break;
3846 if (biv->maybe_multiple)
3848 if (loop_dump_stream)
3850 fprintf (loop_dump_stream,
3851 "Prefetch: ignoring biv %d: maybe_multiple at insn %i:",
3852 REGNO (biv->src_reg), INSN_UID (biv->insn));
3853 print_rtl (loop_dump_stream, biv->add_val);
3854 fprintf (loop_dump_stream, "\n");
3856 break;
3859 basestride += INTVAL (biv1->add_val);
3860 biv1 = biv1->next_iv;
3863 if (biv1 || !basestride)
3864 continue;
3866 for (iv = bl->giv; iv; iv = iv->next_iv)
3868 rtx address;
3869 rtx temp;
3870 HOST_WIDE_INT index = 0;
3871 int add = 1;
3872 HOST_WIDE_INT stride = 0;
3873 int stride_sign = 1;
3874 struct check_store_data d;
3875 const char *ignore_reason = NULL;
3876 int size = GET_MODE_SIZE (GET_MODE (iv));
3878 /* See whether an induction variable is interesting to us and if
3879 not, report the reason. */
3880 if (iv->giv_type != DEST_ADDR)
3881 ignore_reason = "giv is not a destination address";
3883 /* We are interested only in constant stride memory references
3884 in order to be able to compute density easily. */
3885 else if (GET_CODE (iv->mult_val) != CONST_INT)
3886 ignore_reason = "stride is not constant";
3888 else
3890 stride = INTVAL (iv->mult_val) * basestride;
3891 if (stride < 0)
3893 stride = -stride;
3894 stride_sign = -1;
3897 /* On some targets, reversed order prefetches are not
3898 worthwhile. */
3899 if (PREFETCH_NO_REVERSE_ORDER && stride_sign < 0)
3900 ignore_reason = "reversed order stride";
3902 /* Prefetch of accesses with an extreme stride might not be
3903 worthwhile, either. */
3904 else if (PREFETCH_NO_EXTREME_STRIDE
3905 && stride > PREFETCH_EXTREME_STRIDE)
3906 ignore_reason = "extreme stride";
3908 /* Ignore GIVs with varying add values; we can't predict the
3909 value for the next iteration. */
3910 else if (!loop_invariant_p (loop, iv->add_val))
3911 ignore_reason = "giv has varying add value";
3913 /* Ignore GIVs in the nested loops; they ought to have been
3914 handled already. */
3915 else if (iv->maybe_multiple)
3916 ignore_reason = "giv is in nested loop";
3919 if (ignore_reason != NULL)
3921 if (loop_dump_stream)
3922 fprintf (loop_dump_stream,
3923 "Prefetch: ignoring giv at %d: %s.\n",
3924 INSN_UID (iv->insn), ignore_reason);
3925 continue;
3928 /* Determine the pointer to the basic array we are examining. It is
3929 the sum of the BIV's initial value and the GIV's add_val. */
3930 address = copy_rtx (iv->add_val);
3931 temp = copy_rtx (bl->initial_value);
3933 address = simplify_gen_binary (PLUS, Pmode, temp, address);
3934 index = remove_constant_addition (&address);
3936 d.mem_write = 0;
3937 d.mem_address = *iv->location;
3939 /* When the GIV is not always executed, we might be better off by
3940 not dirtying the cache pages. */
3941 if (PREFETCH_CONDITIONAL || iv->always_executed)
3942 note_stores (PATTERN (iv->insn), check_store, &d);
3943 else
3945 if (loop_dump_stream)
3946 fprintf (loop_dump_stream, "Prefetch: Ignoring giv at %d: %s\n",
3947 INSN_UID (iv->insn), "in conditional code.");
3948 continue;
3951 /* Attempt to find another prefetch to the same array and see if we
3952 can merge this one. */
3953 for (i = 0; i < num_prefetches; i++)
3954 if (rtx_equal_for_prefetch_p (address, info[i].base_address)
3955 && stride == info[i].stride)
3957 /* In case both access same array (same location
3958 just with small difference in constant indexes), merge
3959 the prefetches. Just do the later and the earlier will
3960 get prefetched from previous iteration.
3961 The artificial threshold should not be too small,
3962 but also not bigger than small portion of memory usually
3963 traversed by single loop. */
3964 if (index >= info[i].index
3965 && index - info[i].index < PREFETCH_EXTREME_DIFFERENCE)
3967 info[i].write |= d.mem_write;
3968 info[i].bytes_accessed += size;
3969 info[i].index = index;
3970 info[i].giv = iv;
3971 info[i].class = bl;
3972 info[num_prefetches].base_address = address;
3973 add = 0;
3974 break;
3977 if (index < info[i].index
3978 && info[i].index - index < PREFETCH_EXTREME_DIFFERENCE)
3980 info[i].write |= d.mem_write;
3981 info[i].bytes_accessed += size;
3982 add = 0;
3983 break;
3987 /* Merging failed. */
3988 if (add)
3990 info[num_prefetches].giv = iv;
3991 info[num_prefetches].class = bl;
3992 info[num_prefetches].index = index;
3993 info[num_prefetches].stride = stride;
3994 info[num_prefetches].base_address = address;
3995 info[num_prefetches].write = d.mem_write;
3996 info[num_prefetches].bytes_accessed = size;
3997 num_prefetches++;
3998 if (num_prefetches >= MAX_PREFETCHES)
4000 if (loop_dump_stream)
4001 fprintf (loop_dump_stream,
4002 "Maximal number of prefetches exceeded.\n");
4003 return;
4009 for (i = 0; i < num_prefetches; i++)
4011 int density;
4013 /* Attempt to calculate the total number of bytes fetched by all
4014 iterations of the loop. Avoid overflow. */
4015 if (LOOP_INFO (loop)->n_iterations
4016 && ((unsigned HOST_WIDE_INT) (0xffffffff / info[i].stride)
4017 >= LOOP_INFO (loop)->n_iterations))
4018 info[i].total_bytes = info[i].stride * LOOP_INFO (loop)->n_iterations;
4019 else
4020 info[i].total_bytes = 0xffffffff;
4022 density = info[i].bytes_accessed * 100 / info[i].stride;
4024 /* Prefetch might be worthwhile only when the loads/stores are dense. */
4025 if (PREFETCH_ONLY_DENSE_MEM)
4026 if (density * 256 > PREFETCH_DENSE_MEM * 100
4027 && (info[i].total_bytes / PREFETCH_BLOCK
4028 >= PREFETCH_BLOCKS_BEFORE_LOOP_MIN))
4030 info[i].prefetch_before_loop = 1;
4031 info[i].prefetch_in_loop
4032 = (info[i].total_bytes / PREFETCH_BLOCK
4033 > PREFETCH_BLOCKS_BEFORE_LOOP_MAX);
4035 else
4037 info[i].prefetch_in_loop = 0, info[i].prefetch_before_loop = 0;
4038 if (loop_dump_stream)
4039 fprintf (loop_dump_stream,
4040 "Prefetch: ignoring giv at %d: %d%% density is too low.\n",
4041 INSN_UID (info[i].giv->insn), density);
4043 else
4044 info[i].prefetch_in_loop = 1, info[i].prefetch_before_loop = 1;
4046 /* Find how many prefetch instructions we'll use within the loop. */
4047 if (info[i].prefetch_in_loop != 0)
4049 info[i].prefetch_in_loop = ((info[i].stride + PREFETCH_BLOCK - 1)
4050 / PREFETCH_BLOCK);
4051 num_real_prefetches += info[i].prefetch_in_loop;
4052 if (info[i].write)
4053 num_real_write_prefetches += info[i].prefetch_in_loop;
4057 /* Determine how many iterations ahead to prefetch within the loop, based
4058 on how many prefetches we currently expect to do within the loop. */
4059 if (num_real_prefetches != 0)
4061 if ((ahead = SIMULTANEOUS_PREFETCHES / num_real_prefetches) == 0)
4063 if (loop_dump_stream)
4064 fprintf (loop_dump_stream,
4065 "Prefetch: ignoring prefetches within loop: ahead is zero; %d < %d\n",
4066 SIMULTANEOUS_PREFETCHES, num_real_prefetches);
4067 num_real_prefetches = 0, num_real_write_prefetches = 0;
4070 /* We'll also use AHEAD to determine how many prefetch instructions to
4071 emit before a loop, so don't leave it zero. */
4072 if (ahead == 0)
4073 ahead = PREFETCH_BLOCKS_BEFORE_LOOP_MAX;
4075 for (i = 0; i < num_prefetches; i++)
4077 /* Update if we've decided not to prefetch anything within the loop. */
4078 if (num_real_prefetches == 0)
4079 info[i].prefetch_in_loop = 0;
4081 /* Find how many prefetch instructions we'll use before the loop. */
4082 if (info[i].prefetch_before_loop != 0)
4084 int n = info[i].total_bytes / PREFETCH_BLOCK;
4085 if (n > ahead)
4086 n = ahead;
4087 info[i].prefetch_before_loop = n;
4088 num_prefetches_before += n;
4089 if (info[i].write)
4090 num_write_prefetches_before += n;
4093 if (loop_dump_stream)
4095 if (info[i].prefetch_in_loop == 0
4096 && info[i].prefetch_before_loop == 0)
4097 continue;
4098 fprintf (loop_dump_stream, "Prefetch insn: %d",
4099 INSN_UID (info[i].giv->insn));
4100 fprintf (loop_dump_stream,
4101 "; in loop: %d; before: %d; %s\n",
4102 info[i].prefetch_in_loop,
4103 info[i].prefetch_before_loop,
4104 info[i].write ? "read/write" : "read only");
4105 fprintf (loop_dump_stream,
4106 " density: %d%%; bytes_accessed: %u; total_bytes: %u\n",
4107 (int) (info[i].bytes_accessed * 100 / info[i].stride),
4108 info[i].bytes_accessed, info[i].total_bytes);
4109 fprintf (loop_dump_stream, " index: ");
4110 fprintf (loop_dump_stream, HOST_WIDE_INT_PRINT_DEC, info[i].index);
4111 fprintf (loop_dump_stream, "; stride: ");
4112 fprintf (loop_dump_stream, HOST_WIDE_INT_PRINT_DEC, info[i].stride);
4113 fprintf (loop_dump_stream, "; address: ");
4114 print_rtl (loop_dump_stream, info[i].base_address);
4115 fprintf (loop_dump_stream, "\n");
4119 if (num_real_prefetches + num_prefetches_before > 0)
4121 /* Record that this loop uses prefetch instructions. */
4122 LOOP_INFO (loop)->has_prefetch = 1;
4124 if (loop_dump_stream)
4126 fprintf (loop_dump_stream, "Real prefetches needed within loop: %d (write: %d)\n",
4127 num_real_prefetches, num_real_write_prefetches);
4128 fprintf (loop_dump_stream, "Real prefetches needed before loop: %d (write: %d)\n",
4129 num_prefetches_before, num_write_prefetches_before);
4133 for (i = 0; i < num_prefetches; i++)
4135 int y;
4137 for (y = 0; y < info[i].prefetch_in_loop; y++)
4139 rtx loc = copy_rtx (*info[i].giv->location);
4140 rtx insn;
4141 int bytes_ahead = PREFETCH_BLOCK * (ahead + y);
4142 rtx before_insn = info[i].giv->insn;
4143 rtx prev_insn = PREV_INSN (info[i].giv->insn);
4144 rtx seq;
4146 /* We can save some effort by offsetting the address on
4147 architectures with offsettable memory references. */
4148 if (offsettable_address_p (0, VOIDmode, loc))
4149 loc = plus_constant (loc, bytes_ahead);
4150 else
4152 rtx reg = gen_reg_rtx (Pmode);
4153 loop_iv_add_mult_emit_before (loop, loc, const1_rtx,
4154 GEN_INT (bytes_ahead), reg,
4155 0, before_insn);
4156 loc = reg;
4159 start_sequence ();
4160 /* Make sure the address operand is valid for prefetch. */
4161 if (! (*insn_data[(int)CODE_FOR_prefetch].operand[0].predicate)
4162 (loc, insn_data[(int)CODE_FOR_prefetch].operand[0].mode))
4163 loc = force_reg (Pmode, loc);
4164 emit_insn (gen_prefetch (loc, GEN_INT (info[i].write),
4165 GEN_INT (3)));
4166 seq = get_insns ();
4167 end_sequence ();
4168 emit_insn_before (seq, before_insn);
4170 /* Check all insns emitted and record the new GIV
4171 information. */
4172 insn = NEXT_INSN (prev_insn);
4173 while (insn != before_insn)
4175 insn = check_insn_for_givs (loop, insn,
4176 info[i].giv->always_executed,
4177 info[i].giv->maybe_multiple);
4178 insn = NEXT_INSN (insn);
4182 if (PREFETCH_BEFORE_LOOP)
4184 /* Emit insns before the loop to fetch the first cache lines or,
4185 if we're not prefetching within the loop, everything we expect
4186 to need. */
4187 for (y = 0; y < info[i].prefetch_before_loop; y++)
4189 rtx reg = gen_reg_rtx (Pmode);
4190 rtx loop_start = loop->start;
4191 rtx init_val = info[i].class->initial_value;
4192 rtx add_val = simplify_gen_binary (PLUS, Pmode,
4193 info[i].giv->add_val,
4194 GEN_INT (y * PREFETCH_BLOCK));
4196 /* Functions called by LOOP_IV_ADD_EMIT_BEFORE expect a
4197 non-constant INIT_VAL to have the same mode as REG, which
4198 in this case we know to be Pmode. */
4199 if (GET_MODE (init_val) != Pmode && !CONSTANT_P (init_val))
4200 init_val = convert_to_mode (Pmode, init_val, 0);
4201 loop_iv_add_mult_emit_before (loop, init_val,
4202 info[i].giv->mult_val,
4203 add_val, reg, 0, loop_start);
4204 emit_insn_before (gen_prefetch (reg, GEN_INT (info[i].write),
4205 GEN_INT (3)),
4206 loop_start);
4211 return;
4214 /* A "basic induction variable" or biv is a pseudo reg that is set
4215 (within this loop) only by incrementing or decrementing it. */
4216 /* A "general induction variable" or giv is a pseudo reg whose
4217 value is a linear function of a biv. */
4219 /* Bivs are recognized by `basic_induction_var';
4220 Givs by `general_induction_var'. */
4222 /* Communication with routines called via `note_stores'. */
4224 static rtx note_insn;
4226 /* Dummy register to have nonzero DEST_REG for DEST_ADDR type givs. */
4228 static rtx addr_placeholder;
4230 /* ??? Unfinished optimizations, and possible future optimizations,
4231 for the strength reduction code. */
4233 /* ??? The interaction of biv elimination, and recognition of 'constant'
4234 bivs, may cause problems. */
4236 /* ??? Add heuristics so that DEST_ADDR strength reduction does not cause
4237 performance problems.
4239 Perhaps don't eliminate things that can be combined with an addressing
4240 mode. Find all givs that have the same biv, mult_val, and add_val;
4241 then for each giv, check to see if its only use dies in a following
4242 memory address. If so, generate a new memory address and check to see
4243 if it is valid. If it is valid, then store the modified memory address,
4244 otherwise, mark the giv as not done so that it will get its own iv. */
4246 /* ??? Could try to optimize branches when it is known that a biv is always
4247 positive. */
4249 /* ??? When replace a biv in a compare insn, we should replace with closest
4250 giv so that an optimized branch can still be recognized by the combiner,
4251 e.g. the VAX acb insn. */
4253 /* ??? Many of the checks involving uid_luid could be simplified if regscan
4254 was rerun in loop_optimize whenever a register was added or moved.
4255 Also, some of the optimizations could be a little less conservative. */
4257 /* Scan the loop body and call FNCALL for each insn. In the addition to the
4258 LOOP and INSN parameters pass MAYBE_MULTIPLE and NOT_EVERY_ITERATION to the
4259 callback.
4261 NOT_EVERY_ITERATION is 1 if current insn is not known to be executed at
4262 least once for every loop iteration except for the last one.
4264 MAYBE_MULTIPLE is 1 if current insn may be executed more than once for every
4265 loop iteration.
4267 void
4268 for_each_insn_in_loop (loop, fncall)
4269 struct loop *loop;
4270 loop_insn_callback fncall;
4272 int not_every_iteration = 0;
4273 int maybe_multiple = 0;
4274 int past_loop_latch = 0;
4275 int loop_depth = 0;
4276 rtx p;
4278 /* If loop_scan_start points to the loop exit test, we have to be wary of
4279 subversive use of gotos inside expression statements. */
4280 if (prev_nonnote_insn (loop->scan_start) != prev_nonnote_insn (loop->start))
4281 maybe_multiple = back_branch_in_range_p (loop, loop->scan_start);
4283 /* Scan through loop and update NOT_EVERY_ITERATION and MAYBE_MULTIPLE. */
4284 for (p = next_insn_in_loop (loop, loop->scan_start);
4285 p != NULL_RTX;
4286 p = next_insn_in_loop (loop, p))
4288 p = fncall (loop, p, not_every_iteration, maybe_multiple);
4290 /* Past CODE_LABEL, we get to insns that may be executed multiple
4291 times. The only way we can be sure that they can't is if every
4292 jump insn between here and the end of the loop either
4293 returns, exits the loop, is a jump to a location that is still
4294 behind the label, or is a jump to the loop start. */
4296 if (GET_CODE (p) == CODE_LABEL)
4298 rtx insn = p;
4300 maybe_multiple = 0;
4302 while (1)
4304 insn = NEXT_INSN (insn);
4305 if (insn == loop->scan_start)
4306 break;
4307 if (insn == loop->end)
4309 if (loop->top != 0)
4310 insn = loop->top;
4311 else
4312 break;
4313 if (insn == loop->scan_start)
4314 break;
4317 if (GET_CODE (insn) == JUMP_INSN
4318 && GET_CODE (PATTERN (insn)) != RETURN
4319 && (!any_condjump_p (insn)
4320 || (JUMP_LABEL (insn) != 0
4321 && JUMP_LABEL (insn) != loop->scan_start
4322 && !loop_insn_first_p (p, JUMP_LABEL (insn)))))
4324 maybe_multiple = 1;
4325 break;
4330 /* Past a jump, we get to insns for which we can't count
4331 on whether they will be executed during each iteration. */
4332 /* This code appears twice in strength_reduce. There is also similar
4333 code in scan_loop. */
4334 if (GET_CODE (p) == JUMP_INSN
4335 /* If we enter the loop in the middle, and scan around to the
4336 beginning, don't set not_every_iteration for that.
4337 This can be any kind of jump, since we want to know if insns
4338 will be executed if the loop is executed. */
4339 && !(JUMP_LABEL (p) == loop->top
4340 && ((NEXT_INSN (NEXT_INSN (p)) == loop->end
4341 && any_uncondjump_p (p))
4342 || (NEXT_INSN (p) == loop->end && any_condjump_p (p)))))
4344 rtx label = 0;
4346 /* If this is a jump outside the loop, then it also doesn't
4347 matter. Check to see if the target of this branch is on the
4348 loop->exits_labels list. */
4350 for (label = loop->exit_labels; label; label = LABEL_NEXTREF (label))
4351 if (XEXP (label, 0) == JUMP_LABEL (p))
4352 break;
4354 if (!label)
4355 not_every_iteration = 1;
4358 else if (GET_CODE (p) == NOTE)
4360 /* At the virtual top of a converted loop, insns are again known to
4361 be executed each iteration: logically, the loop begins here
4362 even though the exit code has been duplicated.
4364 Insns are also again known to be executed each iteration at
4365 the LOOP_CONT note. */
4366 if ((NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_VTOP
4367 || NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_CONT)
4368 && loop_depth == 0)
4369 not_every_iteration = 0;
4370 else if (NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_BEG)
4371 loop_depth++;
4372 else if (NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_END)
4373 loop_depth--;
4376 /* Note if we pass a loop latch. If we do, then we can not clear
4377 NOT_EVERY_ITERATION below when we pass the last CODE_LABEL in
4378 a loop since a jump before the last CODE_LABEL may have started
4379 a new loop iteration.
4381 Note that LOOP_TOP is only set for rotated loops and we need
4382 this check for all loops, so compare against the CODE_LABEL
4383 which immediately follows LOOP_START. */
4384 if (GET_CODE (p) == JUMP_INSN
4385 && JUMP_LABEL (p) == NEXT_INSN (loop->start))
4386 past_loop_latch = 1;
4388 /* Unlike in the code motion pass where MAYBE_NEVER indicates that
4389 an insn may never be executed, NOT_EVERY_ITERATION indicates whether
4390 or not an insn is known to be executed each iteration of the
4391 loop, whether or not any iterations are known to occur.
4393 Therefore, if we have just passed a label and have no more labels
4394 between here and the test insn of the loop, and we have not passed
4395 a jump to the top of the loop, then we know these insns will be
4396 executed each iteration. */
4398 if (not_every_iteration
4399 && !past_loop_latch
4400 && GET_CODE (p) == CODE_LABEL
4401 && no_labels_between_p (p, loop->end)
4402 && loop_insn_first_p (p, loop->cont))
4403 not_every_iteration = 0;
4407 static void
4408 loop_bivs_find (loop)
4409 struct loop *loop;
4411 struct loop_regs *regs = LOOP_REGS (loop);
4412 struct loop_ivs *ivs = LOOP_IVS (loop);
4413 /* Temporary list pointers for traversing ivs->list. */
4414 struct iv_class *bl, **backbl;
4416 ivs->list = 0;
4418 for_each_insn_in_loop (loop, check_insn_for_bivs);
4420 /* Scan ivs->list to remove all regs that proved not to be bivs.
4421 Make a sanity check against regs->n_times_set. */
4422 for (backbl = &ivs->list, bl = *backbl; bl; bl = bl->next)
4424 if (REG_IV_TYPE (ivs, bl->regno) != BASIC_INDUCT
4425 /* Above happens if register modified by subreg, etc. */
4426 /* Make sure it is not recognized as a basic induction var: */
4427 || regs->array[bl->regno].n_times_set != bl->biv_count
4428 /* If never incremented, it is invariant that we decided not to
4429 move. So leave it alone. */
4430 || ! bl->incremented)
4432 if (loop_dump_stream)
4433 fprintf (loop_dump_stream, "Biv %d: discarded, %s\n",
4434 bl->regno,
4435 (REG_IV_TYPE (ivs, bl->regno) != BASIC_INDUCT
4436 ? "not induction variable"
4437 : (! bl->incremented ? "never incremented"
4438 : "count error")));
4440 REG_IV_TYPE (ivs, bl->regno) = NOT_BASIC_INDUCT;
4441 *backbl = bl->next;
4443 else
4445 backbl = &bl->next;
4447 if (loop_dump_stream)
4448 fprintf (loop_dump_stream, "Biv %d: verified\n", bl->regno);
4454 /* Determine how BIVS are initialized by looking through pre-header
4455 extended basic block. */
4456 static void
4457 loop_bivs_init_find (loop)
4458 struct loop *loop;
4460 struct loop_ivs *ivs = LOOP_IVS (loop);
4461 /* Temporary list pointers for traversing ivs->list. */
4462 struct iv_class *bl;
4463 int call_seen;
4464 rtx p;
4466 /* Find initial value for each biv by searching backwards from loop_start,
4467 halting at first label. Also record any test condition. */
4469 call_seen = 0;
4470 for (p = loop->start; p && GET_CODE (p) != CODE_LABEL; p = PREV_INSN (p))
4472 rtx test;
4474 note_insn = p;
4476 if (GET_CODE (p) == CALL_INSN)
4477 call_seen = 1;
4479 if (INSN_P (p))
4480 note_stores (PATTERN (p), record_initial, ivs);
4482 /* Record any test of a biv that branches around the loop if no store
4483 between it and the start of loop. We only care about tests with
4484 constants and registers and only certain of those. */
4485 if (GET_CODE (p) == JUMP_INSN
4486 && JUMP_LABEL (p) != 0
4487 && next_real_insn (JUMP_LABEL (p)) == next_real_insn (loop->end)
4488 && (test = get_condition_for_loop (loop, p)) != 0
4489 && GET_CODE (XEXP (test, 0)) == REG
4490 && REGNO (XEXP (test, 0)) < max_reg_before_loop
4491 && (bl = REG_IV_CLASS (ivs, REGNO (XEXP (test, 0)))) != 0
4492 && valid_initial_value_p (XEXP (test, 1), p, call_seen, loop->start)
4493 && bl->init_insn == 0)
4495 /* If an NE test, we have an initial value! */
4496 if (GET_CODE (test) == NE)
4498 bl->init_insn = p;
4499 bl->init_set = gen_rtx_SET (VOIDmode,
4500 XEXP (test, 0), XEXP (test, 1));
4502 else
4503 bl->initial_test = test;
4509 /* Look at the each biv and see if we can say anything better about its
4510 initial value from any initializing insns set up above. (This is done
4511 in two passes to avoid missing SETs in a PARALLEL.) */
4512 static void
4513 loop_bivs_check (loop)
4514 struct loop *loop;
4516 struct loop_ivs *ivs = LOOP_IVS (loop);
4517 /* Temporary list pointers for traversing ivs->list. */
4518 struct iv_class *bl;
4519 struct iv_class **backbl;
4521 for (backbl = &ivs->list; (bl = *backbl); backbl = &bl->next)
4523 rtx src;
4524 rtx note;
4526 if (! bl->init_insn)
4527 continue;
4529 /* IF INIT_INSN has a REG_EQUAL or REG_EQUIV note and the value
4530 is a constant, use the value of that. */
4531 if (((note = find_reg_note (bl->init_insn, REG_EQUAL, 0)) != NULL
4532 && CONSTANT_P (XEXP (note, 0)))
4533 || ((note = find_reg_note (bl->init_insn, REG_EQUIV, 0)) != NULL
4534 && CONSTANT_P (XEXP (note, 0))))
4535 src = XEXP (note, 0);
4536 else
4537 src = SET_SRC (bl->init_set);
4539 if (loop_dump_stream)
4540 fprintf (loop_dump_stream,
4541 "Biv %d: initialized at insn %d: initial value ",
4542 bl->regno, INSN_UID (bl->init_insn));
4544 if ((GET_MODE (src) == GET_MODE (regno_reg_rtx[bl->regno])
4545 || GET_MODE (src) == VOIDmode)
4546 && valid_initial_value_p (src, bl->init_insn,
4547 LOOP_INFO (loop)->pre_header_has_call,
4548 loop->start))
4550 bl->initial_value = src;
4552 if (loop_dump_stream)
4554 print_simple_rtl (loop_dump_stream, src);
4555 fputc ('\n', loop_dump_stream);
4558 /* If we can't make it a giv,
4559 let biv keep initial value of "itself". */
4560 else if (loop_dump_stream)
4561 fprintf (loop_dump_stream, "is complex\n");
4566 /* Search the loop for general induction variables. */
4568 static void
4569 loop_givs_find (loop)
4570 struct loop* loop;
4572 for_each_insn_in_loop (loop, check_insn_for_givs);
4576 /* For each giv for which we still don't know whether or not it is
4577 replaceable, check to see if it is replaceable because its final value
4578 can be calculated. */
4580 static void
4581 loop_givs_check (loop)
4582 struct loop *loop;
4584 struct loop_ivs *ivs = LOOP_IVS (loop);
4585 struct iv_class *bl;
4587 for (bl = ivs->list; bl; bl = bl->next)
4589 struct induction *v;
4591 for (v = bl->giv; v; v = v->next_iv)
4592 if (! v->replaceable && ! v->not_replaceable)
4593 check_final_value (loop, v);
4598 /* Return nonzero if it is possible to eliminate the biv BL provided
4599 all givs are reduced. This is possible if either the reg is not
4600 used outside the loop, or we can compute what its final value will
4601 be. */
4603 static int
4604 loop_biv_eliminable_p (loop, bl, threshold, insn_count)
4605 struct loop *loop;
4606 struct iv_class *bl;
4607 int threshold;
4608 int insn_count;
4610 /* For architectures with a decrement_and_branch_until_zero insn,
4611 don't do this if we put a REG_NONNEG note on the endtest for this
4612 biv. */
4614 #ifdef HAVE_decrement_and_branch_until_zero
4615 if (bl->nonneg)
4617 if (loop_dump_stream)
4618 fprintf (loop_dump_stream,
4619 "Cannot eliminate nonneg biv %d.\n", bl->regno);
4620 return 0;
4622 #endif
4624 /* Check that biv is used outside loop or if it has a final value.
4625 Compare against bl->init_insn rather than loop->start. We aren't
4626 concerned with any uses of the biv between init_insn and
4627 loop->start since these won't be affected by the value of the biv
4628 elsewhere in the function, so long as init_insn doesn't use the
4629 biv itself. */
4631 if ((REGNO_LAST_LUID (bl->regno) < INSN_LUID (loop->end)
4632 && bl->init_insn
4633 && INSN_UID (bl->init_insn) < max_uid_for_loop
4634 && REGNO_FIRST_LUID (bl->regno) >= INSN_LUID (bl->init_insn)
4635 && ! reg_mentioned_p (bl->biv->dest_reg, SET_SRC (bl->init_set)))
4636 || (bl->final_value = final_biv_value (loop, bl)))
4637 return maybe_eliminate_biv (loop, bl, 0, threshold, insn_count);
4639 if (loop_dump_stream)
4641 fprintf (loop_dump_stream,
4642 "Cannot eliminate biv %d.\n",
4643 bl->regno);
4644 fprintf (loop_dump_stream,
4645 "First use: insn %d, last use: insn %d.\n",
4646 REGNO_FIRST_UID (bl->regno),
4647 REGNO_LAST_UID (bl->regno));
4649 return 0;
4653 /* Reduce each giv of BL that we have decided to reduce. */
4655 static void
4656 loop_givs_reduce (loop, bl)
4657 struct loop *loop;
4658 struct iv_class *bl;
4660 struct induction *v;
4662 for (v = bl->giv; v; v = v->next_iv)
4664 struct induction *tv;
4665 if (! v->ignore && v->same == 0)
4667 int auto_inc_opt = 0;
4669 /* If the code for derived givs immediately below has already
4670 allocated a new_reg, we must keep it. */
4671 if (! v->new_reg)
4672 v->new_reg = gen_reg_rtx (v->mode);
4674 #ifdef AUTO_INC_DEC
4675 /* If the target has auto-increment addressing modes, and
4676 this is an address giv, then try to put the increment
4677 immediately after its use, so that flow can create an
4678 auto-increment addressing mode. */
4679 if (v->giv_type == DEST_ADDR && bl->biv_count == 1
4680 && bl->biv->always_executed && ! bl->biv->maybe_multiple
4681 /* We don't handle reversed biv's because bl->biv->insn
4682 does not have a valid INSN_LUID. */
4683 && ! bl->reversed
4684 && v->always_executed && ! v->maybe_multiple
4685 && INSN_UID (v->insn) < max_uid_for_loop)
4687 /* If other giv's have been combined with this one, then
4688 this will work only if all uses of the other giv's occur
4689 before this giv's insn. This is difficult to check.
4691 We simplify this by looking for the common case where
4692 there is one DEST_REG giv, and this giv's insn is the
4693 last use of the dest_reg of that DEST_REG giv. If the
4694 increment occurs after the address giv, then we can
4695 perform the optimization. (Otherwise, the increment
4696 would have to go before other_giv, and we would not be
4697 able to combine it with the address giv to get an
4698 auto-inc address.) */
4699 if (v->combined_with)
4701 struct induction *other_giv = 0;
4703 for (tv = bl->giv; tv; tv = tv->next_iv)
4704 if (tv->same == v)
4706 if (other_giv)
4707 break;
4708 else
4709 other_giv = tv;
4711 if (! tv && other_giv
4712 && REGNO (other_giv->dest_reg) < max_reg_before_loop
4713 && (REGNO_LAST_UID (REGNO (other_giv->dest_reg))
4714 == INSN_UID (v->insn))
4715 && INSN_LUID (v->insn) < INSN_LUID (bl->biv->insn))
4716 auto_inc_opt = 1;
4718 /* Check for case where increment is before the address
4719 giv. Do this test in "loop order". */
4720 else if ((INSN_LUID (v->insn) > INSN_LUID (bl->biv->insn)
4721 && (INSN_LUID (v->insn) < INSN_LUID (loop->scan_start)
4722 || (INSN_LUID (bl->biv->insn)
4723 > INSN_LUID (loop->scan_start))))
4724 || (INSN_LUID (v->insn) < INSN_LUID (loop->scan_start)
4725 && (INSN_LUID (loop->scan_start)
4726 < INSN_LUID (bl->biv->insn))))
4727 auto_inc_opt = -1;
4728 else
4729 auto_inc_opt = 1;
4731 #ifdef HAVE_cc0
4733 rtx prev;
4735 /* We can't put an insn immediately after one setting
4736 cc0, or immediately before one using cc0. */
4737 if ((auto_inc_opt == 1 && sets_cc0_p (PATTERN (v->insn)))
4738 || (auto_inc_opt == -1
4739 && (prev = prev_nonnote_insn (v->insn)) != 0
4740 && INSN_P (prev)
4741 && sets_cc0_p (PATTERN (prev))))
4742 auto_inc_opt = 0;
4744 #endif
4746 if (auto_inc_opt)
4747 v->auto_inc_opt = 1;
4749 #endif
4751 /* For each place where the biv is incremented, add an insn
4752 to increment the new, reduced reg for the giv. */
4753 for (tv = bl->biv; tv; tv = tv->next_iv)
4755 rtx insert_before;
4757 if (! auto_inc_opt)
4758 insert_before = NEXT_INSN (tv->insn);
4759 else if (auto_inc_opt == 1)
4760 insert_before = NEXT_INSN (v->insn);
4761 else
4762 insert_before = v->insn;
4764 if (tv->mult_val == const1_rtx)
4765 loop_iv_add_mult_emit_before (loop, tv->add_val, v->mult_val,
4766 v->new_reg, v->new_reg,
4767 0, insert_before);
4768 else /* tv->mult_val == const0_rtx */
4769 /* A multiply is acceptable here
4770 since this is presumed to be seldom executed. */
4771 loop_iv_add_mult_emit_before (loop, tv->add_val, v->mult_val,
4772 v->add_val, v->new_reg,
4773 0, insert_before);
4776 /* Add code at loop start to initialize giv's reduced reg. */
4778 loop_iv_add_mult_hoist (loop,
4779 extend_value_for_giv (v, bl->initial_value),
4780 v->mult_val, v->add_val, v->new_reg);
4786 /* Check for givs whose first use is their definition and whose
4787 last use is the definition of another giv. If so, it is likely
4788 dead and should not be used to derive another giv nor to
4789 eliminate a biv. */
4791 static void
4792 loop_givs_dead_check (loop, bl)
4793 struct loop *loop ATTRIBUTE_UNUSED;
4794 struct iv_class *bl;
4796 struct induction *v;
4798 for (v = bl->giv; v; v = v->next_iv)
4800 if (v->ignore
4801 || (v->same && v->same->ignore))
4802 continue;
4804 if (v->giv_type == DEST_REG
4805 && REGNO_FIRST_UID (REGNO (v->dest_reg)) == INSN_UID (v->insn))
4807 struct induction *v1;
4809 for (v1 = bl->giv; v1; v1 = v1->next_iv)
4810 if (REGNO_LAST_UID (REGNO (v->dest_reg)) == INSN_UID (v1->insn))
4811 v->maybe_dead = 1;
4817 static void
4818 loop_givs_rescan (loop, bl, reg_map)
4819 struct loop *loop;
4820 struct iv_class *bl;
4821 rtx *reg_map;
4823 struct induction *v;
4825 for (v = bl->giv; v; v = v->next_iv)
4827 if (v->same && v->same->ignore)
4828 v->ignore = 1;
4830 if (v->ignore)
4831 continue;
4833 /* Update expression if this was combined, in case other giv was
4834 replaced. */
4835 if (v->same)
4836 v->new_reg = replace_rtx (v->new_reg,
4837 v->same->dest_reg, v->same->new_reg);
4839 /* See if this register is known to be a pointer to something. If
4840 so, see if we can find the alignment. First see if there is a
4841 destination register that is a pointer. If so, this shares the
4842 alignment too. Next see if we can deduce anything from the
4843 computational information. If not, and this is a DEST_ADDR
4844 giv, at least we know that it's a pointer, though we don't know
4845 the alignment. */
4846 if (GET_CODE (v->new_reg) == REG
4847 && v->giv_type == DEST_REG
4848 && REG_POINTER (v->dest_reg))
4849 mark_reg_pointer (v->new_reg,
4850 REGNO_POINTER_ALIGN (REGNO (v->dest_reg)));
4851 else if (GET_CODE (v->new_reg) == REG
4852 && REG_POINTER (v->src_reg))
4854 unsigned int align = REGNO_POINTER_ALIGN (REGNO (v->src_reg));
4856 if (align == 0
4857 || GET_CODE (v->add_val) != CONST_INT
4858 || INTVAL (v->add_val) % (align / BITS_PER_UNIT) != 0)
4859 align = 0;
4861 mark_reg_pointer (v->new_reg, align);
4863 else if (GET_CODE (v->new_reg) == REG
4864 && GET_CODE (v->add_val) == REG
4865 && REG_POINTER (v->add_val))
4867 unsigned int align = REGNO_POINTER_ALIGN (REGNO (v->add_val));
4869 if (align == 0 || GET_CODE (v->mult_val) != CONST_INT
4870 || INTVAL (v->mult_val) % (align / BITS_PER_UNIT) != 0)
4871 align = 0;
4873 mark_reg_pointer (v->new_reg, align);
4875 else if (GET_CODE (v->new_reg) == REG && v->giv_type == DEST_ADDR)
4876 mark_reg_pointer (v->new_reg, 0);
4878 if (v->giv_type == DEST_ADDR)
4879 /* Store reduced reg as the address in the memref where we found
4880 this giv. */
4881 validate_change (v->insn, v->location, v->new_reg, 0);
4882 else if (v->replaceable)
4884 reg_map[REGNO (v->dest_reg)] = v->new_reg;
4886 else
4888 rtx original_insn = v->insn;
4889 rtx note;
4891 /* Not replaceable; emit an insn to set the original giv reg from
4892 the reduced giv, same as above. */
4893 v->insn = loop_insn_emit_after (loop, 0, original_insn,
4894 gen_move_insn (v->dest_reg,
4895 v->new_reg));
4897 /* The original insn may have a REG_EQUAL note. This note is
4898 now incorrect and may result in invalid substitutions later.
4899 The original insn is dead, but may be part of a libcall
4900 sequence, which doesn't seem worth the bother of handling. */
4901 note = find_reg_note (original_insn, REG_EQUAL, NULL_RTX);
4902 if (note)
4903 remove_note (original_insn, note);
4906 /* When a loop is reversed, givs which depend on the reversed
4907 biv, and which are live outside the loop, must be set to their
4908 correct final value. This insn is only needed if the giv is
4909 not replaceable. The correct final value is the same as the
4910 value that the giv starts the reversed loop with. */
4911 if (bl->reversed && ! v->replaceable)
4912 loop_iv_add_mult_sink (loop,
4913 extend_value_for_giv (v, bl->initial_value),
4914 v->mult_val, v->add_val, v->dest_reg);
4915 else if (v->final_value)
4916 loop_insn_sink_or_swim (loop,
4917 gen_load_of_final_value (v->dest_reg,
4918 v->final_value));
4920 if (loop_dump_stream)
4922 fprintf (loop_dump_stream, "giv at %d reduced to ",
4923 INSN_UID (v->insn));
4924 print_simple_rtl (loop_dump_stream, v->new_reg);
4925 fprintf (loop_dump_stream, "\n");
4931 static int
4932 loop_giv_reduce_benefit (loop, bl, v, test_reg)
4933 struct loop *loop ATTRIBUTE_UNUSED;
4934 struct iv_class *bl;
4935 struct induction *v;
4936 rtx test_reg;
4938 int add_cost;
4939 int benefit;
4941 benefit = v->benefit;
4942 PUT_MODE (test_reg, v->mode);
4943 add_cost = iv_add_mult_cost (bl->biv->add_val, v->mult_val,
4944 test_reg, test_reg);
4946 /* Reduce benefit if not replaceable, since we will insert a
4947 move-insn to replace the insn that calculates this giv. Don't do
4948 this unless the giv is a user variable, since it will often be
4949 marked non-replaceable because of the duplication of the exit
4950 code outside the loop. In such a case, the copies we insert are
4951 dead and will be deleted. So they don't have a cost. Similar
4952 situations exist. */
4953 /* ??? The new final_[bg]iv_value code does a much better job of
4954 finding replaceable giv's, and hence this code may no longer be
4955 necessary. */
4956 if (! v->replaceable && ! bl->eliminable
4957 && REG_USERVAR_P (v->dest_reg))
4958 benefit -= copy_cost;
4960 /* Decrease the benefit to count the add-insns that we will insert
4961 to increment the reduced reg for the giv. ??? This can
4962 overestimate the run-time cost of the additional insns, e.g. if
4963 there are multiple basic blocks that increment the biv, but only
4964 one of these blocks is executed during each iteration. There is
4965 no good way to detect cases like this with the current structure
4966 of the loop optimizer. This code is more accurate for
4967 determining code size than run-time benefits. */
4968 benefit -= add_cost * bl->biv_count;
4970 /* Decide whether to strength-reduce this giv or to leave the code
4971 unchanged (recompute it from the biv each time it is used). This
4972 decision can be made independently for each giv. */
4974 #ifdef AUTO_INC_DEC
4975 /* Attempt to guess whether autoincrement will handle some of the
4976 new add insns; if so, increase BENEFIT (undo the subtraction of
4977 add_cost that was done above). */
4978 if (v->giv_type == DEST_ADDR
4979 /* Increasing the benefit is risky, since this is only a guess.
4980 Avoid increasing register pressure in cases where there would
4981 be no other benefit from reducing this giv. */
4982 && benefit > 0
4983 && GET_CODE (v->mult_val) == CONST_INT)
4985 int size = GET_MODE_SIZE (GET_MODE (v->mem));
4987 if (HAVE_POST_INCREMENT
4988 && INTVAL (v->mult_val) == size)
4989 benefit += add_cost * bl->biv_count;
4990 else if (HAVE_PRE_INCREMENT
4991 && INTVAL (v->mult_val) == size)
4992 benefit += add_cost * bl->biv_count;
4993 else if (HAVE_POST_DECREMENT
4994 && -INTVAL (v->mult_val) == size)
4995 benefit += add_cost * bl->biv_count;
4996 else if (HAVE_PRE_DECREMENT
4997 && -INTVAL (v->mult_val) == size)
4998 benefit += add_cost * bl->biv_count;
5000 #endif
5002 return benefit;
5006 /* Free IV structures for LOOP. */
5008 static void
5009 loop_ivs_free (loop)
5010 struct loop *loop;
5012 struct loop_ivs *ivs = LOOP_IVS (loop);
5013 struct iv_class *iv = ivs->list;
5015 free (ivs->regs);
5017 while (iv)
5019 struct iv_class *next = iv->next;
5020 struct induction *induction;
5021 struct induction *next_induction;
5023 for (induction = iv->biv; induction; induction = next_induction)
5025 next_induction = induction->next_iv;
5026 free (induction);
5028 for (induction = iv->giv; induction; induction = next_induction)
5030 next_induction = induction->next_iv;
5031 free (induction);
5034 free (iv);
5035 iv = next;
5040 /* Perform strength reduction and induction variable elimination.
5042 Pseudo registers created during this function will be beyond the
5043 last valid index in several tables including
5044 REGS->ARRAY[I].N_TIMES_SET and REGNO_LAST_UID. This does not cause a
5045 problem here, because the added registers cannot be givs outside of
5046 their loop, and hence will never be reconsidered. But scan_loop
5047 must check regnos to make sure they are in bounds. */
5049 static void
5050 strength_reduce (loop, flags)
5051 struct loop *loop;
5052 int flags;
5054 struct loop_info *loop_info = LOOP_INFO (loop);
5055 struct loop_regs *regs = LOOP_REGS (loop);
5056 struct loop_ivs *ivs = LOOP_IVS (loop);
5057 rtx p;
5058 /* Temporary list pointer for traversing ivs->list. */
5059 struct iv_class *bl;
5060 /* Ratio of extra register life span we can justify
5061 for saving an instruction. More if loop doesn't call subroutines
5062 since in that case saving an insn makes more difference
5063 and more registers are available. */
5064 /* ??? could set this to last value of threshold in move_movables */
5065 int threshold = (loop_info->has_call ? 1 : 2) * (3 + n_non_fixed_regs);
5066 /* Map of pseudo-register replacements. */
5067 rtx *reg_map = NULL;
5068 int reg_map_size;
5069 int unrolled_insn_copies = 0;
5070 rtx test_reg = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
5071 int insn_count = count_insns_in_loop (loop);
5073 addr_placeholder = gen_reg_rtx (Pmode);
5075 ivs->n_regs = max_reg_before_loop;
5076 ivs->regs = (struct iv *) xcalloc (ivs->n_regs, sizeof (struct iv));
5078 /* Find all BIVs in loop. */
5079 loop_bivs_find (loop);
5081 /* Exit if there are no bivs. */
5082 if (! ivs->list)
5084 /* Can still unroll the loop anyways, but indicate that there is no
5085 strength reduction info available. */
5086 if (flags & LOOP_UNROLL)
5087 unroll_loop (loop, insn_count, 0);
5089 loop_ivs_free (loop);
5090 return;
5093 /* Determine how BIVS are initialized by looking through pre-header
5094 extended basic block. */
5095 loop_bivs_init_find (loop);
5097 /* Look at the each biv and see if we can say anything better about its
5098 initial value from any initializing insns set up above. */
5099 loop_bivs_check (loop);
5101 /* Search the loop for general induction variables. */
5102 loop_givs_find (loop);
5104 /* Try to calculate and save the number of loop iterations. This is
5105 set to zero if the actual number can not be calculated. This must
5106 be called after all giv's have been identified, since otherwise it may
5107 fail if the iteration variable is a giv. */
5108 loop_iterations (loop);
5110 #ifdef HAVE_prefetch
5111 if (flags & LOOP_PREFETCH)
5112 emit_prefetch_instructions (loop);
5113 #endif
5115 /* Now for each giv for which we still don't know whether or not it is
5116 replaceable, check to see if it is replaceable because its final value
5117 can be calculated. This must be done after loop_iterations is called,
5118 so that final_giv_value will work correctly. */
5119 loop_givs_check (loop);
5121 /* Try to prove that the loop counter variable (if any) is always
5122 nonnegative; if so, record that fact with a REG_NONNEG note
5123 so that "decrement and branch until zero" insn can be used. */
5124 check_dbra_loop (loop, insn_count);
5126 /* Create reg_map to hold substitutions for replaceable giv regs.
5127 Some givs might have been made from biv increments, so look at
5128 ivs->reg_iv_type for a suitable size. */
5129 reg_map_size = ivs->n_regs;
5130 reg_map = (rtx *) xcalloc (reg_map_size, sizeof (rtx));
5132 /* Examine each iv class for feasibility of strength reduction/induction
5133 variable elimination. */
5135 for (bl = ivs->list; bl; bl = bl->next)
5137 struct induction *v;
5138 int benefit;
5140 /* Test whether it will be possible to eliminate this biv
5141 provided all givs are reduced. */
5142 bl->eliminable = loop_biv_eliminable_p (loop, bl, threshold, insn_count);
5144 /* This will be true at the end, if all givs which depend on this
5145 biv have been strength reduced.
5146 We can't (currently) eliminate the biv unless this is so. */
5147 bl->all_reduced = 1;
5149 /* Check each extension dependent giv in this class to see if its
5150 root biv is safe from wrapping in the interior mode. */
5151 check_ext_dependent_givs (bl, loop_info);
5153 /* Combine all giv's for this iv_class. */
5154 combine_givs (regs, bl);
5156 for (v = bl->giv; v; v = v->next_iv)
5158 struct induction *tv;
5160 if (v->ignore || v->same)
5161 continue;
5163 benefit = loop_giv_reduce_benefit (loop, bl, v, test_reg);
5165 /* If an insn is not to be strength reduced, then set its ignore
5166 flag, and clear bl->all_reduced. */
5168 /* A giv that depends on a reversed biv must be reduced if it is
5169 used after the loop exit, otherwise, it would have the wrong
5170 value after the loop exit. To make it simple, just reduce all
5171 of such giv's whether or not we know they are used after the loop
5172 exit. */
5174 if (! flag_reduce_all_givs
5175 && v->lifetime * threshold * benefit < insn_count
5176 && ! bl->reversed)
5178 if (loop_dump_stream)
5179 fprintf (loop_dump_stream,
5180 "giv of insn %d not worth while, %d vs %d.\n",
5181 INSN_UID (v->insn),
5182 v->lifetime * threshold * benefit, insn_count);
5183 v->ignore = 1;
5184 bl->all_reduced = 0;
5186 else
5188 /* Check that we can increment the reduced giv without a
5189 multiply insn. If not, reject it. */
5191 for (tv = bl->biv; tv; tv = tv->next_iv)
5192 if (tv->mult_val == const1_rtx
5193 && ! product_cheap_p (tv->add_val, v->mult_val))
5195 if (loop_dump_stream)
5196 fprintf (loop_dump_stream,
5197 "giv of insn %d: would need a multiply.\n",
5198 INSN_UID (v->insn));
5199 v->ignore = 1;
5200 bl->all_reduced = 0;
5201 break;
5206 /* Check for givs whose first use is their definition and whose
5207 last use is the definition of another giv. If so, it is likely
5208 dead and should not be used to derive another giv nor to
5209 eliminate a biv. */
5210 loop_givs_dead_check (loop, bl);
5212 /* Reduce each giv that we decided to reduce. */
5213 loop_givs_reduce (loop, bl);
5215 /* Rescan all givs. If a giv is the same as a giv not reduced, mark it
5216 as not reduced.
5218 For each giv register that can be reduced now: if replaceable,
5219 substitute reduced reg wherever the old giv occurs;
5220 else add new move insn "giv_reg = reduced_reg". */
5221 loop_givs_rescan (loop, bl, reg_map);
5223 /* All the givs based on the biv bl have been reduced if they
5224 merit it. */
5226 /* For each giv not marked as maybe dead that has been combined with a
5227 second giv, clear any "maybe dead" mark on that second giv.
5228 v->new_reg will either be or refer to the register of the giv it
5229 combined with.
5231 Doing this clearing avoids problems in biv elimination where
5232 a giv's new_reg is a complex value that can't be put in the
5233 insn but the giv combined with (with a reg as new_reg) is
5234 marked maybe_dead. Since the register will be used in either
5235 case, we'd prefer it be used from the simpler giv. */
5237 for (v = bl->giv; v; v = v->next_iv)
5238 if (! v->maybe_dead && v->same)
5239 v->same->maybe_dead = 0;
5241 /* Try to eliminate the biv, if it is a candidate.
5242 This won't work if ! bl->all_reduced,
5243 since the givs we planned to use might not have been reduced.
5245 We have to be careful that we didn't initially think we could
5246 eliminate this biv because of a giv that we now think may be
5247 dead and shouldn't be used as a biv replacement.
5249 Also, there is the possibility that we may have a giv that looks
5250 like it can be used to eliminate a biv, but the resulting insn
5251 isn't valid. This can happen, for example, on the 88k, where a
5252 JUMP_INSN can compare a register only with zero. Attempts to
5253 replace it with a compare with a constant will fail.
5255 Note that in cases where this call fails, we may have replaced some
5256 of the occurrences of the biv with a giv, but no harm was done in
5257 doing so in the rare cases where it can occur. */
5259 if (bl->all_reduced == 1 && bl->eliminable
5260 && maybe_eliminate_biv (loop, bl, 1, threshold, insn_count))
5262 /* ?? If we created a new test to bypass the loop entirely,
5263 or otherwise drop straight in, based on this test, then
5264 we might want to rewrite it also. This way some later
5265 pass has more hope of removing the initialization of this
5266 biv entirely. */
5268 /* If final_value != 0, then the biv may be used after loop end
5269 and we must emit an insn to set it just in case.
5271 Reversed bivs already have an insn after the loop setting their
5272 value, so we don't need another one. We can't calculate the
5273 proper final value for such a biv here anyways. */
5274 if (bl->final_value && ! bl->reversed)
5275 loop_insn_sink_or_swim (loop,
5276 gen_load_of_final_value (bl->biv->dest_reg,
5277 bl->final_value));
5279 if (loop_dump_stream)
5280 fprintf (loop_dump_stream, "Reg %d: biv eliminated\n",
5281 bl->regno);
5283 /* See above note wrt final_value. But since we couldn't eliminate
5284 the biv, we must set the value after the loop instead of before. */
5285 else if (bl->final_value && ! bl->reversed)
5286 loop_insn_sink (loop, gen_load_of_final_value (bl->biv->dest_reg,
5287 bl->final_value));
5290 /* Go through all the instructions in the loop, making all the
5291 register substitutions scheduled in REG_MAP. */
5293 for (p = loop->start; p != loop->end; p = NEXT_INSN (p))
5294 if (GET_CODE (p) == INSN || GET_CODE (p) == JUMP_INSN
5295 || GET_CODE (p) == CALL_INSN)
5297 replace_regs (PATTERN (p), reg_map, reg_map_size, 0);
5298 replace_regs (REG_NOTES (p), reg_map, reg_map_size, 0);
5299 INSN_CODE (p) = -1;
5302 if (loop_info->n_iterations > 0)
5304 /* When we completely unroll a loop we will likely not need the increment
5305 of the loop BIV and we will not need the conditional branch at the
5306 end of the loop. */
5307 unrolled_insn_copies = insn_count - 2;
5309 #ifdef HAVE_cc0
5310 /* When we completely unroll a loop on a HAVE_cc0 machine we will not
5311 need the comparison before the conditional branch at the end of the
5312 loop. */
5313 unrolled_insn_copies -= 1;
5314 #endif
5316 /* We'll need one copy for each loop iteration. */
5317 unrolled_insn_copies *= loop_info->n_iterations;
5319 /* A little slop to account for the ability to remove initialization
5320 code, better CSE, and other secondary benefits of completely
5321 unrolling some loops. */
5322 unrolled_insn_copies -= 1;
5324 /* Clamp the value. */
5325 if (unrolled_insn_copies < 0)
5326 unrolled_insn_copies = 0;
5329 /* Unroll loops from within strength reduction so that we can use the
5330 induction variable information that strength_reduce has already
5331 collected. Always unroll loops that would be as small or smaller
5332 unrolled than when rolled. */
5333 if ((flags & LOOP_UNROLL)
5334 || ((flags & LOOP_AUTO_UNROLL)
5335 && loop_info->n_iterations > 0
5336 && unrolled_insn_copies <= insn_count))
5337 unroll_loop (loop, insn_count, 1);
5339 #ifdef HAVE_doloop_end
5340 if (HAVE_doloop_end && (flags & LOOP_BCT) && flag_branch_on_count_reg)
5341 doloop_optimize (loop);
5342 #endif /* HAVE_doloop_end */
5344 /* In case number of iterations is known, drop branch prediction note
5345 in the branch. Do that only in second loop pass, as loop unrolling
5346 may change the number of iterations performed. */
5347 if (flags & LOOP_BCT)
5349 unsigned HOST_WIDE_INT n
5350 = loop_info->n_iterations / loop_info->unroll_number;
5351 if (n > 1)
5352 predict_insn (prev_nonnote_insn (loop->end), PRED_LOOP_ITERATIONS,
5353 REG_BR_PROB_BASE - REG_BR_PROB_BASE / n);
5356 if (loop_dump_stream)
5357 fprintf (loop_dump_stream, "\n");
5359 loop_ivs_free (loop);
5360 if (reg_map)
5361 free (reg_map);
5364 /*Record all basic induction variables calculated in the insn. */
5365 static rtx
5366 check_insn_for_bivs (loop, p, not_every_iteration, maybe_multiple)
5367 struct loop *loop;
5368 rtx p;
5369 int not_every_iteration;
5370 int maybe_multiple;
5372 struct loop_ivs *ivs = LOOP_IVS (loop);
5373 rtx set;
5374 rtx dest_reg;
5375 rtx inc_val;
5376 rtx mult_val;
5377 rtx *location;
5379 if (GET_CODE (p) == INSN
5380 && (set = single_set (p))
5381 && GET_CODE (SET_DEST (set)) == REG)
5383 dest_reg = SET_DEST (set);
5384 if (REGNO (dest_reg) < max_reg_before_loop
5385 && REGNO (dest_reg) >= FIRST_PSEUDO_REGISTER
5386 && REG_IV_TYPE (ivs, REGNO (dest_reg)) != NOT_BASIC_INDUCT)
5388 if (basic_induction_var (loop, SET_SRC (set),
5389 GET_MODE (SET_SRC (set)),
5390 dest_reg, p, &inc_val, &mult_val,
5391 &location))
5393 /* It is a possible basic induction variable.
5394 Create and initialize an induction structure for it. */
5396 struct induction *v
5397 = (struct induction *) xmalloc (sizeof (struct induction));
5399 record_biv (loop, v, p, dest_reg, inc_val, mult_val, location,
5400 not_every_iteration, maybe_multiple);
5401 REG_IV_TYPE (ivs, REGNO (dest_reg)) = BASIC_INDUCT;
5403 else if (REGNO (dest_reg) < ivs->n_regs)
5404 REG_IV_TYPE (ivs, REGNO (dest_reg)) = NOT_BASIC_INDUCT;
5407 return p;
5410 /* Record all givs calculated in the insn.
5411 A register is a giv if: it is only set once, it is a function of a
5412 biv and a constant (or invariant), and it is not a biv. */
5413 static rtx
5414 check_insn_for_givs (loop, p, not_every_iteration, maybe_multiple)
5415 struct loop *loop;
5416 rtx p;
5417 int not_every_iteration;
5418 int maybe_multiple;
5420 struct loop_regs *regs = LOOP_REGS (loop);
5422 rtx set;
5423 /* Look for a general induction variable in a register. */
5424 if (GET_CODE (p) == INSN
5425 && (set = single_set (p))
5426 && GET_CODE (SET_DEST (set)) == REG
5427 && ! regs->array[REGNO (SET_DEST (set))].may_not_optimize)
5429 rtx src_reg;
5430 rtx dest_reg;
5431 rtx add_val;
5432 rtx mult_val;
5433 rtx ext_val;
5434 int benefit;
5435 rtx regnote = 0;
5436 rtx last_consec_insn;
5438 dest_reg = SET_DEST (set);
5439 if (REGNO (dest_reg) < FIRST_PSEUDO_REGISTER)
5440 return p;
5442 if (/* SET_SRC is a giv. */
5443 (general_induction_var (loop, SET_SRC (set), &src_reg, &add_val,
5444 &mult_val, &ext_val, 0, &benefit, VOIDmode)
5445 /* Equivalent expression is a giv. */
5446 || ((regnote = find_reg_note (p, REG_EQUAL, NULL_RTX))
5447 && general_induction_var (loop, XEXP (regnote, 0), &src_reg,
5448 &add_val, &mult_val, &ext_val, 0,
5449 &benefit, VOIDmode)))
5450 /* Don't try to handle any regs made by loop optimization.
5451 We have nothing on them in regno_first_uid, etc. */
5452 && REGNO (dest_reg) < max_reg_before_loop
5453 /* Don't recognize a BASIC_INDUCT_VAR here. */
5454 && dest_reg != src_reg
5455 /* This must be the only place where the register is set. */
5456 && (regs->array[REGNO (dest_reg)].n_times_set == 1
5457 /* or all sets must be consecutive and make a giv. */
5458 || (benefit = consec_sets_giv (loop, benefit, p,
5459 src_reg, dest_reg,
5460 &add_val, &mult_val, &ext_val,
5461 &last_consec_insn))))
5463 struct induction *v
5464 = (struct induction *) xmalloc (sizeof (struct induction));
5466 /* If this is a library call, increase benefit. */
5467 if (find_reg_note (p, REG_RETVAL, NULL_RTX))
5468 benefit += libcall_benefit (p);
5470 /* Skip the consecutive insns, if there are any. */
5471 if (regs->array[REGNO (dest_reg)].n_times_set != 1)
5472 p = last_consec_insn;
5474 record_giv (loop, v, p, src_reg, dest_reg, mult_val, add_val,
5475 ext_val, benefit, DEST_REG, not_every_iteration,
5476 maybe_multiple, (rtx*) 0);
5481 #ifndef DONT_REDUCE_ADDR
5482 /* Look for givs which are memory addresses. */
5483 /* This resulted in worse code on a VAX 8600. I wonder if it
5484 still does. */
5485 if (GET_CODE (p) == INSN)
5486 find_mem_givs (loop, PATTERN (p), p, not_every_iteration,
5487 maybe_multiple);
5488 #endif
5490 /* Update the status of whether giv can derive other givs. This can
5491 change when we pass a label or an insn that updates a biv. */
5492 if (GET_CODE (p) == INSN || GET_CODE (p) == JUMP_INSN
5493 || GET_CODE (p) == CODE_LABEL)
5494 update_giv_derive (loop, p);
5495 return p;
5498 /* Return 1 if X is a valid source for an initial value (or as value being
5499 compared against in an initial test).
5501 X must be either a register or constant and must not be clobbered between
5502 the current insn and the start of the loop.
5504 INSN is the insn containing X. */
5506 static int
5507 valid_initial_value_p (x, insn, call_seen, loop_start)
5508 rtx x;
5509 rtx insn;
5510 int call_seen;
5511 rtx loop_start;
5513 if (CONSTANT_P (x))
5514 return 1;
5516 /* Only consider pseudos we know about initialized in insns whose luids
5517 we know. */
5518 if (GET_CODE (x) != REG
5519 || REGNO (x) >= max_reg_before_loop)
5520 return 0;
5522 /* Don't use call-clobbered registers across a call which clobbers it. On
5523 some machines, don't use any hard registers at all. */
5524 if (REGNO (x) < FIRST_PSEUDO_REGISTER
5525 && (SMALL_REGISTER_CLASSES
5526 || (call_used_regs[REGNO (x)] && call_seen)))
5527 return 0;
5529 /* Don't use registers that have been clobbered before the start of the
5530 loop. */
5531 if (reg_set_between_p (x, insn, loop_start))
5532 return 0;
5534 return 1;
5537 /* Scan X for memory refs and check each memory address
5538 as a possible giv. INSN is the insn whose pattern X comes from.
5539 NOT_EVERY_ITERATION is 1 if the insn might not be executed during
5540 every loop iteration. MAYBE_MULTIPLE is 1 if the insn might be executed
5541 more than once in each loop iteration. */
5543 static void
5544 find_mem_givs (loop, x, insn, not_every_iteration, maybe_multiple)
5545 const struct loop *loop;
5546 rtx x;
5547 rtx insn;
5548 int not_every_iteration, maybe_multiple;
5550 int i, j;
5551 enum rtx_code code;
5552 const char *fmt;
5554 if (x == 0)
5555 return;
5557 code = GET_CODE (x);
5558 switch (code)
5560 case REG:
5561 case CONST_INT:
5562 case CONST:
5563 case CONST_DOUBLE:
5564 case SYMBOL_REF:
5565 case LABEL_REF:
5566 case PC:
5567 case CC0:
5568 case ADDR_VEC:
5569 case ADDR_DIFF_VEC:
5570 case USE:
5571 case CLOBBER:
5572 return;
5574 case MEM:
5576 rtx src_reg;
5577 rtx add_val;
5578 rtx mult_val;
5579 rtx ext_val;
5580 int benefit;
5582 /* This code used to disable creating GIVs with mult_val == 1 and
5583 add_val == 0. However, this leads to lost optimizations when
5584 it comes time to combine a set of related DEST_ADDR GIVs, since
5585 this one would not be seen. */
5587 if (general_induction_var (loop, XEXP (x, 0), &src_reg, &add_val,
5588 &mult_val, &ext_val, 1, &benefit,
5589 GET_MODE (x)))
5591 /* Found one; record it. */
5592 struct induction *v
5593 = (struct induction *) xmalloc (sizeof (struct induction));
5595 record_giv (loop, v, insn, src_reg, addr_placeholder, mult_val,
5596 add_val, ext_val, benefit, DEST_ADDR,
5597 not_every_iteration, maybe_multiple, &XEXP (x, 0));
5599 v->mem = x;
5602 return;
5604 default:
5605 break;
5608 /* Recursively scan the subexpressions for other mem refs. */
5610 fmt = GET_RTX_FORMAT (code);
5611 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5612 if (fmt[i] == 'e')
5613 find_mem_givs (loop, XEXP (x, i), insn, not_every_iteration,
5614 maybe_multiple);
5615 else if (fmt[i] == 'E')
5616 for (j = 0; j < XVECLEN (x, i); j++)
5617 find_mem_givs (loop, XVECEXP (x, i, j), insn, not_every_iteration,
5618 maybe_multiple);
5621 /* Fill in the data about one biv update.
5622 V is the `struct induction' in which we record the biv. (It is
5623 allocated by the caller, with alloca.)
5624 INSN is the insn that sets it.
5625 DEST_REG is the biv's reg.
5627 MULT_VAL is const1_rtx if the biv is being incremented here, in which case
5628 INC_VAL is the increment. Otherwise, MULT_VAL is const0_rtx and the biv is
5629 being set to INC_VAL.
5631 NOT_EVERY_ITERATION is nonzero if this biv update is not know to be
5632 executed every iteration; MAYBE_MULTIPLE is nonzero if this biv update
5633 can be executed more than once per iteration. If MAYBE_MULTIPLE
5634 and NOT_EVERY_ITERATION are both zero, we know that the biv update is
5635 executed exactly once per iteration. */
5637 static void
5638 record_biv (loop, v, insn, dest_reg, inc_val, mult_val, location,
5639 not_every_iteration, maybe_multiple)
5640 struct loop *loop;
5641 struct induction *v;
5642 rtx insn;
5643 rtx dest_reg;
5644 rtx inc_val;
5645 rtx mult_val;
5646 rtx *location;
5647 int not_every_iteration;
5648 int maybe_multiple;
5650 struct loop_ivs *ivs = LOOP_IVS (loop);
5651 struct iv_class *bl;
5653 v->insn = insn;
5654 v->src_reg = dest_reg;
5655 v->dest_reg = dest_reg;
5656 v->mult_val = mult_val;
5657 v->add_val = inc_val;
5658 v->ext_dependent = NULL_RTX;
5659 v->location = location;
5660 v->mode = GET_MODE (dest_reg);
5661 v->always_computable = ! not_every_iteration;
5662 v->always_executed = ! not_every_iteration;
5663 v->maybe_multiple = maybe_multiple;
5665 /* Add this to the reg's iv_class, creating a class
5666 if this is the first incrementation of the reg. */
5668 bl = REG_IV_CLASS (ivs, REGNO (dest_reg));
5669 if (bl == 0)
5671 /* Create and initialize new iv_class. */
5673 bl = (struct iv_class *) xmalloc (sizeof (struct iv_class));
5675 bl->regno = REGNO (dest_reg);
5676 bl->biv = 0;
5677 bl->giv = 0;
5678 bl->biv_count = 0;
5679 bl->giv_count = 0;
5681 /* Set initial value to the reg itself. */
5682 bl->initial_value = dest_reg;
5683 bl->final_value = 0;
5684 /* We haven't seen the initializing insn yet */
5685 bl->init_insn = 0;
5686 bl->init_set = 0;
5687 bl->initial_test = 0;
5688 bl->incremented = 0;
5689 bl->eliminable = 0;
5690 bl->nonneg = 0;
5691 bl->reversed = 0;
5692 bl->total_benefit = 0;
5694 /* Add this class to ivs->list. */
5695 bl->next = ivs->list;
5696 ivs->list = bl;
5698 /* Put it in the array of biv register classes. */
5699 REG_IV_CLASS (ivs, REGNO (dest_reg)) = bl;
5702 /* Update IV_CLASS entry for this biv. */
5703 v->next_iv = bl->biv;
5704 bl->biv = v;
5705 bl->biv_count++;
5706 if (mult_val == const1_rtx)
5707 bl->incremented = 1;
5709 if (loop_dump_stream)
5710 loop_biv_dump (v, loop_dump_stream, 0);
5713 /* Fill in the data about one giv.
5714 V is the `struct induction' in which we record the giv. (It is
5715 allocated by the caller, with alloca.)
5716 INSN is the insn that sets it.
5717 BENEFIT estimates the savings from deleting this insn.
5718 TYPE is DEST_REG or DEST_ADDR; it says whether the giv is computed
5719 into a register or is used as a memory address.
5721 SRC_REG is the biv reg which the giv is computed from.
5722 DEST_REG is the giv's reg (if the giv is stored in a reg).
5723 MULT_VAL and ADD_VAL are the coefficients used to compute the giv.
5724 LOCATION points to the place where this giv's value appears in INSN. */
5726 static void
5727 record_giv (loop, v, insn, src_reg, dest_reg, mult_val, add_val, ext_val,
5728 benefit, type, not_every_iteration, maybe_multiple, location)
5729 const struct loop *loop;
5730 struct induction *v;
5731 rtx insn;
5732 rtx src_reg;
5733 rtx dest_reg;
5734 rtx mult_val, add_val, ext_val;
5735 int benefit;
5736 enum g_types type;
5737 int not_every_iteration, maybe_multiple;
5738 rtx *location;
5740 struct loop_ivs *ivs = LOOP_IVS (loop);
5741 struct induction *b;
5742 struct iv_class *bl;
5743 rtx set = single_set (insn);
5744 rtx temp;
5746 /* Attempt to prove constantness of the values. Don't let simplify_rtx
5747 undo the MULT canonicalization that we performed earlier. */
5748 temp = simplify_rtx (add_val);
5749 if (temp
5750 && ! (GET_CODE (add_val) == MULT
5751 && GET_CODE (temp) == ASHIFT))
5752 add_val = temp;
5754 v->insn = insn;
5755 v->src_reg = src_reg;
5756 v->giv_type = type;
5757 v->dest_reg = dest_reg;
5758 v->mult_val = mult_val;
5759 v->add_val = add_val;
5760 v->ext_dependent = ext_val;
5761 v->benefit = benefit;
5762 v->location = location;
5763 v->cant_derive = 0;
5764 v->combined_with = 0;
5765 v->maybe_multiple = maybe_multiple;
5766 v->maybe_dead = 0;
5767 v->derive_adjustment = 0;
5768 v->same = 0;
5769 v->ignore = 0;
5770 v->new_reg = 0;
5771 v->final_value = 0;
5772 v->same_insn = 0;
5773 v->auto_inc_opt = 0;
5774 v->unrolled = 0;
5775 v->shared = 0;
5777 /* The v->always_computable field is used in update_giv_derive, to
5778 determine whether a giv can be used to derive another giv. For a
5779 DEST_REG giv, INSN computes a new value for the giv, so its value
5780 isn't computable if INSN insn't executed every iteration.
5781 However, for a DEST_ADDR giv, INSN merely uses the value of the giv;
5782 it does not compute a new value. Hence the value is always computable
5783 regardless of whether INSN is executed each iteration. */
5785 if (type == DEST_ADDR)
5786 v->always_computable = 1;
5787 else
5788 v->always_computable = ! not_every_iteration;
5790 v->always_executed = ! not_every_iteration;
5792 if (type == DEST_ADDR)
5794 v->mode = GET_MODE (*location);
5795 v->lifetime = 1;
5797 else /* type == DEST_REG */
5799 v->mode = GET_MODE (SET_DEST (set));
5801 v->lifetime = LOOP_REG_LIFETIME (loop, REGNO (dest_reg));
5803 /* If the lifetime is zero, it means that this register is
5804 really a dead store. So mark this as a giv that can be
5805 ignored. This will not prevent the biv from being eliminated. */
5806 if (v->lifetime == 0)
5807 v->ignore = 1;
5809 REG_IV_TYPE (ivs, REGNO (dest_reg)) = GENERAL_INDUCT;
5810 REG_IV_INFO (ivs, REGNO (dest_reg)) = v;
5813 /* Add the giv to the class of givs computed from one biv. */
5815 bl = REG_IV_CLASS (ivs, REGNO (src_reg));
5816 if (bl)
5818 v->next_iv = bl->giv;
5819 bl->giv = v;
5820 /* Don't count DEST_ADDR. This is supposed to count the number of
5821 insns that calculate givs. */
5822 if (type == DEST_REG)
5823 bl->giv_count++;
5824 bl->total_benefit += benefit;
5826 else
5827 /* Fatal error, biv missing for this giv? */
5828 abort ();
5830 if (type == DEST_ADDR)
5832 v->replaceable = 1;
5833 v->not_replaceable = 0;
5835 else
5837 /* The giv can be replaced outright by the reduced register only if all
5838 of the following conditions are true:
5839 - the insn that sets the giv is always executed on any iteration
5840 on which the giv is used at all
5841 (there are two ways to deduce this:
5842 either the insn is executed on every iteration,
5843 or all uses follow that insn in the same basic block),
5844 - the giv is not used outside the loop
5845 - no assignments to the biv occur during the giv's lifetime. */
5847 if (REGNO_FIRST_UID (REGNO (dest_reg)) == INSN_UID (insn)
5848 /* Previous line always fails if INSN was moved by loop opt. */
5849 && REGNO_LAST_LUID (REGNO (dest_reg))
5850 < INSN_LUID (loop->end)
5851 && (! not_every_iteration
5852 || last_use_this_basic_block (dest_reg, insn)))
5854 /* Now check that there are no assignments to the biv within the
5855 giv's lifetime. This requires two separate checks. */
5857 /* Check each biv update, and fail if any are between the first
5858 and last use of the giv.
5860 If this loop contains an inner loop that was unrolled, then
5861 the insn modifying the biv may have been emitted by the loop
5862 unrolling code, and hence does not have a valid luid. Just
5863 mark the biv as not replaceable in this case. It is not very
5864 useful as a biv, because it is used in two different loops.
5865 It is very unlikely that we would be able to optimize the giv
5866 using this biv anyways. */
5868 v->replaceable = 1;
5869 v->not_replaceable = 0;
5870 for (b = bl->biv; b; b = b->next_iv)
5872 if (INSN_UID (b->insn) >= max_uid_for_loop
5873 || ((INSN_LUID (b->insn)
5874 >= REGNO_FIRST_LUID (REGNO (dest_reg)))
5875 && (INSN_LUID (b->insn)
5876 <= REGNO_LAST_LUID (REGNO (dest_reg)))))
5878 v->replaceable = 0;
5879 v->not_replaceable = 1;
5880 break;
5884 /* If there are any backwards branches that go from after the
5885 biv update to before it, then this giv is not replaceable. */
5886 if (v->replaceable)
5887 for (b = bl->biv; b; b = b->next_iv)
5888 if (back_branch_in_range_p (loop, b->insn))
5890 v->replaceable = 0;
5891 v->not_replaceable = 1;
5892 break;
5895 else
5897 /* May still be replaceable, we don't have enough info here to
5898 decide. */
5899 v->replaceable = 0;
5900 v->not_replaceable = 0;
5904 /* Record whether the add_val contains a const_int, for later use by
5905 combine_givs. */
5907 rtx tem = add_val;
5909 v->no_const_addval = 1;
5910 if (tem == const0_rtx)
5912 else if (CONSTANT_P (add_val))
5913 v->no_const_addval = 0;
5914 if (GET_CODE (tem) == PLUS)
5916 while (1)
5918 if (GET_CODE (XEXP (tem, 0)) == PLUS)
5919 tem = XEXP (tem, 0);
5920 else if (GET_CODE (XEXP (tem, 1)) == PLUS)
5921 tem = XEXP (tem, 1);
5922 else
5923 break;
5925 if (CONSTANT_P (XEXP (tem, 1)))
5926 v->no_const_addval = 0;
5930 if (loop_dump_stream)
5931 loop_giv_dump (v, loop_dump_stream, 0);
5934 /* All this does is determine whether a giv can be made replaceable because
5935 its final value can be calculated. This code can not be part of record_giv
5936 above, because final_giv_value requires that the number of loop iterations
5937 be known, and that can not be accurately calculated until after all givs
5938 have been identified. */
5940 static void
5941 check_final_value (loop, v)
5942 const struct loop *loop;
5943 struct induction *v;
5945 rtx final_value = 0;
5947 /* DEST_ADDR givs will never reach here, because they are always marked
5948 replaceable above in record_giv. */
5950 /* The giv can be replaced outright by the reduced register only if all
5951 of the following conditions are true:
5952 - the insn that sets the giv is always executed on any iteration
5953 on which the giv is used at all
5954 (there are two ways to deduce this:
5955 either the insn is executed on every iteration,
5956 or all uses follow that insn in the same basic block),
5957 - its final value can be calculated (this condition is different
5958 than the one above in record_giv)
5959 - it's not used before the it's set
5960 - no assignments to the biv occur during the giv's lifetime. */
5962 #if 0
5963 /* This is only called now when replaceable is known to be false. */
5964 /* Clear replaceable, so that it won't confuse final_giv_value. */
5965 v->replaceable = 0;
5966 #endif
5968 if ((final_value = final_giv_value (loop, v))
5969 && (v->always_executed
5970 || last_use_this_basic_block (v->dest_reg, v->insn)))
5972 int biv_increment_seen = 0, before_giv_insn = 0;
5973 rtx p = v->insn;
5974 rtx last_giv_use;
5976 v->replaceable = 1;
5977 v->not_replaceable = 0;
5979 /* When trying to determine whether or not a biv increment occurs
5980 during the lifetime of the giv, we can ignore uses of the variable
5981 outside the loop because final_value is true. Hence we can not
5982 use regno_last_uid and regno_first_uid as above in record_giv. */
5984 /* Search the loop to determine whether any assignments to the
5985 biv occur during the giv's lifetime. Start with the insn
5986 that sets the giv, and search around the loop until we come
5987 back to that insn again.
5989 Also fail if there is a jump within the giv's lifetime that jumps
5990 to somewhere outside the lifetime but still within the loop. This
5991 catches spaghetti code where the execution order is not linear, and
5992 hence the above test fails. Here we assume that the giv lifetime
5993 does not extend from one iteration of the loop to the next, so as
5994 to make the test easier. Since the lifetime isn't known yet,
5995 this requires two loops. See also record_giv above. */
5997 last_giv_use = v->insn;
5999 while (1)
6001 p = NEXT_INSN (p);
6002 if (p == loop->end)
6004 before_giv_insn = 1;
6005 p = NEXT_INSN (loop->start);
6007 if (p == v->insn)
6008 break;
6010 if (GET_CODE (p) == INSN || GET_CODE (p) == JUMP_INSN
6011 || GET_CODE (p) == CALL_INSN)
6013 /* It is possible for the BIV increment to use the GIV if we
6014 have a cycle. Thus we must be sure to check each insn for
6015 both BIV and GIV uses, and we must check for BIV uses
6016 first. */
6018 if (! biv_increment_seen
6019 && reg_set_p (v->src_reg, PATTERN (p)))
6020 biv_increment_seen = 1;
6022 if (reg_mentioned_p (v->dest_reg, PATTERN (p)))
6024 if (biv_increment_seen || before_giv_insn)
6026 v->replaceable = 0;
6027 v->not_replaceable = 1;
6028 break;
6030 last_giv_use = p;
6035 /* Now that the lifetime of the giv is known, check for branches
6036 from within the lifetime to outside the lifetime if it is still
6037 replaceable. */
6039 if (v->replaceable)
6041 p = v->insn;
6042 while (1)
6044 p = NEXT_INSN (p);
6045 if (p == loop->end)
6046 p = NEXT_INSN (loop->start);
6047 if (p == last_giv_use)
6048 break;
6050 if (GET_CODE (p) == JUMP_INSN && JUMP_LABEL (p)
6051 && LABEL_NAME (JUMP_LABEL (p))
6052 && ((loop_insn_first_p (JUMP_LABEL (p), v->insn)
6053 && loop_insn_first_p (loop->start, JUMP_LABEL (p)))
6054 || (loop_insn_first_p (last_giv_use, JUMP_LABEL (p))
6055 && loop_insn_first_p (JUMP_LABEL (p), loop->end))))
6057 v->replaceable = 0;
6058 v->not_replaceable = 1;
6060 if (loop_dump_stream)
6061 fprintf (loop_dump_stream,
6062 "Found branch outside giv lifetime.\n");
6064 break;
6069 /* If it is replaceable, then save the final value. */
6070 if (v->replaceable)
6071 v->final_value = final_value;
6074 if (loop_dump_stream && v->replaceable)
6075 fprintf (loop_dump_stream, "Insn %d: giv reg %d final_value replaceable\n",
6076 INSN_UID (v->insn), REGNO (v->dest_reg));
6079 /* Update the status of whether a giv can derive other givs.
6081 We need to do something special if there is or may be an update to the biv
6082 between the time the giv is defined and the time it is used to derive
6083 another giv.
6085 In addition, a giv that is only conditionally set is not allowed to
6086 derive another giv once a label has been passed.
6088 The cases we look at are when a label or an update to a biv is passed. */
6090 static void
6091 update_giv_derive (loop, p)
6092 const struct loop *loop;
6093 rtx p;
6095 struct loop_ivs *ivs = LOOP_IVS (loop);
6096 struct iv_class *bl;
6097 struct induction *biv, *giv;
6098 rtx tem;
6099 int dummy;
6101 /* Search all IV classes, then all bivs, and finally all givs.
6103 There are three cases we are concerned with. First we have the situation
6104 of a giv that is only updated conditionally. In that case, it may not
6105 derive any givs after a label is passed.
6107 The second case is when a biv update occurs, or may occur, after the
6108 definition of a giv. For certain biv updates (see below) that are
6109 known to occur between the giv definition and use, we can adjust the
6110 giv definition. For others, or when the biv update is conditional,
6111 we must prevent the giv from deriving any other givs. There are two
6112 sub-cases within this case.
6114 If this is a label, we are concerned with any biv update that is done
6115 conditionally, since it may be done after the giv is defined followed by
6116 a branch here (actually, we need to pass both a jump and a label, but
6117 this extra tracking doesn't seem worth it).
6119 If this is a jump, we are concerned about any biv update that may be
6120 executed multiple times. We are actually only concerned about
6121 backward jumps, but it is probably not worth performing the test
6122 on the jump again here.
6124 If this is a biv update, we must adjust the giv status to show that a
6125 subsequent biv update was performed. If this adjustment cannot be done,
6126 the giv cannot derive further givs. */
6128 for (bl = ivs->list; bl; bl = bl->next)
6129 for (biv = bl->biv; biv; biv = biv->next_iv)
6130 if (GET_CODE (p) == CODE_LABEL || GET_CODE (p) == JUMP_INSN
6131 || biv->insn == p)
6133 for (giv = bl->giv; giv; giv = giv->next_iv)
6135 /* If cant_derive is already true, there is no point in
6136 checking all of these conditions again. */
6137 if (giv->cant_derive)
6138 continue;
6140 /* If this giv is conditionally set and we have passed a label,
6141 it cannot derive anything. */
6142 if (GET_CODE (p) == CODE_LABEL && ! giv->always_computable)
6143 giv->cant_derive = 1;
6145 /* Skip givs that have mult_val == 0, since
6146 they are really invariants. Also skip those that are
6147 replaceable, since we know their lifetime doesn't contain
6148 any biv update. */
6149 else if (giv->mult_val == const0_rtx || giv->replaceable)
6150 continue;
6152 /* The only way we can allow this giv to derive another
6153 is if this is a biv increment and we can form the product
6154 of biv->add_val and giv->mult_val. In this case, we will
6155 be able to compute a compensation. */
6156 else if (biv->insn == p)
6158 rtx ext_val_dummy;
6160 tem = 0;
6161 if (biv->mult_val == const1_rtx)
6162 tem = simplify_giv_expr (loop,
6163 gen_rtx_MULT (giv->mode,
6164 biv->add_val,
6165 giv->mult_val),
6166 &ext_val_dummy, &dummy);
6168 if (tem && giv->derive_adjustment)
6169 tem = simplify_giv_expr
6170 (loop,
6171 gen_rtx_PLUS (giv->mode, tem, giv->derive_adjustment),
6172 &ext_val_dummy, &dummy);
6174 if (tem)
6175 giv->derive_adjustment = tem;
6176 else
6177 giv->cant_derive = 1;
6179 else if ((GET_CODE (p) == CODE_LABEL && ! biv->always_computable)
6180 || (GET_CODE (p) == JUMP_INSN && biv->maybe_multiple))
6181 giv->cant_derive = 1;
6186 /* Check whether an insn is an increment legitimate for a basic induction var.
6187 X is the source of insn P, or a part of it.
6188 MODE is the mode in which X should be interpreted.
6190 DEST_REG is the putative biv, also the destination of the insn.
6191 We accept patterns of these forms:
6192 REG = REG + INVARIANT (includes REG = REG - CONSTANT)
6193 REG = INVARIANT + REG
6195 If X is suitable, we return 1, set *MULT_VAL to CONST1_RTX,
6196 store the additive term into *INC_VAL, and store the place where
6197 we found the additive term into *LOCATION.
6199 If X is an assignment of an invariant into DEST_REG, we set
6200 *MULT_VAL to CONST0_RTX, and store the invariant into *INC_VAL.
6202 We also want to detect a BIV when it corresponds to a variable
6203 whose mode was promoted via PROMOTED_MODE. In that case, an increment
6204 of the variable may be a PLUS that adds a SUBREG of that variable to
6205 an invariant and then sign- or zero-extends the result of the PLUS
6206 into the variable.
6208 Most GIVs in such cases will be in the promoted mode, since that is the
6209 probably the natural computation mode (and almost certainly the mode
6210 used for addresses) on the machine. So we view the pseudo-reg containing
6211 the variable as the BIV, as if it were simply incremented.
6213 Note that treating the entire pseudo as a BIV will result in making
6214 simple increments to any GIVs based on it. However, if the variable
6215 overflows in its declared mode but not its promoted mode, the result will
6216 be incorrect. This is acceptable if the variable is signed, since
6217 overflows in such cases are undefined, but not if it is unsigned, since
6218 those overflows are defined. So we only check for SIGN_EXTEND and
6219 not ZERO_EXTEND.
6221 If we cannot find a biv, we return 0. */
6223 static int
6224 basic_induction_var (loop, x, mode, dest_reg, p, inc_val, mult_val, location)
6225 const struct loop *loop;
6226 rtx x;
6227 enum machine_mode mode;
6228 rtx dest_reg;
6229 rtx p;
6230 rtx *inc_val;
6231 rtx *mult_val;
6232 rtx **location;
6234 enum rtx_code code;
6235 rtx *argp, arg;
6236 rtx insn, set = 0;
6238 code = GET_CODE (x);
6239 *location = NULL;
6240 switch (code)
6242 case PLUS:
6243 if (rtx_equal_p (XEXP (x, 0), dest_reg)
6244 || (GET_CODE (XEXP (x, 0)) == SUBREG
6245 && SUBREG_PROMOTED_VAR_P (XEXP (x, 0))
6246 && SUBREG_REG (XEXP (x, 0)) == dest_reg))
6248 argp = &XEXP (x, 1);
6250 else if (rtx_equal_p (XEXP (x, 1), dest_reg)
6251 || (GET_CODE (XEXP (x, 1)) == SUBREG
6252 && SUBREG_PROMOTED_VAR_P (XEXP (x, 1))
6253 && SUBREG_REG (XEXP (x, 1)) == dest_reg))
6255 argp = &XEXP (x, 0);
6257 else
6258 return 0;
6260 arg = *argp;
6261 if (loop_invariant_p (loop, arg) != 1)
6262 return 0;
6264 *inc_val = convert_modes (GET_MODE (dest_reg), GET_MODE (x), arg, 0);
6265 *mult_val = const1_rtx;
6266 *location = argp;
6267 return 1;
6269 case SUBREG:
6270 /* If what's inside the SUBREG is a BIV, then the SUBREG. This will
6271 handle addition of promoted variables.
6272 ??? The comment at the start of this function is wrong: promoted
6273 variable increments don't look like it says they do. */
6274 return basic_induction_var (loop, SUBREG_REG (x),
6275 GET_MODE (SUBREG_REG (x)),
6276 dest_reg, p, inc_val, mult_val, location);
6278 case REG:
6279 /* If this register is assigned in a previous insn, look at its
6280 source, but don't go outside the loop or past a label. */
6282 /* If this sets a register to itself, we would repeat any previous
6283 biv increment if we applied this strategy blindly. */
6284 if (rtx_equal_p (dest_reg, x))
6285 return 0;
6287 insn = p;
6288 while (1)
6290 rtx dest;
6293 insn = PREV_INSN (insn);
6295 while (insn && GET_CODE (insn) == NOTE
6296 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_BEG);
6298 if (!insn)
6299 break;
6300 set = single_set (insn);
6301 if (set == 0)
6302 break;
6303 dest = SET_DEST (set);
6304 if (dest == x
6305 || (GET_CODE (dest) == SUBREG
6306 && (GET_MODE_SIZE (GET_MODE (dest)) <= UNITS_PER_WORD)
6307 && (GET_MODE_CLASS (GET_MODE (dest)) == MODE_INT)
6308 && SUBREG_REG (dest) == x))
6309 return basic_induction_var (loop, SET_SRC (set),
6310 (GET_MODE (SET_SRC (set)) == VOIDmode
6311 ? GET_MODE (x)
6312 : GET_MODE (SET_SRC (set))),
6313 dest_reg, insn,
6314 inc_val, mult_val, location);
6316 while (GET_CODE (dest) == SIGN_EXTRACT
6317 || GET_CODE (dest) == ZERO_EXTRACT
6318 || GET_CODE (dest) == SUBREG
6319 || GET_CODE (dest) == STRICT_LOW_PART)
6320 dest = XEXP (dest, 0);
6321 if (dest == x)
6322 break;
6324 /* Fall through. */
6326 /* Can accept constant setting of biv only when inside inner most loop.
6327 Otherwise, a biv of an inner loop may be incorrectly recognized
6328 as a biv of the outer loop,
6329 causing code to be moved INTO the inner loop. */
6330 case MEM:
6331 if (loop_invariant_p (loop, x) != 1)
6332 return 0;
6333 case CONST_INT:
6334 case SYMBOL_REF:
6335 case CONST:
6336 /* convert_modes aborts if we try to convert to or from CCmode, so just
6337 exclude that case. It is very unlikely that a condition code value
6338 would be a useful iterator anyways. convert_modes aborts if we try to
6339 convert a float mode to non-float or vice versa too. */
6340 if (loop->level == 1
6341 && GET_MODE_CLASS (mode) == GET_MODE_CLASS (GET_MODE (dest_reg))
6342 && GET_MODE_CLASS (mode) != MODE_CC)
6344 /* Possible bug here? Perhaps we don't know the mode of X. */
6345 *inc_val = convert_modes (GET_MODE (dest_reg), mode, x, 0);
6346 *mult_val = const0_rtx;
6347 return 1;
6349 else
6350 return 0;
6352 case SIGN_EXTEND:
6353 return basic_induction_var (loop, XEXP (x, 0), GET_MODE (XEXP (x, 0)),
6354 dest_reg, p, inc_val, mult_val, location);
6356 case ASHIFTRT:
6357 /* Similar, since this can be a sign extension. */
6358 for (insn = PREV_INSN (p);
6359 (insn && GET_CODE (insn) == NOTE
6360 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_BEG);
6361 insn = PREV_INSN (insn))
6364 if (insn)
6365 set = single_set (insn);
6367 if (! rtx_equal_p (dest_reg, XEXP (x, 0))
6368 && set && SET_DEST (set) == XEXP (x, 0)
6369 && GET_CODE (XEXP (x, 1)) == CONST_INT
6370 && INTVAL (XEXP (x, 1)) >= 0
6371 && GET_CODE (SET_SRC (set)) == ASHIFT
6372 && XEXP (x, 1) == XEXP (SET_SRC (set), 1))
6373 return basic_induction_var (loop, XEXP (SET_SRC (set), 0),
6374 GET_MODE (XEXP (x, 0)),
6375 dest_reg, insn, inc_val, mult_val,
6376 location);
6377 return 0;
6379 default:
6380 return 0;
6384 /* A general induction variable (giv) is any quantity that is a linear
6385 function of a basic induction variable,
6386 i.e. giv = biv * mult_val + add_val.
6387 The coefficients can be any loop invariant quantity.
6388 A giv need not be computed directly from the biv;
6389 it can be computed by way of other givs. */
6391 /* Determine whether X computes a giv.
6392 If it does, return a nonzero value
6393 which is the benefit from eliminating the computation of X;
6394 set *SRC_REG to the register of the biv that it is computed from;
6395 set *ADD_VAL and *MULT_VAL to the coefficients,
6396 such that the value of X is biv * mult + add; */
6398 static int
6399 general_induction_var (loop, x, src_reg, add_val, mult_val, ext_val,
6400 is_addr, pbenefit, addr_mode)
6401 const struct loop *loop;
6402 rtx x;
6403 rtx *src_reg;
6404 rtx *add_val;
6405 rtx *mult_val;
6406 rtx *ext_val;
6407 int is_addr;
6408 int *pbenefit;
6409 enum machine_mode addr_mode;
6411 struct loop_ivs *ivs = LOOP_IVS (loop);
6412 rtx orig_x = x;
6414 /* If this is an invariant, forget it, it isn't a giv. */
6415 if (loop_invariant_p (loop, x) == 1)
6416 return 0;
6418 *pbenefit = 0;
6419 *ext_val = NULL_RTX;
6420 x = simplify_giv_expr (loop, x, ext_val, pbenefit);
6421 if (x == 0)
6422 return 0;
6424 switch (GET_CODE (x))
6426 case USE:
6427 case CONST_INT:
6428 /* Since this is now an invariant and wasn't before, it must be a giv
6429 with MULT_VAL == 0. It doesn't matter which BIV we associate this
6430 with. */
6431 *src_reg = ivs->list->biv->dest_reg;
6432 *mult_val = const0_rtx;
6433 *add_val = x;
6434 break;
6436 case REG:
6437 /* This is equivalent to a BIV. */
6438 *src_reg = x;
6439 *mult_val = const1_rtx;
6440 *add_val = const0_rtx;
6441 break;
6443 case PLUS:
6444 /* Either (plus (biv) (invar)) or
6445 (plus (mult (biv) (invar_1)) (invar_2)). */
6446 if (GET_CODE (XEXP (x, 0)) == MULT)
6448 *src_reg = XEXP (XEXP (x, 0), 0);
6449 *mult_val = XEXP (XEXP (x, 0), 1);
6451 else
6453 *src_reg = XEXP (x, 0);
6454 *mult_val = const1_rtx;
6456 *add_val = XEXP (x, 1);
6457 break;
6459 case MULT:
6460 /* ADD_VAL is zero. */
6461 *src_reg = XEXP (x, 0);
6462 *mult_val = XEXP (x, 1);
6463 *add_val = const0_rtx;
6464 break;
6466 default:
6467 abort ();
6470 /* Remove any enclosing USE from ADD_VAL and MULT_VAL (there will be
6471 unless they are CONST_INT). */
6472 if (GET_CODE (*add_val) == USE)
6473 *add_val = XEXP (*add_val, 0);
6474 if (GET_CODE (*mult_val) == USE)
6475 *mult_val = XEXP (*mult_val, 0);
6477 if (is_addr)
6478 *pbenefit += address_cost (orig_x, addr_mode) - reg_address_cost;
6479 else
6480 *pbenefit += rtx_cost (orig_x, SET);
6482 /* Always return true if this is a giv so it will be detected as such,
6483 even if the benefit is zero or negative. This allows elimination
6484 of bivs that might otherwise not be eliminated. */
6485 return 1;
6488 /* Given an expression, X, try to form it as a linear function of a biv.
6489 We will canonicalize it to be of the form
6490 (plus (mult (BIV) (invar_1))
6491 (invar_2))
6492 with possible degeneracies.
6494 The invariant expressions must each be of a form that can be used as a
6495 machine operand. We surround then with a USE rtx (a hack, but localized
6496 and certainly unambiguous!) if not a CONST_INT for simplicity in this
6497 routine; it is the caller's responsibility to strip them.
6499 If no such canonicalization is possible (i.e., two biv's are used or an
6500 expression that is neither invariant nor a biv or giv), this routine
6501 returns 0.
6503 For a nonzero return, the result will have a code of CONST_INT, USE,
6504 REG (for a BIV), PLUS, or MULT. No other codes will occur.
6506 *BENEFIT will be incremented by the benefit of any sub-giv encountered. */
6508 static rtx sge_plus PARAMS ((enum machine_mode, rtx, rtx));
6509 static rtx sge_plus_constant PARAMS ((rtx, rtx));
6511 static rtx
6512 simplify_giv_expr (loop, x, ext_val, benefit)
6513 const struct loop *loop;
6514 rtx x;
6515 rtx *ext_val;
6516 int *benefit;
6518 struct loop_ivs *ivs = LOOP_IVS (loop);
6519 struct loop_regs *regs = LOOP_REGS (loop);
6520 enum machine_mode mode = GET_MODE (x);
6521 rtx arg0, arg1;
6522 rtx tem;
6524 /* If this is not an integer mode, or if we cannot do arithmetic in this
6525 mode, this can't be a giv. */
6526 if (mode != VOIDmode
6527 && (GET_MODE_CLASS (mode) != MODE_INT
6528 || GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT))
6529 return NULL_RTX;
6531 switch (GET_CODE (x))
6533 case PLUS:
6534 arg0 = simplify_giv_expr (loop, XEXP (x, 0), ext_val, benefit);
6535 arg1 = simplify_giv_expr (loop, XEXP (x, 1), ext_val, benefit);
6536 if (arg0 == 0 || arg1 == 0)
6537 return NULL_RTX;
6539 /* Put constant last, CONST_INT last if both constant. */
6540 if ((GET_CODE (arg0) == USE
6541 || GET_CODE (arg0) == CONST_INT)
6542 && ! ((GET_CODE (arg0) == USE
6543 && GET_CODE (arg1) == USE)
6544 || GET_CODE (arg1) == CONST_INT))
6545 tem = arg0, arg0 = arg1, arg1 = tem;
6547 /* Handle addition of zero, then addition of an invariant. */
6548 if (arg1 == const0_rtx)
6549 return arg0;
6550 else if (GET_CODE (arg1) == CONST_INT || GET_CODE (arg1) == USE)
6551 switch (GET_CODE (arg0))
6553 case CONST_INT:
6554 case USE:
6555 /* Adding two invariants must result in an invariant, so enclose
6556 addition operation inside a USE and return it. */
6557 if (GET_CODE (arg0) == USE)
6558 arg0 = XEXP (arg0, 0);
6559 if (GET_CODE (arg1) == USE)
6560 arg1 = XEXP (arg1, 0);
6562 if (GET_CODE (arg0) == CONST_INT)
6563 tem = arg0, arg0 = arg1, arg1 = tem;
6564 if (GET_CODE (arg1) == CONST_INT)
6565 tem = sge_plus_constant (arg0, arg1);
6566 else
6567 tem = sge_plus (mode, arg0, arg1);
6569 if (GET_CODE (tem) != CONST_INT)
6570 tem = gen_rtx_USE (mode, tem);
6571 return tem;
6573 case REG:
6574 case MULT:
6575 /* biv + invar or mult + invar. Return sum. */
6576 return gen_rtx_PLUS (mode, arg0, arg1);
6578 case PLUS:
6579 /* (a + invar_1) + invar_2. Associate. */
6580 return
6581 simplify_giv_expr (loop,
6582 gen_rtx_PLUS (mode,
6583 XEXP (arg0, 0),
6584 gen_rtx_PLUS (mode,
6585 XEXP (arg0, 1),
6586 arg1)),
6587 ext_val, benefit);
6589 default:
6590 abort ();
6593 /* Each argument must be either REG, PLUS, or MULT. Convert REG to
6594 MULT to reduce cases. */
6595 if (GET_CODE (arg0) == REG)
6596 arg0 = gen_rtx_MULT (mode, arg0, const1_rtx);
6597 if (GET_CODE (arg1) == REG)
6598 arg1 = gen_rtx_MULT (mode, arg1, const1_rtx);
6600 /* Now have PLUS + PLUS, PLUS + MULT, MULT + PLUS, or MULT + MULT.
6601 Put a MULT first, leaving PLUS + PLUS, MULT + PLUS, or MULT + MULT.
6602 Recurse to associate the second PLUS. */
6603 if (GET_CODE (arg1) == MULT)
6604 tem = arg0, arg0 = arg1, arg1 = tem;
6606 if (GET_CODE (arg1) == PLUS)
6607 return
6608 simplify_giv_expr (loop,
6609 gen_rtx_PLUS (mode,
6610 gen_rtx_PLUS (mode, arg0,
6611 XEXP (arg1, 0)),
6612 XEXP (arg1, 1)),
6613 ext_val, benefit);
6615 /* Now must have MULT + MULT. Distribute if same biv, else not giv. */
6616 if (GET_CODE (arg0) != MULT || GET_CODE (arg1) != MULT)
6617 return NULL_RTX;
6619 if (!rtx_equal_p (arg0, arg1))
6620 return NULL_RTX;
6622 return simplify_giv_expr (loop,
6623 gen_rtx_MULT (mode,
6624 XEXP (arg0, 0),
6625 gen_rtx_PLUS (mode,
6626 XEXP (arg0, 1),
6627 XEXP (arg1, 1))),
6628 ext_val, benefit);
6630 case MINUS:
6631 /* Handle "a - b" as "a + b * (-1)". */
6632 return simplify_giv_expr (loop,
6633 gen_rtx_PLUS (mode,
6634 XEXP (x, 0),
6635 gen_rtx_MULT (mode,
6636 XEXP (x, 1),
6637 constm1_rtx)),
6638 ext_val, benefit);
6640 case MULT:
6641 arg0 = simplify_giv_expr (loop, XEXP (x, 0), ext_val, benefit);
6642 arg1 = simplify_giv_expr (loop, XEXP (x, 1), ext_val, benefit);
6643 if (arg0 == 0 || arg1 == 0)
6644 return NULL_RTX;
6646 /* Put constant last, CONST_INT last if both constant. */
6647 if ((GET_CODE (arg0) == USE || GET_CODE (arg0) == CONST_INT)
6648 && GET_CODE (arg1) != CONST_INT)
6649 tem = arg0, arg0 = arg1, arg1 = tem;
6651 /* If second argument is not now constant, not giv. */
6652 if (GET_CODE (arg1) != USE && GET_CODE (arg1) != CONST_INT)
6653 return NULL_RTX;
6655 /* Handle multiply by 0 or 1. */
6656 if (arg1 == const0_rtx)
6657 return const0_rtx;
6659 else if (arg1 == const1_rtx)
6660 return arg0;
6662 switch (GET_CODE (arg0))
6664 case REG:
6665 /* biv * invar. Done. */
6666 return gen_rtx_MULT (mode, arg0, arg1);
6668 case CONST_INT:
6669 /* Product of two constants. */
6670 return GEN_INT (INTVAL (arg0) * INTVAL (arg1));
6672 case USE:
6673 /* invar * invar is a giv, but attempt to simplify it somehow. */
6674 if (GET_CODE (arg1) != CONST_INT)
6675 return NULL_RTX;
6677 arg0 = XEXP (arg0, 0);
6678 if (GET_CODE (arg0) == MULT)
6680 /* (invar_0 * invar_1) * invar_2. Associate. */
6681 return simplify_giv_expr (loop,
6682 gen_rtx_MULT (mode,
6683 XEXP (arg0, 0),
6684 gen_rtx_MULT (mode,
6685 XEXP (arg0,
6687 arg1)),
6688 ext_val, benefit);
6690 /* Propagate the MULT expressions to the intermost nodes. */
6691 else if (GET_CODE (arg0) == PLUS)
6693 /* (invar_0 + invar_1) * invar_2. Distribute. */
6694 return simplify_giv_expr (loop,
6695 gen_rtx_PLUS (mode,
6696 gen_rtx_MULT (mode,
6697 XEXP (arg0,
6699 arg1),
6700 gen_rtx_MULT (mode,
6701 XEXP (arg0,
6703 arg1)),
6704 ext_val, benefit);
6706 return gen_rtx_USE (mode, gen_rtx_MULT (mode, arg0, arg1));
6708 case MULT:
6709 /* (a * invar_1) * invar_2. Associate. */
6710 return simplify_giv_expr (loop,
6711 gen_rtx_MULT (mode,
6712 XEXP (arg0, 0),
6713 gen_rtx_MULT (mode,
6714 XEXP (arg0, 1),
6715 arg1)),
6716 ext_val, benefit);
6718 case PLUS:
6719 /* (a + invar_1) * invar_2. Distribute. */
6720 return simplify_giv_expr (loop,
6721 gen_rtx_PLUS (mode,
6722 gen_rtx_MULT (mode,
6723 XEXP (arg0, 0),
6724 arg1),
6725 gen_rtx_MULT (mode,
6726 XEXP (arg0, 1),
6727 arg1)),
6728 ext_val, benefit);
6730 default:
6731 abort ();
6734 case ASHIFT:
6735 /* Shift by constant is multiply by power of two. */
6736 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6737 return 0;
6739 return
6740 simplify_giv_expr (loop,
6741 gen_rtx_MULT (mode,
6742 XEXP (x, 0),
6743 GEN_INT ((HOST_WIDE_INT) 1
6744 << INTVAL (XEXP (x, 1)))),
6745 ext_val, benefit);
6747 case NEG:
6748 /* "-a" is "a * (-1)" */
6749 return simplify_giv_expr (loop,
6750 gen_rtx_MULT (mode, XEXP (x, 0), constm1_rtx),
6751 ext_val, benefit);
6753 case NOT:
6754 /* "~a" is "-a - 1". Silly, but easy. */
6755 return simplify_giv_expr (loop,
6756 gen_rtx_MINUS (mode,
6757 gen_rtx_NEG (mode, XEXP (x, 0)),
6758 const1_rtx),
6759 ext_val, benefit);
6761 case USE:
6762 /* Already in proper form for invariant. */
6763 return x;
6765 case SIGN_EXTEND:
6766 case ZERO_EXTEND:
6767 case TRUNCATE:
6768 /* Conditionally recognize extensions of simple IVs. After we've
6769 computed loop traversal counts and verified the range of the
6770 source IV, we'll reevaluate this as a GIV. */
6771 if (*ext_val == NULL_RTX)
6773 arg0 = simplify_giv_expr (loop, XEXP (x, 0), ext_val, benefit);
6774 if (arg0 && *ext_val == NULL_RTX && GET_CODE (arg0) == REG)
6776 *ext_val = gen_rtx_fmt_e (GET_CODE (x), mode, arg0);
6777 return arg0;
6780 goto do_default;
6782 case REG:
6783 /* If this is a new register, we can't deal with it. */
6784 if (REGNO (x) >= max_reg_before_loop)
6785 return 0;
6787 /* Check for biv or giv. */
6788 switch (REG_IV_TYPE (ivs, REGNO (x)))
6790 case BASIC_INDUCT:
6791 return x;
6792 case GENERAL_INDUCT:
6794 struct induction *v = REG_IV_INFO (ivs, REGNO (x));
6796 /* Form expression from giv and add benefit. Ensure this giv
6797 can derive another and subtract any needed adjustment if so. */
6799 /* Increasing the benefit here is risky. The only case in which it
6800 is arguably correct is if this is the only use of V. In other
6801 cases, this will artificially inflate the benefit of the current
6802 giv, and lead to suboptimal code. Thus, it is disabled, since
6803 potentially not reducing an only marginally beneficial giv is
6804 less harmful than reducing many givs that are not really
6805 beneficial. */
6807 rtx single_use = regs->array[REGNO (x)].single_usage;
6808 if (single_use && single_use != const0_rtx)
6809 *benefit += v->benefit;
6812 if (v->cant_derive)
6813 return 0;
6815 tem = gen_rtx_PLUS (mode, gen_rtx_MULT (mode,
6816 v->src_reg, v->mult_val),
6817 v->add_val);
6819 if (v->derive_adjustment)
6820 tem = gen_rtx_MINUS (mode, tem, v->derive_adjustment);
6821 arg0 = simplify_giv_expr (loop, tem, ext_val, benefit);
6822 if (*ext_val)
6824 if (!v->ext_dependent)
6825 return arg0;
6827 else
6829 *ext_val = v->ext_dependent;
6830 return arg0;
6832 return 0;
6835 default:
6836 do_default:
6837 /* If it isn't an induction variable, and it is invariant, we
6838 may be able to simplify things further by looking through
6839 the bits we just moved outside the loop. */
6840 if (loop_invariant_p (loop, x) == 1)
6842 struct movable *m;
6843 struct loop_movables *movables = LOOP_MOVABLES (loop);
6845 for (m = movables->head; m; m = m->next)
6846 if (rtx_equal_p (x, m->set_dest))
6848 /* Ok, we found a match. Substitute and simplify. */
6850 /* If we match another movable, we must use that, as
6851 this one is going away. */
6852 if (m->match)
6853 return simplify_giv_expr (loop, m->match->set_dest,
6854 ext_val, benefit);
6856 /* If consec is nonzero, this is a member of a group of
6857 instructions that were moved together. We handle this
6858 case only to the point of seeking to the last insn and
6859 looking for a REG_EQUAL. Fail if we don't find one. */
6860 if (m->consec != 0)
6862 int i = m->consec;
6863 tem = m->insn;
6866 tem = NEXT_INSN (tem);
6868 while (--i > 0);
6870 tem = find_reg_note (tem, REG_EQUAL, NULL_RTX);
6871 if (tem)
6872 tem = XEXP (tem, 0);
6874 else
6876 tem = single_set (m->insn);
6877 if (tem)
6878 tem = SET_SRC (tem);
6881 if (tem)
6883 /* What we are most interested in is pointer
6884 arithmetic on invariants -- only take
6885 patterns we may be able to do something with. */
6886 if (GET_CODE (tem) == PLUS
6887 || GET_CODE (tem) == MULT
6888 || GET_CODE (tem) == ASHIFT
6889 || GET_CODE (tem) == CONST_INT
6890 || GET_CODE (tem) == SYMBOL_REF)
6892 tem = simplify_giv_expr (loop, tem, ext_val,
6893 benefit);
6894 if (tem)
6895 return tem;
6897 else if (GET_CODE (tem) == CONST
6898 && GET_CODE (XEXP (tem, 0)) == PLUS
6899 && GET_CODE (XEXP (XEXP (tem, 0), 0)) == SYMBOL_REF
6900 && GET_CODE (XEXP (XEXP (tem, 0), 1)) == CONST_INT)
6902 tem = simplify_giv_expr (loop, XEXP (tem, 0),
6903 ext_val, benefit);
6904 if (tem)
6905 return tem;
6908 break;
6911 break;
6914 /* Fall through to general case. */
6915 default:
6916 /* If invariant, return as USE (unless CONST_INT).
6917 Otherwise, not giv. */
6918 if (GET_CODE (x) == USE)
6919 x = XEXP (x, 0);
6921 if (loop_invariant_p (loop, x) == 1)
6923 if (GET_CODE (x) == CONST_INT)
6924 return x;
6925 if (GET_CODE (x) == CONST
6926 && GET_CODE (XEXP (x, 0)) == PLUS
6927 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
6928 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
6929 x = XEXP (x, 0);
6930 return gen_rtx_USE (mode, x);
6932 else
6933 return 0;
6937 /* This routine folds invariants such that there is only ever one
6938 CONST_INT in the summation. It is only used by simplify_giv_expr. */
6940 static rtx
6941 sge_plus_constant (x, c)
6942 rtx x, c;
6944 if (GET_CODE (x) == CONST_INT)
6945 return GEN_INT (INTVAL (x) + INTVAL (c));
6946 else if (GET_CODE (x) != PLUS)
6947 return gen_rtx_PLUS (GET_MODE (x), x, c);
6948 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6950 return gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
6951 GEN_INT (INTVAL (XEXP (x, 1)) + INTVAL (c)));
6953 else if (GET_CODE (XEXP (x, 0)) == PLUS
6954 || GET_CODE (XEXP (x, 1)) != PLUS)
6956 return gen_rtx_PLUS (GET_MODE (x),
6957 sge_plus_constant (XEXP (x, 0), c), XEXP (x, 1));
6959 else
6961 return gen_rtx_PLUS (GET_MODE (x),
6962 sge_plus_constant (XEXP (x, 1), c), XEXP (x, 0));
6966 static rtx
6967 sge_plus (mode, x, y)
6968 enum machine_mode mode;
6969 rtx x, y;
6971 while (GET_CODE (y) == PLUS)
6973 rtx a = XEXP (y, 0);
6974 if (GET_CODE (a) == CONST_INT)
6975 x = sge_plus_constant (x, a);
6976 else
6977 x = gen_rtx_PLUS (mode, x, a);
6978 y = XEXP (y, 1);
6980 if (GET_CODE (y) == CONST_INT)
6981 x = sge_plus_constant (x, y);
6982 else
6983 x = gen_rtx_PLUS (mode, x, y);
6984 return x;
6987 /* Help detect a giv that is calculated by several consecutive insns;
6988 for example,
6989 giv = biv * M
6990 giv = giv + A
6991 The caller has already identified the first insn P as having a giv as dest;
6992 we check that all other insns that set the same register follow
6993 immediately after P, that they alter nothing else,
6994 and that the result of the last is still a giv.
6996 The value is 0 if the reg set in P is not really a giv.
6997 Otherwise, the value is the amount gained by eliminating
6998 all the consecutive insns that compute the value.
7000 FIRST_BENEFIT is the amount gained by eliminating the first insn, P.
7001 SRC_REG is the reg of the biv; DEST_REG is the reg of the giv.
7003 The coefficients of the ultimate giv value are stored in
7004 *MULT_VAL and *ADD_VAL. */
7006 static int
7007 consec_sets_giv (loop, first_benefit, p, src_reg, dest_reg,
7008 add_val, mult_val, ext_val, last_consec_insn)
7009 const struct loop *loop;
7010 int first_benefit;
7011 rtx p;
7012 rtx src_reg;
7013 rtx dest_reg;
7014 rtx *add_val;
7015 rtx *mult_val;
7016 rtx *ext_val;
7017 rtx *last_consec_insn;
7019 struct loop_ivs *ivs = LOOP_IVS (loop);
7020 struct loop_regs *regs = LOOP_REGS (loop);
7021 int count;
7022 enum rtx_code code;
7023 int benefit;
7024 rtx temp;
7025 rtx set;
7027 /* Indicate that this is a giv so that we can update the value produced in
7028 each insn of the multi-insn sequence.
7030 This induction structure will be used only by the call to
7031 general_induction_var below, so we can allocate it on our stack.
7032 If this is a giv, our caller will replace the induct var entry with
7033 a new induction structure. */
7034 struct induction *v;
7036 if (REG_IV_TYPE (ivs, REGNO (dest_reg)) != UNKNOWN_INDUCT)
7037 return 0;
7039 v = (struct induction *) alloca (sizeof (struct induction));
7040 v->src_reg = src_reg;
7041 v->mult_val = *mult_val;
7042 v->add_val = *add_val;
7043 v->benefit = first_benefit;
7044 v->cant_derive = 0;
7045 v->derive_adjustment = 0;
7046 v->ext_dependent = NULL_RTX;
7048 REG_IV_TYPE (ivs, REGNO (dest_reg)) = GENERAL_INDUCT;
7049 REG_IV_INFO (ivs, REGNO (dest_reg)) = v;
7051 count = regs->array[REGNO (dest_reg)].n_times_set - 1;
7053 while (count > 0)
7055 p = NEXT_INSN (p);
7056 code = GET_CODE (p);
7058 /* If libcall, skip to end of call sequence. */
7059 if (code == INSN && (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
7060 p = XEXP (temp, 0);
7062 if (code == INSN
7063 && (set = single_set (p))
7064 && GET_CODE (SET_DEST (set)) == REG
7065 && SET_DEST (set) == dest_reg
7066 && (general_induction_var (loop, SET_SRC (set), &src_reg,
7067 add_val, mult_val, ext_val, 0,
7068 &benefit, VOIDmode)
7069 /* Giv created by equivalent expression. */
7070 || ((temp = find_reg_note (p, REG_EQUAL, NULL_RTX))
7071 && general_induction_var (loop, XEXP (temp, 0), &src_reg,
7072 add_val, mult_val, ext_val, 0,
7073 &benefit, VOIDmode)))
7074 && src_reg == v->src_reg)
7076 if (find_reg_note (p, REG_RETVAL, NULL_RTX))
7077 benefit += libcall_benefit (p);
7079 count--;
7080 v->mult_val = *mult_val;
7081 v->add_val = *add_val;
7082 v->benefit += benefit;
7084 else if (code != NOTE)
7086 /* Allow insns that set something other than this giv to a
7087 constant. Such insns are needed on machines which cannot
7088 include long constants and should not disqualify a giv. */
7089 if (code == INSN
7090 && (set = single_set (p))
7091 && SET_DEST (set) != dest_reg
7092 && CONSTANT_P (SET_SRC (set)))
7093 continue;
7095 REG_IV_TYPE (ivs, REGNO (dest_reg)) = UNKNOWN_INDUCT;
7096 return 0;
7100 REG_IV_TYPE (ivs, REGNO (dest_reg)) = UNKNOWN_INDUCT;
7101 *last_consec_insn = p;
7102 return v->benefit;
7105 /* Return an rtx, if any, that expresses giv G2 as a function of the register
7106 represented by G1. If no such expression can be found, or it is clear that
7107 it cannot possibly be a valid address, 0 is returned.
7109 To perform the computation, we note that
7110 G1 = x * v + a and
7111 G2 = y * v + b
7112 where `v' is the biv.
7114 So G2 = (y/b) * G1 + (b - a*y/x).
7116 Note that MULT = y/x.
7118 Update: A and B are now allowed to be additive expressions such that
7119 B contains all variables in A. That is, computing B-A will not require
7120 subtracting variables. */
7122 static rtx
7123 express_from_1 (a, b, mult)
7124 rtx a, b, mult;
7126 /* If MULT is zero, then A*MULT is zero, and our expression is B. */
7128 if (mult == const0_rtx)
7129 return b;
7131 /* If MULT is not 1, we cannot handle A with non-constants, since we
7132 would then be required to subtract multiples of the registers in A.
7133 This is theoretically possible, and may even apply to some Fortran
7134 constructs, but it is a lot of work and we do not attempt it here. */
7136 if (mult != const1_rtx && GET_CODE (a) != CONST_INT)
7137 return NULL_RTX;
7139 /* In general these structures are sorted top to bottom (down the PLUS
7140 chain), but not left to right across the PLUS. If B is a higher
7141 order giv than A, we can strip one level and recurse. If A is higher
7142 order, we'll eventually bail out, but won't know that until the end.
7143 If they are the same, we'll strip one level around this loop. */
7145 while (GET_CODE (a) == PLUS && GET_CODE (b) == PLUS)
7147 rtx ra, rb, oa, ob, tmp;
7149 ra = XEXP (a, 0), oa = XEXP (a, 1);
7150 if (GET_CODE (ra) == PLUS)
7151 tmp = ra, ra = oa, oa = tmp;
7153 rb = XEXP (b, 0), ob = XEXP (b, 1);
7154 if (GET_CODE (rb) == PLUS)
7155 tmp = rb, rb = ob, ob = tmp;
7157 if (rtx_equal_p (ra, rb))
7158 /* We matched: remove one reg completely. */
7159 a = oa, b = ob;
7160 else if (GET_CODE (ob) != PLUS && rtx_equal_p (ra, ob))
7161 /* An alternate match. */
7162 a = oa, b = rb;
7163 else if (GET_CODE (oa) != PLUS && rtx_equal_p (oa, rb))
7164 /* An alternate match. */
7165 a = ra, b = ob;
7166 else
7168 /* Indicates an extra register in B. Strip one level from B and
7169 recurse, hoping B was the higher order expression. */
7170 ob = express_from_1 (a, ob, mult);
7171 if (ob == NULL_RTX)
7172 return NULL_RTX;
7173 return gen_rtx_PLUS (GET_MODE (b), rb, ob);
7177 /* Here we are at the last level of A, go through the cases hoping to
7178 get rid of everything but a constant. */
7180 if (GET_CODE (a) == PLUS)
7182 rtx ra, oa;
7184 ra = XEXP (a, 0), oa = XEXP (a, 1);
7185 if (rtx_equal_p (oa, b))
7186 oa = ra;
7187 else if (!rtx_equal_p (ra, b))
7188 return NULL_RTX;
7190 if (GET_CODE (oa) != CONST_INT)
7191 return NULL_RTX;
7193 return GEN_INT (-INTVAL (oa) * INTVAL (mult));
7195 else if (GET_CODE (a) == CONST_INT)
7197 return plus_constant (b, -INTVAL (a) * INTVAL (mult));
7199 else if (CONSTANT_P (a))
7201 enum machine_mode mode_a = GET_MODE (a);
7202 enum machine_mode mode_b = GET_MODE (b);
7203 enum machine_mode mode = mode_b == VOIDmode ? mode_a : mode_b;
7204 return simplify_gen_binary (MINUS, mode, b, a);
7206 else if (GET_CODE (b) == PLUS)
7208 if (rtx_equal_p (a, XEXP (b, 0)))
7209 return XEXP (b, 1);
7210 else if (rtx_equal_p (a, XEXP (b, 1)))
7211 return XEXP (b, 0);
7212 else
7213 return NULL_RTX;
7215 else if (rtx_equal_p (a, b))
7216 return const0_rtx;
7218 return NULL_RTX;
7222 express_from (g1, g2)
7223 struct induction *g1, *g2;
7225 rtx mult, add;
7227 /* The value that G1 will be multiplied by must be a constant integer. Also,
7228 the only chance we have of getting a valid address is if b*c/a (see above
7229 for notation) is also an integer. */
7230 if (GET_CODE (g1->mult_val) == CONST_INT
7231 && GET_CODE (g2->mult_val) == CONST_INT)
7233 if (g1->mult_val == const0_rtx
7234 || INTVAL (g2->mult_val) % INTVAL (g1->mult_val) != 0)
7235 return NULL_RTX;
7236 mult = GEN_INT (INTVAL (g2->mult_val) / INTVAL (g1->mult_val));
7238 else if (rtx_equal_p (g1->mult_val, g2->mult_val))
7239 mult = const1_rtx;
7240 else
7242 /* ??? Find out if the one is a multiple of the other? */
7243 return NULL_RTX;
7246 add = express_from_1 (g1->add_val, g2->add_val, mult);
7247 if (add == NULL_RTX)
7249 /* Failed. If we've got a multiplication factor between G1 and G2,
7250 scale G1's addend and try again. */
7251 if (INTVAL (mult) > 1)
7253 rtx g1_add_val = g1->add_val;
7254 if (GET_CODE (g1_add_val) == MULT
7255 && GET_CODE (XEXP (g1_add_val, 1)) == CONST_INT)
7257 HOST_WIDE_INT m;
7258 m = INTVAL (mult) * INTVAL (XEXP (g1_add_val, 1));
7259 g1_add_val = gen_rtx_MULT (GET_MODE (g1_add_val),
7260 XEXP (g1_add_val, 0), GEN_INT (m));
7262 else
7264 g1_add_val = gen_rtx_MULT (GET_MODE (g1_add_val), g1_add_val,
7265 mult);
7268 add = express_from_1 (g1_add_val, g2->add_val, const1_rtx);
7271 if (add == NULL_RTX)
7272 return NULL_RTX;
7274 /* Form simplified final result. */
7275 if (mult == const0_rtx)
7276 return add;
7277 else if (mult == const1_rtx)
7278 mult = g1->dest_reg;
7279 else
7280 mult = gen_rtx_MULT (g2->mode, g1->dest_reg, mult);
7282 if (add == const0_rtx)
7283 return mult;
7284 else
7286 if (GET_CODE (add) == PLUS
7287 && CONSTANT_P (XEXP (add, 1)))
7289 rtx tem = XEXP (add, 1);
7290 mult = gen_rtx_PLUS (g2->mode, mult, XEXP (add, 0));
7291 add = tem;
7294 return gen_rtx_PLUS (g2->mode, mult, add);
7298 /* Return an rtx, if any, that expresses giv G2 as a function of the register
7299 represented by G1. This indicates that G2 should be combined with G1 and
7300 that G2 can use (either directly or via an address expression) a register
7301 used to represent G1. */
7303 static rtx
7304 combine_givs_p (g1, g2)
7305 struct induction *g1, *g2;
7307 rtx comb, ret;
7309 /* With the introduction of ext dependent givs, we must care for modes.
7310 G2 must not use a wider mode than G1. */
7311 if (GET_MODE_SIZE (g1->mode) < GET_MODE_SIZE (g2->mode))
7312 return NULL_RTX;
7314 ret = comb = express_from (g1, g2);
7315 if (comb == NULL_RTX)
7316 return NULL_RTX;
7317 if (g1->mode != g2->mode)
7318 ret = gen_lowpart (g2->mode, comb);
7320 /* If these givs are identical, they can be combined. We use the results
7321 of express_from because the addends are not in a canonical form, so
7322 rtx_equal_p is a weaker test. */
7323 /* But don't combine a DEST_REG giv with a DEST_ADDR giv; we want the
7324 combination to be the other way round. */
7325 if (comb == g1->dest_reg
7326 && (g1->giv_type == DEST_REG || g2->giv_type == DEST_ADDR))
7328 return ret;
7331 /* If G2 can be expressed as a function of G1 and that function is valid
7332 as an address and no more expensive than using a register for G2,
7333 the expression of G2 in terms of G1 can be used. */
7334 if (ret != NULL_RTX
7335 && g2->giv_type == DEST_ADDR
7336 && memory_address_p (GET_MODE (g2->mem), ret)
7337 /* ??? Looses, especially with -fforce-addr, where *g2->location
7338 will always be a register, and so anything more complicated
7339 gets discarded. */
7340 #if 0
7341 #ifdef ADDRESS_COST
7342 && ADDRESS_COST (tem) <= ADDRESS_COST (*g2->location)
7343 #else
7344 && rtx_cost (tem, MEM) <= rtx_cost (*g2->location, MEM)
7345 #endif
7346 #endif
7349 return ret;
7352 return NULL_RTX;
7355 /* Check each extension dependent giv in this class to see if its
7356 root biv is safe from wrapping in the interior mode, which would
7357 make the giv illegal. */
7359 static void
7360 check_ext_dependent_givs (bl, loop_info)
7361 struct iv_class *bl;
7362 struct loop_info *loop_info;
7364 int ze_ok = 0, se_ok = 0, info_ok = 0;
7365 enum machine_mode biv_mode = GET_MODE (bl->biv->src_reg);
7366 HOST_WIDE_INT start_val;
7367 unsigned HOST_WIDE_INT u_end_val = 0;
7368 unsigned HOST_WIDE_INT u_start_val = 0;
7369 rtx incr = pc_rtx;
7370 struct induction *v;
7372 /* Make sure the iteration data is available. We must have
7373 constants in order to be certain of no overflow. */
7374 /* ??? An unknown iteration count with an increment of +-1
7375 combined with friendly exit tests of against an invariant
7376 value is also amenable to optimization. Not implemented. */
7377 if (loop_info->n_iterations > 0
7378 && bl->initial_value
7379 && GET_CODE (bl->initial_value) == CONST_INT
7380 && (incr = biv_total_increment (bl))
7381 && GET_CODE (incr) == CONST_INT
7382 /* Make sure the host can represent the arithmetic. */
7383 && HOST_BITS_PER_WIDE_INT >= GET_MODE_BITSIZE (biv_mode))
7385 unsigned HOST_WIDE_INT abs_incr, total_incr;
7386 HOST_WIDE_INT s_end_val;
7387 int neg_incr;
7389 info_ok = 1;
7390 start_val = INTVAL (bl->initial_value);
7391 u_start_val = start_val;
7393 neg_incr = 0, abs_incr = INTVAL (incr);
7394 if (INTVAL (incr) < 0)
7395 neg_incr = 1, abs_incr = -abs_incr;
7396 total_incr = abs_incr * loop_info->n_iterations;
7398 /* Check for host arithmetic overflow. */
7399 if (total_incr / loop_info->n_iterations == abs_incr)
7401 unsigned HOST_WIDE_INT u_max;
7402 HOST_WIDE_INT s_max;
7404 u_end_val = start_val + (neg_incr ? -total_incr : total_incr);
7405 s_end_val = u_end_val;
7406 u_max = GET_MODE_MASK (biv_mode);
7407 s_max = u_max >> 1;
7409 /* Check zero extension of biv ok. */
7410 if (start_val >= 0
7411 /* Check for host arithmetic overflow. */
7412 && (neg_incr
7413 ? u_end_val < u_start_val
7414 : u_end_val > u_start_val)
7415 /* Check for target arithmetic overflow. */
7416 && (neg_incr
7417 ? 1 /* taken care of with host overflow */
7418 : u_end_val <= u_max))
7420 ze_ok = 1;
7423 /* Check sign extension of biv ok. */
7424 /* ??? While it is true that overflow with signed and pointer
7425 arithmetic is undefined, I fear too many programmers don't
7426 keep this fact in mind -- myself included on occasion.
7427 So leave alone with the signed overflow optimizations. */
7428 if (start_val >= -s_max - 1
7429 /* Check for host arithmetic overflow. */
7430 && (neg_incr
7431 ? s_end_val < start_val
7432 : s_end_val > start_val)
7433 /* Check for target arithmetic overflow. */
7434 && (neg_incr
7435 ? s_end_val >= -s_max - 1
7436 : s_end_val <= s_max))
7438 se_ok = 1;
7443 /* Invalidate givs that fail the tests. */
7444 for (v = bl->giv; v; v = v->next_iv)
7445 if (v->ext_dependent)
7447 enum rtx_code code = GET_CODE (v->ext_dependent);
7448 int ok = 0;
7450 switch (code)
7452 case SIGN_EXTEND:
7453 ok = se_ok;
7454 break;
7455 case ZERO_EXTEND:
7456 ok = ze_ok;
7457 break;
7459 case TRUNCATE:
7460 /* We don't know whether this value is being used as either
7461 signed or unsigned, so to safely truncate we must satisfy
7462 both. The initial check here verifies the BIV itself;
7463 once that is successful we may check its range wrt the
7464 derived GIV. */
7465 if (se_ok && ze_ok)
7467 enum machine_mode outer_mode = GET_MODE (v->ext_dependent);
7468 unsigned HOST_WIDE_INT max = GET_MODE_MASK (outer_mode) >> 1;
7470 /* We know from the above that both endpoints are nonnegative,
7471 and that there is no wrapping. Verify that both endpoints
7472 are within the (signed) range of the outer mode. */
7473 if (u_start_val <= max && u_end_val <= max)
7474 ok = 1;
7476 break;
7478 default:
7479 abort ();
7482 if (ok)
7484 if (loop_dump_stream)
7486 fprintf (loop_dump_stream,
7487 "Verified ext dependent giv at %d of reg %d\n",
7488 INSN_UID (v->insn), bl->regno);
7491 else
7493 if (loop_dump_stream)
7495 const char *why;
7497 if (info_ok)
7498 why = "biv iteration values overflowed";
7499 else
7501 if (incr == pc_rtx)
7502 incr = biv_total_increment (bl);
7503 if (incr == const1_rtx)
7504 why = "biv iteration info incomplete; incr by 1";
7505 else
7506 why = "biv iteration info incomplete";
7509 fprintf (loop_dump_stream,
7510 "Failed ext dependent giv at %d, %s\n",
7511 INSN_UID (v->insn), why);
7513 v->ignore = 1;
7514 bl->all_reduced = 0;
7519 /* Generate a version of VALUE in a mode appropriate for initializing V. */
7522 extend_value_for_giv (v, value)
7523 struct induction *v;
7524 rtx value;
7526 rtx ext_dep = v->ext_dependent;
7528 if (! ext_dep)
7529 return value;
7531 /* Recall that check_ext_dependent_givs verified that the known bounds
7532 of a biv did not overflow or wrap with respect to the extension for
7533 the giv. Therefore, constants need no additional adjustment. */
7534 if (CONSTANT_P (value) && GET_MODE (value) == VOIDmode)
7535 return value;
7537 /* Otherwise, we must adjust the value to compensate for the
7538 differing modes of the biv and the giv. */
7539 return gen_rtx_fmt_e (GET_CODE (ext_dep), GET_MODE (ext_dep), value);
7542 struct combine_givs_stats
7544 int giv_number;
7545 int total_benefit;
7548 static int
7549 cmp_combine_givs_stats (xp, yp)
7550 const PTR xp;
7551 const PTR yp;
7553 const struct combine_givs_stats * const x =
7554 (const struct combine_givs_stats *) xp;
7555 const struct combine_givs_stats * const y =
7556 (const struct combine_givs_stats *) yp;
7557 int d;
7558 d = y->total_benefit - x->total_benefit;
7559 /* Stabilize the sort. */
7560 if (!d)
7561 d = x->giv_number - y->giv_number;
7562 return d;
7565 /* Check all pairs of givs for iv_class BL and see if any can be combined with
7566 any other. If so, point SAME to the giv combined with and set NEW_REG to
7567 be an expression (in terms of the other giv's DEST_REG) equivalent to the
7568 giv. Also, update BENEFIT and related fields for cost/benefit analysis. */
7570 static void
7571 combine_givs (regs, bl)
7572 struct loop_regs *regs;
7573 struct iv_class *bl;
7575 /* Additional benefit to add for being combined multiple times. */
7576 const int extra_benefit = 3;
7578 struct induction *g1, *g2, **giv_array;
7579 int i, j, k, giv_count;
7580 struct combine_givs_stats *stats;
7581 rtx *can_combine;
7583 /* Count givs, because bl->giv_count is incorrect here. */
7584 giv_count = 0;
7585 for (g1 = bl->giv; g1; g1 = g1->next_iv)
7586 if (!g1->ignore)
7587 giv_count++;
7589 giv_array
7590 = (struct induction **) alloca (giv_count * sizeof (struct induction *));
7591 i = 0;
7592 for (g1 = bl->giv; g1; g1 = g1->next_iv)
7593 if (!g1->ignore)
7594 giv_array[i++] = g1;
7596 stats = (struct combine_givs_stats *) xcalloc (giv_count, sizeof (*stats));
7597 can_combine = (rtx *) xcalloc (giv_count, giv_count * sizeof (rtx));
7599 for (i = 0; i < giv_count; i++)
7601 int this_benefit;
7602 rtx single_use;
7604 g1 = giv_array[i];
7605 stats[i].giv_number = i;
7607 /* If a DEST_REG GIV is used only once, do not allow it to combine
7608 with anything, for in doing so we will gain nothing that cannot
7609 be had by simply letting the GIV with which we would have combined
7610 to be reduced on its own. The losage shows up in particular with
7611 DEST_ADDR targets on hosts with reg+reg addressing, though it can
7612 be seen elsewhere as well. */
7613 if (g1->giv_type == DEST_REG
7614 && (single_use = regs->array[REGNO (g1->dest_reg)].single_usage)
7615 && single_use != const0_rtx)
7616 continue;
7618 this_benefit = g1->benefit;
7619 /* Add an additional weight for zero addends. */
7620 if (g1->no_const_addval)
7621 this_benefit += 1;
7623 for (j = 0; j < giv_count; j++)
7625 rtx this_combine;
7627 g2 = giv_array[j];
7628 if (g1 != g2
7629 && (this_combine = combine_givs_p (g1, g2)) != NULL_RTX)
7631 can_combine[i * giv_count + j] = this_combine;
7632 this_benefit += g2->benefit + extra_benefit;
7635 stats[i].total_benefit = this_benefit;
7638 /* Iterate, combining until we can't. */
7639 restart:
7640 qsort (stats, giv_count, sizeof (*stats), cmp_combine_givs_stats);
7642 if (loop_dump_stream)
7644 fprintf (loop_dump_stream, "Sorted combine statistics:\n");
7645 for (k = 0; k < giv_count; k++)
7647 g1 = giv_array[stats[k].giv_number];
7648 if (!g1->combined_with && !g1->same)
7649 fprintf (loop_dump_stream, " {%d, %d}",
7650 INSN_UID (giv_array[stats[k].giv_number]->insn),
7651 stats[k].total_benefit);
7653 putc ('\n', loop_dump_stream);
7656 for (k = 0; k < giv_count; k++)
7658 int g1_add_benefit = 0;
7660 i = stats[k].giv_number;
7661 g1 = giv_array[i];
7663 /* If it has already been combined, skip. */
7664 if (g1->combined_with || g1->same)
7665 continue;
7667 for (j = 0; j < giv_count; j++)
7669 g2 = giv_array[j];
7670 if (g1 != g2 && can_combine[i * giv_count + j]
7671 /* If it has already been combined, skip. */
7672 && ! g2->same && ! g2->combined_with)
7674 int l;
7676 g2->new_reg = can_combine[i * giv_count + j];
7677 g2->same = g1;
7678 /* For destination, we now may replace by mem expression instead
7679 of register. This changes the costs considerably, so add the
7680 compensation. */
7681 if (g2->giv_type == DEST_ADDR)
7682 g2->benefit = (g2->benefit + reg_address_cost
7683 - address_cost (g2->new_reg,
7684 GET_MODE (g2->mem)));
7685 g1->combined_with++;
7686 g1->lifetime += g2->lifetime;
7688 g1_add_benefit += g2->benefit;
7690 /* ??? The new final_[bg]iv_value code does a much better job
7691 of finding replaceable giv's, and hence this code may no
7692 longer be necessary. */
7693 if (! g2->replaceable && REG_USERVAR_P (g2->dest_reg))
7694 g1_add_benefit -= copy_cost;
7696 /* To help optimize the next set of combinations, remove
7697 this giv from the benefits of other potential mates. */
7698 for (l = 0; l < giv_count; ++l)
7700 int m = stats[l].giv_number;
7701 if (can_combine[m * giv_count + j])
7702 stats[l].total_benefit -= g2->benefit + extra_benefit;
7705 if (loop_dump_stream)
7706 fprintf (loop_dump_stream,
7707 "giv at %d combined with giv at %d; new benefit %d + %d, lifetime %d\n",
7708 INSN_UID (g2->insn), INSN_UID (g1->insn),
7709 g1->benefit, g1_add_benefit, g1->lifetime);
7713 /* To help optimize the next set of combinations, remove
7714 this giv from the benefits of other potential mates. */
7715 if (g1->combined_with)
7717 for (j = 0; j < giv_count; ++j)
7719 int m = stats[j].giv_number;
7720 if (can_combine[m * giv_count + i])
7721 stats[j].total_benefit -= g1->benefit + extra_benefit;
7724 g1->benefit += g1_add_benefit;
7726 /* We've finished with this giv, and everything it touched.
7727 Restart the combination so that proper weights for the
7728 rest of the givs are properly taken into account. */
7729 /* ??? Ideally we would compact the arrays at this point, so
7730 as to not cover old ground. But sanely compacting
7731 can_combine is tricky. */
7732 goto restart;
7736 /* Clean up. */
7737 free (stats);
7738 free (can_combine);
7741 /* Generate sequence for REG = B * M + A. */
7743 static rtx
7744 gen_add_mult (b, m, a, reg)
7745 rtx b; /* initial value of basic induction variable */
7746 rtx m; /* multiplicative constant */
7747 rtx a; /* additive constant */
7748 rtx reg; /* destination register */
7750 rtx seq;
7751 rtx result;
7753 start_sequence ();
7754 /* Use unsigned arithmetic. */
7755 result = expand_mult_add (b, reg, m, a, GET_MODE (reg), 1);
7756 if (reg != result)
7757 emit_move_insn (reg, result);
7758 seq = get_insns ();
7759 end_sequence ();
7761 return seq;
7765 /* Update registers created in insn sequence SEQ. */
7767 static void
7768 loop_regs_update (loop, seq)
7769 const struct loop *loop ATTRIBUTE_UNUSED;
7770 rtx seq;
7772 rtx insn;
7774 /* Update register info for alias analysis. */
7776 if (seq == NULL_RTX)
7777 return;
7779 if (INSN_P (seq))
7781 insn = seq;
7782 while (insn != NULL_RTX)
7784 rtx set = single_set (insn);
7786 if (set && GET_CODE (SET_DEST (set)) == REG)
7787 record_base_value (REGNO (SET_DEST (set)), SET_SRC (set), 0);
7789 insn = NEXT_INSN (insn);
7792 else if (GET_CODE (seq) == SET
7793 && GET_CODE (SET_DEST (seq)) == REG)
7794 record_base_value (REGNO (SET_DEST (seq)), SET_SRC (seq), 0);
7798 /* EMIT code before BEFORE_BB/BEFORE_INSN to set REG = B * M + A. */
7800 void
7801 loop_iv_add_mult_emit_before (loop, b, m, a, reg, before_bb, before_insn)
7802 const struct loop *loop;
7803 rtx b; /* initial value of basic induction variable */
7804 rtx m; /* multiplicative constant */
7805 rtx a; /* additive constant */
7806 rtx reg; /* destination register */
7807 basic_block before_bb;
7808 rtx before_insn;
7810 rtx seq;
7812 if (! before_insn)
7814 loop_iv_add_mult_hoist (loop, b, m, a, reg);
7815 return;
7818 /* Use copy_rtx to prevent unexpected sharing of these rtx. */
7819 seq = gen_add_mult (copy_rtx (b), copy_rtx (m), copy_rtx (a), reg);
7821 /* Increase the lifetime of any invariants moved further in code. */
7822 update_reg_last_use (a, before_insn);
7823 update_reg_last_use (b, before_insn);
7824 update_reg_last_use (m, before_insn);
7826 loop_insn_emit_before (loop, before_bb, before_insn, seq);
7828 /* It is possible that the expansion created lots of new registers.
7829 Iterate over the sequence we just created and record them all. */
7830 loop_regs_update (loop, seq);
7834 /* Emit insns in loop pre-header to set REG = B * M + A. */
7836 void
7837 loop_iv_add_mult_sink (loop, b, m, a, reg)
7838 const struct loop *loop;
7839 rtx b; /* initial value of basic induction variable */
7840 rtx m; /* multiplicative constant */
7841 rtx a; /* additive constant */
7842 rtx reg; /* destination register */
7844 rtx seq;
7846 /* Use copy_rtx to prevent unexpected sharing of these rtx. */
7847 seq = gen_add_mult (copy_rtx (b), copy_rtx (m), copy_rtx (a), reg);
7849 /* Increase the lifetime of any invariants moved further in code.
7850 ???? Is this really necessary? */
7851 update_reg_last_use (a, loop->sink);
7852 update_reg_last_use (b, loop->sink);
7853 update_reg_last_use (m, loop->sink);
7855 loop_insn_sink (loop, seq);
7857 /* It is possible that the expansion created lots of new registers.
7858 Iterate over the sequence we just created and record them all. */
7859 loop_regs_update (loop, seq);
7863 /* Emit insns after loop to set REG = B * M + A. */
7865 void
7866 loop_iv_add_mult_hoist (loop, b, m, a, reg)
7867 const struct loop *loop;
7868 rtx b; /* initial value of basic induction variable */
7869 rtx m; /* multiplicative constant */
7870 rtx a; /* additive constant */
7871 rtx reg; /* destination register */
7873 rtx seq;
7875 /* Use copy_rtx to prevent unexpected sharing of these rtx. */
7876 seq = gen_add_mult (copy_rtx (b), copy_rtx (m), copy_rtx (a), reg);
7878 loop_insn_hoist (loop, seq);
7880 /* It is possible that the expansion created lots of new registers.
7881 Iterate over the sequence we just created and record them all. */
7882 loop_regs_update (loop, seq);
7887 /* Similar to gen_add_mult, but compute cost rather than generating
7888 sequence. */
7890 static int
7891 iv_add_mult_cost (b, m, a, reg)
7892 rtx b; /* initial value of basic induction variable */
7893 rtx m; /* multiplicative constant */
7894 rtx a; /* additive constant */
7895 rtx reg; /* destination register */
7897 int cost = 0;
7898 rtx last, result;
7900 start_sequence ();
7901 result = expand_mult_add (b, reg, m, a, GET_MODE (reg), 1);
7902 if (reg != result)
7903 emit_move_insn (reg, result);
7904 last = get_last_insn ();
7905 while (last)
7907 rtx t = single_set (last);
7908 if (t)
7909 cost += rtx_cost (SET_SRC (t), SET);
7910 last = PREV_INSN (last);
7912 end_sequence ();
7913 return cost;
7916 /* Test whether A * B can be computed without
7917 an actual multiply insn. Value is 1 if so.
7919 ??? This function stinks because it generates a ton of wasted RTL
7920 ??? and as a result fragments GC memory to no end. There are other
7921 ??? places in the compiler which are invoked a lot and do the same
7922 ??? thing, generate wasted RTL just to see if something is possible. */
7924 static int
7925 product_cheap_p (a, b)
7926 rtx a;
7927 rtx b;
7929 rtx tmp;
7930 int win, n_insns;
7932 /* If only one is constant, make it B. */
7933 if (GET_CODE (a) == CONST_INT)
7934 tmp = a, a = b, b = tmp;
7936 /* If first constant, both constant, so don't need multiply. */
7937 if (GET_CODE (a) == CONST_INT)
7938 return 1;
7940 /* If second not constant, neither is constant, so would need multiply. */
7941 if (GET_CODE (b) != CONST_INT)
7942 return 0;
7944 /* One operand is constant, so might not need multiply insn. Generate the
7945 code for the multiply and see if a call or multiply, or long sequence
7946 of insns is generated. */
7948 start_sequence ();
7949 expand_mult (GET_MODE (a), a, b, NULL_RTX, 1);
7950 tmp = get_insns ();
7951 end_sequence ();
7953 win = 1;
7954 if (INSN_P (tmp))
7956 n_insns = 0;
7957 while (tmp != NULL_RTX)
7959 rtx next = NEXT_INSN (tmp);
7961 if (++n_insns > 3
7962 || GET_CODE (tmp) != INSN
7963 || (GET_CODE (PATTERN (tmp)) == SET
7964 && GET_CODE (SET_SRC (PATTERN (tmp))) == MULT)
7965 || (GET_CODE (PATTERN (tmp)) == PARALLEL
7966 && GET_CODE (XVECEXP (PATTERN (tmp), 0, 0)) == SET
7967 && GET_CODE (SET_SRC (XVECEXP (PATTERN (tmp), 0, 0))) == MULT))
7969 win = 0;
7970 break;
7973 tmp = next;
7976 else if (GET_CODE (tmp) == SET
7977 && GET_CODE (SET_SRC (tmp)) == MULT)
7978 win = 0;
7979 else if (GET_CODE (tmp) == PARALLEL
7980 && GET_CODE (XVECEXP (tmp, 0, 0)) == SET
7981 && GET_CODE (SET_SRC (XVECEXP (tmp, 0, 0))) == MULT)
7982 win = 0;
7984 return win;
7987 /* Check to see if loop can be terminated by a "decrement and branch until
7988 zero" instruction. If so, add a REG_NONNEG note to the branch insn if so.
7989 Also try reversing an increment loop to a decrement loop
7990 to see if the optimization can be performed.
7991 Value is nonzero if optimization was performed. */
7993 /* This is useful even if the architecture doesn't have such an insn,
7994 because it might change a loops which increments from 0 to n to a loop
7995 which decrements from n to 0. A loop that decrements to zero is usually
7996 faster than one that increments from zero. */
7998 /* ??? This could be rewritten to use some of the loop unrolling procedures,
7999 such as approx_final_value, biv_total_increment, loop_iterations, and
8000 final_[bg]iv_value. */
8002 static int
8003 check_dbra_loop (loop, insn_count)
8004 struct loop *loop;
8005 int insn_count;
8007 struct loop_info *loop_info = LOOP_INFO (loop);
8008 struct loop_regs *regs = LOOP_REGS (loop);
8009 struct loop_ivs *ivs = LOOP_IVS (loop);
8010 struct iv_class *bl;
8011 rtx reg;
8012 rtx jump_label;
8013 rtx final_value;
8014 rtx start_value;
8015 rtx new_add_val;
8016 rtx comparison;
8017 rtx before_comparison;
8018 rtx p;
8019 rtx jump;
8020 rtx first_compare;
8021 int compare_and_branch;
8022 rtx loop_start = loop->start;
8023 rtx loop_end = loop->end;
8025 /* If last insn is a conditional branch, and the insn before tests a
8026 register value, try to optimize it. Otherwise, we can't do anything. */
8028 jump = PREV_INSN (loop_end);
8029 comparison = get_condition_for_loop (loop, jump);
8030 if (comparison == 0)
8031 return 0;
8032 if (!onlyjump_p (jump))
8033 return 0;
8035 /* Try to compute whether the compare/branch at the loop end is one or
8036 two instructions. */
8037 get_condition (jump, &first_compare);
8038 if (first_compare == jump)
8039 compare_and_branch = 1;
8040 else if (first_compare == prev_nonnote_insn (jump))
8041 compare_and_branch = 2;
8042 else
8043 return 0;
8046 /* If more than one condition is present to control the loop, then
8047 do not proceed, as this function does not know how to rewrite
8048 loop tests with more than one condition.
8050 Look backwards from the first insn in the last comparison
8051 sequence and see if we've got another comparison sequence. */
8053 rtx jump1;
8054 if ((jump1 = prev_nonnote_insn (first_compare)) != loop->cont)
8055 if (GET_CODE (jump1) == JUMP_INSN)
8056 return 0;
8059 /* Check all of the bivs to see if the compare uses one of them.
8060 Skip biv's set more than once because we can't guarantee that
8061 it will be zero on the last iteration. Also skip if the biv is
8062 used between its update and the test insn. */
8064 for (bl = ivs->list; bl; bl = bl->next)
8066 if (bl->biv_count == 1
8067 && ! bl->biv->maybe_multiple
8068 && bl->biv->dest_reg == XEXP (comparison, 0)
8069 && ! reg_used_between_p (regno_reg_rtx[bl->regno], bl->biv->insn,
8070 first_compare))
8071 break;
8074 if (! bl)
8075 return 0;
8077 /* Look for the case where the basic induction variable is always
8078 nonnegative, and equals zero on the last iteration.
8079 In this case, add a reg_note REG_NONNEG, which allows the
8080 m68k DBRA instruction to be used. */
8082 if (((GET_CODE (comparison) == GT
8083 && GET_CODE (XEXP (comparison, 1)) == CONST_INT
8084 && INTVAL (XEXP (comparison, 1)) == -1)
8085 || (GET_CODE (comparison) == NE && XEXP (comparison, 1) == const0_rtx))
8086 && GET_CODE (bl->biv->add_val) == CONST_INT
8087 && INTVAL (bl->biv->add_val) < 0)
8089 /* Initial value must be greater than 0,
8090 init_val % -dec_value == 0 to ensure that it equals zero on
8091 the last iteration */
8093 if (GET_CODE (bl->initial_value) == CONST_INT
8094 && INTVAL (bl->initial_value) > 0
8095 && (INTVAL (bl->initial_value)
8096 % (-INTVAL (bl->biv->add_val))) == 0)
8098 /* register always nonnegative, add REG_NOTE to branch */
8099 if (! find_reg_note (jump, REG_NONNEG, NULL_RTX))
8100 REG_NOTES (jump)
8101 = gen_rtx_EXPR_LIST (REG_NONNEG, bl->biv->dest_reg,
8102 REG_NOTES (jump));
8103 bl->nonneg = 1;
8105 return 1;
8108 /* If the decrement is 1 and the value was tested as >= 0 before
8109 the loop, then we can safely optimize. */
8110 for (p = loop_start; p; p = PREV_INSN (p))
8112 if (GET_CODE (p) == CODE_LABEL)
8113 break;
8114 if (GET_CODE (p) != JUMP_INSN)
8115 continue;
8117 before_comparison = get_condition_for_loop (loop, p);
8118 if (before_comparison
8119 && XEXP (before_comparison, 0) == bl->biv->dest_reg
8120 && GET_CODE (before_comparison) == LT
8121 && XEXP (before_comparison, 1) == const0_rtx
8122 && ! reg_set_between_p (bl->biv->dest_reg, p, loop_start)
8123 && INTVAL (bl->biv->add_val) == -1)
8125 if (! find_reg_note (jump, REG_NONNEG, NULL_RTX))
8126 REG_NOTES (jump)
8127 = gen_rtx_EXPR_LIST (REG_NONNEG, bl->biv->dest_reg,
8128 REG_NOTES (jump));
8129 bl->nonneg = 1;
8131 return 1;
8135 else if (GET_CODE (bl->biv->add_val) == CONST_INT
8136 && INTVAL (bl->biv->add_val) > 0)
8138 /* Try to change inc to dec, so can apply above optimization. */
8139 /* Can do this if:
8140 all registers modified are induction variables or invariant,
8141 all memory references have non-overlapping addresses
8142 (obviously true if only one write)
8143 allow 2 insns for the compare/jump at the end of the loop. */
8144 /* Also, we must avoid any instructions which use both the reversed
8145 biv and another biv. Such instructions will fail if the loop is
8146 reversed. We meet this condition by requiring that either
8147 no_use_except_counting is true, or else that there is only
8148 one biv. */
8149 int num_nonfixed_reads = 0;
8150 /* 1 if the iteration var is used only to count iterations. */
8151 int no_use_except_counting = 0;
8152 /* 1 if the loop has no memory store, or it has a single memory store
8153 which is reversible. */
8154 int reversible_mem_store = 1;
8156 if (bl->giv_count == 0
8157 && !loop->exit_count
8158 && !loop_info->has_multiple_exit_targets)
8160 rtx bivreg = regno_reg_rtx[bl->regno];
8161 struct iv_class *blt;
8163 /* If there are no givs for this biv, and the only exit is the
8164 fall through at the end of the loop, then
8165 see if perhaps there are no uses except to count. */
8166 no_use_except_counting = 1;
8167 for (p = loop_start; p != loop_end; p = NEXT_INSN (p))
8168 if (INSN_P (p))
8170 rtx set = single_set (p);
8172 if (set && GET_CODE (SET_DEST (set)) == REG
8173 && REGNO (SET_DEST (set)) == bl->regno)
8174 /* An insn that sets the biv is okay. */
8176 else if ((p == prev_nonnote_insn (prev_nonnote_insn (loop_end))
8177 || p == prev_nonnote_insn (loop_end))
8178 && reg_mentioned_p (bivreg, PATTERN (p)))
8180 /* If either of these insns uses the biv and sets a pseudo
8181 that has more than one usage, then the biv has uses
8182 other than counting since it's used to derive a value
8183 that is used more than one time. */
8184 note_stores (PATTERN (p), note_set_pseudo_multiple_uses,
8185 regs);
8186 if (regs->multiple_uses)
8188 no_use_except_counting = 0;
8189 break;
8192 else if (reg_mentioned_p (bivreg, PATTERN (p)))
8194 no_use_except_counting = 0;
8195 break;
8199 /* A biv has uses besides counting if it is used to set
8200 another biv. */
8201 for (blt = ivs->list; blt; blt = blt->next)
8202 if (blt->init_set
8203 && reg_mentioned_p (bivreg, SET_SRC (blt->init_set)))
8205 no_use_except_counting = 0;
8206 break;
8210 if (no_use_except_counting)
8211 /* No need to worry about MEMs. */
8213 else if (loop_info->num_mem_sets <= 1)
8215 for (p = loop_start; p != loop_end; p = NEXT_INSN (p))
8216 if (INSN_P (p))
8217 num_nonfixed_reads += count_nonfixed_reads (loop, PATTERN (p));
8219 /* If the loop has a single store, and the destination address is
8220 invariant, then we can't reverse the loop, because this address
8221 might then have the wrong value at loop exit.
8222 This would work if the source was invariant also, however, in that
8223 case, the insn should have been moved out of the loop. */
8225 if (loop_info->num_mem_sets == 1)
8227 struct induction *v;
8229 /* If we could prove that each of the memory locations
8230 written to was different, then we could reverse the
8231 store -- but we don't presently have any way of
8232 knowing that. */
8233 reversible_mem_store = 0;
8235 /* If the store depends on a register that is set after the
8236 store, it depends on the initial value, and is thus not
8237 reversible. */
8238 for (v = bl->giv; reversible_mem_store && v; v = v->next_iv)
8240 if (v->giv_type == DEST_REG
8241 && reg_mentioned_p (v->dest_reg,
8242 PATTERN (loop_info->first_loop_store_insn))
8243 && loop_insn_first_p (loop_info->first_loop_store_insn,
8244 v->insn))
8245 reversible_mem_store = 0;
8249 else
8250 return 0;
8252 /* This code only acts for innermost loops. Also it simplifies
8253 the memory address check by only reversing loops with
8254 zero or one memory access.
8255 Two memory accesses could involve parts of the same array,
8256 and that can't be reversed.
8257 If the biv is used only for counting, than we don't need to worry
8258 about all these things. */
8260 if ((num_nonfixed_reads <= 1
8261 && ! loop_info->has_nonconst_call
8262 && ! loop_info->has_prefetch
8263 && ! loop_info->has_volatile
8264 && reversible_mem_store
8265 && (bl->giv_count + bl->biv_count + loop_info->num_mem_sets
8266 + num_unmoved_movables (loop) + compare_and_branch == insn_count)
8267 && (bl == ivs->list && bl->next == 0))
8268 || (no_use_except_counting && ! loop_info->has_prefetch))
8270 rtx tem;
8272 /* Loop can be reversed. */
8273 if (loop_dump_stream)
8274 fprintf (loop_dump_stream, "Can reverse loop\n");
8276 /* Now check other conditions:
8278 The increment must be a constant, as must the initial value,
8279 and the comparison code must be LT.
8281 This test can probably be improved since +/- 1 in the constant
8282 can be obtained by changing LT to LE and vice versa; this is
8283 confusing. */
8285 if (comparison
8286 /* for constants, LE gets turned into LT */
8287 && (GET_CODE (comparison) == LT
8288 || (GET_CODE (comparison) == LE
8289 && no_use_except_counting)))
8291 HOST_WIDE_INT add_val, add_adjust, comparison_val = 0;
8292 rtx initial_value, comparison_value;
8293 int nonneg = 0;
8294 enum rtx_code cmp_code;
8295 int comparison_const_width;
8296 unsigned HOST_WIDE_INT comparison_sign_mask;
8298 add_val = INTVAL (bl->biv->add_val);
8299 comparison_value = XEXP (comparison, 1);
8300 if (GET_MODE (comparison_value) == VOIDmode)
8301 comparison_const_width
8302 = GET_MODE_BITSIZE (GET_MODE (XEXP (comparison, 0)));
8303 else
8304 comparison_const_width
8305 = GET_MODE_BITSIZE (GET_MODE (comparison_value));
8306 if (comparison_const_width > HOST_BITS_PER_WIDE_INT)
8307 comparison_const_width = HOST_BITS_PER_WIDE_INT;
8308 comparison_sign_mask
8309 = (unsigned HOST_WIDE_INT) 1 << (comparison_const_width - 1);
8311 /* If the comparison value is not a loop invariant, then we
8312 can not reverse this loop.
8314 ??? If the insns which initialize the comparison value as
8315 a whole compute an invariant result, then we could move
8316 them out of the loop and proceed with loop reversal. */
8317 if (! loop_invariant_p (loop, comparison_value))
8318 return 0;
8320 if (GET_CODE (comparison_value) == CONST_INT)
8321 comparison_val = INTVAL (comparison_value);
8322 initial_value = bl->initial_value;
8324 /* Normalize the initial value if it is an integer and
8325 has no other use except as a counter. This will allow
8326 a few more loops to be reversed. */
8327 if (no_use_except_counting
8328 && GET_CODE (comparison_value) == CONST_INT
8329 && GET_CODE (initial_value) == CONST_INT)
8331 comparison_val = comparison_val - INTVAL (bl->initial_value);
8332 /* The code below requires comparison_val to be a multiple
8333 of add_val in order to do the loop reversal, so
8334 round up comparison_val to a multiple of add_val.
8335 Since comparison_value is constant, we know that the
8336 current comparison code is LT. */
8337 comparison_val = comparison_val + add_val - 1;
8338 comparison_val
8339 -= (unsigned HOST_WIDE_INT) comparison_val % add_val;
8340 /* We postpone overflow checks for COMPARISON_VAL here;
8341 even if there is an overflow, we might still be able to
8342 reverse the loop, if converting the loop exit test to
8343 NE is possible. */
8344 initial_value = const0_rtx;
8347 /* First check if we can do a vanilla loop reversal. */
8348 if (initial_value == const0_rtx
8349 /* If we have a decrement_and_branch_on_count,
8350 prefer the NE test, since this will allow that
8351 instruction to be generated. Note that we must
8352 use a vanilla loop reversal if the biv is used to
8353 calculate a giv or has a non-counting use. */
8354 #if ! defined (HAVE_decrement_and_branch_until_zero) \
8355 && defined (HAVE_decrement_and_branch_on_count)
8356 && (! (add_val == 1 && loop->vtop
8357 && (bl->biv_count == 0
8358 || no_use_except_counting)))
8359 #endif
8360 && GET_CODE (comparison_value) == CONST_INT
8361 /* Now do postponed overflow checks on COMPARISON_VAL. */
8362 && ! (((comparison_val - add_val) ^ INTVAL (comparison_value))
8363 & comparison_sign_mask))
8365 /* Register will always be nonnegative, with value
8366 0 on last iteration */
8367 add_adjust = add_val;
8368 nonneg = 1;
8369 cmp_code = GE;
8371 else if (add_val == 1 && loop->vtop
8372 && (bl->biv_count == 0
8373 || no_use_except_counting))
8375 add_adjust = 0;
8376 cmp_code = NE;
8378 else
8379 return 0;
8381 if (GET_CODE (comparison) == LE)
8382 add_adjust -= add_val;
8384 /* If the initial value is not zero, or if the comparison
8385 value is not an exact multiple of the increment, then we
8386 can not reverse this loop. */
8387 if (initial_value == const0_rtx
8388 && GET_CODE (comparison_value) == CONST_INT)
8390 if (((unsigned HOST_WIDE_INT) comparison_val % add_val) != 0)
8391 return 0;
8393 else
8395 if (! no_use_except_counting || add_val != 1)
8396 return 0;
8399 final_value = comparison_value;
8401 /* Reset these in case we normalized the initial value
8402 and comparison value above. */
8403 if (GET_CODE (comparison_value) == CONST_INT
8404 && GET_CODE (initial_value) == CONST_INT)
8406 comparison_value = GEN_INT (comparison_val);
8407 final_value
8408 = GEN_INT (comparison_val + INTVAL (bl->initial_value));
8410 bl->initial_value = initial_value;
8412 /* Save some info needed to produce the new insns. */
8413 reg = bl->biv->dest_reg;
8414 jump_label = condjump_label (PREV_INSN (loop_end));
8415 new_add_val = GEN_INT (-INTVAL (bl->biv->add_val));
8417 /* Set start_value; if this is not a CONST_INT, we need
8418 to generate a SUB.
8419 Initialize biv to start_value before loop start.
8420 The old initializing insn will be deleted as a
8421 dead store by flow.c. */
8422 if (initial_value == const0_rtx
8423 && GET_CODE (comparison_value) == CONST_INT)
8425 start_value = GEN_INT (comparison_val - add_adjust);
8426 loop_insn_hoist (loop, gen_move_insn (reg, start_value));
8428 else if (GET_CODE (initial_value) == CONST_INT)
8430 enum machine_mode mode = GET_MODE (reg);
8431 rtx offset = GEN_INT (-INTVAL (initial_value) - add_adjust);
8432 rtx add_insn = gen_add3_insn (reg, comparison_value, offset);
8434 if (add_insn == 0)
8435 return 0;
8437 start_value
8438 = gen_rtx_PLUS (mode, comparison_value, offset);
8439 loop_insn_hoist (loop, add_insn);
8440 if (GET_CODE (comparison) == LE)
8441 final_value = gen_rtx_PLUS (mode, comparison_value,
8442 GEN_INT (add_val));
8444 else if (! add_adjust)
8446 enum machine_mode mode = GET_MODE (reg);
8447 rtx sub_insn = gen_sub3_insn (reg, comparison_value,
8448 initial_value);
8450 if (sub_insn == 0)
8451 return 0;
8452 start_value
8453 = gen_rtx_MINUS (mode, comparison_value, initial_value);
8454 loop_insn_hoist (loop, sub_insn);
8456 else
8457 /* We could handle the other cases too, but it'll be
8458 better to have a testcase first. */
8459 return 0;
8461 /* We may not have a single insn which can increment a reg, so
8462 create a sequence to hold all the insns from expand_inc. */
8463 start_sequence ();
8464 expand_inc (reg, new_add_val);
8465 tem = get_insns ();
8466 end_sequence ();
8468 p = loop_insn_emit_before (loop, 0, bl->biv->insn, tem);
8469 delete_insn (bl->biv->insn);
8471 /* Update biv info to reflect its new status. */
8472 bl->biv->insn = p;
8473 bl->initial_value = start_value;
8474 bl->biv->add_val = new_add_val;
8476 /* Update loop info. */
8477 loop_info->initial_value = reg;
8478 loop_info->initial_equiv_value = reg;
8479 loop_info->final_value = const0_rtx;
8480 loop_info->final_equiv_value = const0_rtx;
8481 loop_info->comparison_value = const0_rtx;
8482 loop_info->comparison_code = cmp_code;
8483 loop_info->increment = new_add_val;
8485 /* Inc LABEL_NUSES so that delete_insn will
8486 not delete the label. */
8487 LABEL_NUSES (XEXP (jump_label, 0))++;
8489 /* Emit an insn after the end of the loop to set the biv's
8490 proper exit value if it is used anywhere outside the loop. */
8491 if ((REGNO_LAST_UID (bl->regno) != INSN_UID (first_compare))
8492 || ! bl->init_insn
8493 || REGNO_FIRST_UID (bl->regno) != INSN_UID (bl->init_insn))
8494 loop_insn_sink (loop, gen_load_of_final_value (reg, final_value));
8496 /* Delete compare/branch at end of loop. */
8497 delete_related_insns (PREV_INSN (loop_end));
8498 if (compare_and_branch == 2)
8499 delete_related_insns (first_compare);
8501 /* Add new compare/branch insn at end of loop. */
8502 start_sequence ();
8503 emit_cmp_and_jump_insns (reg, const0_rtx, cmp_code, NULL_RTX,
8504 GET_MODE (reg), 0,
8505 XEXP (jump_label, 0));
8506 tem = get_insns ();
8507 end_sequence ();
8508 emit_jump_insn_before (tem, loop_end);
8510 for (tem = PREV_INSN (loop_end);
8511 tem && GET_CODE (tem) != JUMP_INSN;
8512 tem = PREV_INSN (tem))
8515 if (tem)
8516 JUMP_LABEL (tem) = XEXP (jump_label, 0);
8518 if (nonneg)
8520 if (tem)
8522 /* Increment of LABEL_NUSES done above. */
8523 /* Register is now always nonnegative,
8524 so add REG_NONNEG note to the branch. */
8525 REG_NOTES (tem) = gen_rtx_EXPR_LIST (REG_NONNEG, reg,
8526 REG_NOTES (tem));
8528 bl->nonneg = 1;
8531 /* No insn may reference both the reversed and another biv or it
8532 will fail (see comment near the top of the loop reversal
8533 code).
8534 Earlier on, we have verified that the biv has no use except
8535 counting, or it is the only biv in this function.
8536 However, the code that computes no_use_except_counting does
8537 not verify reg notes. It's possible to have an insn that
8538 references another biv, and has a REG_EQUAL note with an
8539 expression based on the reversed biv. To avoid this case,
8540 remove all REG_EQUAL notes based on the reversed biv
8541 here. */
8542 for (p = loop_start; p != loop_end; p = NEXT_INSN (p))
8543 if (INSN_P (p))
8545 rtx *pnote;
8546 rtx set = single_set (p);
8547 /* If this is a set of a GIV based on the reversed biv, any
8548 REG_EQUAL notes should still be correct. */
8549 if (! set
8550 || GET_CODE (SET_DEST (set)) != REG
8551 || (size_t) REGNO (SET_DEST (set)) >= ivs->n_regs
8552 || REG_IV_TYPE (ivs, REGNO (SET_DEST (set))) != GENERAL_INDUCT
8553 || REG_IV_INFO (ivs, REGNO (SET_DEST (set)))->src_reg != bl->biv->src_reg)
8554 for (pnote = &REG_NOTES (p); *pnote;)
8556 if (REG_NOTE_KIND (*pnote) == REG_EQUAL
8557 && reg_mentioned_p (regno_reg_rtx[bl->regno],
8558 XEXP (*pnote, 0)))
8559 *pnote = XEXP (*pnote, 1);
8560 else
8561 pnote = &XEXP (*pnote, 1);
8565 /* Mark that this biv has been reversed. Each giv which depends
8566 on this biv, and which is also live past the end of the loop
8567 will have to be fixed up. */
8569 bl->reversed = 1;
8571 if (loop_dump_stream)
8573 fprintf (loop_dump_stream, "Reversed loop");
8574 if (bl->nonneg)
8575 fprintf (loop_dump_stream, " and added reg_nonneg\n");
8576 else
8577 fprintf (loop_dump_stream, "\n");
8580 return 1;
8585 return 0;
8588 /* Verify whether the biv BL appears to be eliminable,
8589 based on the insns in the loop that refer to it.
8591 If ELIMINATE_P is nonzero, actually do the elimination.
8593 THRESHOLD and INSN_COUNT are from loop_optimize and are used to
8594 determine whether invariant insns should be placed inside or at the
8595 start of the loop. */
8597 static int
8598 maybe_eliminate_biv (loop, bl, eliminate_p, threshold, insn_count)
8599 const struct loop *loop;
8600 struct iv_class *bl;
8601 int eliminate_p;
8602 int threshold, insn_count;
8604 struct loop_ivs *ivs = LOOP_IVS (loop);
8605 rtx reg = bl->biv->dest_reg;
8606 rtx p;
8608 /* Scan all insns in the loop, stopping if we find one that uses the
8609 biv in a way that we cannot eliminate. */
8611 for (p = loop->start; p != loop->end; p = NEXT_INSN (p))
8613 enum rtx_code code = GET_CODE (p);
8614 basic_block where_bb = 0;
8615 rtx where_insn = threshold >= insn_count ? 0 : p;
8616 rtx note;
8618 /* If this is a libcall that sets a giv, skip ahead to its end. */
8619 if (GET_RTX_CLASS (code) == 'i')
8621 note = find_reg_note (p, REG_LIBCALL, NULL_RTX);
8623 if (note)
8625 rtx last = XEXP (note, 0);
8626 rtx set = single_set (last);
8628 if (set && GET_CODE (SET_DEST (set)) == REG)
8630 unsigned int regno = REGNO (SET_DEST (set));
8632 if (regno < ivs->n_regs
8633 && REG_IV_TYPE (ivs, regno) == GENERAL_INDUCT
8634 && REG_IV_INFO (ivs, regno)->src_reg == bl->biv->src_reg)
8635 p = last;
8640 /* Closely examine the insn if the biv is mentioned. */
8641 if ((code == INSN || code == JUMP_INSN || code == CALL_INSN)
8642 && reg_mentioned_p (reg, PATTERN (p))
8643 && ! maybe_eliminate_biv_1 (loop, PATTERN (p), p, bl,
8644 eliminate_p, where_bb, where_insn))
8646 if (loop_dump_stream)
8647 fprintf (loop_dump_stream,
8648 "Cannot eliminate biv %d: biv used in insn %d.\n",
8649 bl->regno, INSN_UID (p));
8650 break;
8653 /* If we are eliminating, kill REG_EQUAL notes mentioning the biv. */
8654 if (eliminate_p
8655 && (note = find_reg_note (p, REG_EQUAL, NULL_RTX)) != NULL_RTX
8656 && reg_mentioned_p (reg, XEXP (note, 0)))
8657 remove_note (p, note);
8660 if (p == loop->end)
8662 if (loop_dump_stream)
8663 fprintf (loop_dump_stream, "biv %d %s eliminated.\n",
8664 bl->regno, eliminate_p ? "was" : "can be");
8665 return 1;
8668 return 0;
8671 /* INSN and REFERENCE are instructions in the same insn chain.
8672 Return nonzero if INSN is first. */
8675 loop_insn_first_p (insn, reference)
8676 rtx insn, reference;
8678 rtx p, q;
8680 for (p = insn, q = reference;;)
8682 /* Start with test for not first so that INSN == REFERENCE yields not
8683 first. */
8684 if (q == insn || ! p)
8685 return 0;
8686 if (p == reference || ! q)
8687 return 1;
8689 /* Either of P or Q might be a NOTE. Notes have the same LUID as the
8690 previous insn, hence the <= comparison below does not work if
8691 P is a note. */
8692 if (INSN_UID (p) < max_uid_for_loop
8693 && INSN_UID (q) < max_uid_for_loop
8694 && GET_CODE (p) != NOTE)
8695 return INSN_LUID (p) <= INSN_LUID (q);
8697 if (INSN_UID (p) >= max_uid_for_loop
8698 || GET_CODE (p) == NOTE)
8699 p = NEXT_INSN (p);
8700 if (INSN_UID (q) >= max_uid_for_loop)
8701 q = NEXT_INSN (q);
8705 /* We are trying to eliminate BIV in INSN using GIV. Return nonzero if
8706 the offset that we have to take into account due to auto-increment /
8707 div derivation is zero. */
8708 static int
8709 biv_elimination_giv_has_0_offset (biv, giv, insn)
8710 struct induction *biv, *giv;
8711 rtx insn;
8713 /* If the giv V had the auto-inc address optimization applied
8714 to it, and INSN occurs between the giv insn and the biv
8715 insn, then we'd have to adjust the value used here.
8716 This is rare, so we don't bother to make this possible. */
8717 if (giv->auto_inc_opt
8718 && ((loop_insn_first_p (giv->insn, insn)
8719 && loop_insn_first_p (insn, biv->insn))
8720 || (loop_insn_first_p (biv->insn, insn)
8721 && loop_insn_first_p (insn, giv->insn))))
8722 return 0;
8724 return 1;
8727 /* If BL appears in X (part of the pattern of INSN), see if we can
8728 eliminate its use. If so, return 1. If not, return 0.
8730 If BIV does not appear in X, return 1.
8732 If ELIMINATE_P is nonzero, actually do the elimination.
8733 WHERE_INSN/WHERE_BB indicate where extra insns should be added.
8734 Depending on how many items have been moved out of the loop, it
8735 will either be before INSN (when WHERE_INSN is nonzero) or at the
8736 start of the loop (when WHERE_INSN is zero). */
8738 static int
8739 maybe_eliminate_biv_1 (loop, x, insn, bl, eliminate_p, where_bb, where_insn)
8740 const struct loop *loop;
8741 rtx x, insn;
8742 struct iv_class *bl;
8743 int eliminate_p;
8744 basic_block where_bb;
8745 rtx where_insn;
8747 enum rtx_code code = GET_CODE (x);
8748 rtx reg = bl->biv->dest_reg;
8749 enum machine_mode mode = GET_MODE (reg);
8750 struct induction *v;
8751 rtx arg, tem;
8752 #ifdef HAVE_cc0
8753 rtx new;
8754 #endif
8755 int arg_operand;
8756 const char *fmt;
8757 int i, j;
8759 switch (code)
8761 case REG:
8762 /* If we haven't already been able to do something with this BIV,
8763 we can't eliminate it. */
8764 if (x == reg)
8765 return 0;
8766 return 1;
8768 case SET:
8769 /* If this sets the BIV, it is not a problem. */
8770 if (SET_DEST (x) == reg)
8771 return 1;
8773 /* If this is an insn that defines a giv, it is also ok because
8774 it will go away when the giv is reduced. */
8775 for (v = bl->giv; v; v = v->next_iv)
8776 if (v->giv_type == DEST_REG && SET_DEST (x) == v->dest_reg)
8777 return 1;
8779 #ifdef HAVE_cc0
8780 if (SET_DEST (x) == cc0_rtx && SET_SRC (x) == reg)
8782 /* Can replace with any giv that was reduced and
8783 that has (MULT_VAL != 0) and (ADD_VAL == 0).
8784 Require a constant for MULT_VAL, so we know it's nonzero.
8785 ??? We disable this optimization to avoid potential
8786 overflows. */
8788 for (v = bl->giv; v; v = v->next_iv)
8789 if (GET_CODE (v->mult_val) == CONST_INT && v->mult_val != const0_rtx
8790 && v->add_val == const0_rtx
8791 && ! v->ignore && ! v->maybe_dead && v->always_computable
8792 && v->mode == mode
8793 && 0)
8795 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
8796 continue;
8798 if (! eliminate_p)
8799 return 1;
8801 /* If the giv has the opposite direction of change,
8802 then reverse the comparison. */
8803 if (INTVAL (v->mult_val) < 0)
8804 new = gen_rtx_COMPARE (GET_MODE (v->new_reg),
8805 const0_rtx, v->new_reg);
8806 else
8807 new = v->new_reg;
8809 /* We can probably test that giv's reduced reg. */
8810 if (validate_change (insn, &SET_SRC (x), new, 0))
8811 return 1;
8814 /* Look for a giv with (MULT_VAL != 0) and (ADD_VAL != 0);
8815 replace test insn with a compare insn (cmp REDUCED_GIV ADD_VAL).
8816 Require a constant for MULT_VAL, so we know it's nonzero.
8817 ??? Do this only if ADD_VAL is a pointer to avoid a potential
8818 overflow problem. */
8820 for (v = bl->giv; v; v = v->next_iv)
8821 if (GET_CODE (v->mult_val) == CONST_INT
8822 && v->mult_val != const0_rtx
8823 && ! v->ignore && ! v->maybe_dead && v->always_computable
8824 && v->mode == mode
8825 && (GET_CODE (v->add_val) == SYMBOL_REF
8826 || GET_CODE (v->add_val) == LABEL_REF
8827 || GET_CODE (v->add_val) == CONST
8828 || (GET_CODE (v->add_val) == REG
8829 && REG_POINTER (v->add_val))))
8831 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
8832 continue;
8834 if (! eliminate_p)
8835 return 1;
8837 /* If the giv has the opposite direction of change,
8838 then reverse the comparison. */
8839 if (INTVAL (v->mult_val) < 0)
8840 new = gen_rtx_COMPARE (VOIDmode, copy_rtx (v->add_val),
8841 v->new_reg);
8842 else
8843 new = gen_rtx_COMPARE (VOIDmode, v->new_reg,
8844 copy_rtx (v->add_val));
8846 /* Replace biv with the giv's reduced register. */
8847 update_reg_last_use (v->add_val, insn);
8848 if (validate_change (insn, &SET_SRC (PATTERN (insn)), new, 0))
8849 return 1;
8851 /* Insn doesn't support that constant or invariant. Copy it
8852 into a register (it will be a loop invariant.) */
8853 tem = gen_reg_rtx (GET_MODE (v->new_reg));
8855 loop_insn_emit_before (loop, 0, where_insn,
8856 gen_move_insn (tem,
8857 copy_rtx (v->add_val)));
8859 /* Substitute the new register for its invariant value in
8860 the compare expression. */
8861 XEXP (new, (INTVAL (v->mult_val) < 0) ? 0 : 1) = tem;
8862 if (validate_change (insn, &SET_SRC (PATTERN (insn)), new, 0))
8863 return 1;
8866 #endif
8867 break;
8869 case COMPARE:
8870 case EQ: case NE:
8871 case GT: case GE: case GTU: case GEU:
8872 case LT: case LE: case LTU: case LEU:
8873 /* See if either argument is the biv. */
8874 if (XEXP (x, 0) == reg)
8875 arg = XEXP (x, 1), arg_operand = 1;
8876 else if (XEXP (x, 1) == reg)
8877 arg = XEXP (x, 0), arg_operand = 0;
8878 else
8879 break;
8881 if (CONSTANT_P (arg))
8883 /* First try to replace with any giv that has constant positive
8884 mult_val and constant add_val. We might be able to support
8885 negative mult_val, but it seems complex to do it in general. */
8887 for (v = bl->giv; v; v = v->next_iv)
8888 if (GET_CODE (v->mult_val) == CONST_INT
8889 && INTVAL (v->mult_val) > 0
8890 && (GET_CODE (v->add_val) == SYMBOL_REF
8891 || GET_CODE (v->add_val) == LABEL_REF
8892 || GET_CODE (v->add_val) == CONST
8893 || (GET_CODE (v->add_val) == REG
8894 && REG_POINTER (v->add_val)))
8895 && ! v->ignore && ! v->maybe_dead && v->always_computable
8896 && v->mode == mode)
8898 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
8899 continue;
8901 /* Don't eliminate if the linear combination that makes up
8902 the giv overflows when it is applied to ARG. */
8903 if (GET_CODE (arg) == CONST_INT)
8905 rtx add_val;
8907 if (GET_CODE (v->add_val) == CONST_INT)
8908 add_val = v->add_val;
8909 else
8910 add_val = const0_rtx;
8912 if (const_mult_add_overflow_p (arg, v->mult_val,
8913 add_val, mode, 1))
8914 continue;
8917 if (! eliminate_p)
8918 return 1;
8920 /* Replace biv with the giv's reduced reg. */
8921 validate_change (insn, &XEXP (x, 1 - arg_operand), v->new_reg, 1);
8923 /* If all constants are actually constant integers and
8924 the derived constant can be directly placed in the COMPARE,
8925 do so. */
8926 if (GET_CODE (arg) == CONST_INT
8927 && GET_CODE (v->add_val) == CONST_INT)
8929 tem = expand_mult_add (arg, NULL_RTX, v->mult_val,
8930 v->add_val, mode, 1);
8932 else
8934 /* Otherwise, load it into a register. */
8935 tem = gen_reg_rtx (mode);
8936 loop_iv_add_mult_emit_before (loop, arg,
8937 v->mult_val, v->add_val,
8938 tem, where_bb, where_insn);
8941 validate_change (insn, &XEXP (x, arg_operand), tem, 1);
8943 if (apply_change_group ())
8944 return 1;
8947 /* Look for giv with positive constant mult_val and nonconst add_val.
8948 Insert insns to calculate new compare value.
8949 ??? Turn this off due to possible overflow. */
8951 for (v = bl->giv; v; v = v->next_iv)
8952 if (GET_CODE (v->mult_val) == CONST_INT
8953 && INTVAL (v->mult_val) > 0
8954 && ! v->ignore && ! v->maybe_dead && v->always_computable
8955 && v->mode == mode
8956 && 0)
8958 rtx tem;
8960 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
8961 continue;
8963 if (! eliminate_p)
8964 return 1;
8966 tem = gen_reg_rtx (mode);
8968 /* Replace biv with giv's reduced register. */
8969 validate_change (insn, &XEXP (x, 1 - arg_operand),
8970 v->new_reg, 1);
8972 /* Compute value to compare against. */
8973 loop_iv_add_mult_emit_before (loop, arg,
8974 v->mult_val, v->add_val,
8975 tem, where_bb, where_insn);
8976 /* Use it in this insn. */
8977 validate_change (insn, &XEXP (x, arg_operand), tem, 1);
8978 if (apply_change_group ())
8979 return 1;
8982 else if (GET_CODE (arg) == REG || GET_CODE (arg) == MEM)
8984 if (loop_invariant_p (loop, arg) == 1)
8986 /* Look for giv with constant positive mult_val and nonconst
8987 add_val. Insert insns to compute new compare value.
8988 ??? Turn this off due to possible overflow. */
8990 for (v = bl->giv; v; v = v->next_iv)
8991 if (GET_CODE (v->mult_val) == CONST_INT && INTVAL (v->mult_val) > 0
8992 && ! v->ignore && ! v->maybe_dead && v->always_computable
8993 && v->mode == mode
8994 && 0)
8996 rtx tem;
8998 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
8999 continue;
9001 if (! eliminate_p)
9002 return 1;
9004 tem = gen_reg_rtx (mode);
9006 /* Replace biv with giv's reduced register. */
9007 validate_change (insn, &XEXP (x, 1 - arg_operand),
9008 v->new_reg, 1);
9010 /* Compute value to compare against. */
9011 loop_iv_add_mult_emit_before (loop, arg,
9012 v->mult_val, v->add_val,
9013 tem, where_bb, where_insn);
9014 validate_change (insn, &XEXP (x, arg_operand), tem, 1);
9015 if (apply_change_group ())
9016 return 1;
9020 /* This code has problems. Basically, you can't know when
9021 seeing if we will eliminate BL, whether a particular giv
9022 of ARG will be reduced. If it isn't going to be reduced,
9023 we can't eliminate BL. We can try forcing it to be reduced,
9024 but that can generate poor code.
9026 The problem is that the benefit of reducing TV, below should
9027 be increased if BL can actually be eliminated, but this means
9028 we might have to do a topological sort of the order in which
9029 we try to process biv. It doesn't seem worthwhile to do
9030 this sort of thing now. */
9032 #if 0
9033 /* Otherwise the reg compared with had better be a biv. */
9034 if (GET_CODE (arg) != REG
9035 || REG_IV_TYPE (ivs, REGNO (arg)) != BASIC_INDUCT)
9036 return 0;
9038 /* Look for a pair of givs, one for each biv,
9039 with identical coefficients. */
9040 for (v = bl->giv; v; v = v->next_iv)
9042 struct induction *tv;
9044 if (v->ignore || v->maybe_dead || v->mode != mode)
9045 continue;
9047 for (tv = REG_IV_CLASS (ivs, REGNO (arg))->giv; tv;
9048 tv = tv->next_iv)
9049 if (! tv->ignore && ! tv->maybe_dead
9050 && rtx_equal_p (tv->mult_val, v->mult_val)
9051 && rtx_equal_p (tv->add_val, v->add_val)
9052 && tv->mode == mode)
9054 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
9055 continue;
9057 if (! eliminate_p)
9058 return 1;
9060 /* Replace biv with its giv's reduced reg. */
9061 XEXP (x, 1 - arg_operand) = v->new_reg;
9062 /* Replace other operand with the other giv's
9063 reduced reg. */
9064 XEXP (x, arg_operand) = tv->new_reg;
9065 return 1;
9068 #endif
9071 /* If we get here, the biv can't be eliminated. */
9072 return 0;
9074 case MEM:
9075 /* If this address is a DEST_ADDR giv, it doesn't matter if the
9076 biv is used in it, since it will be replaced. */
9077 for (v = bl->giv; v; v = v->next_iv)
9078 if (v->giv_type == DEST_ADDR && v->location == &XEXP (x, 0))
9079 return 1;
9080 break;
9082 default:
9083 break;
9086 /* See if any subexpression fails elimination. */
9087 fmt = GET_RTX_FORMAT (code);
9088 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
9090 switch (fmt[i])
9092 case 'e':
9093 if (! maybe_eliminate_biv_1 (loop, XEXP (x, i), insn, bl,
9094 eliminate_p, where_bb, where_insn))
9095 return 0;
9096 break;
9098 case 'E':
9099 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9100 if (! maybe_eliminate_biv_1 (loop, XVECEXP (x, i, j), insn, bl,
9101 eliminate_p, where_bb, where_insn))
9102 return 0;
9103 break;
9107 return 1;
9110 /* Return nonzero if the last use of REG
9111 is in an insn following INSN in the same basic block. */
9113 static int
9114 last_use_this_basic_block (reg, insn)
9115 rtx reg;
9116 rtx insn;
9118 rtx n;
9119 for (n = insn;
9120 n && GET_CODE (n) != CODE_LABEL && GET_CODE (n) != JUMP_INSN;
9121 n = NEXT_INSN (n))
9123 if (REGNO_LAST_UID (REGNO (reg)) == INSN_UID (n))
9124 return 1;
9126 return 0;
9129 /* Called via `note_stores' to record the initial value of a biv. Here we
9130 just record the location of the set and process it later. */
9132 static void
9133 record_initial (dest, set, data)
9134 rtx dest;
9135 rtx set;
9136 void *data ATTRIBUTE_UNUSED;
9138 struct loop_ivs *ivs = (struct loop_ivs *) data;
9139 struct iv_class *bl;
9141 if (GET_CODE (dest) != REG
9142 || REGNO (dest) >= ivs->n_regs
9143 || REG_IV_TYPE (ivs, REGNO (dest)) != BASIC_INDUCT)
9144 return;
9146 bl = REG_IV_CLASS (ivs, REGNO (dest));
9148 /* If this is the first set found, record it. */
9149 if (bl->init_insn == 0)
9151 bl->init_insn = note_insn;
9152 bl->init_set = set;
9156 /* If any of the registers in X are "old" and currently have a last use earlier
9157 than INSN, update them to have a last use of INSN. Their actual last use
9158 will be the previous insn but it will not have a valid uid_luid so we can't
9159 use it. X must be a source expression only. */
9161 static void
9162 update_reg_last_use (x, insn)
9163 rtx x;
9164 rtx insn;
9166 /* Check for the case where INSN does not have a valid luid. In this case,
9167 there is no need to modify the regno_last_uid, as this can only happen
9168 when code is inserted after the loop_end to set a pseudo's final value,
9169 and hence this insn will never be the last use of x.
9170 ???? This comment is not correct. See for example loop_givs_reduce.
9171 This may insert an insn before another new insn. */
9172 if (GET_CODE (x) == REG && REGNO (x) < max_reg_before_loop
9173 && INSN_UID (insn) < max_uid_for_loop
9174 && REGNO_LAST_LUID (REGNO (x)) < INSN_LUID (insn))
9176 REGNO_LAST_UID (REGNO (x)) = INSN_UID (insn);
9178 else
9180 int i, j;
9181 const char *fmt = GET_RTX_FORMAT (GET_CODE (x));
9182 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9184 if (fmt[i] == 'e')
9185 update_reg_last_use (XEXP (x, i), insn);
9186 else if (fmt[i] == 'E')
9187 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
9188 update_reg_last_use (XVECEXP (x, i, j), insn);
9193 /* Given an insn INSN and condition COND, return the condition in a
9194 canonical form to simplify testing by callers. Specifically:
9196 (1) The code will always be a comparison operation (EQ, NE, GT, etc.).
9197 (2) Both operands will be machine operands; (cc0) will have been replaced.
9198 (3) If an operand is a constant, it will be the second operand.
9199 (4) (LE x const) will be replaced with (LT x <const+1>) and similarly
9200 for GE, GEU, and LEU.
9202 If the condition cannot be understood, or is an inequality floating-point
9203 comparison which needs to be reversed, 0 will be returned.
9205 If REVERSE is nonzero, then reverse the condition prior to canonizing it.
9207 If EARLIEST is nonzero, it is a pointer to a place where the earliest
9208 insn used in locating the condition was found. If a replacement test
9209 of the condition is desired, it should be placed in front of that
9210 insn and we will be sure that the inputs are still valid.
9212 If WANT_REG is nonzero, we wish the condition to be relative to that
9213 register, if possible. Therefore, do not canonicalize the condition
9214 further. */
9217 canonicalize_condition (insn, cond, reverse, earliest, want_reg)
9218 rtx insn;
9219 rtx cond;
9220 int reverse;
9221 rtx *earliest;
9222 rtx want_reg;
9224 enum rtx_code code;
9225 rtx prev = insn;
9226 rtx set;
9227 rtx tem;
9228 rtx op0, op1;
9229 int reverse_code = 0;
9230 enum machine_mode mode;
9232 code = GET_CODE (cond);
9233 mode = GET_MODE (cond);
9234 op0 = XEXP (cond, 0);
9235 op1 = XEXP (cond, 1);
9237 if (reverse)
9238 code = reversed_comparison_code (cond, insn);
9239 if (code == UNKNOWN)
9240 return 0;
9242 if (earliest)
9243 *earliest = insn;
9245 /* If we are comparing a register with zero, see if the register is set
9246 in the previous insn to a COMPARE or a comparison operation. Perform
9247 the same tests as a function of STORE_FLAG_VALUE as find_comparison_args
9248 in cse.c */
9250 while (GET_RTX_CLASS (code) == '<'
9251 && op1 == CONST0_RTX (GET_MODE (op0))
9252 && op0 != want_reg)
9254 /* Set nonzero when we find something of interest. */
9255 rtx x = 0;
9257 #ifdef HAVE_cc0
9258 /* If comparison with cc0, import actual comparison from compare
9259 insn. */
9260 if (op0 == cc0_rtx)
9262 if ((prev = prev_nonnote_insn (prev)) == 0
9263 || GET_CODE (prev) != INSN
9264 || (set = single_set (prev)) == 0
9265 || SET_DEST (set) != cc0_rtx)
9266 return 0;
9268 op0 = SET_SRC (set);
9269 op1 = CONST0_RTX (GET_MODE (op0));
9270 if (earliest)
9271 *earliest = prev;
9273 #endif
9275 /* If this is a COMPARE, pick up the two things being compared. */
9276 if (GET_CODE (op0) == COMPARE)
9278 op1 = XEXP (op0, 1);
9279 op0 = XEXP (op0, 0);
9280 continue;
9282 else if (GET_CODE (op0) != REG)
9283 break;
9285 /* Go back to the previous insn. Stop if it is not an INSN. We also
9286 stop if it isn't a single set or if it has a REG_INC note because
9287 we don't want to bother dealing with it. */
9289 if ((prev = prev_nonnote_insn (prev)) == 0
9290 || GET_CODE (prev) != INSN
9291 || FIND_REG_INC_NOTE (prev, NULL_RTX))
9292 break;
9294 set = set_of (op0, prev);
9296 if (set
9297 && (GET_CODE (set) != SET
9298 || !rtx_equal_p (SET_DEST (set), op0)))
9299 break;
9301 /* If this is setting OP0, get what it sets it to if it looks
9302 relevant. */
9303 if (set)
9305 enum machine_mode inner_mode = GET_MODE (SET_DEST (set));
9306 #ifdef FLOAT_STORE_FLAG_VALUE
9307 REAL_VALUE_TYPE fsfv;
9308 #endif
9310 /* ??? We may not combine comparisons done in a CCmode with
9311 comparisons not done in a CCmode. This is to aid targets
9312 like Alpha that have an IEEE compliant EQ instruction, and
9313 a non-IEEE compliant BEQ instruction. The use of CCmode is
9314 actually artificial, simply to prevent the combination, but
9315 should not affect other platforms.
9317 However, we must allow VOIDmode comparisons to match either
9318 CCmode or non-CCmode comparison, because some ports have
9319 modeless comparisons inside branch patterns.
9321 ??? This mode check should perhaps look more like the mode check
9322 in simplify_comparison in combine. */
9324 if ((GET_CODE (SET_SRC (set)) == COMPARE
9325 || (((code == NE
9326 || (code == LT
9327 && GET_MODE_CLASS (inner_mode) == MODE_INT
9328 && (GET_MODE_BITSIZE (inner_mode)
9329 <= HOST_BITS_PER_WIDE_INT)
9330 && (STORE_FLAG_VALUE
9331 & ((HOST_WIDE_INT) 1
9332 << (GET_MODE_BITSIZE (inner_mode) - 1))))
9333 #ifdef FLOAT_STORE_FLAG_VALUE
9334 || (code == LT
9335 && GET_MODE_CLASS (inner_mode) == MODE_FLOAT
9336 && (fsfv = FLOAT_STORE_FLAG_VALUE (inner_mode),
9337 REAL_VALUE_NEGATIVE (fsfv)))
9338 #endif
9340 && GET_RTX_CLASS (GET_CODE (SET_SRC (set))) == '<'))
9341 && (((GET_MODE_CLASS (mode) == MODE_CC)
9342 == (GET_MODE_CLASS (inner_mode) == MODE_CC))
9343 || mode == VOIDmode || inner_mode == VOIDmode))
9344 x = SET_SRC (set);
9345 else if (((code == EQ
9346 || (code == GE
9347 && (GET_MODE_BITSIZE (inner_mode)
9348 <= HOST_BITS_PER_WIDE_INT)
9349 && GET_MODE_CLASS (inner_mode) == MODE_INT
9350 && (STORE_FLAG_VALUE
9351 & ((HOST_WIDE_INT) 1
9352 << (GET_MODE_BITSIZE (inner_mode) - 1))))
9353 #ifdef FLOAT_STORE_FLAG_VALUE
9354 || (code == GE
9355 && GET_MODE_CLASS (inner_mode) == MODE_FLOAT
9356 && (fsfv = FLOAT_STORE_FLAG_VALUE (inner_mode),
9357 REAL_VALUE_NEGATIVE (fsfv)))
9358 #endif
9360 && GET_RTX_CLASS (GET_CODE (SET_SRC (set))) == '<'
9361 && (((GET_MODE_CLASS (mode) == MODE_CC)
9362 == (GET_MODE_CLASS (inner_mode) == MODE_CC))
9363 || mode == VOIDmode || inner_mode == VOIDmode))
9366 reverse_code = 1;
9367 x = SET_SRC (set);
9369 else
9370 break;
9373 else if (reg_set_p (op0, prev))
9374 /* If this sets OP0, but not directly, we have to give up. */
9375 break;
9377 if (x)
9379 if (GET_RTX_CLASS (GET_CODE (x)) == '<')
9380 code = GET_CODE (x);
9381 if (reverse_code)
9383 code = reversed_comparison_code (x, prev);
9384 if (code == UNKNOWN)
9385 return 0;
9386 reverse_code = 0;
9389 op0 = XEXP (x, 0), op1 = XEXP (x, 1);
9390 if (earliest)
9391 *earliest = prev;
9395 /* If constant is first, put it last. */
9396 if (CONSTANT_P (op0))
9397 code = swap_condition (code), tem = op0, op0 = op1, op1 = tem;
9399 /* If OP0 is the result of a comparison, we weren't able to find what
9400 was really being compared, so fail. */
9401 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
9402 return 0;
9404 /* Canonicalize any ordered comparison with integers involving equality
9405 if we can do computations in the relevant mode and we do not
9406 overflow. */
9408 if (GET_CODE (op1) == CONST_INT
9409 && GET_MODE (op0) != VOIDmode
9410 && GET_MODE_BITSIZE (GET_MODE (op0)) <= HOST_BITS_PER_WIDE_INT)
9412 HOST_WIDE_INT const_val = INTVAL (op1);
9413 unsigned HOST_WIDE_INT uconst_val = const_val;
9414 unsigned HOST_WIDE_INT max_val
9415 = (unsigned HOST_WIDE_INT) GET_MODE_MASK (GET_MODE (op0));
9417 switch (code)
9419 case LE:
9420 if ((unsigned HOST_WIDE_INT) const_val != max_val >> 1)
9421 code = LT, op1 = gen_int_mode (const_val + 1, GET_MODE (op0));
9422 break;
9424 /* When cross-compiling, const_val might be sign-extended from
9425 BITS_PER_WORD to HOST_BITS_PER_WIDE_INT */
9426 case GE:
9427 if ((HOST_WIDE_INT) (const_val & max_val)
9428 != (((HOST_WIDE_INT) 1
9429 << (GET_MODE_BITSIZE (GET_MODE (op0)) - 1))))
9430 code = GT, op1 = gen_int_mode (const_val - 1, GET_MODE (op0));
9431 break;
9433 case LEU:
9434 if (uconst_val < max_val)
9435 code = LTU, op1 = gen_int_mode (uconst_val + 1, GET_MODE (op0));
9436 break;
9438 case GEU:
9439 if (uconst_val != 0)
9440 code = GTU, op1 = gen_int_mode (uconst_val - 1, GET_MODE (op0));
9441 break;
9443 default:
9444 break;
9448 #ifdef HAVE_cc0
9449 /* Never return CC0; return zero instead. */
9450 if (op0 == cc0_rtx)
9451 return 0;
9452 #endif
9454 return gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
9457 /* Given a jump insn JUMP, return the condition that will cause it to branch
9458 to its JUMP_LABEL. If the condition cannot be understood, or is an
9459 inequality floating-point comparison which needs to be reversed, 0 will
9460 be returned.
9462 If EARLIEST is nonzero, it is a pointer to a place where the earliest
9463 insn used in locating the condition was found. If a replacement test
9464 of the condition is desired, it should be placed in front of that
9465 insn and we will be sure that the inputs are still valid. */
9468 get_condition (jump, earliest)
9469 rtx jump;
9470 rtx *earliest;
9472 rtx cond;
9473 int reverse;
9474 rtx set;
9476 /* If this is not a standard conditional jump, we can't parse it. */
9477 if (GET_CODE (jump) != JUMP_INSN
9478 || ! any_condjump_p (jump))
9479 return 0;
9480 set = pc_set (jump);
9482 cond = XEXP (SET_SRC (set), 0);
9484 /* If this branches to JUMP_LABEL when the condition is false, reverse
9485 the condition. */
9486 reverse
9487 = GET_CODE (XEXP (SET_SRC (set), 2)) == LABEL_REF
9488 && XEXP (XEXP (SET_SRC (set), 2), 0) == JUMP_LABEL (jump);
9490 return canonicalize_condition (jump, cond, reverse, earliest, NULL_RTX);
9493 /* Similar to above routine, except that we also put an invariant last
9494 unless both operands are invariants. */
9497 get_condition_for_loop (loop, x)
9498 const struct loop *loop;
9499 rtx x;
9501 rtx comparison = get_condition (x, (rtx*) 0);
9503 if (comparison == 0
9504 || ! loop_invariant_p (loop, XEXP (comparison, 0))
9505 || loop_invariant_p (loop, XEXP (comparison, 1)))
9506 return comparison;
9508 return gen_rtx_fmt_ee (swap_condition (GET_CODE (comparison)), VOIDmode,
9509 XEXP (comparison, 1), XEXP (comparison, 0));
9512 /* Scan the function and determine whether it has indirect (computed) jumps.
9514 This is taken mostly from flow.c; similar code exists elsewhere
9515 in the compiler. It may be useful to put this into rtlanal.c. */
9516 static int
9517 indirect_jump_in_function_p (start)
9518 rtx start;
9520 rtx insn;
9522 for (insn = start; insn; insn = NEXT_INSN (insn))
9523 if (computed_jump_p (insn))
9524 return 1;
9526 return 0;
9529 /* Add MEM to the LOOP_MEMS array, if appropriate. See the
9530 documentation for LOOP_MEMS for the definition of `appropriate'.
9531 This function is called from prescan_loop via for_each_rtx. */
9533 static int
9534 insert_loop_mem (mem, data)
9535 rtx *mem;
9536 void *data ATTRIBUTE_UNUSED;
9538 struct loop_info *loop_info = data;
9539 int i;
9540 rtx m = *mem;
9542 if (m == NULL_RTX)
9543 return 0;
9545 switch (GET_CODE (m))
9547 case MEM:
9548 break;
9550 case CLOBBER:
9551 /* We're not interested in MEMs that are only clobbered. */
9552 return -1;
9554 case CONST_DOUBLE:
9555 /* We're not interested in the MEM associated with a
9556 CONST_DOUBLE, so there's no need to traverse into this. */
9557 return -1;
9559 case EXPR_LIST:
9560 /* We're not interested in any MEMs that only appear in notes. */
9561 return -1;
9563 default:
9564 /* This is not a MEM. */
9565 return 0;
9568 /* See if we've already seen this MEM. */
9569 for (i = 0; i < loop_info->mems_idx; ++i)
9570 if (rtx_equal_p (m, loop_info->mems[i].mem))
9572 if (GET_MODE (m) != GET_MODE (loop_info->mems[i].mem))
9573 /* The modes of the two memory accesses are different. If
9574 this happens, something tricky is going on, and we just
9575 don't optimize accesses to this MEM. */
9576 loop_info->mems[i].optimize = 0;
9578 return 0;
9581 /* Resize the array, if necessary. */
9582 if (loop_info->mems_idx == loop_info->mems_allocated)
9584 if (loop_info->mems_allocated != 0)
9585 loop_info->mems_allocated *= 2;
9586 else
9587 loop_info->mems_allocated = 32;
9589 loop_info->mems = (loop_mem_info *)
9590 xrealloc (loop_info->mems,
9591 loop_info->mems_allocated * sizeof (loop_mem_info));
9594 /* Actually insert the MEM. */
9595 loop_info->mems[loop_info->mems_idx].mem = m;
9596 /* We can't hoist this MEM out of the loop if it's a BLKmode MEM
9597 because we can't put it in a register. We still store it in the
9598 table, though, so that if we see the same address later, but in a
9599 non-BLK mode, we'll not think we can optimize it at that point. */
9600 loop_info->mems[loop_info->mems_idx].optimize = (GET_MODE (m) != BLKmode);
9601 loop_info->mems[loop_info->mems_idx].reg = NULL_RTX;
9602 ++loop_info->mems_idx;
9604 return 0;
9608 /* Allocate REGS->ARRAY or reallocate it if it is too small.
9610 Increment REGS->ARRAY[I].SET_IN_LOOP at the index I of each
9611 register that is modified by an insn between FROM and TO. If the
9612 value of an element of REGS->array[I].SET_IN_LOOP becomes 127 or
9613 more, stop incrementing it, to avoid overflow.
9615 Store in REGS->ARRAY[I].SINGLE_USAGE the single insn in which
9616 register I is used, if it is only used once. Otherwise, it is set
9617 to 0 (for no uses) or const0_rtx for more than one use. This
9618 parameter may be zero, in which case this processing is not done.
9620 Set REGS->ARRAY[I].MAY_NOT_OPTIMIZE nonzero if we should not
9621 optimize register I. */
9623 static void
9624 loop_regs_scan (loop, extra_size)
9625 const struct loop *loop;
9626 int extra_size;
9628 struct loop_regs *regs = LOOP_REGS (loop);
9629 int old_nregs;
9630 /* last_set[n] is nonzero iff reg n has been set in the current
9631 basic block. In that case, it is the insn that last set reg n. */
9632 rtx *last_set;
9633 rtx insn;
9634 int i;
9636 old_nregs = regs->num;
9637 regs->num = max_reg_num ();
9639 /* Grow the regs array if not allocated or too small. */
9640 if (regs->num >= regs->size)
9642 regs->size = regs->num + extra_size;
9644 regs->array = (struct loop_reg *)
9645 xrealloc (regs->array, regs->size * sizeof (*regs->array));
9647 /* Zero the new elements. */
9648 memset (regs->array + old_nregs, 0,
9649 (regs->size - old_nregs) * sizeof (*regs->array));
9652 /* Clear previously scanned fields but do not clear n_times_set. */
9653 for (i = 0; i < old_nregs; i++)
9655 regs->array[i].set_in_loop = 0;
9656 regs->array[i].may_not_optimize = 0;
9657 regs->array[i].single_usage = NULL_RTX;
9660 last_set = (rtx *) xcalloc (regs->num, sizeof (rtx));
9662 /* Scan the loop, recording register usage. */
9663 for (insn = loop->top ? loop->top : loop->start; insn != loop->end;
9664 insn = NEXT_INSN (insn))
9666 if (INSN_P (insn))
9668 /* Record registers that have exactly one use. */
9669 find_single_use_in_loop (regs, insn, PATTERN (insn));
9671 /* Include uses in REG_EQUAL notes. */
9672 if (REG_NOTES (insn))
9673 find_single_use_in_loop (regs, insn, REG_NOTES (insn));
9675 if (GET_CODE (PATTERN (insn)) == SET
9676 || GET_CODE (PATTERN (insn)) == CLOBBER)
9677 count_one_set (regs, insn, PATTERN (insn), last_set);
9678 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
9680 int i;
9681 for (i = XVECLEN (PATTERN (insn), 0) - 1; i >= 0; i--)
9682 count_one_set (regs, insn, XVECEXP (PATTERN (insn), 0, i),
9683 last_set);
9687 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
9688 memset (last_set, 0, regs->num * sizeof (rtx));
9690 /* Invalidate all registers used for function argument passing.
9691 We check rtx_varies_p for the same reason as below, to allow
9692 optimizing PIC calculations. */
9693 if (GET_CODE (insn) == CALL_INSN)
9695 rtx link;
9696 for (link = CALL_INSN_FUNCTION_USAGE (insn);
9697 link;
9698 link = XEXP (link, 1))
9700 rtx op, reg;
9702 if (GET_CODE (op = XEXP (link, 0)) == USE
9703 && GET_CODE (reg = XEXP (op, 0)) == REG
9704 && rtx_varies_p (reg, 1))
9705 regs->array[REGNO (reg)].may_not_optimize = 1;
9710 /* Invalidate all hard registers clobbered by calls. With one exception:
9711 a call-clobbered PIC register is still function-invariant for our
9712 purposes, since we can hoist any PIC calculations out of the loop.
9713 Thus the call to rtx_varies_p. */
9714 if (LOOP_INFO (loop)->has_call)
9715 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
9716 if (TEST_HARD_REG_BIT (regs_invalidated_by_call, i)
9717 && rtx_varies_p (regno_reg_rtx[i], 1))
9719 regs->array[i].may_not_optimize = 1;
9720 regs->array[i].set_in_loop = 1;
9723 #ifdef AVOID_CCMODE_COPIES
9724 /* Don't try to move insns which set CC registers if we should not
9725 create CCmode register copies. */
9726 for (i = regs->num - 1; i >= FIRST_PSEUDO_REGISTER; i--)
9727 if (GET_MODE_CLASS (GET_MODE (regno_reg_rtx[i])) == MODE_CC)
9728 regs->array[i].may_not_optimize = 1;
9729 #endif
9731 /* Set regs->array[I].n_times_set for the new registers. */
9732 for (i = old_nregs; i < regs->num; i++)
9733 regs->array[i].n_times_set = regs->array[i].set_in_loop;
9735 free (last_set);
9738 /* Returns the number of real INSNs in the LOOP. */
9740 static int
9741 count_insns_in_loop (loop)
9742 const struct loop *loop;
9744 int count = 0;
9745 rtx insn;
9747 for (insn = loop->top ? loop->top : loop->start; insn != loop->end;
9748 insn = NEXT_INSN (insn))
9749 if (INSN_P (insn))
9750 ++count;
9752 return count;
9755 /* Move MEMs into registers for the duration of the loop. */
9757 static void
9758 load_mems (loop)
9759 const struct loop *loop;
9761 struct loop_info *loop_info = LOOP_INFO (loop);
9762 struct loop_regs *regs = LOOP_REGS (loop);
9763 int maybe_never = 0;
9764 int i;
9765 rtx p, prev_ebb_head;
9766 rtx label = NULL_RTX;
9767 rtx end_label;
9768 /* Nonzero if the next instruction may never be executed. */
9769 int next_maybe_never = 0;
9770 unsigned int last_max_reg = max_reg_num ();
9772 if (loop_info->mems_idx == 0)
9773 return;
9775 /* We cannot use next_label here because it skips over normal insns. */
9776 end_label = next_nonnote_insn (loop->end);
9777 if (end_label && GET_CODE (end_label) != CODE_LABEL)
9778 end_label = NULL_RTX;
9780 /* Check to see if it's possible that some instructions in the loop are
9781 never executed. Also check if there is a goto out of the loop other
9782 than right after the end of the loop. */
9783 for (p = next_insn_in_loop (loop, loop->scan_start);
9784 p != NULL_RTX;
9785 p = next_insn_in_loop (loop, p))
9787 if (GET_CODE (p) == CODE_LABEL)
9788 maybe_never = 1;
9789 else if (GET_CODE (p) == JUMP_INSN
9790 /* If we enter the loop in the middle, and scan
9791 around to the beginning, don't set maybe_never
9792 for that. This must be an unconditional jump,
9793 otherwise the code at the top of the loop might
9794 never be executed. Unconditional jumps are
9795 followed a by barrier then loop end. */
9796 && ! (GET_CODE (p) == JUMP_INSN
9797 && JUMP_LABEL (p) == loop->top
9798 && NEXT_INSN (NEXT_INSN (p)) == loop->end
9799 && any_uncondjump_p (p)))
9801 /* If this is a jump outside of the loop but not right
9802 after the end of the loop, we would have to emit new fixup
9803 sequences for each such label. */
9804 if (/* If we can't tell where control might go when this
9805 JUMP_INSN is executed, we must be conservative. */
9806 !JUMP_LABEL (p)
9807 || (JUMP_LABEL (p) != end_label
9808 && (INSN_UID (JUMP_LABEL (p)) >= max_uid_for_loop
9809 || INSN_LUID (JUMP_LABEL (p)) < INSN_LUID (loop->start)
9810 || INSN_LUID (JUMP_LABEL (p)) > INSN_LUID (loop->end))))
9811 return;
9813 if (!any_condjump_p (p))
9814 /* Something complicated. */
9815 maybe_never = 1;
9816 else
9817 /* If there are any more instructions in the loop, they
9818 might not be reached. */
9819 next_maybe_never = 1;
9821 else if (next_maybe_never)
9822 maybe_never = 1;
9825 /* Find start of the extended basic block that enters the loop. */
9826 for (p = loop->start;
9827 PREV_INSN (p) && GET_CODE (p) != CODE_LABEL;
9828 p = PREV_INSN (p))
9830 prev_ebb_head = p;
9832 cselib_init ();
9834 /* Build table of mems that get set to constant values before the
9835 loop. */
9836 for (; p != loop->start; p = NEXT_INSN (p))
9837 cselib_process_insn (p);
9839 /* Actually move the MEMs. */
9840 for (i = 0; i < loop_info->mems_idx; ++i)
9842 regset_head load_copies;
9843 regset_head store_copies;
9844 int written = 0;
9845 rtx reg;
9846 rtx mem = loop_info->mems[i].mem;
9847 rtx mem_list_entry;
9849 if (MEM_VOLATILE_P (mem)
9850 || loop_invariant_p (loop, XEXP (mem, 0)) != 1)
9851 /* There's no telling whether or not MEM is modified. */
9852 loop_info->mems[i].optimize = 0;
9854 /* Go through the MEMs written to in the loop to see if this
9855 one is aliased by one of them. */
9856 mem_list_entry = loop_info->store_mems;
9857 while (mem_list_entry)
9859 if (rtx_equal_p (mem, XEXP (mem_list_entry, 0)))
9860 written = 1;
9861 else if (true_dependence (XEXP (mem_list_entry, 0), VOIDmode,
9862 mem, rtx_varies_p))
9864 /* MEM is indeed aliased by this store. */
9865 loop_info->mems[i].optimize = 0;
9866 break;
9868 mem_list_entry = XEXP (mem_list_entry, 1);
9871 if (flag_float_store && written
9872 && GET_MODE_CLASS (GET_MODE (mem)) == MODE_FLOAT)
9873 loop_info->mems[i].optimize = 0;
9875 /* If this MEM is written to, we must be sure that there
9876 are no reads from another MEM that aliases this one. */
9877 if (loop_info->mems[i].optimize && written)
9879 int j;
9881 for (j = 0; j < loop_info->mems_idx; ++j)
9883 if (j == i)
9884 continue;
9885 else if (true_dependence (mem,
9886 VOIDmode,
9887 loop_info->mems[j].mem,
9888 rtx_varies_p))
9890 /* It's not safe to hoist loop_info->mems[i] out of
9891 the loop because writes to it might not be
9892 seen by reads from loop_info->mems[j]. */
9893 loop_info->mems[i].optimize = 0;
9894 break;
9899 if (maybe_never && may_trap_p (mem))
9900 /* We can't access the MEM outside the loop; it might
9901 cause a trap that wouldn't have happened otherwise. */
9902 loop_info->mems[i].optimize = 0;
9904 if (!loop_info->mems[i].optimize)
9905 /* We thought we were going to lift this MEM out of the
9906 loop, but later discovered that we could not. */
9907 continue;
9909 INIT_REG_SET (&load_copies);
9910 INIT_REG_SET (&store_copies);
9912 /* Allocate a pseudo for this MEM. We set REG_USERVAR_P in
9913 order to keep scan_loop from moving stores to this MEM
9914 out of the loop just because this REG is neither a
9915 user-variable nor used in the loop test. */
9916 reg = gen_reg_rtx (GET_MODE (mem));
9917 REG_USERVAR_P (reg) = 1;
9918 loop_info->mems[i].reg = reg;
9920 /* Now, replace all references to the MEM with the
9921 corresponding pseudos. */
9922 maybe_never = 0;
9923 for (p = next_insn_in_loop (loop, loop->scan_start);
9924 p != NULL_RTX;
9925 p = next_insn_in_loop (loop, p))
9927 if (INSN_P (p))
9929 rtx set;
9931 set = single_set (p);
9933 /* See if this copies the mem into a register that isn't
9934 modified afterwards. We'll try to do copy propagation
9935 a little further on. */
9936 if (set
9937 /* @@@ This test is _way_ too conservative. */
9938 && ! maybe_never
9939 && GET_CODE (SET_DEST (set)) == REG
9940 && REGNO (SET_DEST (set)) >= FIRST_PSEUDO_REGISTER
9941 && REGNO (SET_DEST (set)) < last_max_reg
9942 && regs->array[REGNO (SET_DEST (set))].n_times_set == 1
9943 && rtx_equal_p (SET_SRC (set), mem))
9944 SET_REGNO_REG_SET (&load_copies, REGNO (SET_DEST (set)));
9946 /* See if this copies the mem from a register that isn't
9947 modified afterwards. We'll try to remove the
9948 redundant copy later on by doing a little register
9949 renaming and copy propagation. This will help
9950 to untangle things for the BIV detection code. */
9951 if (set
9952 && ! maybe_never
9953 && GET_CODE (SET_SRC (set)) == REG
9954 && REGNO (SET_SRC (set)) >= FIRST_PSEUDO_REGISTER
9955 && REGNO (SET_SRC (set)) < last_max_reg
9956 && regs->array[REGNO (SET_SRC (set))].n_times_set == 1
9957 && rtx_equal_p (SET_DEST (set), mem))
9958 SET_REGNO_REG_SET (&store_copies, REGNO (SET_SRC (set)));
9960 /* If this is a call which uses / clobbers this memory
9961 location, we must not change the interface here. */
9962 if (GET_CODE (p) == CALL_INSN
9963 && reg_mentioned_p (loop_info->mems[i].mem,
9964 CALL_INSN_FUNCTION_USAGE (p)))
9966 cancel_changes (0);
9967 loop_info->mems[i].optimize = 0;
9968 break;
9970 else
9971 /* Replace the memory reference with the shadow register. */
9972 replace_loop_mems (p, loop_info->mems[i].mem,
9973 loop_info->mems[i].reg);
9976 if (GET_CODE (p) == CODE_LABEL
9977 || GET_CODE (p) == JUMP_INSN)
9978 maybe_never = 1;
9981 if (! loop_info->mems[i].optimize)
9982 ; /* We found we couldn't do the replacement, so do nothing. */
9983 else if (! apply_change_group ())
9984 /* We couldn't replace all occurrences of the MEM. */
9985 loop_info->mems[i].optimize = 0;
9986 else
9988 /* Load the memory immediately before LOOP->START, which is
9989 the NOTE_LOOP_BEG. */
9990 cselib_val *e = cselib_lookup (mem, VOIDmode, 0);
9991 rtx set;
9992 rtx best = mem;
9993 int j;
9994 struct elt_loc_list *const_equiv = 0;
9996 if (e)
9998 struct elt_loc_list *equiv;
9999 struct elt_loc_list *best_equiv = 0;
10000 for (equiv = e->locs; equiv; equiv = equiv->next)
10002 if (CONSTANT_P (equiv->loc))
10003 const_equiv = equiv;
10004 else if (GET_CODE (equiv->loc) == REG
10005 /* Extending hard register lifetimes causes crash
10006 on SRC targets. Doing so on non-SRC is
10007 probably also not good idea, since we most
10008 probably have pseudoregister equivalence as
10009 well. */
10010 && REGNO (equiv->loc) >= FIRST_PSEUDO_REGISTER)
10011 best_equiv = equiv;
10013 /* Use the constant equivalence if that is cheap enough. */
10014 if (! best_equiv)
10015 best_equiv = const_equiv;
10016 else if (const_equiv
10017 && (rtx_cost (const_equiv->loc, SET)
10018 <= rtx_cost (best_equiv->loc, SET)))
10020 best_equiv = const_equiv;
10021 const_equiv = 0;
10024 /* If best_equiv is nonzero, we know that MEM is set to a
10025 constant or register before the loop. We will use this
10026 knowledge to initialize the shadow register with that
10027 constant or reg rather than by loading from MEM. */
10028 if (best_equiv)
10029 best = copy_rtx (best_equiv->loc);
10032 set = gen_move_insn (reg, best);
10033 set = loop_insn_hoist (loop, set);
10034 if (REG_P (best))
10036 for (p = prev_ebb_head; p != loop->start; p = NEXT_INSN (p))
10037 if (REGNO_LAST_UID (REGNO (best)) == INSN_UID (p))
10039 REGNO_LAST_UID (REGNO (best)) = INSN_UID (set);
10040 break;
10044 if (const_equiv)
10045 set_unique_reg_note (set, REG_EQUAL, copy_rtx (const_equiv->loc));
10047 if (written)
10049 if (label == NULL_RTX)
10051 label = gen_label_rtx ();
10052 emit_label_after (label, loop->end);
10055 /* Store the memory immediately after END, which is
10056 the NOTE_LOOP_END. */
10057 set = gen_move_insn (copy_rtx (mem), reg);
10058 loop_insn_emit_after (loop, 0, label, set);
10061 if (loop_dump_stream)
10063 fprintf (loop_dump_stream, "Hoisted regno %d %s from ",
10064 REGNO (reg), (written ? "r/w" : "r/o"));
10065 print_rtl (loop_dump_stream, mem);
10066 fputc ('\n', loop_dump_stream);
10069 /* Attempt a bit of copy propagation. This helps untangle the
10070 data flow, and enables {basic,general}_induction_var to find
10071 more bivs/givs. */
10072 EXECUTE_IF_SET_IN_REG_SET
10073 (&load_copies, FIRST_PSEUDO_REGISTER, j,
10075 try_copy_prop (loop, reg, j);
10077 CLEAR_REG_SET (&load_copies);
10079 EXECUTE_IF_SET_IN_REG_SET
10080 (&store_copies, FIRST_PSEUDO_REGISTER, j,
10082 try_swap_copy_prop (loop, reg, j);
10084 CLEAR_REG_SET (&store_copies);
10088 if (label != NULL_RTX && end_label != NULL_RTX)
10090 /* Now, we need to replace all references to the previous exit
10091 label with the new one. */
10092 rtx_pair rr;
10093 rr.r1 = end_label;
10094 rr.r2 = label;
10096 for (p = loop->start; p != loop->end; p = NEXT_INSN (p))
10098 for_each_rtx (&p, replace_label, &rr);
10100 /* If this is a JUMP_INSN, then we also need to fix the JUMP_LABEL
10101 field. This is not handled by for_each_rtx because it doesn't
10102 handle unprinted ('0') fields. We need to update JUMP_LABEL
10103 because the immediately following unroll pass will use it.
10104 replace_label would not work anyways, because that only handles
10105 LABEL_REFs. */
10106 if (GET_CODE (p) == JUMP_INSN && JUMP_LABEL (p) == end_label)
10107 JUMP_LABEL (p) = label;
10111 cselib_finish ();
10114 /* For communication between note_reg_stored and its caller. */
10115 struct note_reg_stored_arg
10117 int set_seen;
10118 rtx reg;
10121 /* Called via note_stores, record in SET_SEEN whether X, which is written,
10122 is equal to ARG. */
10123 static void
10124 note_reg_stored (x, setter, arg)
10125 rtx x, setter ATTRIBUTE_UNUSED;
10126 void *arg;
10128 struct note_reg_stored_arg *t = (struct note_reg_stored_arg *) arg;
10129 if (t->reg == x)
10130 t->set_seen = 1;
10133 /* Try to replace every occurrence of pseudo REGNO with REPLACEMENT.
10134 There must be exactly one insn that sets this pseudo; it will be
10135 deleted if all replacements succeed and we can prove that the register
10136 is not used after the loop. */
10138 static void
10139 try_copy_prop (loop, replacement, regno)
10140 const struct loop *loop;
10141 rtx replacement;
10142 unsigned int regno;
10144 /* This is the reg that we are copying from. */
10145 rtx reg_rtx = regno_reg_rtx[regno];
10146 rtx init_insn = 0;
10147 rtx insn;
10148 /* These help keep track of whether we replaced all uses of the reg. */
10149 int replaced_last = 0;
10150 int store_is_first = 0;
10152 for (insn = next_insn_in_loop (loop, loop->scan_start);
10153 insn != NULL_RTX;
10154 insn = next_insn_in_loop (loop, insn))
10156 rtx set;
10158 /* Only substitute within one extended basic block from the initializing
10159 insn. */
10160 if (GET_CODE (insn) == CODE_LABEL && init_insn)
10161 break;
10163 if (! INSN_P (insn))
10164 continue;
10166 /* Is this the initializing insn? */
10167 set = single_set (insn);
10168 if (set
10169 && GET_CODE (SET_DEST (set)) == REG
10170 && REGNO (SET_DEST (set)) == regno)
10172 if (init_insn)
10173 abort ();
10175 init_insn = insn;
10176 if (REGNO_FIRST_UID (regno) == INSN_UID (insn))
10177 store_is_first = 1;
10180 /* Only substitute after seeing the initializing insn. */
10181 if (init_insn && insn != init_insn)
10183 struct note_reg_stored_arg arg;
10185 replace_loop_regs (insn, reg_rtx, replacement);
10186 if (REGNO_LAST_UID (regno) == INSN_UID (insn))
10187 replaced_last = 1;
10189 /* Stop replacing when REPLACEMENT is modified. */
10190 arg.reg = replacement;
10191 arg.set_seen = 0;
10192 note_stores (PATTERN (insn), note_reg_stored, &arg);
10193 if (arg.set_seen)
10195 rtx note = find_reg_note (insn, REG_EQUAL, NULL);
10197 /* It is possible that we've turned previously valid REG_EQUAL to
10198 invalid, as we change the REGNO to REPLACEMENT and unlike REGNO,
10199 REPLACEMENT is modified, we get different meaning. */
10200 if (note && reg_mentioned_p (replacement, XEXP (note, 0)))
10201 remove_note (insn, note);
10202 break;
10206 if (! init_insn)
10207 abort ();
10208 if (apply_change_group ())
10210 if (loop_dump_stream)
10211 fprintf (loop_dump_stream, " Replaced reg %d", regno);
10212 if (store_is_first && replaced_last)
10214 rtx first;
10215 rtx retval_note;
10217 /* Assume we're just deleting INIT_INSN. */
10218 first = init_insn;
10219 /* Look for REG_RETVAL note. If we're deleting the end of
10220 the libcall sequence, the whole sequence can go. */
10221 retval_note = find_reg_note (init_insn, REG_RETVAL, NULL_RTX);
10222 /* If we found a REG_RETVAL note, find the first instruction
10223 in the sequence. */
10224 if (retval_note)
10225 first = XEXP (retval_note, 0);
10227 /* Delete the instructions. */
10228 loop_delete_insns (first, init_insn);
10230 if (loop_dump_stream)
10231 fprintf (loop_dump_stream, ".\n");
10235 /* Replace all the instructions from FIRST up to and including LAST
10236 with NOTE_INSN_DELETED notes. */
10238 static void
10239 loop_delete_insns (first, last)
10240 rtx first;
10241 rtx last;
10243 while (1)
10245 if (loop_dump_stream)
10246 fprintf (loop_dump_stream, ", deleting init_insn (%d)",
10247 INSN_UID (first));
10248 delete_insn (first);
10250 /* If this was the LAST instructions we're supposed to delete,
10251 we're done. */
10252 if (first == last)
10253 break;
10255 first = NEXT_INSN (first);
10259 /* Try to replace occurrences of pseudo REGNO with REPLACEMENT within
10260 loop LOOP if the order of the sets of these registers can be
10261 swapped. There must be exactly one insn within the loop that sets
10262 this pseudo followed immediately by a move insn that sets
10263 REPLACEMENT with REGNO. */
10264 static void
10265 try_swap_copy_prop (loop, replacement, regno)
10266 const struct loop *loop;
10267 rtx replacement;
10268 unsigned int regno;
10270 rtx insn;
10271 rtx set = NULL_RTX;
10272 unsigned int new_regno;
10274 new_regno = REGNO (replacement);
10276 for (insn = next_insn_in_loop (loop, loop->scan_start);
10277 insn != NULL_RTX;
10278 insn = next_insn_in_loop (loop, insn))
10280 /* Search for the insn that copies REGNO to NEW_REGNO? */
10281 if (INSN_P (insn)
10282 && (set = single_set (insn))
10283 && GET_CODE (SET_DEST (set)) == REG
10284 && REGNO (SET_DEST (set)) == new_regno
10285 && GET_CODE (SET_SRC (set)) == REG
10286 && REGNO (SET_SRC (set)) == regno)
10287 break;
10290 if (insn != NULL_RTX)
10292 rtx prev_insn;
10293 rtx prev_set;
10295 /* Some DEF-USE info would come in handy here to make this
10296 function more general. For now, just check the previous insn
10297 which is the most likely candidate for setting REGNO. */
10299 prev_insn = PREV_INSN (insn);
10301 if (INSN_P (insn)
10302 && (prev_set = single_set (prev_insn))
10303 && GET_CODE (SET_DEST (prev_set)) == REG
10304 && REGNO (SET_DEST (prev_set)) == regno)
10306 /* We have:
10307 (set (reg regno) (expr))
10308 (set (reg new_regno) (reg regno))
10310 so try converting this to:
10311 (set (reg new_regno) (expr))
10312 (set (reg regno) (reg new_regno))
10314 The former construct is often generated when a global
10315 variable used for an induction variable is shadowed by a
10316 register (NEW_REGNO). The latter construct improves the
10317 chances of GIV replacement and BIV elimination. */
10319 validate_change (prev_insn, &SET_DEST (prev_set),
10320 replacement, 1);
10321 validate_change (insn, &SET_DEST (set),
10322 SET_SRC (set), 1);
10323 validate_change (insn, &SET_SRC (set),
10324 replacement, 1);
10326 if (apply_change_group ())
10328 if (loop_dump_stream)
10329 fprintf (loop_dump_stream,
10330 " Swapped set of reg %d at %d with reg %d at %d.\n",
10331 regno, INSN_UID (insn),
10332 new_regno, INSN_UID (prev_insn));
10334 /* Update first use of REGNO. */
10335 if (REGNO_FIRST_UID (regno) == INSN_UID (prev_insn))
10336 REGNO_FIRST_UID (regno) = INSN_UID (insn);
10338 /* Now perform copy propagation to hopefully
10339 remove all uses of REGNO within the loop. */
10340 try_copy_prop (loop, replacement, regno);
10346 /* Replace MEM with its associated pseudo register. This function is
10347 called from load_mems via for_each_rtx. DATA is actually a pointer
10348 to a structure describing the instruction currently being scanned
10349 and the MEM we are currently replacing. */
10351 static int
10352 replace_loop_mem (mem, data)
10353 rtx *mem;
10354 void *data;
10356 loop_replace_args *args = (loop_replace_args *) data;
10357 rtx m = *mem;
10359 if (m == NULL_RTX)
10360 return 0;
10362 switch (GET_CODE (m))
10364 case MEM:
10365 break;
10367 case CONST_DOUBLE:
10368 /* We're not interested in the MEM associated with a
10369 CONST_DOUBLE, so there's no need to traverse into one. */
10370 return -1;
10372 default:
10373 /* This is not a MEM. */
10374 return 0;
10377 if (!rtx_equal_p (args->match, m))
10378 /* This is not the MEM we are currently replacing. */
10379 return 0;
10381 /* Actually replace the MEM. */
10382 validate_change (args->insn, mem, args->replacement, 1);
10384 return 0;
10387 static void
10388 replace_loop_mems (insn, mem, reg)
10389 rtx insn;
10390 rtx mem;
10391 rtx reg;
10393 loop_replace_args args;
10395 args.insn = insn;
10396 args.match = mem;
10397 args.replacement = reg;
10399 for_each_rtx (&insn, replace_loop_mem, &args);
10402 /* Replace one register with another. Called through for_each_rtx; PX points
10403 to the rtx being scanned. DATA is actually a pointer to
10404 a structure of arguments. */
10406 static int
10407 replace_loop_reg (px, data)
10408 rtx *px;
10409 void *data;
10411 rtx x = *px;
10412 loop_replace_args *args = (loop_replace_args *) data;
10414 if (x == NULL_RTX)
10415 return 0;
10417 if (x == args->match)
10418 validate_change (args->insn, px, args->replacement, 1);
10420 return 0;
10423 static void
10424 replace_loop_regs (insn, reg, replacement)
10425 rtx insn;
10426 rtx reg;
10427 rtx replacement;
10429 loop_replace_args args;
10431 args.insn = insn;
10432 args.match = reg;
10433 args.replacement = replacement;
10435 for_each_rtx (&insn, replace_loop_reg, &args);
10438 /* Replace occurrences of the old exit label for the loop with the new
10439 one. DATA is an rtx_pair containing the old and new labels,
10440 respectively. */
10442 static int
10443 replace_label (x, data)
10444 rtx *x;
10445 void *data;
10447 rtx l = *x;
10448 rtx old_label = ((rtx_pair *) data)->r1;
10449 rtx new_label = ((rtx_pair *) data)->r2;
10451 if (l == NULL_RTX)
10452 return 0;
10454 if (GET_CODE (l) != LABEL_REF)
10455 return 0;
10457 if (XEXP (l, 0) != old_label)
10458 return 0;
10460 XEXP (l, 0) = new_label;
10461 ++LABEL_NUSES (new_label);
10462 --LABEL_NUSES (old_label);
10464 return 0;
10467 /* Emit insn for PATTERN after WHERE_INSN in basic block WHERE_BB
10468 (ignored in the interim). */
10470 static rtx
10471 loop_insn_emit_after (loop, where_bb, where_insn, pattern)
10472 const struct loop *loop ATTRIBUTE_UNUSED;
10473 basic_block where_bb ATTRIBUTE_UNUSED;
10474 rtx where_insn;
10475 rtx pattern;
10477 return emit_insn_after (pattern, where_insn);
10481 /* If WHERE_INSN is nonzero emit insn for PATTERN before WHERE_INSN
10482 in basic block WHERE_BB (ignored in the interim) within the loop
10483 otherwise hoist PATTERN into the loop pre-header. */
10486 loop_insn_emit_before (loop, where_bb, where_insn, pattern)
10487 const struct loop *loop;
10488 basic_block where_bb ATTRIBUTE_UNUSED;
10489 rtx where_insn;
10490 rtx pattern;
10492 if (! where_insn)
10493 return loop_insn_hoist (loop, pattern);
10494 return emit_insn_before (pattern, where_insn);
10498 /* Emit call insn for PATTERN before WHERE_INSN in basic block
10499 WHERE_BB (ignored in the interim) within the loop. */
10501 static rtx
10502 loop_call_insn_emit_before (loop, where_bb, where_insn, pattern)
10503 const struct loop *loop ATTRIBUTE_UNUSED;
10504 basic_block where_bb ATTRIBUTE_UNUSED;
10505 rtx where_insn;
10506 rtx pattern;
10508 return emit_call_insn_before (pattern, where_insn);
10512 /* Hoist insn for PATTERN into the loop pre-header. */
10515 loop_insn_hoist (loop, pattern)
10516 const struct loop *loop;
10517 rtx pattern;
10519 return loop_insn_emit_before (loop, 0, loop->start, pattern);
10523 /* Hoist call insn for PATTERN into the loop pre-header. */
10525 static rtx
10526 loop_call_insn_hoist (loop, pattern)
10527 const struct loop *loop;
10528 rtx pattern;
10530 return loop_call_insn_emit_before (loop, 0, loop->start, pattern);
10534 /* Sink insn for PATTERN after the loop end. */
10537 loop_insn_sink (loop, pattern)
10538 const struct loop *loop;
10539 rtx pattern;
10541 return loop_insn_emit_before (loop, 0, loop->sink, pattern);
10544 /* bl->final_value can be eighter general_operand or PLUS of general_operand
10545 and constant. Emit sequence of instructions to load it into REG. */
10546 static rtx
10547 gen_load_of_final_value (reg, final_value)
10548 rtx reg, final_value;
10550 rtx seq;
10551 start_sequence ();
10552 final_value = force_operand (final_value, reg);
10553 if (final_value != reg)
10554 emit_move_insn (reg, final_value);
10555 seq = get_insns ();
10556 end_sequence ();
10557 return seq;
10560 /* If the loop has multiple exits, emit insn for PATTERN before the
10561 loop to ensure that it will always be executed no matter how the
10562 loop exits. Otherwise, emit the insn for PATTERN after the loop,
10563 since this is slightly more efficient. */
10565 static rtx
10566 loop_insn_sink_or_swim (loop, pattern)
10567 const struct loop *loop;
10568 rtx pattern;
10570 if (loop->exit_count)
10571 return loop_insn_hoist (loop, pattern);
10572 else
10573 return loop_insn_sink (loop, pattern);
10576 static void
10577 loop_ivs_dump (loop, file, verbose)
10578 const struct loop *loop;
10579 FILE *file;
10580 int verbose;
10582 struct iv_class *bl;
10583 int iv_num = 0;
10585 if (! loop || ! file)
10586 return;
10588 for (bl = LOOP_IVS (loop)->list; bl; bl = bl->next)
10589 iv_num++;
10591 fprintf (file, "Loop %d: %d IV classes\n", loop->num, iv_num);
10593 for (bl = LOOP_IVS (loop)->list; bl; bl = bl->next)
10595 loop_iv_class_dump (bl, file, verbose);
10596 fputc ('\n', file);
10601 static void
10602 loop_iv_class_dump (bl, file, verbose)
10603 const struct iv_class *bl;
10604 FILE *file;
10605 int verbose ATTRIBUTE_UNUSED;
10607 struct induction *v;
10608 rtx incr;
10609 int i;
10611 if (! bl || ! file)
10612 return;
10614 fprintf (file, "IV class for reg %d, benefit %d\n",
10615 bl->regno, bl->total_benefit);
10617 fprintf (file, " Init insn %d", INSN_UID (bl->init_insn));
10618 if (bl->initial_value)
10620 fprintf (file, ", init val: ");
10621 print_simple_rtl (file, bl->initial_value);
10623 if (bl->initial_test)
10625 fprintf (file, ", init test: ");
10626 print_simple_rtl (file, bl->initial_test);
10628 fputc ('\n', file);
10630 if (bl->final_value)
10632 fprintf (file, " Final val: ");
10633 print_simple_rtl (file, bl->final_value);
10634 fputc ('\n', file);
10637 if ((incr = biv_total_increment (bl)))
10639 fprintf (file, " Total increment: ");
10640 print_simple_rtl (file, incr);
10641 fputc ('\n', file);
10644 /* List the increments. */
10645 for (i = 0, v = bl->biv; v; v = v->next_iv, i++)
10647 fprintf (file, " Inc%d: insn %d, incr: ", i, INSN_UID (v->insn));
10648 print_simple_rtl (file, v->add_val);
10649 fputc ('\n', file);
10652 /* List the givs. */
10653 for (i = 0, v = bl->giv; v; v = v->next_iv, i++)
10655 fprintf (file, " Giv%d: insn %d, benefit %d, ",
10656 i, INSN_UID (v->insn), v->benefit);
10657 if (v->giv_type == DEST_ADDR)
10658 print_simple_rtl (file, v->mem);
10659 else
10660 print_simple_rtl (file, single_set (v->insn));
10661 fputc ('\n', file);
10666 static void
10667 loop_biv_dump (v, file, verbose)
10668 const struct induction *v;
10669 FILE *file;
10670 int verbose;
10672 if (! v || ! file)
10673 return;
10675 fprintf (file,
10676 "Biv %d: insn %d",
10677 REGNO (v->dest_reg), INSN_UID (v->insn));
10678 fprintf (file, " const ");
10679 print_simple_rtl (file, v->add_val);
10681 if (verbose && v->final_value)
10683 fputc ('\n', file);
10684 fprintf (file, " final ");
10685 print_simple_rtl (file, v->final_value);
10688 fputc ('\n', file);
10692 static void
10693 loop_giv_dump (v, file, verbose)
10694 const struct induction *v;
10695 FILE *file;
10696 int verbose;
10698 if (! v || ! file)
10699 return;
10701 if (v->giv_type == DEST_REG)
10702 fprintf (file, "Giv %d: insn %d",
10703 REGNO (v->dest_reg), INSN_UID (v->insn));
10704 else
10705 fprintf (file, "Dest address: insn %d",
10706 INSN_UID (v->insn));
10708 fprintf (file, " src reg %d benefit %d",
10709 REGNO (v->src_reg), v->benefit);
10710 fprintf (file, " lifetime %d",
10711 v->lifetime);
10713 if (v->replaceable)
10714 fprintf (file, " replaceable");
10716 if (v->no_const_addval)
10717 fprintf (file, " ncav");
10719 if (v->ext_dependent)
10721 switch (GET_CODE (v->ext_dependent))
10723 case SIGN_EXTEND:
10724 fprintf (file, " ext se");
10725 break;
10726 case ZERO_EXTEND:
10727 fprintf (file, " ext ze");
10728 break;
10729 case TRUNCATE:
10730 fprintf (file, " ext tr");
10731 break;
10732 default:
10733 abort ();
10737 fputc ('\n', file);
10738 fprintf (file, " mult ");
10739 print_simple_rtl (file, v->mult_val);
10741 fputc ('\n', file);
10742 fprintf (file, " add ");
10743 print_simple_rtl (file, v->add_val);
10745 if (verbose && v->final_value)
10747 fputc ('\n', file);
10748 fprintf (file, " final ");
10749 print_simple_rtl (file, v->final_value);
10752 fputc ('\n', file);
10756 void
10757 debug_ivs (loop)
10758 const struct loop *loop;
10760 loop_ivs_dump (loop, stderr, 1);
10764 void
10765 debug_iv_class (bl)
10766 const struct iv_class *bl;
10768 loop_iv_class_dump (bl, stderr, 1);
10772 void
10773 debug_biv (v)
10774 const struct induction *v;
10776 loop_biv_dump (v, stderr, 1);
10780 void
10781 debug_giv (v)
10782 const struct induction *v;
10784 loop_giv_dump (v, stderr, 1);
10788 #define LOOP_BLOCK_NUM_1(INSN) \
10789 ((INSN) ? (BLOCK_FOR_INSN (INSN) ? BLOCK_NUM (INSN) : - 1) : -1)
10791 /* The notes do not have an assigned block, so look at the next insn. */
10792 #define LOOP_BLOCK_NUM(INSN) \
10793 ((INSN) ? (GET_CODE (INSN) == NOTE \
10794 ? LOOP_BLOCK_NUM_1 (next_nonnote_insn (INSN)) \
10795 : LOOP_BLOCK_NUM_1 (INSN)) \
10796 : -1)
10798 #define LOOP_INSN_UID(INSN) ((INSN) ? INSN_UID (INSN) : -1)
10800 static void
10801 loop_dump_aux (loop, file, verbose)
10802 const struct loop *loop;
10803 FILE *file;
10804 int verbose ATTRIBUTE_UNUSED;
10806 rtx label;
10808 if (! loop || ! file)
10809 return;
10811 /* Print diagnostics to compare our concept of a loop with
10812 what the loop notes say. */
10813 if (! PREV_INSN (loop->first->head)
10814 || GET_CODE (PREV_INSN (loop->first->head)) != NOTE
10815 || NOTE_LINE_NUMBER (PREV_INSN (loop->first->head))
10816 != NOTE_INSN_LOOP_BEG)
10817 fprintf (file, ";; No NOTE_INSN_LOOP_BEG at %d\n",
10818 INSN_UID (PREV_INSN (loop->first->head)));
10819 if (! NEXT_INSN (loop->last->end)
10820 || GET_CODE (NEXT_INSN (loop->last->end)) != NOTE
10821 || NOTE_LINE_NUMBER (NEXT_INSN (loop->last->end))
10822 != NOTE_INSN_LOOP_END)
10823 fprintf (file, ";; No NOTE_INSN_LOOP_END at %d\n",
10824 INSN_UID (NEXT_INSN (loop->last->end)));
10826 if (loop->start)
10828 fprintf (file,
10829 ";; start %d (%d), cont dom %d (%d), cont %d (%d), vtop %d (%d), end %d (%d)\n",
10830 LOOP_BLOCK_NUM (loop->start),
10831 LOOP_INSN_UID (loop->start),
10832 LOOP_BLOCK_NUM (loop->cont),
10833 LOOP_INSN_UID (loop->cont),
10834 LOOP_BLOCK_NUM (loop->cont),
10835 LOOP_INSN_UID (loop->cont),
10836 LOOP_BLOCK_NUM (loop->vtop),
10837 LOOP_INSN_UID (loop->vtop),
10838 LOOP_BLOCK_NUM (loop->end),
10839 LOOP_INSN_UID (loop->end));
10840 fprintf (file, ";; top %d (%d), scan start %d (%d)\n",
10841 LOOP_BLOCK_NUM (loop->top),
10842 LOOP_INSN_UID (loop->top),
10843 LOOP_BLOCK_NUM (loop->scan_start),
10844 LOOP_INSN_UID (loop->scan_start));
10845 fprintf (file, ";; exit_count %d", loop->exit_count);
10846 if (loop->exit_count)
10848 fputs (", labels:", file);
10849 for (label = loop->exit_labels; label; label = LABEL_NEXTREF (label))
10851 fprintf (file, " %d ",
10852 LOOP_INSN_UID (XEXP (label, 0)));
10855 fputs ("\n", file);
10857 /* This can happen when a marked loop appears as two nested loops,
10858 say from while (a || b) {}. The inner loop won't match
10859 the loop markers but the outer one will. */
10860 if (LOOP_BLOCK_NUM (loop->cont) != loop->latch->index)
10861 fprintf (file, ";; NOTE_INSN_LOOP_CONT not in loop latch\n");
10865 /* Call this function from the debugger to dump LOOP. */
10867 void
10868 debug_loop (loop)
10869 const struct loop *loop;
10871 flow_loop_dump (loop, stderr, loop_dump_aux, 1);
10874 /* Call this function from the debugger to dump LOOPS. */
10876 void
10877 debug_loops (loops)
10878 const struct loops *loops;
10880 flow_loops_dump (loops, stderr, loop_dump_aux, 1);