2015-01-03 Sandra Loosemore <sandra@codesourcery.com>
[official-gcc.git] / gcc / lower-subreg.c
blob1eef803b394ad9079de8a9622f22ae46b0e08dc5
1 /* Decompose multiword subregs.
2 Copyright (C) 2007-2014 Free Software Foundation, Inc.
3 Contributed by Richard Henderson <rth@redhat.com>
4 Ian Lance Taylor <iant@google.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "machmode.h"
26 #include "tm.h"
27 #include "tree.h"
28 #include "rtl.h"
29 #include "tm_p.h"
30 #include "flags.h"
31 #include "insn-config.h"
32 #include "obstack.h"
33 #include "predict.h"
34 #include "vec.h"
35 #include "hashtab.h"
36 #include "hash-set.h"
37 #include "hard-reg-set.h"
38 #include "input.h"
39 #include "function.h"
40 #include "dominance.h"
41 #include "cfg.h"
42 #include "cfgrtl.h"
43 #include "cfgbuild.h"
44 #include "basic-block.h"
45 #include "recog.h"
46 #include "bitmap.h"
47 #include "dce.h"
48 #include "expr.h"
49 #include "except.h"
50 #include "regs.h"
51 #include "tree-pass.h"
52 #include "df.h"
53 #include "lower-subreg.h"
54 #include "rtl-iter.h"
/* Normalize STACK_GROWS_DOWNWARD to a 0/1 value so that it can be used in
   ordinary expressions and not only in #ifdef tests.  */
#ifndef STACK_GROWS_DOWNWARD
# define STACK_GROWS_DOWNWARD 0
#else
# undef STACK_GROWS_DOWNWARD
# define STACK_GROWS_DOWNWARD 1
#endif
64 /* Decompose multi-word pseudo-registers into individual
65 pseudo-registers when possible and profitable. This is possible
66 when all the uses of a multi-word register are via SUBREG, or are
67 copies of the register to another location. Breaking apart the
68 register permits more CSE and permits better register allocation.
69 This is profitable if the machine does not have move instructions
70 to do this.
72 This pass only splits moves with modes that are wider than
73 word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
74 integer modes that are twice the width of word_mode. The latter
75 could be generalized if there was a need to do this, but the trend in
76 architectures is to not need this.
78 There are two useful preprocessor defines for use by maintainers:
80 #define LOG_COSTS 1
82 if you wish to see the actual cost estimates that are being used
83 for each mode wider than word mode and the cost estimates for zero
84 extension and the shifts. This can be useful when port maintainers
85 are tuning insn rtx costs.
87 #define FORCE_LOWERING 1
89 if you wish to test the pass with all the transformation forced on.
90 This can be useful for finding bugs in the transformations. */
92 #define LOG_COSTS 0
93 #define FORCE_LOWERING 0
95 /* Bit N in this bitmap is set if regno N is used in a context in
96 which we can decompose it. */
97 static bitmap decomposable_context;
99 /* Bit N in this bitmap is set if regno N is used in a context in
100 which it can not be decomposed. */
101 static bitmap non_decomposable_context;
103 /* Bit N in this bitmap is set if regno N is used in a subreg
104 which changes the mode but not the size. This typically happens
105 when the register accessed as a floating-point value; we want to
106 avoid generating accesses to its subwords in integer modes. */
107 static bitmap subreg_context;
109 /* Bit N in the bitmap in element M of this array is set if there is a
110 copy from reg M to reg N. */
111 static vec<bitmap> reg_copy_graph;
113 struct target_lower_subreg default_target_lower_subreg;
114 #if SWITCHABLE_TARGET
115 struct target_lower_subreg *this_target_lower_subreg
116 = &default_target_lower_subreg;
117 #endif
119 #define twice_word_mode \
120 this_target_lower_subreg->x_twice_word_mode
121 #define choices \
122 this_target_lower_subreg->x_choices
124 /* RTXes used while computing costs. */
125 struct cost_rtxes {
126 /* Source and target registers. */
127 rtx source;
128 rtx target;
130 /* A twice_word_mode ZERO_EXTEND of SOURCE. */
131 rtx zext;
133 /* A shift of SOURCE. */
134 rtx shift;
136 /* A SET of TARGET. */
137 rtx set;
140 /* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
141 rtxes in RTXES. SPEED_P selects between the speed and size cost. */
143 static int
144 shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
145 machine_mode mode, int op1)
147 PUT_CODE (rtxes->shift, code);
148 PUT_MODE (rtxes->shift, mode);
149 PUT_MODE (rtxes->source, mode);
150 XEXP (rtxes->shift, 1) = GEN_INT (op1);
151 return set_src_cost (rtxes->shift, speed_p);
154 /* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
155 to true if it is profitable to split a double-word CODE shift
156 of X + BITS_PER_WORD bits. SPEED_P says whether we are testing
157 for speed or size profitability.
159 Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is
160 the cost of moving zero into a word-mode register. WORD_MOVE_COST
161 is the cost of moving between word registers. */
163 static void
164 compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
165 bool *splitting, enum rtx_code code,
166 int word_move_zero_cost, int word_move_cost)
168 int wide_cost, narrow_cost, upper_cost, i;
170 for (i = 0; i < BITS_PER_WORD; i++)
172 wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
173 i + BITS_PER_WORD);
174 if (i == 0)
175 narrow_cost = word_move_cost;
176 else
177 narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
179 if (code != ASHIFTRT)
180 upper_cost = word_move_zero_cost;
181 else if (i == BITS_PER_WORD - 1)
182 upper_cost = word_move_cost;
183 else
184 upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
185 BITS_PER_WORD - 1);
187 if (LOG_COSTS)
188 fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
189 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
190 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);
192 if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
193 splitting[i] = true;
197 /* Compute what we should do when optimizing for speed or size; SPEED_P
198 selects which. Use RTXES for computing costs. */
200 static void
201 compute_costs (bool speed_p, struct cost_rtxes *rtxes)
203 unsigned int i;
204 int word_move_zero_cost, word_move_cost;
206 PUT_MODE (rtxes->target, word_mode);
207 SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
208 word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);
210 SET_SRC (rtxes->set) = rtxes->source;
211 word_move_cost = set_rtx_cost (rtxes->set, speed_p);
213 if (LOG_COSTS)
214 fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
215 GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);
217 for (i = 0; i < MAX_MACHINE_MODE; i++)
219 machine_mode mode = (machine_mode) i;
220 int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
221 if (factor > 1)
223 int mode_move_cost;
225 PUT_MODE (rtxes->target, mode);
226 PUT_MODE (rtxes->source, mode);
227 mode_move_cost = set_rtx_cost (rtxes->set, speed_p);
229 if (LOG_COSTS)
230 fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
231 GET_MODE_NAME (mode), mode_move_cost,
232 word_move_cost, factor);
234 if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
236 choices[speed_p].move_modes_to_split[i] = true;
237 choices[speed_p].something_to_do = true;
242 /* For the moves and shifts, the only case that is checked is one
243 where the mode of the target is an integer mode twice the width
244 of the word_mode.
246 If it is not profitable to split a double word move then do not
247 even consider the shifts or the zero extension. */
248 if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
250 int zext_cost;
252 /* The only case here to check to see if moving the upper part with a
253 zero is cheaper than doing the zext itself. */
254 PUT_MODE (rtxes->source, word_mode);
255 zext_cost = set_src_cost (rtxes->zext, speed_p);
257 if (LOG_COSTS)
258 fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
259 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
260 zext_cost, word_move_cost, word_move_zero_cost);
262 if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
263 choices[speed_p].splitting_zext = true;
265 compute_splitting_shift (speed_p, rtxes,
266 choices[speed_p].splitting_ashift, ASHIFT,
267 word_move_zero_cost, word_move_cost);
268 compute_splitting_shift (speed_p, rtxes,
269 choices[speed_p].splitting_lshiftrt, LSHIFTRT,
270 word_move_zero_cost, word_move_cost);
271 compute_splitting_shift (speed_p, rtxes,
272 choices[speed_p].splitting_ashiftrt, ASHIFTRT,
273 word_move_zero_cost, word_move_cost);
277 /* Do one-per-target initialisation. This involves determining
278 which operations on the machine are profitable. If none are found,
279 then the pass just returns when called. */
281 void
282 init_lower_subreg (void)
284 struct cost_rtxes rtxes;
286 memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
288 twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode);
290 rtxes.target = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER);
291 rtxes.source = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER + 1);
292 rtxes.set = gen_rtx_SET (VOIDmode, rtxes.target, rtxes.source);
293 rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
294 rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
296 if (LOG_COSTS)
297 fprintf (stderr, "\nSize costs\n==========\n\n");
298 compute_costs (false, &rtxes);
300 if (LOG_COSTS)
301 fprintf (stderr, "\nSpeed costs\n===========\n\n");
302 compute_costs (true, &rtxes);
305 static bool
306 simple_move_operand (rtx x)
308 if (GET_CODE (x) == SUBREG)
309 x = SUBREG_REG (x);
311 if (!OBJECT_P (x))
312 return false;
314 if (GET_CODE (x) == LABEL_REF
315 || GET_CODE (x) == SYMBOL_REF
316 || GET_CODE (x) == HIGH
317 || GET_CODE (x) == CONST)
318 return false;
320 if (MEM_P (x)
321 && (MEM_VOLATILE_P (x)
322 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
323 return false;
325 return true;
328 /* If INSN is a single set between two objects that we want to split,
329 return the single set. SPEED_P says whether we are optimizing
330 INSN for speed or size.
332 INSN should have been passed to recog and extract_insn before this
333 is called. */
335 static rtx
336 simple_move (rtx_insn *insn, bool speed_p)
338 rtx x;
339 rtx set;
340 machine_mode mode;
342 if (recog_data.n_operands != 2)
343 return NULL_RTX;
345 set = single_set (insn);
346 if (!set)
347 return NULL_RTX;
349 x = SET_DEST (set);
350 if (x != recog_data.operand[0] && x != recog_data.operand[1])
351 return NULL_RTX;
352 if (!simple_move_operand (x))
353 return NULL_RTX;
355 x = SET_SRC (set);
356 if (x != recog_data.operand[0] && x != recog_data.operand[1])
357 return NULL_RTX;
358 /* For the src we can handle ASM_OPERANDS, and it is beneficial for
359 things like x86 rdtsc which returns a DImode value. */
360 if (GET_CODE (x) != ASM_OPERANDS
361 && !simple_move_operand (x))
362 return NULL_RTX;
364 /* We try to decompose in integer modes, to avoid generating
365 inefficient code copying between integer and floating point
366 registers. That means that we can't decompose if this is a
367 non-integer mode for which there is no integer mode of the same
368 size. */
369 mode = GET_MODE (SET_DEST (set));
370 if (!SCALAR_INT_MODE_P (mode)
371 && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0)
372 == BLKmode))
373 return NULL_RTX;
375 /* Reject PARTIAL_INT modes. They are used for processor specific
376 purposes and it's probably best not to tamper with them. */
377 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
378 return NULL_RTX;
380 if (!choices[speed_p].move_modes_to_split[(int) mode])
381 return NULL_RTX;
383 return set;
386 /* If SET is a copy from one multi-word pseudo-register to another,
387 record that in reg_copy_graph. Return whether it is such a
388 copy. */
390 static bool
391 find_pseudo_copy (rtx set)
393 rtx dest = SET_DEST (set);
394 rtx src = SET_SRC (set);
395 unsigned int rd, rs;
396 bitmap b;
398 if (!REG_P (dest) || !REG_P (src))
399 return false;
401 rd = REGNO (dest);
402 rs = REGNO (src);
403 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
404 return false;
406 b = reg_copy_graph[rs];
407 if (b == NULL)
409 b = BITMAP_ALLOC (NULL);
410 reg_copy_graph[rs] = b;
413 bitmap_set_bit (b, rd);
415 return true;
418 /* Look through the registers in DECOMPOSABLE_CONTEXT. For each case
419 where they are copied to another register, add the register to
420 which they are copied to DECOMPOSABLE_CONTEXT. Use
421 NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
422 copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */
424 static void
425 propagate_pseudo_copies (void)
427 bitmap queue, propagate;
429 queue = BITMAP_ALLOC (NULL);
430 propagate = BITMAP_ALLOC (NULL);
432 bitmap_copy (queue, decomposable_context);
435 bitmap_iterator iter;
436 unsigned int i;
438 bitmap_clear (propagate);
440 EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
442 bitmap b = reg_copy_graph[i];
443 if (b)
444 bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
447 bitmap_and_compl (queue, propagate, decomposable_context);
448 bitmap_ior_into (decomposable_context, propagate);
450 while (!bitmap_empty_p (queue));
452 BITMAP_FREE (queue);
453 BITMAP_FREE (propagate);
/* Classification of the insn being scanned.  A pointer to one of these
   values is passed to find_decomposable_subregs.  */

enum classify_move_insn
{
  /* Not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* A simple move we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* Any other simple move.  */
  SIMPLE_MOVE
};
469 /* If we find a SUBREG in *LOC which we could use to decompose a
470 pseudo-register, set a bit in DECOMPOSABLE_CONTEXT. If we find an
471 unadorned register which is not a simple pseudo-register copy,
472 DATA will point at the type of move, and we set a bit in
473 DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate. */
475 static void
476 find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
478 subrtx_var_iterator::array_type array;
479 FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
481 rtx x = *iter;
482 if (GET_CODE (x) == SUBREG)
484 rtx inner = SUBREG_REG (x);
485 unsigned int regno, outer_size, inner_size, outer_words, inner_words;
487 if (!REG_P (inner))
488 continue;
490 regno = REGNO (inner);
491 if (HARD_REGISTER_NUM_P (regno))
493 iter.skip_subrtxes ();
494 continue;
497 outer_size = GET_MODE_SIZE (GET_MODE (x));
498 inner_size = GET_MODE_SIZE (GET_MODE (inner));
499 outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
500 inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
502 /* We only try to decompose single word subregs of multi-word
503 registers. When we find one, we return -1 to avoid iterating
504 over the inner register.
506 ??? This doesn't allow, e.g., DImode subregs of TImode values
507 on 32-bit targets. We would need to record the way the
508 pseudo-register was used, and only decompose if all the uses
509 were the same number and size of pieces. Hopefully this
510 doesn't happen much. */
512 if (outer_words == 1 && inner_words > 1)
514 bitmap_set_bit (decomposable_context, regno);
515 iter.skip_subrtxes ();
516 continue;
519 /* If this is a cast from one mode to another, where the modes
520 have the same size, and they are not tieable, then mark this
521 register as non-decomposable. If we decompose it we are
522 likely to mess up whatever the backend is trying to do. */
523 if (outer_words > 1
524 && outer_size == inner_size
525 && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner)))
527 bitmap_set_bit (non_decomposable_context, regno);
528 bitmap_set_bit (subreg_context, regno);
529 iter.skip_subrtxes ();
530 continue;
533 else if (REG_P (x))
535 unsigned int regno;
537 /* We will see an outer SUBREG before we see the inner REG, so
538 when we see a plain REG here it means a direct reference to
539 the register.
541 If this is not a simple copy from one location to another,
542 then we can not decompose this register. If this is a simple
543 copy we want to decompose, and the mode is right,
544 then we mark the register as decomposable.
545 Otherwise we don't say anything about this register --
546 it could be decomposed, but whether that would be
547 profitable depends upon how it is used elsewhere.
549 We only set bits in the bitmap for multi-word
550 pseudo-registers, since those are the only ones we care about
551 and it keeps the size of the bitmaps down. */
553 regno = REGNO (x);
554 if (!HARD_REGISTER_NUM_P (regno)
555 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
557 switch (*pcmi)
559 case NOT_SIMPLE_MOVE:
560 bitmap_set_bit (non_decomposable_context, regno);
561 break;
562 case DECOMPOSABLE_SIMPLE_MOVE:
563 if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
564 bitmap_set_bit (decomposable_context, regno);
565 break;
566 case SIMPLE_MOVE:
567 break;
568 default:
569 gcc_unreachable ();
573 else if (MEM_P (x))
575 enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;
577 /* Any registers used in a MEM do not participate in a
578 SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE. Do our own recursion
579 here, and return -1 to block the parent's recursion. */
580 find_decomposable_subregs (&XEXP (x, 0), &cmi_mem);
581 iter.skip_subrtxes ();
586 /* Decompose REGNO into word-sized components. We smash the REG node
587 in place. This ensures that (1) something goes wrong quickly if we
588 fail to make some replacement, and (2) the debug information inside
589 the symbol table is automatically kept up to date. */
591 static void
592 decompose_register (unsigned int regno)
594 rtx reg;
595 unsigned int words, i;
596 rtvec v;
598 reg = regno_reg_rtx[regno];
600 regno_reg_rtx[regno] = NULL_RTX;
602 words = GET_MODE_SIZE (GET_MODE (reg));
603 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
605 v = rtvec_alloc (words);
606 for (i = 0; i < words; ++i)
607 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
609 PUT_CODE (reg, CONCATN);
610 XVEC (reg, 0) = v;
612 if (dump_file)
614 fprintf (dump_file, "; Splitting reg %u ->", regno);
615 for (i = 0; i < words; ++i)
616 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
617 fputc ('\n', dump_file);
621 /* Get a SUBREG of a CONCATN. */
623 static rtx
624 simplify_subreg_concatn (machine_mode outermode, rtx op,
625 unsigned int byte)
627 unsigned int inner_size;
628 machine_mode innermode, partmode;
629 rtx part;
630 unsigned int final_offset;
632 gcc_assert (GET_CODE (op) == CONCATN);
633 gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);
635 innermode = GET_MODE (op);
636 gcc_assert (byte < GET_MODE_SIZE (innermode));
637 gcc_assert (GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (innermode));
639 inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
640 part = XVECEXP (op, 0, byte / inner_size);
641 partmode = GET_MODE (part);
643 /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
644 regular CONST_VECTORs. They have vector or integer modes, depending
645 on the capabilities of the target. Cope with them. */
646 if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
647 partmode = GET_MODE_INNER (innermode);
648 else if (partmode == VOIDmode)
650 enum mode_class mclass = GET_MODE_CLASS (innermode);
651 partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0);
654 final_offset = byte % inner_size;
655 if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
656 return NULL_RTX;
658 return simplify_gen_subreg (outermode, part, partmode, final_offset);
661 /* Wrapper around simplify_gen_subreg which handles CONCATN. */
663 static rtx
664 simplify_gen_subreg_concatn (machine_mode outermode, rtx op,
665 machine_mode innermode, unsigned int byte)
667 rtx ret;
669 /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
670 If OP is a SUBREG of a CONCATN, then it must be a simple mode
671 change with the same size and offset 0, or it must extract a
672 part. We shouldn't see anything else here. */
673 if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
675 rtx op2;
677 if ((GET_MODE_SIZE (GET_MODE (op))
678 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
679 && SUBREG_BYTE (op) == 0)
680 return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
681 GET_MODE (SUBREG_REG (op)), byte);
683 op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
684 SUBREG_BYTE (op));
685 if (op2 == NULL_RTX)
687 /* We don't handle paradoxical subregs here. */
688 gcc_assert (GET_MODE_SIZE (outermode)
689 <= GET_MODE_SIZE (GET_MODE (op)));
690 gcc_assert (GET_MODE_SIZE (GET_MODE (op))
691 <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))));
692 op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
693 byte + SUBREG_BYTE (op));
694 gcc_assert (op2 != NULL_RTX);
695 return op2;
698 op = op2;
699 gcc_assert (op != NULL_RTX);
700 gcc_assert (innermode == GET_MODE (op));
703 if (GET_CODE (op) == CONCATN)
704 return simplify_subreg_concatn (outermode, op, byte);
706 ret = simplify_gen_subreg (outermode, op, innermode, byte);
708 /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
709 resolve_simple_move will ask for the high part of the paradoxical
710 subreg, which does not have a value. Just return a zero. */
711 if (ret == NULL_RTX
712 && GET_CODE (op) == SUBREG
713 && SUBREG_BYTE (op) == 0
714 && (GET_MODE_SIZE (innermode)
715 > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))))
716 return CONST0_RTX (outermode);
718 gcc_assert (ret != NULL_RTX);
719 return ret;
722 /* Return whether we should resolve X into the registers into which it
723 was decomposed. */
725 static bool
726 resolve_reg_p (rtx x)
728 return GET_CODE (x) == CONCATN;
731 /* Return whether X is a SUBREG of a register which we need to
732 resolve. */
734 static bool
735 resolve_subreg_p (rtx x)
737 if (GET_CODE (x) != SUBREG)
738 return false;
739 return resolve_reg_p (SUBREG_REG (x));
742 /* Look for SUBREGs in *LOC which need to be decomposed. */
744 static bool
745 resolve_subreg_use (rtx *loc, rtx insn)
747 subrtx_ptr_iterator::array_type array;
748 FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
750 rtx *loc = *iter;
751 rtx x = *loc;
752 if (resolve_subreg_p (x))
754 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
755 SUBREG_BYTE (x));
757 /* It is possible for a note to contain a reference which we can
758 decompose. In this case, return 1 to the caller to indicate
759 that the note must be removed. */
760 if (!x)
762 gcc_assert (!insn);
763 return true;
766 validate_change (insn, loc, x, 1);
767 iter.skip_subrtxes ();
769 else if (resolve_reg_p (x))
770 /* Return 1 to the caller to indicate that we found a direct
771 reference to a register which is being decomposed. This can
772 happen inside notes, multiword shift or zero-extend
773 instructions. */
774 return true;
777 return false;
780 /* Resolve any decomposed registers which appear in register notes on
781 INSN. */
783 static void
784 resolve_reg_notes (rtx_insn *insn)
786 rtx *pnote, note;
788 note = find_reg_equal_equiv_note (insn);
789 if (note)
791 int old_count = num_validated_changes ();
792 if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX))
793 remove_note (insn, note);
794 else
795 if (old_count != num_validated_changes ())
796 df_notes_rescan (insn);
799 pnote = &REG_NOTES (insn);
800 while (*pnote != NULL_RTX)
802 bool del = false;
804 note = *pnote;
805 switch (REG_NOTE_KIND (note))
807 case REG_DEAD:
808 case REG_UNUSED:
809 if (resolve_reg_p (XEXP (note, 0)))
810 del = true;
811 break;
813 default:
814 break;
817 if (del)
818 *pnote = XEXP (note, 1);
819 else
820 pnote = &XEXP (note, 1);
824 /* Return whether X can be decomposed into subwords. */
826 static bool
827 can_decompose_p (rtx x)
829 if (REG_P (x))
831 unsigned int regno = REGNO (x);
833 if (HARD_REGISTER_NUM_P (regno))
835 unsigned int byte, num_bytes;
837 num_bytes = GET_MODE_SIZE (GET_MODE (x));
838 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
839 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
840 return false;
841 return true;
843 else
844 return !bitmap_bit_p (subreg_context, regno);
847 return true;
850 /* Decompose the registers used in a simple move SET within INSN. If
851 we don't change anything, return INSN, otherwise return the start
852 of the sequence of moves. */
854 static rtx_insn *
855 resolve_simple_move (rtx set, rtx_insn *insn)
857 rtx src, dest, real_dest;
858 rtx_insn *insns;
859 machine_mode orig_mode, dest_mode;
860 unsigned int words;
861 bool pushing;
863 src = SET_SRC (set);
864 dest = SET_DEST (set);
865 orig_mode = GET_MODE (dest);
867 words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
868 gcc_assert (words > 1);
870 start_sequence ();
872 /* We have to handle copying from a SUBREG of a decomposed reg where
873 the SUBREG is larger than word size. Rather than assume that we
874 can take a word_mode SUBREG of the destination, we copy to a new
875 register and then copy that to the destination. */
877 real_dest = NULL_RTX;
879 if (GET_CODE (src) == SUBREG
880 && resolve_reg_p (SUBREG_REG (src))
881 && (SUBREG_BYTE (src) != 0
882 || (GET_MODE_SIZE (orig_mode)
883 != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
885 real_dest = dest;
886 dest = gen_reg_rtx (orig_mode);
887 if (REG_P (real_dest))
888 REG_ATTRS (dest) = REG_ATTRS (real_dest);
891 /* Similarly if we are copying to a SUBREG of a decomposed reg where
892 the SUBREG is larger than word size. */
894 if (GET_CODE (dest) == SUBREG
895 && resolve_reg_p (SUBREG_REG (dest))
896 && (SUBREG_BYTE (dest) != 0
897 || (GET_MODE_SIZE (orig_mode)
898 != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
900 rtx reg, smove;
901 rtx_insn *minsn;
903 reg = gen_reg_rtx (orig_mode);
904 minsn = emit_move_insn (reg, src);
905 smove = single_set (minsn);
906 gcc_assert (smove != NULL_RTX);
907 resolve_simple_move (smove, minsn);
908 src = reg;
911 /* If we didn't have any big SUBREGS of decomposed registers, and
912 neither side of the move is a register we are decomposing, then
913 we don't have to do anything here. */
915 if (src == SET_SRC (set)
916 && dest == SET_DEST (set)
917 && !resolve_reg_p (src)
918 && !resolve_subreg_p (src)
919 && !resolve_reg_p (dest)
920 && !resolve_subreg_p (dest))
922 end_sequence ();
923 return insn;
926 /* It's possible for the code to use a subreg of a decomposed
927 register while forming an address. We need to handle that before
928 passing the address to emit_move_insn. We pass NULL_RTX as the
929 insn parameter to resolve_subreg_use because we can not validate
930 the insn yet. */
931 if (MEM_P (src) || MEM_P (dest))
933 int acg;
935 if (MEM_P (src))
936 resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
937 if (MEM_P (dest))
938 resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
939 acg = apply_change_group ();
940 gcc_assert (acg);
943 /* If SRC is a register which we can't decompose, or has side
944 effects, we need to move via a temporary register. */
946 if (!can_decompose_p (src)
947 || side_effects_p (src)
948 || GET_CODE (src) == ASM_OPERANDS)
950 rtx reg;
952 reg = gen_reg_rtx (orig_mode);
954 #ifdef AUTO_INC_DEC
956 rtx move = emit_move_insn (reg, src);
957 if (MEM_P (src))
959 rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
960 if (note)
961 add_reg_note (move, REG_INC, XEXP (note, 0));
964 #else
965 emit_move_insn (reg, src);
966 #endif
967 src = reg;
970 /* If DEST is a register which we can't decompose, or has side
971 effects, we need to first move to a temporary register. We
972 handle the common case of pushing an operand directly. We also
973 go through a temporary register if it holds a floating point
974 value. This gives us better code on systems which can't move
975 data easily between integer and floating point registers. */
977 dest_mode = orig_mode;
978 pushing = push_operand (dest, dest_mode);
979 if (!can_decompose_p (dest)
980 || (side_effects_p (dest) && !pushing)
981 || (!SCALAR_INT_MODE_P (dest_mode)
982 && !resolve_reg_p (dest)
983 && !resolve_subreg_p (dest)))
985 if (real_dest == NULL_RTX)
986 real_dest = dest;
987 if (!SCALAR_INT_MODE_P (dest_mode))
989 dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
990 MODE_INT, 0);
991 gcc_assert (dest_mode != BLKmode);
993 dest = gen_reg_rtx (dest_mode);
994 if (REG_P (real_dest))
995 REG_ATTRS (dest) = REG_ATTRS (real_dest);
998 if (pushing)
1000 unsigned int i, j, jinc;
1002 gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
1003 gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
1004 gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);
1006 if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
1008 j = 0;
1009 jinc = 1;
1011 else
1013 j = words - 1;
1014 jinc = -1;
1017 for (i = 0; i < words; ++i, j += jinc)
1019 rtx temp;
1021 temp = copy_rtx (XEXP (dest, 0));
1022 temp = adjust_automodify_address_nv (dest, word_mode, temp,
1023 j * UNITS_PER_WORD);
1024 emit_move_insn (temp,
1025 simplify_gen_subreg_concatn (word_mode, src,
1026 orig_mode,
1027 j * UNITS_PER_WORD));
1030 else
1032 unsigned int i;
1034 if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
1035 emit_clobber (dest);
1037 for (i = 0; i < words; ++i)
1038 emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
1039 dest_mode,
1040 i * UNITS_PER_WORD),
1041 simplify_gen_subreg_concatn (word_mode, src,
1042 orig_mode,
1043 i * UNITS_PER_WORD));
1046 if (real_dest != NULL_RTX)
1048 rtx mdest, smove;
1049 rtx_insn *minsn;
1051 if (dest_mode == orig_mode)
1052 mdest = dest;
1053 else
1054 mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
1055 minsn = emit_move_insn (real_dest, mdest);
1057 #ifdef AUTO_INC_DEC
1058 if (MEM_P (real_dest)
1059 && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
1061 rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
1062 if (note)
1063 add_reg_note (minsn, REG_INC, XEXP (note, 0));
1065 #endif
1067 smove = single_set (minsn);
1068 gcc_assert (smove != NULL_RTX);
1070 resolve_simple_move (smove, minsn);
1073 insns = get_insns ();
1074 end_sequence ();
1076 copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);
1078 emit_insn_before (insns, insn);
1080 /* If we get here via self-recursion, then INSN is not yet in the insns
1081 chain and delete_insn will fail. We only want to remove INSN from the
1082 current sequence. See PR56738. */
1083 if (in_sequence_p ())
1084 remove_insn (insn);
1085 else
1086 delete_insn (insn);
1088 return insns;
1091 /* Change a CLOBBER of a decomposed register into a CLOBBER of the
1092 component registers. Return whether we changed something. */
1094 static bool
1095 resolve_clobber (rtx pat, rtx_insn *insn)
1097 rtx reg;
1098 machine_mode orig_mode;
1099 unsigned int words, i;
1100 int ret;
1102 reg = XEXP (pat, 0);
1103 if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
1104 return false;
1106 orig_mode = GET_MODE (reg);
1107 words = GET_MODE_SIZE (orig_mode);
1108 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1110 ret = validate_change (NULL_RTX, &XEXP (pat, 0),
1111 simplify_gen_subreg_concatn (word_mode, reg,
1112 orig_mode, 0),
1114 df_insn_rescan (insn);
1115 gcc_assert (ret != 0);
1117 for (i = words - 1; i > 0; --i)
1119 rtx x;
1121 x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
1122 i * UNITS_PER_WORD);
1123 x = gen_rtx_CLOBBER (VOIDmode, x);
1124 emit_insn_after (x, insn);
1127 resolve_reg_notes (insn);
1129 return true;
1132 /* A USE of a decomposed register is no longer meaningful. Return
1133 whether we changed something. */
1135 static bool
1136 resolve_use (rtx pat, rtx_insn *insn)
1138 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1140 delete_insn (insn);
1141 return true;
1144 resolve_reg_notes (insn);
1146 return false;
1149 /* A VAR_LOCATION can be simplified. */
1151 static void
1152 resolve_debug (rtx_insn *insn)
1154 subrtx_ptr_iterator::array_type array;
1155 FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
1157 rtx *loc = *iter;
1158 rtx x = *loc;
1159 if (resolve_subreg_p (x))
1161 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
1162 SUBREG_BYTE (x));
1164 if (x)
1165 *loc = x;
1166 else
1167 x = copy_rtx (*loc);
1169 if (resolve_reg_p (x))
1170 *loc = copy_rtx (x);
1173 df_insn_rescan (insn);
1175 resolve_reg_notes (insn);
1178 /* Check if INSN is a decomposable multiword-shift or zero-extend and
1179 set the decomposable_context bitmap accordingly. SPEED_P is true
1180 if we are optimizing INSN for speed rather than size. Return true
1181 if INSN is decomposable. */
1183 static bool
1184 find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
1186 rtx set;
1187 rtx op;
1188 rtx op_operand;
1190 set = single_set (insn);
1191 if (!set)
1192 return false;
1194 op = SET_SRC (set);
1195 if (GET_CODE (op) != ASHIFT
1196 && GET_CODE (op) != LSHIFTRT
1197 && GET_CODE (op) != ASHIFTRT
1198 && GET_CODE (op) != ZERO_EXTEND)
1199 return false;
1201 op_operand = XEXP (op, 0);
1202 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1203 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1204 || HARD_REGISTER_NUM_P (REGNO (op_operand))
1205 || GET_MODE (op) != twice_word_mode)
1206 return false;
1208 if (GET_CODE (op) == ZERO_EXTEND)
1210 if (GET_MODE (op_operand) != word_mode
1211 || !choices[speed_p].splitting_zext)
1212 return false;
1214 else /* left or right shift */
1216 bool *splitting = (GET_CODE (op) == ASHIFT
1217 ? choices[speed_p].splitting_ashift
1218 : GET_CODE (op) == ASHIFTRT
1219 ? choices[speed_p].splitting_ashiftrt
1220 : choices[speed_p].splitting_lshiftrt);
1221 if (!CONST_INT_P (XEXP (op, 1))
1222 || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1223 2 * BITS_PER_WORD - 1)
1224 || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1225 return false;
1227 bitmap_set_bit (decomposable_context, REGNO (op_operand));
1230 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1232 return true;
1235 /* Decompose a more than word wide shift (in INSN) of a multiword
1236 pseudo or a multiword zero-extend of a wordmode pseudo into a move
1237 and 'set to zero' insn. Return a pointer to the new insn when a
1238 replacement was done. */
1240 static rtx_insn *
1241 resolve_shift_zext (rtx_insn *insn)
1243 rtx set;
1244 rtx op;
1245 rtx op_operand;
1246 rtx_insn *insns;
1247 rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
1248 int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
1250 set = single_set (insn);
1251 if (!set)
1252 return NULL;
1254 op = SET_SRC (set);
1255 if (GET_CODE (op) != ASHIFT
1256 && GET_CODE (op) != LSHIFTRT
1257 && GET_CODE (op) != ASHIFTRT
1258 && GET_CODE (op) != ZERO_EXTEND)
1259 return NULL;
1261 op_operand = XEXP (op, 0);
1263 /* We can tear this operation apart only if the regs were already
1264 torn apart. */
1265 if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
1266 return NULL;
1268 /* src_reg_num is the number of the word mode register which we
1269 are operating on. For a left shift and a zero_extend on little
1270 endian machines this is register 0. */
1271 src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
1272 ? 1 : 0;
1274 if (WORDS_BIG_ENDIAN
1275 && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
1276 src_reg_num = 1 - src_reg_num;
1278 if (GET_CODE (op) == ZERO_EXTEND)
1279 dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
1280 else
1281 dest_reg_num = 1 - src_reg_num;
1283 offset1 = UNITS_PER_WORD * dest_reg_num;
1284 offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
1285 src_offset = UNITS_PER_WORD * src_reg_num;
1287 start_sequence ();
1289 dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1290 GET_MODE (SET_DEST (set)),
1291 offset1);
1292 dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1293 GET_MODE (SET_DEST (set)),
1294 offset2);
1295 src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
1296 GET_MODE (op_operand),
1297 src_offset);
1298 if (GET_CODE (op) == ASHIFTRT
1299 && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
1300 upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
1301 BITS_PER_WORD - 1, NULL_RTX, 0);
1303 if (GET_CODE (op) != ZERO_EXTEND)
1305 int shift_count = INTVAL (XEXP (op, 1));
1306 if (shift_count > BITS_PER_WORD)
1307 src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
1308 LSHIFT_EXPR : RSHIFT_EXPR,
1309 word_mode, src_reg,
1310 shift_count - BITS_PER_WORD,
1311 dest_reg, GET_CODE (op) != ASHIFTRT);
1314 if (dest_reg != src_reg)
1315 emit_move_insn (dest_reg, src_reg);
1316 if (GET_CODE (op) != ASHIFTRT)
1317 emit_move_insn (dest_upper, CONST0_RTX (word_mode));
1318 else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
1319 emit_move_insn (dest_upper, copy_rtx (src_reg));
1320 else
1321 emit_move_insn (dest_upper, upper_src);
1322 insns = get_insns ();
1324 end_sequence ();
1326 emit_insn_before (insns, insn);
1328 if (dump_file)
1330 rtx_insn *in;
1331 fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
1332 for (in = insns; in != insn; in = NEXT_INSN (in))
1333 fprintf (dump_file, "%d ", INSN_UID (in));
1334 fprintf (dump_file, "\n");
1337 delete_insn (insn);
1338 return insns;
1341 /* Print to dump_file a description of what we're doing with shift code CODE.
1342 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1344 static void
1345 dump_shift_choices (enum rtx_code code, bool *splitting)
1347 int i;
1348 const char *sep;
1350 fprintf (dump_file,
1351 " Splitting mode %s for %s lowering with shift amounts = ",
1352 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1353 sep = "";
1354 for (i = 0; i < BITS_PER_WORD; i++)
1355 if (splitting[i])
1357 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1358 sep = ",";
1360 fprintf (dump_file, "\n");
1363 /* Print to dump_file a description of what we're doing when optimizing
1364 for speed or size; SPEED_P says which. DESCRIPTION is a description
1365 of the SPEED_P choice. */
1367 static void
1368 dump_choices (bool speed_p, const char *description)
1370 unsigned int i;
1372 fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1374 for (i = 0; i < MAX_MACHINE_MODE; i++)
1375 if (GET_MODE_SIZE ((machine_mode) i) > UNITS_PER_WORD)
1376 fprintf (dump_file, " %s mode %s for copy lowering.\n",
1377 choices[speed_p].move_modes_to_split[i]
1378 ? "Splitting"
1379 : "Skipping",
1380 GET_MODE_NAME ((machine_mode) i));
1382 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
1383 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1384 GET_MODE_NAME (twice_word_mode));
1386 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1387 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1388 dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
1389 fprintf (dump_file, "\n");
1392 /* Look for registers which are always accessed via word-sized SUBREGs
1393 or -if DECOMPOSE_COPIES is true- via copies. Decompose these
1394 registers into several word-sized pseudo-registers. */
1396 static void
1397 decompose_multiword_subregs (bool decompose_copies)
1399 unsigned int max;
1400 basic_block bb;
1401 bool speed_p;
1403 if (dump_file)
1405 dump_choices (false, "size");
1406 dump_choices (true, "speed");
1409 /* Check if this target even has any modes to consider lowering. */
1410 if (!choices[false].something_to_do && !choices[true].something_to_do)
1412 if (dump_file)
1413 fprintf (dump_file, "Nothing to do!\n");
1414 return;
1417 max = max_reg_num ();
1419 /* First see if there are any multi-word pseudo-registers. If there
1420 aren't, there is nothing we can do. This should speed up this
1421 pass in the normal case, since it should be faster than scanning
1422 all the insns. */
1424 unsigned int i;
1425 bool useful_modes_seen = false;
1427 for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
1428 if (regno_reg_rtx[i] != NULL)
1430 machine_mode mode = GET_MODE (regno_reg_rtx[i]);
1431 if (choices[false].move_modes_to_split[(int) mode]
1432 || choices[true].move_modes_to_split[(int) mode])
1434 useful_modes_seen = true;
1435 break;
1439 if (!useful_modes_seen)
1441 if (dump_file)
1442 fprintf (dump_file, "Nothing to lower in this function.\n");
1443 return;
1447 if (df)
1449 df_set_flags (DF_DEFER_INSN_RESCAN);
1450 run_word_dce ();
1453 /* FIXME: It may be possible to change this code to look for each
1454 multi-word pseudo-register and to find each insn which sets or
1455 uses that register. That should be faster than scanning all the
1456 insns. */
1458 decomposable_context = BITMAP_ALLOC (NULL);
1459 non_decomposable_context = BITMAP_ALLOC (NULL);
1460 subreg_context = BITMAP_ALLOC (NULL);
1462 reg_copy_graph.create (max);
1463 reg_copy_graph.safe_grow_cleared (max);
1464 memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);
1466 speed_p = optimize_function_for_speed_p (cfun);
1467 FOR_EACH_BB_FN (bb, cfun)
1469 rtx_insn *insn;
1471 FOR_BB_INSNS (bb, insn)
1473 rtx set;
1474 enum classify_move_insn cmi;
1475 int i, n;
1477 if (!INSN_P (insn)
1478 || GET_CODE (PATTERN (insn)) == CLOBBER
1479 || GET_CODE (PATTERN (insn)) == USE)
1480 continue;
1482 recog_memoized (insn);
1484 if (find_decomposable_shift_zext (insn, speed_p))
1485 continue;
1487 extract_insn (insn);
1489 set = simple_move (insn, speed_p);
1491 if (!set)
1492 cmi = NOT_SIMPLE_MOVE;
1493 else
1495 /* We mark pseudo-to-pseudo copies as decomposable during the
1496 second pass only. The first pass is so early that there is
1497 good chance such moves will be optimized away completely by
1498 subsequent optimizations anyway.
1500 However, we call find_pseudo_copy even during the first pass
1501 so as to properly set up the reg_copy_graph. */
1502 if (find_pseudo_copy (set))
1503 cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
1504 else
1505 cmi = SIMPLE_MOVE;
1508 n = recog_data.n_operands;
1509 for (i = 0; i < n; ++i)
1511 find_decomposable_subregs (&recog_data.operand[i], &cmi);
1513 /* We handle ASM_OPERANDS as a special case to support
1514 things like x86 rdtsc which returns a DImode value.
1515 We can decompose the output, which will certainly be
1516 operand 0, but not the inputs. */
1518 if (cmi == SIMPLE_MOVE
1519 && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
1521 gcc_assert (i == 0);
1522 cmi = NOT_SIMPLE_MOVE;
1528 bitmap_and_compl_into (decomposable_context, non_decomposable_context);
1529 if (!bitmap_empty_p (decomposable_context))
1531 sbitmap sub_blocks;
1532 unsigned int i;
1533 sbitmap_iterator sbi;
1534 bitmap_iterator iter;
1535 unsigned int regno;
1537 propagate_pseudo_copies ();
1539 sub_blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
1540 bitmap_clear (sub_blocks);
1542 EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
1543 decompose_register (regno);
1545 FOR_EACH_BB_FN (bb, cfun)
1547 rtx_insn *insn;
1549 FOR_BB_INSNS (bb, insn)
1551 rtx pat;
1553 if (!INSN_P (insn))
1554 continue;
1556 pat = PATTERN (insn);
1557 if (GET_CODE (pat) == CLOBBER)
1558 resolve_clobber (pat, insn);
1559 else if (GET_CODE (pat) == USE)
1560 resolve_use (pat, insn);
1561 else if (DEBUG_INSN_P (insn))
1562 resolve_debug (insn);
1563 else
1565 rtx set;
1566 int i;
1568 recog_memoized (insn);
1569 extract_insn (insn);
1571 set = simple_move (insn, speed_p);
1572 if (set)
1574 rtx_insn *orig_insn = insn;
1575 bool cfi = control_flow_insn_p (insn);
1577 /* We can end up splitting loads to multi-word pseudos
1578 into separate loads to machine word size pseudos.
1579 When this happens, we first had one load that can
1580 throw, and after resolve_simple_move we'll have a
1581 bunch of loads (at least two). All those loads may
1582 trap if we can have non-call exceptions, so they
1583 all will end the current basic block. We split the
1584 block after the outer loop over all insns, but we
1585 make sure here that we will be able to split the
1586 basic block and still produce the correct control
1587 flow graph for it. */
1588 gcc_assert (!cfi
1589 || (cfun->can_throw_non_call_exceptions
1590 && can_throw_internal (insn)));
1592 insn = resolve_simple_move (set, insn);
1593 if (insn != orig_insn)
1595 recog_memoized (insn);
1596 extract_insn (insn);
1598 if (cfi)
1599 bitmap_set_bit (sub_blocks, bb->index);
1602 else
1604 rtx_insn *decomposed_shift;
1606 decomposed_shift = resolve_shift_zext (insn);
1607 if (decomposed_shift != NULL_RTX)
1609 insn = decomposed_shift;
1610 recog_memoized (insn);
1611 extract_insn (insn);
1615 for (i = recog_data.n_operands - 1; i >= 0; --i)
1616 resolve_subreg_use (recog_data.operand_loc[i], insn);
1618 resolve_reg_notes (insn);
1620 if (num_validated_changes () > 0)
1622 for (i = recog_data.n_dups - 1; i >= 0; --i)
1624 rtx *pl = recog_data.dup_loc[i];
1625 int dup_num = recog_data.dup_num[i];
1626 rtx *px = recog_data.operand_loc[dup_num];
1628 validate_unshare_change (insn, pl, *px, 1);
1631 i = apply_change_group ();
1632 gcc_assert (i);
1638 /* If we had insns to split that caused control flow insns in the middle
1639 of a basic block, split those blocks now. Note that we only handle
1640 the case where splitting a load has caused multiple possibly trapping
1641 loads to appear. */
1642 EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
1644 rtx_insn *insn, *end;
1645 edge fallthru;
1647 bb = BASIC_BLOCK_FOR_FN (cfun, i);
1648 insn = BB_HEAD (bb);
1649 end = BB_END (bb);
1651 while (insn != end)
1653 if (control_flow_insn_p (insn))
1655 /* Split the block after insn. There will be a fallthru
1656 edge, which is OK so we keep it. We have to create the
1657 exception edges ourselves. */
1658 fallthru = split_block (bb, insn);
1659 rtl_make_eh_edge (NULL, bb, BB_END (bb));
1660 bb = fallthru->dest;
1661 insn = BB_HEAD (bb);
1663 else
1664 insn = NEXT_INSN (insn);
1668 sbitmap_free (sub_blocks);
1672 unsigned int i;
1673 bitmap b;
1675 FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
1676 if (b)
1677 BITMAP_FREE (b);
1680 reg_copy_graph.release ();
1682 BITMAP_FREE (decomposable_context);
1683 BITMAP_FREE (non_decomposable_context);
1684 BITMAP_FREE (subreg_context);
1687 /* Implement first lower subreg pass. */
1689 namespace {
1691 const pass_data pass_data_lower_subreg =
1693 RTL_PASS, /* type */
1694 "subreg1", /* name */
1695 OPTGROUP_NONE, /* optinfo_flags */
1696 TV_LOWER_SUBREG, /* tv_id */
1697 0, /* properties_required */
1698 0, /* properties_provided */
1699 0, /* properties_destroyed */
1700 0, /* todo_flags_start */
1701 0, /* todo_flags_finish */
1704 class pass_lower_subreg : public rtl_opt_pass
1706 public:
1707 pass_lower_subreg (gcc::context *ctxt)
1708 : rtl_opt_pass (pass_data_lower_subreg, ctxt)
1711 /* opt_pass methods: */
1712 virtual bool gate (function *) { return flag_split_wide_types != 0; }
1713 virtual unsigned int execute (function *)
1715 decompose_multiword_subregs (false);
1716 return 0;
1719 }; // class pass_lower_subreg
1721 } // anon namespace
1723 rtl_opt_pass *
1724 make_pass_lower_subreg (gcc::context *ctxt)
1726 return new pass_lower_subreg (ctxt);
1729 /* Implement second lower subreg pass. */
1731 namespace {
1733 const pass_data pass_data_lower_subreg2 =
1735 RTL_PASS, /* type */
1736 "subreg2", /* name */
1737 OPTGROUP_NONE, /* optinfo_flags */
1738 TV_LOWER_SUBREG, /* tv_id */
1739 0, /* properties_required */
1740 0, /* properties_provided */
1741 0, /* properties_destroyed */
1742 0, /* todo_flags_start */
1743 TODO_df_finish, /* todo_flags_finish */
1746 class pass_lower_subreg2 : public rtl_opt_pass
1748 public:
1749 pass_lower_subreg2 (gcc::context *ctxt)
1750 : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
1753 /* opt_pass methods: */
1754 virtual bool gate (function *) { return flag_split_wide_types != 0; }
1755 virtual unsigned int execute (function *)
1757 decompose_multiword_subregs (true);
1758 return 0;
1761 }; // class pass_lower_subreg2
1763 } // anon namespace
1765 rtl_opt_pass *
1766 make_pass_lower_subreg2 (gcc::context *ctxt)
1768 return new pass_lower_subreg2 (ctxt);