1 /* Decompose multiword subregs.
2 Copyright (C) 2007-2016 Free Software Foundation, Inc.
3 Contributed by Richard Henderson <rth@redhat.com>
4 Ian Lance Taylor <iant@google.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "cfghooks.h"
29 #include "df.h"
30 #include "tm_p.h"
31 #include "expmed.h"
32 #include "insn-config.h"
33 #include "emit-rtl.h"
34 #include "recog.h"
35 #include "cfgrtl.h"
36 #include "cfgbuild.h"
37 #include "dce.h"
38 #include "expr.h"
39 #include "tree-pass.h"
40 #include "lower-subreg.h"
41 #include "rtl-iter.h"
44 /* Decompose multi-word pseudo-registers into individual
45 pseudo-registers when possible and profitable. This is possible
46 when all the uses of a multi-word register are via SUBREG, or are
47 copies of the register to another location. Breaking apart the
48 register permits more CSE and permits better register allocation.
49 This is profitable if the machine does not have move instructions
50 to do this.
52 This pass only splits moves with modes that are wider than
53 word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
54 integer modes that are twice the width of word_mode. The latter
55 could be generalized if there were a need to do this, but the trend in
56 architectures is away from needing it.
58 There are two useful preprocessor defines for use by maintainers:
60 #define LOG_COSTS 1
62 if you wish to see the actual cost estimates that are being used
63 for each mode wider than word mode and the cost estimates for zero
64 extension and the shifts. This can be useful when port maintainers
65 are tuning insn rtx costs.
67 #define FORCE_LOWERING 1
69 if you wish to test the pass with all the transformation forced on.
70 This can be useful for finding bugs in the transformations. */
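/* As an illustration (hypothetical 32-bit target with word_mode == SImode
   and invented pseudo register numbers): if (reg:DI 100) and (reg:DI 101)
   are only ever accessed in word-sized pieces, the double-word copy

     (set (reg:DI 100) (reg:DI 101))

   can be lowered into two independent word_mode copies

     (set (reg:SI 102) (reg:SI 104))
     (set (reg:SI 103) (reg:SI 105))

   once regs 100 and 101 have been decomposed into (102, 103) and
   (104, 105) respectively.  */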
72 #define LOG_COSTS 0
73 #define FORCE_LOWERING 0
75 /* Bit N in this bitmap is set if regno N is used in a context in
76 which we can decompose it. */
77 static bitmap decomposable_context;
79 /* Bit N in this bitmap is set if regno N is used in a context in
80 which it cannot be decomposed. */
81 static bitmap non_decomposable_context;
83 /* Bit N in this bitmap is set if regno N is used in a subreg
84 which changes the mode but not the size. This typically happens
85 when the register is accessed as a floating-point value; we want to
86 avoid generating accesses to its subwords in integer modes. */
87 static bitmap subreg_context;
89 /* Bit N in the bitmap in element M of this array is set if there is a
90 copy from reg M to reg N. */
91 static vec<bitmap> reg_copy_graph;
93 struct target_lower_subreg default_target_lower_subreg;
94 #if SWITCHABLE_TARGET
95 struct target_lower_subreg *this_target_lower_subreg
96 = &default_target_lower_subreg;
97 #endif
99 #define twice_word_mode \
100 this_target_lower_subreg->x_twice_word_mode
101 #define choices \
102 this_target_lower_subreg->x_choices
104 /* RTXes used while computing costs. */
105 struct cost_rtxes {
106 /* Source and target registers. */
107 rtx source;
108 rtx target;
110 /* A twice_word_mode ZERO_EXTEND of SOURCE. */
111 rtx zext;
113 /* A shift of SOURCE. */
114 rtx shift;
116 /* A SET of TARGET. */
117 rtx set;
120 /* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
121 rtxes in RTXES. SPEED_P selects between the speed and size cost. */
123 static int
124 shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
125 machine_mode mode, int op1)
127 PUT_CODE (rtxes->shift, code);
128 PUT_MODE (rtxes->shift, mode);
129 PUT_MODE (rtxes->source, mode);
130 XEXP (rtxes->shift, 1) = GEN_INT (op1);
131 return set_src_cost (rtxes->shift, mode, speed_p);
134 /* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
135 to true if it is profitable to split a double-word CODE shift
136 of X + BITS_PER_WORD bits. SPEED_P says whether we are testing
137 for speed or size profitability.
139 Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is
140 the cost of moving zero into a word-mode register. WORD_MOVE_COST
141 is the cost of moving between word registers. */
143 static void
144 compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
145 bool *splitting, enum rtx_code code,
146 int word_move_zero_cost, int word_move_cost)
148 int wide_cost, narrow_cost, upper_cost, i;
150 for (i = 0; i < BITS_PER_WORD; i++)
152 wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
153 i + BITS_PER_WORD);
154 if (i == 0)
155 narrow_cost = word_move_cost;
156 else
157 narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
159 if (code != ASHIFTRT)
160 upper_cost = word_move_zero_cost;
161 else if (i == BITS_PER_WORD - 1)
162 upper_cost = word_move_cost;
163 else
164 upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
165 BITS_PER_WORD - 1);
167 if (LOG_COSTS)
168 fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
169 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
170 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);
172 if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
173 splitting[i] = true;
177 /* Compute what we should do when optimizing for speed or size; SPEED_P
178 selects which. Use RTXES for computing costs. */
180 static void
181 compute_costs (bool speed_p, struct cost_rtxes *rtxes)
183 unsigned int i;
184 int word_move_zero_cost, word_move_cost;
186 PUT_MODE (rtxes->target, word_mode);
187 SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
188 word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);
190 SET_SRC (rtxes->set) = rtxes->source;
191 word_move_cost = set_rtx_cost (rtxes->set, speed_p);
193 if (LOG_COSTS)
194 fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
195 GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);
197 for (i = 0; i < MAX_MACHINE_MODE; i++)
199 machine_mode mode = (machine_mode) i;
200 int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
201 if (factor > 1)
203 int mode_move_cost;
205 PUT_MODE (rtxes->target, mode);
206 PUT_MODE (rtxes->source, mode);
207 mode_move_cost = set_rtx_cost (rtxes->set, speed_p);
209 if (LOG_COSTS)
210 fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
211 GET_MODE_NAME (mode), mode_move_cost,
212 word_move_cost, factor);
214 if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
216 choices[speed_p].move_modes_to_split[i] = true;
217 choices[speed_p].something_to_do = true;
222 /* For the moves and shifts, the only case that is checked is one
223 where the mode of the target is an integer mode twice the width
224 of the word_mode.
226 If it is not profitable to split a double word move then do not
227 even consider the shifts or the zero extension. */
228 if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
230 int zext_cost;
232 /* The only case here is to check whether moving the upper part with a
233 zero is cheaper than doing the zext itself. */
234 PUT_MODE (rtxes->source, word_mode);
235 zext_cost = set_src_cost (rtxes->zext, twice_word_mode, speed_p);
237 if (LOG_COSTS)
238 fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
239 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
240 zext_cost, word_move_cost, word_move_zero_cost);
242 if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
243 choices[speed_p].splitting_zext = true;
245 compute_splitting_shift (speed_p, rtxes,
246 choices[speed_p].splitting_ashift, ASHIFT,
247 word_move_zero_cost, word_move_cost);
248 compute_splitting_shift (speed_p, rtxes,
249 choices[speed_p].splitting_lshiftrt, LSHIFTRT,
250 word_move_zero_cost, word_move_cost);
251 compute_splitting_shift (speed_p, rtxes,
252 choices[speed_p].splitting_ashiftrt, ASHIFTRT,
253 word_move_zero_cost, word_move_cost);
257 /* Do one-per-target initialisation. This involves determining
258 which operations on the machine are profitable. If none are found,
259 then the pass just returns when called. */
261 void
262 init_lower_subreg (void)
264 struct cost_rtxes rtxes;
266 memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
268 twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode);
270 rtxes.target = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
271 rtxes.source = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 2);
272 rtxes.set = gen_rtx_SET (rtxes.target, rtxes.source);
273 rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
274 rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
276 if (LOG_COSTS)
277 fprintf (stderr, "\nSize costs\n==========\n\n");
278 compute_costs (false, &rtxes);
280 if (LOG_COSTS)
281 fprintf (stderr, "\nSpeed costs\n===========\n\n");
282 compute_costs (true, &rtxes);
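/* Return whether X (possibly wrapped in a SUBREG) is a simple object
   that the lowering code can take word_mode subregs of: a register, a
   suitable constant, or a non-volatile MEM whose address is not
   mode-dependent.  (Descriptive comment added here; the checks below
   are what actually define the set.)  */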
285 static bool
286 simple_move_operand (rtx x)
288 if (GET_CODE (x) == SUBREG)
289 x = SUBREG_REG (x);
291 if (!OBJECT_P (x))
292 return false;
294 if (GET_CODE (x) == LABEL_REF
295 || GET_CODE (x) == SYMBOL_REF
296 || GET_CODE (x) == HIGH
297 || GET_CODE (x) == CONST)
298 return false;
300 if (MEM_P (x)
301 && (MEM_VOLATILE_P (x)
302 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
303 return false;
305 return true;
308 /* If INSN is a single set between two objects that we want to split,
309 return the single set. SPEED_P says whether we are optimizing
310 INSN for speed or size.
312 INSN should have been passed to recog and extract_insn before this
313 is called. */
315 static rtx
316 simple_move (rtx_insn *insn, bool speed_p)
318 rtx x;
319 rtx set;
320 machine_mode mode;
322 if (recog_data.n_operands != 2)
323 return NULL_RTX;
325 set = single_set (insn);
326 if (!set)
327 return NULL_RTX;
329 x = SET_DEST (set);
330 if (x != recog_data.operand[0] && x != recog_data.operand[1])
331 return NULL_RTX;
332 if (!simple_move_operand (x))
333 return NULL_RTX;
335 x = SET_SRC (set);
336 if (x != recog_data.operand[0] && x != recog_data.operand[1])
337 return NULL_RTX;
338 /* For the src we can handle ASM_OPERANDS, and it is beneficial for
339 things like x86 rdtsc which returns a DImode value. */
340 if (GET_CODE (x) != ASM_OPERANDS
341 && !simple_move_operand (x))
342 return NULL_RTX;
344 /* We try to decompose in integer modes, to avoid generating
345 inefficient code copying between integer and floating point
346 registers. That means that we can't decompose if this is a
347 non-integer mode for which there is no integer mode of the same
348 size. */
349 mode = GET_MODE (SET_DEST (set));
350 if (!SCALAR_INT_MODE_P (mode)
351 && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0)
352 == BLKmode))
353 return NULL_RTX;
355 /* Reject PARTIAL_INT modes. They are used for processor specific
356 purposes and it's probably best not to tamper with them. */
357 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
358 return NULL_RTX;
360 if (!choices[speed_p].move_modes_to_split[(int) mode])
361 return NULL_RTX;
363 return set;
366 /* If SET is a copy from one multi-word pseudo-register to another,
367 record that in reg_copy_graph. Return whether it is such a
368 copy. */
370 static bool
371 find_pseudo_copy (rtx set)
373 rtx dest = SET_DEST (set);
374 rtx src = SET_SRC (set);
375 unsigned int rd, rs;
376 bitmap b;
378 if (!REG_P (dest) || !REG_P (src))
379 return false;
381 rd = REGNO (dest);
382 rs = REGNO (src);
383 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
384 return false;
386 b = reg_copy_graph[rs];
387 if (b == NULL)
389 b = BITMAP_ALLOC (NULL);
390 reg_copy_graph[rs] = b;
393 bitmap_set_bit (b, rd);
395 return true;
398 /* Look through the registers in DECOMPOSABLE_CONTEXT. For each case
399 where they are copied to another register, add the copy's destination
400 register to DECOMPOSABLE_CONTEXT. Use
401 NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
402 copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */
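/* For example (hypothetical register numbers): if reg 100 is already in
   DECOMPOSABLE_CONTEXT and the copy graph records a copy from reg 100
   to reg 101, then reg 101 is added to DECOMPOSABLE_CONTEXT as well,
   unless it also appears in NON_DECOMPOSABLE_CONTEXT.  The propagation
   repeats until no new registers are added.  */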
404 static void
405 propagate_pseudo_copies (void)
407 bitmap queue, propagate;
409 queue = BITMAP_ALLOC (NULL);
410 propagate = BITMAP_ALLOC (NULL);
412 bitmap_copy (queue, decomposable_context);
415 bitmap_iterator iter;
416 unsigned int i;
418 bitmap_clear (propagate);
420 EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
422 bitmap b = reg_copy_graph[i];
423 if (b)
424 bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
427 bitmap_and_compl (queue, propagate, decomposable_context);
428 bitmap_ior_into (decomposable_context, propagate);
430 while (!bitmap_empty_p (queue));
432 BITMAP_FREE (queue);
433 BITMAP_FREE (propagate);
436 /* A pointer to one of these values is passed to
437 find_decomposable_subregs. */
439 enum classify_move_insn
441 /* Not a simple move from one location to another. */
442 NOT_SIMPLE_MOVE,
443 /* A simple move we want to decompose. */
444 DECOMPOSABLE_SIMPLE_MOVE,
445 /* Any other simple move. */
446 SIMPLE_MOVE
449 /* If we find a SUBREG in *LOC which we could use to decompose a
450 pseudo-register, set a bit in DECOMPOSABLE_CONTEXT. If we find an
451 unadorned register which is not a simple pseudo-register copy,
452 DATA will point at the type of move, and we set a bit in
453 DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate. */
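/* For instance, on a hypothetical 32-bit target a use such as
   (subreg:SI (reg:DI 100) 4) is a single-word access to a multi-word
   pseudo, so regno 100 is marked in DECOMPOSABLE_CONTEXT, whereas a
   direct (reg:DI 100) appearing in anything other than a simple move
   marks it in NON_DECOMPOSABLE_CONTEXT.  */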
455 static void
456 find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
458 subrtx_var_iterator::array_type array;
459 FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
461 rtx x = *iter;
462 if (GET_CODE (x) == SUBREG)
464 rtx inner = SUBREG_REG (x);
465 unsigned int regno, outer_size, inner_size, outer_words, inner_words;
467 if (!REG_P (inner))
468 continue;
470 regno = REGNO (inner);
471 if (HARD_REGISTER_NUM_P (regno))
473 iter.skip_subrtxes ();
474 continue;
477 outer_size = GET_MODE_SIZE (GET_MODE (x));
478 inner_size = GET_MODE_SIZE (GET_MODE (inner));
479 outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
480 inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
482 /* We only try to decompose single word subregs of multi-word
483 registers. When we find one, we skip the remaining subrtxes to avoid
484 iterating over the inner register.
486 ??? This doesn't allow, e.g., DImode subregs of TImode values
487 on 32-bit targets. We would need to record the way the
488 pseudo-register was used, and only decompose if all the uses
489 were the same number and size of pieces. Hopefully this
490 doesn't happen much. */
492 if (outer_words == 1 && inner_words > 1)
494 bitmap_set_bit (decomposable_context, regno);
495 iter.skip_subrtxes ();
496 continue;
499 /* If this is a cast from one mode to another, where the modes
500 have the same size, and they are not tieable, then mark this
501 register as non-decomposable. If we decompose it we are
502 likely to mess up whatever the backend is trying to do. */
503 if (outer_words > 1
504 && outer_size == inner_size
505 && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner)))
507 bitmap_set_bit (non_decomposable_context, regno);
508 bitmap_set_bit (subreg_context, regno);
509 iter.skip_subrtxes ();
510 continue;
513 else if (REG_P (x))
515 unsigned int regno;
517 /* We will see an outer SUBREG before we see the inner REG, so
518 when we see a plain REG here it means a direct reference to
519 the register.
521 If this is not a simple copy from one location to another,
522 then we cannot decompose this register. If this is a simple
523 copy we want to decompose, and the mode is right,
524 then we mark the register as decomposable.
525 Otherwise we don't say anything about this register --
526 it could be decomposed, but whether that would be
527 profitable depends upon how it is used elsewhere.
529 We only set bits in the bitmap for multi-word
530 pseudo-registers, since those are the only ones we care about
531 and it keeps the size of the bitmaps down. */
533 regno = REGNO (x);
534 if (!HARD_REGISTER_NUM_P (regno)
535 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
537 switch (*pcmi)
539 case NOT_SIMPLE_MOVE:
540 bitmap_set_bit (non_decomposable_context, regno);
541 break;
542 case DECOMPOSABLE_SIMPLE_MOVE:
543 if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
544 bitmap_set_bit (decomposable_context, regno);
545 break;
546 case SIMPLE_MOVE:
547 break;
548 default:
549 gcc_unreachable ();
553 else if (MEM_P (x))
555 enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;
557 /* Any registers used in a MEM do not participate in a
558 SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE. Do our own recursion
559 here, and skip the subrtxes to block the parent's iteration. */
560 find_decomposable_subregs (&XEXP (x, 0), &cmi_mem);
561 iter.skip_subrtxes ();
566 /* Decompose REGNO into word-sized components. We smash the REG node
567 in place. This ensures that (1) something goes wrong quickly if we
568 fail to make some replacement, and (2) the debug information inside
569 the symbol table is automatically kept up to date. */
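/* For example, a DImode pseudo on a hypothetical 32-bit target
   (invented register numbers):

     (reg:DI 100)

   is smashed in place into

     (concatn:DI [(reg:SI 200) (reg:SI 201)])

   so every existing reference to the old REG node now sees the CONCATN
   of its word-sized replacement registers.  */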
571 static void
572 decompose_register (unsigned int regno)
574 rtx reg;
575 unsigned int words, i;
576 rtvec v;
578 reg = regno_reg_rtx[regno];
580 regno_reg_rtx[regno] = NULL_RTX;
582 words = GET_MODE_SIZE (GET_MODE (reg));
583 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
585 v = rtvec_alloc (words);
586 for (i = 0; i < words; ++i)
587 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
589 PUT_CODE (reg, CONCATN);
590 XVEC (reg, 0) = v;
592 if (dump_file)
594 fprintf (dump_file, "; Splitting reg %u ->", regno);
595 for (i = 0; i < words; ++i)
596 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
597 fputc ('\n', dump_file);
601 /* Get a SUBREG of a CONCATN. */
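/* For illustration (hypothetical 32-bit target and register numbers):
   asking for the SImode part at byte 4 of
   (concatn:DI [(reg:SI 200) (reg:SI 201)]) selects the second element
   and yields (reg:SI 201), while a request whose bytes would straddle
   two elements returns NULL_RTX.  */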
603 static rtx
604 simplify_subreg_concatn (machine_mode outermode, rtx op,
605 unsigned int byte)
607 unsigned int inner_size;
608 machine_mode innermode, partmode;
609 rtx part;
610 unsigned int final_offset;
612 gcc_assert (GET_CODE (op) == CONCATN);
613 gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);
615 innermode = GET_MODE (op);
616 gcc_assert (byte < GET_MODE_SIZE (innermode));
617 if (GET_MODE_SIZE (outermode) > GET_MODE_SIZE (innermode))
618 return NULL_RTX;
620 inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
621 part = XVECEXP (op, 0, byte / inner_size);
622 partmode = GET_MODE (part);
624 /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
625 regular CONST_VECTORs. They have vector or integer modes, depending
626 on the capabilities of the target. Cope with them. */
627 if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
628 partmode = GET_MODE_INNER (innermode);
629 else if (partmode == VOIDmode)
631 enum mode_class mclass = GET_MODE_CLASS (innermode);
632 partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0);
635 final_offset = byte % inner_size;
636 if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
637 return NULL_RTX;
639 return simplify_gen_subreg (outermode, part, partmode, final_offset);
642 /* Wrapper around simplify_gen_subreg which handles CONCATN. */
644 static rtx
645 simplify_gen_subreg_concatn (machine_mode outermode, rtx op,
646 machine_mode innermode, unsigned int byte)
648 rtx ret;
650 /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
651 If OP is a SUBREG of a CONCATN, then it must be a simple mode
652 change with the same size and offset 0, or it must extract a
653 part. We shouldn't see anything else here. */
654 if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
656 rtx op2;
658 if ((GET_MODE_SIZE (GET_MODE (op))
659 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
660 && SUBREG_BYTE (op) == 0)
661 return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
662 GET_MODE (SUBREG_REG (op)), byte);
664 op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
665 SUBREG_BYTE (op));
666 if (op2 == NULL_RTX)
668 /* We don't handle paradoxical subregs here. */
669 gcc_assert (GET_MODE_SIZE (outermode)
670 <= GET_MODE_SIZE (GET_MODE (op)));
671 gcc_assert (GET_MODE_SIZE (GET_MODE (op))
672 <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))));
673 op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
674 byte + SUBREG_BYTE (op));
675 gcc_assert (op2 != NULL_RTX);
676 return op2;
679 op = op2;
680 gcc_assert (op != NULL_RTX);
681 gcc_assert (innermode == GET_MODE (op));
684 if (GET_CODE (op) == CONCATN)
685 return simplify_subreg_concatn (outermode, op, byte);
687 ret = simplify_gen_subreg (outermode, op, innermode, byte);
689 /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
690 resolve_simple_move will ask for the high part of the paradoxical
691 subreg, which does not have a value. Just return a zero. */
692 if (ret == NULL_RTX
693 && GET_CODE (op) == SUBREG
694 && SUBREG_BYTE (op) == 0
695 && (GET_MODE_SIZE (innermode)
696 > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))))
697 return CONST0_RTX (outermode);
699 gcc_assert (ret != NULL_RTX);
700 return ret;
703 /* Return whether we should resolve X into the registers into which it
704 was decomposed. */
706 static bool
707 resolve_reg_p (rtx x)
709 return GET_CODE (x) == CONCATN;
712 /* Return whether X is a SUBREG of a register which we need to
713 resolve. */
715 static bool
716 resolve_subreg_p (rtx x)
718 if (GET_CODE (x) != SUBREG)
719 return false;
720 return resolve_reg_p (SUBREG_REG (x));
723 /* Look for SUBREGs in *LOC which need to be decomposed. */
725 static bool
726 resolve_subreg_use (rtx *loc, rtx insn)
728 subrtx_ptr_iterator::array_type array;
729 FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
731 rtx *loc = *iter;
732 rtx x = *loc;
733 if (resolve_subreg_p (x))
735 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
736 SUBREG_BYTE (x));
738 /* It is possible for a note to contain a reference which we can
739 decompose. In this case, return true to the caller to indicate
740 that the note must be removed. */
741 if (!x)
743 gcc_assert (!insn);
744 return true;
747 validate_change (insn, loc, x, 1);
748 iter.skip_subrtxes ();
750 else if (resolve_reg_p (x))
751 /* Return true to the caller to indicate that we found a direct
752 reference to a register which is being decomposed. This can
753 happen inside notes, multiword shift or zero-extend
754 instructions. */
755 return true;
758 return false;
761 /* Resolve any decomposed registers which appear in register notes on
762 INSN. */
764 static void
765 resolve_reg_notes (rtx_insn *insn)
767 rtx *pnote, note;
769 note = find_reg_equal_equiv_note (insn);
770 if (note)
772 int old_count = num_validated_changes ();
773 if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX))
774 remove_note (insn, note);
775 else
776 if (old_count != num_validated_changes ())
777 df_notes_rescan (insn);
780 pnote = &REG_NOTES (insn);
781 while (*pnote != NULL_RTX)
783 bool del = false;
785 note = *pnote;
786 switch (REG_NOTE_KIND (note))
788 case REG_DEAD:
789 case REG_UNUSED:
790 if (resolve_reg_p (XEXP (note, 0)))
791 del = true;
792 break;
794 default:
795 break;
798 if (del)
799 *pnote = XEXP (note, 1);
800 else
801 pnote = &XEXP (note, 1);
805 /* Return whether X can be decomposed into subwords. */
807 static bool
808 can_decompose_p (rtx x)
810 if (REG_P (x))
812 unsigned int regno = REGNO (x);
814 if (HARD_REGISTER_NUM_P (regno))
816 unsigned int byte, num_bytes;
818 num_bytes = GET_MODE_SIZE (GET_MODE (x));
819 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
820 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
821 return false;
822 return true;
824 else
825 return !bitmap_bit_p (subreg_context, regno);
828 return true;
831 /* Decompose the registers used in a simple move SET within INSN. If
832 we don't change anything, return INSN, otherwise return the start
833 of the sequence of moves. */
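/* For example (hypothetical 32-bit target and register numbers), once
   (reg:DI 100) has been decomposed into (reg:SI 200) and (reg:SI 201),
   the simple move

     (set (mem:DI (reg:SI 50)) (reg:DI 100))

   is replaced by a sequence of word_mode moves, roughly

     (set (mem:SI (reg:SI 50)) (reg:SI 200))
     (set (mem:SI (plus:SI (reg:SI 50) (const_int 4))) (reg:SI 201))

   and the original insn is deleted.  */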
835 static rtx_insn *
836 resolve_simple_move (rtx set, rtx_insn *insn)
838 rtx src, dest, real_dest;
839 rtx_insn *insns;
840 machine_mode orig_mode, dest_mode;
841 unsigned int words;
842 bool pushing;
844 src = SET_SRC (set);
845 dest = SET_DEST (set);
846 orig_mode = GET_MODE (dest);
848 words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
849 gcc_assert (words > 1);
851 start_sequence ();
853 /* We have to handle copying from a SUBREG of a decomposed reg where
854 the SUBREG is larger than word size. Rather than assume that we
855 can take a word_mode SUBREG of the destination, we copy to a new
856 register and then copy that to the destination. */
858 real_dest = NULL_RTX;
860 if (GET_CODE (src) == SUBREG
861 && resolve_reg_p (SUBREG_REG (src))
862 && (SUBREG_BYTE (src) != 0
863 || (GET_MODE_SIZE (orig_mode)
864 != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
866 real_dest = dest;
867 dest = gen_reg_rtx (orig_mode);
868 if (REG_P (real_dest))
869 REG_ATTRS (dest) = REG_ATTRS (real_dest);
872 /* Similarly if we are copying to a SUBREG of a decomposed reg where
873 the SUBREG is larger than word size. */
875 if (GET_CODE (dest) == SUBREG
876 && resolve_reg_p (SUBREG_REG (dest))
877 && (SUBREG_BYTE (dest) != 0
878 || (GET_MODE_SIZE (orig_mode)
879 != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
881 rtx reg, smove;
882 rtx_insn *minsn;
884 reg = gen_reg_rtx (orig_mode);
885 minsn = emit_move_insn (reg, src);
886 smove = single_set (minsn);
887 gcc_assert (smove != NULL_RTX);
888 resolve_simple_move (smove, minsn);
889 src = reg;
892 /* If we didn't have any big SUBREGS of decomposed registers, and
893 neither side of the move is a register we are decomposing, then
894 we don't have to do anything here. */
896 if (src == SET_SRC (set)
897 && dest == SET_DEST (set)
898 && !resolve_reg_p (src)
899 && !resolve_subreg_p (src)
900 && !resolve_reg_p (dest)
901 && !resolve_subreg_p (dest))
903 end_sequence ();
904 return insn;
907 /* It's possible for the code to use a subreg of a decomposed
908 register while forming an address. We need to handle that before
909 passing the address to emit_move_insn. We pass NULL_RTX as the
910 insn parameter to resolve_subreg_use because we cannot validate
911 the insn yet. */
912 if (MEM_P (src) || MEM_P (dest))
914 int acg;
916 if (MEM_P (src))
917 resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
918 if (MEM_P (dest))
919 resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
920 acg = apply_change_group ();
921 gcc_assert (acg);
924 /* If SRC is a register which we can't decompose, or has side
925 effects, we need to move via a temporary register. */
927 if (!can_decompose_p (src)
928 || side_effects_p (src)
929 || GET_CODE (src) == ASM_OPERANDS)
931 rtx reg;
933 reg = gen_reg_rtx (orig_mode);
935 if (AUTO_INC_DEC)
937 rtx move = emit_move_insn (reg, src);
938 if (MEM_P (src))
940 rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
941 if (note)
942 add_reg_note (move, REG_INC, XEXP (note, 0));
945 else
946 emit_move_insn (reg, src);
948 src = reg;
951 /* If DEST is a register which we can't decompose, or has side
952 effects, we need to first move to a temporary register. We
953 handle the common case of pushing an operand directly. We also
954 go through a temporary register if it holds a floating point
955 value. This gives us better code on systems which can't move
956 data easily between integer and floating point registers. */
958 dest_mode = orig_mode;
959 pushing = push_operand (dest, dest_mode);
960 if (!can_decompose_p (dest)
961 || (side_effects_p (dest) && !pushing)
962 || (!SCALAR_INT_MODE_P (dest_mode)
963 && !resolve_reg_p (dest)
964 && !resolve_subreg_p (dest)))
966 if (real_dest == NULL_RTX)
967 real_dest = dest;
968 if (!SCALAR_INT_MODE_P (dest_mode))
970 dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
971 MODE_INT, 0);
972 gcc_assert (dest_mode != BLKmode);
974 dest = gen_reg_rtx (dest_mode);
975 if (REG_P (real_dest))
976 REG_ATTRS (dest) = REG_ATTRS (real_dest);
979 if (pushing)
981 unsigned int i, j, jinc;
983 gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
984 gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
985 gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);
987 if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
989 j = 0;
990 jinc = 1;
992 else
994 j = words - 1;
995 jinc = -1;
998 for (i = 0; i < words; ++i, j += jinc)
1000 rtx temp;
1002 temp = copy_rtx (XEXP (dest, 0));
1003 temp = adjust_automodify_address_nv (dest, word_mode, temp,
1004 j * UNITS_PER_WORD);
1005 emit_move_insn (temp,
1006 simplify_gen_subreg_concatn (word_mode, src,
1007 orig_mode,
1008 j * UNITS_PER_WORD));
1011 else
1013 unsigned int i;
1015 if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
1016 emit_clobber (dest);
1018 for (i = 0; i < words; ++i)
1019 emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
1020 dest_mode,
1021 i * UNITS_PER_WORD),
1022 simplify_gen_subreg_concatn (word_mode, src,
1023 orig_mode,
1024 i * UNITS_PER_WORD));
1027 if (real_dest != NULL_RTX)
1029 rtx mdest, smove;
1030 rtx_insn *minsn;
1032 if (dest_mode == orig_mode)
1033 mdest = dest;
1034 else
1035 mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
1036 minsn = emit_move_insn (real_dest, mdest);
1038 if (AUTO_INC_DEC && MEM_P (real_dest)
1039 && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
1041 rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
1042 if (note)
1043 add_reg_note (minsn, REG_INC, XEXP (note, 0));
1046 smove = single_set (minsn);
1047 gcc_assert (smove != NULL_RTX);
1049 resolve_simple_move (smove, minsn);
1052 insns = get_insns ();
1053 end_sequence ();
1055 copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);
1057 emit_insn_before (insns, insn);
1059 /* If we get here via self-recursion, then INSN is not yet in the insns
1060 chain and delete_insn will fail. We only want to remove INSN from the
1061 current sequence. See PR56738. */
1062 if (in_sequence_p ())
1063 remove_insn (insn);
1064 else
1065 delete_insn (insn);
1067 return insns;
1070 /* Change a CLOBBER of a decomposed register into a CLOBBER of the
1071 component registers. Return whether we changed something. */
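/* For example, if (reg:DI 100) has been decomposed into
   (concatn:DI [(reg:SI 200) (reg:SI 201)]) (hypothetical numbers), a
   (clobber (reg:DI 100)) is rewritten in place to clobber (reg:SI 200),
   and a second clobber of (reg:SI 201) is emitted after the insn.  */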
1073 static bool
1074 resolve_clobber (rtx pat, rtx_insn *insn)
1076 rtx reg;
1077 machine_mode orig_mode;
1078 unsigned int words, i;
1079 int ret;
1081 reg = XEXP (pat, 0);
1082 if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
1083 return false;
1085 orig_mode = GET_MODE (reg);
1086 words = GET_MODE_SIZE (orig_mode);
1087 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1089 ret = validate_change (NULL_RTX, &XEXP (pat, 0),
1090 simplify_gen_subreg_concatn (word_mode, reg,
1091 orig_mode, 0),
1093 df_insn_rescan (insn);
1094 gcc_assert (ret != 0);
1096 for (i = words - 1; i > 0; --i)
1098 rtx x;
1100 x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
1101 i * UNITS_PER_WORD);
1102 x = gen_rtx_CLOBBER (VOIDmode, x);
1103 emit_insn_after (x, insn);
1106 resolve_reg_notes (insn);
1108 return true;
1111 /* A USE of a decomposed register is no longer meaningful. Return
1112 whether we changed something. */
1114 static bool
1115 resolve_use (rtx pat, rtx_insn *insn)
1117 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1119 delete_insn (insn);
1120 return true;
1123 resolve_reg_notes (insn);
1125 return false;
1128 /* A VAR_LOCATION can be simplified. */
1130 static void
1131 resolve_debug (rtx_insn *insn)
1133 subrtx_ptr_iterator::array_type array;
1134 FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
1136 rtx *loc = *iter;
1137 rtx x = *loc;
1138 if (resolve_subreg_p (x))
1140 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
1141 SUBREG_BYTE (x));
1143 if (x)
1144 *loc = x;
1145 else
1146 x = copy_rtx (*loc);
1148 if (resolve_reg_p (x))
1149 *loc = copy_rtx (x);
1152 df_insn_rescan (insn);
1154 resolve_reg_notes (insn);
1157 /* Check if INSN is a decomposable multiword-shift or zero-extend and
1158 set the decomposable_context bitmap accordingly. SPEED_P is true
1159 if we are optimizing INSN for speed rather than size. Return true
1160 if INSN is decomposable. */
1162 static bool
1163 find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
1165 rtx set;
1166 rtx op;
1167 rtx op_operand;
1169 set = single_set (insn);
1170 if (!set)
1171 return false;
1173 op = SET_SRC (set);
1174 if (GET_CODE (op) != ASHIFT
1175 && GET_CODE (op) != LSHIFTRT
1176 && GET_CODE (op) != ASHIFTRT
1177 && GET_CODE (op) != ZERO_EXTEND)
1178 return false;
1180 op_operand = XEXP (op, 0);
1181 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1182 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1183 || HARD_REGISTER_NUM_P (REGNO (op_operand))
1184 || GET_MODE (op) != twice_word_mode)
1185 return false;
1187 if (GET_CODE (op) == ZERO_EXTEND)
1189 if (GET_MODE (op_operand) != word_mode
1190 || !choices[speed_p].splitting_zext)
1191 return false;
1193 else /* left or right shift */
1195 bool *splitting = (GET_CODE (op) == ASHIFT
1196 ? choices[speed_p].splitting_ashift
1197 : GET_CODE (op) == ASHIFTRT
1198 ? choices[speed_p].splitting_ashiftrt
1199 : choices[speed_p].splitting_lshiftrt);
1200 if (!CONST_INT_P (XEXP (op, 1))
1201 || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1202 2 * BITS_PER_WORD - 1)
1203 || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1204 return false;
1206 bitmap_set_bit (decomposable_context, REGNO (op_operand));
1209 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1211 return true;
1214 /* Decompose a more-than-word-wide shift (in INSN) of a multiword
1215 pseudo or a multiword zero-extend of a word_mode pseudo into a move
1216 and 'set to zero' insn. Return a pointer to the new insn when a
1217 replacement was done. */
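/* For example (hypothetical 32-bit little-endian target, invented
   register numbers): with (reg:DI 100) decomposed into (reg:SI 200)
   [low] and (reg:SI 201) [high], and (reg:DI 101) into (reg:SI 202)
   [low] and (reg:SI 203) [high], the insn

     (set (reg:DI 100) (ashift:DI (reg:DI 101) (const_int 40)))

   becomes roughly

     (set (reg:SI 201) (ashift:SI (reg:SI 202) (const_int 8)))
     (set (reg:SI 200) (const_int 0))

   i.e. the low source word shifted by 40 - BITS_PER_WORD lands in the
   high destination word and the low destination word is zeroed.  */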
1219 static rtx_insn *
1220 resolve_shift_zext (rtx_insn *insn)
1222 rtx set;
1223 rtx op;
1224 rtx op_operand;
1225 rtx_insn *insns;
1226 rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
1227 int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
1229 set = single_set (insn);
1230 if (!set)
1231 return NULL;
1233 op = SET_SRC (set);
1234 if (GET_CODE (op) != ASHIFT
1235 && GET_CODE (op) != LSHIFTRT
1236 && GET_CODE (op) != ASHIFTRT
1237 && GET_CODE (op) != ZERO_EXTEND)
1238 return NULL;
1240 op_operand = XEXP (op, 0);
1242 /* We can tear this operation apart only if the regs were already
1243 torn apart. */
1244 if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
1245 return NULL;
1247 /* src_reg_num is the number of the word mode register which we
1248 are operating on. For a left shift and a zero_extend on little
1249 endian machines this is register 0. */
1250 src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
1251 ? 1 : 0;
1253 if (WORDS_BIG_ENDIAN
1254 && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
1255 src_reg_num = 1 - src_reg_num;
1257 if (GET_CODE (op) == ZERO_EXTEND)
1258 dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
1259 else
1260 dest_reg_num = 1 - src_reg_num;
1262 offset1 = UNITS_PER_WORD * dest_reg_num;
1263 offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
1264 src_offset = UNITS_PER_WORD * src_reg_num;
1266 start_sequence ();
1268 dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1269 GET_MODE (SET_DEST (set)),
1270 offset1);
1271 dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1272 GET_MODE (SET_DEST (set)),
1273 offset2);
1274 src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
1275 GET_MODE (op_operand),
1276 src_offset);
1277 if (GET_CODE (op) == ASHIFTRT
1278 && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
1279 upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
1280 BITS_PER_WORD - 1, NULL_RTX, 0);
1282 if (GET_CODE (op) != ZERO_EXTEND)
1284 int shift_count = INTVAL (XEXP (op, 1));
1285 if (shift_count > BITS_PER_WORD)
1286 src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
1287 LSHIFT_EXPR : RSHIFT_EXPR,
1288 word_mode, src_reg,
1289 shift_count - BITS_PER_WORD,
1290 dest_reg, GET_CODE (op) != ASHIFTRT);
1293 if (dest_reg != src_reg)
1294 emit_move_insn (dest_reg, src_reg);
1295 if (GET_CODE (op) != ASHIFTRT)
1296 emit_move_insn (dest_upper, CONST0_RTX (word_mode));
1297 else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
1298 emit_move_insn (dest_upper, copy_rtx (src_reg));
1299 else
1300 emit_move_insn (dest_upper, upper_src);
1301 insns = get_insns ();
1303 end_sequence ();
1305 emit_insn_before (insns, insn);
1307 if (dump_file)
1309 rtx_insn *in;
1310 fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
1311 for (in = insns; in != insn; in = NEXT_INSN (in))
1312 fprintf (dump_file, "%d ", INSN_UID (in));
1313 fprintf (dump_file, "\n");
1316 delete_insn (insn);
1317 return insns;
1320 /* Print to dump_file a description of what we're doing with shift code CODE.
1321 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1323 static void
1324 dump_shift_choices (enum rtx_code code, bool *splitting)
1326 int i;
1327 const char *sep;
1329 fprintf (dump_file,
1330 " Splitting mode %s for %s lowering with shift amounts = ",
1331 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1332 sep = "";
1333 for (i = 0; i < BITS_PER_WORD; i++)
1334 if (splitting[i])
1336 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1337 sep = ",";
1339 fprintf (dump_file, "\n");
1342 /* Print to dump_file a description of what we're doing when optimizing
1343 for speed or size; SPEED_P says which. DESCRIPTION is a description
1344 of the SPEED_P choice. */
1346 static void
1347 dump_choices (bool speed_p, const char *description)
1349 unsigned int i;
1351 fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1353 for (i = 0; i < MAX_MACHINE_MODE; i++)
1354 if (GET_MODE_SIZE ((machine_mode) i) > UNITS_PER_WORD)
1355 fprintf (dump_file, " %s mode %s for copy lowering.\n",
1356 choices[speed_p].move_modes_to_split[i]
1357 ? "Splitting"
1358 : "Skipping",
1359 GET_MODE_NAME ((machine_mode) i));
1361 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
1362 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1363 GET_MODE_NAME (twice_word_mode));
1365 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1366 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1367 dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
1368 fprintf (dump_file, "\n");
1371 /* Look for registers which are always accessed via word-sized SUBREGs
1372 or, if DECOMPOSE_COPIES is true, via copies. Decompose these
1373 registers into several word-sized pseudo-registers. */
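/* Roughly, the pass proceeds in these steps: scan every insn to fill
   DECOMPOSABLE_CONTEXT, NON_DECOMPOSABLE_CONTEXT and the register copy
   graph; propagate decomposability through pseudo-to-pseudo copies;
   smash each chosen pseudo into a CONCATN of word-sized pseudos; then
   rewrite moves, shifts, zero-extends, clobbers, uses and notes in
   terms of the new registers, splitting basic blocks where the rewrite
   introduced additional trapping memory accesses.  */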
1375 static void
1376 decompose_multiword_subregs (bool decompose_copies)
1378 unsigned int max;
1379 basic_block bb;
1380 bool speed_p;
1382 if (dump_file)
1384 dump_choices (false, "size");
1385 dump_choices (true, "speed");
1388 /* Check if this target even has any modes to consider lowering. */
1389 if (!choices[false].something_to_do && !choices[true].something_to_do)
1391 if (dump_file)
1392 fprintf (dump_file, "Nothing to do!\n");
1393 return;
1396 max = max_reg_num ();
1398 /* First see if there are any multi-word pseudo-registers. If there
1399 aren't, there is nothing we can do. This should speed up this
1400 pass in the normal case, since it should be faster than scanning
1401 all the insns. */
1403 unsigned int i;
1404 bool useful_modes_seen = false;
1406 for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
1407 if (regno_reg_rtx[i] != NULL)
1409 machine_mode mode = GET_MODE (regno_reg_rtx[i]);
1410 if (choices[false].move_modes_to_split[(int) mode]
1411 || choices[true].move_modes_to_split[(int) mode])
1413 useful_modes_seen = true;
1414 break;
1418 if (!useful_modes_seen)
1420 if (dump_file)
1421 fprintf (dump_file, "Nothing to lower in this function.\n");
1422 return;
1426 if (df)
1428 df_set_flags (DF_DEFER_INSN_RESCAN);
1429 run_word_dce ();
1432 /* FIXME: It may be possible to change this code to look for each
1433 multi-word pseudo-register and to find each insn which sets or
1434 uses that register. That should be faster than scanning all the
1435 insns. */
1437 decomposable_context = BITMAP_ALLOC (NULL);
1438 non_decomposable_context = BITMAP_ALLOC (NULL);
1439 subreg_context = BITMAP_ALLOC (NULL);
1441 reg_copy_graph.create (max);
1442 reg_copy_graph.safe_grow_cleared (max);
1443 memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);
1445 speed_p = optimize_function_for_speed_p (cfun);
1446 FOR_EACH_BB_FN (bb, cfun)
1448 rtx_insn *insn;
1450 FOR_BB_INSNS (bb, insn)
1452 rtx set;
1453 enum classify_move_insn cmi;
1454 int i, n;
1456 if (!INSN_P (insn)
1457 || GET_CODE (PATTERN (insn)) == CLOBBER
1458 || GET_CODE (PATTERN (insn)) == USE)
1459 continue;
1461 recog_memoized (insn);
1463 if (find_decomposable_shift_zext (insn, speed_p))
1464 continue;
1466 extract_insn (insn);
1468 set = simple_move (insn, speed_p);
1470 if (!set)
1471 cmi = NOT_SIMPLE_MOVE;
1472 else
1474 /* We mark pseudo-to-pseudo copies as decomposable during the
1475 second pass only. The first pass is so early that there is
1476 a good chance such moves will be optimized away completely by
1477 subsequent optimizations anyway.
1479 However, we call find_pseudo_copy even during the first pass
1480 so as to properly set up the reg_copy_graph. */
1481 if (find_pseudo_copy (set))
1482 cmi = decompose_copies ? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
1483 else
1484 cmi = SIMPLE_MOVE;
1487 n = recog_data.n_operands;
1488 for (i = 0; i < n; ++i)
1490 find_decomposable_subregs (&recog_data.operand[i], &cmi);
1492 /* We handle ASM_OPERANDS as a special case to support
1493 things like x86 rdtsc which returns a DImode value.
1494 We can decompose the output, which will certainly be
1495 operand 0, but not the inputs. */
1497 if (cmi == SIMPLE_MOVE
1498 && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
1500 gcc_assert (i == 0);
1501 cmi = NOT_SIMPLE_MOVE;
1507 bitmap_and_compl_into (decomposable_context, non_decomposable_context);
1508 if (!bitmap_empty_p (decomposable_context))
1510 sbitmap sub_blocks;
1511 unsigned int i;
1512 sbitmap_iterator sbi;
1513 bitmap_iterator iter;
1514 unsigned int regno;
1516 propagate_pseudo_copies ();
1518 sub_blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
1519 bitmap_clear (sub_blocks);
1521 EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
1522 decompose_register (regno);
1524 FOR_EACH_BB_FN (bb, cfun)
1526 rtx_insn *insn;
1528 FOR_BB_INSNS (bb, insn)
1530 rtx pat;
1532 if (!INSN_P (insn))
1533 continue;
1535 pat = PATTERN (insn);
1536 if (GET_CODE (pat) == CLOBBER)
1537 resolve_clobber (pat, insn);
1538 else if (GET_CODE (pat) == USE)
1539 resolve_use (pat, insn);
1540 else if (DEBUG_INSN_P (insn))
1541 resolve_debug (insn);
1542 else
1544 rtx set;
1545 int i;
1547 recog_memoized (insn);
1548 extract_insn (insn);
1550 set = simple_move (insn, speed_p);
1551 if (set)
1553 rtx_insn *orig_insn = insn;
1554 bool cfi = control_flow_insn_p (insn);
1556 /* We can end up splitting loads to multi-word pseudos
1557 into separate loads to machine word size pseudos.
1558 When this happens, we first had one load that can
1559 throw, and after resolve_simple_move we'll have a
1560 bunch of loads (at least two). All those loads may
1561 trap if we can have non-call exceptions, so they
1562 all will end the current basic block. We split the
1563 block after the outer loop over all insns, but we
1564 make sure here that we will be able to split the
1565 basic block and still produce the correct control
1566 flow graph for it. */
1567 gcc_assert (!cfi
1568 || (cfun->can_throw_non_call_exceptions
1569 && can_throw_internal (insn)));
1571 insn = resolve_simple_move (set, insn);
1572 if (insn != orig_insn)
1574 recog_memoized (insn);
1575 extract_insn (insn);
1577 if (cfi)
1578 bitmap_set_bit (sub_blocks, bb->index);
1581 else
1583 rtx_insn *decomposed_shift;
1585 decomposed_shift = resolve_shift_zext (insn);
1586 if (decomposed_shift != NULL_RTX)
1588 insn = decomposed_shift;
1589 recog_memoized (insn);
1590 extract_insn (insn);
1594 for (i = recog_data.n_operands - 1; i >= 0; --i)
1595 resolve_subreg_use (recog_data.operand_loc[i], insn);
1597 resolve_reg_notes (insn);
1599 if (num_validated_changes () > 0)
1601 for (i = recog_data.n_dups - 1; i >= 0; --i)
1603 rtx *pl = recog_data.dup_loc[i];
1604 int dup_num = recog_data.dup_num[i];
1605 rtx *px = recog_data.operand_loc[dup_num];
1607 validate_unshare_change (insn, pl, *px, 1);
1610 i = apply_change_group ();
1611 gcc_assert (i);
1617 /* If we had insns to split that caused control flow insns in the middle
1618 of a basic block, split those blocks now. Note that we only handle
1619 the case where splitting a load has caused multiple possibly trapping
1620 loads to appear. */
1621 EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
1623 rtx_insn *insn, *end;
1624 edge fallthru;
1626 bb = BASIC_BLOCK_FOR_FN (cfun, i);
1627 insn = BB_HEAD (bb);
1628 end = BB_END (bb);
1630 while (insn != end)
1632 if (control_flow_insn_p (insn))
1634 /* Split the block after insn. There will be a fallthru
1635 edge, which is OK so we keep it. We have to create the
1636 exception edges ourselves. */
1637 fallthru = split_block (bb, insn);
1638 rtl_make_eh_edge (NULL, bb, BB_END (bb));
1639 bb = fallthru->dest;
1640 insn = BB_HEAD (bb);
1642 else
1643 insn = NEXT_INSN (insn);
1647 sbitmap_free (sub_blocks);
1651 unsigned int i;
1652 bitmap b;
1654 FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
1655 if (b)
1656 BITMAP_FREE (b);
1659 reg_copy_graph.release ();
1661 BITMAP_FREE (decomposable_context);
1662 BITMAP_FREE (non_decomposable_context);
1663 BITMAP_FREE (subreg_context);
1666 /* Implement first lower subreg pass. */
1668 namespace {
1670 const pass_data pass_data_lower_subreg =
1672 RTL_PASS, /* type */
1673 "subreg1", /* name */
1674 OPTGROUP_NONE, /* optinfo_flags */
1675 TV_LOWER_SUBREG, /* tv_id */
1676 0, /* properties_required */
1677 0, /* properties_provided */
1678 0, /* properties_destroyed */
1679 0, /* todo_flags_start */
1680 0, /* todo_flags_finish */
1683 class pass_lower_subreg : public rtl_opt_pass
1685 public:
1686 pass_lower_subreg (gcc::context *ctxt)
1687 : rtl_opt_pass (pass_data_lower_subreg, ctxt)
1690 /* opt_pass methods: */
1691 virtual bool gate (function *) { return flag_split_wide_types != 0; }
1692 virtual unsigned int execute (function *)
1694 decompose_multiword_subregs (false);
1695 return 0;
1698 }; // class pass_lower_subreg
1700 } // anon namespace
1702 rtl_opt_pass *
1703 make_pass_lower_subreg (gcc::context *ctxt)
1705 return new pass_lower_subreg (ctxt);
1708 /* Implement second lower subreg pass. */
1710 namespace {
1712 const pass_data pass_data_lower_subreg2 =
1714 RTL_PASS, /* type */
1715 "subreg2", /* name */
1716 OPTGROUP_NONE, /* optinfo_flags */
1717 TV_LOWER_SUBREG, /* tv_id */
1718 0, /* properties_required */
1719 0, /* properties_provided */
1720 0, /* properties_destroyed */
1721 0, /* todo_flags_start */
1722 TODO_df_finish, /* todo_flags_finish */
1725 class pass_lower_subreg2 : public rtl_opt_pass
1727 public:
1728 pass_lower_subreg2 (gcc::context *ctxt)
1729 : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
1732 /* opt_pass methods: */
1733 virtual bool gate (function *) { return flag_split_wide_types != 0; }
1734 virtual unsigned int execute (function *)
1736 decompose_multiword_subregs (true);
1737 return 0;
1740 }; // class pass_lower_subreg2
1742 } // anon namespace
1744 rtl_opt_pass *
1745 make_pass_lower_subreg2 (gcc::context *ctxt)
1747 return new pass_lower_subreg2 (ctxt);