final.c: Use rtx_sequence
[official-gcc.git] / gcc / lower-subreg.c
blobdf1c6679aa54e06ffba4920f68a14a86aae8cb02
1 /* Decompose multiword subregs.
2 Copyright (C) 2007-2014 Free Software Foundation, Inc.
3 Contributed by Richard Henderson <rth@redhat.com>
4 Ian Lance Taylor <iant@google.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "machmode.h"
26 #include "tm.h"
27 #include "tree.h"
28 #include "rtl.h"
29 #include "tm_p.h"
30 #include "flags.h"
31 #include "insn-config.h"
32 #include "obstack.h"
33 #include "basic-block.h"
34 #include "recog.h"
35 #include "bitmap.h"
36 #include "dce.h"
37 #include "expr.h"
38 #include "except.h"
39 #include "regs.h"
40 #include "tree-pass.h"
41 #include "df.h"
42 #include "lower-subreg.h"
/* Normalize STACK_GROWS_DOWNWARD to a 0/1 value so that it can be
   compared in ordinary C expressions (it is compared against
   WORDS_BIG_ENDIAN when splitting a push) instead of only being
   tested with #ifdef.  */
#ifdef STACK_GROWS_DOWNWARD
# undef STACK_GROWS_DOWNWARD
# define STACK_GROWS_DOWNWARD 1
#else
# define STACK_GROWS_DOWNWARD 0
#endif
52 /* Decompose multi-word pseudo-registers into individual
53 pseudo-registers when possible and profitable. This is possible
54 when all the uses of a multi-word register are via SUBREG, or are
55 copies of the register to another location. Breaking apart the
56 register permits more CSE and permits better register allocation.
57 This is profitable if the machine does not have move instructions
58 to do this.
60 This pass only splits moves with modes that are wider than
61 word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
62 integer modes that are twice the width of word_mode. The latter
63 could be generalized if there was a need to do this, but the trend in
64 architectures is to not need this.
66 There are two useful preprocessor defines for use by maintainers:
68 #define LOG_COSTS 1
70 if you wish to see the actual cost estimates that are being used
71 for each mode wider than word mode and the cost estimates for zero
72 extension and the shifts. This can be useful when port maintainers
73 are tuning insn rtx costs.
75 #define FORCE_LOWERING 1
77 if you wish to test the pass with all the transformation forced on.
78 This can be useful for finding bugs in the transformations. */
/* Maintainer switches.  When LOG_COSTS is nonzero the cost estimates
   are printed to stderr; when FORCE_LOWERING is nonzero every
   profitability comparison in this file is forced to succeed.  */
#define LOG_COSTS 0
#define FORCE_LOWERING 0
83 /* Bit N in this bitmap is set if regno N is used in a context in
84 which we can decompose it. */
85 static bitmap decomposable_context;
87 /* Bit N in this bitmap is set if regno N is used in a context in
88 which it can not be decomposed. */
89 static bitmap non_decomposable_context;
91 /* Bit N in this bitmap is set if regno N is used in a subreg
92 which changes the mode but not the size. This typically happens
93 when the register accessed as a floating-point value; we want to
94 avoid generating accesses to its subwords in integer modes. */
95 static bitmap subreg_context;
97 /* Bit N in the bitmap in element M of this array is set if there is a
98 copy from reg M to reg N. */
99 static vec<bitmap> reg_copy_graph;
101 struct target_lower_subreg default_target_lower_subreg;
102 #if SWITCHABLE_TARGET
103 struct target_lower_subreg *this_target_lower_subreg
104 = &default_target_lower_subreg;
105 #endif
107 #define twice_word_mode \
108 this_target_lower_subreg->x_twice_word_mode
109 #define choices \
110 this_target_lower_subreg->x_choices
112 /* RTXes used while computing costs. */
113 struct cost_rtxes {
114 /* Source and target registers. */
115 rtx source;
116 rtx target;
118 /* A twice_word_mode ZERO_EXTEND of SOURCE. */
119 rtx zext;
121 /* A shift of SOURCE. */
122 rtx shift;
124 /* A SET of TARGET. */
125 rtx set;
128 /* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
129 rtxes in RTXES. SPEED_P selects between the speed and size cost. */
131 static int
132 shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
133 enum machine_mode mode, int op1)
135 PUT_CODE (rtxes->shift, code);
136 PUT_MODE (rtxes->shift, mode);
137 PUT_MODE (rtxes->source, mode);
138 XEXP (rtxes->shift, 1) = GEN_INT (op1);
139 return set_src_cost (rtxes->shift, speed_p);
142 /* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
143 to true if it is profitable to split a double-word CODE shift
144 of X + BITS_PER_WORD bits. SPEED_P says whether we are testing
145 for speed or size profitability.
147 Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is
148 the cost of moving zero into a word-mode register. WORD_MOVE_COST
149 is the cost of moving between word registers. */
151 static void
152 compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
153 bool *splitting, enum rtx_code code,
154 int word_move_zero_cost, int word_move_cost)
156 int wide_cost, narrow_cost, upper_cost, i;
158 for (i = 0; i < BITS_PER_WORD; i++)
160 wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
161 i + BITS_PER_WORD);
162 if (i == 0)
163 narrow_cost = word_move_cost;
164 else
165 narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
167 if (code != ASHIFTRT)
168 upper_cost = word_move_zero_cost;
169 else if (i == BITS_PER_WORD - 1)
170 upper_cost = word_move_cost;
171 else
172 upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
173 BITS_PER_WORD - 1);
175 if (LOG_COSTS)
176 fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
177 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
178 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);
180 if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
181 splitting[i] = true;
185 /* Compute what we should do when optimizing for speed or size; SPEED_P
186 selects which. Use RTXES for computing costs. */
188 static void
189 compute_costs (bool speed_p, struct cost_rtxes *rtxes)
191 unsigned int i;
192 int word_move_zero_cost, word_move_cost;
194 PUT_MODE (rtxes->target, word_mode);
195 SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
196 word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);
198 SET_SRC (rtxes->set) = rtxes->source;
199 word_move_cost = set_rtx_cost (rtxes->set, speed_p);
201 if (LOG_COSTS)
202 fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
203 GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);
205 for (i = 0; i < MAX_MACHINE_MODE; i++)
207 enum machine_mode mode = (enum machine_mode) i;
208 int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
209 if (factor > 1)
211 int mode_move_cost;
213 PUT_MODE (rtxes->target, mode);
214 PUT_MODE (rtxes->source, mode);
215 mode_move_cost = set_rtx_cost (rtxes->set, speed_p);
217 if (LOG_COSTS)
218 fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
219 GET_MODE_NAME (mode), mode_move_cost,
220 word_move_cost, factor);
222 if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
224 choices[speed_p].move_modes_to_split[i] = true;
225 choices[speed_p].something_to_do = true;
230 /* For the moves and shifts, the only case that is checked is one
231 where the mode of the target is an integer mode twice the width
232 of the word_mode.
234 If it is not profitable to split a double word move then do not
235 even consider the shifts or the zero extension. */
236 if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
238 int zext_cost;
240 /* The only case here to check to see if moving the upper part with a
241 zero is cheaper than doing the zext itself. */
242 PUT_MODE (rtxes->source, word_mode);
243 zext_cost = set_src_cost (rtxes->zext, speed_p);
245 if (LOG_COSTS)
246 fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
247 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
248 zext_cost, word_move_cost, word_move_zero_cost);
250 if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
251 choices[speed_p].splitting_zext = true;
253 compute_splitting_shift (speed_p, rtxes,
254 choices[speed_p].splitting_ashift, ASHIFT,
255 word_move_zero_cost, word_move_cost);
256 compute_splitting_shift (speed_p, rtxes,
257 choices[speed_p].splitting_lshiftrt, LSHIFTRT,
258 word_move_zero_cost, word_move_cost);
259 compute_splitting_shift (speed_p, rtxes,
260 choices[speed_p].splitting_ashiftrt, ASHIFTRT,
261 word_move_zero_cost, word_move_cost);
265 /* Do one-per-target initialisation. This involves determining
266 which operations on the machine are profitable. If none are found,
267 then the pass just returns when called. */
269 void
270 init_lower_subreg (void)
272 struct cost_rtxes rtxes;
274 memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
276 twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode);
278 rtxes.target = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER);
279 rtxes.source = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER + 1);
280 rtxes.set = gen_rtx_SET (VOIDmode, rtxes.target, rtxes.source);
281 rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
282 rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
284 if (LOG_COSTS)
285 fprintf (stderr, "\nSize costs\n==========\n\n");
286 compute_costs (false, &rtxes);
288 if (LOG_COSTS)
289 fprintf (stderr, "\nSpeed costs\n===========\n\n");
290 compute_costs (true, &rtxes);
293 static bool
294 simple_move_operand (rtx x)
296 if (GET_CODE (x) == SUBREG)
297 x = SUBREG_REG (x);
299 if (!OBJECT_P (x))
300 return false;
302 if (GET_CODE (x) == LABEL_REF
303 || GET_CODE (x) == SYMBOL_REF
304 || GET_CODE (x) == HIGH
305 || GET_CODE (x) == CONST)
306 return false;
308 if (MEM_P (x)
309 && (MEM_VOLATILE_P (x)
310 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
311 return false;
313 return true;
316 /* If INSN is a single set between two objects that we want to split,
317 return the single set. SPEED_P says whether we are optimizing
318 INSN for speed or size.
320 INSN should have been passed to recog and extract_insn before this
321 is called. */
323 static rtx
324 simple_move (rtx_insn *insn, bool speed_p)
326 rtx x;
327 rtx set;
328 enum machine_mode mode;
330 if (recog_data.n_operands != 2)
331 return NULL_RTX;
333 set = single_set (insn);
334 if (!set)
335 return NULL_RTX;
337 x = SET_DEST (set);
338 if (x != recog_data.operand[0] && x != recog_data.operand[1])
339 return NULL_RTX;
340 if (!simple_move_operand (x))
341 return NULL_RTX;
343 x = SET_SRC (set);
344 if (x != recog_data.operand[0] && x != recog_data.operand[1])
345 return NULL_RTX;
346 /* For the src we can handle ASM_OPERANDS, and it is beneficial for
347 things like x86 rdtsc which returns a DImode value. */
348 if (GET_CODE (x) != ASM_OPERANDS
349 && !simple_move_operand (x))
350 return NULL_RTX;
352 /* We try to decompose in integer modes, to avoid generating
353 inefficient code copying between integer and floating point
354 registers. That means that we can't decompose if this is a
355 non-integer mode for which there is no integer mode of the same
356 size. */
357 mode = GET_MODE (SET_DEST (set));
358 if (!SCALAR_INT_MODE_P (mode)
359 && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0)
360 == BLKmode))
361 return NULL_RTX;
363 /* Reject PARTIAL_INT modes. They are used for processor specific
364 purposes and it's probably best not to tamper with them. */
365 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
366 return NULL_RTX;
368 if (!choices[speed_p].move_modes_to_split[(int) mode])
369 return NULL_RTX;
371 return set;
374 /* If SET is a copy from one multi-word pseudo-register to another,
375 record that in reg_copy_graph. Return whether it is such a
376 copy. */
378 static bool
379 find_pseudo_copy (rtx set)
381 rtx dest = SET_DEST (set);
382 rtx src = SET_SRC (set);
383 unsigned int rd, rs;
384 bitmap b;
386 if (!REG_P (dest) || !REG_P (src))
387 return false;
389 rd = REGNO (dest);
390 rs = REGNO (src);
391 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
392 return false;
394 b = reg_copy_graph[rs];
395 if (b == NULL)
397 b = BITMAP_ALLOC (NULL);
398 reg_copy_graph[rs] = b;
401 bitmap_set_bit (b, rd);
403 return true;
406 /* Look through the registers in DECOMPOSABLE_CONTEXT. For each case
407 where they are copied to another register, add the register to
408 which they are copied to DECOMPOSABLE_CONTEXT. Use
409 NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
410 copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */
412 static void
413 propagate_pseudo_copies (void)
415 bitmap queue, propagate;
417 queue = BITMAP_ALLOC (NULL);
418 propagate = BITMAP_ALLOC (NULL);
420 bitmap_copy (queue, decomposable_context);
423 bitmap_iterator iter;
424 unsigned int i;
426 bitmap_clear (propagate);
428 EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
430 bitmap b = reg_copy_graph[i];
431 if (b)
432 bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
435 bitmap_and_compl (queue, propagate, decomposable_context);
436 bitmap_ior_into (decomposable_context, propagate);
438 while (!bitmap_empty_p (queue));
440 BITMAP_FREE (queue);
441 BITMAP_FREE (propagate);
/* A pointer to one of these values is passed to
   find_decomposable_subregs via for_each_rtx.  */

enum classify_move_insn
{
  /* Not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* A simple move we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* Any other simple move.  */
  SIMPLE_MOVE
};
457 /* This is called via for_each_rtx. If we find a SUBREG which we
458 could use to decompose a pseudo-register, set a bit in
459 DECOMPOSABLE_CONTEXT. If we find an unadorned register which is
460 not a simple pseudo-register copy, DATA will point at the type of
461 move, and we set a bit in DECOMPOSABLE_CONTEXT or
462 NON_DECOMPOSABLE_CONTEXT as appropriate. */
464 static int
465 find_decomposable_subregs (rtx *px, void *data)
467 enum classify_move_insn *pcmi = (enum classify_move_insn *) data;
468 rtx x = *px;
470 if (x == NULL_RTX)
471 return 0;
473 if (GET_CODE (x) == SUBREG)
475 rtx inner = SUBREG_REG (x);
476 unsigned int regno, outer_size, inner_size, outer_words, inner_words;
478 if (!REG_P (inner))
479 return 0;
481 regno = REGNO (inner);
482 if (HARD_REGISTER_NUM_P (regno))
483 return -1;
485 outer_size = GET_MODE_SIZE (GET_MODE (x));
486 inner_size = GET_MODE_SIZE (GET_MODE (inner));
487 outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
488 inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
490 /* We only try to decompose single word subregs of multi-word
491 registers. When we find one, we return -1 to avoid iterating
492 over the inner register.
494 ??? This doesn't allow, e.g., DImode subregs of TImode values
495 on 32-bit targets. We would need to record the way the
496 pseudo-register was used, and only decompose if all the uses
497 were the same number and size of pieces. Hopefully this
498 doesn't happen much. */
500 if (outer_words == 1 && inner_words > 1)
502 bitmap_set_bit (decomposable_context, regno);
503 return -1;
506 /* If this is a cast from one mode to another, where the modes
507 have the same size, and they are not tieable, then mark this
508 register as non-decomposable. If we decompose it we are
509 likely to mess up whatever the backend is trying to do. */
510 if (outer_words > 1
511 && outer_size == inner_size
512 && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner)))
514 bitmap_set_bit (non_decomposable_context, regno);
515 bitmap_set_bit (subreg_context, regno);
516 return -1;
519 else if (REG_P (x))
521 unsigned int regno;
523 /* We will see an outer SUBREG before we see the inner REG, so
524 when we see a plain REG here it means a direct reference to
525 the register.
527 If this is not a simple copy from one location to another,
528 then we can not decompose this register. If this is a simple
529 copy we want to decompose, and the mode is right,
530 then we mark the register as decomposable.
531 Otherwise we don't say anything about this register --
532 it could be decomposed, but whether that would be
533 profitable depends upon how it is used elsewhere.
535 We only set bits in the bitmap for multi-word
536 pseudo-registers, since those are the only ones we care about
537 and it keeps the size of the bitmaps down. */
539 regno = REGNO (x);
540 if (!HARD_REGISTER_NUM_P (regno)
541 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
543 switch (*pcmi)
545 case NOT_SIMPLE_MOVE:
546 bitmap_set_bit (non_decomposable_context, regno);
547 break;
548 case DECOMPOSABLE_SIMPLE_MOVE:
549 if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
550 bitmap_set_bit (decomposable_context, regno);
551 break;
552 case SIMPLE_MOVE:
553 break;
554 default:
555 gcc_unreachable ();
559 else if (MEM_P (x))
561 enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;
563 /* Any registers used in a MEM do not participate in a
564 SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE. Do our own recursion
565 here, and return -1 to block the parent's recursion. */
566 for_each_rtx (&XEXP (x, 0), find_decomposable_subregs, &cmi_mem);
567 return -1;
570 return 0;
573 /* Decompose REGNO into word-sized components. We smash the REG node
574 in place. This ensures that (1) something goes wrong quickly if we
575 fail to make some replacement, and (2) the debug information inside
576 the symbol table is automatically kept up to date. */
578 static void
579 decompose_register (unsigned int regno)
581 rtx reg;
582 unsigned int words, i;
583 rtvec v;
585 reg = regno_reg_rtx[regno];
587 regno_reg_rtx[regno] = NULL_RTX;
589 words = GET_MODE_SIZE (GET_MODE (reg));
590 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
592 v = rtvec_alloc (words);
593 for (i = 0; i < words; ++i)
594 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
596 PUT_CODE (reg, CONCATN);
597 XVEC (reg, 0) = v;
599 if (dump_file)
601 fprintf (dump_file, "; Splitting reg %u ->", regno);
602 for (i = 0; i < words; ++i)
603 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
604 fputc ('\n', dump_file);
608 /* Get a SUBREG of a CONCATN. */
610 static rtx
611 simplify_subreg_concatn (enum machine_mode outermode, rtx op,
612 unsigned int byte)
614 unsigned int inner_size;
615 enum machine_mode innermode, partmode;
616 rtx part;
617 unsigned int final_offset;
619 gcc_assert (GET_CODE (op) == CONCATN);
620 gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);
622 innermode = GET_MODE (op);
623 gcc_assert (byte < GET_MODE_SIZE (innermode));
624 gcc_assert (GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (innermode));
626 inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
627 part = XVECEXP (op, 0, byte / inner_size);
628 partmode = GET_MODE (part);
630 /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
631 regular CONST_VECTORs. They have vector or integer modes, depending
632 on the capabilities of the target. Cope with them. */
633 if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
634 partmode = GET_MODE_INNER (innermode);
635 else if (partmode == VOIDmode)
637 enum mode_class mclass = GET_MODE_CLASS (innermode);
638 partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0);
641 final_offset = byte % inner_size;
642 if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
643 return NULL_RTX;
645 return simplify_gen_subreg (outermode, part, partmode, final_offset);
648 /* Wrapper around simplify_gen_subreg which handles CONCATN. */
650 static rtx
651 simplify_gen_subreg_concatn (enum machine_mode outermode, rtx op,
652 enum machine_mode innermode, unsigned int byte)
654 rtx ret;
656 /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
657 If OP is a SUBREG of a CONCATN, then it must be a simple mode
658 change with the same size and offset 0, or it must extract a
659 part. We shouldn't see anything else here. */
660 if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
662 rtx op2;
664 if ((GET_MODE_SIZE (GET_MODE (op))
665 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
666 && SUBREG_BYTE (op) == 0)
667 return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
668 GET_MODE (SUBREG_REG (op)), byte);
670 op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
671 SUBREG_BYTE (op));
672 if (op2 == NULL_RTX)
674 /* We don't handle paradoxical subregs here. */
675 gcc_assert (GET_MODE_SIZE (outermode)
676 <= GET_MODE_SIZE (GET_MODE (op)));
677 gcc_assert (GET_MODE_SIZE (GET_MODE (op))
678 <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))));
679 op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
680 byte + SUBREG_BYTE (op));
681 gcc_assert (op2 != NULL_RTX);
682 return op2;
685 op = op2;
686 gcc_assert (op != NULL_RTX);
687 gcc_assert (innermode == GET_MODE (op));
690 if (GET_CODE (op) == CONCATN)
691 return simplify_subreg_concatn (outermode, op, byte);
693 ret = simplify_gen_subreg (outermode, op, innermode, byte);
695 /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
696 resolve_simple_move will ask for the high part of the paradoxical
697 subreg, which does not have a value. Just return a zero. */
698 if (ret == NULL_RTX
699 && GET_CODE (op) == SUBREG
700 && SUBREG_BYTE (op) == 0
701 && (GET_MODE_SIZE (innermode)
702 > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))))
703 return CONST0_RTX (outermode);
705 gcc_assert (ret != NULL_RTX);
706 return ret;
709 /* Return whether we should resolve X into the registers into which it
710 was decomposed. */
712 static bool
713 resolve_reg_p (rtx x)
715 return GET_CODE (x) == CONCATN;
718 /* Return whether X is a SUBREG of a register which we need to
719 resolve. */
721 static bool
722 resolve_subreg_p (rtx x)
724 if (GET_CODE (x) != SUBREG)
725 return false;
726 return resolve_reg_p (SUBREG_REG (x));
729 /* This is called via for_each_rtx. Look for SUBREGs which need to be
730 decomposed. */
732 static int
733 resolve_subreg_use (rtx *px, void *data)
735 rtx insn = (rtx) data;
736 rtx x = *px;
738 if (x == NULL_RTX)
739 return 0;
741 if (resolve_subreg_p (x))
743 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
744 SUBREG_BYTE (x));
746 /* It is possible for a note to contain a reference which we can
747 decompose. In this case, return 1 to the caller to indicate
748 that the note must be removed. */
749 if (!x)
751 gcc_assert (!insn);
752 return 1;
755 validate_change (insn, px, x, 1);
756 return -1;
759 if (resolve_reg_p (x))
761 /* Return 1 to the caller to indicate that we found a direct
762 reference to a register which is being decomposed. This can
763 happen inside notes, multiword shift or zero-extend
764 instructions. */
765 return 1;
768 return 0;
771 /* This is called via for_each_rtx. Look for SUBREGs which can be
772 decomposed and decomposed REGs that need copying. */
774 static int
775 adjust_decomposed_uses (rtx *px, void *data ATTRIBUTE_UNUSED)
777 rtx x = *px;
779 if (x == NULL_RTX)
780 return 0;
782 if (resolve_subreg_p (x))
784 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
785 SUBREG_BYTE (x));
787 if (x)
788 *px = x;
789 else
790 x = copy_rtx (*px);
793 if (resolve_reg_p (x))
794 *px = copy_rtx (x);
796 return 0;
799 /* Resolve any decomposed registers which appear in register notes on
800 INSN. */
802 static void
803 resolve_reg_notes (rtx_insn *insn)
805 rtx *pnote, note;
807 note = find_reg_equal_equiv_note (insn);
808 if (note)
810 int old_count = num_validated_changes ();
811 if (for_each_rtx (&XEXP (note, 0), resolve_subreg_use, NULL))
812 remove_note (insn, note);
813 else
814 if (old_count != num_validated_changes ())
815 df_notes_rescan (insn);
818 pnote = &REG_NOTES (insn);
819 while (*pnote != NULL_RTX)
821 bool del = false;
823 note = *pnote;
824 switch (REG_NOTE_KIND (note))
826 case REG_DEAD:
827 case REG_UNUSED:
828 if (resolve_reg_p (XEXP (note, 0)))
829 del = true;
830 break;
832 default:
833 break;
836 if (del)
837 *pnote = XEXP (note, 1);
838 else
839 pnote = &XEXP (note, 1);
843 /* Return whether X can be decomposed into subwords. */
845 static bool
846 can_decompose_p (rtx x)
848 if (REG_P (x))
850 unsigned int regno = REGNO (x);
852 if (HARD_REGISTER_NUM_P (regno))
854 unsigned int byte, num_bytes;
856 num_bytes = GET_MODE_SIZE (GET_MODE (x));
857 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
858 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
859 return false;
860 return true;
862 else
863 return !bitmap_bit_p (subreg_context, regno);
866 return true;
869 /* Decompose the registers used in a simple move SET within INSN. If
870 we don't change anything, return INSN, otherwise return the start
871 of the sequence of moves. */
873 static rtx_insn *
874 resolve_simple_move (rtx set, rtx_insn *insn)
876 rtx src, dest, real_dest;
877 rtx_insn *insns;
878 enum machine_mode orig_mode, dest_mode;
879 unsigned int words;
880 bool pushing;
882 src = SET_SRC (set);
883 dest = SET_DEST (set);
884 orig_mode = GET_MODE (dest);
886 words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
887 gcc_assert (words > 1);
889 start_sequence ();
891 /* We have to handle copying from a SUBREG of a decomposed reg where
892 the SUBREG is larger than word size. Rather than assume that we
893 can take a word_mode SUBREG of the destination, we copy to a new
894 register and then copy that to the destination. */
896 real_dest = NULL_RTX;
898 if (GET_CODE (src) == SUBREG
899 && resolve_reg_p (SUBREG_REG (src))
900 && (SUBREG_BYTE (src) != 0
901 || (GET_MODE_SIZE (orig_mode)
902 != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
904 real_dest = dest;
905 dest = gen_reg_rtx (orig_mode);
906 if (REG_P (real_dest))
907 REG_ATTRS (dest) = REG_ATTRS (real_dest);
910 /* Similarly if we are copying to a SUBREG of a decomposed reg where
911 the SUBREG is larger than word size. */
913 if (GET_CODE (dest) == SUBREG
914 && resolve_reg_p (SUBREG_REG (dest))
915 && (SUBREG_BYTE (dest) != 0
916 || (GET_MODE_SIZE (orig_mode)
917 != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
919 rtx reg, smove;
920 rtx_insn *minsn;
922 reg = gen_reg_rtx (orig_mode);
923 minsn = emit_move_insn (reg, src);
924 smove = single_set (minsn);
925 gcc_assert (smove != NULL_RTX);
926 resolve_simple_move (smove, minsn);
927 src = reg;
930 /* If we didn't have any big SUBREGS of decomposed registers, and
931 neither side of the move is a register we are decomposing, then
932 we don't have to do anything here. */
934 if (src == SET_SRC (set)
935 && dest == SET_DEST (set)
936 && !resolve_reg_p (src)
937 && !resolve_subreg_p (src)
938 && !resolve_reg_p (dest)
939 && !resolve_subreg_p (dest))
941 end_sequence ();
942 return insn;
945 /* It's possible for the code to use a subreg of a decomposed
946 register while forming an address. We need to handle that before
947 passing the address to emit_move_insn. We pass NULL_RTX as the
948 insn parameter to resolve_subreg_use because we can not validate
949 the insn yet. */
950 if (MEM_P (src) || MEM_P (dest))
952 int acg;
954 if (MEM_P (src))
955 for_each_rtx (&XEXP (src, 0), resolve_subreg_use, NULL_RTX);
956 if (MEM_P (dest))
957 for_each_rtx (&XEXP (dest, 0), resolve_subreg_use, NULL_RTX);
958 acg = apply_change_group ();
959 gcc_assert (acg);
962 /* If SRC is a register which we can't decompose, or has side
963 effects, we need to move via a temporary register. */
965 if (!can_decompose_p (src)
966 || side_effects_p (src)
967 || GET_CODE (src) == ASM_OPERANDS)
969 rtx reg;
971 reg = gen_reg_rtx (orig_mode);
973 #ifdef AUTO_INC_DEC
975 rtx move = emit_move_insn (reg, src);
976 if (MEM_P (src))
978 rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
979 if (note)
980 add_reg_note (move, REG_INC, XEXP (note, 0));
983 #else
984 emit_move_insn (reg, src);
985 #endif
986 src = reg;
989 /* If DEST is a register which we can't decompose, or has side
990 effects, we need to first move to a temporary register. We
991 handle the common case of pushing an operand directly. We also
992 go through a temporary register if it holds a floating point
993 value. This gives us better code on systems which can't move
994 data easily between integer and floating point registers. */
996 dest_mode = orig_mode;
997 pushing = push_operand (dest, dest_mode);
998 if (!can_decompose_p (dest)
999 || (side_effects_p (dest) && !pushing)
1000 || (!SCALAR_INT_MODE_P (dest_mode)
1001 && !resolve_reg_p (dest)
1002 && !resolve_subreg_p (dest)))
1004 if (real_dest == NULL_RTX)
1005 real_dest = dest;
1006 if (!SCALAR_INT_MODE_P (dest_mode))
1008 dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
1009 MODE_INT, 0);
1010 gcc_assert (dest_mode != BLKmode);
1012 dest = gen_reg_rtx (dest_mode);
1013 if (REG_P (real_dest))
1014 REG_ATTRS (dest) = REG_ATTRS (real_dest);
1017 if (pushing)
1019 unsigned int i, j, jinc;
1021 gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
1022 gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
1023 gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);
1025 if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
1027 j = 0;
1028 jinc = 1;
1030 else
1032 j = words - 1;
1033 jinc = -1;
1036 for (i = 0; i < words; ++i, j += jinc)
1038 rtx temp;
1040 temp = copy_rtx (XEXP (dest, 0));
1041 temp = adjust_automodify_address_nv (dest, word_mode, temp,
1042 j * UNITS_PER_WORD);
1043 emit_move_insn (temp,
1044 simplify_gen_subreg_concatn (word_mode, src,
1045 orig_mode,
1046 j * UNITS_PER_WORD));
1049 else
1051 unsigned int i;
1053 if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
1054 emit_clobber (dest);
1056 for (i = 0; i < words; ++i)
1057 emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
1058 dest_mode,
1059 i * UNITS_PER_WORD),
1060 simplify_gen_subreg_concatn (word_mode, src,
1061 orig_mode,
1062 i * UNITS_PER_WORD));
1065 if (real_dest != NULL_RTX)
1067 rtx mdest, smove;
1068 rtx_insn *minsn;
1070 if (dest_mode == orig_mode)
1071 mdest = dest;
1072 else
1073 mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
1074 minsn = emit_move_insn (real_dest, mdest);
1076 #ifdef AUTO_INC_DEC
1077 if (MEM_P (real_dest)
1078 && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
1080 rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
1081 if (note)
1082 add_reg_note (minsn, REG_INC, XEXP (note, 0));
1084 #endif
1086 smove = single_set (minsn);
1087 gcc_assert (smove != NULL_RTX);
1089 resolve_simple_move (smove, minsn);
1092 insns = get_insns ();
1093 end_sequence ();
1095 copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);
1097 emit_insn_before (insns, insn);
1099 /* If we get here via self-recursion, then INSN is not yet in the insns
1100 chain and delete_insn will fail. We only want to remove INSN from the
1101 current sequence. See PR56738. */
1102 if (in_sequence_p ())
1103 remove_insn (insn);
1104 else
1105 delete_insn (insn);
1107 return insns;
1110 /* Change a CLOBBER of a decomposed register into a CLOBBER of the
1111 component registers. Return whether we changed something. */
1113 static bool
1114 resolve_clobber (rtx pat, rtx_insn *insn)
1116 rtx reg;
1117 enum machine_mode orig_mode;
1118 unsigned int words, i;
1119 int ret;
1121 reg = XEXP (pat, 0);
1122 if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
1123 return false;
1125 orig_mode = GET_MODE (reg);
1126 words = GET_MODE_SIZE (orig_mode);
1127 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1129 ret = validate_change (NULL_RTX, &XEXP (pat, 0),
1130 simplify_gen_subreg_concatn (word_mode, reg,
1131 orig_mode, 0),
1133 df_insn_rescan (insn);
1134 gcc_assert (ret != 0);
1136 for (i = words - 1; i > 0; --i)
1138 rtx x;
1140 x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
1141 i * UNITS_PER_WORD);
1142 x = gen_rtx_CLOBBER (VOIDmode, x);
1143 emit_insn_after (x, insn);
1146 resolve_reg_notes (insn);
1148 return true;
1151 /* A USE of a decomposed register is no longer meaningful. Return
1152 whether we changed something. */
1154 static bool
1155 resolve_use (rtx pat, rtx_insn *insn)
1157 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1159 delete_insn (insn);
1160 return true;
1163 resolve_reg_notes (insn);
1165 return false;
1168 /* A VAR_LOCATION can be simplified. */
1170 static void
1171 resolve_debug (rtx_insn *insn)
1173 for_each_rtx (&PATTERN (insn), adjust_decomposed_uses, NULL_RTX);
1175 df_insn_rescan (insn);
1177 resolve_reg_notes (insn);
1180 /* Check if INSN is a decomposable multiword-shift or zero-extend and
1181 set the decomposable_context bitmap accordingly. SPEED_P is true
1182 if we are optimizing INSN for speed rather than size. Return true
1183 if INSN is decomposable. */
1185 static bool
1186 find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
1188 rtx set;
1189 rtx op;
1190 rtx op_operand;
1192 set = single_set (insn);
1193 if (!set)
1194 return false;
1196 op = SET_SRC (set);
1197 if (GET_CODE (op) != ASHIFT
1198 && GET_CODE (op) != LSHIFTRT
1199 && GET_CODE (op) != ASHIFTRT
1200 && GET_CODE (op) != ZERO_EXTEND)
1201 return false;
1203 op_operand = XEXP (op, 0);
1204 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1205 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1206 || HARD_REGISTER_NUM_P (REGNO (op_operand))
1207 || GET_MODE (op) != twice_word_mode)
1208 return false;
1210 if (GET_CODE (op) == ZERO_EXTEND)
1212 if (GET_MODE (op_operand) != word_mode
1213 || !choices[speed_p].splitting_zext)
1214 return false;
1216 else /* left or right shift */
1218 bool *splitting = (GET_CODE (op) == ASHIFT
1219 ? choices[speed_p].splitting_ashift
1220 : GET_CODE (op) == ASHIFTRT
1221 ? choices[speed_p].splitting_ashiftrt
1222 : choices[speed_p].splitting_lshiftrt);
1223 if (!CONST_INT_P (XEXP (op, 1))
1224 || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1225 2 * BITS_PER_WORD - 1)
1226 || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1227 return false;
1229 bitmap_set_bit (decomposable_context, REGNO (op_operand));
1232 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1234 return true;
1237 /* Decompose a more than word wide shift (in INSN) of a multiword
1238 pseudo or a multiword zero-extend of a wordmode pseudo into a move
1239 and 'set to zero' insn. Return a pointer to the new insn when a
1240 replacement was done. */
1242 static rtx_insn *
1243 resolve_shift_zext (rtx_insn *insn)
1245 rtx set;
1246 rtx op;
1247 rtx op_operand;
1248 rtx_insn *insns;
1249 rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
1250 int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
1252 set = single_set (insn);
1253 if (!set)
1254 return NULL;
1256 op = SET_SRC (set);
1257 if (GET_CODE (op) != ASHIFT
1258 && GET_CODE (op) != LSHIFTRT
1259 && GET_CODE (op) != ASHIFTRT
1260 && GET_CODE (op) != ZERO_EXTEND)
1261 return NULL;
1263 op_operand = XEXP (op, 0);
1265 /* We can tear this operation apart only if the regs were already
1266 torn apart. */
1267 if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
1268 return NULL;
1270 /* src_reg_num is the number of the word mode register which we
1271 are operating on. For a left shift and a zero_extend on little
1272 endian machines this is register 0. */
1273 src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
1274 ? 1 : 0;
1276 if (WORDS_BIG_ENDIAN
1277 && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
1278 src_reg_num = 1 - src_reg_num;
1280 if (GET_CODE (op) == ZERO_EXTEND)
1281 dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
1282 else
1283 dest_reg_num = 1 - src_reg_num;
1285 offset1 = UNITS_PER_WORD * dest_reg_num;
1286 offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
1287 src_offset = UNITS_PER_WORD * src_reg_num;
1289 start_sequence ();
1291 dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1292 GET_MODE (SET_DEST (set)),
1293 offset1);
1294 dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1295 GET_MODE (SET_DEST (set)),
1296 offset2);
1297 src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
1298 GET_MODE (op_operand),
1299 src_offset);
1300 if (GET_CODE (op) == ASHIFTRT
1301 && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
1302 upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
1303 BITS_PER_WORD - 1, NULL_RTX, 0);
1305 if (GET_CODE (op) != ZERO_EXTEND)
1307 int shift_count = INTVAL (XEXP (op, 1));
1308 if (shift_count > BITS_PER_WORD)
1309 src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
1310 LSHIFT_EXPR : RSHIFT_EXPR,
1311 word_mode, src_reg,
1312 shift_count - BITS_PER_WORD,
1313 dest_reg, GET_CODE (op) != ASHIFTRT);
1316 if (dest_reg != src_reg)
1317 emit_move_insn (dest_reg, src_reg);
1318 if (GET_CODE (op) != ASHIFTRT)
1319 emit_move_insn (dest_upper, CONST0_RTX (word_mode));
1320 else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
1321 emit_move_insn (dest_upper, copy_rtx (src_reg));
1322 else
1323 emit_move_insn (dest_upper, upper_src);
1324 insns = get_insns ();
1326 end_sequence ();
1328 emit_insn_before (insns, insn);
1330 if (dump_file)
1332 rtx_insn *in;
1333 fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
1334 for (in = insns; in != insn; in = NEXT_INSN (in))
1335 fprintf (dump_file, "%d ", INSN_UID (in));
1336 fprintf (dump_file, "\n");
1339 delete_insn (insn);
1340 return insns;
1343 /* Print to dump_file a description of what we're doing with shift code CODE.
1344 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1346 static void
1347 dump_shift_choices (enum rtx_code code, bool *splitting)
1349 int i;
1350 const char *sep;
1352 fprintf (dump_file,
1353 " Splitting mode %s for %s lowering with shift amounts = ",
1354 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1355 sep = "";
1356 for (i = 0; i < BITS_PER_WORD; i++)
1357 if (splitting[i])
1359 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1360 sep = ",";
1362 fprintf (dump_file, "\n");
1365 /* Print to dump_file a description of what we're doing when optimizing
1366 for speed or size; SPEED_P says which. DESCRIPTION is a description
1367 of the SPEED_P choice. */
1369 static void
1370 dump_choices (bool speed_p, const char *description)
1372 unsigned int i;
1374 fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1376 for (i = 0; i < MAX_MACHINE_MODE; i++)
1377 if (GET_MODE_SIZE ((enum machine_mode) i) > UNITS_PER_WORD)
1378 fprintf (dump_file, " %s mode %s for copy lowering.\n",
1379 choices[speed_p].move_modes_to_split[i]
1380 ? "Splitting"
1381 : "Skipping",
1382 GET_MODE_NAME ((enum machine_mode) i));
1384 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
1385 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1386 GET_MODE_NAME (twice_word_mode));
1388 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1389 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1390 dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
1391 fprintf (dump_file, "\n");
1394 /* Look for registers which are always accessed via word-sized SUBREGs
1395 or -if DECOMPOSE_COPIES is true- via copies. Decompose these
1396 registers into several word-sized pseudo-registers. */
/* Outline of the steps below:
   1. Return early when the target has nothing to lower or when no
      pseudo in this function has a mode chosen for splitting.
   2. Scan every insn, recording candidate registers in
      DECOMPOSABLE_CONTEXT and setting up REG_COPY_GRAPH.
   3. Decompose the remaining candidates and rewrite the insns that
      refer to them.
   4. Split the basic blocks recorded in SUB_BLOCKS.
   5. Free the working bitmaps and the copy graph.  */
1398 static void
1399 decompose_multiword_subregs (bool decompose_copies)
1401 unsigned int max;
1402 basic_block bb;
1403 bool speed_p;
/* With a dump file active, report the choices for both size and speed.  */
1405 if (dump_file)
1407 dump_choices (false, "size");
1408 dump_choices (true, "speed");
1411 /* Check if this target even has any modes to consider lowering. */
1412 if (!choices[false].something_to_do && !choices[true].something_to_do)
1414 if (dump_file)
1415 fprintf (dump_file, "Nothing to do!\n");
1416 return;
1419 max = max_reg_num ();
1421 /* First see if there are any multi-word pseudo-registers. If there
1422 aren't, there is nothing we can do. This should speed up this
1423 pass in the normal case, since it should be faster than scanning
1424 all the insns. */
1426 unsigned int i;
1427 bool useful_modes_seen = false;
1429 for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
1430 if (regno_reg_rtx[i] != NULL)
1432 enum machine_mode mode = GET_MODE (regno_reg_rtx[i]);
1433 if (choices[false].move_modes_to_split[(int) mode]
1434 || choices[true].move_modes_to_split[(int) mode])
1436 useful_modes_seen = true;
1437 break;
1441 if (!useful_modes_seen)
1443 if (dump_file)
1444 fprintf (dump_file, "Nothing to lower in this function.\n");
1445 return;
/* When DF is set, defer insn rescans and call run_word_dce before the
   scan.  */
1449 if (df)
1451 df_set_flags (DF_DEFER_INSN_RESCAN);
1452 run_word_dce ();
1455 /* FIXME: It may be possible to change this code to look for each
1456 multi-word pseudo-register and to find each insn which sets or
1457 uses that register. That should be faster than scanning all the
1458 insns. */
1460 decomposable_context = BITMAP_ALLOC (NULL);
1461 non_decomposable_context = BITMAP_ALLOC (NULL);
1462 subreg_context = BITMAP_ALLOC (NULL);
/* One copy-graph slot per register number; every slot starts out as
   zero.  */
1464 reg_copy_graph.create (max);
1465 reg_copy_graph.safe_grow_cleared (max);
1466 memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);
/* SPEED_P is computed once for the whole function and passed to the
   classification helpers below.  */
1468 speed_p = optimize_function_for_speed_p (cfun);
1469 FOR_EACH_BB_FN (bb, cfun)
1471 rtx_insn *insn;
1473 FOR_BB_INSNS (bb, insn)
1475 rtx set;
1476 enum classify_move_insn cmi;
1477 int i, n;
/* Non-insns and bare CLOBBER/USE patterns are not classified here.  */
1479 if (!INSN_P (insn)
1480 || GET_CODE (PATTERN (insn)) == CLOBBER
1481 || GET_CODE (PATTERN (insn)) == USE)
1482 continue;
1484 recog_memoized (insn);
/* A decomposable shift or zero-extend has already had its registers
   recorded by the helper, so nothing more is needed for this insn.  */
1486 if (find_decomposable_shift_zext (insn, speed_p))
1487 continue;
1489 extract_insn (insn);
1491 set = simple_move (insn, speed_p);
1493 if (!set)
1494 cmi = NOT_SIMPLE_MOVE;
1495 else
1497 /* We mark pseudo-to-pseudo copies as decomposable during the
1498 second pass only. The first pass is so early that there is
1499 good chance such moves will be optimized away completely by
1500 subsequent optimizations anyway.
1502 However, we call find_pseudo_copy even during the first pass
1503 so as to properly set up the reg_copy_graph. */
1504 if (find_pseudo_copy (set))
1505 cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
1506 else
1507 cmi = SIMPLE_MOVE;
/* Walk each recognized operand, handing the current classification to
   find_decomposable_subregs through &CMI.  */
1510 n = recog_data.n_operands;
1511 for (i = 0; i < n; ++i)
1513 for_each_rtx (&recog_data.operand[i],
1514 find_decomposable_subregs,
1515 &cmi);
1517 /* We handle ASM_OPERANDS as a special case to support
1518 things like x86 rdtsc which returns a DImode value.
1519 We can decompose the output, which will certainly be
1520 operand 0, but not the inputs. */
1522 if (cmi == SIMPLE_MOVE
1523 && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
1525 gcc_assert (i == 0);
1526 cmi = NOT_SIMPLE_MOVE;
/* Drop every register that was also recorded in
   NON_DECOMPOSABLE_CONTEXT from the candidate set.  */
1532 bitmap_and_compl_into (decomposable_context, non_decomposable_context);
1533 if (!bitmap_empty_p (decomposable_context))
1535 sbitmap sub_blocks;
1536 unsigned int i;
1537 sbitmap_iterator sbi;
1538 bitmap_iterator iter;
1539 unsigned int regno;
1541 propagate_pseudo_copies ();
/* SUB_BLOCKS collects the indices of the basic blocks that have to be
   split once all insns have been rewritten.  */
1543 sub_blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
1544 bitmap_clear (sub_blocks);
/* Decompose every register that is still a candidate.  */
1546 EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
1547 decompose_register (regno);
/* Second walk over all insns: dispatch on the kind of pattern and
   resolve references to decomposed registers.  */
1549 FOR_EACH_BB_FN (bb, cfun)
1551 rtx_insn *insn;
1553 FOR_BB_INSNS (bb, insn)
1555 rtx pat;
1557 if (!INSN_P (insn))
1558 continue;
1560 pat = PATTERN (insn);
1561 if (GET_CODE (pat) == CLOBBER)
1562 resolve_clobber (pat, insn);
1563 else if (GET_CODE (pat) == USE)
1564 resolve_use (pat, insn);
1565 else if (DEBUG_INSN_P (insn))
1566 resolve_debug (insn);
1567 else
1569 rtx set;
1570 int i;
1572 recog_memoized (insn);
1573 extract_insn (insn);
1575 set = simple_move (insn, speed_p);
1576 if (set)
1578 rtx_insn *orig_insn = insn;
1579 bool cfi = control_flow_insn_p (insn);
1581 /* We can end up splitting loads to multi-word pseudos
1582 into separate loads to machine word size pseudos.
1583 When this happens, we first had one load that can
1584 throw, and after resolve_simple_move we'll have a
1585 bunch of loads (at least two). All those loads may
1586 trap if we can have non-call exceptions, so they
1587 all will end the current basic block. We split the
1588 block after the outer loop over all insns, but we
1589 make sure here that we will be able to split the
1590 basic block and still produce the correct control
1591 flow graph for it. */
1592 gcc_assert (!cfi
1593 || (cfun->can_throw_non_call_exceptions
1594 && can_throw_internal (insn)));
/* If the move was replaced, continue with the insn that
   resolve_simple_move returned and re-extract its operands.  */
1596 insn = resolve_simple_move (set, insn);
1597 if (insn != orig_insn)
1599 recog_memoized (insn);
1600 extract_insn (insn);
1602 if (cfi)
1603 bitmap_set_bit (sub_blocks, bb->index);
1606 else
1608 rtx_insn *decomposed_shift;
1610 decomposed_shift = resolve_shift_zext (insn);
1611 if (decomposed_shift != NULL_RTX)
1613 insn = decomposed_shift;
1614 recog_memoized (insn);
1615 extract_insn (insn);
/* Resolve SUBREG uses inside every operand of the current insn, then
   its register notes.  */
1619 for (i = recog_data.n_operands - 1; i >= 0; --i)
1620 for_each_rtx (recog_data.operand_loc[i],
1621 resolve_subreg_use,
1622 insn);
1624 resolve_reg_notes (insn);
/* If any change was queued, copy each operand into its duplicate
   locations as part of the same group and then apply the group, which
   is asserted to succeed.  */
1626 if (num_validated_changes () > 0)
1628 for (i = recog_data.n_dups - 1; i >= 0; --i)
1630 rtx *pl = recog_data.dup_loc[i];
1631 int dup_num = recog_data.dup_num[i];
1632 rtx *px = recog_data.operand_loc[dup_num];
1634 validate_unshare_change (insn, pl, *px, 1);
1637 i = apply_change_group ();
1638 gcc_assert (i);
1644 /* If we had insns to split that caused control flow insns in the middle
1645 of a basic block, split those blocks now. Note that we only handle
1646 the case where splitting a load has caused multiple possibly trapping
1647 loads to appear. */
1648 EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
1650 rtx_insn *insn, *end;
1651 edge fallthru;
1653 bb = BASIC_BLOCK_FOR_FN (cfun, i);
1654 insn = BB_HEAD (bb);
1655 end = BB_END (bb);
/* END is the last insn of the original block; the walk continues
   through the blocks created by splitting until it reaches END.  */
1657 while (insn != end)
1659 if (control_flow_insn_p (insn))
1661 /* Split the block after insn. There will be a fallthru
1662 edge, which is OK so we keep it. We have to create the
1663 exception edges ourselves. */
1664 fallthru = split_block (bb, insn);
1665 rtl_make_eh_edge (NULL, bb, BB_END (bb));
1666 bb = fallthru->dest;
1667 insn = BB_HEAD (bb);
1669 else
1670 insn = NEXT_INSN (insn);
1674 sbitmap_free (sub_blocks);
/* Free each non-null bitmap stored in the copy graph, then the graph
   itself and the three context bitmaps.  */
1678 unsigned int i;
1679 bitmap b;
1681 FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
1682 if (b)
1683 BITMAP_FREE (b);
1686 reg_copy_graph.release ();
1688 BITMAP_FREE (decomposable_context);
1689 BITMAP_FREE (non_decomposable_context);
1690 BITMAP_FREE (subreg_context);
1693 /* Implement first lower subreg pass. */
1695 namespace {
1697 const pass_data pass_data_lower_subreg =
1699 RTL_PASS, /* type */
1700 "subreg1", /* name */
1701 OPTGROUP_NONE, /* optinfo_flags */
1702 TV_LOWER_SUBREG, /* tv_id */
1703 0, /* properties_required */
1704 0, /* properties_provided */
1705 0, /* properties_destroyed */
1706 0, /* todo_flags_start */
1707 0, /* todo_flags_finish */
1710 class pass_lower_subreg : public rtl_opt_pass
1712 public:
1713 pass_lower_subreg (gcc::context *ctxt)
1714 : rtl_opt_pass (pass_data_lower_subreg, ctxt)
1717 /* opt_pass methods: */
1718 virtual bool gate (function *) { return flag_split_wide_types != 0; }
1719 virtual unsigned int execute (function *)
1721 decompose_multiword_subregs (false);
1722 return 0;
1725 }; // class pass_lower_subreg
1727 } // anon namespace
1729 rtl_opt_pass *
1730 make_pass_lower_subreg (gcc::context *ctxt)
1732 return new pass_lower_subreg (ctxt);
1735 /* Implement second lower subreg pass. */
1737 namespace {
1739 const pass_data pass_data_lower_subreg2 =
1741 RTL_PASS, /* type */
1742 "subreg2", /* name */
1743 OPTGROUP_NONE, /* optinfo_flags */
1744 TV_LOWER_SUBREG, /* tv_id */
1745 0, /* properties_required */
1746 0, /* properties_provided */
1747 0, /* properties_destroyed */
1748 0, /* todo_flags_start */
1749 TODO_df_finish, /* todo_flags_finish */
1752 class pass_lower_subreg2 : public rtl_opt_pass
1754 public:
1755 pass_lower_subreg2 (gcc::context *ctxt)
1756 : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
1759 /* opt_pass methods: */
1760 virtual bool gate (function *) { return flag_split_wide_types != 0; }
1761 virtual unsigned int execute (function *)
1763 decompose_multiword_subregs (true);
1764 return 0;
1767 }; // class pass_lower_subreg2
1769 } // anon namespace
1771 rtl_opt_pass *
1772 make_pass_lower_subreg2 (gcc::context *ctxt)
1774 return new pass_lower_subreg2 (ctxt);