2015-05-01 Paolo Carlini <paolo.carlini@oracle.com>
[official-gcc.git] / gcc / lower-subreg.c
blob9d5fd3be31f34106716e195ecb97fd1e0c5367dc
1 /* Decompose multiword subregs.
2 Copyright (C) 2007-2015 Free Software Foundation, Inc.
3 Contributed by Richard Henderson <rth@redhat.com>
4 Ian Lance Taylor <iant@google.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "machmode.h"
26 #include "tm.h"
27 #include "hash-set.h"
28 #include "vec.h"
29 #include "double-int.h"
30 #include "input.h"
31 #include "alias.h"
32 #include "symtab.h"
33 #include "wide-int.h"
34 #include "inchash.h"
35 #include "tree.h"
36 #include "rtl.h"
37 #include "tm_p.h"
38 #include "flags.h"
39 #include "insn-config.h"
40 #include "obstack.h"
41 #include "predict.h"
42 #include "hard-reg-set.h"
43 #include "function.h"
44 #include "dominance.h"
45 #include "cfg.h"
46 #include "cfgrtl.h"
47 #include "cfgbuild.h"
48 #include "basic-block.h"
49 #include "recog.h"
50 #include "bitmap.h"
51 #include "dce.h"
52 #include "hashtab.h"
53 #include "statistics.h"
54 #include "real.h"
55 #include "fixed-value.h"
56 #include "expmed.h"
57 #include "dojump.h"
58 #include "explow.h"
59 #include "calls.h"
60 #include "emit-rtl.h"
61 #include "varasm.h"
62 #include "stmt.h"
63 #include "expr.h"
64 #include "except.h"
65 #include "regs.h"
66 #include "tree-pass.h"
67 #include "df.h"
68 #include "lower-subreg.h"
69 #include "rtl-iter.h"
71 #ifdef STACK_GROWS_DOWNWARD
72 # undef STACK_GROWS_DOWNWARD
73 # define STACK_GROWS_DOWNWARD 1
74 #else
75 # define STACK_GROWS_DOWNWARD 0
76 #endif
79 /* Decompose multi-word pseudo-registers into individual
80 pseudo-registers when possible and profitable. This is possible
81 when all the uses of a multi-word register are via SUBREG, or are
82 copies of the register to another location. Breaking apart the
83 register permits more CSE and permits better register allocation.
84 This is profitable if the machine does not have move instructions
85 to do this.
87 This pass only splits moves with modes that are wider than
88 word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
89 integer modes that are twice the width of word_mode. The latter
90 could be generalized if there was a need to do this, but the trend in
91 architectures is to not need this.
93 There are two useful preprocessor defines for use by maintainers:
95 #define LOG_COSTS 1
97 if you wish to see the actual cost estimates that are being used
98 for each mode wider than word mode and the cost estimates for zero
99 extension and the shifts. This can be useful when port maintainers
100 are tuning insn rtx costs.
102 #define FORCE_LOWERING 1
104 if you wish to test the pass with all the transformation forced on.
105 This can be useful for finding bugs in the transformations. */
107 #define LOG_COSTS 0
108 #define FORCE_LOWERING 0
110 /* Bit N in this bitmap is set if regno N is used in a context in
111 which we can decompose it. */
112 static bitmap decomposable_context;
114 /* Bit N in this bitmap is set if regno N is used in a context in
115 which it can not be decomposed. */
116 static bitmap non_decomposable_context;
/* Bit N in this bitmap is set if regno N is used in a subreg
   which changes the mode but not the size.  This typically happens
   when the register is accessed as a floating-point value; we want to
   avoid generating accesses to its subwords in integer modes.  */
122 static bitmap subreg_context;
124 /* Bit N in the bitmap in element M of this array is set if there is a
125 copy from reg M to reg N. */
126 static vec<bitmap> reg_copy_graph;
128 struct target_lower_subreg default_target_lower_subreg;
129 #if SWITCHABLE_TARGET
130 struct target_lower_subreg *this_target_lower_subreg
131 = &default_target_lower_subreg;
132 #endif
134 #define twice_word_mode \
135 this_target_lower_subreg->x_twice_word_mode
136 #define choices \
137 this_target_lower_subreg->x_choices
139 /* RTXes used while computing costs. */
140 struct cost_rtxes {
141 /* Source and target registers. */
142 rtx source;
143 rtx target;
145 /* A twice_word_mode ZERO_EXTEND of SOURCE. */
146 rtx zext;
148 /* A shift of SOURCE. */
149 rtx shift;
151 /* A SET of TARGET. */
152 rtx set;
155 /* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
156 rtxes in RTXES. SPEED_P selects between the speed and size cost. */
158 static int
159 shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
160 machine_mode mode, int op1)
162 PUT_CODE (rtxes->shift, code);
163 PUT_MODE (rtxes->shift, mode);
164 PUT_MODE (rtxes->source, mode);
165 XEXP (rtxes->shift, 1) = GEN_INT (op1);
166 return set_src_cost (rtxes->shift, speed_p);
169 /* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
170 to true if it is profitable to split a double-word CODE shift
171 of X + BITS_PER_WORD bits. SPEED_P says whether we are testing
172 for speed or size profitability.
174 Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is
175 the cost of moving zero into a word-mode register. WORD_MOVE_COST
176 is the cost of moving between word registers. */
178 static void
179 compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
180 bool *splitting, enum rtx_code code,
181 int word_move_zero_cost, int word_move_cost)
183 int wide_cost, narrow_cost, upper_cost, i;
185 for (i = 0; i < BITS_PER_WORD; i++)
187 wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
188 i + BITS_PER_WORD);
189 if (i == 0)
190 narrow_cost = word_move_cost;
191 else
192 narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
194 if (code != ASHIFTRT)
195 upper_cost = word_move_zero_cost;
196 else if (i == BITS_PER_WORD - 1)
197 upper_cost = word_move_cost;
198 else
199 upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
200 BITS_PER_WORD - 1);
202 if (LOG_COSTS)
203 fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
204 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
205 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);
207 if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
208 splitting[i] = true;
212 /* Compute what we should do when optimizing for speed or size; SPEED_P
213 selects which. Use RTXES for computing costs. */
215 static void
216 compute_costs (bool speed_p, struct cost_rtxes *rtxes)
218 unsigned int i;
219 int word_move_zero_cost, word_move_cost;
221 PUT_MODE (rtxes->target, word_mode);
222 SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
223 word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);
225 SET_SRC (rtxes->set) = rtxes->source;
226 word_move_cost = set_rtx_cost (rtxes->set, speed_p);
228 if (LOG_COSTS)
229 fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
230 GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);
232 for (i = 0; i < MAX_MACHINE_MODE; i++)
234 machine_mode mode = (machine_mode) i;
235 int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
236 if (factor > 1)
238 int mode_move_cost;
240 PUT_MODE (rtxes->target, mode);
241 PUT_MODE (rtxes->source, mode);
242 mode_move_cost = set_rtx_cost (rtxes->set, speed_p);
244 if (LOG_COSTS)
245 fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
246 GET_MODE_NAME (mode), mode_move_cost,
247 word_move_cost, factor);
249 if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
251 choices[speed_p].move_modes_to_split[i] = true;
252 choices[speed_p].something_to_do = true;
257 /* For the moves and shifts, the only case that is checked is one
258 where the mode of the target is an integer mode twice the width
259 of the word_mode.
261 If it is not profitable to split a double word move then do not
262 even consider the shifts or the zero extension. */
263 if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
265 int zext_cost;
267 /* The only case here to check to see if moving the upper part with a
268 zero is cheaper than doing the zext itself. */
269 PUT_MODE (rtxes->source, word_mode);
270 zext_cost = set_src_cost (rtxes->zext, speed_p);
272 if (LOG_COSTS)
273 fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
274 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
275 zext_cost, word_move_cost, word_move_zero_cost);
277 if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
278 choices[speed_p].splitting_zext = true;
280 compute_splitting_shift (speed_p, rtxes,
281 choices[speed_p].splitting_ashift, ASHIFT,
282 word_move_zero_cost, word_move_cost);
283 compute_splitting_shift (speed_p, rtxes,
284 choices[speed_p].splitting_lshiftrt, LSHIFTRT,
285 word_move_zero_cost, word_move_cost);
286 compute_splitting_shift (speed_p, rtxes,
287 choices[speed_p].splitting_ashiftrt, ASHIFTRT,
288 word_move_zero_cost, word_move_cost);
292 /* Do one-per-target initialisation. This involves determining
293 which operations on the machine are profitable. If none are found,
294 then the pass just returns when called. */
296 void
297 init_lower_subreg (void)
299 struct cost_rtxes rtxes;
301 memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
303 twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode);
305 rtxes.target = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER);
306 rtxes.source = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER + 1);
307 rtxes.set = gen_rtx_SET (VOIDmode, rtxes.target, rtxes.source);
308 rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
309 rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
311 if (LOG_COSTS)
312 fprintf (stderr, "\nSize costs\n==========\n\n");
313 compute_costs (false, &rtxes);
315 if (LOG_COSTS)
316 fprintf (stderr, "\nSpeed costs\n===========\n\n");
317 compute_costs (true, &rtxes);
320 static bool
321 simple_move_operand (rtx x)
323 if (GET_CODE (x) == SUBREG)
324 x = SUBREG_REG (x);
326 if (!OBJECT_P (x))
327 return false;
329 if (GET_CODE (x) == LABEL_REF
330 || GET_CODE (x) == SYMBOL_REF
331 || GET_CODE (x) == HIGH
332 || GET_CODE (x) == CONST)
333 return false;
335 if (MEM_P (x)
336 && (MEM_VOLATILE_P (x)
337 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
338 return false;
340 return true;
343 /* If INSN is a single set between two objects that we want to split,
344 return the single set. SPEED_P says whether we are optimizing
345 INSN for speed or size.
347 INSN should have been passed to recog and extract_insn before this
348 is called. */
350 static rtx
351 simple_move (rtx_insn *insn, bool speed_p)
353 rtx x;
354 rtx set;
355 machine_mode mode;
357 if (recog_data.n_operands != 2)
358 return NULL_RTX;
360 set = single_set (insn);
361 if (!set)
362 return NULL_RTX;
364 x = SET_DEST (set);
365 if (x != recog_data.operand[0] && x != recog_data.operand[1])
366 return NULL_RTX;
367 if (!simple_move_operand (x))
368 return NULL_RTX;
370 x = SET_SRC (set);
371 if (x != recog_data.operand[0] && x != recog_data.operand[1])
372 return NULL_RTX;
373 /* For the src we can handle ASM_OPERANDS, and it is beneficial for
374 things like x86 rdtsc which returns a DImode value. */
375 if (GET_CODE (x) != ASM_OPERANDS
376 && !simple_move_operand (x))
377 return NULL_RTX;
379 /* We try to decompose in integer modes, to avoid generating
380 inefficient code copying between integer and floating point
381 registers. That means that we can't decompose if this is a
382 non-integer mode for which there is no integer mode of the same
383 size. */
384 mode = GET_MODE (SET_DEST (set));
385 if (!SCALAR_INT_MODE_P (mode)
386 && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0)
387 == BLKmode))
388 return NULL_RTX;
390 /* Reject PARTIAL_INT modes. They are used for processor specific
391 purposes and it's probably best not to tamper with them. */
392 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
393 return NULL_RTX;
395 if (!choices[speed_p].move_modes_to_split[(int) mode])
396 return NULL_RTX;
398 return set;
401 /* If SET is a copy from one multi-word pseudo-register to another,
402 record that in reg_copy_graph. Return whether it is such a
403 copy. */
405 static bool
406 find_pseudo_copy (rtx set)
408 rtx dest = SET_DEST (set);
409 rtx src = SET_SRC (set);
410 unsigned int rd, rs;
411 bitmap b;
413 if (!REG_P (dest) || !REG_P (src))
414 return false;
416 rd = REGNO (dest);
417 rs = REGNO (src);
418 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
419 return false;
421 b = reg_copy_graph[rs];
422 if (b == NULL)
424 b = BITMAP_ALLOC (NULL);
425 reg_copy_graph[rs] = b;
428 bitmap_set_bit (b, rd);
430 return true;
433 /* Look through the registers in DECOMPOSABLE_CONTEXT. For each case
434 where they are copied to another register, add the register to
435 which they are copied to DECOMPOSABLE_CONTEXT. Use
436 NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
437 copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */
439 static void
440 propagate_pseudo_copies (void)
442 bitmap queue, propagate;
444 queue = BITMAP_ALLOC (NULL);
445 propagate = BITMAP_ALLOC (NULL);
447 bitmap_copy (queue, decomposable_context);
450 bitmap_iterator iter;
451 unsigned int i;
453 bitmap_clear (propagate);
455 EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
457 bitmap b = reg_copy_graph[i];
458 if (b)
459 bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
462 bitmap_and_compl (queue, propagate, decomposable_context);
463 bitmap_ior_into (decomposable_context, propagate);
465 while (!bitmap_empty_p (queue));
467 BITMAP_FREE (queue);
468 BITMAP_FREE (propagate);
/* A pointer to one of these values is passed to
   find_decomposable_subregs.  */

enum classify_move_insn
{
  /* Not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* A simple move we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* Any other simple move.  */
  SIMPLE_MOVE
};
484 /* If we find a SUBREG in *LOC which we could use to decompose a
485 pseudo-register, set a bit in DECOMPOSABLE_CONTEXT. If we find an
486 unadorned register which is not a simple pseudo-register copy,
487 DATA will point at the type of move, and we set a bit in
488 DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate. */
490 static void
491 find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
493 subrtx_var_iterator::array_type array;
494 FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
496 rtx x = *iter;
497 if (GET_CODE (x) == SUBREG)
499 rtx inner = SUBREG_REG (x);
500 unsigned int regno, outer_size, inner_size, outer_words, inner_words;
502 if (!REG_P (inner))
503 continue;
505 regno = REGNO (inner);
506 if (HARD_REGISTER_NUM_P (regno))
508 iter.skip_subrtxes ();
509 continue;
512 outer_size = GET_MODE_SIZE (GET_MODE (x));
513 inner_size = GET_MODE_SIZE (GET_MODE (inner));
514 outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
515 inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
517 /* We only try to decompose single word subregs of multi-word
518 registers. When we find one, we return -1 to avoid iterating
519 over the inner register.
521 ??? This doesn't allow, e.g., DImode subregs of TImode values
522 on 32-bit targets. We would need to record the way the
523 pseudo-register was used, and only decompose if all the uses
524 were the same number and size of pieces. Hopefully this
525 doesn't happen much. */
527 if (outer_words == 1 && inner_words > 1)
529 bitmap_set_bit (decomposable_context, regno);
530 iter.skip_subrtxes ();
531 continue;
534 /* If this is a cast from one mode to another, where the modes
535 have the same size, and they are not tieable, then mark this
536 register as non-decomposable. If we decompose it we are
537 likely to mess up whatever the backend is trying to do. */
538 if (outer_words > 1
539 && outer_size == inner_size
540 && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner)))
542 bitmap_set_bit (non_decomposable_context, regno);
543 bitmap_set_bit (subreg_context, regno);
544 iter.skip_subrtxes ();
545 continue;
548 else if (REG_P (x))
550 unsigned int regno;
552 /* We will see an outer SUBREG before we see the inner REG, so
553 when we see a plain REG here it means a direct reference to
554 the register.
556 If this is not a simple copy from one location to another,
557 then we can not decompose this register. If this is a simple
558 copy we want to decompose, and the mode is right,
559 then we mark the register as decomposable.
560 Otherwise we don't say anything about this register --
561 it could be decomposed, but whether that would be
562 profitable depends upon how it is used elsewhere.
564 We only set bits in the bitmap for multi-word
565 pseudo-registers, since those are the only ones we care about
566 and it keeps the size of the bitmaps down. */
568 regno = REGNO (x);
569 if (!HARD_REGISTER_NUM_P (regno)
570 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
572 switch (*pcmi)
574 case NOT_SIMPLE_MOVE:
575 bitmap_set_bit (non_decomposable_context, regno);
576 break;
577 case DECOMPOSABLE_SIMPLE_MOVE:
578 if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
579 bitmap_set_bit (decomposable_context, regno);
580 break;
581 case SIMPLE_MOVE:
582 break;
583 default:
584 gcc_unreachable ();
588 else if (MEM_P (x))
590 enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;
592 /* Any registers used in a MEM do not participate in a
593 SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE. Do our own recursion
594 here, and return -1 to block the parent's recursion. */
595 find_decomposable_subregs (&XEXP (x, 0), &cmi_mem);
596 iter.skip_subrtxes ();
601 /* Decompose REGNO into word-sized components. We smash the REG node
602 in place. This ensures that (1) something goes wrong quickly if we
603 fail to make some replacement, and (2) the debug information inside
604 the symbol table is automatically kept up to date. */
606 static void
607 decompose_register (unsigned int regno)
609 rtx reg;
610 unsigned int words, i;
611 rtvec v;
613 reg = regno_reg_rtx[regno];
615 regno_reg_rtx[regno] = NULL_RTX;
617 words = GET_MODE_SIZE (GET_MODE (reg));
618 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
620 v = rtvec_alloc (words);
621 for (i = 0; i < words; ++i)
622 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
624 PUT_CODE (reg, CONCATN);
625 XVEC (reg, 0) = v;
627 if (dump_file)
629 fprintf (dump_file, "; Splitting reg %u ->", regno);
630 for (i = 0; i < words; ++i)
631 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
632 fputc ('\n', dump_file);
636 /* Get a SUBREG of a CONCATN. */
638 static rtx
639 simplify_subreg_concatn (machine_mode outermode, rtx op,
640 unsigned int byte)
642 unsigned int inner_size;
643 machine_mode innermode, partmode;
644 rtx part;
645 unsigned int final_offset;
647 gcc_assert (GET_CODE (op) == CONCATN);
648 gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);
650 innermode = GET_MODE (op);
651 gcc_assert (byte < GET_MODE_SIZE (innermode));
652 gcc_assert (GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (innermode));
654 inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
655 part = XVECEXP (op, 0, byte / inner_size);
656 partmode = GET_MODE (part);
658 /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
659 regular CONST_VECTORs. They have vector or integer modes, depending
660 on the capabilities of the target. Cope with them. */
661 if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
662 partmode = GET_MODE_INNER (innermode);
663 else if (partmode == VOIDmode)
665 enum mode_class mclass = GET_MODE_CLASS (innermode);
666 partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0);
669 final_offset = byte % inner_size;
670 if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
671 return NULL_RTX;
673 return simplify_gen_subreg (outermode, part, partmode, final_offset);
676 /* Wrapper around simplify_gen_subreg which handles CONCATN. */
678 static rtx
679 simplify_gen_subreg_concatn (machine_mode outermode, rtx op,
680 machine_mode innermode, unsigned int byte)
682 rtx ret;
684 /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
685 If OP is a SUBREG of a CONCATN, then it must be a simple mode
686 change with the same size and offset 0, or it must extract a
687 part. We shouldn't see anything else here. */
688 if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
690 rtx op2;
692 if ((GET_MODE_SIZE (GET_MODE (op))
693 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
694 && SUBREG_BYTE (op) == 0)
695 return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
696 GET_MODE (SUBREG_REG (op)), byte);
698 op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
699 SUBREG_BYTE (op));
700 if (op2 == NULL_RTX)
702 /* We don't handle paradoxical subregs here. */
703 gcc_assert (GET_MODE_SIZE (outermode)
704 <= GET_MODE_SIZE (GET_MODE (op)));
705 gcc_assert (GET_MODE_SIZE (GET_MODE (op))
706 <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))));
707 op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
708 byte + SUBREG_BYTE (op));
709 gcc_assert (op2 != NULL_RTX);
710 return op2;
713 op = op2;
714 gcc_assert (op != NULL_RTX);
715 gcc_assert (innermode == GET_MODE (op));
718 if (GET_CODE (op) == CONCATN)
719 return simplify_subreg_concatn (outermode, op, byte);
721 ret = simplify_gen_subreg (outermode, op, innermode, byte);
723 /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
724 resolve_simple_move will ask for the high part of the paradoxical
725 subreg, which does not have a value. Just return a zero. */
726 if (ret == NULL_RTX
727 && GET_CODE (op) == SUBREG
728 && SUBREG_BYTE (op) == 0
729 && (GET_MODE_SIZE (innermode)
730 > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))))
731 return CONST0_RTX (outermode);
733 gcc_assert (ret != NULL_RTX);
734 return ret;
737 /* Return whether we should resolve X into the registers into which it
738 was decomposed. */
740 static bool
741 resolve_reg_p (rtx x)
743 return GET_CODE (x) == CONCATN;
746 /* Return whether X is a SUBREG of a register which we need to
747 resolve. */
749 static bool
750 resolve_subreg_p (rtx x)
752 if (GET_CODE (x) != SUBREG)
753 return false;
754 return resolve_reg_p (SUBREG_REG (x));
757 /* Look for SUBREGs in *LOC which need to be decomposed. */
759 static bool
760 resolve_subreg_use (rtx *loc, rtx insn)
762 subrtx_ptr_iterator::array_type array;
763 FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
765 rtx *loc = *iter;
766 rtx x = *loc;
767 if (resolve_subreg_p (x))
769 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
770 SUBREG_BYTE (x));
772 /* It is possible for a note to contain a reference which we can
773 decompose. In this case, return 1 to the caller to indicate
774 that the note must be removed. */
775 if (!x)
777 gcc_assert (!insn);
778 return true;
781 validate_change (insn, loc, x, 1);
782 iter.skip_subrtxes ();
784 else if (resolve_reg_p (x))
785 /* Return 1 to the caller to indicate that we found a direct
786 reference to a register which is being decomposed. This can
787 happen inside notes, multiword shift or zero-extend
788 instructions. */
789 return true;
792 return false;
795 /* Resolve any decomposed registers which appear in register notes on
796 INSN. */
798 static void
799 resolve_reg_notes (rtx_insn *insn)
801 rtx *pnote, note;
803 note = find_reg_equal_equiv_note (insn);
804 if (note)
806 int old_count = num_validated_changes ();
807 if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX))
808 remove_note (insn, note);
809 else
810 if (old_count != num_validated_changes ())
811 df_notes_rescan (insn);
814 pnote = &REG_NOTES (insn);
815 while (*pnote != NULL_RTX)
817 bool del = false;
819 note = *pnote;
820 switch (REG_NOTE_KIND (note))
822 case REG_DEAD:
823 case REG_UNUSED:
824 if (resolve_reg_p (XEXP (note, 0)))
825 del = true;
826 break;
828 default:
829 break;
832 if (del)
833 *pnote = XEXP (note, 1);
834 else
835 pnote = &XEXP (note, 1);
839 /* Return whether X can be decomposed into subwords. */
841 static bool
842 can_decompose_p (rtx x)
844 if (REG_P (x))
846 unsigned int regno = REGNO (x);
848 if (HARD_REGISTER_NUM_P (regno))
850 unsigned int byte, num_bytes;
852 num_bytes = GET_MODE_SIZE (GET_MODE (x));
853 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
854 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
855 return false;
856 return true;
858 else
859 return !bitmap_bit_p (subreg_context, regno);
862 return true;
865 /* Decompose the registers used in a simple move SET within INSN. If
866 we don't change anything, return INSN, otherwise return the start
867 of the sequence of moves. */
869 static rtx_insn *
870 resolve_simple_move (rtx set, rtx_insn *insn)
872 rtx src, dest, real_dest;
873 rtx_insn *insns;
874 machine_mode orig_mode, dest_mode;
875 unsigned int words;
876 bool pushing;
878 src = SET_SRC (set);
879 dest = SET_DEST (set);
880 orig_mode = GET_MODE (dest);
882 words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
883 gcc_assert (words > 1);
885 start_sequence ();
887 /* We have to handle copying from a SUBREG of a decomposed reg where
888 the SUBREG is larger than word size. Rather than assume that we
889 can take a word_mode SUBREG of the destination, we copy to a new
890 register and then copy that to the destination. */
892 real_dest = NULL_RTX;
894 if (GET_CODE (src) == SUBREG
895 && resolve_reg_p (SUBREG_REG (src))
896 && (SUBREG_BYTE (src) != 0
897 || (GET_MODE_SIZE (orig_mode)
898 != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
900 real_dest = dest;
901 dest = gen_reg_rtx (orig_mode);
902 if (REG_P (real_dest))
903 REG_ATTRS (dest) = REG_ATTRS (real_dest);
906 /* Similarly if we are copying to a SUBREG of a decomposed reg where
907 the SUBREG is larger than word size. */
909 if (GET_CODE (dest) == SUBREG
910 && resolve_reg_p (SUBREG_REG (dest))
911 && (SUBREG_BYTE (dest) != 0
912 || (GET_MODE_SIZE (orig_mode)
913 != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
915 rtx reg, smove;
916 rtx_insn *minsn;
918 reg = gen_reg_rtx (orig_mode);
919 minsn = emit_move_insn (reg, src);
920 smove = single_set (minsn);
921 gcc_assert (smove != NULL_RTX);
922 resolve_simple_move (smove, minsn);
923 src = reg;
926 /* If we didn't have any big SUBREGS of decomposed registers, and
927 neither side of the move is a register we are decomposing, then
928 we don't have to do anything here. */
930 if (src == SET_SRC (set)
931 && dest == SET_DEST (set)
932 && !resolve_reg_p (src)
933 && !resolve_subreg_p (src)
934 && !resolve_reg_p (dest)
935 && !resolve_subreg_p (dest))
937 end_sequence ();
938 return insn;
941 /* It's possible for the code to use a subreg of a decomposed
942 register while forming an address. We need to handle that before
943 passing the address to emit_move_insn. We pass NULL_RTX as the
944 insn parameter to resolve_subreg_use because we can not validate
945 the insn yet. */
946 if (MEM_P (src) || MEM_P (dest))
948 int acg;
950 if (MEM_P (src))
951 resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
952 if (MEM_P (dest))
953 resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
954 acg = apply_change_group ();
955 gcc_assert (acg);
958 /* If SRC is a register which we can't decompose, or has side
959 effects, we need to move via a temporary register. */
961 if (!can_decompose_p (src)
962 || side_effects_p (src)
963 || GET_CODE (src) == ASM_OPERANDS)
965 rtx reg;
967 reg = gen_reg_rtx (orig_mode);
969 #ifdef AUTO_INC_DEC
971 rtx move = emit_move_insn (reg, src);
972 if (MEM_P (src))
974 rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
975 if (note)
976 add_reg_note (move, REG_INC, XEXP (note, 0));
979 #else
980 emit_move_insn (reg, src);
981 #endif
982 src = reg;
985 /* If DEST is a register which we can't decompose, or has side
986 effects, we need to first move to a temporary register. We
987 handle the common case of pushing an operand directly. We also
988 go through a temporary register if it holds a floating point
989 value. This gives us better code on systems which can't move
990 data easily between integer and floating point registers. */
992 dest_mode = orig_mode;
993 pushing = push_operand (dest, dest_mode);
994 if (!can_decompose_p (dest)
995 || (side_effects_p (dest) && !pushing)
996 || (!SCALAR_INT_MODE_P (dest_mode)
997 && !resolve_reg_p (dest)
998 && !resolve_subreg_p (dest)))
1000 if (real_dest == NULL_RTX)
1001 real_dest = dest;
1002 if (!SCALAR_INT_MODE_P (dest_mode))
1004 dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
1005 MODE_INT, 0);
1006 gcc_assert (dest_mode != BLKmode);
1008 dest = gen_reg_rtx (dest_mode);
1009 if (REG_P (real_dest))
1010 REG_ATTRS (dest) = REG_ATTRS (real_dest);
1013 if (pushing)
1015 unsigned int i, j, jinc;
1017 gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
1018 gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
1019 gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);
1021 if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
1023 j = 0;
1024 jinc = 1;
1026 else
1028 j = words - 1;
1029 jinc = -1;
1032 for (i = 0; i < words; ++i, j += jinc)
1034 rtx temp;
1036 temp = copy_rtx (XEXP (dest, 0));
1037 temp = adjust_automodify_address_nv (dest, word_mode, temp,
1038 j * UNITS_PER_WORD);
1039 emit_move_insn (temp,
1040 simplify_gen_subreg_concatn (word_mode, src,
1041 orig_mode,
1042 j * UNITS_PER_WORD));
1045 else
1047 unsigned int i;
1049 if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
1050 emit_clobber (dest);
1052 for (i = 0; i < words; ++i)
1053 emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
1054 dest_mode,
1055 i * UNITS_PER_WORD),
1056 simplify_gen_subreg_concatn (word_mode, src,
1057 orig_mode,
1058 i * UNITS_PER_WORD));
1061 if (real_dest != NULL_RTX)
1063 rtx mdest, smove;
1064 rtx_insn *minsn;
1066 if (dest_mode == orig_mode)
1067 mdest = dest;
1068 else
1069 mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
1070 minsn = emit_move_insn (real_dest, mdest);
1072 #ifdef AUTO_INC_DEC
1073 if (MEM_P (real_dest)
1074 && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
1076 rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
1077 if (note)
1078 add_reg_note (minsn, REG_INC, XEXP (note, 0));
1080 #endif
1082 smove = single_set (minsn);
1083 gcc_assert (smove != NULL_RTX);
1085 resolve_simple_move (smove, minsn);
1088 insns = get_insns ();
1089 end_sequence ();
1091 copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);
1093 emit_insn_before (insns, insn);
1095 /* If we get here via self-recursion, then INSN is not yet in the insns
1096 chain and delete_insn will fail. We only want to remove INSN from the
1097 current sequence. See PR56738. */
1098 if (in_sequence_p ())
1099 remove_insn (insn);
1100 else
1101 delete_insn (insn);
1103 return insns;
1106 /* Change a CLOBBER of a decomposed register into a CLOBBER of the
1107 component registers. Return whether we changed something. */
1109 static bool
1110 resolve_clobber (rtx pat, rtx_insn *insn)
1112 rtx reg;
1113 machine_mode orig_mode;
1114 unsigned int words, i;
1115 int ret;
1117 reg = XEXP (pat, 0);
1118 if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
1119 return false;
1121 orig_mode = GET_MODE (reg);
1122 words = GET_MODE_SIZE (orig_mode);
1123 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1125 ret = validate_change (NULL_RTX, &XEXP (pat, 0),
1126 simplify_gen_subreg_concatn (word_mode, reg,
1127 orig_mode, 0),
1129 df_insn_rescan (insn);
1130 gcc_assert (ret != 0);
1132 for (i = words - 1; i > 0; --i)
1134 rtx x;
1136 x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
1137 i * UNITS_PER_WORD);
1138 x = gen_rtx_CLOBBER (VOIDmode, x);
1139 emit_insn_after (x, insn);
1142 resolve_reg_notes (insn);
1144 return true;
1147 /* A USE of a decomposed register is no longer meaningful. Return
1148 whether we changed something. */
1150 static bool
1151 resolve_use (rtx pat, rtx_insn *insn)
1153 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1155 delete_insn (insn);
1156 return true;
1159 resolve_reg_notes (insn);
1161 return false;
1164 /* A VAR_LOCATION can be simplified. */
1166 static void
1167 resolve_debug (rtx_insn *insn)
1169 subrtx_ptr_iterator::array_type array;
1170 FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
1172 rtx *loc = *iter;
1173 rtx x = *loc;
1174 if (resolve_subreg_p (x))
1176 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
1177 SUBREG_BYTE (x));
1179 if (x)
1180 *loc = x;
1181 else
1182 x = copy_rtx (*loc);
1184 if (resolve_reg_p (x))
1185 *loc = copy_rtx (x);
1188 df_insn_rescan (insn);
1190 resolve_reg_notes (insn);
1193 /* Check if INSN is a decomposable multiword-shift or zero-extend and
1194 set the decomposable_context bitmap accordingly. SPEED_P is true
1195 if we are optimizing INSN for speed rather than size. Return true
1196 if INSN is decomposable. */
1198 static bool
1199 find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
1201 rtx set;
1202 rtx op;
1203 rtx op_operand;
1205 set = single_set (insn);
1206 if (!set)
1207 return false;
1209 op = SET_SRC (set);
1210 if (GET_CODE (op) != ASHIFT
1211 && GET_CODE (op) != LSHIFTRT
1212 && GET_CODE (op) != ASHIFTRT
1213 && GET_CODE (op) != ZERO_EXTEND)
1214 return false;
1216 op_operand = XEXP (op, 0);
1217 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1218 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1219 || HARD_REGISTER_NUM_P (REGNO (op_operand))
1220 || GET_MODE (op) != twice_word_mode)
1221 return false;
1223 if (GET_CODE (op) == ZERO_EXTEND)
1225 if (GET_MODE (op_operand) != word_mode
1226 || !choices[speed_p].splitting_zext)
1227 return false;
1229 else /* left or right shift */
1231 bool *splitting = (GET_CODE (op) == ASHIFT
1232 ? choices[speed_p].splitting_ashift
1233 : GET_CODE (op) == ASHIFTRT
1234 ? choices[speed_p].splitting_ashiftrt
1235 : choices[speed_p].splitting_lshiftrt);
1236 if (!CONST_INT_P (XEXP (op, 1))
1237 || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1238 2 * BITS_PER_WORD - 1)
1239 || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1240 return false;
1242 bitmap_set_bit (decomposable_context, REGNO (op_operand));
1245 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1247 return true;
1250 /* Decompose a more than word wide shift (in INSN) of a multiword
1251 pseudo or a multiword zero-extend of a wordmode pseudo into a move
1252 and 'set to zero' insn. Return a pointer to the new insn when a
1253 replacement was done. */
1255 static rtx_insn *
1256 resolve_shift_zext (rtx_insn *insn)
1258 rtx set;
1259 rtx op;
1260 rtx op_operand;
1261 rtx_insn *insns;
1262 rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
1263 int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
1265 set = single_set (insn);
1266 if (!set)
1267 return NULL;
1269 op = SET_SRC (set);
1270 if (GET_CODE (op) != ASHIFT
1271 && GET_CODE (op) != LSHIFTRT
1272 && GET_CODE (op) != ASHIFTRT
1273 && GET_CODE (op) != ZERO_EXTEND)
1274 return NULL;
1276 op_operand = XEXP (op, 0);
1278 /* We can tear this operation apart only if the regs were already
1279 torn apart. */
1280 if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
1281 return NULL;
1283 /* src_reg_num is the number of the word mode register which we
1284 are operating on. For a left shift and a zero_extend on little
1285 endian machines this is register 0. */
1286 src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
1287 ? 1 : 0;
1289 if (WORDS_BIG_ENDIAN
1290 && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
1291 src_reg_num = 1 - src_reg_num;
1293 if (GET_CODE (op) == ZERO_EXTEND)
1294 dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
1295 else
1296 dest_reg_num = 1 - src_reg_num;
1298 offset1 = UNITS_PER_WORD * dest_reg_num;
1299 offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
1300 src_offset = UNITS_PER_WORD * src_reg_num;
1302 start_sequence ();
1304 dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1305 GET_MODE (SET_DEST (set)),
1306 offset1);
1307 dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1308 GET_MODE (SET_DEST (set)),
1309 offset2);
1310 src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
1311 GET_MODE (op_operand),
1312 src_offset);
1313 if (GET_CODE (op) == ASHIFTRT
1314 && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
1315 upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
1316 BITS_PER_WORD - 1, NULL_RTX, 0);
1318 if (GET_CODE (op) != ZERO_EXTEND)
1320 int shift_count = INTVAL (XEXP (op, 1));
1321 if (shift_count > BITS_PER_WORD)
1322 src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
1323 LSHIFT_EXPR : RSHIFT_EXPR,
1324 word_mode, src_reg,
1325 shift_count - BITS_PER_WORD,
1326 dest_reg, GET_CODE (op) != ASHIFTRT);
1329 if (dest_reg != src_reg)
1330 emit_move_insn (dest_reg, src_reg);
1331 if (GET_CODE (op) != ASHIFTRT)
1332 emit_move_insn (dest_upper, CONST0_RTX (word_mode));
1333 else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
1334 emit_move_insn (dest_upper, copy_rtx (src_reg));
1335 else
1336 emit_move_insn (dest_upper, upper_src);
1337 insns = get_insns ();
1339 end_sequence ();
1341 emit_insn_before (insns, insn);
1343 if (dump_file)
1345 rtx_insn *in;
1346 fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
1347 for (in = insns; in != insn; in = NEXT_INSN (in))
1348 fprintf (dump_file, "%d ", INSN_UID (in));
1349 fprintf (dump_file, "\n");
1352 delete_insn (insn);
1353 return insns;
1356 /* Print to dump_file a description of what we're doing with shift code CODE.
1357 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1359 static void
1360 dump_shift_choices (enum rtx_code code, bool *splitting)
1362 int i;
1363 const char *sep;
1365 fprintf (dump_file,
1366 " Splitting mode %s for %s lowering with shift amounts = ",
1367 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1368 sep = "";
1369 for (i = 0; i < BITS_PER_WORD; i++)
1370 if (splitting[i])
1372 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1373 sep = ",";
1375 fprintf (dump_file, "\n");
1378 /* Print to dump_file a description of what we're doing when optimizing
1379 for speed or size; SPEED_P says which. DESCRIPTION is a description
1380 of the SPEED_P choice. */
1382 static void
1383 dump_choices (bool speed_p, const char *description)
1385 unsigned int i;
1387 fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1389 for (i = 0; i < MAX_MACHINE_MODE; i++)
1390 if (GET_MODE_SIZE ((machine_mode) i) > UNITS_PER_WORD)
1391 fprintf (dump_file, " %s mode %s for copy lowering.\n",
1392 choices[speed_p].move_modes_to_split[i]
1393 ? "Splitting"
1394 : "Skipping",
1395 GET_MODE_NAME ((machine_mode) i));
1397 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
1398 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1399 GET_MODE_NAME (twice_word_mode));
1401 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1402 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1403 dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
1404 fprintf (dump_file, "\n");
1407 /* Look for registers which are always accessed via word-sized SUBREGs
1408 or -if DECOMPOSE_COPIES is true- via copies. Decompose these
1409 registers into several word-sized pseudo-registers. */
1411 static void
1412 decompose_multiword_subregs (bool decompose_copies)
1414 unsigned int max;
1415 basic_block bb;
1416 bool speed_p;
1418 if (dump_file)
1420 dump_choices (false, "size");
1421 dump_choices (true, "speed");
1424 /* Check if this target even has any modes to consider lowering. */
1425 if (!choices[false].something_to_do && !choices[true].something_to_do)
1427 if (dump_file)
1428 fprintf (dump_file, "Nothing to do!\n");
1429 return;
1432 max = max_reg_num ();
1434 /* First see if there are any multi-word pseudo-registers. If there
1435 aren't, there is nothing we can do. This should speed up this
1436 pass in the normal case, since it should be faster than scanning
1437 all the insns. */
1439 unsigned int i;
1440 bool useful_modes_seen = false;
1442 for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
1443 if (regno_reg_rtx[i] != NULL)
1445 machine_mode mode = GET_MODE (regno_reg_rtx[i]);
1446 if (choices[false].move_modes_to_split[(int) mode]
1447 || choices[true].move_modes_to_split[(int) mode])
1449 useful_modes_seen = true;
1450 break;
1454 if (!useful_modes_seen)
1456 if (dump_file)
1457 fprintf (dump_file, "Nothing to lower in this function.\n");
1458 return;
1462 if (df)
1464 df_set_flags (DF_DEFER_INSN_RESCAN);
1465 run_word_dce ();
1468 /* FIXME: It may be possible to change this code to look for each
1469 multi-word pseudo-register and to find each insn which sets or
1470 uses that register. That should be faster than scanning all the
1471 insns. */
1473 decomposable_context = BITMAP_ALLOC (NULL);
1474 non_decomposable_context = BITMAP_ALLOC (NULL);
1475 subreg_context = BITMAP_ALLOC (NULL);
1477 reg_copy_graph.create (max);
1478 reg_copy_graph.safe_grow_cleared (max);
1479 memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);
1481 speed_p = optimize_function_for_speed_p (cfun);
1482 FOR_EACH_BB_FN (bb, cfun)
1484 rtx_insn *insn;
1486 FOR_BB_INSNS (bb, insn)
1488 rtx set;
1489 enum classify_move_insn cmi;
1490 int i, n;
1492 if (!INSN_P (insn)
1493 || GET_CODE (PATTERN (insn)) == CLOBBER
1494 || GET_CODE (PATTERN (insn)) == USE)
1495 continue;
1497 recog_memoized (insn);
1499 if (find_decomposable_shift_zext (insn, speed_p))
1500 continue;
1502 extract_insn (insn);
1504 set = simple_move (insn, speed_p);
1506 if (!set)
1507 cmi = NOT_SIMPLE_MOVE;
1508 else
1510 /* We mark pseudo-to-pseudo copies as decomposable during the
1511 second pass only. The first pass is so early that there is
1512 good chance such moves will be optimized away completely by
1513 subsequent optimizations anyway.
1515 However, we call find_pseudo_copy even during the first pass
1516 so as to properly set up the reg_copy_graph. */
1517 if (find_pseudo_copy (set))
1518 cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
1519 else
1520 cmi = SIMPLE_MOVE;
1523 n = recog_data.n_operands;
1524 for (i = 0; i < n; ++i)
1526 find_decomposable_subregs (&recog_data.operand[i], &cmi);
1528 /* We handle ASM_OPERANDS as a special case to support
1529 things like x86 rdtsc which returns a DImode value.
1530 We can decompose the output, which will certainly be
1531 operand 0, but not the inputs. */
1533 if (cmi == SIMPLE_MOVE
1534 && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
1536 gcc_assert (i == 0);
1537 cmi = NOT_SIMPLE_MOVE;
1543 bitmap_and_compl_into (decomposable_context, non_decomposable_context);
1544 if (!bitmap_empty_p (decomposable_context))
1546 sbitmap sub_blocks;
1547 unsigned int i;
1548 sbitmap_iterator sbi;
1549 bitmap_iterator iter;
1550 unsigned int regno;
1552 propagate_pseudo_copies ();
1554 sub_blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
1555 bitmap_clear (sub_blocks);
1557 EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
1558 decompose_register (regno);
1560 FOR_EACH_BB_FN (bb, cfun)
1562 rtx_insn *insn;
1564 FOR_BB_INSNS (bb, insn)
1566 rtx pat;
1568 if (!INSN_P (insn))
1569 continue;
1571 pat = PATTERN (insn);
1572 if (GET_CODE (pat) == CLOBBER)
1573 resolve_clobber (pat, insn);
1574 else if (GET_CODE (pat) == USE)
1575 resolve_use (pat, insn);
1576 else if (DEBUG_INSN_P (insn))
1577 resolve_debug (insn);
1578 else
1580 rtx set;
1581 int i;
1583 recog_memoized (insn);
1584 extract_insn (insn);
1586 set = simple_move (insn, speed_p);
1587 if (set)
1589 rtx_insn *orig_insn = insn;
1590 bool cfi = control_flow_insn_p (insn);
1592 /* We can end up splitting loads to multi-word pseudos
1593 into separate loads to machine word size pseudos.
1594 When this happens, we first had one load that can
1595 throw, and after resolve_simple_move we'll have a
1596 bunch of loads (at least two). All those loads may
1597 trap if we can have non-call exceptions, so they
1598 all will end the current basic block. We split the
1599 block after the outer loop over all insns, but we
1600 make sure here that we will be able to split the
1601 basic block and still produce the correct control
1602 flow graph for it. */
1603 gcc_assert (!cfi
1604 || (cfun->can_throw_non_call_exceptions
1605 && can_throw_internal (insn)));
1607 insn = resolve_simple_move (set, insn);
1608 if (insn != orig_insn)
1610 recog_memoized (insn);
1611 extract_insn (insn);
1613 if (cfi)
1614 bitmap_set_bit (sub_blocks, bb->index);
1617 else
1619 rtx_insn *decomposed_shift;
1621 decomposed_shift = resolve_shift_zext (insn);
1622 if (decomposed_shift != NULL_RTX)
1624 insn = decomposed_shift;
1625 recog_memoized (insn);
1626 extract_insn (insn);
1630 for (i = recog_data.n_operands - 1; i >= 0; --i)
1631 resolve_subreg_use (recog_data.operand_loc[i], insn);
1633 resolve_reg_notes (insn);
1635 if (num_validated_changes () > 0)
1637 for (i = recog_data.n_dups - 1; i >= 0; --i)
1639 rtx *pl = recog_data.dup_loc[i];
1640 int dup_num = recog_data.dup_num[i];
1641 rtx *px = recog_data.operand_loc[dup_num];
1643 validate_unshare_change (insn, pl, *px, 1);
1646 i = apply_change_group ();
1647 gcc_assert (i);
1653 /* If we had insns to split that caused control flow insns in the middle
1654 of a basic block, split those blocks now. Note that we only handle
1655 the case where splitting a load has caused multiple possibly trapping
1656 loads to appear. */
1657 EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
1659 rtx_insn *insn, *end;
1660 edge fallthru;
1662 bb = BASIC_BLOCK_FOR_FN (cfun, i);
1663 insn = BB_HEAD (bb);
1664 end = BB_END (bb);
1666 while (insn != end)
1668 if (control_flow_insn_p (insn))
1670 /* Split the block after insn. There will be a fallthru
1671 edge, which is OK so we keep it. We have to create the
1672 exception edges ourselves. */
1673 fallthru = split_block (bb, insn);
1674 rtl_make_eh_edge (NULL, bb, BB_END (bb));
1675 bb = fallthru->dest;
1676 insn = BB_HEAD (bb);
1678 else
1679 insn = NEXT_INSN (insn);
1683 sbitmap_free (sub_blocks);
1687 unsigned int i;
1688 bitmap b;
1690 FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
1691 if (b)
1692 BITMAP_FREE (b);
1695 reg_copy_graph.release ();
1697 BITMAP_FREE (decomposable_context);
1698 BITMAP_FREE (non_decomposable_context);
1699 BITMAP_FREE (subreg_context);
1702 /* Implement first lower subreg pass. */
1704 namespace {
1706 const pass_data pass_data_lower_subreg =
1708 RTL_PASS, /* type */
1709 "subreg1", /* name */
1710 OPTGROUP_NONE, /* optinfo_flags */
1711 TV_LOWER_SUBREG, /* tv_id */
1712 0, /* properties_required */
1713 0, /* properties_provided */
1714 0, /* properties_destroyed */
1715 0, /* todo_flags_start */
1716 0, /* todo_flags_finish */
1719 class pass_lower_subreg : public rtl_opt_pass
1721 public:
1722 pass_lower_subreg (gcc::context *ctxt)
1723 : rtl_opt_pass (pass_data_lower_subreg, ctxt)
1726 /* opt_pass methods: */
1727 virtual bool gate (function *) { return flag_split_wide_types != 0; }
1728 virtual unsigned int execute (function *)
1730 decompose_multiword_subregs (false);
1731 return 0;
1734 }; // class pass_lower_subreg
1736 } // anon namespace
1738 rtl_opt_pass *
1739 make_pass_lower_subreg (gcc::context *ctxt)
1741 return new pass_lower_subreg (ctxt);
1744 /* Implement second lower subreg pass. */
1746 namespace {
1748 const pass_data pass_data_lower_subreg2 =
1750 RTL_PASS, /* type */
1751 "subreg2", /* name */
1752 OPTGROUP_NONE, /* optinfo_flags */
1753 TV_LOWER_SUBREG, /* tv_id */
1754 0, /* properties_required */
1755 0, /* properties_provided */
1756 0, /* properties_destroyed */
1757 0, /* todo_flags_start */
1758 TODO_df_finish, /* todo_flags_finish */
1761 class pass_lower_subreg2 : public rtl_opt_pass
1763 public:
1764 pass_lower_subreg2 (gcc::context *ctxt)
1765 : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
1768 /* opt_pass methods: */
1769 virtual bool gate (function *) { return flag_split_wide_types != 0; }
1770 virtual unsigned int execute (function *)
1772 decompose_multiword_subregs (true);
1773 return 0;
1776 }; // class pass_lower_subreg2
1778 } // anon namespace
1780 rtl_opt_pass *
1781 make_pass_lower_subreg2 (gcc::context *ctxt)
1783 return new pass_lower_subreg2 (ctxt);