2015-06-11 Paul Thomas <pault@gcc.gnu.org>
[official-gcc.git] / gcc / lower-subreg.c
blobe89c55dfa377cbd9218af203851540c68fe765d7
1 /* Decompose multiword subregs.
2 Copyright (C) 2007-2015 Free Software Foundation, Inc.
3 Contributed by Richard Henderson <rth@redhat.com>
4 Ian Lance Taylor <iant@google.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "input.h"
27 #include "alias.h"
28 #include "symtab.h"
29 #include "tree.h"
30 #include "rtl.h"
31 #include "tm_p.h"
32 #include "flags.h"
33 #include "insn-config.h"
34 #include "obstack.h"
35 #include "predict.h"
36 #include "hard-reg-set.h"
37 #include "function.h"
38 #include "dominance.h"
39 #include "cfg.h"
40 #include "cfgrtl.h"
41 #include "cfgbuild.h"
42 #include "basic-block.h"
43 #include "recog.h"
44 #include "bitmap.h"
45 #include "dce.h"
46 #include "expmed.h"
47 #include "dojump.h"
48 #include "explow.h"
49 #include "calls.h"
50 #include "emit-rtl.h"
51 #include "varasm.h"
52 #include "stmt.h"
53 #include "expr.h"
54 #include "except.h"
55 #include "regs.h"
56 #include "tree-pass.h"
57 #include "df.h"
58 #include "lower-subreg.h"
59 #include "rtl-iter.h"
62 /* Decompose multi-word pseudo-registers into individual
63 pseudo-registers when possible and profitable. This is possible
64 when all the uses of a multi-word register are via SUBREG, or are
65 copies of the register to another location. Breaking apart the
66 register permits more CSE and permits better register allocation.
67 This is profitable if the machine does not have move instructions
68 to do this.
70 This pass only splits moves with modes that are wider than
71 word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
72 integer modes that are twice the width of word_mode. The latter
73 could be generalized if there was a need to do this, but the trend in
74 architectures is to not need this.
76 There are two useful preprocessor defines for use by maintainers:
78 #define LOG_COSTS 1
80 if you wish to see the actual cost estimates that are being used
81 for each mode wider than word mode and the cost estimates for zero
82 extension and the shifts. This can be useful when port maintainers
83 are tuning insn rtx costs.
85 #define FORCE_LOWERING 1
87 if you wish to test the pass with all the transformation forced on.
88 This can be useful for finding bugs in the transformations. */
#define LOG_COSTS 0
#define FORCE_LOWERING 0

/* Bit N in this bitmap is set if regno N is used in a context in
   which we can decompose it.  */
static bitmap decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a context in
   which it can not be decomposed.  */
static bitmap non_decomposable_context;

/* Bit N in this bitmap is set if regno N is used in a subreg
   which changes the mode but not the size.  This typically happens
   when the register is accessed as a floating-point value; we want to
   avoid generating accesses to its subwords in integer modes.  */
static bitmap subreg_context;

/* Bit N in the bitmap in element M of this array is set if there is a
   copy from reg M to reg N.  Built by find_pseudo_copy and consumed
   by propagate_pseudo_copies.  */
static vec<bitmap> reg_copy_graph;

/* Per-target lowering data (twice-word mode and the cost-based
   splitting choices), filled in by init_lower_subreg.  */
struct target_lower_subreg default_target_lower_subreg;
#if SWITCHABLE_TARGET
struct target_lower_subreg *this_target_lower_subreg
  = &default_target_lower_subreg;
#endif

/* Shorthand accessors for the current target's lowering data.  */
#define twice_word_mode \
  this_target_lower_subreg->x_twice_word_mode
#define choices \
  this_target_lower_subreg->x_choices
/* RTXes used while computing costs.  These are scratch RTL objects
   whose codes/modes are smashed in place by shift_cost and
   compute_costs rather than re-allocating for every query.  */
struct cost_rtxes {
  /* Source and target registers.  */
  rtx source;
  rtx target;

  /* A twice_word_mode ZERO_EXTEND of SOURCE.  */
  rtx zext;

  /* A shift of SOURCE.  */
  rtx shift;

  /* A SET of TARGET.  */
  rtx set;
};
138 /* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
139 rtxes in RTXES. SPEED_P selects between the speed and size cost. */
141 static int
142 shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
143 machine_mode mode, int op1)
145 PUT_CODE (rtxes->shift, code);
146 PUT_MODE (rtxes->shift, mode);
147 PUT_MODE (rtxes->source, mode);
148 XEXP (rtxes->shift, 1) = GEN_INT (op1);
149 return set_src_cost (rtxes->shift, speed_p);
/* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
   to true if it is profitable to split a double-word CODE shift
   of X + BITS_PER_WORD bits.  SPEED_P says whether we are testing
   for speed or size profitability.

   Use the rtxes in RTXES to calculate costs.  WORD_MOVE_ZERO_COST is
   the cost of moving zero into a word-mode register.  WORD_MOVE_COST
   is the cost of moving between word registers.  */

static void
compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
			 bool *splitting, enum rtx_code code,
			 int word_move_zero_cost, int word_move_cost)
{
  int wide_cost, narrow_cost, upper_cost, i;

  for (i = 0; i < BITS_PER_WORD; i++)
    {
      /* Cost of doing the full twice-word shift in one go.  */
      wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
			      i + BITS_PER_WORD);
      /* Cost of producing the low/high word of the split form: a shift
	 by >= BITS_PER_WORD reduces to a word move plus a narrower
	 shift (no narrow shift needed when i == 0).  */
      if (i == 0)
	narrow_cost = word_move_cost;
      else
	narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);

      /* The other output word: zero for logical shifts, a copy or a
	 sign-fill shift for arithmetic right shifts.  */
      if (code != ASHIFTRT)
	upper_cost = word_move_zero_cost;
      else if (i == BITS_PER_WORD - 1)
	upper_cost = word_move_cost;
      else
	upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
				 BITS_PER_WORD - 1);

      if (LOG_COSTS)
	fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
		 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);

      if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
	splitting[i] = true;
    }
}
/* Compute what we should do when optimizing for speed or size; SPEED_P
   selects which.  Use RTXES for computing costs.  Results are stored
   in choices[SPEED_P].  */

static void
compute_costs (bool speed_p, struct cost_rtxes *rtxes)
{
  unsigned int i;
  int word_move_zero_cost, word_move_cost;

  /* Baseline costs: a word move from zero and a word reg-reg move.  */
  PUT_MODE (rtxes->target, word_mode);
  SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
  word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);

  SET_SRC (rtxes->set) = rtxes->source;
  word_move_cost = set_rtx_cost (rtxes->set, speed_p);

  if (LOG_COSTS)
    fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
	     GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);

  /* For every multi-word mode, decide whether splitting its moves
     into word moves is no more expensive than the wide move.  */
  for (i = 0; i < MAX_MACHINE_MODE; i++)
    {
      machine_mode mode = (machine_mode) i;
      int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      if (factor > 1)
	{
	  int mode_move_cost;

	  PUT_MODE (rtxes->target, mode);
	  PUT_MODE (rtxes->source, mode);
	  mode_move_cost = set_rtx_cost (rtxes->set, speed_p);

	  if (LOG_COSTS)
	    fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
		     GET_MODE_NAME (mode), mode_move_cost,
		     word_move_cost, factor);

	  if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
	    {
	      choices[speed_p].move_modes_to_split[i] = true;
	      choices[speed_p].something_to_do = true;
	    }
	}
    }

  /* For the moves and shifts, the only case that is checked is one
     where the mode of the target is an integer mode twice the width
     of the word_mode.

     If it is not profitable to split a double word move then do not
     even consider the shifts or the zero extension.  */
  if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
    {
      int zext_cost;

      /* The only case here to check to see if moving the upper part with a
	 zero is cheaper than doing the zext itself.  */
      PUT_MODE (rtxes->source, word_mode);
      zext_cost = set_src_cost (rtxes->zext, speed_p);

      if (LOG_COSTS)
	fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
		 zext_cost, word_move_cost, word_move_zero_cost);

      if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
	choices[speed_p].splitting_zext = true;

      /* Fill in the per-shift-count splitting tables for each of the
	 three shift codes this pass handles.  */
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashift, ASHIFT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_lshiftrt, LSHIFTRT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashiftrt, ASHIFTRT,
			       word_move_zero_cost, word_move_cost);
    }
}
275 /* Do one-per-target initialisation. This involves determining
276 which operations on the machine are profitable. If none are found,
277 then the pass just returns when called. */
279 void
280 init_lower_subreg (void)
282 struct cost_rtxes rtxes;
284 memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
286 twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode);
288 rtxes.target = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
289 rtxes.source = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 2);
290 rtxes.set = gen_rtx_SET (rtxes.target, rtxes.source);
291 rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
292 rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
294 if (LOG_COSTS)
295 fprintf (stderr, "\nSize costs\n==========\n\n");
296 compute_costs (false, &rtxes);
298 if (LOG_COSTS)
299 fprintf (stderr, "\nSpeed costs\n===========\n\n");
300 compute_costs (true, &rtxes);
303 static bool
304 simple_move_operand (rtx x)
306 if (GET_CODE (x) == SUBREG)
307 x = SUBREG_REG (x);
309 if (!OBJECT_P (x))
310 return false;
312 if (GET_CODE (x) == LABEL_REF
313 || GET_CODE (x) == SYMBOL_REF
314 || GET_CODE (x) == HIGH
315 || GET_CODE (x) == CONST)
316 return false;
318 if (MEM_P (x)
319 && (MEM_VOLATILE_P (x)
320 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
321 return false;
323 return true;
/* If INSN is a single set between two objects that we want to split,
   return the single set.  SPEED_P says whether we are optimizing
   INSN for speed or size.

   INSN should have been passed to recog and extract_insn before this
   is called, so that recog_data describes its operands.  */

static rtx
simple_move (rtx_insn *insn, bool speed_p)
{
  rtx x;
  rtx set;
  machine_mode mode;

  if (recog_data.n_operands != 2)
    return NULL_RTX;

  set = single_set (insn);
  if (!set)
    return NULL_RTX;

  /* Both sides of the set must be the recognized operands themselves,
     not sub-expressions of them.  */
  x = SET_DEST (set);
  if (x != recog_data.operand[0] && x != recog_data.operand[1])
    return NULL_RTX;
  if (!simple_move_operand (x))
    return NULL_RTX;

  x = SET_SRC (set);
  if (x != recog_data.operand[0] && x != recog_data.operand[1])
    return NULL_RTX;
  /* For the src we can handle ASM_OPERANDS, and it is beneficial for
     things like x86 rdtsc which returns a DImode value.  */
  if (GET_CODE (x) != ASM_OPERANDS
      && !simple_move_operand (x))
    return NULL_RTX;

  /* We try to decompose in integer modes, to avoid generating
     inefficient code copying between integer and floating point
     registers.  That means that we can't decompose if this is a
     non-integer mode for which there is no integer mode of the same
     size.  */
  mode = GET_MODE (SET_DEST (set));
  if (!SCALAR_INT_MODE_P (mode)
      && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0)
	  == BLKmode))
    return NULL_RTX;

  /* Reject PARTIAL_INT modes.  They are used for processor specific
     purposes and it's probably best not to tamper with them.  */
  if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return NULL_RTX;

  /* Only split moves whose mode the cost analysis marked profitable.  */
  if (!choices[speed_p].move_modes_to_split[(int) mode])
    return NULL_RTX;

  return set;
}
384 /* If SET is a copy from one multi-word pseudo-register to another,
385 record that in reg_copy_graph. Return whether it is such a
386 copy. */
388 static bool
389 find_pseudo_copy (rtx set)
391 rtx dest = SET_DEST (set);
392 rtx src = SET_SRC (set);
393 unsigned int rd, rs;
394 bitmap b;
396 if (!REG_P (dest) || !REG_P (src))
397 return false;
399 rd = REGNO (dest);
400 rs = REGNO (src);
401 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
402 return false;
404 b = reg_copy_graph[rs];
405 if (b == NULL)
407 b = BITMAP_ALLOC (NULL);
408 reg_copy_graph[rs] = b;
411 bitmap_set_bit (b, rd);
413 return true;
/* Look through the registers in DECOMPOSABLE_CONTEXT.  For each case
   where they are copied to another register, add the register to
   which they are copied to DECOMPOSABLE_CONTEXT.  Use
   NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
   copies of registers which are in NON_DECOMPOSABLE_CONTEXT.  */

static void
propagate_pseudo_copies (void)
{
  bitmap queue, propagate;

  queue = BITMAP_ALLOC (NULL);
  propagate = BITMAP_ALLOC (NULL);

  /* Fixed-point iteration: QUEUE holds registers newly marked
     decomposable whose copy destinations still need processing.  */
  bitmap_copy (queue, decomposable_context);
  do
    {
      bitmap_iterator iter;
      unsigned int i;

      bitmap_clear (propagate);

      EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
	{
	  bitmap b = reg_copy_graph[i];
	  if (b)
	    bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
	}

      /* Next round processes only registers not already known.  */
      bitmap_and_compl (queue, propagate, decomposable_context);
      bitmap_ior_into (decomposable_context, propagate);
    }
  while (!bitmap_empty_p (queue));

  BITMAP_FREE (queue);
  BITMAP_FREE (propagate);
}
/* A pointer to one of these values is passed to
   find_decomposable_subregs to say how the insn being scanned uses
   its registers.  */

enum classify_move_insn
{
  /* Not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* A simple move we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* Any other simple move.  */
  SIMPLE_MOVE
};
/* If we find a SUBREG in *LOC which we could use to decompose a
   pseudo-register, set a bit in DECOMPOSABLE_CONTEXT.  If we find an
   unadorned register which is not a simple pseudo-register copy,
   *PCMI gives the type of move, and we set a bit in
   DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate.  */

static void
find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
    {
      rtx x = *iter;
      if (GET_CODE (x) == SUBREG)
	{
	  rtx inner = SUBREG_REG (x);
	  unsigned int regno, outer_size, inner_size, outer_words, inner_words;

	  if (!REG_P (inner))
	    continue;

	  regno = REGNO (inner);
	  if (HARD_REGISTER_NUM_P (regno))
	    {
	      /* Hard registers are handled by the backend; don't look
		 inside this SUBREG.  */
	      iter.skip_subrtxes ();
	      continue;
	    }

	  outer_size = GET_MODE_SIZE (GET_MODE (x));
	  inner_size = GET_MODE_SIZE (GET_MODE (inner));
	  outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
	  inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

	  /* We only try to decompose single word subregs of multi-word
	     registers.  When we find one, we skip the inner register so
	     it is not also seen as a plain REG reference.

	     ??? This doesn't allow, e.g., DImode subregs of TImode values
	     on 32-bit targets.  We would need to record the way the
	     pseudo-register was used, and only decompose if all the uses
	     were the same number and size of pieces.  Hopefully this
	     doesn't happen much.  */

	  if (outer_words == 1 && inner_words > 1)
	    {
	      bitmap_set_bit (decomposable_context, regno);
	      iter.skip_subrtxes ();
	      continue;
	    }

	  /* If this is a cast from one mode to another, where the modes
	     have the same size, and they are not tieable, then mark this
	     register as non-decomposable.  If we decompose it we are
	     likely to mess up whatever the backend is trying to do.  */
	  if (outer_words > 1
	      && outer_size == inner_size
	      && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner)))
	    {
	      bitmap_set_bit (non_decomposable_context, regno);
	      bitmap_set_bit (subreg_context, regno);
	      iter.skip_subrtxes ();
	      continue;
	    }
	}
      else if (REG_P (x))
	{
	  unsigned int regno;

	  /* We will see an outer SUBREG before we see the inner REG, so
	     when we see a plain REG here it means a direct reference to
	     the register.

	     If this is not a simple copy from one location to another,
	     then we can not decompose this register.  If this is a simple
	     copy we want to decompose, and the mode is right,
	     then we mark the register as decomposable.
	     Otherwise we don't say anything about this register --
	     it could be decomposed, but whether that would be
	     profitable depends upon how it is used elsewhere.

	     We only set bits in the bitmap for multi-word
	     pseudo-registers, since those are the only ones we care about
	     and it keeps the size of the bitmaps down.  */

	  regno = REGNO (x);
	  if (!HARD_REGISTER_NUM_P (regno)
	      && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
	    {
	      switch (*pcmi)
		{
		case NOT_SIMPLE_MOVE:
		  bitmap_set_bit (non_decomposable_context, regno);
		  break;
		case DECOMPOSABLE_SIMPLE_MOVE:
		  if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
		    bitmap_set_bit (decomposable_context, regno);
		  break;
		case SIMPLE_MOVE:
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	}
      else if (MEM_P (x))
	{
	  enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;

	  /* Any registers used in a MEM do not participate in a
	     SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE.  Do our own recursion
	     here, and skip the subrtxes to block the parent's recursion.  */
	  find_decomposable_subregs (&XEXP (x, 0), &cmi_mem);
	  iter.skip_subrtxes ();
	}
    }
}
/* Decompose REGNO into word-sized components.  We smash the REG node
   in place.  This ensures that (1) something goes wrong quickly if we
   fail to make some replacement, and (2) the debug information inside
   the symbol table is automatically kept up to date.  */

static void
decompose_register (unsigned int regno)
{
  rtx reg;
  unsigned int words, i;
  rtvec v;

  reg = regno_reg_rtx[regno];

  /* The old multi-word pseudo no longer exists as such.  */
  regno_reg_rtx[regno] = NULL_RTX;

  words = GET_MODE_SIZE (GET_MODE (reg));
  words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Allocate one fresh word-mode pseudo per word of the original.  */
  v = rtvec_alloc (words);
  for (i = 0; i < words; ++i)
    RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);

  /* Smash the REG into a CONCATN of its replacement pseudos; every
     existing reference to REG now sees the CONCATN.  */
  PUT_CODE (reg, CONCATN);
  XVEC (reg, 0) = v;

  if (dump_file)
    {
      fprintf (dump_file, "; Splitting reg %u ->", regno);
      for (i = 0; i < words; ++i)
	fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
      fputc ('\n', dump_file);
    }
}
/* Get a SUBREG of a CONCATN: return the piece of OP (a CONCATN) in
   mode OUTERMODE starting at byte offset BYTE, or NULL_RTX if the
   requested piece straddles two parts of the CONCATN.  */

static rtx
simplify_subreg_concatn (machine_mode outermode, rtx op,
			 unsigned int byte)
{
  unsigned int inner_size;
  machine_mode innermode, partmode;
  rtx part;
  unsigned int final_offset;

  gcc_assert (GET_CODE (op) == CONCATN);
  gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);

  innermode = GET_MODE (op);
  gcc_assert (byte < GET_MODE_SIZE (innermode));
  gcc_assert (GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (innermode));

  /* All parts of a CONCATN have the same size, so the byte offset
     directly indexes the containing part.  */
  inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
  part = XVECEXP (op, 0, byte / inner_size);
  partmode = GET_MODE (part);

  /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
     regular CONST_VECTORs.  They have vector or integer modes, depending
     on the capabilities of the target.  Cope with them.  */
  if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
    partmode = GET_MODE_INNER (innermode);
  else if (partmode == VOIDmode)
    {
      enum mode_class mclass = GET_MODE_CLASS (innermode);
      partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0);
    }

  /* Fail if the requested piece crosses a part boundary.  */
  final_offset = byte % inner_size;
  if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
    return NULL_RTX;

  return simplify_gen_subreg (outermode, part, partmode, final_offset);
}
/* Wrapper around simplify_gen_subreg which handles CONCATN.  Returns
   the OUTERMODE piece of OP (mode INNERMODE) at byte offset BYTE;
   never returns NULL_RTX.  */

static rtx
simplify_gen_subreg_concatn (machine_mode outermode, rtx op,
			     machine_mode innermode, unsigned int byte)
{
  rtx ret;

  /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
     If OP is a SUBREG of a CONCATN, then it must be a simple mode
     change with the same size and offset 0, or it must extract a
     part.  We shouldn't see anything else here.  */
  if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
    {
      rtx op2;

      /* Same-size, offset-0 mode change: just look through it.  */
      if ((GET_MODE_SIZE (GET_MODE (op))
	   == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
	  && SUBREG_BYTE (op) == 0)
	return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
					    GET_MODE (SUBREG_REG (op)), byte);

      op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
				     SUBREG_BYTE (op));
      if (op2 == NULL_RTX)
	{
	  /* The SUBREG straddles two parts of the CONCATN; combine the
	     two byte offsets and extract directly from the CONCATN.
	     We don't handle paradoxical subregs here.  */
	  gcc_assert (GET_MODE_SIZE (outermode)
		      <= GET_MODE_SIZE (GET_MODE (op)));
	  gcc_assert (GET_MODE_SIZE (GET_MODE (op))
		      <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))));
	  op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
					 byte + SUBREG_BYTE (op));
	  gcc_assert (op2 != NULL_RTX);
	  return op2;
	}

      op = op2;
      gcc_assert (op != NULL_RTX);
      gcc_assert (innermode == GET_MODE (op));
    }

  if (GET_CODE (op) == CONCATN)
    return simplify_subreg_concatn (outermode, op, byte);

  ret = simplify_gen_subreg (outermode, op, innermode, byte);

  /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
     resolve_simple_move will ask for the high part of the paradoxical
     subreg, which does not have a value.  Just return a zero.  */
  if (ret == NULL_RTX
      && GET_CODE (op) == SUBREG
      && SUBREG_BYTE (op) == 0
      && (GET_MODE_SIZE (innermode)
	  > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))))
    return CONST0_RTX (outermode);

  gcc_assert (ret != NULL_RTX);
  return ret;
}
/* Return whether we should resolve X into the registers into which it
   was decomposed, i.e. whether X is a CONCATN left behind by
   decompose_register.  */

static bool
resolve_reg_p (rtx x)
{
  return GET_CODE (x) == CONCATN;
}
729 /* Return whether X is a SUBREG of a register which we need to
730 resolve. */
732 static bool
733 resolve_subreg_p (rtx x)
735 if (GET_CODE (x) != SUBREG)
736 return false;
737 return resolve_reg_p (SUBREG_REG (x));
/* Look for SUBREGs in *LOC which need to be decomposed and queue
   replacements via validate_change on INSN.  Return true if *LOC
   contains a reference that cannot be replaced in place (a straddling
   SUBREG inside a note, or a direct CONCATN reference), in which case
   the caller must handle it (e.g. delete the note).  */

static bool
resolve_subreg_use (rtx *loc, rtx insn)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      if (resolve_subreg_p (x))
	{
	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				       SUBREG_BYTE (x));

	  /* It is possible for a note to contain a reference which we can
	     decompose.  In this case, return 1 to the caller to indicate
	     that the note must be removed.  */
	  if (!x)
	    {
	      gcc_assert (!insn);
	      return true;
	    }

	  /* Queue (don't apply) the change; the caller calls
	     apply_change_group when the whole insn is updated.  */
	  validate_change (insn, loc, x, 1);
	  iter.skip_subrtxes ();
	}
      else if (resolve_reg_p (x))
	/* Return 1 to the caller to indicate that we found a direct
	   reference to a register which is being decomposed.  This can
	   happen inside notes, multiword shift or zero-extend
	   instructions.  */
	return true;
    }

  return false;
}
/* Resolve any decomposed registers which appear in register notes on
   INSN: rewrite REG_EQUAL/REG_EQUIV notes where possible (dropping
   them otherwise), and delete REG_DEAD/REG_UNUSED notes that refer to
   a decomposed register.  */

static void
resolve_reg_notes (rtx_insn *insn)
{
  rtx *pnote, note;

  note = find_reg_equal_equiv_note (insn);
  if (note)
    {
      int old_count = num_validated_changes ();
      if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX))
	remove_note (insn, note);
      else
	/* Only rescan the df notes if resolve_subreg_use actually
	   queued a change.  */
	if (old_count != num_validated_changes ())
	  df_notes_rescan (insn);
    }

  /* Walk the note chain, unlinking dead/unused notes that mention a
     decomposed register.  */
  pnote = &REG_NOTES (insn);
  while (*pnote != NULL_RTX)
    {
      bool del = false;

      note = *pnote;
      switch (REG_NOTE_KIND (note))
	{
	case REG_DEAD:
	case REG_UNUSED:
	  if (resolve_reg_p (XEXP (note, 0)))
	    del = true;
	  break;

	default:
	  break;
	}

      if (del)
	*pnote = XEXP (note, 1);
      else
	pnote = &XEXP (note, 1);
    }
}
822 /* Return whether X can be decomposed into subwords. */
824 static bool
825 can_decompose_p (rtx x)
827 if (REG_P (x))
829 unsigned int regno = REGNO (x);
831 if (HARD_REGISTER_NUM_P (regno))
833 unsigned int byte, num_bytes;
835 num_bytes = GET_MODE_SIZE (GET_MODE (x));
836 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
837 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
838 return false;
839 return true;
841 else
842 return !bitmap_bit_p (subreg_context, regno);
845 return true;
/* Decompose the registers used in a simple move SET within INSN.  If
   we don't change anything, return INSN, otherwise return the start
   of the sequence of moves.  */

static rtx_insn *
resolve_simple_move (rtx set, rtx_insn *insn)
{
  rtx src, dest, real_dest;
  rtx_insn *insns;
  machine_mode orig_mode, dest_mode;
  unsigned int words;
  bool pushing;

  src = SET_SRC (set);
  dest = SET_DEST (set);
  orig_mode = GET_MODE (dest);

  words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  gcc_assert (words > 1);

  start_sequence ();

  /* We have to handle copying from a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  Rather than assume that we
     can take a word_mode SUBREG of the destination, we copy to a new
     register and then copy that to the destination.  */

  real_dest = NULL_RTX;

  if (GET_CODE (src) == SUBREG
      && resolve_reg_p (SUBREG_REG (src))
      && (SUBREG_BYTE (src) != 0
	  || (GET_MODE_SIZE (orig_mode)
	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
    {
      real_dest = dest;
      dest = gen_reg_rtx (orig_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  /* Similarly if we are copying to a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  */

  if (GET_CODE (dest) == SUBREG
      && resolve_reg_p (SUBREG_REG (dest))
      && (SUBREG_BYTE (dest) != 0
	  || (GET_MODE_SIZE (orig_mode)
	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
    {
      rtx reg, smove;
      rtx_insn *minsn;

      /* Copy SRC to a fresh register first, then recursively resolve
	 that intermediate move.  */
      reg = gen_reg_rtx (orig_mode);
      minsn = emit_move_insn (reg, src);
      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);
      resolve_simple_move (smove, minsn);
      src = reg;
    }

  /* If we didn't have any big SUBREGS of decomposed registers, and
     neither side of the move is a register we are decomposing, then
     we don't have to do anything here.  */

  if (src == SET_SRC (set)
      && dest == SET_DEST (set)
      && !resolve_reg_p (src)
      && !resolve_subreg_p (src)
      && !resolve_reg_p (dest)
      && !resolve_subreg_p (dest))
    {
      end_sequence ();
      return insn;
    }

  /* It's possible for the code to use a subreg of a decomposed
     register while forming an address.  We need to handle that before
     passing the address to emit_move_insn.  We pass NULL_RTX as the
     insn parameter to resolve_subreg_use because we can not validate
     the insn yet.  */
  if (MEM_P (src) || MEM_P (dest))
    {
      int acg;

      if (MEM_P (src))
	resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
      if (MEM_P (dest))
	resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
      acg = apply_change_group ();
      gcc_assert (acg);
    }

  /* If SRC is a register which we can't decompose, or has side
     effects, we need to move via a temporary register.  */

  if (!can_decompose_p (src)
      || side_effects_p (src)
      || GET_CODE (src) == ASM_OPERANDS)
    {
      rtx reg;

      reg = gen_reg_rtx (orig_mode);

#ifdef AUTO_INC_DEC
      {
	rtx move = emit_move_insn (reg, src);
	if (MEM_P (src))
	  {
	    /* Preserve any auto-inc side effect recorded on INSN.  */
	    rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
	    if (note)
	      add_reg_note (move, REG_INC, XEXP (note, 0));
	  }
      }
#else
      emit_move_insn (reg, src);
#endif
      src = reg;
    }

  /* If DEST is a register which we can't decompose, or has side
     effects, we need to first move to a temporary register.  We
     handle the common case of pushing an operand directly.  We also
     go through a temporary register if it holds a floating point
     value.  This gives us better code on systems which can't move
     data easily between integer and floating point registers.  */

  dest_mode = orig_mode;
  pushing = push_operand (dest, dest_mode);
  if (!can_decompose_p (dest)
      || (side_effects_p (dest) && !pushing)
      || (!SCALAR_INT_MODE_P (dest_mode)
	  && !resolve_reg_p (dest)
	  && !resolve_subreg_p (dest)))
    {
      if (real_dest == NULL_RTX)
	real_dest = dest;
      if (!SCALAR_INT_MODE_P (dest_mode))
	{
	  /* Switch to the integer mode of the same size; simple_move
	     has already checked it exists.  */
	  dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
				     MODE_INT, 0);
	  gcc_assert (dest_mode != BLKmode);
	}
      dest = gen_reg_rtx (dest_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  if (pushing)
    {
      unsigned int i, j, jinc;

      gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);

      /* Push the words in the order the stack expects them.  */
      if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
	{
	  j = 0;
	  jinc = 1;
	}
      else
	{
	  j = words - 1;
	  jinc = -1;
	}

      for (i = 0; i < words; ++i, j += jinc)
	{
	  rtx temp;

	  temp = copy_rtx (XEXP (dest, 0));
	  temp = adjust_automodify_address_nv (dest, word_mode, temp,
					       j * UNITS_PER_WORD);
	  emit_move_insn (temp,
			  simplify_gen_subreg_concatn (word_mode, src,
						       orig_mode,
						       j * UNITS_PER_WORD));
	}
    }
  else
    {
      unsigned int i;

      /* Clobber the whole destination first so df sees a full def.  */
      if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
	emit_clobber (dest);

      for (i = 0; i < words; ++i)
	emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
						     dest_mode,
						     i * UNITS_PER_WORD),
			simplify_gen_subreg_concatn (word_mode, src,
						     orig_mode,
						     i * UNITS_PER_WORD));
    }

  /* If we substituted a temporary for the real destination, emit the
     final copy into the real destination and resolve it too.  */
  if (real_dest != NULL_RTX)
    {
      rtx mdest, smove;
      rtx_insn *minsn;

      if (dest_mode == orig_mode)
	mdest = dest;
      else
	mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
      minsn = emit_move_insn (real_dest, mdest);

#ifdef AUTO_INC_DEC
      if (MEM_P (real_dest)
	  && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
	{
	  rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
	  if (note)
	    add_reg_note (minsn, REG_INC, XEXP (note, 0));
	}
#endif

      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);

      resolve_simple_move (smove, minsn);
    }

  insns = get_insns ();
  end_sequence ();

  copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);

  emit_insn_before (insns, insn);

  /* If we get here via self-recursion, then INSN is not yet in the insns
     chain and delete_insn will fail.  We only want to remove INSN from the
     current sequence.  See PR56738.  */
  if (in_sequence_p ())
    remove_insn (insn);
  else
    delete_insn (insn);

  return insns;
}
/* Change a CLOBBER of a decomposed register into a CLOBBER of the
   component registers.  Return whether we changed something.  */

static bool
resolve_clobber (rtx pat, rtx_insn *insn)
{
  rtx reg;
  machine_mode orig_mode;
  unsigned int words, i;
  int ret;

  reg = XEXP (pat, 0);
  if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
    return false;

  orig_mode = GET_MODE (reg);
  words = GET_MODE_SIZE (orig_mode);
  words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Rewrite INSN itself to clobber the first word...  */
  ret = validate_change (NULL_RTX, &XEXP (pat, 0),
			 simplify_gen_subreg_concatn (word_mode, reg,
						      orig_mode, 0),
			 0);
  df_insn_rescan (insn);
  gcc_assert (ret != 0);

  /* ...and emit separate CLOBBERs for the remaining words.  */
  for (i = words - 1; i > 0; --i)
    {
      rtx x;

      x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
				       i * UNITS_PER_WORD);
      x = gen_rtx_CLOBBER (VOIDmode, x);
      emit_insn_after (x, insn);
    }

  resolve_reg_notes (insn);

  return true;
}
1130 /* A USE of a decomposed register is no longer meaningful. Return
1131 whether we changed something. */
1133 static bool
1134 resolve_use (rtx pat, rtx_insn *insn)
1136 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1138 delete_insn (insn);
1139 return true;
1142 resolve_reg_notes (insn);
1144 return false;
/* A VAR_LOCATION can be simplified.  Walk the pattern of debug insn
   INSN and rewrite every reference to a decomposed register or a
   subreg of one into the corresponding concatenation of word-mode
   pieces, then tell DF to rescan the insn.  */

static void
resolve_debug (rtx_insn *insn)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      if (resolve_subreg_p (x))
	{
	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				       SUBREG_BYTE (x));

	  /* If the subreg simplified, install the result; otherwise
	     fall back to an unshared copy of the original so the
	     resolve_reg_p check below operates on fresh RTL.  */
	  if (x)
	    *loc = x;
	  else
	    x = copy_rtx (*loc);
	}

      /* A bare decomposed register must be unshared before the
	 decomposition replaces its pieces.  */
      if (resolve_reg_p (x))
	*loc = copy_rtx (x);
    }

  df_insn_rescan (insn);

  resolve_reg_notes (insn);
}
1176 /* Check if INSN is a decomposable multiword-shift or zero-extend and
1177 set the decomposable_context bitmap accordingly. SPEED_P is true
1178 if we are optimizing INSN for speed rather than size. Return true
1179 if INSN is decomposable. */
1181 static bool
1182 find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
1184 rtx set;
1185 rtx op;
1186 rtx op_operand;
1188 set = single_set (insn);
1189 if (!set)
1190 return false;
1192 op = SET_SRC (set);
1193 if (GET_CODE (op) != ASHIFT
1194 && GET_CODE (op) != LSHIFTRT
1195 && GET_CODE (op) != ASHIFTRT
1196 && GET_CODE (op) != ZERO_EXTEND)
1197 return false;
1199 op_operand = XEXP (op, 0);
1200 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1201 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1202 || HARD_REGISTER_NUM_P (REGNO (op_operand))
1203 || GET_MODE (op) != twice_word_mode)
1204 return false;
1206 if (GET_CODE (op) == ZERO_EXTEND)
1208 if (GET_MODE (op_operand) != word_mode
1209 || !choices[speed_p].splitting_zext)
1210 return false;
1212 else /* left or right shift */
1214 bool *splitting = (GET_CODE (op) == ASHIFT
1215 ? choices[speed_p].splitting_ashift
1216 : GET_CODE (op) == ASHIFTRT
1217 ? choices[speed_p].splitting_ashiftrt
1218 : choices[speed_p].splitting_lshiftrt);
1219 if (!CONST_INT_P (XEXP (op, 1))
1220 || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1221 2 * BITS_PER_WORD - 1)
1222 || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1223 return false;
1225 bitmap_set_bit (decomposable_context, REGNO (op_operand));
1228 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1230 return true;
/* Decompose a more than word wide shift (in INSN) of a multiword
   pseudo or a multiword zero-extend of a wordmode pseudo into a move
   and 'set to zero' insn.  Return a pointer to the new insn when a
   replacement was done.  */

static rtx_insn *
resolve_shift_zext (rtx_insn *insn)
{
  rtx set;
  rtx op;
  rtx op_operand;
  rtx_insn *insns;
  rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
  int src_reg_num, dest_reg_num, offset1, offset2, src_offset;

  set = single_set (insn);
  if (!set)
    return NULL;

  op = SET_SRC (set);
  if (GET_CODE (op) != ASHIFT
      && GET_CODE (op) != LSHIFTRT
      && GET_CODE (op) != ASHIFTRT
      && GET_CODE (op) != ZERO_EXTEND)
    return NULL;

  op_operand = XEXP (op, 0);

  /* We can tear this operation apart only if the regs were already
     torn apart.  */
  if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
    return NULL;

  /* src_reg_num is the number of the word mode register which we
     are operating on.  For a left shift and a zero_extend on little
     endian machines this is register 0.  */
  src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
		? 1 : 0;

  /* On big-endian targets the word numbering within a multiword
     value is reversed.  */
  if (WORDS_BIG_ENDIAN
      && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
    src_reg_num = 1 - src_reg_num;

  if (GET_CODE (op) == ZERO_EXTEND)
    dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
  else
    dest_reg_num = 1 - src_reg_num;

  /* offset1 addresses the destination word receiving the source;
     offset2 addresses the other destination word (filled with zeros
     or sign bits); src_offset addresses the source word.  */
  offset1 = UNITS_PER_WORD * dest_reg_num;
  offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
  src_offset = UNITS_PER_WORD * src_reg_num;

  start_sequence ();

  dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
					  GET_MODE (SET_DEST (set)),
					  offset1);
  dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
					    GET_MODE (SET_DEST (set)),
					    offset2);
  src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
					 GET_MODE (op_operand),
					 src_offset);
  /* For an arithmetic right shift (other than by all-but-one bits)
     the upper word is the source word shifted down to replicate the
     sign bit.  */
  if (GET_CODE (op) == ASHIFTRT
      && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
    upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
			      BITS_PER_WORD - 1, NULL_RTX, 0);

  if (GET_CODE (op) != ZERO_EXTEND)
    {
      int shift_count = INTVAL (XEXP (op, 1));
      /* A residual shift of (count - BITS_PER_WORD) remains within
	 the single word being moved.  */
      if (shift_count > BITS_PER_WORD)
	src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
				LSHIFT_EXPR : RSHIFT_EXPR,
				word_mode, src_reg,
				shift_count - BITS_PER_WORD,
				dest_reg, GET_CODE (op) != ASHIFTRT);
    }

  if (dest_reg != src_reg)
    emit_move_insn (dest_reg, src_reg);
  if (GET_CODE (op) != ASHIFTRT)
    emit_move_insn (dest_upper, CONST0_RTX (word_mode));
  else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
    /* Shifting by 2*BITS_PER_WORD-1 leaves only sign bits, which the
       residual shift above already produced in src_reg.  */
    emit_move_insn (dest_upper, copy_rtx (src_reg));
  else
    emit_move_insn (dest_upper, upper_src);
  insns = get_insns ();

  end_sequence ();

  emit_insn_before (insns, insn);

  if (dump_file)
    {
      rtx_insn *in;
      fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
      for (in = insns; in != insn; in = NEXT_INSN (in))
	fprintf (dump_file, "%d ", INSN_UID (in));
      fprintf (dump_file, "\n");
    }

  delete_insn (insn);
  return insns;
}
1339 /* Print to dump_file a description of what we're doing with shift code CODE.
1340 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1342 static void
1343 dump_shift_choices (enum rtx_code code, bool *splitting)
1345 int i;
1346 const char *sep;
1348 fprintf (dump_file,
1349 " Splitting mode %s for %s lowering with shift amounts = ",
1350 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1351 sep = "";
1352 for (i = 0; i < BITS_PER_WORD; i++)
1353 if (splitting[i])
1355 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1356 sep = ",";
1358 fprintf (dump_file, "\n");
1361 /* Print to dump_file a description of what we're doing when optimizing
1362 for speed or size; SPEED_P says which. DESCRIPTION is a description
1363 of the SPEED_P choice. */
1365 static void
1366 dump_choices (bool speed_p, const char *description)
1368 unsigned int i;
1370 fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1372 for (i = 0; i < MAX_MACHINE_MODE; i++)
1373 if (GET_MODE_SIZE ((machine_mode) i) > UNITS_PER_WORD)
1374 fprintf (dump_file, " %s mode %s for copy lowering.\n",
1375 choices[speed_p].move_modes_to_split[i]
1376 ? "Splitting"
1377 : "Skipping",
1378 GET_MODE_NAME ((machine_mode) i));
1380 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
1381 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1382 GET_MODE_NAME (twice_word_mode));
1384 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1385 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1386 dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
1387 fprintf (dump_file, "\n");
/* Look for registers which are always accessed via word-sized SUBREGs
   or -if DECOMPOSE_COPIES is true- via copies.  Decompose these
   registers into several word-sized pseudo-registers.  The pass works
   in two phases: a scan over all insns that classifies registers into
   the decomposable/non-decomposable bitmaps, then a rewrite of every
   insn that touches a register chosen for decomposition.  */

static void
decompose_multiword_subregs (bool decompose_copies)
{
  unsigned int max;
  basic_block bb;
  bool speed_p;

  if (dump_file)
    {
      dump_choices (false, "size");
      dump_choices (true, "speed");
    }

  /* Check if this target even has any modes to consider lowering.   */
  if (!choices[false].something_to_do && !choices[true].something_to_do)
    {
      if (dump_file)
	fprintf (dump_file, "Nothing to do!\n");
      return;
    }

  max = max_reg_num ();

  /* First see if there are any multi-word pseudo-registers.  If there
     aren't, there is nothing we can do.  This should speed up this
     pass in the normal case, since it should be faster than scanning
     all the insns.  */
  {
    unsigned int i;
    bool useful_modes_seen = false;

    for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
      if (regno_reg_rtx[i] != NULL)
	{
	  machine_mode mode = GET_MODE (regno_reg_rtx[i]);
	  if (choices[false].move_modes_to_split[(int) mode]
	      || choices[true].move_modes_to_split[(int) mode])
	    {
	      useful_modes_seen = true;
	      break;
	    }
	}

    if (!useful_modes_seen)
      {
	if (dump_file)
	  fprintf (dump_file, "Nothing to lower in this function.\n");
	return;
      }
  }

  /* Run word-level dead code elimination first so we do not decompose
     registers only used by dead insns; defer DF rescans while we
     rewrite.  */
  if (df)
    {
      df_set_flags (DF_DEFER_INSN_RESCAN);
      run_word_dce ();
    }

  /* FIXME: It may be possible to change this code to look for each
     multi-word pseudo-register and to find each insn which sets or
     uses that register.  That should be faster than scanning all the
     insns.  */

  decomposable_context = BITMAP_ALLOC (NULL);
  non_decomposable_context = BITMAP_ALLOC (NULL);
  subreg_context = BITMAP_ALLOC (NULL);

  reg_copy_graph.create (max);
  reg_copy_graph.safe_grow_cleared (max);
  memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);

  speed_p = optimize_function_for_speed_p (cfun);

  /* Phase 1: scan every insn and classify each multiword register.  */
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;

      FOR_BB_INSNS (bb, insn)
	{
	  rtx set;
	  enum classify_move_insn cmi;
	  int i, n;

	  if (!INSN_P (insn)
	      || GET_CODE (PATTERN (insn)) == CLOBBER
	      || GET_CODE (PATTERN (insn)) == USE)
	    continue;

	  recog_memoized (insn);

	  if (find_decomposable_shift_zext (insn, speed_p))
	    continue;

	  extract_insn (insn);

	  set = simple_move (insn, speed_p);

	  if (!set)
	    cmi = NOT_SIMPLE_MOVE;
	  else
	    {
	      /* We mark pseudo-to-pseudo copies as decomposable during the
		 second pass only.  The first pass is so early that there is
		 good chance such moves will be optimized away completely by
		 subsequent optimizations anyway.

		 However, we call find_pseudo_copy even during the first pass
		 so as to properly set up the reg_copy_graph.  */
	      if (find_pseudo_copy (set))
		cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
	      else
		cmi = SIMPLE_MOVE;
	    }

	  n = recog_data.n_operands;
	  for (i = 0; i < n; ++i)
	    {
	      find_decomposable_subregs (&recog_data.operand[i], &cmi);

	      /* We handle ASM_OPERANDS as a special case to support
		 things like x86 rdtsc which returns a DImode value.
		 We can decompose the output, which will certainly be
		 operand 0, but not the inputs.  */
	      if (cmi == SIMPLE_MOVE
		  && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
		{
		  gcc_assert (i == 0);
		  cmi = NOT_SIMPLE_MOVE;
		}
	    }
	}
    }

  /* A register seen in a non-decomposable context wins over any
     decomposable use.  */
  bitmap_and_compl_into (decomposable_context, non_decomposable_context);
  if (!bitmap_empty_p (decomposable_context))
    {
      sbitmap sub_blocks;
      unsigned int i;
      sbitmap_iterator sbi;
      bitmap_iterator iter;
      unsigned int regno;

      propagate_pseudo_copies ();

      sub_blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
      bitmap_clear (sub_blocks);

      EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
	decompose_register (regno);

      /* Phase 2: rewrite every insn that references a decomposed
	 register.  */
      FOR_EACH_BB_FN (bb, cfun)
	{
	  rtx_insn *insn;

	  FOR_BB_INSNS (bb, insn)
	    {
	      rtx pat;

	      if (!INSN_P (insn))
		continue;

	      pat = PATTERN (insn);
	      if (GET_CODE (pat) == CLOBBER)
		resolve_clobber (pat, insn);
	      else if (GET_CODE (pat) == USE)
		resolve_use (pat, insn);
	      else if (DEBUG_INSN_P (insn))
		resolve_debug (insn);
	      else
		{
		  rtx set;
		  int i;

		  recog_memoized (insn);
		  extract_insn (insn);

		  set = simple_move (insn, speed_p);
		  if (set)
		    {
		      rtx_insn *orig_insn = insn;
		      bool cfi = control_flow_insn_p (insn);

		      /* We can end up splitting loads to multi-word pseudos
			 into separate loads to machine word size pseudos.
			 When this happens, we first had one load that can
			 throw, and after resolve_simple_move we'll have a
			 bunch of loads (at least two).  All those loads may
			 trap if we can have non-call exceptions, so they
			 all will end the current basic block.  We split the
			 block after the outer loop over all insns, but we
			 make sure here that we will be able to split the
			 basic block and still produce the correct control
			 flow graph for it.  */
		      gcc_assert (!cfi
				  || (cfun->can_throw_non_call_exceptions
				      && can_throw_internal (insn)));

		      insn = resolve_simple_move (set, insn);
		      if (insn != orig_insn)
			{
			  recog_memoized (insn);
			  extract_insn (insn);

			  if (cfi)
			    bitmap_set_bit (sub_blocks, bb->index);
			}
		    }
		  else
		    {
		      rtx_insn *decomposed_shift;

		      decomposed_shift = resolve_shift_zext (insn);
		      if (decomposed_shift != NULL_RTX)
			{
			  insn = decomposed_shift;
			  recog_memoized (insn);
			  extract_insn (insn);
			}
		    }

		  /* Rewrite any remaining subreg uses of decomposed
		     registers in the (possibly replaced) insn.  */
		  for (i = recog_data.n_operands - 1; i >= 0; --i)
		    resolve_subreg_use (recog_data.operand_loc[i], insn);

		  resolve_reg_notes (insn);

		  if (num_validated_changes () > 0)
		    {
		      /* Keep matching (dup) operands in sync with the
			 operands we just rewrote.  */
		      for (i = recog_data.n_dups - 1; i >= 0; --i)
			{
			  rtx *pl = recog_data.dup_loc[i];
			  int dup_num = recog_data.dup_num[i];
			  rtx *px = recog_data.operand_loc[dup_num];

			  validate_unshare_change (insn, pl, *px, 1);
			}

		      i = apply_change_group ();
		      gcc_assert (i);
		    }
		}
	    }
	}

      /* If we had insns to split that caused control flow insns in the middle
	 of a basic block, split those blocks now.  Note that we only handle
	 the case where splitting a load has caused multiple possibly trapping
	 loads to appear.  */
      EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
	{
	  rtx_insn *insn, *end;
	  edge fallthru;

	  bb = BASIC_BLOCK_FOR_FN (cfun, i);
	  insn = BB_HEAD (bb);
	  end = BB_END (bb);

	  while (insn != end)
	    {
	      if (control_flow_insn_p (insn))
		{
		  /* Split the block after insn.  There will be a fallthru
		     edge, which is OK so we keep it.  We have to create the
		     exception edges ourselves.  */
		  fallthru = split_block (bb, insn);
		  rtl_make_eh_edge (NULL, bb, BB_END (bb));
		  bb = fallthru->dest;
		  insn = BB_HEAD (bb);
		}
	      else
		insn = NEXT_INSN (insn);
	    }
	}

      sbitmap_free (sub_blocks);
    }

  /* Release the per-register copy-graph bitmaps and the global
     classification bitmaps.  */
  {
    unsigned int i;
    bitmap b;

    FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
      if (b)
	BITMAP_FREE (b);
  }

  reg_copy_graph.release ();

  BITMAP_FREE (decomposable_context);
  BITMAP_FREE (non_decomposable_context);
  BITMAP_FREE (subreg_context);
}
/* Implement first lower subreg pass.  This early instance runs with
   DECOMPOSE_COPIES false: pseudo-to-pseudo copies are not yet treated
   as decomposable, since later optimizations may remove them.  */

namespace {

const pass_data pass_data_lower_subreg =
{
  RTL_PASS, /* type */
  "subreg1", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_lower_subreg : public rtl_opt_pass
{
public:
  pass_lower_subreg (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg, ctxt)
  {}

  /* opt_pass methods: */
  /* The pass is enabled by -fsplit-wide-types.  */
  virtual bool gate (function *) { return flag_split_wide_types != 0; }
  virtual unsigned int execute (function *)
    {
      decompose_multiword_subregs (false);
      return 0;
    }

}; // class pass_lower_subreg

} // anon namespace

/* Factory used by the pass manager to instantiate the first lowering
   pass.  */

rtl_opt_pass *
make_pass_lower_subreg (gcc::context *ctxt)
{
  return new pass_lower_subreg (ctxt);
}
/* Implement second lower subreg pass.  This later instance runs with
   DECOMPOSE_COPIES true, so surviving pseudo-to-pseudo copies are
   also decomposed; it finishes by updating DF.  */

namespace {

const pass_data pass_data_lower_subreg2 =
{
  RTL_PASS, /* type */
  "subreg2", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_LOWER_SUBREG, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_lower_subreg2 : public rtl_opt_pass
{
public:
  pass_lower_subreg2 (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
  {}

  /* opt_pass methods: */
  /* The pass is enabled by -fsplit-wide-types.  */
  virtual bool gate (function *) { return flag_split_wide_types != 0; }
  virtual unsigned int execute (function *)
    {
      decompose_multiword_subregs (true);
      return 0;
    }

}; // class pass_lower_subreg2

} // anon namespace

/* Factory used by the pass manager to instantiate the second lowering
   pass.  */

rtl_opt_pass *
make_pass_lower_subreg2 (gcc::context *ctxt)
{
  return new pass_lower_subreg2 (ctxt);
}