/* Merge branches/gcc-4_8-branch rev 208968.
   gcc-4_8-branch / gcc / lower-subreg.c
   blob cc5c9587d1de9aeee638ee6b0b48397cb09f170d  */
/* Decompose multiword subregs.
   Copyright (C) 2007-2013 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>
                  Ian Lance Taylor <iant@google.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "machmode.h"
#include "tm.h"
#include "rtl.h"
#include "tm_p.h"
#include "flags.h"
#include "insn-config.h"
#include "obstack.h"
#include "basic-block.h"
#include "recog.h"
#include "bitmap.h"
#include "dce.h"
#include "expr.h"
#include "except.h"
#include "regs.h"
#include "tree-pass.h"
#include "df.h"
#include "lower-subreg.h"
/* Normalize STACK_GROWS_DOWNWARD to a 0/1 value so it can be used in
   ordinary boolean expressions (it is compared against WORDS_BIG_ENDIAN
   below).  Target headers only define or omit the macro.  */
#ifdef STACK_GROWS_DOWNWARD
# undef STACK_GROWS_DOWNWARD
# define STACK_GROWS_DOWNWARD 1
#else
# define STACK_GROWS_DOWNWARD 0
#endif
/* Decompose multi-word pseudo-registers into individual
   pseudo-registers when possible and profitable.  This is possible
   when all the uses of a multi-word register are via SUBREG, or are
   copies of the register to another location.  Breaking apart the
   register permits more CSE and permits better register allocation.
   This is profitable if the machine does not have move instructions
   to do this.

   This pass only splits moves with modes that are wider than
   word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
   integer modes that are twice the width of word_mode.  The latter
   could be generalized if there was a need to do this, but the trend in
   architectures is to not need this.

   There are two useful preprocessor defines for use by maintainers:

   #define LOG_COSTS 1

   if you wish to see the actual cost estimates that are being used
   for each mode wider than word mode and the cost estimates for zero
   extension and the shifts.   This can be useful when port maintainers
   are tuning insn rtx costs.

   #define FORCE_LOWERING 1

   if you wish to test the pass with all the transformation forced on.
   This can be useful for finding bugs in the transformations.  */

#define LOG_COSTS 0
#define FORCE_LOWERING 0
82 /* Bit N in this bitmap is set if regno N is used in a context in
83 which we can decompose it. */
84 static bitmap decomposable_context;
86 /* Bit N in this bitmap is set if regno N is used in a context in
87 which it can not be decomposed. */
88 static bitmap non_decomposable_context;
90 /* Bit N in this bitmap is set if regno N is used in a subreg
91 which changes the mode but not the size. This typically happens
92 when the register accessed as a floating-point value; we want to
93 avoid generating accesses to its subwords in integer modes. */
94 static bitmap subreg_context;
96 /* Bit N in the bitmap in element M of this array is set if there is a
97 copy from reg M to reg N. */
98 static vec<bitmap> reg_copy_graph;
100 struct target_lower_subreg default_target_lower_subreg;
101 #if SWITCHABLE_TARGET
102 struct target_lower_subreg *this_target_lower_subreg
103 = &default_target_lower_subreg;
104 #endif
106 #define twice_word_mode \
107 this_target_lower_subreg->x_twice_word_mode
108 #define choices \
109 this_target_lower_subreg->x_choices
111 /* RTXes used while computing costs. */
112 struct cost_rtxes {
113 /* Source and target registers. */
114 rtx source;
115 rtx target;
117 /* A twice_word_mode ZERO_EXTEND of SOURCE. */
118 rtx zext;
120 /* A shift of SOURCE. */
121 rtx shift;
123 /* A SET of TARGET. */
124 rtx set;
127 /* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
128 rtxes in RTXES. SPEED_P selects between the speed and size cost. */
130 static int
131 shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
132 enum machine_mode mode, int op1)
134 PUT_CODE (rtxes->shift, code);
135 PUT_MODE (rtxes->shift, mode);
136 PUT_MODE (rtxes->source, mode);
137 XEXP (rtxes->shift, 1) = GEN_INT (op1);
138 return set_src_cost (rtxes->shift, speed_p);
141 /* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
142 to true if it is profitable to split a double-word CODE shift
143 of X + BITS_PER_WORD bits. SPEED_P says whether we are testing
144 for speed or size profitability.
146 Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is
147 the cost of moving zero into a word-mode register. WORD_MOVE_COST
148 is the cost of moving between word registers. */
150 static void
151 compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
152 bool *splitting, enum rtx_code code,
153 int word_move_zero_cost, int word_move_cost)
155 int wide_cost, narrow_cost, upper_cost, i;
157 for (i = 0; i < BITS_PER_WORD; i++)
159 wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
160 i + BITS_PER_WORD);
161 if (i == 0)
162 narrow_cost = word_move_cost;
163 else
164 narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
166 if (code != ASHIFTRT)
167 upper_cost = word_move_zero_cost;
168 else if (i == BITS_PER_WORD - 1)
169 upper_cost = word_move_cost;
170 else
171 upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
172 BITS_PER_WORD - 1);
174 if (LOG_COSTS)
175 fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
176 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
177 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);
179 if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
180 splitting[i] = true;
184 /* Compute what we should do when optimizing for speed or size; SPEED_P
185 selects which. Use RTXES for computing costs. */
187 static void
188 compute_costs (bool speed_p, struct cost_rtxes *rtxes)
190 unsigned int i;
191 int word_move_zero_cost, word_move_cost;
193 PUT_MODE (rtxes->target, word_mode);
194 SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
195 word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);
197 SET_SRC (rtxes->set) = rtxes->source;
198 word_move_cost = set_rtx_cost (rtxes->set, speed_p);
200 if (LOG_COSTS)
201 fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
202 GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);
204 for (i = 0; i < MAX_MACHINE_MODE; i++)
206 enum machine_mode mode = (enum machine_mode) i;
207 int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
208 if (factor > 1)
210 int mode_move_cost;
212 PUT_MODE (rtxes->target, mode);
213 PUT_MODE (rtxes->source, mode);
214 mode_move_cost = set_rtx_cost (rtxes->set, speed_p);
216 if (LOG_COSTS)
217 fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
218 GET_MODE_NAME (mode), mode_move_cost,
219 word_move_cost, factor);
221 if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
223 choices[speed_p].move_modes_to_split[i] = true;
224 choices[speed_p].something_to_do = true;
229 /* For the moves and shifts, the only case that is checked is one
230 where the mode of the target is an integer mode twice the width
231 of the word_mode.
233 If it is not profitable to split a double word move then do not
234 even consider the shifts or the zero extension. */
235 if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
237 int zext_cost;
239 /* The only case here to check to see if moving the upper part with a
240 zero is cheaper than doing the zext itself. */
241 PUT_MODE (rtxes->source, word_mode);
242 zext_cost = set_src_cost (rtxes->zext, speed_p);
244 if (LOG_COSTS)
245 fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
246 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
247 zext_cost, word_move_cost, word_move_zero_cost);
249 if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
250 choices[speed_p].splitting_zext = true;
252 compute_splitting_shift (speed_p, rtxes,
253 choices[speed_p].splitting_ashift, ASHIFT,
254 word_move_zero_cost, word_move_cost);
255 compute_splitting_shift (speed_p, rtxes,
256 choices[speed_p].splitting_lshiftrt, LSHIFTRT,
257 word_move_zero_cost, word_move_cost);
258 compute_splitting_shift (speed_p, rtxes,
259 choices[speed_p].splitting_ashiftrt, ASHIFTRT,
260 word_move_zero_cost, word_move_cost);
264 /* Do one-per-target initialisation. This involves determining
265 which operations on the machine are profitable. If none are found,
266 then the pass just returns when called. */
268 void
269 init_lower_subreg (void)
271 struct cost_rtxes rtxes;
273 memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
275 twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode);
277 rtxes.target = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER);
278 rtxes.source = gen_rtx_REG (word_mode, FIRST_PSEUDO_REGISTER + 1);
279 rtxes.set = gen_rtx_SET (VOIDmode, rtxes.target, rtxes.source);
280 rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
281 rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
283 if (LOG_COSTS)
284 fprintf (stderr, "\nSize costs\n==========\n\n");
285 compute_costs (false, &rtxes);
287 if (LOG_COSTS)
288 fprintf (stderr, "\nSpeed costs\n===========\n\n");
289 compute_costs (true, &rtxes);
292 static bool
293 simple_move_operand (rtx x)
295 if (GET_CODE (x) == SUBREG)
296 x = SUBREG_REG (x);
298 if (!OBJECT_P (x))
299 return false;
301 if (GET_CODE (x) == LABEL_REF
302 || GET_CODE (x) == SYMBOL_REF
303 || GET_CODE (x) == HIGH
304 || GET_CODE (x) == CONST)
305 return false;
307 if (MEM_P (x)
308 && (MEM_VOLATILE_P (x)
309 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
310 return false;
312 return true;
315 /* If INSN is a single set between two objects that we want to split,
316 return the single set. SPEED_P says whether we are optimizing
317 INSN for speed or size.
319 INSN should have been passed to recog and extract_insn before this
320 is called. */
322 static rtx
323 simple_move (rtx insn, bool speed_p)
325 rtx x;
326 rtx set;
327 enum machine_mode mode;
329 if (recog_data.n_operands != 2)
330 return NULL_RTX;
332 set = single_set (insn);
333 if (!set)
334 return NULL_RTX;
336 x = SET_DEST (set);
337 if (x != recog_data.operand[0] && x != recog_data.operand[1])
338 return NULL_RTX;
339 if (!simple_move_operand (x))
340 return NULL_RTX;
342 x = SET_SRC (set);
343 if (x != recog_data.operand[0] && x != recog_data.operand[1])
344 return NULL_RTX;
345 /* For the src we can handle ASM_OPERANDS, and it is beneficial for
346 things like x86 rdtsc which returns a DImode value. */
347 if (GET_CODE (x) != ASM_OPERANDS
348 && !simple_move_operand (x))
349 return NULL_RTX;
351 /* We try to decompose in integer modes, to avoid generating
352 inefficient code copying between integer and floating point
353 registers. That means that we can't decompose if this is a
354 non-integer mode for which there is no integer mode of the same
355 size. */
356 mode = GET_MODE (SET_DEST (set));
357 if (!SCALAR_INT_MODE_P (mode)
358 && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0)
359 == BLKmode))
360 return NULL_RTX;
362 /* Reject PARTIAL_INT modes. They are used for processor specific
363 purposes and it's probably best not to tamper with them. */
364 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
365 return NULL_RTX;
367 if (!choices[speed_p].move_modes_to_split[(int) mode])
368 return NULL_RTX;
370 return set;
373 /* If SET is a copy from one multi-word pseudo-register to another,
374 record that in reg_copy_graph. Return whether it is such a
375 copy. */
377 static bool
378 find_pseudo_copy (rtx set)
380 rtx dest = SET_DEST (set);
381 rtx src = SET_SRC (set);
382 unsigned int rd, rs;
383 bitmap b;
385 if (!REG_P (dest) || !REG_P (src))
386 return false;
388 rd = REGNO (dest);
389 rs = REGNO (src);
390 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
391 return false;
393 b = reg_copy_graph[rs];
394 if (b == NULL)
396 b = BITMAP_ALLOC (NULL);
397 reg_copy_graph[rs] = b;
400 bitmap_set_bit (b, rd);
402 return true;
405 /* Look through the registers in DECOMPOSABLE_CONTEXT. For each case
406 where they are copied to another register, add the register to
407 which they are copied to DECOMPOSABLE_CONTEXT. Use
408 NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
409 copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */
411 static void
412 propagate_pseudo_copies (void)
414 bitmap queue, propagate;
416 queue = BITMAP_ALLOC (NULL);
417 propagate = BITMAP_ALLOC (NULL);
419 bitmap_copy (queue, decomposable_context);
422 bitmap_iterator iter;
423 unsigned int i;
425 bitmap_clear (propagate);
427 EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
429 bitmap b = reg_copy_graph[i];
430 if (b)
431 bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
434 bitmap_and_compl (queue, propagate, decomposable_context);
435 bitmap_ior_into (decomposable_context, propagate);
437 while (!bitmap_empty_p (queue));
439 BITMAP_FREE (queue);
440 BITMAP_FREE (propagate);
/* A pointer to one of these values is passed to
   find_decomposable_subregs via for_each_rtx.  */

enum classify_move_insn
{
  /* Not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* A simple move we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* Any other simple move.  */
  SIMPLE_MOVE
};
456 /* This is called via for_each_rtx. If we find a SUBREG which we
457 could use to decompose a pseudo-register, set a bit in
458 DECOMPOSABLE_CONTEXT. If we find an unadorned register which is
459 not a simple pseudo-register copy, DATA will point at the type of
460 move, and we set a bit in DECOMPOSABLE_CONTEXT or
461 NON_DECOMPOSABLE_CONTEXT as appropriate. */
463 static int
464 find_decomposable_subregs (rtx *px, void *data)
466 enum classify_move_insn *pcmi = (enum classify_move_insn *) data;
467 rtx x = *px;
469 if (x == NULL_RTX)
470 return 0;
472 if (GET_CODE (x) == SUBREG)
474 rtx inner = SUBREG_REG (x);
475 unsigned int regno, outer_size, inner_size, outer_words, inner_words;
477 if (!REG_P (inner))
478 return 0;
480 regno = REGNO (inner);
481 if (HARD_REGISTER_NUM_P (regno))
482 return -1;
484 outer_size = GET_MODE_SIZE (GET_MODE (x));
485 inner_size = GET_MODE_SIZE (GET_MODE (inner));
486 outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
487 inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
489 /* We only try to decompose single word subregs of multi-word
490 registers. When we find one, we return -1 to avoid iterating
491 over the inner register.
493 ??? This doesn't allow, e.g., DImode subregs of TImode values
494 on 32-bit targets. We would need to record the way the
495 pseudo-register was used, and only decompose if all the uses
496 were the same number and size of pieces. Hopefully this
497 doesn't happen much. */
499 if (outer_words == 1 && inner_words > 1)
501 bitmap_set_bit (decomposable_context, regno);
502 return -1;
505 /* If this is a cast from one mode to another, where the modes
506 have the same size, and they are not tieable, then mark this
507 register as non-decomposable. If we decompose it we are
508 likely to mess up whatever the backend is trying to do. */
509 if (outer_words > 1
510 && outer_size == inner_size
511 && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner)))
513 bitmap_set_bit (non_decomposable_context, regno);
514 bitmap_set_bit (subreg_context, regno);
515 return -1;
518 else if (REG_P (x))
520 unsigned int regno;
522 /* We will see an outer SUBREG before we see the inner REG, so
523 when we see a plain REG here it means a direct reference to
524 the register.
526 If this is not a simple copy from one location to another,
527 then we can not decompose this register. If this is a simple
528 copy we want to decompose, and the mode is right,
529 then we mark the register as decomposable.
530 Otherwise we don't say anything about this register --
531 it could be decomposed, but whether that would be
532 profitable depends upon how it is used elsewhere.
534 We only set bits in the bitmap for multi-word
535 pseudo-registers, since those are the only ones we care about
536 and it keeps the size of the bitmaps down. */
538 regno = REGNO (x);
539 if (!HARD_REGISTER_NUM_P (regno)
540 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
542 switch (*pcmi)
544 case NOT_SIMPLE_MOVE:
545 bitmap_set_bit (non_decomposable_context, regno);
546 break;
547 case DECOMPOSABLE_SIMPLE_MOVE:
548 if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
549 bitmap_set_bit (decomposable_context, regno);
550 break;
551 case SIMPLE_MOVE:
552 break;
553 default:
554 gcc_unreachable ();
558 else if (MEM_P (x))
560 enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;
562 /* Any registers used in a MEM do not participate in a
563 SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE. Do our own recursion
564 here, and return -1 to block the parent's recursion. */
565 for_each_rtx (&XEXP (x, 0), find_decomposable_subregs, &cmi_mem);
566 return -1;
569 return 0;
572 /* Decompose REGNO into word-sized components. We smash the REG node
573 in place. This ensures that (1) something goes wrong quickly if we
574 fail to make some replacement, and (2) the debug information inside
575 the symbol table is automatically kept up to date. */
577 static void
578 decompose_register (unsigned int regno)
580 rtx reg;
581 unsigned int words, i;
582 rtvec v;
584 reg = regno_reg_rtx[regno];
586 regno_reg_rtx[regno] = NULL_RTX;
588 words = GET_MODE_SIZE (GET_MODE (reg));
589 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
591 v = rtvec_alloc (words);
592 for (i = 0; i < words; ++i)
593 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
595 PUT_CODE (reg, CONCATN);
596 XVEC (reg, 0) = v;
598 if (dump_file)
600 fprintf (dump_file, "; Splitting reg %u ->", regno);
601 for (i = 0; i < words; ++i)
602 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
603 fputc ('\n', dump_file);
607 /* Get a SUBREG of a CONCATN. */
609 static rtx
610 simplify_subreg_concatn (enum machine_mode outermode, rtx op,
611 unsigned int byte)
613 unsigned int inner_size;
614 enum machine_mode innermode, partmode;
615 rtx part;
616 unsigned int final_offset;
618 gcc_assert (GET_CODE (op) == CONCATN);
619 gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);
621 innermode = GET_MODE (op);
622 gcc_assert (byte < GET_MODE_SIZE (innermode));
623 gcc_assert (GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (innermode));
625 inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
626 part = XVECEXP (op, 0, byte / inner_size);
627 partmode = GET_MODE (part);
629 /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
630 regular CONST_VECTORs. They have vector or integer modes, depending
631 on the capabilities of the target. Cope with them. */
632 if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
633 partmode = GET_MODE_INNER (innermode);
634 else if (partmode == VOIDmode)
636 enum mode_class mclass = GET_MODE_CLASS (innermode);
637 partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0);
640 final_offset = byte % inner_size;
641 if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
642 return NULL_RTX;
644 return simplify_gen_subreg (outermode, part, partmode, final_offset);
647 /* Wrapper around simplify_gen_subreg which handles CONCATN. */
649 static rtx
650 simplify_gen_subreg_concatn (enum machine_mode outermode, rtx op,
651 enum machine_mode innermode, unsigned int byte)
653 rtx ret;
655 /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
656 If OP is a SUBREG of a CONCATN, then it must be a simple mode
657 change with the same size and offset 0, or it must extract a
658 part. We shouldn't see anything else here. */
659 if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
661 rtx op2;
663 if ((GET_MODE_SIZE (GET_MODE (op))
664 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
665 && SUBREG_BYTE (op) == 0)
666 return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
667 GET_MODE (SUBREG_REG (op)), byte);
669 op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
670 SUBREG_BYTE (op));
671 if (op2 == NULL_RTX)
673 /* We don't handle paradoxical subregs here. */
674 gcc_assert (GET_MODE_SIZE (outermode)
675 <= GET_MODE_SIZE (GET_MODE (op)));
676 gcc_assert (GET_MODE_SIZE (GET_MODE (op))
677 <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))));
678 op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
679 byte + SUBREG_BYTE (op));
680 gcc_assert (op2 != NULL_RTX);
681 return op2;
684 op = op2;
685 gcc_assert (op != NULL_RTX);
686 gcc_assert (innermode == GET_MODE (op));
689 if (GET_CODE (op) == CONCATN)
690 return simplify_subreg_concatn (outermode, op, byte);
692 ret = simplify_gen_subreg (outermode, op, innermode, byte);
694 /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
695 resolve_simple_move will ask for the high part of the paradoxical
696 subreg, which does not have a value. Just return a zero. */
697 if (ret == NULL_RTX
698 && GET_CODE (op) == SUBREG
699 && SUBREG_BYTE (op) == 0
700 && (GET_MODE_SIZE (innermode)
701 > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))))
702 return CONST0_RTX (outermode);
704 gcc_assert (ret != NULL_RTX);
705 return ret;
708 /* Return whether we should resolve X into the registers into which it
709 was decomposed. */
711 static bool
712 resolve_reg_p (rtx x)
714 return GET_CODE (x) == CONCATN;
717 /* Return whether X is a SUBREG of a register which we need to
718 resolve. */
720 static bool
721 resolve_subreg_p (rtx x)
723 if (GET_CODE (x) != SUBREG)
724 return false;
725 return resolve_reg_p (SUBREG_REG (x));
728 /* This is called via for_each_rtx. Look for SUBREGs which need to be
729 decomposed. */
731 static int
732 resolve_subreg_use (rtx *px, void *data)
734 rtx insn = (rtx) data;
735 rtx x = *px;
737 if (x == NULL_RTX)
738 return 0;
740 if (resolve_subreg_p (x))
742 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
743 SUBREG_BYTE (x));
745 /* It is possible for a note to contain a reference which we can
746 decompose. In this case, return 1 to the caller to indicate
747 that the note must be removed. */
748 if (!x)
750 gcc_assert (!insn);
751 return 1;
754 validate_change (insn, px, x, 1);
755 return -1;
758 if (resolve_reg_p (x))
760 /* Return 1 to the caller to indicate that we found a direct
761 reference to a register which is being decomposed. This can
762 happen inside notes, multiword shift or zero-extend
763 instructions. */
764 return 1;
767 return 0;
770 /* This is called via for_each_rtx. Look for SUBREGs which can be
771 decomposed and decomposed REGs that need copying. */
773 static int
774 adjust_decomposed_uses (rtx *px, void *data ATTRIBUTE_UNUSED)
776 rtx x = *px;
778 if (x == NULL_RTX)
779 return 0;
781 if (resolve_subreg_p (x))
783 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
784 SUBREG_BYTE (x));
786 if (x)
787 *px = x;
788 else
789 x = copy_rtx (*px);
792 if (resolve_reg_p (x))
793 *px = copy_rtx (x);
795 return 0;
798 /* Resolve any decomposed registers which appear in register notes on
799 INSN. */
801 static void
802 resolve_reg_notes (rtx insn)
804 rtx *pnote, note;
806 note = find_reg_equal_equiv_note (insn);
807 if (note)
809 int old_count = num_validated_changes ();
810 if (for_each_rtx (&XEXP (note, 0), resolve_subreg_use, NULL))
811 remove_note (insn, note);
812 else
813 if (old_count != num_validated_changes ())
814 df_notes_rescan (insn);
817 pnote = &REG_NOTES (insn);
818 while (*pnote != NULL_RTX)
820 bool del = false;
822 note = *pnote;
823 switch (REG_NOTE_KIND (note))
825 case REG_DEAD:
826 case REG_UNUSED:
827 if (resolve_reg_p (XEXP (note, 0)))
828 del = true;
829 break;
831 default:
832 break;
835 if (del)
836 *pnote = XEXP (note, 1);
837 else
838 pnote = &XEXP (note, 1);
842 /* Return whether X can be decomposed into subwords. */
844 static bool
845 can_decompose_p (rtx x)
847 if (REG_P (x))
849 unsigned int regno = REGNO (x);
851 if (HARD_REGISTER_NUM_P (regno))
853 unsigned int byte, num_bytes;
855 num_bytes = GET_MODE_SIZE (GET_MODE (x));
856 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
857 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
858 return false;
859 return true;
861 else
862 return !bitmap_bit_p (subreg_context, regno);
865 return true;
868 /* Decompose the registers used in a simple move SET within INSN. If
869 we don't change anything, return INSN, otherwise return the start
870 of the sequence of moves. */
872 static rtx
873 resolve_simple_move (rtx set, rtx insn)
875 rtx src, dest, real_dest, insns;
876 enum machine_mode orig_mode, dest_mode;
877 unsigned int words;
878 bool pushing;
880 src = SET_SRC (set);
881 dest = SET_DEST (set);
882 orig_mode = GET_MODE (dest);
884 words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
885 gcc_assert (words > 1);
887 start_sequence ();
889 /* We have to handle copying from a SUBREG of a decomposed reg where
890 the SUBREG is larger than word size. Rather than assume that we
891 can take a word_mode SUBREG of the destination, we copy to a new
892 register and then copy that to the destination. */
894 real_dest = NULL_RTX;
896 if (GET_CODE (src) == SUBREG
897 && resolve_reg_p (SUBREG_REG (src))
898 && (SUBREG_BYTE (src) != 0
899 || (GET_MODE_SIZE (orig_mode)
900 != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
902 real_dest = dest;
903 dest = gen_reg_rtx (orig_mode);
904 if (REG_P (real_dest))
905 REG_ATTRS (dest) = REG_ATTRS (real_dest);
908 /* Similarly if we are copying to a SUBREG of a decomposed reg where
909 the SUBREG is larger than word size. */
911 if (GET_CODE (dest) == SUBREG
912 && resolve_reg_p (SUBREG_REG (dest))
913 && (SUBREG_BYTE (dest) != 0
914 || (GET_MODE_SIZE (orig_mode)
915 != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
917 rtx reg, minsn, smove;
919 reg = gen_reg_rtx (orig_mode);
920 minsn = emit_move_insn (reg, src);
921 smove = single_set (minsn);
922 gcc_assert (smove != NULL_RTX);
923 resolve_simple_move (smove, minsn);
924 src = reg;
927 /* If we didn't have any big SUBREGS of decomposed registers, and
928 neither side of the move is a register we are decomposing, then
929 we don't have to do anything here. */
931 if (src == SET_SRC (set)
932 && dest == SET_DEST (set)
933 && !resolve_reg_p (src)
934 && !resolve_subreg_p (src)
935 && !resolve_reg_p (dest)
936 && !resolve_subreg_p (dest))
938 end_sequence ();
939 return insn;
942 /* It's possible for the code to use a subreg of a decomposed
943 register while forming an address. We need to handle that before
944 passing the address to emit_move_insn. We pass NULL_RTX as the
945 insn parameter to resolve_subreg_use because we can not validate
946 the insn yet. */
947 if (MEM_P (src) || MEM_P (dest))
949 int acg;
951 if (MEM_P (src))
952 for_each_rtx (&XEXP (src, 0), resolve_subreg_use, NULL_RTX);
953 if (MEM_P (dest))
954 for_each_rtx (&XEXP (dest, 0), resolve_subreg_use, NULL_RTX);
955 acg = apply_change_group ();
956 gcc_assert (acg);
959 /* If SRC is a register which we can't decompose, or has side
960 effects, we need to move via a temporary register. */
962 if (!can_decompose_p (src)
963 || side_effects_p (src)
964 || GET_CODE (src) == ASM_OPERANDS)
966 rtx reg;
968 reg = gen_reg_rtx (orig_mode);
970 #ifdef AUTO_INC_DEC
972 rtx move = emit_move_insn (reg, src);
973 if (MEM_P (src))
975 rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
976 if (note)
977 add_reg_note (move, REG_INC, XEXP (note, 0));
980 #else
981 emit_move_insn (reg, src);
982 #endif
983 src = reg;
986 /* If DEST is a register which we can't decompose, or has side
987 effects, we need to first move to a temporary register. We
988 handle the common case of pushing an operand directly. We also
989 go through a temporary register if it holds a floating point
990 value. This gives us better code on systems which can't move
991 data easily between integer and floating point registers. */
993 dest_mode = orig_mode;
994 pushing = push_operand (dest, dest_mode);
995 if (!can_decompose_p (dest)
996 || (side_effects_p (dest) && !pushing)
997 || (!SCALAR_INT_MODE_P (dest_mode)
998 && !resolve_reg_p (dest)
999 && !resolve_subreg_p (dest)))
1001 if (real_dest == NULL_RTX)
1002 real_dest = dest;
1003 if (!SCALAR_INT_MODE_P (dest_mode))
1005 dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
1006 MODE_INT, 0);
1007 gcc_assert (dest_mode != BLKmode);
1009 dest = gen_reg_rtx (dest_mode);
1010 if (REG_P (real_dest))
1011 REG_ATTRS (dest) = REG_ATTRS (real_dest);
1014 if (pushing)
1016 unsigned int i, j, jinc;
1018 gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
1019 gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
1020 gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);
1022 if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
1024 j = 0;
1025 jinc = 1;
1027 else
1029 j = words - 1;
1030 jinc = -1;
1033 for (i = 0; i < words; ++i, j += jinc)
1035 rtx temp;
1037 temp = copy_rtx (XEXP (dest, 0));
1038 temp = adjust_automodify_address_nv (dest, word_mode, temp,
1039 j * UNITS_PER_WORD);
1040 emit_move_insn (temp,
1041 simplify_gen_subreg_concatn (word_mode, src,
1042 orig_mode,
1043 j * UNITS_PER_WORD));
1046 else
1048 unsigned int i;
1050 if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
1051 emit_clobber (dest);
1053 for (i = 0; i < words; ++i)
1054 emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
1055 dest_mode,
1056 i * UNITS_PER_WORD),
1057 simplify_gen_subreg_concatn (word_mode, src,
1058 orig_mode,
1059 i * UNITS_PER_WORD));
1062 if (real_dest != NULL_RTX)
1064 rtx mdest, minsn, smove;
1066 if (dest_mode == orig_mode)
1067 mdest = dest;
1068 else
1069 mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
1070 minsn = emit_move_insn (real_dest, mdest);
1072 #ifdef AUTO_INC_DEC
1073 if (MEM_P (real_dest)
1074 && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
1076 rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
1077 if (note)
1078 add_reg_note (minsn, REG_INC, XEXP (note, 0));
1080 #endif
1082 smove = single_set (minsn);
1083 gcc_assert (smove != NULL_RTX);
1085 resolve_simple_move (smove, minsn);
1088 insns = get_insns ();
1089 end_sequence ();
1091 copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);
1093 emit_insn_before (insns, insn);
1095 delete_insn (insn);
1097 return insns;
1100 /* Change a CLOBBER of a decomposed register into a CLOBBER of the
1101 component registers. Return whether we changed something. */
1103 static bool
1104 resolve_clobber (rtx pat, rtx insn)
1106 rtx reg;
1107 enum machine_mode orig_mode;
1108 unsigned int words, i;
1109 int ret;
1111 reg = XEXP (pat, 0);
1112 if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
1113 return false;
1115 orig_mode = GET_MODE (reg);
1116 words = GET_MODE_SIZE (orig_mode);
1117 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1119 ret = validate_change (NULL_RTX, &XEXP (pat, 0),
1120 simplify_gen_subreg_concatn (word_mode, reg,
1121 orig_mode, 0),
1123 df_insn_rescan (insn);
1124 gcc_assert (ret != 0);
1126 for (i = words - 1; i > 0; --i)
1128 rtx x;
1130 x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
1131 i * UNITS_PER_WORD);
1132 x = gen_rtx_CLOBBER (VOIDmode, x);
1133 emit_insn_after (x, insn);
1136 resolve_reg_notes (insn);
1138 return true;
1141 /* A USE of a decomposed register is no longer meaningful. Return
1142 whether we changed something. */
1144 static bool
1145 resolve_use (rtx pat, rtx insn)
1147 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1149 delete_insn (insn);
1150 return true;
1153 resolve_reg_notes (insn);
1155 return false;
1158 /* A VAR_LOCATION can be simplified. */
1160 static void
1161 resolve_debug (rtx insn)
1163 for_each_rtx (&PATTERN (insn), adjust_decomposed_uses, NULL_RTX);
1165 df_insn_rescan (insn);
1167 resolve_reg_notes (insn);
1170 /* Check if INSN is a decomposable multiword-shift or zero-extend and
1171 set the decomposable_context bitmap accordingly. SPEED_P is true
1172 if we are optimizing INSN for speed rather than size. Return true
1173 if INSN is decomposable. */
1175 static bool
1176 find_decomposable_shift_zext (rtx insn, bool speed_p)
1178 rtx set;
1179 rtx op;
1180 rtx op_operand;
1182 set = single_set (insn);
1183 if (!set)
1184 return false;
1186 op = SET_SRC (set);
1187 if (GET_CODE (op) != ASHIFT
1188 && GET_CODE (op) != LSHIFTRT
1189 && GET_CODE (op) != ASHIFTRT
1190 && GET_CODE (op) != ZERO_EXTEND)
1191 return false;
1193 op_operand = XEXP (op, 0);
1194 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1195 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1196 || HARD_REGISTER_NUM_P (REGNO (op_operand))
1197 || GET_MODE (op) != twice_word_mode)
1198 return false;
1200 if (GET_CODE (op) == ZERO_EXTEND)
1202 if (GET_MODE (op_operand) != word_mode
1203 || !choices[speed_p].splitting_zext)
1204 return false;
1206 else /* left or right shift */
1208 bool *splitting = (GET_CODE (op) == ASHIFT
1209 ? choices[speed_p].splitting_ashift
1210 : GET_CODE (op) == ASHIFTRT
1211 ? choices[speed_p].splitting_ashiftrt
1212 : choices[speed_p].splitting_lshiftrt);
1213 if (!CONST_INT_P (XEXP (op, 1))
1214 || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1215 2 * BITS_PER_WORD - 1)
1216 || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1217 return false;
1219 bitmap_set_bit (decomposable_context, REGNO (op_operand));
1222 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1224 return true;
1227 /* Decompose a more than word wide shift (in INSN) of a multiword
1228 pseudo or a multiword zero-extend of a wordmode pseudo into a move
1229 and 'set to zero' insn. Return a pointer to the new insn when a
1230 replacement was done. */
1232 static rtx
1233 resolve_shift_zext (rtx insn)
1235 rtx set;
1236 rtx op;
1237 rtx op_operand;
1238 rtx insns;
1239 rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
1240 int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
1242 set = single_set (insn);
1243 if (!set)
1244 return NULL_RTX;
1246 op = SET_SRC (set);
1247 if (GET_CODE (op) != ASHIFT
1248 && GET_CODE (op) != LSHIFTRT
1249 && GET_CODE (op) != ASHIFTRT
1250 && GET_CODE (op) != ZERO_EXTEND)
1251 return NULL_RTX;
1253 op_operand = XEXP (op, 0);
1255 /* We can tear this operation apart only if the regs were already
1256 torn apart. */
1257 if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
1258 return NULL_RTX;
1260 /* src_reg_num is the number of the word mode register which we
1261 are operating on. For a left shift and a zero_extend on little
1262 endian machines this is register 0. */
1263 src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
1264 ? 1 : 0;
1266 if (WORDS_BIG_ENDIAN
1267 && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
1268 src_reg_num = 1 - src_reg_num;
1270 if (GET_CODE (op) == ZERO_EXTEND)
1271 dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
1272 else
1273 dest_reg_num = 1 - src_reg_num;
1275 offset1 = UNITS_PER_WORD * dest_reg_num;
1276 offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
1277 src_offset = UNITS_PER_WORD * src_reg_num;
1279 start_sequence ();
1281 dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1282 GET_MODE (SET_DEST (set)),
1283 offset1);
1284 dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1285 GET_MODE (SET_DEST (set)),
1286 offset2);
1287 src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
1288 GET_MODE (op_operand),
1289 src_offset);
1290 if (GET_CODE (op) == ASHIFTRT
1291 && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
1292 upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
1293 BITS_PER_WORD - 1, NULL_RTX, 0);
1295 if (GET_CODE (op) != ZERO_EXTEND)
1297 int shift_count = INTVAL (XEXP (op, 1));
1298 if (shift_count > BITS_PER_WORD)
1299 src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
1300 LSHIFT_EXPR : RSHIFT_EXPR,
1301 word_mode, src_reg,
1302 shift_count - BITS_PER_WORD,
1303 dest_reg, GET_CODE (op) != ASHIFTRT);
1306 if (dest_reg != src_reg)
1307 emit_move_insn (dest_reg, src_reg);
1308 if (GET_CODE (op) != ASHIFTRT)
1309 emit_move_insn (dest_upper, CONST0_RTX (word_mode));
1310 else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
1311 emit_move_insn (dest_upper, copy_rtx (src_reg));
1312 else
1313 emit_move_insn (dest_upper, upper_src);
1314 insns = get_insns ();
1316 end_sequence ();
1318 emit_insn_before (insns, insn);
1320 if (dump_file)
1322 rtx in;
1323 fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
1324 for (in = insns; in != insn; in = NEXT_INSN (in))
1325 fprintf (dump_file, "%d ", INSN_UID (in));
1326 fprintf (dump_file, "\n");
1329 delete_insn (insn);
1330 return insns;
1333 /* Print to dump_file a description of what we're doing with shift code CODE.
1334 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1336 static void
1337 dump_shift_choices (enum rtx_code code, bool *splitting)
1339 int i;
1340 const char *sep;
1342 fprintf (dump_file,
1343 " Splitting mode %s for %s lowering with shift amounts = ",
1344 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1345 sep = "";
1346 for (i = 0; i < BITS_PER_WORD; i++)
1347 if (splitting[i])
1349 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1350 sep = ",";
1352 fprintf (dump_file, "\n");
1355 /* Print to dump_file a description of what we're doing when optimizing
1356 for speed or size; SPEED_P says which. DESCRIPTION is a description
1357 of the SPEED_P choice. */
1359 static void
1360 dump_choices (bool speed_p, const char *description)
1362 unsigned int i;
1364 fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1366 for (i = 0; i < MAX_MACHINE_MODE; i++)
1367 if (GET_MODE_SIZE (i) > UNITS_PER_WORD)
1368 fprintf (dump_file, " %s mode %s for copy lowering.\n",
1369 choices[speed_p].move_modes_to_split[i]
1370 ? "Splitting"
1371 : "Skipping",
1372 GET_MODE_NAME ((enum machine_mode) i));
1374 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
1375 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1376 GET_MODE_NAME (twice_word_mode));
1378 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1379 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1380 dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
1381 fprintf (dump_file, "\n");
1384 /* Look for registers which are always accessed via word-sized SUBREGs
1385 or -if DECOMPOSE_COPIES is true- via copies. Decompose these
1386 registers into several word-sized pseudo-registers. */
1388 static void
1389 decompose_multiword_subregs (bool decompose_copies)
1391 unsigned int max;
1392 basic_block bb;
1393 bool speed_p;
1395 if (dump_file)
1397 dump_choices (false, "size");
1398 dump_choices (true, "speed");
1401 /* Check if this target even has any modes to consider lowering. */
1402 if (!choices[false].something_to_do && !choices[true].something_to_do)
1404 if (dump_file)
1405 fprintf (dump_file, "Nothing to do!\n");
1406 return;
1409 max = max_reg_num ();
1411 /* First see if there are any multi-word pseudo-registers. If there
1412 aren't, there is nothing we can do. This should speed up this
1413 pass in the normal case, since it should be faster than scanning
1414 all the insns. */
1416 unsigned int i;
1417 bool useful_modes_seen = false;
1419 for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
1420 if (regno_reg_rtx[i] != NULL)
1422 enum machine_mode mode = GET_MODE (regno_reg_rtx[i]);
1423 if (choices[false].move_modes_to_split[(int) mode]
1424 || choices[true].move_modes_to_split[(int) mode])
1426 useful_modes_seen = true;
1427 break;
1431 if (!useful_modes_seen)
1433 if (dump_file)
1434 fprintf (dump_file, "Nothing to lower in this function.\n");
1435 return;
1439 if (df)
1441 df_set_flags (DF_DEFER_INSN_RESCAN);
1442 run_word_dce ();
1445 /* FIXME: It may be possible to change this code to look for each
1446 multi-word pseudo-register and to find each insn which sets or
1447 uses that register. That should be faster than scanning all the
1448 insns. */
1450 decomposable_context = BITMAP_ALLOC (NULL);
1451 non_decomposable_context = BITMAP_ALLOC (NULL);
1452 subreg_context = BITMAP_ALLOC (NULL);
1454 reg_copy_graph.create (max);
1455 reg_copy_graph.safe_grow_cleared (max);
1456 memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);
1458 speed_p = optimize_function_for_speed_p (cfun);
1459 FOR_EACH_BB (bb)
1461 rtx insn;
1463 FOR_BB_INSNS (bb, insn)
1465 rtx set;
1466 enum classify_move_insn cmi;
1467 int i, n;
1469 if (!INSN_P (insn)
1470 || GET_CODE (PATTERN (insn)) == CLOBBER
1471 || GET_CODE (PATTERN (insn)) == USE)
1472 continue;
1474 recog_memoized (insn);
1476 if (find_decomposable_shift_zext (insn, speed_p))
1477 continue;
1479 extract_insn (insn);
1481 set = simple_move (insn, speed_p);
1483 if (!set)
1484 cmi = NOT_SIMPLE_MOVE;
1485 else
1487 /* We mark pseudo-to-pseudo copies as decomposable during the
1488 second pass only. The first pass is so early that there is
1489 good chance such moves will be optimized away completely by
1490 subsequent optimizations anyway.
1492 However, we call find_pseudo_copy even during the first pass
1493 so as to properly set up the reg_copy_graph. */
1494 if (find_pseudo_copy (set))
1495 cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
1496 else
1497 cmi = SIMPLE_MOVE;
1500 n = recog_data.n_operands;
1501 for (i = 0; i < n; ++i)
1503 for_each_rtx (&recog_data.operand[i],
1504 find_decomposable_subregs,
1505 &cmi);
1507 /* We handle ASM_OPERANDS as a special case to support
1508 things like x86 rdtsc which returns a DImode value.
1509 We can decompose the output, which will certainly be
1510 operand 0, but not the inputs. */
1512 if (cmi == SIMPLE_MOVE
1513 && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
1515 gcc_assert (i == 0);
1516 cmi = NOT_SIMPLE_MOVE;
1522 bitmap_and_compl_into (decomposable_context, non_decomposable_context);
1523 if (!bitmap_empty_p (decomposable_context))
1525 sbitmap sub_blocks;
1526 unsigned int i;
1527 sbitmap_iterator sbi;
1528 bitmap_iterator iter;
1529 unsigned int regno;
1531 propagate_pseudo_copies ();
1533 sub_blocks = sbitmap_alloc (last_basic_block);
1534 bitmap_clear (sub_blocks);
1536 EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
1537 decompose_register (regno);
1539 FOR_EACH_BB (bb)
1541 rtx insn;
1543 FOR_BB_INSNS (bb, insn)
1545 rtx pat;
1547 if (!INSN_P (insn))
1548 continue;
1550 pat = PATTERN (insn);
1551 if (GET_CODE (pat) == CLOBBER)
1552 resolve_clobber (pat, insn);
1553 else if (GET_CODE (pat) == USE)
1554 resolve_use (pat, insn);
1555 else if (DEBUG_INSN_P (insn))
1556 resolve_debug (insn);
1557 else
1559 rtx set;
1560 int i;
1562 recog_memoized (insn);
1563 extract_insn (insn);
1565 set = simple_move (insn, speed_p);
1566 if (set)
1568 rtx orig_insn = insn;
1569 bool cfi = control_flow_insn_p (insn);
1571 /* We can end up splitting loads to multi-word pseudos
1572 into separate loads to machine word size pseudos.
1573 When this happens, we first had one load that can
1574 throw, and after resolve_simple_move we'll have a
1575 bunch of loads (at least two). All those loads may
1576 trap if we can have non-call exceptions, so they
1577 all will end the current basic block. We split the
1578 block after the outer loop over all insns, but we
1579 make sure here that we will be able to split the
1580 basic block and still produce the correct control
1581 flow graph for it. */
1582 gcc_assert (!cfi
1583 || (cfun->can_throw_non_call_exceptions
1584 && can_throw_internal (insn)));
1586 insn = resolve_simple_move (set, insn);
1587 if (insn != orig_insn)
1589 recog_memoized (insn);
1590 extract_insn (insn);
1592 if (cfi)
1593 bitmap_set_bit (sub_blocks, bb->index);
1596 else
1598 rtx decomposed_shift;
1600 decomposed_shift = resolve_shift_zext (insn);
1601 if (decomposed_shift != NULL_RTX)
1603 insn = decomposed_shift;
1604 recog_memoized (insn);
1605 extract_insn (insn);
1609 for (i = recog_data.n_operands - 1; i >= 0; --i)
1610 for_each_rtx (recog_data.operand_loc[i],
1611 resolve_subreg_use,
1612 insn);
1614 resolve_reg_notes (insn);
1616 if (num_validated_changes () > 0)
1618 for (i = recog_data.n_dups - 1; i >= 0; --i)
1620 rtx *pl = recog_data.dup_loc[i];
1621 int dup_num = recog_data.dup_num[i];
1622 rtx *px = recog_data.operand_loc[dup_num];
1624 validate_unshare_change (insn, pl, *px, 1);
1627 i = apply_change_group ();
1628 gcc_assert (i);
1634 /* If we had insns to split that caused control flow insns in the middle
1635 of a basic block, split those blocks now. Note that we only handle
1636 the case where splitting a load has caused multiple possibly trapping
1637 loads to appear. */
1638 EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
1640 rtx insn, end;
1641 edge fallthru;
1643 bb = BASIC_BLOCK (i);
1644 insn = BB_HEAD (bb);
1645 end = BB_END (bb);
1647 while (insn != end)
1649 if (control_flow_insn_p (insn))
1651 /* Split the block after insn. There will be a fallthru
1652 edge, which is OK so we keep it. We have to create the
1653 exception edges ourselves. */
1654 fallthru = split_block (bb, insn);
1655 rtl_make_eh_edge (NULL, bb, BB_END (bb));
1656 bb = fallthru->dest;
1657 insn = BB_HEAD (bb);
1659 else
1660 insn = NEXT_INSN (insn);
1664 sbitmap_free (sub_blocks);
1668 unsigned int i;
1669 bitmap b;
1671 FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
1672 if (b)
1673 BITMAP_FREE (b);
1676 reg_copy_graph.release ();
1678 BITMAP_FREE (decomposable_context);
1679 BITMAP_FREE (non_decomposable_context);
1680 BITMAP_FREE (subreg_context);
1683 /* Gate function for lower subreg pass. */
1685 static bool
1686 gate_handle_lower_subreg (void)
1688 return flag_split_wide_types != 0;
1691 /* Implement first lower subreg pass. */
1693 static unsigned int
1694 rest_of_handle_lower_subreg (void)
1696 decompose_multiword_subregs (false);
1697 return 0;
1700 /* Implement second lower subreg pass. */
1702 static unsigned int
1703 rest_of_handle_lower_subreg2 (void)
1705 decompose_multiword_subregs (true);
1706 return 0;
1709 struct rtl_opt_pass pass_lower_subreg =
1712 RTL_PASS,
1713 "subreg1", /* name */
1714 OPTGROUP_NONE, /* optinfo_flags */
1715 gate_handle_lower_subreg, /* gate */
1716 rest_of_handle_lower_subreg, /* execute */
1717 NULL, /* sub */
1718 NULL, /* next */
1719 0, /* static_pass_number */
1720 TV_LOWER_SUBREG, /* tv_id */
1721 0, /* properties_required */
1722 0, /* properties_provided */
1723 0, /* properties_destroyed */
1724 0, /* todo_flags_start */
1725 TODO_ggc_collect |
1726 TODO_verify_flow /* todo_flags_finish */
1730 struct rtl_opt_pass pass_lower_subreg2 =
1733 RTL_PASS,
1734 "subreg2", /* name */
1735 OPTGROUP_NONE, /* optinfo_flags */
1736 gate_handle_lower_subreg, /* gate */
1737 rest_of_handle_lower_subreg2, /* execute */
1738 NULL, /* sub */
1739 NULL, /* next */
1740 0, /* static_pass_number */
1741 TV_LOWER_SUBREG, /* tv_id */
1742 0, /* properties_required */
1743 0, /* properties_provided */
1744 0, /* properties_destroyed */
1745 0, /* todo_flags_start */
1746 TODO_df_finish | TODO_verify_rtl_sharing |
1747 TODO_ggc_collect |
1748 TODO_verify_flow /* todo_flags_finish */