gcc/ada/
[official-gcc.git] / gcc / lower-subreg.c
blobbacb65f856d2b2526f334cbac263cf5d9a83a653
1 /* Decompose multiword subregs.
2 Copyright (C) 2007-2015 Free Software Foundation, Inc.
3 Contributed by Richard Henderson <rth@redhat.com>
4 Ian Lance Taylor <iant@google.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "alias.h"
27 #include "symtab.h"
28 #include "tree.h"
29 #include "rtl.h"
30 #include "tm_p.h"
31 #include "flags.h"
32 #include "insn-config.h"
33 #include "obstack.h"
34 #include "predict.h"
35 #include "hard-reg-set.h"
36 #include "function.h"
37 #include "dominance.h"
38 #include "cfg.h"
39 #include "cfgrtl.h"
40 #include "cfgbuild.h"
41 #include "basic-block.h"
42 #include "recog.h"
43 #include "bitmap.h"
44 #include "dce.h"
45 #include "expmed.h"
46 #include "dojump.h"
47 #include "explow.h"
48 #include "calls.h"
49 #include "emit-rtl.h"
50 #include "varasm.h"
51 #include "stmt.h"
52 #include "expr.h"
53 #include "except.h"
54 #include "regs.h"
55 #include "tree-pass.h"
56 #include "df.h"
57 #include "lower-subreg.h"
58 #include "rtl-iter.h"
61 /* Decompose multi-word pseudo-registers into individual
62 pseudo-registers when possible and profitable. This is possible
63 when all the uses of a multi-word register are via SUBREG, or are
64 copies of the register to another location. Breaking apart the
65 register permits more CSE and permits better register allocation.
66 This is profitable if the machine does not have move instructions
67 to do this.
69 This pass only splits moves with modes that are wider than
70 word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
71 integer modes that are twice the width of word_mode. The latter
72 could be generalized if there was a need to do this, but the trend in
73 architectures is to not need this.
75 There are two useful preprocessor defines for use by maintainers:
77 #define LOG_COSTS 1
79 if you wish to see the actual cost estimates that are being used
80 for each mode wider than word mode and the cost estimates for zero
81 extension and the shifts. This can be useful when port maintainers
82 are tuning insn rtx costs.
84 #define FORCE_LOWERING 1
86 if you wish to test the pass with all the transformation forced on.
87 This can be useful for finding bugs in the transformations. */
89 #define LOG_COSTS 0
90 #define FORCE_LOWERING 0
92 /* Bit N in this bitmap is set if regno N is used in a context in
93 which we can decompose it. */
94 static bitmap decomposable_context;
96 /* Bit N in this bitmap is set if regno N is used in a context in
97 which it can not be decomposed. */
98 static bitmap non_decomposable_context;
100 /* Bit N in this bitmap is set if regno N is used in a subreg
101 which changes the mode but not the size. This typically happens
102 when the register accessed as a floating-point value; we want to
103 avoid generating accesses to its subwords in integer modes. */
104 static bitmap subreg_context;
106 /* Bit N in the bitmap in element M of this array is set if there is a
107 copy from reg M to reg N. */
108 static vec<bitmap> reg_copy_graph;
110 struct target_lower_subreg default_target_lower_subreg;
111 #if SWITCHABLE_TARGET
112 struct target_lower_subreg *this_target_lower_subreg
113 = &default_target_lower_subreg;
114 #endif
116 #define twice_word_mode \
117 this_target_lower_subreg->x_twice_word_mode
118 #define choices \
119 this_target_lower_subreg->x_choices
/* RTXes used while computing costs.  These are scratch RTL objects
   that compute_costs/shift_cost mutate in place (via PUT_MODE,
   PUT_CODE, SET_SRC) instead of allocating fresh RTL for every
   cost query.  */
struct cost_rtxes {
  /* Source and target registers.  */
  rtx source;
  rtx target;

  /* A twice_word_mode ZERO_EXTEND of SOURCE.  */
  rtx zext;

  /* A shift of SOURCE.  Both the shift code and the modes are
     rewritten by shift_cost for each query.  */
  rtx shift;

  /* A SET of TARGET.  */
  rtx set;
};
/* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
   rtxes in RTXES.  SPEED_P selects between the speed and size cost.
   Note this mutates RTXES->shift and RTXES->source in place; callers
   must not rely on their previous code/mode afterwards.  */

static int
shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
	    machine_mode mode, int op1)
{
  PUT_CODE (rtxes->shift, code);
  PUT_MODE (rtxes->shift, mode);
  PUT_MODE (rtxes->source, mode);
  /* The shift amount operand is rebuilt for every query.  */
  XEXP (rtxes->shift, 1) = GEN_INT (op1);
  return set_src_cost (rtxes->shift, speed_p);
}
/* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
   to true if it is profitable to split a double-word CODE shift
   of X + BITS_PER_WORD bits.  SPEED_P says whether we are testing
   for speed or size profitability.

   Use the rtxes in RTXES to calculate costs.  WORD_MOVE_ZERO_COST is
   the cost of moving zero into a word-mode register.  WORD_MOVE_COST
   is the cost of moving between word registers.  */

static void
compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
			 bool *splitting, enum rtx_code code,
			 int word_move_zero_cost, int word_move_cost)
{
  int wide_cost, narrow_cost, upper_cost, i;

  for (i = 0; i < BITS_PER_WORD; i++)
    {
      /* Cost of doing the full shift in twice_word_mode.  */
      wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
			      i + BITS_PER_WORD);

      /* Cost of producing the low/result word after the split: a shift
	 by >= BITS_PER_WORD degenerates to a word move when the
	 remaining shift count is zero.  */
      if (i == 0)
	narrow_cost = word_move_cost;
      else
	narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);

      /* Cost of producing the other word: zero for logical shifts,
	 a sign-word for ASHIFTRT (a full-width arithmetic shift,
	 except when the shift count already is BITS_PER_WORD - 1,
	 in which case the word can simply be copied).  */
      if (code != ASHIFTRT)
	upper_cost = word_move_zero_cost;
      else if (i == BITS_PER_WORD - 1)
	upper_cost = word_move_cost;
      else
	upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
				 BITS_PER_WORD - 1);

      if (LOG_COSTS)
	fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
		 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);

      if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
	splitting[i] = true;
    }
}
/* Compute what we should do when optimizing for speed or size; SPEED_P
   selects which.  Use RTXES for computing costs.  The results are
   recorded in the per-target `choices' array.  */

static void
compute_costs (bool speed_p, struct cost_rtxes *rtxes)
{
  unsigned int i;
  int word_move_zero_cost, word_move_cost;

  /* Baseline costs: moving zero into a word register, and a
     word-to-word register move.  */
  PUT_MODE (rtxes->target, word_mode);
  SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
  word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);

  SET_SRC (rtxes->set) = rtxes->source;
  word_move_cost = set_rtx_cost (rtxes->set, speed_p);

  if (LOG_COSTS)
    fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
	     GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);

  /* For every multi-word mode, decide whether a move in that mode
     should be split into word-sized moves.  */
  for (i = 0; i < MAX_MACHINE_MODE; i++)
    {
      machine_mode mode = (machine_mode) i;
      int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
      if (factor > 1)
	{
	  int mode_move_cost;

	  PUT_MODE (rtxes->target, mode);
	  PUT_MODE (rtxes->source, mode);
	  mode_move_cost = set_rtx_cost (rtxes->set, speed_p);

	  if (LOG_COSTS)
	    fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
		     GET_MODE_NAME (mode), mode_move_cost,
		     word_move_cost, factor);

	  if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
	    {
	      choices[speed_p].move_modes_to_split[i] = true;
	      choices[speed_p].something_to_do = true;
	    }
	}
    }

  /* For the moves and shifts, the only case that is checked is one
     where the mode of the target is an integer mode twice the width
     of the word_mode.

     If it is not profitable to split a double word move then do not
     even consider the shifts or the zero extension.  */
  if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
    {
      int zext_cost;

      /* The only case here to check to see if moving the upper part with a
	 zero is cheaper than doing the zext itself.  */
      PUT_MODE (rtxes->source, word_mode);
      zext_cost = set_src_cost (rtxes->zext, speed_p);

      if (LOG_COSTS)
	fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
		 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
		 zext_cost, word_move_cost, word_move_zero_cost);

      if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
	choices[speed_p].splitting_zext = true;

      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashift, ASHIFT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_lshiftrt, LSHIFTRT,
			       word_move_zero_cost, word_move_cost);
      compute_splitting_shift (speed_p, rtxes,
			       choices[speed_p].splitting_ashiftrt, ASHIFTRT,
			       word_move_zero_cost, word_move_cost);
    }
}
/* Do one-per-target initialisation.  This involves determining
   which operations on the machine are profitable.  If none are found,
   then the pass just returns when called.  */

void
init_lower_subreg (void)
{
  struct cost_rtxes rtxes;

  memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));

  twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode);

  /* Build the scratch RTL used for all cost queries.  The register
     numbers just need to be valid pseudo numbers; they are never
     emitted into the insn stream.  */
  rtxes.target = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
  rtxes.source = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 2);
  rtxes.set = gen_rtx_SET (rtxes.target, rtxes.source);
  rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
  rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);

  /* Compute both the size (speed_p == false) and speed choices.  */
  if (LOG_COSTS)
    fprintf (stderr, "\nSize costs\n==========\n\n");
  compute_costs (false, &rtxes);

  if (LOG_COSTS)
    fprintf (stderr, "\nSpeed costs\n===========\n\n");
  compute_costs (true, &rtxes);
}
302 static bool
303 simple_move_operand (rtx x)
305 if (GET_CODE (x) == SUBREG)
306 x = SUBREG_REG (x);
308 if (!OBJECT_P (x))
309 return false;
311 if (GET_CODE (x) == LABEL_REF
312 || GET_CODE (x) == SYMBOL_REF
313 || GET_CODE (x) == HIGH
314 || GET_CODE (x) == CONST)
315 return false;
317 if (MEM_P (x)
318 && (MEM_VOLATILE_P (x)
319 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
320 return false;
322 return true;
/* If INSN is a single set between two objects that we want to split,
   return the single set.  SPEED_P says whether we are optimizing
   INSN for speed or size.

   INSN should have been passed to recog and extract_insn before this
   is called (we read recog_data below).  */

static rtx
simple_move (rtx_insn *insn, bool speed_p)
{
  rtx x;
  rtx set;
  machine_mode mode;

  if (recog_data.n_operands != 2)
    return NULL_RTX;

  set = single_set (insn);
  if (!set)
    return NULL_RTX;

  /* Both sides of the set must be the recognized operands; otherwise
     the insn computes something beyond a plain copy.  */
  x = SET_DEST (set);
  if (x != recog_data.operand[0] && x != recog_data.operand[1])
    return NULL_RTX;
  if (!simple_move_operand (x))
    return NULL_RTX;

  x = SET_SRC (set);
  if (x != recog_data.operand[0] && x != recog_data.operand[1])
    return NULL_RTX;
  /* For the src we can handle ASM_OPERANDS, and it is beneficial for
     things like x86 rdtsc which returns a DImode value.  */
  if (GET_CODE (x) != ASM_OPERANDS
      && !simple_move_operand (x))
    return NULL_RTX;

  /* We try to decompose in integer modes, to avoid generating
     inefficient code copying between integer and floating point
     registers.  That means that we can't decompose if this is a
     non-integer mode for which there is no integer mode of the same
     size.  */
  mode = GET_MODE (SET_DEST (set));
  if (!SCALAR_INT_MODE_P (mode)
      && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0)
	  == BLKmode))
    return NULL_RTX;

  /* Reject PARTIAL_INT modes.  They are used for processor specific
     purposes and it's probably best not to tamper with them.  */
  if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return NULL_RTX;

  /* Finally, the cost analysis must have decided this mode is worth
     splitting on this target.  */
  if (!choices[speed_p].move_modes_to_split[(int) mode])
    return NULL_RTX;

  return set;
}
383 /* If SET is a copy from one multi-word pseudo-register to another,
384 record that in reg_copy_graph. Return whether it is such a
385 copy. */
387 static bool
388 find_pseudo_copy (rtx set)
390 rtx dest = SET_DEST (set);
391 rtx src = SET_SRC (set);
392 unsigned int rd, rs;
393 bitmap b;
395 if (!REG_P (dest) || !REG_P (src))
396 return false;
398 rd = REGNO (dest);
399 rs = REGNO (src);
400 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
401 return false;
403 b = reg_copy_graph[rs];
404 if (b == NULL)
406 b = BITMAP_ALLOC (NULL);
407 reg_copy_graph[rs] = b;
410 bitmap_set_bit (b, rd);
412 return true;
/* Look through the registers in DECOMPOSABLE_CONTEXT.  For each case
   where they are copied to another register, add the register to
   which they are copied to DECOMPOSABLE_CONTEXT.  Use
   NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
   copies of registers which are in NON_DECOMPOSABLE_CONTEXT.  */

static void
propagate_pseudo_copies (void)
{
  bitmap queue, propagate;

  queue = BITMAP_ALLOC (NULL);
  propagate = BITMAP_ALLOC (NULL);

  /* Standard worklist iteration to a fixed point: QUEUE holds the
     registers whose copy destinations have not yet been propagated.  */
  bitmap_copy (queue, decomposable_context);
  do
    {
      bitmap_iterator iter;
      unsigned int i;

      bitmap_clear (propagate);

      EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
	{
	  bitmap b = reg_copy_graph[i];
	  if (b)
	    bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
	}

      /* Only registers newly added to decomposable_context need
	 another round of propagation.  */
      bitmap_and_compl (queue, propagate, decomposable_context);
      bitmap_ior_into (decomposable_context, propagate);
    }
  while (!bitmap_empty_p (queue));

  BITMAP_FREE (queue);
  BITMAP_FREE (propagate);
}
/* A pointer to one of these values is passed to
   find_decomposable_subregs.  */

enum classify_move_insn
{
  /* Not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* A simple move we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* Any other simple move.  */
  SIMPLE_MOVE
};
/* If we find a SUBREG in *LOC which we could use to decompose a
   pseudo-register, set a bit in DECOMPOSABLE_CONTEXT.  If we find an
   unadorned register which is not a simple pseudo-register copy,
   PCMI will point at the type of move, and we set a bit in
   DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate.  */

static void
find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
    {
      rtx x = *iter;
      if (GET_CODE (x) == SUBREG)
	{
	  rtx inner = SUBREG_REG (x);
	  unsigned int regno, outer_size, inner_size, outer_words, inner_words;

	  if (!REG_P (inner))
	    continue;

	  regno = REGNO (inner);
	  if (HARD_REGISTER_NUM_P (regno))
	    {
	      /* Hard registers are never decomposed; don't look inside.  */
	      iter.skip_subrtxes ();
	      continue;
	    }

	  outer_size = GET_MODE_SIZE (GET_MODE (x));
	  inner_size = GET_MODE_SIZE (GET_MODE (inner));
	  /* Round sizes up to whole words.  */
	  outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
	  inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

	  /* We only try to decompose single word subregs of multi-word
	     registers.  When we find one, we return -1 to avoid iterating
	     over the inner register.

	     ??? This doesn't allow, e.g., DImode subregs of TImode values
	     on 32-bit targets.  We would need to record the way the
	     pseudo-register was used, and only decompose if all the uses
	     were the same number and size of pieces.  Hopefully this
	     doesn't happen much.  */

	  if (outer_words == 1 && inner_words > 1)
	    {
	      bitmap_set_bit (decomposable_context, regno);
	      iter.skip_subrtxes ();
	      continue;
	    }

	  /* If this is a cast from one mode to another, where the modes
	     have the same size, and they are not tieable, then mark this
	     register as non-decomposable.  If we decompose it we are
	     likely to mess up whatever the backend is trying to do.  */
	  if (outer_words > 1
	      && outer_size == inner_size
	      && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner)))
	    {
	      bitmap_set_bit (non_decomposable_context, regno);
	      bitmap_set_bit (subreg_context, regno);
	      iter.skip_subrtxes ();
	      continue;
	    }
	}
      else if (REG_P (x))
	{
	  unsigned int regno;

	  /* We will see an outer SUBREG before we see the inner REG, so
	     when we see a plain REG here it means a direct reference to
	     the register.

	     If this is not a simple copy from one location to another,
	     then we can not decompose this register.  If this is a simple
	     copy we want to decompose, and the mode is right,
	     then we mark the register as decomposable.
	     Otherwise we don't say anything about this register --
	     it could be decomposed, but whether that would be
	     profitable depends upon how it is used elsewhere.

	     We only set bits in the bitmap for multi-word
	     pseudo-registers, since those are the only ones we care about
	     and it keeps the size of the bitmaps down.  */

	  regno = REGNO (x);
	  if (!HARD_REGISTER_NUM_P (regno)
	      && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
	    {
	      switch (*pcmi)
		{
		case NOT_SIMPLE_MOVE:
		  bitmap_set_bit (non_decomposable_context, regno);
		  break;
		case DECOMPOSABLE_SIMPLE_MOVE:
		  if (MODES_TIEABLE_P (GET_MODE (x), word_mode))
		    bitmap_set_bit (decomposable_context, regno);
		  break;
		case SIMPLE_MOVE:
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	}
      else if (MEM_P (x))
	{
	  enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;

	  /* Any registers used in a MEM do not participate in a
	     SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE.  Do our own recursion
	     here, and return -1 to block the parent's recursion.  */
	  find_decomposable_subregs (&XEXP (x, 0), &cmi_mem);
	  iter.skip_subrtxes ();
	}
    }
}
/* Decompose REGNO into word-sized components.  We smash the REG node
   in place.  This ensures that (1) something goes wrong quickly if we
   fail to make some replacement, and (2) the debug information inside
   the symbol table is automatically kept up to date.  */

static void
decompose_register (unsigned int regno)
{
  rtx reg;
  unsigned int words, i;
  rtvec v;

  reg = regno_reg_rtx[regno];

  /* Clear the table entry so any later direct reference to REGNO
     fails fast rather than silently using the smashed node.  */
  regno_reg_rtx[regno] = NULL_RTX;

  words = GET_MODE_SIZE (GET_MODE (reg));
  words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Allocate one fresh word-mode pseudo per word of REG.  */
  v = rtvec_alloc (words);
  for (i = 0; i < words; ++i)
    RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);

  /* Turn the original REG node itself into a CONCATN of the parts.  */
  PUT_CODE (reg, CONCATN);
  XVEC (reg, 0) = v;

  if (dump_file)
    {
      fprintf (dump_file, "; Splitting reg %u ->", regno);
      for (i = 0; i < words; ++i)
	fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
      fputc ('\n', dump_file);
    }
}
/* Get a SUBREG of a CONCATN: return the piece of OP (which must be a
   CONCATN) that corresponds to a SUBREG of mode OUTERMODE at offset
   BYTE, or NULL_RTX if the requested piece straddles two parts.  */

static rtx
simplify_subreg_concatn (machine_mode outermode, rtx op,
			 unsigned int byte)
{
  unsigned int inner_size;
  machine_mode innermode, partmode;
  rtx part;
  unsigned int final_offset;

  gcc_assert (GET_CODE (op) == CONCATN);
  gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);

  innermode = GET_MODE (op);
  gcc_assert (byte < GET_MODE_SIZE (innermode));
  gcc_assert (GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (innermode));

  /* All parts of a CONCATN are the same size.  */
  inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
  part = XVECEXP (op, 0, byte / inner_size);
  partmode = GET_MODE (part);

  /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
     regular CONST_VECTORs.  They have vector or integer modes, depending
     on the capabilities of the target.  Cope with them.  */
  if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
    partmode = GET_MODE_INNER (innermode);
  else if (partmode == VOIDmode)
    {
      enum mode_class mclass = GET_MODE_CLASS (innermode);
      partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0);
    }

  final_offset = byte % inner_size;
  /* The requested piece must lie entirely within one part.  */
  if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
    return NULL_RTX;

  return simplify_gen_subreg (outermode, part, partmode, final_offset);
}
/* Wrapper around simplify_gen_subreg which handles CONCATN.  */

static rtx
simplify_gen_subreg_concatn (machine_mode outermode, rtx op,
			     machine_mode innermode, unsigned int byte)
{
  rtx ret;

  /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
     If OP is a SUBREG of a CONCATN, then it must be a simple mode
     change with the same size and offset 0, or it must extract a
     part.  We shouldn't see anything else here.  */
  if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
    {
      rtx op2;

      /* Same-size mode change at offset 0: just recurse on the
	 CONCATN in its own mode.  */
      if ((GET_MODE_SIZE (GET_MODE (op))
	   == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
	  && SUBREG_BYTE (op) == 0)
	return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
					    GET_MODE (SUBREG_REG (op)), byte);

      op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
				     SUBREG_BYTE (op));
      if (op2 == NULL_RTX)
	{
	  /* We don't handle paradoxical subregs here.  */
	  gcc_assert (GET_MODE_SIZE (outermode)
		      <= GET_MODE_SIZE (GET_MODE (op)));
	  gcc_assert (GET_MODE_SIZE (GET_MODE (op))
		      <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))));
	  /* The SUBREG straddled parts of the CONCATN; combine the two
	     offsets and try again directly against the CONCATN.  */
	  op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
					 byte + SUBREG_BYTE (op));
	  gcc_assert (op2 != NULL_RTX);
	  return op2;
	}

      op = op2;
      gcc_assert (op != NULL_RTX);
      gcc_assert (innermode == GET_MODE (op));
    }

  if (GET_CODE (op) == CONCATN)
    return simplify_subreg_concatn (outermode, op, byte);

  ret = simplify_gen_subreg (outermode, op, innermode, byte);

  /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
     resolve_simple_move will ask for the high part of the paradoxical
     subreg, which does not have a value.  Just return a zero.  */
  if (ret == NULL_RTX
      && GET_CODE (op) == SUBREG
      && SUBREG_BYTE (op) == 0
      && (GET_MODE_SIZE (innermode)
	  > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))))
    return CONST0_RTX (outermode);

  gcc_assert (ret != NULL_RTX);
  return ret;
}
/* Return whether we should resolve X into the registers into which it
   was decomposed.  A decomposed register was smashed into a CONCATN
   by decompose_register, so the code check suffices.  */

static bool
resolve_reg_p (rtx x)
{
  return GET_CODE (x) == CONCATN;
}
/* Return whether X is a SUBREG of a register which we need to
   resolve (i.e. a SUBREG whose inner register was decomposed).  */

static bool
resolve_subreg_p (rtx x)
{
  if (GET_CODE (x) != SUBREG)
    return false;
  return resolve_reg_p (SUBREG_REG (x));
}
/* Look for SUBREGs in *LOC which need to be decomposed and replace
   them with references to the component registers.  Replacements are
   queued via validate_change (in-group mode); the caller is expected
   to apply_change_group.  Return true if *LOC contains a direct
   reference to a decomposed register that cannot be replaced (which,
   for a note, means the note must be removed).  */

static bool
resolve_subreg_use (rtx *loc, rtx insn)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
    {
      rtx *loc = *iter;
      rtx x = *loc;
      if (resolve_subreg_p (x))
	{
	  x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
				       SUBREG_BYTE (x));

	  /* It is possible for a note to contain a reference which we can
	     decompose.  In this case, return 1 to the caller to indicate
	     that the note must be removed.  */
	  if (!x)
	    {
	      gcc_assert (!insn);
	      return true;
	    }

	  validate_change (insn, loc, x, 1);
	  iter.skip_subrtxes ();
	}
      else if (resolve_reg_p (x))
	/* Return 1 to the caller to indicate that we found a direct
	   reference to a register which is being decomposed.  This can
	   happen inside notes, multiword shift or zero-extend
	   instructions.  */
	return true;
    }

  return false;
}
/* Resolve any decomposed registers which appear in register notes on
   INSN.  REG_EQUAL/REG_EQUIV notes are rewritten (or removed if they
   reference a decomposed register directly); REG_DEAD and REG_UNUSED
   notes for decomposed registers are simply dropped.  */

static void
resolve_reg_notes (rtx_insn *insn)
{
  rtx *pnote, note;

  note = find_reg_equal_equiv_note (insn);
  if (note)
    {
      int old_count = num_validated_changes ();
      if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX))
	remove_note (insn, note);
      else
	/* Only rescan notes if resolve_subreg_use actually queued a
	   replacement.  */
	if (old_count != num_validated_changes ())
	  df_notes_rescan (insn);
    }

  /* Walk the note chain by hand so we can unlink entries in place.  */
  pnote = &REG_NOTES (insn);
  while (*pnote != NULL_RTX)
    {
      bool del = false;

      note = *pnote;
      switch (REG_NOTE_KIND (note))
	{
	case REG_DEAD:
	case REG_UNUSED:
	  if (resolve_reg_p (XEXP (note, 0)))
	    del = true;
	  break;

	default:
	  break;
	}

      if (del)
	*pnote = XEXP (note, 1);
      else
	pnote = &XEXP (note, 1);
    }
}
821 /* Return whether X can be decomposed into subwords. */
823 static bool
824 can_decompose_p (rtx x)
826 if (REG_P (x))
828 unsigned int regno = REGNO (x);
830 if (HARD_REGISTER_NUM_P (regno))
832 unsigned int byte, num_bytes;
834 num_bytes = GET_MODE_SIZE (GET_MODE (x));
835 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
836 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
837 return false;
838 return true;
840 else
841 return !bitmap_bit_p (subreg_context, regno);
844 return true;
/* Decompose the registers used in a simple move SET within INSN.  If
   we don't change anything, return INSN, otherwise return the start
   of the sequence of moves.  Note this function recurses (directly
   and via emit_move_insn helpers) to resolve temporaries it
   introduces.  */

static rtx_insn *
resolve_simple_move (rtx set, rtx_insn *insn)
{
  rtx src, dest, real_dest;
  rtx_insn *insns;
  machine_mode orig_mode, dest_mode;
  unsigned int words;
  bool pushing;

  src = SET_SRC (set);
  dest = SET_DEST (set);
  orig_mode = GET_MODE (dest);

  words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  gcc_assert (words > 1);

  start_sequence ();

  /* We have to handle copying from a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  Rather than assume that we
     can take a word_mode SUBREG of the destination, we copy to a new
     register and then copy that to the destination.  */

  real_dest = NULL_RTX;

  if (GET_CODE (src) == SUBREG
      && resolve_reg_p (SUBREG_REG (src))
      && (SUBREG_BYTE (src) != 0
	  || (GET_MODE_SIZE (orig_mode)
	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
    {
      real_dest = dest;
      dest = gen_reg_rtx (orig_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  /* Similarly if we are copying to a SUBREG of a decomposed reg where
     the SUBREG is larger than word size.  */

  if (GET_CODE (dest) == SUBREG
      && resolve_reg_p (SUBREG_REG (dest))
      && (SUBREG_BYTE (dest) != 0
	  || (GET_MODE_SIZE (orig_mode)
	      != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
    {
      rtx reg, smove;
      rtx_insn *minsn;

      reg = gen_reg_rtx (orig_mode);
      minsn = emit_move_insn (reg, src);
      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);
      /* Recurse to decompose the temporary copy we just emitted.  */
      resolve_simple_move (smove, minsn);
      src = reg;
    }

  /* If we didn't have any big SUBREGS of decomposed registers, and
     neither side of the move is a register we are decomposing, then
     we don't have to do anything here.  */

  if (src == SET_SRC (set)
      && dest == SET_DEST (set)
      && !resolve_reg_p (src)
      && !resolve_subreg_p (src)
      && !resolve_reg_p (dest)
      && !resolve_subreg_p (dest))
    {
      end_sequence ();
      return insn;
    }

  /* It's possible for the code to use a subreg of a decomposed
     register while forming an address.  We need to handle that before
     passing the address to emit_move_insn.  We pass NULL_RTX as the
     insn parameter to resolve_subreg_use because we can not validate
     the insn yet.  */
  if (MEM_P (src) || MEM_P (dest))
    {
      int acg;

      if (MEM_P (src))
	resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
      if (MEM_P (dest))
	resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
      acg = apply_change_group ();
      gcc_assert (acg);
    }

  /* If SRC is a register which we can't decompose, or has side
     effects, we need to move via a temporary register.  */

  if (!can_decompose_p (src)
      || side_effects_p (src)
      || GET_CODE (src) == ASM_OPERANDS)
    {
      rtx reg;

      reg = gen_reg_rtx (orig_mode);

#ifdef AUTO_INC_DEC
      {
	rtx move = emit_move_insn (reg, src);
	if (MEM_P (src))
	  {
	    /* Preserve any auto-inc side effect recorded on INSN.  */
	    rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
	    if (note)
	      add_reg_note (move, REG_INC, XEXP (note, 0));
	  }
      }
#else
      emit_move_insn (reg, src);
#endif
      src = reg;
    }

  /* If DEST is a register which we can't decompose, or has side
     effects, we need to first move to a temporary register.  We
     handle the common case of pushing an operand directly.  We also
     go through a temporary register if it holds a floating point
     value.  This gives us better code on systems which can't move
     data easily between integer and floating point registers.  */

  dest_mode = orig_mode;
  pushing = push_operand (dest, dest_mode);
  if (!can_decompose_p (dest)
      || (side_effects_p (dest) && !pushing)
      || (!SCALAR_INT_MODE_P (dest_mode)
	  && !resolve_reg_p (dest)
	  && !resolve_subreg_p (dest)))
    {
      if (real_dest == NULL_RTX)
	real_dest = dest;
      if (!SCALAR_INT_MODE_P (dest_mode))
	{
	  dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT,
				     MODE_INT, 0);
	  gcc_assert (dest_mode != BLKmode);
	}
      dest = gen_reg_rtx (dest_mode);
      if (REG_P (real_dest))
	REG_ATTRS (dest) = REG_ATTRS (real_dest);
    }

  if (pushing)
    {
      unsigned int i, j, jinc;

      gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
      gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);

      /* Choose the push order so the words land at the right stack
	 addresses for the target's endianness.  */
      if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
	{
	  j = 0;
	  jinc = 1;
	}
      else
	{
	  j = words - 1;
	  jinc = -1;
	}

      for (i = 0; i < words; ++i, j += jinc)
	{
	  rtx temp;

	  temp = copy_rtx (XEXP (dest, 0));
	  temp = adjust_automodify_address_nv (dest, word_mode, temp,
					       j * UNITS_PER_WORD);
	  emit_move_insn (temp,
			  simplify_gen_subreg_concatn (word_mode, src,
						       orig_mode,
						       j * UNITS_PER_WORD));
	}
    }
  else
    {
      unsigned int i;

      /* Clobber the destination first so DF sees the full definition
	 rather than a series of partial word writes.  */
      if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
	emit_clobber (dest);

      for (i = 0; i < words; ++i)
	emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
						     dest_mode,
						     i * UNITS_PER_WORD),
			simplify_gen_subreg_concatn (word_mode, src,
						     orig_mode,
						     i * UNITS_PER_WORD));
    }

  if (real_dest != NULL_RTX)
    {
      rtx mdest, smove;
      rtx_insn *minsn;

      if (dest_mode == orig_mode)
	mdest = dest;
      else
	mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
      minsn = emit_move_insn (real_dest, mdest);

#ifdef AUTO_INC_DEC
      if (MEM_P (real_dest)
	  && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
	{
	  rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
	  if (note)
	    add_reg_note (minsn, REG_INC, XEXP (note, 0));
	}
#endif

      smove = single_set (minsn);
      gcc_assert (smove != NULL_RTX);

      resolve_simple_move (smove, minsn);
    }

  insns = get_insns ();
  end_sequence ();

  copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);

  emit_insn_before (insns, insn);

  /* If we get here via self-recursion, then INSN is not yet in the insns
     chain and delete_insn will fail.  We only want to remove INSN from the
     current sequence.  See PR56738.  */
  if (in_sequence_p ())
    remove_insn (insn);
  else
    delete_insn (insn);

  return insns;
}
/* Change a CLOBBER of a decomposed register into a CLOBBER of the
   component registers.  Return whether we changed something.  */

static bool
resolve_clobber (rtx pat, rtx_insn *insn)
{
  rtx reg;
  machine_mode orig_mode;
  unsigned int words, i;
  int ret;

  reg = XEXP (pat, 0);
  if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
    return false;

  orig_mode = GET_MODE (reg);
  words = GET_MODE_SIZE (orig_mode);
  words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* Rewrite INSN itself to clobber the first word; the remaining
     words get their own CLOBBER insns emitted after it.  */
  ret = validate_change (NULL_RTX, &XEXP (pat, 0),
			 simplify_gen_subreg_concatn (word_mode, reg,
						      orig_mode, 0),
			 0);
  df_insn_rescan (insn);
  gcc_assert (ret != 0);

  for (i = words - 1; i > 0; --i)
    {
      rtx x;

      x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
				       i * UNITS_PER_WORD);
      x = gen_rtx_CLOBBER (VOIDmode, x);
      emit_insn_after (x, insn);
    }

  resolve_reg_notes (insn);

  return true;
}
1129 /* A USE of a decomposed register is no longer meaningful. Return
1130 whether we changed something. */
1132 static bool
1133 resolve_use (rtx pat, rtx_insn *insn)
1135 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1137 delete_insn (insn);
1138 return true;
1141 resolve_reg_notes (insn);
1143 return false;
1146 /* A VAR_LOCATION can be simplified. */
1148 static void
1149 resolve_debug (rtx_insn *insn)
1151 subrtx_ptr_iterator::array_type array;
1152 FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
1154 rtx *loc = *iter;
1155 rtx x = *loc;
1156 if (resolve_subreg_p (x))
1158 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
1159 SUBREG_BYTE (x));
1161 if (x)
1162 *loc = x;
1163 else
1164 x = copy_rtx (*loc);
1166 if (resolve_reg_p (x))
1167 *loc = copy_rtx (x);
1170 df_insn_rescan (insn);
1172 resolve_reg_notes (insn);
1175 /* Check if INSN is a decomposable multiword-shift or zero-extend and
1176 set the decomposable_context bitmap accordingly. SPEED_P is true
1177 if we are optimizing INSN for speed rather than size. Return true
1178 if INSN is decomposable. */
1180 static bool
1181 find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
1183 rtx set;
1184 rtx op;
1185 rtx op_operand;
1187 set = single_set (insn);
1188 if (!set)
1189 return false;
1191 op = SET_SRC (set);
1192 if (GET_CODE (op) != ASHIFT
1193 && GET_CODE (op) != LSHIFTRT
1194 && GET_CODE (op) != ASHIFTRT
1195 && GET_CODE (op) != ZERO_EXTEND)
1196 return false;
1198 op_operand = XEXP (op, 0);
1199 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1200 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1201 || HARD_REGISTER_NUM_P (REGNO (op_operand))
1202 || GET_MODE (op) != twice_word_mode)
1203 return false;
1205 if (GET_CODE (op) == ZERO_EXTEND)
1207 if (GET_MODE (op_operand) != word_mode
1208 || !choices[speed_p].splitting_zext)
1209 return false;
1211 else /* left or right shift */
1213 bool *splitting = (GET_CODE (op) == ASHIFT
1214 ? choices[speed_p].splitting_ashift
1215 : GET_CODE (op) == ASHIFTRT
1216 ? choices[speed_p].splitting_ashiftrt
1217 : choices[speed_p].splitting_lshiftrt);
1218 if (!CONST_INT_P (XEXP (op, 1))
1219 || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1220 2 * BITS_PER_WORD - 1)
1221 || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1222 return false;
1224 bitmap_set_bit (decomposable_context, REGNO (op_operand));
1227 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1229 return true;
1232 /* Decompose a more than word wide shift (in INSN) of a multiword
1233 pseudo or a multiword zero-extend of a wordmode pseudo into a move
1234 and 'set to zero' insn. Return a pointer to the new insn when a
1235 replacement was done. */
1237 static rtx_insn *
1238 resolve_shift_zext (rtx_insn *insn)
1240 rtx set;
1241 rtx op;
1242 rtx op_operand;
1243 rtx_insn *insns;
1244 rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
1245 int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
1247 set = single_set (insn);
1248 if (!set)
1249 return NULL;
1251 op = SET_SRC (set);
1252 if (GET_CODE (op) != ASHIFT
1253 && GET_CODE (op) != LSHIFTRT
1254 && GET_CODE (op) != ASHIFTRT
1255 && GET_CODE (op) != ZERO_EXTEND)
1256 return NULL;
1258 op_operand = XEXP (op, 0);
1260 /* We can tear this operation apart only if the regs were already
1261 torn apart. */
1262 if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
1263 return NULL;
1265 /* src_reg_num is the number of the word mode register which we
1266 are operating on. For a left shift and a zero_extend on little
1267 endian machines this is register 0. */
1268 src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
1269 ? 1 : 0;
1271 if (WORDS_BIG_ENDIAN
1272 && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD)
1273 src_reg_num = 1 - src_reg_num;
1275 if (GET_CODE (op) == ZERO_EXTEND)
1276 dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
1277 else
1278 dest_reg_num = 1 - src_reg_num;
1280 offset1 = UNITS_PER_WORD * dest_reg_num;
1281 offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
1282 src_offset = UNITS_PER_WORD * src_reg_num;
1284 start_sequence ();
1286 dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1287 GET_MODE (SET_DEST (set)),
1288 offset1);
1289 dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1290 GET_MODE (SET_DEST (set)),
1291 offset2);
1292 src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
1293 GET_MODE (op_operand),
1294 src_offset);
1295 if (GET_CODE (op) == ASHIFTRT
1296 && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
1297 upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
1298 BITS_PER_WORD - 1, NULL_RTX, 0);
1300 if (GET_CODE (op) != ZERO_EXTEND)
1302 int shift_count = INTVAL (XEXP (op, 1));
1303 if (shift_count > BITS_PER_WORD)
1304 src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
1305 LSHIFT_EXPR : RSHIFT_EXPR,
1306 word_mode, src_reg,
1307 shift_count - BITS_PER_WORD,
1308 dest_reg, GET_CODE (op) != ASHIFTRT);
1311 if (dest_reg != src_reg)
1312 emit_move_insn (dest_reg, src_reg);
1313 if (GET_CODE (op) != ASHIFTRT)
1314 emit_move_insn (dest_upper, CONST0_RTX (word_mode));
1315 else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
1316 emit_move_insn (dest_upper, copy_rtx (src_reg));
1317 else
1318 emit_move_insn (dest_upper, upper_src);
1319 insns = get_insns ();
1321 end_sequence ();
1323 emit_insn_before (insns, insn);
1325 if (dump_file)
1327 rtx_insn *in;
1328 fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
1329 for (in = insns; in != insn; in = NEXT_INSN (in))
1330 fprintf (dump_file, "%d ", INSN_UID (in));
1331 fprintf (dump_file, "\n");
1334 delete_insn (insn);
1335 return insns;
1338 /* Print to dump_file a description of what we're doing with shift code CODE.
1339 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1341 static void
1342 dump_shift_choices (enum rtx_code code, bool *splitting)
1344 int i;
1345 const char *sep;
1347 fprintf (dump_file,
1348 " Splitting mode %s for %s lowering with shift amounts = ",
1349 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1350 sep = "";
1351 for (i = 0; i < BITS_PER_WORD; i++)
1352 if (splitting[i])
1354 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1355 sep = ",";
1357 fprintf (dump_file, "\n");
1360 /* Print to dump_file a description of what we're doing when optimizing
1361 for speed or size; SPEED_P says which. DESCRIPTION is a description
1362 of the SPEED_P choice. */
1364 static void
1365 dump_choices (bool speed_p, const char *description)
1367 unsigned int i;
1369 fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1371 for (i = 0; i < MAX_MACHINE_MODE; i++)
1372 if (GET_MODE_SIZE ((machine_mode) i) > UNITS_PER_WORD)
1373 fprintf (dump_file, " %s mode %s for copy lowering.\n",
1374 choices[speed_p].move_modes_to_split[i]
1375 ? "Splitting"
1376 : "Skipping",
1377 GET_MODE_NAME ((machine_mode) i));
1379 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
1380 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1381 GET_MODE_NAME (twice_word_mode));
1383 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1384 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1385 dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
1386 fprintf (dump_file, "\n");
1389 /* Look for registers which are always accessed via word-sized SUBREGs
1390 or -if DECOMPOSE_COPIES is true- via copies. Decompose these
1391 registers into several word-sized pseudo-registers. */
1393 static void
1394 decompose_multiword_subregs (bool decompose_copies)
1396 unsigned int max;
1397 basic_block bb;
1398 bool speed_p;
1400 if (dump_file)
1402 dump_choices (false, "size");
1403 dump_choices (true, "speed");
1406 /* Check if this target even has any modes to consider lowering. */
1407 if (!choices[false].something_to_do && !choices[true].something_to_do)
1409 if (dump_file)
1410 fprintf (dump_file, "Nothing to do!\n");
1411 return;
1414 max = max_reg_num ();
1416 /* First see if there are any multi-word pseudo-registers. If there
1417 aren't, there is nothing we can do. This should speed up this
1418 pass in the normal case, since it should be faster than scanning
1419 all the insns. */
1421 unsigned int i;
1422 bool useful_modes_seen = false;
1424 for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
1425 if (regno_reg_rtx[i] != NULL)
1427 machine_mode mode = GET_MODE (regno_reg_rtx[i]);
1428 if (choices[false].move_modes_to_split[(int) mode]
1429 || choices[true].move_modes_to_split[(int) mode])
1431 useful_modes_seen = true;
1432 break;
1436 if (!useful_modes_seen)
1438 if (dump_file)
1439 fprintf (dump_file, "Nothing to lower in this function.\n");
1440 return;
1444 if (df)
1446 df_set_flags (DF_DEFER_INSN_RESCAN);
1447 run_word_dce ();
1450 /* FIXME: It may be possible to change this code to look for each
1451 multi-word pseudo-register and to find each insn which sets or
1452 uses that register. That should be faster than scanning all the
1453 insns. */
1455 decomposable_context = BITMAP_ALLOC (NULL);
1456 non_decomposable_context = BITMAP_ALLOC (NULL);
1457 subreg_context = BITMAP_ALLOC (NULL);
1459 reg_copy_graph.create (max);
1460 reg_copy_graph.safe_grow_cleared (max);
1461 memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);
1463 speed_p = optimize_function_for_speed_p (cfun);
1464 FOR_EACH_BB_FN (bb, cfun)
1466 rtx_insn *insn;
1468 FOR_BB_INSNS (bb, insn)
1470 rtx set;
1471 enum classify_move_insn cmi;
1472 int i, n;
1474 if (!INSN_P (insn)
1475 || GET_CODE (PATTERN (insn)) == CLOBBER
1476 || GET_CODE (PATTERN (insn)) == USE)
1477 continue;
1479 recog_memoized (insn);
1481 if (find_decomposable_shift_zext (insn, speed_p))
1482 continue;
1484 extract_insn (insn);
1486 set = simple_move (insn, speed_p);
1488 if (!set)
1489 cmi = NOT_SIMPLE_MOVE;
1490 else
1492 /* We mark pseudo-to-pseudo copies as decomposable during the
1493 second pass only. The first pass is so early that there is
1494 good chance such moves will be optimized away completely by
1495 subsequent optimizations anyway.
1497 However, we call find_pseudo_copy even during the first pass
1498 so as to properly set up the reg_copy_graph. */
1499 if (find_pseudo_copy (set))
1500 cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
1501 else
1502 cmi = SIMPLE_MOVE;
1505 n = recog_data.n_operands;
1506 for (i = 0; i < n; ++i)
1508 find_decomposable_subregs (&recog_data.operand[i], &cmi);
1510 /* We handle ASM_OPERANDS as a special case to support
1511 things like x86 rdtsc which returns a DImode value.
1512 We can decompose the output, which will certainly be
1513 operand 0, but not the inputs. */
1515 if (cmi == SIMPLE_MOVE
1516 && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
1518 gcc_assert (i == 0);
1519 cmi = NOT_SIMPLE_MOVE;
1525 bitmap_and_compl_into (decomposable_context, non_decomposable_context);
1526 if (!bitmap_empty_p (decomposable_context))
1528 sbitmap sub_blocks;
1529 unsigned int i;
1530 sbitmap_iterator sbi;
1531 bitmap_iterator iter;
1532 unsigned int regno;
1534 propagate_pseudo_copies ();
1536 sub_blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
1537 bitmap_clear (sub_blocks);
1539 EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
1540 decompose_register (regno);
1542 FOR_EACH_BB_FN (bb, cfun)
1544 rtx_insn *insn;
1546 FOR_BB_INSNS (bb, insn)
1548 rtx pat;
1550 if (!INSN_P (insn))
1551 continue;
1553 pat = PATTERN (insn);
1554 if (GET_CODE (pat) == CLOBBER)
1555 resolve_clobber (pat, insn);
1556 else if (GET_CODE (pat) == USE)
1557 resolve_use (pat, insn);
1558 else if (DEBUG_INSN_P (insn))
1559 resolve_debug (insn);
1560 else
1562 rtx set;
1563 int i;
1565 recog_memoized (insn);
1566 extract_insn (insn);
1568 set = simple_move (insn, speed_p);
1569 if (set)
1571 rtx_insn *orig_insn = insn;
1572 bool cfi = control_flow_insn_p (insn);
1574 /* We can end up splitting loads to multi-word pseudos
1575 into separate loads to machine word size pseudos.
1576 When this happens, we first had one load that can
1577 throw, and after resolve_simple_move we'll have a
1578 bunch of loads (at least two). All those loads may
1579 trap if we can have non-call exceptions, so they
1580 all will end the current basic block. We split the
1581 block after the outer loop over all insns, but we
1582 make sure here that we will be able to split the
1583 basic block and still produce the correct control
1584 flow graph for it. */
1585 gcc_assert (!cfi
1586 || (cfun->can_throw_non_call_exceptions
1587 && can_throw_internal (insn)));
1589 insn = resolve_simple_move (set, insn);
1590 if (insn != orig_insn)
1592 recog_memoized (insn);
1593 extract_insn (insn);
1595 if (cfi)
1596 bitmap_set_bit (sub_blocks, bb->index);
1599 else
1601 rtx_insn *decomposed_shift;
1603 decomposed_shift = resolve_shift_zext (insn);
1604 if (decomposed_shift != NULL_RTX)
1606 insn = decomposed_shift;
1607 recog_memoized (insn);
1608 extract_insn (insn);
1612 for (i = recog_data.n_operands - 1; i >= 0; --i)
1613 resolve_subreg_use (recog_data.operand_loc[i], insn);
1615 resolve_reg_notes (insn);
1617 if (num_validated_changes () > 0)
1619 for (i = recog_data.n_dups - 1; i >= 0; --i)
1621 rtx *pl = recog_data.dup_loc[i];
1622 int dup_num = recog_data.dup_num[i];
1623 rtx *px = recog_data.operand_loc[dup_num];
1625 validate_unshare_change (insn, pl, *px, 1);
1628 i = apply_change_group ();
1629 gcc_assert (i);
1635 /* If we had insns to split that caused control flow insns in the middle
1636 of a basic block, split those blocks now. Note that we only handle
1637 the case where splitting a load has caused multiple possibly trapping
1638 loads to appear. */
1639 EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
1641 rtx_insn *insn, *end;
1642 edge fallthru;
1644 bb = BASIC_BLOCK_FOR_FN (cfun, i);
1645 insn = BB_HEAD (bb);
1646 end = BB_END (bb);
1648 while (insn != end)
1650 if (control_flow_insn_p (insn))
1652 /* Split the block after insn. There will be a fallthru
1653 edge, which is OK so we keep it. We have to create the
1654 exception edges ourselves. */
1655 fallthru = split_block (bb, insn);
1656 rtl_make_eh_edge (NULL, bb, BB_END (bb));
1657 bb = fallthru->dest;
1658 insn = BB_HEAD (bb);
1660 else
1661 insn = NEXT_INSN (insn);
1665 sbitmap_free (sub_blocks);
1669 unsigned int i;
1670 bitmap b;
1672 FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
1673 if (b)
1674 BITMAP_FREE (b);
1677 reg_copy_graph.release ();
1679 BITMAP_FREE (decomposable_context);
1680 BITMAP_FREE (non_decomposable_context);
1681 BITMAP_FREE (subreg_context);
1684 /* Implement first lower subreg pass. */
1686 namespace {
1688 const pass_data pass_data_lower_subreg =
1690 RTL_PASS, /* type */
1691 "subreg1", /* name */
1692 OPTGROUP_NONE, /* optinfo_flags */
1693 TV_LOWER_SUBREG, /* tv_id */
1694 0, /* properties_required */
1695 0, /* properties_provided */
1696 0, /* properties_destroyed */
1697 0, /* todo_flags_start */
1698 0, /* todo_flags_finish */
1701 class pass_lower_subreg : public rtl_opt_pass
1703 public:
1704 pass_lower_subreg (gcc::context *ctxt)
1705 : rtl_opt_pass (pass_data_lower_subreg, ctxt)
1708 /* opt_pass methods: */
1709 virtual bool gate (function *) { return flag_split_wide_types != 0; }
1710 virtual unsigned int execute (function *)
1712 decompose_multiword_subregs (false);
1713 return 0;
1716 }; // class pass_lower_subreg
1718 } // anon namespace
1720 rtl_opt_pass *
1721 make_pass_lower_subreg (gcc::context *ctxt)
1723 return new pass_lower_subreg (ctxt);
1726 /* Implement second lower subreg pass. */
1728 namespace {
1730 const pass_data pass_data_lower_subreg2 =
1732 RTL_PASS, /* type */
1733 "subreg2", /* name */
1734 OPTGROUP_NONE, /* optinfo_flags */
1735 TV_LOWER_SUBREG, /* tv_id */
1736 0, /* properties_required */
1737 0, /* properties_provided */
1738 0, /* properties_destroyed */
1739 0, /* todo_flags_start */
1740 TODO_df_finish, /* todo_flags_finish */
1743 class pass_lower_subreg2 : public rtl_opt_pass
1745 public:
1746 pass_lower_subreg2 (gcc::context *ctxt)
1747 : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
1750 /* opt_pass methods: */
1751 virtual bool gate (function *) { return flag_split_wide_types != 0; }
1752 virtual unsigned int execute (function *)
1754 decompose_multiword_subregs (true);
1755 return 0;
1758 }; // class pass_lower_subreg2
1760 } // anon namespace
1762 rtl_opt_pass *
1763 make_pass_lower_subreg2 (gcc::context *ctxt)
1765 return new pass_lower_subreg2 (ctxt);