gcc/lower-subreg.c
1 /* Decompose multiword subregs.
2 Copyright (C) 2007-2017 Free Software Foundation, Inc.
3 Contributed by Richard Henderson <rth@redhat.com>
4 Ian Lance Taylor <iant@google.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "cfghooks.h"
29 #include "df.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "expmed.h"
33 #include "insn-config.h"
34 #include "emit-rtl.h"
35 #include "recog.h"
36 #include "cfgrtl.h"
37 #include "cfgbuild.h"
38 #include "dce.h"
39 #include "expr.h"
40 #include "tree-pass.h"
41 #include "lower-subreg.h"
42 #include "rtl-iter.h"
43 #include "target.h"
46 /* Decompose multi-word pseudo-registers into individual
47 pseudo-registers when possible and profitable. This is possible
48 when all the uses of a multi-word register are via SUBREG, or are
49 copies of the register to another location. Breaking apart the
50 register permits more CSE and permits better register allocation.
51 This is profitable if the machine does not have move instructions
52 that can handle such multi-word values directly.
54 This pass only splits moves with modes that are wider than
55 word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
56 integer modes that are twice the width of word_mode. The latter
57 could be generalized if there were a need, but the trend in
58 architectures is away from needing it.
60 There are two useful preprocessor defines for use by maintainers:
62 #define LOG_COSTS 1
64 if you wish to see the actual cost estimates that are being used
65 for each mode wider than word mode and the cost estimates for zero
66 extension and the shifts. This can be useful when port maintainers
67 are tuning insn rtx costs.
69 #define FORCE_LOWERING 1
71 if you wish to test the pass with all the transformation forced on.
72 This can be useful for finding bugs in the transformations. */
74 #define LOG_COSTS 0
75 #define FORCE_LOWERING 0
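/* As an illustrative sketch (the register numbers are invented for the
   example): on a 32-bit target where word_mode is SImode, a double-word
   copy such as

       (set (reg:DI 100) (reg:DI 101))

   whose pseudos are only ever accessed a word at a time can be lowered
   so that each pseudo is replaced by a pair of fresh SImode pseudos and
   the copy becomes two independent word-mode copies, roughly

       (set (reg:SI 200) (reg:SI 202))
       (set (reg:SI 201) (reg:SI 203))

   after which each half participates in CSE and register allocation on
   its own.  */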
77 /* Bit N in this bitmap is set if regno N is used in a context in
78 which we can decompose it. */
79 static bitmap decomposable_context;
81 /* Bit N in this bitmap is set if regno N is used in a context in
82 which it cannot be decomposed. */
83 static bitmap non_decomposable_context;
85 /* Bit N in this bitmap is set if regno N is used in a subreg
86 which changes the mode but not the size. This typically happens
87 when the register is accessed as a floating-point value; we want to
88 avoid generating accesses to its subwords in integer modes. */
89 static bitmap subreg_context;
91 /* Bit N in the bitmap in element M of this array is set if there is a
92 copy from reg M to reg N. */
93 static vec<bitmap> reg_copy_graph;
95 struct target_lower_subreg default_target_lower_subreg;
96 #if SWITCHABLE_TARGET
97 struct target_lower_subreg *this_target_lower_subreg
98 = &default_target_lower_subreg;
99 #endif
101 #define twice_word_mode \
102 this_target_lower_subreg->x_twice_word_mode
103 #define choices \
104 this_target_lower_subreg->x_choices
106 /* RTXes used while computing costs. */
107 struct cost_rtxes {
108 /* Source and target registers. */
109 rtx source;
110 rtx target;
112 /* A twice_word_mode ZERO_EXTEND of SOURCE. */
113 rtx zext;
115 /* A shift of SOURCE. */
116 rtx shift;
118 /* A SET of TARGET. */
119 rtx set;
122 /* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
123 rtxes in RTXES. SPEED_P selects between the speed and size cost. */
125 static int
126 shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
127 machine_mode mode, int op1)
129 PUT_CODE (rtxes->shift, code);
130 PUT_MODE (rtxes->shift, mode);
131 PUT_MODE (rtxes->source, mode);
132 XEXP (rtxes->shift, 1) = GEN_INT (op1);
133 return set_src_cost (rtxes->shift, mode, speed_p);
136 /* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
137 to true if it is profitable to split a double-word CODE shift
138 of X + BITS_PER_WORD bits. SPEED_P says whether we are testing
139 for speed or size profitability.
141 Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is
142 the cost of moving zero into a word-mode register. WORD_MOVE_COST
143 is the cost of moving between word registers. */
145 static void
146 compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
147 bool *splitting, enum rtx_code code,
148 int word_move_zero_cost, int word_move_cost)
150 int wide_cost, narrow_cost, upper_cost, i;
152 for (i = 0; i < BITS_PER_WORD; i++)
154 wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
155 i + BITS_PER_WORD);
156 if (i == 0)
157 narrow_cost = word_move_cost;
158 else
159 narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
161 if (code != ASHIFTRT)
162 upper_cost = word_move_zero_cost;
163 else if (i == BITS_PER_WORD - 1)
164 upper_cost = word_move_cost;
165 else
166 upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
167 BITS_PER_WORD - 1);
169 if (LOG_COSTS)
170 fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
171 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
172 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);
174 if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
175 splitting[i] = true;
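/* Worked example (the costs themselves are target-specific; this only
   restates the comparison above): with BITS_PER_WORD == 32 and CODE ==
   ASHIFT, entry SPLITTING[8] describes a DImode shift left by 40 bits.
   The split form shifts the low word left by 8 into the high word of
   the result and loads zero into the low word, so SPLITTING[8] is set
   when the cost of the DImode shift is at least the cost of an SImode
   shift by 8 plus WORD_MOVE_ZERO_COST.  */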
179 /* Compute what we should do when optimizing for speed or size; SPEED_P
180 selects which. Use RTXES for computing costs. */
182 static void
183 compute_costs (bool speed_p, struct cost_rtxes *rtxes)
185 unsigned int i;
186 int word_move_zero_cost, word_move_cost;
188 PUT_MODE (rtxes->target, word_mode);
189 SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
190 word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);
192 SET_SRC (rtxes->set) = rtxes->source;
193 word_move_cost = set_rtx_cost (rtxes->set, speed_p);
195 if (LOG_COSTS)
196 fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
197 GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);
199 for (i = 0; i < MAX_MACHINE_MODE; i++)
201 machine_mode mode = (machine_mode) i;
202 int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
203 if (factor > 1)
205 int mode_move_cost;
207 PUT_MODE (rtxes->target, mode);
208 PUT_MODE (rtxes->source, mode);
209 mode_move_cost = set_rtx_cost (rtxes->set, speed_p);
211 if (LOG_COSTS)
212 fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
213 GET_MODE_NAME (mode), mode_move_cost,
214 word_move_cost, factor);
216 if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
218 choices[speed_p].move_modes_to_split[i] = true;
219 choices[speed_p].something_to_do = true;
224 /* For the moves and shifts, the only case that is checked is one
225 where the mode of the target is an integer mode twice the width
226 of the word_mode.
228 If it is not profitable to split a double word move then do not
229 even consider the shifts or the zero extension. */
230 if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
232 int zext_cost;
234 /* The only case to check here is whether moving the upper part with a
235 zero is cheaper than doing the zext itself. */
236 PUT_MODE (rtxes->source, word_mode);
237 zext_cost = set_src_cost (rtxes->zext, twice_word_mode, speed_p);
239 if (LOG_COSTS)
240 fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
241 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
242 zext_cost, word_move_cost, word_move_zero_cost);
244 if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
245 choices[speed_p].splitting_zext = true;
247 compute_splitting_shift (speed_p, rtxes,
248 choices[speed_p].splitting_ashift, ASHIFT,
249 word_move_zero_cost, word_move_cost);
250 compute_splitting_shift (speed_p, rtxes,
251 choices[speed_p].splitting_lshiftrt, LSHIFTRT,
252 word_move_zero_cost, word_move_cost);
253 compute_splitting_shift (speed_p, rtxes,
254 choices[speed_p].splitting_ashiftrt, ASHIFTRT,
255 word_move_zero_cost, word_move_cost);
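/* For the zero-extension case above, the comparison is between a single
   twice_word_mode insn and its split form.  Illustratively, on a
   little-endian 32-bit target

       (set (reg:DI 100) (zero_extend:DI (reg:SI 101)))

   splits into a word move plus a move of zero,

       (set (low SImode word of reg 100) (reg:SI 101))
       (set (high SImode word of reg 100) (const_int 0))

   which is the form resolve_shift_zext later emits once the DImode
   pseudo has been decomposed.  */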
259 /* Do one-per-target initialisation. This involves determining
260 which operations on the machine are profitable. If none are found,
261 then the pass just returns when called. */
263 void
264 init_lower_subreg (void)
266 struct cost_rtxes rtxes;
268 memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
270 twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode).require ();
272 rtxes.target = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
273 rtxes.source = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 2);
274 rtxes.set = gen_rtx_SET (rtxes.target, rtxes.source);
275 rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
276 rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
278 if (LOG_COSTS)
279 fprintf (stderr, "\nSize costs\n==========\n\n");
280 compute_costs (false, &rtxes);
282 if (LOG_COSTS)
283 fprintf (stderr, "\nSpeed costs\n===========\n\n");
284 compute_costs (true, &rtxes);
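/* Return whether X (possibly wrapped in a SUBREG) is an operand that is
   simple enough for the lowering below: an object which is not a
   symbolic constant and, if a MEM, is neither volatile nor has a
   mode-dependent address.  */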
287 static bool
288 simple_move_operand (rtx x)
290 if (GET_CODE (x) == SUBREG)
291 x = SUBREG_REG (x);
293 if (!OBJECT_P (x))
294 return false;
296 if (GET_CODE (x) == LABEL_REF
297 || GET_CODE (x) == SYMBOL_REF
298 || GET_CODE (x) == HIGH
299 || GET_CODE (x) == CONST)
300 return false;
302 if (MEM_P (x)
303 && (MEM_VOLATILE_P (x)
304 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
305 return false;
307 return true;
310 /* If INSN is a single set between two objects that we want to split,
311 return the single set. SPEED_P says whether we are optimizing
312 INSN for speed or size.
314 INSN should have been passed to recog and extract_insn before this
315 is called. */
317 static rtx
318 simple_move (rtx_insn *insn, bool speed_p)
320 rtx x;
321 rtx set;
322 machine_mode mode;
324 if (recog_data.n_operands != 2)
325 return NULL_RTX;
327 set = single_set (insn);
328 if (!set)
329 return NULL_RTX;
331 x = SET_DEST (set);
332 if (x != recog_data.operand[0] && x != recog_data.operand[1])
333 return NULL_RTX;
334 if (!simple_move_operand (x))
335 return NULL_RTX;
337 x = SET_SRC (set);
338 if (x != recog_data.operand[0] && x != recog_data.operand[1])
339 return NULL_RTX;
340 /* For the src we can handle ASM_OPERANDS, and it is beneficial for
341 things like x86 rdtsc which returns a DImode value. */
342 if (GET_CODE (x) != ASM_OPERANDS
343 && !simple_move_operand (x))
344 return NULL_RTX;
346 /* We try to decompose in integer modes, to avoid generating
347 inefficient code copying between integer and floating point
348 registers. That means that we can't decompose if this is a
349 non-integer mode for which there is no integer mode of the same
350 size. */
351 mode = GET_MODE (SET_DEST (set));
352 if (!SCALAR_INT_MODE_P (mode)
353 && !int_mode_for_size (GET_MODE_BITSIZE (mode), 0).exists ())
354 return NULL_RTX;
356 /* Reject PARTIAL_INT modes. They are used for processor specific
357 purposes and it's probably best not to tamper with them. */
358 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
359 return NULL_RTX;
361 if (!choices[speed_p].move_modes_to_split[(int) mode])
362 return NULL_RTX;
364 return set;
367 /* If SET is a copy from one multi-word pseudo-register to another,
368 record that in reg_copy_graph. Return whether it is such a
369 copy. */
371 static bool
372 find_pseudo_copy (rtx set)
374 rtx dest = SET_DEST (set);
375 rtx src = SET_SRC (set);
376 unsigned int rd, rs;
377 bitmap b;
379 if (!REG_P (dest) || !REG_P (src))
380 return false;
382 rd = REGNO (dest);
383 rs = REGNO (src);
384 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
385 return false;
387 b = reg_copy_graph[rs];
388 if (b == NULL)
390 b = BITMAP_ALLOC (NULL);
391 reg_copy_graph[rs] = b;
394 bitmap_set_bit (b, rd);
396 return true;
399 /* Look through the registers in DECOMPOSABLE_CONTEXT. For each case
400 where they are copied to another register, add the register to
401 which they are copied to DECOMPOSABLE_CONTEXT. Use
402 NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
403 copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */
405 static void
406 propagate_pseudo_copies (void)
408 auto_bitmap queue, propagate;
410 bitmap_copy (queue, decomposable_context);
413 bitmap_iterator iter;
414 unsigned int i;
416 bitmap_clear (propagate);
418 EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
420 bitmap b = reg_copy_graph[i];
421 if (b)
422 bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
425 bitmap_and_compl (queue, propagate, decomposable_context);
426 bitmap_ior_into (decomposable_context, propagate);
428 while (!bitmap_empty_p (queue));
431 /* A pointer to one of these values is passed to
432 find_decomposable_subregs. */
434 enum classify_move_insn
436 /* Not a simple move from one location to another. */
437 NOT_SIMPLE_MOVE,
438 /* A simple move we want to decompose. */
439 DECOMPOSABLE_SIMPLE_MOVE,
440 /* Any other simple move. */
441 SIMPLE_MOVE
444 /* If we find a SUBREG in *LOC which we could use to decompose a
445 pseudo-register, set a bit in DECOMPOSABLE_CONTEXT. If we find an
446 unadorned register which is not a simple pseudo-register copy,
447 DATA will point at the type of move, and we set a bit in
448 DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate. */
450 static void
451 find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
453 subrtx_var_iterator::array_type array;
454 FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
456 rtx x = *iter;
457 if (GET_CODE (x) == SUBREG)
459 rtx inner = SUBREG_REG (x);
460 unsigned int regno, outer_size, inner_size, outer_words, inner_words;
462 if (!REG_P (inner))
463 continue;
465 regno = REGNO (inner);
466 if (HARD_REGISTER_NUM_P (regno))
468 iter.skip_subrtxes ();
469 continue;
472 outer_size = GET_MODE_SIZE (GET_MODE (x));
473 inner_size = GET_MODE_SIZE (GET_MODE (inner));
474 outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
475 inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
477 /* We only try to decompose single word subregs of multi-word
478 registers. When we find one, we skip the subrtxes to avoid
479 iterating over the inner register.
481 ??? This doesn't allow, e.g., DImode subregs of TImode values
482 on 32-bit targets. We would need to record the way the
483 pseudo-register was used, and only decompose if all the uses
484 were the same number and size of pieces. Hopefully this
485 doesn't happen much. */
487 if (outer_words == 1 && inner_words > 1)
489 bitmap_set_bit (decomposable_context, regno);
490 iter.skip_subrtxes ();
491 continue;
494 /* If this is a cast from one mode to another, where the modes
495 have the same size, and they are not tieable, then mark this
496 register as non-decomposable. If we decompose it we are
497 likely to mess up whatever the backend is trying to do. */
498 if (outer_words > 1
499 && outer_size == inner_size
500 && !targetm.modes_tieable_p (GET_MODE (x), GET_MODE (inner)))
502 bitmap_set_bit (non_decomposable_context, regno);
503 bitmap_set_bit (subreg_context, regno);
504 iter.skip_subrtxes ();
505 continue;
508 else if (REG_P (x))
510 unsigned int regno;
512 /* We will see an outer SUBREG before we see the inner REG, so
513 when we see a plain REG here it means a direct reference to
514 the register.
516 If this is not a simple copy from one location to another,
517 then we can not decompose this register. If this is a simple
518 copy we want to decompose, and the mode is right,
519 then we mark the register as decomposable.
520 Otherwise we don't say anything about this register --
521 it could be decomposed, but whether that would be
522 profitable depends upon how it is used elsewhere.
524 We only set bits in the bitmap for multi-word
525 pseudo-registers, since those are the only ones we care about
526 and it keeps the size of the bitmaps down. */
528 regno = REGNO (x);
529 if (!HARD_REGISTER_NUM_P (regno)
530 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
532 switch (*pcmi)
534 case NOT_SIMPLE_MOVE:
535 bitmap_set_bit (non_decomposable_context, regno);
536 break;
537 case DECOMPOSABLE_SIMPLE_MOVE:
538 if (targetm.modes_tieable_p (GET_MODE (x), word_mode))
539 bitmap_set_bit (decomposable_context, regno);
540 break;
541 case SIMPLE_MOVE:
542 break;
543 default:
544 gcc_unreachable ();
548 else if (MEM_P (x))
550 enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;
552 /* Any registers used in a MEM do not participate in a
553 SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE. Do our own recursion
554 here, then skip the subrtxes to block the parent's recursion. */
555 find_decomposable_subregs (&XEXP (x, 0), &cmi_mem);
556 iter.skip_subrtxes ();
561 /* Decompose REGNO into word-sized components. We smash the REG node
562 in place. This ensures that (1) something goes wrong quickly if we
563 fail to make some replacement, and (2) the debug information inside
564 the symbol table is automatically kept up to date. */
566 static void
567 decompose_register (unsigned int regno)
569 rtx reg;
570 unsigned int words, i;
571 rtvec v;
573 reg = regno_reg_rtx[regno];
575 regno_reg_rtx[regno] = NULL_RTX;
577 words = GET_MODE_SIZE (GET_MODE (reg));
578 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
580 v = rtvec_alloc (words);
581 for (i = 0; i < words; ++i)
582 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
584 PUT_CODE (reg, CONCATN);
585 XVEC (reg, 0) = v;
587 if (dump_file)
589 fprintf (dump_file, "; Splitting reg %u ->", regno);
590 for (i = 0; i < words; ++i)
591 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
592 fputc ('\n', dump_file);
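/* Purely as an illustration: on a 32-bit target, a decomposed
   (reg:DI 100) is smashed in place into

       (concatn:DI [ (reg:SI 200) (reg:SI 201) ])

   where 200 and 201 stand for the freshly allocated word_mode pseudos;
   every later reference through the original rtx therefore sees the
   CONCATN and is resolved to one of its parts.  */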
596 /* Get a SUBREG of a CONCATN. */
598 static rtx
599 simplify_subreg_concatn (machine_mode outermode, rtx op,
600 unsigned int byte)
602 unsigned int inner_size;
603 machine_mode innermode, partmode;
604 rtx part;
605 unsigned int final_offset;
607 gcc_assert (GET_CODE (op) == CONCATN);
608 gcc_assert (byte % GET_MODE_SIZE (outermode) == 0);
610 innermode = GET_MODE (op);
611 gcc_assert (byte < GET_MODE_SIZE (innermode));
612 if (GET_MODE_SIZE (outermode) > GET_MODE_SIZE (innermode))
613 return NULL_RTX;
615 inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0);
616 part = XVECEXP (op, 0, byte / inner_size);
617 partmode = GET_MODE (part);
619 final_offset = byte % inner_size;
620 if (final_offset + GET_MODE_SIZE (outermode) > inner_size)
621 return NULL_RTX;
623 /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
624 regular CONST_VECTORs. They have vector or integer modes, depending
625 on the capabilities of the target. Cope with them. */
626 if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
627 partmode = GET_MODE_INNER (innermode);
628 else if (partmode == VOIDmode)
629 partmode = mode_for_size (inner_size * BITS_PER_UNIT,
630 GET_MODE_CLASS (innermode), 0).require ();
632 return simplify_gen_subreg (outermode, part, partmode, final_offset);
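/* For example (continuing the illustration above): with OP being the
   CONCATN built for a DImode pseudo and BYTE == 4, inner_size is
   8 / 2 == 4, so we pick element 1 of the vector, final_offset is 0,
   and the word_mode part is returned unchanged by simplify_gen_subreg.  */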
635 /* Wrapper around simplify_gen_subreg which handles CONCATN. */
637 static rtx
638 simplify_gen_subreg_concatn (machine_mode outermode, rtx op,
639 machine_mode innermode, unsigned int byte)
641 rtx ret;
643 /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
644 If OP is a SUBREG of a CONCATN, then it must be a simple mode
645 change with the same size and offset 0, or it must extract a
646 part. We shouldn't see anything else here. */
647 if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
649 rtx op2;
651 if ((GET_MODE_SIZE (GET_MODE (op))
652 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
653 && SUBREG_BYTE (op) == 0)
654 return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
655 GET_MODE (SUBREG_REG (op)), byte);
657 op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
658 SUBREG_BYTE (op));
659 if (op2 == NULL_RTX)
661 /* We don't handle paradoxical subregs here. */
662 gcc_assert (!paradoxical_subreg_p (outermode, GET_MODE (op)));
663 gcc_assert (!paradoxical_subreg_p (op));
664 op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
665 byte + SUBREG_BYTE (op));
666 gcc_assert (op2 != NULL_RTX);
667 return op2;
670 op = op2;
671 gcc_assert (op != NULL_RTX);
672 gcc_assert (innermode == GET_MODE (op));
675 if (GET_CODE (op) == CONCATN)
676 return simplify_subreg_concatn (outermode, op, byte);
678 ret = simplify_gen_subreg (outermode, op, innermode, byte);
680 /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
681 resolve_simple_move will ask for the high part of the paradoxical
682 subreg, which does not have a value. Just return a zero. */
683 if (ret == NULL_RTX
684 && paradoxical_subreg_p (op))
685 return CONST0_RTX (outermode);
687 gcc_assert (ret != NULL_RTX);
688 return ret;
691 /* Return whether we should resolve X into the registers into which it
692 was decomposed. */
694 static bool
695 resolve_reg_p (rtx x)
697 return GET_CODE (x) == CONCATN;
700 /* Return whether X is a SUBREG of a register which we need to
701 resolve. */
703 static bool
704 resolve_subreg_p (rtx x)
706 if (GET_CODE (x) != SUBREG)
707 return false;
708 return resolve_reg_p (SUBREG_REG (x));
711 /* Look for SUBREGs in *LOC which need to be decomposed. */
713 static bool
714 resolve_subreg_use (rtx *loc, rtx insn)
716 subrtx_ptr_iterator::array_type array;
717 FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
719 rtx *loc = *iter;
720 rtx x = *loc;
721 if (resolve_subreg_p (x))
723 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
724 SUBREG_BYTE (x));
726 /* It is possible for a note to contain a reference which we can
727 decompose. In this case, return 1 to the caller to indicate
728 that the note must be removed. */
729 if (!x)
731 gcc_assert (!insn);
732 return true;
735 validate_change (insn, loc, x, 1);
736 iter.skip_subrtxes ();
738 else if (resolve_reg_p (x))
739 /* Return 1 to the caller to indicate that we found a direct
740 reference to a register which is being decomposed. This can
741 happen inside notes, multiword shift or zero-extend
742 instructions. */
743 return true;
746 return false;
749 /* Resolve any decomposed registers which appear in register notes on
750 INSN. */
752 static void
753 resolve_reg_notes (rtx_insn *insn)
755 rtx *pnote, note;
757 note = find_reg_equal_equiv_note (insn);
758 if (note)
760 int old_count = num_validated_changes ();
761 if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX))
762 remove_note (insn, note);
763 else
764 if (old_count != num_validated_changes ())
765 df_notes_rescan (insn);
768 pnote = &REG_NOTES (insn);
769 while (*pnote != NULL_RTX)
771 bool del = false;
773 note = *pnote;
774 switch (REG_NOTE_KIND (note))
776 case REG_DEAD:
777 case REG_UNUSED:
778 if (resolve_reg_p (XEXP (note, 0)))
779 del = true;
780 break;
782 default:
783 break;
786 if (del)
787 *pnote = XEXP (note, 1);
788 else
789 pnote = &XEXP (note, 1);
793 /* Return whether X can be decomposed into subwords. */
795 static bool
796 can_decompose_p (rtx x)
798 if (REG_P (x))
800 unsigned int regno = REGNO (x);
802 if (HARD_REGISTER_NUM_P (regno))
804 unsigned int byte, num_bytes;
806 num_bytes = GET_MODE_SIZE (GET_MODE (x));
807 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
808 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
809 return false;
810 return true;
812 else
813 return !bitmap_bit_p (subreg_context, regno);
816 return true;
819 /* Decompose the registers used in a simple move SET within INSN. If
820 we don't change anything, return INSN, otherwise return the start
821 of the sequence of moves. */
823 static rtx_insn *
824 resolve_simple_move (rtx set, rtx_insn *insn)
826 rtx src, dest, real_dest;
827 rtx_insn *insns;
828 machine_mode orig_mode, dest_mode;
829 unsigned int words;
830 bool pushing;
832 src = SET_SRC (set);
833 dest = SET_DEST (set);
834 orig_mode = GET_MODE (dest);
836 words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
837 gcc_assert (words > 1);
839 start_sequence ();
841 /* We have to handle copying from a SUBREG of a decomposed reg where
842 the SUBREG is larger than word size. Rather than assume that we
843 can take a word_mode SUBREG of the destination, we copy to a new
844 register and then copy that to the destination. */
846 real_dest = NULL_RTX;
848 if (GET_CODE (src) == SUBREG
849 && resolve_reg_p (SUBREG_REG (src))
850 && (SUBREG_BYTE (src) != 0
851 || (GET_MODE_SIZE (orig_mode)
852 != GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
854 real_dest = dest;
855 dest = gen_reg_rtx (orig_mode);
856 if (REG_P (real_dest))
857 REG_ATTRS (dest) = REG_ATTRS (real_dest);
860 /* Similarly if we are copying to a SUBREG of a decomposed reg where
861 the SUBREG is larger than word size. */
863 if (GET_CODE (dest) == SUBREG
864 && resolve_reg_p (SUBREG_REG (dest))
865 && (SUBREG_BYTE (dest) != 0
866 || (GET_MODE_SIZE (orig_mode)
867 != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
869 rtx reg, smove;
870 rtx_insn *minsn;
872 reg = gen_reg_rtx (orig_mode);
873 minsn = emit_move_insn (reg, src);
874 smove = single_set (minsn);
875 gcc_assert (smove != NULL_RTX);
876 resolve_simple_move (smove, minsn);
877 src = reg;
880 /* If we didn't have any big SUBREGS of decomposed registers, and
881 neither side of the move is a register we are decomposing, then
882 we don't have to do anything here. */
884 if (src == SET_SRC (set)
885 && dest == SET_DEST (set)
886 && !resolve_reg_p (src)
887 && !resolve_subreg_p (src)
888 && !resolve_reg_p (dest)
889 && !resolve_subreg_p (dest))
891 end_sequence ();
892 return insn;
895 /* It's possible for the code to use a subreg of a decomposed
896 register while forming an address. We need to handle that before
897 passing the address to emit_move_insn. We pass NULL_RTX as the
898 insn parameter to resolve_subreg_use because we cannot validate
899 the insn yet. */
900 if (MEM_P (src) || MEM_P (dest))
902 int acg;
904 if (MEM_P (src))
905 resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
906 if (MEM_P (dest))
907 resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
908 acg = apply_change_group ();
909 gcc_assert (acg);
912 /* If SRC is a register which we can't decompose, or has side
913 effects, we need to move via a temporary register. */
915 if (!can_decompose_p (src)
916 || side_effects_p (src)
917 || GET_CODE (src) == ASM_OPERANDS)
919 rtx reg;
921 reg = gen_reg_rtx (orig_mode);
923 if (AUTO_INC_DEC)
925 rtx_insn *move = emit_move_insn (reg, src);
926 if (MEM_P (src))
928 rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
929 if (note)
930 add_reg_note (move, REG_INC, XEXP (note, 0));
933 else
934 emit_move_insn (reg, src);
936 src = reg;
939 /* If DEST is a register which we can't decompose, or has side
940 effects, we need to first move to a temporary register. We
941 handle the common case of pushing an operand directly. We also
942 go through a temporary register if it holds a floating point
943 value. This gives us better code on systems which can't move
944 data easily between integer and floating point registers. */
946 dest_mode = orig_mode;
947 pushing = push_operand (dest, dest_mode);
948 if (!can_decompose_p (dest)
949 || (side_effects_p (dest) && !pushing)
950 || (!SCALAR_INT_MODE_P (dest_mode)
951 && !resolve_reg_p (dest)
952 && !resolve_subreg_p (dest)))
954 if (real_dest == NULL_RTX)
955 real_dest = dest;
956 if (!SCALAR_INT_MODE_P (dest_mode))
957 dest_mode = int_mode_for_mode (dest_mode).require ();
958 dest = gen_reg_rtx (dest_mode);
959 if (REG_P (real_dest))
960 REG_ATTRS (dest) = REG_ATTRS (real_dest);
963 if (pushing)
965 unsigned int i, j, jinc;
967 gcc_assert (GET_MODE_SIZE (orig_mode) % UNITS_PER_WORD == 0);
968 gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
969 gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);
971 if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
973 j = 0;
974 jinc = 1;
976 else
978 j = words - 1;
979 jinc = -1;
982 for (i = 0; i < words; ++i, j += jinc)
984 rtx temp;
986 temp = copy_rtx (XEXP (dest, 0));
987 temp = adjust_automodify_address_nv (dest, word_mode, temp,
988 j * UNITS_PER_WORD);
989 emit_move_insn (temp,
990 simplify_gen_subreg_concatn (word_mode, src,
991 orig_mode,
992 j * UNITS_PER_WORD));
995 else
997 unsigned int i;
999 if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
1000 emit_clobber (dest);
1002 for (i = 0; i < words; ++i)
1003 emit_move_insn (simplify_gen_subreg_concatn (word_mode, dest,
1004 dest_mode,
1005 i * UNITS_PER_WORD),
1006 simplify_gen_subreg_concatn (word_mode, src,
1007 orig_mode,
1008 i * UNITS_PER_WORD));
1011 if (real_dest != NULL_RTX)
1013 rtx mdest, smove;
1014 rtx_insn *minsn;
1016 if (dest_mode == orig_mode)
1017 mdest = dest;
1018 else
1019 mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
1020 minsn = emit_move_insn (real_dest, mdest);
1022 if (AUTO_INC_DEC && MEM_P (real_dest)
1023 && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
1025 rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
1026 if (note)
1027 add_reg_note (minsn, REG_INC, XEXP (note, 0));
1030 smove = single_set (minsn);
1031 gcc_assert (smove != NULL_RTX);
1033 resolve_simple_move (smove, minsn);
1036 insns = get_insns ();
1037 end_sequence ();
1039 copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);
1041 emit_insn_before (insns, insn);
1043 /* If we get here via self-recursion, then INSN is not yet in the insns
1044 chain and delete_insn will fail. We only want to remove INSN from the
1045 current sequence. See PR56738. */
1046 if (in_sequence_p ())
1047 remove_insn (insn);
1048 else
1049 delete_insn (insn);
1051 return insns;
1054 /* Change a CLOBBER of a decomposed register into a CLOBBER of the
1055 component registers. Return whether we changed something. */
1057 static bool
1058 resolve_clobber (rtx pat, rtx_insn *insn)
1060 rtx reg;
1061 machine_mode orig_mode;
1062 unsigned int words, i;
1063 int ret;
1065 reg = XEXP (pat, 0);
1066 if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
1067 return false;
1069 orig_mode = GET_MODE (reg);
1070 words = GET_MODE_SIZE (orig_mode);
1071 words = (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1073 ret = validate_change (NULL_RTX, &XEXP (pat, 0),
1074 simplify_gen_subreg_concatn (word_mode, reg,
1075 orig_mode, 0),
1076 0);
1077 df_insn_rescan (insn);
1078 gcc_assert (ret != 0);
1080 for (i = words - 1; i > 0; --i)
1082 rtx x;
1084 x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
1085 i * UNITS_PER_WORD);
1086 x = gen_rtx_CLOBBER (VOIDmode, x);
1087 emit_insn_after (x, insn);
1090 resolve_reg_notes (insn);
1092 return true;
1095 /* A USE of a decomposed register is no longer meaningful. Return
1096 whether we changed something. */
1098 static bool
1099 resolve_use (rtx pat, rtx_insn *insn)
1101 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1103 delete_insn (insn);
1104 return true;
1107 resolve_reg_notes (insn);
1109 return false;
1112 /* A VAR_LOCATION can be simplified. */
1114 static void
1115 resolve_debug (rtx_insn *insn)
1117 subrtx_ptr_iterator::array_type array;
1118 FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
1120 rtx *loc = *iter;
1121 rtx x = *loc;
1122 if (resolve_subreg_p (x))
1124 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
1125 SUBREG_BYTE (x));
1127 if (x)
1128 *loc = x;
1129 else
1130 x = copy_rtx (*loc);
1132 if (resolve_reg_p (x))
1133 *loc = copy_rtx (x);
1136 df_insn_rescan (insn);
1138 resolve_reg_notes (insn);
1141 /* Check if INSN is a decomposable multiword-shift or zero-extend and
1142 set the decomposable_context bitmap accordingly. SPEED_P is true
1143 if we are optimizing INSN for speed rather than size. Return true
1144 if INSN is decomposable. */
1146 static bool
1147 find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
1149 rtx set;
1150 rtx op;
1151 rtx op_operand;
1153 set = single_set (insn);
1154 if (!set)
1155 return false;
1157 op = SET_SRC (set);
1158 if (GET_CODE (op) != ASHIFT
1159 && GET_CODE (op) != LSHIFTRT
1160 && GET_CODE (op) != ASHIFTRT
1161 && GET_CODE (op) != ZERO_EXTEND)
1162 return false;
1164 op_operand = XEXP (op, 0);
1165 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1166 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1167 || HARD_REGISTER_NUM_P (REGNO (op_operand))
1168 || GET_MODE (op) != twice_word_mode)
1169 return false;
1171 if (GET_CODE (op) == ZERO_EXTEND)
1173 if (GET_MODE (op_operand) != word_mode
1174 || !choices[speed_p].splitting_zext)
1175 return false;
1177 else /* left or right shift */
1179 bool *splitting = (GET_CODE (op) == ASHIFT
1180 ? choices[speed_p].splitting_ashift
1181 : GET_CODE (op) == ASHIFTRT
1182 ? choices[speed_p].splitting_ashiftrt
1183 : choices[speed_p].splitting_lshiftrt);
1184 if (!CONST_INT_P (XEXP (op, 1))
1185 || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1186 2 * BITS_PER_WORD - 1)
1187 || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1188 return false;
1190 bitmap_set_bit (decomposable_context, REGNO (op_operand));
1193 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1195 return true;
1198 /* Decompose a more-than-word-wide shift (in INSN) of a multiword
1199 pseudo, or a multiword zero-extend of a word_mode pseudo, into a move
1200 and 'set to zero' insn. Return a pointer to the new insn when a
1201 replacement was done. */
1203 static rtx_insn *
1204 resolve_shift_zext (rtx_insn *insn)
1206 rtx set;
1207 rtx op;
1208 rtx op_operand;
1209 rtx_insn *insns;
1210 rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
1211 int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
1212 scalar_int_mode inner_mode;
1214 set = single_set (insn);
1215 if (!set)
1216 return NULL;
1218 op = SET_SRC (set);
1219 if (GET_CODE (op) != ASHIFT
1220 && GET_CODE (op) != LSHIFTRT
1221 && GET_CODE (op) != ASHIFTRT
1222 && GET_CODE (op) != ZERO_EXTEND)
1223 return NULL;
1225 op_operand = XEXP (op, 0);
1226 if (!is_a <scalar_int_mode> (GET_MODE (op_operand), &inner_mode))
1227 return NULL;
1229 /* We can tear this operation apart only if the regs were already
1230 torn apart. */
1231 if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
1232 return NULL;
1234 /* src_reg_num is the number of the word mode register which we
1235 are operating on. For a left shift and a zero_extend on little
1236 endian machines this is register 0. */
1237 src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
1238 ? 1 : 0;
1240 if (WORDS_BIG_ENDIAN && GET_MODE_SIZE (inner_mode) > UNITS_PER_WORD)
1241 src_reg_num = 1 - src_reg_num;
1243 if (GET_CODE (op) == ZERO_EXTEND)
1244 dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
1245 else
1246 dest_reg_num = 1 - src_reg_num;
1248 offset1 = UNITS_PER_WORD * dest_reg_num;
1249 offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
1250 src_offset = UNITS_PER_WORD * src_reg_num;
1252 start_sequence ();
1254 dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1255 GET_MODE (SET_DEST (set)),
1256 offset1);
1257 dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1258 GET_MODE (SET_DEST (set)),
1259 offset2);
1260 src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
1261 GET_MODE (op_operand),
1262 src_offset);
1263 if (GET_CODE (op) == ASHIFTRT
1264 && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
1265 upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
1266 BITS_PER_WORD - 1, NULL_RTX, 0);
1268 if (GET_CODE (op) != ZERO_EXTEND)
1270 int shift_count = INTVAL (XEXP (op, 1));
1271 if (shift_count > BITS_PER_WORD)
1272 src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
1273 LSHIFT_EXPR : RSHIFT_EXPR,
1274 word_mode, src_reg,
1275 shift_count - BITS_PER_WORD,
1276 dest_reg, GET_CODE (op) != ASHIFTRT);
1279 if (dest_reg != src_reg)
1280 emit_move_insn (dest_reg, src_reg);
1281 if (GET_CODE (op) != ASHIFTRT)
1282 emit_move_insn (dest_upper, CONST0_RTX (word_mode));
1283 else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
1284 emit_move_insn (dest_upper, copy_rtx (src_reg));
1285 else
1286 emit_move_insn (dest_upper, upper_src);
1287 insns = get_insns ();
1289 end_sequence ();
1291 emit_insn_before (insns, insn);
1293 if (dump_file)
1295 rtx_insn *in;
1296 fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
1297 for (in = insns; in != insn; in = NEXT_INSN (in))
1298 fprintf (dump_file, "%d ", INSN_UID (in));
1299 fprintf (dump_file, "\n");
1302 delete_insn (insn);
1303 return insns;
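/* Illustrative sketch only: on a 32-bit little-endian target, once the
   DImode pseudos involved have been decomposed,

       (set (reg:DI 100) (ashift:DI (reg:DI 101) (const_int 40)))

   is replaced by a shift of the low word of 101 by 40 - 32 == 8 bits
   into the high word of 100 plus a clear of the low word of 100,
   roughly

       (set (high word of 100) (ashift:SI (low word of 101) (const_int 8)))
       (set (low word of 100) (const_int 0))

   with the exact insns chosen by expand_shift and emit_move_insn.  */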
1306 /* Print to dump_file a description of what we're doing with shift code CODE.
1307 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1309 static void
1310 dump_shift_choices (enum rtx_code code, bool *splitting)
1312 int i;
1313 const char *sep;
1315 fprintf (dump_file,
1316 " Splitting mode %s for %s lowering with shift amounts = ",
1317 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1318 sep = "";
1319 for (i = 0; i < BITS_PER_WORD; i++)
1320 if (splitting[i])
1322 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1323 sep = ",";
1325 fprintf (dump_file, "\n");
1328 /* Print to dump_file a description of what we're doing when optimizing
1329 for speed or size; SPEED_P says which. DESCRIPTION is a description
1330 of the SPEED_P choice. */
1332 static void
1333 dump_choices (bool speed_p, const char *description)
1335 unsigned int i;
1337 fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1339 for (i = 0; i < MAX_MACHINE_MODE; i++)
1340 if (GET_MODE_SIZE ((machine_mode) i) > UNITS_PER_WORD)
1341 fprintf (dump_file, " %s mode %s for copy lowering.\n",
1342 choices[speed_p].move_modes_to_split[i]
1343 ? "Splitting"
1344 : "Skipping",
1345 GET_MODE_NAME ((machine_mode) i));
1347 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
1348 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1349 GET_MODE_NAME (twice_word_mode));
1351 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1352 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1353 dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
1354 fprintf (dump_file, "\n");
1357 /* Look for registers which are always accessed via word-sized SUBREGs
1358 or, if DECOMPOSE_COPIES is true, via copies. Decompose these
1359 registers into several word-sized pseudo-registers. */
1361 static void
1362 decompose_multiword_subregs (bool decompose_copies)
1364 unsigned int max;
1365 basic_block bb;
1366 bool speed_p;
1368 if (dump_file)
1370 dump_choices (false, "size");
1371 dump_choices (true, "speed");
1374 /* Check if this target even has any modes to consider lowering. */
1375 if (!choices[false].something_to_do && !choices[true].something_to_do)
1377 if (dump_file)
1378 fprintf (dump_file, "Nothing to do!\n");
1379 return;
1382 max = max_reg_num ();
1384 /* First see if there are any multi-word pseudo-registers. If there
1385 aren't, there is nothing we can do. This should speed up this
1386 pass in the normal case, since it should be faster than scanning
1387 all the insns. */
1389 unsigned int i;
1390 bool useful_modes_seen = false;
1392 for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
1393 if (regno_reg_rtx[i] != NULL)
1395 machine_mode mode = GET_MODE (regno_reg_rtx[i]);
1396 if (choices[false].move_modes_to_split[(int) mode]
1397 || choices[true].move_modes_to_split[(int) mode])
1399 useful_modes_seen = true;
1400 break;
1404 if (!useful_modes_seen)
1406 if (dump_file)
1407 fprintf (dump_file, "Nothing to lower in this function.\n");
1408 return;
1412 if (df)
1414 df_set_flags (DF_DEFER_INSN_RESCAN);
1415 run_word_dce ();
1418 /* FIXME: It may be possible to change this code to look for each
1419 multi-word pseudo-register and to find each insn which sets or
1420 uses that register. That should be faster than scanning all the
1421 insns. */
1423 decomposable_context = BITMAP_ALLOC (NULL);
1424 non_decomposable_context = BITMAP_ALLOC (NULL);
1425 subreg_context = BITMAP_ALLOC (NULL);
1427 reg_copy_graph.create (max);
1428 reg_copy_graph.safe_grow_cleared (max);
1429 memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);
1431 speed_p = optimize_function_for_speed_p (cfun);
1432 FOR_EACH_BB_FN (bb, cfun)
1434 rtx_insn *insn;
1436 FOR_BB_INSNS (bb, insn)
1438 rtx set;
1439 enum classify_move_insn cmi;
1440 int i, n;
1442 if (!INSN_P (insn)
1443 || GET_CODE (PATTERN (insn)) == CLOBBER
1444 || GET_CODE (PATTERN (insn)) == USE)
1445 continue;
1447 recog_memoized (insn);
1449 if (find_decomposable_shift_zext (insn, speed_p))
1450 continue;
1452 extract_insn (insn);
1454 set = simple_move (insn, speed_p);
1456 if (!set)
1457 cmi = NOT_SIMPLE_MOVE;
1458 else
1460 /* We mark pseudo-to-pseudo copies as decomposable during the
1461 second pass only. The first pass is so early that there is a
1462 good chance such moves will be optimized away completely by
1463 subsequent optimizations anyway.
1465 However, we call find_pseudo_copy even during the first pass
1466 so as to properly set up the reg_copy_graph. */
1467 if (find_pseudo_copy (set))
1468 cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
1469 else
1470 cmi = SIMPLE_MOVE;
1473 n = recog_data.n_operands;
1474 for (i = 0; i < n; ++i)
1476 find_decomposable_subregs (&recog_data.operand[i], &cmi);
1478 /* We handle ASM_OPERANDS as a special case to support
1479 things like x86 rdtsc which returns a DImode value.
1480 We can decompose the output, which will certainly be
1481 operand 0, but not the inputs. */
1483 if (cmi == SIMPLE_MOVE
1484 && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
1486 gcc_assert (i == 0);
1487 cmi = NOT_SIMPLE_MOVE;
1493 bitmap_and_compl_into (decomposable_context, non_decomposable_context);
1494 if (!bitmap_empty_p (decomposable_context))
1496 unsigned int i;
1497 sbitmap_iterator sbi;
1498 bitmap_iterator iter;
1499 unsigned int regno;
1501 propagate_pseudo_copies ();
1503 auto_sbitmap sub_blocks (last_basic_block_for_fn (cfun));
1504 bitmap_clear (sub_blocks);
1506 EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
1507 decompose_register (regno);
1509 FOR_EACH_BB_FN (bb, cfun)
1511 rtx_insn *insn;
1513 FOR_BB_INSNS (bb, insn)
1515 rtx pat;
1517 if (!INSN_P (insn))
1518 continue;
1520 pat = PATTERN (insn);
1521 if (GET_CODE (pat) == CLOBBER)
1522 resolve_clobber (pat, insn);
1523 else if (GET_CODE (pat) == USE)
1524 resolve_use (pat, insn);
1525 else if (DEBUG_INSN_P (insn))
1526 resolve_debug (insn);
1527 else
1529 rtx set;
1530 int i;
1532 recog_memoized (insn);
1533 extract_insn (insn);
1535 set = simple_move (insn, speed_p);
1536 if (set)
1538 rtx_insn *orig_insn = insn;
1539 bool cfi = control_flow_insn_p (insn);
1541 /* We can end up splitting loads to multi-word pseudos
1542 into separate loads to machine word size pseudos.
1543 When this happens, we first had one load that can
1544 throw, and after resolve_simple_move we'll have a
1545 bunch of loads (at least two). All those loads may
1546 trap if we can have non-call exceptions, so they
1547 all will end the current basic block. We split the
1548 block after the outer loop over all insns, but we
1549 make sure here that we will be able to split the
1550 basic block and still produce the correct control
1551 flow graph for it. */
1552 gcc_assert (!cfi
1553 || (cfun->can_throw_non_call_exceptions
1554 && can_throw_internal (insn)));
1556 insn = resolve_simple_move (set, insn);
1557 if (insn != orig_insn)
1559 recog_memoized (insn);
1560 extract_insn (insn);
1562 if (cfi)
1563 bitmap_set_bit (sub_blocks, bb->index);
1566 else
1568 rtx_insn *decomposed_shift;
1570 decomposed_shift = resolve_shift_zext (insn);
1571 if (decomposed_shift != NULL_RTX)
1573 insn = decomposed_shift;
1574 recog_memoized (insn);
1575 extract_insn (insn);
1579 for (i = recog_data.n_operands - 1; i >= 0; --i)
1580 resolve_subreg_use (recog_data.operand_loc[i], insn);
1582 resolve_reg_notes (insn);
1584 if (num_validated_changes () > 0)
1586 for (i = recog_data.n_dups - 1; i >= 0; --i)
1588 rtx *pl = recog_data.dup_loc[i];
1589 int dup_num = recog_data.dup_num[i];
1590 rtx *px = recog_data.operand_loc[dup_num];
1592 validate_unshare_change (insn, pl, *px, 1);
1595 i = apply_change_group ();
1596 gcc_assert (i);
1602 /* If we had insns to split that caused control flow insns in the middle
1603 of a basic block, split those blocks now. Note that we only handle
1604 the case where splitting a load has caused multiple possibly trapping
1605 loads to appear. */
1606 EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
1608 rtx_insn *insn, *end;
1609 edge fallthru;
1611 bb = BASIC_BLOCK_FOR_FN (cfun, i);
1612 insn = BB_HEAD (bb);
1613 end = BB_END (bb);
1615 while (insn != end)
1617 if (control_flow_insn_p (insn))
1619 /* Split the block after insn. There will be a fallthru
1620 edge, which is OK so we keep it. We have to create the
1621 exception edges ourselves. */
1622 fallthru = split_block (bb, insn);
1623 rtl_make_eh_edge (NULL, bb, BB_END (bb));
1624 bb = fallthru->dest;
1625 insn = BB_HEAD (bb);
1627 else
1628 insn = NEXT_INSN (insn);
1634 unsigned int i;
1635 bitmap b;
1637 FOR_EACH_VEC_ELT (reg_copy_graph, i, b)
1638 if (b)
1639 BITMAP_FREE (b);
1642 reg_copy_graph.release ();
1644 BITMAP_FREE (decomposable_context);
1645 BITMAP_FREE (non_decomposable_context);
1646 BITMAP_FREE (subreg_context);
1649 /* Implement first lower subreg pass. */
1651 namespace {
1653 const pass_data pass_data_lower_subreg =
1655 RTL_PASS, /* type */
1656 "subreg1", /* name */
1657 OPTGROUP_NONE, /* optinfo_flags */
1658 TV_LOWER_SUBREG, /* tv_id */
1659 0, /* properties_required */
1660 0, /* properties_provided */
1661 0, /* properties_destroyed */
1662 0, /* todo_flags_start */
1663 0, /* todo_flags_finish */
1666 class pass_lower_subreg : public rtl_opt_pass
1668 public:
1669 pass_lower_subreg (gcc::context *ctxt)
1670 : rtl_opt_pass (pass_data_lower_subreg, ctxt)
1673 /* opt_pass methods: */
1674 virtual bool gate (function *) { return flag_split_wide_types != 0; }
1675 virtual unsigned int execute (function *)
1677 decompose_multiword_subregs (false);
1678 return 0;
1681 }; // class pass_lower_subreg
1683 } // anon namespace
1685 rtl_opt_pass *
1686 make_pass_lower_subreg (gcc::context *ctxt)
1688 return new pass_lower_subreg (ctxt);
1691 /* Implement second lower subreg pass. */
1693 namespace {
1695 const pass_data pass_data_lower_subreg2 =
1697 RTL_PASS, /* type */
1698 "subreg2", /* name */
1699 OPTGROUP_NONE, /* optinfo_flags */
1700 TV_LOWER_SUBREG, /* tv_id */
1701 0, /* properties_required */
1702 0, /* properties_provided */
1703 0, /* properties_destroyed */
1704 0, /* todo_flags_start */
1705 TODO_df_finish, /* todo_flags_finish */
1708 class pass_lower_subreg2 : public rtl_opt_pass
1710 public:
1711 pass_lower_subreg2 (gcc::context *ctxt)
1712 : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
1715 /* opt_pass methods: */
1716 virtual bool gate (function *) { return flag_split_wide_types != 0; }
1717 virtual unsigned int execute (function *)
1719 decompose_multiword_subregs (true);
1720 return 0;
1723 }; // class pass_lower_subreg2
1725 } // anon namespace
1727 rtl_opt_pass *
1728 make_pass_lower_subreg2 (gcc::context *ctxt)
1730 return new pass_lower_subreg2 (ctxt);