c++: over-eager friend matching [PR109649]
[official-gcc.git] / gcc / lower-subreg.cc
blob81fc5380cbe5ea844010dae1018f9c209c7ff2f6
1 /* Decompose multiword subregs.
2 Copyright (C) 2007-2023 Free Software Foundation, Inc.
3 Contributed by Richard Henderson <rth@redhat.com>
4 Ian Lance Taylor <iant@google.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "cfghooks.h"
29 #include "df.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "expmed.h"
33 #include "insn-config.h"
34 #include "emit-rtl.h"
35 #include "recog.h"
36 #include "cfgrtl.h"
37 #include "cfgbuild.h"
38 #include "dce.h"
39 #include "expr.h"
40 #include "explow.h"
41 #include "tree-pass.h"
42 #include "lower-subreg.h"
43 #include "rtl-iter.h"
44 #include "target.h"
47 /* Decompose multi-word pseudo-registers into individual
48 pseudo-registers when possible and profitable. This is possible
49 when all the uses of a multi-word register are via SUBREG, or are
50 copies of the register to another location. Breaking apart the
51 register permits more CSE and permits better register allocation.
52 This is profitable if the machine does not have move instructions
53 to do this.
55 This pass only splits moves with modes that are wider than
56 word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with
57 integer modes that are twice the width of word_mode. The latter
58 could be generalized if there was a need to do this, but the trend in
59 architectures is to not need this.
61 There are two useful preprocessor defines for use by maintainers:
63 #define LOG_COSTS 1
65 if you wish to see the actual cost estimates that are being used
66 for each mode wider than word mode and the cost estimates for zero
67 extension and the shifts. This can be useful when port maintainers
68 are tuning insn rtx costs.
70 #define FORCE_LOWERING 1
72 if you wish to test the pass with all the transformation forced on.
73 This can be useful for finding bugs in the transformations. */
/* Maintainer switches described in the overview comment above; both are
   disabled here.  */
75 #define LOG_COSTS 0
76 #define FORCE_LOWERING 0
78 /* Bit N in this bitmap is set if regno N is used in a context in
79 which we can decompose it. */
80 static bitmap decomposable_context;
82 /* Bit N in this bitmap is set if regno N is used in a context in
83 which it cannot be decomposed. */
84 static bitmap non_decomposable_context;
86 /* Bit N in this bitmap is set if regno N is used in a subreg
87 which changes the mode but not the size. This typically happens
88 when the register is accessed as a floating-point value; we want to
89 avoid generating accesses to its subwords in integer modes. */
90 static bitmap subreg_context;
92 /* Bit N in the bitmap in element M of this array is set if there is a
93 copy from reg M to reg N. */
94 static vec<bitmap> reg_copy_graph;
/* Default instance of the per-target lowering state.  When
   SWITCHABLE_TARGET is nonzero, this_target_lower_subreg is defined here
   and initially points at it.  */
96 struct target_lower_subreg default_target_lower_subreg;
97 #if SWITCHABLE_TARGET
98 struct target_lower_subreg *this_target_lower_subreg
99 = &default_target_lower_subreg;
100 #endif
/* Shorthand for the x_twice_word_mode and x_choices fields reached
   through this_target_lower_subreg.  */
102 #define twice_word_mode \
103 this_target_lower_subreg->x_twice_word_mode
104 #define choices \
105 this_target_lower_subreg->x_choices
107 /* Return true if MODE is a mode we know how to lower. When returning true,
108 store its byte size in *BYTES and its word size in *WORDS. */
110 static inline bool
111 interesting_mode_p (machine_mode mode, unsigned int *bytes,
112 unsigned int *words)
114 if (!GET_MODE_SIZE (mode).is_constant (bytes))
115 return false;
116 *words = CEIL (*bytes, UNITS_PER_WORD);
117 return true;
120 /* RTXes used while computing costs. */
121 struct cost_rtxes {
122 /* Source and target registers. */
123 rtx source;
124 rtx target;
126 /* A twice_word_mode ZERO_EXTEND of SOURCE. */
127 rtx zext;
129 /* A shift of SOURCE. */
130 rtx shift;
132 /* A SET of TARGET. */
133 rtx set;
136 /* Return the cost of a CODE shift in mode MODE by OP1 bits, using the
137 rtxes in RTXES. SPEED_P selects between the speed and size cost. */
139 static int
140 shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code,
141 machine_mode mode, int op1)
143 PUT_CODE (rtxes->shift, code);
144 PUT_MODE (rtxes->shift, mode);
145 PUT_MODE (rtxes->source, mode);
146 XEXP (rtxes->shift, 1) = gen_int_shift_amount (mode, op1);
147 return set_src_cost (rtxes->shift, mode, speed_p);
150 /* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X]
151 to true if it is profitable to split a double-word CODE shift
152 of X + BITS_PER_WORD bits. SPEED_P says whether we are testing
153 for speed or size profitability.
155 Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is
156 the cost of moving zero into a word-mode register. WORD_MOVE_COST
157 is the cost of moving between word registers. */
159 static void
160 compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes,
161 bool *splitting, enum rtx_code code,
162 int word_move_zero_cost, int word_move_cost)
164 int wide_cost, narrow_cost, upper_cost, i;
166 for (i = 0; i < BITS_PER_WORD; i++)
168 wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode,
169 i + BITS_PER_WORD);
170 if (i == 0)
171 narrow_cost = word_move_cost;
172 else
173 narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i);
175 if (code != ASHIFTRT)
176 upper_cost = word_move_zero_cost;
177 else if (i == BITS_PER_WORD - 1)
178 upper_cost = word_move_cost;
179 else
180 upper_cost = shift_cost (speed_p, rtxes, code, word_mode,
181 BITS_PER_WORD - 1);
183 if (LOG_COSTS)
184 fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n",
185 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code),
186 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost);
188 if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost)
189 splitting[i] = true;
193 /* Compute what we should do when optimizing for speed or size; SPEED_P
194 selects which. Use RTXES for computing costs. */
196 static void
197 compute_costs (bool speed_p, struct cost_rtxes *rtxes)
199 unsigned int i;
200 int word_move_zero_cost, word_move_cost;
202 PUT_MODE (rtxes->target, word_mode);
203 SET_SRC (rtxes->set) = CONST0_RTX (word_mode);
204 word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p);
206 SET_SRC (rtxes->set) = rtxes->source;
207 word_move_cost = set_rtx_cost (rtxes->set, speed_p);
209 if (LOG_COSTS)
210 fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n",
211 GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost);
213 for (i = 0; i < MAX_MACHINE_MODE; i++)
215 machine_mode mode = (machine_mode) i;
216 unsigned int size, factor;
217 if (interesting_mode_p (mode, &size, &factor) && factor > 1)
219 unsigned int mode_move_cost;
221 PUT_MODE (rtxes->target, mode);
222 PUT_MODE (rtxes->source, mode);
223 mode_move_cost = set_rtx_cost (rtxes->set, speed_p);
225 if (LOG_COSTS)
226 fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n",
227 GET_MODE_NAME (mode), mode_move_cost,
228 word_move_cost, factor);
230 if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor)
232 choices[speed_p].move_modes_to_split[i] = true;
233 choices[speed_p].something_to_do = true;
238 /* For the moves and shifts, the only case that is checked is one
239 where the mode of the target is an integer mode twice the width
240 of the word_mode.
242 If it is not profitable to split a double word move then do not
243 even consider the shifts or the zero extension. */
244 if (choices[speed_p].move_modes_to_split[(int) twice_word_mode])
246 int zext_cost;
248 /* The only case here to check to see if moving the upper part with a
249 zero is cheaper than doing the zext itself. */
250 PUT_MODE (rtxes->source, word_mode);
251 zext_cost = set_src_cost (rtxes->zext, twice_word_mode, speed_p);
253 if (LOG_COSTS)
254 fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n",
255 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND),
256 zext_cost, word_move_cost, word_move_zero_cost);
258 if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost)
259 choices[speed_p].splitting_zext = true;
261 compute_splitting_shift (speed_p, rtxes,
262 choices[speed_p].splitting_ashift, ASHIFT,
263 word_move_zero_cost, word_move_cost);
264 compute_splitting_shift (speed_p, rtxes,
265 choices[speed_p].splitting_lshiftrt, LSHIFTRT,
266 word_move_zero_cost, word_move_cost);
267 compute_splitting_shift (speed_p, rtxes,
268 choices[speed_p].splitting_ashiftrt, ASHIFTRT,
269 word_move_zero_cost, word_move_cost);
273 /* Do one-per-target initialisation. This involves determining
274 which operations on the machine are profitable. If none are found,
275 then the pass just returns when called. */
277 void
278 init_lower_subreg (void)
280 struct cost_rtxes rtxes;
282 memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg));
284 twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode).require ();
286 rtxes.target = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
287 rtxes.source = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 2);
288 rtxes.set = gen_rtx_SET (rtxes.target, rtxes.source);
289 rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source);
290 rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx);
292 if (LOG_COSTS)
293 fprintf (stderr, "\nSize costs\n==========\n\n");
294 compute_costs (false, &rtxes);
296 if (LOG_COSTS)
297 fprintf (stderr, "\nSpeed costs\n===========\n\n");
298 compute_costs (true, &rtxes);
301 static bool
302 simple_move_operand (rtx x)
304 if (GET_CODE (x) == SUBREG)
305 x = SUBREG_REG (x);
307 if (!OBJECT_P (x))
308 return false;
310 if (GET_CODE (x) == LABEL_REF
311 || GET_CODE (x) == SYMBOL_REF
312 || GET_CODE (x) == HIGH
313 || GET_CODE (x) == CONST)
314 return false;
316 if (MEM_P (x)
317 && (MEM_VOLATILE_P (x)
318 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x))))
319 return false;
321 return true;
324 /* If X is an operator that can be treated as a simple move that we
325 can split, then return the operand that is operated on. */
327 static rtx
328 operand_for_swap_move_operator (rtx x)
330 /* A word sized rotate of a register pair is equivalent to swapping
331 the registers in the register pair. */
332 if (GET_CODE (x) == ROTATE
333 && GET_MODE (x) == twice_word_mode
334 && simple_move_operand (XEXP (x, 0))
335 && CONST_INT_P (XEXP (x, 1))
336 && INTVAL (XEXP (x, 1)) == BITS_PER_WORD)
337 return XEXP (x, 0);
339 return NULL_RTX;
342 /* If INSN is a single set between two objects that we want to split,
343 return the single set. SPEED_P says whether we are optimizing
344 INSN for speed or size.
346 INSN should have been passed to recog and extract_insn before this
347 is called. */
349 static rtx
350 simple_move (rtx_insn *insn, bool speed_p)
352 rtx x, op;
353 rtx set;
354 machine_mode mode;
356 if (recog_data.n_operands != 2)
357 return NULL_RTX;
359 set = single_set (insn);
360 if (!set)
361 return NULL_RTX;
363 x = SET_DEST (set);
364 if (x != recog_data.operand[0] && x != recog_data.operand[1])
365 return NULL_RTX;
366 if (!simple_move_operand (x))
367 return NULL_RTX;
369 x = SET_SRC (set);
370 if ((op = operand_for_swap_move_operator (x)) != NULL_RTX)
371 x = op;
373 if (x != recog_data.operand[0] && x != recog_data.operand[1])
374 return NULL_RTX;
375 /* For the src we can handle ASM_OPERANDS, and it is beneficial for
376 things like x86 rdtsc which returns a DImode value. */
377 if (GET_CODE (x) != ASM_OPERANDS
378 && !simple_move_operand (x))
379 return NULL_RTX;
381 /* We try to decompose in integer modes, to avoid generating
382 inefficient code copying between integer and floating point
383 registers. That means that we can't decompose if this is a
384 non-integer mode for which there is no integer mode of the same
385 size. */
386 mode = GET_MODE (SET_DEST (set));
387 scalar_int_mode int_mode;
388 if (!SCALAR_INT_MODE_P (mode)
389 && (!int_mode_for_size (GET_MODE_BITSIZE (mode), 0).exists (&int_mode)
390 || !targetm.modes_tieable_p (mode, int_mode)))
391 return NULL_RTX;
393 /* Reject PARTIAL_INT modes. They are used for processor specific
394 purposes and it's probably best not to tamper with them. */
395 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
396 return NULL_RTX;
398 if (!choices[speed_p].move_modes_to_split[(int) mode])
399 return NULL_RTX;
401 return set;
404 /* If SET is a copy from one multi-word pseudo-register to another,
405 record that in reg_copy_graph. Return whether it is such a
406 copy. */
408 static bool
409 find_pseudo_copy (rtx set)
411 rtx dest = SET_DEST (set);
412 rtx src = SET_SRC (set);
413 rtx op;
414 unsigned int rd, rs;
415 bitmap b;
417 if ((op = operand_for_swap_move_operator (src)) != NULL_RTX)
418 src = op;
420 if (!REG_P (dest) || !REG_P (src))
421 return false;
423 rd = REGNO (dest);
424 rs = REGNO (src);
425 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs))
426 return false;
428 b = reg_copy_graph[rs];
429 if (b == NULL)
431 b = BITMAP_ALLOC (NULL);
432 reg_copy_graph[rs] = b;
435 bitmap_set_bit (b, rd);
437 return true;
440 /* Look through the registers in DECOMPOSABLE_CONTEXT. For each case
441 where they are copied to another register, add the register to
442 which they are copied to DECOMPOSABLE_CONTEXT. Use
443 NON_DECOMPOSABLE_CONTEXT to limit this--we don't bother to track
444 copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */
446 static void
447 propagate_pseudo_copies (void)
449 auto_bitmap queue, propagate;
451 bitmap_copy (queue, decomposable_context);
454 bitmap_iterator iter;
455 unsigned int i;
457 bitmap_clear (propagate);
459 EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter)
461 bitmap b = reg_copy_graph[i];
462 if (b)
463 bitmap_ior_and_compl_into (propagate, b, non_decomposable_context);
466 bitmap_and_compl (queue, propagate, decomposable_context);
467 bitmap_ior_into (decomposable_context, propagate);
469 while (!bitmap_empty_p (queue));
/* Kinds of move; find_decomposable_subregs is given a pointer to one of
   these values.  */

enum classify_move_insn
{
  /* Not a simple move from one location to another.  */
  NOT_SIMPLE_MOVE,
  /* A simple move that we want to decompose.  */
  DECOMPOSABLE_SIMPLE_MOVE,
  /* Any other simple move.  */
  SIMPLE_MOVE
};
485 /* If we find a SUBREG in *LOC which we could use to decompose a
486 pseudo-register, set a bit in DECOMPOSABLE_CONTEXT. If we find an
487 unadorned register which is not a simple pseudo-register copy,
488 DATA will point at the type of move, and we set a bit in
489 DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate. */
491 static void
492 find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi)
494 subrtx_var_iterator::array_type array;
495 FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST)
497 rtx x = *iter;
498 if (GET_CODE (x) == SUBREG)
500 rtx inner = SUBREG_REG (x);
501 unsigned int regno, outer_size, inner_size, outer_words, inner_words;
503 if (!REG_P (inner))
504 continue;
506 regno = REGNO (inner);
507 if (HARD_REGISTER_NUM_P (regno))
509 iter.skip_subrtxes ();
510 continue;
513 if (!interesting_mode_p (GET_MODE (x), &outer_size, &outer_words)
514 || !interesting_mode_p (GET_MODE (inner), &inner_size,
515 &inner_words))
516 continue;
518 /* We only try to decompose single word subregs of multi-word
519 registers. When we find one, we return -1 to avoid iterating
520 over the inner register.
522 ??? This doesn't allow, e.g., DImode subregs of TImode values
523 on 32-bit targets. We would need to record the way the
524 pseudo-register was used, and only decompose if all the uses
525 were the same number and size of pieces. Hopefully this
526 doesn't happen much. */
528 if (outer_words == 1
529 && inner_words > 1
530 /* Don't allow to decompose floating point subregs of
531 multi-word pseudos if the floating point mode does
532 not have word size, because otherwise we'd generate
533 a subreg with that floating mode from a different
534 sized integral pseudo which is not allowed by
535 validate_subreg. */
536 && (!FLOAT_MODE_P (GET_MODE (x))
537 || outer_size == UNITS_PER_WORD))
539 bitmap_set_bit (decomposable_context, regno);
540 iter.skip_subrtxes ();
541 continue;
544 /* If this is a cast from one mode to another, where the modes
545 have the same size, and they are not tieable, then mark this
546 register as non-decomposable. If we decompose it we are
547 likely to mess up whatever the backend is trying to do. */
548 if (outer_words > 1
549 && outer_size == inner_size
550 && !targetm.modes_tieable_p (GET_MODE (x), GET_MODE (inner)))
552 bitmap_set_bit (non_decomposable_context, regno);
553 bitmap_set_bit (subreg_context, regno);
554 iter.skip_subrtxes ();
555 continue;
558 else if (REG_P (x))
560 unsigned int regno, size, words;
562 /* We will see an outer SUBREG before we see the inner REG, so
563 when we see a plain REG here it means a direct reference to
564 the register.
566 If this is not a simple copy from one location to another,
567 then we cannot decompose this register. If this is a simple
568 copy we want to decompose, and the mode is right,
569 then we mark the register as decomposable.
570 Otherwise we don't say anything about this register --
571 it could be decomposed, but whether that would be
572 profitable depends upon how it is used elsewhere.
574 We only set bits in the bitmap for multi-word
575 pseudo-registers, since those are the only ones we care about
576 and it keeps the size of the bitmaps down. */
578 regno = REGNO (x);
579 if (!HARD_REGISTER_NUM_P (regno)
580 && interesting_mode_p (GET_MODE (x), &size, &words)
581 && words > 1)
583 switch (*pcmi)
585 case NOT_SIMPLE_MOVE:
586 bitmap_set_bit (non_decomposable_context, regno);
587 break;
588 case DECOMPOSABLE_SIMPLE_MOVE:
589 if (targetm.modes_tieable_p (GET_MODE (x), word_mode))
590 bitmap_set_bit (decomposable_context, regno);
591 break;
592 case SIMPLE_MOVE:
593 break;
594 default:
595 gcc_unreachable ();
599 else if (MEM_P (x))
601 enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE;
603 /* Any registers used in a MEM do not participate in a
604 SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE. Do our own recursion
605 here, and return -1 to block the parent's recursion. */
606 find_decomposable_subregs (&XEXP (x, 0), &cmi_mem);
607 iter.skip_subrtxes ();
612 /* Decompose REGNO into word-sized components. We smash the REG node
613 in place. This ensures that (1) something goes wrong quickly if we
614 fail to make some replacement, and (2) the debug information inside
615 the symbol table is automatically kept up to date. */
617 static void
618 decompose_register (unsigned int regno)
620 rtx reg;
621 unsigned int size, words, i;
622 rtvec v;
624 reg = regno_reg_rtx[regno];
626 regno_reg_rtx[regno] = NULL_RTX;
628 if (!interesting_mode_p (GET_MODE (reg), &size, &words))
629 gcc_unreachable ();
631 v = rtvec_alloc (words);
632 for (i = 0; i < words; ++i)
633 RTVEC_ELT (v, i) = gen_reg_rtx_offset (reg, word_mode, i * UNITS_PER_WORD);
635 PUT_CODE (reg, CONCATN);
636 XVEC (reg, 0) = v;
638 if (dump_file)
640 fprintf (dump_file, "; Splitting reg %u ->", regno);
641 for (i = 0; i < words; ++i)
642 fprintf (dump_file, " %u", REGNO (XVECEXP (reg, 0, i)));
643 fputc ('\n', dump_file);
647 /* Get a SUBREG of a CONCATN. */
649 static rtx
650 simplify_subreg_concatn (machine_mode outermode, rtx op, poly_uint64 orig_byte)
652 unsigned int outer_size, outer_words, inner_size, inner_words;
653 machine_mode innermode, partmode;
654 rtx part;
655 unsigned int final_offset;
656 unsigned int byte;
658 innermode = GET_MODE (op);
659 if (!interesting_mode_p (outermode, &outer_size, &outer_words)
660 || !interesting_mode_p (innermode, &inner_size, &inner_words))
661 gcc_unreachable ();
663 /* Must be constant if interesting_mode_p passes. */
664 byte = orig_byte.to_constant ();
665 gcc_assert (GET_CODE (op) == CONCATN);
666 gcc_assert (byte % outer_size == 0);
668 gcc_assert (byte < inner_size);
669 if (outer_size > inner_size)
670 return NULL_RTX;
672 inner_size /= XVECLEN (op, 0);
673 part = XVECEXP (op, 0, byte / inner_size);
674 partmode = GET_MODE (part);
676 final_offset = byte % inner_size;
677 if (final_offset + outer_size > inner_size)
678 return NULL_RTX;
680 /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of
681 regular CONST_VECTORs. They have vector or integer modes, depending
682 on the capabilities of the target. Cope with them. */
683 if (partmode == VOIDmode && VECTOR_MODE_P (innermode))
684 partmode = GET_MODE_INNER (innermode);
685 else if (partmode == VOIDmode)
686 partmode = mode_for_size (inner_size * BITS_PER_UNIT,
687 GET_MODE_CLASS (innermode), 0).require ();
689 return simplify_gen_subreg (outermode, part, partmode, final_offset);
692 /* Wrapper around simplify_gen_subreg which handles CONCATN. */
694 static rtx
695 simplify_gen_subreg_concatn (machine_mode outermode, rtx op,
696 machine_mode innermode, unsigned int byte)
698 rtx ret;
700 /* We have to handle generating a SUBREG of a SUBREG of a CONCATN.
701 If OP is a SUBREG of a CONCATN, then it must be a simple mode
702 change with the same size and offset 0, or it must extract a
703 part. We shouldn't see anything else here. */
704 if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == CONCATN)
706 rtx op2;
708 if (known_eq (GET_MODE_SIZE (GET_MODE (op)),
709 GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))
710 && known_eq (SUBREG_BYTE (op), 0))
711 return simplify_gen_subreg_concatn (outermode, SUBREG_REG (op),
712 GET_MODE (SUBREG_REG (op)), byte);
714 op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op),
715 SUBREG_BYTE (op));
716 if (op2 == NULL_RTX)
718 /* We don't handle paradoxical subregs here. */
719 gcc_assert (!paradoxical_subreg_p (outermode, GET_MODE (op)));
720 gcc_assert (!paradoxical_subreg_p (op));
721 op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op),
722 byte + SUBREG_BYTE (op));
723 gcc_assert (op2 != NULL_RTX);
724 return op2;
727 op = op2;
728 gcc_assert (op != NULL_RTX);
729 gcc_assert (innermode == GET_MODE (op));
732 if (GET_CODE (op) == CONCATN)
733 return simplify_subreg_concatn (outermode, op, byte);
735 ret = simplify_gen_subreg (outermode, op, innermode, byte);
737 /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then
738 resolve_simple_move will ask for the high part of the paradoxical
739 subreg, which does not have a value. Just return a zero. */
740 if (ret == NULL_RTX
741 && paradoxical_subreg_p (op))
742 return CONST0_RTX (outermode);
744 gcc_assert (ret != NULL_RTX);
745 return ret;
748 /* Return whether we should resolve X into the registers into which it
749 was decomposed. */
751 static bool
752 resolve_reg_p (rtx x)
754 return GET_CODE (x) == CONCATN;
757 /* Return whether X is a SUBREG of a register which we need to
758 resolve. */
760 static bool
761 resolve_subreg_p (rtx x)
763 if (GET_CODE (x) != SUBREG)
764 return false;
765 return resolve_reg_p (SUBREG_REG (x));
768 /* Look for SUBREGs in *LOC which need to be decomposed. */
770 static bool
771 resolve_subreg_use (rtx *loc, rtx insn)
773 subrtx_ptr_iterator::array_type array;
774 FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST)
776 rtx *loc = *iter;
777 rtx x = *loc;
778 if (resolve_subreg_p (x))
780 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
781 SUBREG_BYTE (x));
783 /* It is possible for a note to contain a reference which we can
784 decompose. In this case, return 1 to the caller to indicate
785 that the note must be removed. */
786 if (!x)
788 gcc_assert (!insn);
789 return true;
792 validate_change (insn, loc, x, 1);
793 iter.skip_subrtxes ();
795 else if (resolve_reg_p (x))
796 /* Return 1 to the caller to indicate that we found a direct
797 reference to a register which is being decomposed. This can
798 happen inside notes, multiword shift or zero-extend
799 instructions. */
800 return true;
803 return false;
806 /* Resolve any decomposed registers which appear in register notes on
807 INSN. */
809 static void
810 resolve_reg_notes (rtx_insn *insn)
812 rtx *pnote, note;
814 note = find_reg_equal_equiv_note (insn);
815 if (note)
817 int old_count = num_validated_changes ();
818 if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX))
819 remove_note (insn, note);
820 else
821 if (old_count != num_validated_changes ())
822 df_notes_rescan (insn);
825 pnote = &REG_NOTES (insn);
826 while (*pnote != NULL_RTX)
828 bool del = false;
830 note = *pnote;
831 switch (REG_NOTE_KIND (note))
833 case REG_DEAD:
834 case REG_UNUSED:
835 if (resolve_reg_p (XEXP (note, 0)))
836 del = true;
837 break;
839 default:
840 break;
843 if (del)
844 *pnote = XEXP (note, 1);
845 else
846 pnote = &XEXP (note, 1);
850 /* Return whether X can be decomposed into subwords. */
852 static bool
853 can_decompose_p (rtx x)
855 if (REG_P (x))
857 unsigned int regno = REGNO (x);
859 if (HARD_REGISTER_NUM_P (regno))
861 unsigned int byte, num_bytes, num_words;
863 if (!interesting_mode_p (GET_MODE (x), &num_bytes, &num_words))
864 return false;
865 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD)
866 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0)
867 return false;
868 return true;
870 else
871 return !bitmap_bit_p (subreg_context, regno);
874 return true;
877 /* OPND is a concatn operand this is used with a simple move operator.
878 Return a new rtx with the concatn's operands swapped. */
880 static rtx
881 resolve_operand_for_swap_move_operator (rtx opnd)
883 gcc_assert (GET_CODE (opnd) == CONCATN);
884 rtx concatn = copy_rtx (opnd);
885 rtx op0 = XVECEXP (concatn, 0, 0);
886 rtx op1 = XVECEXP (concatn, 0, 1);
887 XVECEXP (concatn, 0, 0) = op1;
888 XVECEXP (concatn, 0, 1) = op0;
889 return concatn;
892 /* Decompose the registers used in a simple move SET within INSN. If
893 we don't change anything, return INSN, otherwise return the start
894 of the sequence of moves. */
896 static rtx_insn *
897 resolve_simple_move (rtx set, rtx_insn *insn)
899 rtx src, dest, real_dest, src_op;
900 rtx_insn *insns;
901 machine_mode orig_mode, dest_mode;
902 unsigned int orig_size, words;
903 bool pushing;
905 src = SET_SRC (set);
906 dest = SET_DEST (set);
907 orig_mode = GET_MODE (dest);
909 if (!interesting_mode_p (orig_mode, &orig_size, &words))
910 gcc_unreachable ();
911 gcc_assert (words > 1);
913 start_sequence ();
915 /* We have to handle copying from a SUBREG of a decomposed reg where
916 the SUBREG is larger than word size. Rather than assume that we
917 can take a word_mode SUBREG of the destination, we copy to a new
918 register and then copy that to the destination. */
920 real_dest = NULL_RTX;
922 if ((src_op = operand_for_swap_move_operator (src)) != NULL_RTX)
924 if (resolve_reg_p (dest))
926 /* DEST is a CONCATN, so swap its operands and strip
927 SRC's operator. */
928 dest = resolve_operand_for_swap_move_operator (dest);
929 src = src_op;
931 else if (resolve_reg_p (src_op))
933 /* SRC is an operation on a CONCATN, so strip the operator and
934 swap the CONCATN's operands. */
935 src = resolve_operand_for_swap_move_operator (src_op);
939 if (GET_CODE (src) == SUBREG
940 && resolve_reg_p (SUBREG_REG (src))
941 && (maybe_ne (SUBREG_BYTE (src), 0)
942 || maybe_ne (orig_size, GET_MODE_SIZE (GET_MODE (SUBREG_REG (src))))))
944 real_dest = dest;
945 dest = gen_reg_rtx (orig_mode);
946 if (REG_P (real_dest))
947 REG_ATTRS (dest) = REG_ATTRS (real_dest);
950 /* Similarly if we are copying to a SUBREG of a decomposed reg where
951 the SUBREG is larger than word size. */
953 if (GET_CODE (dest) == SUBREG
954 && resolve_reg_p (SUBREG_REG (dest))
955 && (maybe_ne (SUBREG_BYTE (dest), 0)
956 || maybe_ne (orig_size,
957 GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest))))))
959 rtx reg, smove;
960 rtx_insn *minsn;
962 reg = gen_reg_rtx (orig_mode);
963 minsn = emit_move_insn (reg, src);
964 smove = single_set (minsn);
965 gcc_assert (smove != NULL_RTX);
966 resolve_simple_move (smove, minsn);
967 src = reg;
970 /* If we didn't have any big SUBREGS of decomposed registers, and
971 neither side of the move is a register we are decomposing, then
972 we don't have to do anything here. */
974 if (src == SET_SRC (set)
975 && dest == SET_DEST (set)
976 && !resolve_reg_p (src)
977 && !resolve_subreg_p (src)
978 && !resolve_reg_p (dest)
979 && !resolve_subreg_p (dest))
981 end_sequence ();
982 return insn;
985 /* It's possible for the code to use a subreg of a decomposed
986 register while forming an address. We need to handle that before
987 passing the address to emit_move_insn. We pass NULL_RTX as the
988 insn parameter to resolve_subreg_use because we cannot validate
989 the insn yet. */
990 if (MEM_P (src) || MEM_P (dest))
992 int acg;
994 if (MEM_P (src))
995 resolve_subreg_use (&XEXP (src, 0), NULL_RTX);
996 if (MEM_P (dest))
997 resolve_subreg_use (&XEXP (dest, 0), NULL_RTX);
998 acg = apply_change_group ();
999 gcc_assert (acg);
1002 /* If SRC is a register which we can't decompose, or has side
1003 effects, we need to move via a temporary register. */
1005 if (!can_decompose_p (src)
1006 || side_effects_p (src)
1007 || GET_CODE (src) == ASM_OPERANDS)
1009 rtx reg;
1011 reg = gen_reg_rtx (orig_mode);
1013 if (AUTO_INC_DEC)
1015 rtx_insn *move = emit_move_insn (reg, src);
1016 if (MEM_P (src))
1018 rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
1019 if (note)
1020 add_reg_note (move, REG_INC, XEXP (note, 0));
1023 else
1024 emit_move_insn (reg, src);
1026 src = reg;
1029 /* If DEST is a register which we can't decompose, or has side
1030 effects, we need to first move to a temporary register. We
1031 handle the common case of pushing an operand directly. We also
1032 go through a temporary register if it holds a floating point
1033 value. This gives us better code on systems which can't move
1034 data easily between integer and floating point registers. */
1036 dest_mode = orig_mode;
1037 pushing = push_operand (dest, dest_mode);
1038 if (!can_decompose_p (dest)
1039 || (side_effects_p (dest) && !pushing)
1040 || (!SCALAR_INT_MODE_P (dest_mode)
1041 && !resolve_reg_p (dest)
1042 && !resolve_subreg_p (dest)))
1044 if (real_dest == NULL_RTX)
1045 real_dest = dest;
1046 if (!SCALAR_INT_MODE_P (dest_mode))
1047 dest_mode = int_mode_for_mode (dest_mode).require ();
1048 dest = gen_reg_rtx (dest_mode);
1049 if (REG_P (real_dest))
1050 REG_ATTRS (dest) = REG_ATTRS (real_dest);
1053 if (pushing)
1055 unsigned int i, j, jinc;
1057 gcc_assert (orig_size % UNITS_PER_WORD == 0);
1058 gcc_assert (GET_CODE (XEXP (dest, 0)) != PRE_MODIFY);
1059 gcc_assert (GET_CODE (XEXP (dest, 0)) != POST_MODIFY);
1061 if (WORDS_BIG_ENDIAN == STACK_GROWS_DOWNWARD)
1063 j = 0;
1064 jinc = 1;
1066 else
1068 j = words - 1;
1069 jinc = -1;
1072 for (i = 0; i < words; ++i, j += jinc)
1074 rtx temp;
1076 temp = copy_rtx (XEXP (dest, 0));
1077 temp = adjust_automodify_address_nv (dest, word_mode, temp,
1078 j * UNITS_PER_WORD);
1079 emit_move_insn (temp,
1080 simplify_gen_subreg_concatn (word_mode, src,
1081 orig_mode,
1082 j * UNITS_PER_WORD));
1085 else
1087 unsigned int i;
1089 if (REG_P (dest) && !HARD_REGISTER_NUM_P (REGNO (dest)))
1090 emit_clobber (dest);
1092 for (i = 0; i < words; ++i)
1094 rtx t = simplify_gen_subreg_concatn (word_mode, dest,
1095 dest_mode,
1096 i * UNITS_PER_WORD);
1097 /* simplify_gen_subreg_concatn can return (const_int 0) for
1098 some sub-objects of paradoxical subregs. As a source operand,
1099 that's fine. As a destination it must be avoided. Those are
1100 supposed to be don't care bits, so we can just drop that store
1101 on the floor. */
1102 if (t != CONST0_RTX (word_mode))
1103 emit_move_insn (t,
1104 simplify_gen_subreg_concatn (word_mode, src,
1105 orig_mode,
1106 i * UNITS_PER_WORD));
1110 if (real_dest != NULL_RTX)
1112 rtx mdest, smove;
1113 rtx_insn *minsn;
1115 if (dest_mode == orig_mode)
1116 mdest = dest;
1117 else
1118 mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0);
1119 minsn = emit_move_insn (real_dest, mdest);
1121 if (AUTO_INC_DEC && MEM_P (real_dest)
1122 && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest)))
1124 rtx note = find_reg_note (insn, REG_INC, NULL_RTX);
1125 if (note)
1126 add_reg_note (minsn, REG_INC, XEXP (note, 0));
1129 smove = single_set (minsn);
1130 gcc_assert (smove != NULL_RTX);
1132 resolve_simple_move (smove, minsn);
1135 insns = get_insns ();
1136 end_sequence ();
1138 copy_reg_eh_region_note_forward (insn, insns, NULL_RTX);
1140 emit_insn_before (insns, insn);
1142 /* If we get here via self-recursion, then INSN is not yet in the insns
1143 chain and delete_insn will fail. We only want to remove INSN from the
1144 current sequence. See PR56738. */
1145 if (in_sequence_p ())
1146 remove_insn (insn);
1147 else
1148 delete_insn (insn);
1150 return insns;
1153 /* Change a CLOBBER of a decomposed register into a CLOBBER of the
1154 component registers. Return whether we changed something. */
1156 static bool
1157 resolve_clobber (rtx pat, rtx_insn *insn)
1159 rtx reg;
1160 machine_mode orig_mode;
1161 unsigned int orig_size, words, i;
1162 int ret;
1164 reg = XEXP (pat, 0);
1165 /* For clobbers we can look through paradoxical subregs which
1166 we do not handle in simplify_gen_subreg_concatn. */
1167 if (paradoxical_subreg_p (reg))
1168 reg = SUBREG_REG (reg);
1169 if (!resolve_reg_p (reg) && !resolve_subreg_p (reg))
1170 return false;
1172 orig_mode = GET_MODE (reg);
1173 if (!interesting_mode_p (orig_mode, &orig_size, &words))
1174 gcc_unreachable ();
1176 ret = validate_change (NULL_RTX, &XEXP (pat, 0),
1177 simplify_gen_subreg_concatn (word_mode, reg,
1178 orig_mode, 0),
1180 df_insn_rescan (insn);
1181 gcc_assert (ret != 0);
1183 for (i = words - 1; i > 0; --i)
1185 rtx x;
1187 x = simplify_gen_subreg_concatn (word_mode, reg, orig_mode,
1188 i * UNITS_PER_WORD);
1189 x = gen_rtx_CLOBBER (VOIDmode, x);
1190 emit_insn_after (x, insn);
1193 resolve_reg_notes (insn);
1195 return true;
1198 /* A USE of a decomposed register is no longer meaningful. Return
1199 whether we changed something. */
1201 static bool
1202 resolve_use (rtx pat, rtx_insn *insn)
1204 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0)))
1206 delete_insn (insn);
1207 return true;
1210 resolve_reg_notes (insn);
1212 return false;
1215 /* A VAR_LOCATION can be simplified. */
1217 static void
1218 resolve_debug (rtx_insn *insn)
1220 subrtx_ptr_iterator::array_type array;
1221 FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST)
1223 rtx *loc = *iter;
1224 rtx x = *loc;
1225 if (resolve_subreg_p (x))
1227 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x),
1228 SUBREG_BYTE (x));
1230 if (x)
1231 *loc = x;
1232 else
1233 x = copy_rtx (*loc);
1235 if (resolve_reg_p (x))
1236 *loc = copy_rtx (x);
1239 df_insn_rescan (insn);
1241 resolve_reg_notes (insn);
1244 /* Check if INSN is a decomposable multiword-shift or zero-extend and
1245 set the decomposable_context bitmap accordingly. SPEED_P is true
1246 if we are optimizing INSN for speed rather than size. Return true
1247 if INSN is decomposable. */
1249 static bool
1250 find_decomposable_shift_zext (rtx_insn *insn, bool speed_p)
1252 rtx set;
1253 rtx op;
1254 rtx op_operand;
1256 set = single_set (insn);
1257 if (!set)
1258 return false;
1260 op = SET_SRC (set);
1261 if (GET_CODE (op) != ASHIFT
1262 && GET_CODE (op) != LSHIFTRT
1263 && GET_CODE (op) != ASHIFTRT
1264 && GET_CODE (op) != ZERO_EXTEND)
1265 return false;
1267 op_operand = XEXP (op, 0);
1268 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand)
1269 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set)))
1270 || HARD_REGISTER_NUM_P (REGNO (op_operand))
1271 || GET_MODE (op) != twice_word_mode)
1272 return false;
1274 if (GET_CODE (op) == ZERO_EXTEND)
1276 if (GET_MODE (op_operand) != word_mode
1277 || !choices[speed_p].splitting_zext)
1278 return false;
1280 else /* left or right shift */
1282 bool *splitting = (GET_CODE (op) == ASHIFT
1283 ? choices[speed_p].splitting_ashift
1284 : GET_CODE (op) == ASHIFTRT
1285 ? choices[speed_p].splitting_ashiftrt
1286 : choices[speed_p].splitting_lshiftrt);
1287 if (!CONST_INT_P (XEXP (op, 1))
1288 || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD,
1289 2 * BITS_PER_WORD - 1)
1290 || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD])
1291 return false;
1293 bitmap_set_bit (decomposable_context, REGNO (op_operand));
1296 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set)));
1298 return true;
1301 /* Decompose a more than word wide shift (in INSN) of a multiword
1302 pseudo or a multiword zero-extend of a wordmode pseudo into a move
1303 and 'set to zero' insn. SPEED_P says whether we are optimizing
1304 for speed or size, when checking if a ZERO_EXTEND is preferable.
1305 Return a pointer to the new insn when a replacement was done. */
1307 static rtx_insn *
1308 resolve_shift_zext (rtx_insn *insn, bool speed_p)
1310 rtx set;
1311 rtx op;
1312 rtx op_operand;
1313 rtx_insn *insns;
1314 rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX;
1315 int src_reg_num, dest_reg_num, offset1, offset2, src_offset;
1316 scalar_int_mode inner_mode;
1318 set = single_set (insn);
1319 if (!set)
1320 return NULL;
1322 op = SET_SRC (set);
1323 if (GET_CODE (op) != ASHIFT
1324 && GET_CODE (op) != LSHIFTRT
1325 && GET_CODE (op) != ASHIFTRT
1326 && GET_CODE (op) != ZERO_EXTEND)
1327 return NULL;
1329 op_operand = XEXP (op, 0);
1330 if (!is_a <scalar_int_mode> (GET_MODE (op_operand), &inner_mode))
1331 return NULL;
1333 /* We can tear this operation apart only if the regs were already
1334 torn apart. */
1335 if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand))
1336 return NULL;
1338 /* src_reg_num is the number of the word mode register which we
1339 are operating on. For a left shift and a zero_extend on little
1340 endian machines this is register 0. */
1341 src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT)
1342 ? 1 : 0;
1344 if (WORDS_BIG_ENDIAN && GET_MODE_SIZE (inner_mode) > UNITS_PER_WORD)
1345 src_reg_num = 1 - src_reg_num;
1347 if (GET_CODE (op) == ZERO_EXTEND)
1348 dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0;
1349 else
1350 dest_reg_num = 1 - src_reg_num;
1352 offset1 = UNITS_PER_WORD * dest_reg_num;
1353 offset2 = UNITS_PER_WORD * (1 - dest_reg_num);
1354 src_offset = UNITS_PER_WORD * src_reg_num;
1356 start_sequence ();
1358 dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1359 GET_MODE (SET_DEST (set)),
1360 offset1);
1361 dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set),
1362 GET_MODE (SET_DEST (set)),
1363 offset2);
1364 src_reg = simplify_gen_subreg_concatn (word_mode, op_operand,
1365 GET_MODE (op_operand),
1366 src_offset);
1367 if (GET_CODE (op) == ASHIFTRT
1368 && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1)
1369 upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg),
1370 BITS_PER_WORD - 1, NULL_RTX, 0);
1372 if (GET_CODE (op) != ZERO_EXTEND)
1374 int shift_count = INTVAL (XEXP (op, 1));
1375 if (shift_count > BITS_PER_WORD)
1376 src_reg = expand_shift (GET_CODE (op) == ASHIFT ?
1377 LSHIFT_EXPR : RSHIFT_EXPR,
1378 word_mode, src_reg,
1379 shift_count - BITS_PER_WORD,
1380 dest_reg, GET_CODE (op) != ASHIFTRT);
1383 /* Consider using ZERO_EXTEND instead of setting DEST_UPPER to zero
1384 if this is considered reasonable. */
1385 if (GET_CODE (op) == LSHIFTRT
1386 && GET_MODE (op) == twice_word_mode
1387 && REG_P (SET_DEST (set))
1388 && !choices[speed_p].splitting_zext)
1390 rtx tmp = force_reg (word_mode, copy_rtx (src_reg));
1391 tmp = simplify_gen_unary (ZERO_EXTEND, twice_word_mode, tmp, word_mode);
1392 emit_move_insn (SET_DEST (set), tmp);
1394 else
1396 if (dest_reg != src_reg)
1397 emit_move_insn (dest_reg, src_reg);
1398 if (GET_CODE (op) != ASHIFTRT)
1399 emit_move_insn (dest_upper, CONST0_RTX (word_mode));
1400 else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1)
1401 emit_move_insn (dest_upper, copy_rtx (src_reg));
1402 else
1403 emit_move_insn (dest_upper, upper_src);
1406 insns = get_insns ();
1408 end_sequence ();
1410 emit_insn_before (insns, insn);
1412 if (dump_file)
1414 rtx_insn *in;
1415 fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn));
1416 for (in = insns; in != insn; in = NEXT_INSN (in))
1417 fprintf (dump_file, "%d ", INSN_UID (in));
1418 fprintf (dump_file, "\n");
1421 delete_insn (insn);
1422 return insns;
1425 /* Print to dump_file a description of what we're doing with shift code CODE.
1426 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */
1428 static void
1429 dump_shift_choices (enum rtx_code code, bool *splitting)
1431 int i;
1432 const char *sep;
1434 fprintf (dump_file,
1435 " Splitting mode %s for %s lowering with shift amounts = ",
1436 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code));
1437 sep = "";
1438 for (i = 0; i < BITS_PER_WORD; i++)
1439 if (splitting[i])
1441 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD);
1442 sep = ",";
1444 fprintf (dump_file, "\n");
1447 /* Print to dump_file a description of what we're doing when optimizing
1448 for speed or size; SPEED_P says which. DESCRIPTION is a description
1449 of the SPEED_P choice. */
1451 static void
1452 dump_choices (bool speed_p, const char *description)
1454 unsigned int size, factor, i;
1456 fprintf (dump_file, "Choices when optimizing for %s:\n", description);
1458 for (i = 0; i < MAX_MACHINE_MODE; i++)
1459 if (interesting_mode_p ((machine_mode) i, &size, &factor)
1460 && factor > 1)
1461 fprintf (dump_file, " %s mode %s for copy lowering.\n",
1462 choices[speed_p].move_modes_to_split[i]
1463 ? "Splitting"
1464 : "Skipping",
1465 GET_MODE_NAME ((machine_mode) i));
1467 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n",
1468 choices[speed_p].splitting_zext ? "Splitting" : "Skipping",
1469 GET_MODE_NAME (twice_word_mode));
1471 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift);
1472 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt);
1473 dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt);
1474 fprintf (dump_file, "\n");
1477 /* Look for registers which are always accessed via word-sized SUBREGs
1478 or -if DECOMPOSE_COPIES is true- via copies. Decompose these
1479 registers into several word-sized pseudo-registers. */
1481 static void
1482 decompose_multiword_subregs (bool decompose_copies)
1484 unsigned int max;
1485 basic_block bb;
1486 bool speed_p;
1488 if (dump_file)
1490 dump_choices (false, "size");
1491 dump_choices (true, "speed");
1494 /* Check if this target even has any modes to consider lowering. */
1495 if (!choices[false].something_to_do && !choices[true].something_to_do)
1497 if (dump_file)
1498 fprintf (dump_file, "Nothing to do!\n");
1499 return;
1502 max = max_reg_num ();
1504 /* First see if there are any multi-word pseudo-registers. If there
1505 aren't, there is nothing we can do. This should speed up this
1506 pass in the normal case, since it should be faster than scanning
1507 all the insns. */
1509 unsigned int i;
1510 bool useful_modes_seen = false;
1512 for (i = FIRST_PSEUDO_REGISTER; i < max; ++i)
1513 if (regno_reg_rtx[i] != NULL)
1515 machine_mode mode = GET_MODE (regno_reg_rtx[i]);
1516 if (choices[false].move_modes_to_split[(int) mode]
1517 || choices[true].move_modes_to_split[(int) mode])
1519 useful_modes_seen = true;
1520 break;
1524 if (!useful_modes_seen)
1526 if (dump_file)
1527 fprintf (dump_file, "Nothing to lower in this function.\n");
1528 return;
1532 if (df)
1534 df_set_flags (DF_DEFER_INSN_RESCAN);
1535 run_word_dce ();
1538 /* FIXME: It may be possible to change this code to look for each
1539 multi-word pseudo-register and to find each insn which sets or
1540 uses that register. That should be faster than scanning all the
1541 insns. */
1543 decomposable_context = BITMAP_ALLOC (NULL);
1544 non_decomposable_context = BITMAP_ALLOC (NULL);
1545 subreg_context = BITMAP_ALLOC (NULL);
1547 reg_copy_graph.create (max);
1548 reg_copy_graph.safe_grow_cleared (max, true);
1549 memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max);
1551 speed_p = optimize_function_for_speed_p (cfun);
1552 FOR_EACH_BB_FN (bb, cfun)
1554 rtx_insn *insn;
1556 FOR_BB_INSNS (bb, insn)
1558 rtx set;
1559 enum classify_move_insn cmi;
1560 int i, n;
1562 if (!INSN_P (insn)
1563 || GET_CODE (PATTERN (insn)) == CLOBBER
1564 || GET_CODE (PATTERN (insn)) == USE)
1565 continue;
1567 recog_memoized (insn);
1569 if (find_decomposable_shift_zext (insn, speed_p))
1570 continue;
1572 extract_insn (insn);
1574 set = simple_move (insn, speed_p);
1576 if (!set)
1577 cmi = NOT_SIMPLE_MOVE;
1578 else
1580 /* We mark pseudo-to-pseudo copies as decomposable during the
1581 second pass only. The first pass is so early that there is
1582 good chance such moves will be optimized away completely by
1583 subsequent optimizations anyway.
1585 However, we call find_pseudo_copy even during the first pass
1586 so as to properly set up the reg_copy_graph. */
1587 if (find_pseudo_copy (set))
1588 cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE;
1589 else
1590 cmi = SIMPLE_MOVE;
1593 n = recog_data.n_operands;
1594 for (i = 0; i < n; ++i)
1596 find_decomposable_subregs (&recog_data.operand[i], &cmi);
1598 /* We handle ASM_OPERANDS as a special case to support
1599 things like x86 rdtsc which returns a DImode value.
1600 We can decompose the output, which will certainly be
1601 operand 0, but not the inputs. */
1603 if (cmi == SIMPLE_MOVE
1604 && GET_CODE (SET_SRC (set)) == ASM_OPERANDS)
1606 gcc_assert (i == 0);
1607 cmi = NOT_SIMPLE_MOVE;
1613 bitmap_and_compl_into (decomposable_context, non_decomposable_context);
1614 if (!bitmap_empty_p (decomposable_context))
1616 unsigned int i;
1617 sbitmap_iterator sbi;
1618 bitmap_iterator iter;
1619 unsigned int regno;
1621 propagate_pseudo_copies ();
1623 auto_sbitmap sub_blocks (last_basic_block_for_fn (cfun));
1624 bitmap_clear (sub_blocks);
1626 EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter)
1627 decompose_register (regno);
1629 FOR_EACH_BB_FN (bb, cfun)
1631 rtx_insn *insn;
1633 FOR_BB_INSNS (bb, insn)
1635 rtx pat;
1637 if (!INSN_P (insn))
1638 continue;
1640 pat = PATTERN (insn);
1641 if (GET_CODE (pat) == CLOBBER)
1642 resolve_clobber (pat, insn);
1643 else if (GET_CODE (pat) == USE)
1644 resolve_use (pat, insn);
1645 else if (DEBUG_INSN_P (insn))
1646 resolve_debug (insn);
1647 else
1649 rtx set;
1650 int i;
1652 recog_memoized (insn);
1653 extract_insn (insn);
1655 set = simple_move (insn, speed_p);
1656 if (set)
1658 rtx_insn *orig_insn = insn;
1659 bool cfi = control_flow_insn_p (insn);
1661 /* We can end up splitting loads to multi-word pseudos
1662 into separate loads to machine word size pseudos.
1663 When this happens, we first had one load that can
1664 throw, and after resolve_simple_move we'll have a
1665 bunch of loads (at least two). All those loads may
1666 trap if we can have non-call exceptions, so they
1667 all will end the current basic block. We split the
1668 block after the outer loop over all insns, but we
1669 make sure here that we will be able to split the
1670 basic block and still produce the correct control
1671 flow graph for it. */
1672 gcc_assert (!cfi
1673 || (cfun->can_throw_non_call_exceptions
1674 && can_throw_internal (insn)));
1676 insn = resolve_simple_move (set, insn);
1677 if (insn != orig_insn)
1679 recog_memoized (insn);
1680 extract_insn (insn);
1682 if (cfi)
1683 bitmap_set_bit (sub_blocks, bb->index);
1686 else
1688 rtx_insn *decomposed_shift;
1690 decomposed_shift = resolve_shift_zext (insn, speed_p);
1691 if (decomposed_shift != NULL_RTX)
1693 insn = decomposed_shift;
1694 recog_memoized (insn);
1695 extract_insn (insn);
1699 for (i = recog_data.n_operands - 1; i >= 0; --i)
1700 resolve_subreg_use (recog_data.operand_loc[i], insn);
1702 resolve_reg_notes (insn);
1704 if (num_validated_changes () > 0)
1706 for (i = recog_data.n_dups - 1; i >= 0; --i)
1708 rtx *pl = recog_data.dup_loc[i];
1709 int dup_num = recog_data.dup_num[i];
1710 rtx *px = recog_data.operand_loc[dup_num];
1712 validate_unshare_change (insn, pl, *px, 1);
1715 i = apply_change_group ();
1716 gcc_assert (i);
1722 /* If we had insns to split that caused control flow insns in the middle
1723 of a basic block, split those blocks now. Note that we only handle
1724 the case where splitting a load has caused multiple possibly trapping
1725 loads to appear. */
1726 EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi)
1728 rtx_insn *insn, *end;
1729 edge fallthru;
1731 bb = BASIC_BLOCK_FOR_FN (cfun, i);
1732 insn = BB_HEAD (bb);
1733 end = BB_END (bb);
1735 while (insn != end)
1737 if (control_flow_insn_p (insn))
1739 /* Split the block after insn. There will be a fallthru
1740 edge, which is OK so we keep it. We have to create the
1741 exception edges ourselves. */
1742 fallthru = split_block (bb, insn);
1743 rtl_make_eh_edge (NULL, bb, BB_END (bb));
1744 bb = fallthru->dest;
1745 insn = BB_HEAD (bb);
1747 else
1748 insn = NEXT_INSN (insn);
1753 for (bitmap b : reg_copy_graph)
1754 if (b)
1755 BITMAP_FREE (b);
1757 reg_copy_graph.release ();
1759 BITMAP_FREE (decomposable_context);
1760 BITMAP_FREE (non_decomposable_context);
1761 BITMAP_FREE (subreg_context);
1764 /* Implement first lower subreg pass. */
1766 namespace {
1768 const pass_data pass_data_lower_subreg =
1770 RTL_PASS, /* type */
1771 "subreg1", /* name */
1772 OPTGROUP_NONE, /* optinfo_flags */
1773 TV_LOWER_SUBREG, /* tv_id */
1774 0, /* properties_required */
1775 0, /* properties_provided */
1776 0, /* properties_destroyed */
1777 0, /* todo_flags_start */
1778 0, /* todo_flags_finish */
1781 class pass_lower_subreg : public rtl_opt_pass
1783 public:
1784 pass_lower_subreg (gcc::context *ctxt)
1785 : rtl_opt_pass (pass_data_lower_subreg, ctxt)
1788 /* opt_pass methods: */
1789 bool gate (function *) final override { return flag_split_wide_types != 0; }
1790 unsigned int execute (function *) final override
1792 decompose_multiword_subregs (false);
1793 return 0;
1796 }; // class pass_lower_subreg
1798 } // anon namespace
1800 rtl_opt_pass *
1801 make_pass_lower_subreg (gcc::context *ctxt)
1803 return new pass_lower_subreg (ctxt);
1806 /* Implement second lower subreg pass. */
1808 namespace {
1810 const pass_data pass_data_lower_subreg2 =
1812 RTL_PASS, /* type */
1813 "subreg2", /* name */
1814 OPTGROUP_NONE, /* optinfo_flags */
1815 TV_LOWER_SUBREG, /* tv_id */
1816 0, /* properties_required */
1817 0, /* properties_provided */
1818 0, /* properties_destroyed */
1819 0, /* todo_flags_start */
1820 TODO_df_finish, /* todo_flags_finish */
1823 class pass_lower_subreg2 : public rtl_opt_pass
1825 public:
1826 pass_lower_subreg2 (gcc::context *ctxt)
1827 : rtl_opt_pass (pass_data_lower_subreg2, ctxt)
1830 /* opt_pass methods: */
1831 bool gate (function *) final override
1833 return flag_split_wide_types && flag_split_wide_types_early;
1835 unsigned int execute (function *) final override
1837 decompose_multiword_subregs (true);
1838 return 0;
1841 }; // class pass_lower_subreg2
1843 } // anon namespace
1845 rtl_opt_pass *
1846 make_pass_lower_subreg2 (gcc::context *ctxt)
1848 return new pass_lower_subreg2 (ctxt);
1851 /* Implement third lower subreg pass. */
1853 namespace {
1855 const pass_data pass_data_lower_subreg3 =
1857 RTL_PASS, /* type */
1858 "subreg3", /* name */
1859 OPTGROUP_NONE, /* optinfo_flags */
1860 TV_LOWER_SUBREG, /* tv_id */
1861 0, /* properties_required */
1862 0, /* properties_provided */
1863 0, /* properties_destroyed */
1864 0, /* todo_flags_start */
1865 TODO_df_finish, /* todo_flags_finish */
1868 class pass_lower_subreg3 : public rtl_opt_pass
1870 public:
1871 pass_lower_subreg3 (gcc::context *ctxt)
1872 : rtl_opt_pass (pass_data_lower_subreg3, ctxt)
1875 /* opt_pass methods: */
1876 bool gate (function *) final override { return flag_split_wide_types; }
1877 unsigned int execute (function *) final override
1879 decompose_multiword_subregs (true);
1880 return 0;
1883 }; // class pass_lower_subreg3
1885 } // anon namespace
1887 rtl_opt_pass *
1888 make_pass_lower_subreg3 (gcc::context *ctxt)
1890 return new pass_lower_subreg3 (ctxt);