/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2013 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "hard-reg-set.h"
#include "target-def.h"
#include "targhooks.h"
#include "langhooks.h"
#include "diagnostic-core.h"
/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC
       A constant symbolic address, in pc-relative literal pool.  */

enum aarch64_address_type {
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};
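/* For illustration (example operands, not an exhaustive list), the
   classifications above correspond to assembly addressing forms like:

     ADDRESS_REG_IMM    [x0, #16]
     ADDRESS_REG_WB     [x0, #16]!  or  [x0], #16
     ADDRESS_REG_REG    [x0, x1]  or  [x0, x1, lsl #3]
     ADDRESS_REG_UXTW   [x0, w1, uxtw #2]
     ADDRESS_REG_SXTW   [x0, w1, sxtw #2]
     ADDRESS_LO_SUM     [x0, #:lo12:foo]
     ADDRESS_SYMBOLIC   a pc-relative literal-pool entry such as .LC0  */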
struct aarch64_address_info {
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};
struct simd_immediate_info
{
  rtx value;
  int shift;
  int element_width;
  bool mvn;
};
/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif
static bool aarch64_composite_type_p (const_tree, enum machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
						     const_tree,
						     enum machine_mode *, int *,
						     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (enum machine_mode);
static unsigned bit_count (unsigned HOST_WIDE_INT);
static bool aarch64_const_vec_all_same_int_p (rtx,
					      HOST_WIDE_INT, HOST_WIDE_INT);
static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
						 const unsigned char *sel);
/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = generic;

/* The current tuning set.  */
const struct tune_params *aarch64_tune_params;

/* Mask to specify which instructions we are allowed to generate.  */
unsigned long aarch64_isa_flags = 0;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;
/* Tuning parameters.  */

#if HAVE_DESIGNATED_INITIALIZERS
#define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
#else
#define NAMED_PARAM(NAME, VAL) (VAL)
#endif
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_rtx_cost_table generic_rtx_cost_table =
{
  NAMED_PARAM (memory_load, COSTS_N_INSNS (1)),
  NAMED_PARAM (memory_store, COSTS_N_INSNS (0)),
  NAMED_PARAM (register_shift, COSTS_N_INSNS (1)),
  NAMED_PARAM (int_divide, COSTS_N_INSNS (6)),
  NAMED_PARAM (float_divide, COSTS_N_INSNS (2)),
  NAMED_PARAM (double_divide, COSTS_N_INSNS (6)),
  NAMED_PARAM (int_multiply, COSTS_N_INSNS (1)),
  NAMED_PARAM (int_multiply_extend, COSTS_N_INSNS (1)),
  NAMED_PARAM (int_multiply_add, COSTS_N_INSNS (1)),
  NAMED_PARAM (int_multiply_extend_add, COSTS_N_INSNS (1)),
  NAMED_PARAM (float_multiply, COSTS_N_INSNS (0)),
  NAMED_PARAM (double_multiply, COSTS_N_INSNS (1))
};
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_addrcost_table generic_addrcost_table =
{
  NAMED_PARAM (pre_modify, 0),
  NAMED_PARAM (post_modify, 0),
  NAMED_PARAM (register_offset, 0),
  NAMED_PARAM (register_extend, 0),
  NAMED_PARAM (imm_offset, 0)
};
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_regmove_cost generic_regmove_cost =
{
  NAMED_PARAM (GP2GP, 1),
  NAMED_PARAM (GP2FP, 2),
  NAMED_PARAM (FP2GP, 2),
  /* We currently do not provide direct support for TFmode Q->Q move.
     Therefore we need to raise the cost above 2 in order to have
     reload handle the situation.  */
  NAMED_PARAM (FP2FP, 4)
};
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct tune_params generic_tunings =
{
  &generic_rtx_cost_table,
  &generic_addrcost_table,
  &generic_regmove_cost,
  NAMED_PARAM (memmov_cost, 4)
};
/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};
/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
#include "aarch64-cores.def"
#undef AARCH64_CORE
  {"generic", generic, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8,
   &generic_tunings},
  {NULL, aarch64_none, NULL, 0, NULL}
};
/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, FLAGS, NULL},
#include "aarch64-arches.def"
#undef AARCH64_ARCH
  {"generic", generic, "8", AARCH64_FL_FOR_ARCH8, NULL},
  {NULL, aarch64_none, NULL, 0, NULL}
};
/* Target specification.  These are populated as commandline arguments
   are processed, or NULL if not specified.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};

/* ISA extensions in AArch64.  */
static const struct aarch64_option_extension all_extensions[] =
{
#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
  {NAME, FLAGS_ON, FLAGS_OFF},
#include "aarch64-option-extensions.def"
#undef AARCH64_OPT_EXTENSION
  {NULL, 0, 0}
};
/* Used to track the size of an address when generating a pre/post
   increment address.  */
static enum machine_mode aarch64_memory_reference_mode;

/* Used to force GTY into this file.  */
static GTY(()) int gty_dummy;
/* A table of valid AArch64 "bitmask immediate" values for
   logical instructions.  */

#define AARCH64_NUM_BITMASKS  5334
static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
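/* A bitmask immediate is a contiguous run of set bits, rotated by any
   amount and replicated across the register at an element size of 2, 4,
   8, 16, 32 or 64 bits.  For example, 0x00ff00ff00ff00ff (an 8-bit run
   of ones in each 16-bit element) is encodable, while 0 and ~0 are not;
   enumerating every such 64-bit value gives the 5334 table entries.  */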
/* Did we set flag_omit_frame_pointer just so
   aarch64_frame_pointer_required would be called?  */
static bool faked_omit_frame_pointer;
typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;
#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))

/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return AARCH64_DWARF_R0 + regno - R0_REGNUM;
  else if (regno == SP_REGNUM)
    return AARCH64_DWARF_SP;
  else if (FP_REGNUM_P (regno))
    return AARCH64_DWARF_V0 + regno - V0_REGNUM;

  /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
     equivalent DWARF register.  */
  return DWARF_FRAME_REGISTERS;
}
/* Return TRUE if MODE is any of the large INT modes.  */
static bool
aarch64_vect_struct_mode_p (enum machine_mode mode)
{
  return mode == OImode || mode == CImode || mode == XImode;
}
/* Return TRUE if MODE is any of the vector modes.  */
static bool
aarch64_vector_mode_p (enum machine_mode mode)
{
  return aarch64_vector_mode_supported_p (mode)
	 || aarch64_vect_struct_mode_p (mode);
}
/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
static bool
aarch64_array_mode_supported_p (enum machine_mode mode,
				unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_SIMD
      && AARCH64_VALID_SIMD_QREG_MODE (mode)
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}
/* Implement HARD_REGNO_NREGS.  */

int
aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
{
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
    default:
      return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    }
  gcc_unreachable ();
}
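/* As a worked example: TImode (16 bytes) occupies two X registers but a
   single 16-byte V register, while OImode (32 bytes) needs two V
   registers, given UNITS_PER_WORD == 8 and UNITS_PER_VREG == 16.  */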
/* Implement HARD_REGNO_MODE_OK.  */

int
aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return regno == CC_REGNUM;

  if (regno == SP_REGNUM || regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return mode == Pmode;

  if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
    return 1;

  if (FP_REGNUM_P (regno))
    {
      if (aarch64_vect_struct_mode_p (mode))
	return
	  (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
      else
	return 1;
    }

  return 0;
}
/* Return true if calls to DECL should be treated as
   long-calls (ie called via a register).  */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
  return false;
}

/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (ie called via a register).  */
bool
aarch64_is_long_call_p (rtx sym)
{
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}
/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

   (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)).  */
bool
aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
				rtx extract_imm)
{
  HOST_WIDE_INT mult_val, extract_val;

  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
    return false;

  mult_val = INTVAL (mult_imm);
  extract_val = INTVAL (extract_imm);

  if (extract_val > 8
      && extract_val < GET_MODE_BITSIZE (mode)
      && exact_log2 (extract_val & ~7) > 0
      && (extract_val & 7) <= 4
      && mult_val == (1 << (extract_val & 7)))
    return true;

  return false;
}
/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
  enum machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
  return cc_reg;
}
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

rtx
aarch64_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}
/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;
  rtx sym, addend;

  if (GET_CODE (addr) == CONST)
    {
      split_const (addr, &sym, &addend);
      if (GET_CODE (sym) == SYMBOL_REF)
	tls_kind = SYMBOL_REF_TLS_MODEL (sym);
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}
/* We'll allow lo_sum's in addresses in our legitimate addresses
   so that combine would take care of combining addresses where
   necessary, but for generation purposes, we'll generate the address
   as :
   RTL                               Absolute
   tmp = hi (symbol_ref);            adrp  x1, foo
   dest = lo_sum (tmp, symbol_ref);  add dest, x1, :lo_12:foo

   PIC                               TLS
   adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
   ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo
                                     bl   __tls_get_addr

   Load TLS symbol, depending on TLS mechanism and TLS access model.

   Global Dynamic - Traditional TLS:
   adrp tmp, :tlsgd:imm
   add  dest, tmp, #:tlsgd_lo12:imm
   bl   __tls_get_addr

   Global Dynamic - TLS Descriptors:
   adrp dest, :tlsdesc:imm
   ldr  tmp, [dest, #:tlsdesc_lo12:imm]
   add  dest, dest, #:tlsdesc_lo12:imm
   blr  tmp

   Initial Exec:
   mrs  tp, tpidr_el0
   adrp tmp, :gottprel:imm
   ldr  dest, [tmp, #:gottprel_lo12:imm]
   add  dest, dest, tp

   Local Exec:
   mrs  tp, tpidr_el0
   add  t0, tp, #:tprel_hi12:imm
   add  t0, #:tprel_lo12_nc:imm
*/
static void
aarch64_load_symref_appropriately (rtx dest, rtx imm,
				   enum aarch64_symbol_type type)
{
  switch (type)
    {
    case SYMBOL_SMALL_ABSOLUTE:
      {
	rtx tmp_reg = dest;
	if (can_create_pseudo_p ())
	  {
	    tmp_reg = gen_reg_rtx (Pmode);
	  }

	emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
	emit_insn (gen_add_losym (dest, tmp_reg, imm));
	return;
      }

    case SYMBOL_TINY_ABSOLUTE:
      emit_insn (gen_rtx_SET (Pmode, dest, imm));
      return;

    case SYMBOL_SMALL_GOT:
      {
	rtx tmp_reg = dest;
	if (can_create_pseudo_p ())
	  tmp_reg = gen_reg_rtx (Pmode);
	emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
	emit_insn (gen_ldr_got_small (dest, tmp_reg, imm));
	return;
      }

    case SYMBOL_SMALL_TLSGD:
      {
	rtx insns;
	rtx result = gen_rtx_REG (Pmode, R0_REGNUM);

	start_sequence ();
	emit_call_insn (gen_tlsgd_small (result, imm));
	insns = get_insns ();
	end_sequence ();

	RTL_CONST_CALL_P (insns) = 1;
	emit_libcall_block (insns, dest, result, imm);
	return;
      }

    case SYMBOL_SMALL_TLSDESC:
      {
	rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
	rtx tp;

	emit_insn (gen_tlsdesc_small (imm));
	tp = aarch64_load_tp (NULL);
	emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_SMALL_GOTTPREL:
      {
	rtx tmp_reg = gen_reg_rtx (Pmode);
	rtx tp = aarch64_load_tp (NULL);
	emit_insn (gen_tlsie_small (tmp_reg, imm));
	emit_insn (gen_rtx_SET (Pmode, dest,
				gen_rtx_PLUS (Pmode, tp, tmp_reg)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_SMALL_TPREL:
      {
	rtx tp = aarch64_load_tp (NULL);
	emit_insn (gen_tlsle_small (dest, tp, imm));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    default:
      gcc_unreachable ();
    }
}
/* Emit a move from SRC to DEST.  Assume that the move expanders can
   handle all moves if !can_create_pseudo_p ().  The distinction is
   important because, unlike emit_move_insn, the move expanders know
   how to force Pmode objects into the constant pool even when the
   constant pool address is not itself legitimate.  */
static rtx
aarch64_emit_move (rtx dest, rtx src)
{
  return (can_create_pseudo_p ()
	  ? emit_move_insn (dest, src)
	  : emit_move_insn_1 (dest, src));
}
void
aarch64_split_128bit_move (rtx dst, rtx src)
{
  rtx low_dst;

  enum machine_mode src_mode = GET_MODE (src);
  enum machine_mode dst_mode = GET_MODE (dst);
  int src_regno = REGNO (src);
  int dst_regno = REGNO (dst);

  gcc_assert (dst_mode == TImode || dst_mode == TFmode);

  if (REG_P (dst) && REG_P (src))
    {
      gcc_assert (src_mode == TImode || src_mode == TFmode);

      /* Handle r -> w, w -> r.  */
      if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
	{
	  switch (src_mode)
	    {
	    case TImode:
	      emit_insn
		(gen_aarch64_movtilow_di (dst, gen_lowpart (word_mode, src)));
	      emit_insn
		(gen_aarch64_movtihigh_di (dst, gen_highpart (word_mode, src)));
	      return;
	    case TFmode:
	      emit_insn
		(gen_aarch64_movtflow_di (dst, gen_lowpart (word_mode, src)));
	      emit_insn
		(gen_aarch64_movtfhigh_di (dst, gen_highpart (word_mode, src)));
	      return;
	    default:
	      gcc_unreachable ();
	    }
	}
      else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
	{
	  switch (src_mode)
	    {
	    case TImode:
	      emit_insn
		(gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst), src));
	      emit_insn
		(gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst), src));
	      return;
	    case TFmode:
	      emit_insn
		(gen_aarch64_movdi_tflow (gen_lowpart (word_mode, dst), src));
	      emit_insn
		(gen_aarch64_movdi_tfhigh (gen_highpart (word_mode, dst), src));
	      return;
	    default:
	      gcc_unreachable ();
	    }
	}
      /* Fall through to r -> r cases.  */
    }

  switch (dst_mode)
    {
    case TImode:
      low_dst = gen_lowpart (word_mode, dst);
      if (REG_P (low_dst)
	  && reg_overlap_mentioned_p (low_dst, src))
	{
	  aarch64_emit_move (gen_highpart (word_mode, dst),
			     gen_highpart_mode (word_mode, TImode, src));
	  aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
	}
      else
	{
	  aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
	  aarch64_emit_move (gen_highpart (word_mode, dst),
			     gen_highpart_mode (word_mode, TImode, src));
	}
      return;
    case TFmode:
      emit_move_insn (gen_rtx_REG (DFmode, dst_regno),
		      gen_rtx_REG (DFmode, src_regno));
      emit_move_insn (gen_rtx_REG (DFmode, dst_regno + 1),
		      gen_rtx_REG (DFmode, src_regno + 1));
      return;
    default:
      gcc_unreachable ();
    }
}
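/* For instance, a TImode copy between general-register pairs such as
   x0/x1 and x2/x3 becomes two word_mode moves of the low and high
   halves; the overlap check above orders the two moves so that no
   source half is clobbered before it has been read.  */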
bool
aarch64_split_128bit_move_p (rtx dst, rtx src)
{
  return (! REG_P (src)
	  || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
}
/* Split a complex SIMD combine.  */

void
aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
{
  enum machine_mode src_mode = GET_MODE (src1);
  enum machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src1) && REG_P (src2))
    {
      rtx (*gen) (rtx, rtx, rtx);

      switch (src_mode)
	{
	case V8QImode:
	  gen = gen_aarch64_simd_combinev8qi;
	  break;
	case V4HImode:
	  gen = gen_aarch64_simd_combinev4hi;
	  break;
	case V2SImode:
	  gen = gen_aarch64_simd_combinev2si;
	  break;
	case V2SFmode:
	  gen = gen_aarch64_simd_combinev2sf;
	  break;
	case DImode:
	  gen = gen_aarch64_simd_combinedi;
	  break;
	case DFmode:
	  gen = gen_aarch64_simd_combinedf;
	  break;
	default:
	  gcc_unreachable ();
	}

      emit_insn (gen (dst, src1, src2));
      return;
    }
}
/* Split a complex SIMD move.  */

void
aarch64_split_simd_move (rtx dst, rtx src)
{
  enum machine_mode src_mode = GET_MODE (src);
  enum machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src))
    {
      rtx (*gen) (rtx, rtx);

      gcc_assert (VECTOR_MODE_P (src_mode));

      switch (src_mode)
	{
	case V16QImode:
	  gen = gen_aarch64_split_simd_movv16qi;
	  break;
	case V8HImode:
	  gen = gen_aarch64_split_simd_movv8hi;
	  break;
	case V4SImode:
	  gen = gen_aarch64_split_simd_movv4si;
	  break;
	case V2DImode:
	  gen = gen_aarch64_split_simd_movv2di;
	  break;
	case V4SFmode:
	  gen = gen_aarch64_split_simd_movv4sf;
	  break;
	case V2DFmode:
	  gen = gen_aarch64_split_simd_movv2df;
	  break;
	default:
	  gcc_unreachable ();
	}

      emit_insn (gen (dst, src));
      return;
    }
}
static rtx
aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
{
  if (can_create_pseudo_p ())
    return force_reg (mode, value);
  else
    {
      x = aarch64_emit_move (x, value);
      return x;
    }
}
static rtx
aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg,
		    HOST_WIDE_INT offset)
{
  if (!aarch64_plus_immediate (GEN_INT (offset), mode))
    {
      rtx high;
      /* Load the full offset into a register.  This
	 might be improvable in the future.  */
      high = GEN_INT (offset);
      offset = 0;
      high = aarch64_force_temporary (mode, temp, high);
      reg = aarch64_force_temporary (mode, temp,
				     gen_rtx_PLUS (mode, high, reg));
    }
  return plus_constant (mode, reg, offset);
}
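/* E.g. an offset of 4096 is a valid ADD immediate (a uimm12, optionally
   shifted by 12), so plus_constant folds it directly, whereas an offset
   like 0x123457 must first be built in TEMP and added as a register.  */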
void
aarch64_expand_mov_immediate (rtx dest, rtx imm)
{
  enum machine_mode mode = GET_MODE (dest);
  unsigned HOST_WIDE_INT mask;
  int i;
  bool first;
  unsigned HOST_WIDE_INT val;
  bool subtargets;
  rtx subtarget;
  int one_match, zero_match;

  gcc_assert (mode == SImode || mode == DImode);

  /* Check on what type of symbol it is.  */
  if (GET_CODE (imm) == SYMBOL_REF
      || GET_CODE (imm) == LABEL_REF
      || GET_CODE (imm) == CONST)
    {
      rtx mem, base, offset;
      enum aarch64_symbol_type sty;

      /* If we have (const (plus symbol offset)), separate out the offset
	 before we start classifying the symbol.  */
      split_const (imm, &base, &offset);

      sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
      switch (sty)
	{
	case SYMBOL_FORCE_TO_MEM:
	  if (offset != const0_rtx
	      && targetm.cannot_force_const_mem (mode, imm))
	    {
	      gcc_assert(can_create_pseudo_p ());
	      base = aarch64_force_temporary (mode, dest, base);
	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
	      aarch64_emit_move (dest, base);
	      return;
	    }
	  mem = force_const_mem (mode, imm);
	  gcc_assert (mem);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
	  return;

	case SYMBOL_SMALL_TLSGD:
	case SYMBOL_SMALL_TLSDESC:
	case SYMBOL_SMALL_GOTTPREL:
	case SYMBOL_SMALL_GOT:
	  if (offset != const0_rtx)
	    {
	      gcc_assert(can_create_pseudo_p ());
	      base = aarch64_force_temporary (mode, dest, base);
	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
	      aarch64_emit_move (dest, base);
	      return;
	    }
	  /* FALLTHRU */

	case SYMBOL_SMALL_TPREL:
	case SYMBOL_SMALL_ABSOLUTE:
	case SYMBOL_TINY_ABSOLUTE:
	  aarch64_load_symref_appropriately (dest, imm, sty);
	  return;

	default:
	  gcc_unreachable ();
	}
    }
  if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
    {
      emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
      return;
    }

  if (!CONST_INT_P (imm))
    {
      if (GET_CODE (imm) == HIGH)
	emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
      else
	{
	  rtx mem = force_const_mem (mode, imm);
	  gcc_assert (mem);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
	}

      return;
    }

  if (mode == SImode)
    {
      /* We know we can't do this in 1 insn, and we must be able to do it
	 in two; so don't mess around looking for sequences that don't buy
	 us anything.  */
      emit_insn (gen_rtx_SET (VOIDmode, dest,
			      GEN_INT (INTVAL (imm) & 0xffff)));
      emit_insn (gen_insv_immsi (dest, GEN_INT (16),
				 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
      return;
    }
  /* Remaining cases are all for DImode.  */

  val = INTVAL (imm);
  subtargets = optimize && can_create_pseudo_p ();

  one_match = 0;
  zero_match = 0;
  mask = 0xffff;

  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) == 0)
	zero_match++;
      else if ((val & mask) == mask)
	one_match++;
    }

  if (one_match == 2)
    {
      mask = 0xffff;
      for (i = 0; i < 64; i += 16, mask <<= 16)
	{
	  if ((val & mask) != mask)
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
	      emit_insn (gen_insv_immdi (dest, GEN_INT (i),
					 GEN_INT ((val >> i) & 0xffff)));
	      return;
	    }
	}
      gcc_unreachable ();
    }

  if (zero_match == 2)
    goto simple_sequence;
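  /* A sketch of the quarter-matching above: val == 0xffff1234ffff5678
     scans as one_match == 2, so it is emitted as a MOVN producing
     0xffff1234ffffffff (a single non-ones quarter) followed by a MOVK
     of 0x5678; two instructions instead of four.  */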
  mask = 0x0ffff0000UL;
  for (i = 16; i < 64; i += 16, mask <<= 16)
    {
      HOST_WIDE_INT comp = mask & ~(mask - 1);

      if (aarch64_uimm12_shift (val - (val & mask)))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - (val & mask))));
	  return;
	}
      else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT ((val + comp) & mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - ((val + comp) & mask))));
	  return;
	}
      else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT ((val - comp) | ~mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - ((val - comp) | ~mask))));
	  return;
	}
      else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT (val | ~mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - (val | ~mask))));
	  return;
	}
    }
  /* See if we can do it by arithmetically combining two
     immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      int j;
      mask = 0xffff;

      if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
	  || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT (aarch64_bitmasks[i])));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - aarch64_bitmasks[i])));
	  return;
	}

      for (j = 0; j < 64; j += 16, mask <<= 16)
	{
	  if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, dest,
				      GEN_INT (aarch64_bitmasks[i])));
	      emit_insn (gen_insv_immdi (dest, GEN_INT (j),
					 GEN_INT ((val >> j) & 0xffff)));
	      return;
	    }
	}
    }
  /* See if we can do it by logically combining two immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
	{
	  int j;

	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
	    if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
	      {
		subtarget = subtargets ? gen_reg_rtx (mode) : dest;
		emit_insn (gen_rtx_SET (VOIDmode, subtarget,
					GEN_INT (aarch64_bitmasks[i])));
		emit_insn (gen_iordi3 (dest, subtarget,
				       GEN_INT (aarch64_bitmasks[j])));
		return;
	      }
	}
      else if ((val & aarch64_bitmasks[i]) == val)
	{
	  int j;

	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
	    if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
	      {
		subtarget = subtargets ? gen_reg_rtx (mode) : dest;
		emit_insn (gen_rtx_SET (VOIDmode, subtarget,
					GEN_INT (aarch64_bitmasks[j])));
		emit_insn (gen_anddi3 (dest, subtarget,
				       GEN_INT (aarch64_bitmasks[i])));
		return;
	      }
	}
    }
 simple_sequence:
  first = true;
  mask = 0xffff;

  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) != 0)
	{
	  if (first)
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, dest,
				      GEN_INT (val & mask)));
	      first = false;
	    }
	  else
	    emit_insn (gen_insv_immdi (dest, GEN_INT (i),
				       GEN_INT ((val >> i) & 0xffff)));
	}
    }
}
static bool
aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  /* Indirect calls are not currently supported.  */
  if (decl == NULL)
    return false;

  /* Cannot tail-call to long-calls, since these are outside of the
     range of a branch instruction (we could handle this if we added
     support for indirect tail-calls).  */
  if (aarch64_decl_is_long_call_p (decl))
    return false;

  return true;
}
/* Implement TARGET_PASS_BY_REFERENCE.  */

static bool
aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
			   enum machine_mode mode,
			   const_tree type,
			   bool named ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  enum machine_mode dummymode;
  int nregs;

  /* GET_MODE_SIZE (BLKmode) is useless since it is 0.  */
  size = (mode == BLKmode && type)
    ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);

  if (type)
    {
      /* Arrays always passed by reference.  */
      if (TREE_CODE (type) == ARRAY_TYPE)
	return true;
      /* Other aggregates based on their size.  */
      if (AGGREGATE_TYPE_P (type))
	size = int_size_in_bytes (type);
    }

  /* Variable sized arguments are always returned by reference.  */
  if (size < 0)
    return true;

  /* Can this be a candidate to be passed in fp/simd register(s)?  */
  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
					       &dummymode, &nregs, NULL))
    return false;

  /* Arguments which are variable sized or larger than 2 registers are
     passed by reference unless they are a homogenous floating point
     aggregate.  */
  return size > 2 * UNITS_PER_WORD;
}
/* Return TRUE if VALTYPE is padded to its least significant bits.  */
static bool
aarch64_return_in_msb (const_tree valtype)
{
  enum machine_mode dummy_mode;
  int dummy_int;

  /* Never happens in little-endian mode.  */
  if (!BYTES_BIG_ENDIAN)
    return false;

  /* Only composite types smaller than or equal to 16 bytes can
     be potentially returned in registers.  */
  if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
      || int_size_in_bytes (valtype) <= 0
      || int_size_in_bytes (valtype) > 16)
    return false;

  /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
     or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
     is always passed/returned in the least significant bits of fp/simd
     register(s).  */
  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
					       &dummy_mode, &dummy_int, NULL))
    return false;

  return true;
}
/* Implement TARGET_FUNCTION_VALUE.
   Define how to find the value returned by a function.  */

static rtx
aarch64_function_value (const_tree type, const_tree func,
			bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode mode;
  int unsignedp;
  int count;
  enum machine_mode ag_mode;

  mode = TYPE_MODE (type);
  if (INTEGRAL_TYPE_P (type))
    mode = promote_function_mode (type, mode, &unsignedp, func, 1);

  if (aarch64_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);

      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
	}
    }

  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
					       &ag_mode, &count, NULL))
    {
      if (!aarch64_composite_type_p (type, mode))
	{
	  gcc_assert (count == 1 && mode == ag_mode);
	  return gen_rtx_REG (mode, V0_REGNUM);
	}
      else
	{
	  int i;
	  rtx par;

	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
	  for (i = 0; i < count; i++)
	    {
	      rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				       GEN_INT (i * GET_MODE_SIZE (ag_mode)));
	      XVECEXP (par, 0, i) = tmp;
	    }
	  return par;
	}
    }
  else
    return gen_rtx_REG (mode, R0_REGNUM);
}
/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
   Return true if REGNO is the number of a hard register in which the values
   of called function may come back.  */

static bool
aarch64_function_value_regno_p (const unsigned int regno)
{
  /* Maximum of 16 bytes can be returned in the general registers.  Examples
     of 16-byte return values are: 128-bit integers and 16-byte small
     structures (excluding homogeneous floating-point aggregates).  */
  if (regno == R0_REGNUM || regno == R1_REGNUM)
    return true;

  /* Up to four fp/simd registers can return a function value, e.g. a
     homogeneous floating-point aggregate having four members.  */
  if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
    return !TARGET_GENERAL_REGS_ONLY;

  return false;
}
/* Implement TARGET_RETURN_IN_MEMORY.

   If the type T of the result of a function is such that
     void func (T arg)
   would require that arg be passed as a value in a register (or set of
   registers) according to the parameter passing rules, then the result
   is returned in the same registers as would be used for such an
   argument.  */

static bool
aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  enum machine_mode ag_mode;
  int count;

  if (!AGGREGATE_TYPE_P (type)
      && TREE_CODE (type) != COMPLEX_TYPE
      && TREE_CODE (type) != VECTOR_TYPE)
    /* Simple scalar types always returned in registers.  */
    return false;

  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
					       type,
					       &ag_mode,
					       &count,
					       NULL))
    return false;

  /* Types larger than 2 registers returned in memory.  */
  size = int_size_in_bytes (type);
  return (size < 0 || size > 2 * UNITS_PER_WORD);
}
static bool
aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
			       const_tree type, int *nregs)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  return aarch64_vfp_is_call_or_return_candidate (mode,
						  type,
						  &pcum->aapcs_vfp_rmode,
						  nregs,
						  NULL);
}
/* Given MODE and TYPE of a function argument, return the alignment in
   bits.  The idea is to suppress any stronger alignment requested by
   the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
   This is a helper function for local use only.  */

static unsigned int
aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
{
  unsigned int alignment;

  if (type)
    {
      if (!integer_zerop (TYPE_SIZE (type)))
	{
	  if (TYPE_MODE (type) == mode)
	    alignment = TYPE_ALIGN (type);
	  else
	    alignment = GET_MODE_ALIGNMENT (mode);
	}
      else
	alignment = 0;
    }
  else
    alignment = GET_MODE_ALIGNMENT (mode);

  return alignment;
}
/* Layout a function argument according to the AAPCS64 rules.  The rule
   numbers refer to the rule numbers in the AAPCS64.  */

static void
aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
		    const_tree type,
		    bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int ncrn, nvrn, nregs;
  bool allocate_ncrn, allocate_nvrn;

  /* We need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
  allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
						 mode,
						 type,
						 &nregs);

  /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
     The following code thus handles passing by SIMD/FP registers first.  */

  nvrn = pcum->aapcs_nvrn;

  /* C1 - C5 for floating point, homogenous floating point aggregates (HFA)
     and homogenous short-vector aggregates (HVA).  */
  if (allocate_nvrn)
    {
      if (nvrn + nregs <= NUM_FP_ARG_REGS)
	{
	  pcum->aapcs_nextnvrn = nvrn + nregs;
	  if (!aarch64_composite_type_p (type, mode))
	    {
	      gcc_assert (nregs == 1);
	      pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
	    }
	  else
	    {
	      rtx par;
	      int i;

	      par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
	      for (i = 0; i < nregs; i++)
		{
		  rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
					 V0_REGNUM + nvrn + i);
		  tmp = gen_rtx_EXPR_LIST
		    (VOIDmode, tmp,
		     GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
		  XVECEXP (par, 0, i) = tmp;
		}
	      pcum->aapcs_reg = par;
	    }
	  return;
	}
      else
	{
	  /* C.3 NSRN is set to 8.  */
	  pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
	  goto on_stack;
	}
    }

  ncrn = pcum->aapcs_ncrn;
  nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
	   + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* C6 - C9.  though the sign and zero extension semantics are
     handled elsewhere.  This is the case where the argument fits
     entirely general registers.  */
  if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
    {
      unsigned int alignment = aarch64_function_arg_alignment (mode, type);

      gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);

      /* C.8 if the argument has an alignment of 16 then the NGRN is
	 rounded up to the next even number.  */
      if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
	{
	  ++ncrn;
	  gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
	}
      /* NREGS can be 0 when e.g. an empty structure is to be passed.
	 A reg is still generated for it, but the caller should be smart
	 enough not to use it.  */
      if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
	{
	  pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
	}
      else
	{
	  rtx par;
	  int i;

	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
	  for (i = 0; i < nregs; i++)
	    {
	      rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				       GEN_INT (i * UNITS_PER_WORD));
	      XVECEXP (par, 0, i) = tmp;
	    }
	  pcum->aapcs_reg = par;
	}

      pcum->aapcs_nextncrn = ncrn + nregs;
      return;
    }

  /* C.11  */
  pcum->aapcs_nextncrn = NUM_ARG_REGS;

  /* The argument is passed on stack; record the needed number of words for
     this argument (we can re-use NREGS) and align the total size if
     necessary.  */
on_stack:
  pcum->aapcs_stack_words = nregs;
  if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
    pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
					       16 / UNITS_PER_WORD) + 1;
  return;
}
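/* To illustrate rule C.8 above: a 16-byte-aligned, 16-byte composite
   arriving with NGRN == 1 skips x1 and is passed in x2/x3; an __int128
   argument gets the same treatment.  */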
/* Implement TARGET_FUNCTION_ARG.  */

static rtx
aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
		      const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);

  if (mode == VOIDmode)
    return NULL_RTX;

  aarch64_layout_arg (pcum_v, mode, type, named);
  return pcum->aapcs_reg;
}
void
aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
			      const_tree fntype ATTRIBUTE_UNUSED,
			      rtx libname ATTRIBUTE_UNUSED,
			      const_tree fndecl ATTRIBUTE_UNUSED,
			      unsigned n_named ATTRIBUTE_UNUSED)
{
  pcum->aapcs_ncrn = 0;
  pcum->aapcs_nvrn = 0;
  pcum->aapcs_nextncrn = 0;
  pcum->aapcs_nextnvrn = 0;
  pcum->pcs_variant = ARM_PCS_AAPCS64;
  pcum->aapcs_reg = NULL_RTX;
  pcum->aapcs_arg_processed = false;
  pcum->aapcs_stack_words = 0;
  pcum->aapcs_stack_size = 0;

  return;
}
static void
aarch64_function_arg_advance (cumulative_args_t pcum_v,
			      enum machine_mode mode,
			      const_tree type,
			      bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  if (pcum->pcs_variant == ARM_PCS_AAPCS64)
    {
      aarch64_layout_arg (pcum_v, mode, type, named);
      gcc_assert ((pcum->aapcs_reg != NULL_RTX)
		  != (pcum->aapcs_stack_words != 0));
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
      pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
      pcum->aapcs_stack_size += pcum->aapcs_stack_words;
      pcum->aapcs_stack_words = 0;
      pcum->aapcs_reg = NULL_RTX;
    }
}
bool
aarch64_function_arg_regno_p (unsigned regno)
{
  return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
	  || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
}
/* Implement FUNCTION_ARG_BOUNDARY.  Every parameter gets at least
   PARM_BOUNDARY bits of alignment, but will be given anything up
   to STACK_BOUNDARY bits if the type requires it.  This makes sure
   that both before and after the layout of each argument, the Next
   Stacked Argument Address (NSAA) will have a minimum alignment of
   8 bytes.  */

static unsigned int
aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
{
  unsigned int alignment = aarch64_function_arg_alignment (mode, type);

  if (alignment < PARM_BOUNDARY)
    alignment = PARM_BOUNDARY;
  if (alignment > STACK_BOUNDARY)
    alignment = STACK_BOUNDARY;
  return alignment;
}
/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).

   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte of the stack slot has useful data.

   Small aggregate types are placed in the lowest memory address.

   The related parameter passing rules are B.4, C.3, C.5 and C.14.  */

bool
aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
{
  /* On little-endian targets, the least significant byte of every stack
     argument is passed at the lowest byte address of the stack slot.  */
  if (!BYTES_BIG_ENDIAN)
    return true;

  /* Otherwise, integral types and floating point types are padded downward:
     the least significant byte of a stack argument is passed at the highest
     byte address of the stack slot.  */
  if (type
      ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type))
      : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
    return false;

  /* Everything else padded upward, i.e. data in first byte of stack slot.  */
  return true;
}
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).

   It specifies padding for the last (may also be the only)
   element of a block move between registers and memory.  If
   assuming the block is in the memory, padding upward means that
   the last element is padded after its highest significant byte,
   while in downward padding, the last element is padded at its
   least significant byte side.

   Small aggregates and small complex types are always padded
   upwards.

   We don't need to worry about homogeneous floating-point or
   short-vector aggregates; their move is not affected by the
   padding direction determined here.  Regardless of endianness,
   each element of such an aggregate is put in the least
   significant bits of a fp/simd register.

   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */

bool
aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
			bool first ATTRIBUTE_UNUSED)
{
  /* Small composite types are always padded upward.  */
  if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
    {
      HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
			    : GET_MODE_SIZE (mode));
      if (size < 2 * UNITS_PER_WORD)
	return true;
    }

  /* Otherwise, use the default padding.  */
  return !BYTES_BIG_ENDIAN;
}
static enum machine_mode
aarch64_libgcc_cmp_return_mode (void)
{
  return SImode;
}
static bool
aarch64_frame_pointer_required (void)
{
  /* If the function contains dynamic stack allocations, we need to
     use the frame pointer to access the static parts of the frame.  */
  if (cfun->calls_alloca)
    return true;

  /* We may have turned flag_omit_frame_pointer on in order to have this
     function called; if we did, we also set the 'faked_omit_frame_pointer' flag
     and we'll check it here.
     If we really did set flag_omit_frame_pointer normally, then we return false
     (no frame pointer required) in all cases.  */

  if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
    return false;
  else if (flag_omit_leaf_frame_pointer)
    return !crtl->is_leaf;
  return true;
}
/* Mark the registers that need to be saved by the callee and calculate
   the size of the callee-saved registers area and frame record (both FP
   and LR may be omitted).  */
static void
aarch64_layout_frame (void)
{
  HOST_WIDE_INT offset = 0;
  int regno;

  if (reload_completed && cfun->machine->frame.laid_out)
    return;

  cfun->machine->frame.fp_lr_offset = 0;

  /* First mark all the registers that really need to be saved...  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    cfun->machine->frame.reg_offset[regno] = -1;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    cfun->machine->frame.reg_offset[regno] = -1;

  /* ... that includes the eh data registers (if needed)...  */
  if (crtl->calls_eh_return)
    for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
      cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;

  /* ... and any callee saved register that dataflow says is live.  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    if (df_regs_ever_live_p (regno)
	&& !call_used_regs[regno])
      cfun->machine->frame.reg_offset[regno] = 0;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    if (df_regs_ever_live_p (regno)
	&& !call_used_regs[regno])
      cfun->machine->frame.reg_offset[regno] = 0;

  if (frame_pointer_needed)
    {
      cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
      cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
      cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
    }

  /* Now assign stack slots for them.  */
  for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
    if (cfun->machine->frame.reg_offset[regno] != -1)
      {
	cfun->machine->frame.reg_offset[regno] = offset;
	offset += UNITS_PER_WORD;
      }

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    if (cfun->machine->frame.reg_offset[regno] != -1)
      {
	cfun->machine->frame.reg_offset[regno] = offset;
	offset += UNITS_PER_WORD;
      }

  if (frame_pointer_needed)
    {
      cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
      offset += UNITS_PER_WORD;
      cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
    }

  if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
    {
      cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
      offset += UNITS_PER_WORD;
      cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
    }

  cfun->machine->frame.padding0 =
    (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
  offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);

  cfun->machine->frame.saved_regs_size = offset;
  cfun->machine->frame.laid_out = true;
}
/* Make the last instruction frame-related and note that it performs
   the operation described by FRAME_PATTERN.  */

static void
aarch64_set_frame_expr (rtx frame_pattern)
{
  rtx insn;

  insn = get_last_insn ();
  RTX_FRAME_RELATED_P (insn) = 1;
  RTX_FRAME_RELATED_P (frame_pattern) = 1;
  REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
				      frame_pattern,
				      REG_NOTES (insn));
}
static bool
aarch64_register_saved_on_entry (int regno)
{
  return cfun->machine->frame.reg_offset[regno] != -1;
}
static void
aarch64_save_or_restore_fprs (int start_offset, int increment,
			      bool restore, rtx base_rtx)
{
  unsigned regno;
  unsigned regno2;
  rtx insn;
  rtx (*gen_mem_ref)(enum machine_mode, rtx)
    = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    {
      if (aarch64_register_saved_on_entry (regno))
	{
	  rtx mem;
	  mem = gen_mem_ref (DFmode,
			     plus_constant (Pmode,
					    base_rtx,
					    start_offset));

	  for (regno2 = regno + 1;
	       regno2 <= V31_REGNUM
		 && !aarch64_register_saved_on_entry (regno2);
	       regno2++)
	    {
	      /* Empty loop.  */
	    }
	  if (regno2 <= V31_REGNUM &&
	      aarch64_register_saved_on_entry (regno2))
	    {
	      rtx mem2;
	      /* Next highest register to be saved.  */
	      mem2 = gen_mem_ref (DFmode,
				  plus_constant (Pmode,
						 base_rtx,
						 start_offset + increment));
	      if (restore == false)
		{
		  insn = emit_insn
		    ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
					mem2, gen_rtx_REG (DFmode, regno2)));
		}
	      else
		{
		  insn = emit_insn
		    ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
				       gen_rtx_REG (DFmode, regno2), mem2));

		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DFmode, regno));
		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DFmode, regno2));
		}

	      /* The first part of a frame-related parallel insn
		 is always assumed to be relevant to the frame
		 calculations; subsequent parts, are only
		 frame-related if explicitly marked.  */
	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
	      regno = regno2;
	      start_offset += increment * 2;
	    }
	  else
	    {
	      if (restore == false)
		insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
	      else
		{
		  insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DImode, regno));
		}
	      start_offset += increment;
	    }
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
/* Offset from the stack pointer of where the saves and
   restores have to happen.  */
static void
aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
					       bool restore)
{
  rtx insn;
  rtx base_rtx = stack_pointer_rtx;
  HOST_WIDE_INT start_offset = offset;
  HOST_WIDE_INT increment = UNITS_PER_WORD;
  rtx (*gen_mem_ref)(enum machine_mode, rtx)
    = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
  unsigned limit = (frame_pointer_needed)? R28_REGNUM : R30_REGNUM;
  unsigned regno;
  unsigned regno2;

  for (regno = R0_REGNUM; regno <= limit; regno++)
    {
      if (aarch64_register_saved_on_entry (regno))
	{
	  rtx mem;
	  mem = gen_mem_ref (Pmode,
			     plus_constant (Pmode,
					    base_rtx,
					    start_offset));

	  for (regno2 = regno + 1;
	       regno2 <= limit
		 && !aarch64_register_saved_on_entry (regno2);
	       regno2++)
	    {
	      /* Empty loop.  */
	    }
	  if (regno2 <= limit &&
	      aarch64_register_saved_on_entry (regno2))
	    {
	      rtx mem2;
	      /* Next highest register to be saved.  */
	      mem2 = gen_mem_ref (Pmode,
				  plus_constant (Pmode,
						 base_rtx,
						 start_offset + increment));
	      if (restore == false)
		{
		  insn = emit_insn
		    ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
					mem2, gen_rtx_REG (DImode, regno2)));
		}
	      else
		{
		  insn = emit_insn
		    ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
				       gen_rtx_REG (DImode, regno2), mem2));

		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DImode, regno));
		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DImode, regno2));
		}

	      /* The first part of a frame-related parallel insn
		 is always assumed to be relevant to the frame
		 calculations; subsequent parts, are only
		 frame-related if explicitly marked.  */
	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
	      regno = regno2;
	      start_offset += increment * 2;
	    }
	  else
	    {
	      if (restore == false)
		insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
	      else
		{
		  insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DImode, regno));
		}
	      start_offset += increment;
	    }
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
}
/* AArch64 stack frames generated by this compiler look like:

	+-------------------------------+
	|                               |
	|  incoming stack arguments     |
	|                               |
	+-------------------------------+ <-- arg_pointer_rtx
	|                               |
	|  callee-allocated save area   |
	|  for register varargs         |
	|                               |
	+-------------------------------+
	|                               |
	|  local variables              |
	|                               |
	+-------------------------------+ <-- frame_pointer_rtx
	|                               |
	|  callee-saved registers       |
	|                               |
	+-------------------------------+
	|  LR'                          |
	+-------------------------------+
	|  FP'                          |
      P +-------------------------------+ <-- hard_frame_pointer_rtx
	|  dynamic allocation           |
	+-------------------------------+
	|                               |
	|  outgoing stack arguments     |
	|                               |
	+-------------------------------+ <-- stack_pointer_rtx

   Dynamic stack allocations such as alloca insert data at point P.
   They decrease stack_pointer_rtx but leave frame_pointer_rtx and
   hard_frame_pointer_rtx unchanged.  */
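/* A small worked example of this layout (illustrative numbers): with 16
   bytes of locals, 32 bytes of outgoing arguments and only x29/x30
   saved, the rounded frame size is 64 and fp_offset is 32, so the
   prologue below would emit roughly

	sub	sp, sp, #64
	stp	x29, x30, [sp, #32]
	add	x29, sp, #32  */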
/* Generate the prologue instructions for entry into a function.
   Establish the stack frame by decreasing the stack pointer with a
   properly calculated size and, if necessary, create a frame record
   filled with the values of LR and previous frame pointer.  The
   current FP is also set up if it is in use.  */

void
aarch64_expand_prologue (void)
{
  /* sub sp, sp, #<frame_size>
     stp {fp, lr}, [sp, #<frame_size> - 16]
     add fp, sp, #<frame_size> - hardfp_offset
     stp {cs_reg}, [fp, #-16] etc.

     sub sp, sp, <final_adjustment_if_any>
  */
  HOST_WIDE_INT original_frame_size;	/* local variables + vararg save */
  HOST_WIDE_INT frame_size, offset;
  HOST_WIDE_INT fp_offset;		/* FP offset from SP */
  rtx insn;

  aarch64_layout_frame ();
  original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
  gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
	      && (cfun->stdarg || !cfun->machine->saved_varargs_size));
  frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
		+ crtl->outgoing_args_size);
  offset = frame_size = AARCH64_ROUND_UP (frame_size,
					  STACK_BOUNDARY / BITS_PER_UNIT);

  if (flag_stack_usage_info)
    current_function_static_stack_size = frame_size;

  fp_offset = (offset
	       - original_frame_size
	       - cfun->machine->frame.saved_regs_size);

  /* Store pairs and load pairs have a range only -512 to 504.  */
  if (offset >= 512)
    {
      /* When the frame has a large size, an initial decrease is done on
	 the stack pointer to jump over the callee-allocated save area for
	 register varargs, the local variable area and/or the callee-saved
	 register area.  This will allow the pre-index write-back
	 store pair instructions to be used for setting up the stack frame
	 efficiently.  */
      offset = original_frame_size + cfun->machine->frame.saved_regs_size;
      if (offset >= 512)
	offset = cfun->machine->frame.saved_regs_size;

      frame_size -= (offset + crtl->outgoing_args_size);
      fp_offset = 0;

      if (frame_size >= 0x1000000)
	{
	  rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
	  emit_move_insn (op0, GEN_INT (-frame_size));
	  emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
	  aarch64_set_frame_expr (gen_rtx_SET
				  (Pmode, stack_pointer_rtx,
				   gen_rtx_PLUS (Pmode,
						 stack_pointer_rtx,
						 GEN_INT (-frame_size))));
	}
      else if (frame_size > 0)
	{
	  if ((frame_size & 0xfff) != frame_size)
	    {
	      insn = emit_insn (gen_add2_insn
				(stack_pointer_rtx,
				 GEN_INT (-(frame_size
					    & ~(HOST_WIDE_INT)0xfff))));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	  if ((frame_size & 0xfff) != 0)
	    {
	      insn = emit_insn (gen_add2_insn
				(stack_pointer_rtx,
				 GEN_INT (-(frame_size
					    & (HOST_WIDE_INT)0xfff))));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}
    }
  else
    frame_size = -1;

  if (offset > 0)
    {
      /* Save the frame pointer and lr if the frame pointer is needed
	 first.  Make the frame pointer point to the location of the
	 old frame pointer on the stack.  */
      if (frame_pointer_needed)
	{
	  rtx mem_fp, mem_lr;

	  if (fp_offset)
	    {
	      insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					       GEN_INT (-offset)));
	      RTX_FRAME_RELATED_P (insn) = 1;
	      aarch64_set_frame_expr (gen_rtx_SET
				      (Pmode, stack_pointer_rtx,
				       gen_rtx_MINUS (Pmode,
						      stack_pointer_rtx,
						      GEN_INT (offset))));
	      mem_fp = gen_frame_mem (DImode,
				      plus_constant (Pmode,
						     stack_pointer_rtx,
						     fp_offset));
	      mem_lr = gen_frame_mem (DImode,
				      plus_constant (Pmode,
						     stack_pointer_rtx,
						     fp_offset
						     + UNITS_PER_WORD));
	      insn = emit_insn (gen_store_pairdi (mem_fp,
						  hard_frame_pointer_rtx,
						  mem_lr,
						  gen_rtx_REG (DImode,
							       LR_REGNUM)));
	    }
	  else
	    {
	      insn = emit_insn (gen_storewb_pairdi_di
				(stack_pointer_rtx, stack_pointer_rtx,
				 hard_frame_pointer_rtx,
				 gen_rtx_REG (DImode, LR_REGNUM),
				 GEN_INT (-offset),
				 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
	    }

	  /* The first part of a frame-related parallel insn is always
	     assumed to be relevant to the frame calculations;
	     subsequent parts, are only frame-related if explicitly
	     marked.  */
	  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
	  RTX_FRAME_RELATED_P (insn) = 1;

	  /* Set up frame pointer to point to the location of the
	     previous frame pointer on the stack.  */
	  insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
					   stack_pointer_rtx,
					   GEN_INT (fp_offset)));
	  aarch64_set_frame_expr (gen_rtx_SET
				  (Pmode, hard_frame_pointer_rtx,
				   gen_rtx_PLUS (Pmode,
						 stack_pointer_rtx,
						 GEN_INT (fp_offset))));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
					   hard_frame_pointer_rtx));
	}
      else
	{
	  insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					   GEN_INT (-offset)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

      aarch64_save_or_restore_callee_save_registers
	(fp_offset + cfun->machine->frame.hardfp_offset, 0);
    }

  /* when offset >= 512,
     sub sp, sp, #<outgoing_args_size> */
  if (frame_size > -1)
    {
      if (crtl->outgoing_args_size > 0)
	{
	  insn = emit_insn (gen_add2_insn
			    (stack_pointer_rtx,
			     GEN_INT (- crtl->outgoing_args_size)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
/* Generate the epilogue instructions for returning from a function.  */
void
aarch64_expand_epilogue (bool for_sibcall)
{
  HOST_WIDE_INT original_frame_size, frame_size, offset;
  HOST_WIDE_INT fp_offset;
  rtx insn;
  rtx cfa_reg;

  aarch64_layout_frame ();
  original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
  frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
		+ crtl->outgoing_args_size);
  offset = frame_size = AARCH64_ROUND_UP (frame_size,
					  STACK_BOUNDARY / BITS_PER_UNIT);
  fp_offset = (offset
	       - original_frame_size
	       - cfun->machine->frame.saved_regs_size);

  cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;

  /* Store pairs and load pairs have a range only -512 to 504.  */
  if (offset >= 512)
    {
      offset = original_frame_size + cfun->machine->frame.saved_regs_size;
      if (offset >= 512)
	offset = cfun->machine->frame.saved_regs_size;

      frame_size -= (offset + crtl->outgoing_args_size);
      fp_offset = 0;
      if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
	{
	  insn = emit_insn (gen_add2_insn
			    (stack_pointer_rtx,
			     GEN_INT (crtl->outgoing_args_size)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
  else
    frame_size = -1;

  /* If there were outgoing arguments or we've done dynamic stack
     allocation, then restore the stack pointer from the frame
     pointer.  This is at most one insn and more efficient than using
     GCC's internal mechanism.  */
  if (frame_pointer_needed
      && (crtl->outgoing_args_size || cfun->calls_alloca))
    {
      insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
				       hard_frame_pointer_rtx,
				       GEN_INT (- fp_offset)));
      RTX_FRAME_RELATED_P (insn) = 1;
      /* As SP is set to (FP - fp_offset), according to the rules in
	 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
	 from the value of SP from now on.  */
      cfa_reg = stack_pointer_rtx;
    }

  aarch64_save_or_restore_callee_save_registers
    (fp_offset + cfun->machine->frame.hardfp_offset, 1);

  /* Restore the frame pointer and lr if the frame pointer is needed.  */
  if (offset > 0)
    {
      if (frame_pointer_needed)
	{
	  rtx mem_fp, mem_lr;

	  if (fp_offset)
	    {
	      mem_fp = gen_frame_mem (DImode,
				      plus_constant (Pmode,
						     stack_pointer_rtx,
						     fp_offset));
	      mem_lr = gen_frame_mem (DImode,
				      plus_constant (Pmode,
						     stack_pointer_rtx,
						     fp_offset
						     + UNITS_PER_WORD));
	      insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
						 mem_fp,
						 gen_rtx_REG (DImode,
							      LR_REGNUM),
						 mem_lr));
	    }
	  else
	    {
	      insn = emit_insn (gen_loadwb_pairdi_di
				(stack_pointer_rtx,
				 stack_pointer_rtx,
				 hard_frame_pointer_rtx,
				 gen_rtx_REG (DImode, LR_REGNUM),
				 GEN_INT (offset),
				 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
	      add_reg_note (insn, REG_CFA_ADJUST_CFA,
			    (gen_rtx_SET (Pmode, stack_pointer_rtx,
					  plus_constant (Pmode, cfa_reg,
							 offset))));
	    }

	  /* The first part of a frame-related parallel insn
	     is always assumed to be relevant to the frame
	     calculations; subsequent parts, are only
	     frame-related if explicitly marked.  */
	  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
	  add_reg_note (insn, REG_CFA_RESTORE,
			gen_rtx_REG (DImode, LR_REGNUM));

	  if (fp_offset)
	    {
	      insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					       GEN_INT (offset)));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}
      else
	{
	  insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					   GEN_INT (offset)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  /* Stack adjustment for exception handler.  */
  if (crtl->calls_eh_return)
    {
      /* We need to unwind the stack by the offset computed by
	 EH_RETURN_STACKADJ_RTX.  However, at this point the CFA is
	 based on SP.  Ideally we would update the SP and define the
	 CFA along the lines of:

	 SP = SP + EH_RETURN_STACKADJ_RTX
	 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)

	 However the dwarf emitter only understands a constant
	 register offset.

	 The solution chosen here is to use the otherwise unused IP0
	 as a temporary register to hold the current SP value.  The
	 CFA is described using IP0 then SP is modified.  */

      rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);

      insn = emit_move_insn (ip0, stack_pointer_rtx);
      add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
      RTX_FRAME_RELATED_P (insn) = 1;

      emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));

      /* Ensure the assignment to IP0 does not get optimized away.  */
      emit_use (ip0);
    }

  if (frame_size > -1)
    {
      if (frame_size >= 0x1000000)
	{
	  rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
	  emit_move_insn (op0, GEN_INT (frame_size));
	  emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
	  aarch64_set_frame_expr (gen_rtx_SET
				  (Pmode, stack_pointer_rtx,
				   gen_rtx_PLUS (Pmode,
						 stack_pointer_rtx,
						 GEN_INT (frame_size))));
	}
      else if (frame_size > 0)
	{
	  if ((frame_size & 0xfff) != 0)
	    {
	      insn = emit_insn (gen_add2_insn
				(stack_pointer_rtx,
				 GEN_INT ((frame_size
					   & (HOST_WIDE_INT) 0xfff))));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	  if ((frame_size & 0xfff) != frame_size)
	    {
	      insn = emit_insn (gen_add2_insn
				(stack_pointer_rtx,
				 GEN_INT ((frame_size
					   & ~ (HOST_WIDE_INT) 0xfff))));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}

      aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
					   gen_rtx_PLUS (Pmode,
							 stack_pointer_rtx,
							 GEN_INT (offset))));
    }

  emit_use (gen_rtx_REG (DImode, LR_REGNUM));
  if (!for_sibcall)
    emit_jump_insn (ret_rtx);
}
/* Return the place to copy the exception unwinding return address to.
   This will probably be a stack slot, but could (in theory) be the
   return register.  */
rtx
aarch64_final_eh_return_addr (void)
{
  HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;

  aarch64_layout_frame ();
  original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
  frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
		+ crtl->outgoing_args_size);
  offset = frame_size = AARCH64_ROUND_UP (frame_size,
					  STACK_BOUNDARY / BITS_PER_UNIT);
  fp_offset = offset
    - original_frame_size
    - cfun->machine->frame.saved_regs_size;

  if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
    return gen_rtx_REG (DImode, LR_REGNUM);

  /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
     result in a store to save LR introduced by builtin_eh_return () being
     incorrectly deleted because the alias is not detected.
     So in the calculation of the address to copy the exception unwinding
     return address to, we note 2 cases.
     If FP is needed and the fp_offset is 0, it means that SP = FP and hence
     we return a SP-relative location since all the addresses are SP-relative
     in this case.  This prevents the store from being optimized away.
     If the fp_offset is not 0, then the addresses will be FP-relative and
     therefore we return a FP-relative location.  */

  if (frame_pointer_needed)
    {
      if (fp_offset)
	return gen_frame_mem (DImode,
			      plus_constant (Pmode,
					     hard_frame_pointer_rtx,
					     UNITS_PER_WORD));
      else
	return gen_frame_mem (DImode,
			      plus_constant (Pmode,
					     stack_pointer_rtx,
					     UNITS_PER_WORD));
    }

  /* If FP is not needed, we calculate the location of LR, which would be
     at the top of the saved registers block.  */

  return gen_frame_mem (DImode,
			plus_constant (Pmode,
				       stack_pointer_rtx,
				       fp_offset
				       + cfun->machine->frame.saved_regs_size
				       - 2 * UNITS_PER_WORD));
}
/* Output code to build up a constant in a register.  */
static void
aarch64_build_constant (int regnum, HOST_WIDE_INT val)
{
  if (aarch64_bitmask_imm (val, DImode))
    emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
  else
    {
      int i;
      int ncount = 0;
      int zcount = 0;
      HOST_WIDE_INT valp = val >> 16;
      HOST_WIDE_INT valm;
      HOST_WIDE_INT tval;

      for (i = 16; i < 64; i += 16)
	{
	  valm = (valp & 0xffff);

	  if (valm != 0)
	    ++ zcount;

	  if (valm != 0xffff)
	    ++ ncount;

	  valp >>= 16;
	}

      /* zcount contains the number of additional MOVK instructions
	 required if the constant is built up with an initial MOVZ instruction,
	 while ncount is the number of MOVK instructions required if starting
	 with a MOVN instruction.  Choose the sequence that yields the fewest
	 number of instructions, preferring MOVZ instructions when they are
	 both the same.  */
      if (ncount < zcount)
	{
	  emit_move_insn (gen_rtx_REG (Pmode, regnum),
			  GEN_INT ((~val) & 0xffff));
	  tval = 0xffff;
	}
      else
	{
	  emit_move_insn (gen_rtx_REG (Pmode, regnum),
			  GEN_INT (val & 0xffff));
	  tval = 0;
	}

      val >>= 16;

      for (i = 16; i < 64; i += 16)
	{
	  if ((val & 0xffff) != tval)
	    emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
				       GEN_INT (i), GEN_INT (val & 0xffff)));
	  val >>= 16;
	}
    }
}
static void
aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
{
  HOST_WIDE_INT mdelta = delta;
  rtx this_rtx = gen_rtx_REG (Pmode, regnum);
  rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);

  if (mdelta < 0)
    mdelta = -mdelta;

  if (mdelta >= 4096 * 4096)
    {
      aarch64_build_constant (scratchreg, delta);
      emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
    }
  else if (mdelta > 0)
    {
      if (mdelta >= 4096)
	{
	  emit_insn (gen_rtx_SET (Pmode, scratch_rtx,
				  GEN_INT (mdelta / 4096)));
	  rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
	  if (delta < 0)
	    emit_insn (gen_rtx_SET (Pmode, this_rtx,
				    gen_rtx_MINUS (Pmode, this_rtx, shift)));
	  else
	    emit_insn (gen_rtx_SET (Pmode, this_rtx,
				    gen_rtx_PLUS (Pmode, this_rtx, shift)));
	}
      if (mdelta % 4096 != 0)
	{
	  scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
	  emit_insn (gen_rtx_SET (Pmode, this_rtx,
				  gen_rtx_PLUS (Pmode, this_rtx,
						scratch_rtx)));
	}
    }
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */
static void
aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT delta,
			 HOST_WIDE_INT vcall_offset,
			 tree function)
{
  /* The this pointer is always in x0.  Note that this differs from
     Arm where the this pointer may be bumped to r1 if r0 is required
     to return a pointer to an aggregate.  On AArch64 a result value
     pointer will be in x8.  */
  int this_regno = R0_REGNUM;
  rtx this_rtx, temp0, temp1, addr, insn, funexp;

  reload_completed = 1;
  emit_note (NOTE_INSN_PROLOGUE_END);

  if (vcall_offset == 0)
    aarch64_add_constant (this_regno, IP1_REGNUM, delta);
  else
    {
      gcc_assert ((vcall_offset & 0x7) == 0);

      this_rtx = gen_rtx_REG (Pmode, this_regno);
      temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
      temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);

      addr = this_rtx;
      if (delta != 0)
	{
	  if (delta >= -256 && delta < 256)
	    addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
				       plus_constant (Pmode, this_rtx, delta));
	  else
	    aarch64_add_constant (this_regno, IP1_REGNUM, delta);
	}

      aarch64_emit_move (temp0, gen_rtx_MEM (Pmode, addr));

      if (vcall_offset >= -256 && vcall_offset < 32768)
	addr = plus_constant (Pmode, temp0, vcall_offset);
      else
	{
	  aarch64_build_constant (IP1_REGNUM, vcall_offset);
	  addr = gen_rtx_PLUS (Pmode, temp0, temp1);
	}

      aarch64_emit_move (temp1, gen_rtx_MEM (Pmode, addr));
      emit_insn (gen_add2_insn (this_rtx, temp1));
    }

  /* Generate a tail call to the target function.  */
  if (!TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
  SIBLING_CALL_P (insn) = 1;

  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  /* Stop pretending to be a post-reload pass.  */
  reload_completed = 0;
}
static int
aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  if (GET_CODE (*x) == SYMBOL_REF)
    return SYMBOL_REF_TLS_MODEL (*x) != 0;

  /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
     TLS offsets, not real symbol references.  */
  if (GET_CODE (*x) == UNSPEC
      && XINT (*x, 1) == UNSPEC_TLS)
    return -1;

  return 0;
}

static bool
aarch64_tls_referenced_p (rtx x)
{
  if (!TARGET_HAVE_TLS)
    return false;

  return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
}
static int
aarch64_bitmasks_cmp (const void *i1, const void *i2)
{
  const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
  const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;

  if (*imm1 < *imm2)
    return -1;
  if (*imm1 > *imm2)
    return +1;
  return 0;
}
static void
aarch64_build_bitmask_table (void)
{
  unsigned HOST_WIDE_INT mask, imm;
  unsigned int log_e, e, s, r;
  unsigned int nimms = 0;

  for (log_e = 1; log_e <= 6; log_e++)
    {
      e = 1 << log_e;
      if (e == 64)
	mask = ~(HOST_WIDE_INT) 0;
      else
	mask = ((HOST_WIDE_INT) 1 << e) - 1;
      for (s = 1; s < e; s++)
	{
	  for (r = 0; r < e; r++)
	    {
	      /* set s consecutive bits to 1 (s < 64) */
	      imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
	      /* rotate right by r */
	      if (r != 0)
		imm = ((imm >> r) | (imm << (e - r))) & mask;
	      /* replicate the constant depending on SIMD size */
	      switch (log_e)
		{
		case 1: imm |= (imm <<  2);
		case 2: imm |= (imm <<  4);
		case 3: imm |= (imm <<  8);
		case 4: imm |= (imm << 16);
		case 5: imm |= (imm << 32);
		case 6:
		  break;
		default:
		  gcc_unreachable ();
		}
	      gcc_assert (nimms < AARCH64_NUM_BITMASKS);
	      aarch64_bitmasks[nimms++] = imm;
	    }
	}
    }

  gcc_assert (nimms == AARCH64_NUM_BITMASKS);
  qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
	 aarch64_bitmasks_cmp);
}
/* Return true if val can be encoded as a 12-bit unsigned immediate with
   a left shift of 0 or 12 bits.  */
bool
aarch64_uimm12_shift (HOST_WIDE_INT val)
{
  return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
	  || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
	  );
}
/* Return true if val is an immediate that can be loaded into a
   register by a MOVZ instruction.  */
static bool
aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
{
  if (GET_MODE_SIZE (mode) > 4)
    {
      if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
	  || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
	return 1;
    }
  else
    {
      /* Ignore sign extension.  */
      val &= (HOST_WIDE_INT) 0xffffffff;
    }
  return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
	  || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
}
/* Return true if val is a valid bitmask immediate.  */
bool
aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
{
  if (GET_MODE_SIZE (mode) < 8)
    {
      /* Replicate bit pattern.  */
      val &= (HOST_WIDE_INT) 0xffffffff;
      val |= val << 32;
    }
  return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
		  sizeof (aarch64_bitmasks[0]),
		  aarch64_bitmasks_cmp) != NULL;
}
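/* For example (illustrative): 0x5555555555555555 (element width 2,
   run length 1) and 0x00ff00ff00ff00ff (element width 16, run length 8)
   are valid bitmask immediates and can appear directly in AND/ORR/EOR,
   while 0x12345678 is not.  */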
/* Return true if val is an immediate that can be loaded into a
   register in a single instruction.  */
bool
aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
{
  if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
    return 1;
  return aarch64_bitmask_imm (val, mode);
}
static bool
aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  if (GET_CODE (x) == HIGH)
    return true;

  split_const (x, &base, &offset);
  if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
    return (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
	    != SYMBOL_FORCE_TO_MEM);

  return aarch64_tls_referenced_p (x);
}
/* Return true if register REGNO is a valid index register.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
aarch64_regno_ok_for_index_p (int regno, bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
	return true;

      if (!reg_renumber)
	return false;

      regno = reg_renumber[regno];
    }
  return GP_REGNUM_P (regno);
}
/* Return true if register REGNO is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
aarch64_regno_ok_for_base_p (int regno, bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
	return true;

      if (!reg_renumber)
	return false;

      regno = reg_renumber[regno];
    }

  /* The fake registers will be eliminated to either the stack or
     hard frame pointer, both of which are usually valid base registers.
     Reload deals with the cases where the eliminated form isn't valid.  */
  return (GP_REGNUM_P (regno)
	  || regno == SP_REGNUM
	  || regno == FRAME_POINTER_REGNUM
	  || regno == ARG_POINTER_REGNUM);
}
/* Return true if X is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

static bool
aarch64_base_register_rtx_p (rtx x, bool strict_p)
{
  if (!strict_p && GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);

  return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
}
/* Return true if address offset is a valid index.  If it is, fill in INFO
   appropriately.  STRICT_P is true if REG_OK_STRICT is in effect.  */

static bool
aarch64_classify_index (struct aarch64_address_info *info, rtx x,
			enum machine_mode mode, bool strict_p)
{
  enum aarch64_address_type type;
  rtx index;
  int shift;

  /* (reg:P) */
  if ((REG_P (x) || GET_CODE (x) == SUBREG)
      && GET_MODE (x) == Pmode)
    {
      type = ADDRESS_REG_REG;
      index = x;
      shift = 0;
    }
  /* (sign_extend:DI (reg:SI)) */
  else if ((GET_CODE (x) == SIGN_EXTEND
	    || GET_CODE (x) == ZERO_EXTEND)
	   && GET_MODE (x) == DImode
	   && GET_MODE (XEXP (x, 0)) == SImode)
    {
      type = (GET_CODE (x) == SIGN_EXTEND)
	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (x, 0);
      shift = 0;
    }
  /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
  else if (GET_CODE (x) == MULT
	   && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
	       || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
	   && GET_MODE (XEXP (x, 0)) == DImode
	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
	   && CONST_INT_P (XEXP (x, 1)))
    {
      type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = exact_log2 (INTVAL (XEXP (x, 1)));
    }
  /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
  else if (GET_CODE (x) == ASHIFT
	   && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
	       || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
	   && GET_MODE (XEXP (x, 0)) == DImode
	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
	   && CONST_INT_P (XEXP (x, 1)))
    {
      type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = INTVAL (XEXP (x, 1));
    }
  /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
  else if ((GET_CODE (x) == SIGN_EXTRACT
	    || GET_CODE (x) == ZERO_EXTRACT)
	   && GET_MODE (x) == DImode
	   && GET_CODE (XEXP (x, 0)) == MULT
	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
	   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
    {
      type = (GET_CODE (x) == SIGN_EXTRACT)
	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
      if (INTVAL (XEXP (x, 1)) != 32 + shift
	  || INTVAL (XEXP (x, 2)) != 0)
	shift = -1;
    }
  /* (and:DI (mult:DI (reg:DI) (const_int scale))
     (const_int 0xffffffff<<shift)) */
  else if (GET_CODE (x) == AND
	   && GET_MODE (x) == DImode
	   && GET_CODE (XEXP (x, 0)) == MULT
	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
	   && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	   && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
      if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
	shift = -1;
    }
  /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
  else if ((GET_CODE (x) == SIGN_EXTRACT
	    || GET_CODE (x) == ZERO_EXTRACT)
	   && GET_MODE (x) == DImode
	   && GET_CODE (XEXP (x, 0)) == ASHIFT
	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
	   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
    {
      type = (GET_CODE (x) == SIGN_EXTRACT)
	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = INTVAL (XEXP (XEXP (x, 0), 1));
      if (INTVAL (XEXP (x, 1)) != 32 + shift
	  || INTVAL (XEXP (x, 2)) != 0)
	shift = -1;
    }
  /* (and:DI (ashift:DI (reg:DI) (const_int shift))
     (const_int 0xffffffff<<shift)) */
  else if (GET_CODE (x) == AND
	   && GET_MODE (x) == DImode
	   && GET_CODE (XEXP (x, 0)) == ASHIFT
	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
	   && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	   && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = INTVAL (XEXP (XEXP (x, 0), 1));
      if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
	shift = -1;
    }
  /* (mult:P (reg:P) (const_int scale)) */
  else if (GET_CODE (x) == MULT
	   && GET_MODE (x) == Pmode
	   && GET_MODE (XEXP (x, 0)) == Pmode
	   && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_REG;
      index = XEXP (x, 0);
      shift = exact_log2 (INTVAL (XEXP (x, 1)));
    }
  /* (ashift:P (reg:P) (const_int shift)) */
  else if (GET_CODE (x) == ASHIFT
	   && GET_MODE (x) == Pmode
	   && GET_MODE (XEXP (x, 0)) == Pmode
	   && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_REG;
      index = XEXP (x, 0);
      shift = INTVAL (XEXP (x, 1));
    }
  else
    return false;

  if (GET_CODE (index) == SUBREG)
    index = SUBREG_REG (index);

  if ((shift == 0
       || (shift > 0 && shift <= 3
	   && (1 << shift) == GET_MODE_SIZE (mode)))
      && REG_P (index)
      && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
    {
      info->type = type;
      info->offset = index;
      info->shift = shift;
      return true;
    }

  return false;
}
static inline bool
offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
{
  return (offset >= -64 * GET_MODE_SIZE (mode)
	  && offset < 64 * GET_MODE_SIZE (mode)
	  && offset % GET_MODE_SIZE (mode) == 0);
}

static inline bool
offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
			       HOST_WIDE_INT offset)
{
  return offset >= -256 && offset < 256;
}

static inline bool
offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
{
  return (offset >= 0
	  && offset < 4096 * GET_MODE_SIZE (mode)
	  && offset % GET_MODE_SIZE (mode) == 0);
}
/* Return true if X is a valid address for machine mode MODE.  If it is,
   fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
   effect.  OUTER_CODE is PARALLEL for a load/store pair.  */

static bool
aarch64_classify_address (struct aarch64_address_info *info,
			  rtx x, enum machine_mode mode,
			  RTX_CODE outer_code, bool strict_p)
{
  enum rtx_code code = GET_CODE (x);
  rtx op0, op1;
  bool allow_reg_index_p =
    outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;

  /* Don't support anything other than POST_INC or REG addressing for
     AdvSIMD.  */
  if (aarch64_vector_mode_p (mode)
      && (code != POST_INC && code != REG))
    return false;

  switch (code)
    {
    case REG:
    case SUBREG:
      info->type = ADDRESS_REG_IMM;
      info->base = x;
      info->offset = const0_rtx;
      return aarch64_base_register_rtx_p (x, strict_p);

    case PLUS:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      if (GET_MODE_SIZE (mode) != 0
	  && CONST_INT_P (op1)
	  && aarch64_base_register_rtx_p (op0, strict_p))
	{
	  HOST_WIDE_INT offset = INTVAL (op1);

	  info->type = ADDRESS_REG_IMM;
	  info->base = op0;
	  info->offset = op1;

	  /* TImode and TFmode values are allowed in both pairs of X
	     registers and individual Q registers.  The available
	     address modes are:
	     X,X: 7-bit signed scaled offset
	     Q:   9-bit signed offset
	     We conservatively require an offset representable in either mode.
	   */
	  if (mode == TImode || mode == TFmode)
	    return (offset_7bit_signed_scaled_p (mode, offset)
		    && offset_9bit_signed_unscaled_p (mode, offset));

	  if (outer_code == PARALLEL)
	    return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
		    && offset_7bit_signed_scaled_p (mode, offset));
	  else
	    return (offset_9bit_signed_unscaled_p (mode, offset)
		    || offset_12bit_unsigned_scaled_p (mode, offset));
	}

      if (allow_reg_index_p)
	{
	  /* Look for base + (scaled/extended) index register.  */
	  if (aarch64_base_register_rtx_p (op0, strict_p)
	      && aarch64_classify_index (info, op1, mode, strict_p))
	    {
	      info->base = op0;
	      return true;
	    }
	  if (aarch64_base_register_rtx_p (op1, strict_p)
	      && aarch64_classify_index (info, op0, mode, strict_p))
	    {
	      info->base = op1;
	      return true;
	    }
	}

      return false;

    case POST_INC:
    case POST_DEC:
    case PRE_INC:
    case PRE_DEC:
      info->type = ADDRESS_REG_WB;
      info->base = XEXP (x, 0);
      info->offset = NULL_RTX;
      return aarch64_base_register_rtx_p (info->base, strict_p);

    case POST_MODIFY:
    case PRE_MODIFY:
      info->type = ADDRESS_REG_WB;
      info->base = XEXP (x, 0);
      if (GET_CODE (XEXP (x, 1)) == PLUS
	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
	  && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
	  && aarch64_base_register_rtx_p (info->base, strict_p))
	{
	  HOST_WIDE_INT offset;
	  info->offset = XEXP (XEXP (x, 1), 1);
	  offset = INTVAL (info->offset);

	  /* TImode and TFmode values are allowed in both pairs of X
	     registers and individual Q registers.  The available
	     address modes are:
	     X,X: 7-bit signed scaled offset
	     Q:   9-bit signed offset
	     We conservatively require an offset representable in either mode.
	   */
	  if (mode == TImode || mode == TFmode)
	    return (offset_7bit_signed_scaled_p (mode, offset)
		    && offset_9bit_signed_unscaled_p (mode, offset));

	  if (outer_code == PARALLEL)
	    return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
		    && offset_7bit_signed_scaled_p (mode, offset));
	  else
	    return offset_9bit_signed_unscaled_p (mode, offset);
	}
      return false;

    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      /* load literal: pc-relative constant pool entry.  Only supported
         for SI mode or larger.  */
      info->type = ADDRESS_SYMBOLIC;
      if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
	{
	  rtx sym, addend;

	  split_const (x, &sym, &addend);
	  return (GET_CODE (sym) == LABEL_REF
		  || (GET_CODE (sym) == SYMBOL_REF
		      && CONSTANT_POOL_ADDRESS_P (sym)));
	}
      return false;

    case LO_SUM:
      info->type = ADDRESS_LO_SUM;
      info->base = XEXP (x, 0);
      info->offset = XEXP (x, 1);
      if (allow_reg_index_p
	  && aarch64_base_register_rtx_p (info->base, strict_p))
	{
	  rtx sym, offs;
	  split_const (info->offset, &sym, &offs);
	  if (GET_CODE (sym) == SYMBOL_REF
	      && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
		  == SYMBOL_SMALL_ABSOLUTE))
	    {
	      /* The symbol and offset must be aligned to the access size.  */
	      unsigned int align;
	      unsigned int ref_size;

	      if (CONSTANT_POOL_ADDRESS_P (sym))
		align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
	      else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
		{
		  tree exp = SYMBOL_REF_DECL (sym);
		  align = TYPE_ALIGN (TREE_TYPE (exp));
		  align = CONSTANT_ALIGNMENT (exp, align);
		}
	      else if (SYMBOL_REF_DECL (sym))
		align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
	      else
		align = BITS_PER_UNIT;

	      ref_size = GET_MODE_SIZE (mode);
	      if (ref_size == 0)
		ref_size = GET_MODE_SIZE (DImode);

	      return ((INTVAL (offs) & (ref_size - 1)) == 0
		      && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
	    }
	}
      return false;

    default:
      return false;
    }
}
bool
aarch64_symbolic_address_p (rtx x)
{
  rtx offset;

  split_const (x, &x, &offset);
  return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
}

/* Classify the base of symbolic expression X, given that X appears in
   context CONTEXT.  */
enum aarch64_symbol_type
aarch64_classify_symbolic_expression (rtx x,
				      enum aarch64_symbol_context context)
{
  rtx offset;

  split_const (x, &x, &offset);
  return aarch64_classify_symbol (x, context);
}
/* Return TRUE if X is a legitimate address for accessing memory in
   mode MODE.  */
static bool
aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
{
  struct aarch64_address_info addr;

  return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
}

/* Return TRUE if X is a legitimate address for accessing memory in
   mode MODE.  OUTER_CODE will be PARALLEL if this is a load/store
   pair operation.  */
bool
aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
			      RTX_CODE outer_code, bool strict_p)
{
  struct aarch64_address_info addr;

  return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
}
/* Return TRUE if rtx X is immediate constant 0.0 */
bool
aarch64_float_const_zero_rtx_p (rtx x)
{
  REAL_VALUE_TYPE r;

  if (GET_MODE (x) == VOIDmode)
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  if (REAL_VALUE_MINUS_ZERO (r))
    return !HONOR_SIGNED_ZEROS (GET_MODE (x));
  return REAL_VALUES_EQUAL (r, dconst0);
}
/* Return the fixed registers used for condition codes.  */

static bool
aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = CC_REGNUM;
  *p2 = INVALID_REGNUM;
  return true;
}
enum machine_mode
aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
{
  /* All floating point compares return CCFP if it is an equality
     comparison, and CCFPE otherwise.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    {
      switch (code)
	{
	case EQ:
	case NE:
	case UNORDERED:
	case ORDERED:
	case UNLT:
	case UNLE:
	case UNGT:
	case UNGE:
	case UNEQ:
	case LTGT:
	  return CCFPmode;

	case LT:
	case LE:
	case GT:
	case GE:
	  return CCFPEmode;

	default:
	  gcc_unreachable ();
	}
    }

  if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
      && y == const0_rtx
      && (code == EQ || code == NE || code == LT || code == GE)
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
	  || GET_CODE (x) == NEG))
    return CC_NZmode;

  /* A compare with a shifted operand.  Because of canonicalization,
     the comparison will have to be swapped when we emit the assembly
     code.  */
  if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
      && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
	  || GET_CODE (x) == LSHIFTRT
	  || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
    return CC_SWPmode;

  /* A compare of a mode narrower than SI mode against zero can be done
     by extending the value in the comparison.  */
  if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
      && y == const0_rtx)
    /* Only use sign-extension if we really need it.  */
    return ((code == GT || code == GE || code == LE || code == LT)
	    ? CC_SESWPmode : CC_ZESWPmode);

  /* For everything else, return CCmode.  */
  return CCmode;
}
int
aarch64_get_condition_code (rtx x)
{
  enum machine_mode mode = GET_MODE (XEXP (x, 0));
  enum rtx_code comp_code = GET_CODE (x);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));

  switch (mode)
    {
    case CCFPmode:
    case CCFPEmode:
      switch (comp_code)
	{
	case GE: return AARCH64_GE;
	case GT: return AARCH64_GT;
	case LE: return AARCH64_LS;
	case LT: return AARCH64_MI;
	case NE: return AARCH64_NE;
	case EQ: return AARCH64_EQ;
	case ORDERED: return AARCH64_VC;
	case UNORDERED: return AARCH64_VS;
	case UNLT: return AARCH64_LT;
	case UNLE: return AARCH64_LE;
	case UNGT: return AARCH64_HI;
	case UNGE: return AARCH64_PL;
	default: gcc_unreachable ();
	}
      break;

    case CCmode:
      switch (comp_code)
	{
	case NE: return AARCH64_NE;
	case EQ: return AARCH64_EQ;
	case GE: return AARCH64_GE;
	case GT: return AARCH64_GT;
	case LE: return AARCH64_LE;
	case LT: return AARCH64_LT;
	case GEU: return AARCH64_CS;
	case GTU: return AARCH64_HI;
	case LEU: return AARCH64_LS;
	case LTU: return AARCH64_CC;
	default: gcc_unreachable ();
	}
      break;

    case CC_SWPmode:
    case CC_ZESWPmode:
    case CC_SESWPmode:
      switch (comp_code)
	{
	case NE: return AARCH64_NE;
	case EQ: return AARCH64_EQ;
	case GE: return AARCH64_LE;
	case GT: return AARCH64_LT;
	case LE: return AARCH64_GE;
	case LT: return AARCH64_GT;
	case GEU: return AARCH64_LS;
	case GTU: return AARCH64_CC;
	case LEU: return AARCH64_CS;
	case LTU: return AARCH64_HI;
	default: gcc_unreachable ();
	}
      break;

    case CC_NZmode:
      switch (comp_code)
	{
	case NE: return AARCH64_NE;
	case EQ: return AARCH64_EQ;
	case GE: return AARCH64_PL;
	case LT: return AARCH64_MI;
	default: gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
      break;
    }
}

static unsigned
bit_count (unsigned HOST_WIDE_INT value)
{
  unsigned count = 0;

  while (value)
    {
      count++;
      value &= value - 1;
    }

  return count;
}
static void
aarch64_print_operand (FILE *f, rtx x, char code)
{
  switch (code)
    {
    case 'e':
      /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w.  */
      {
	int n;

	if (GET_CODE (x) != CONST_INT
	    || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
	  {
	    output_operand_lossage ("invalid operand for '%%%c'", code);
	    return;
	  }

	switch (n)
	  {
	  case 3:
	    fputc ('b', f);
	    break;
	  case 4:
	    fputc ('h', f);
	    break;
	  case 5:
	    fputc ('w', f);
	    break;
	  default:
	    output_operand_lossage ("invalid operand for '%%%c'", code);
	    return;
	  }
      }
      break;

    case 'p':
      {
	int n;

	/* Print N such that 2^N == X.  */
	if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
	  {
	    output_operand_lossage ("invalid operand for '%%%c'", code);
	    return;
	  }

	asm_fprintf (f, "%d", n);
      }
      break;

    case 'P':
      /* Print the number of non-zero bits in X (a const_int).  */
      if (GET_CODE (x) != CONST_INT)
	{
	  output_operand_lossage ("invalid operand for '%%%c'", code);
	  return;
	}

      asm_fprintf (f, "%u", bit_count (INTVAL (x)));
      break;

    case 'H':
      /* Print the higher numbered register of a pair (TImode) of regs.  */
      if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
	{
	  output_operand_lossage ("invalid operand for '%%%c'", code);
	  return;
	}

      asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
      break;

    case 'm':
      /* Print a condition (eq, ne, etc).  */

      /* CONST_TRUE_RTX means always -- that's the default.  */
      if (x == const_true_rtx)
	return;

      if (!COMPARISON_P (x))
	{
	  output_operand_lossage ("invalid operand for '%%%c'", code);
	  return;
	}

      fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
      break;

    case 'M':
      /* Print the inverse of a condition (eq <-> ne, etc).  */

      /* CONST_TRUE_RTX means never -- that's the default.  */
      if (x == const_true_rtx)
	{
	  fputs ("nv", f);
	  return;
	}

      if (!COMPARISON_P (x))
	{
	  output_operand_lossage ("invalid operand for '%%%c'", code);
	  return;
	}

      fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
				     (aarch64_get_condition_code (x))], f);
      break;

    case 'b':
    case 'h':
    case 's':
    case 'd':
    case 'q':
      /* Print a scalar FP/SIMD register name.  */
      if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
	{
	  output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
	  return;
	}
      asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
      break;

    case 'S':
    case 'T':
    case 'U':
    case 'V':
      /* Print the first FP/SIMD register name in a list.  */
      if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
	{
	  output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
	  return;
	}
      asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
      break;

    case 'X':
      /* Print bottom 16 bits of integer constant in hex.  */
      if (GET_CODE (x) != CONST_INT)
	{
	  output_operand_lossage ("invalid operand for '%%%c'", code);
	  return;
	}
      asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
      break;

    case 'w':
    case 'x':
      /* Print a general register name or the zero register (32-bit or
         64-bit).  */
      if (x == const0_rtx
	  || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
	{
	  asm_fprintf (f, "%czr", code);
	  break;
	}

      if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
	{
	  asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
	  break;
	}

      if (REG_P (x) && REGNO (x) == SP_REGNUM)
	{
	  asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
	  break;
	}

      /* Fall through */

    case 0:
      /* Print a normal operand, if it's a general register, then we
	 assume DImode.  */
      if (x == NULL)
	{
	  output_operand_lossage ("missing operand");
	  return;
	}

      switch (GET_CODE (x))
	{
	case REG:
	  asm_fprintf (f, "%s", reg_names [REGNO (x)]);
	  break;

	case MEM:
	  aarch64_memory_reference_mode = GET_MODE (x);
	  output_address (XEXP (x, 0));
	  break;

	case LABEL_REF:
	case SYMBOL_REF:
	  output_addr_const (asm_out_file, x);
	  break;

	case CONST_INT:
	  asm_fprintf (f, "%wd", INTVAL (x));
	  break;

	case CONST_VECTOR:
	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
	    {
	      gcc_assert (aarch64_const_vec_all_same_int_p (x,
							    HOST_WIDE_INT_MIN,
							    HOST_WIDE_INT_MAX));
	      asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
	    }
	  else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
	    {
	      fputc ('0', f);
	    }
	  else
	    gcc_unreachable ();
	  break;

	case CONST_DOUBLE:
	  /* CONST_DOUBLE can represent a double-width integer.
	     In this case, the mode of x is VOIDmode.  */
	  if (GET_MODE (x) == VOIDmode)
	    ; /* Do Nothing.  */
	  else if (aarch64_float_const_zero_rtx_p (x))
	    {
	      fputc ('0', f);
	      break;
	    }
	  else if (aarch64_float_const_representable_p (x))
	    {
#define buf_size 20
	      char float_buf[buf_size] = {'\0'};
	      REAL_VALUE_TYPE r;
	      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
	      real_to_decimal_for_mode (float_buf, &r,
					buf_size, buf_size,
					1, GET_MODE (x));
	      asm_fprintf (asm_out_file, "%s", float_buf);
	      break;
#undef buf_size
	    }
	  output_operand_lossage ("invalid constant");
	  return;
	default:
	  output_operand_lossage ("invalid operand");
	  return;
	}
      break;

    case 'A':
      if (GET_CODE (x) == HIGH)
	x = XEXP (x, 0);

      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
	{
	case SYMBOL_SMALL_GOT:
	  asm_fprintf (asm_out_file, ":got:");
	  break;

	case SYMBOL_SMALL_TLSGD:
	  asm_fprintf (asm_out_file, ":tlsgd:");
	  break;

	case SYMBOL_SMALL_TLSDESC:
	  asm_fprintf (asm_out_file, ":tlsdesc:");
	  break;

	case SYMBOL_SMALL_GOTTPREL:
	  asm_fprintf (asm_out_file, ":gottprel:");
	  break;

	case SYMBOL_SMALL_TPREL:
	  asm_fprintf (asm_out_file, ":tprel:");
	  break;

	default:
	  break;
	}
      output_addr_const (asm_out_file, x);
      break;

    case 'L':
      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
	{
	case SYMBOL_SMALL_GOT:
	  asm_fprintf (asm_out_file, ":lo12:");
	  break;

	case SYMBOL_SMALL_TLSGD:
	  asm_fprintf (asm_out_file, ":tlsgd_lo12:");
	  break;

	case SYMBOL_SMALL_TLSDESC:
	  asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
	  break;

	case SYMBOL_SMALL_GOTTPREL:
	  asm_fprintf (asm_out_file, ":gottprel_lo12:");
	  break;

	case SYMBOL_SMALL_TPREL:
	  asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
	  break;

	default:
	  break;
	}
      output_addr_const (asm_out_file, x);
      break;

    case 'G':
      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
	{
	case SYMBOL_SMALL_TPREL:
	  asm_fprintf (asm_out_file, ":tprel_hi12:");
	  break;
	default:
	  break;
	}
      output_addr_const (asm_out_file, x);
      break;

    default:
      output_operand_lossage ("invalid operand prefix '%%%c'", code);
      return;
    }
}
void
aarch64_print_operand_address (FILE *f, rtx x)
{
  struct aarch64_address_info addr;

  if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
				MEM, true))
    switch (addr.type)
      {
      case ADDRESS_REG_IMM:
	if (addr.offset == const0_rtx)
	  asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
	else
	  asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
		       INTVAL (addr.offset));
	return;

      case ADDRESS_REG_REG:
	if (addr.shift == 0)
	  asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
		       reg_names [REGNO (addr.offset)]);
	else
	  asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
		       reg_names [REGNO (addr.offset)], addr.shift);
	return;

      case ADDRESS_REG_UXTW:
	if (addr.shift == 0)
	  asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
		       REGNO (addr.offset) - R0_REGNUM);
	else
	  asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
		       REGNO (addr.offset) - R0_REGNUM, addr.shift);
	return;

      case ADDRESS_REG_SXTW:
	if (addr.shift == 0)
	  asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
		       REGNO (addr.offset) - R0_REGNUM);
	else
	  asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
		       REGNO (addr.offset) - R0_REGNUM, addr.shift);
	return;

      case ADDRESS_REG_WB:
	switch (GET_CODE (x))
	  {
	  case PRE_INC:
	    asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
			 GET_MODE_SIZE (aarch64_memory_reference_mode));
	    return;
	  case POST_INC:
	    asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
			 GET_MODE_SIZE (aarch64_memory_reference_mode));
	    return;
	  case PRE_DEC:
	    asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
			 GET_MODE_SIZE (aarch64_memory_reference_mode));
	    return;
	  case POST_DEC:
	    asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
			 GET_MODE_SIZE (aarch64_memory_reference_mode));
	    return;
	  case PRE_MODIFY:
	    asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
			 INTVAL (addr.offset));
	    return;
	  case POST_MODIFY:
	    asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
			 INTVAL (addr.offset));
	    return;
	  default:
	    break;
	  }
	break;

      case ADDRESS_LO_SUM:
	asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
	output_addr_const (f, addr.offset);
	asm_fprintf (f, "]");
	return;

      case ADDRESS_SYMBOLIC:
	break;
      }

  output_addr_const (f, x);
}
void
aarch64_function_profiler (FILE *f ATTRIBUTE_UNUSED,
			   int labelno ATTRIBUTE_UNUSED)
{
  sorry ("function profiling");
}
bool
aarch64_label_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == LABEL_REF)
    return true;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
     referencing instruction, but they are constant offsets, not
     symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return false;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}
/* Implement REGNO_REG_CLASS.  */

enum reg_class
aarch64_regno_regclass (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return CORE_REGS;

  if (regno == SP_REGNUM)
    return STACK_REG;

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return CORE_REGS;

  if (FP_REGNUM_P (regno))
    return FP_LO_REGNUM_P (regno) ?  FP_LO_REGS : FP_REGS;

  return NO_REGS;
}
/* Try a machine-dependent way of reloading an illegitimate address
   operand.  If we find one, push the reload and return the new rtx.  */

rtx
aarch64_legitimize_reload_address (rtx *x_p,
				   enum machine_mode mode,
				   int opnum, int type,
				   int ind_levels ATTRIBUTE_UNUSED)
{
  rtx x = *x_p;

  /* Do not allow mem (plus (reg, const)) if vector mode.  */
  if (aarch64_vector_mode_p (mode)
      && GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))
    {
      rtx orig_rtx = x;
      x = copy_rtx (x);
      push_reload (orig_rtx, NULL_RTX, x_p, NULL,
		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      return x;
    }

  /* We must recognize output that we have already generated ourselves.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && REG_P (XEXP (XEXP (x, 0), 0))
      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
      && CONST_INT_P (XEXP (x, 1)))
    {
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      return x;
    }

  /* We wish to handle large displacements off a base register by splitting
     the addend across an add and the mem insn.  This can cut the number of
     extra insns needed from 3 to 1.  It is only useful for load/store of a
     single register with 12 bit offset field.  */
  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1))
      && HARD_REGISTER_P (XEXP (x, 0))
      && mode != TImode
      && mode != TFmode
      && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
    {
      HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
      HOST_WIDE_INT low = val & 0xfff;
      HOST_WIDE_INT high = val - low;
      HOST_WIDE_INT offs;
      rtx cst;

      /* Reload non-zero BLKmode offsets.  This is because we cannot ascertain
	 BLKmode alignment.  */
      if (GET_MODE_SIZE (mode) == 0)
	return NULL_RTX;

      offs = low % GET_MODE_SIZE (mode);

      /* Align misaligned offset by adjusting high part to compensate.  */
      if (offs != 0)
	{
	  if (aarch64_uimm12_shift (high + offs))
	    {
	      /* Align down.  */
	      low = low - offs;
	      high = high + offs;
	    }
	  else
	    {
	      /* Align up.  */
	      offs = GET_MODE_SIZE (mode) - offs;
	      low = low + offs;
	      high = high + (low & 0x1000) - offs;
	      low &= 0xfff;
	    }
	}

      /* Check for overflow.  */
      if (high + low != val)
	return NULL_RTX;

      cst = GEN_INT (high);
      if (!aarch64_uimm12_shift (high))
	cst = force_const_mem (Pmode, cst);

      /* Reload high part into base reg, leaving the low part
	 in the mem instruction.  */
      x = gen_rtx_PLUS (Pmode,
			gen_rtx_PLUS (Pmode, XEXP (x, 0), cst),
			GEN_INT (low));

      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      return x;
    }

  return NULL_RTX;
}
static reg_class_t
aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
			  reg_class_t rclass,
			  enum machine_mode mode,
			  secondary_reload_info *sri)
{
  /* Address expressions of the form PLUS (SP, large_offset) need two
     scratch registers, one for the constant, and one for holding a
     copy of SP, since SP cannot be used on the RHS of an add-reg
     instruction.  */
  if (mode == DImode
      && GET_CODE (x) == PLUS
      && XEXP (x, 0) == stack_pointer_rtx
      && CONST_INT_P (XEXP (x, 1))
      && !aarch64_uimm12_shift (INTVAL (XEXP (x, 1))))
    {
      sri->icode = CODE_FOR_reload_sp_immediate;
      return NO_REGS;
    }

  /* Without the TARGET_SIMD instructions we cannot move a Q register
     to a Q register directly.  We need a scratch.  */
  if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
      && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
      && reg_class_subset_p (rclass, FP_REGS))
    {
      if (mode == TFmode)
	sri->icode = CODE_FOR_aarch64_reload_movtf;
      else if (mode == TImode)
	sri->icode = CODE_FOR_aarch64_reload_movti;
      return NO_REGS;
    }

  /* A TFmode or TImode memory access should be handled via an FP_REGS
     because AArch64 has richer addressing modes for LDR/STR instructions
     than LDP/STP instructions.  */
  if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
      && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
    return FP_REGS;

  if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
      return CORE_REGS;

  return NO_REGS;
}
static bool
aarch64_can_eliminate (const int from, const int to)
{
  /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
     HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM.  */

  if (frame_pointer_needed)
    {
      if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
	return true;
      if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
	return false;
      if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
	  && !cfun->calls_alloca)
	return true;
      if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
	return true;
      return false;
    }
  else
    {
      /* If we decided that we didn't need a leaf frame pointer but then used
	 LR in the function, then we'll want a frame pointer after all, so
	 prevent this elimination to ensure a frame pointer is used.

	 NOTE: the original value of flag_omit_frame_pointer gets trashed
	 IFF flag_omit_leaf_frame_pointer is true, so we check the value
	 of faked_omit_frame_pointer here (which is true when we always
	 wish to keep non-leaf frame pointers but only wish to keep leaf frame
	 pointers when LR is clobbered).  */
      if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
	  && df_regs_ever_live_p (LR_REGNUM)
	  && faked_omit_frame_pointer)
	return false;
    }

  return true;
}
HOST_WIDE_INT
aarch64_initial_elimination_offset (unsigned from, unsigned to)
{
  HOST_WIDE_INT frame_size;
  HOST_WIDE_INT offset;

  aarch64_layout_frame ();
  frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
		+ crtl->outgoing_args_size
		+ cfun->machine->saved_varargs_size);

  frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
  offset = frame_size;

  if (to == HARD_FRAME_POINTER_REGNUM)
    {
      if (from == ARG_POINTER_REGNUM)
	return offset - crtl->outgoing_args_size;

      if (from == FRAME_POINTER_REGNUM)
	return cfun->machine->frame.saved_regs_size;
    }

  if (to == STACK_POINTER_REGNUM)
    {
      if (from == FRAME_POINTER_REGNUM)
	{
	  HOST_WIDE_INT elim = crtl->outgoing_args_size
			       + cfun->machine->frame.saved_regs_size
			       - cfun->machine->frame.fp_lr_offset;
	  elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
	  return elim;
	}
    }

  return offset;
}
/* Implement RETURN_ADDR_RTX.  We do not support moving back to a
   previous frame.  */

rtx
aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return const0_rtx;
  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
static void
aarch64_asm_trampoline_template (FILE *f)
{
  asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
  asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
  asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
  assemble_aligned_integer (4, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}
static int
aarch64_trampoline_size (void)
{
  return 32;  /* 3 insns + padding + 2 dwords.  */
}
static void
aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;

  /* Don't need to copy the trailing D-words, we fill those in below.  */
  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE - 16), BLOCK_OP_NORMAL);
  mem = adjust_address (m_tramp, DImode, 16);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  emit_move_insn (mem, fnaddr);

  mem = adjust_address (m_tramp, DImode, 24);
  emit_move_insn (mem, chain_value);

  /* XXX We should really define a "clear_cache" pattern and use
     gen_clear_cache().  */
  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
		     LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
		     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
}
static unsigned char
aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
{
  switch (regclass)
    {
    case CORE_REGS:
    case POINTER_REGS:
    case GENERAL_REGS:
    case ALL_REGS:
    case FP_REGS:
    case FP_LO_REGS:
      return
	aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
				       (GET_MODE_SIZE (mode) + 7) / 8;
    case STACK_REG:
      return 1;

    case NO_REGS:
      return 0;

    default:
      break;
    }
  gcc_unreachable ();
}
static reg_class_t
aarch64_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t regclass)
{
  return ((regclass == POINTER_REGS || regclass == STACK_REG)
	  ? GENERAL_REGS : regclass);
}
void
aarch64_asm_output_labelref (FILE* f, const char *name)
{
  asm_fprintf (f, "%U%s", name);
}
static void
aarch64_elf_asm_constructor (rtx symbol, int priority)
{
  if (priority == DEFAULT_INIT_PRIORITY)
    default_ctor_section_asm_out_constructor (symbol, priority);
  else
    {
      section *s;
      char buf[18];
      snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
      s = get_section (buf, SECTION_WRITE, NULL);
      switch_to_section (s);
      assemble_align (POINTER_SIZE);
      fputs ("\t.dword\t", asm_out_file);
      output_addr_const (asm_out_file, symbol);
      fputc ('\n', asm_out_file);
    }
}

static void
aarch64_elf_asm_destructor (rtx symbol, int priority)
{
  if (priority == DEFAULT_INIT_PRIORITY)
    default_dtor_section_asm_out_destructor (symbol, priority);
  else
    {
      section *s;
      char buf[18];
      snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
      s = get_section (buf, SECTION_WRITE, NULL);
      switch_to_section (s);
      assemble_align (POINTER_SIZE);
      fputs ("\t.dword\t", asm_out_file);
      output_addr_const (asm_out_file, symbol);
      fputc ('\n', asm_out_file);
    }
}
const char*
aarch64_output_casesi (rtx *operands)
{
  char label[100];
  char buf[128];
  int index;
  rtx diff_vec = PATTERN (next_active_insn (operands[2]));

  static const char *const patterns[4][2] =
  {
    {
      "ldrb\t%w3, [%0,%w1,uxtw]",
      "add\t%3, %4, %w3, sxtb #2"
    },
    {
      "ldrh\t%w3, [%0,%w1,uxtw #1]",
      "add\t%3, %4, %w3, sxth #2"
    },
    {
      "ldr\t%w3, [%0,%w1,uxtw #2]",
      "add\t%3, %4, %w3, sxtw #2"
    },
    /* We assume that DImode is only generated when not optimizing and
       that we don't really need 64-bit address offsets.  That would
       imply an object file with 8GB of code in a single function!  */
    {
      "ldr\t%w3, [%0,%w1,uxtw #2]",
      "add\t%3, %4, %w3, sxtw #2"
    }
  };

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));

  gcc_assert (index >= 0 && index <= 3);

  /* Need to implement table size reduction, by changing the code below.  */
  output_asm_insn (patterns[index][0], operands);
  ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
  snprintf (buf, sizeof (buf),
	    "adr\t%%4, %s", targetm.strip_name_encoding (label));
  output_asm_insn (buf, operands);
  output_asm_insn (patterns[index][1], operands);
  output_asm_insn ("br\t%3", operands);
  assemble_label (asm_out_file, label);
  return "";
}
/* Return size in bits of an arithmetic operand which is shifted/scaled and
   masked such that it is suitable for a UXTB, UXTH, or UXTW extend
   operator.  */

int
aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
{
  if (shift >= 0 && shift <= 3)
    {
      int size;
      for (size = 8; size <= 32; size *= 2)
	{
	  HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
	  if (mask == bits << shift)
	    return size;
	}
    }
  return 0;
}
static bool
aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
				   const_rtx x ATTRIBUTE_UNUSED)
{
  /* We can't use blocks for constants when we're using a per-function
     constant pool.  */
  return false;
}

static section *
aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
			    rtx x ATTRIBUTE_UNUSED,
			    unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  /* Force all constant pool entries into the current function section.  */
  return function_section (current_function_decl);
}
/* Helper function for rtx cost calculation.  Strip a shift expression
   from X.  Returns the inner operand if successful, or the original
   expression on failure.  */
static rtx
aarch64_strip_shift (rtx x)
{
  rtx op = x;

  if ((GET_CODE (op) == ASHIFT
       || GET_CODE (op) == ASHIFTRT
       || GET_CODE (op) == LSHIFTRT)
      && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);

  if (GET_CODE (op) == MULT
      && CONST_INT_P (XEXP (op, 1))
      && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
    return XEXP (op, 0);

  return x;
}
/* Helper function for rtx cost calculation.  Strip a shift or extend
   expression from X.  Returns the inner operand if successful, or the
   original expression on failure.  We deal with a number of possible
   canonicalization variations here.  */
static rtx
aarch64_strip_shift_or_extend (rtx x)
{
  rtx op = x;

  /* Zero and sign extraction of a widened value.  */
  if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
      && XEXP (op, 2) == const0_rtx
      && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
					 XEXP (op, 1)))
    return XEXP (XEXP (op, 0), 0);

  /* It can also be represented (for zero-extend) as an AND with an
     immediate.  */
  if (GET_CODE (op) == AND
      && GET_CODE (XEXP (op, 0)) == MULT
      && CONST_INT_P (XEXP (XEXP (op, 0), 1))
      && CONST_INT_P (XEXP (op, 1))
      && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
			   INTVAL (XEXP (op, 1))) != 0)
    return XEXP (XEXP (op, 0), 0);

  /* Now handle extended register, as this may also have an optional
     left shift by 1..4.  */
  if (GET_CODE (op) == ASHIFT
      && CONST_INT_P (XEXP (op, 1))
      && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
    op = XEXP (op, 0);

  if (GET_CODE (op) == ZERO_EXTEND
      || GET_CODE (op) == SIGN_EXTEND)
    op = XEXP (op, 0);

  if (op != x)
    return op;

  return aarch64_strip_shift (x);
}
/* Calculate the cost of calculating X, storing it in *COST.  Result
   is true if the total cost of the operation has now been calculated.  */
static bool
aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
		   int param ATTRIBUTE_UNUSED, int *cost, bool speed)
{
  rtx op0, op1;
  const struct cpu_rtx_cost_table *extra_cost
    = aarch64_tune_params->insn_extra_cost;

  switch (code)
    {
    case SET:
      op0 = SET_DEST (x);
      op1 = SET_SRC (x);

      switch (GET_CODE (op0))
	{
	case MEM:
	  if (speed)
	    *cost += extra_cost->memory_store;

	  if (op1 != const0_rtx)
	    *cost += rtx_cost (op1, SET, 1, speed);
	  return true;

	case SUBREG:
	  if (! REG_P (SUBREG_REG (op0)))
	    *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
	  /* Fall through.  */
	case REG:
	  /* Cost is just the cost of the RHS of the set.  */
	  *cost += rtx_cost (op1, SET, 1, true);
	  return true;

	case ZERO_EXTRACT:  /* Bit-field insertion.  */
	case SIGN_EXTRACT:
	  /* Strip any redundant widening of the RHS to meet the width of
	     the target.  */
	  if (GET_CODE (op1) == SUBREG)
	    op1 = SUBREG_REG (op1);
	  if ((GET_CODE (op1) == ZERO_EXTEND
	       || GET_CODE (op1) == SIGN_EXTEND)
	      && GET_CODE (XEXP (op0, 1)) == CONST_INT
	      && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
		  >= INTVAL (XEXP (op0, 1))))
	    op1 = XEXP (op1, 0);
	  *cost += rtx_cost (op1, SET, 1, speed);
	  return true;

	default:
	  break;
	}
      return false;

    case MEM:
      if (speed)
	*cost += extra_cost->memory_load;

      return true;

    case NEG:
      op0 = CONST0_RTX (GET_MODE (x));
      op1 = XEXP (x, 0);
      goto cost_minus;

    case COMPARE:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (op1 == const0_rtx
	  && GET_CODE (op0) == AND)
	{
	  x = op0;
	  goto cost_logic;
	}

      /* Comparisons can work if the order is swapped.
	 Canonicalization puts the more complex operation first, but
	 we want it in op1.  */
      if (! (REG_P (op0)
	     || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
	{
	  op0 = XEXP (x, 1);
	  op1 = XEXP (x, 0);
	}
      goto cost_minus;

    case MINUS:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

    cost_minus:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
	  || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
	      && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
	{
	  if (op0 != const0_rtx)
	    *cost += rtx_cost (op0, MINUS, 0, speed);

	  if (CONST_INT_P (op1))
	    {
	      if (!aarch64_uimm12_shift (INTVAL (op1)))
		*cost += rtx_cost (op1, MINUS, 1, speed);
	    }
	  else
	    {
	      op1 = aarch64_strip_shift_or_extend (op1);
	      *cost += rtx_cost (op1, MINUS, 1, speed);
	    }
	  return true;
	}

      return false;

    case PLUS:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	{
	  if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
	    {
	      *cost += rtx_cost (op0, PLUS, 0, speed);
	    }
	  else
	    {
	      rtx new_op0 = aarch64_strip_shift_or_extend (op0);

	      if (new_op0 == op0
		  && GET_CODE (op0) == MULT)
		{
		  if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
		       && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
		      || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
			  && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
		    {
		      *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
					  speed)
				+ rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
					    speed)
				+ rtx_cost (op1, PLUS, 1, speed));
		      if (speed)
			*cost += extra_cost->int_multiply_extend_add;
		      return true;
		    }
		  *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
			    + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
			    + rtx_cost (op1, PLUS, 1, speed));

		  if (speed)
		    *cost += extra_cost->int_multiply_add;

		  return true;
		}

	      *cost += (rtx_cost (new_op0, PLUS, 0, speed)
			+ rtx_cost (op1, PLUS, 1, speed));
	    }
	  return true;
	}

      return false;

    case IOR:
    case XOR:
    case AND:
    cost_logic:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	{
	  if (CONST_INT_P (op1)
	      && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
	    {
	      *cost += rtx_cost (op0, AND, 0, speed);
	    }
	  else
	    {
	      if (GET_CODE (op0) == NOT)
		op0 = XEXP (op0, 0);
	      op0 = aarch64_strip_shift (op0);
	      *cost += (rtx_cost (op0, AND, 0, speed)
			+ rtx_cost (op1, AND, 1, speed));
	    }
	  return true;
	}
      return false;

    case ZERO_EXTEND:
      if ((GET_MODE (x) == DImode
	   && GET_MODE (XEXP (x, 0)) == SImode)
	  || GET_CODE (XEXP (x, 0)) == MEM)
	{
	  *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
	  return true;
	}
      return false;

    case SIGN_EXTEND:
      if (GET_CODE (XEXP (x, 0)) == MEM)
	{
	  *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
	  return true;
	}
      return false;

    case ROTATE:
      if (!CONST_INT_P (XEXP (x, 1)))
	*cost += COSTS_N_INSNS (2);
      /* Fall through.  */
    case ROTATERT:
    case LSHIFTRT:
    case ASHIFT:
    case ASHIFTRT:

      /* Shifting by a register often takes an extra cycle.  */
      if (speed && !CONST_INT_P (XEXP (x, 1)))
	*cost += extra_cost->register_shift;

      *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
      return true;

    case HIGH:
      if (!CONSTANT_P (XEXP (x, 0)))
	*cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
      return true;

    case LO_SUM:
      if (!CONSTANT_P (XEXP (x, 1)))
	*cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
      *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
      return true;

    case ZERO_EXTRACT:
    case SIGN_EXTRACT:
      *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
      return true;

    case MULT:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      *cost = COSTS_N_INSNS (1);
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	{
	  if (CONST_INT_P (op1)
	      && exact_log2 (INTVAL (op1)) > 0)
	    {
	      *cost += rtx_cost (op0, ASHIFT, 0, speed);
	      return true;
	    }

	  if ((GET_CODE (op0) == ZERO_EXTEND
	       && GET_CODE (op1) == ZERO_EXTEND)
	      || (GET_CODE (op0) == SIGN_EXTEND
		  && GET_CODE (op1) == SIGN_EXTEND))
	    {
	      *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
			+ rtx_cost (XEXP (op1, 0), MULT, 1, speed));
	      if (speed)
		*cost += extra_cost->int_multiply_extend;
	      return true;
	    }

	  if (speed)
	    *cost += extra_cost->int_multiply;
	}
      else if (speed)
	{
	  if (GET_MODE (x) == DFmode)
	    *cost += extra_cost->double_multiply;
	  else if (GET_MODE (x) == SFmode)
	    *cost += extra_cost->float_multiply;
	}

      return false;  /* All arguments need to be in registers.  */

    case MOD:
    case UMOD:
      *cost = COSTS_N_INSNS (2);
      if (speed)
	{
	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	    *cost += (extra_cost->int_multiply_add
		      + extra_cost->int_divide);
	  else if (GET_MODE (x) == DFmode)
	    *cost += (extra_cost->double_multiply
		      + extra_cost->double_divide);
	  else if (GET_MODE (x) == SFmode)
	    *cost += (extra_cost->float_multiply
		      + extra_cost->float_divide);
	}
      return false;  /* All arguments need to be in registers.  */

    case DIV:
    case UDIV:
      *cost = COSTS_N_INSNS (1);
      if (speed)
	{
	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	    *cost += extra_cost->int_divide;
	  else if (GET_MODE (x) == DFmode)
	    *cost += extra_cost->double_divide;
	  else if (GET_MODE (x) == SFmode)
	    *cost += extra_cost->float_divide;
	}
      return false;  /* All arguments need to be in registers.  */

    default:
      break;
    }
  return false;
}
static int
aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
		      enum machine_mode mode ATTRIBUTE_UNUSED,
		      addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
{
  enum rtx_code c = GET_CODE (x);
  const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;

  if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
    return addr_cost->pre_modify;

  if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
    return addr_cost->post_modify;

  if (c == PLUS)
    {
      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	return addr_cost->imm_offset;
      else if (GET_CODE (XEXP (x, 0)) == MULT
	       || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	       || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	return addr_cost->register_extend;

      return addr_cost->register_offset;
    }
  else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
    return addr_cost->imm_offset;

  return 0;
}
static int
aarch64_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
			    reg_class_t from, reg_class_t to)
{
  const struct cpu_regmove_cost *regmove_cost
    = aarch64_tune_params->regmove_cost;

  if (from == GENERAL_REGS && to == GENERAL_REGS)
    return regmove_cost->GP2GP;
  else if (from == GENERAL_REGS)
    return regmove_cost->GP2FP;
  else if (to == GENERAL_REGS)
    return regmove_cost->FP2GP;

  /* When AdvSIMD instructions are disabled it is not possible to move
     a 128-bit value directly between Q registers.  This is handled in
     secondary reload.  A general register is used as a scratch to move
     the upper DI value and the lower DI value is moved directly,
     hence the cost is the sum of three moves. */

  if (! TARGET_SIMD && GET_MODE_SIZE (from) == 128 && GET_MODE_SIZE (to) == 128)
    return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;

  return regmove_cost->FP2FP;
}
static int
aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
			  reg_class_t rclass ATTRIBUTE_UNUSED,
			  bool in ATTRIBUTE_UNUSED)
{
  return aarch64_tune_params->memmov_cost;
}

static void initialize_aarch64_code_model (void);
/* Parse the architecture extension string.  */
static void
aarch64_parse_extension (char *str)
{
  /* The extension string is parsed left to right.  */
  const struct aarch64_option_extension *opt = NULL;

  /* Flag to say whether we are adding or removing an extension.  */
  int adding_ext = -1;

  while (str != NULL && *str != 0)
    {
      const char *ext;
      size_t len;

      str++;
      ext = strchr (str, '+');

      if (ext != NULL)
	len = ext - str;
      else
	len = strlen (str);

      if (len >= 2 && strncmp (str, "no", 2) == 0)
	{
	  adding_ext = 0;
	  len -= 2;
	  str += 2;
	}
      else if (len > 0)
	adding_ext = 1;

      if (len == 0)
	{
	  error ("missing feature modifier after %qs", "+no");
	  return;
	}

      /* Scan over the extensions table trying to find an exact match.  */
      for (opt = all_extensions; opt->name != NULL; opt++)
	{
	  if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
	    {
	      /* Add or remove the extension.  */
	      if (adding_ext)
		aarch64_isa_flags |= opt->flags_on;
	      else
		aarch64_isa_flags &= ~(opt->flags_off);
	      break;
	    }
	}

      if (opt->name == NULL)
	{
	  /* Extension not found in list.  */
	  error ("unknown feature modifier %qs", str);
	  return;
	}

      str = ext;
    }
}
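/* For example, given -march=armv8-a+crypto+nosimd the loop above sees
   the tokens "crypto" (adding_ext = 1, so that extension's flags_on
   bits are ORed into aarch64_isa_flags) and then "nosimd" (the leading
   "no" is stripped, adding_ext = 0, so the flags_off bits are cleared).
   An unrecognised token such as "foo" falls through to the
   opt->name == NULL check and is diagnosed.  */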
/* Parse the ARCH string.  */
static void
aarch64_parse_arch (void)
{
  char *ext;
  const struct processor *arch;
  char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
  size_t len;

  strcpy (str, aarch64_arch_string);

  ext = strchr (str, '+');

  if (ext != NULL)
    len = ext - str;
  else
    len = strlen (str);

  if (len == 0)
    {
      error ("missing arch name in -march=%qs", str);
      return;
    }

  /* Loop through the list of supported ARCHs to find a match.  */
  for (arch = all_architectures; arch->name != NULL; arch++)
    {
      if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
	{
	  selected_arch = arch;
	  aarch64_isa_flags = selected_arch->flags;
	  selected_cpu = &all_cores[selected_arch->core];

	  if (ext != NULL)
	    {
	      /* ARCH string contains at least one extension.  */
	      aarch64_parse_extension (ext);
	    }

	  return;
	}
    }

  /* ARCH name not found in list.  */
  error ("unknown value %qs for -march", str);
  return;
}
/* Parse the CPU string.  */
static void
aarch64_parse_cpu (void)
{
  char *ext;
  const struct processor *cpu;
  char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
  size_t len;

  strcpy (str, aarch64_cpu_string);

  ext = strchr (str, '+');

  if (ext != NULL)
    len = ext - str;
  else
    len = strlen (str);

  if (len == 0)
    {
      error ("missing cpu name in -mcpu=%qs", str);
      return;
    }

  /* Loop through the list of supported CPUs to find a match.  */
  for (cpu = all_cores; cpu->name != NULL; cpu++)
    {
      if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
	{
	  selected_cpu = cpu;
	  aarch64_isa_flags = selected_cpu->flags;

	  if (ext != NULL)
	    {
	      /* CPU string contains at least one extension.  */
	      aarch64_parse_extension (ext);
	    }

	  return;
	}
    }

  /* CPU name not found in list.  */
  error ("unknown value %qs for -mcpu", str);
  return;
}
/* Parse the TUNE string.  */
static void
aarch64_parse_tune (void)
{
  const struct processor *cpu;
  char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
  strcpy (str, aarch64_tune_string);

  /* Loop through the list of supported CPUs to find a match.  */
  for (cpu = all_cores; cpu->name != NULL; cpu++)
    {
      if (strcmp (cpu->name, str) == 0)
	{
	  selected_tune = cpu;
	  return;
	}
    }

  /* CPU name not found in list.  */
  error ("unknown value %qs for -mtune", str);
  return;
}
/* Implement TARGET_OPTION_OVERRIDE.  */
static void
aarch64_override_options (void)
{
  /* march wins over mcpu, so when march is defined, mcpu takes the same value,
     otherwise march remains undefined.  mtune can be used with either march
     or mcpu.  */
  if (aarch64_arch_string)
    {
      aarch64_parse_arch ();
      aarch64_cpu_string = NULL;
    }

  if (aarch64_cpu_string)
    {
      aarch64_parse_cpu ();
      selected_arch = NULL;
    }

  if (aarch64_tune_string)
    {
      aarch64_parse_tune ();
    }

  initialize_aarch64_code_model ();

  aarch64_build_bitmask_table ();

  /* This target defaults to strict volatile bitfields.  */
  if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
    flag_strict_volatile_bitfields = 1;

  /* If the user did not specify a processor, choose the default
     one for them.  This will be the CPU set during configuration using
     --with-cpu, otherwise it is "generic".  */
  if (!selected_cpu)
    {
      selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
      aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
    }

  gcc_assert (selected_cpu);

  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!selected_tune)
    selected_tune = &all_cores[selected_cpu->core];

  aarch64_tune_flags = selected_tune->flags;
  aarch64_tune = selected_tune->core;
  aarch64_tune_params = selected_tune->tune;

  aarch64_override_options_after_change ();
}
/* Implement targetm.override_options_after_change.  */
static void
aarch64_override_options_after_change (void)
{
  faked_omit_frame_pointer = false;

  /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
     that aarch64_frame_pointer_required will be called.  We need to remember
     whether flag_omit_frame_pointer was turned on normally or just faked.  */
  if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
    {
      flag_omit_frame_pointer = true;
      faked_omit_frame_pointer = true;
    }
}

static struct machine_function *
aarch64_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_alloc_cleared_machine_function ();
  return machine;
}

void
aarch64_init_expanders (void)
{
  init_machine_status = aarch64_init_machine_status;
}
/* A checking mechanism for the implementation of the various code models.  */
static void
initialize_aarch64_code_model (void)
{
  if (flag_pic)
    {
      switch (aarch64_cmodel_var)
	{
	case AARCH64_CMODEL_TINY:
	  aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
	  break;
	case AARCH64_CMODEL_SMALL:
	  aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
	  break;
	case AARCH64_CMODEL_LARGE:
	  sorry ("code model %qs with -f%s", "large",
		 flag_pic > 1 ? "PIC" : "pic");
	default:
	  gcc_unreachable ();
	}
    }
  else
    aarch64_cmodel = aarch64_cmodel_var;
}
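/* For example, -mcmodel=small together with -fpic selects
   AARCH64_CMODEL_SMALL_PIC above, while without -fpic the user-selected
   model is used unchanged; the large code model currently has no PIC
   variant, hence the sorry () diagnostic.  */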
/* Return true if SYMBOL_REF X binds locally.  */

static bool
aarch64_symbol_binds_local_p (const_rtx x)
{
  return (SYMBOL_REF_DECL (x)
	  ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
	  : SYMBOL_REF_LOCAL_P (x));
}

/* Return true if SYMBOL_REF X is thread local.  */
static bool
aarch64_tls_symbol_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  if (GET_CODE (x) != SYMBOL_REF)
    return false;

  return SYMBOL_REF_TLS_MODEL (x) != 0;
}
/* Classify a TLS symbol into one of the TLS kinds.  */
enum aarch64_symbol_type
aarch64_classify_tls_symbol (rtx x)
{
  enum tls_model tls_kind = tls_symbolic_operand_type (x);

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
    case TLS_MODEL_LOCAL_DYNAMIC:
      return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;

    case TLS_MODEL_INITIAL_EXEC:
      return SYMBOL_SMALL_GOTTPREL;

    case TLS_MODEL_LOCAL_EXEC:
      return SYMBOL_SMALL_TPREL;

    case TLS_MODEL_EMULATED:
    case TLS_MODEL_NONE:
      return SYMBOL_FORCE_TO_MEM;

    default:
      gcc_unreachable ();
    }
}
/* Return the method that should be used to access SYMBOL_REF or
   LABEL_REF X in context CONTEXT.  */

enum aarch64_symbol_type
aarch64_classify_symbol (rtx x,
			 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
{
  if (GET_CODE (x) == LABEL_REF)
    {
      switch (aarch64_cmodel)
	{
	case AARCH64_CMODEL_LARGE:
	  return SYMBOL_FORCE_TO_MEM;

	case AARCH64_CMODEL_TINY_PIC:
	case AARCH64_CMODEL_TINY:
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL_PIC:
	case AARCH64_CMODEL_SMALL:
	  return SYMBOL_SMALL_ABSOLUTE;

	default:
	  gcc_unreachable ();
	}
    }

  if (GET_CODE (x) == SYMBOL_REF)
    {
      if (aarch64_cmodel == AARCH64_CMODEL_LARGE
	  || CONSTANT_POOL_ADDRESS_P (x))
	return SYMBOL_FORCE_TO_MEM;

      if (aarch64_tls_symbol_p (x))
	return aarch64_classify_tls_symbol (x);

      switch (aarch64_cmodel)
	{
	case AARCH64_CMODEL_TINY:
	  if (SYMBOL_REF_WEAK (x))
	    return SYMBOL_FORCE_TO_MEM;
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL:
	  if (SYMBOL_REF_WEAK (x))
	    return SYMBOL_FORCE_TO_MEM;
	  return SYMBOL_SMALL_ABSOLUTE;

	case AARCH64_CMODEL_TINY_PIC:
	  if (!aarch64_symbol_binds_local_p (x))
	    return SYMBOL_SMALL_GOT;
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL_PIC:
	  if (!aarch64_symbol_binds_local_p (x))
	    return SYMBOL_SMALL_GOT;
	  return SYMBOL_SMALL_ABSOLUTE;

	default:
	  gcc_unreachable ();
	}
    }

  /* By default push everything into the constant pool.  */
  return SYMBOL_FORCE_TO_MEM;
}
bool
aarch64_constant_address_p (rtx x)
{
  return (CONSTANT_P (x) && memory_address_p (DImode, x));
}

bool
aarch64_legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return false;

  return true;
}
/* Return true if X holds either a quarter-precision or
   floating-point +0.0 constant.  */
static bool
aarch64_valid_floating_const (enum machine_mode mode, rtx x)
{
  if (!CONST_DOUBLE_P (x))
    return false;

  /* TODO: We could handle moving 0.0 to a TFmode register,
     but first we would like to refactor the movtf_aarch64
     to be more amicable to split moves properly and
     correctly gate on TARGET_SIMD.  For now - reject all
     constants which are not to SFmode or DFmode registers.  */
  if (!(mode == SFmode || mode == DFmode))
    return false;

  if (aarch64_float_const_zero_rtx_p (x))
    return true;
  return aarch64_float_const_representable_p (x);
}
static bool
aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
{
  /* Do not allow vector struct mode constants.  We could support
     0 and -1 easily, but they need support in aarch64-simd.md.  */
  if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
    return false;

  /* This could probably go away because
     we now decompose CONST_INTs according to expand_mov_immediate.  */
  if ((GET_CODE (x) == CONST_VECTOR
       && aarch64_simd_valid_immediate (x, mode, false, NULL))
      || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
    return !targetm.cannot_force_const_mem (mode, x);

  if (GET_CODE (x) == HIGH
      && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
    return true;

  return aarch64_constant_address_p (x);
}
static rtx
aarch64_load_tp (rtx target)
{
  if (!target
      || GET_MODE (target) != Pmode
      || !register_operand (target, Pmode))
    target = gen_reg_rtx (Pmode);

  /* Can return in any reg.  */
  emit_insn (gen_aarch64_load_tp_hard (target));
  return target;
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
   Return the type to use as __builtin_va_list.

   AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:

   struct __va_list
   {
     void *__stack;
     void *__gr_top;
     void *__vr_top;
     int   __gr_offs;
     int   __vr_offs;
   };  */

static tree
aarch64_build_builtin_va_list (void)
{
  tree va_list_name;
  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;

  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;

  /* Create the fields.  */
  f_stack = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__stack"),
			ptr_type_node);
  f_grtop = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__gr_top"),
			ptr_type_node);
  f_vrtop = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__vr_top"),
			ptr_type_node);
  f_groff = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__gr_offs"),
			integer_type_node);
  f_vroff = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__vr_offs"),
			integer_type_node);

  DECL_ARTIFICIAL (f_stack) = 1;
  DECL_ARTIFICIAL (f_grtop) = 1;
  DECL_ARTIFICIAL (f_vrtop) = 1;
  DECL_ARTIFICIAL (f_groff) = 1;
  DECL_ARTIFICIAL (f_vroff) = 1;

  DECL_FIELD_CONTEXT (f_stack) = va_list_type;
  DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
  DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
  DECL_FIELD_CONTEXT (f_groff) = va_list_type;
  DECL_FIELD_CONTEXT (f_vroff) = va_list_type;

  TYPE_FIELDS (va_list_type) = f_stack;
  DECL_CHAIN (f_stack) = f_grtop;
  DECL_CHAIN (f_grtop) = f_vrtop;
  DECL_CHAIN (f_vrtop) = f_groff;
  DECL_CHAIN (f_groff) = f_vroff;

  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
{
  const CUMULATIVE_ARGS *cum;
  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
  tree stack, grtop, vrtop, groff, vroff;
  tree t;
  int gr_save_area_size;
  int vr_save_area_size;
  int vr_offset;

  cum = &crtl->args.info;
  gr_save_area_size
    = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
  vr_save_area_size
    = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;

  if (TARGET_GENERAL_REGS_ONLY)
    {
      if (cum->aapcs_nvrn > 0)
	sorry ("%qs and floating point or vector arguments",
	       "-mgeneral-regs-only");
      vr_save_area_size = 0;
    }

  f_stack = TYPE_FIELDS (va_list_type_node);
  f_grtop = DECL_CHAIN (f_stack);
  f_vrtop = DECL_CHAIN (f_grtop);
  f_groff = DECL_CHAIN (f_vrtop);
  f_vroff = DECL_CHAIN (f_groff);

  stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
		  NULL_TREE);
  grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
		  NULL_TREE);
  vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
		  NULL_TREE);
  groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
		  NULL_TREE);
  vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
		  NULL_TREE);

  /* Emit code to initialize STACK, which points to the next varargs stack
     argument.  CUM->AAPCS_STACK_SIZE gives the number of stack words used
     by named arguments.  STACK is 8-byte aligned.  */
  t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
  if (cum->aapcs_stack_size > 0)
    t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
  t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize GRTOP, the top of the GR save area.
     virtual_incoming_args_rtx should have been 16 byte aligned.  */
  t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
  t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize VRTOP, the top of the VR save area.
     This address is gr_save_area_bytes below GRTOP, rounded
     down to the next 16-byte boundary.  */
  t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
  vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
				STACK_BOUNDARY / BITS_PER_UNIT);

  if (vr_offset)
    t = fold_build_pointer_plus_hwi (t, -vr_offset);
  t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize GROFF, the offset from GRTOP of the
     next GPR argument.  */
  t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
	      build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Likewise emit code to initialize VROFF, the offset from FTOP
     of the next VR argument.  */
  t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
	      build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
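/* Worked example: for int f (int n, ...) only one GP register (w0)
   carries a named argument on entry, so the code above stores
   __gr_offs = -(8 - 1) * 8 = -56 and __vr_offs = -(8 - 0) * 16 = -128;
   va_arg then walks these negative offsets back up towards the tops of
   the two register save areas.  (Sketch assuming the usual
   NUM_ARG_REGS == 8, NUM_FP_ARG_REGS == 8 and UNITS_PER_VREG == 16.)  */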
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			      gimple_seq *post_p ATTRIBUTE_UNUSED)
{
  tree addr;
  bool indirect_p;
  bool is_ha;		/* is HFA or HVA.  */
  bool dw_align;	/* double-word align.  */
  enum machine_mode ag_mode = VOIDmode;
  int nregs;
  enum machine_mode mode;

  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
  tree stack, f_top, f_off, off, arg, roundup, on_stack;
  HOST_WIDE_INT size, rsize, adjust, align;
  tree t, u, cond1, cond2;

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);

  mode = TYPE_MODE (type);

  f_stack = TYPE_FIELDS (va_list_type_node);
  f_grtop = DECL_CHAIN (f_stack);
  f_vrtop = DECL_CHAIN (f_grtop);
  f_groff = DECL_CHAIN (f_vrtop);
  f_vroff = DECL_CHAIN (f_groff);

  stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
		  f_stack, NULL_TREE);
  size = int_size_in_bytes (type);
  align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;

  dw_align = false;
  adjust = 0;
  if (aarch64_vfp_is_call_or_return_candidate (mode,
					       type,
					       &ag_mode,
					       &nregs,
					       &is_ha))
    {
      /* TYPE passed in fp/simd registers.  */
      if (TARGET_GENERAL_REGS_ONLY)
	sorry ("%qs and floating point or vector arguments",
	       "-mgeneral-regs-only");

      f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
		      unshare_expr (valist), f_vrtop, NULL_TREE);
      f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
		      unshare_expr (valist), f_vroff, NULL_TREE);

      rsize = nregs * UNITS_PER_VREG;

      if (is_ha)
	{
	  if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
	    adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
	}
      else if (BLOCK_REG_PADDING (mode, type, 1) == downward
	       && size < UNITS_PER_VREG)
	{
	  adjust = UNITS_PER_VREG - size;
	}
    }
  else
    {
      /* TYPE passed in general registers.  */
      f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
		      unshare_expr (valist), f_grtop, NULL_TREE);
      f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
		      unshare_expr (valist), f_groff, NULL_TREE);
      rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
      nregs = rsize / UNITS_PER_WORD;

      if (align > 8)
	dw_align = true;

      if (BLOCK_REG_PADDING (mode, type, 1) == downward
	  && size < UNITS_PER_WORD)
	{
	  adjust = UNITS_PER_WORD - size;
	}
    }
  /* Get a local temporary for the field value.  */
  off = get_initialized_tmp_var (f_off, pre_p, NULL);

  /* Emit code to branch if off >= 0.  */
  t = build2 (GE_EXPR, boolean_type_node, off,
	      build_int_cst (TREE_TYPE (off), 0));
  cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);

  if (dw_align)
    {
      /* Emit: offs = (offs + 15) & -16.  */
      t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
		  build_int_cst (TREE_TYPE (off), 15));
      t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
		  build_int_cst (TREE_TYPE (off), -16));
      roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
    }
  else
    roundup = NULL;

  /* Update ap.__[g|v]r_offs  */
  t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
	      build_int_cst (TREE_TYPE (off), rsize));
  t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);

  /* String up.  */
  if (roundup)
    t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);

  /* [cond2] if (ap.__[g|v]r_offs > 0)  */
  u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
	      build_int_cst (TREE_TYPE (f_off), 0));
  cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);

  /* String up: make sure the assignment happens before the use.  */
  t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
  COND_EXPR_ELSE (cond1) = t;
  /* Prepare the trees handling the argument that is passed on the stack;
     the top level node will store in ON_STACK.  */
  arg = get_initialized_tmp_var (stack, pre_p, NULL);
  if (align > 8)
    {
      /* if (alignof(type) > 8) (arg = arg + 15) & -16;  */
      t = fold_convert (intDI_type_node, arg);
      t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), 15));
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -16));
      t = fold_convert (TREE_TYPE (arg), t);
      roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
    }
  else
    roundup = NULL;
  /* Advance ap.__stack  */
  t = fold_convert (intDI_type_node, arg);
  t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
	      build_int_cst (TREE_TYPE (t), size + 7));
  t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
	      build_int_cst (TREE_TYPE (t), -8));
  t = fold_convert (TREE_TYPE (arg), t);
  t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
  /* String up roundup and advance.  */
  if (roundup)
    t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
  /* String up with arg  */
  on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
  /* Big-endianness related address adjustment.  */
  if (BLOCK_REG_PADDING (mode, type, 1) == downward
      && size < UNITS_PER_WORD)
    {
      t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
		  size_int (UNITS_PER_WORD - size));
      on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
    }

  COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
  COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
  /* Adjustment to OFFSET in the case of BIG_ENDIAN.  */
  t = off;
  if (adjust)
    t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
		build_int_cst (TREE_TYPE (off), adjust));

  t = fold_convert (sizetype, t);
  t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);

  if (is_ha)
    {
      /* type ha; // treat as "struct {ftype field[n];}"
	 ... [computing offs]
	 for (i = 0; i < nregs; ++i, offs += 16)
	   ha.field[i] = *((ftype *)(ap.__vr_top + offs));
	 return ha;  */
      int i;
      tree tmp_ha, field_t, field_ptr_t;

      /* Declare a local variable.  */
      tmp_ha = create_tmp_var_raw (type, "ha");
      gimple_add_tmp_var (tmp_ha);

      /* Establish the base type.  */
      switch (ag_mode)
	{
	case SFmode:
	  field_t = float_type_node;
	  field_ptr_t = float_ptr_type_node;
	  break;
	case DFmode:
	  field_t = double_type_node;
	  field_ptr_t = double_ptr_type_node;
	  break;
	case TFmode:
	  field_t = long_double_type_node;
	  field_ptr_t = long_double_ptr_type_node;
	  break;
/* The half precision and quad precision are not fully supported yet.  Enable
   the following code after the support is complete.  Need to find the correct
   type node for __fp16 *.  */
#if 0
	case HFmode:
	  field_t = float_type_node;
	  field_ptr_t = float_ptr_type_node;
	  break;
#endif
	case V2SImode:
	case V4SImode:
	  {
	    tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
	    field_t = build_vector_type_for_mode (innertype, ag_mode);
	    field_ptr_t = build_pointer_type (field_t);
	  }
	  break;
	default:
	  gcc_assert (0);
	}

      /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area  */
      tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
      addr = t;
      t = fold_convert (field_ptr_t, addr);
      t = build2 (MODIFY_EXPR, field_t,
		  build1 (INDIRECT_REF, field_t, tmp_ha),
		  build1 (INDIRECT_REF, field_t, t));

      /* ha.field[i] = *((field_ptr_t)vr_saved_area + i)  */
      for (i = 1; i < nregs; ++i)
	{
	  addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
	  u = fold_convert (field_ptr_t, addr);
	  u = build2 (MODIFY_EXPR, field_t,
		      build2 (MEM_REF, field_t, tmp_ha,
			      build_int_cst (field_ptr_t,
					     (i *
					      int_size_in_bytes (field_t)))),
		      build1 (INDIRECT_REF, field_t, u));
	  t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
	}

      u = fold_convert (TREE_TYPE (f_top), tmp_ha);
      t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
    }

  COND_EXPR_ELSE (cond2) = t;
  addr = fold_convert (build_pointer_type (type), cond1);
  addr = build_va_arg_indirect_ref (addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);

  return addr;
}
/* Implement TARGET_SETUP_INCOMING_VARARGS.  */
static void
aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
				tree type, int *pretend_size ATTRIBUTE_UNUSED,
				int no_rtl)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  CUMULATIVE_ARGS local_cum;
  int gr_saved, vr_saved;

  /* The caller has advanced CUM up to, but not beyond, the last named
     argument.  Advance a local copy of CUM past the last "real" named
     argument, to find out how many registers are left over.  */
  local_cum = *cum;
  aarch64_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, true);

  /* Found out how many registers we need to save.  */
  gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
  vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;

  if (TARGET_GENERAL_REGS_ONLY)
    {
      if (local_cum.aapcs_nvrn > 0)
	sorry ("%qs and floating point or vector arguments",
	       "-mgeneral-regs-only");
      vr_saved = 0;
    }

  if (!no_rtl)
    {
      if (gr_saved > 0)
	{
	  rtx ptr, mem;

	  /* virtual_incoming_args_rtx should have been 16-byte aligned.  */
	  ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
			       - gr_saved * UNITS_PER_WORD);
	  mem = gen_frame_mem (BLKmode, ptr);
	  set_mem_alias_set (mem, get_varargs_alias_set ());

	  move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
			       mem, gr_saved);
	}
      if (vr_saved > 0)
	{
	  /* We can't use move_block_from_reg, because it will use
	     the wrong mode, storing D regs only.  */
	  enum machine_mode mode = TImode;
	  int off, i;

	  /* Set OFF to the offset from virtual_incoming_args_rtx of
	     the first vector register.  The VR save area lies below
	     the GR one, and is aligned to 16 bytes.  */
	  off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
				   STACK_BOUNDARY / BITS_PER_UNIT);
	  off -= vr_saved * UNITS_PER_VREG;

	  for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
	    {
	      rtx ptr, mem;

	      ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
	      mem = gen_frame_mem (mode, ptr);
	      set_mem_alias_set (mem, get_varargs_alias_set ());
	      aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
	      off += UNITS_PER_VREG;
	    }
	}
    }

  /* We don't save the size into *PRETEND_SIZE because we want to avoid
     any complication of having crtl->args.pretend_args_size changed.  */
  cfun->machine->saved_varargs_size
    = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
			 STACK_BOUNDARY / BITS_PER_UNIT)
       + vr_saved * UNITS_PER_VREG);
}
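/* Example: for int f (int n, ...) the local copy of CUM advances past
   one GP register, so gr_saved = 7 and vr_saved = 8.  The code above
   dumps x1-x7 just below virtual_incoming_args_rtx and q0-q7 below
   that, giving saved_varargs_size = ROUND_UP (7 * 8, 16) + 8 * 16 = 192
   bytes.  (Sketch assuming the usual 8 GP / 8 FP argument registers.)  */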
static void
aarch64_conditional_register_usage (void)
{
  int i;
  if (!TARGET_FLOAT)
    {
      for (i = V0_REGNUM; i <= V31_REGNUM; i++)
	{
	  fixed_regs[i] = 1;
	  call_used_regs[i] = 1;
	}
    }
}
/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.  */
static int
aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
{
  enum machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode && mode != TFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode && mode != TFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
	 and 128-bit vector types.  */
      size = int_size_in_bytes (type);
      switch (size)
	{
	case 8:
	  mode = V2SImode;
	  break;
	case 16:
	  mode = V4SImode;
	  break;
	default:
	  return -1;
	}

      if (*modep == VOIDmode)
	*modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
	 equivalent for the purposes of being homogeneous aggregates
	 if they are the same size.  */
      if (*modep == mode)
	return 1;

      break;

    case ARRAY_TYPE:
      {
	int count;
	tree index = TYPE_DOMAIN (type);

	/* Can't handle incomplete types.  */
	if (!COMPLETE_TYPE_P (type))
	  return -1;

	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
	if (count == -1
	    || !index
	    || !TYPE_MAX_VALUE (index)
	    || !host_integerp (TYPE_MAX_VALUE (index), 1)
	    || !TYPE_MIN_VALUE (index)
	    || !host_integerp (TYPE_MIN_VALUE (index), 1)
	    || count < 0)
	  return -1;

	count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
		  - tree_low_cst (TYPE_MIN_VALUE (index), 1));

	/* There must be no padding.  */
	if (!host_integerp (TYPE_SIZE (type), 1)
	    || (tree_low_cst (TYPE_SIZE (type), 1)
		!= count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case RECORD_TYPE:
      {
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types.  */
	if (!COMPLETE_TYPE_P (type))
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count += sub_count;
	  }

	/* There must be no padding.  */
	if (!host_integerp (TYPE_SIZE (type), 1)
	    || (tree_low_cst (TYPE_SIZE (type), 1)
		!= count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
	/* These aren't very interesting except in a degenerate case.  */
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types.  */
	if (!COMPLETE_TYPE_P (type))
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count = count > sub_count ? count : sub_count;
	  }

	/* There must be no padding.  */
	if (!host_integerp (TYPE_SIZE (type), 1)
	    || (tree_low_cst (TYPE_SIZE (type), 1)
		!= count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    default:
      break;
    }

  return -1;
}
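/* For instance, struct { float x, y, z; } yields count 3 with *modep ==
   SFmode (a three-element HFA, passed in s0-s2), whereas
   struct { float f; double d; } fails the *modep match and returns -1,
   so it is passed like any other composite.  */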
/* Return TRUE if the type, as described by TYPE and MODE, is a composite
   type as described in AAPCS64 \S 4.3.  This includes aggregate, union and
   array types.  The C99 floating-point complex types are also considered
   as composite types, according to AAPCS64 \S 7.1.1.  The complex integer
   types, which are GCC extensions and out of the scope of AAPCS64, are
   treated as composite types here as well.

   Note that MODE itself is not sufficient in determining whether a type
   is such a composite type or not.  This is because
   stor-layout.c:compute_record_mode may have already changed the MODE
   (BLKmode) of a RECORD_TYPE TYPE to some other mode.  For example, a
   structure with only one field may have its MODE set to the mode of the
   field.  Also an integer mode whose size matches the size of the
   RECORD_TYPE type may be used to substitute the original mode
   (i.e. BLKmode) in certain circumstances.  In other words, MODE cannot be
   solely relied on.  */

static bool
aarch64_composite_type_p (const_tree type,
			  enum machine_mode mode)
{
  if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
    return true;

  if (mode == BLKmode
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return true;

  return false;
}

/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
   type as described in AAPCS64 \S 4.1.2.

   See the comment above aarch64_composite_type_p for the notes on MODE.  */

static bool
aarch64_short_vector_p (const_tree type,
			enum machine_mode mode)
{
  HOST_WIDE_INT size = -1;

  if (type && TREE_CODE (type) == VECTOR_TYPE)
    size = int_size_in_bytes (type);
  else if (!aarch64_composite_type_p (type, mode)
	   && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	       || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
    size = GET_MODE_SIZE (mode);

  return (size == 8 || size == 16) ? true : false;
}
/* Return TRUE if an argument, whose type is described by TYPE and MODE,
   shall be passed or returned in simd/fp register(s) (providing these
   parameter passing registers are available).

   Upon successful return, *COUNT returns the number of needed registers,
   *BASE_MODE returns the mode of the individual register and when IS_HA
   is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
   floating-point aggregate or a homogeneous short-vector aggregate.  */

static bool
aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
					 const_tree type,
					 enum machine_mode *base_mode,
					 int *count,
					 bool *is_ha)
{
  enum machine_mode new_mode = VOIDmode;
  bool composite_p = aarch64_composite_type_p (type, mode);

  if (is_ha != NULL) *is_ha = false;

  if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
      || aarch64_short_vector_p (type, mode))
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      if (is_ha != NULL) *is_ha = true;
      *count = 2;
      new_mode = GET_MODE_INNER (mode);
    }
  else if (type && composite_p)
    {
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);

      if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
	{
	  if (is_ha != NULL) *is_ha = true;
	  *count = ag_count;
	}
      else
	return false;
    }
  else
    return false;

  *base_mode = new_mode;
  return true;
}
/* Implement TARGET_STRUCT_VALUE_RTX.  */

static rtx
aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
			  int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
}

/* Implements target hook vector_mode_supported_p.  */
static bool
aarch64_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SIMD
      && (mode == V4SImode  || mode == V8HImode
	  || mode == V16QImode || mode == V2DImode
	  || mode == V2SImode  || mode == V4HImode
	  || mode == V8QImode  || mode == V2SFmode
	  || mode == V4SFmode  || mode == V2DFmode))
    return true;

  return false;
}
/* Return appropriate SIMD container
   for MODE within a vector of WIDTH bits.  */
static enum machine_mode
aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
{
  gcc_assert (width == 64 || width == 128);
  if (TARGET_SIMD)
    {
      if (width == 128)
	switch (mode)
	  {
	  case DFmode:
	    return V2DFmode;
	  case SFmode:
	    return V4SFmode;
	  case SImode:
	    return V4SImode;
	  case HImode:
	    return V8HImode;
	  case QImode:
	    return V16QImode;
	  case DImode:
	    return V2DImode;
	  default:
	    break;
	  }
      else
	switch (mode)
	  {
	  case SFmode:
	    return V2SFmode;
	  case SImode:
	    return V2SImode;
	  case HImode:
	    return V4HImode;
	  case QImode:
	    return V8QImode;
	  default:
	    break;
	  }
    }
  return word_mode;
}

/* Return 128-bit container as the preferred SIMD mode for MODE.  */
static enum machine_mode
aarch64_preferred_simd_mode (enum machine_mode mode)
{
  return aarch64_simd_container_mode (mode, 128);
}
/* Return the bitmask of possible vector sizes for the vectorizer
   to iterate over.  */
static unsigned int
aarch64_autovectorize_vector_sizes (void)
{
  return (16 | 8);
}

/* A table to help perform AArch64-specific name mangling for AdvSIMD
   vector types in order to conform to the AAPCS64 (see "Procedure
   Call Standard for the ARM 64-bit Architecture", Appendix A).  To
   qualify for emission with the mangled names defined in that document,
   a vector type must not only be of the correct mode but also be
   composed of AdvSIMD vector element types (e.g.
   __builtin_aarch64_simd_qi); these types are registered by
   aarch64_init_simd_builtins ().  In other words, vector types defined
   in other ways e.g. via vector_size attribute will get default
   mangled names.  */
typedef struct
{
  enum machine_mode mode;
  const char *element_type_name;
  const char *mangled_name;
} aarch64_simd_mangle_map_entry;

static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
  /* 64-bit containerized types.  */
  { V8QImode,  "__builtin_aarch64_simd_qi",     "10__Int8x8_t" },
  { V8QImode,  "__builtin_aarch64_simd_uqi",    "11__Uint8x8_t" },
  { V4HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x4_t" },
  { V4HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x4_t" },
  { V2SImode,  "__builtin_aarch64_simd_si",     "11__Int32x2_t" },
  { V2SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x2_t" },
  { V2SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x2_t" },
  { V8QImode,  "__builtin_aarch64_simd_poly8",  "11__Poly8x8_t" },
  { V4HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
  /* 128-bit containerized types.  */
  { V16QImode, "__builtin_aarch64_simd_qi",     "11__Int8x16_t" },
  { V16QImode, "__builtin_aarch64_simd_uqi",    "12__Uint8x16_t" },
  { V8HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x8_t" },
  { V8HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x8_t" },
  { V4SImode,  "__builtin_aarch64_simd_si",     "11__Int32x4_t" },
  { V4SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x4_t" },
  { V2DImode,  "__builtin_aarch64_simd_di",     "11__Int64x2_t" },
  { V2DImode,  "__builtin_aarch64_simd_udi",    "12__Uint64x2_t" },
  { V4SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x4_t" },
  { V2DFmode,  "__builtin_aarch64_simd_df",     "13__Float64x2_t" },
  { V16QImode, "__builtin_aarch64_simd_poly8",  "12__Poly8x16_t" },
  { V8HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
  { VOIDmode, NULL, NULL }
};
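/* With the table above, a C++ function taking an int32x4_t (element
   type __builtin_aarch64_simd_si in V4SImode) mangles the parameter as
   "11__Int32x4_t"; e.g. void f (int32x4_t) becomes _Z1f11__Int32x4_t.  */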
/* Implement TARGET_MANGLE_TYPE.  */

static const char *
aarch64_mangle_type (const_tree type)
{
  /* The AArch64 ABI documents say that "__va_list" has to be
     mangled as if it is in the "std" namespace.  */
  if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Check the mode of the vector type, and the name of the vector
     element type, against the table.  */
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;

      while (pos->mode != VOIDmode)
	{
	  tree elt_type = TREE_TYPE (type);

	  if (pos->mode == TYPE_MODE (type)
	      && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
	      && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
			  pos->element_type_name))
	    return pos->mangled_name;

	  pos++;
	}
    }

  /* Use the default mangling.  */
  return NULL;
}
/* Return the equivalent letter for size.  */
static char
sizetochar (int size)
{
  switch (size)
    {
    case 64: return 'd';
    case 32: return 's';
    case 16: return 'h';
    case 8 : return 'b';
    default: gcc_unreachable ();
    }
}
/* Return true iff x is a uniform vector of floating-point
   constants, and the constant can be represented in
   quarter-precision form.  Note, as aarch64_float_const_representable
   rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0.  */
static bool
aarch64_vect_float_const_representable_p (rtx x)
{
  int i = 0;
  REAL_VALUE_TYPE r0, ri;
  rtx x0, xi;

  if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
    return false;

  x0 = CONST_VECTOR_ELT (x, 0);
  if (!CONST_DOUBLE_P (x0))
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);

  for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
    {
      xi = CONST_VECTOR_ELT (x, i);
      if (!CONST_DOUBLE_P (xi))
	return false;

      REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
      if (!REAL_VALUES_EQUAL (r0, ri))
	return false;
    }

  return aarch64_float_const_representable_p (x0);
}
/* Return true for valid and false for invalid.  */
bool
aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
			      struct simd_immediate_info *info)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG)	\
  matches = 1;						\
  for (i = 0; i < idx; i += (STRIDE))			\
    if (!(TEST))					\
      matches = 0;					\
  if (matches)						\
    {							\
      immtype = (CLASS);				\
      elsize = (ELSIZE);				\
      eshift = (SHIFT);					\
      emvn = (NEG);					\
      break;						\
    }

  unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
  unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
  unsigned char bytes[16];
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;
  int eshift, emvn;

  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      if (! (aarch64_simd_imm_zero_p (op, mode)
	     || aarch64_vect_float_const_representable_p (op)))
	return false;

      if (info)
	{
	  info->value = CONST_VECTOR_ELT (op, 0);
	  info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
	  info->mvn = false;
	  info->shift = 0;
	}

      return true;
    }

  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = CONST_VECTOR_ELT (op, i);
      unsigned HOST_WIDE_INT elpart;
      unsigned int part, parts;

      if (GET_CODE (el) == CONST_INT)
	{
	  elpart = INTVAL (el);
	  parts = 1;
	}
      else if (GET_CODE (el) == CONST_DOUBLE)
	{
	  elpart = CONST_DOUBLE_LOW (el);
	  parts = 2;
	}
      else
	gcc_unreachable ();

      for (part = 0; part < parts; part++)
	{
	  unsigned int byte;
	  for (byte = 0; byte < innersize; byte++)
	    {
	      bytes[idx++] = (elpart & 0xff) ^ invmask;
	      elpart >>= BITS_PER_UNIT;
	    }
	  if (GET_CODE (el) == CONST_DOUBLE)
	    elpart = CONST_DOUBLE_HIGH (el);
	}
    }

  /* Sanity check.  */
  gcc_assert (idx == GET_MODE_SIZE (mode));

  do
    {
      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);

      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);

      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);

      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
	     && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);

      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);

      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);

      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);

      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);

      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);

      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
	     && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);

      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);

      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);

      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);

      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);

      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 0, 0);

      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 0, 1);

      CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);

      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
	     && bytes[i] == bytes[(i + 8) % idx], 0, 0);
    }
  while (0);

  /* TODO: Currently the assembler cannot handle types 12 to 15.
     And there is no way to specify cmode through the compiler.
     Disable them till there is support in the assembler.  */
  if (immtype == -1
      || (immtype >= 12 && immtype <= 15))
    return false;

  if (info)
    {
      info->element_width = elsize;
      info->mvn = emvn != 0;
      info->shift = eshift;

      unsigned HOST_WIDE_INT imm = 0;

      /* Un-invert bytes of recognized vector, if necessary.  */
      if (invmask != 0)
	for (i = 0; i < idx; i++)
	  bytes[i] ^= invmask;

      if (immtype == 17)
	{
	  /* FIXME: Broken on 32-bit H_W_I hosts.  */
	  gcc_assert (sizeof (HOST_WIDE_INT) == 8);

	  for (i = 0; i < 8; i++)
	    imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
	      << (i * BITS_PER_UNIT);

	  info->value = GEN_INT (imm);
	}
      else
	{
	  for (i = 0; i < elsize / BITS_PER_UNIT; i++)
	    imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

	  /* Construct 'abcdefgh' because the assembler cannot handle
	     generic constants.  */
	  if (info->mvn)
	    imm = ~imm;
	  imm = (imm >> info->shift) & 0xff;
	  info->value = GEN_INT (imm);
	}
    }

  return true;
#undef CHECK
}
static bool
aarch64_const_vec_all_same_int_p (rtx x,
				  HOST_WIDE_INT minval,
				  HOST_WIDE_INT maxval)
{
  HOST_WIDE_INT firstval;
  int count, i;

  if (GET_CODE (x) != CONST_VECTOR
      || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
    return false;

  firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
  if (firstval < minval || firstval > maxval)
    return false;

  count = CONST_VECTOR_NUNITS (x);
  for (i = 1; i < count; i++)
    if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
      return false;

  return true;
}
/* Check if immediate shift constants are within range.  */
bool
aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
{
  int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
  if (left)
    return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
  else
    return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
}
/* Return true if X is a uniform vector where all elements
   are either the floating-point constant 0.0 or the
   integer constant 0.  */
bool
aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
{
  return x == CONST0_RTX (mode);
}
bool
aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT imm = INTVAL (x);
  int i;

  for (i = 0; i < 8; i++)
    {
      unsigned int byte = imm & 0xff;
      if (byte != 0xff && byte != 0)
	return false;
      imm >>= 8;
    }

  return true;
}
,
6452 enum aarch64_symbol_context context
,
6453 enum machine_mode mode
)
6455 if (GET_CODE (x
) == HIGH
6456 && aarch64_valid_symref (XEXP (x
, 0), GET_MODE (XEXP (x
, 0))))
6459 if (CONST_INT_P (x
) && aarch64_move_imm (INTVAL (x
), mode
))
6462 if (GET_CODE (x
) == SYMBOL_REF
&& mode
== DImode
&& CONSTANT_ADDRESS_P (x
))
6465 return aarch64_classify_symbolic_expression (x
, context
)
6466 == SYMBOL_TINY_ABSOLUTE
;
/* Return a const_int vector of VAL.  */
rtx
aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits);
  int i;

  for (i = 0; i < nunits; i++)
    RTVEC_ELT (v, i) = GEN_INT (val);

  return gen_rtx_CONST_VECTOR (mode, v);
}
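/* E.g. aarch64_simd_gen_const_vector_dup (V4SImode, 1) builds
   (const_vector:V4SI [1 1 1 1]), which the caller below uses to test a
   scalar immediate against the vector immediate-move patterns.  */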
/* Check OP is a legal scalar immediate for the MOVI instruction.  */
bool
aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
{
  enum machine_mode vmode;

  gcc_assert (!VECTOR_MODE_P (mode));
  vmode = aarch64_preferred_simd_mode (mode);
  rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
  return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
}
/* Construct and return a PARALLEL RTX vector.  */
rtx
aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits / 2);
  int base = high ? nunits / 2 : 0;
  rtx t1;
  int i;

  for (i = 0; i < nunits / 2; i++)
    RTVEC_ELT (v, i) = GEN_INT (base + i);

  t1 = gen_rtx_PARALLEL (mode, v);
  return t1;
}
/* Bounds-check lanes.  Ensure OPERAND lies between LOW (inclusive) and
   HIGH (exclusive).  */
void
aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  HOST_WIDE_INT lane;
  gcc_assert (GET_CODE (operand) == CONST_INT);
  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    error ("lane out of range");
}

void
aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  gcc_assert (GET_CODE (operand) == CONST_INT);
  HOST_WIDE_INT lane = INTVAL (operand);

  if (lane < low || lane >= high)
    error ("constant out of range");
}
/* Emit code to reinterpret one AdvSIMD type as another,
   without altering bits.  */
void
aarch64_simd_reinterpret (rtx dest, rtx src)
{
  emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
}

/* Emit code to place a AdvSIMD pair result in memory locations (with equal
   registers).  */
void
aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
				    rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
				    rtx op1)
{
  rtx mem = gen_rtx_MEM (mode, destaddr);
  rtx tmp1 = gen_reg_rtx (mode);
  rtx tmp2 = gen_reg_rtx (mode);

  emit_insn (intfn (tmp1, op1, tmp2));

  emit_move_insn (mem, tmp1);
  mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
  emit_move_insn (mem, tmp2);
}
/* Return TRUE if OP is a valid vector addressing mode.  */
bool
aarch64_simd_mem_operand_p (rtx op)
{
  return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
			|| GET_CODE (XEXP (op, 0)) == REG);
}
/* Set up OPERANDS for a register copy from SRC to DEST, taking care
   not to early-clobber SRC registers in the process.

   We assume that the operands described by SRC and DEST represent a
   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
   number of components into which the copy has been decomposed.  */
void
aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
				rtx *src, unsigned int count)
{
  unsigned int i;

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || REGNO (operands[0]) < REGNO (operands[1]))
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[i];
	  operands[2 * i + 1] = src[i];
	}
    }
  else
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[count - i - 1];
	  operands[2 * i + 1] = src[count - i - 1];
	}
    }
}
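/* For example, a two-component copy from q1:q2 into q2:q3 overlaps the
   source registers ahead of it, so the second loop above emits the
   component moves in reverse order (q3 <- q2 first, then q2 <- q1);
   the non-overlapping case keeps the natural order.  */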
/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
   one of VSTRUCT modes: OI, CI or XI.  */
int
aarch64_simd_attr_length_move (rtx insn)
{
  enum machine_mode mode;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
    {
      mode = GET_MODE (recog_data.operand[0]);
      switch (mode)
	{
	case OImode:
	  return 8;
	case CImode:
	  return 12;
	case XImode:
	  return 16;
	default:
	  gcc_unreachable ();
	}
    }
  return 4;
}
/* Implement target hook TARGET_VECTOR_ALIGNMENT.  The AAPCS64 sets the maximum
   alignment of a vector to 128 bits.  */
static HOST_WIDE_INT
aarch64_simd_vector_alignment (const_tree type)
{
  HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
  return MIN (align, 128);
}

/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE.  */
static bool
aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
{
  if (is_packed)
    return false;

  /* We guarantee alignment for vectors up to 128-bits.  */
  if (tree_int_cst_compare (TYPE_SIZE (type),
			    bitsize_int (BIGGEST_ALIGNMENT)) > 0)
    return false;

  /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned.  */
  return true;
}
/* If VALS is a vector constant that can be loaded into a register
   using DUP, generate instructions to do so and return an RTX to
   assign to the register.  Otherwise return NULL_RTX.  */
static rtx
aarch64_simd_dup_constant (rtx vals)
{
  enum machine_mode mode = GET_MODE (vals);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  bool all_same = true;
  rtx x;
  int i;

  if (GET_CODE (vals) != CONST_VECTOR)
    return NULL_RTX;

  for (i = 1; i < n_elts; ++i)
    {
      x = CONST_VECTOR_ELT (vals, i);
      if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
	all_same = false;
    }

  if (!all_same)
    return NULL_RTX;

  /* We can load this constant by using DUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */
  x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
  return gen_rtx_VEC_DUPLICATE (mode, x);
}
/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that can not be converted into a CONST_VECTOR.  */
static rtx
aarch64_simd_make_constant (rtx vals)
{
  enum machine_mode mode = GET_MODE (vals);
  rtx const_dup;
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);
  int n_const = 0;
  int i;

  if (GET_CODE (vals) == CONST_VECTOR)
    const_vec = vals;
  else if (GET_CODE (vals) == PARALLEL)
    {
      /* A CONST_VECTOR must contain only CONST_INTs and
	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
	 Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	    n_const++;
	}
      if (n_const == n_elts)
	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    }
  else
    gcc_unreachable ();

  if (const_vec != NULL_RTX
      && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
    /* Load using MOVI/MVNI.  */
    return const_vec;
  else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
    /* Loaded using DUP.  */
    return const_dup;
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  We can not take advantage of single-cycle
       LD1 because we need a PC-relative addressing mode.  */
    return const_vec;
  else
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We can not construct an initializer.  */
    return NULL_RTX;
}
void
aarch64_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;
  rtx x, mem;
  int i;

  x = XVECEXP (vals, 0, 0);
  if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
    n_var = 1, one_var = 0;

  for (i = 1; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
	++n_var, one_var = i;

      if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (n_var == 0)
    {
      rtx constant = aarch64_simd_make_constant (vals);
      if (constant != NULL_RTX)
	{
	  emit_move_insn (target, constant);
	  return;
	}
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same)
    {
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);
      rtx index = GEN_INT (one_var);
      enum insn_code icode;

      /* Load constant part of vector, substitute neighboring value for
	 varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
      aarch64_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
      icode = optab_handler (vec_set_optab, mode);
      gcc_assert (icode != CODE_FOR_nothing);
      emit_insn (GEN_FCN (icode) (target, x, index));
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
static unsigned HOST_WIDE_INT
aarch64_shift_truncation_mask (enum machine_mode mode)
{
  return
    (aarch64_vector_mode_supported_p (mode)
     || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
}
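/* So for SImode this returns 31 and for DImode 63: the scalar shift
   instructions truncate the shift amount modulo the register width, so
   the middle end may drop an explicit masking operation.  Vector modes
   return 0 (no truncation guarantee), since AdvSIMD shifts do not wrap
   this way.  */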
#ifndef TLS_SECTION_ASM_FLAG
#define TLS_SECTION_ASM_FLAG 'T'
#endif

void
aarch64_elf_asm_named_section (const char *name, unsigned int flags,
			       tree decl ATTRIBUTE_UNUSED)
{
  const char *type;
  char flagchars[10], *f = flagchars;

  /* If we have already declared this section, we can use an
     abbreviated form to switch back to it -- unless this section is
     part of a COMDAT groups, in which case GAS requires the full
     declaration every time.  */
  if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
      && (flags & SECTION_DECLARED))
    {
      fprintf (asm_out_file, "\t.section\t%s\n", name);
      return;
    }

  if (!(flags & SECTION_DEBUG))
    *f++ = 'a';
  if (flags & SECTION_WRITE)
    *f++ = 'w';
  if (flags & SECTION_CODE)
    *f++ = 'x';
  if (flags & SECTION_SMALL)
    *f++ = 's';
  if (flags & SECTION_MERGE)
    *f++ = 'M';
  if (flags & SECTION_STRINGS)
    *f++ = 'S';
  if (flags & SECTION_TLS)
    *f++ = TLS_SECTION_ASM_FLAG;
  if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
    *f++ = 'G';
  *f = '\0';

  fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);

  if (!(flags & SECTION_NOTYPE))
    {
      const char *format;

      if (flags & SECTION_BSS)
	type = "nobits";
      else
	type = "progbits";

#ifdef TYPE_OPERAND_FMT
      format = "," TYPE_OPERAND_FMT;
#else
      format = ",@%s";
#endif

      fprintf (asm_out_file, format, type);

      if (flags & SECTION_ENTSIZE)
	fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
      if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
	{
	  if (TREE_CODE (decl) == IDENTIFIER_NODE)
	    fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
	  else
	    fprintf (asm_out_file, ",%s,comdat",
		     IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
	}
    }

  putc ('\n', asm_out_file);
}
/* Select a format to encode pointers in exception handling data.  */
int
aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
{
  int type;
  switch (aarch64_cmodel)
    {
    case AARCH64_CMODEL_TINY:
    case AARCH64_CMODEL_TINY_PIC:
    case AARCH64_CMODEL_SMALL:
    case AARCH64_CMODEL_SMALL_PIC:
      /* text+got+data < 4Gb.  4-byte signed relocs are sufficient
	 for everything.  */
      type = DW_EH_PE_sdata4;
      break;
    default:
      /* No assumptions here.  8-byte relocs required.  */
      type = DW_EH_PE_sdata8;
      break;
    }
  return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
}
/* Emit load exclusive.  */

static void
aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
			     rtx mem, rtx model_rtx)
{
  rtx (*gen) (rtx, rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_aarch64_load_exclusiveqi; break;
    case HImode: gen = gen_aarch64_load_exclusivehi; break;
    case SImode: gen = gen_aarch64_load_exclusivesi; break;
    case DImode: gen = gen_aarch64_load_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, model_rtx));
}
/* Emit store exclusive.  */

static void
aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
			      rtx rval, rtx mem, rtx model_rtx)
{
  rtx (*gen) (rtx, rtx, rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_aarch64_store_exclusiveqi; break;
    case HImode: gen = gen_aarch64_store_exclusivehi; break;
    case SImode: gen = gen_aarch64_store_exclusivesi; break;
    case DImode: gen = gen_aarch64_store_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (bval, rval, mem, model_rtx));
}
/* Mark the previous jump instruction as unlikely.  */

static void
aarch64_emit_unlikely_jump (rtx insn)
{
  rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);

  insn = emit_jump_insn (insn);
  add_reg_note (insn, REG_BR_PROB, very_unlikely);
}
/* Expand a compare and swap pattern.  */

void
aarch64_expand_compare_and_swap (rtx operands[])
{
  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  enum machine_mode mode, cmp_mode;
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);
  cmp_mode = mode;

  /* Normally the succ memory model must be stronger than fail, but in the
     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */
  if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
      && INTVAL (mod_s) == MEMMODEL_RELEASE)
    mod_s = GEN_INT (MEMMODEL_ACQ_REL);

  switch (mode)
    {
    case QImode:
    case HImode:
      /* For short modes, we're going to perform the comparison in SImode,
	 so do the zero-extension now.  */
      cmp_mode = SImode;
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* Fall through.  */

    case SImode:
    case DImode:
      /* Force the value into a register if needed.  */
      if (!aarch64_plus_operand (oldval, mode))
	oldval = force_reg (cmp_mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  switch (mode)
    {
    case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
    case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
    case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
    case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  x = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_EQ (SImode, x, const0_rtx);
  emit_insn (gen_rtx_SET (VOIDmode, bval, x));
}
/* Split a compare and swap pattern.  */

void
aarch64_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, scratch;
  enum machine_mode mode;
  bool is_weak;
  rtx label1, label2, x, cond;

  rval = operands[0];
  mem = operands[1];
  oldval = operands[2];
  newval = operands[3];
  is_weak = (operands[4] != const0_rtx);
  scratch = operands[7];
  mode = GET_MODE (mem);

  label1 = NULL_RTX;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);

  cond = aarch64_gen_compare_reg (NE, rval, oldval);
  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
  aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));

  aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);

  if (!is_weak)
    {
      x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
      aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
    }
  else
    {
      cond = gen_rtx_REG (CCmode, CC_REGNUM);
      x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, cond, x));
    }

  emit_label (label2);
}
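/* For illustration only (this comment is an addition, not emitted code):
   for a 32-bit strong compare-and-swap with relaxed ordering, the split
   form above corresponds roughly to

	.L1:	ldxr	w0, [x1]	// load exclusive
		cmp	w0, w2		// compare against oldval
		b.ne	.L2		// mismatch: fail without storing
		stxr	w3, w4, [x1]	// store exclusive; w3 = status
		cbnz	w3, .L1		// lost exclusivity: retry
	.L2:

   with the acquire/release variants (LDAXR/STLXR) selected according to
   the memory model passed in operands[5].  */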
/* Split an atomic operation.  */

void
aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
			 rtx value, rtx model_rtx, rtx cond)
{
  enum machine_mode mode = GET_MODE (mem);
  enum machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx label, x;

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
	{
	  value = GEN_INT (-INTVAL (value));
	  code = PLUS;
	}
      /* Fall through.  */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;
    }

  aarch64_emit_store_exclusive (mode, cond, mem,
				gen_lowpart (mode, new_out), model_rtx);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
  aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
}
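/* Note (illustrative): the NOT arm above computes ~(old & value), i.e. the
   NAND semantics required by the __atomic_*_nand builtins, while the MINUS
   arm folds a constant subtrahend into a PLUS of its negation so that the
   add-immediate form can be used inside the load/store-exclusive loop.  */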
static void
aarch64_print_extension (void)
{
  const struct aarch64_option_extension *opt = NULL;

  for (opt = all_extensions; opt->name != NULL; opt++)
    if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
      asm_fprintf (asm_out_file, "+%s", opt->name);

  asm_fprintf (asm_out_file, "\n");
}
static void
aarch64_start_file (void)
{
  if (selected_arch)
    {
      asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
      aarch64_print_extension ();
    }
  else if (selected_cpu)
    {
      asm_fprintf (asm_out_file, "\t.cpu %s", selected_cpu->name);
      aarch64_print_extension ();
    }
  default_file_start();
}
/* Target hook for c_mode_for_suffix.  */
static enum machine_mode
aarch64_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;

  return VOIDmode;
}
/* We can only represent floating point constants which will fit in
   "quarter-precision" values.  These values are characterised by
   a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given by:

   (-1)^s * (n/16) * 2^r

   where:
	's' is the sign bit.
	'n' is an integer in the range 16 <= n <= 31.
	'r' is an integer in the range -3 <= r <= 4.  */
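/* Worked examples (illustrative): 0.25 = (-1)^0 * (16/16) * 2^-2 and
   31.0 = (-1)^0 * (31/16) * 2^4 are representable (31.0 being the largest
   such value), whereas 0.1 is not, since no n in [16, 31] and r in
   [-3, 4] satisfy (n/16) * 2^r = 0.1.  */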
/* Return true iff X can be represented by a quarter-precision
   floating point immediate operand X.  Note, we cannot represent 0.0.  */
bool
aarch64_float_const_representable_p (rtx x)
{
  /* This represents our current view of how many bits
     make up the mantissa.  */
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
  int exponent;
  unsigned HOST_WIDE_INT mantissa, mask;
  HOST_WIDE_INT m1, m2;
  REAL_VALUE_TYPE r, m;

  if (!CONST_DOUBLE_P (x))
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* We cannot represent infinities, NaNs or +/-zero.  We won't
     know if we have +zero until we analyse the mantissa, but we
     can reject the other invalid values.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
      || REAL_VALUE_MINUS_ZERO (r))
    return false;

  /* Extract exponent.  */
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);

  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     m1 holds the low part of the mantissa, m2 the high part.
     WARNING: If we ever have a representation using more than 2 * H_W_I - 1
     bits for the mantissa, this can fail (low bits will be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  REAL_VALUE_TO_INT (&m1, &m2, m);

  /* If the low part of the mantissa has bits set we cannot represent
     the value.  */
  if (m1 != 0)
    return false;
  /* We have rejected the lower HOST_WIDE_INT, so update our
     understanding of how many bits lie in the mantissa and
     look only at the high HOST_WIDE_INT.  */
  mantissa = m2;
  point_pos -= HOST_BITS_PER_WIDE_INT;

  /* We can only represent values with a mantissa of the form 1.xxxx.  */
  mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return false;

  /* Having filtered unrepresentable values, we may now remove all
     but the highest 5 bits.  */
  mantissa >>= point_pos - 5;

  /* We cannot represent the value 0.0, so reject it.  This is handled
     elsewhere.  */
  if (mantissa == 0)
    return false;

  /* Then, as bit 4 is always set, we can mask it off, leaving
     the mantissa in the range [0, 15].  */
  mantissa &= ~(1 << 4);
  gcc_assert (mantissa <= 15);

  /* GCC internally does not use IEEE754-like encoding (where normalized
     significands are in the range [1, 2).  GCC uses [0.5, 1) (see real.c).
     Our mantissa values are shifted 4 places to the left relative to
     normalized IEEE754 so we must modify the exponent returned by REAL_EXP
     by 5 places to correct for GCC's representation.  */
  exponent = 5 - exponent;

  return (exponent >= 0 && exponent <= 7);
}
char*
aarch64_output_simd_mov_immediate (rtx const_vector,
				   enum machine_mode mode,
				   unsigned width)
{
  bool is_valid;
  static char templ[40];
  const char *mnemonic;
  unsigned int lane_count = 0;
  char element_char;

  struct simd_immediate_info info;

  /* This will return true to show const_vector is legal for use as either
     a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
     also update INFO to show how the immediate should be generated.  */
  is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
  gcc_assert (is_valid);

  element_char = sizetochar (info.element_width);
  lane_count = width / info.element_width;

  mode = GET_MODE_INNER (mode);
  if (mode == SFmode || mode == DFmode)
    {
      gcc_assert (info.shift == 0 && ! info.mvn);
      if (aarch64_float_const_zero_rtx_p (info.value))
	info.value = GEN_INT (0);
      else
	{
#define buf_size 20
	  REAL_VALUE_TYPE r;
	  REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
	  char float_buf[buf_size] = {'\0'};
	  real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
#undef buf_size

	  if (lane_count == 1)
	    snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
	  else
	    snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
		      lane_count, element_char, float_buf);
	  return templ;
	}
    }

  mnemonic = info.mvn ? "mvni" : "movi";

  if (lane_count == 1)
    snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
	      mnemonic, UINTVAL (info.value));
  else if (info.shift)
    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
	      ", lsl %d", mnemonic, lane_count, element_char,
	      UINTVAL (info.value), info.shift);
  else
    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
	      mnemonic, lane_count, element_char, UINTVAL (info.value));
  return templ;
}
char*
aarch64_output_scalar_simd_mov_immediate (rtx immediate,
					  enum machine_mode mode)
{
  enum machine_mode vmode;

  gcc_assert (!VECTOR_MODE_P (mode));
  vmode = aarch64_simd_container_mode (mode, 64);
  rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
  return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
}
/* Split operands into moves from op[1] + op[2] into op[0].  */

void
aarch64_split_combinev16qi (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  enum machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
  rtx destlo, desthi;

  gcc_assert (halfmode == V16QImode);

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
			       GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  */
  if (reg_overlap_mentioned_p (operands[2], destlo)
      && reg_overlap_mentioned_p (operands[1], desthi))
    {
      emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
      emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
      emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
    }
  else if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
	 is in the right place already.  */
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
    }
}
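/* Note (illustrative): the three XORs in the reversed-overlap case above
   are the classic in-place exchange a ^= b; b ^= a; a ^= b, swapping the
   two source registers without requiring a scratch register.  */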
/* vec_perm support.  */

#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  enum machine_mode vmode;
  unsigned char nelt;
  bool one_vector_p;
  bool testing_p;
};
/* Generate a variable permutation.  */

static void
aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_SIMD);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
	{
	  /* Expand the argument to a V16QI mode by duplicating it.  */
	  rtx pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
	  emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
	}
      else
	{
	  emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
	}
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
	{
	  pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
	  emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
	}
      else
	{
	  pair = gen_reg_rtx (OImode);
	  emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
	  emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
	}
    }
}
void
aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx rmask[MAX_VECT_LEN], mask;

  gcc_checking_assert (!BYTES_BIG_ENDIAN);

  /* The TBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
  for (i = 0; i < nelt; ++i)
    rmask[i] = mask;
  mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  aarch64_expand_vec_perm_1 (target, op0, op1, sel);
}
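/* For example (illustrative): with a single V8QImode input, the selector
   is ANDed with 7, so an out-of-range index such as 9 selects element
   9 & 7 = 1.  This supplies the modulo behaviour that TBL itself, which
   yields zero for out-of-range indices, does not provide.  */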
/* Recognize patterns suitable for the TRN instructions.  */
static bool
aarch64_evpc_trn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
	return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }
  out = d->target;

  if (odd)
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_trn2v16qi; break;
	case V8QImode: gen = gen_aarch64_trn2v8qi; break;
	case V8HImode: gen = gen_aarch64_trn2v8hi; break;
	case V4HImode: gen = gen_aarch64_trn2v4hi; break;
	case V4SImode: gen = gen_aarch64_trn2v4si; break;
	case V2SImode: gen = gen_aarch64_trn2v2si; break;
	case V2DImode: gen = gen_aarch64_trn2v2di; break;
	case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
	case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
	case V2DFmode: gen = gen_aarch64_trn2v2df; break;
	default:
	  return false;
	}
    }
  else
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_trn1v16qi; break;
	case V8QImode: gen = gen_aarch64_trn1v8qi; break;
	case V8HImode: gen = gen_aarch64_trn1v8hi; break;
	case V4HImode: gen = gen_aarch64_trn1v4hi; break;
	case V4SImode: gen = gen_aarch64_trn1v4si; break;
	case V2SImode: gen = gen_aarch64_trn1v2si; break;
	case V2DImode: gen = gen_aarch64_trn1v2di; break;
	case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
	case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
	case V2DFmode: gen = gen_aarch64_trn1v2df; break;
	default:
	  return false;
	}
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
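/* For example (illustrative): on V4SImode, TRN1 matches the permutation
   {0, 4, 2, 6} and TRN2 matches {1, 5, 3, 7}.  */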
/* Recognize patterns suitable for the UZP instructions.  */
static bool
aarch64_evpc_uzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt = (i * 2 + odd) & mask;
      if (d->perm[i] != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }
  out = d->target;

  if (odd)
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
	case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
	case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
	case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
	case V4SImode: gen = gen_aarch64_uzp2v4si; break;
	case V2SImode: gen = gen_aarch64_uzp2v2si; break;
	case V2DImode: gen = gen_aarch64_uzp2v2di; break;
	case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
	case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
	case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
	default:
	  return false;
	}
    }
  else
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
	case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
	case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
	case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
	case V4SImode: gen = gen_aarch64_uzp1v4si; break;
	case V2SImode: gen = gen_aarch64_uzp1v2si; break;
	case V2DImode: gen = gen_aarch64_uzp1v2di; break;
	case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
	case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
	case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
	default:
	  return false;
	}
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
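/* For example (illustrative): on V4SImode, UZP1 matches the permutation
   {0, 2, 4, 6} and UZP2 matches {1, 3, 5, 7}.  */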
/* Recognize patterns suitable for the ZIP instructions.  */
static bool
aarch64_evpc_zip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  high = nelt / 2;
  if (d->perm[0] == high)
    /* Do Nothing.  */
    ;
  else if (d->perm[0] == 0)
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt = (i + high) & mask;
      if (d->perm[i * 2] != elt)
	return false;
      elt = (elt + nelt) & mask;
      if (d->perm[i * 2 + 1] != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      high = !high;
    }
  out = d->target;

  if (high)
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_zip2v16qi; break;
	case V8QImode: gen = gen_aarch64_zip2v8qi; break;
	case V8HImode: gen = gen_aarch64_zip2v8hi; break;
	case V4HImode: gen = gen_aarch64_zip2v4hi; break;
	case V4SImode: gen = gen_aarch64_zip2v4si; break;
	case V2SImode: gen = gen_aarch64_zip2v2si; break;
	case V2DImode: gen = gen_aarch64_zip2v2di; break;
	case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
	case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
	case V2DFmode: gen = gen_aarch64_zip2v2df; break;
	default:
	  return false;
	}
    }
  else
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_zip1v16qi; break;
	case V8QImode: gen = gen_aarch64_zip1v8qi; break;
	case V8HImode: gen = gen_aarch64_zip1v8hi; break;
	case V4HImode: gen = gen_aarch64_zip1v4hi; break;
	case V4SImode: gen = gen_aarch64_zip1v4si; break;
	case V2SImode: gen = gen_aarch64_zip1v2si; break;
	case V2DImode: gen = gen_aarch64_zip1v2di; break;
	case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
	case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
	case V2DFmode: gen = gen_aarch64_zip1v2df; break;
	default:
	  return false;
	}
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
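/* For example (illustrative): on V4SImode, ZIP1 matches the permutation
   {0, 4, 1, 5} and ZIP2 matches {2, 6, 3, 7}.  */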
static bool
aarch64_evpc_tbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  enum machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  /* TODO: ARM's TBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
static bool
aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;
      rtx x;

      for (i = 0; i < nelt; ++i)
	d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);

      x = d->op0;
      d->op0 = d->op1;
      d->op1 = x;
    }

  if (TARGET_SIMD)
    {
      if (aarch64_evpc_zip (d))
	return true;
      else if (aarch64_evpc_uzp (d))
	return true;
      else if (aarch64_evpc_trn (d))
	return true;
      return aarch64_evpc_tbl (d);
    }
  return false;
}
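/* For example (illustrative): on V4SImode the permutation {5, 1, 7, 3}
   begins with an index into the second operand; after swapping the
   operands it is rewritten as {1, 5, 3, 7}, which aarch64_evpc_trn then
   recognizes as TRN2.  */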
/* Expand a vec_perm_const pattern.  */

bool
aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
	break;

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      /* Fall Through.  */
    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return aarch64_expand_vec_perm_const_1 (&d);
}
static bool
aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
				     const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Calculate whether all elements are in one vector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* If all elements are from the second vector, reindex as if from the
     first vector.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to a single vector.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = aarch64_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST aarch64_address_cost
/* This hook determines whether unnamed bitfields affect the alignment
   of the containing structure.  The hook returns true if the structure
   should inherit the alignment requirements of an unnamed bitfield's
   type.  */
#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
  hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START aarch64_start_file

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk

#undef TARGET_ASM_SELECT_RTX_SECTION
#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list

#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE aarch64_can_eliminate

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
/* Only the least significant bit is used for initialization guard
   variables.  */
#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix

#ifdef TARGET_BIG_ENDIAN_DEFAULT
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
#endif
#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL aarch64_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN aarch64_fold_builtin

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG aarch64_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance

#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE aarch64_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS aarch64_init_builtins

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE aarch64_mangle_type

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

/* This target hook should return true if accesses to volatile bitfields
   should use the narrowest mode possible.  It should return false if these
   accesses should use the bitfield container type.  */
#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE aarch64_override_options

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  aarch64_override_options_after_change

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB aarch64_return_in_msb

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS aarch64_rtx_costs

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p

#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  aarch64_builtin_vectorized_function

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  aarch64_autovectorize_vector_sizes

/* Section anchor support.  */

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -256

/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
   byte offset; we can do much more for larger data types, but have no way
   to determine the size of the access.  We assume accesses are aligned.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  aarch64_simd_vector_alignment_reachable

/* vec_perm support.  */

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  aarch64_vectorize_vec_perm_const_ok

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-aarch64.h"