/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2013 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "coretypes.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "hard-reg-set.h"
#include "target-def.h"
#include "targhooks.h"
#include "langhooks.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"

/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC
       A constant symbolic address, in pc-relative literal pool.  */

enum aarch64_address_type
{
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};

struct aarch64_address_info
{
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};

struct simd_immediate_info;

/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif

static bool aarch64_composite_type_p (const_tree, enum machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
						     const_tree,
						     enum machine_mode *, int *,
						     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (enum machine_mode);
static unsigned bit_count (unsigned HOST_WIDE_INT);
static bool aarch64_const_vec_all_same_int_p (rtx,
					      HOST_WIDE_INT, HOST_WIDE_INT);
static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
						 const unsigned char *sel);

/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = generic;

/* The current tuning set.  */
const struct tune_params *aarch64_tune_params;

/* Mask to specify which instructions we are allowed to generate.  */
unsigned long aarch64_isa_flags = 0;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;

/* Tuning parameters.  */

#if HAVE_DESIGNATED_INITIALIZERS
#define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
#else
#define NAMED_PARAM(NAME, VAL) (VAL)
#endif

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_rtx_cost_table generic_rtx_cost_table =
{
  NAMED_PARAM (memory_load, COSTS_N_INSNS (1)),
  NAMED_PARAM (memory_store, COSTS_N_INSNS (0)),
  NAMED_PARAM (register_shift, COSTS_N_INSNS (1)),
  NAMED_PARAM (int_divide, COSTS_N_INSNS (6)),
  NAMED_PARAM (float_divide, COSTS_N_INSNS (2)),
  NAMED_PARAM (double_divide, COSTS_N_INSNS (6)),
  NAMED_PARAM (int_multiply, COSTS_N_INSNS (1)),
  NAMED_PARAM (int_multiply_extend, COSTS_N_INSNS (1)),
  NAMED_PARAM (int_multiply_add, COSTS_N_INSNS (1)),
  NAMED_PARAM (int_multiply_extend_add, COSTS_N_INSNS (1)),
  NAMED_PARAM (float_multiply, COSTS_N_INSNS (0)),
  NAMED_PARAM (double_multiply, COSTS_N_INSNS (1))
};

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_addrcost_table generic_addrcost_table =
{
  NAMED_PARAM (pre_modify, 0),
  NAMED_PARAM (post_modify, 0),
  NAMED_PARAM (register_offset, 0),
  NAMED_PARAM (register_extend, 0),
  NAMED_PARAM (imm_offset, 0)
};

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_regmove_cost generic_regmove_cost =
{
  NAMED_PARAM (GP2GP, 1),
  NAMED_PARAM (GP2FP, 2),
  NAMED_PARAM (FP2GP, 2),
  /* We currently do not provide direct support for TFmode Q->Q move.
     Therefore we need to raise the cost above 2 in order to have
     reload handle the situation.  */
  NAMED_PARAM (FP2FP, 4)
};

/* Generic costs for vector insn classes.  */
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_vector_cost generic_vector_cost =
{
  NAMED_PARAM (scalar_stmt_cost, 1),
  NAMED_PARAM (scalar_load_cost, 1),
  NAMED_PARAM (scalar_store_cost, 1),
  NAMED_PARAM (vec_stmt_cost, 1),
  NAMED_PARAM (vec_to_scalar_cost, 1),
  NAMED_PARAM (scalar_to_vec_cost, 1),
  NAMED_PARAM (vec_align_load_cost, 1),
  NAMED_PARAM (vec_unalign_load_cost, 1),
  NAMED_PARAM (vec_unalign_store_cost, 1),
  NAMED_PARAM (vec_store_cost, 1),
  NAMED_PARAM (cond_taken_branch_cost, 3),
  NAMED_PARAM (cond_not_taken_branch_cost, 1)
};

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct tune_params generic_tunings =
{
  &generic_rtx_cost_table,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  NAMED_PARAM (memmov_cost, 4)
};

/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};

/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
#include "aarch64-cores.def"
#undef AARCH64_CORE
  {"generic", generic, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8,
   &generic_tunings},
  {NULL, aarch64_none, NULL, 0, NULL}
};

/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, FLAGS, NULL},
#include "aarch64-arches.def"
#undef AARCH64_ARCH
  {"generic", generic, "8", AARCH64_FL_FOR_ARCH8, NULL},
  {NULL, aarch64_none, NULL, 0, NULL}
};

/* Target specification.  These are populated as commandline arguments
   are processed, or NULL if not specified.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)

/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};

/* ISA extensions in AArch64.  */
static const struct aarch64_option_extension all_extensions[] =
{
#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
  {NAME, FLAGS_ON, FLAGS_OFF},
#include "aarch64-option-extensions.def"
#undef AARCH64_OPT_EXTENSION
  {NULL, 0, 0}
};

/* Used to track the size of an address when generating a pre/post
   increment address.  */
static enum machine_mode aarch64_memory_reference_mode;

/* Used to force GTY into this file.  */
static GTY(()) int gty_dummy;

/* A table of valid AArch64 "bitmask immediate" values for
   logical instructions.  */

#define AARCH64_NUM_BITMASKS  5334
static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
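
/* Illustrative note: a "bitmask immediate" is a power-of-two-sized
   element (2 to 64 bits) containing a single contiguous, possibly
   rotated, run of set bits, replicated across the register.  For
   example, 0x00ff00ff00ff00ff (the 16-bit element 0x00ff repeated
   four times) is encodable, while an arbitrary constant such as
   0x0123456789abcdef is not.  The table above is meant to hold the
   complete set of such values so the expanders below can search it.  */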

/* Did we set flag_omit_frame_pointer just so
   aarch64_frame_pointer_required would be called?  */
static bool faked_omit_frame_pointer;

typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
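
/* Worked example: the enumeration above follows the architectural
   condition-code encoding, in which each code and its inverse differ
   only in the low bit.  Hence AARCH64_INVERSE_CONDITION_CODE maps
   AARCH64_EQ (0) to AARCH64_NE (1), and AARCH64_GE (10) to
   AARCH64_LT (11), with a single XOR.  */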

/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return AARCH64_DWARF_R0 + regno - R0_REGNUM;
  else if (regno == SP_REGNUM)
    return AARCH64_DWARF_SP;
  else if (FP_REGNUM_P (regno))
    return AARCH64_DWARF_V0 + regno - V0_REGNUM;

  /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
     equivalent DWARF register.  */
  return DWARF_FRAME_REGISTERS;
}
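
/* Example of the mapping: x7 yields AARCH64_DWARF_R0 + 7, the stack
   pointer yields AARCH64_DWARF_SP, and v3 yields AARCH64_DWARF_V0 + 3;
   any other register (the condition flags, for instance) falls through
   to the "no DWARF equivalent" value.  */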

/* Return TRUE if MODE is any of the large INT modes.  */
static bool
aarch64_vect_struct_mode_p (enum machine_mode mode)
{
  return mode == OImode || mode == CImode || mode == XImode;
}

/* Return TRUE if MODE is any of the vector modes.  */
static bool
aarch64_vector_mode_p (enum machine_mode mode)
{
  return aarch64_vector_mode_supported_p (mode)
	 || aarch64_vect_struct_mode_p (mode);
}

/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
static bool
aarch64_array_mode_supported_p (enum machine_mode mode,
				unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_SIMD
      && AARCH64_VALID_SIMD_QREG_MODE (mode)
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}

/* Implement HARD_REGNO_NREGS.  */

int
aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
{
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
    default:
      return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    }
  gcc_unreachable ();
}
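
/* Worked example, assuming 16-byte vector registers and 8-byte words:
   a 32-byte OImode value occupies (32 + 16 - 1) / 16 == 2 FP
   registers, while the same 32 bytes in general registers occupy
   (32 + 8 - 1) / 8 == 4; both expressions are ceiling divisions by
   the register size.  */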

/* Implement HARD_REGNO_MODE_OK.  */

int
aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return regno == CC_REGNUM;

  if (regno == SP_REGNUM || regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return mode == Pmode;

  if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
    return 1;

  if (FP_REGNUM_P (regno))
    {
      if (aarch64_vect_struct_mode_p (mode))
	return
	  (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
      else
	return 1;
    }

  return 0;
}

/* Return true if calls to DECL should be treated as
   long-calls (i.e. called via a register).  */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
  return false;
}

/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (i.e. called via a register).  */
bool
aarch64_is_long_call_p (rtx sym)
{
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}

/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

   (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)).  */
bool
aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
				rtx extract_imm)
{
  HOST_WIDE_INT mult_val, extract_val;

  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
    return false;

  mult_val = INTVAL (mult_imm);
  extract_val = INTVAL (extract_imm);

  if (extract_val > 8
      && extract_val < GET_MODE_BITSIZE (mode)
      && exact_log2 (extract_val & ~7) > 0
      && (extract_val & 7) <= 4
      && mult_val == (1 << (extract_val & 7)))
    return true;

  return false;
}
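
/* Worked example: in DImode, mult_imm == 4 and extract_imm == 34
   pass every test above: 34 > 8 and 34 < 64,
   exact_log2 (34 & ~7) == exact_log2 (32) == 5 > 0, (34 & 7) == 2 <= 4,
   and 4 == 1 << 2; i.e. the extract describes a value extended and
   then scaled by 4, as produced by an extended-register address.  */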

/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}

/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
  enum machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
  return cc_reg;
}

/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

rtx
aarch64_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}

/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;
  rtx sym, addend;

  if (GET_CODE (addr) == CONST)
    {
      split_const (addr, &sym, &addend);
      if (GET_CODE (sym) == SYMBOL_REF)
	tls_kind = SYMBOL_REF_TLS_MODEL (sym);
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}

/* We'll allow lo_sum's in addresses in our legitimate addresses
   so that combine would take care of combining addresses where
   necessary, but for generation purposes, we'll generate the address
   as:

   RTL                               Absolute
   tmp = hi (symbol_ref);            adrp  x1, foo
   dest = lo_sum (tmp, symbol_ref);  add dest, x1, :lo_12:foo

   PIC                               TLS
   adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
   ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo
                                     bl   __tls_get_addr

   Load TLS symbol, depending on TLS mechanism and TLS access model.

   Global Dynamic - Traditional TLS:
   adrp tmp, :tlsgd:imm
   add  dest, tmp, #:tlsgd_lo12:imm
   bl   __tls_get_addr

   Global Dynamic - TLS Descriptors:
   adrp dest, :tlsdesc:imm
   ldr  tmp, [dest, #:tlsdesc_lo12:imm]
   add  dest, dest, #:tlsdesc_lo12:imm
   blr  tmp

   Initial Exec:
   adrp tmp, :gottprel:imm
   ldr  dest, [tmp, #:gottprel_lo12:imm]

   Local Exec:
   add  t0, tp, #:tprel_hi12:imm
   add  t0, #:tprel_lo12_nc:imm
*/

static void
aarch64_load_symref_appropriately (rtx dest, rtx imm,
				   enum aarch64_symbol_type type)
{
  switch (type)
    {
    case SYMBOL_SMALL_ABSOLUTE:
      {
	rtx tmp_reg = dest;

	if (can_create_pseudo_p ())
	  tmp_reg = gen_reg_rtx (Pmode);

	emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
	emit_insn (gen_add_losym (dest, tmp_reg, imm));
	return;
      }

    case SYMBOL_TINY_ABSOLUTE:
      emit_insn (gen_rtx_SET (Pmode, dest, imm));
      return;

    case SYMBOL_SMALL_GOT:
      {
	rtx tmp_reg = dest;

	if (can_create_pseudo_p ())
	  tmp_reg = gen_reg_rtx (Pmode);
	emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
	emit_insn (gen_ldr_got_small (dest, tmp_reg, imm));
	return;
      }

    case SYMBOL_SMALL_TLSGD:
      {
	rtx insns;
	rtx result = gen_rtx_REG (Pmode, R0_REGNUM);

	start_sequence ();
	emit_call_insn (gen_tlsgd_small (result, imm));
	insns = get_insns ();
	end_sequence ();

	RTL_CONST_CALL_P (insns) = 1;
	emit_libcall_block (insns, dest, result, imm);
	return;
      }

    case SYMBOL_SMALL_TLSDESC:
      {
	rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
	rtx tp;

	emit_insn (gen_tlsdesc_small (imm));
	tp = aarch64_load_tp (NULL);
	emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_SMALL_GOTTPREL:
      {
	rtx tmp_reg = gen_reg_rtx (Pmode);
	rtx tp = aarch64_load_tp (NULL);
	emit_insn (gen_tlsie_small (tmp_reg, imm));
	emit_insn (gen_rtx_SET (Pmode, dest,
				gen_rtx_PLUS (Pmode, tp, tmp_reg)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_SMALL_TPREL:
      {
	rtx tp = aarch64_load_tp (NULL);
	emit_insn (gen_tlsle_small (dest, tp, imm));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_TINY_GOT:
      emit_insn (gen_ldr_got_tiny (dest, imm));
      return;

    default:
      gcc_unreachable ();
    }
}

/* Emit a move from SRC to DEST.  Assume that the move expanders can
   handle all moves if !can_create_pseudo_p ().  The distinction is
   important because, unlike emit_move_insn, the move expanders know
   how to force Pmode objects into the constant pool even when the
   constant pool address is not itself legitimate.  */
static rtx
aarch64_emit_move (rtx dest, rtx src)
{
  return (can_create_pseudo_p ()
	  ? emit_move_insn (dest, src)
	  : emit_move_insn_1 (dest, src));
}

void
aarch64_split_128bit_move (rtx dst, rtx src)
{
  rtx low_dst;

  enum machine_mode src_mode = GET_MODE (src);
  enum machine_mode dst_mode = GET_MODE (dst);
  int src_regno = REGNO (src);
  int dst_regno = REGNO (dst);

  gcc_assert (dst_mode == TImode || dst_mode == TFmode);

  if (REG_P (dst) && REG_P (src))
    {
      gcc_assert (src_mode == TImode || src_mode == TFmode);

      /* Handle r -> w, w -> r.  */
      if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
	{
	  if (src_mode == TImode)
	    {
	      emit_insn
		(gen_aarch64_movtilow_di (dst, gen_lowpart (word_mode, src)));
	      emit_insn
		(gen_aarch64_movtihigh_di (dst, gen_highpart (word_mode, src)));
	    }
	  else
	    {
	      emit_insn
		(gen_aarch64_movtflow_di (dst, gen_lowpart (word_mode, src)));
	      emit_insn
		(gen_aarch64_movtfhigh_di (dst, gen_highpart (word_mode, src)));
	    }
	  return;
	}
      else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
	{
	  if (src_mode == TImode)
	    {
	      emit_insn
		(gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst), src));
	      emit_insn
		(gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst), src));
	    }
	  else
	    {
	      emit_insn
		(gen_aarch64_movdi_tflow (gen_lowpart (word_mode, dst), src));
	      emit_insn
		(gen_aarch64_movdi_tfhigh (gen_highpart (word_mode, dst), src));
	    }
	  return;
	}
      /* Fall through to r -> r cases.  */
    }

  if (dst_mode == TImode)
    {
      low_dst = gen_lowpart (word_mode, dst);
      if (REG_P (low_dst)
	  && reg_overlap_mentioned_p (low_dst, src))
	{
	  aarch64_emit_move (gen_highpart (word_mode, dst),
			     gen_highpart_mode (word_mode, TImode, src));
	  aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
	}
      else
	{
	  aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
	  aarch64_emit_move (gen_highpart (word_mode, dst),
			     gen_highpart_mode (word_mode, TImode, src));
	}
    }
  else
    {
      emit_move_insn (gen_rtx_REG (DFmode, dst_regno),
		      gen_rtx_REG (DFmode, src_regno));
      emit_move_insn (gen_rtx_REG (DFmode, dst_regno + 1),
		      gen_rtx_REG (DFmode, src_regno + 1));
    }
}

bool
aarch64_split_128bit_move_p (rtx dst, rtx src)
{
  return (! REG_P (src)
	  || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
}

/* Split a complex SIMD combine.  */

void
aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
{
  enum machine_mode src_mode = GET_MODE (src1);
  enum machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src1) && REG_P (src2))
    {
      rtx (*gen) (rtx, rtx, rtx);

      switch (src_mode)
	{
	case V8QImode:
	  gen = gen_aarch64_simd_combinev8qi;
	  break;
	case V4HImode:
	  gen = gen_aarch64_simd_combinev4hi;
	  break;
	case V2SImode:
	  gen = gen_aarch64_simd_combinev2si;
	  break;
	case V2SFmode:
	  gen = gen_aarch64_simd_combinev2sf;
	  break;
	case DImode:
	  gen = gen_aarch64_simd_combinedi;
	  break;
	case DFmode:
	  gen = gen_aarch64_simd_combinedf;
	  break;
	default:
	  gcc_unreachable ();
	}

      emit_insn (gen (dst, src1, src2));
      return;
    }
}

/* Split a complex SIMD move.  */

void
aarch64_split_simd_move (rtx dst, rtx src)
{
  enum machine_mode src_mode = GET_MODE (src);
  enum machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src))
    {
      rtx (*gen) (rtx, rtx);

      gcc_assert (VECTOR_MODE_P (src_mode));

      switch (src_mode)
	{
	case V16QImode:
	  gen = gen_aarch64_split_simd_movv16qi;
	  break;
	case V8HImode:
	  gen = gen_aarch64_split_simd_movv8hi;
	  break;
	case V4SImode:
	  gen = gen_aarch64_split_simd_movv4si;
	  break;
	case V2DImode:
	  gen = gen_aarch64_split_simd_movv2di;
	  break;
	case V4SFmode:
	  gen = gen_aarch64_split_simd_movv4sf;
	  break;
	case V2DFmode:
	  gen = gen_aarch64_split_simd_movv2df;
	  break;
	default:
	  gcc_unreachable ();
	}

      emit_insn (gen (dst, src));
      return;
    }
}

static rtx
aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
{
  if (can_create_pseudo_p ())
    return force_reg (mode, value);
  else
    {
      x = aarch64_emit_move (x, value);
      return x;
    }
}

static rtx
aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg,
		    HOST_WIDE_INT offset)
{
  if (!aarch64_plus_immediate (GEN_INT (offset), mode))
    {
      rtx high;
      /* Load the full offset into a register.  This
         might be improvable in the future.  */
      high = GEN_INT (offset);
      offset = 0;
      high = aarch64_force_temporary (mode, temp, high);
      reg = aarch64_force_temporary (mode, temp,
				     gen_rtx_PLUS (mode, high, reg));
    }
  return plus_constant (mode, reg, offset);
}

void
aarch64_expand_mov_immediate (rtx dest, rtx imm)
{
  enum machine_mode mode = GET_MODE (dest);
  unsigned HOST_WIDE_INT mask;
  int i;
  bool first;
  unsigned HOST_WIDE_INT val;
  bool subtargets;
  rtx subtarget;
  int one_match, zero_match;

  gcc_assert (mode == SImode || mode == DImode);

  /* Check on what type of symbol it is.  */
  if (GET_CODE (imm) == SYMBOL_REF
      || GET_CODE (imm) == LABEL_REF
      || GET_CODE (imm) == CONST)
    {
      rtx mem, base, offset;
      enum aarch64_symbol_type sty;

      /* If we have (const (plus symbol offset)), separate out the offset
	 before we start classifying the symbol.  */
      split_const (imm, &base, &offset);

      sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
      switch (sty)
	{
	case SYMBOL_FORCE_TO_MEM:
	  if (offset != const0_rtx
	      && targetm.cannot_force_const_mem (mode, imm))
	    {
	      gcc_assert (can_create_pseudo_p ());
	      base = aarch64_force_temporary (mode, dest, base);
	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
	      aarch64_emit_move (dest, base);
	      return;
	    }
	  mem = force_const_mem (mode, imm);
	  gcc_assert (mem);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
	  return;

	case SYMBOL_SMALL_TLSGD:
	case SYMBOL_SMALL_TLSDESC:
	case SYMBOL_SMALL_GOTTPREL:
	case SYMBOL_SMALL_GOT:
	case SYMBOL_TINY_GOT:
	  if (offset != const0_rtx)
	    {
	      gcc_assert (can_create_pseudo_p ());
	      base = aarch64_force_temporary (mode, dest, base);
	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
	      aarch64_emit_move (dest, base);
	      return;
	    }
	  /* FALLTHRU */

	case SYMBOL_SMALL_TPREL:
	case SYMBOL_SMALL_ABSOLUTE:
	case SYMBOL_TINY_ABSOLUTE:
	  aarch64_load_symref_appropriately (dest, imm, sty);
	  return;

	default:
	  gcc_unreachable ();
	}
    }

  if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
    {
      emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
      return;
    }

  if (!CONST_INT_P (imm))
    {
      if (GET_CODE (imm) == HIGH)
	emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
      else
	{
	  rtx mem = force_const_mem (mode, imm);
	  gcc_assert (mem);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
	}

      return;
    }

  if (mode == SImode)
    {
      /* We know we can't do this in 1 insn, and we must be able to do it
	 in two; so don't mess around looking for sequences that don't buy
	 us anything.  */
      emit_insn (gen_rtx_SET (VOIDmode, dest,
			      GEN_INT (INTVAL (imm) & 0xffff)));
      emit_insn (gen_insv_immsi (dest, GEN_INT (16),
				 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
      return;
    }

  /* Remaining cases are all for DImode.  */

  val = INTVAL (imm);
  subtargets = optimize && can_create_pseudo_p ();

  one_match = 0;
  zero_match = 0;
  mask = 0xffff;

  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) == 0)
	zero_match++;
      else if ((val & mask) == mask)
	one_match++;
    }

  if (one_match == 2)
    {
      mask = 0xffff;
      for (i = 0; i < 64; i += 16, mask <<= 16)
	{
	  if ((val & mask) != mask)
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
	      emit_insn (gen_insv_immdi (dest, GEN_INT (i),
					 GEN_INT ((val >> i) & 0xffff)));
	      return;
	    }
	}
      gcc_unreachable ();
    }

  if (zero_match == 2)
    goto simple_sequence;

  mask = 0x0ffff0000UL;
  for (i = 16; i < 64; i += 16, mask <<= 16)
    {
      HOST_WIDE_INT comp = mask & ~(mask - 1);

      if (aarch64_uimm12_shift (val - (val & mask)))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - (val & mask))));
	  return;
	}
      else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT ((val + comp) & mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - ((val + comp) & mask))));
	  return;
	}
      else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT ((val - comp) | ~mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - ((val - comp) | ~mask))));
	  return;
	}
      else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT (val | ~mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - (val | ~mask))));
	  return;
	}
    }

  /* See if we can do it by arithmetically combining two
     immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      int j;
      mask = 0xffff;

      if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
	  || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT (aarch64_bitmasks[i])));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - aarch64_bitmasks[i])));
	  return;
	}

      for (j = 0; j < 64; j += 16, mask <<= 16)
	{
	  if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, dest,
				      GEN_INT (aarch64_bitmasks[i])));
	      emit_insn (gen_insv_immdi (dest, GEN_INT (j),
					 GEN_INT ((val >> j) & 0xffff)));
	      return;
	    }
	}
    }

  /* See if we can do it by logically combining two immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
	{
	  int j;

	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
	    if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
	      {
		subtarget = subtargets ? gen_reg_rtx (mode) : dest;
		emit_insn (gen_rtx_SET (VOIDmode, subtarget,
					GEN_INT (aarch64_bitmasks[i])));
		emit_insn (gen_iordi3 (dest, subtarget,
				       GEN_INT (aarch64_bitmasks[j])));
		return;
	      }
	}
      else if ((val & aarch64_bitmasks[i]) == val)
	{
	  int j;

	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
	    if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
	      {
		subtarget = subtargets ? gen_reg_rtx (mode) : dest;
		emit_insn (gen_rtx_SET (VOIDmode, subtarget,
					GEN_INT (aarch64_bitmasks[j])));
		emit_insn (gen_anddi3 (dest, subtarget,
				       GEN_INT (aarch64_bitmasks[i])));
		return;
	      }
	}
    }

 simple_sequence:
  first = true;
  mask = 0xffff;
  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) != 0)
	{
	  if (first)
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, dest,
				      GEN_INT (val & mask)));
	      first = false;
	    }
	  else
	    emit_insn (gen_insv_immdi (dest, GEN_INT (i),
				       GEN_INT ((val >> i) & 0xffff)));
	}
    }
}

static bool
aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  /* Indirect calls are not currently supported.  */
  if (decl == NULL)
    return false;

  /* Cannot tail-call to long-calls, since these are outside of the
     range of a branch instruction (we could handle this if we added
     support for indirect tail-calls).  */
  if (aarch64_decl_is_long_call_p (decl))
    return false;

  return true;
}

/* Implement TARGET_PASS_BY_REFERENCE.  */

static bool
aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
			   enum machine_mode mode,
			   const_tree type,
			   bool named ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  enum machine_mode dummymode;
  int nregs;

  /* GET_MODE_SIZE (BLKmode) is useless since it is 0.  */
  size = (mode == BLKmode && type)
	 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);

  if (type)
    {
      /* Arrays are always passed by reference.  */
      if (TREE_CODE (type) == ARRAY_TYPE)
	return true;

      /* Other aggregates based on their size.  */
      if (AGGREGATE_TYPE_P (type))
	size = int_size_in_bytes (type);
    }

  /* Variable sized arguments are always passed by reference.  */
  if (size < 0)
    return true;

  /* Can this be a candidate to be passed in fp/simd register(s)?  */
  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
					       &dummymode, &nregs,
					       NULL))
    return false;

  /* Arguments which are variable sized or larger than 2 registers are
     passed by reference unless they are a homogeneous floating-point
     aggregate.  */
  return size > 2 * UNITS_PER_WORD;
}
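
/* Examples of the rules above: a structure of four doubles (32 bytes)
   is a homogeneous floating-point aggregate, qualifies as an fp/simd
   candidate and is therefore not passed by reference, whereas a
   structure of three pointers (24 bytes on LP64) exceeds
   2 * UNITS_PER_WORD and is passed by reference.  */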

/* Return TRUE if VALTYPE is padded to its least significant bits.  */
static bool
aarch64_return_in_msb (const_tree valtype)
{
  enum machine_mode dummy_mode;
  int dummy_int;

  /* Never happens in little-endian mode.  */
  if (!BYTES_BIG_ENDIAN)
    return false;

  /* Only composite types smaller than or equal to 16 bytes can
     be potentially returned in registers.  */
  if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
      || int_size_in_bytes (valtype) <= 0
      || int_size_in_bytes (valtype) > 16)
    return false;

  /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
     or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
     is always passed/returned in the least significant bits of fp/simd
     register(s).  */
  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
					       &dummy_mode, &dummy_int, NULL))
    return false;

  return true;
}

/* Implement TARGET_FUNCTION_VALUE.
   Define how to find the value returned by a function.  */

static rtx
aarch64_function_value (const_tree type, const_tree func,
			bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode mode;
  int unsignedp;
  int count;
  enum machine_mode ag_mode;

  mode = TYPE_MODE (type);
  if (INTEGRAL_TYPE_P (type))
    mode = promote_function_mode (type, mode, &unsignedp, func, 1);

  if (aarch64_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);

      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
	}
    }

  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
					       &ag_mode, &count, NULL))
    {
      if (!aarch64_composite_type_p (type, mode))
	{
	  gcc_assert (count == 1 && mode == ag_mode);
	  return gen_rtx_REG (mode, V0_REGNUM);
	}
      else
	{
	  int i;
	  rtx par;

	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
	  for (i = 0; i < count; i++)
	    {
	      rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				       GEN_INT (i * GET_MODE_SIZE (ag_mode)));
	      XVECEXP (par, 0, i) = tmp;
	    }
	  return par;
	}
    }
  else
    return gen_rtx_REG (mode, R0_REGNUM);
}

/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
   Return true if REGNO is the number of a hard register in which the values
   of called function may come back.  */

static bool
aarch64_function_value_regno_p (const unsigned int regno)
{
  /* Maximum of 16 bytes can be returned in the general registers.  Examples
     of 16-byte return values are: 128-bit integers and 16-byte small
     structures (excluding homogeneous floating-point aggregates).  */
  if (regno == R0_REGNUM || regno == R1_REGNUM)
    return true;

  /* Up to four fp/simd registers can return a function value, e.g. a
     homogeneous floating-point aggregate having four members.  */
  if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
    return !TARGET_GENERAL_REGS_ONLY;

  return false;
}

/* Implement TARGET_RETURN_IN_MEMORY.

   If the type T of the result of a function is such that
     void func (T arg)
   would require that arg be passed as a value in a register (or set of
   registers) according to the parameter passing rules, then the result
   is returned in the same registers as would be used for such an
   argument.  */

static bool
aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  enum machine_mode ag_mode;
  int count;

  if (!AGGREGATE_TYPE_P (type)
      && TREE_CODE (type) != COMPLEX_TYPE
      && TREE_CODE (type) != VECTOR_TYPE)
    /* Simple scalar types always returned in registers.  */
    return false;

  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
					       type,
					       &ag_mode,
					       &count,
					       NULL))
    return false;

  /* Types larger than 2 registers returned in memory.  */
  size = int_size_in_bytes (type);
  return (size < 0 || size > 2 * UNITS_PER_WORD);
}

static bool
aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
			       const_tree type, int *nregs)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  return aarch64_vfp_is_call_or_return_candidate (mode,
						  type,
						  &pcum->aapcs_vfp_rmode,
						  nregs,
						  NULL);
}

/* Given MODE and TYPE of a function argument, return the alignment in
   bits.  The idea is to suppress any stronger alignment requested by
   the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
   This is a helper function for local use only.  */

static unsigned int
aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
{
  unsigned int alignment;

  if (type)
    {
      if (!integer_zerop (TYPE_SIZE (type)))
	{
	  if (TYPE_MODE (type) == mode)
	    alignment = TYPE_ALIGN (type);
	  else
	    alignment = GET_MODE_ALIGNMENT (mode);
	}
      else
	alignment = 0;
    }
  else
    alignment = GET_MODE_ALIGNMENT (mode);

  return alignment;
}

/* Layout a function argument according to the AAPCS64 rules.  The rule
   numbers refer to the rule numbers in the AAPCS64.  */

static void
aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
		    const_tree type,
		    bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int ncrn, nvrn, nregs;
  bool allocate_ncrn, allocate_nvrn;

  /* We need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
  allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
						 mode,
						 type,
						 &nregs);

  /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
     The following code thus handles passing by SIMD/FP registers first.  */

  nvrn = pcum->aapcs_nvrn;

  /* C1 - C5 for floating point, homogenous floating point aggregates (HFA)
     and homogenous short-vector aggregates (HVA).  */
  if (allocate_nvrn)
    {
      if (nvrn + nregs <= NUM_FP_ARG_REGS)
	{
	  pcum->aapcs_nextnvrn = nvrn + nregs;
	  if (!aarch64_composite_type_p (type, mode))
	    {
	      gcc_assert (nregs == 1);
	      pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
	    }
	  else
	    {
	      rtx par;
	      int i;

	      par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
	      for (i = 0; i < nregs; i++)
		{
		  rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
					 V0_REGNUM + nvrn + i);
		  tmp = gen_rtx_EXPR_LIST
		    (VOIDmode, tmp,
		     GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
		  XVECEXP (par, 0, i) = tmp;
		}
	      pcum->aapcs_reg = par;
	    }
	  return;
	}
      else
	{
	  /* C.3 NSRN is set to 8.  */
	  pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
	  goto on_stack;
	}
    }

  ncrn = pcum->aapcs_ncrn;
  nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
	   + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* C6 - C9, though the sign and zero extension semantics are
     handled elsewhere.  This is the case where the argument fits
     entirely general registers.  */
  if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
    {
      unsigned int alignment = aarch64_function_arg_alignment (mode, type);

      gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);

      /* C.8 if the argument has an alignment of 16 then the NGRN is
	 rounded up to the next even number.  */
      if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
	{
	  ++ncrn;
	  gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
	}

      /* NREGS can be 0 when e.g. an empty structure is to be passed.
	 A reg is still generated for it, but the caller should be smart
	 enough not to use it.  */
      if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
	{
	  pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
	}
      else
	{
	  rtx par;
	  int i;

	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
	  for (i = 0; i < nregs; i++)
	    {
	      rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				       GEN_INT (i * UNITS_PER_WORD));
	      XVECEXP (par, 0, i) = tmp;
	    }
	  pcum->aapcs_reg = par;
	}

      pcum->aapcs_nextncrn = ncrn + nregs;
      return;
    }

  pcum->aapcs_nextncrn = NUM_ARG_REGS;

  /* The argument is passed on stack; record the needed number of words for
     this argument (we can re-use NREGS) and align the total size if
     necessary.  */
on_stack:
  pcum->aapcs_stack_words = nregs;
  if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
    pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
					       16 / UNITS_PER_WORD) + 1;
  return;
}
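
/* Example of rule C.8 above: for a call such as f (long, __int128),
   the __int128 argument would start at NGRN == 1; it needs two
   registers and has 16-byte alignment, so NGRN is rounded up to 2 and
   the value is passed in x2/x3, leaving x1 unused.  */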

/* Implement TARGET_FUNCTION_ARG.  */

static rtx
aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
		      const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);

  if (mode == VOIDmode)
    return NULL_RTX;

  aarch64_layout_arg (pcum_v, mode, type, named);
  return pcum->aapcs_reg;
}

void
aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
			      const_tree fntype ATTRIBUTE_UNUSED,
			      rtx libname ATTRIBUTE_UNUSED,
			      const_tree fndecl ATTRIBUTE_UNUSED,
			      unsigned n_named ATTRIBUTE_UNUSED)
{
  pcum->aapcs_ncrn = 0;
  pcum->aapcs_nvrn = 0;
  pcum->aapcs_nextncrn = 0;
  pcum->aapcs_nextnvrn = 0;
  pcum->pcs_variant = ARM_PCS_AAPCS64;
  pcum->aapcs_reg = NULL_RTX;
  pcum->aapcs_arg_processed = false;
  pcum->aapcs_stack_words = 0;
  pcum->aapcs_stack_size = 0;

  return;
}

static void
aarch64_function_arg_advance (cumulative_args_t pcum_v,
			      enum machine_mode mode,
			      const_tree type,
			      bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  if (pcum->pcs_variant == ARM_PCS_AAPCS64)
    {
      aarch64_layout_arg (pcum_v, mode, type, named);
      gcc_assert ((pcum->aapcs_reg != NULL_RTX)
		  != (pcum->aapcs_stack_words != 0));
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
      pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
      pcum->aapcs_stack_size += pcum->aapcs_stack_words;
      pcum->aapcs_stack_words = 0;
      pcum->aapcs_reg = NULL_RTX;
    }
}

bool
aarch64_function_arg_regno_p (unsigned regno)
{
  return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
	  || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
}

/* Implement FUNCTION_ARG_BOUNDARY.  Every parameter gets at least
   PARM_BOUNDARY bits of alignment, but will be given anything up
   to STACK_BOUNDARY bits if the type requires it.  This makes sure
   that both before and after the layout of each argument, the Next
   Stacked Argument Address (NSAA) will have a minimum alignment of
   8 bytes.  */

static unsigned int
aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
{
  unsigned int alignment = aarch64_function_arg_alignment (mode, type);

  if (alignment < PARM_BOUNDARY)
    alignment = PARM_BOUNDARY;
  if (alignment > STACK_BOUNDARY)
    alignment = STACK_BOUNDARY;
  return alignment;
}

/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).

   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte of the stack slot has useful data.

   Small aggregate types are placed in the lowest memory address.

   The related parameter passing rules are B.4, C.3, C.5 and C.14.  */

bool
aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
{
  /* On little-endian targets, the least significant byte of every stack
     argument is passed at the lowest byte address of the stack slot.  */
  if (!BYTES_BIG_ENDIAN)
    return true;

  /* Otherwise, integral types and floating point types are padded downward:
     the least significant byte of a stack argument is passed at the highest
     byte address of the stack slot.  */
  if (type
      ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type))
      : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
    return false;

  /* Everything else padded upward, i.e. data in first byte of stack slot.  */
  return true;
}

/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).

   It specifies padding for the last (may also be the only)
   element of a block move between registers and memory.  If
   assuming the block is in the memory, padding upward means that
   the last element is padded after its highest significant byte,
   while in downward padding, the last element is padded at its
   least significant byte side.

   Small aggregates and small complex types are always padded
   upwards.

   We don't need to worry about homogeneous floating-point or
   short-vector aggregates; their move is not affected by the
   padding direction determined here.  Regardless of endianness,
   each element of such an aggregate is put in the least
   significant bits of a fp/simd register.

   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */

bool
aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
			bool first ATTRIBUTE_UNUSED)
{
  /* Small composite types are always padded upward.  */
  if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
    {
      HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
			    : GET_MODE_SIZE (mode));
      if (size < 2 * UNITS_PER_WORD)
	return true;
    }

  /* Otherwise, use the default padding.  */
  return !BYTES_BIG_ENDIAN;
}

static enum machine_mode
aarch64_libgcc_cmp_return_mode (void)
{
  return SImode;
}

static bool
aarch64_frame_pointer_required (void)
{
  /* If the function contains dynamic stack allocations, we need to
     use the frame pointer to access the static parts of the frame.  */
  if (cfun->calls_alloca)
    return true;

  /* We may have turned flag_omit_frame_pointer on in order to have this
     function called; if we did, we also set the 'faked_omit_frame_pointer'
     flag and we'll check it here.
     If we really did set flag_omit_frame_pointer normally, then we return
     false (no frame pointer required) in all cases.  */

  if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
    return false;
  else if (flag_omit_leaf_frame_pointer)
    return !crtl->is_leaf;
  return true;
}

/* Mark the registers that need to be saved by the callee and calculate
   the size of the callee-saved registers area and frame record (both FP
   and LR may be omitted).  */
static void
aarch64_layout_frame (void)
{
  HOST_WIDE_INT offset = 0;
  int regno;

  if (reload_completed && cfun->machine->frame.laid_out)
    return;

  cfun->machine->frame.fp_lr_offset = 0;

  /* First mark all the registers that really need to be saved...  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    cfun->machine->frame.reg_offset[regno] = -1;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    cfun->machine->frame.reg_offset[regno] = -1;

  /* ... that includes the eh data registers (if needed)...  */
  if (crtl->calls_eh_return)
    for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
      cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;

  /* ... and any callee saved register that dataflow says is live.  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    if (df_regs_ever_live_p (regno)
	&& !call_used_regs[regno])
      cfun->machine->frame.reg_offset[regno] = 0;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    if (df_regs_ever_live_p (regno)
	&& !call_used_regs[regno])
      cfun->machine->frame.reg_offset[regno] = 0;

  if (frame_pointer_needed)
    {
      cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
      cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
      cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
    }

  /* Now assign stack slots for them.  */
  for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
    if (cfun->machine->frame.reg_offset[regno] != -1)
      {
	cfun->machine->frame.reg_offset[regno] = offset;
	offset += UNITS_PER_WORD;
      }

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    if (cfun->machine->frame.reg_offset[regno] != -1)
      {
	cfun->machine->frame.reg_offset[regno] = offset;
	offset += UNITS_PER_WORD;
      }

  if (frame_pointer_needed)
    {
      cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
      offset += UNITS_PER_WORD;
      cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
    }

  if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
    {
      cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
      offset += UNITS_PER_WORD;
      cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
    }

  cfun->machine->frame.padding0 =
    (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
  offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);

  cfun->machine->frame.saved_regs_size = offset;
  cfun->machine->frame.laid_out = true;
}

/* Make the last instruction frame-related and note that it performs
   the operation described by FRAME_PATTERN.  */

static void
aarch64_set_frame_expr (rtx frame_pattern)
{
  rtx insn;

  insn = get_last_insn ();
  RTX_FRAME_RELATED_P (insn) = 1;
  RTX_FRAME_RELATED_P (frame_pattern) = 1;
  REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
				      frame_pattern,
				      REG_NOTES (insn));
}

static bool
aarch64_register_saved_on_entry (int regno)
{
  return cfun->machine->frame.reg_offset[regno] != -1;
}

static void
aarch64_save_or_restore_fprs (int start_offset, int increment,
			      bool restore, rtx base_rtx)
{
  unsigned regno;
  unsigned regno2;
  rtx insn;
  rtx (*gen_mem_ref)(enum machine_mode, rtx)
    = (frame_pointer_needed) ? gen_frame_mem : gen_rtx_MEM;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    {
      if (aarch64_register_saved_on_entry (regno))
	{
	  rtx mem;
	  mem = gen_mem_ref (DFmode,
			     plus_constant (Pmode,
					    base_rtx,
					    start_offset));

	  for (regno2 = regno + 1;
	       regno2 <= V31_REGNUM
		 && !aarch64_register_saved_on_entry (regno2);
	       regno2++)
	    {
	      /* Empty loop.  */
	    }
	  if (regno2 <= V31_REGNUM &&
	      aarch64_register_saved_on_entry (regno2))
	    {
	      rtx mem2;
	      /* Next highest register to be saved.  */
	      mem2 = gen_mem_ref (DFmode,
				  plus_constant
				  (Pmode,
				   base_rtx,
				   start_offset + increment));
	      if (restore == false)
		{
		  insn = emit_insn
		    ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
					mem2, gen_rtx_REG (DFmode, regno2)));
		}
	      else
		{
		  insn = emit_insn
		    ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
				       gen_rtx_REG (DFmode, regno2), mem2));

		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DFmode, regno));
		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DFmode, regno2));
		}

	      /* The first part of a frame-related parallel insn
		 is always assumed to be relevant to the frame
		 calculations; subsequent parts, are only
		 frame-related if explicitly marked.  */
	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
	      regno = regno2;
	      start_offset += increment * 2;
	    }
	  else
	    {
	      if (restore == false)
		insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
	      else
		{
		  insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DImode, regno));
		}
	      start_offset += increment;
	    }
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}

/* Offset from the stack pointer of where the saves and
   restores have to happen.  */
static void
aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
					       bool restore)
{
  rtx insn;
  rtx base_rtx = stack_pointer_rtx;
  HOST_WIDE_INT start_offset = offset;
  HOST_WIDE_INT increment = UNITS_PER_WORD;
  rtx (*gen_mem_ref)(enum machine_mode, rtx)
    = (frame_pointer_needed) ? gen_frame_mem : gen_rtx_MEM;
  unsigned limit = (frame_pointer_needed) ? R28_REGNUM : R30_REGNUM;
  unsigned regno;
  unsigned regno2;

  for (regno = R0_REGNUM; regno <= limit; regno++)
    {
      if (aarch64_register_saved_on_entry (regno))
	{
	  rtx mem;
	  mem = gen_mem_ref (Pmode,
			     plus_constant (Pmode,
					    base_rtx,
					    start_offset));

	  for (regno2 = regno + 1;
	       regno2 <= limit
		 && !aarch64_register_saved_on_entry (regno2);
	       regno2++)
	    {
	      /* Empty loop.  */
	    }
	  if (regno2 <= limit &&
	      aarch64_register_saved_on_entry (regno2))
	    {
	      rtx mem2;
	      /* Next highest register to be saved.  */
	      mem2 = gen_mem_ref (Pmode,
				  plus_constant
				  (Pmode,
				   base_rtx,
				   start_offset + increment));
	      if (restore == false)
		{
		  insn = emit_insn
		    ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
					mem2, gen_rtx_REG (DImode, regno2)));
		}
	      else
		{
		  insn = emit_insn
		    ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
				       gen_rtx_REG (DImode, regno2), mem2));

		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DImode, regno));
		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DImode, regno2));
		}

	      /* The first part of a frame-related parallel insn
		 is always assumed to be relevant to the frame
		 calculations; subsequent parts, are only
		 frame-related if explicitly marked.  */
	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
	      regno = regno2;
	      start_offset += increment * 2;
	    }
	  else
	    {
	      if (restore == false)
		insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
	      else
		{
		  insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DImode, regno));
		}
	      start_offset += increment;
	    }
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
}

/* AArch64 stack frames generated by this compiler look like:

	+-------------------------------+
	|                               |
	|  incoming stack arguments     |
	|                               |
	+-------------------------------+ <-- arg_pointer_rtx
	|                               |
	|  callee-allocated save area   |
	|  for register varargs         |
	|                               |
	+-------------------------------+
	|  local variables              |
	+-------------------------------+ <-- frame_pointer_rtx
	|                               |
	|  callee-saved registers       |
	|                               |
	+-------------------------------+
	|  LR'                          |
	+-------------------------------+
	|  FP'                          |
      P +-------------------------------+ <-- hard_frame_pointer_rtx
	|  dynamic allocation           |
	+-------------------------------+
	|                               |
	|  outgoing stack arguments     |
	|                               |
	+-------------------------------+ <-- stack_pointer_rtx

   Dynamic stack allocations such as alloca insert data at point P.
   They decrease stack_pointer_rtx but leave frame_pointer_rtx and
   hard_frame_pointer_rtx unchanged.  */
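
/* Illustrative walk-through of the diagram: for a function that saves
   the frame record, the prologue leaves the stored FP' and LR' at the
   bottom of the callee-save area with hard_frame_pointer_rtx addressing
   the stored FP'; alloca then carves space at point P, moving only
   stack_pointer_rtx, which is why FP-relative accesses to locals and
   saved registers remain valid after dynamic allocation.  */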

/* Generate the prologue instructions for entry into a function.
   Establish the stack frame by decreasing the stack pointer with a
   properly calculated size and, if necessary, create a frame record
   filled with the values of LR and previous frame pointer.  The
   current FP is also set up if it is in use.  */

void
aarch64_expand_prologue (void)
{
  /* sub sp, sp, #<frame_size>
     stp {fp, lr}, [sp, #<frame_size> - 16]
     add fp, sp, #<frame_size> - hardfp_offset
     stp {cs_reg}, [fp, #-16] etc.

     sub sp, sp, <final_adjustment_if_any>
  */
  HOST_WIDE_INT original_frame_size;	/* local variables + vararg save */
  HOST_WIDE_INT frame_size, offset;
  HOST_WIDE_INT fp_offset;		/* FP offset from SP */
  rtx insn;

  aarch64_layout_frame ();
  original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
  gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
	      && (cfun->stdarg || !cfun->machine->saved_varargs_size));
  frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
		+ crtl->outgoing_args_size);
  offset = frame_size = AARCH64_ROUND_UP (frame_size,
					  STACK_BOUNDARY / BITS_PER_UNIT);

  if (flag_stack_usage_info)
    current_function_static_stack_size = frame_size;

  fp_offset = (offset
	       - original_frame_size
	       - cfun->machine->frame.saved_regs_size);

  /* Store pairs and load pairs have a range only -512 to 504.  */
  if (offset >= 512)
    {
      /* When the frame has a large size, an initial decrease is done on
	 the stack pointer to jump over the callee-allocated save area for
	 register varargs, the local variable area and/or the callee-saved
	 register area.  This will allow the pre-index write-back
	 store pair instructions to be used for setting up the stack frame
	 efficiently.  */
      offset = original_frame_size + cfun->machine->frame.saved_regs_size;
      if (offset >= 512)
	offset = cfun->machine->frame.saved_regs_size;

      frame_size -= (offset + crtl->outgoing_args_size);
      fp_offset = 0;

      if (frame_size >= 0x1000000)
	{
	  rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
	  emit_move_insn (op0, GEN_INT (-frame_size));
	  emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
	  aarch64_set_frame_expr (gen_rtx_SET
				  (Pmode, stack_pointer_rtx,
				   gen_rtx_PLUS (Pmode,
						 stack_pointer_rtx,
						 GEN_INT (-frame_size))));
	}
      else if (frame_size > 0)
	{
	  if ((frame_size & 0xfff) != frame_size)
	    {
	      insn = emit_insn (gen_add2_insn
				(stack_pointer_rtx,
				 GEN_INT (-(frame_size
					    & ~(HOST_WIDE_INT)0xfff))));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	  if ((frame_size & 0xfff) != 0)
	    {
	      insn = emit_insn (gen_add2_insn
				(stack_pointer_rtx,
				 GEN_INT (-(frame_size
					    & (HOST_WIDE_INT)0xfff))));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}
    }
  else
    frame_size = -1;

  if (offset > 0)
    {
      /* Save the frame pointer and lr if the frame pointer is needed
	 first.  Make the frame pointer point to the location of the
	 old frame pointer on the stack.  */
      if (frame_pointer_needed)
	{
	  rtx mem_fp, mem_lr;

	  if (fp_offset)
	    {
	      insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					       GEN_INT (-offset)));
	      RTX_FRAME_RELATED_P (insn) = 1;
	      aarch64_set_frame_expr (gen_rtx_SET
				      (Pmode, stack_pointer_rtx,
				       gen_rtx_MINUS (Pmode,
						      stack_pointer_rtx,
						      GEN_INT (offset))));
	      mem_fp = gen_frame_mem (DImode,
				      plus_constant (Pmode,
						     stack_pointer_rtx,
						     fp_offset));
	      mem_lr = gen_frame_mem (DImode,
				      plus_constant (Pmode,
						     stack_pointer_rtx,
						     fp_offset
						     + UNITS_PER_WORD));
	      insn = emit_insn (gen_store_pairdi (mem_fp,
						  hard_frame_pointer_rtx,
						  mem_lr,
						  gen_rtx_REG (DImode,
							       LR_REGNUM)));
	    }
	  else
	    {
	      insn = emit_insn (gen_storewb_pairdi_di
				(stack_pointer_rtx, stack_pointer_rtx,
				 hard_frame_pointer_rtx,
				 gen_rtx_REG (DImode, LR_REGNUM),
				 GEN_INT (-offset),
				 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
	    }

	  /* The first part of a frame-related parallel insn is always
	     assumed to be relevant to the frame calculations;
	     subsequent parts, are only frame-related if explicitly
	     marked.  */
	  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
	  RTX_FRAME_RELATED_P (insn) = 1;

	  /* Set up frame pointer to point to the location of the
	     previous frame pointer on the stack.  */
	  insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
					   stack_pointer_rtx,
					   GEN_INT (fp_offset)));
	  aarch64_set_frame_expr (gen_rtx_SET
				  (Pmode, hard_frame_pointer_rtx,
				   gen_rtx_PLUS (Pmode,
						 stack_pointer_rtx,
						 GEN_INT (fp_offset))));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
					   hard_frame_pointer_rtx));
	}
      else
	{
	  insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					   GEN_INT (-offset)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

      aarch64_save_or_restore_callee_save_registers
	(fp_offset + cfun->machine->frame.hardfp_offset, 0);
    }

  /* when offset >= 512,
     sub sp, sp, #<outgoing_args_size> */
  if (frame_size > -1)
    {
      if (crtl->outgoing_args_size > 0)
	{
	  insn = emit_insn (gen_add2_insn
			    (stack_pointer_rtx,
			     GEN_INT (- crtl->outgoing_args_size)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}

2152 aarch64_expand_epilogue (bool for_sibcall
)
2154 HOST_WIDE_INT original_frame_size
, frame_size
, offset
;
2155 HOST_WIDE_INT fp_offset
;
2159 aarch64_layout_frame ();
2160 original_frame_size
= get_frame_size () + cfun
->machine
->saved_varargs_size
;
2161 frame_size
= (original_frame_size
+ cfun
->machine
->frame
.saved_regs_size
2162 + crtl
->outgoing_args_size
);
2163 offset
= frame_size
= AARCH64_ROUND_UP (frame_size
,
2164 STACK_BOUNDARY
/ BITS_PER_UNIT
);
2167 - original_frame_size
2168 - cfun
->machine
->frame
.saved_regs_size
);
2170 cfa_reg
= frame_pointer_needed
? hard_frame_pointer_rtx
: stack_pointer_rtx
;
2172 /* Store pairs and load pairs have a range only -512 to 504. */
2175 offset
= original_frame_size
+ cfun
->machine
->frame
.saved_regs_size
;
2177 offset
= cfun
->machine
->frame
.saved_regs_size
;
2179 frame_size
-= (offset
+ crtl
->outgoing_args_size
);
2181 if (!frame_pointer_needed
&& crtl
->outgoing_args_size
> 0)
2183 insn
= emit_insn (gen_add2_insn
2185 GEN_INT (crtl
->outgoing_args_size
)));
2186 RTX_FRAME_RELATED_P (insn
) = 1;
2192 /* If there were outgoing arguments or we've done dynamic stack
2193 allocation, then restore the stack pointer from the frame
2194 pointer. This is at most one insn and more efficient than using
2195 GCC's internal mechanism. */
2196 if (frame_pointer_needed
2197 && (crtl
->outgoing_args_size
|| cfun
->calls_alloca
))
2199 insn
= emit_insn (gen_add3_insn (stack_pointer_rtx
,
2200 hard_frame_pointer_rtx
,
2201 GEN_INT (- fp_offset
)));
2202 RTX_FRAME_RELATED_P (insn
) = 1;
2203 /* As SP is set to (FP - fp_offset), according to the rules in
2204 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2205 from the value of SP from now on. */
2206 cfa_reg
= stack_pointer_rtx
;
2209 aarch64_save_or_restore_callee_save_registers
2210 (fp_offset
+ cfun
->machine
->frame
.hardfp_offset
, 1);
2212 /* Restore the frame pointer and lr if the frame pointer is needed. */
2215 if (frame_pointer_needed
)
2221 mem_fp
= gen_frame_mem (DImode
,
2222 plus_constant (Pmode
,
2225 mem_lr
= gen_frame_mem (DImode
,
2226 plus_constant (Pmode
,
2230 insn
= emit_insn (gen_load_pairdi (hard_frame_pointer_rtx
,
2232 gen_rtx_REG (DImode
,
2238 insn
= emit_insn (gen_loadwb_pairdi_di
2241 hard_frame_pointer_rtx
,
2242 gen_rtx_REG (DImode
, LR_REGNUM
),
2244 GEN_INT (GET_MODE_SIZE (DImode
) + offset
)));
2245 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 2)) = 1;
2246 add_reg_note (insn
, REG_CFA_ADJUST_CFA
,
2247 (gen_rtx_SET (Pmode
, stack_pointer_rtx
,
2248 plus_constant (Pmode
, cfa_reg
,
2252 /* The first part of a frame-related parallel insn
2253 is always assumed to be relevant to the frame
2254 calculations; subsequent parts, are only
2255 frame-related if explicitly marked. */
2256 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 1)) = 1;
2257 RTX_FRAME_RELATED_P (insn
) = 1;
2258 add_reg_note (insn
, REG_CFA_RESTORE
, hard_frame_pointer_rtx
);
2259 add_reg_note (insn
, REG_CFA_RESTORE
,
2260 gen_rtx_REG (DImode
, LR_REGNUM
));
2264 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
,
2266 RTX_FRAME_RELATED_P (insn
) = 1;
2271 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
,
2273 RTX_FRAME_RELATED_P (insn
) = 1;
2277 /* Stack adjustment for exception handler. */
2278 if (crtl
->calls_eh_return
)
2280 /* We need to unwind the stack by the offset computed by
2281 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2282 based on SP. Ideally we would update the SP and define the
2283 CFA along the lines of:
2285 SP = SP + EH_RETURN_STACKADJ_RTX
2286 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2288 However the dwarf emitter only understands a constant
2291 The solution choosen here is to use the otherwise unused IP0
2292 as a temporary register to hold the current SP value. The
2293 CFA is described using IP0 then SP is modified. */
2295 rtx ip0
= gen_rtx_REG (DImode
, IP0_REGNUM
);
2297 insn
= emit_move_insn (ip0
, stack_pointer_rtx
);
2298 add_reg_note (insn
, REG_CFA_DEF_CFA
, ip0
);
2299 RTX_FRAME_RELATED_P (insn
) = 1;
2301 emit_insn (gen_add2_insn (stack_pointer_rtx
, EH_RETURN_STACKADJ_RTX
));
2303 /* Ensure the assignment to IP0 does not get optimized away. */
2307 if (frame_size
> -1)
2309 if (frame_size
>= 0x1000000)
2311 rtx op0
= gen_rtx_REG (Pmode
, IP0_REGNUM
);
2312 emit_move_insn (op0
, GEN_INT (frame_size
));
2313 emit_insn (gen_add2_insn (stack_pointer_rtx
, op0
));
2314 aarch64_set_frame_expr (gen_rtx_SET
2315 (Pmode
, stack_pointer_rtx
,
2316 gen_rtx_PLUS (Pmode
,
2318 GEN_INT (frame_size
))));
2320 else if (frame_size
> 0)
2322 if ((frame_size
& 0xfff) != 0)
2324 insn
= emit_insn (gen_add2_insn
2326 GEN_INT ((frame_size
2327 & (HOST_WIDE_INT
) 0xfff))));
2328 RTX_FRAME_RELATED_P (insn
) = 1;
2330 if ((frame_size
& 0xfff) != frame_size
)
2332 insn
= emit_insn (gen_add2_insn
2334 GEN_INT ((frame_size
2335 & ~ (HOST_WIDE_INT
) 0xfff))));
2336 RTX_FRAME_RELATED_P (insn
) = 1;
2340 aarch64_set_frame_expr (gen_rtx_SET (Pmode
, stack_pointer_rtx
,
2341 gen_rtx_PLUS (Pmode
,
2343 GEN_INT (offset
))));
2346 emit_use (gen_rtx_REG (DImode
, LR_REGNUM
));
2348 emit_jump_insn (ret_rtx
);
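/* Illustrative sketch, not part of GCC: the effect of the
   AARCH64_ROUND_UP calls above.  STACK_BOUNDARY / BITS_PER_UNIT is 16
   on AArch64, so every frame size is rounded up to a multiple of 16
   bytes.  "round_up" is a hypothetical stand-in for the macro; compile
   standalone to experiment.  */
#if 0
#include <assert.h>

static long
round_up (long size, long align)  /* align must be a power of two */
{
  return (size + align - 1) & ~(align - 1);
}

int
main (void)
{
  assert (round_up (0, 16) == 0);
  assert (round_up (1, 16) == 16);   /* a 1-byte frame still reserves 16 */
  assert (round_up (24, 16) == 32);
  return 0;
}
#endif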
/* Return the place to copy the exception unwinding return address to.
   This will probably be a stack slot, but could (in theory) be the
   return register.  */
rtx
aarch64_final_eh_return_addr (void)
{
  HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
  aarch64_layout_frame ();
  original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
  frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
                + crtl->outgoing_args_size);
  offset = frame_size = AARCH64_ROUND_UP (frame_size,
                                          STACK_BOUNDARY / BITS_PER_UNIT);
  fp_offset = offset
    - original_frame_size
    - cfun->machine->frame.saved_regs_size;

  if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
    return gen_rtx_REG (DImode, LR_REGNUM);

  /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
     result in a store to save LR introduced by builtin_eh_return () being
     incorrectly deleted because the alias is not detected.
     So in the calculation of the address to copy the exception unwinding
     return address to, we note 2 cases.
     If FP is needed and the fp_offset is 0, it means that SP = FP and hence
     we return a SP-relative location since all the addresses are SP-relative
     in this case.  This prevents the store from being optimized away.
     If the fp_offset is not 0, then the addresses will be FP-relative and
     therefore we return a FP-relative location.  */

  if (frame_pointer_needed)
    {
      if (fp_offset)
        return gen_frame_mem (DImode,
                              plus_constant (Pmode,
                                             hard_frame_pointer_rtx,
                                             UNITS_PER_WORD));
      else
        return gen_frame_mem (DImode,
                              plus_constant (Pmode,
                                             stack_pointer_rtx,
                                             UNITS_PER_WORD));
    }

  /* If FP is not needed, we calculate the location of LR, which would be
     at the top of the saved registers block.  */

  return gen_frame_mem (DImode,
                        plus_constant (Pmode,
                                       stack_pointer_rtx,
                                       fp_offset
                                       + cfun->machine->frame.saved_regs_size
                                       - 2 * UNITS_PER_WORD));
}
/* Output code to build up a constant in a register.  */
static void
aarch64_build_constant (int regnum, HOST_WIDE_INT val)
{
  if (aarch64_bitmask_imm (val, DImode))
    emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
  else
    {
      int i;
      int ncount = 0;
      int zcount = 0;
      HOST_WIDE_INT valp = val >> 16;
      HOST_WIDE_INT valm;
      HOST_WIDE_INT tval;

      for (i = 16; i < 64; i += 16)
        {
          valm = (valp & 0xffff);

          if (valm != 0)
            ++ zcount;

          if (valm != 0xffff)
            ++ ncount;

          valp >>= 16;
        }

      /* zcount contains the number of additional MOVK instructions
         required if the constant is built up with an initial MOVZ instruction,
         while ncount is the number of MOVK instructions required if starting
         with a MOVN instruction.  Choose the sequence that yields the fewest
         instructions, preferring MOVZ instructions when they are both the
         same.  */
      if (ncount < zcount)
        {
          emit_move_insn (gen_rtx_REG (Pmode, regnum),
                          GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
          tval = 0xffff;
        }
      else
        {
          emit_move_insn (gen_rtx_REG (Pmode, regnum),
                          GEN_INT (val & 0xffff));
          tval = 0;
        }

      val >>= 16;

      for (i = 16; i < 64; i += 16)
        {
          if ((val & 0xffff) != tval)
            emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
                                       GEN_INT (i), GEN_INT (val & 0xffff)));
          val >>= 16;
        }
    }
}
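/* Illustrative sketch, not part of GCC: the zcount/ncount trade-off in
   aarch64_build_constant above, for one sample constant.  Counting the
   16-bit chunks above the low halfword tells us how many MOVK
   instructions follow an initial MOVZ (start from all zeros) versus an
   initial MOVN (start from all ones).  Standalone code.  */
#if 0
#include <stdio.h>
#include <stdint.h>

int
main (void)
{
  uint64_t val = 0xffffffff00001234ull;
  int zcount = 0, ncount = 0;
  int i;

  for (i = 16; i < 64; i += 16)
    {
      uint64_t chunk = (val >> i) & 0xffff;
      if (chunk != 0)
        zcount++;   /* this chunk needs a MOVK after a MOVZ */
      if (chunk != 0xffff)
        ncount++;   /* this chunk needs a MOVK after a MOVN */
    }

  /* Prints 2 and 1: the MOVN-based sequence is shorter here.  */
  printf ("movz+%d movk vs movn+%d movk\n", zcount, ncount);
  return 0;
}
#endif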
static void
aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
{
  HOST_WIDE_INT mdelta = delta;
  rtx this_rtx = gen_rtx_REG (Pmode, regnum);
  rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);

  if (mdelta < 0)
    mdelta = -mdelta;

  if (mdelta >= 4096 * 4096)
    {
      aarch64_build_constant (scratchreg, delta);
      emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
    }
  else if (mdelta > 0)
    {
      if (mdelta >= 4096)
        {
          emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
          rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
          if (delta < 0)
            emit_insn (gen_rtx_SET (Pmode, this_rtx,
                                    gen_rtx_MINUS (Pmode, this_rtx, shift)));
          else
            emit_insn (gen_rtx_SET (Pmode, this_rtx,
                                    gen_rtx_PLUS (Pmode, this_rtx, shift)));
        }
      if (mdelta % 4096 != 0)
        {
          scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
          emit_insn (gen_rtx_SET (Pmode, this_rtx,
                                  gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
        }
    }
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */
static void
aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
                         HOST_WIDE_INT delta,
                         HOST_WIDE_INT vcall_offset,
                         tree function)
{
  /* The this pointer is always in x0.  Note that this differs from
     Arm where the this pointer may be bumped to r1 if r0 is required
     to return a pointer to an aggregate.  On AArch64 a result value
     pointer will be in x8.  */
  int this_regno = R0_REGNUM;
  rtx this_rtx, temp0, temp1, addr, insn, funexp;

  reload_completed = 1;
  emit_note (NOTE_INSN_PROLOGUE_END);

  if (vcall_offset == 0)
    aarch64_add_constant (this_regno, IP1_REGNUM, delta);
  else
    {
      gcc_assert ((vcall_offset & 0x7) == 0);

      this_rtx = gen_rtx_REG (Pmode, this_regno);
      temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
      temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);

      addr = this_rtx;
      if (delta != 0)
        {
          if (delta >= -256 && delta < 256)
            addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
                                       plus_constant (Pmode, this_rtx, delta));
          else
            aarch64_add_constant (this_regno, IP1_REGNUM, delta);
        }

      aarch64_emit_move (temp0, gen_rtx_MEM (Pmode, addr));

      if (vcall_offset >= -256 && vcall_offset < 32768)
        addr = plus_constant (Pmode, temp0, vcall_offset);
      else
        {
          aarch64_build_constant (IP1_REGNUM, vcall_offset);
          addr = gen_rtx_PLUS (Pmode, temp0, temp1);
        }

      aarch64_emit_move (temp1, gen_rtx_MEM (Pmode, addr));
      emit_insn (gen_add2_insn (this_rtx, temp1));
    }

  /* Generate a tail call to the target function.  */
  if (!TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
  SIBLING_CALL_P (insn) = 1;

  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  /* Stop pretending to be a post-reload pass.  */
  reload_completed = 0;
}
static int
aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  if (GET_CODE (*x) == SYMBOL_REF)
    return SYMBOL_REF_TLS_MODEL (*x) != 0;

  /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
     TLS offsets, not real symbol references.  */
  if (GET_CODE (*x) == UNSPEC
      && XINT (*x, 1) == UNSPEC_TLS)
    return -1;

  return 0;
}

/* Return TRUE if X contains any TLS symbol references.  */
bool
aarch64_tls_referenced_p (rtx x)
{
  if (!TARGET_HAVE_TLS)
    return false;

  return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
}
static int
aarch64_bitmasks_cmp (const void *i1, const void *i2)
{
  const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
  const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;

  if (*imm1 < *imm2)
    return -1;
  if (*imm1 > *imm2)
    return +1;
  return 0;
}
static void
aarch64_build_bitmask_table (void)
{
  unsigned HOST_WIDE_INT mask, imm;
  unsigned int log_e, e, s, r;
  unsigned int nimms = 0;

  for (log_e = 1; log_e <= 6; log_e++)
    {
      e = 1 << log_e;
      if (e == 64)
        mask = ~(HOST_WIDE_INT) 0;
      else
        mask = ((HOST_WIDE_INT) 1 << e) - 1;
      for (s = 1; s < e; s++)
        {
          for (r = 0; r < e; r++)
            {
              /* set s consecutive bits to 1 (s < 64) */
              imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
              /* rotate right by r */
              if (r != 0)
                imm = ((imm >> r) | (imm << (e - r))) & mask;
              /* replicate the constant depending on SIMD size */
              switch (log_e)
                {
                case 1: imm |= (imm <<  2);
                case 2: imm |= (imm <<  4);
                case 3: imm |= (imm <<  8);
                case 4: imm |= (imm << 16);
                case 5: imm |= (imm << 32);
                case 6:
                  break;
                default:
                  gcc_unreachable ();
                }
              gcc_assert (nimms < AARCH64_NUM_BITMASKS);
              aarch64_bitmasks[nimms++] = imm;
            }
        }
    }

  gcc_assert (nimms == AARCH64_NUM_BITMASKS);
  qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
         aarch64_bitmasks_cmp);
}
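/* Illustrative sketch, not part of GCC: what the table built above
   contains.  A valid bitmask immediate is a run of S consecutive ones,
   rotated right by R, inside an element of E = 2, 4, ..., 64 bits that
   is then replicated across all 64 bits.  This standalone checker
   brute-forces the same space instead of using the sorted table.  */
#if 0
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool
is_bitmask_imm64 (uint64_t val)
{
  unsigned e, s, r, w;

  for (e = 2; e <= 64; e *= 2)
    {
      uint64_t mask = (e == 64) ? ~(uint64_t) 0 : ((uint64_t) 1 << e) - 1;
      for (s = 1; s < e; s++)
        for (r = 0; r < e; r++)
          {
            uint64_t imm = ((uint64_t) 1 << s) - 1;   /* s ones */
            if (r != 0)
              imm = ((imm >> r) | (imm << (e - r))) & mask; /* rotate right */
            for (w = e; w < 64; w *= 2)               /* replicate element */
              imm |= imm << w;
            if (imm == val)
              return true;
          }
    }
  return false;
}

int
main (void)
{
  assert (is_bitmask_imm64 (0x00ff00ff00ff00ffull));  /* e=16, s=8, r=0 */
  assert (!is_bitmask_imm64 (0x1234));                /* several runs */
  return 0;
}
#endif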
/* Return true if val can be encoded as a 12-bit unsigned immediate with
   a left shift of 0 or 12 bits.  */
bool
aarch64_uimm12_shift (HOST_WIDE_INT val)
{
  return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
          || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val);
}

/* Return true if val is an immediate that can be loaded into a
   register by a MOVZ instruction.  */
static bool
aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
{
  if (GET_MODE_SIZE (mode) > 4)
    {
      if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
          || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
        return 1;
    }
  else
    {
      /* Ignore sign extension.  */
      val &= (HOST_WIDE_INT) 0xffffffff;
    }
  return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
          || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
}

/* Return true if val is a valid bitmask immediate.  */
bool
aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
{
  if (GET_MODE_SIZE (mode) < 8)
    {
      /* Replicate bit pattern.  */
      val &= (HOST_WIDE_INT) 0xffffffff;
      val |= val << 32;
    }
  return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
                  sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
}

/* Return true if val is an immediate that can be loaded into a
   register in a single instruction.  */
bool
aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
{
  if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
    return 1;
  return aarch64_bitmask_imm (val, mode);
}
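/* Worked examples, not part of GCC, for the predicates above: a 12-bit
   immediate must fit entirely in bits [0,11] or bits [12,23].  */
#if 0
#include <assert.h>

static int
uimm12_shift (long val)
{
  return ((val & 0xfffl) == val || (val & (0xfffl << 12)) == val);
}

int
main (void)
{
  assert (uimm12_shift (0xfff));      /* shift 0 */
  assert (uimm12_shift (0x555000));   /* 0x555 << 12 */
  assert (!uimm12_shift (0x12345));   /* straddles both fields */
  return 0;
}
#endif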
static bool
aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  if (GET_CODE (x) == HIGH)
    return true;

  split_const (x, &base, &offset);
  if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
    return (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
            != SYMBOL_FORCE_TO_MEM);

  return aarch64_tls_referenced_p (x);
}
/* Return true if register REGNO is a valid index register.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
aarch64_regno_ok_for_index_p (int regno, bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
        return true;

      if (!reg_renumber)
        return false;

      regno = reg_renumber[regno];
    }
  return GP_REGNUM_P (regno);
}

/* Return true if register REGNO is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
aarch64_regno_ok_for_base_p (int regno, bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
        return true;

      if (!reg_renumber)
        return false;

      regno = reg_renumber[regno];
    }

  /* The fake registers will be eliminated to either the stack or
     hard frame pointer, both of which are usually valid base registers.
     Reload deals with the cases where the eliminated form isn't valid.  */
  return (GP_REGNUM_P (regno)
          || regno == SP_REGNUM
          || regno == FRAME_POINTER_REGNUM
          || regno == ARG_POINTER_REGNUM);
}

/* Return true if X is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

static bool
aarch64_base_register_rtx_p (rtx x, bool strict_p)
{
  if (!strict_p && GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);

  return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
}
/* Return true if address offset is a valid index.  If it is, fill in INFO
   appropriately.  STRICT_P is true if REG_OK_STRICT is in effect.  */

static bool
aarch64_classify_index (struct aarch64_address_info *info, rtx x,
                        enum machine_mode mode, bool strict_p)
{
  enum aarch64_address_type type;
  rtx index;
  int shift;

  /* (reg:P) */
  if ((REG_P (x) || GET_CODE (x) == SUBREG)
      && GET_MODE (x) == Pmode)
    {
      type = ADDRESS_REG_REG;
      index = x;
      shift = 0;
    }
  /* (sign_extend:DI (reg:SI)) */
  else if ((GET_CODE (x) == SIGN_EXTEND
            || GET_CODE (x) == ZERO_EXTEND)
           && GET_MODE (x) == DImode
           && GET_MODE (XEXP (x, 0)) == SImode)
    {
      type = (GET_CODE (x) == SIGN_EXTEND)
        ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (x, 0);
      shift = 0;
    }
  /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
  else if (GET_CODE (x) == MULT
           && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
               || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
           && GET_MODE (XEXP (x, 0)) == DImode
           && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
           && CONST_INT_P (XEXP (x, 1)))
    {
      type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
        ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = exact_log2 (INTVAL (XEXP (x, 1)));
    }
  /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
  else if (GET_CODE (x) == ASHIFT
           && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
               || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
           && GET_MODE (XEXP (x, 0)) == DImode
           && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
           && CONST_INT_P (XEXP (x, 1)))
    {
      type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
        ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = INTVAL (XEXP (x, 1));
    }
  /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
  else if ((GET_CODE (x) == SIGN_EXTRACT
            || GET_CODE (x) == ZERO_EXTRACT)
           && GET_MODE (x) == DImode
           && GET_CODE (XEXP (x, 0)) == MULT
           && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
           && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
    {
      type = (GET_CODE (x) == SIGN_EXTRACT)
        ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
      if (INTVAL (XEXP (x, 1)) != 32 + shift
          || INTVAL (XEXP (x, 2)) != 0)
        shift = -1;
    }
  /* (and:DI (mult:DI (reg:DI) (const_int scale))
     (const_int 0xffffffff<<shift)) */
  else if (GET_CODE (x) == AND
           && GET_MODE (x) == DImode
           && GET_CODE (XEXP (x, 0)) == MULT
           && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
           && CONST_INT_P (XEXP (XEXP (x, 0), 1))
           && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
      if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
        shift = -1;
    }
  /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
  else if ((GET_CODE (x) == SIGN_EXTRACT
            || GET_CODE (x) == ZERO_EXTRACT)
           && GET_MODE (x) == DImode
           && GET_CODE (XEXP (x, 0)) == ASHIFT
           && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
           && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
    {
      type = (GET_CODE (x) == SIGN_EXTRACT)
        ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = INTVAL (XEXP (XEXP (x, 0), 1));
      if (INTVAL (XEXP (x, 1)) != 32 + shift
          || INTVAL (XEXP (x, 2)) != 0)
        shift = -1;
    }
  /* (and:DI (ashift:DI (reg:DI) (const_int shift))
     (const_int 0xffffffff<<shift)) */
  else if (GET_CODE (x) == AND
           && GET_MODE (x) == DImode
           && GET_CODE (XEXP (x, 0)) == ASHIFT
           && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
           && CONST_INT_P (XEXP (XEXP (x, 0), 1))
           && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = INTVAL (XEXP (XEXP (x, 0), 1));
      if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
        shift = -1;
    }
  /* (mult:P (reg:P) (const_int scale)) */
  else if (GET_CODE (x) == MULT
           && GET_MODE (x) == Pmode
           && GET_MODE (XEXP (x, 0)) == Pmode
           && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_REG;
      index = XEXP (x, 0);
      shift = exact_log2 (INTVAL (XEXP (x, 1)));
    }
  /* (ashift:P (reg:P) (const_int shift)) */
  else if (GET_CODE (x) == ASHIFT
           && GET_MODE (x) == Pmode
           && GET_MODE (XEXP (x, 0)) == Pmode
           && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_REG;
      index = XEXP (x, 0);
      shift = INTVAL (XEXP (x, 1));
    }
  else
    return false;

  if (GET_CODE (index) == SUBREG)
    index = SUBREG_REG (index);

  if ((shift == 0
       || (shift > 0 && shift <= 3
           && (1 << shift) == GET_MODE_SIZE (mode)))
      && REG_P (index)
      && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
    {
      info->type = type;
      info->offset = index;
      info->shift = shift;
      return true;
    }

  return false;
}
static inline bool
offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
{
  return (offset >= -64 * GET_MODE_SIZE (mode)
          && offset < 64 * GET_MODE_SIZE (mode)
          && offset % GET_MODE_SIZE (mode) == 0);
}

static inline bool
offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
                               HOST_WIDE_INT offset)
{
  return offset >= -256 && offset < 256;
}

static inline bool
offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
{
  return (offset >= 0
          && offset < 4096 * GET_MODE_SIZE (mode)
          && offset % GET_MODE_SIZE (mode) == 0);
}
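/* Worked example, not from the sources: for DImode the mode size is 8,
   so the 7-bit signed scaled window is -64*8 = -512 up to but not
   including 64*8 = 512 in steps of 8, i.e. -512 .. 504 -- the load/store
   pair range quoted in aarch64_expand_epilogue above.  The 9-bit
   unscaled form covers -256 .. 255 and the 12-bit unsigned scaled form
   0 .. 4096*8 - 8 = 32760.  */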
/* Return true if X is a valid address for machine mode MODE.  If it is,
   fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
   effect.  OUTER_CODE is PARALLEL for a load/store pair.  */

static bool
aarch64_classify_address (struct aarch64_address_info *info,
                          rtx x, enum machine_mode mode,
                          RTX_CODE outer_code, bool strict_p)
{
  enum rtx_code code = GET_CODE (x);
  rtx op0, op1;
  bool allow_reg_index_p =
    outer_code != PARALLEL && GET_MODE_SIZE (mode) != 16;

  /* Don't support anything other than POST_INC or REG addressing for
     AdvSIMD.  */
  if (aarch64_vector_mode_p (mode)
      && (code != POST_INC && code != REG))
    return false;

  switch (code)
    {
    case REG:
    case SUBREG:
      info->type = ADDRESS_REG_IMM;
      info->base = x;
      info->offset = const0_rtx;
      return aarch64_base_register_rtx_p (x, strict_p);

    case PLUS:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      if (GET_MODE_SIZE (mode) != 0
          && CONST_INT_P (op1)
          && aarch64_base_register_rtx_p (op0, strict_p))
        {
          HOST_WIDE_INT offset = INTVAL (op1);

          info->type = ADDRESS_REG_IMM;
          info->base = op0;
          info->offset = op1;

          /* TImode and TFmode values are allowed in both pairs of X
             registers and individual Q registers.  The available
             address modes are:
             X,X: 7-bit signed scaled offset
             Q:   9-bit signed offset
             We conservatively require an offset representable in either mode.
           */
          if (mode == TImode || mode == TFmode)
            return (offset_7bit_signed_scaled_p (mode, offset)
                    && offset_9bit_signed_unscaled_p (mode, offset));

          if (outer_code == PARALLEL)
            return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
                    && offset_7bit_signed_scaled_p (mode, offset));
          else
            return (offset_9bit_signed_unscaled_p (mode, offset)
                    || offset_12bit_unsigned_scaled_p (mode, offset));
        }

      if (allow_reg_index_p)
        {
          /* Look for base + (scaled/extended) index register.  */
          if (aarch64_base_register_rtx_p (op0, strict_p)
              && aarch64_classify_index (info, op1, mode, strict_p))
            {
              info->base = op0;
              return true;
            }
          if (aarch64_base_register_rtx_p (op1, strict_p)
              && aarch64_classify_index (info, op0, mode, strict_p))
            {
              info->base = op1;
              return true;
            }
        }

      return false;

    case POST_INC:
    case POST_DEC:
    case PRE_INC:
    case PRE_DEC:
      info->type = ADDRESS_REG_WB;
      info->base = XEXP (x, 0);
      info->offset = NULL_RTX;
      return aarch64_base_register_rtx_p (info->base, strict_p);

    case POST_MODIFY:
    case PRE_MODIFY:
      info->type = ADDRESS_REG_WB;
      info->base = XEXP (x, 0);
      if (GET_CODE (XEXP (x, 1)) == PLUS
          && CONST_INT_P (XEXP (XEXP (x, 1), 1))
          && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
          && aarch64_base_register_rtx_p (info->base, strict_p))
        {
          HOST_WIDE_INT offset;
          info->offset = XEXP (XEXP (x, 1), 1);
          offset = INTVAL (info->offset);

          /* TImode and TFmode values are allowed in both pairs of X
             registers and individual Q registers.  The available
             address modes are:
             X,X: 7-bit signed scaled offset
             Q:   9-bit signed offset
             We conservatively require an offset representable in either mode.
           */
          if (mode == TImode || mode == TFmode)
            return (offset_7bit_signed_scaled_p (mode, offset)
                    && offset_9bit_signed_unscaled_p (mode, offset));

          if (outer_code == PARALLEL)
            return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
                    && offset_7bit_signed_scaled_p (mode, offset));
          else
            return offset_9bit_signed_unscaled_p (mode, offset);
        }
      return false;

    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      /* load literal: pc-relative constant pool entry.  Only supported
         for SI mode or larger.  */
      info->type = ADDRESS_SYMBOLIC;
      if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
        {
          rtx sym, addend;

          split_const (x, &sym, &addend);
          return (GET_CODE (sym) == LABEL_REF
                  || (GET_CODE (sym) == SYMBOL_REF
                      && CONSTANT_POOL_ADDRESS_P (sym)));
        }
      return false;

    case LO_SUM:
      info->type = ADDRESS_LO_SUM;
      info->base = XEXP (x, 0);
      info->offset = XEXP (x, 1);
      if (allow_reg_index_p
          && aarch64_base_register_rtx_p (info->base, strict_p))
        {
          rtx sym, offs;
          split_const (info->offset, &sym, &offs);
          if (GET_CODE (sym) == SYMBOL_REF
              && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
                  == SYMBOL_SMALL_ABSOLUTE))
            {
              /* The symbol and offset must be aligned to the access size.  */
              unsigned int align;
              unsigned int ref_size;

              if (CONSTANT_POOL_ADDRESS_P (sym))
                align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
              else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
                {
                  tree exp = SYMBOL_REF_DECL (sym);
                  align = TYPE_ALIGN (TREE_TYPE (exp));
                  align = CONSTANT_ALIGNMENT (exp, align);
                }
              else if (SYMBOL_REF_DECL (sym))
                align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
              else
                align = BITS_PER_UNIT;

              ref_size = GET_MODE_SIZE (mode);
              if (ref_size == 0)
                ref_size = GET_MODE_SIZE (DImode);

              return ((INTVAL (offs) & (ref_size - 1)) == 0
                      && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
            }
        }
      return false;

    default:
      return false;
    }
}
bool
aarch64_symbolic_address_p (rtx x)
{
  rtx offset;

  split_const (x, &x, &offset);
  return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
}

/* Classify the base of symbolic expression X, given that X appears in
   context CONTEXT.  */

enum aarch64_symbol_type
aarch64_classify_symbolic_expression (rtx x,
                                      enum aarch64_symbol_context context)
{
  rtx offset;

  split_const (x, &x, &offset);
  return aarch64_classify_symbol (x, context);
}

/* Return TRUE if X is a legitimate address for accessing memory in
   mode MODE.  */
static bool
aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
{
  struct aarch64_address_info addr;

  return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
}

/* Return TRUE if X is a legitimate address for accessing memory in
   mode MODE.  OUTER_CODE will be PARALLEL if this is a load/store
   pair operation.  */
bool
aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
                              RTX_CODE outer_code, bool strict_p)
{
  struct aarch64_address_info addr;

  return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
}

/* Return TRUE if rtx X is immediate constant 0.0 */
bool
aarch64_float_const_zero_rtx_p (rtx x)
{
  REAL_VALUE_TYPE r;

  if (GET_MODE (x) == VOIDmode)
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  if (REAL_VALUE_MINUS_ZERO (r))
    return !HONOR_SIGNED_ZEROS (GET_MODE (x));
  return REAL_VALUES_EQUAL (r, dconst0);
}

/* Return the fixed registers used for condition codes.  */

static bool
aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = CC_REGNUM;
  *p2 = INVALID_REGNUM;
  return true;
}
enum machine_mode
aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
{
  /* All floating point compares return CCFP if it is an equality
     comparison, and CCFPE otherwise.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    {
      switch (code)
        {
        case EQ:
        case NE:
        case UNORDERED:
        case ORDERED:
        case UNLT:
        case UNLE:
        case UNGT:
        case UNGE:
        case UNEQ:
        case LTGT:
          return CCFPmode;

        case LT:
        case LE:
        case GT:
        case GE:
          return CCFPEmode;

        default:
          gcc_unreachable ();
        }
    }

  if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
      && y == const0_rtx
      && (code == EQ || code == NE || code == LT || code == GE)
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
          || GET_CODE (x) == NEG))
    return CC_NZmode;

  /* A compare with a shifted or negated operand.  Because of canonicalization,
     the comparison will have to be swapped when we emit the assembly
     code.  */
  if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
      && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
          || GET_CODE (x) == LSHIFTRT
          || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND
          || GET_CODE (x) == NEG))
    return CC_SWPmode;

  /* A compare of a mode narrower than SI mode against zero can be done
     by extending the value in the comparison.  */
  if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
      && y == const0_rtx)
    /* Only use sign-extension if we really need it.  */
    return ((code == GT || code == GE || code == LE || code == LT)
            ? CC_SESWPmode : CC_ZESWPmode);

  /* For everything else, return CCmode.  */
  return CCmode;
}
int
aarch64_get_condition_code (rtx x)
{
  enum machine_mode mode = GET_MODE (XEXP (x, 0));
  enum rtx_code comp_code = GET_CODE (x);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));

  switch (mode)
    {
    case CCFPmode:
    case CCFPEmode:
      switch (comp_code)
        {
        case GE: return AARCH64_GE;
        case GT: return AARCH64_GT;
        case LE: return AARCH64_LS;
        case LT: return AARCH64_MI;
        case NE: return AARCH64_NE;
        case EQ: return AARCH64_EQ;
        case ORDERED: return AARCH64_VC;
        case UNORDERED: return AARCH64_VS;
        case UNLT: return AARCH64_LT;
        case UNLE: return AARCH64_LE;
        case UNGT: return AARCH64_HI;
        case UNGE: return AARCH64_PL;
        default: gcc_unreachable ();
        }

    case CCmode:
      switch (comp_code)
        {
        case NE: return AARCH64_NE;
        case EQ: return AARCH64_EQ;
        case GE: return AARCH64_GE;
        case GT: return AARCH64_GT;
        case LE: return AARCH64_LE;
        case LT: return AARCH64_LT;
        case GEU: return AARCH64_CS;
        case GTU: return AARCH64_HI;
        case LEU: return AARCH64_LS;
        case LTU: return AARCH64_CC;
        default: gcc_unreachable ();
        }

    case CC_SWPmode:
    case CC_ZESWPmode:
    case CC_SESWPmode:
      switch (comp_code)
        {
        case NE: return AARCH64_NE;
        case EQ: return AARCH64_EQ;
        case GE: return AARCH64_LE;
        case GT: return AARCH64_LT;
        case LE: return AARCH64_GE;
        case LT: return AARCH64_GT;
        case GEU: return AARCH64_LS;
        case GTU: return AARCH64_CC;
        case LEU: return AARCH64_CS;
        case LTU: return AARCH64_HI;
        default: gcc_unreachable ();
        }

    case CC_NZmode:
      switch (comp_code)
        {
        case NE: return AARCH64_NE;
        case EQ: return AARCH64_EQ;
        case GE: return AARCH64_PL;
        case LT: return AARCH64_MI;
        default: gcc_unreachable ();
        }

    default:
      gcc_unreachable ();
    }
}

static unsigned
bit_count (unsigned HOST_WIDE_INT value)
{
  unsigned count = 0;

  while (value)
    {
      count++;
      value &= value - 1;
    }

  return count;
}
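/* Illustrative note, not from the sources: clearing value & (value - 1)
   removes the lowest set bit, so the loop in bit_count iterates once
   per set bit (Kernighan's method).  For value = 0b101100 it runs three
   times: 0b101100 -> 0b101000 -> 0b100000 -> 0.  */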
void
aarch64_print_operand (FILE *f, rtx x, char code)
{
  switch (code)
    {
    case 'e':
      /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w.  */
      {
        int n;

        if (GET_CODE (x) != CONST_INT
            || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
          {
            output_operand_lossage ("invalid operand for '%%%c'", code);
            return;
          }

        switch (n)
          {
          case 3:
            fputc ('b', f);
            break;
          case 4:
            fputc ('h', f);
            break;
          case 5:
            fputc ('w', f);
            break;
          default:
            output_operand_lossage ("invalid operand for '%%%c'", code);
            return;
          }
      }
      break;

    case 'p':
      {
        int n;

        /* Print N such that 2^N == X.  */
        if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
          {
            output_operand_lossage ("invalid operand for '%%%c'", code);
            return;
          }

        asm_fprintf (f, "%d", n);
      }
      break;

    case 'P':
      /* Print the number of non-zero bits in X (a const_int).  */
      if (GET_CODE (x) != CONST_INT)
        {
          output_operand_lossage ("invalid operand for '%%%c'", code);
          return;
        }

      asm_fprintf (f, "%u", bit_count (INTVAL (x)));
      break;

    case 'H':
      /* Print the higher numbered register of a pair (TImode) of regs.  */
      if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
        {
          output_operand_lossage ("invalid operand for '%%%c'", code);
          return;
        }

      asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
      break;

    case 'm':
      /* Print a condition (eq, ne, etc).  */

      /* CONST_TRUE_RTX means always -- that's the default.  */
      if (x == const_true_rtx)
        return;

      if (!COMPARISON_P (x))
        {
          output_operand_lossage ("invalid operand for '%%%c'", code);
          return;
        }

      fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
      break;

    case 'M':
      /* Print the inverse of a condition (eq <-> ne, etc).  */

      /* CONST_TRUE_RTX means never -- that's the default.  */
      if (x == const_true_rtx)
        {
          fputs ("nv", f);
          return;
        }

      if (!COMPARISON_P (x))
        {
          output_operand_lossage ("invalid operand for '%%%c'", code);
          return;
        }

      fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
                                     (aarch64_get_condition_code (x))], f);
      break;

    case 'b':
    case 'h':
    case 's':
    case 'd':
    case 'q':
      /* Print a scalar FP/SIMD register name.  */
      if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
        {
          output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
          return;
        }
      asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
      break;

    case 'S':
    case 'T':
    case 'U':
    case 'V':
      /* Print the first FP/SIMD register name in a list.  */
      if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
        {
          output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
          return;
        }
      asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
      break;

    case 'X':
      /* Print bottom 16 bits of integer constant in hex.  */
      if (GET_CODE (x) != CONST_INT)
        {
          output_operand_lossage ("invalid operand for '%%%c'", code);
          return;
        }
      asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
      break;

    case 'w':
    case 'x':
      /* Print a general register name or the zero register (32-bit or
         64-bit).  */
      if (x == const0_rtx
          || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
        {
          asm_fprintf (f, "%czr", code);
          break;
        }

      if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
        {
          asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
          break;
        }

      if (REG_P (x) && REGNO (x) == SP_REGNUM)
        {
          asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
          break;
        }

      /* Fall through */

    case 0:
      /* Print a normal operand, if it's a general register, then we
         assume DImode.  */
      if (x == NULL)
        {
          output_operand_lossage ("missing operand");
          return;
        }

      switch (GET_CODE (x))
        {
        case REG:
          asm_fprintf (f, "%s", reg_names [REGNO (x)]);
          break;

        case MEM:
          aarch64_memory_reference_mode = GET_MODE (x);
          output_address (XEXP (x, 0));
          break;

        case LABEL_REF:
        case SYMBOL_REF:
          output_addr_const (asm_out_file, x);
          break;

        case CONST_INT:
          asm_fprintf (f, "%wd", INTVAL (x));
          break;

        case CONST_VECTOR:
          if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
            {
              gcc_assert (aarch64_const_vec_all_same_int_p (x,
                                                            HOST_WIDE_INT_MIN,
                                                            HOST_WIDE_INT_MAX));
              asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
            }
          else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
            {
              fputc ('0', f);
            }
          else
            gcc_unreachable ();
          break;

        case CONST_DOUBLE:
          /* CONST_DOUBLE can represent a double-width integer.
             In this case, the mode of x is VOIDmode.  */
          if (GET_MODE (x) == VOIDmode)
            ; /* Do Nothing.  */
          else if (aarch64_float_const_zero_rtx_p (x))
            {
              fputc ('0', f);
              break;
            }
          else if (aarch64_float_const_representable_p (x))
            {
#define buf_size 20
              char float_buf[buf_size] = {'\0'};
              REAL_VALUE_TYPE r;
              REAL_VALUE_FROM_CONST_DOUBLE (r, x);
              real_to_decimal_for_mode (float_buf, &r,
                                        buf_size, buf_size,
                                        1, GET_MODE (x));
              asm_fprintf (asm_out_file, "%s", float_buf);
              break;
#undef buf_size
            }
          output_operand_lossage ("invalid constant");
          return;
        default:
          output_operand_lossage ("invalid operand");
          return;
        }
      break;

    case 'A':
      if (GET_CODE (x) == HIGH)
        x = XEXP (x, 0);

      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
        {
        case SYMBOL_SMALL_GOT:
          asm_fprintf (asm_out_file, ":got:");
          break;

        case SYMBOL_SMALL_TLSGD:
          asm_fprintf (asm_out_file, ":tlsgd:");
          break;

        case SYMBOL_SMALL_TLSDESC:
          asm_fprintf (asm_out_file, ":tlsdesc:");
          break;

        case SYMBOL_SMALL_GOTTPREL:
          asm_fprintf (asm_out_file, ":gottprel:");
          break;

        case SYMBOL_SMALL_TPREL:
          asm_fprintf (asm_out_file, ":tprel:");
          break;

        case SYMBOL_TINY_GOT:
          gcc_unreachable ();
          break;

        default:
          break;
        }
      output_addr_const (asm_out_file, x);
      break;

    case 'L':
      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
        {
        case SYMBOL_SMALL_GOT:
          asm_fprintf (asm_out_file, ":lo12:");
          break;

        case SYMBOL_SMALL_TLSGD:
          asm_fprintf (asm_out_file, ":tlsgd_lo12:");
          break;

        case SYMBOL_SMALL_TLSDESC:
          asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
          break;

        case SYMBOL_SMALL_GOTTPREL:
          asm_fprintf (asm_out_file, ":gottprel_lo12:");
          break;

        case SYMBOL_SMALL_TPREL:
          asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
          break;

        case SYMBOL_TINY_GOT:
          asm_fprintf (asm_out_file, ":got:");
          break;

        default:
          break;
        }
      output_addr_const (asm_out_file, x);
      break;

    case 'G':
      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
        {
        case SYMBOL_SMALL_TPREL:
          asm_fprintf (asm_out_file, ":tprel_hi12:");
          break;
        default:
          break;
        }
      output_addr_const (asm_out_file, x);
      break;

    default:
      output_operand_lossage ("invalid operand prefix '%%%c'", code);
      return;
    }
}
void
aarch64_print_operand_address (FILE *f, rtx x)
{
  struct aarch64_address_info addr;

  if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
                                MEM, true))
    switch (addr.type)
      {
      case ADDRESS_REG_IMM:
        if (addr.offset == const0_rtx)
          asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
        else
          asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
                       INTVAL (addr.offset));
        return;

      case ADDRESS_REG_REG:
        if (addr.shift == 0)
          asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
                       reg_names [REGNO (addr.offset)]);
        else
          asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
                       reg_names [REGNO (addr.offset)], addr.shift);
        return;

      case ADDRESS_REG_UXTW:
        if (addr.shift == 0)
          asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
                       REGNO (addr.offset) - R0_REGNUM);
        else
          asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
                       REGNO (addr.offset) - R0_REGNUM, addr.shift);
        return;

      case ADDRESS_REG_SXTW:
        if (addr.shift == 0)
          asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
                       REGNO (addr.offset) - R0_REGNUM);
        else
          asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
                       REGNO (addr.offset) - R0_REGNUM, addr.shift);
        return;

      case ADDRESS_REG_WB:
        switch (GET_CODE (x))
          {
          case PRE_INC:
            asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
                         GET_MODE_SIZE (aarch64_memory_reference_mode));
            return;
          case POST_INC:
            asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
                         GET_MODE_SIZE (aarch64_memory_reference_mode));
            return;
          case PRE_DEC:
            asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
                         GET_MODE_SIZE (aarch64_memory_reference_mode));
            return;
          case POST_DEC:
            asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
                         GET_MODE_SIZE (aarch64_memory_reference_mode));
            return;
          case PRE_MODIFY:
            asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
                         INTVAL (addr.offset));
            return;
          case POST_MODIFY:
            asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
                         INTVAL (addr.offset));
            return;
          default:
            break;
          }
        break;

      case ADDRESS_LO_SUM:
        asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
        output_addr_const (f, addr.offset);
        asm_fprintf (f, "]");
        return;

      case ADDRESS_SYMBOLIC:
        break;
      }

  output_addr_const (f, x);
}
bool
aarch64_label_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == LABEL_REF)
    return true;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
     referencing instruction, but they are constant offsets, not
     symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return false;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
        {
          int j;

          for (j = XVECLEN (x, i) - 1; j >= 0; j--)
            if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
              return 1;
        }
      else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
        return 1;
    }

  return 0;
}
/* Implement REGNO_REG_CLASS.  */

enum reg_class
aarch64_regno_regclass (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return CORE_REGS;

  if (regno == SP_REGNUM)
    return STACK_REG;

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return CORE_REGS;

  if (FP_REGNUM_P (regno))
    return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;

  return NO_REGS;
}
/* Try a machine-dependent way of reloading an illegitimate address
   operand.  If we find one, push the reload and return the new rtx.  */

rtx
aarch64_legitimize_reload_address (rtx *x_p,
                                   enum machine_mode mode,
                                   int opnum, int type,
                                   int ind_levels ATTRIBUTE_UNUSED)
{
  rtx x = *x_p;

  /* Do not allow mem (plus (reg, const)) if vector mode.  */
  if (aarch64_vector_mode_p (mode)
      && GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))
    {
      rtx orig_rtx = x;
      x = copy_rtx (x);
      push_reload (orig_rtx, NULL_RTX, x_p, NULL,
                   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
                   opnum, (enum reload_type) type);
      return x;
    }

  /* We must recognize output that we have already generated ourselves.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && REG_P (XEXP (XEXP (x, 0), 0))
      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
      && CONST_INT_P (XEXP (x, 1)))
    {
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
                   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
                   opnum, (enum reload_type) type);
      return x;
    }

  /* We wish to handle large displacements off a base register by splitting
     the addend across an add and the mem insn.  This can cut the number of
     extra insns needed from 3 to 1.  It is only useful for load/store of a
     single register with 12 bit offset field.  */
  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1))
      && HARD_REGISTER_P (XEXP (x, 0))
      && mode != TImode
      && mode != TFmode
      && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
    {
      HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
      HOST_WIDE_INT low = val & 0xfff;
      HOST_WIDE_INT high = val - low;
      HOST_WIDE_INT offs;
      rtx cst;

      /* Reload non-zero BLKmode offsets.  This is because we cannot ascertain
         BLKmode alignment.  */
      if (GET_MODE_SIZE (mode) == 0)
        return NULL_RTX;

      offs = low % GET_MODE_SIZE (mode);

      /* Align misaligned offset by adjusting high part to compensate.  */
      if (offs != 0)
        {
          if (aarch64_uimm12_shift (high + offs))
            {
              /* Keep the misaligned offset in the high part.  */
              high = high + offs;
              low = low - offs;
            }
          else
            {
              offs = GET_MODE_SIZE (mode) - offs;
              low = low + offs;
              high = high + (low & 0x1000) - offs;
              low &= 0xfff;
            }
        }

      /* Check for overflow.  */
      if (high + low != val)
        return NULL_RTX;

      cst = GEN_INT (high);
      if (!aarch64_uimm12_shift (high))
        cst = force_const_mem (Pmode, cst);

      /* Reload high part into base reg, leaving the low part
         in the mem instruction.  */
      x = gen_rtx_PLUS (Pmode,
                        gen_rtx_PLUS (Pmode, XEXP (x, 0), cst),
                        GEN_INT (low));

      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
                   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
                   opnum, (enum reload_type) type);
      return x;
    }

  return NULL_RTX;
}
static reg_class_t
aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
                          reg_class_t rclass,
                          enum machine_mode mode,
                          secondary_reload_info *sri)
{
  /* Address expressions of the form PLUS (SP, large_offset) need two
     scratch registers, one for the constant, and one for holding a
     copy of SP, since SP cannot be used on the RHS of an add-reg
     instruction.  */
  if (mode == DImode
      && GET_CODE (x) == PLUS
      && XEXP (x, 0) == stack_pointer_rtx
      && CONST_INT_P (XEXP (x, 1))
      && !aarch64_uimm12_shift (INTVAL (XEXP (x, 1))))
    {
      sri->icode = CODE_FOR_reload_sp_immediate;
      return NO_REGS;
    }

  /* Without the TARGET_SIMD instructions we cannot move a Q register
     to a Q register directly.  We need a scratch.  */
  if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
      && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
      && reg_class_subset_p (rclass, FP_REGS))
    {
      if (mode == TFmode)
        sri->icode = CODE_FOR_aarch64_reload_movtf;
      else if (mode == TImode)
        sri->icode = CODE_FOR_aarch64_reload_movti;
      return NO_REGS;
    }

  /* A TFmode or TImode memory access should be handled via an FP_REGS
     because AArch64 has richer addressing modes for LDR/STR instructions
     than LDP/STP instructions.  */
  if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
      && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
    return FP_REGS;

  if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P (x))
    return CORE_REGS;

  return NO_REGS;
}
bool
aarch64_can_eliminate (const int from, const int to)
{
  /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
     HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM.  */

  if (frame_pointer_needed)
    {
      if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
        return true;
      if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
        return false;
      if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
          && !cfun->calls_alloca)
        return true;
      if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
        return true;
      return false;
    }
  else
    {
      /* If we decided that we didn't need a leaf frame pointer but then used
         LR in the function, then we'll want a frame pointer after all, so
         prevent this elimination to ensure a frame pointer is used.

         NOTE: the original value of flag_omit_frame_pointer gets trashed
         IFF flag_omit_leaf_frame_pointer is true, so we check the value
         of faked_omit_frame_pointer here (which is true when we always
         wish to keep non-leaf frame pointers but only wish to keep leaf frame
         pointers when LR is clobbered).  */
      if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
          && df_regs_ever_live_p (LR_REGNUM)
          && faked_omit_frame_pointer)
        return false;
    }

  return true;
}
HOST_WIDE_INT
aarch64_initial_elimination_offset (unsigned from, unsigned to)
{
  HOST_WIDE_INT frame_size;
  HOST_WIDE_INT offset;

  aarch64_layout_frame ();
  frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
                + crtl->outgoing_args_size
                + cfun->machine->saved_varargs_size);

  frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
  offset = frame_size;

  if (to == HARD_FRAME_POINTER_REGNUM)
    {
      if (from == ARG_POINTER_REGNUM)
        return offset - crtl->outgoing_args_size;

      if (from == FRAME_POINTER_REGNUM)
        return cfun->machine->frame.saved_regs_size + get_frame_size ();
    }

  if (to == STACK_POINTER_REGNUM)
    {
      if (from == FRAME_POINTER_REGNUM)
        {
          HOST_WIDE_INT elim = crtl->outgoing_args_size
                               + cfun->machine->frame.saved_regs_size
                               + get_frame_size ()
                               - cfun->machine->frame.fp_lr_offset;
          elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
          return elim;
        }
    }

  return offset;
}
/* Implement RETURN_ADDR_RTX.  We do not support moving back to a
   previous frame.  */

rtx
aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return const0_rtx;
  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
static void
aarch64_asm_trampoline_template (FILE *f)
{
  asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
  asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
  asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
  assemble_aligned_integer (4, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}

int
aarch64_trampoline_size (void)
{
  return 32;  /* 3 insns + padding + 2 dwords.  */
}
static void
aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;

  /* Don't need to copy the trailing D-words, we fill those in below.  */
  emit_block_move (m_tramp, assemble_trampoline_template (),
                   GEN_INT (TRAMPOLINE_SIZE - 16), BLOCK_OP_NORMAL);
  mem = adjust_address (m_tramp, DImode, 16);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  emit_move_insn (mem, fnaddr);

  mem = adjust_address (m_tramp, DImode, 24);
  emit_move_insn (mem, chain_value);

  /* XXX We should really define a "clear_cache" pattern and use
     gen_clear_cache().  */
  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
                     LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
                     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
}
static unsigned char
aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
{
  switch (regclass)
    {
    case CORE_REGS:
    case POINTER_REGS:
    case GENERAL_REGS:
    case ALL_REGS:
    case FP_REGS:
    case FP_LO_REGS:
      return
        aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
                                       (GET_MODE_SIZE (mode) + 7) / 8;
    case STACK_REG:
      return 1;

    case NO_REGS:
      return 0;

    default:
      break;
    }
  gcc_unreachable ();
}

static reg_class_t
aarch64_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t regclass)
{
  return ((regclass == POINTER_REGS || regclass == STACK_REG)
          ? GENERAL_REGS : regclass);
}
void
aarch64_asm_output_labelref (FILE* f, const char *name)
{
  asm_fprintf (f, "%U%s", name);
}

static void
aarch64_elf_asm_constructor (rtx symbol, int priority)
{
  if (priority == DEFAULT_INIT_PRIORITY)
    default_ctor_section_asm_out_constructor (symbol, priority);
  else
    {
      section *s;
      char buf[23];
      snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
      s = get_section (buf, SECTION_WRITE, NULL);
      switch_to_section (s);
      assemble_align (POINTER_SIZE);
      fputs ("\t.dword\t", asm_out_file);
      output_addr_const (asm_out_file, symbol);
      fputc ('\n', asm_out_file);
    }
}

static void
aarch64_elf_asm_destructor (rtx symbol, int priority)
{
  if (priority == DEFAULT_INIT_PRIORITY)
    default_dtor_section_asm_out_destructor (symbol, priority);
  else
    {
      section *s;
      char buf[23];
      snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
      s = get_section (buf, SECTION_WRITE, NULL);
      switch_to_section (s);
      assemble_align (POINTER_SIZE);
      fputs ("\t.dword\t", asm_out_file);
      output_addr_const (asm_out_file, symbol);
      fputc ('\n', asm_out_file);
    }
}
const char*
aarch64_output_casesi (rtx *operands)
{
  char buf[100];
  char label[100];
  rtx diff_vec = PATTERN (next_real_insn (operands[2]));
  int index;
  static const char *const patterns[4][2] =
  {
    {
      "ldrb\t%w3, [%0,%w1,uxtw]",
      "add\t%3, %4, %w3, sxtb #2"
    },
    {
      "ldrh\t%w3, [%0,%w1,uxtw #1]",
      "add\t%3, %4, %w3, sxth #2"
    },
    {
      "ldr\t%w3, [%0,%w1,uxtw #2]",
      "add\t%3, %4, %w3, sxtw #2"
    },
    /* We assume that DImode is only generated when not optimizing and
       that we don't really need 64-bit address offsets.  That would
       imply an object file with 8GB of code in a single function!  */
    {
      "ldr\t%w3, [%0,%w1,uxtw #2]",
      "add\t%3, %4, %w3, sxtw #2"
    }
  };

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));

  gcc_assert (index >= 0 && index <= 3);

  /* Need to implement table size reduction, by changing the code below.  */
  output_asm_insn (patterns[index][0], operands);
  ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
  snprintf (buf, sizeof (buf),
            "adr\t%%4, %s", targetm.strip_name_encoding (label));
  output_asm_insn (buf, operands);
  output_asm_insn (patterns[index][1], operands);
  output_asm_insn ("br\t%3", operands);
  assemble_label (asm_out_file, label);
  return "";
}
/* Return size in bits of an arithmetic operand which is shifted/scaled and
   masked such that it is suitable for a UXTB, UXTH, or UXTW extend
   operator.  */

int
aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
{
  if (shift >= 0 && shift <= 3)
    {
      int size;
      for (size = 8; size <= 32; size *= 2)
        {
          HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
          if (mask == bits << shift)
            return size;
        }
    }
  return 0;
}
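/* Worked example, not from the sources: aarch64_uxt_size (1, 0x1fe)
   returns 8, because 0x1fe == 0xff << 1 -- a byte mask at shift 1, so
   the operand behaves like a UXTB of the shifted register.
   aarch64_uxt_size (1, 0x1ff) returns 0: that mask is not a shifted
   byte, halfword, or word mask.  */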
static bool
aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
                                   const_rtx x ATTRIBUTE_UNUSED)
{
  /* We can't use blocks for constants when we're using a per-function
     constant pool.  */
  return false;
}

static section *
aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
                            rtx x ATTRIBUTE_UNUSED,
                            unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  /* Force all constant pool entries into the current function section.  */
  return function_section (current_function_decl);
}
/* Helper function for rtx cost calculation.  Strip a shift expression
   from X.  Returns the inner operand if successful, or the original
   expression on failure.  */
static rtx
aarch64_strip_shift (rtx x)
{
  rtx op = x;

  if ((GET_CODE (op) == ASHIFT
       || GET_CODE (op) == ASHIFTRT
       || GET_CODE (op) == LSHIFTRT)
      && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);

  if (GET_CODE (op) == MULT
      && CONST_INT_P (XEXP (op, 1))
      && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
    return XEXP (op, 0);

  return x;
}

/* Helper function for rtx cost calculation.  Strip a shift or extend
   expression from X.  Returns the inner operand if successful, or the
   original expression on failure.  We deal with a number of possible
   canonicalization variations here.  */
static rtx
aarch64_strip_shift_or_extend (rtx x)
{
  rtx op = x;

  /* Zero and sign extraction of a widened value.  */
  if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
      && XEXP (op, 2) == const0_rtx
      && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
                                         XEXP (op, 1)))
    return XEXP (XEXP (op, 0), 0);

  /* It can also be represented (for zero-extend) as an AND with an
     immediate.  */
  if (GET_CODE (op) == AND
      && GET_CODE (XEXP (op, 0)) == MULT
      && CONST_INT_P (XEXP (XEXP (op, 0), 1))
      && CONST_INT_P (XEXP (op, 1))
      && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
                           INTVAL (XEXP (op, 1))) != 0)
    return XEXP (XEXP (op, 0), 0);

  /* Now handle extended register, as this may also have an optional
     left shift by 1..4.  */
  if (GET_CODE (op) == ASHIFT
      && CONST_INT_P (XEXP (op, 1))
      && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
    op = XEXP (op, 0);

  if (GET_CODE (op) == ZERO_EXTEND
      || GET_CODE (op) == SIGN_EXTEND)
    op = XEXP (op, 0);

  if (op != x)
    return op;

  return aarch64_strip_shift (x);
}
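/* Illustrative note, not from the sources: combine canonicalizes
   (ashift x 2) as (mult x 4), which is why aarch64_strip_shift also
   accepts a MULT by an exact power of two.  exact_log2 returns -1 for
   a non-power-of-two, and the unsigned comparison against 64 above
   rejects that -1 cheaply.  */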
/* Calculate the cost of calculating X, storing it in *COST.  Result
   is true if the total cost of the operation has now been calculated.  */
static bool
aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
                   int param ATTRIBUTE_UNUSED, int *cost, bool speed)
{
  rtx op0, op1;
  const struct cpu_rtx_cost_table *extra_cost
    = aarch64_tune_params->insn_extra_cost;

  switch (code)
    {
    case SET:
      op0 = SET_DEST (x);
      op1 = SET_SRC (x);

      switch (GET_CODE (op0))
        {
        case MEM:
          if (speed)
            *cost += extra_cost->memory_store;

          if (op1 != const0_rtx)
            *cost += rtx_cost (op1, SET, 1, speed);
          return true;

        case SUBREG:
          if (! REG_P (SUBREG_REG (op0)))
            *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
          /* Fall through.  */
        case REG:
          /* Cost is just the cost of the RHS of the set.  */
          *cost += rtx_cost (op1, SET, 1, true);
          return true;

        case ZERO_EXTRACT: /* Bit-field insertion.  */
        case SIGN_EXTRACT:
          /* Strip any redundant widening of the RHS to meet the width of
             the target.  */
          if (GET_CODE (op1) == SUBREG)
            op1 = SUBREG_REG (op1);
          if ((GET_CODE (op1) == ZERO_EXTEND
               || GET_CODE (op1) == SIGN_EXTEND)
              && GET_CODE (XEXP (op0, 1)) == CONST_INT
              && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
                  >= INTVAL (XEXP (op0, 1))))
            op1 = XEXP (op1, 0);
          *cost += rtx_cost (op1, SET, 1, speed);
          return true;

        default:
          break;
        }
      return false;

    case MEM:
      if (speed)
        *cost += extra_cost->memory_load;

      return true;

    case NEG:
      op0 = CONST0_RTX (GET_MODE (x));
      op1 = XEXP (x, 0);
      goto cost_minus;

    case COMPARE:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (op1 == const0_rtx
          && GET_CODE (op0) == AND)
        {
          x = op0;
          goto cost_logic;
        }

      /* Comparisons can work if the order is swapped.
         Canonicalization puts the more complex operation first, but
         we want it in op1.  */
      if (! (REG_P (op0)
             || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
        {
          op0 = XEXP (x, 1);
          op1 = XEXP (x, 0);
        }
      goto cost_minus;

    case MINUS:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

    cost_minus:
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
          || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
              && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
        {
          if (op0 != const0_rtx)
            *cost += rtx_cost (op0, MINUS, 0, speed);

          if (CONST_INT_P (op1))
            {
              if (!aarch64_uimm12_shift (INTVAL (op1)))
                *cost += rtx_cost (op1, MINUS, 1, speed);
            }
          else
            {
              op1 = aarch64_strip_shift_or_extend (op1);
              *cost += rtx_cost (op1, MINUS, 1, speed);
            }
          return true;
        }

      return false;

    case PLUS:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
        {
          if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
            {
              *cost += rtx_cost (op0, PLUS, 0, speed);
            }
          else
            {
              rtx new_op0 = aarch64_strip_shift_or_extend (op0);

              if (new_op0 == op0
                  && GET_CODE (op0) == MULT)
                {
                  if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
                       && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
                      || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
                          && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
                    {
                      *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
                                          speed)
                                + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
                                            speed)
                                + rtx_cost (op1, PLUS, 1, speed));
                      if (speed)
                        *cost += extra_cost->int_multiply_extend_add;
                      return true;
                    }
                  *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
                            + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
                            + rtx_cost (op1, PLUS, 1, speed));

                  if (speed)
                    *cost += extra_cost->int_multiply_add;

                  return true;
                }

              *cost += (rtx_cost (new_op0, PLUS, 0, speed)
                        + rtx_cost (op1, PLUS, 1, speed));
            }
          return true;
        }

      return false;

    case IOR:
    case XOR:
    case AND:
    cost_logic:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
        {
          if (CONST_INT_P (op1)
              && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
            {
              *cost += rtx_cost (op0, AND, 0, speed);
            }
          else
            {
              if (GET_CODE (op0) == NOT)
                op0 = XEXP (op0, 0);
              op0 = aarch64_strip_shift (op0);
              *cost += (rtx_cost (op0, AND, 0, speed)
                        + rtx_cost (op1, AND, 1, speed));
            }
          return true;
        }
      return false;

    case ZERO_EXTEND:
      if ((GET_MODE (x) == DImode
           && GET_MODE (XEXP (x, 0)) == SImode)
          || GET_CODE (XEXP (x, 0)) == MEM)
        {
          *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
          return true;
        }
      return false;

    case SIGN_EXTEND:
      if (GET_CODE (XEXP (x, 0)) == MEM)
        {
          *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
          return true;
        }
      return false;

    case ROTATE:
      if (!CONST_INT_P (XEXP (x, 1)))
        *cost += COSTS_N_INSNS (2);
      /* Fall through.  */
    case ROTATERT:
    case LSHIFTRT:
    case ASHIFT:
    case ASHIFTRT:

      /* Shifting by a register often takes an extra cycle.  */
      if (speed && !CONST_INT_P (XEXP (x, 1)))
        *cost += extra_cost->register_shift;

      *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
      return true;

    case HIGH:
      if (!CONSTANT_P (XEXP (x, 0)))
        *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
      return true;

    case LO_SUM:
      if (!CONSTANT_P (XEXP (x, 1)))
        *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
      *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
      return true;

    case ZERO_EXTRACT:
    case SIGN_EXTRACT:
      *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
      return true;

    case MULT:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      *cost = COSTS_N_INSNS (1);
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
        {
          if (CONST_INT_P (op1)
              && exact_log2 (INTVAL (op1)) > 0)
            {
              *cost += rtx_cost (op0, ASHIFT, 0, speed);
              return true;
            }

          if ((GET_CODE (op0) == ZERO_EXTEND
               && GET_CODE (op1) == ZERO_EXTEND)
              || (GET_CODE (op0) == SIGN_EXTEND
                  && GET_CODE (op1) == SIGN_EXTEND))
            {
              *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
                        + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
              if (speed)
                *cost += extra_cost->int_multiply_extend;
              return true;
            }

          if (speed)
            *cost += extra_cost->int_multiply;
        }
      else if (speed)
        {
          if (GET_MODE (x) == DFmode)
            *cost += extra_cost->double_multiply;
          else if (GET_MODE (x) == SFmode)
            *cost += extra_cost->float_multiply;
        }

      return false; /* All arguments need to be in registers.  */

    case MOD:
    case UMOD:
      *cost = COSTS_N_INSNS (2);
      if (speed)
        {
          if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
            *cost += (extra_cost->int_multiply_add
                      + extra_cost->int_divide);
          else if (GET_MODE (x) == DFmode)
            *cost += (extra_cost->double_multiply
                      + extra_cost->double_divide);
          else if (GET_MODE (x) == SFmode)
            *cost += (extra_cost->float_multiply
                      + extra_cost->float_divide);
        }
      return false; /* All arguments need to be in registers.  */

    case DIV:
    case UDIV:
      *cost = COSTS_N_INSNS (1);
      if (speed)
        {
          if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
            *cost += extra_cost->int_divide;
          else if (GET_MODE (x) == DFmode)
            *cost += extra_cost->double_divide;
          else if (GET_MODE (x) == SFmode)
            *cost += extra_cost->float_divide;
        }
      return false; /* All arguments need to be in registers.  */

    default:
      break;
    }
  return false;
}
static int
aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
                      enum machine_mode mode ATTRIBUTE_UNUSED,
                      addr_space_t as ATTRIBUTE_UNUSED,
                      bool speed ATTRIBUTE_UNUSED)
{
  enum rtx_code c = GET_CODE (x);
  const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;

  if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
    return addr_cost->pre_modify;

  if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
    return addr_cost->post_modify;

  if (c == PLUS)
    {
      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
        return addr_cost->imm_offset;
      else if (GET_CODE (XEXP (x, 0)) == MULT
               || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
               || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
        return addr_cost->register_extend;

      return addr_cost->register_offset;
    }
  else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
    return addr_cost->imm_offset;

  return 0;
}

static int
aarch64_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
                            reg_class_t from, reg_class_t to)
{
  const struct cpu_regmove_cost *regmove_cost
    = aarch64_tune_params->regmove_cost;

  if (from == GENERAL_REGS && to == GENERAL_REGS)
    return regmove_cost->GP2GP;
  else if (from == GENERAL_REGS)
    return regmove_cost->GP2FP;
  else if (to == GENERAL_REGS)
    return regmove_cost->FP2GP;

  /* When AdvSIMD instructions are disabled it is not possible to move
     a 128-bit value directly between Q registers.  This is handled in
     secondary reload.  A general register is used as a scratch to move
     the upper DI value and the lower DI value is moved directly,
     hence the cost is the sum of three moves.  */

  if (! TARGET_SIMD && GET_MODE_SIZE (from) == 128 && GET_MODE_SIZE (to) == 128)
    return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;

  return regmove_cost->FP2FP;
}

static int
aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
                          reg_class_t rclass ATTRIBUTE_UNUSED,
                          bool in ATTRIBUTE_UNUSED)
{
  return aarch64_tune_params->memmov_cost;
}
/* Vectorizer cost model target hooks.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				    tree vectype,
				    int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
      case scalar_stmt:
	return aarch64_tune_params->vec_costs->scalar_stmt_cost;

      case scalar_load:
	return aarch64_tune_params->vec_costs->scalar_load_cost;

      case scalar_store:
	return aarch64_tune_params->vec_costs->scalar_store_cost;

      case vector_stmt:
	return aarch64_tune_params->vec_costs->vec_stmt_cost;

      case vector_load:
	return aarch64_tune_params->vec_costs->vec_align_load_cost;

      case vector_store:
	return aarch64_tune_params->vec_costs->vec_store_cost;

      case vec_to_scalar:
	return aarch64_tune_params->vec_costs->vec_to_scalar_cost;

      case scalar_to_vec:
	return aarch64_tune_params->vec_costs->scalar_to_vec_cost;

      case unaligned_load:
	return aarch64_tune_params->vec_costs->vec_unalign_load_cost;

      case unaligned_store:
	return aarch64_tune_params->vec_costs->vec_unalign_store_cost;

      case cond_branch_taken:
	return aarch64_tune_params->vec_costs->cond_taken_branch_cost;

      case cond_branch_not_taken:
	return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;

      case vec_perm:
      case vec_promote_demote:
	return aarch64_tune_params->vec_costs->vec_stmt_cost;

      case vec_construct:
	elements = TYPE_VECTOR_SUBPARTS (vectype);
	return elements / 2 + 1;

      default:
	gcc_unreachable ();
    }
}
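
/* Worked example of the vec_construct case above: building a V4SI vector
   from scalars is reported as 4 / 2 + 1 = 3, a rough proxy for the insert
   sequence required.  */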
/* Implement targetm.vectorize.add_stmt_cost.  */
static unsigned
aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
		       struct _stmt_vec_info *stmt_info, int misalign,
		       enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost =
	    aarch64_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
	 vectorized are weighted more heavily.  The value here is
	 a function (linear for now) of the loop nest level.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
	{
	  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
	  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
	  unsigned nest_level = loop_depth (loop);

	  count *= nest_level;
	}

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}
static void initialize_aarch64_code_model (void);

/* Parse the architecture extension string.  */

static void
aarch64_parse_extension (char *str)
{
  /* The extension string is parsed left to right.  */
  const struct aarch64_option_extension *opt = NULL;

  /* Flag to say whether we are adding or removing an extension.  */
  int adding_ext = -1;

  while (str != NULL && *str != 0)
    {
      char *ext;
      size_t len;

      str++;
      ext = strchr (str, '+');

      if (ext != NULL)
	len = ext - str;
      else
	len = strlen (str);

      if (len >= 2 && strncmp (str, "no", 2) == 0)
	{
	  adding_ext = 0;
	  len -= 2;
	  str += 2;
	}
      else if (len > 0)
	adding_ext = 1;

      if (len == 0)
	{
	  error ("missing feature modifier after %qs", "+no");
	  return;
	}

      /* Scan over the extensions table trying to find an exact match.  */
      for (opt = all_extensions; opt->name != NULL; opt++)
	{
	  if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
	    {
	      /* Add or remove the extension.  */
	      if (adding_ext)
		aarch64_isa_flags |= opt->flags_on;
	      else
		aarch64_isa_flags &= ~(opt->flags_off);
	      break;
	    }
	}

      if (opt->name == NULL)
	{
	  /* Extension not found in list.  */
	  error ("unknown feature modifier %qs", str);
	  return;
	}

      str = ext;
    }

  return;
}
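
/* Example inputs: a string such as "+fp+simd" turns on the flags_on bits
   of each named entry, while "+nosimd" strips the "no" prefix and clears
   the flags_off bits of the "simd" entry; a bare "+no" is rejected with
   the missing-feature-modifier error above.  */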
/* Parse the ARCH string.  */

static void
aarch64_parse_arch (void)
{
  char *ext;
  const struct processor *arch;
  char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
  size_t len;

  strcpy (str, aarch64_arch_string);

  ext = strchr (str, '+');

  if (ext != NULL)
    len = ext - str;
  else
    len = strlen (str);

  if (len == 0)
    {
      error ("missing arch name in -march=%qs", str);
      return;
    }

  /* Loop through the list of supported ARCHs to find a match.  */
  for (arch = all_architectures; arch->name != NULL; arch++)
    {
      if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
	{
	  selected_arch = arch;
	  aarch64_isa_flags = selected_arch->flags;
	  selected_cpu = &all_cores[selected_arch->core];

	  if (ext != NULL)
	    {
	      /* ARCH string contains at least one extension.  */
	      aarch64_parse_extension (ext);
	    }

	  return;
	}
    }

  /* ARCH name not found in list.  */
  error ("unknown value %qs for -march", str);
  return;
}
/* Parse the CPU string.  */

static void
aarch64_parse_cpu (void)
{
  char *ext;
  const struct processor *cpu;
  char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
  size_t len;

  strcpy (str, aarch64_cpu_string);

  ext = strchr (str, '+');

  if (ext != NULL)
    len = ext - str;
  else
    len = strlen (str);

  if (len == 0)
    {
      error ("missing cpu name in -mcpu=%qs", str);
      return;
    }

  /* Loop through the list of supported CPUs to find a match.  */
  for (cpu = all_cores; cpu->name != NULL; cpu++)
    {
      if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
	{
	  selected_cpu = cpu;
	  aarch64_isa_flags = selected_cpu->flags;

	  if (ext != NULL)
	    {
	      /* CPU string contains at least one extension.  */
	      aarch64_parse_extension (ext);
	    }

	  return;
	}
    }

  /* CPU name not found in list.  */
  error ("unknown value %qs for -mcpu", str);
  return;
}
/* Parse the TUNE string.  */

static void
aarch64_parse_tune (void)
{
  const struct processor *cpu;
  char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
  strcpy (str, aarch64_tune_string);

  /* Loop through the list of supported CPUs to find a match.  */
  for (cpu = all_cores; cpu->name != NULL; cpu++)
    {
      if (strcmp (cpu->name, str) == 0)
	{
	  selected_tune = cpu;
	  return;
	}
    }

  /* CPU name not found in list.  */
  error ("unknown value %qs for -mtune", str);
  return;
}
/* Implement TARGET_OPTION_OVERRIDE.  */

static void
aarch64_override_options (void)
{
  /* march wins over mcpu, so when march is defined, mcpu takes the same value,
     otherwise march remains undefined.  mtune can be used with either march
     or mcpu.  */

  if (aarch64_arch_string)
    {
      aarch64_parse_arch ();
      aarch64_cpu_string = NULL;
    }

  if (aarch64_cpu_string)
    {
      aarch64_parse_cpu ();
      selected_arch = NULL;
    }

  if (aarch64_tune_string)
    {
      aarch64_parse_tune ();
    }

  initialize_aarch64_code_model ();

  aarch64_build_bitmask_table ();

  /* This target defaults to strict volatile bitfields.  */
  if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
    flag_strict_volatile_bitfields = 1;

  /* If the user did not specify a processor, choose the default
     one for them.  This will be the CPU set during configuration using
     --with-cpu, otherwise it is "generic".  */
  if (!selected_cpu)
    {
      selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
      aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
    }

  gcc_assert (selected_cpu);

  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!selected_tune)
    selected_tune = &all_cores[selected_cpu->core];

  aarch64_tune_flags = selected_tune->flags;
  aarch64_tune = selected_tune->core;
  aarch64_tune_params = selected_tune->tune;

  aarch64_override_options_after_change ();
}
/* Implement targetm.override_options_after_change.  */

static void
aarch64_override_options_after_change (void)
{
  faked_omit_frame_pointer = false;

  /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
     that aarch64_frame_pointer_required will be called.  We need to remember
     whether flag_omit_frame_pointer was turned on normally or just faked.  */

  if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
    {
      flag_omit_frame_pointer = true;
      faked_omit_frame_pointer = true;
    }
}
static struct machine_function *
aarch64_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_alloc_cleared_machine_function ();
  return machine;
}

void
aarch64_init_expanders (void)
{
  init_machine_status = aarch64_init_machine_status;
}
/* A checking mechanism for the implementation of the various code models.  */

static void
initialize_aarch64_code_model (void)
{
  if (flag_pic)
    {
      switch (aarch64_cmodel_var)
	{
	case AARCH64_CMODEL_TINY:
	  aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
	  break;
	case AARCH64_CMODEL_SMALL:
	  aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
	  break;
	case AARCH64_CMODEL_LARGE:
	  sorry ("code model %qs with -f%s", "large",
		 flag_pic > 1 ? "PIC" : "pic");
	default:
	  gcc_unreachable ();
	}
    }
  else
    aarch64_cmodel = aarch64_cmodel_var;
}
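
/* For instance, compiling with -fpic under the default -mcmodel=small
   selects AARCH64_CMODEL_SMALL_PIC here, while -mcmodel=large together
   with -fpic is rejected via sorry () above.  */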
/* Return true if SYMBOL_REF X binds locally.  */

static bool
aarch64_symbol_binds_local_p (const_rtx x)
{
  return (SYMBOL_REF_DECL (x)
	  ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
	  : SYMBOL_REF_LOCAL_P (x));
}
/* Return true if SYMBOL_REF X is thread local.  */
static bool
aarch64_tls_symbol_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  if (GET_CODE (x) != SYMBOL_REF)
    return false;

  return SYMBOL_REF_TLS_MODEL (x) != 0;
}
/* Classify a TLS symbol into one of the TLS kinds.  */
enum aarch64_symbol_type
aarch64_classify_tls_symbol (rtx x)
{
  enum tls_model tls_kind = tls_symbolic_operand_type (x);

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
    case TLS_MODEL_LOCAL_DYNAMIC:
      return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;

    case TLS_MODEL_INITIAL_EXEC:
      return SYMBOL_SMALL_GOTTPREL;

    case TLS_MODEL_LOCAL_EXEC:
      return SYMBOL_SMALL_TPREL;

    case TLS_MODEL_EMULATED:
    case TLS_MODEL_NONE:
      return SYMBOL_FORCE_TO_MEM;

    default:
      gcc_unreachable ();
    }
}
/* Return the method that should be used to access SYMBOL_REF or
   LABEL_REF X in context CONTEXT.  */

enum aarch64_symbol_type
aarch64_classify_symbol (rtx x,
			 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
{
  if (GET_CODE (x) == LABEL_REF)
    {
      switch (aarch64_cmodel)
	{
	case AARCH64_CMODEL_LARGE:
	  return SYMBOL_FORCE_TO_MEM;

	case AARCH64_CMODEL_TINY_PIC:
	case AARCH64_CMODEL_TINY:
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL_PIC:
	case AARCH64_CMODEL_SMALL:
	  return SYMBOL_SMALL_ABSOLUTE;

	default:
	  gcc_unreachable ();
	}
    }

  if (GET_CODE (x) == SYMBOL_REF)
    {
      if (aarch64_cmodel == AARCH64_CMODEL_LARGE
	  || CONSTANT_POOL_ADDRESS_P (x))
	return SYMBOL_FORCE_TO_MEM;

      if (aarch64_tls_symbol_p (x))
	return aarch64_classify_tls_symbol (x);

      switch (aarch64_cmodel)
	{
	case AARCH64_CMODEL_TINY:
	  if (SYMBOL_REF_WEAK (x))
	    return SYMBOL_FORCE_TO_MEM;
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL:
	  if (SYMBOL_REF_WEAK (x))
	    return SYMBOL_FORCE_TO_MEM;
	  return SYMBOL_SMALL_ABSOLUTE;

	case AARCH64_CMODEL_TINY_PIC:
	  if (!aarch64_symbol_binds_local_p (x))
	    return SYMBOL_TINY_GOT;
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL_PIC:
	  if (!aarch64_symbol_binds_local_p (x))
	    return SYMBOL_SMALL_GOT;
	  return SYMBOL_SMALL_ABSOLUTE;

	default:
	  gcc_unreachable ();
	}
    }

  /* By default push everything into the constant pool.  */
  return SYMBOL_FORCE_TO_MEM;
}
bool
aarch64_constant_address_p (rtx x)
{
  return (CONSTANT_P (x) && memory_address_p (DImode, x));
}

bool
aarch64_legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return false;

  return true;
}
/* Return true if X holds either a quarter-precision or
   floating-point +0.0 constant.  */
static bool
aarch64_valid_floating_const (enum machine_mode mode, rtx x)
{
  if (!CONST_DOUBLE_P (x))
    return false;

  /* TODO: We could handle moving 0.0 to a TFmode register,
     but first we would like to refactor the movtf_aarch64
     to be more amicable to split moves properly and
     correctly gate on TARGET_SIMD.  For now - reject all
     constants which are not to SFmode or DFmode registers.  */
  if (!(mode == SFmode || mode == DFmode))
    return false;

  if (aarch64_float_const_zero_rtx_p (x))
    return true;
  return aarch64_float_const_representable_p (x);
}
static bool
aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
{
  /* Do not allow vector struct mode constants.  We could support
     0 and -1 easily, but they need support in aarch64-simd.md.  */
  if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
    return false;

  /* This could probably go away because
     we now decompose CONST_INTs according to expand_mov_immediate.  */
  if ((GET_CODE (x) == CONST_VECTOR
       && aarch64_simd_valid_immediate (x, mode, false, NULL))
      || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
    return !targetm.cannot_force_const_mem (mode, x);

  if (GET_CODE (x) == HIGH
      && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
    return true;

  return aarch64_constant_address_p (x);
}
rtx
aarch64_load_tp (rtx target)
{
  if (!target
      || GET_MODE (target) != Pmode
      || !register_operand (target, Pmode))
    target = gen_reg_rtx (Pmode);

  /* Can return in any reg.  */
  emit_insn (gen_aarch64_load_tp_hard (target));
  return target;
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
   Return the type to use as __builtin_va_list.

   AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:

   struct __va_list
   {
     void *__stack;
     void *__gr_top;
     void *__vr_top;
     int   __gr_offs;
     int   __vr_offs;
   };  */

static tree
aarch64_build_builtin_va_list (void)
{
  tree va_list_name;
  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;

  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;

  /* Create the fields.  */
  f_stack = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__stack"),
			ptr_type_node);
  f_grtop = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__gr_top"),
			ptr_type_node);
  f_vrtop = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__vr_top"),
			ptr_type_node);
  f_groff = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__gr_offs"),
			integer_type_node);
  f_vroff = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__vr_offs"),
			integer_type_node);

  DECL_ARTIFICIAL (f_stack) = 1;
  DECL_ARTIFICIAL (f_grtop) = 1;
  DECL_ARTIFICIAL (f_vrtop) = 1;
  DECL_ARTIFICIAL (f_groff) = 1;
  DECL_ARTIFICIAL (f_vroff) = 1;

  DECL_FIELD_CONTEXT (f_stack) = va_list_type;
  DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
  DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
  DECL_FIELD_CONTEXT (f_groff) = va_list_type;
  DECL_FIELD_CONTEXT (f_vroff) = va_list_type;

  TYPE_FIELDS (va_list_type) = f_stack;
  DECL_CHAIN (f_stack) = f_grtop;
  DECL_CHAIN (f_grtop) = f_vrtop;
  DECL_CHAIN (f_vrtop) = f_groff;
  DECL_CHAIN (f_groff) = f_vroff;

  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
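
/* Sketch of the resulting layout under LP64 (pointers are 8 bytes):
   __stack at offset 0, __gr_top at 8, __vr_top at 16, __gr_offs at 24
   and __vr_offs at 28, giving sizeof (__va_list) == 32.  */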
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
{
  const CUMULATIVE_ARGS *cum;
  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
  tree stack, grtop, vrtop, groff, vroff;
  tree t;
  int gr_save_area_size;
  int vr_save_area_size;
  int vr_offset;

  cum = &crtl->args.info;
  gr_save_area_size
    = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
  vr_save_area_size
    = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;

  if (TARGET_GENERAL_REGS_ONLY)
    {
      if (cum->aapcs_nvrn > 0)
	sorry ("%qs and floating point or vector arguments",
	       "-mgeneral-regs-only");
      vr_save_area_size = 0;
    }

  f_stack = TYPE_FIELDS (va_list_type_node);
  f_grtop = DECL_CHAIN (f_stack);
  f_vrtop = DECL_CHAIN (f_grtop);
  f_groff = DECL_CHAIN (f_vrtop);
  f_vroff = DECL_CHAIN (f_groff);

  stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
		  NULL_TREE);
  grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
		  NULL_TREE);
  vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
		  NULL_TREE);
  groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
		  NULL_TREE);
  vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
		  NULL_TREE);

  /* Emit code to initialize STACK, which points to the next varargs stack
     argument.  CUM->AAPCS_STACK_SIZE gives the number of stack words used
     by named arguments.  STACK is 8-byte aligned.  */
  t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
  if (cum->aapcs_stack_size > 0)
    t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
  t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize GRTOP, the top of the GR save area.
     virtual_incoming_args_rtx should have been 16 byte aligned.  */
  t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
  t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize VRTOP, the top of the VR save area.
     This address is gr_save_area_bytes below GRTOP, rounded
     down to the next 16-byte boundary.  */
  t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
  vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
				STACK_BOUNDARY / BITS_PER_UNIT);

  if (vr_offset)
    t = fold_build_pointer_plus_hwi (t, -vr_offset);
  t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize GROFF, the offset from GRTOP of the
     next GPR argument.  */
  t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
	      build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Likewise emit code to initialize VROFF, the offset from FTOP
     of the next VR argument.  */
  t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
	      build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
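
/* Rough picture of the area this initializes, addresses decreasing
   downwards from virtual_incoming_args_rtx (which becomes GRTOP):

	+----------------+ <- STACK (first anonymous stack argument)
	| stack args     |
	+----------------+ <- GRTOP;  __gr_offs = -gr_save_area_size
	| x0..x7 save    |
	+----------------+ <- VRTOP (16-byte aligned);
	| q0..q7 save    |    __vr_offs = -vr_save_area_size
	+----------------+                                           */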
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */

static tree
aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			      gimple_seq *post_p ATTRIBUTE_UNUSED)
{
  tree addr;
  bool indirect_p;
  bool is_ha;		/* is HFA or HVA.  */
  bool dw_align;	/* double-word align.  */
  enum machine_mode ag_mode = VOIDmode;
  int nregs;
  enum machine_mode mode;

  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
  tree stack, f_top, f_off, off, arg, roundup, on_stack;
  HOST_WIDE_INT size, rsize, adjust, align;
  tree t, u, cond1, cond2;

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);

  mode = TYPE_MODE (type);

  f_stack = TYPE_FIELDS (va_list_type_node);
  f_grtop = DECL_CHAIN (f_stack);
  f_vrtop = DECL_CHAIN (f_grtop);
  f_groff = DECL_CHAIN (f_vrtop);
  f_vroff = DECL_CHAIN (f_groff);

  stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
		  f_stack, NULL_TREE);
  size = int_size_in_bytes (type);
  align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;

  dw_align = false;
  adjust = 0;
  if (aarch64_vfp_is_call_or_return_candidate (mode,
					       type,
					       &ag_mode,
					       &nregs,
					       &is_ha))
    {
      /* TYPE passed in fp/simd registers.  */
      if (TARGET_GENERAL_REGS_ONLY)
	sorry ("%qs and floating point or vector arguments",
	       "-mgeneral-regs-only");

      f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
		      unshare_expr (valist), f_vrtop, NULL_TREE);
      f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
		      unshare_expr (valist), f_vroff, NULL_TREE);

      rsize = nregs * UNITS_PER_VREG;

      if (is_ha)
	{
	  if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
	    adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
	}
      else if (BLOCK_REG_PADDING (mode, type, 1) == downward
	       && size < UNITS_PER_VREG)
	{
	  adjust = UNITS_PER_VREG - size;
	}
    }
  else
    {
      /* TYPE passed in general registers.  */
      f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
		      unshare_expr (valist), f_grtop, NULL_TREE);
      f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
		      unshare_expr (valist), f_groff, NULL_TREE);
      rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
      nregs = rsize / UNITS_PER_WORD;

      if (align > 8)
	dw_align = true;

      if (BLOCK_REG_PADDING (mode, type, 1) == downward
	  && size < UNITS_PER_WORD)
	{
	  adjust = UNITS_PER_WORD - size;
	}
    }

  /* Get a local temporary for the field value.  */
  off = get_initialized_tmp_var (f_off, pre_p, NULL);

  /* Emit code to branch if off >= 0.  */
  t = build2 (GE_EXPR, boolean_type_node, off,
	      build_int_cst (TREE_TYPE (off), 0));
  cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);

  if (dw_align)
    {
      /* Emit: offs = (offs + 15) & -16.  */
      t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
		  build_int_cst (TREE_TYPE (off), 15));
      t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
		  build_int_cst (TREE_TYPE (off), -16));
      roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
    }
  else
    roundup = NULL;

  /* Update ap.__[g|v]r_offs  */
  t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
	      build_int_cst (TREE_TYPE (off), rsize));
  t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);

  /* String up.  */
  if (roundup)
    t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);

  /* [cond2] if (ap.__[g|v]r_offs > 0)  */
  u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
	      build_int_cst (TREE_TYPE (f_off), 0));
  cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);

  /* String up: make sure the assignment happens before the use.  */
  t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
  COND_EXPR_ELSE (cond1) = t;

  /* Prepare the trees handling the argument that is passed on the stack;
     the top level node will store in ON_STACK.  */
  arg = get_initialized_tmp_var (stack, pre_p, NULL);
  if (align > 8)
    {
      /* if (alignof(type) > 8) (arg = arg + 15) & -16;  */
      t = fold_convert (intDI_type_node, arg);
      t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), 15));
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -16));
      t = fold_convert (TREE_TYPE (arg), t);
      roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
    }
  else
    roundup = NULL;
  /* Advance ap.__stack  */
  t = fold_convert (intDI_type_node, arg);
  t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
	      build_int_cst (TREE_TYPE (t), size + 7));
  t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
	      build_int_cst (TREE_TYPE (t), -8));
  t = fold_convert (TREE_TYPE (arg), t);
  t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
  /* String up roundup and advance.  */
  if (roundup)
    t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
  /* String up with arg  */
  on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
  /* Big-endianness related address adjustment.  */
  if (BLOCK_REG_PADDING (mode, type, 1) == downward
      && size < UNITS_PER_WORD)
    {
      t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
		  size_int (UNITS_PER_WORD - size));
      on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
    }

  COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
  COND_EXPR_THEN (cond2) = unshare_expr (on_stack);

  /* Adjustment to OFFSET in the case of BIG_ENDIAN.  */
  t = off;
  if (adjust)
    t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
		build_int_cst (TREE_TYPE (off), adjust));

  t = fold_convert (sizetype, t);
  t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);

  if (is_ha)
    {
      /* type ha; // treat as "struct {ftype field[n];}"
         ... [computing offs]
         for (i = 0; i <nregs; ++i, offs += 16)
	   ha.field[i] = *((ftype *)(ap.__vr_top + offs));
	 return ha;  */
      int i;
      tree tmp_ha, field_t, field_ptr_t;

      /* Declare a local variable.  */
      tmp_ha = create_tmp_var_raw (type, "ha");
      gimple_add_tmp_var (tmp_ha);

      /* Establish the base type.  */
      switch (ag_mode)
	{
	case SFmode:
	  field_t = float_type_node;
	  field_ptr_t = float_ptr_type_node;
	  break;
	case DFmode:
	  field_t = double_type_node;
	  field_ptr_t = double_ptr_type_node;
	  break;
	case TFmode:
	  field_t = long_double_type_node;
	  field_ptr_t = long_double_ptr_type_node;
	  break;
/* The half precision and quad precision are not fully supported yet.  Enable
   the following code after the support is complete.  Need to find the correct
   type node for __fp16 *.  */
#if 0
	case HFmode:
	  field_t = float_type_node;
	  field_ptr_t = float_ptr_type_node;
	  break;
#endif
	case V2SImode:
	case V4SImode:
	  {
	    tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
	    field_t = build_vector_type_for_mode (innertype, ag_mode);
	    field_ptr_t = build_pointer_type (field_t);
	  }
	  break;
	default:
	  gcc_assert (0);
	}

      /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area  */
      tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
      addr = t;
      t = fold_convert (field_ptr_t, addr);
      t = build2 (MODIFY_EXPR, field_t,
		  build1 (INDIRECT_REF, field_t, tmp_ha),
		  build1 (INDIRECT_REF, field_t, t));

      /* ha.field[i] = *((field_ptr_t)vr_saved_area + i)  */
      for (i = 1; i < nregs; ++i)
	{
	  addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
	  u = fold_convert (field_ptr_t, addr);
	  u = build2 (MODIFY_EXPR, field_t,
		      build2 (MEM_REF, field_t, tmp_ha,
			      build_int_cst (field_ptr_t,
					     (i *
					      int_size_in_bytes (field_t)))),
		      build1 (INDIRECT_REF, field_t, u));
	  t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
	}

      u = fold_convert (TREE_TYPE (f_top), tmp_ha);
      t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
    }

  COND_EXPR_ELSE (cond2) = t;
  addr = fold_convert (build_pointer_type (type), cond1);
  addr = build_va_arg_indirect_ref (addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);

  return addr;
}
/* Implement TARGET_SETUP_INCOMING_VARARGS.  */

static void
aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
				tree type, int *pretend_size ATTRIBUTE_UNUSED,
				int no_rtl)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  CUMULATIVE_ARGS local_cum;
  int gr_saved, vr_saved;

  /* The caller has advanced CUM up to, but not beyond, the last named
     argument.  Advance a local copy of CUM past the last "real" named
     argument, to find out how many registers are left over.  */
  local_cum = *cum;
  aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);

  /* Found out how many registers we need to save.  */
  gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
  vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;

  if (TARGET_GENERAL_REGS_ONLY)
    {
      if (local_cum.aapcs_nvrn > 0)
	sorry ("%qs and floating point or vector arguments",
	       "-mgeneral-regs-only");
      vr_saved = 0;
    }

  if (!no_rtl)
    {
      if (gr_saved > 0)
	{
	  rtx ptr, mem;

	  /* virtual_incoming_args_rtx should have been 16-byte aligned.  */
	  ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
			       - gr_saved * UNITS_PER_WORD);
	  mem = gen_frame_mem (BLKmode, ptr);
	  set_mem_alias_set (mem, get_varargs_alias_set ());

	  move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
			       mem, gr_saved);
	}
      if (vr_saved > 0)
	{
	  /* We can't use move_block_from_reg, because it will use
	     the wrong mode, storing D regs only.  */
	  enum machine_mode mode = TImode;
	  int off, i;

	  /* Set OFF to the offset from virtual_incoming_args_rtx of
	     the first vector register.  The VR save area lies below
	     the GR one, and is aligned to 16 bytes.  */
	  off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
				   STACK_BOUNDARY / BITS_PER_UNIT);
	  off -= vr_saved * UNITS_PER_VREG;

	  for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
	    {
	      rtx ptr, mem;

	      ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
	      mem = gen_frame_mem (mode, ptr);
	      set_mem_alias_set (mem, get_varargs_alias_set ());
	      aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
	      off += UNITS_PER_VREG;
	    }
	}
    }

  /* We don't save the size into *PRETEND_SIZE because we want to avoid
     any complication of having crtl->args.pretend_args_size changed.  */
  cfun->machine->saved_varargs_size
    = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
			 STACK_BOUNDARY / BITS_PER_UNIT)
       + vr_saved * UNITS_PER_VREG);
}
static void
aarch64_conditional_register_usage (void)
{
  int i;
  if (!TARGET_FLOAT)
    {
      for (i = V0_REGNUM; i <= V31_REGNUM; i++)
	{
	  fixed_regs[i] = 1;
	  call_used_regs[i] = 1;
	}
    }
}
/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.  */

static int
aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
{
  enum machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode && mode != TFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode && mode != TFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
	 and 128-bit vector types.  */
      size = int_size_in_bytes (type);
      switch (size)
	{
	case 8:
	  mode = V2SImode;
	  break;
	case 16:
	  mode = V4SImode;
	  break;
	default:
	  return -1;
	}

      if (*modep == VOIDmode)
	*modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
	 equivalent for the purposes of being homogeneous aggregates
	 if they are the same size.  */
      if (*modep == mode)
	return 1;

      break;

    case ARRAY_TYPE:
      {
	int count;
	tree index = TYPE_DOMAIN (type);

	/* Can't handle incomplete types.  */
	if (!COMPLETE_TYPE_P (type))
	  return -1;

	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
	if (count == -1
	    || !index
	    || !TYPE_MAX_VALUE (index)
	    || !host_integerp (TYPE_MAX_VALUE (index), 1)
	    || !TYPE_MIN_VALUE (index)
	    || !host_integerp (TYPE_MIN_VALUE (index), 1)
	    || count < 0)
	  return -1;

	count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
		  - tree_low_cst (TYPE_MIN_VALUE (index), 1));

	/* There must be no padding.  */
	if (!host_integerp (TYPE_SIZE (type), 1)
	    || (tree_low_cst (TYPE_SIZE (type), 1)
		!= count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case RECORD_TYPE:
      {
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types.  */
	if (!COMPLETE_TYPE_P (type))
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count += sub_count;
	  }

	/* There must be no padding.  */
	if (!host_integerp (TYPE_SIZE (type), 1)
	    || (tree_low_cst (TYPE_SIZE (type), 1)
		!= count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
	/* These aren't very interesting except in a degenerate case.  */
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types.  */
	if (!COMPLETE_TYPE_P (type))
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count = count > sub_count ? count : sub_count;
	  }

	/* There must be no padding.  */
	if (!host_integerp (TYPE_SIZE (type), 1)
	    || (tree_low_cst (TYPE_SIZE (type), 1)
		!= count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    default:
      break;
    }

  return -1;
}
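
/* Worked examples: for "struct { float x, y, z; }" this returns 3 with
   *MODEP set to SFmode (a homogeneous aggregate of three floats), while
   for "struct { float f; double d; }" the mismatched element modes make
   it return -1, so the struct is not a homogeneous aggregate.  */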
/* Return TRUE if the type, as described by TYPE and MODE, is a composite
   type as described in AAPCS64 \S 4.3.  This includes aggregate, union and
   array types.  The C99 floating-point complex types are also considered
   as composite types, according to AAPCS64 \S 7.1.1.  The complex integer
   types, which are GCC extensions and out of the scope of AAPCS64, are
   treated as composite types here as well.

   Note that MODE itself is not sufficient in determining whether a type
   is such a composite type or not.  This is because
   stor-layout.c:compute_record_mode may have already changed the MODE
   (BLKmode) of a RECORD_TYPE TYPE to some other mode.  For example, a
   structure with only one field may have its MODE set to the mode of the
   field.  Also an integer mode whose size matches the size of the
   RECORD_TYPE type may be used to substitute the original mode
   (i.e. BLKmode) in certain circumstances.  In other words, MODE cannot be
   solely relied on.  */

static bool
aarch64_composite_type_p (const_tree type,
			  enum machine_mode mode)
{
  if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
    return true;

  if (mode == BLKmode
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return true;

  return false;
}
/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
   type as described in AAPCS64 \S 4.1.2.

   See the comment above aarch64_composite_type_p for the notes on MODE.  */

static bool
aarch64_short_vector_p (const_tree type,
			enum machine_mode mode)
{
  HOST_WIDE_INT size = -1;

  if (type && TREE_CODE (type) == VECTOR_TYPE)
    size = int_size_in_bytes (type);
  else if (!aarch64_composite_type_p (type, mode)
	   && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	       || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
    size = GET_MODE_SIZE (mode);

  return (size == 8 || size == 16);
}
/* Return TRUE if an argument, whose type is described by TYPE and MODE,
   shall be passed or returned in simd/fp register(s) (providing these
   parameter passing registers are available).

   Upon successful return, *COUNT returns the number of needed registers,
   *BASE_MODE returns the mode of the individual register and when IS_HA
   is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
   floating-point aggregate or a homogeneous short-vector aggregate.  */

static bool
aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
					 const_tree type,
					 enum machine_mode *base_mode,
					 int *count,
					 bool *is_ha)
{
  enum machine_mode new_mode = VOIDmode;
  bool composite_p = aarch64_composite_type_p (type, mode);

  if (is_ha != NULL) *is_ha = false;

  if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
      || aarch64_short_vector_p (type, mode))
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      if (is_ha != NULL) *is_ha = true;
      *count = 2;
      new_mode = GET_MODE_INNER (mode);
    }
  else if (type && composite_p)
    {
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);

      if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
	{
	  if (is_ha != NULL) *is_ha = true;
	  *count = ag_count;
	}
      else
	return false;
    }
  else
    return false;

  *base_mode = new_mode;
  return true;
}
/* Implement TARGET_STRUCT_VALUE_RTX.  */

static rtx
aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
			  int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
}
/* Implements target hook vector_mode_supported_p.  */
static bool
aarch64_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SIMD
      && (mode == V4SImode  || mode == V8HImode
	  || mode == V16QImode || mode == V2DImode
	  || mode == V2SImode  || mode == V4HImode
	  || mode == V8QImode || mode == V2SFmode
	  || mode == V4SFmode || mode == V2DFmode))
    return true;

  return false;
}
/* Return appropriate SIMD container
   for MODE within a vector of WIDTH bits.  */
static enum machine_mode
aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
{
  gcc_assert (width == 64 || width == 128);
  if (TARGET_SIMD)
    {
      if (width == 128)
	switch (mode)
	  {
	  case DFmode:
	    return V2DFmode;
	  case SFmode:
	    return V4SFmode;
	  case SImode:
	    return V4SImode;
	  case HImode:
	    return V8HImode;
	  case QImode:
	    return V16QImode;
	  case DImode:
	    return V2DImode;
	  default:
	    break;
	  }
      else
	switch (mode)
	  {
	  case SFmode:
	    return V2SFmode;
	  case SImode:
	    return V2SImode;
	  case HImode:
	    return V4HImode;
	  case QImode:
	    return V8QImode;
	  default:
	    break;
	  }
    }
  return word_mode;
}
/* Return 128-bit container as the preferred SIMD mode for MODE.  */
static enum machine_mode
aarch64_preferred_simd_mode (enum machine_mode mode)
{
  return aarch64_simd_container_mode (mode, 128);
}

/* Return the bitmask of possible vector sizes for the vectorizer
   to iterate over.  */
static unsigned int
aarch64_autovectorize_vector_sizes (void)
{
  return (16 | 8);
}
/* A table to help perform AArch64-specific name mangling for AdvSIMD
   vector types in order to conform to the AAPCS64 (see "Procedure
   Call Standard for the ARM 64-bit Architecture", Appendix A).  To
   qualify for emission with the mangled names defined in that document,
   a vector type must not only be of the correct mode but also be
   composed of AdvSIMD vector element types (e.g.
   _builtin_aarch64_simd_qi); these types are registered by
   aarch64_init_simd_builtins ().  In other words, vector types defined
   in other ways e.g. via vector_size attribute will get default
   mangled names.  */
typedef struct
{
  enum machine_mode mode;
  const char *element_type_name;
  const char *mangled_name;
} aarch64_simd_mangle_map_entry;

static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
  /* 64-bit containerized types.  */
  { V8QImode,  "__builtin_aarch64_simd_qi",     "10__Int8x8_t" },
  { V8QImode,  "__builtin_aarch64_simd_uqi",    "11__Uint8x8_t" },
  { V4HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x4_t" },
  { V4HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x4_t" },
  { V2SImode,  "__builtin_aarch64_simd_si",     "11__Int32x2_t" },
  { V2SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x2_t" },
  { V2SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x2_t" },
  { V8QImode,  "__builtin_aarch64_simd_poly8",  "11__Poly8x8_t" },
  { V4HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
  /* 128-bit containerized types.  */
  { V16QImode, "__builtin_aarch64_simd_qi",     "11__Int8x16_t" },
  { V16QImode, "__builtin_aarch64_simd_uqi",    "12__Uint8x16_t" },
  { V8HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x8_t" },
  { V8HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x8_t" },
  { V4SImode,  "__builtin_aarch64_simd_si",     "11__Int32x4_t" },
  { V4SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x4_t" },
  { V2DImode,  "__builtin_aarch64_simd_di",     "11__Int64x2_t" },
  { V2DImode,  "__builtin_aarch64_simd_udi",    "12__Uint64x2_t" },
  { V4SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x4_t" },
  { V2DFmode,  "__builtin_aarch64_simd_df",     "13__Float64x2_t" },
  { V16QImode, "__builtin_aarch64_simd_poly8",  "12__Poly8x16_t" },
  { V8HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
  { VOIDmode, NULL, NULL }
};
/* Implement TARGET_MANGLE_TYPE.  */

static const char *
aarch64_mangle_type (const_tree type)
{
  /* The AArch64 ABI documents say that "__va_list" has to be
     mangled as if it is in the "std" namespace.  */
  if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Check the mode of the vector type, and the name of the vector
     element type, against the table.  */
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;

      while (pos->mode != VOIDmode)
	{
	  tree elt_type = TREE_TYPE (type);

	  if (pos->mode == TYPE_MODE (type)
	      && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
	      && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
			  pos->element_type_name))
	    return pos->mangled_name;

	  pos++;
	}
    }

  /* Use the default mangling.  */
  return NULL;
}
/* Return the equivalent letter for size.  */
static char
sizetochar (int size)
{
  switch (size)
    {
    case 64: return 'd';
    case 32: return 's';
    case 16: return 'h';
    case 8 : return 'b';
    default: gcc_unreachable ();
    }
}
/* Return true iff x is a uniform vector of floating-point
   constants, and the constant can be represented in
   quarter-precision form.  Note, as aarch64_float_const_representable
   rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0.  */
static bool
aarch64_vect_float_const_representable_p (rtx x)
{
  int i = 0;
  REAL_VALUE_TYPE r0, ri;
  rtx x0, xi;

  if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
    return false;

  x0 = CONST_VECTOR_ELT (x, 0);
  if (!CONST_DOUBLE_P (x0))
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);

  for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
    {
      xi = CONST_VECTOR_ELT (x, i);
      if (!CONST_DOUBLE_P (xi))
	return false;

      REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
      if (!REAL_VALUES_EQUAL (r0, ri))
	return false;
    }

  return aarch64_float_const_representable_p (x0);
}
/* Return true for valid and false for invalid.  */
bool
aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
			      struct simd_immediate_info *info)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG)	\
  matches = 1;						\
  for (i = 0; i < idx; i += (STRIDE))			\
    if (!(TEST))					\
      matches = 0;					\
  if (matches)						\
    {							\
      immtype = (CLASS);				\
      elsize = (ELSIZE);				\
      eshift = (SHIFT);					\
      emvn = (NEG);					\
      break;						\
    }

  unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
  unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
  unsigned char bytes[16];
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;
  int eshift, emvn;

  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      if (! (aarch64_simd_imm_zero_p (op, mode)
	     || aarch64_vect_float_const_representable_p (op)))
	return false;

      if (info)
	{
	  info->value = CONST_VECTOR_ELT (op, 0);
	  info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
	  info->mvn = false;
	  info->shift = 0;
	}

      return true;
    }

  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = CONST_VECTOR_ELT (op, i);
      unsigned HOST_WIDE_INT elpart;
      unsigned int part, parts;

      if (GET_CODE (el) == CONST_INT)
	{
	  elpart = INTVAL (el);
	  parts = 1;
	}
      else if (GET_CODE (el) == CONST_DOUBLE)
	{
	  elpart = CONST_DOUBLE_LOW (el);
	  parts = 2;
	}
      else
	gcc_unreachable ();

      for (part = 0; part < parts; part++)
	{
	  unsigned int byte;
	  for (byte = 0; byte < innersize; byte++)
	    {
	      bytes[idx++] = (elpart & 0xff) ^ invmask;
	      elpart >>= BITS_PER_UNIT;
	    }
	  if (GET_CODE (el) == CONST_DOUBLE)
	    elpart = CONST_DOUBLE_HIGH (el);
	}
    }

  /* Sanity check.  */
  gcc_assert (idx == GET_MODE_SIZE (mode));

  do
    {
      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);

      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);

      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);

      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
	     && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);

      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);

      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);

      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);

      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);

      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);

      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
	     && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);

      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);

      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);

      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);

      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);

      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);

      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);

      CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);

      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
	     && bytes[i] == bytes[(i + 8) % idx], 0, 0);
    }
  while (0);

  if (immtype == -1)
    return false;

  if (info)
    {
      info->element_width = elsize;
      info->mvn = emvn != 0;
      info->shift = eshift;

      unsigned HOST_WIDE_INT imm = 0;

      if (immtype >= 12 && immtype <= 15)
	info->msl = true;

      /* Un-invert bytes of recognized vector, if necessary.  */
      if (invmask != 0)
	for (i = 0; i < idx; i++)
	  bytes[i] ^= invmask;

      if (immtype == 17)
	{
	  /* FIXME: Broken on 32-bit H_W_I hosts.  */
	  gcc_assert (sizeof (HOST_WIDE_INT) == 8);

	  for (i = 0; i < 8; i++)
	    imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
		   << (i * BITS_PER_UNIT);

	  info->value = GEN_INT (imm);
	}
      else
	{
	  for (i = 0; i < elsize / BITS_PER_UNIT; i++)
	    imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

	  /* Construct 'abcdefgh' because the assembler cannot handle
	     generic constants.  */
	  if (info->mvn)
	    imm = ~imm;
	  imm = (imm >> info->shift) & 0xff;
	  info->value = GEN_INT (imm);
	}
    }

  return true;
#undef CHECK
}
static bool
aarch64_const_vec_all_same_int_p (rtx x,
				  HOST_WIDE_INT minval,
				  HOST_WIDE_INT maxval)
{
  HOST_WIDE_INT firstval;
  int count, i;

  if (GET_CODE (x) != CONST_VECTOR
      || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
    return false;

  firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
  if (firstval < minval || firstval > maxval)
    return false;

  count = CONST_VECTOR_NUNITS (x);
  for (i = 1; i < count; i++)
    if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
      return false;

  return true;
}
/* Check if immediate shift constants are within range.  */
bool
aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
{
  int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
  if (left)
    return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
  else
    return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
}
/* Return true if X is a uniform vector where all elements
   are either the floating-point constant 0.0 or the
   integer constant 0.  */
bool
aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
{
  return x == CONST0_RTX (mode);
}
bool
aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT imm = INTVAL (x);
  int i;

  for (i = 0; i < 8; i++)
    {
      unsigned int byte = imm & 0xff;
      if (byte != 0xff && byte != 0)
	return false;
      imm >>= 8;
    }

  return true;
}
,
6579 enum aarch64_symbol_context context
,
6580 enum machine_mode mode
)
6582 if (GET_CODE (x
) == HIGH
6583 && aarch64_valid_symref (XEXP (x
, 0), GET_MODE (XEXP (x
, 0))))
6586 if (CONST_INT_P (x
) && aarch64_move_imm (INTVAL (x
), mode
))
6589 if (GET_CODE (x
) == SYMBOL_REF
&& mode
== DImode
&& CONSTANT_ADDRESS_P (x
))
6592 return aarch64_classify_symbolic_expression (x
, context
)
6593 == SYMBOL_TINY_ABSOLUTE
;
/* Return a const_int vector of VAL.  */
rtx
aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits);
  int i;

  for (i = 0; i < nunits; i++)
    RTVEC_ELT (v, i) = GEN_INT (val);

  return gen_rtx_CONST_VECTOR (mode, v);
}
/* Check OP is a legal scalar immediate for the MOVI instruction.  */

bool
aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
{
  enum machine_mode vmode;

  gcc_assert (!VECTOR_MODE_P (mode));
  vmode = aarch64_preferred_simd_mode (mode);
  rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
  return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
}
/* Construct and return a PARALLEL RTX vector.  */
rtx
aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits / 2);
  int base = high ? nunits / 2 : 0;
  rtx t1;
  int i;

  for (i = 0; i < nunits / 2; i++)
    RTVEC_ELT (v, i) = GEN_INT (base + i);

  t1 = gen_rtx_PARALLEL (mode, v);
  return t1;
}
/* Bounds-check lanes.  Ensure OPERAND lies between LOW (inclusive) and
   HIGH (exclusive).  */
void
aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  HOST_WIDE_INT lane;
  gcc_assert (GET_CODE (operand) == CONST_INT);
  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    error ("lane out of range");
}

void
aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  gcc_assert (GET_CODE (operand) == CONST_INT);
  HOST_WIDE_INT lane = INTVAL (operand);

  if (lane < low || lane >= high)
    error ("constant out of range");
}
/* Emit code to reinterpret one AdvSIMD type as another,
   without altering bits.  */
void
aarch64_simd_reinterpret (rtx dest, rtx src)
{
  emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
}
/* Emit code to place an AdvSIMD pair result in memory locations (with equal
   registers).  */
void
aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
			    rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
			    rtx op1)
{
  rtx mem = gen_rtx_MEM (mode, destaddr);
  rtx tmp1 = gen_reg_rtx (mode);
  rtx tmp2 = gen_reg_rtx (mode);

  emit_insn (intfn (tmp1, op1, tmp2));

  emit_move_insn (mem, tmp1);
  mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
  emit_move_insn (mem, tmp2);
}
/* Return TRUE if OP is a valid vector addressing mode.  */
bool
aarch64_simd_mem_operand_p (rtx op)
{
  return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
			|| GET_CODE (XEXP (op, 0)) == REG);
}
/* Set up OPERANDS for a register copy from SRC to DEST, taking care
   not to early-clobber SRC registers in the process.

   We assume that the operands described by SRC and DEST represent a
   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
   number of components into which the copy has been decomposed.  */
void
aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
				rtx *src, unsigned int count)
{
  unsigned int i;

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || REGNO (operands[0]) < REGNO (operands[1]))
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[i];
	  operands[2 * i + 1] = src[i];
	}
    }
  else
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[count - i - 1];
	  operands[2 * i + 1] = src[count - i - 1];
	}
    }
}
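
/* Example: copying the register pair {q1,q2} to {q2,q3} overlaps with the
   destination numbered above the source, so the component moves are listed
   in reverse order (q3 <- q2 first) to avoid clobbering q2 before it has
   been read.  */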
/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
   one of VSTRUCT modes: OI, CI or XI.  */
int
aarch64_simd_attr_length_move (rtx insn)
{
  enum machine_mode mode;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
    {
      mode = GET_MODE (recog_data.operand[0]);
      switch (mode)
	{
	case OImode:
	  return 8;
	case CImode:
	  return 12;
	case XImode:
	  return 16;
	default:
	  gcc_unreachable ();
	}
    }
  return 4;
}
/* Implement target hook TARGET_VECTOR_ALIGNMENT.  The AAPCS64 sets the maximum
   alignment of a vector to 128 bits.  */
static HOST_WIDE_INT
aarch64_simd_vector_alignment (const_tree type)
{
  HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
  return MIN (align, 128);
}
/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE.  */
static bool
aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
{
  if (is_packed)
    return false;

  /* We guarantee alignment for vectors up to 128-bits.  */
  if (tree_int_cst_compare (TYPE_SIZE (type),
			    bitsize_int (BIGGEST_ALIGNMENT)) > 0)
    return false;

  /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned.  */
  return true;
}
/* If VALS is a vector constant that can be loaded into a register
   using DUP, generate instructions to do so and return an RTX to
   assign to the register.  Otherwise return NULL_RTX.  */
static rtx
aarch64_simd_dup_constant (rtx vals)
{
  enum machine_mode mode = GET_MODE (vals);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  bool all_same = true;
  rtx x;
  int i;

  if (GET_CODE (vals) != CONST_VECTOR)
    return NULL_RTX;

  for (i = 1; i < n_elts; ++i)
    {
      x = CONST_VECTOR_ELT (vals, i);
      if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
	all_same = false;
    }

  if (!all_same)
    return NULL_RTX;

  /* We can load this constant by using DUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */
  x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
  return gen_rtx_VEC_DUPLICATE (mode, x);
}
/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that can not be converted into a CONST_VECTOR.  */
static rtx
aarch64_simd_make_constant (rtx vals)
{
  enum machine_mode mode = GET_MODE (vals);
  rtx const_dup;
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);
  int n_const = 0;
  int i;

  if (GET_CODE (vals) == CONST_VECTOR)
    const_vec = vals;
  else if (GET_CODE (vals) == PARALLEL)
    {
      /* A CONST_VECTOR must contain only CONST_INTs and
	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
	 Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	    n_const++;
	}
      if (n_const == n_elts)
	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    }
  else
    gcc_unreachable ();

  if (const_vec != NULL_RTX
      && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
    /* Load using MOVI/MVNI.  */
    return const_vec;
  else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
    /* Loaded using DUP.  */
    return const_dup;
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  We can not take advantage of single-cycle
       LD1 because we need a PC-relative addressing mode.  */
    return const_vec;
  else
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We can not construct an initializer.  */
    return NULL_RTX;
}
void
aarch64_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;
  rtx x, mem;
  int i;

  x = XVECEXP (vals, 0, 0);
  if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
    n_var = 1, one_var = 0;

  for (i = 1; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
	++n_var, one_var = i;

      if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (n_var == 0)
    {
      rtx constant = aarch64_simd_make_constant (vals);
      if (constant != NULL_RTX)
	{
	  emit_move_insn (target, constant);
	  return;
	}
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same)
    {
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);
      rtx index = GEN_INT (one_var);
      enum insn_code icode;

      /* Load constant part of vector, substitute neighboring value for
	 varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
      aarch64_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
      icode = optab_handler (vec_set_optab, mode);
      gcc_assert (icode != CODE_FOR_nothing);
      emit_insn (GEN_FCN (icode) (target, x, index));
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
6940 aarch64_shift_truncation_mask (enum machine_mode mode
)
6943 (aarch64_vector_mode_supported_p (mode
)
6944 || aarch64_vect_struct_mode_p (mode
)) ? 0 : (GET_MODE_BITSIZE (mode
) - 1);
#ifndef TLS_SECTION_ASM_FLAG
#define TLS_SECTION_ASM_FLAG 'T'
#endif

void
aarch64_elf_asm_named_section (const char *name, unsigned int flags,
			       tree decl ATTRIBUTE_UNUSED)
{
  char flagchars[10], *f = flagchars;

  /* If we have already declared this section, we can use an
     abbreviated form to switch back to it -- unless this section is
     part of a COMDAT groups, in which case GAS requires the full
     declaration every time.  */
  if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
      && (flags & SECTION_DECLARED))
    {
      fprintf (asm_out_file, "\t.section\t%s\n", name);
      return;
    }

  if (!(flags & SECTION_DEBUG))
    *f++ = 'a';
  if (flags & SECTION_WRITE)
    *f++ = 'w';
  if (flags & SECTION_CODE)
    *f++ = 'x';
  if (flags & SECTION_SMALL)
    *f++ = 's';
  if (flags & SECTION_MERGE)
    *f++ = 'M';
  if (flags & SECTION_STRINGS)
    *f++ = 'S';
  if (flags & SECTION_TLS)
    *f++ = TLS_SECTION_ASM_FLAG;
  if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
    *f++ = 'G';
  *f = '\0';

  fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);

  if (!(flags & SECTION_NOTYPE))
    {
      const char *type;
      const char *format;

      if (flags & SECTION_BSS)
	type = "nobits";
      else
	type = "progbits";

#ifdef TYPE_OPERAND_FMT
      format = "," TYPE_OPERAND_FMT;
#else
      format = ",@%s";
#endif

      fprintf (asm_out_file, format, type);

      if (flags & SECTION_ENTSIZE)
	fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
      if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
	{
	  if (TREE_CODE (decl) == IDENTIFIER_NODE)
	    fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
	  else
	    fprintf (asm_out_file, ",%s,comdat",
		     IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
	}
    }

  putc ('\n', asm_out_file);
}
/* Select a format to encode pointers in exception handling data.  */
int
aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
{
  int type;
  switch (aarch64_cmodel)
    {
    case AARCH64_CMODEL_TINY:
    case AARCH64_CMODEL_TINY_PIC:
    case AARCH64_CMODEL_SMALL:
    case AARCH64_CMODEL_SMALL_PIC:
      /* text+got+data < 4Gb.  4-byte signed relocs are sufficient
	 for everything.  */
      type = DW_EH_PE_sdata4;
      break;
    default:
      /* No assumptions here.  8-byte relocs required.  */
      type = DW_EH_PE_sdata8;
      break;
    }
  return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
}
/* Emit load exclusive.  */

static void
aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
			     rtx mem, rtx model_rtx)
{
  rtx (*gen) (rtx, rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_aarch64_load_exclusiveqi; break;
    case HImode: gen = gen_aarch64_load_exclusivehi; break;
    case SImode: gen = gen_aarch64_load_exclusivesi; break;
    case DImode: gen = gen_aarch64_load_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, model_rtx));
}
/* Emit store exclusive.  */

static void
aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
			      rtx rval, rtx mem, rtx model_rtx)
{
  rtx (*gen) (rtx, rtx, rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_aarch64_store_exclusiveqi; break;
    case HImode: gen = gen_aarch64_store_exclusivehi; break;
    case SImode: gen = gen_aarch64_store_exclusivesi; break;
    case DImode: gen = gen_aarch64_store_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (bval, rval, mem, model_rtx));
}
/* Mark the previous jump instruction as unlikely.  */

static void
aarch64_emit_unlikely_jump (rtx insn)
{
  rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);

  insn = emit_jump_insn (insn);
  add_reg_note (insn, REG_BR_PROB, very_unlikely);
}
/* Expand a compare and swap pattern.  */

void
aarch64_expand_compare_and_swap (rtx operands[])
{
  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  enum machine_mode mode, cmp_mode;
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);
  cmp_mode = mode;

  /* Normally the succ memory model must be stronger than fail, but in the
     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */

  if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
      && INTVAL (mod_s) == MEMMODEL_RELEASE)
    mod_s = GEN_INT (MEMMODEL_ACQ_REL);

  switch (mode)
    {
    case QImode:
    case HImode:
      /* For short modes, we're going to perform the comparison in SImode,
	 so do the zero-extension now.  */
      cmp_mode = SImode;
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* Fall through.  */

    case SImode:
    case DImode:
      /* Force the value into a register if needed.  */
      if (!aarch64_plus_operand (oldval, mode))
	oldval = force_reg (cmp_mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  switch (mode)
    {
    case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
    case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
    case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
    case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  x = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_EQ (SImode, x, const0_rtx);
  emit_insn (gen_rtx_SET (VOIDmode, bval, x));
}
/* Split a compare and swap pattern.  */

void
aarch64_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, scratch;
  enum machine_mode mode;
  bool is_weak;
  rtx label1, label2, x, cond;

  rval = operands[0];
  mem = operands[1];
  oldval = operands[2];
  newval = operands[3];
  is_weak = (operands[4] != const0_rtx);
  scratch = operands[7];
  mode = GET_MODE (mem);

  label1 = NULL_RTX;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);

  cond = aarch64_gen_compare_reg (NE, rval, oldval);
  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
  aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));

  aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);

  if (!is_weak)
    {
      x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
      aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
    }
  else
    {
      cond = gen_rtx_REG (CCmode, CC_REGNUM);
      x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, cond, x));
    }

  emit_label (label2);
}
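/* For reference, the strong SImode case of this split amounts to a loop
   along these lines (register names are illustrative only; the memory
   model selects acquire/release variants such as ldaxr/stlxr):

	.L1:  ldxr  w0, [x1]       // rval = *mem, exclusive
	      cmp   w0, w2         // compare against oldval
	      bne   .L2            // mismatch: CC already holds the result
	      stxr  w3, w4, [x1]   // attempt to store newval
	      cbnz  w3, .L1        // reservation lost: retry
	.L2:
*/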
/* Split an atomic operation.  */

void
aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
			 rtx value, rtx model_rtx, rtx cond)
{
  enum machine_mode mode = GET_MODE (mem);
  enum machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx label, x;

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
	{
	  value = GEN_INT (-INTVAL (value));
	  code = PLUS;
	}
      /* Fall through.  */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;
    }

  aarch64_emit_store_exclusive (mode, cond, mem,
				gen_lowpart (mode, new_out), model_rtx);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
  aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
}
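/* For reference, an SImode atomic add with a relaxed model splits into a
   loop along these lines (register names are illustrative only):

	.L1:  ldxr  w0, [x2]       // old_out = *mem, exclusive
	      add   w1, w0, w3     // new_out = old_out + value
	      stxr  w4, w1, [x2]   // cond = store-exclusive status
	      cbnz  w4, .L1        // non-zero status: retry
*/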
static void
aarch64_print_extension (void)
{
  const struct aarch64_option_extension *opt = NULL;

  for (opt = all_extensions; opt->name != NULL; opt++)
    if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
      asm_fprintf (asm_out_file, "+%s", opt->name);

  asm_fprintf (asm_out_file, "\n");
}
static void
aarch64_start_file (void)
{
  if (selected_arch)
    {
      asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
      aarch64_print_extension ();
    }
  else if (selected_cpu)
    {
      asm_fprintf (asm_out_file, "\t.cpu %s", selected_cpu->name);
      aarch64_print_extension ();
    }
  default_file_start ();
}
/* Target hook for c_mode_for_suffix.  */
static enum machine_mode
aarch64_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;

  return VOIDmode;
}
/* We can only represent floating point constants which will fit in
   "quarter-precision" values.  These values are characterised by
   a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
   by:

   (-1)^s * (n/16) * 2^r

   where:
     's' is the sign bit.
     'n' is an integer in the range 16 <= n <= 31.
     'r' is an integer in the range -3 <= r <= 4.  */
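/* Worked examples: 0.5 is encodable as (16/16) * 2^-1 (s = 0, n = 16,
   r = -1); the largest representable magnitude is (31/16) * 2^4 = 31.0
   and the smallest is (16/16) * 2^-3 = 0.125.  */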
/* Return true iff X can be represented by a quarter-precision
   floating point immediate operand X.  Note, we cannot represent 0.0.  */
bool
aarch64_float_const_representable_p (rtx x)
{
  /* This represents our current view of how many bits
     make up the mantissa.  */
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
  int exponent;
  unsigned HOST_WIDE_INT mantissa, mask;
  HOST_WIDE_INT m1, m2;
  REAL_VALUE_TYPE r, m;

  if (!CONST_DOUBLE_P (x))
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* We cannot represent infinities, NaNs or +/-zero.  We won't
     know if we have +zero until we analyse the mantissa, but we
     can reject the other invalid values.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
      || REAL_VALUE_MINUS_ZERO (r))
    return false;

  /* Extract exponent.  */
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);

  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     m1 holds the low part of the mantissa, m2 the high part.
     WARNING: If we ever have a representation using more than 2 * H_W_I - 1
     bits for the mantissa, this can fail (low bits will be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  REAL_VALUE_TO_INT (&m1, &m2, m);

  /* If the low part of the mantissa has bits set we cannot represent
     the value.  */
  if (m1 != 0)
    return false;
  /* We have rejected the lower HOST_WIDE_INT, so update our
     understanding of how many bits lie in the mantissa and
     look only at the high HOST_WIDE_INT.  */
  mantissa = m2;
  point_pos -= HOST_BITS_PER_WIDE_INT;

  /* We can only represent values with a mantissa of the form 1.xxxx.  */
  mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return false;

  /* Having filtered unrepresentable values, we may now remove all
     but the highest 5 bits.  */
  mantissa >>= point_pos - 5;

  /* We cannot represent the value 0.0, so reject it.  This is handled
     by aarch64_float_const_zero_rtx_p.  */
  if (mantissa == 0)
    return false;

  /* Then, as bit 4 is always set, we can mask it off, leaving
     the mantissa in the range [0, 15].  */
  mantissa &= ~(1 << 4);
  gcc_assert (mantissa <= 15);

  /* GCC internally does not use IEEE754-like encoding (where normalized
     significands are in the range [1, 2).  GCC uses [0.5, 1) (see real.c).
     Our mantissa values are shifted 4 places to the left relative to
     normalized IEEE754 so we must modify the exponent returned by REAL_EXP
     by 5 places to correct for GCC's representation.  */
  exponent = 5 - exponent;

  return (exponent >= 0 && exponent <= 7);
}
char*
aarch64_output_simd_mov_immediate (rtx const_vector,
				   enum machine_mode mode,
				   unsigned width)
{
  bool is_valid;
  static char templ[40];
  const char *mnemonic;
  const char *shift_op;
  unsigned int lane_count = 0;
  char element_char;

  struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };

  /* This will return true to show const_vector is legal for use as either
     a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
     also update INFO to show how the immediate should be generated.  */
  is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
  gcc_assert (is_valid);

  element_char = sizetochar (info.element_width);
  lane_count = width / info.element_width;

  mode = GET_MODE_INNER (mode);
  if (mode == SFmode || mode == DFmode)
    {
      gcc_assert (info.shift == 0 && ! info.mvn);
      if (aarch64_float_const_zero_rtx_p (info.value))
	info.value = GEN_INT (0);
      else
	{
#define buf_size 20
	  REAL_VALUE_TYPE r;
	  REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
	  char float_buf[buf_size] = {'\0'};
	  real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size,
				    1, mode);
#undef buf_size

	  if (lane_count == 1)
	    snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
	  else
	    snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
		      lane_count, element_char, float_buf);
	  return templ;
	}
    }

  mnemonic = info.mvn ? "mvni" : "movi";
  shift_op = info.msl ? "msl" : "lsl";

  if (lane_count == 1)
    snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
	      mnemonic, UINTVAL (info.value));
  else if (info.shift)
    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
	      ", %s %d", mnemonic, lane_count, element_char,
	      UINTVAL (info.value), shift_op, info.shift);
  else
    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
	      mnemonic, lane_count, element_char, UINTVAL (info.value));
  return templ;
}
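/* The returned string is an output template: lane counts and immediate
   values are baked in, the operand number is not.  Typical results look
   along the lines of "movi\t%0.4s, 0x1, lsl 8", "mvni\t%0.4s, 0x2" or
   "fmov\t%0.2d, 1.0e+0" (examples illustrative).  */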
char*
aarch64_output_scalar_simd_mov_immediate (rtx immediate,
					  enum machine_mode mode)
{
  enum machine_mode vmode;

  gcc_assert (!VECTOR_MODE_P (mode));
  vmode = aarch64_simd_container_mode (mode, 64);
  rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
  return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
}
/* Split operands into moves from op[1] + op[2] into op[0].  */

void
aarch64_split_combinev16qi (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  enum machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
  rtx destlo, desthi;

  gcc_assert (halfmode == V16QImode);

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
			       GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  The three XORs implement
     the classic register swap without needing a scratch register.  */
  if (reg_overlap_mentioned_p (operands[2], destlo)
      && reg_overlap_mentioned_p (operands[1], desthi))
    {
      emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
      emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
      emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
    }
  else if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
	 is in the right place already.  */
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
    }
}
/* vec_perm support.  */

#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  enum machine_mode vmode;
  unsigned char nelt;
  bool one_vector_p;
  bool testing_p;
};
/* Generate a variable permutation.  */

static void
aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_SIMD);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
	{
	  /* Expand the argument to a V16QI mode by duplicating it.  */
	  rtx pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
	  emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
	}
      else
	{
	  emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
	}
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
	{
	  pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
	  emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
	}
      else
	{
	  pair = gen_reg_rtx (OImode);
	  emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
	  emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
	}
    }
}
void
aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx rmask[MAX_VECT_LEN], mask;

  gcc_checking_assert (!BYTES_BIG_ENDIAN);

  /* The TBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
  for (i = 0; i < nelt; ++i)
    rmask[i] = mask;
  mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  aarch64_expand_vec_perm_1 (target, op0, op1, sel);
}
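/* For example, for a two-operand V8QI permute the mask is 15, so an
   out-of-range selector value of 19 is reduced to 19 & 15 = 3 before
   TBL sees it.  */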
/* Recognize patterns suitable for the TRN instructions.  */
static bool
aarch64_evpc_trn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
	return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }
  out = d->target;

  if (odd)
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_trn2v16qi; break;
	case V8QImode: gen = gen_aarch64_trn2v8qi; break;
	case V8HImode: gen = gen_aarch64_trn2v8hi; break;
	case V4HImode: gen = gen_aarch64_trn2v4hi; break;
	case V4SImode: gen = gen_aarch64_trn2v4si; break;
	case V2SImode: gen = gen_aarch64_trn2v2si; break;
	case V2DImode: gen = gen_aarch64_trn2v2di; break;
	case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
	case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
	case V2DFmode: gen = gen_aarch64_trn2v2df; break;
	default:
	  return false;
	}
    }
  else
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_trn1v16qi; break;
	case V8QImode: gen = gen_aarch64_trn1v8qi; break;
	case V8HImode: gen = gen_aarch64_trn1v8hi; break;
	case V4HImode: gen = gen_aarch64_trn1v4hi; break;
	case V4SImode: gen = gen_aarch64_trn1v4si; break;
	case V2SImode: gen = gen_aarch64_trn1v2si; break;
	case V2DImode: gen = gen_aarch64_trn1v2di; break;
	case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
	case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
	case V2DFmode: gen = gen_aarch64_trn1v2df; break;
	default:
	  return false;
	}
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
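/* For example, on V4SI the selector {0, 4, 2, 6} matches TRN1 (odd == 0)
   and {1, 5, 3, 7} matches TRN2 (odd == 1), in the little-endian element
   numbering used by the tests above.  */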
/* Recognize patterns suitable for the UZP instructions.  */
static bool
aarch64_evpc_uzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt = (i * 2 + odd) & mask;
      if (d->perm[i] != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }
  out = d->target;

  if (odd)
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
	case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
	case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
	case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
	case V4SImode: gen = gen_aarch64_uzp2v4si; break;
	case V2SImode: gen = gen_aarch64_uzp2v2si; break;
	case V2DImode: gen = gen_aarch64_uzp2v2di; break;
	case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
	case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
	case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
	default:
	  return false;
	}
    }
  else
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
	case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
	case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
	case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
	case V4SImode: gen = gen_aarch64_uzp1v4si; break;
	case V2SImode: gen = gen_aarch64_uzp1v2si; break;
	case V2DImode: gen = gen_aarch64_uzp1v2di; break;
	case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
	case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
	case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
	default:
	  return false;
	}
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
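/* For example, on V4SI the selector {0, 2, 4, 6} matches UZP1 (odd == 0)
   and {1, 3, 5, 7} matches UZP2 (odd == 1).  */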
/* Recognize patterns suitable for the ZIP instructions.  */
static bool
aarch64_evpc_zip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  high = nelt / 2;
  if (d->perm[0] == high)
    /* Do nothing.  */
    ;
  else if (d->perm[0] == 0)
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt = (i + high) & mask;
      if (d->perm[i * 2] != elt)
	return false;
      elt = (elt + nelt) & mask;
      if (d->perm[i * 2 + 1] != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      high = !high;
    }
  out = d->target;

  if (high)
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_zip2v16qi; break;
	case V8QImode: gen = gen_aarch64_zip2v8qi; break;
	case V8HImode: gen = gen_aarch64_zip2v8hi; break;
	case V4HImode: gen = gen_aarch64_zip2v4hi; break;
	case V4SImode: gen = gen_aarch64_zip2v4si; break;
	case V2SImode: gen = gen_aarch64_zip2v2si; break;
	case V2DImode: gen = gen_aarch64_zip2v2di; break;
	case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
	case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
	case V2DFmode: gen = gen_aarch64_zip2v2df; break;
	default:
	  return false;
	}
    }
  else
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_zip1v16qi; break;
	case V8QImode: gen = gen_aarch64_zip1v8qi; break;
	case V8HImode: gen = gen_aarch64_zip1v8hi; break;
	case V4HImode: gen = gen_aarch64_zip1v4hi; break;
	case V4SImode: gen = gen_aarch64_zip1v4si; break;
	case V2SImode: gen = gen_aarch64_zip1v2si; break;
	case V2DImode: gen = gen_aarch64_zip1v2di; break;
	case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
	case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
	case V2DFmode: gen = gen_aarch64_zip1v2df; break;
	default:
	  return false;
	}
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
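/* For example, on V4SI the selector {0, 4, 1, 5} matches ZIP1 (high == 0)
   and {2, 6, 3, 7} matches ZIP2 (high == nelt / 2).  */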
static bool
aarch64_evpc_tbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  enum machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  /* TODO: ARM's TBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
static bool
aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;
      rtx x;

      for (i = 0; i < nelt; ++i)
	d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);

      x = d->op0;
      d->op0 = d->op1;
      d->op1 = x;
    }

  if (TARGET_SIMD)
    {
      if (aarch64_evpc_zip (d))
	return true;
      else if (aarch64_evpc_uzp (d))
	return true;
      else if (aarch64_evpc_trn (d))
	return true;
      return aarch64_evpc_tbl (d);
    }
  return false;
}
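/* For example, the V4SI selector {4, 1, 6, 3} begins with an index into
   the second operand; swapping op0/op1 and re-biasing every index by
   nelt (mod 2 * nelt) yields {0, 5, 2, 7}, which the recognizers above
   can then match.  */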
/* Expand a vec_perm_const pattern.  */

bool
aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
	break;

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      /* Fall through.  */
    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return aarch64_expand_vec_perm_const_1 (&d);
}
static bool
aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
				     const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Calculate whether all elements are in one vector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* If all elements are from the second vector, reindex as if from the
     first vector.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to a single vector.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = aarch64_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST aarch64_address_cost

/* This hook will determine whether unnamed bitfields affect the alignment
   of the containing structure.  The hook returns true if the structure
   should inherit the alignment requirements of an unnamed bitfield's
   type.  */
#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
  hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START aarch64_start_file

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk

#undef TARGET_ASM_SELECT_RTX_SECTION
#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list

#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE aarch64_can_eliminate

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage

/* Only the least significant bit is used for initialization guard
   variables.  */
#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix

#ifdef TARGET_BIG_ENDIAN_DEFAULT
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
#endif

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL aarch64_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN aarch64_fold_builtin

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG aarch64_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance

#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE aarch64_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS aarch64_init_builtins

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE aarch64_mangle_type

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

/* This target hook should return true if accesses to volatile bitfields
   should use the narrowest mode possible.  It should return false if these
   accesses should use the bitfield container type.  */
#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE aarch64_override_options

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  aarch64_override_options_after_change

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB aarch64_return_in_msb

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS aarch64_rtx_costs

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p

#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  aarch64_builtin_vectorization_cost

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  aarch64_builtin_vectorized_function

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  aarch64_autovectorize_vector_sizes

/* Section anchor support.  */

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -256

/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
   byte offset; we can do much more for larger data types, but have no way
   to determine the size of the access.  We assume accesses are aligned.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  aarch64_simd_vector_alignment_reachable

/* vec_perm support.  */

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  aarch64_vectorize_vec_perm_const_ok

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-aarch64.h"