/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2013 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "hard-reg-set.h"
#include "target-def.h"
#include "targhooks.h"
#include "langhooks.h"
#include "diagnostic-core.h"
#include "tree-vectorizer.h"

/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC
       A constant symbolic address, in a pc-relative literal pool.  */

enum aarch64_address_type
{
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};
struct aarch64_address_info
{
  enum aarch64_address_type type;
  enum aarch64_symbol_type symbol_type;
};

struct simd_immediate_info;

/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1

static bool aarch64_composite_type_p (const_tree, enum machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
                                                     const_tree,
                                                     enum machine_mode *, int *,
                                                     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (enum machine_mode);
static unsigned bit_count (unsigned HOST_WIDE_INT);
static bool aarch64_const_vec_all_same_int_p (rtx,
                                              HOST_WIDE_INT, HOST_WIDE_INT);
static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
                                                 const unsigned char *sel);
/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = generic;

/* The current tuning set.  */
const struct tune_params *aarch64_tune_params;

/* Mask to specify which instructions we are allowed to generate.  */
unsigned long aarch64_isa_flags = 0;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;
/* Tuning parameters.  */

#if HAVE_DESIGNATED_INITIALIZERS
#define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
#else
#define NAMED_PARAM(NAME, VAL) (VAL)
#endif

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_rtx_cost_table generic_rtx_cost_table =
{
  NAMED_PARAM (memory_load, COSTS_N_INSNS (1)),
  NAMED_PARAM (memory_store, COSTS_N_INSNS (0)),
  NAMED_PARAM (register_shift, COSTS_N_INSNS (1)),
  NAMED_PARAM (int_divide, COSTS_N_INSNS (6)),
  NAMED_PARAM (float_divide, COSTS_N_INSNS (2)),
  NAMED_PARAM (double_divide, COSTS_N_INSNS (6)),
  NAMED_PARAM (int_multiply, COSTS_N_INSNS (1)),
  NAMED_PARAM (int_multiply_extend, COSTS_N_INSNS (1)),
  NAMED_PARAM (int_multiply_add, COSTS_N_INSNS (1)),
  NAMED_PARAM (int_multiply_extend_add, COSTS_N_INSNS (1)),
  NAMED_PARAM (float_multiply, COSTS_N_INSNS (0)),
  NAMED_PARAM (double_multiply, COSTS_N_INSNS (1))
};
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_addrcost_table generic_addrcost_table =
{
  NAMED_PARAM (pre_modify, 0),
  NAMED_PARAM (post_modify, 0),
  NAMED_PARAM (register_offset, 0),
  NAMED_PARAM (register_extend, 0),
  NAMED_PARAM (imm_offset, 0)
};
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_regmove_cost generic_regmove_cost =
{
  NAMED_PARAM (GP2GP, 1),
  NAMED_PARAM (GP2FP, 2),
  NAMED_PARAM (FP2GP, 2),
  /* We currently do not provide direct support for TFmode Q->Q move.
     Therefore we need to raise the cost above 2 in order to have
     reload handle the situation.  */
  NAMED_PARAM (FP2FP, 4)
};
/* Generic costs for vector insn classes.  */
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_vector_cost generic_vector_cost =
{
  NAMED_PARAM (scalar_stmt_cost, 1),
  NAMED_PARAM (scalar_load_cost, 1),
  NAMED_PARAM (scalar_store_cost, 1),
  NAMED_PARAM (vec_stmt_cost, 1),
  NAMED_PARAM (vec_to_scalar_cost, 1),
  NAMED_PARAM (scalar_to_vec_cost, 1),
  NAMED_PARAM (vec_align_load_cost, 1),
  NAMED_PARAM (vec_unalign_load_cost, 1),
  NAMED_PARAM (vec_unalign_store_cost, 1),
  NAMED_PARAM (vec_store_cost, 1),
  NAMED_PARAM (cond_taken_branch_cost, 3),
  NAMED_PARAM (cond_not_taken_branch_cost, 1)
};
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct tune_params generic_tunings =
{
  &generic_rtx_cost_table,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  NAMED_PARAM (memmov_cost, 4)
};
/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};
/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
#include "aarch64-cores.def"
#undef AARCH64_CORE
  {"generic", generic, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
  {NULL, aarch64_none, NULL, 0, NULL}
};
/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, FLAGS, NULL},
#include "aarch64-arches.def"
#undef AARCH64_ARCH
  {"generic", generic, "8", AARCH64_FL_FOR_ARCH8, NULL},
  {NULL, aarch64_none, NULL, 0, NULL}
};
/* Target specification.  These are populated as commandline arguments
   are processed, or NULL if not specified.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};

/* ISA extensions in AArch64.  */
static const struct aarch64_option_extension all_extensions[] =
{
#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
  {NAME, FLAGS_ON, FLAGS_OFF},
#include "aarch64-option-extensions.def"
#undef AARCH64_OPT_EXTENSION
};
/* Used to track the size of an address when generating a pre/post
   increment address.  */
static enum machine_mode aarch64_memory_reference_mode;

/* Used to force GTY into this file.  */
static GTY(()) int gty_dummy;

/* A table of valid AArch64 "bitmask immediate" values for
   logical instructions.  */

#define AARCH64_NUM_BITMASKS  5334
static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
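/* (A "bitmask immediate" is, roughly, a value whose binary representation is
   a single run of contiguous set bits, possibly rotated, within a 2, 4, 8,
   16, 32 or 64-bit element that is replicated across the register; for
   example 0x00ff00ff00ff00ff is encodable while 0x12345 is not.)  */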
/* Did we set flag_omit_frame_pointer just so
   aarch64_frame_pointer_required would be called?  */
static bool faked_omit_frame_pointer;
typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))

/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return AARCH64_DWARF_R0 + regno - R0_REGNUM;
  else if (regno == SP_REGNUM)
    return AARCH64_DWARF_SP;
  else if (FP_REGNUM_P (regno))
    return AARCH64_DWARF_V0 + regno - V0_REGNUM;

  /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
     equivalent DWARF register.  */
  return DWARF_FRAME_REGISTERS;
}
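/* Under this mapping, and assuming the usual AArch64 DWARF numbering
   (AARCH64_DWARF_R0 == 0, AARCH64_DWARF_SP == 31, AARCH64_DWARF_V0 == 64),
   x3 becomes DWARF register 3, sp becomes 31 and v10 becomes 74.  */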
/* Return TRUE if MODE is any of the large INT modes.  */
static bool
aarch64_vect_struct_mode_p (enum machine_mode mode)
{
  return mode == OImode || mode == CImode || mode == XImode;
}

/* Return TRUE if MODE is any of the vector modes.  */
static bool
aarch64_vector_mode_p (enum machine_mode mode)
{
  return aarch64_vector_mode_supported_p (mode)
         || aarch64_vect_struct_mode_p (mode);
}

/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
static bool
aarch64_array_mode_supported_p (enum machine_mode mode,
                                unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_SIMD
      && AARCH64_VALID_SIMD_QREG_MODE (mode)
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}
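/* In other words, arrays of two to four full 128-bit vectors are supported
   as single values; these correspond to the OImode/CImode/XImode structure
   modes used by the LD2/LD3/LD4 and ST2/ST3/ST4 structure load/store
   instructions.  */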
/* Implement HARD_REGNO_NREGS.  */
int
aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
{
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
    default:
      return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    }
}
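/* Thus, for example, a TImode value (16 bytes) occupies two X registers but
   only a single V register, since UNITS_PER_VREG is 16 on this target while
   UNITS_PER_WORD is 8.  */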
/* Implement HARD_REGNO_MODE_OK.  */
int
aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return regno == CC_REGNUM;

  if (regno == SP_REGNUM)
    /* The purpose of comparing with ptr_mode is to support the
       global register variable associated with the stack pointer
       register via the syntax of asm ("wsp") in ILP32.  */
    return mode == Pmode || mode == ptr_mode;

  if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
    return mode == Pmode;

  if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
    return 1;

  if (FP_REGNUM_P (regno))
    {
      if (aarch64_vect_struct_mode_p (mode))
        return
          (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
      else
        return 1;
    }

  return 0;
}
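/* So, for instance, a V4SImode value may live in any single V register,
   while an OImode value (a pair of vector registers) is only OK when the
   whole run of registers it needs stays within v0-v31.  */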
/* Return true if calls to DECL should be treated as
   long-calls (i.e. called via a register).  */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
  return false;
}

/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (i.e. called via a register).  */
bool
aarch64_is_long_call_p (rtx sym)
{
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}
/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

   (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)).  */
bool
aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
                                rtx extract_imm)
{
  HOST_WIDE_INT mult_val, extract_val;

  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
    return false;

  mult_val = INTVAL (mult_imm);
  extract_val = INTVAL (extract_imm);

  if (extract_val > 8
      && extract_val < GET_MODE_BITSIZE (mode)
      && exact_log2 (extract_val & ~7) > 0
      && (extract_val & 7) <= 4
      && mult_val == (1 << (extract_val & 7)))
    return true;

  return false;
}
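/* For instance, in DImode the pair MULT_IMM == 4, EXTRACT_IMM == 34 (32 + 2)
   satisfies the test above: the extract describes the low 32 bits of the
   register shifted left by 2, which roughly corresponds to the
   UXTW/SXTW #2 style extend operand.  */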
/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}

/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
  enum machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
  return cc_reg;
}
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

rtx
aarch64_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}
/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;
  rtx sym, addend;

  if (GET_CODE (addr) == CONST)
    {
      split_const (addr, &sym, &addend);
      if (GET_CODE (sym) == SYMBOL_REF)
        tls_kind = SYMBOL_REF_TLS_MODEL (sym);
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}
/* We'll allow lo_sum's in addresses in our legitimate addresses
   so that combine would take care of combining addresses where
   necessary, but for generation purposes, we'll generate the address
   as follows:

        tmp = hi (symbol_ref);              adrp  x1, foo
        dest = lo_sum (tmp, symbol_ref);    add   dest, x1, :lo_12:foo

        adrp x1, :got:foo                   adrp  tmp, :tlsgd:foo
        ldr  x1, [:got_lo12:foo]            add   dest, tmp, :tlsgd_lo12:foo

   Load TLS symbol, depending on TLS mechanism and TLS access model.

   Global Dynamic - Traditional TLS:
        adrp tmp, :tlsgd:imm
        add  dest, tmp, #:tlsgd_lo12:imm
        bl   __tls_get_addr

   Global Dynamic - TLS Descriptors:
        adrp dest, :tlsdesc:imm
        ldr  tmp, [dest, #:tlsdesc_lo12:imm]
        add  dest, dest, #:tlsdesc_lo12:imm

   Initial Exec:
        adrp tmp, :gottprel:imm
        ldr  dest, [tmp, #:gottprel_lo12:imm]

   Local Exec:
        add  t0, tp, #:tprel_hi12:imm
        add  t0, #:tprel_lo12_nc:imm  */
547 aarch64_load_symref_appropriately (rtx dest
, rtx imm
,
548 enum aarch64_symbol_type type
)
552 case SYMBOL_SMALL_ABSOLUTE
:
554 /* In ILP32, the mode of dest can be either SImode or DImode. */
556 enum machine_mode mode
= GET_MODE (dest
);
558 gcc_assert (mode
== Pmode
|| mode
== ptr_mode
);
560 if (can_create_pseudo_p ())
561 tmp_reg
= gen_reg_rtx (mode
);
563 emit_move_insn (tmp_reg
, gen_rtx_HIGH (mode
, imm
));
564 emit_insn (gen_add_losym (dest
, tmp_reg
, imm
));
568 case SYMBOL_TINY_ABSOLUTE
:
569 emit_insn (gen_rtx_SET (Pmode
, dest
, imm
));
572 case SYMBOL_SMALL_GOT
:
        /* In ILP32, the mode of dest can be either SImode or DImode,
           while the got entry is always of SImode size.  The mode of
           dest depends on how dest is used: if dest is assigned to a
           pointer (e.g. in the memory), it has SImode; it may have
           DImode if dest is dereferenced to access the memory.
           This is why we have to handle three different ldr_got_small
           patterns here (two patterns for ILP32).  */
582 enum machine_mode mode
= GET_MODE (dest
);
584 if (can_create_pseudo_p ())
585 tmp_reg
= gen_reg_rtx (mode
);
587 emit_move_insn (tmp_reg
, gen_rtx_HIGH (mode
, imm
));
588 if (mode
== ptr_mode
)
591 emit_insn (gen_ldr_got_small_di (dest
, tmp_reg
, imm
));
593 emit_insn (gen_ldr_got_small_si (dest
, tmp_reg
, imm
));
597 gcc_assert (mode
== Pmode
);
598 emit_insn (gen_ldr_got_small_sidi (dest
, tmp_reg
, imm
));
604 case SYMBOL_SMALL_TLSGD
:
607 rtx result
= gen_rtx_REG (Pmode
, R0_REGNUM
);
610 emit_call_insn (gen_tlsgd_small (result
, imm
));
611 insns
= get_insns ();
614 RTL_CONST_CALL_P (insns
) = 1;
615 emit_libcall_block (insns
, dest
, result
, imm
);
619 case SYMBOL_SMALL_TLSDESC
:
621 rtx x0
= gen_rtx_REG (Pmode
, R0_REGNUM
);
624 emit_insn (gen_tlsdesc_small (imm
));
625 tp
= aarch64_load_tp (NULL
);
626 emit_insn (gen_rtx_SET (Pmode
, dest
, gen_rtx_PLUS (Pmode
, tp
, x0
)));
627 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
631 case SYMBOL_SMALL_GOTTPREL
:
633 rtx tmp_reg
= gen_reg_rtx (Pmode
);
634 rtx tp
= aarch64_load_tp (NULL
);
635 emit_insn (gen_tlsie_small (tmp_reg
, imm
));
636 emit_insn (gen_rtx_SET (Pmode
, dest
, gen_rtx_PLUS (Pmode
, tp
, tmp_reg
)));
637 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
641 case SYMBOL_SMALL_TPREL
:
643 rtx tp
= aarch64_load_tp (NULL
);
644 emit_insn (gen_tlsle_small (dest
, tp
, imm
));
645 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
649 case SYMBOL_TINY_GOT
:
650 emit_insn (gen_ldr_got_tiny (dest
, imm
));
/* Emit a move from SRC to DEST.  Assume that the move expanders can
   handle all moves if !can_create_pseudo_p ().  The distinction is
   important because, unlike emit_move_insn, the move expanders know
   how to force Pmode objects into the constant pool even when the
   constant pool address is not itself legitimate.  */
static rtx
aarch64_emit_move (rtx dest, rtx src)
{
  return (can_create_pseudo_p ()
          ? emit_move_insn (dest, src)
          : emit_move_insn_1 (dest, src));
}
672 aarch64_split_128bit_move (rtx dst
, rtx src
)
676 enum machine_mode src_mode
= GET_MODE (src
);
677 enum machine_mode dst_mode
= GET_MODE (dst
);
678 int src_regno
= REGNO (src
);
679 int dst_regno
= REGNO (dst
);
681 gcc_assert (dst_mode
== TImode
|| dst_mode
== TFmode
);
683 if (REG_P (dst
) && REG_P (src
))
685 gcc_assert (src_mode
== TImode
|| src_mode
== TFmode
);
687 /* Handle r -> w, w -> r. */
688 if (FP_REGNUM_P (dst_regno
) && GP_REGNUM_P (src_regno
))
693 (gen_aarch64_movtilow_di (dst
, gen_lowpart (word_mode
, src
)));
695 (gen_aarch64_movtihigh_di (dst
, gen_highpart (word_mode
, src
)));
699 (gen_aarch64_movtflow_di (dst
, gen_lowpart (word_mode
, src
)));
701 (gen_aarch64_movtfhigh_di (dst
, gen_highpart (word_mode
, src
)));
707 else if (GP_REGNUM_P (dst_regno
) && FP_REGNUM_P (src_regno
))
712 (gen_aarch64_movdi_tilow (gen_lowpart (word_mode
, dst
), src
));
714 (gen_aarch64_movdi_tihigh (gen_highpart (word_mode
, dst
), src
));
718 (gen_aarch64_movdi_tflow (gen_lowpart (word_mode
, dst
), src
));
720 (gen_aarch64_movdi_tfhigh (gen_highpart (word_mode
, dst
), src
));
726 /* Fall through to r -> r cases. */
731 low_dst
= gen_lowpart (word_mode
, dst
);
733 && reg_overlap_mentioned_p (low_dst
, src
))
735 aarch64_emit_move (gen_highpart (word_mode
, dst
),
736 gen_highpart_mode (word_mode
, TImode
, src
));
737 aarch64_emit_move (low_dst
, gen_lowpart (word_mode
, src
));
741 aarch64_emit_move (low_dst
, gen_lowpart (word_mode
, src
));
742 aarch64_emit_move (gen_highpart (word_mode
, dst
),
743 gen_highpart_mode (word_mode
, TImode
, src
));
747 emit_move_insn (gen_rtx_REG (DFmode
, dst_regno
),
748 gen_rtx_REG (DFmode
, src_regno
));
749 emit_move_insn (gen_rtx_REG (DFmode
, dst_regno
+ 1),
750 gen_rtx_REG (DFmode
, src_regno
+ 1));
758 aarch64_split_128bit_move_p (rtx dst
, rtx src
)
760 return (! REG_P (src
)
761 || ! (FP_REGNUM_P (REGNO (dst
)) && FP_REGNUM_P (REGNO (src
))));
764 /* Split a complex SIMD combine. */
767 aarch64_split_simd_combine (rtx dst
, rtx src1
, rtx src2
)
769 enum machine_mode src_mode
= GET_MODE (src1
);
770 enum machine_mode dst_mode
= GET_MODE (dst
);
772 gcc_assert (VECTOR_MODE_P (dst_mode
));
774 if (REG_P (dst
) && REG_P (src1
) && REG_P (src2
))
776 rtx (*gen
) (rtx
, rtx
, rtx
);
781 gen
= gen_aarch64_simd_combinev8qi
;
784 gen
= gen_aarch64_simd_combinev4hi
;
787 gen
= gen_aarch64_simd_combinev2si
;
790 gen
= gen_aarch64_simd_combinev2sf
;
793 gen
= gen_aarch64_simd_combinedi
;
796 gen
= gen_aarch64_simd_combinedf
;
802 emit_insn (gen (dst
, src1
, src2
));
807 /* Split a complex SIMD move. */
810 aarch64_split_simd_move (rtx dst
, rtx src
)
812 enum machine_mode src_mode
= GET_MODE (src
);
813 enum machine_mode dst_mode
= GET_MODE (dst
);
815 gcc_assert (VECTOR_MODE_P (dst_mode
));
817 if (REG_P (dst
) && REG_P (src
))
819 rtx (*gen
) (rtx
, rtx
);
821 gcc_assert (VECTOR_MODE_P (src_mode
));
826 gen
= gen_aarch64_split_simd_movv16qi
;
829 gen
= gen_aarch64_split_simd_movv8hi
;
832 gen
= gen_aarch64_split_simd_movv4si
;
835 gen
= gen_aarch64_split_simd_movv2di
;
838 gen
= gen_aarch64_split_simd_movv4sf
;
841 gen
= gen_aarch64_split_simd_movv2df
;
847 emit_insn (gen (dst
, src
));
853 aarch64_force_temporary (enum machine_mode mode
, rtx x
, rtx value
)
855 if (can_create_pseudo_p ())
856 return force_reg (mode
, value
);
859 x
= aarch64_emit_move (x
, value
);
866 aarch64_add_offset (enum machine_mode mode
, rtx temp
, rtx reg
, HOST_WIDE_INT offset
)
868 if (!aarch64_plus_immediate (GEN_INT (offset
), mode
))
871 /* Load the full offset into a register. This
872 might be improvable in the future. */
873 high
= GEN_INT (offset
);
875 high
= aarch64_force_temporary (mode
, temp
, high
);
876 reg
= aarch64_force_temporary (mode
, temp
,
877 gen_rtx_PLUS (mode
, high
, reg
));
879 return plus_constant (mode
, reg
, offset
);
883 aarch64_expand_mov_immediate (rtx dest
, rtx imm
)
885 enum machine_mode mode
= GET_MODE (dest
);
886 unsigned HOST_WIDE_INT mask
;
889 unsigned HOST_WIDE_INT val
;
892 int one_match
, zero_match
;
894 gcc_assert (mode
== SImode
|| mode
== DImode
);
  /* Check what type of symbol it is.  */
897 if (GET_CODE (imm
) == SYMBOL_REF
898 || GET_CODE (imm
) == LABEL_REF
899 || GET_CODE (imm
) == CONST
)
901 rtx mem
, base
, offset
;
902 enum aarch64_symbol_type sty
;
904 /* If we have (const (plus symbol offset)), separate out the offset
905 before we start classifying the symbol. */
906 split_const (imm
, &base
, &offset
);
908 sty
= aarch64_classify_symbol (base
, SYMBOL_CONTEXT_ADR
);
911 case SYMBOL_FORCE_TO_MEM
:
912 if (offset
!= const0_rtx
913 && targetm
.cannot_force_const_mem (mode
, imm
))
915 gcc_assert(can_create_pseudo_p ());
916 base
= aarch64_force_temporary (mode
, dest
, base
);
917 base
= aarch64_add_offset (mode
, NULL
, base
, INTVAL (offset
));
918 aarch64_emit_move (dest
, base
);
921 mem
= force_const_mem (ptr_mode
, imm
);
923 if (mode
!= ptr_mode
)
924 mem
= gen_rtx_ZERO_EXTEND (mode
, mem
);
925 emit_insn (gen_rtx_SET (VOIDmode
, dest
, mem
));
928 case SYMBOL_SMALL_TLSGD
:
929 case SYMBOL_SMALL_TLSDESC
:
930 case SYMBOL_SMALL_GOTTPREL
:
931 case SYMBOL_SMALL_GOT
:
932 case SYMBOL_TINY_GOT
:
933 if (offset
!= const0_rtx
)
935 gcc_assert(can_create_pseudo_p ());
936 base
= aarch64_force_temporary (mode
, dest
, base
);
937 base
= aarch64_add_offset (mode
, NULL
, base
, INTVAL (offset
));
938 aarch64_emit_move (dest
, base
);
943 case SYMBOL_SMALL_TPREL
:
944 case SYMBOL_SMALL_ABSOLUTE
:
945 case SYMBOL_TINY_ABSOLUTE
:
946 aarch64_load_symref_appropriately (dest
, imm
, sty
);
954 if (CONST_INT_P (imm
) && aarch64_move_imm (INTVAL (imm
), mode
))
956 emit_insn (gen_rtx_SET (VOIDmode
, dest
, imm
));
960 if (!CONST_INT_P (imm
))
962 if (GET_CODE (imm
) == HIGH
)
963 emit_insn (gen_rtx_SET (VOIDmode
, dest
, imm
));
966 rtx mem
= force_const_mem (mode
, imm
);
968 emit_insn (gen_rtx_SET (VOIDmode
, dest
, mem
));
  /* We know we can't do this in 1 insn, and we must be able to do it
     in two; so don't mess around looking for sequences that don't buy
     us anything.  */
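  /* For example, for the SImode constant 0x12345678 the two insns below
     amount to
        mov     dest, #0x5678
        movk    dest, #0x1234, lsl #16  */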
979 emit_insn (gen_rtx_SET (VOIDmode
, dest
, GEN_INT (INTVAL (imm
) & 0xffff)));
980 emit_insn (gen_insv_immsi (dest
, GEN_INT (16),
981 GEN_INT ((INTVAL (imm
) >> 16) & 0xffff)));
985 /* Remaining cases are all for DImode. */
988 subtargets
= optimize
&& can_create_pseudo_p ();
994 for (i
= 0; i
< 64; i
+= 16, mask
<<= 16)
996 if ((val
& mask
) == 0)
998 else if ((val
& mask
) == mask
)
1005 for (i
= 0; i
< 64; i
+= 16, mask
<<= 16)
1007 if ((val
& mask
) != mask
)
1009 emit_insn (gen_rtx_SET (VOIDmode
, dest
, GEN_INT (val
| mask
)));
1010 emit_insn (gen_insv_immdi (dest
, GEN_INT (i
),
1011 GEN_INT ((val
>> i
) & 0xffff)));
1018 if (zero_match
== 2)
1019 goto simple_sequence
;
1021 mask
= 0x0ffff0000UL
;
1022 for (i
= 16; i
< 64; i
+= 16, mask
<<= 16)
1024 HOST_WIDE_INT comp
= mask
& ~(mask
- 1);
1026 if (aarch64_uimm12_shift (val
- (val
& mask
)))
1028 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1030 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
, GEN_INT (val
& mask
)));
1031 emit_insn (gen_adddi3 (dest
, subtarget
,
1032 GEN_INT (val
- (val
& mask
))));
1035 else if (aarch64_uimm12_shift (-(val
- ((val
+ comp
) & mask
))))
1037 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1039 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1040 GEN_INT ((val
+ comp
) & mask
)));
1041 emit_insn (gen_adddi3 (dest
, subtarget
,
1042 GEN_INT (val
- ((val
+ comp
) & mask
))));
1045 else if (aarch64_uimm12_shift (val
- ((val
- comp
) | ~mask
)))
1047 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1049 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1050 GEN_INT ((val
- comp
) | ~mask
)));
1051 emit_insn (gen_adddi3 (dest
, subtarget
,
1052 GEN_INT (val
- ((val
- comp
) | ~mask
))));
1055 else if (aarch64_uimm12_shift (-(val
- (val
| ~mask
))))
1057 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1059 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1060 GEN_INT (val
| ~mask
)));
1061 emit_insn (gen_adddi3 (dest
, subtarget
,
1062 GEN_INT (val
- (val
| ~mask
))));
  /* See if we can do it by arithmetically combining two immediates.  */
1069 for (i
= 0; i
< AARCH64_NUM_BITMASKS
; i
++)
1074 if (aarch64_uimm12_shift (val
- aarch64_bitmasks
[i
])
1075 || aarch64_uimm12_shift (-val
+ aarch64_bitmasks
[i
]))
1077 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1078 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1079 GEN_INT (aarch64_bitmasks
[i
])));
1080 emit_insn (gen_adddi3 (dest
, subtarget
,
1081 GEN_INT (val
- aarch64_bitmasks
[i
])));
1085 for (j
= 0; j
< 64; j
+= 16, mask
<<= 16)
1087 if ((aarch64_bitmasks
[i
] & ~mask
) == (val
& ~mask
))
1089 emit_insn (gen_rtx_SET (VOIDmode
, dest
,
1090 GEN_INT (aarch64_bitmasks
[i
])));
1091 emit_insn (gen_insv_immdi (dest
, GEN_INT (j
),
1092 GEN_INT ((val
>> j
) & 0xffff)));
1098 /* See if we can do it by logically combining two immediates. */
1099 for (i
= 0; i
< AARCH64_NUM_BITMASKS
; i
++)
1101 if ((aarch64_bitmasks
[i
] & val
) == aarch64_bitmasks
[i
])
1105 for (j
= i
+ 1; j
< AARCH64_NUM_BITMASKS
; j
++)
1106 if (val
== (aarch64_bitmasks
[i
] | aarch64_bitmasks
[j
]))
1108 subtarget
= subtargets
? gen_reg_rtx (mode
) : dest
;
1109 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1110 GEN_INT (aarch64_bitmasks
[i
])));
1111 emit_insn (gen_iordi3 (dest
, subtarget
,
1112 GEN_INT (aarch64_bitmasks
[j
])));
1116 else if ((val
& aarch64_bitmasks
[i
]) == val
)
1120 for (j
= i
+ 1; j
< AARCH64_NUM_BITMASKS
; j
++)
1121 if (val
== (aarch64_bitmasks
[j
] & aarch64_bitmasks
[i
]))
1124 subtarget
= subtargets
? gen_reg_rtx (mode
) : dest
;
1125 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1126 GEN_INT (aarch64_bitmasks
[j
])));
1127 emit_insn (gen_anddi3 (dest
, subtarget
,
1128 GEN_INT (aarch64_bitmasks
[i
])));
1137 for (i
= 0; i
< 64; i
+= 16, mask
<<= 16)
1139 if ((val
& mask
) != 0)
1143 emit_insn (gen_rtx_SET (VOIDmode
, dest
,
1144 GEN_INT (val
& mask
)));
1148 emit_insn (gen_insv_immdi (dest
, GEN_INT (i
),
1149 GEN_INT ((val
>> i
) & 0xffff)));
1155 aarch64_function_ok_for_sibcall (tree decl
, tree exp ATTRIBUTE_UNUSED
)
1157 /* Indirect calls are not currently supported. */
  /* Cannot tail-call to long-calls, since these are outside of the
     range of a branch instruction (we could handle this if we added
     support for indirect tail-calls).  */
1164 if (aarch64_decl_is_long_call_p (decl
))
1170 /* Implement TARGET_PASS_BY_REFERENCE. */
1173 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED
,
1174 enum machine_mode mode
,
1176 bool named ATTRIBUTE_UNUSED
)
1179 enum machine_mode dummymode
;
1182 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1183 size
= (mode
== BLKmode
&& type
)
1184 ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
      /* Arrays are always passed by reference.  */
1189 if (TREE_CODE (type
) == ARRAY_TYPE
)
1191 /* Other aggregates based on their size. */
1192 if (AGGREGATE_TYPE_P (type
))
1193 size
= int_size_in_bytes (type
);
  /* Variable-sized arguments are always passed by reference.  */
1200 /* Can this be a candidate to be passed in fp/simd register(s)? */
1201 if (aarch64_vfp_is_call_or_return_candidate (mode
, type
,
  /* Arguments which are variable sized or larger than 2 registers are
     passed by reference unless they are a homogeneous floating-point
     aggregate.  */
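  /* For example, a structure of three 64-bit integers (24 bytes) is passed
     by reference, whereas an HFA of four doubles (32 bytes) is still a
     candidate for the SIMD/FP registers and so is passed by value.  */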
1209 return size
> 2 * UNITS_PER_WORD
;
1212 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1214 aarch64_return_in_msb (const_tree valtype
)
1216 enum machine_mode dummy_mode
;
1219 /* Never happens in little-endian mode. */
1220 if (!BYTES_BIG_ENDIAN
)
1223 /* Only composite types smaller than or equal to 16 bytes can
1224 be potentially returned in registers. */
1225 if (!aarch64_composite_type_p (valtype
, TYPE_MODE (valtype
))
1226 || int_size_in_bytes (valtype
) <= 0
1227 || int_size_in_bytes (valtype
) > 16)
  /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
     or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
     is always passed/returned in the least significant bits of fp/simd
     registers.  */
1234 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype
), valtype
,
1235 &dummy_mode
, &dummy_int
, NULL
))
1241 /* Implement TARGET_FUNCTION_VALUE.
1242 Define how to find the value returned by a function. */
1245 aarch64_function_value (const_tree type
, const_tree func
,
1246 bool outgoing ATTRIBUTE_UNUSED
)
1248 enum machine_mode mode
;
1251 enum machine_mode ag_mode
;
1253 mode
= TYPE_MODE (type
);
1254 if (INTEGRAL_TYPE_P (type
))
1255 mode
= promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
1257 if (aarch64_return_in_msb (type
))
1259 HOST_WIDE_INT size
= int_size_in_bytes (type
);
1261 if (size
% UNITS_PER_WORD
!= 0)
1263 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
1264 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
1268 if (aarch64_vfp_is_call_or_return_candidate (mode
, type
,
1269 &ag_mode
, &count
, NULL
))
1271 if (!aarch64_composite_type_p (type
, mode
))
1273 gcc_assert (count
== 1 && mode
== ag_mode
);
1274 return gen_rtx_REG (mode
, V0_REGNUM
);
1281 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
1282 for (i
= 0; i
< count
; i
++)
1284 rtx tmp
= gen_rtx_REG (ag_mode
, V0_REGNUM
+ i
);
1285 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
1286 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
1287 XVECEXP (par
, 0, i
) = tmp
;
1293 return gen_rtx_REG (mode
, R0_REGNUM
);
1296 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1297 Return true if REGNO is the number of a hard register in which the values
1298 of called function may come back. */
1301 aarch64_function_value_regno_p (const unsigned int regno
)
1303 /* Maximum of 16 bytes can be returned in the general registers. Examples
1304 of 16-byte return values are: 128-bit integers and 16-byte small
1305 structures (excluding homogeneous floating-point aggregates). */
1306 if (regno
== R0_REGNUM
|| regno
== R1_REGNUM
)
1309 /* Up to four fp/simd registers can return a function value, e.g. a
1310 homogeneous floating-point aggregate having four members. */
1311 if (regno
>= V0_REGNUM
&& regno
< V0_REGNUM
+ HA_MAX_NUM_FLDS
)
1312 return !TARGET_GENERAL_REGS_ONLY
;
1317 /* Implement TARGET_RETURN_IN_MEMORY.
1319 If the type T of the result of a function is such that
1321 would require that arg be passed as a value in a register (or set of
1322 registers) according to the parameter passing rules, then the result
1323 is returned in the same registers as would be used for such an
1327 aarch64_return_in_memory (const_tree type
, const_tree fndecl ATTRIBUTE_UNUSED
)
1330 enum machine_mode ag_mode
;
1333 if (!AGGREGATE_TYPE_P (type
)
1334 && TREE_CODE (type
) != COMPLEX_TYPE
1335 && TREE_CODE (type
) != VECTOR_TYPE
)
1336 /* Simple scalar types always returned in registers. */
1339 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type
),
1346 /* Types larger than 2 registers returned in memory. */
1347 size
= int_size_in_bytes (type
);
1348 return (size
< 0 || size
> 2 * UNITS_PER_WORD
);
1352 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v
, enum machine_mode mode
,
1353 const_tree type
, int *nregs
)
1355 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
1356 return aarch64_vfp_is_call_or_return_candidate (mode
,
1358 &pcum
->aapcs_vfp_rmode
,
1363 /* Given MODE and TYPE of a function argument, return the alignment in
1364 bits. The idea is to suppress any stronger alignment requested by
1365 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1366 This is a helper function for local use only. */
1369 aarch64_function_arg_alignment (enum machine_mode mode
, const_tree type
)
1371 unsigned int alignment
;
1375 if (!integer_zerop (TYPE_SIZE (type
)))
1377 if (TYPE_MODE (type
) == mode
)
1378 alignment
= TYPE_ALIGN (type
);
1380 alignment
= GET_MODE_ALIGNMENT (mode
);
1386 alignment
= GET_MODE_ALIGNMENT (mode
);
1391 /* Layout a function argument according to the AAPCS64 rules. The rule
1392 numbers refer to the rule numbers in the AAPCS64. */
1395 aarch64_layout_arg (cumulative_args_t pcum_v
, enum machine_mode mode
,
1397 bool named ATTRIBUTE_UNUSED
)
1399 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
1400 int ncrn
, nvrn
, nregs
;
1401 bool allocate_ncrn
, allocate_nvrn
;
1403 /* We need to do this once per argument. */
1404 if (pcum
->aapcs_arg_processed
)
1407 pcum
->aapcs_arg_processed
= true;
1409 allocate_ncrn
= (type
) ? !(FLOAT_TYPE_P (type
)) : !FLOAT_MODE_P (mode
);
1410 allocate_nvrn
= aarch64_vfp_is_call_candidate (pcum_v
,
  /* allocate_ncrn may be a false positive, but allocate_nvrn is quite
     reliable.  The following code thus handles passing by SIMD/FP
     registers first.  */
1418 nvrn
= pcum
->aapcs_nvrn
;
  /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
     and homogeneous short-vector aggregates (HVA).  */
1424 if (nvrn
+ nregs
<= NUM_FP_ARG_REGS
)
1426 pcum
->aapcs_nextnvrn
= nvrn
+ nregs
;
1427 if (!aarch64_composite_type_p (type
, mode
))
1429 gcc_assert (nregs
== 1);
1430 pcum
->aapcs_reg
= gen_rtx_REG (mode
, V0_REGNUM
+ nvrn
);
1436 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nregs
));
1437 for (i
= 0; i
< nregs
; i
++)
1439 rtx tmp
= gen_rtx_REG (pcum
->aapcs_vfp_rmode
,
1440 V0_REGNUM
+ nvrn
+ i
);
1441 tmp
= gen_rtx_EXPR_LIST
1443 GEN_INT (i
* GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
)));
1444 XVECEXP (par
, 0, i
) = tmp
;
1446 pcum
->aapcs_reg
= par
;
1452 /* C.3 NSRN is set to 8. */
1453 pcum
->aapcs_nextnvrn
= NUM_FP_ARG_REGS
;
1458 ncrn
= pcum
->aapcs_ncrn
;
1459 nregs
= ((type
? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
))
1460 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
  /* C6 - C9, though the sign and zero extension semantics are
     handled elsewhere.  This is the case where the argument fits
     entirely in general registers.  */
1466 if (allocate_ncrn
&& (ncrn
+ nregs
<= NUM_ARG_REGS
))
1468 unsigned int alignment
= aarch64_function_arg_alignment (mode
, type
);
1470 gcc_assert (nregs
== 0 || nregs
== 1 || nregs
== 2);
1472 /* C.8 if the argument has an alignment of 16 then the NGRN is
1473 rounded up to the next even number. */
1474 if (nregs
== 2 && alignment
== 16 * BITS_PER_UNIT
&& ncrn
% 2)
1477 gcc_assert (ncrn
+ nregs
<= NUM_ARG_REGS
);
1479 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1480 A reg is still generated for it, but the caller should be smart
1481 enough not to use it. */
1482 if (nregs
== 0 || nregs
== 1 || GET_MODE_CLASS (mode
) == MODE_INT
)
1484 pcum
->aapcs_reg
= gen_rtx_REG (mode
, R0_REGNUM
+ ncrn
);
1491 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nregs
));
1492 for (i
= 0; i
< nregs
; i
++)
1494 rtx tmp
= gen_rtx_REG (word_mode
, R0_REGNUM
+ ncrn
+ i
);
1495 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
1496 GEN_INT (i
* UNITS_PER_WORD
));
1497 XVECEXP (par
, 0, i
) = tmp
;
1499 pcum
->aapcs_reg
= par
;
1502 pcum
->aapcs_nextncrn
= ncrn
+ nregs
;
1507 pcum
->aapcs_nextncrn
= NUM_ARG_REGS
;
  /* The argument is passed on stack; record the needed number of words for
     this argument (we can re-use NREGS) and align the total size if
     necessary.  */
1513 pcum
->aapcs_stack_words
= nregs
;
1514 if (aarch64_function_arg_alignment (mode
, type
) == 16 * BITS_PER_UNIT
)
1515 pcum
->aapcs_stack_size
= AARCH64_ROUND_UP (pcum
->aapcs_stack_size
,
1516 16 / UNITS_PER_WORD
) + 1;
1520 /* Implement TARGET_FUNCTION_ARG. */
1523 aarch64_function_arg (cumulative_args_t pcum_v
, enum machine_mode mode
,
1524 const_tree type
, bool named
)
1526 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
1527 gcc_assert (pcum
->pcs_variant
== ARM_PCS_AAPCS64
);
1529 if (mode
== VOIDmode
)
1532 aarch64_layout_arg (pcum_v
, mode
, type
, named
);
1533 return pcum
->aapcs_reg
;
1537 aarch64_init_cumulative_args (CUMULATIVE_ARGS
*pcum
,
1538 const_tree fntype ATTRIBUTE_UNUSED
,
1539 rtx libname ATTRIBUTE_UNUSED
,
1540 const_tree fndecl ATTRIBUTE_UNUSED
,
1541 unsigned n_named ATTRIBUTE_UNUSED
)
1543 pcum
->aapcs_ncrn
= 0;
1544 pcum
->aapcs_nvrn
= 0;
1545 pcum
->aapcs_nextncrn
= 0;
1546 pcum
->aapcs_nextnvrn
= 0;
1547 pcum
->pcs_variant
= ARM_PCS_AAPCS64
;
1548 pcum
->aapcs_reg
= NULL_RTX
;
1549 pcum
->aapcs_arg_processed
= false;
1550 pcum
->aapcs_stack_words
= 0;
1551 pcum
->aapcs_stack_size
= 0;
1557 aarch64_function_arg_advance (cumulative_args_t pcum_v
,
1558 enum machine_mode mode
,
1562 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
1563 if (pcum
->pcs_variant
== ARM_PCS_AAPCS64
)
1565 aarch64_layout_arg (pcum_v
, mode
, type
, named
);
1566 gcc_assert ((pcum
->aapcs_reg
!= NULL_RTX
)
1567 != (pcum
->aapcs_stack_words
!= 0));
1568 pcum
->aapcs_arg_processed
= false;
1569 pcum
->aapcs_ncrn
= pcum
->aapcs_nextncrn
;
1570 pcum
->aapcs_nvrn
= pcum
->aapcs_nextnvrn
;
1571 pcum
->aapcs_stack_size
+= pcum
->aapcs_stack_words
;
1572 pcum
->aapcs_stack_words
= 0;
1573 pcum
->aapcs_reg
= NULL_RTX
;
1578 aarch64_function_arg_regno_p (unsigned regno
)
1580 return ((GP_REGNUM_P (regno
) && regno
< R0_REGNUM
+ NUM_ARG_REGS
)
1581 || (FP_REGNUM_P (regno
) && regno
< V0_REGNUM
+ NUM_FP_ARG_REGS
));
/* Implement FUNCTION_ARG_BOUNDARY.  Every parameter gets at least
   PARM_BOUNDARY bits of alignment, but will be given anything up
   to STACK_BOUNDARY bits if the type requires it.  This makes sure
   that both before and after the layout of each argument, the Next
   Stacked Argument Address (NSAA) will have a minimum alignment of
   8 bytes.  */
1592 aarch64_function_arg_boundary (enum machine_mode mode
, const_tree type
)
1594 unsigned int alignment
= aarch64_function_arg_alignment (mode
, type
);
1596 if (alignment
< PARM_BOUNDARY
)
1597 alignment
= PARM_BOUNDARY
;
1598 if (alignment
> STACK_BOUNDARY
)
1599 alignment
= STACK_BOUNDARY
;
1603 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1605 Return true if an argument passed on the stack should be padded upwards,
1606 i.e. if the least-significant byte of the stack slot has useful data.
1608 Small aggregate types are placed in the lowest memory address.
1610 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1613 aarch64_pad_arg_upward (enum machine_mode mode
, const_tree type
)
1615 /* On little-endian targets, the least significant byte of every stack
1616 argument is passed at the lowest byte address of the stack slot. */
1617 if (!BYTES_BIG_ENDIAN
)
1620 /* Otherwise, integral, floating-point and pointer types are padded downward:
1621 the least significant byte of a stack argument is passed at the highest
1622 byte address of the stack slot. */
1624 ? (INTEGRAL_TYPE_P (type
) || SCALAR_FLOAT_TYPE_P (type
)
1625 || POINTER_TYPE_P (type
))
1626 : (SCALAR_INT_MODE_P (mode
) || SCALAR_FLOAT_MODE_P (mode
)))
1629 /* Everything else padded upward, i.e. data in first byte of stack slot. */
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).

   It specifies padding for the last (and possibly the only)
   element of a block move between registers and memory.  If
   assuming the block is in memory, padding upward means that
   the last element is padded after its most significant byte,
   while in downward padding, the last element is padded at its
   least significant byte side.

   Small aggregates and small complex types are always padded
   upwards.
1645 We don't need to worry about homogeneous floating-point or
1646 short-vector aggregates; their move is not affected by the
1647 padding direction determined here. Regardless of endianness,
1648 each element of such an aggregate is put in the least
1649 significant bits of a fp/simd register.
1651 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1652 register has useful data, and return the opposite if the most
1653 significant byte does. */
1656 aarch64_pad_reg_upward (enum machine_mode mode
, const_tree type
,
1657 bool first ATTRIBUTE_UNUSED
)
1660 /* Small composite types are always padded upward. */
1661 if (BYTES_BIG_ENDIAN
&& aarch64_composite_type_p (type
, mode
))
1663 HOST_WIDE_INT size
= (type
? int_size_in_bytes (type
)
1664 : GET_MODE_SIZE (mode
));
1665 if (size
< 2 * UNITS_PER_WORD
)
1669 /* Otherwise, use the default padding. */
1670 return !BYTES_BIG_ENDIAN
;
1673 static enum machine_mode
1674 aarch64_libgcc_cmp_return_mode (void)
1680 aarch64_frame_pointer_required (void)
1682 /* If the function contains dynamic stack allocations, we need to
1683 use the frame pointer to access the static parts of the frame. */
1684 if (cfun
->calls_alloca
)
1687 /* We may have turned flag_omit_frame_pointer on in order to have this
1688 function called; if we did, we also set the 'faked_omit_frame_pointer' flag
1689 and we'll check it here.
1690 If we really did set flag_omit_frame_pointer normally, then we return false
1691 (no frame pointer required) in all cases. */
1693 if (flag_omit_frame_pointer
&& !faked_omit_frame_pointer
)
1695 else if (flag_omit_leaf_frame_pointer
)
1696 return !crtl
->is_leaf
;
1700 /* Mark the registers that need to be saved by the callee and calculate
1701 the size of the callee-saved registers area and frame record (both FP
1702 and LR may be omitted). */
1704 aarch64_layout_frame (void)
1706 HOST_WIDE_INT offset
= 0;
1709 if (reload_completed
&& cfun
->machine
->frame
.laid_out
)
1712 cfun
->machine
->frame
.fp_lr_offset
= 0;
1714 /* First mark all the registers that really need to be saved... */
1715 for (regno
= R0_REGNUM
; regno
<= R30_REGNUM
; regno
++)
1716 cfun
->machine
->frame
.reg_offset
[regno
] = -1;
1718 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
1719 cfun
->machine
->frame
.reg_offset
[regno
] = -1;
1721 /* ... that includes the eh data registers (if needed)... */
1722 if (crtl
->calls_eh_return
)
1723 for (regno
= 0; EH_RETURN_DATA_REGNO (regno
) != INVALID_REGNUM
; regno
++)
1724 cfun
->machine
->frame
.reg_offset
[EH_RETURN_DATA_REGNO (regno
)] = 0;
1726 /* ... and any callee saved register that dataflow says is live. */
1727 for (regno
= R0_REGNUM
; regno
<= R30_REGNUM
; regno
++)
1728 if (df_regs_ever_live_p (regno
)
1729 && !call_used_regs
[regno
])
1730 cfun
->machine
->frame
.reg_offset
[regno
] = 0;
1732 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
1733 if (df_regs_ever_live_p (regno
)
1734 && !call_used_regs
[regno
])
1735 cfun
->machine
->frame
.reg_offset
[regno
] = 0;
1737 if (frame_pointer_needed
)
1739 cfun
->machine
->frame
.reg_offset
[R30_REGNUM
] = 0;
1740 cfun
->machine
->frame
.reg_offset
[R29_REGNUM
] = 0;
1741 cfun
->machine
->frame
.hardfp_offset
= 2 * UNITS_PER_WORD
;
1744 /* Now assign stack slots for them. */
1745 for (regno
= R0_REGNUM
; regno
<= R28_REGNUM
; regno
++)
1746 if (cfun
->machine
->frame
.reg_offset
[regno
] != -1)
1748 cfun
->machine
->frame
.reg_offset
[regno
] = offset
;
1749 offset
+= UNITS_PER_WORD
;
1752 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
1753 if (cfun
->machine
->frame
.reg_offset
[regno
] != -1)
1755 cfun
->machine
->frame
.reg_offset
[regno
] = offset
;
1756 offset
+= UNITS_PER_WORD
;
1759 if (frame_pointer_needed
)
1761 cfun
->machine
->frame
.reg_offset
[R29_REGNUM
] = offset
;
1762 offset
+= UNITS_PER_WORD
;
1763 cfun
->machine
->frame
.fp_lr_offset
= UNITS_PER_WORD
;
1766 if (cfun
->machine
->frame
.reg_offset
[R30_REGNUM
] != -1)
1768 cfun
->machine
->frame
.reg_offset
[R30_REGNUM
] = offset
;
1769 offset
+= UNITS_PER_WORD
;
1770 cfun
->machine
->frame
.fp_lr_offset
+= UNITS_PER_WORD
;
1773 cfun
->machine
->frame
.padding0
=
1774 (AARCH64_ROUND_UP (offset
, STACK_BOUNDARY
/ BITS_PER_UNIT
) - offset
);
1775 offset
= AARCH64_ROUND_UP (offset
, STACK_BOUNDARY
/ BITS_PER_UNIT
);
1777 cfun
->machine
->frame
.saved_regs_size
= offset
;
1778 cfun
->machine
->frame
.laid_out
= true;
1781 /* Make the last instruction frame-related and note that it performs
1782 the operation described by FRAME_PATTERN. */
1785 aarch64_set_frame_expr (rtx frame_pattern
)
1789 insn
= get_last_insn ();
1790 RTX_FRAME_RELATED_P (insn
) = 1;
1791 RTX_FRAME_RELATED_P (frame_pattern
) = 1;
1792 REG_NOTES (insn
) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
1798 aarch64_register_saved_on_entry (int regno
)
1800 return cfun
->machine
->frame
.reg_offset
[regno
] != -1;
1805 aarch64_save_or_restore_fprs (int start_offset
, int increment
,
1806 bool restore
, rtx base_rtx
)
1812 rtx (*gen_mem_ref
)(enum machine_mode
, rtx
) = (frame_pointer_needed
)? gen_frame_mem
: gen_rtx_MEM
;
1815 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
1817 if (aarch64_register_saved_on_entry (regno
))
1820 mem
= gen_mem_ref (DFmode
,
1821 plus_constant (Pmode
,
1825 for (regno2
= regno
+ 1;
1826 regno2
<= V31_REGNUM
1827 && !aarch64_register_saved_on_entry (regno2
);
1832 if (regno2
<= V31_REGNUM
&&
1833 aarch64_register_saved_on_entry (regno2
))
1836 /* Next highest register to be saved. */
1837 mem2
= gen_mem_ref (DFmode
,
1841 start_offset
+ increment
));
1842 if (restore
== false)
1845 ( gen_store_pairdf (mem
, gen_rtx_REG (DFmode
, regno
),
1846 mem2
, gen_rtx_REG (DFmode
, regno2
)));
1852 ( gen_load_pairdf (gen_rtx_REG (DFmode
, regno
), mem
,
1853 gen_rtx_REG (DFmode
, regno2
), mem2
));
1855 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (DFmode
, regno
));
1856 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (DFmode
, regno2
));
              /* The first part of a frame-related parallel insn
                 is always assumed to be relevant to the frame
                 calculations; subsequent parts are only
                 frame-related if explicitly marked.  */
1863 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0,
1866 start_offset
+= increment
* 2;
1870 if (restore
== false)
1871 insn
= emit_move_insn (mem
, gen_rtx_REG (DFmode
, regno
));
1874 insn
= emit_move_insn (gen_rtx_REG (DFmode
, regno
), mem
);
1875 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (DImode
, regno
));
1877 start_offset
+= increment
;
1879 RTX_FRAME_RELATED_P (insn
) = 1;
/* Offset from the stack pointer of where the saves and
   restores have to happen.  */
1889 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset
,
1893 rtx base_rtx
= stack_pointer_rtx
;
1894 HOST_WIDE_INT start_offset
= offset
;
1895 HOST_WIDE_INT increment
= UNITS_PER_WORD
;
1896 rtx (*gen_mem_ref
)(enum machine_mode
, rtx
) = (frame_pointer_needed
)? gen_frame_mem
: gen_rtx_MEM
;
1897 unsigned limit
= (frame_pointer_needed
)? R28_REGNUM
: R30_REGNUM
;
1901 for (regno
= R0_REGNUM
; regno
<= limit
; regno
++)
1903 if (aarch64_register_saved_on_entry (regno
))
1906 mem
= gen_mem_ref (Pmode
,
1907 plus_constant (Pmode
,
1911 for (regno2
= regno
+ 1;
1913 && !aarch64_register_saved_on_entry (regno2
);
1918 if (regno2
<= limit
&&
1919 aarch64_register_saved_on_entry (regno2
))
1922 /* Next highest register to be saved. */
1923 mem2
= gen_mem_ref (Pmode
,
1927 start_offset
+ increment
));
1928 if (restore
== false)
1931 ( gen_store_pairdi (mem
, gen_rtx_REG (DImode
, regno
),
1932 mem2
, gen_rtx_REG (DImode
, regno2
)));
1938 ( gen_load_pairdi (gen_rtx_REG (DImode
, regno
), mem
,
1939 gen_rtx_REG (DImode
, regno2
), mem2
));
1941 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (DImode
, regno
));
1942 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (DImode
, regno2
));
              /* The first part of a frame-related parallel insn
                 is always assumed to be relevant to the frame
                 calculations; subsequent parts are only
                 frame-related if explicitly marked.  */
1949 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0,
1952 start_offset
+= increment
* 2;
1956 if (restore
== false)
1957 insn
= emit_move_insn (mem
, gen_rtx_REG (DImode
, regno
));
1960 insn
= emit_move_insn (gen_rtx_REG (DImode
, regno
), mem
);
1961 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (DImode
, regno
));
1963 start_offset
+= increment
;
1965 RTX_FRAME_RELATED_P (insn
) = 1;
1969 aarch64_save_or_restore_fprs (start_offset
, increment
, restore
, base_rtx
);
/* AArch64 stack frames generated by this compiler look like:

        +-------------------------------+
        |  incoming stack arguments     |
        +-------------------------------+ <-- arg_pointer_rtx
        |  callee-allocated save area   |
        |  for register varargs         |
        +-------------------------------+
        +-------------------------------+ <-- frame_pointer_rtx
        |  callee-saved registers       |
        +-------------------------------+
        +-------------------------------+
      P +-------------------------------+ <-- hard_frame_pointer_rtx
        |  dynamic allocation           |
        +-------------------------------+
        |  outgoing stack arguments     |
        +-------------------------------+ <-- stack_pointer_rtx

   Dynamic stack allocations such as alloca insert data at point P.
   They decrease stack_pointer_rtx but leave frame_pointer_rtx and
   hard_frame_pointer_rtx unchanged.  */
/* Generate the prologue instructions for entry into a function.
   Establish the stack frame by decreasing the stack pointer with a
   properly calculated size and, if necessary, create a frame record
   filled with the values of LR and previous frame pointer.  The
   current FP is also set up if it is in use.  */
2015 aarch64_expand_prologue (void)
  /* sub sp, sp, #<frame_size>
     stp {fp, lr}, [sp, #<frame_size> - 16]
     add fp, sp, #<frame_size> - hardfp_offset
     stp {cs_reg}, [fp, #-16] etc.

     sub sp, sp, <final_adjustment_if_any>  */
  HOST_WIDE_INT original_frame_size;    /* local variables + vararg save */
  HOST_WIDE_INT frame_size, offset;
  HOST_WIDE_INT fp_offset;              /* FP offset from SP */
2029 aarch64_layout_frame ();
2030 original_frame_size
= get_frame_size () + cfun
->machine
->saved_varargs_size
;
2031 gcc_assert ((!cfun
->machine
->saved_varargs_size
|| cfun
->stdarg
)
2032 && (cfun
->stdarg
|| !cfun
->machine
->saved_varargs_size
));
2033 frame_size
= (original_frame_size
+ cfun
->machine
->frame
.saved_regs_size
2034 + crtl
->outgoing_args_size
);
2035 offset
= frame_size
= AARCH64_ROUND_UP (frame_size
,
2036 STACK_BOUNDARY
/ BITS_PER_UNIT
);
2038 if (flag_stack_usage_info
)
2039 current_function_static_stack_size
= frame_size
;
2042 - original_frame_size
2043 - cfun
->machine
->frame
.saved_regs_size
);
  /* Store pairs and load pairs have a range of only -512 to 504.  */

      /* When the frame has a large size, an initial decrease is done on
         the stack pointer to jump over the callee-allocated save area for
         register varargs, the local variable area and/or the callee-saved
         register area.  This will allow the pre-index write-back store
         pair instructions to be used for setting up the stack frame in an
         efficient way.  */
2054 offset
= original_frame_size
+ cfun
->machine
->frame
.saved_regs_size
;
2056 offset
= cfun
->machine
->frame
.saved_regs_size
;
2058 frame_size
-= (offset
+ crtl
->outgoing_args_size
);
2061 if (frame_size
>= 0x1000000)
2063 rtx op0
= gen_rtx_REG (Pmode
, IP0_REGNUM
);
2064 emit_move_insn (op0
, GEN_INT (-frame_size
));
2065 emit_insn (gen_add2_insn (stack_pointer_rtx
, op0
));
2066 aarch64_set_frame_expr (gen_rtx_SET
2067 (Pmode
, stack_pointer_rtx
,
2068 plus_constant (Pmode
,
2072 else if (frame_size
> 0)
2074 if ((frame_size
& 0xfff) != frame_size
)
2076 insn
= emit_insn (gen_add2_insn
2078 GEN_INT (-(frame_size
2079 & ~(HOST_WIDE_INT
)0xfff))));
2080 RTX_FRAME_RELATED_P (insn
) = 1;
2082 if ((frame_size
& 0xfff) != 0)
2084 insn
= emit_insn (gen_add2_insn
2086 GEN_INT (-(frame_size
2087 & (HOST_WIDE_INT
)0xfff))));
2088 RTX_FRAME_RELATED_P (insn
) = 1;
2097 /* Save the frame pointer and lr if the frame pointer is needed
2098 first. Make the frame pointer point to the location of the
2099 old frame pointer on the stack. */
2100 if (frame_pointer_needed
)
2106 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
,
2107 GEN_INT (-offset
)));
2108 RTX_FRAME_RELATED_P (insn
) = 1;
2109 aarch64_set_frame_expr (gen_rtx_SET
2110 (Pmode
, stack_pointer_rtx
,
2111 gen_rtx_MINUS (Pmode
,
2113 GEN_INT (offset
))));
2114 mem_fp
= gen_frame_mem (DImode
,
2115 plus_constant (Pmode
,
2118 mem_lr
= gen_frame_mem (DImode
,
2119 plus_constant (Pmode
,
2123 insn
= emit_insn (gen_store_pairdi (mem_fp
,
2124 hard_frame_pointer_rtx
,
2126 gen_rtx_REG (DImode
,
2131 insn
= emit_insn (gen_storewb_pairdi_di
2132 (stack_pointer_rtx
, stack_pointer_rtx
,
2133 hard_frame_pointer_rtx
,
2134 gen_rtx_REG (DImode
, LR_REGNUM
),
2136 GEN_INT (GET_MODE_SIZE (DImode
) - offset
)));
2137 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 2)) = 1;
          /* The first part of a frame-related parallel insn is always
             assumed to be relevant to the frame calculations;
             subsequent parts are only frame-related if explicitly
             marked.  */
2144 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 1)) = 1;
2145 RTX_FRAME_RELATED_P (insn
) = 1;
2147 /* Set up frame pointer to point to the location of the
2148 previous frame pointer on the stack. */
2149 insn
= emit_insn (gen_add3_insn (hard_frame_pointer_rtx
,
2151 GEN_INT (fp_offset
)));
2152 aarch64_set_frame_expr (gen_rtx_SET
2153 (Pmode
, hard_frame_pointer_rtx
,
2154 plus_constant (Pmode
,
2157 RTX_FRAME_RELATED_P (insn
) = 1;
2158 insn
= emit_insn (gen_stack_tie (stack_pointer_rtx
,
2159 hard_frame_pointer_rtx
));
2163 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
,
2164 GEN_INT (-offset
)));
2165 RTX_FRAME_RELATED_P (insn
) = 1;
2168 aarch64_save_or_restore_callee_save_registers
2169 (fp_offset
+ cfun
->machine
->frame
.hardfp_offset
, 0);
2172 /* when offset >= 512,
2173 sub sp, sp, #<outgoing_args_size> */
2174 if (frame_size
> -1)
2176 if (crtl
->outgoing_args_size
> 0)
2178 insn
= emit_insn (gen_add2_insn
2180 GEN_INT (- crtl
->outgoing_args_size
)));
2181 RTX_FRAME_RELATED_P (insn
) = 1;
2186 /* Generate the epilogue instructions for returning from a function. */
2188 aarch64_expand_epilogue (bool for_sibcall
)
2190 HOST_WIDE_INT original_frame_size
, frame_size
, offset
;
2191 HOST_WIDE_INT fp_offset
;
2195 aarch64_layout_frame ();
2196 original_frame_size
= get_frame_size () + cfun
->machine
->saved_varargs_size
;
2197 frame_size
= (original_frame_size
+ cfun
->machine
->frame
.saved_regs_size
2198 + crtl
->outgoing_args_size
);
2199 offset
= frame_size
= AARCH64_ROUND_UP (frame_size
,
2200 STACK_BOUNDARY
/ BITS_PER_UNIT
);
2203 - original_frame_size
2204 - cfun
->machine
->frame
.saved_regs_size
);
2206 cfa_reg
= frame_pointer_needed
? hard_frame_pointer_rtx
: stack_pointer_rtx
;
  /* Store pairs and load pairs have a range of only -512 to 504.  */
2211 offset
= original_frame_size
+ cfun
->machine
->frame
.saved_regs_size
;
2213 offset
= cfun
->machine
->frame
.saved_regs_size
;
2215 frame_size
-= (offset
+ crtl
->outgoing_args_size
);
2217 if (!frame_pointer_needed
&& crtl
->outgoing_args_size
> 0)
2219 insn
= emit_insn (gen_add2_insn
2221 GEN_INT (crtl
->outgoing_args_size
)));
2222 RTX_FRAME_RELATED_P (insn
) = 1;
2228 /* If there were outgoing arguments or we've done dynamic stack
2229 allocation, then restore the stack pointer from the frame
2230 pointer. This is at most one insn and more efficient than using
2231 GCC's internal mechanism. */
2232 if (frame_pointer_needed
2233 && (crtl
->outgoing_args_size
|| cfun
->calls_alloca
))
2235 insn
= emit_insn (gen_add3_insn (stack_pointer_rtx
,
2236 hard_frame_pointer_rtx
,
2237 GEN_INT (- fp_offset
)));
2238 RTX_FRAME_RELATED_P (insn
) = 1;
2239 /* As SP is set to (FP - fp_offset), according to the rules in
2240 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2241 from the value of SP from now on. */
2242 cfa_reg
= stack_pointer_rtx
;
2245 aarch64_save_or_restore_callee_save_registers
2246 (fp_offset
+ cfun
->machine
->frame
.hardfp_offset
, 1);
2248 /* Restore the frame pointer and lr if the frame pointer is needed. */
2251 if (frame_pointer_needed
)
2257 mem_fp
= gen_frame_mem (DImode
,
2258 plus_constant (Pmode
,
2261 mem_lr
= gen_frame_mem (DImode
,
2262 plus_constant (Pmode
,
2266 insn
= emit_insn (gen_load_pairdi (hard_frame_pointer_rtx
,
2268 gen_rtx_REG (DImode
,
2274 insn
= emit_insn (gen_loadwb_pairdi_di
2277 hard_frame_pointer_rtx
,
2278 gen_rtx_REG (DImode
, LR_REGNUM
),
2280 GEN_INT (GET_MODE_SIZE (DImode
) + offset
)));
2281 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 2)) = 1;
2282 add_reg_note (insn
, REG_CFA_ADJUST_CFA
,
2283 (gen_rtx_SET (Pmode
, stack_pointer_rtx
,
2284 plus_constant (Pmode
, cfa_reg
,
2288 /* The first part of a frame-related parallel insn
2289 is always assumed to be relevant to the frame
2290 calculations; subsequent parts, are only
2291 frame-related if explicitly marked. */
2292 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 1)) = 1;
2293 RTX_FRAME_RELATED_P (insn
) = 1;
2294 add_reg_note (insn
, REG_CFA_RESTORE
, hard_frame_pointer_rtx
);
2295 add_reg_note (insn
, REG_CFA_RESTORE
,
2296 gen_rtx_REG (DImode
, LR_REGNUM
));
2300 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
,
2302 RTX_FRAME_RELATED_P (insn
) = 1;
2307 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
,
2309 RTX_FRAME_RELATED_P (insn
) = 1;
2313 /* Stack adjustment for exception handler. */
2314 if (crtl
->calls_eh_return
)
2316 /* We need to unwind the stack by the offset computed by
2317 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2318 based on SP. Ideally we would update the SP and define the
2319 CFA along the lines of:
2321 SP = SP + EH_RETURN_STACKADJ_RTX
2322 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2324 However the dwarf emitter only understands a constant
2327 The solution chosen here is to use the otherwise unused IP0
2328 as a temporary register to hold the current SP value. The
2329 CFA is described using IP0 then SP is modified. */
2331 rtx ip0
= gen_rtx_REG (DImode
, IP0_REGNUM
);
2333 insn
= emit_move_insn (ip0
, stack_pointer_rtx
);
2334 add_reg_note (insn
, REG_CFA_DEF_CFA
, ip0
);
2335 RTX_FRAME_RELATED_P (insn
) = 1;
2337 emit_insn (gen_add2_insn (stack_pointer_rtx
, EH_RETURN_STACKADJ_RTX
));
2339 /* Ensure the assignment to IP0 does not get optimized away. */
2343 if (frame_size
> -1)
2345 if (frame_size
>= 0x1000000)
2347 rtx op0
= gen_rtx_REG (Pmode
, IP0_REGNUM
);
2348 emit_move_insn (op0
, GEN_INT (frame_size
));
2349 emit_insn (gen_add2_insn (stack_pointer_rtx
, op0
));
2350 aarch64_set_frame_expr (gen_rtx_SET
2351 (Pmode
, stack_pointer_rtx
,
2352 plus_constant (Pmode
,
2356 else if (frame_size
> 0)
2358 if ((frame_size
& 0xfff) != 0)
2360 insn
= emit_insn (gen_add2_insn
2362 GEN_INT ((frame_size
2363 & (HOST_WIDE_INT
) 0xfff))));
2364 RTX_FRAME_RELATED_P (insn
) = 1;
2366 if ((frame_size
& 0xfff) != frame_size
)
2368 insn
= emit_insn (gen_add2_insn
2370 GEN_INT ((frame_size
2371 & ~ (HOST_WIDE_INT
) 0xfff))));
2372 RTX_FRAME_RELATED_P (insn
) = 1;
2376 aarch64_set_frame_expr (gen_rtx_SET (Pmode
, stack_pointer_rtx
,
2377 plus_constant (Pmode
,
2382 emit_use (gen_rtx_REG (DImode
, LR_REGNUM
));
2384 emit_jump_insn (ret_rtx
);
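/* Illustrative only (an assumed example, not generated verbatim by this
   file): for a leaf-like frame where only FP/LR are saved (offset == 16,
   fp_offset == 0) the epilogue above typically amounts to

       ldp  x29, x30, [sp], 16
       ret

   while frames of 512 bytes or more restore FP/LR with an explicit offset
   and then add the remaining frame_size back to sp in one or two adds.  */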
/* Return the place to copy the exception unwinding return address to.
   This will probably be a stack slot, but could (in theory be the
   return register).  */

rtx
aarch64_final_eh_return_addr (void)
{
  HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;

  aarch64_layout_frame ();
  original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
  frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
		+ crtl->outgoing_args_size);
  offset = frame_size = AARCH64_ROUND_UP (frame_size,
					   STACK_BOUNDARY / BITS_PER_UNIT);
  fp_offset = offset
    - original_frame_size
    - cfun->machine->frame.saved_regs_size;

  if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
    return gen_rtx_REG (DImode, LR_REGNUM);

  /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
     result in a store to save LR introduced by builtin_eh_return () being
     incorrectly deleted because the alias is not detected.
     So in the calculation of the address to copy the exception unwinding
     return address to, we note 2 cases.
     If FP is needed and the fp_offset is 0, it means that SP = FP and hence
     we return a SP-relative location since all the addresses are SP-relative
     in this case.  This prevents the store from being optimized away.
     If the fp_offset is not 0, then the addresses will be FP-relative and
     therefore we return a FP-relative location.  */

  if (frame_pointer_needed)
    {
      if (fp_offset)
	return gen_frame_mem (DImode,
			      plus_constant (Pmode,
					     hard_frame_pointer_rtx,
					     UNITS_PER_WORD));
      else
	return gen_frame_mem (DImode,
			      plus_constant (Pmode,
					     stack_pointer_rtx,
					     UNITS_PER_WORD));
    }

  /* If FP is not needed, we calculate the location of LR, which would be
     at the top of the saved registers block.  */

  return gen_frame_mem (DImode,
			plus_constant (Pmode,
				       stack_pointer_rtx,
				       fp_offset
				       + cfun->machine->frame.saved_regs_size
				       - 2 * UNITS_PER_WORD));
}
/* Output code to build up a constant in a register.  */
static void
aarch64_build_constant (int regnum, HOST_WIDE_INT val)
{
  if (aarch64_bitmask_imm (val, DImode))
    emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
  else
    {
      int i;
      int ncount = 0;
      int zcount = 0;
      HOST_WIDE_INT valp = val >> 16;
      HOST_WIDE_INT valm;
      HOST_WIDE_INT tval;

      for (i = 16; i < 64; i += 16)
	{
	  valm = (valp & 0xffff);

	  if (valm != 0)
	    ++ zcount;

	  if (valm != 0xffff)
	    ++ ncount;

	  valp >>= 16;
	}

      /* zcount contains the number of additional MOVK instructions
	 required if the constant is built up with an initial MOVZ instruction,
	 while ncount is the number of MOVK instructions required if starting
	 with a MOVN instruction.  Choose the sequence that yields the fewest
	 number of instructions, preferring MOVZ instructions when they are both
	 the same.  */
      if (ncount < zcount)
	{
	  emit_move_insn (gen_rtx_REG (Pmode, regnum),
			  GEN_INT ((~val) & 0xffff));
	  tval = 0xffff;
	}
      else
	{
	  emit_move_insn (gen_rtx_REG (Pmode, regnum),
			  GEN_INT (val & 0xffff));
	  tval = 0;
	}

      val >>= 16;

      for (i = 16; i < 64; i += 16)
	{
	  if ((val & 0xffff) != tval)
	    emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
				       GEN_INT (i), GEN_INT (val & 0xffff)));
	  val >>= 16;
	}
    }
}
static void
aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
{
  HOST_WIDE_INT mdelta = delta;
  rtx this_rtx = gen_rtx_REG (Pmode, regnum);
  rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);

  if (mdelta < 0)
    mdelta = -mdelta;

  if (mdelta >= 4096 * 4096)
    {
      aarch64_build_constant (scratchreg, delta);
      emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
    }
  else if (mdelta > 0)
    {
      if (mdelta >= 4096)
	{
	  emit_insn (gen_rtx_SET (Pmode, scratch_rtx,
				  GEN_INT (mdelta / 4096)));
	  rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
	  if (delta < 0)
	    emit_insn (gen_rtx_SET (Pmode, this_rtx,
				    gen_rtx_MINUS (Pmode, this_rtx, shift)));
	  else
	    emit_insn (gen_rtx_SET (Pmode, this_rtx,
				    gen_rtx_PLUS (Pmode, this_rtx, shift)));
	}
      if (mdelta % 4096 != 0)
	{
	  scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
	  emit_insn (gen_rtx_SET (Pmode, this_rtx,
				  gen_rtx_PLUS (Pmode, this_rtx,
						scratch_rtx)));
	}
    }
}
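/* Worked example (illustrative only): aarch64_add_constant (R, S, 0x12345)
   emits roughly

       mov  xS, #18              // 0x12345 / 4096
       add  xR, xR, xS, lsl #12  // add the multiple-of-4096 part
       add  xR, xR, #0x345       // add the 12-bit remainder

   i.e. the large part of the displacement goes through the scratch register
   and the remainder is added as a plain 12-bit immediate.  */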
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */
static void
aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT delta,
			 HOST_WIDE_INT vcall_offset,
			 tree function)
{
  /* The this pointer is always in x0.  Note that this differs from
     Arm where the this pointer maybe bumped to r1 if r0 is required
     to return a pointer to an aggregate.  On AArch64 a result value
     pointer will be in x8.  */
  int this_regno = R0_REGNUM;
  rtx this_rtx, temp0, temp1, addr, insn, funexp;

  reload_completed = 1;
  emit_note (NOTE_INSN_PROLOGUE_END);

  if (vcall_offset == 0)
    aarch64_add_constant (this_regno, IP1_REGNUM, delta);
  else
    {
      gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);

      this_rtx = gen_rtx_REG (Pmode, this_regno);
      temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
      temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);

      addr = this_rtx;
      if (delta != 0)
	{
	  if (delta >= -256 && delta < 256)
	    addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
				       plus_constant (Pmode, this_rtx, delta));
	  else
	    aarch64_add_constant (this_regno, IP1_REGNUM, delta);
	}

      if (Pmode == ptr_mode)
	aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
      else
	aarch64_emit_move (temp0,
			   gen_rtx_ZERO_EXTEND (Pmode,
						gen_rtx_MEM (ptr_mode, addr)));

      if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
	addr = plus_constant (Pmode, temp0, vcall_offset);
      else
	{
	  aarch64_build_constant (IP1_REGNUM, vcall_offset);
	  addr = gen_rtx_PLUS (Pmode, temp0, temp1);
	}

      if (Pmode == ptr_mode)
	aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
      else
	aarch64_emit_move (temp1,
			   gen_rtx_SIGN_EXTEND (Pmode,
						gen_rtx_MEM (ptr_mode, addr)));

      emit_insn (gen_add2_insn (this_rtx, temp1));
    }

  /* Generate a tail call to the target function.  */
  if (!TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
  SIBLING_CALL_P (insn) = 1;

  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  /* Stop pretending to be a post-reload pass.  */
  reload_completed = 0;
}
static int
aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  if (GET_CODE (*x) == SYMBOL_REF)
    return SYMBOL_REF_TLS_MODEL (*x) != 0;

  /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
     TLS offsets, not real symbol references.  */
  if (GET_CODE (*x) == UNSPEC
      && XINT (*x, 1) == UNSPEC_TLS)
    return -1;

  return 0;
}

/* Return true if X refers to a thread-local symbol.  */

static bool
aarch64_tls_referenced_p (rtx x)
{
  if (!TARGET_HAVE_TLS)
    return false;

  return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
}
#define AARCH64_NUM_BITMASKS  5334
static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];

/* Comparator used by qsort/bsearch over the bitmask-immediate table.  */
static int
aarch64_bitmasks_cmp (const void *i1, const void *i2)
{
  const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
  const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;

  if (*imm1 < *imm2)
    return -1;
  if (*imm1 > *imm2)
    return +1;
  return 0;
}
static void
aarch64_build_bitmask_table (void)
{
  unsigned HOST_WIDE_INT mask, imm;
  unsigned int log_e, e, s, r;
  unsigned int nimms = 0;

  for (log_e = 1; log_e <= 6; log_e++)
    {
      e = 1 << log_e;
      if (e == 64)
	mask = ~(HOST_WIDE_INT) 0;
      else
	mask = ((HOST_WIDE_INT) 1 << e) - 1;
      for (s = 1; s < e; s++)
	{
	  for (r = 0; r < e; r++)
	    {
	      /* set s consecutive bits to 1 (s < 64) */
	      imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
	      /* rotate right by r */
	      if (r != 0)
		imm = ((imm >> r) | (imm << (e - r))) & mask;
	      /* replicate the constant depending on SIMD size */
	      switch (log_e) {
	      case 1: imm |= (imm <<  2);
	      case 2: imm |= (imm <<  4);
	      case 3: imm |= (imm <<  8);
	      case 4: imm |= (imm << 16);
	      case 5: imm |= (imm << 32);
	      case 6:
		break;
	      default:
		gcc_unreachable ();
	      }
	      gcc_assert (nimms < AARCH64_NUM_BITMASKS);
	      aarch64_bitmasks[nimms++] = imm;
	    }
	}
    }

  gcc_assert (nimms == AARCH64_NUM_BITMASKS);
  qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
	 aarch64_bitmasks_cmp);
}
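/* Purely illustrative sketch (not used by the compiler proper): shows how a
   single bitmask-immediate pattern is formed for element size E = 1 << LOG_E,
   with S consecutive set bits rotated right by R and then replicated across
   64 bits.  It mirrors the loop body above using only plain arithmetic; the
   function name is hypothetical.  */
static ATTRIBUTE_UNUSED unsigned HOST_WIDE_INT
aarch64_example_bitmask_pattern (unsigned int log_e, unsigned int s,
				 unsigned int r)
{
  unsigned int e = 1u << log_e;
  unsigned HOST_WIDE_INT mask
    = (e == 64) ? ~(unsigned HOST_WIDE_INT) 0
		: (((unsigned HOST_WIDE_INT) 1 << e) - 1);
  unsigned HOST_WIDE_INT imm = ((unsigned HOST_WIDE_INT) 1 << s) - 1;

  /* Rotate the run of S ones right by R within the E-bit element.  */
  if (r != 0)
    imm = ((imm >> r) | (imm << (e - r))) & mask;

  /* Replicate the E-bit pattern to fill all 64 bits.  */
  while (e < 64)
    {
      imm |= imm << e;
      e *= 2;
    }
  return imm;
}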
/* Return true if val can be encoded as a 12-bit unsigned immediate with
   a left shift of 0 or 12 bits.  */
bool
aarch64_uimm12_shift (HOST_WIDE_INT val)
{
  return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
	  || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
	  );
}

/* Return true if val is an immediate that can be loaded into a
   register by a MOVZ instruction.  */
static bool
aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
{
  if (GET_MODE_SIZE (mode) > 4)
    {
      if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
	  || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
	return 1;
    }
  else
    {
      /* Ignore sign extension.  */
      val &= (HOST_WIDE_INT) 0xffffffff;
    }
  return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
	  || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
}
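/* Examples (illustrative only): 0xabc and 0xabc000 both satisfy
   aarch64_uimm12_shift (a 12-bit value, optionally shifted left by 12),
   whereas 0x1abc000 does not.  Similarly 0xbeef0000 can be loaded by a
   single MOVZ (a 16-bit value at a 16-bit-aligned position) but 0x12beef
   cannot.  */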
/* Return true if val is a valid bitmask immediate.  */
bool
aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
{
  if (GET_MODE_SIZE (mode) < 8)
    {
      /* Replicate bit pattern.  */
      val &= (HOST_WIDE_INT) 0xffffffff;
      val |= val << 32;
    }
  return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
		  sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
}

/* Return true if val is an immediate that can be loaded into a
   register in a single instruction.  */
bool
aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
{
  if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
    return 1;
  return aarch64_bitmask_imm (val, mode);
}
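/* Examples (illustrative only): 0x0000ffff0000ffff (a run of 16 ones
   replicated in each 32-bit element) is in the table and is therefore a
   valid bitmask immediate, while 0x0000ffff0000fffe is not, because it is
   not a rotated run of consecutive ones replicated across the register.  */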
static bool
aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  if (GET_CODE (x) == HIGH)
    return true;

  split_const (x, &base, &offset);
  if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
    {
      if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
	  != SYMBOL_FORCE_TO_MEM)
	return true;
      else
	/* Avoid generating a 64-bit relocation in ILP32; leave
	   to aarch64_expand_mov_immediate to handle it properly.  */
	return mode != ptr_mode;
    }

  return aarch64_tls_referenced_p (x);
}
/* Return true if register REGNO is a valid index register.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
aarch64_regno_ok_for_index_p (int regno, bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
	return true;

      if (!reg_renumber)
	return false;

      regno = reg_renumber[regno];
    }
  return GP_REGNUM_P (regno);
}

/* Return true if register REGNO is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
aarch64_regno_ok_for_base_p (int regno, bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
	return true;

      if (!reg_renumber)
	return false;

      regno = reg_renumber[regno];
    }

  /* The fake registers will be eliminated to either the stack or
     hard frame pointer, both of which are usually valid base registers.
     Reload deals with the cases where the eliminated form isn't valid.  */
  return (GP_REGNUM_P (regno)
	  || regno == SP_REGNUM
	  || regno == FRAME_POINTER_REGNUM
	  || regno == ARG_POINTER_REGNUM);
}

/* Return true if X is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

static bool
aarch64_base_register_rtx_p (rtx x, bool strict_p)
{
  if (!strict_p && GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);

  return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
}
2842 /* Return true if address offset is a valid index. If it is, fill in INFO
2843 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2846 aarch64_classify_index (struct aarch64_address_info
*info
, rtx x
,
2847 enum machine_mode mode
, bool strict_p
)
2849 enum aarch64_address_type type
;
2854 if ((REG_P (x
) || GET_CODE (x
) == SUBREG
)
2855 && GET_MODE (x
) == Pmode
)
2857 type
= ADDRESS_REG_REG
;
2861 /* (sign_extend:DI (reg:SI)) */
2862 else if ((GET_CODE (x
) == SIGN_EXTEND
2863 || GET_CODE (x
) == ZERO_EXTEND
)
2864 && GET_MODE (x
) == DImode
2865 && GET_MODE (XEXP (x
, 0)) == SImode
)
2867 type
= (GET_CODE (x
) == SIGN_EXTEND
)
2868 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
2869 index
= XEXP (x
, 0);
2872 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2873 else if (GET_CODE (x
) == MULT
2874 && (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
2875 || GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
2876 && GET_MODE (XEXP (x
, 0)) == DImode
2877 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
2878 && CONST_INT_P (XEXP (x
, 1)))
2880 type
= (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
2881 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
2882 index
= XEXP (XEXP (x
, 0), 0);
2883 shift
= exact_log2 (INTVAL (XEXP (x
, 1)));
2885 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2886 else if (GET_CODE (x
) == ASHIFT
2887 && (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
2888 || GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
2889 && GET_MODE (XEXP (x
, 0)) == DImode
2890 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
2891 && CONST_INT_P (XEXP (x
, 1)))
2893 type
= (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
2894 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
2895 index
= XEXP (XEXP (x
, 0), 0);
2896 shift
= INTVAL (XEXP (x
, 1));
2898 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2899 else if ((GET_CODE (x
) == SIGN_EXTRACT
2900 || GET_CODE (x
) == ZERO_EXTRACT
)
2901 && GET_MODE (x
) == DImode
2902 && GET_CODE (XEXP (x
, 0)) == MULT
2903 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
2904 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
2906 type
= (GET_CODE (x
) == SIGN_EXTRACT
)
2907 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
2908 index
= XEXP (XEXP (x
, 0), 0);
2909 shift
= exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)));
2910 if (INTVAL (XEXP (x
, 1)) != 32 + shift
2911 || INTVAL (XEXP (x
, 2)) != 0)
2914 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2915 (const_int 0xffffffff<<shift)) */
2916 else if (GET_CODE (x
) == AND
2917 && GET_MODE (x
) == DImode
2918 && GET_CODE (XEXP (x
, 0)) == MULT
2919 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
2920 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
2921 && CONST_INT_P (XEXP (x
, 1)))
2923 type
= ADDRESS_REG_UXTW
;
2924 index
= XEXP (XEXP (x
, 0), 0);
2925 shift
= exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)));
2926 if (INTVAL (XEXP (x
, 1)) != (HOST_WIDE_INT
)0xffffffff << shift
)
2929 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2930 else if ((GET_CODE (x
) == SIGN_EXTRACT
2931 || GET_CODE (x
) == ZERO_EXTRACT
)
2932 && GET_MODE (x
) == DImode
2933 && GET_CODE (XEXP (x
, 0)) == ASHIFT
2934 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
2935 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
2937 type
= (GET_CODE (x
) == SIGN_EXTRACT
)
2938 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
2939 index
= XEXP (XEXP (x
, 0), 0);
2940 shift
= INTVAL (XEXP (XEXP (x
, 0), 1));
2941 if (INTVAL (XEXP (x
, 1)) != 32 + shift
2942 || INTVAL (XEXP (x
, 2)) != 0)
2945 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2946 (const_int 0xffffffff<<shift)) */
2947 else if (GET_CODE (x
) == AND
2948 && GET_MODE (x
) == DImode
2949 && GET_CODE (XEXP (x
, 0)) == ASHIFT
2950 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
2951 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
2952 && CONST_INT_P (XEXP (x
, 1)))
2954 type
= ADDRESS_REG_UXTW
;
2955 index
= XEXP (XEXP (x
, 0), 0);
2956 shift
= INTVAL (XEXP (XEXP (x
, 0), 1));
2957 if (INTVAL (XEXP (x
, 1)) != (HOST_WIDE_INT
)0xffffffff << shift
)
2960 /* (mult:P (reg:P) (const_int scale)) */
2961 else if (GET_CODE (x
) == MULT
2962 && GET_MODE (x
) == Pmode
2963 && GET_MODE (XEXP (x
, 0)) == Pmode
2964 && CONST_INT_P (XEXP (x
, 1)))
2966 type
= ADDRESS_REG_REG
;
2967 index
= XEXP (x
, 0);
2968 shift
= exact_log2 (INTVAL (XEXP (x
, 1)));
2970 /* (ashift:P (reg:P) (const_int shift)) */
2971 else if (GET_CODE (x
) == ASHIFT
2972 && GET_MODE (x
) == Pmode
2973 && GET_MODE (XEXP (x
, 0)) == Pmode
2974 && CONST_INT_P (XEXP (x
, 1)))
2976 type
= ADDRESS_REG_REG
;
2977 index
= XEXP (x
, 0);
2978 shift
= INTVAL (XEXP (x
, 1));
2983 if (GET_CODE (index
) == SUBREG
)
2984 index
= SUBREG_REG (index
);
2987 (shift
> 0 && shift
<= 3
2988 && (1 << shift
) == GET_MODE_SIZE (mode
)))
2990 && aarch64_regno_ok_for_index_p (REGNO (index
), strict_p
))
2993 info
->offset
= index
;
2994 info
->shift
= shift
;
static inline bool
offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
{
  return (offset >= -64 * GET_MODE_SIZE (mode)
	  && offset < 64 * GET_MODE_SIZE (mode)
	  && offset % GET_MODE_SIZE (mode) == 0);
}

static inline bool
offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
			       HOST_WIDE_INT offset)
{
  return offset >= -256 && offset < 256;
}

static inline bool
offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
{
  return (offset >= 0
	  && offset < 4096 * GET_MODE_SIZE (mode)
	  && offset % GET_MODE_SIZE (mode) == 0);
}
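/* Illustrative summary of the ranges checked above: for DImode (8-byte)
   accesses these predicates accept, respectively, multiples of 8 in
   [-512, 504], any offset in [-256, 255], and multiples of 8 in
   [0, 32760].  */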
3024 /* Return true if X is a valid address for machine mode MODE. If it is,
3025 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3026 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3029 aarch64_classify_address (struct aarch64_address_info
*info
,
3030 rtx x
, enum machine_mode mode
,
3031 RTX_CODE outer_code
, bool strict_p
)
3033 enum rtx_code code
= GET_CODE (x
);
3035 bool allow_reg_index_p
=
3036 outer_code
!= PARALLEL
&& GET_MODE_SIZE(mode
) != 16;
3038 /* Don't support anything other than POST_INC or REG addressing for
3040 if (aarch64_vector_mode_p (mode
)
3041 && (code
!= POST_INC
&& code
!= REG
))
3048 info
->type
= ADDRESS_REG_IMM
;
3050 info
->offset
= const0_rtx
;
3051 return aarch64_base_register_rtx_p (x
, strict_p
);
3056 if (GET_MODE_SIZE (mode
) != 0
3057 && CONST_INT_P (op1
)
3058 && aarch64_base_register_rtx_p (op0
, strict_p
))
3060 HOST_WIDE_INT offset
= INTVAL (op1
);
3062 info
->type
= ADDRESS_REG_IMM
;
3066 /* TImode and TFmode values are allowed in both pairs of X
3067 registers and individual Q registers. The available
3069 X,X: 7-bit signed scaled offset
3070 Q: 9-bit signed offset
3071 We conservatively require an offset representable in either mode.
3073 if (mode
== TImode
|| mode
== TFmode
)
3074 return (offset_7bit_signed_scaled_p (mode
, offset
)
3075 && offset_9bit_signed_unscaled_p (mode
, offset
));
3077 if (outer_code
== PARALLEL
)
3078 return ((GET_MODE_SIZE (mode
) == 4 || GET_MODE_SIZE (mode
) == 8)
3079 && offset_7bit_signed_scaled_p (mode
, offset
));
3081 return (offset_9bit_signed_unscaled_p (mode
, offset
)
3082 || offset_12bit_unsigned_scaled_p (mode
, offset
));
3085 if (allow_reg_index_p
)
3087 /* Look for base + (scaled/extended) index register. */
3088 if (aarch64_base_register_rtx_p (op0
, strict_p
)
3089 && aarch64_classify_index (info
, op1
, mode
, strict_p
))
3094 if (aarch64_base_register_rtx_p (op1
, strict_p
)
3095 && aarch64_classify_index (info
, op0
, mode
, strict_p
))
3108 info
->type
= ADDRESS_REG_WB
;
3109 info
->base
= XEXP (x
, 0);
3110 info
->offset
= NULL_RTX
;
3111 return aarch64_base_register_rtx_p (info
->base
, strict_p
);
3115 info
->type
= ADDRESS_REG_WB
;
3116 info
->base
= XEXP (x
, 0);
3117 if (GET_CODE (XEXP (x
, 1)) == PLUS
3118 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
3119 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), info
->base
)
3120 && aarch64_base_register_rtx_p (info
->base
, strict_p
))
3122 HOST_WIDE_INT offset
;
3123 info
->offset
= XEXP (XEXP (x
, 1), 1);
3124 offset
= INTVAL (info
->offset
);
3126 /* TImode and TFmode values are allowed in both pairs of X
3127 registers and individual Q registers. The available
3129 X,X: 7-bit signed scaled offset
3130 Q: 9-bit signed offset
3131 We conservatively require an offset representable in either mode.
3133 if (mode
== TImode
|| mode
== TFmode
)
3134 return (offset_7bit_signed_scaled_p (mode
, offset
)
3135 && offset_9bit_signed_unscaled_p (mode
, offset
));
3137 if (outer_code
== PARALLEL
)
3138 return ((GET_MODE_SIZE (mode
) == 4 || GET_MODE_SIZE (mode
) == 8)
3139 && offset_7bit_signed_scaled_p (mode
, offset
));
3141 return offset_9bit_signed_unscaled_p (mode
, offset
);
3148 /* load literal: pc-relative constant pool entry. Only supported
3149 for SI mode or larger. */
3150 info
->type
= ADDRESS_SYMBOLIC
;
3151 if (outer_code
!= PARALLEL
&& GET_MODE_SIZE (mode
) >= 4)
3155 split_const (x
, &sym
, &addend
);
3156 return (GET_CODE (sym
) == LABEL_REF
3157 || (GET_CODE (sym
) == SYMBOL_REF
3158 && CONSTANT_POOL_ADDRESS_P (sym
)));
3163 info
->type
= ADDRESS_LO_SUM
;
3164 info
->base
= XEXP (x
, 0);
3165 info
->offset
= XEXP (x
, 1);
3166 if (allow_reg_index_p
3167 && aarch64_base_register_rtx_p (info
->base
, strict_p
))
3170 split_const (info
->offset
, &sym
, &offs
);
3171 if (GET_CODE (sym
) == SYMBOL_REF
3172 && (aarch64_classify_symbol (sym
, SYMBOL_CONTEXT_MEM
)
3173 == SYMBOL_SMALL_ABSOLUTE
))
3175 /* The symbol and offset must be aligned to the access size. */
3177 unsigned int ref_size
;
3179 if (CONSTANT_POOL_ADDRESS_P (sym
))
3180 align
= GET_MODE_ALIGNMENT (get_pool_mode (sym
));
3181 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym
))
3183 tree exp
= SYMBOL_REF_DECL (sym
);
3184 align
= TYPE_ALIGN (TREE_TYPE (exp
));
3185 align
= CONSTANT_ALIGNMENT (exp
, align
);
3187 else if (SYMBOL_REF_DECL (sym
))
3188 align
= DECL_ALIGN (SYMBOL_REF_DECL (sym
));
3190 align
= BITS_PER_UNIT
;
3192 ref_size
= GET_MODE_SIZE (mode
);
3194 ref_size
= GET_MODE_SIZE (DImode
);
3196 return ((INTVAL (offs
) & (ref_size
- 1)) == 0
3197 && ((align
/ BITS_PER_UNIT
) & (ref_size
- 1)) == 0);
3208 aarch64_symbolic_address_p (rtx x
)
3212 split_const (x
, &x
, &offset
);
3213 return GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
;
3216 /* Classify the base of symbolic expression X, given that X appears in
3219 enum aarch64_symbol_type
3220 aarch64_classify_symbolic_expression (rtx x
,
3221 enum aarch64_symbol_context context
)
3225 split_const (x
, &x
, &offset
);
3226 return aarch64_classify_symbol (x
, context
);
3230 /* Return TRUE if X is a legitimate address for accessing memory in
3233 aarch64_legitimate_address_hook_p (enum machine_mode mode
, rtx x
, bool strict_p
)
3235 struct aarch64_address_info addr
;
3237 return aarch64_classify_address (&addr
, x
, mode
, MEM
, strict_p
);
3240 /* Return TRUE if X is a legitimate address for accessing memory in
3241 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3244 aarch64_legitimate_address_p (enum machine_mode mode
, rtx x
,
3245 RTX_CODE outer_code
, bool strict_p
)
3247 struct aarch64_address_info addr
;
3249 return aarch64_classify_address (&addr
, x
, mode
, outer_code
, strict_p
);
/* Return TRUE if rtx X is immediate constant 0.0 */
bool
aarch64_float_const_zero_rtx_p (rtx x)
{
  REAL_VALUE_TYPE r;

  if (GET_MODE (x) == VOIDmode)
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  if (REAL_VALUE_MINUS_ZERO (r))
    return !HONOR_SIGNED_ZEROS (GET_MODE (x));
  return REAL_VALUES_EQUAL (r, dconst0);
}

/* Return the fixed registers used for condition codes.  */

static bool
aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = CC_REGNUM;
  *p2 = INVALID_REGNUM;
  return true;
}
3278 aarch64_select_cc_mode (RTX_CODE code
, rtx x
, rtx y
)
3280 /* All floating point compares return CCFP if it is an equality
3281 comparison, and CCFPE otherwise. */
3282 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
3309 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
3311 && (code
== EQ
|| code
== NE
|| code
== LT
|| code
== GE
)
3312 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
|| GET_CODE (x
) == AND
3313 || GET_CODE (x
) == NEG
))
3316 /* A compare with a shifted or negated operand. Because of canonicalization,
3317 the comparison will have to be swapped when we emit the assembly
3319 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
3320 && (GET_CODE (y
) == REG
|| GET_CODE (y
) == SUBREG
)
3321 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
3322 || GET_CODE (x
) == LSHIFTRT
3323 || GET_CODE (x
) == ZERO_EXTEND
|| GET_CODE (x
) == SIGN_EXTEND
3324 || GET_CODE (x
) == NEG
))
3327 /* A compare of a mode narrower than SI mode against zero can be done
3328 by extending the value in the comparison. */
3329 if ((GET_MODE (x
) == QImode
|| GET_MODE (x
) == HImode
)
3331 /* Only use sign-extension if we really need it. */
3332 return ((code
== GT
|| code
== GE
|| code
== LE
|| code
== LT
)
3333 ? CC_SESWPmode
: CC_ZESWPmode
);
3335 /* For everything else, return CCmode. */
3340 aarch64_get_condition_code (rtx x
)
3342 enum machine_mode mode
= GET_MODE (XEXP (x
, 0));
3343 enum rtx_code comp_code
= GET_CODE (x
);
3345 if (GET_MODE_CLASS (mode
) != MODE_CC
)
3346 mode
= SELECT_CC_MODE (comp_code
, XEXP (x
, 0), XEXP (x
, 1));
3354 case GE
: return AARCH64_GE
;
3355 case GT
: return AARCH64_GT
;
3356 case LE
: return AARCH64_LS
;
3357 case LT
: return AARCH64_MI
;
3358 case NE
: return AARCH64_NE
;
3359 case EQ
: return AARCH64_EQ
;
3360 case ORDERED
: return AARCH64_VC
;
3361 case UNORDERED
: return AARCH64_VS
;
3362 case UNLT
: return AARCH64_LT
;
3363 case UNLE
: return AARCH64_LE
;
3364 case UNGT
: return AARCH64_HI
;
3365 case UNGE
: return AARCH64_PL
;
3366 default: gcc_unreachable ();
3373 case NE
: return AARCH64_NE
;
3374 case EQ
: return AARCH64_EQ
;
3375 case GE
: return AARCH64_GE
;
3376 case GT
: return AARCH64_GT
;
3377 case LE
: return AARCH64_LE
;
3378 case LT
: return AARCH64_LT
;
3379 case GEU
: return AARCH64_CS
;
3380 case GTU
: return AARCH64_HI
;
3381 case LEU
: return AARCH64_LS
;
3382 case LTU
: return AARCH64_CC
;
3383 default: gcc_unreachable ();
3392 case NE
: return AARCH64_NE
;
3393 case EQ
: return AARCH64_EQ
;
3394 case GE
: return AARCH64_LE
;
3395 case GT
: return AARCH64_LT
;
3396 case LE
: return AARCH64_GE
;
3397 case LT
: return AARCH64_GT
;
3398 case GEU
: return AARCH64_LS
;
3399 case GTU
: return AARCH64_CC
;
3400 case LEU
: return AARCH64_CS
;
3401 case LTU
: return AARCH64_HI
;
3402 default: gcc_unreachable ();
3409 case NE
: return AARCH64_NE
;
3410 case EQ
: return AARCH64_EQ
;
3411 case GE
: return AARCH64_PL
;
3412 case LT
: return AARCH64_MI
;
3413 default: gcc_unreachable ();
3424 bit_count (unsigned HOST_WIDE_INT value
)
3438 aarch64_print_operand (FILE *f
, rtx x
, char code
)
3443 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3447 if (GET_CODE (x
) != CONST_INT
3448 || (n
= exact_log2 (INTVAL (x
) & ~7)) <= 0)
3450 output_operand_lossage ("invalid operand for '%%%c'", code
);
3466 output_operand_lossage ("invalid operand for '%%%c'", code
);
3476 /* Print N such that 2^N == X. */
3477 if (GET_CODE (x
) != CONST_INT
|| (n
= exact_log2 (INTVAL (x
))) < 0)
3479 output_operand_lossage ("invalid operand for '%%%c'", code
);
3483 asm_fprintf (f
, "%d", n
);
3488 /* Print the number of non-zero bits in X (a const_int). */
3489 if (GET_CODE (x
) != CONST_INT
)
3491 output_operand_lossage ("invalid operand for '%%%c'", code
);
3495 asm_fprintf (f
, "%u", bit_count (INTVAL (x
)));
3499 /* Print the higher numbered register of a pair (TImode) of regs. */
3500 if (GET_CODE (x
) != REG
|| !GP_REGNUM_P (REGNO (x
) + 1))
3502 output_operand_lossage ("invalid operand for '%%%c'", code
);
3506 asm_fprintf (f
, "%s", reg_names
[REGNO (x
) + 1]);
3510 /* Print a condition (eq, ne, etc). */
3512 /* CONST_TRUE_RTX means always -- that's the default. */
3513 if (x
== const_true_rtx
)
3516 if (!COMPARISON_P (x
))
3518 output_operand_lossage ("invalid operand for '%%%c'", code
);
3522 fputs (aarch64_condition_codes
[aarch64_get_condition_code (x
)], f
);
3526 /* Print the inverse of a condition (eq <-> ne, etc). */
3528 /* CONST_TRUE_RTX means never -- that's the default. */
3529 if (x
== const_true_rtx
)
3535 if (!COMPARISON_P (x
))
3537 output_operand_lossage ("invalid operand for '%%%c'", code
);
3541 fputs (aarch64_condition_codes
[AARCH64_INVERSE_CONDITION_CODE
3542 (aarch64_get_condition_code (x
))], f
);
3550 /* Print a scalar FP/SIMD register name. */
3551 if (!REG_P (x
) || !FP_REGNUM_P (REGNO (x
)))
3553 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code
);
3556 asm_fprintf (f
, "%c%d", code
, REGNO (x
) - V0_REGNUM
);
3563 /* Print the first FP/SIMD register name in a list. */
3564 if (!REG_P (x
) || !FP_REGNUM_P (REGNO (x
)))
3566 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code
);
3569 asm_fprintf (f
, "v%d", REGNO (x
) - V0_REGNUM
+ (code
- 'S'));
3573 /* Print bottom 16 bits of integer constant in hex. */
3574 if (GET_CODE (x
) != CONST_INT
)
3576 output_operand_lossage ("invalid operand for '%%%c'", code
);
3579 asm_fprintf (f
, "0x%wx", UINTVAL (x
) & 0xffff);
3584 /* Print a general register name or the zero register (32-bit or
3587 || (CONST_DOUBLE_P (x
) && aarch64_float_const_zero_rtx_p (x
)))
3589 asm_fprintf (f
, "%czr", code
);
3593 if (REG_P (x
) && GP_REGNUM_P (REGNO (x
)))
3595 asm_fprintf (f
, "%c%d", code
, REGNO (x
) - R0_REGNUM
);
3599 if (REG_P (x
) && REGNO (x
) == SP_REGNUM
)
3601 asm_fprintf (f
, "%ssp", code
== 'w' ? "w" : "");
3608 /* Print a normal operand, if it's a general register, then we
3612 output_operand_lossage ("missing operand");
3616 switch (GET_CODE (x
))
3619 asm_fprintf (f
, "%s", reg_names
[REGNO (x
)]);
3623 aarch64_memory_reference_mode
= GET_MODE (x
);
3624 output_address (XEXP (x
, 0));
3629 output_addr_const (asm_out_file
, x
);
3633 asm_fprintf (f
, "%wd", INTVAL (x
));
3637 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_VECTOR_INT
)
3639 gcc_assert (aarch64_const_vec_all_same_int_p (x
,
3641 HOST_WIDE_INT_MAX
));
3642 asm_fprintf (f
, "%wd", INTVAL (CONST_VECTOR_ELT (x
, 0)));
3644 else if (aarch64_simd_imm_zero_p (x
, GET_MODE (x
)))
3653 /* CONST_DOUBLE can represent a double-width integer.
3654 In this case, the mode of x is VOIDmode. */
3655 if (GET_MODE (x
) == VOIDmode
)
3657 else if (aarch64_float_const_zero_rtx_p (x
))
3662 else if (aarch64_float_const_representable_p (x
))
3665 char float_buf
[buf_size
] = {'\0'};
3667 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
3668 real_to_decimal_for_mode (float_buf
, &r
,
3671 asm_fprintf (asm_out_file
, "%s", float_buf
);
3675 output_operand_lossage ("invalid constant");
3678 output_operand_lossage ("invalid operand");
3684 if (GET_CODE (x
) == HIGH
)
3687 switch (aarch64_classify_symbolic_expression (x
, SYMBOL_CONTEXT_ADR
))
3689 case SYMBOL_SMALL_GOT
:
3690 asm_fprintf (asm_out_file
, ":got:");
3693 case SYMBOL_SMALL_TLSGD
:
3694 asm_fprintf (asm_out_file
, ":tlsgd:");
3697 case SYMBOL_SMALL_TLSDESC
:
3698 asm_fprintf (asm_out_file
, ":tlsdesc:");
3701 case SYMBOL_SMALL_GOTTPREL
:
3702 asm_fprintf (asm_out_file
, ":gottprel:");
3705 case SYMBOL_SMALL_TPREL
:
3706 asm_fprintf (asm_out_file
, ":tprel:");
3709 case SYMBOL_TINY_GOT
:
3716 output_addr_const (asm_out_file
, x
);
3720 switch (aarch64_classify_symbolic_expression (x
, SYMBOL_CONTEXT_ADR
))
3722 case SYMBOL_SMALL_GOT
:
3723 asm_fprintf (asm_out_file
, ":lo12:");
3726 case SYMBOL_SMALL_TLSGD
:
3727 asm_fprintf (asm_out_file
, ":tlsgd_lo12:");
3730 case SYMBOL_SMALL_TLSDESC
:
3731 asm_fprintf (asm_out_file
, ":tlsdesc_lo12:");
3734 case SYMBOL_SMALL_GOTTPREL
:
3735 asm_fprintf (asm_out_file
, ":gottprel_lo12:");
3738 case SYMBOL_SMALL_TPREL
:
3739 asm_fprintf (asm_out_file
, ":tprel_lo12_nc:");
3742 case SYMBOL_TINY_GOT
:
3743 asm_fprintf (asm_out_file
, ":got:");
3749 output_addr_const (asm_out_file
, x
);
3754 switch (aarch64_classify_symbolic_expression (x
, SYMBOL_CONTEXT_ADR
))
3756 case SYMBOL_SMALL_TPREL
:
3757 asm_fprintf (asm_out_file
, ":tprel_hi12:");
3762 output_addr_const (asm_out_file
, x
);
3766 output_operand_lossage ("invalid operand prefix '%%%c'", code
);
3772 aarch64_print_operand_address (FILE *f
, rtx x
)
3774 struct aarch64_address_info addr
;
3776 if (aarch64_classify_address (&addr
, x
, aarch64_memory_reference_mode
,
3780 case ADDRESS_REG_IMM
:
3781 if (addr
.offset
== const0_rtx
)
3782 asm_fprintf (f
, "[%s]", reg_names
[REGNO (addr
.base
)]);
3784 asm_fprintf (f
, "[%s,%wd]", reg_names
[REGNO (addr
.base
)],
3785 INTVAL (addr
.offset
));
3788 case ADDRESS_REG_REG
:
3789 if (addr
.shift
== 0)
3790 asm_fprintf (f
, "[%s,%s]", reg_names
[REGNO (addr
.base
)],
3791 reg_names
[REGNO (addr
.offset
)]);
3793 asm_fprintf (f
, "[%s,%s,lsl %u]", reg_names
[REGNO (addr
.base
)],
3794 reg_names
[REGNO (addr
.offset
)], addr
.shift
);
3797 case ADDRESS_REG_UXTW
:
3798 if (addr
.shift
== 0)
3799 asm_fprintf (f
, "[%s,w%d,uxtw]", reg_names
[REGNO (addr
.base
)],
3800 REGNO (addr
.offset
) - R0_REGNUM
);
3802 asm_fprintf (f
, "[%s,w%d,uxtw %u]", reg_names
[REGNO (addr
.base
)],
3803 REGNO (addr
.offset
) - R0_REGNUM
, addr
.shift
);
3806 case ADDRESS_REG_SXTW
:
3807 if (addr
.shift
== 0)
3808 asm_fprintf (f
, "[%s,w%d,sxtw]", reg_names
[REGNO (addr
.base
)],
3809 REGNO (addr
.offset
) - R0_REGNUM
);
3811 asm_fprintf (f
, "[%s,w%d,sxtw %u]", reg_names
[REGNO (addr
.base
)],
3812 REGNO (addr
.offset
) - R0_REGNUM
, addr
.shift
);
3815 case ADDRESS_REG_WB
:
3816 switch (GET_CODE (x
))
3819 asm_fprintf (f
, "[%s,%d]!", reg_names
[REGNO (addr
.base
)],
3820 GET_MODE_SIZE (aarch64_memory_reference_mode
));
3823 asm_fprintf (f
, "[%s],%d", reg_names
[REGNO (addr
.base
)],
3824 GET_MODE_SIZE (aarch64_memory_reference_mode
));
3827 asm_fprintf (f
, "[%s,-%d]!", reg_names
[REGNO (addr
.base
)],
3828 GET_MODE_SIZE (aarch64_memory_reference_mode
));
3831 asm_fprintf (f
, "[%s],-%d", reg_names
[REGNO (addr
.base
)],
3832 GET_MODE_SIZE (aarch64_memory_reference_mode
));
3835 asm_fprintf (f
, "[%s,%wd]!", reg_names
[REGNO (addr
.base
)],
3836 INTVAL (addr
.offset
));
3839 asm_fprintf (f
, "[%s],%wd", reg_names
[REGNO (addr
.base
)],
3840 INTVAL (addr
.offset
));
3847 case ADDRESS_LO_SUM
:
3848 asm_fprintf (f
, "[%s,#:lo12:", reg_names
[REGNO (addr
.base
)]);
3849 output_addr_const (f
, addr
.offset
);
3850 asm_fprintf (f
, "]");
3853 case ADDRESS_SYMBOLIC
:
3857 output_addr_const (f
, x
);
void
aarch64_function_profiler (FILE *f ATTRIBUTE_UNUSED,
			   int labelno ATTRIBUTE_UNUSED)
{
  sorry ("function profiling");
}

bool
aarch64_label_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == LABEL_REF)
    return true;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
     referencing instruction, but they are constant offsets, not
     symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return false;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}
/* Implement REGNO_REG_CLASS.  */

enum reg_class
aarch64_regno_regclass (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return CORE_REGS;

  if (regno == SP_REGNUM)
    return STACK_REG;

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return CORE_REGS;

  if (FP_REGNUM_P (regno))
    return FP_LO_REGNUM_P (regno) ?  FP_LO_REGS : FP_REGS;

  return NO_REGS;
}
3921 /* Try a machine-dependent way of reloading an illegitimate address
3922 operand. If we find one, push the reload and return the new rtx. */
3925 aarch64_legitimize_reload_address (rtx
*x_p
,
3926 enum machine_mode mode
,
3927 int opnum
, int type
,
3928 int ind_levels ATTRIBUTE_UNUSED
)
3932 /* Do not allow mem (plus (reg, const)) if vector mode. */
3933 if (aarch64_vector_mode_p (mode
)
3934 && GET_CODE (x
) == PLUS
3935 && REG_P (XEXP (x
, 0))
3936 && CONST_INT_P (XEXP (x
, 1)))
3940 push_reload (orig_rtx
, NULL_RTX
, x_p
, NULL
,
3941 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
3942 opnum
, (enum reload_type
) type
);
3946 /* We must recognize output that we have already generated ourselves. */
3947 if (GET_CODE (x
) == PLUS
3948 && GET_CODE (XEXP (x
, 0)) == PLUS
3949 && REG_P (XEXP (XEXP (x
, 0), 0))
3950 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
3951 && CONST_INT_P (XEXP (x
, 1)))
3953 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
3954 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
3955 opnum
, (enum reload_type
) type
);
3959 /* We wish to handle large displacements off a base register by splitting
3960 the addend across an add and the mem insn. This can cut the number of
3961 extra insns needed from 3 to 1. It is only useful for load/store of a
3962 single register with 12 bit offset field. */
3963 if (GET_CODE (x
) == PLUS
3964 && REG_P (XEXP (x
, 0))
3965 && CONST_INT_P (XEXP (x
, 1))
3966 && HARD_REGISTER_P (XEXP (x
, 0))
3969 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x
, 0)), true))
3971 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
3972 HOST_WIDE_INT low
= val
& 0xfff;
3973 HOST_WIDE_INT high
= val
- low
;
3976 enum machine_mode xmode
= GET_MODE (x
);
3978 /* In ILP32, xmode can be either DImode or SImode. */
3979 gcc_assert (xmode
== DImode
|| xmode
== SImode
);
3981 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
3982 BLKmode alignment. */
3983 if (GET_MODE_SIZE (mode
) == 0)
3986 offs
= low
% GET_MODE_SIZE (mode
);
3988 /* Align misaligned offset by adjusting high part to compensate. */
3991 if (aarch64_uimm12_shift (high
+ offs
))
4000 offs
= GET_MODE_SIZE (mode
) - offs
;
4002 high
= high
+ (low
& 0x1000) - offs
;
4007 /* Check for overflow. */
4008 if (high
+ low
!= val
)
4011 cst
= GEN_INT (high
);
4012 if (!aarch64_uimm12_shift (high
))
4013 cst
= force_const_mem (xmode
, cst
);
4015 /* Reload high part into base reg, leaving the low part
4016 in the mem instruction. */
4017 x
= plus_constant (xmode
,
4018 gen_rtx_PLUS (xmode
, XEXP (x
, 0), cst
),
4021 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
4022 BASE_REG_CLASS
, xmode
, VOIDmode
, 0, 0,
4023 opnum
, (enum reload_type
) type
);
4032 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED
, rtx x
,
4034 enum machine_mode mode
,
4035 secondary_reload_info
*sri
)
4037 /* Address expressions of the form PLUS (SP, large_offset) need two
4038 scratch registers, one for the constant, and one for holding a
4039 copy of SP, since SP cannot be used on the RHS of an add-reg
4042 && GET_CODE (x
) == PLUS
4043 && XEXP (x
, 0) == stack_pointer_rtx
4044 && CONST_INT_P (XEXP (x
, 1))
4045 && !aarch64_uimm12_shift (INTVAL (XEXP (x
, 1))))
4047 sri
->icode
= CODE_FOR_reload_sp_immediate
;
4051 /* Without the TARGET_SIMD instructions we cannot move a Q register
4052 to a Q register directly. We need a scratch. */
4053 if (REG_P (x
) && (mode
== TFmode
|| mode
== TImode
) && mode
== GET_MODE (x
)
4054 && FP_REGNUM_P (REGNO (x
)) && !TARGET_SIMD
4055 && reg_class_subset_p (rclass
, FP_REGS
))
4058 sri
->icode
= CODE_FOR_aarch64_reload_movtf
;
4059 else if (mode
== TImode
)
4060 sri
->icode
= CODE_FOR_aarch64_reload_movti
;
4064 /* A TFmode or TImode memory access should be handled via an FP_REGS
4065 because AArch64 has richer addressing modes for LDR/STR instructions
4066 than LDP/STP instructions. */
4067 if (!TARGET_GENERAL_REGS_ONLY
&& rclass
== CORE_REGS
4068 && GET_MODE_SIZE (mode
) == 16 && MEM_P (x
))
4071 if (rclass
== FP_REGS
&& (mode
== TImode
|| mode
== TFmode
) && CONSTANT_P(x
))
4078 aarch64_can_eliminate (const int from
, const int to
)
4080 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4081 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4083 if (frame_pointer_needed
)
4085 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4087 if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
4089 if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
4090 && !cfun
->calls_alloca
)
4092 if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4098 /* If we decided that we didn't need a leaf frame pointer but then used
4099 LR in the function, then we'll want a frame pointer after all, so
4100 prevent this elimination to ensure a frame pointer is used.
4102 NOTE: the original value of flag_omit_frame_pointer gets trashed
4103 IFF flag_omit_leaf_frame_pointer is true, so we check the value
4104 of faked_omit_frame_pointer here (which is true when we always
4105 wish to keep non-leaf frame pointers but only wish to keep leaf frame
4106 pointers when LR is clobbered). */
4107 if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
4108 && df_regs_ever_live_p (LR_REGNUM
)
4109 && faked_omit_frame_pointer
)
4117 aarch64_initial_elimination_offset (unsigned from
, unsigned to
)
4119 HOST_WIDE_INT frame_size
;
4120 HOST_WIDE_INT offset
;
4122 aarch64_layout_frame ();
4123 frame_size
= (get_frame_size () + cfun
->machine
->frame
.saved_regs_size
4124 + crtl
->outgoing_args_size
4125 + cfun
->machine
->saved_varargs_size
);
4127 frame_size
= AARCH64_ROUND_UP (frame_size
, STACK_BOUNDARY
/ BITS_PER_UNIT
);
4128 offset
= frame_size
;
4130 if (to
== HARD_FRAME_POINTER_REGNUM
)
4132 if (from
== ARG_POINTER_REGNUM
)
4133 return offset
- crtl
->outgoing_args_size
;
4135 if (from
== FRAME_POINTER_REGNUM
)
4136 return cfun
->machine
->frame
.saved_regs_size
;
4139 if (to
== STACK_POINTER_REGNUM
)
4141 if (from
== FRAME_POINTER_REGNUM
)
4143 HOST_WIDE_INT elim
= crtl
->outgoing_args_size
4144 + cfun
->machine
->frame
.saved_regs_size
4145 - cfun
->machine
->frame
.fp_lr_offset
;
4146 elim
= AARCH64_ROUND_UP (elim
, STACK_BOUNDARY
/ BITS_PER_UNIT
);
/* Implement RETURN_ADDR_RTX.  We do not support moving back to a
   previous frame.  */

rtx
aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return const0_rtx;
  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}

static void
aarch64_asm_trampoline_template (FILE *f)
{
  if (TARGET_ILP32)
    {
      asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
      asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
    }
  else
    {
      asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
      asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
    }
  asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
  assemble_aligned_integer (4, const0_rtx);
  assemble_aligned_integer (POINTER_BYTES, const0_rtx);
  assemble_aligned_integer (POINTER_BYTES, const0_rtx);
}

static void
aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;
  const int tramp_code_sz = 16;

  /* Don't need to copy the trailing D-words, we fill those in below.  */
  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
  mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  if (GET_MODE (fnaddr) != ptr_mode)
    fnaddr = convert_memory_address (ptr_mode, fnaddr);
  emit_move_insn (mem, fnaddr);

  mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
  emit_move_insn (mem, chain_value);

  /* XXX We should really define a "clear_cache" pattern and use
     gen_clear_cache().  */
  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
		     LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
		     plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
		     ptr_mode);
}
4213 static unsigned char
4214 aarch64_class_max_nregs (reg_class_t regclass
, enum machine_mode mode
)
4225 aarch64_vector_mode_p (mode
) ? (GET_MODE_SIZE (mode
) + 15) / 16 :
4226 (GET_MODE_SIZE (mode
) + 7) / 8;
4240 aarch64_preferred_reload_class (rtx x
, reg_class_t regclass
)
4242 if (regclass
== POINTER_REGS
|| regclass
== STACK_REG
)
4243 return GENERAL_REGS
;
4245 /* If it's an integer immediate that MOVI can't handle, then
4246 FP_REGS is not an option, so we return NO_REGS instead. */
4247 if (CONST_INT_P (x
) && reg_class_subset_p (regclass
, FP_REGS
)
4248 && !aarch64_simd_imm_scalar_p (x
, GET_MODE (x
)))
4255 aarch64_asm_output_labelref (FILE* f
, const char *name
)
4257 asm_fprintf (f
, "%U%s", name
);
4261 aarch64_elf_asm_constructor (rtx symbol
, int priority
)
4263 if (priority
== DEFAULT_INIT_PRIORITY
)
4264 default_ctor_section_asm_out_constructor (symbol
, priority
);
4269 snprintf (buf
, sizeof (buf
), ".init_array.%.5u", priority
);
4270 s
= get_section (buf
, SECTION_WRITE
, NULL
);
4271 switch_to_section (s
);
4272 assemble_align (POINTER_SIZE
);
4273 assemble_aligned_integer (POINTER_BYTES
, symbol
);
4278 aarch64_elf_asm_destructor (rtx symbol
, int priority
)
4280 if (priority
== DEFAULT_INIT_PRIORITY
)
4281 default_dtor_section_asm_out_destructor (symbol
, priority
);
4286 snprintf (buf
, sizeof (buf
), ".fini_array.%.5u", priority
);
4287 s
= get_section (buf
, SECTION_WRITE
, NULL
);
4288 switch_to_section (s
);
4289 assemble_align (POINTER_SIZE
);
4290 assemble_aligned_integer (POINTER_BYTES
, symbol
);
4295 aarch64_output_casesi (rtx
*operands
)
4299 rtx diff_vec
= PATTERN (next_active_insn (operands
[2]));
4301 static const char *const patterns
[4][2] =
4304 "ldrb\t%w3, [%0,%w1,uxtw]",
4305 "add\t%3, %4, %w3, sxtb #2"
4308 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4309 "add\t%3, %4, %w3, sxth #2"
4312 "ldr\t%w3, [%0,%w1,uxtw #2]",
4313 "add\t%3, %4, %w3, sxtw #2"
4315 /* We assume that DImode is only generated when not optimizing and
4316 that we don't really need 64-bit address offsets. That would
4317 imply an object file with 8GB of code in a single function! */
4319 "ldr\t%w3, [%0,%w1,uxtw #2]",
4320 "add\t%3, %4, %w3, sxtw #2"
4324 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
4326 index
= exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec
)));
4328 gcc_assert (index
>= 0 && index
<= 3);
4330 /* Need to implement table size reduction, by chaning the code below. */
4331 output_asm_insn (patterns
[index
][0], operands
);
4332 ASM_GENERATE_INTERNAL_LABEL (label
, "Lrtx", CODE_LABEL_NUMBER (operands
[2]));
4333 snprintf (buf
, sizeof (buf
),
4334 "adr\t%%4, %s", targetm
.strip_name_encoding (label
));
4335 output_asm_insn (buf
, operands
);
4336 output_asm_insn (patterns
[index
][1], operands
);
4337 output_asm_insn ("br\t%3", operands
);
4338 assemble_label (asm_out_file
, label
);
/* Return size in bits of an arithmetic operand which is shifted/scaled and
   masked such that it is suitable for a UXTB, UXTH, or UXTW extend
   operator.  */
int
aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
{
  if (shift >= 0 && shift <= 3)
    {
      int size;
      for (size = 8; size <= 32; size *= 2)
	{
	  HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
	  if (mask == bits << shift)
	    return size;
	}
    }
  return 0;
}

static bool
aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
				   const_rtx x ATTRIBUTE_UNUSED)
{
  /* We can't use blocks for constants when we're using a per-function
     constant pool.  */
  return false;
}

static section *
aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
			    rtx x ATTRIBUTE_UNUSED,
			    unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  /* Force all constant pool entries into the current function section.  */
  return function_section (current_function_decl);
}
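/* Example (illustrative only): aarch64_uxt_size (1, 0x1fe) returns 8,
   because 0xff << 1 == 0x1fe, i.e. the mask and shift correspond to a UXTB
   combined with a left shift by one.  */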
4384 /* Helper function for rtx cost calculation. Strip a shift expression
4385 from X. Returns the inner operand if successful, or the original
4386 expression on failure. */
4388 aarch64_strip_shift (rtx x
)
4392 if ((GET_CODE (op
) == ASHIFT
4393 || GET_CODE (op
) == ASHIFTRT
4394 || GET_CODE (op
) == LSHIFTRT
)
4395 && CONST_INT_P (XEXP (op
, 1)))
4396 return XEXP (op
, 0);
4398 if (GET_CODE (op
) == MULT
4399 && CONST_INT_P (XEXP (op
, 1))
4400 && ((unsigned) exact_log2 (INTVAL (XEXP (op
, 1)))) < 64)
4401 return XEXP (op
, 0);
4406 /* Helper function for rtx cost calculation. Strip a shift or extend
4407 expression from X. Returns the inner operand if successful, or the
4408 original expression on failure. We deal with a number of possible
4409 canonicalization variations here. */
4411 aarch64_strip_shift_or_extend (rtx x
)
4415 /* Zero and sign extraction of a widened value. */
4416 if ((GET_CODE (op
) == ZERO_EXTRACT
|| GET_CODE (op
) == SIGN_EXTRACT
)
4417 && XEXP (op
, 2) == const0_rtx
4418 && aarch64_is_extend_from_extract (GET_MODE (op
), XEXP (XEXP (op
, 0), 1),
4420 return XEXP (XEXP (op
, 0), 0);
4422 /* It can also be represented (for zero-extend) as an AND with an
4424 if (GET_CODE (op
) == AND
4425 && GET_CODE (XEXP (op
, 0)) == MULT
4426 && CONST_INT_P (XEXP (XEXP (op
, 0), 1))
4427 && CONST_INT_P (XEXP (op
, 1))
4428 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op
, 0), 1))),
4429 INTVAL (XEXP (op
, 1))) != 0)
4430 return XEXP (XEXP (op
, 0), 0);
4432 /* Now handle extended register, as this may also have an optional
4433 left shift by 1..4. */
4434 if (GET_CODE (op
) == ASHIFT
4435 && CONST_INT_P (XEXP (op
, 1))
4436 && ((unsigned HOST_WIDE_INT
) INTVAL (XEXP (op
, 1))) <= 4)
4439 if (GET_CODE (op
) == ZERO_EXTEND
4440 || GET_CODE (op
) == SIGN_EXTEND
)
4446 return aarch64_strip_shift (x
);
4449 /* Calculate the cost of calculating X, storing it in *COST. Result
4450 is true if the total cost of the operation has now been calculated. */
4452 aarch64_rtx_costs (rtx x
, int code
, int outer ATTRIBUTE_UNUSED
,
4453 int param ATTRIBUTE_UNUSED
, int *cost
, bool speed
)
4456 const struct cpu_rtx_cost_table
*extra_cost
4457 = aarch64_tune_params
->insn_extra_cost
;
4465 switch (GET_CODE (op0
))
4469 *cost
+= extra_cost
->memory_store
;
4471 if (op1
!= const0_rtx
)
4472 *cost
+= rtx_cost (op1
, SET
, 1, speed
);
4476 if (! REG_P (SUBREG_REG (op0
)))
4477 *cost
+= rtx_cost (SUBREG_REG (op0
), SET
, 0, speed
);
4480 /* Cost is just the cost of the RHS of the set. */
4481 *cost
+= rtx_cost (op1
, SET
, 1, true);
4484 case ZERO_EXTRACT
: /* Bit-field insertion. */
4486 /* Strip any redundant widening of the RHS to meet the width of
4488 if (GET_CODE (op1
) == SUBREG
)
4489 op1
= SUBREG_REG (op1
);
4490 if ((GET_CODE (op1
) == ZERO_EXTEND
4491 || GET_CODE (op1
) == SIGN_EXTEND
)
4492 && GET_CODE (XEXP (op0
, 1)) == CONST_INT
4493 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1
, 0)))
4494 >= INTVAL (XEXP (op0
, 1))))
4495 op1
= XEXP (op1
, 0);
4496 *cost
+= rtx_cost (op1
, SET
, 1, speed
);
4506 *cost
+= extra_cost
->memory_load
;
4511 op0
= CONST0_RTX (GET_MODE (x
));
4519 if (op1
== const0_rtx
4520 && GET_CODE (op0
) == AND
)
4526 /* Comparisons can work if the order is swapped.
4527 Canonicalization puts the more complex operation first, but
4528 we want it in op1. */
4530 || (GET_CODE (op0
) == SUBREG
&& REG_P (SUBREG_REG (op0
)))))
4542 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
4543 || (GET_MODE_CLASS (GET_MODE (x
)) == MODE_CC
4544 && GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
))
4546 if (op0
!= const0_rtx
)
4547 *cost
+= rtx_cost (op0
, MINUS
, 0, speed
);
4549 if (CONST_INT_P (op1
))
4551 if (!aarch64_uimm12_shift (INTVAL (op1
)))
4552 *cost
+= rtx_cost (op1
, MINUS
, 1, speed
);
4556 op1
= aarch64_strip_shift_or_extend (op1
);
4557 *cost
+= rtx_cost (op1
, MINUS
, 1, speed
);
4568 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
4570 if (CONST_INT_P (op1
) && aarch64_uimm12_shift (INTVAL (op1
)))
4572 *cost
+= rtx_cost (op0
, PLUS
, 0, speed
);
4576 rtx new_op0
= aarch64_strip_shift_or_extend (op0
);
4579 && GET_CODE (op0
) == MULT
)
4581 if ((GET_CODE (XEXP (op0
, 0)) == ZERO_EXTEND
4582 && GET_CODE (XEXP (op0
, 1)) == ZERO_EXTEND
)
4583 || (GET_CODE (XEXP (op0
, 0)) == SIGN_EXTEND
4584 && GET_CODE (XEXP (op0
, 1)) == SIGN_EXTEND
))
4586 *cost
+= (rtx_cost (XEXP (XEXP (op0
, 0), 0), MULT
, 0,
4588 + rtx_cost (XEXP (XEXP (op0
, 1), 0), MULT
, 1,
4590 + rtx_cost (op1
, PLUS
, 1, speed
));
4592 *cost
+= extra_cost
->int_multiply_extend_add
;
4595 *cost
+= (rtx_cost (XEXP (op0
, 0), MULT
, 0, speed
)
4596 + rtx_cost (XEXP (op0
, 1), MULT
, 1, speed
)
4597 + rtx_cost (op1
, PLUS
, 1, speed
));
4600 *cost
+= extra_cost
->int_multiply_add
;
4603 *cost
+= (rtx_cost (new_op0
, PLUS
, 0, speed
)
4604 + rtx_cost (op1
, PLUS
, 1, speed
));
4618 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
4620 if (CONST_INT_P (op1
)
4621 && aarch64_bitmask_imm (INTVAL (op1
), GET_MODE (x
)))
4623 *cost
+= rtx_cost (op0
, AND
, 0, speed
);
4627 if (GET_CODE (op0
) == NOT
)
4628 op0
= XEXP (op0
, 0);
4629 op0
= aarch64_strip_shift (op0
);
4630 *cost
+= (rtx_cost (op0
, AND
, 0, speed
)
4631 + rtx_cost (op1
, AND
, 1, speed
));
4638 if ((GET_MODE (x
) == DImode
4639 && GET_MODE (XEXP (x
, 0)) == SImode
)
4640 || GET_CODE (XEXP (x
, 0)) == MEM
)
4642 *cost
+= rtx_cost (XEXP (x
, 0), ZERO_EXTEND
, 0, speed
);
4648 if (GET_CODE (XEXP (x
, 0)) == MEM
)
4650 *cost
+= rtx_cost (XEXP (x
, 0), SIGN_EXTEND
, 0, speed
);
4656 if (!CONST_INT_P (XEXP (x
, 1)))
4657 *cost
+= COSTS_N_INSNS (2);
4664 /* Shifting by a register often takes an extra cycle. */
4665 if (speed
&& !CONST_INT_P (XEXP (x
, 1)))
4666 *cost
+= extra_cost
->register_shift
;
4668 *cost
+= rtx_cost (XEXP (x
, 0), ASHIFT
, 0, speed
);
4672 if (!CONSTANT_P (XEXP (x
, 0)))
4673 *cost
+= rtx_cost (XEXP (x
, 0), HIGH
, 0, speed
);
4677 if (!CONSTANT_P (XEXP (x
, 1)))
4678 *cost
+= rtx_cost (XEXP (x
, 1), LO_SUM
, 1, speed
);
4679 *cost
+= rtx_cost (XEXP (x
, 0), LO_SUM
, 0, speed
);
4684 *cost
+= rtx_cost (XEXP (x
, 0), ZERO_EXTRACT
, 0, speed
);
4691 *cost
= COSTS_N_INSNS (1);
4692 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
4694 if (CONST_INT_P (op1
)
4695 && exact_log2 (INTVAL (op1
)) > 0)
4697 *cost
+= rtx_cost (op0
, ASHIFT
, 0, speed
);
4701 if ((GET_CODE (op0
) == ZERO_EXTEND
4702 && GET_CODE (op1
) == ZERO_EXTEND
)
4703 || (GET_CODE (op0
) == SIGN_EXTEND
4704 && GET_CODE (op1
) == SIGN_EXTEND
))
4706 *cost
+= (rtx_cost (XEXP (op0
, 0), MULT
, 0, speed
)
4707 + rtx_cost (XEXP (op1
, 0), MULT
, 1, speed
));
4709 *cost
+= extra_cost
->int_multiply_extend
;
4714 *cost
+= extra_cost
->int_multiply
;
4718 if (GET_MODE (x
) == DFmode
)
4719 *cost
+= extra_cost
->double_multiply
;
4720 else if (GET_MODE (x
) == SFmode
)
4721 *cost
+= extra_cost
->float_multiply
;
4724 return false; /* All arguments need to be in registers. */
4728 *cost
= COSTS_N_INSNS (2);
4731 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
4732 *cost
+= (extra_cost
->int_multiply_add
4733 + extra_cost
->int_divide
);
4734 else if (GET_MODE (x
) == DFmode
)
4735 *cost
+= (extra_cost
->double_multiply
4736 + extra_cost
->double_divide
);
4737 else if (GET_MODE (x
) == SFmode
)
4738 *cost
+= (extra_cost
->float_multiply
4739 + extra_cost
->float_divide
);
4741 return false; /* All arguments need to be in registers. */
4745 *cost
= COSTS_N_INSNS (1);
4748 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
4749 *cost
+= extra_cost
->int_divide
;
4750 else if (GET_MODE (x
) == DFmode
)
4751 *cost
+= extra_cost
->double_divide
;
4752 else if (GET_MODE (x
) == SFmode
)
4753 *cost
+= extra_cost
->float_divide
;
4755 return false; /* All arguments need to be in registers. */
static int
aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
                      enum machine_mode mode ATTRIBUTE_UNUSED,
                      addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
{
  enum rtx_code c = GET_CODE (x);
  const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;

  if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
    return addr_cost->pre_modify;

  if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
    return addr_cost->post_modify;

  if (c == PLUS)
    {
      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
        return addr_cost->imm_offset;
      else if (GET_CODE (XEXP (x, 0)) == MULT
               || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
               || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
        return addr_cost->register_extend;

      return addr_cost->register_offset;
    }
  else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
    return addr_cost->imm_offset;

  return 0;
}

static int
aarch64_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
                            reg_class_t from, reg_class_t to)
{
  const struct cpu_regmove_cost *regmove_cost
    = aarch64_tune_params->regmove_cost;

  if (from == GENERAL_REGS && to == GENERAL_REGS)
    return regmove_cost->GP2GP;
  else if (from == GENERAL_REGS)
    return regmove_cost->GP2FP;
  else if (to == GENERAL_REGS)
    return regmove_cost->FP2GP;

  /* When AdvSIMD instructions are disabled it is not possible to move
     a 128-bit value directly between Q registers.  This is handled in
     secondary reload.  A general register is used as a scratch to move
     the upper DI value and the lower DI value is moved directly,
     hence the cost is the sum of three moves.  */

  if (! TARGET_SIMD && GET_MODE_SIZE (from) == 128 && GET_MODE_SIZE (to) == 128)
    return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;

  return regmove_cost->FP2FP;
}

static int
aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
                          reg_class_t rclass ATTRIBUTE_UNUSED,
                          bool in ATTRIBUTE_UNUSED)
{
  return aarch64_tune_params->memmov_cost;
}
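#if 0
/* Illustrative sketch (not part of GCC): a standalone model of the
   register-move cost triangle implemented above.  The struct and the
   sample numbers are assumptions for illustration only; the real values
   live in the per-CPU cpu_regmove_cost tables selected by -mtune.  */
#include <stdbool.h>
#include <stdio.h>

struct regmove_cost { int GP2GP, GP2FP, FP2GP, FP2FP; };

static int
move_cost (const struct regmove_cost *c, bool from_gp, bool to_gp,
           bool have_simd, bool is_128bit)
{
  if (from_gp && to_gp)
    return c->GP2GP;
  if (from_gp)
    return c->GP2FP;
  if (to_gp)
    return c->FP2GP;
  /* Without AdvSIMD a 128-bit FP<->FP copy goes through a general
     register for the upper half: three moves in total.  */
  if (!have_simd && is_128bit)
    return c->GP2FP + c->FP2GP + c->FP2FP;
  return c->FP2FP;
}

int
main (void)
{
  struct regmove_cost generic = { 1, 2, 2, 2 };   /* made-up numbers */
  printf ("q-reg copy, no SIMD: %d\n",
          move_cost (&generic, false, false, false, true));
  return 0;
}
#endif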
/* Vectorizer cost model target hooks.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                    tree vectype,
                                    int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
    case scalar_stmt:
      return aarch64_tune_params->vec_costs->scalar_stmt_cost;

    case scalar_load:
      return aarch64_tune_params->vec_costs->scalar_load_cost;

    case scalar_store:
      return aarch64_tune_params->vec_costs->scalar_store_cost;

    case vector_stmt:
      return aarch64_tune_params->vec_costs->vec_stmt_cost;

    case vector_load:
      return aarch64_tune_params->vec_costs->vec_align_load_cost;

    case vector_store:
      return aarch64_tune_params->vec_costs->vec_store_cost;

    case vec_to_scalar:
      return aarch64_tune_params->vec_costs->vec_to_scalar_cost;

    case scalar_to_vec:
      return aarch64_tune_params->vec_costs->scalar_to_vec_cost;

    case unaligned_load:
      return aarch64_tune_params->vec_costs->vec_unalign_load_cost;

    case unaligned_store:
      return aarch64_tune_params->vec_costs->vec_unalign_store_cost;

    case cond_branch_taken:
      return aarch64_tune_params->vec_costs->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;

    case vec_perm:
    case vec_promote_demote:
      return aarch64_tune_params->vec_costs->vec_stmt_cost;

    case vec_construct:
      elements = TYPE_VECTOR_SUBPARTS (vectype);
      return elements / 2 + 1;

    default:
      gcc_unreachable ();
    }
}

/* Implement targetm.vectorize.add_stmt_cost.  */
static unsigned
aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
                       struct _stmt_vec_info *stmt_info, int misalign,
                       enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost =
            aarch64_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
         vectorized are weighted more heavily.  The value here is
         a function (linear for now) of the loop nest level.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
        {
          loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
          struct loop *loop = LOOP_VINFO_LOOP (loop_info);
          unsigned nest_level = loop_depth (loop);

          count *= nest_level;
        }

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}
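#if 0
/* Illustrative sketch (not part of GCC): the weighting performed by
   aarch64_add_stmt_cost above.  A statement in an inner loop is counted
   nest_level times; the result is accumulated into the per-location cost
   array.  All numbers below are made up.  */
#include <stdio.h>

static unsigned
weighted_stmt_cost (unsigned count, unsigned stmt_cost, unsigned nest_level,
                    int in_inner_loop)
{
  if (in_inner_loop)
    count *= nest_level;          /* linear in the loop depth */
  return count * stmt_cost;
}

int
main (void)
{
  /* 4 vector statements of cost 1, two loops deep.  */
  printf ("body cost += %u\n", weighted_stmt_cost (4, 1, 2, 1));
  return 0;
}
#endif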
4923 static void initialize_aarch64_code_model (void);
4925 /* Parse the architecture extension string. */
4928 aarch64_parse_extension (char *str
)
4930 /* The extension string is parsed left to right. */
4931 const struct aarch64_option_extension
*opt
= NULL
;
4933 /* Flag to say whether we are adding or removing an extension. */
4934 int adding_ext
= -1;
4936 while (str
!= NULL
&& *str
!= 0)
4942 ext
= strchr (str
, '+');
4949 if (len
>= 2 && strncmp (str
, "no", 2) == 0)
4960 error ("missing feature modifier after %qs", "+no");
4964 /* Scan over the extensions table trying to find an exact match. */
4965 for (opt
= all_extensions
; opt
->name
!= NULL
; opt
++)
4967 if (strlen (opt
->name
) == len
&& strncmp (opt
->name
, str
, len
) == 0)
4969 /* Add or remove the extension. */
4971 aarch64_isa_flags
|= opt
->flags_on
;
4973 aarch64_isa_flags
&= ~(opt
->flags_off
);
4978 if (opt
->name
== NULL
)
4980 /* Extension not found in list. */
4981 error ("unknown feature modifier %qs", str
);
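#if 0
/* Illustrative sketch (not part of GCC): how a "+ext" / "+noext" feature
   string such as "+simd+nocrypto" is folded into an ISA flag word, in the
   spirit of aarch64_parse_extension above.  The table entries and flag
   bits are assumptions for illustration only.  */
#include <stdio.h>
#include <string.h>

struct extension { const char *name; unsigned long flags_on, flags_off; };

static const struct extension table[] = {
  { "fp",     0x1,               0x1 | 0x2 | 0x4 },
  { "simd",   0x1 | 0x2,         0x2 | 0x4 },
  { "crypto", 0x1 | 0x2 | 0x4,   0x4 },
  { NULL, 0, 0 }
};

static unsigned long
parse_extensions (const char *str, unsigned long isa)
{
  while (str && *str)
    {
      const char *end;
      size_t len;
      int adding = 1;

      if (*str == '+')
        str++;
      end = strchr (str, '+');
      len = end ? (size_t) (end - str) : strlen (str);

      if (len >= 2 && strncmp (str, "no", 2) == 0)
        {
          adding = 0;
          str += 2;
          len -= 2;
        }

      for (const struct extension *e = table; e->name; e++)
        if (strlen (e->name) == len && strncmp (e->name, str, len) == 0)
          {
            if (adding)
              isa |= e->flags_on;     /* enabling pulls in prerequisites */
            else
              isa &= ~e->flags_off;   /* disabling removes dependents */
            break;
          }

      str = end;
    }
  return isa;
}

int
main (void)
{
  printf ("flags = %#lx\n", parse_extensions ("+simd+nocrypto", 0));
  return 0;
}
#endif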
4991 /* Parse the ARCH string. */
4994 aarch64_parse_arch (void)
4997 const struct processor
*arch
;
4998 char *str
= (char *) alloca (strlen (aarch64_arch_string
) + 1);
5001 strcpy (str
, aarch64_arch_string
);
5003 ext
= strchr (str
, '+');
5012 error ("missing arch name in -march=%qs", str
);
5016 /* Loop through the list of supported ARCHs to find a match. */
5017 for (arch
= all_architectures
; arch
->name
!= NULL
; arch
++)
5019 if (strlen (arch
->name
) == len
&& strncmp (arch
->name
, str
, len
) == 0)
5021 selected_arch
= arch
;
5022 aarch64_isa_flags
= selected_arch
->flags
;
5023 selected_cpu
= &all_cores
[selected_arch
->core
];
5027 /* ARCH string contains at least one extension. */
5028 aarch64_parse_extension (ext
);
5035 /* ARCH name not found in list. */
5036 error ("unknown value %qs for -march", str
);
5040 /* Parse the CPU string. */
5043 aarch64_parse_cpu (void)
5046 const struct processor
*cpu
;
5047 char *str
= (char *) alloca (strlen (aarch64_cpu_string
) + 1);
5050 strcpy (str
, aarch64_cpu_string
);
5052 ext
= strchr (str
, '+');
5061 error ("missing cpu name in -mcpu=%qs", str
);
5065 /* Loop through the list of supported CPUs to find a match. */
5066 for (cpu
= all_cores
; cpu
->name
!= NULL
; cpu
++)
5068 if (strlen (cpu
->name
) == len
&& strncmp (cpu
->name
, str
, len
) == 0)
5071 aarch64_isa_flags
= selected_cpu
->flags
;
5075 /* CPU string contains at least one extension. */
5076 aarch64_parse_extension (ext
);
5083 /* CPU name not found in list. */
5084 error ("unknown value %qs for -mcpu", str
);
/* Parse the TUNE string.  */

static void
aarch64_parse_tune (void)
{
  const struct processor *cpu;
  char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
  strcpy (str, aarch64_tune_string);

  /* Loop through the list of supported CPUs to find a match.  */
  for (cpu = all_cores; cpu->name != NULL; cpu++)
    {
      if (strcmp (cpu->name, str) == 0)
        {
          selected_tune = cpu;
          return;
        }
    }

  /* CPU name not found in list.  */
  error ("unknown value %qs for -mtune", str);
  return;
}
/* Implement TARGET_OPTION_OVERRIDE.  */

static void
aarch64_override_options (void)
{
  /* march wins over mcpu, so when march is defined, mcpu takes the same value,
     otherwise march remains undefined.  mtune can be used with either march or
     mcpu.  */

  if (aarch64_arch_string)
    {
      aarch64_parse_arch ();
      aarch64_cpu_string = NULL;
    }

  if (aarch64_cpu_string)
    {
      aarch64_parse_cpu ();
      selected_arch = NULL;
    }

  if (aarch64_tune_string)
    {
      aarch64_parse_tune ();
    }

  initialize_aarch64_code_model ();

  aarch64_build_bitmask_table ();

  /* This target defaults to strict volatile bitfields.  */
  if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
    flag_strict_volatile_bitfields = 1;

  /* If the user did not specify a processor, choose the default
     one for them.  This will be the CPU set during configuration using
     --with-cpu, otherwise it is "generic".  */
  if (!selected_cpu)
    {
      selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
      aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
    }

  gcc_assert (selected_cpu);

  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!selected_tune)
    selected_tune = &all_cores[selected_cpu->core];

  aarch64_tune_flags = selected_tune->flags;
  aarch64_tune = selected_tune->core;
  aarch64_tune_params = selected_tune->tune;

  aarch64_override_options_after_change ();
}

/* Implement targetm.override_options_after_change.  */

static void
aarch64_override_options_after_change (void)
{
  faked_omit_frame_pointer = false;

  /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
     that aarch64_frame_pointer_required will be called.  We need to remember
     whether flag_omit_frame_pointer was turned on normally or just faked.  */

  if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
    {
      flag_omit_frame_pointer = true;
      faked_omit_frame_pointer = true;
    }
}

static struct machine_function *
aarch64_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_alloc_cleared_machine_function ();
  return machine;
}

void
aarch64_init_expanders (void)
{
  init_machine_status = aarch64_init_machine_status;
}
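#if 0
/* Illustrative sketch (not part of GCC): the precedence applied by
   aarch64_override_options above.  -march wins over -mcpu; -mtune can be
   combined with either; with nothing given, the configured default is
   used.  The strings and defaulting details are an approximation for
   illustration only.  */
#include <stdio.h>

struct opts { const char *march, *mcpu, *mtune; };

static void
resolve (struct opts *o, const char **arch, const char **cpu, const char **tune)
{
  if (o->march)
    {
      *arch = o->march;           /* -march selects the architecture ...  */
      o->mcpu = NULL;             /* ... and any -mcpu is ignored.  */
    }
  if (o->mcpu)
    *cpu = o->mcpu;               /* otherwise -mcpu picks core and arch */
  if (o->mtune)
    *tune = o->mtune;             /* -mtune only affects tuning */
  if (!*cpu)
    *cpu = *arch ? "core implied by -march" : "configured default";
  if (!*tune)
    *tune = *cpu;                 /* tune by the selected core */
}

int
main (void)
{
  struct opts o = { "armv8-a", "cortex-a53", NULL };  /* -march and -mcpu given */
  const char *arch = NULL, *cpu = NULL, *tune = NULL;
  resolve (&o, &arch, &cpu, &tune);
  printf ("arch=%s cpu=%s tune=%s\n", arch, cpu, tune);
  return 0;
}
#endif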
/* A checking mechanism for the implementation of the various code models.  */
static void
initialize_aarch64_code_model (void)
{
  if (flag_pic)
    {
      switch (aarch64_cmodel_var)
        {
        case AARCH64_CMODEL_TINY:
          aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
          break;
        case AARCH64_CMODEL_SMALL:
          aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
          break;
        case AARCH64_CMODEL_LARGE:
          sorry ("code model %qs with -f%s", "large",
                 flag_pic > 1 ? "PIC" : "pic");
        default:
          gcc_unreachable ();
        }
    }
  else
    aarch64_cmodel = aarch64_cmodel_var;
}

/* Return true if SYMBOL_REF X binds locally.  */

static bool
aarch64_symbol_binds_local_p (const_rtx x)
{
  return (SYMBOL_REF_DECL (x)
          ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
          : SYMBOL_REF_LOCAL_P (x));
}

/* Return true if SYMBOL_REF X is thread local */
static bool
aarch64_tls_symbol_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  if (GET_CODE (x) != SYMBOL_REF)
    return false;

  return SYMBOL_REF_TLS_MODEL (x) != 0;
}

/* Classify a TLS symbol into one of the TLS kinds.  */
enum aarch64_symbol_type
aarch64_classify_tls_symbol (rtx x)
{
  enum tls_model tls_kind = tls_symbolic_operand_type (x);

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
    case TLS_MODEL_LOCAL_DYNAMIC:
      return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;

    case TLS_MODEL_INITIAL_EXEC:
      return SYMBOL_SMALL_GOTTPREL;

    case TLS_MODEL_LOCAL_EXEC:
      return SYMBOL_SMALL_TPREL;

    case TLS_MODEL_EMULATED:
    case TLS_MODEL_NONE:
      return SYMBOL_FORCE_TO_MEM;

    default:
      gcc_unreachable ();
    }
}
/* Return the method that should be used to access SYMBOL_REF or
   LABEL_REF X in context CONTEXT.  */

enum aarch64_symbol_type
aarch64_classify_symbol (rtx x,
                         enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
{
  if (GET_CODE (x) == LABEL_REF)
    {
      switch (aarch64_cmodel)
        {
        case AARCH64_CMODEL_LARGE:
          return SYMBOL_FORCE_TO_MEM;

        case AARCH64_CMODEL_TINY_PIC:
        case AARCH64_CMODEL_TINY:
          return SYMBOL_TINY_ABSOLUTE;

        case AARCH64_CMODEL_SMALL_PIC:
        case AARCH64_CMODEL_SMALL:
          return SYMBOL_SMALL_ABSOLUTE;

        default:
          gcc_unreachable ();
        }
    }

  if (GET_CODE (x) == SYMBOL_REF)
    {
      if (aarch64_cmodel == AARCH64_CMODEL_LARGE
          || CONSTANT_POOL_ADDRESS_P (x))
        return SYMBOL_FORCE_TO_MEM;

      if (aarch64_tls_symbol_p (x))
        return aarch64_classify_tls_symbol (x);

      switch (aarch64_cmodel)
        {
        case AARCH64_CMODEL_TINY:
          if (SYMBOL_REF_WEAK (x))
            return SYMBOL_FORCE_TO_MEM;
          return SYMBOL_TINY_ABSOLUTE;

        case AARCH64_CMODEL_SMALL:
          if (SYMBOL_REF_WEAK (x))
            return SYMBOL_FORCE_TO_MEM;
          return SYMBOL_SMALL_ABSOLUTE;

        case AARCH64_CMODEL_TINY_PIC:
          if (!aarch64_symbol_binds_local_p (x))
            return SYMBOL_TINY_GOT;
          return SYMBOL_TINY_ABSOLUTE;

        case AARCH64_CMODEL_SMALL_PIC:
          if (!aarch64_symbol_binds_local_p (x))
            return SYMBOL_SMALL_GOT;
          return SYMBOL_SMALL_ABSOLUTE;

        default:
          gcc_unreachable ();
        }
    }

  /* By default push everything into the constant pool.  */
  return SYMBOL_FORCE_TO_MEM;
}
bool
aarch64_constant_address_p (rtx x)
{
  return (CONSTANT_P (x) && memory_address_p (DImode, x));
}

bool
aarch64_legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
          && GET_CODE (XEXP (x, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return false;

  return true;
}

/* Return true if X holds either a quarter-precision or
   floating-point +0.0 constant.  */
static bool
aarch64_valid_floating_const (enum machine_mode mode, rtx x)
{
  if (!CONST_DOUBLE_P (x))
    return false;

  /* TODO: We could handle moving 0.0 to a TFmode register,
     but first we would like to refactor the movtf_aarch64
     to be more amicable to split moves properly and
     correctly gate on TARGET_SIMD.  For now - reject all
     constants which are not to SFmode or DFmode registers.  */
  if (!(mode == SFmode || mode == DFmode))
    return false;

  if (aarch64_float_const_zero_rtx_p (x))
    return true;
  return aarch64_float_const_representable_p (x);
}

static bool
aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
{
  /* Do not allow vector struct mode constants.  We could support
     0 and -1 easily, but they need support in aarch64-simd.md.  */
  if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
    return false;

  /* This could probably go away because
     we now decompose CONST_INTs according to expand_mov_immediate.  */
  if ((GET_CODE (x) == CONST_VECTOR
       && aarch64_simd_valid_immediate (x, mode, false, NULL))
      || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
    return !targetm.cannot_force_const_mem (mode, x);

  if (GET_CODE (x) == HIGH
      && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
    return true;

  return aarch64_constant_address_p (x);
}

rtx
aarch64_load_tp (rtx target)
{
  if (!target
      || GET_MODE (target) != Pmode
      || !register_operand (target, Pmode))
    target = gen_reg_rtx (Pmode);

  /* Can return in any reg.  */
  emit_insn (gen_aarch64_load_tp_hard (target));
  return target;
}
5417 /* On AAPCS systems, this is the "struct __va_list". */
5418 static GTY(()) tree va_list_type
;
5420 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5421 Return the type to use as __builtin_va_list.
5423 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5435 aarch64_build_builtin_va_list (void)
5438 tree f_stack
, f_grtop
, f_vrtop
, f_groff
, f_vroff
;
5440 /* Create the type. */
5441 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
5442 /* Give it the required name. */
5443 va_list_name
= build_decl (BUILTINS_LOCATION
,
5445 get_identifier ("__va_list"),
5447 DECL_ARTIFICIAL (va_list_name
) = 1;
5448 TYPE_NAME (va_list_type
) = va_list_name
;
5449 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
5451 /* Create the fields. */
5452 f_stack
= build_decl (BUILTINS_LOCATION
,
5453 FIELD_DECL
, get_identifier ("__stack"),
5455 f_grtop
= build_decl (BUILTINS_LOCATION
,
5456 FIELD_DECL
, get_identifier ("__gr_top"),
5458 f_vrtop
= build_decl (BUILTINS_LOCATION
,
5459 FIELD_DECL
, get_identifier ("__vr_top"),
5461 f_groff
= build_decl (BUILTINS_LOCATION
,
5462 FIELD_DECL
, get_identifier ("__gr_offs"),
5464 f_vroff
= build_decl (BUILTINS_LOCATION
,
5465 FIELD_DECL
, get_identifier ("__vr_offs"),
5468 DECL_ARTIFICIAL (f_stack
) = 1;
5469 DECL_ARTIFICIAL (f_grtop
) = 1;
5470 DECL_ARTIFICIAL (f_vrtop
) = 1;
5471 DECL_ARTIFICIAL (f_groff
) = 1;
5472 DECL_ARTIFICIAL (f_vroff
) = 1;
5474 DECL_FIELD_CONTEXT (f_stack
) = va_list_type
;
5475 DECL_FIELD_CONTEXT (f_grtop
) = va_list_type
;
5476 DECL_FIELD_CONTEXT (f_vrtop
) = va_list_type
;
5477 DECL_FIELD_CONTEXT (f_groff
) = va_list_type
;
5478 DECL_FIELD_CONTEXT (f_vroff
) = va_list_type
;
5480 TYPE_FIELDS (va_list_type
) = f_stack
;
5481 DECL_CHAIN (f_stack
) = f_grtop
;
5482 DECL_CHAIN (f_grtop
) = f_vrtop
;
5483 DECL_CHAIN (f_vrtop
) = f_groff
;
5484 DECL_CHAIN (f_groff
) = f_vroff
;
5486 /* Compute its layout. */
5487 layout_type (va_list_type
);
5489 return va_list_type
;
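#if 0
/* Illustrative sketch (not part of GCC): the C-level shape of the record
   built by aarch64_build_builtin_va_list above.  AAPCS64 describes va_list
   as a structure of this general form; the field names match the
   FIELD_DECLs created above, but this definition is for illustration
   only.  */
#include <stdio.h>

struct illustrative_va_list
{
  void *__stack;    /* next stacked argument */
  void *__gr_top;   /* end of the general-register save area */
  void *__vr_top;   /* end of the vector-register save area */
  int   __gr_offs;  /* negative offset from __gr_top to the next GPR arg */
  int   __vr_offs;  /* negative offset from __vr_top to the next VR arg */
};

int
main (void)
{
  printf ("sizeof = %zu\n", sizeof (struct illustrative_va_list));
  return 0;
}
#endif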
5492 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5494 aarch64_expand_builtin_va_start (tree valist
, rtx nextarg ATTRIBUTE_UNUSED
)
5496 const CUMULATIVE_ARGS
*cum
;
5497 tree f_stack
, f_grtop
, f_vrtop
, f_groff
, f_vroff
;
5498 tree stack
, grtop
, vrtop
, groff
, vroff
;
5500 int gr_save_area_size
;
5501 int vr_save_area_size
;
5504 cum
= &crtl
->args
.info
;
5506 = (NUM_ARG_REGS
- cum
->aapcs_ncrn
) * UNITS_PER_WORD
;
5508 = (NUM_FP_ARG_REGS
- cum
->aapcs_nvrn
) * UNITS_PER_VREG
;
5510 if (TARGET_GENERAL_REGS_ONLY
)
5512 if (cum
->aapcs_nvrn
> 0)
5513 sorry ("%qs and floating point or vector arguments",
5514 "-mgeneral-regs-only");
5515 vr_save_area_size
= 0;
5518 f_stack
= TYPE_FIELDS (va_list_type_node
);
5519 f_grtop
= DECL_CHAIN (f_stack
);
5520 f_vrtop
= DECL_CHAIN (f_grtop
);
5521 f_groff
= DECL_CHAIN (f_vrtop
);
5522 f_vroff
= DECL_CHAIN (f_groff
);
5524 stack
= build3 (COMPONENT_REF
, TREE_TYPE (f_stack
), valist
, f_stack
,
5526 grtop
= build3 (COMPONENT_REF
, TREE_TYPE (f_grtop
), valist
, f_grtop
,
5528 vrtop
= build3 (COMPONENT_REF
, TREE_TYPE (f_vrtop
), valist
, f_vrtop
,
5530 groff
= build3 (COMPONENT_REF
, TREE_TYPE (f_groff
), valist
, f_groff
,
5532 vroff
= build3 (COMPONENT_REF
, TREE_TYPE (f_vroff
), valist
, f_vroff
,
5535 /* Emit code to initialize STACK, which points to the next varargs stack
5536 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
5537 by named arguments. STACK is 8-byte aligned. */
5538 t
= make_tree (TREE_TYPE (stack
), virtual_incoming_args_rtx
);
5539 if (cum
->aapcs_stack_size
> 0)
5540 t
= fold_build_pointer_plus_hwi (t
, cum
->aapcs_stack_size
* UNITS_PER_WORD
);
5541 t
= build2 (MODIFY_EXPR
, TREE_TYPE (stack
), stack
, t
);
5542 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
5544 /* Emit code to initialize GRTOP, the top of the GR save area.
5545 virtual_incoming_args_rtx should have been 16 byte aligned. */
5546 t
= make_tree (TREE_TYPE (grtop
), virtual_incoming_args_rtx
);
5547 t
= build2 (MODIFY_EXPR
, TREE_TYPE (grtop
), grtop
, t
);
5548 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
5550 /* Emit code to initialize VRTOP, the top of the VR save area.
5551 This address is gr_save_area_bytes below GRTOP, rounded
5552 down to the next 16-byte boundary. */
5553 t
= make_tree (TREE_TYPE (vrtop
), virtual_incoming_args_rtx
);
5554 vr_offset
= AARCH64_ROUND_UP (gr_save_area_size
,
5555 STACK_BOUNDARY
/ BITS_PER_UNIT
);
5558 t
= fold_build_pointer_plus_hwi (t
, -vr_offset
);
5559 t
= build2 (MODIFY_EXPR
, TREE_TYPE (vrtop
), vrtop
, t
);
5560 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
5562 /* Emit code to initialize GROFF, the offset from GRTOP of the
5563 next GPR argument. */
5564 t
= build2 (MODIFY_EXPR
, TREE_TYPE (groff
), groff
,
5565 build_int_cst (TREE_TYPE (groff
), -gr_save_area_size
));
5566 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
5568 /* Likewise emit code to initialize VROFF, the offset from FTOP
5569 of the next VR argument. */
5570 t
= build2 (MODIFY_EXPR
, TREE_TYPE (vroff
), vroff
,
5571 build_int_cst (TREE_TYPE (vroff
), -vr_save_area_size
));
5572 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
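#if 0
/* Illustrative sketch (not part of GCC): the values va_start establishes,
   following the code above.  Suppose a variadic callee was entered with 3
   of the 8 general argument registers and 1 of the 8 vector registers used
   by named arguments, and no named stack arguments.  The numbers are an
   example worked by hand, not compiler output.  */
#include <stdio.h>

int
main (void)
{
  const int NUM_ARG_REGS = 8, NUM_FP_ARG_REGS = 8;
  const int UNITS_PER_WORD = 8, UNITS_PER_VREG = 16;
  int named_gr = 3, named_vr = 1, named_stack_words = 0;

  int gr_save_area_size = (NUM_ARG_REGS - named_gr) * UNITS_PER_WORD;     /* 40 */
  int vr_save_area_size = (NUM_FP_ARG_REGS - named_vr) * UNITS_PER_VREG;  /* 112 */

  /* Offsets relative to the incoming-argument pointer (== __gr_top).  */
  int stack   = named_stack_words * UNITS_PER_WORD;       /* __stack */
  int gr_top  = 0;                                        /* __gr_top */
  int vr_top  = -((gr_save_area_size + 15) & -16);        /* __vr_top, 16-aligned */
  int gr_offs = -gr_save_area_size;                       /* __gr_offs */
  int vr_offs = -vr_save_area_size;                       /* __vr_offs */

  printf ("stack=%+d gr_top=%+d vr_top=%+d gr_offs=%d vr_offs=%d\n",
          stack, gr_top, vr_top, gr_offs, vr_offs);
  return 0;
}
#endif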
5575 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
5578 aarch64_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
5579 gimple_seq
*post_p ATTRIBUTE_UNUSED
)
5583 bool is_ha
; /* is HFA or HVA. */
5584 bool dw_align
; /* double-word align. */
5585 enum machine_mode ag_mode
= VOIDmode
;
5587 enum machine_mode mode
;
5589 tree f_stack
, f_grtop
, f_vrtop
, f_groff
, f_vroff
;
5590 tree stack
, f_top
, f_off
, off
, arg
, roundup
, on_stack
;
5591 HOST_WIDE_INT size
, rsize
, adjust
, align
;
5592 tree t
, u
, cond1
, cond2
;
5594 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
5596 type
= build_pointer_type (type
);
5598 mode
= TYPE_MODE (type
);
5600 f_stack
= TYPE_FIELDS (va_list_type_node
);
5601 f_grtop
= DECL_CHAIN (f_stack
);
5602 f_vrtop
= DECL_CHAIN (f_grtop
);
5603 f_groff
= DECL_CHAIN (f_vrtop
);
5604 f_vroff
= DECL_CHAIN (f_groff
);
5606 stack
= build3 (COMPONENT_REF
, TREE_TYPE (f_stack
), unshare_expr (valist
),
5607 f_stack
, NULL_TREE
);
5608 size
= int_size_in_bytes (type
);
5609 align
= aarch64_function_arg_alignment (mode
, type
) / BITS_PER_UNIT
;
5613 if (aarch64_vfp_is_call_or_return_candidate (mode
,
5619 /* TYPE passed in fp/simd registers. */
5620 if (TARGET_GENERAL_REGS_ONLY
)
5621 sorry ("%qs and floating point or vector arguments",
5622 "-mgeneral-regs-only");
5624 f_top
= build3 (COMPONENT_REF
, TREE_TYPE (f_vrtop
),
5625 unshare_expr (valist
), f_vrtop
, NULL_TREE
);
5626 f_off
= build3 (COMPONENT_REF
, TREE_TYPE (f_vroff
),
5627 unshare_expr (valist
), f_vroff
, NULL_TREE
);
5629 rsize
= nregs
* UNITS_PER_VREG
;
5633 if (BYTES_BIG_ENDIAN
&& GET_MODE_SIZE (ag_mode
) < UNITS_PER_VREG
)
5634 adjust
= UNITS_PER_VREG
- GET_MODE_SIZE (ag_mode
);
5636 else if (BLOCK_REG_PADDING (mode
, type
, 1) == downward
5637 && size
< UNITS_PER_VREG
)
5639 adjust
= UNITS_PER_VREG
- size
;
5644 /* TYPE passed in general registers. */
5645 f_top
= build3 (COMPONENT_REF
, TREE_TYPE (f_grtop
),
5646 unshare_expr (valist
), f_grtop
, NULL_TREE
);
5647 f_off
= build3 (COMPONENT_REF
, TREE_TYPE (f_groff
),
5648 unshare_expr (valist
), f_groff
, NULL_TREE
);
5649 rsize
= (size
+ UNITS_PER_WORD
- 1) & -UNITS_PER_WORD
;
5650 nregs
= rsize
/ UNITS_PER_WORD
;
5655 if (BLOCK_REG_PADDING (mode
, type
, 1) == downward
5656 && size
< UNITS_PER_WORD
)
5658 adjust
= UNITS_PER_WORD
- size
;
5662 /* Get a local temporary for the field value. */
5663 off
= get_initialized_tmp_var (f_off
, pre_p
, NULL
);
5665 /* Emit code to branch if off >= 0. */
5666 t
= build2 (GE_EXPR
, boolean_type_node
, off
,
5667 build_int_cst (TREE_TYPE (off
), 0));
5668 cond1
= build3 (COND_EXPR
, ptr_type_node
, t
, NULL_TREE
, NULL_TREE
);
5672 /* Emit: offs = (offs + 15) & -16. */
5673 t
= build2 (PLUS_EXPR
, TREE_TYPE (off
), off
,
5674 build_int_cst (TREE_TYPE (off
), 15));
5675 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (off
), t
,
5676 build_int_cst (TREE_TYPE (off
), -16));
5677 roundup
= build2 (MODIFY_EXPR
, TREE_TYPE (off
), off
, t
);
5682 /* Update ap.__[g|v]r_offs */
5683 t
= build2 (PLUS_EXPR
, TREE_TYPE (off
), off
,
5684 build_int_cst (TREE_TYPE (off
), rsize
));
5685 t
= build2 (MODIFY_EXPR
, TREE_TYPE (f_off
), unshare_expr (f_off
), t
);
5689 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), roundup
, t
);
5691 /* [cond2] if (ap.__[g|v]r_offs > 0) */
5692 u
= build2 (GT_EXPR
, boolean_type_node
, unshare_expr (f_off
),
5693 build_int_cst (TREE_TYPE (f_off
), 0));
5694 cond2
= build3 (COND_EXPR
, ptr_type_node
, u
, NULL_TREE
, NULL_TREE
);
5696 /* String up: make sure the assignment happens before the use. */
5697 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (cond2
), t
, cond2
);
5698 COND_EXPR_ELSE (cond1
) = t
;
5700 /* Prepare the trees handling the argument that is passed on the stack;
5701 the top level node will store in ON_STACK. */
5702 arg
= get_initialized_tmp_var (stack
, pre_p
, NULL
);
5705 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
5706 t
= fold_convert (intDI_type_node
, arg
);
5707 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
5708 build_int_cst (TREE_TYPE (t
), 15));
5709 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
5710 build_int_cst (TREE_TYPE (t
), -16));
5711 t
= fold_convert (TREE_TYPE (arg
), t
);
5712 roundup
= build2 (MODIFY_EXPR
, TREE_TYPE (arg
), arg
, t
);
5716 /* Advance ap.__stack */
5717 t
= fold_convert (intDI_type_node
, arg
);
5718 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
5719 build_int_cst (TREE_TYPE (t
), size
+ 7));
5720 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
5721 build_int_cst (TREE_TYPE (t
), -8));
5722 t
= fold_convert (TREE_TYPE (arg
), t
);
5723 t
= build2 (MODIFY_EXPR
, TREE_TYPE (stack
), unshare_expr (stack
), t
);
5724 /* String up roundup and advance. */
5726 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), roundup
, t
);
5727 /* String up with arg */
5728 on_stack
= build2 (COMPOUND_EXPR
, TREE_TYPE (arg
), t
, arg
);
5729 /* Big-endianness related address adjustment. */
5730 if (BLOCK_REG_PADDING (mode
, type
, 1) == downward
5731 && size
< UNITS_PER_WORD
)
5733 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (arg
), arg
,
5734 size_int (UNITS_PER_WORD
- size
));
5735 on_stack
= build2 (COMPOUND_EXPR
, TREE_TYPE (arg
), on_stack
, t
);
5738 COND_EXPR_THEN (cond1
) = unshare_expr (on_stack
);
5739 COND_EXPR_THEN (cond2
) = unshare_expr (on_stack
);
5741 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
5744 t
= build2 (PREINCREMENT_EXPR
, TREE_TYPE (off
), off
,
5745 build_int_cst (TREE_TYPE (off
), adjust
));
5747 t
= fold_convert (sizetype
, t
);
5748 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (f_top
), f_top
, t
);
5752 /* type ha; // treat as "struct {ftype field[n];}"
5753 ... [computing offs]
5754 for (i = 0; i <nregs; ++i, offs += 16)
5755 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5758 tree tmp_ha
, field_t
, field_ptr_t
;
5760 /* Declare a local variable. */
5761 tmp_ha
= create_tmp_var_raw (type
, "ha");
5762 gimple_add_tmp_var (tmp_ha
);
5764 /* Establish the base type. */
5768 field_t
= float_type_node
;
5769 field_ptr_t
= float_ptr_type_node
;
5772 field_t
= double_type_node
;
5773 field_ptr_t
= double_ptr_type_node
;
5776 field_t
= long_double_type_node
;
5777 field_ptr_t
= long_double_ptr_type_node
;
5779 /* The half precision and quad precision are not fully supported yet. Enable
5780 the following code after the support is complete. Need to find the correct
5781 type node for __fp16 *. */
5784 field_t
= float_type_node
;
5785 field_ptr_t
= float_ptr_type_node
;
5791 tree innertype
= make_signed_type (GET_MODE_PRECISION (SImode
));
5792 field_t
= build_vector_type_for_mode (innertype
, ag_mode
);
5793 field_ptr_t
= build_pointer_type (field_t
);
5800 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area */
5801 tmp_ha
= build1 (ADDR_EXPR
, field_ptr_t
, tmp_ha
);
5803 t
= fold_convert (field_ptr_t
, addr
);
5804 t
= build2 (MODIFY_EXPR
, field_t
,
5805 build1 (INDIRECT_REF
, field_t
, tmp_ha
),
5806 build1 (INDIRECT_REF
, field_t
, t
));
5808 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
5809 for (i
= 1; i
< nregs
; ++i
)
5811 addr
= fold_build_pointer_plus_hwi (addr
, UNITS_PER_VREG
);
5812 u
= fold_convert (field_ptr_t
, addr
);
5813 u
= build2 (MODIFY_EXPR
, field_t
,
5814 build2 (MEM_REF
, field_t
, tmp_ha
,
5815 build_int_cst (field_ptr_t
,
5817 int_size_in_bytes (field_t
)))),
5818 build1 (INDIRECT_REF
, field_t
, u
));
5819 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), t
, u
);
5822 u
= fold_convert (TREE_TYPE (f_top
), tmp_ha
);
5823 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (f_top
), t
, u
);
5826 COND_EXPR_ELSE (cond2
) = t
;
5827 addr
= fold_convert (build_pointer_type (type
), cond1
);
5828 addr
= build_va_arg_indirect_ref (addr
);
5831 addr
= build_va_arg_indirect_ref (addr
);
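#if 0
/* Illustrative sketch (not part of GCC): the run-time test gimplified by
   aarch64_gimplify_va_arg_expr above, written as ordinary C for an 8-byte
   general-register argument.  Field names follow the __va_list sketch
   earlier; this is an approximation for illustration, not the emitted
   GIMPLE.  */
#include <stddef.h>
#include <stdio.h>

struct illustrative_va_list
{
  void *__stack;
  void *__gr_top;
  void *__vr_top;
  int   __gr_offs;
  int   __vr_offs;
};

/* Fetch the address of the next integer-class argument.  */
static void *
next_gr_arg (struct illustrative_va_list *ap, size_t size)
{
  size_t rsize = (size + 7) & (size_t) -8;   /* round up to register units */

  if (ap->__gr_offs < 0)
    {
      int offs = ap->__gr_offs;
      ap->__gr_offs += (int) rsize;
      if (ap->__gr_offs <= 0)                /* it fits in the save area */
        return (char *) ap->__gr_top + offs;
      /* Otherwise the save area is exhausted; fall through to the stack.  */
    }

  void *p = ap->__stack;
  ap->__stack = (char *) ap->__stack + rsize;
  return p;
}

int
main (void)
{
  long save[8] = { 0, 11, 22, 33, 44, 55, 66, 77 };
  struct illustrative_va_list ap =
    { NULL, &save[8], NULL, -8 * (int) sizeof (long), 0 };
  printf ("next arg = %ld\n", *(long *) next_gr_arg (&ap, sizeof (long)));
  return 0;
}
#endif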
5836 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
5839 aarch64_setup_incoming_varargs (cumulative_args_t cum_v
, enum machine_mode mode
,
5840 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
5843 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
5844 CUMULATIVE_ARGS local_cum
;
5845 int gr_saved
, vr_saved
;
5847 /* The caller has advanced CUM up to, but not beyond, the last named
5848 argument. Advance a local copy of CUM past the last "real" named
5849 argument, to find out how many registers are left over. */
5851 aarch64_function_arg_advance (pack_cumulative_args(&local_cum
), mode
, type
, true);
5853 /* Found out how many registers we need to save. */
5854 gr_saved
= NUM_ARG_REGS
- local_cum
.aapcs_ncrn
;
5855 vr_saved
= NUM_FP_ARG_REGS
- local_cum
.aapcs_nvrn
;
5857 if (TARGET_GENERAL_REGS_ONLY
)
5859 if (local_cum
.aapcs_nvrn
> 0)
5860 sorry ("%qs and floating point or vector arguments",
5861 "-mgeneral-regs-only");
5871 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
5872 ptr
= plus_constant (Pmode
, virtual_incoming_args_rtx
,
5873 - gr_saved
* UNITS_PER_WORD
);
5874 mem
= gen_frame_mem (BLKmode
, ptr
);
5875 set_mem_alias_set (mem
, get_varargs_alias_set ());
5877 move_block_from_reg (local_cum
.aapcs_ncrn
+ R0_REGNUM
,
5882 /* We can't use move_block_from_reg, because it will use
5883 the wrong mode, storing D regs only. */
5884 enum machine_mode mode
= TImode
;
5887 /* Set OFF to the offset from virtual_incoming_args_rtx of
5888 the first vector register. The VR save area lies below
5889 the GR one, and is aligned to 16 bytes. */
5890 off
= -AARCH64_ROUND_UP (gr_saved
* UNITS_PER_WORD
,
5891 STACK_BOUNDARY
/ BITS_PER_UNIT
);
5892 off
-= vr_saved
* UNITS_PER_VREG
;
5894 for (i
= local_cum
.aapcs_nvrn
; i
< NUM_FP_ARG_REGS
; ++i
)
5898 ptr
= plus_constant (Pmode
, virtual_incoming_args_rtx
, off
);
5899 mem
= gen_frame_mem (mode
, ptr
);
5900 set_mem_alias_set (mem
, get_varargs_alias_set ());
5901 aarch64_emit_move (mem
, gen_rtx_REG (mode
, V0_REGNUM
+ i
));
5902 off
+= UNITS_PER_VREG
;
5907 /* We don't save the size into *PRETEND_SIZE because we want to avoid
5908 any complication of having crtl->args.pretend_args_size changed. */
5909 cfun
->machine
->saved_varargs_size
5910 = (AARCH64_ROUND_UP (gr_saved
* UNITS_PER_WORD
,
5911 STACK_BOUNDARY
/ BITS_PER_UNIT
)
5912 + vr_saved
* UNITS_PER_VREG
);
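#if 0
/* Illustrative sketch (not part of GCC): the size computation done at the
   end of aarch64_setup_incoming_varargs above.  Numbers are an example.  */
#include <stdio.h>

int
main (void)
{
  const int UNITS_PER_WORD = 8, UNITS_PER_VREG = 16, STACK_ALIGN = 16;
  int gr_saved = 5, vr_saved = 7;       /* unused argument registers */

  int gr_bytes = gr_saved * UNITS_PER_WORD;
  int gr_area  = (gr_bytes + STACK_ALIGN - 1) & -STACK_ALIGN;  /* round up */
  int total    = gr_area + vr_saved * UNITS_PER_VREG;

  printf ("saved_varargs_size = %d bytes\n", total);   /* 48 + 112 = 160 */
  return 0;
}
#endif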
5916 aarch64_conditional_register_usage (void)
5921 for (i
= V0_REGNUM
; i
<= V31_REGNUM
; i
++)
5924 call_used_regs
[i
] = 1;
5929 /* Walk down the type tree of TYPE counting consecutive base elements.
5930 If *MODEP is VOIDmode, then set it to the first valid floating point
5931 type. If a non-floating point type is found, or if a floating point
5932 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5933 otherwise return the count in the sub-tree. */
5935 aapcs_vfp_sub_candidate (const_tree type
, enum machine_mode
*modep
)
5937 enum machine_mode mode
;
5940 switch (TREE_CODE (type
))
5943 mode
= TYPE_MODE (type
);
5944 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= TFmode
)
5947 if (*modep
== VOIDmode
)
5956 mode
= TYPE_MODE (TREE_TYPE (type
));
5957 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= TFmode
)
5960 if (*modep
== VOIDmode
)
5969 /* Use V2SImode and V4SImode as representatives of all 64-bit
5970 and 128-bit vector types. */
5971 size
= int_size_in_bytes (type
);
5984 if (*modep
== VOIDmode
)
5987 /* Vector modes are considered to be opaque: two vectors are
5988 equivalent for the purposes of being homogeneous aggregates
5989 if they are the same size. */
5998 tree index
= TYPE_DOMAIN (type
);
6000 /* Can't handle incomplete types. */
6001 if (!COMPLETE_TYPE_P (type
))
6004 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
6007 || !TYPE_MAX_VALUE (index
)
6008 || !host_integerp (TYPE_MAX_VALUE (index
), 1)
6009 || !TYPE_MIN_VALUE (index
)
6010 || !host_integerp (TYPE_MIN_VALUE (index
), 1)
6014 count
*= (1 + tree_low_cst (TYPE_MAX_VALUE (index
), 1)
6015 - tree_low_cst (TYPE_MIN_VALUE (index
), 1));
6017 /* There must be no padding. */
6018 if (!host_integerp (TYPE_SIZE (type
), 1)
6019 || (tree_low_cst (TYPE_SIZE (type
), 1)
6020 != count
* GET_MODE_BITSIZE (*modep
)))
6032 /* Can't handle incomplete types. */
6033 if (!COMPLETE_TYPE_P (type
))
6036 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
6038 if (TREE_CODE (field
) != FIELD_DECL
)
6041 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
6047 /* There must be no padding. */
6048 if (!host_integerp (TYPE_SIZE (type
), 1)
6049 || (tree_low_cst (TYPE_SIZE (type
), 1)
6050 != count
* GET_MODE_BITSIZE (*modep
)))
6057 case QUAL_UNION_TYPE
:
6059 /* These aren't very interesting except in a degenerate case. */
6064 /* Can't handle incomplete types. */
6065 if (!COMPLETE_TYPE_P (type
))
6068 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
6070 if (TREE_CODE (field
) != FIELD_DECL
)
6073 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
6076 count
= count
> sub_count
? count
: sub_count
;
6079 /* There must be no padding. */
6080 if (!host_integerp (TYPE_SIZE (type
), 1)
6081 || (tree_low_cst (TYPE_SIZE (type
), 1)
6082 != count
* GET_MODE_BITSIZE (*modep
)))
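#if 0
/* Illustrative sketch (not part of GCC): types that aapcs_vfp_sub_candidate
   above does and does not treat as homogeneous floating-point aggregates
   (at most four fields of a single floating-point mode, no padding).  */
struct hfa2      { double x, y; };          /* HFA: 2 x DFmode               */
struct hfa4      { float a, b, c, d; };     /* HFA: 4 x SFmode               */
struct hfa_array { float v[3]; };           /* HFA: array counts as 3 fields */
struct too_many  { float v[5]; };           /* not an HFA: more than 4       */
struct mixed     { float a; double b; };    /* not an HFA: mixed modes       */
struct not_fp    { float a; int b; };       /* not an HFA: non-FP member     */

int main (void) { return 0; }
#endif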
6095 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
6096 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
6097 array types. The C99 floating-point complex types are also considered
6098 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
6099 types, which are GCC extensions and out of the scope of AAPCS64, are
6100 treated as composite types here as well.
6102 Note that MODE itself is not sufficient in determining whether a type
6103 is such a composite type or not. This is because
6104 stor-layout.c:compute_record_mode may have already changed the MODE
6105 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
6106 structure with only one field may have its MODE set to the mode of the
6107 field. Also an integer mode whose size matches the size of the
6108 RECORD_TYPE type may be used to substitute the original mode
6109 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
6110 solely relied on. */
6113 aarch64_composite_type_p (const_tree type
,
6114 enum machine_mode mode
)
6116 if (type
&& (AGGREGATE_TYPE_P (type
) || TREE_CODE (type
) == COMPLEX_TYPE
))
6120 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
6121 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
6127 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
6128 type as described in AAPCS64 \S 4.1.2.
6130 See the comment above aarch64_composite_type_p for the notes on MODE. */
6133 aarch64_short_vector_p (const_tree type
,
6134 enum machine_mode mode
)
6136 HOST_WIDE_INT size
= -1;
6138 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
6139 size
= int_size_in_bytes (type
);
6140 else if (!aarch64_composite_type_p (type
, mode
)
6141 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
6142 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
))
6143 size
= GET_MODE_SIZE (mode
);
6145 return (size
== 8 || size
== 16) ? true : false;
6148 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
6149 shall be passed or returned in simd/fp register(s) (providing these
6150 parameter passing registers are available).
6152 Upon successful return, *COUNT returns the number of needed registers,
6153 *BASE_MODE returns the mode of the individual register and when IS_HAF
6154 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
6155 floating-point aggregate or a homogeneous short-vector aggregate. */
6158 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode
,
6160 enum machine_mode
*base_mode
,
6164 enum machine_mode new_mode
= VOIDmode
;
6165 bool composite_p
= aarch64_composite_type_p (type
, mode
);
6167 if (is_ha
!= NULL
) *is_ha
= false;
6169 if ((!composite_p
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
)
6170 || aarch64_short_vector_p (type
, mode
))
6175 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
6177 if (is_ha
!= NULL
) *is_ha
= true;
6179 new_mode
= GET_MODE_INNER (mode
);
6181 else if (type
&& composite_p
)
6183 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
6185 if (ag_count
> 0 && ag_count
<= HA_MAX_NUM_FLDS
)
6187 if (is_ha
!= NULL
) *is_ha
= true;
6196 *base_mode
= new_mode
;
/* Implement TARGET_STRUCT_VALUE_RTX.  */

static rtx
aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
                          int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
}

/* Implements target hook vector_mode_supported_p.  */
static bool
aarch64_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SIMD
      && (mode == V4SImode  || mode == V8HImode
          || mode == V16QImode || mode == V2DImode
          || mode == V2SImode  || mode == V4HImode
          || mode == V8QImode || mode == V2SFmode
          || mode == V4SFmode || mode == V2DFmode))
    return true;

  return false;
}
6224 /* Return appropriate SIMD container
6225 for MODE within a vector of WIDTH bits. */
6226 static enum machine_mode
6227 aarch64_simd_container_mode (enum machine_mode mode
, unsigned width
)
6229 gcc_assert (width
== 64 || width
== 128);
6268 /* Return 128-bit container as the preferred SIMD mode for MODE. */
6269 static enum machine_mode
6270 aarch64_preferred_simd_mode (enum machine_mode mode
)
6272 return aarch64_simd_container_mode (mode
, 128);
6275 /* Return the bitmask of possible vector sizes for the vectorizer
6278 aarch64_autovectorize_vector_sizes (void)
6283 /* A table to help perform AArch64-specific name mangling for AdvSIMD
6284 vector types in order to conform to the AAPCS64 (see "Procedure
6285 Call Standard for the ARM 64-bit Architecture", Appendix A). To
6286 qualify for emission with the mangled names defined in that document,
6287 a vector type must not only be of the correct mode but also be
6288 composed of AdvSIMD vector element types (e.g.
6289 _builtin_aarch64_simd_qi); these types are registered by
6290 aarch64_init_simd_builtins (). In other words, vector types defined
6291 in other ways e.g. via vector_size attribute will get default
6295 enum machine_mode mode
;
6296 const char *element_type_name
;
6297 const char *mangled_name
;
6298 } aarch64_simd_mangle_map_entry
;
6300 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map
[] = {
6301 /* 64-bit containerized types. */
6302 { V8QImode
, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
6303 { V8QImode
, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
6304 { V4HImode
, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
6305 { V4HImode
, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
6306 { V2SImode
, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
6307 { V2SImode
, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
6308 { V2SFmode
, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
6309 { V8QImode
, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
6310 { V4HImode
, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6311 /* 128-bit containerized types. */
6312 { V16QImode
, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
6313 { V16QImode
, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
6314 { V8HImode
, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
6315 { V8HImode
, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
6316 { V4SImode
, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
6317 { V4SImode
, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
6318 { V2DImode
, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
6319 { V2DImode
, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
6320 { V4SFmode
, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
6321 { V2DFmode
, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6322 { V16QImode
, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6323 { V8HImode
, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6324 { VOIDmode
, NULL
, NULL
}
6327 /* Implement TARGET_MANGLE_TYPE. */
6330 aarch64_mangle_type (const_tree type
)
/* The AArch64 ABI documents say that "__va_list" has to be
   mangled as if it is in the "std" namespace.  */
6334 if (lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
6335 return "St9__va_list";
6337 /* Check the mode of the vector type, and the name of the vector
6338 element type, against the table. */
6339 if (TREE_CODE (type
) == VECTOR_TYPE
)
6341 aarch64_simd_mangle_map_entry
*pos
= aarch64_simd_mangle_map
;
6343 while (pos
->mode
!= VOIDmode
)
6345 tree elt_type
= TREE_TYPE (type
);
6347 if (pos
->mode
== TYPE_MODE (type
)
6348 && TREE_CODE (TYPE_NAME (elt_type
)) == TYPE_DECL
6349 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type
))),
6350 pos
->element_type_name
))
6351 return pos
->mangled_name
;
6357 /* Use the default mangling. */
/* Return the equivalent letter for size.  */
static char
sizetochar (int size)
{
  switch (size)
    {
    case 64: return 'd';
    case 32: return 's';
    case 16: return 'h';
    case 8 : return 'b';
    default: gcc_unreachable ();
    }
}
6375 /* Return true iff x is a uniform vector of floating-point
6376 constants, and the constant can be represented in
6377 quarter-precision form. Note, as aarch64_float_const_representable
6378 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
6380 aarch64_vect_float_const_representable_p (rtx x
)
6383 REAL_VALUE_TYPE r0
, ri
;
6386 if (GET_MODE_CLASS (GET_MODE (x
)) != MODE_VECTOR_FLOAT
)
6389 x0
= CONST_VECTOR_ELT (x
, 0);
6390 if (!CONST_DOUBLE_P (x0
))
6393 REAL_VALUE_FROM_CONST_DOUBLE (r0
, x0
);
6395 for (i
= 1; i
< CONST_VECTOR_NUNITS (x
); i
++)
6397 xi
= CONST_VECTOR_ELT (x
, i
);
6398 if (!CONST_DOUBLE_P (xi
))
6401 REAL_VALUE_FROM_CONST_DOUBLE (ri
, xi
);
6402 if (!REAL_VALUES_EQUAL (r0
, ri
))
6406 return aarch64_float_const_representable_p (x0
);
6409 /* Return true for valid and false for invalid. */
6411 aarch64_simd_valid_immediate (rtx op
, enum machine_mode mode
, bool inverse
,
6412 struct simd_immediate_info
*info
)
6414 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6416 for (i = 0; i < idx; i += (STRIDE)) \
6421 immtype = (CLASS); \
6422 elsize = (ELSIZE); \
6428 unsigned int i
, elsize
= 0, idx
= 0, n_elts
= CONST_VECTOR_NUNITS (op
);
6429 unsigned int innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
6430 unsigned char bytes
[16];
6431 int immtype
= -1, matches
;
6432 unsigned int invmask
= inverse
? 0xff : 0;
6435 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
6437 if (! (aarch64_simd_imm_zero_p (op
, mode
)
6438 || aarch64_vect_float_const_representable_p (op
)))
6443 info
->value
= CONST_VECTOR_ELT (op
, 0);
6444 info
->element_width
= GET_MODE_BITSIZE (GET_MODE (info
->value
));
6452 /* Splat vector constant out into a byte vector. */
6453 for (i
= 0; i
< n_elts
; i
++)
6455 rtx el
= CONST_VECTOR_ELT (op
, i
);
6456 unsigned HOST_WIDE_INT elpart
;
6457 unsigned int part
, parts
;
6459 if (GET_CODE (el
) == CONST_INT
)
6461 elpart
= INTVAL (el
);
6464 else if (GET_CODE (el
) == CONST_DOUBLE
)
6466 elpart
= CONST_DOUBLE_LOW (el
);
6472 for (part
= 0; part
< parts
; part
++)
6475 for (byte
= 0; byte
< innersize
; byte
++)
6477 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
6478 elpart
>>= BITS_PER_UNIT
;
6480 if (GET_CODE (el
) == CONST_DOUBLE
)
6481 elpart
= CONST_DOUBLE_HIGH (el
);
6486 gcc_assert (idx
== GET_MODE_SIZE (mode
));
6490 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
6491 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 0, 0);
6493 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
6494 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 8, 0);
6496 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
6497 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0, 16, 0);
6499 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
6500 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3], 24, 0);
6502 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0, 0, 0);
6504 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1], 8, 0);
6506 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
6507 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 0, 1);
6509 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
6510 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 8, 1);
6512 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
6513 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff, 16, 1);
6515 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
6516 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3], 24, 1);
6518 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff, 0, 1);
6520 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1], 8, 1);
6522 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
6523 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 8, 0);
6525 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
6526 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 8, 1);
6528 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
6529 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0, 16, 0);
6531 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
6532 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff, 16, 1);
6534 CHECK (1, 8, 16, bytes
[i
] == bytes
[0], 0, 0);
6536 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
6537 && bytes
[i
] == bytes
[(i
+ 8) % idx
], 0, 0);
6546 info
->element_width
= elsize
;
6547 info
->mvn
= emvn
!= 0;
6548 info
->shift
= eshift
;
6550 unsigned HOST_WIDE_INT imm
= 0;
6552 if (immtype
>= 12 && immtype
<= 15)
6555 /* Un-invert bytes of recognized vector, if necessary. */
6557 for (i
= 0; i
< idx
; i
++)
6558 bytes
[i
] ^= invmask
;
6562 /* FIXME: Broken on 32-bit H_W_I hosts. */
6563 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
6565 for (i
= 0; i
< 8; i
++)
6566 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
6567 << (i
* BITS_PER_UNIT
);
6570 info
->value
= GEN_INT (imm
);
6574 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
6575 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
6577 /* Construct 'abcdefgh' because the assembler cannot handle
6578 generic constants. */
6581 imm
= (imm
>> info
->shift
) & 0xff;
6582 info
->value
= GEN_INT (imm
);
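#if 0
/* Illustrative sketch (not part of GCC): the idea behind the CHECK patterns
   in aarch64_simd_valid_immediate above.  The vector constant is splatted
   into a byte image and each pattern asks whether the image is a single
   byte placed at a fixed position within every element, the remaining
   bytes being all-zero (MOVI) or all-ones (MVNI).  Only the 32-bit
   shifted-MOVI family is modelled here.  */
#include <stdint.h>
#include <stdio.h>

/* Does the 16-byte image consist of 32-bit elements equal to
   (byte << (8 * pos)), i.e. a MOVI with a shifted 8-bit immediate?  */
static int
is_movi_32_shifted (const uint8_t bytes[16], int pos)
{
  for (int i = 0; i < 16; i += 4)
    for (int j = 0; j < 4; j++)
      {
        uint8_t expect = (j == pos) ? bytes[pos] : 0;
        if (bytes[i + j] != expect)
          return 0;
      }
  return 1;
}

int
main (void)
{
  uint8_t img[16];
  /* Build the little-endian byte image of a V4SI constant { 0x4500, ... }.  */
  for (int i = 0; i < 16; i += 4)
    {
      uint32_t v = 0x4500;
      for (int j = 0; j < 4; j++)
        img[i + j] = (v >> (8 * j)) & 0xff;
    }
  printf ("movi ...#0x45, lsl #8 ? %d\n", is_movi_32_shifted (img, 1));
  return 0;
}
#endif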
static bool
aarch64_const_vec_all_same_int_p (rtx x,
                                  HOST_WIDE_INT minval,
                                  HOST_WIDE_INT maxval)
{
  HOST_WIDE_INT firstval;
  int count, i;

  if (GET_CODE (x) != CONST_VECTOR
      || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
    return false;

  firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
  if (firstval < minval || firstval > maxval)
    return false;

  count = CONST_VECTOR_NUNITS (x);
  for (i = 1; i < count; i++)
    if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
      return false;

  return true;
}

/* Check if immediate shift constants are within range.  */
bool
aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
{
  int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
  if (left)
    return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
  else
    return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
}

/* Return true if X is a uniform vector where all elements
   are either the floating-point constant 0.0 or the
   integer constant 0.  */
bool
aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
{
  return x == CONST0_RTX (mode);
}
bool
aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT imm = INTVAL (x);
  int i;

  for (i = 0; i < 8; i++)
    {
      unsigned int byte = imm & 0xff;
      if (byte != 0xff && byte != 0)
        return false;
      imm >>= 8;
    }

  return true;
}

bool
aarch64_mov_operand_p (rtx x,
                       enum aarch64_symbol_context context,
                       enum machine_mode mode)
{
  if (GET_CODE (x) == HIGH
      && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
    return true;

  if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
    return true;

  if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
    return true;

  return aarch64_classify_symbolic_expression (x, context)
    == SYMBOL_TINY_ABSOLUTE;
}

/* Return a const_int vector of VAL.  */
rtx
aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits);
  int i;

  for (i=0; i < nunits; i++)
    RTVEC_ELT (v, i) = GEN_INT (val);

  return gen_rtx_CONST_VECTOR (mode, v);
}
/* Check OP is a legal scalar immediate for the MOVI instruction.  */

bool
aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
{
  enum machine_mode vmode;

  gcc_assert (!VECTOR_MODE_P (mode));
  vmode = aarch64_preferred_simd_mode (mode);
  rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
  return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
}

/* Construct and return a PARALLEL RTX vector.  */
rtx
aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits / 2);
  int base = high ? nunits / 2 : 0;
  rtx t1;
  int i;

  for (i=0; i < nunits / 2; i++)
    RTVEC_ELT (v, i) = GEN_INT (base + i);

  t1 = gen_rtx_PARALLEL (mode, v);
  return t1;
}
/* Bounds-check lanes.  Ensure OPERAND lies between LOW (inclusive) and
   HIGH (exclusive).  */
void
aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  HOST_WIDE_INT lane;
  gcc_assert (GET_CODE (operand) == CONST_INT);
  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    error ("lane out of range");
}

void
aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  gcc_assert (GET_CODE (operand) == CONST_INT);
  HOST_WIDE_INT lane = INTVAL (operand);

  if (lane < low || lane >= high)
    error ("constant out of range");
}

/* Emit code to reinterpret one AdvSIMD type as another,
   without altering bits.  */
void
aarch64_simd_reinterpret (rtx dest, rtx src)
{
  emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
}
6745 /* Emit code to place a AdvSIMD pair result in memory locations (with equal
6748 aarch64_simd_emit_pair_result_insn (enum machine_mode mode
,
6749 rtx (*intfn
) (rtx
, rtx
, rtx
), rtx destaddr
,
6752 rtx mem
= gen_rtx_MEM (mode
, destaddr
);
6753 rtx tmp1
= gen_reg_rtx (mode
);
6754 rtx tmp2
= gen_reg_rtx (mode
);
6756 emit_insn (intfn (tmp1
, op1
, tmp2
));
6758 emit_move_insn (mem
, tmp1
);
6759 mem
= adjust_address (mem
, mode
, GET_MODE_SIZE (mode
));
6760 emit_move_insn (mem
, tmp2
);
/* Return TRUE if OP is a valid vector addressing mode.  */
bool
aarch64_simd_mem_operand_p (rtx op)
{
  return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
                        || GET_CODE (XEXP (op, 0)) == REG);
}

/* Set up OPERANDS for a register copy from SRC to DEST, taking care
   not to early-clobber SRC registers in the process.

   We assume that the operands described by SRC and DEST represent a
   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
   number of components into which the copy has been decomposed.  */
void
aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
                                rtx *src, unsigned int count)
{
  unsigned int i;

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || REGNO (operands[0]) < REGNO (operands[1]))
    {
      for (i = 0; i < count; i++)
        {
          operands[2 * i] = dest[i];
          operands[2 * i + 1] = src[i];
        }
    }
  else
    {
      for (i = 0; i < count; i++)
        {
          operands[2 * i] = dest[count - i - 1];
          operands[2 * i + 1] = src[count - i - 1];
        }
    }
}
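#if 0
/* Illustrative sketch (not part of GCC): why aarch64_simd_disambiguate_copy
   above reverses the component order when the destination overlaps the
   source at a higher register number.  Copying an overlapping range
   upwards must be done from the last element backwards, or earlier copies
   clobber components that are still needed.  */
#include <stdio.h>

int
main (void)
{
  int regs[6] = { 10, 11, 12, 0, 0, 0 };

  /* Decomposed copy of regs[0..2] (the "source") into regs[1..3] (the
     overlapping "destination"), done backwards as the code above does.  */
  for (int i = 2; i >= 0; i--)
    regs[i + 1] = regs[i];

  printf ("%d %d %d\n", regs[1], regs[2], regs[3]);   /* 10 11 12 */
  return 0;
}
#endif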
6802 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6803 one of VSTRUCT modes: OI, CI or XI. */
6805 aarch64_simd_attr_length_move (rtx insn
)
6807 enum machine_mode mode
;
6809 extract_insn_cached (insn
);
6811 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
6813 mode
= GET_MODE (recog_data
.operand
[0]);
/* Implement target hook TARGET_VECTOR_ALIGNMENT.  The AAPCS64 sets the maximum
   alignment of a vector to 128 bits.  */
static HOST_WIDE_INT
aarch64_simd_vector_alignment (const_tree type)
{
  HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
  return MIN (align, 128);
}

/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE.  */
static bool
aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
{
  if (is_packed)
    return false;

  /* We guarantee alignment for vectors up to 128-bits.  */
  if (tree_int_cst_compare (TYPE_SIZE (type),
                            bitsize_int (BIGGEST_ALIGNMENT)) > 0)
    return false;

  /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned.  */
  return true;
}
6854 /* If VALS is a vector constant that can be loaded into a register
6855 using DUP, generate instructions to do so and return an RTX to
6856 assign to the register. Otherwise return NULL_RTX. */
6858 aarch64_simd_dup_constant (rtx vals
)
6860 enum machine_mode mode
= GET_MODE (vals
);
6861 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
6862 int n_elts
= GET_MODE_NUNITS (mode
);
6863 bool all_same
= true;
6867 if (GET_CODE (vals
) != CONST_VECTOR
)
6870 for (i
= 1; i
< n_elts
; ++i
)
6872 x
= CONST_VECTOR_ELT (vals
, i
);
6873 if (!rtx_equal_p (x
, CONST_VECTOR_ELT (vals
, 0)))
6880 /* We can load this constant by using DUP and a constant in a
6881 single ARM register. This will be cheaper than a vector
6883 x
= copy_to_mode_reg (inner_mode
, CONST_VECTOR_ELT (vals
, 0));
6884 return gen_rtx_VEC_DUPLICATE (mode
, x
);
6888 /* Generate code to load VALS, which is a PARALLEL containing only
6889 constants (for vec_init) or CONST_VECTOR, efficiently into a
6890 register. Returns an RTX to copy into the register, or NULL_RTX
6891 for a PARALLEL that can not be converted into a CONST_VECTOR. */
6893 aarch64_simd_make_constant (rtx vals
)
6895 enum machine_mode mode
= GET_MODE (vals
);
6897 rtx const_vec
= NULL_RTX
;
6898 int n_elts
= GET_MODE_NUNITS (mode
);
6902 if (GET_CODE (vals
) == CONST_VECTOR
)
6904 else if (GET_CODE (vals
) == PARALLEL
)
6906 /* A CONST_VECTOR must contain only CONST_INTs and
6907 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
6908 Only store valid constants in a CONST_VECTOR. */
6909 for (i
= 0; i
< n_elts
; ++i
)
6911 rtx x
= XVECEXP (vals
, 0, i
);
6912 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
6915 if (n_const
== n_elts
)
6916 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
6921 if (const_vec
!= NULL_RTX
6922 && aarch64_simd_valid_immediate (const_vec
, mode
, false, NULL
))
6923 /* Load using MOVI/MVNI. */
6925 else if ((const_dup
= aarch64_simd_dup_constant (vals
)) != NULL_RTX
)
6926 /* Loaded using DUP. */
6928 else if (const_vec
!= NULL_RTX
)
6929 /* Load from constant pool. We can not take advantage of single-cycle
6930 LD1 because we need a PC-relative addressing mode. */
6933 /* A PARALLEL containing something not valid inside CONST_VECTOR.
6934 We can not construct an initializer. */
6939 aarch64_expand_vector_init (rtx target
, rtx vals
)
6941 enum machine_mode mode
= GET_MODE (target
);
6942 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
6943 int n_elts
= GET_MODE_NUNITS (mode
);
6944 int n_var
= 0, one_var
= -1;
6945 bool all_same
= true;
6949 x
= XVECEXP (vals
, 0, 0);
6950 if (!CONST_INT_P (x
) && !CONST_DOUBLE_P (x
))
6951 n_var
= 1, one_var
= 0;
6953 for (i
= 1; i
< n_elts
; ++i
)
6955 x
= XVECEXP (vals
, 0, i
);
6956 if (!CONST_INT_P (x
) && !CONST_DOUBLE_P (x
))
6957 ++n_var
, one_var
= i
;
6959 if (!rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
6965 rtx constant
= aarch64_simd_make_constant (vals
);
6966 if (constant
!= NULL_RTX
)
6968 emit_move_insn (target
, constant
);
6973 /* Splat a single non-constant element if we can. */
6976 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
6977 aarch64_emit_move (target
, gen_rtx_VEC_DUPLICATE (mode
, x
));
6981 /* One field is non-constant. Load constant then overwrite varying
6982 field. This is more efficient than using the stack. */
6985 rtx copy
= copy_rtx (vals
);
6986 rtx index
= GEN_INT (one_var
);
6987 enum insn_code icode
;
6989 /* Load constant part of vector, substitute neighboring value for
6991 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, one_var
^ 1);
6992 aarch64_expand_vector_init (target
, copy
);
6994 /* Insert variable. */
6995 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
6996 icode
= optab_handler (vec_set_optab
, mode
);
6997 gcc_assert (icode
!= CODE_FOR_nothing
);
6998 emit_insn (GEN_FCN (icode
) (target
, x
, index
));
7002 /* Construct the vector in memory one field at a time
7003 and load the whole vector. */
7004 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
7005 for (i
= 0; i
< n_elts
; i
++)
7006 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
7007 i
* GET_MODE_SIZE (inner_mode
)),
7008 XVECEXP (vals
, 0, i
));
7009 emit_move_insn (target
, mem
);
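#if 0
/* Illustrative sketch (not part of GCC): the strategy order used by
   aarch64_expand_vector_init above, restated as plain C.  The enum and
   function names in this sketch are placeholders, not GCC APIs.  */
#include <stdbool.h>

enum init_strategy { USE_IMMEDIATE_OR_LITERAL, USE_DUP, USE_INSERT, USE_STACK };

static enum init_strategy
choose_strategy (int n_var, bool all_same)
{
  if (n_var == 0)
    return USE_IMMEDIATE_OR_LITERAL;  /* MOVI/MVNI, DUP of a constant, or pool */
  if (all_same)
    return USE_DUP;                   /* splat one register into every lane */
  if (n_var == 1)
    return USE_INSERT;                /* build the constant part, then insert */
  return USE_STACK;                   /* spell it out in memory and load */
}

int main (void) { return choose_strategy (2, false) == USE_STACK ? 0 : 1; }
#endif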
7013 static unsigned HOST_WIDE_INT
7014 aarch64_shift_truncation_mask (enum machine_mode mode
)
7017 (aarch64_vector_mode_supported_p (mode
)
7018 || aarch64_vect_struct_mode_p (mode
)) ? 0 : (GET_MODE_BITSIZE (mode
) - 1);
#ifndef TLS_SECTION_ASM_FLAG
#define TLS_SECTION_ASM_FLAG 'T'
#endif

void
aarch64_elf_asm_named_section (const char *name, unsigned int flags,
                               tree decl ATTRIBUTE_UNUSED)
{
  char flagchars[10], *f = flagchars;

  /* If we have already declared this section, we can use an
     abbreviated form to switch back to it -- unless this section is
     part of a COMDAT groups, in which case GAS requires the full
     declaration every time.  */
  if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
      && (flags & SECTION_DECLARED))
    {
      fprintf (asm_out_file, "\t.section\t%s\n", name);
      return;
    }

  if (!(flags & SECTION_DEBUG))
    *f++ = 'a';
  if (flags & SECTION_WRITE)
    *f++ = 'w';
  if (flags & SECTION_CODE)
    *f++ = 'x';
  if (flags & SECTION_SMALL)
    *f++ = 's';
  if (flags & SECTION_MERGE)
    *f++ = 'M';
  if (flags & SECTION_STRINGS)
    *f++ = 'S';
  if (flags & SECTION_TLS)
    *f++ = TLS_SECTION_ASM_FLAG;
  if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
    *f++ = 'G';
  *f = '\0';

  fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);

  if (!(flags & SECTION_NOTYPE))
    {
      const char *type;
      const char *format;

      if (flags & SECTION_BSS)
        type = "nobits";
      else
        type = "progbits";

#ifdef TYPE_OPERAND_FMT
      format = "," TYPE_OPERAND_FMT;
#else
      format = ",@%s";
#endif

      fprintf (asm_out_file, format, type);

      if (flags & SECTION_ENTSIZE)
        fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
      if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
        {
          if (TREE_CODE (decl) == IDENTIFIER_NODE)
            fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
          else
            fprintf (asm_out_file, ",%s,comdat",
                     IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
        }
    }

  putc ('\n', asm_out_file);
}
/* Select a format to encode pointers in exception handling data.  */
int
aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
{
  int type;

  switch (aarch64_cmodel)
    {
    case AARCH64_CMODEL_TINY:
    case AARCH64_CMODEL_TINY_PIC:
    case AARCH64_CMODEL_SMALL:
    case AARCH64_CMODEL_SMALL_PIC:
      /* text+got+data < 4Gb.  4-byte signed relocs are sufficient
         for everything.  */
      type = DW_EH_PE_sdata4;
      break;
    default:
      /* No assumptions here.  8-byte relocs required.  */
      type = DW_EH_PE_sdata8;
      break;
    }
  return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
}
/* Emit load exclusive.  */

static void
aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
                             rtx mem, rtx model_rtx)
{
  rtx (*gen) (rtx, rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_aarch64_load_exclusiveqi; break;
    case HImode: gen = gen_aarch64_load_exclusivehi; break;
    case SImode: gen = gen_aarch64_load_exclusivesi; break;
    case DImode: gen = gen_aarch64_load_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, model_rtx));
}
/* Emit store exclusive.  */

static void
aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
                              rtx rval, rtx mem, rtx model_rtx)
{
  rtx (*gen) (rtx, rtx, rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_aarch64_store_exclusiveqi; break;
    case HImode: gen = gen_aarch64_store_exclusivehi; break;
    case SImode: gen = gen_aarch64_store_exclusivesi; break;
    case DImode: gen = gen_aarch64_store_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (bval, rval, mem, model_rtx));
}
/* Mark the previous jump instruction as unlikely.  */

static void
aarch64_emit_unlikely_jump (rtx insn)
{
  rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);

  insn = emit_jump_insn (insn);
  add_reg_note (insn, REG_BR_PROB, very_unlikely);
}
/* Expand a compare and swap pattern.  */

void
aarch64_expand_compare_and_swap (rtx operands[])
{
  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  enum machine_mode mode, cmp_mode;
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);
  cmp_mode = mode;

  /* Normally the succ memory model must be stronger than fail, but in the
     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */

  if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
      && INTVAL (mod_s) == MEMMODEL_RELEASE)
    mod_s = GEN_INT (MEMMODEL_ACQ_REL);

  switch (mode)
    {
    case QImode:
    case HImode:
      /* For short modes, we're going to perform the comparison in SImode,
         so do the zero-extension now.  */
      cmp_mode = SImode;
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* Fall through.  */

    case SImode:
    case DImode:
      /* Force the value into a register if needed.  */
      if (!aarch64_plus_operand (oldval, mode))
        oldval = force_reg (cmp_mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  switch (mode)
    {
    case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
    case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
    case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
    case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  x = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_EQ (SImode, x, const0_rtx);
  emit_insn (gen_rtx_SET (VOIDmode, bval, x));
}
/* Split a compare and swap pattern.  */

void
aarch64_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, scratch;
  enum machine_mode mode;
  bool is_weak;
  rtx label1, label2, x, cond;

  rval = operands[0];
  mem = operands[1];
  oldval = operands[2];
  newval = operands[3];
  is_weak = (operands[4] != const0_rtx);
  scratch = operands[7];
  mode = GET_MODE (mem);

  label1 = NULL_RTX;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);

  cond = aarch64_gen_compare_reg (NE, rval, oldval);
  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
                            gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
  aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));

  aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);

  if (!is_weak)
    {
      x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
                                gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
      aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
    }
  else
    {
      cond = gen_rtx_REG (CCmode, CC_REGNUM);
      x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, cond, x));
    }

  emit_label (label2);
}
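/* For example, a strong SImode compare-and-swap with sequentially consistent
   ordering is split into a retry loop of roughly this shape (illustrative;
   the exact load/store-exclusive variants depend on the memory model):

     .L1:  ldaxr   w0, [x1]
           cmp     w0, w2
           bne     .L2
           stlxr   w3, w4, [x1]
           cbnz    w3, .L1
     .L2:
*/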
/* Split an atomic operation.  */

void
aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
                         rtx value, rtx model_rtx, rtx cond)
{
  enum machine_mode mode = GET_MODE (mem);
  enum machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx label, x;

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
        {
          value = GEN_INT (-INTVAL (value));
          code = PLUS;
        }
      /* Fall through.  */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;
    }

  aarch64_emit_store_exclusive (mode, cond, mem,
                                gen_lowpart (mode, new_out), model_rtx);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
                            gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
  aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
}
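/* For example, an SImode atomic fetch-and-add with acquire-release ordering
   splits into a loop along these lines (illustrative only):

     .L1:  ldaxr   w0, [x2]
           add     w1, w0, w3
           stlxr   w4, w1, [x2]
           cbnz    w4, .L1
*/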
static void
aarch64_print_extension (void)
{
  const struct aarch64_option_extension *opt = NULL;

  for (opt = all_extensions; opt->name != NULL; opt++)
    if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
      asm_fprintf (asm_out_file, "+%s", opt->name);

  asm_fprintf (asm_out_file, "\n");
}
static void
aarch64_start_file (void)
{
  if (selected_arch)
    {
      asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
      aarch64_print_extension ();
    }
  else if (selected_cpu)
    {
      asm_fprintf (asm_out_file, "\t.cpu %s", selected_cpu->name);
      aarch64_print_extension ();
    }
  default_file_start ();
}
/* Target hook for c_mode_for_suffix.  */
static enum machine_mode
aarch64_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;
  return VOIDmode;
}
/* We can only represent floating point constants which will fit in
   "quarter-precision" values.  These values are characterised by
   a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given by:

     (-1)^s * (n/16) * 2^r

   where
     's' is the sign bit,
     'n' is an integer in the range 16 <= n <= 31, and
     'r' is an integer in the range -3 <= r <= 4.  */
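/* For example, 0.5 = (16/16) * 2^-1 (s = 0, n = 16, r = -1) and
   31.0 = (31/16) * 2^4 are representable, whereas 0.1 has no exact
   (n/16) * 2^r form and 0.0 is explicitly rejected below.  */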
/* Return true iff X can be represented by a quarter-precision
   floating point immediate operand X.  Note, we cannot represent 0.0.  */
bool
aarch64_float_const_representable_p (rtx x)
{
  /* This represents our current view of how many bits
     make up the mantissa.  */
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
  int exponent;
  unsigned HOST_WIDE_INT mantissa, mask;
  HOST_WIDE_INT m1, m2;
  REAL_VALUE_TYPE r, m;

  if (!CONST_DOUBLE_P (x))
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* We cannot represent infinities, NaNs or +/-zero.  We won't
     know if we have +zero until we analyse the mantissa, but we
     can reject the other invalid values.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
      || REAL_VALUE_MINUS_ZERO (r))
    return false;

  /* Extract exponent.  */
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);

  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     m1 holds the low part of the mantissa, m2 the high part.
     WARNING: If we ever have a representation using more than 2 * H_W_I - 1
     bits for the mantissa, this can fail (low bits will be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  REAL_VALUE_TO_INT (&m1, &m2, m);

  /* If the low part of the mantissa has bits set we cannot represent
     the value.  */
  if (m1 != 0)
    return false;
  /* We have rejected the lower HOST_WIDE_INT, so update our
     understanding of how many bits lie in the mantissa and
     look only at the high HOST_WIDE_INT.  */
  mantissa = m2;
  point_pos -= HOST_BITS_PER_WIDE_INT;

  /* We can only represent values with a mantissa of the form 1.xxxx.  */
  mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return false;

  /* Having filtered unrepresentable values, we may now remove all
     but the highest 5 bits.  */
  mantissa >>= point_pos - 5;

  /* We cannot represent the value 0.0, so reject it.  This is handled
     elsewhere.  */
  if (mantissa == 0)
    return false;

  /* Then, as bit 4 is always set, we can mask it off, leaving
     the mantissa in the range [0, 15].  */
  mantissa &= ~(1 << 4);
  gcc_assert (mantissa <= 15);

  /* GCC internally does not use IEEE754-like encoding (where normalized
     significands are in the range [1, 2).  GCC uses [0.5, 1) (see real.c).
     Our mantissa values are shifted 4 places to the left relative to
     normalized IEEE754 so we must modify the exponent returned by REAL_EXP
     by 5 places to correct for GCC's representation.  */
  exponent = 5 - exponent;

  return (exponent >= 0 && exponent <= 7);
}
char*
aarch64_output_simd_mov_immediate (rtx const_vector,
                                   enum machine_mode mode,
                                   unsigned width)
{
  bool is_valid;
  static char templ[40];
  const char *mnemonic;
  const char *shift_op;
  unsigned int lane_count = 0;
  char element_char;

  struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };

  /* This will return true to show const_vector is legal for use as either
     a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
     also update INFO to show how the immediate should be generated.  */
  is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
  gcc_assert (is_valid);

  element_char = sizetochar (info.element_width);
  lane_count = width / info.element_width;

  mode = GET_MODE_INNER (mode);
  if (mode == SFmode || mode == DFmode)
    {
      gcc_assert (info.shift == 0 && ! info.mvn);
      if (aarch64_float_const_zero_rtx_p (info.value))
        info.value = GEN_INT (0);
      else
        {
#define buf_size 20
          REAL_VALUE_TYPE r;
          REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
          char float_buf[buf_size] = {'\0'};
          real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
#undef buf_size

          if (lane_count == 1)
            snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
          else
            snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
                      lane_count, element_char, float_buf);
          return templ;
        }
    }

  mnemonic = info.mvn ? "mvni" : "movi";
  shift_op = info.msl ? "msl" : "lsl";

  if (lane_count == 1)
    snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
              mnemonic, UINTVAL (info.value));
  else if (info.shift)
    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
              ", %s %d", mnemonic, lane_count, element_char,
              UINTVAL (info.value), shift_op, info.shift);
  else
    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
              mnemonic, lane_count, element_char, UINTVAL (info.value));
  return templ;
}
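/* Illustrative examples of the templates built above (operand 0 is filled in
   later by output_asm_insn; the exact spelling of the floating-point
   immediate comes from real_to_decimal_for_mode):
     "movi\t%d0, 0xff"
     "movi\t%0.4s, 0x1, lsl 8"
     "fmov\t%0.2d, 1.0e+0"  */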
char*
aarch64_output_scalar_simd_mov_immediate (rtx immediate,
                                          enum machine_mode mode)
{
  enum machine_mode vmode;

  gcc_assert (!VECTOR_MODE_P (mode));
  vmode = aarch64_simd_container_mode (mode, 64);
  rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
  return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
}
/* Split operands into moves from op[1] + op[2] into op[0].  */

void
aarch64_split_combinev16qi (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  enum machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
  rtx destlo, desthi;

  gcc_assert (halfmode == V16QImode);

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
                               GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  */
  if (reg_overlap_mentioned_p (operands[2], destlo)
      && reg_overlap_mentioned_p (operands[1], desthi))
    {
      emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
      emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
      emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
    }
  else if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
         is in the right place already.  */
      if (src1 != dest)
        emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
        emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
        emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
        emit_move_insn (destlo, operands[1]);
    }
}
/* vec_perm support.  */

#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  enum machine_mode vmode;
  unsigned char nelt;
  bool one_vector_p;
  bool testing_p;
};
/* Generate a variable permutation.  */

static void
aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_SIMD);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
        {
          /* Expand the argument to a V16QI mode by duplicating it.  */
          rtx pair = gen_reg_rtx (V16QImode);
          emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
          emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
        }
      else
        {
          emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
        }
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
        {
          pair = gen_reg_rtx (V16QImode);
          emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
          emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
        }
      else
        {
          pair = gen_reg_rtx (OImode);
          emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
          emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
        }
    }
}
void
aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx rmask[MAX_VECT_LEN], mask;

  gcc_checking_assert (!BYTES_BIG_ENDIAN);

  /* The TBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
  for (i = 0; i < nelt; ++i)
    rmask[i] = mask;
  mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  aarch64_expand_vec_perm_1 (target, op0, op1, sel);
}
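/* For example, with two V8QI inputs (nelt == 8) the mask is 15, so a
   selector byte of 13 stays 13 and picks element 5 of op1 from the combined
   16-byte table; with a single input the mask is 7 and the same byte is
   reduced to 5, selecting element 5 of op0.  */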
/* Recognize patterns suitable for the TRN instructions.  */
static bool
aarch64_evpc_trn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
        return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
        return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }
  out = d->target;

  if (odd)
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_trn2v16qi; break;
        case V8QImode: gen = gen_aarch64_trn2v8qi; break;
        case V8HImode: gen = gen_aarch64_trn2v8hi; break;
        case V4HImode: gen = gen_aarch64_trn2v4hi; break;
        case V4SImode: gen = gen_aarch64_trn2v4si; break;
        case V2SImode: gen = gen_aarch64_trn2v2si; break;
        case V2DImode: gen = gen_aarch64_trn2v2di; break;
        case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
        case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
        case V2DFmode: gen = gen_aarch64_trn2v2df; break;
        default:
          return false;
        }
    }
  else
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_trn1v16qi; break;
        case V8QImode: gen = gen_aarch64_trn1v8qi; break;
        case V8HImode: gen = gen_aarch64_trn1v8hi; break;
        case V4HImode: gen = gen_aarch64_trn1v4hi; break;
        case V4SImode: gen = gen_aarch64_trn1v4si; break;
        case V2SImode: gen = gen_aarch64_trn1v2si; break;
        case V2DImode: gen = gen_aarch64_trn1v2di; break;
        case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
        case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
        case V2DFmode: gen = gen_aarch64_trn1v2df; break;
        default:
          return false;
        }
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
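/* For example, for V4SI (nelt == 4) the permutation {0, 4, 2, 6} matches
   TRN1 (odd == 0) and {1, 5, 3, 7} matches TRN2 (odd == 1).  */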
/* Recognize patterns suitable for the UZP instructions.  */
static bool
aarch64_evpc_uzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt = (i * 2 + odd) & mask;
      if (d->perm[i] != elt)
        return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }
  out = d->target;

  if (odd)
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
        case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
        case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
        case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
        case V4SImode: gen = gen_aarch64_uzp2v4si; break;
        case V2SImode: gen = gen_aarch64_uzp2v2si; break;
        case V2DImode: gen = gen_aarch64_uzp2v2di; break;
        case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
        case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
        case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
        default:
          return false;
        }
    }
  else
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
        case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
        case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
        case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
        case V4SImode: gen = gen_aarch64_uzp1v4si; break;
        case V2SImode: gen = gen_aarch64_uzp1v2si; break;
        case V2DImode: gen = gen_aarch64_uzp1v2di; break;
        case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
        case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
        case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
        default:
          return false;
        }
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
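/* For example, for V4SI (nelt == 4) the permutation {0, 2, 4, 6} matches
   UZP1 (even-indexed elements) and {1, 3, 5, 7} matches UZP2 (odd-indexed
   elements).  */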
/* Recognize patterns suitable for the ZIP instructions.  */
static bool
aarch64_evpc_zip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  high = nelt / 2;
  if (d->perm[0] == high)
    /* Do nothing.  */
    ;
  else if (d->perm[0] == 0)
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt = (i + high) & mask;
      if (d->perm[i * 2] != elt)
        return false;
      elt = (elt + nelt) & mask;
      if (d->perm[i * 2 + 1] != elt)
        return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      high = !high;
    }
  out = d->target;

  if (high)
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_zip2v16qi; break;
        case V8QImode: gen = gen_aarch64_zip2v8qi; break;
        case V8HImode: gen = gen_aarch64_zip2v8hi; break;
        case V4HImode: gen = gen_aarch64_zip2v4hi; break;
        case V4SImode: gen = gen_aarch64_zip2v4si; break;
        case V2SImode: gen = gen_aarch64_zip2v2si; break;
        case V2DImode: gen = gen_aarch64_zip2v2di; break;
        case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
        case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
        case V2DFmode: gen = gen_aarch64_zip2v2df; break;
        default:
          return false;
        }
    }
  else
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_zip1v16qi; break;
        case V8QImode: gen = gen_aarch64_zip1v8qi; break;
        case V8HImode: gen = gen_aarch64_zip1v8hi; break;
        case V4HImode: gen = gen_aarch64_zip1v4hi; break;
        case V4SImode: gen = gen_aarch64_zip1v4si; break;
        case V2SImode: gen = gen_aarch64_zip1v2si; break;
        case V2DImode: gen = gen_aarch64_zip1v2di; break;
        case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
        case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
        case V2DFmode: gen = gen_aarch64_zip1v2df; break;
        default:
          return false;
        }
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
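/* For example, for V4SI (nelt == 4) the permutation {0, 4, 1, 5} matches
   ZIP1 (low halves interleaved) and {2, 6, 3, 7} matches ZIP2 (high
   halves interleaved).  */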
static bool
aarch64_evpc_dup (struct expand_vec_perm_d *d)
{
  rtx (*gen) (rtx, rtx, rtx);
  rtx out = d->target;
  rtx in0;
  enum machine_mode vmode = d->vmode;
  unsigned int i, elt, nelt = d->nelt;
  rtx lane;

  /* TODO: This may not be big-endian safe.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; i++)
    {
      if (elt != d->perm[i])
        return false;
    }

  /* The generic preparation in aarch64_expand_vec_perm_const_1
     swaps the operand order and the permute indices if it finds
     d->perm[0] to be in the second operand.  Thus, we can always
     use d->op0 and need not do any extra arithmetic to get the
     correct lane number.  */
  in0 = d->op0;
  lane = GEN_INT (elt);

  switch (vmode)
    {
    case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
    case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
    case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
    case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
    case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
    case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
    case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
    case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
    case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
    case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
    default:
      return false;
    }

  emit_insn (gen (out, in0, lane));
  return true;
}
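/* For example, for V4SI the permutation {2, 2, 2, 2} becomes a single DUP
   broadcasting lane 2 of the first input (e.g. "dup v0.4s, v1.s[2]").  */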
static bool
aarch64_evpc_tbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  enum machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  /* TODO: ARM's TBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
static bool
aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;
      rtx x;

      for (i = 0; i < nelt; ++i)
        d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);

      x = d->op0;
      d->op0 = d->op1;
      d->op1 = x;
    }

  if (TARGET_SIMD)
    {
      if (aarch64_evpc_zip (d))
        return true;
      else if (aarch64_evpc_uzp (d))
        return true;
      else if (aarch64_evpc_trn (d))
        return true;
      else if (aarch64_evpc_dup (d))
        return true;
      return aarch64_evpc_tbl (d);
    }

  return false;
}
/* Expand a vec_perm_const pattern.  */

bool
aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
        break;

      /* The elements of PERM do not suggest that only the first operand
         is used, but both operands are identical.  Allow easier matching
         of the permutation by folding the permutation into the single
         input vector.  */
      /* Fall through.  */
    case 2:
      for (i = 0; i < nelt; ++i)
        d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return aarch64_expand_vec_perm_const_1 (&d);
}
static bool
aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
                                     const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Calculate whether all elements are in one vector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* If all elements are from the second vector, reindex as if from the
     first vector.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to a single vector.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = aarch64_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST aarch64_address_cost

/* This hook determines whether unnamed bitfields affect the alignment
   of the containing structure.  The hook returns true if the structure
   should inherit the alignment requirements of an unnamed bitfield's
   type.  */
#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
  hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START aarch64_start_file

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk

#undef TARGET_ASM_SELECT_RTX_SECTION
#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list

#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE aarch64_can_eliminate

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage

/* Only the least significant bit is used for initialization guard
   variables.  */
#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix

#ifdef TARGET_BIG_ENDIAN_DEFAULT
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
#endif

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL aarch64_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN aarch64_fold_builtin

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG aarch64_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance

#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE aarch64_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS aarch64_init_builtins

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE aarch64_mangle_type

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

/* This target hook should return true if accesses to volatile bitfields
   should use the narrowest mode possible.  It should return false if these
   accesses should use the bitfield container type.  */
#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE aarch64_override_options

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  aarch64_override_options_after_change

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB aarch64_return_in_msb

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS aarch64_rtx_costs

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p

#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  aarch64_builtin_vectorization_cost

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  aarch64_builtin_vectorized_function

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  aarch64_autovectorize_vector_sizes

/* Section anchor support.  */

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -256

/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
   byte offset; we can do much more for larger data types, but have no way
   to determine the size of the access.  We assume accesses are aligned.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  aarch64_simd_vector_alignment_reachable

/* vec_perm support.  */

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  aarch64_vectorize_vec_perm_const_ok

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-aarch64.h"