/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2014 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "insn-codes.h"
#include "rtl.h"
#include "insn-attr.h"
#include "tree.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "regs.h"
#include "df.h"
#include "hard-reg-set.h"
#include "target.h"
#include "target-def.h"
#include "targhooks.h"
#include "langhooks.h"
#include "diagnostic-core.h"
#include "pointer-set.h"
#include "hash-table.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "gimple-expr.h"
#include "tree-vectorizer.h"
#include "config/arm/aarch-cost-tables.h"
/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC
       A constant symbolic address, in pc-relative literal pool.  */

enum aarch64_address_type
{
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};

struct aarch64_address_info
{
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};
struct simd_immediate_info
{
  rtx value;
  int shift;
  int element_width;
  bool mvn;
  bool msl;
};
/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif
static bool aarch64_lra_p (void);
static bool aarch64_composite_type_p (const_tree, enum machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
						     const_tree,
						     enum machine_mode *, int *,
						     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (enum machine_mode);
static unsigned bit_count (unsigned HOST_WIDE_INT);
static bool aarch64_const_vec_all_same_int_p (rtx,
					      HOST_WIDE_INT, HOST_WIDE_INT);
static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
						 const unsigned char *sel);
static int aarch64_address_cost (rtx, enum machine_mode, addr_space_t, bool);
/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = cortexa53;
/* The current tuning set.  */
const struct tune_params *aarch64_tune_params;

/* Mask to specify which instructions we are allowed to generate.  */
unsigned long aarch64_isa_flags = 0;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;
/* Tuning parameters.  */

#if HAVE_DESIGNATED_INITIALIZERS
#define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
#else
#define NAMED_PARAM(NAME, VAL) (VAL)
#endif

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_addrcost_table generic_addrcost_table =
{
#if HAVE_DESIGNATED_INITIALIZERS
  .addr_scale_costs =
#endif
    {
      NAMED_PARAM (hi, 0),
      NAMED_PARAM (si, 0),
      NAMED_PARAM (di, 0),
      NAMED_PARAM (ti, 0),
    },
  NAMED_PARAM (pre_modify, 0),
  NAMED_PARAM (post_modify, 0),
  NAMED_PARAM (register_offset, 0),
  NAMED_PARAM (register_extend, 0),
  NAMED_PARAM (imm_offset, 0)
};
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_addrcost_table cortexa57_addrcost_table =
{
#if HAVE_DESIGNATED_INITIALIZERS
  .addr_scale_costs =
#endif
    {
      NAMED_PARAM (hi, 1),
      NAMED_PARAM (si, 0),
      NAMED_PARAM (di, 0),
      NAMED_PARAM (ti, 1),
    },
  NAMED_PARAM (pre_modify, 0),
  NAMED_PARAM (post_modify, 0),
  NAMED_PARAM (register_offset, 0),
  NAMED_PARAM (register_extend, 0),
  NAMED_PARAM (imm_offset, 0),
};
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_regmove_cost generic_regmove_cost =
{
  NAMED_PARAM (GP2GP, 1),
  NAMED_PARAM (GP2FP, 2),
  NAMED_PARAM (FP2GP, 2),
  /* We currently do not provide direct support for TFmode Q->Q move.
     Therefore we need to raise the cost above 2 in order to have
     reload handle the situation.  */
  NAMED_PARAM (FP2FP, 4)
};
/* Generic costs for vector insn classes.  */
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_vector_cost generic_vector_cost =
{
  NAMED_PARAM (scalar_stmt_cost, 1),
  NAMED_PARAM (scalar_load_cost, 1),
  NAMED_PARAM (scalar_store_cost, 1),
  NAMED_PARAM (vec_stmt_cost, 1),
  NAMED_PARAM (vec_to_scalar_cost, 1),
  NAMED_PARAM (scalar_to_vec_cost, 1),
  NAMED_PARAM (vec_align_load_cost, 1),
  NAMED_PARAM (vec_unalign_load_cost, 1),
  NAMED_PARAM (vec_unalign_store_cost, 1),
  NAMED_PARAM (vec_store_cost, 1),
  NAMED_PARAM (cond_taken_branch_cost, 3),
  NAMED_PARAM (cond_not_taken_branch_cost, 1)
};
/* Costs for vector insn classes for Cortex-A57.  */
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_vector_cost cortexa57_vector_cost =
{
  NAMED_PARAM (scalar_stmt_cost, 1),
  NAMED_PARAM (scalar_load_cost, 4),
  NAMED_PARAM (scalar_store_cost, 1),
  NAMED_PARAM (vec_stmt_cost, 3),
  NAMED_PARAM (vec_to_scalar_cost, 8),
  NAMED_PARAM (scalar_to_vec_cost, 8),
  NAMED_PARAM (vec_align_load_cost, 5),
  NAMED_PARAM (vec_unalign_load_cost, 5),
  NAMED_PARAM (vec_unalign_store_cost, 1),
  NAMED_PARAM (vec_store_cost, 1),
  NAMED_PARAM (cond_taken_branch_cost, 1),
  NAMED_PARAM (cond_not_taken_branch_cost, 1)
};
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct tune_params generic_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  NAMED_PARAM (memmov_cost, 4),
  NAMED_PARAM (issue_rate, 2)
};

static const struct tune_params cortexa53_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  NAMED_PARAM (memmov_cost, 4),
  NAMED_PARAM (issue_rate, 2)
};

static const struct tune_params cortexa57_tunings =
{
  &cortexa57_extra_costs,
  &cortexa57_addrcost_table,
  &generic_regmove_cost,
  &cortexa57_vector_cost,
  NAMED_PARAM (memmov_cost, 4),
  NAMED_PARAM (issue_rate, 3)
};
/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};
/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
#include "aarch64-cores.def"
#undef AARCH64_CORE
  {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8,
   &generic_tunings},
  {NULL, aarch64_none, NULL, 0, NULL}
};
/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, FLAGS, NULL},
#include "aarch64-arches.def"
#undef AARCH64_ARCH
  {NULL, aarch64_none, NULL, 0, NULL}
};
/* Target specification.  These are populated as commandline arguments
   are processed, or NULL if not specified.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};
/* ISA extensions in AArch64.  */
static const struct aarch64_option_extension all_extensions[] =
{
#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
  {NAME, FLAGS_ON, FLAGS_OFF},
#include "aarch64-option-extensions.def"
#undef AARCH64_OPT_EXTENSION
  {NULL, 0, 0}
};
/* Used to track the size of an address when generating a pre/post
   increment address.  */
static enum machine_mode aarch64_memory_reference_mode;

/* Used to force GTY into this file.  */
static GTY(()) int gty_dummy;
/* A table of valid AArch64 "bitmask immediate" values for
   logical instructions.  */

#define AARCH64_NUM_BITMASKS  5334
static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
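
/* A bitmask immediate is, loosely, a contiguous run of ones within an
   element of 2, 4, 8, 16, 32 or 64 bits, rotated by any amount and
   replicated to fill the register; 0x00ff00ff00ff00ff is one example.
   The table above caches all such values so that the immediate
   synthesis code can search them.  */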
typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
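
/* The enumeration above pairs each condition with its inverse at
   adjacent values (EQ/NE, CS/CC, ..., AL/NV), so inverting a condition
   is just a matter of flipping the low bit; e.g.
   AARCH64_INVERSE_CONDITION_CODE (AARCH64_GE) is AARCH64_LT.  */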
/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return AARCH64_DWARF_R0 + regno - R0_REGNUM;
  else if (regno == SP_REGNUM)
    return AARCH64_DWARF_SP;
  else if (FP_REGNUM_P (regno))
    return AARCH64_DWARF_V0 + regno - V0_REGNUM;

  /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
     equivalent DWARF register.  */
  return DWARF_FRAME_REGISTERS;
}
/* Return TRUE if MODE is any of the large INT modes.  */
static bool
aarch64_vect_struct_mode_p (enum machine_mode mode)
{
  return mode == OImode || mode == CImode || mode == XImode;
}

/* Return TRUE if MODE is any of the vector modes.  */
static bool
aarch64_vector_mode_p (enum machine_mode mode)
{
  return aarch64_vector_mode_supported_p (mode)
	 || aarch64_vect_struct_mode_p (mode);
}
/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
static bool
aarch64_array_mode_supported_p (enum machine_mode mode,
				unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_SIMD
      && AARCH64_VALID_SIMD_QREG_MODE (mode)
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}
/* Implement HARD_REGNO_NREGS.  */
int
aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
{
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
    default:
      return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    }
  gcc_unreachable ();
}
/* Implement HARD_REGNO_MODE_OK.  */
int
aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return regno == CC_REGNUM;

  if (regno == SP_REGNUM)
    /* The purpose of comparing with ptr_mode is to support the
       global register variable associated with the stack pointer
       register via the syntax of asm ("wsp") in ILP32.  */
    return mode == Pmode || mode == ptr_mode;

  if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
    return mode == Pmode;

  if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
    return 1;

  if (FP_REGNUM_P (regno))
    {
      if (aarch64_vect_struct_mode_p (mode))
	return
	  (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
      else
	return 1;
    }

  return 0;
}
/* Implement HARD_REGNO_CALLER_SAVE_MODE.  */
enum machine_mode
aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
				     enum machine_mode mode)
{
  /* Handle modes that fit within single registers.  */
  if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
    {
      if (GET_MODE_SIZE (mode) >= 4)
	return mode;
      else
	return SImode;
    }
  /* Fall back to generic for multi-reg and very large modes.  */
  else
    return choose_hard_reg_mode (regno, nregs, false);
}
/* Return true if calls to DECL should be treated as
   long-calls (i.e. called via a register).  */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
  return false;
}

/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (i.e. called via a register).  */
bool
aarch64_is_long_call_p (rtx sym)
{
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}
/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

     (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
bool
aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
				rtx extract_imm)
{
  HOST_WIDE_INT mult_val, extract_val;

  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
    return false;

  mult_val = INTVAL (mult_imm);
  extract_val = INTVAL (extract_imm);

  if (extract_val > 8
      && extract_val < GET_MODE_BITSIZE (mode)
      && exact_log2 (extract_val & ~7) > 0
      && (extract_val & 7) <= 4
      && mult_val == (1 << (extract_val & 7)))
    return true;

  return false;
}
/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
  enum machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
  return cc_reg;
}
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

rtx
aarch64_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}
/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;
  rtx sym, addend;

  if (GET_CODE (addr) == CONST)
    {
      split_const (addr, &sym, &addend);
      if (GET_CODE (sym) == SYMBOL_REF)
	tls_kind = SYMBOL_REF_TLS_MODEL (sym);
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}
/* We'll allow lo_sum's in addresses in our legitimate addresses
   so that combine would take care of combining addresses where
   necessary, but for generation purposes, we'll generate the address
   as:
   RTL                               Absolute
   tmp = hi (symbol_ref);            adrp  x1, foo
   dest = lo_sum (tmp, symbol_ref);  add  dest, x1, :lo12:foo

   PIC                               TLS
   adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
   ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo
                                     bl   __tls_get_addr
                                     nop

   Load TLS symbol, depending on TLS mechanism and TLS access model.

   Global Dynamic - Traditional TLS:
   adrp tmp, :tlsgd:imm
   add  dest, tmp, #:tlsgd_lo12:imm
   bl   __tls_get_addr

   Global Dynamic - TLS Descriptors:
   adrp dest, :tlsdesc:imm
   ldr  tmp, [dest, #:tlsdesc_lo12:imm]
   add  dest, dest, #:tlsdesc_lo12:imm
   blr  tmp
   mrs  tp, tpidr_el0
   add  dest, dest, tp

   Initial Exec:
   mrs  tp, tpidr_el0
   adrp tmp, :gottprel:imm
   ldr  dest, [tmp, #:gottprel_lo12:imm]
   add  dest, dest, tp

   Local Exec:
   mrs  tp, tpidr_el0
   add  t0, tp, #:tprel_hi12:imm
   add  t0, #:tprel_lo12_nc:imm
*/
static void
aarch64_load_symref_appropriately (rtx dest, rtx imm,
				   enum aarch64_symbol_type type)
{
  switch (type)
    {
    case SYMBOL_SMALL_ABSOLUTE:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode.  */
	rtx tmp_reg = dest;
	enum machine_mode mode = GET_MODE (dest);

	gcc_assert (mode == Pmode || mode == ptr_mode);

	if (can_create_pseudo_p ())
	  tmp_reg = gen_reg_rtx (mode);

	emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
	emit_insn (gen_add_losym (dest, tmp_reg, imm));
	return;
      }

    case SYMBOL_TINY_ABSOLUTE:
      emit_insn (gen_rtx_SET (Pmode, dest, imm));
      return;

    case SYMBOL_SMALL_GOT:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the got entry is always of SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in the memory), it has SImode; it may have
	   DImode if dest is dereferenced to access the memory.
	   This is why we have to handle three different ldr_got_small
	   patterns here (two patterns for ILP32).  */
	rtx tmp_reg = dest;
	enum machine_mode mode = GET_MODE (dest);

	if (can_create_pseudo_p ())
	  tmp_reg = gen_reg_rtx (mode);

	emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
	    else
	      emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
	  }
	else
	  {
	    gcc_assert (mode == Pmode);
	    emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
	  }
	return;
      }

    case SYMBOL_SMALL_TLSGD:
      {
	rtx insns;
	rtx result = gen_rtx_REG (Pmode, R0_REGNUM);

	start_sequence ();
	emit_call_insn (gen_tlsgd_small (result, imm));
	insns = get_insns ();
	end_sequence ();

	RTL_CONST_CALL_P (insns) = 1;
	emit_libcall_block (insns, dest, result, imm);
	return;
      }

    case SYMBOL_SMALL_TLSDESC:
      {
	enum machine_mode mode = GET_MODE (dest);
	rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
	rtx tp;

	gcc_assert (mode == Pmode || mode == ptr_mode);

	/* In ILP32, the got entry is always of SImode size.  Unlike
	   small GOT, the dest is fixed at reg 0.  */
	if (TARGET_ILP32)
	  emit_insn (gen_tlsdesc_small_si (imm));
	else
	  emit_insn (gen_tlsdesc_small_di (imm));
	tp = aarch64_load_tp (NULL);

	if (mode != Pmode)
	  tp = gen_lowpart (mode, tp);

	emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_SMALL_GOTTPREL:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the got entry is always of SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in the memory), it has SImode; it may have
	   DImode if dest is dereferenced to access the memory.
	   This is why we have to handle three different tlsie_small
	   patterns here (two patterns for ILP32).  */
	enum machine_mode mode = GET_MODE (dest);
	rtx tmp_reg = gen_reg_rtx (mode);
	rtx tp = aarch64_load_tp (NULL);

	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      emit_insn (gen_tlsie_small_di (tmp_reg, imm));
	    else
	      {
		emit_insn (gen_tlsie_small_si (tmp_reg, imm));
		tp = gen_lowpart (mode, tp);
	      }
	  }
	else
	  {
	    gcc_assert (mode == Pmode);
	    emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
	  }

	emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_SMALL_TPREL:
      {
	rtx tp = aarch64_load_tp (NULL);
	emit_insn (gen_tlsle_small (dest, tp, imm));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_TINY_GOT:
      emit_insn (gen_ldr_got_tiny (dest, imm));
      return;

    default:
      gcc_unreachable ();
    }
}
/* Emit a move from SRC to DEST.  Assume that the move expanders can
   handle all moves if !can_create_pseudo_p ().  The distinction is
   important because, unlike emit_move_insn, the move expanders know
   how to force Pmode objects into the constant pool even when the
   constant pool address is not itself legitimate.  */
static rtx
aarch64_emit_move (rtx dest, rtx src)
{
  return (can_create_pseudo_p ()
	  ? emit_move_insn (dest, src)
	  : emit_move_insn_1 (dest, src));
}
/* Split a 128-bit move operation into two 64-bit move operations,
   taking care to handle partial overlap of register to register
   copies.  Special cases are needed when moving between GP regs and
   FP regs.  SRC can be a register, constant or memory; DST a register
   or memory.  If either operand is memory it must not have any side
   effects.  */
void
aarch64_split_128bit_move (rtx dst, rtx src)
{
  rtx dst_lo, dst_hi;
  rtx src_lo, src_hi;

  enum machine_mode mode = GET_MODE (dst);

  gcc_assert (mode == TImode || mode == TFmode);
  gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
  gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);

  if (REG_P (dst) && REG_P (src))
    {
      int src_regno = REGNO (src);
      int dst_regno = REGNO (dst);

      /* Handle FP <-> GP regs.  */
      if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
	{
	  src_lo = gen_lowpart (word_mode, src);
	  src_hi = gen_highpart (word_mode, src);

	  if (mode == TImode)
	    {
	      emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
	      emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
	    }
	  else
	    {
	      emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
	      emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
	    }
	  return;
	}
      else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
	{
	  dst_lo = gen_lowpart (word_mode, dst);
	  dst_hi = gen_highpart (word_mode, dst);

	  if (mode == TImode)
	    {
	      emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
	      emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
	    }
	  else
	    {
	      emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
	      emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
	    }
	  return;
	}
    }

  dst_lo = gen_lowpart (word_mode, dst);
  dst_hi = gen_highpart (word_mode, dst);
  src_lo = gen_lowpart (word_mode, src);
  src_hi = gen_highpart_mode (word_mode, mode, src);

  /* At most one pairing may overlap.  */
  if (reg_overlap_mentioned_p (dst_lo, src_hi))
    {
      aarch64_emit_move (dst_hi, src_hi);
      aarch64_emit_move (dst_lo, src_lo);
    }
  else
    {
      aarch64_emit_move (dst_lo, src_lo);
      aarch64_emit_move (dst_hi, src_hi);
    }
}
/* Return true if the 128-bit move DST <- SRC needs to be split into two
   64-bit moves, i.e. unless it is a direct FP-register to FP-register
   copy.  */
bool
aarch64_split_128bit_move_p (rtx dst, rtx src)
{
  return (! REG_P (src)
	  || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
}
/* Split a complex SIMD combine.  */

void
aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
{
  enum machine_mode src_mode = GET_MODE (src1);
  enum machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src1) && REG_P (src2))
    {
      rtx (*gen) (rtx, rtx, rtx);

      switch (src_mode)
	{
	case V8QImode:
	  gen = gen_aarch64_simd_combinev8qi;
	  break;
	case V4HImode:
	  gen = gen_aarch64_simd_combinev4hi;
	  break;
	case V2SImode:
	  gen = gen_aarch64_simd_combinev2si;
	  break;
	case V2SFmode:
	  gen = gen_aarch64_simd_combinev2sf;
	  break;
	case DImode:
	  gen = gen_aarch64_simd_combinedi;
	  break;
	case DFmode:
	  gen = gen_aarch64_simd_combinedf;
	  break;
	default:
	  gcc_unreachable ();
	}

      emit_insn (gen (dst, src1, src2));
      return;
    }
}
/* Split a complex SIMD move.  */

void
aarch64_split_simd_move (rtx dst, rtx src)
{
  enum machine_mode src_mode = GET_MODE (src);
  enum machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src))
    {
      rtx (*gen) (rtx, rtx);

      gcc_assert (VECTOR_MODE_P (src_mode));

      switch (src_mode)
	{
	case V16QImode:
	  gen = gen_aarch64_split_simd_movv16qi;
	  break;
	case V8HImode:
	  gen = gen_aarch64_split_simd_movv8hi;
	  break;
	case V4SImode:
	  gen = gen_aarch64_split_simd_movv4si;
	  break;
	case V2DImode:
	  gen = gen_aarch64_split_simd_movv2di;
	  break;
	case V4SFmode:
	  gen = gen_aarch64_split_simd_movv4sf;
	  break;
	case V2DFmode:
	  gen = gen_aarch64_split_simd_movv2df;
	  break;
	default:
	  gcc_unreachable ();
	}

      emit_insn (gen (dst, src));
      return;
    }
}
static rtx
aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
{
  if (can_create_pseudo_p ())
    return force_reg (mode, value);
  else
    {
      x = aarch64_emit_move (x, value);
      return x;
    }
}
static rtx
aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg,
		    HOST_WIDE_INT offset)
{
  if (!aarch64_plus_immediate (GEN_INT (offset), mode))
    {
      rtx high;
      /* Load the full offset into a register.  This
	 might be improvable in the future.  */
      high = GEN_INT (offset);
      offset = 0;
      high = aarch64_force_temporary (mode, temp, high);
      reg = aarch64_force_temporary (mode, temp,
				     gen_rtx_PLUS (mode, high, reg));
    }
  return plus_constant (mode, reg, offset);
}
void
aarch64_expand_mov_immediate (rtx dest, rtx imm)
{
  enum machine_mode mode = GET_MODE (dest);
  unsigned HOST_WIDE_INT mask;
  int i;
  bool first;
  unsigned HOST_WIDE_INT val;
  bool subtargets;
  rtx subtarget;
  int one_match, zero_match;

  gcc_assert (mode == SImode || mode == DImode);

  /* Check on what type of symbol it is.  */
  if (GET_CODE (imm) == SYMBOL_REF
      || GET_CODE (imm) == LABEL_REF
      || GET_CODE (imm) == CONST)
    {
      rtx mem, base, offset;
      enum aarch64_symbol_type sty;

      /* If we have (const (plus symbol offset)), separate out the offset
	 before we start classifying the symbol.  */
      split_const (imm, &base, &offset);

      sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
      switch (sty)
	{
	case SYMBOL_FORCE_TO_MEM:
	  if (offset != const0_rtx
	      && targetm.cannot_force_const_mem (mode, imm))
	    {
	      gcc_assert (can_create_pseudo_p ());
	      base = aarch64_force_temporary (mode, dest, base);
	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
	      aarch64_emit_move (dest, base);
	      return;
	    }
	  mem = force_const_mem (ptr_mode, imm);
	  gcc_assert (mem);
	  if (mode != ptr_mode)
	    mem = gen_rtx_ZERO_EXTEND (mode, mem);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
	  return;

	case SYMBOL_SMALL_TLSGD:
	case SYMBOL_SMALL_TLSDESC:
	case SYMBOL_SMALL_GOTTPREL:
	case SYMBOL_SMALL_GOT:
	case SYMBOL_TINY_GOT:
	  if (offset != const0_rtx)
	    {
	      gcc_assert (can_create_pseudo_p ());
	      base = aarch64_force_temporary (mode, dest, base);
	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
	      aarch64_emit_move (dest, base);
	      return;
	    }
	  /* FALLTHRU */

	case SYMBOL_SMALL_TPREL:
	case SYMBOL_SMALL_ABSOLUTE:
	case SYMBOL_TINY_ABSOLUTE:
	  aarch64_load_symref_appropriately (dest, imm, sty);
	  return;

	default:
	  gcc_unreachable ();
	}
    }

  if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
    {
      emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
      return;
    }

  if (!CONST_INT_P (imm))
    {
      if (GET_CODE (imm) == HIGH)
	emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
      else
	{
	  rtx mem = force_const_mem (mode, imm);
	  gcc_assert (mem);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
	}

      return;
    }

  if (mode == SImode)
    {
      /* We know we can't do this in 1 insn, and we must be able to do it
	 in two; so don't mess around looking for sequences that don't buy
	 us anything.  */
      emit_insn (gen_rtx_SET (VOIDmode, dest,
			      GEN_INT (INTVAL (imm) & 0xffff)));
      emit_insn (gen_insv_immsi (dest, GEN_INT (16),
				 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
      return;
    }

  /* Remaining cases are all for DImode.  */

  val = INTVAL (imm);
  subtargets = optimize && can_create_pseudo_p ();

  one_match = 0;
  zero_match = 0;
  mask = 0xffff;

  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) == 0)
	zero_match++;
      else if ((val & mask) == mask)
	one_match++;
    }

  if (one_match == 2)
    {
      mask = 0xffff;
      for (i = 0; i < 64; i += 16, mask <<= 16)
	{
	  if ((val & mask) != mask)
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
	      emit_insn (gen_insv_immdi (dest, GEN_INT (i),
					 GEN_INT ((val >> i) & 0xffff)));
	      return;
	    }
	}
      gcc_unreachable ();
    }

  if (zero_match == 2)
    goto simple_sequence;

  mask = 0x0ffff0000UL;
  for (i = 16; i < 64; i += 16, mask <<= 16)
    {
      HOST_WIDE_INT comp = mask & ~(mask - 1);

      if (aarch64_uimm12_shift (val - (val & mask)))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT (val & mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - (val & mask))));
	  return;
	}
      else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT ((val + comp) & mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - ((val + comp) & mask))));
	  return;
	}
      else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT ((val - comp) | ~mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - ((val - comp) | ~mask))));
	  return;
	}
      else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT (val | ~mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - (val | ~mask))));
	  return;
	}
    }

  /* See if we can do it by arithmetically combining two
     immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      int j;
      mask = 0xffff;

      if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
	  || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT (aarch64_bitmasks[i])));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - aarch64_bitmasks[i])));
	  return;
	}

      for (j = 0; j < 64; j += 16, mask <<= 16)
	{
	  if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, dest,
				      GEN_INT (aarch64_bitmasks[i])));
	      emit_insn (gen_insv_immdi (dest, GEN_INT (j),
					 GEN_INT ((val >> j) & 0xffff)));
	      return;
	    }
	}
    }

  /* See if we can do it by logically combining two immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
	{
	  int j;

	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
	    if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
	      {
		subtarget = subtargets ? gen_reg_rtx (mode) : dest;
		emit_insn (gen_rtx_SET (VOIDmode, subtarget,
					GEN_INT (aarch64_bitmasks[i])));
		emit_insn (gen_iordi3 (dest, subtarget,
				       GEN_INT (aarch64_bitmasks[j])));
		return;
	      }
	}
      else if ((val & aarch64_bitmasks[i]) == val)
	{
	  int j;

	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
	    if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
	      {
		subtarget = subtargets ? gen_reg_rtx (mode) : dest;
		emit_insn (gen_rtx_SET (VOIDmode, subtarget,
					GEN_INT (aarch64_bitmasks[j])));
		emit_insn (gen_anddi3 (dest, subtarget,
				       GEN_INT (aarch64_bitmasks[i])));
		return;
	      }
	}
    }

 simple_sequence:
  first = true;
  mask = 0xffff;
  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) != 0)
	{
	  if (first)
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, dest,
				      GEN_INT (val & mask)));
	      first = false;
	    }
	  else
	    emit_insn (gen_insv_immdi (dest, GEN_INT (i),
				       GEN_INT ((val >> i) & 0xffff)));
	}
    }
}
static bool
aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  /* Indirect calls are not currently supported.  */
  if (decl == NULL)
    return false;

  /* Cannot tail-call to long-calls, since these are outside of the
     range of a branch instruction (we could handle this if we added
     support for indirect tail-calls).  */
  if (aarch64_decl_is_long_call_p (decl))
    return false;

  return true;
}
/* Implement TARGET_PASS_BY_REFERENCE.  */

static bool
aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
			   enum machine_mode mode,
			   const_tree type,
			   bool named ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  enum machine_mode dummymode;
  int nregs;

  /* GET_MODE_SIZE (BLKmode) is useless since it is 0.  */
  size = (mode == BLKmode && type)
	 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);

  /* Aggregates are passed by reference based on their size.  */
  if (type && AGGREGATE_TYPE_P (type))
    size = int_size_in_bytes (type);

  /* Variable sized arguments are always passed by reference.  */
  if (size < 0)
    return true;

  /* Can this be a candidate to be passed in fp/simd register(s)?  */
  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
					       &dummymode, &nregs, NULL))
    return false;

  /* Arguments which are variable sized or larger than 2 registers are
     passed by reference unless they are a homogeneous floating point
     aggregate.  */
  return size > 2 * UNITS_PER_WORD;
}
1326 aarch64_return_in_msb (const_tree valtype
)
1328 enum machine_mode dummy_mode
;
1331 /* Never happens in little-endian mode. */
1332 if (!BYTES_BIG_ENDIAN
)
1335 /* Only composite types smaller than or equal to 16 bytes can
1336 be potentially returned in registers. */
1337 if (!aarch64_composite_type_p (valtype
, TYPE_MODE (valtype
))
1338 || int_size_in_bytes (valtype
) <= 0
1339 || int_size_in_bytes (valtype
) > 16)
1342 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1343 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1344 is always passed/returned in the least significant bits of fp/simd
1346 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype
), valtype
,
1347 &dummy_mode
, &dummy_int
, NULL
))
/* Implement TARGET_FUNCTION_VALUE.
   Define how to find the value returned by a function.  */

static rtx
aarch64_function_value (const_tree type, const_tree func,
			bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode mode;
  int unsignedp;
  int count;
  enum machine_mode ag_mode;

  mode = TYPE_MODE (type);
  if (INTEGRAL_TYPE_P (type))
    mode = promote_function_mode (type, mode, &unsignedp, func, 1);

  if (aarch64_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);

      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
	}
    }

  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
					       &ag_mode, &count, NULL))
    {
      if (!aarch64_composite_type_p (type, mode))
	{
	  gcc_assert (count == 1 && mode == ag_mode);
	  return gen_rtx_REG (mode, V0_REGNUM);
	}
      else
	{
	  int i;
	  rtx par;

	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
	  for (i = 0; i < count; i++)
	    {
	      rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				       GEN_INT (i * GET_MODE_SIZE (ag_mode)));
	      XVECEXP (par, 0, i) = tmp;
	    }
	  return par;
	}
    }

  return gen_rtx_REG (mode, R0_REGNUM);
}
/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
   Return true if REGNO is the number of a hard register in which the values
   of called function may come back.  */

static bool
aarch64_function_value_regno_p (const unsigned int regno)
{
  /* Maximum of 16 bytes can be returned in the general registers.  Examples
     of 16-byte return values are: 128-bit integers and 16-byte small
     structures (excluding homogeneous floating-point aggregates).  */
  if (regno == R0_REGNUM || regno == R1_REGNUM)
    return true;

  /* Up to four fp/simd registers can return a function value, e.g. a
     homogeneous floating-point aggregate having four members.  */
  if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
    return !TARGET_GENERAL_REGS_ONLY;

  return false;
}
/* Implement TARGET_RETURN_IN_MEMORY.

   If the type T of the result of a function is such that
     void func (T arg)
   would require that arg be passed as a value in a register (or set of
   registers) according to the parameter passing rules, then the result
   is returned in the same registers as would be used for such an
   argument.  */

static bool
aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  enum machine_mode ag_mode;
  int count;

  if (!AGGREGATE_TYPE_P (type)
      && TREE_CODE (type) != COMPLEX_TYPE
      && TREE_CODE (type) != VECTOR_TYPE)
    /* Simple scalar types always returned in registers.  */
    return false;

  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
					       type,
					       &ag_mode,
					       &count,
					       NULL))
    return false;

  /* Types larger than 2 registers returned in memory.  */
  size = int_size_in_bytes (type);
  return (size < 0 || size > 2 * UNITS_PER_WORD);
}
static bool
aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
			       const_tree type, int *nregs)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  return aarch64_vfp_is_call_or_return_candidate (mode,
						  type,
						  &pcum->aapcs_vfp_rmode,
						  nregs,
						  NULL);
}
/* Given MODE and TYPE of a function argument, return the alignment in
   bits.  The idea is to suppress any stronger alignment requested by
   the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
   This is a helper function for local use only.  */

static unsigned int
aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
{
  unsigned int alignment;

  if (type)
    {
      if (!integer_zerop (TYPE_SIZE (type)))
	{
	  if (TYPE_MODE (type) == mode)
	    alignment = TYPE_ALIGN (type);
	  else
	    alignment = GET_MODE_ALIGNMENT (mode);
	}
      else
	alignment = 0;
    }
  else
    alignment = GET_MODE_ALIGNMENT (mode);

  return alignment;
}
/* Layout a function argument according to the AAPCS64 rules.  The rule
   numbers refer to the rule numbers in the AAPCS64.  */

static void
aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
		    const_tree type,
		    bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int ncrn, nvrn, nregs;
  bool allocate_ncrn, allocate_nvrn;

  /* We need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
  allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
						 mode,
						 type,
						 &nregs);

  /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
     The following code thus handles passing by SIMD/FP registers first.  */

  nvrn = pcum->aapcs_nvrn;

  /* C1 - C5 for floating point, homogeneous floating point aggregates (HFA)
     and homogeneous short-vector aggregates (HVA).  */
  if (allocate_nvrn)
    {
      if (nvrn + nregs <= NUM_FP_ARG_REGS)
	{
	  pcum->aapcs_nextnvrn = nvrn + nregs;
	  if (!aarch64_composite_type_p (type, mode))
	    {
	      gcc_assert (nregs == 1);
	      pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
	    }
	  else
	    {
	      rtx par;
	      int i;

	      par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
	      for (i = 0; i < nregs; i++)
		{
		  rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
					 V0_REGNUM + nvrn + i);
		  tmp = gen_rtx_EXPR_LIST
		    (VOIDmode, tmp,
		     GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
		  XVECEXP (par, 0, i) = tmp;
		}
	      pcum->aapcs_reg = par;
	    }
	  return;
	}
      else
	{
	  /* C.3 NSRN is set to 8.  */
	  pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
	  goto on_stack;
	}
    }

  ncrn = pcum->aapcs_ncrn;
  nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
	   + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* C6 - C9, though the sign and zero extension semantics are
     handled elsewhere.  This is the case where the argument fits
     entirely in general registers.  */
  if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
    {
      unsigned int alignment = aarch64_function_arg_alignment (mode, type);

      gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);

      /* C.8 if the argument has an alignment of 16 then the NGRN is
	 rounded up to the next even number.  */
      if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
	{
	  ++ncrn;
	  gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
	}
      /* NREGS can be 0 when e.g. an empty structure is to be passed.
	 A reg is still generated for it, but the caller should be smart
	 enough not to use it.  */
      if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
	pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
      else
	{
	  rtx par;
	  int i;

	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
	  for (i = 0; i < nregs; i++)
	    {
	      rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				       GEN_INT (i * UNITS_PER_WORD));
	      XVECEXP (par, 0, i) = tmp;
	    }
	  pcum->aapcs_reg = par;
	}

      pcum->aapcs_nextncrn = ncrn + nregs;
      return;
    }

  /* C.11  */
  pcum->aapcs_nextncrn = NUM_ARG_REGS;

  /* The argument is passed on stack; record the needed number of words for
     this argument (we can re-use NREGS) and align the total size if
     necessary.  */
on_stack:
  pcum->aapcs_stack_words = nregs;
  if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
    pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
					       16 / UNITS_PER_WORD) + 1;
  return;
}
/* Implement TARGET_FUNCTION_ARG.  */

static rtx
aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
		      const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);

  if (mode == VOIDmode)
    return NULL_RTX;

  aarch64_layout_arg (pcum_v, mode, type, named);
  return pcum->aapcs_reg;
}
void
aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
			      const_tree fntype ATTRIBUTE_UNUSED,
			      rtx libname ATTRIBUTE_UNUSED,
			      const_tree fndecl ATTRIBUTE_UNUSED,
			      unsigned n_named ATTRIBUTE_UNUSED)
{
  pcum->aapcs_ncrn = 0;
  pcum->aapcs_nvrn = 0;
  pcum->aapcs_nextncrn = 0;
  pcum->aapcs_nextnvrn = 0;
  pcum->pcs_variant = ARM_PCS_AAPCS64;
  pcum->aapcs_reg = NULL_RTX;
  pcum->aapcs_arg_processed = false;
  pcum->aapcs_stack_words = 0;
  pcum->aapcs_stack_size = 0;

  return;
}
static void
aarch64_function_arg_advance (cumulative_args_t pcum_v,
			      enum machine_mode mode,
			      const_tree type,
			      bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  if (pcum->pcs_variant == ARM_PCS_AAPCS64)
    {
      aarch64_layout_arg (pcum_v, mode, type, named);
      gcc_assert ((pcum->aapcs_reg != NULL_RTX)
		  != (pcum->aapcs_stack_words != 0));
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
      pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
      pcum->aapcs_stack_size += pcum->aapcs_stack_words;
      pcum->aapcs_stack_words = 0;
      pcum->aapcs_reg = NULL_RTX;
    }
}
bool
aarch64_function_arg_regno_p (unsigned regno)
{
  return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
	  || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
}
/* Implement FUNCTION_ARG_BOUNDARY.  Every parameter gets at least
   PARM_BOUNDARY bits of alignment, but will be given anything up
   to STACK_BOUNDARY bits if the type requires it.  This makes sure
   that both before and after the layout of each argument, the Next
   Stacked Argument Address (NSAA) will have a minimum alignment of
   8 bytes.  */

static unsigned int
aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
{
  unsigned int alignment = aarch64_function_arg_alignment (mode, type);

  if (alignment < PARM_BOUNDARY)
    alignment = PARM_BOUNDARY;
  if (alignment > STACK_BOUNDARY)
    alignment = STACK_BOUNDARY;
  return alignment;
}
/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).

   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte of the stack slot has useful data.

   Small aggregate types are placed in the lowest memory address.

   The related parameter passing rules are B.4, C.3, C.5 and C.14.  */

bool
aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
{
  /* On little-endian targets, the least significant byte of every stack
     argument is passed at the lowest byte address of the stack slot.  */
  if (!BYTES_BIG_ENDIAN)
    return true;

  /* Otherwise, integral, floating-point and pointer types are padded downward:
     the least significant byte of a stack argument is passed at the highest
     byte address of the stack slot.  */
  if (type
      ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
	 || POINTER_TYPE_P (type))
      : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
    return false;

  /* Everything else padded upward, i.e. data in first byte of stack slot.  */
  return true;
}
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).

   It specifies padding for the last (may also be the only)
   element of a block move between registers and memory.  If
   assuming the block is in the memory, padding upward means that
   the last element is padded after its highest significant byte,
   while in downward padding, the last element is padded at its
   least significant byte side.

   Small aggregates and small complex types are always padded
   upwards.

   We don't need to worry about homogeneous floating-point or
   short-vector aggregates; their move is not affected by the
   padding direction determined here.  Regardless of endianness,
   each element of such an aggregate is put in the least
   significant bits of a fp/simd register.

   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */

bool
aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
			bool first ATTRIBUTE_UNUSED)
{
  /* Small composite types are always padded upward.  */
  if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
    {
      HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
			    : GET_MODE_SIZE (mode));
      if (size < 2 * UNITS_PER_WORD)
	return true;
    }

  /* Otherwise, use the default padding.  */
  return !BYTES_BIG_ENDIAN;
}
static enum machine_mode
aarch64_libgcc_cmp_return_mode (void)
{
  return SImode;
}
static bool
aarch64_frame_pointer_required (void)
{
  /* If the function contains dynamic stack allocations, we need to
     use the frame pointer to access the static parts of the frame.  */
  if (cfun->calls_alloca)
    return true;

  /* In aarch64_override_options_after_change
     flag_omit_leaf_frame_pointer turns off the frame pointer by
     default.  Turn it back on now if we've not got a leaf
     function.  */
  if (flag_omit_leaf_frame_pointer
      && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
    return true;

  return false;
}
/* Mark the registers that need to be saved by the callee and calculate
   the size of the callee-saved registers area and frame record (both FP
   and LR may be omitted).  */
static void
aarch64_layout_frame (void)
{
  HOST_WIDE_INT offset = 0;
  int regno;

  if (reload_completed && cfun->machine->frame.laid_out)
    return;

  /* First mark all the registers that really need to be saved...  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    cfun->machine->frame.reg_offset[regno] = -1;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    cfun->machine->frame.reg_offset[regno] = -1;

  /* ... that includes the eh data registers (if needed)...  */
  if (crtl->calls_eh_return)
    for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
      cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;

  /* ... and any callee saved register that dataflow says is live.  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    if (df_regs_ever_live_p (regno)
	&& !call_used_regs[regno])
      cfun->machine->frame.reg_offset[regno] = 0;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    if (df_regs_ever_live_p (regno)
	&& !call_used_regs[regno])
      cfun->machine->frame.reg_offset[regno] = 0;

  if (frame_pointer_needed)
    {
      cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
      cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
      cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
    }

  /* Now assign stack slots for them.  */
  for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
    if (cfun->machine->frame.reg_offset[regno] != -1)
      {
	cfun->machine->frame.reg_offset[regno] = offset;
	offset += UNITS_PER_WORD;
      }

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    if (cfun->machine->frame.reg_offset[regno] != -1)
      {
	cfun->machine->frame.reg_offset[regno] = offset;
	offset += UNITS_PER_WORD;
      }

  if (frame_pointer_needed)
    {
      cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
      offset += UNITS_PER_WORD;
    }

  if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
    {
      cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
      offset += UNITS_PER_WORD;
    }

  cfun->machine->frame.padding0 =
    (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
  offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);

  cfun->machine->frame.saved_regs_size = offset;
  cfun->machine->frame.laid_out = true;
}
/* Make the last instruction frame-related and note that it performs
   the operation described by FRAME_PATTERN.  */

static void
aarch64_set_frame_expr (rtx frame_pattern)
{
  rtx insn;

  insn = get_last_insn ();
  RTX_FRAME_RELATED_P (insn) = 1;
  RTX_FRAME_RELATED_P (frame_pattern) = 1;
  REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
				      frame_pattern,
				      REG_NOTES (insn));
}
static bool
aarch64_register_saved_on_entry (int regno)
{
  return cfun->machine->frame.reg_offset[regno] != -1;
}
static void
aarch64_save_or_restore_fprs (int start_offset, int increment,
			      bool restore, rtx base_rtx)
{
  unsigned regno;
  unsigned regno2;
  rtx insn;
  rtx (*gen_mem_ref)(enum machine_mode, rtx)
    = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    {
      if (aarch64_register_saved_on_entry (regno))
	{
	  rtx mem;
	  mem = gen_mem_ref (DFmode,
			     plus_constant (Pmode,
					    base_rtx,
					    start_offset));

	  for (regno2 = regno + 1;
	       regno2 <= V31_REGNUM
		 && !aarch64_register_saved_on_entry (regno2);
	       regno2++)
	    {
	      /* Empty loop.  */
	    }
	  if (regno2 <= V31_REGNUM &&
	      aarch64_register_saved_on_entry (regno2))
	    {
	      rtx mem2;
	      /* Next highest register to be saved.  */
	      mem2 = gen_mem_ref (DFmode,
				  plus_constant
				  (Pmode,
				   base_rtx,
				   start_offset + increment));
	      if (restore == false)
		{
		  insn = emit_insn
		    ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
					mem2, gen_rtx_REG (DFmode, regno2)));
		}
	      else
		{
		  insn = emit_insn
		    ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
				       gen_rtx_REG (DFmode, regno2), mem2));

		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DFmode, regno));
		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DFmode, regno2));
		}

	      /* The first part of a frame-related parallel insn
		 is always assumed to be relevant to the frame
		 calculations; subsequent parts are only
		 frame-related if explicitly marked.  */
	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
	      regno = regno2;
	      start_offset += increment * 2;
	    }
	  else
	    {
	      if (restore == false)
		insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
	      else
		{
		  insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DImode, regno));
		}
	      start_offset += increment;
	    }
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
/* OFFSET is the offset from the stack pointer at which the saves and
   restores have to happen.  */
static void
aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
					       bool restore)
{
  rtx insn;
  rtx base_rtx = stack_pointer_rtx;
  HOST_WIDE_INT start_offset = offset;
  HOST_WIDE_INT increment = UNITS_PER_WORD;
  rtx (*gen_mem_ref)(enum machine_mode, rtx)
    = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
  unsigned limit = (frame_pointer_needed)? R28_REGNUM : R30_REGNUM;
  unsigned regno;
  unsigned regno2;

  for (regno = R0_REGNUM; regno <= limit; regno++)
    {
      if (aarch64_register_saved_on_entry (regno))
	{
	  rtx mem;
	  mem = gen_mem_ref (Pmode,
			     plus_constant (Pmode,
					    base_rtx,
					    start_offset));

	  for (regno2 = regno + 1;
	       regno2 <= limit
		 && !aarch64_register_saved_on_entry (regno2);
	       regno2++)
	    {
	      /* Empty loop.  */
	    }
	  if (regno2 <= limit &&
	      aarch64_register_saved_on_entry (regno2))
	    {
	      rtx mem2;
	      /* Next highest register to be saved.  */
	      mem2 = gen_mem_ref (Pmode,
				  plus_constant
				  (Pmode,
				   base_rtx,
				   start_offset + increment));
	      if (restore == false)
		{
		  insn = emit_insn
		    ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
					mem2, gen_rtx_REG (DImode, regno2)));
		}
	      else
		{
		  insn = emit_insn
		    ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
				       gen_rtx_REG (DImode, regno2), mem2));

		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DImode, regno));
		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DImode, regno2));
		}

	      /* The first part of a frame-related parallel insn
		 is always assumed to be relevant to the frame
		 calculations; subsequent parts are only
		 frame-related if explicitly marked.  */
	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
	      regno = regno2;
	      start_offset += increment * 2;
	    }
	  else
	    {
	      if (restore == false)
		insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
	      else
		{
		  insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DImode, regno));
		}
	      start_offset += increment;
	    }
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
}
/* AArch64 stack frames generated by this compiler look like:

	+-------------------------------+
	|                               |
	|  incoming stack arguments     |
	|                               |
	+-------------------------------+ <-- arg_pointer_rtx
	|                               |
	|  callee-allocated save area   |
	|  for register varargs         |
	|                               |
	+-------------------------------+ <-- frame_pointer_rtx
	|                               |
	|  local variables              |
	|                               |
	+-------------------------------+
	|  padding0                     | \
	+-------------------------------+  |
	|  callee-saved registers       |  | frame.saved_regs_size
	+-------------------------------+  |
	|  LR'                          |  |
	+-------------------------------+  |
	|  FP'                          | /
      P +-------------------------------+ <-- hard_frame_pointer_rtx
	|  dynamic allocation           |
	+-------------------------------+
	|                               |
	|  outgoing stack arguments     |
	|                               |
	+-------------------------------+ <-- stack_pointer_rtx

   Dynamic stack allocations such as alloca insert data at point P.
   They decrease stack_pointer_rtx but leave frame_pointer_rtx and
   hard_frame_pointer_rtx unchanged.  */
2120 /* Generate the prologue instructions for entry into a function.
2121 Establish the stack frame by decreasing the stack pointer with a
2122 properly calculated size and, if necessary, create a frame record
2123 filled with the values of LR and previous frame pointer. The
2124 current FP is also set up if it is in use. */
2127 aarch64_expand_prologue (void)
2129 /* sub sp, sp, #<frame_size>
2130 stp {fp, lr}, [sp, #<frame_size> - 16]
2131 add fp, sp, #<frame_size> - hardfp_offset
2132 stp {cs_reg}, [fp, #-16] etc.
2134 sub sp, sp, <final_adjustment_if_any>
2136 HOST_WIDE_INT original_frame_size
; /* local variables + vararg save */
2137 HOST_WIDE_INT frame_size
, offset
;
2138 HOST_WIDE_INT fp_offset
; /* FP offset from SP */
2141 aarch64_layout_frame ();
2142 original_frame_size
= get_frame_size () + cfun
->machine
->saved_varargs_size
;
2143 gcc_assert ((!cfun
->machine
->saved_varargs_size
|| cfun
->stdarg
)
2144 && (cfun
->stdarg
|| !cfun
->machine
->saved_varargs_size
));
2145 frame_size
= (original_frame_size
+ cfun
->machine
->frame
.saved_regs_size
2146 + crtl
->outgoing_args_size
);
2147 offset
= frame_size
= AARCH64_ROUND_UP (frame_size
,
2148 STACK_BOUNDARY
/ BITS_PER_UNIT
);
2150 if (flag_stack_usage_info
)
  current_function_static_stack_size = frame_size;

  fp_offset = (offset
	       - original_frame_size
	       - cfun->machine->frame.saved_regs_size);

  /* Store pairs and load pairs have an offset range of only -512 to 504.  */
  if (offset >= 512)
    {
      /* When the frame has a large size, an initial decrease is done on
	 the stack pointer to jump over the callee-allocated save area for
	 register varargs, the local variable area and/or the callee-saved
	 register area.  This will allow the pre-index write-back
	 store pair instructions to be used for setting up the stack frame
	 efficiently.  */
      offset = original_frame_size + cfun->machine->frame.saved_regs_size;
      if (offset >= 512)
	offset = cfun->machine->frame.saved_regs_size;

      frame_size -= (offset + crtl->outgoing_args_size);
      fp_offset = 0;

      if (frame_size >= 0x1000000)
	{
	  rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
	  emit_move_insn (op0, GEN_INT (-frame_size));
	  emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
	  aarch64_set_frame_expr (gen_rtx_SET
				  (Pmode, stack_pointer_rtx,
				   plus_constant (Pmode, stack_pointer_rtx,
						  -frame_size)));
	}
      else if (frame_size > 0)
	{
	  if ((frame_size & 0xfff) != frame_size)
	    {
	      insn = emit_insn (gen_add2_insn
				(stack_pointer_rtx,
				 GEN_INT (-(frame_size
					    & ~(HOST_WIDE_INT)0xfff))));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	  if ((frame_size & 0xfff) != 0)
	    {
	      insn = emit_insn (gen_add2_insn
				(stack_pointer_rtx,
				 GEN_INT (-(frame_size
					    & (HOST_WIDE_INT)0xfff))));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}
    }
  else
    frame_size = -1;

  if (offset > 0)
    {
      /* Save the frame pointer and lr if the frame pointer is needed
	 first.  Make the frame pointer point to the location of the
	 old frame pointer on the stack.  */
      if (frame_pointer_needed)
	{
	  rtx mem_fp, mem_lr;

	  if (fp_offset)
	    {
	      insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					       GEN_INT (-offset)));
	      RTX_FRAME_RELATED_P (insn) = 1;
	      aarch64_set_frame_expr (gen_rtx_SET
				      (Pmode, stack_pointer_rtx,
				       gen_rtx_MINUS (Pmode,
						      stack_pointer_rtx,
						      GEN_INT (offset))));
	      mem_fp = gen_frame_mem (DImode,
				      plus_constant (Pmode,
						     stack_pointer_rtx,
						     fp_offset));
	      mem_lr = gen_frame_mem (DImode,
				      plus_constant (Pmode,
						     stack_pointer_rtx,
						     fp_offset
						     + UNITS_PER_WORD));
	      insn = emit_insn (gen_store_pairdi (mem_fp,
						  hard_frame_pointer_rtx,
						  mem_lr,
						  gen_rtx_REG (DImode,
							       LR_REGNUM)));
	    }
	  else
	    {
	      insn = emit_insn (gen_storewb_pairdi_di
				(stack_pointer_rtx, stack_pointer_rtx,
				 hard_frame_pointer_rtx,
				 gen_rtx_REG (DImode, LR_REGNUM),
				 GEN_INT (-offset),
				 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
	    }

	  /* The first part of a frame-related parallel insn is always
	     assumed to be relevant to the frame calculations;
	     subsequent parts are only frame-related if explicitly
	     marked.  */
	  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
	  RTX_FRAME_RELATED_P (insn) = 1;

	  /* Set up frame pointer to point to the location of the
	     previous frame pointer on the stack.  */
	  insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
					   stack_pointer_rtx,
					   GEN_INT (fp_offset)));
	  aarch64_set_frame_expr (gen_rtx_SET
				  (Pmode, hard_frame_pointer_rtx,
				   plus_constant (Pmode, stack_pointer_rtx,
						  fp_offset)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
					   hard_frame_pointer_rtx));
	}
      else
	{
	  insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					   GEN_INT (-offset)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

      aarch64_save_or_restore_callee_save_registers
	(fp_offset + cfun->machine->frame.hardfp_offset, 0);
    }

  /* when offset >= 512,
     sub sp, sp, #<outgoing_args_size> */
  if (frame_size > -1)
    {
      if (crtl->outgoing_args_size > 0)
	{
	  insn = emit_insn (gen_add2_insn
			    (stack_pointer_rtx,
			     GEN_INT (- crtl->outgoing_args_size)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
/* Generate the epilogue instructions for returning from a function.  */
void
aarch64_expand_epilogue (bool for_sibcall)
{
  HOST_WIDE_INT original_frame_size, frame_size, offset;
  HOST_WIDE_INT fp_offset;
  rtx insn;
  rtx cfa_reg;

  aarch64_layout_frame ();
  original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
  frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
		+ crtl->outgoing_args_size);
  offset = frame_size = AARCH64_ROUND_UP (frame_size,
					  STACK_BOUNDARY / BITS_PER_UNIT);

  fp_offset = (offset
	       - original_frame_size
	       - cfun->machine->frame.saved_regs_size);

  cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;

  /* Store pairs and load pairs have an offset range of only -512 to 504.  */
  if (offset >= 512)
    {
      offset = original_frame_size + cfun->machine->frame.saved_regs_size;
      if (offset >= 512)
	offset = cfun->machine->frame.saved_regs_size;

      frame_size -= (offset + crtl->outgoing_args_size);
      fp_offset = 0;
      if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
	{
	  insn = emit_insn (gen_add2_insn
			    (stack_pointer_rtx,
			     GEN_INT (crtl->outgoing_args_size)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
  else
    frame_size = -1;

  /* If there were outgoing arguments or we've done dynamic stack
     allocation, then restore the stack pointer from the frame
     pointer.  This is at most one insn and more efficient than using
     GCC's internal mechanism.  */
  if (frame_pointer_needed
      && (crtl->outgoing_args_size || cfun->calls_alloca))
    {
      insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
				       hard_frame_pointer_rtx,
				       GEN_INT (- fp_offset)));
      RTX_FRAME_RELATED_P (insn) = 1;
      /* As SP is set to (FP - fp_offset), according to the rules in
	 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
	 from the value of SP from now on.  */
      cfa_reg = stack_pointer_rtx;
    }

  aarch64_save_or_restore_callee_save_registers
    (fp_offset + cfun->machine->frame.hardfp_offset, 1);

  /* Restore the frame pointer and lr if the frame pointer is needed.  */
  if (offset > 0)
    {
      if (frame_pointer_needed)
	{
	  rtx mem_fp, mem_lr;

	  if (fp_offset)
	    {
	      mem_fp = gen_frame_mem (DImode,
				      plus_constant (Pmode,
						     stack_pointer_rtx,
						     fp_offset));
	      mem_lr = gen_frame_mem (DImode,
				      plus_constant (Pmode,
						     stack_pointer_rtx,
						     fp_offset
						     + UNITS_PER_WORD));
	      insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
						 mem_fp,
						 gen_rtx_REG (DImode,
							      LR_REGNUM),
						 mem_lr));
	    }
	  else
	    {
	      insn = emit_insn (gen_loadwb_pairdi_di
				(stack_pointer_rtx, stack_pointer_rtx,
				 hard_frame_pointer_rtx,
				 gen_rtx_REG (DImode, LR_REGNUM),
				 GEN_INT (offset),
				 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
	      add_reg_note (insn, REG_CFA_ADJUST_CFA,
			    (gen_rtx_SET (Pmode, stack_pointer_rtx,
					  plus_constant (Pmode, cfa_reg,
							 offset))));
	    }

	  /* The first part of a frame-related parallel insn
	     is always assumed to be relevant to the frame
	     calculations; subsequent parts are only
	     frame-related if explicitly marked.  */
	  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
	  add_reg_note (insn, REG_CFA_RESTORE,
			gen_rtx_REG (DImode, LR_REGNUM));

	  if (fp_offset)
	    {
	      insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					       GEN_INT (offset)));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}
      else
	{
	  insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					   GEN_INT (offset)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  /* Stack adjustment for exception handler.  */
  if (crtl->calls_eh_return)
    {
      /* We need to unwind the stack by the offset computed by
	 EH_RETURN_STACKADJ_RTX.  However, at this point the CFA is
	 based on SP.  Ideally we would update the SP and define the
	 CFA along the lines of:

	 SP = SP + EH_RETURN_STACKADJ_RTX
	 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)

	 However the dwarf emitter only understands a constant
	 register offset.

	 The solution chosen here is to use the otherwise unused IP0
	 as a temporary register to hold the current SP value.  The
	 CFA is described using IP0 then SP is modified.  */

      rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);

      insn = emit_move_insn (ip0, stack_pointer_rtx);
      add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
      RTX_FRAME_RELATED_P (insn) = 1;

      emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));

      /* Ensure the assignment to IP0 does not get optimized away.  */
      emit_use (ip0);
    }

  if (frame_size > -1)
    {
      if (frame_size >= 0x1000000)
	{
	  rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
	  emit_move_insn (op0, GEN_INT (frame_size));
	  emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
	  aarch64_set_frame_expr (gen_rtx_SET
				  (Pmode, stack_pointer_rtx,
				   plus_constant (Pmode, stack_pointer_rtx,
						  frame_size)));
	}
      else if (frame_size > 0)
	{
	  if ((frame_size & 0xfff) != 0)
	    {
	      insn = emit_insn (gen_add2_insn
				(stack_pointer_rtx,
				 GEN_INT ((frame_size
					   & (HOST_WIDE_INT) 0xfff))));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	  if ((frame_size & 0xfff) != frame_size)
	    {
	      insn = emit_insn (gen_add2_insn
				(stack_pointer_rtx,
				 GEN_INT ((frame_size
					   & ~ (HOST_WIDE_INT) 0xfff))));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}

      aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
					   plus_constant (Pmode,
							  stack_pointer_rtx,
							  offset)));
    }

  emit_use (gen_rtx_REG (DImode, LR_REGNUM));
  if (!for_sibcall)
    emit_jump_insn (ret_rtx);
}
/* Return the place to copy the exception unwinding return address to.
   This will probably be a stack slot, but could (in theory) be the
   return register.  */
rtx
aarch64_final_eh_return_addr (void)
{
  HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
  aarch64_layout_frame ();
  original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
  frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
		+ crtl->outgoing_args_size);
  offset = frame_size = AARCH64_ROUND_UP (frame_size,
					  STACK_BOUNDARY / BITS_PER_UNIT);
  fp_offset = (offset
	       - original_frame_size
	       - cfun->machine->frame.saved_regs_size);

  if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
    return gen_rtx_REG (DImode, LR_REGNUM);

  /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
     result in a store to save LR introduced by builtin_eh_return () being
     incorrectly deleted because the alias is not detected.
     So in the calculation of the address to copy the exception unwinding
     return address to, we note 2 cases.
     If FP is needed and the fp_offset is 0, it means that SP = FP and hence
     we return a SP-relative location since all the addresses are SP-relative
     in this case.  This prevents the store from being optimized away.
     If the fp_offset is not 0, then the addresses will be FP-relative and
     therefore we return a FP-relative location.  */

  if (frame_pointer_needed)
    {
      if (fp_offset)
	return gen_frame_mem (DImode,
			      plus_constant (Pmode,
					     hard_frame_pointer_rtx,
					     UNITS_PER_WORD));
      else
	return gen_frame_mem (DImode,
			      plus_constant (Pmode,
					     stack_pointer_rtx,
					     UNITS_PER_WORD));
    }

  /* If FP is not needed, we calculate the location of LR, which would be
     at the top of the saved registers block.  */

  return gen_frame_mem (DImode,
			plus_constant (Pmode,
				       stack_pointer_rtx,
				       fp_offset
				       + cfun->machine->frame.saved_regs_size
				       - 2 * UNITS_PER_WORD));
}
/* Possibly output code to build up a constant in a register.  For
   the benefit of the costs infrastructure, returns the number of
   instructions which would be emitted.  GENERATE inhibits or
   enables code generation.  */

static int
aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
{
  int insns = 1;

  if (aarch64_bitmask_imm (val, DImode))
    {
      if (generate)
	emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
    }
  else
    {
      int i;
      int ncount = 0;
      int zcount = 0;
      HOST_WIDE_INT valp = val >> 16;
      HOST_WIDE_INT valm;
      HOST_WIDE_INT tval;

      for (i = 16; i < 64; i += 16)
	{
	  valm = (valp & 0xffff);

	  if (valm != 0)
	    ++ zcount;

	  if (valm != 0xffff)
	    ++ ncount;

	  valp >>= 16;
	}

      /* zcount contains the number of additional MOVK instructions
	 required if the constant is built up with an initial MOVZ instruction,
	 while ncount is the number of MOVK instructions required if starting
	 with a MOVN instruction.  Choose the sequence that yields the fewest
	 number of instructions, preferring MOVZ instructions when they are
	 both the same.  */
      if (ncount < zcount)
	{
	  if (generate)
	    emit_move_insn (gen_rtx_REG (Pmode, regnum),
			    GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
	  tval = 0xffff;
	}
      else
	{
	  if (generate)
	    emit_move_insn (gen_rtx_REG (Pmode, regnum),
			    GEN_INT (val & 0xffff));
	  tval = 0;
	}

      val >>= 16;

      for (i = 16; i < 64; i += 16)
	{
	  if ((val & 0xffff) != tval)
	    {
	      if (generate)
		emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
					   GEN_INT (i),
					   GEN_INT (val & 0xffff)));
	      insns++;
	    }
	  val >>= 16;
	}
    }
  return insns;
}
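/* Illustrative example (not part of the original code): for
   val == 0x123456789abcdef0 every 16-bit chunk above bit 15 is
   non-zero and none is 0xffff, so zcount == ncount == 3 and the MOVZ
   sequence is chosen:

       movz x<regnum>, #0xdef0
       movk x<regnum>, #0x9abc, lsl #16
       movk x<regnum>, #0x5678, lsl #32
       movk x<regnum>, #0x1234, lsl #48

   and the function reports 4 instructions.  For val == -2 all high
   chunks are 0xffff, so the MOVN form wins and a single instruction
   suffices.  */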
static void
aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
{
  HOST_WIDE_INT mdelta = delta;
  rtx this_rtx = gen_rtx_REG (Pmode, regnum);
  rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);

  if (mdelta < 0)
    mdelta = -mdelta;

  if (mdelta >= 4096 * 4096)
    {
      (void) aarch64_build_constant (scratchreg, delta, true);
      emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
    }
  else if (mdelta > 0)
    {
      if (mdelta >= 4096)
	{
	  rtx shift;

	  emit_insn (gen_rtx_SET (Pmode, scratch_rtx,
				  GEN_INT (mdelta / 4096)));
	  shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
	  if (delta < 0)
	    emit_insn (gen_rtx_SET (Pmode, this_rtx,
				    gen_rtx_MINUS (Pmode, this_rtx, shift)));
	  else
	    emit_insn (gen_rtx_SET (Pmode, this_rtx,
				    gen_rtx_PLUS (Pmode, this_rtx, shift)));
	}
      if (mdelta % 4096 != 0)
	{
	  scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
	  emit_insn (gen_rtx_SET (Pmode, this_rtx,
				  gen_rtx_PLUS (Pmode, this_rtx,
						scratch_rtx)));
	}
    }
}
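/* Illustrative example (not part of the original code): for
   delta == 10000 (so mdelta == 10000 < 4096 * 4096) the code above
   emits

       mov  x<scratchreg>, #2                     // 10000 / 4096
       add  x<regnum>, x<regnum>, x<scratchreg>, lsl #12
       add  x<regnum>, x<regnum>, #1808           // 10000 % 4096

   instead of building the full constant first.  */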
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */
static void
aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT delta,
			 HOST_WIDE_INT vcall_offset,
			 tree function)
{
  /* The this pointer is always in x0.  Note that this differs from
     Arm where the this pointer may be bumped to r1 if r0 is required
     to return a pointer to an aggregate.  On AArch64 a result value
     pointer will be in x8.  */
  int this_regno = R0_REGNUM;
  rtx this_rtx, temp0, temp1, addr, insn, funexp;

  reload_completed = 1;
  emit_note (NOTE_INSN_PROLOGUE_END);

  if (vcall_offset == 0)
    aarch64_add_constant (this_regno, IP1_REGNUM, delta);
  else
    {
      gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);

      this_rtx = gen_rtx_REG (Pmode, this_regno);
      temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
      temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);

      addr = this_rtx;
      if (delta != 0)
	{
	  if (delta >= -256 && delta < 256)
	    addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
				       plus_constant (Pmode, this_rtx,
						      delta));
	  else
	    aarch64_add_constant (this_regno, IP1_REGNUM, delta);
	}

      if (Pmode == ptr_mode)
	aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
      else
	aarch64_emit_move (temp0,
			   gen_rtx_ZERO_EXTEND (Pmode,
						gen_rtx_MEM (ptr_mode,
							     addr)));

      if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
	addr = plus_constant (Pmode, temp0, vcall_offset);
      else
	{
	  (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
	  addr = gen_rtx_PLUS (Pmode, temp0, temp1);
	}

      if (Pmode == ptr_mode)
	aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode, addr));
      else
	aarch64_emit_move (temp1,
			   gen_rtx_SIGN_EXTEND (Pmode,
						gen_rtx_MEM (ptr_mode,
							     addr)));

      emit_insn (gen_add2_insn (this_rtx, temp1));
    }

  /* Generate a tail call to the target function.  */
  if (!TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
  SIBLING_CALL_P (insn) = 1;

  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  /* Stop pretending to be a post-reload pass.  */
  reload_completed = 0;
}
static int
aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  if (GET_CODE (*x) == SYMBOL_REF)
    return SYMBOL_REF_TLS_MODEL (*x) != 0;

  /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
     TLS offsets, not real symbol references.  */
  if (GET_CODE (*x) == UNSPEC
      && XINT (*x, 1) == UNSPEC_TLS)
    return -1;

  return 0;
}

static bool
aarch64_tls_referenced_p (rtx x)
{
  if (!TARGET_HAVE_TLS)
    return false;

  return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
}
static int
aarch64_bitmasks_cmp (const void *i1, const void *i2)
{
  const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
  const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;

  if (*imm1 < *imm2)
    return -1;
  if (*imm1 > *imm2)
    return +1;
  return 0;
}

static void
aarch64_build_bitmask_table (void)
{
  unsigned HOST_WIDE_INT mask, imm;
  unsigned int log_e, e, s, r;
  unsigned int nimms = 0;

  for (log_e = 1; log_e <= 6; log_e++)
    {
      e = 1 << log_e;
      if (e == 64)
	mask = ~(HOST_WIDE_INT) 0;
      else
	mask = ((HOST_WIDE_INT) 1 << e) - 1;
      for (s = 1; s < e; s++)
	{
	  for (r = 0; r < e; r++)
	    {
	      /* set s consecutive bits to 1 (s < 64) */
	      imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
	      /* rotate right by r */
	      if (r != 0)
		imm = ((imm >> r) | (imm << (e - r))) & mask;
	      /* replicate the constant depending on SIMD size */
	      switch (log_e)
		{
		case 1: imm |= (imm <<  2);
		case 2: imm |= (imm <<  4);
		case 3: imm |= (imm <<  8);
		case 4: imm |= (imm << 16);
		case 5: imm |= (imm << 32);
		case 6:
		  break;
		default:
		  gcc_unreachable ();
		}
	      gcc_assert (nimms < AARCH64_NUM_BITMASKS);
	      aarch64_bitmasks[nimms++] = imm;
	    }
	}
    }

  gcc_assert (nimms == AARCH64_NUM_BITMASKS);
  qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
	 aarch64_bitmasks_cmp);
}
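/* Illustrative example (not part of the original code): with
   log_e == 3 (element size e == 8), s == 4 and r == 0 the element is
   0x0f; the fall-through switch then widens it to 0x0f0f, 0x0f0f0f0f
   and finally 0x0f0f0f0f0f0f0f0f, the 64-bit value that is stored in
   the table and later found by aarch64_bitmask_imm's bsearch.  */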
/* Return true if val can be encoded as a 12-bit unsigned immediate with
   a left shift of 0 or 12 bits.  */
bool
aarch64_uimm12_shift (HOST_WIDE_INT val)
{
  return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
	  || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
	  );
}


/* Return true if val is an immediate that can be loaded into a
   register by a MOVZ instruction.  */
static bool
aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
{
  if (GET_MODE_SIZE (mode) > 4)
    {
      if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
	  || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
	return 1;
    }
  else
    {
      /* Ignore sign extension.  */
      val &= (HOST_WIDE_INT) 0xffffffff;
    }
  return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
	  || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
}


/* Return true if val is a valid bitmask immediate.  */
bool
aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
{
  if (GET_MODE_SIZE (mode) < 8)
    {
      /* Replicate bit pattern.  */
      val &= (HOST_WIDE_INT) 0xffffffff;
      val |= val << 32;
    }
  return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
		  sizeof (aarch64_bitmasks[0]),
		  aarch64_bitmasks_cmp) != NULL;
}


/* Return true if val is an immediate that can be loaded into a
   register in a single instruction.  */
bool
aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
{
  if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
    return 1;
  return aarch64_bitmask_imm (val, mode);
}
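/* Illustrative examples (not part of the original code): 0xabc and
   0xabc000 both satisfy aarch64_uimm12_shift (shift 0 and shift 12
   respectively), while 0xabc0, which straddles the two fields, does
   not.  0xffff0000 satisfies aarch64_movw_imm for SImode and
   corresponds to "movz w0, #0xffff, lsl #16".  */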
static bool
aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  if (GET_CODE (x) == HIGH)
    return true;

  split_const (x, &base, &offset);
  if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
    {
      if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
	  != SYMBOL_FORCE_TO_MEM)
	return true;
      else
	/* Avoid generating a 64-bit relocation in ILP32; leave
	   to aarch64_expand_mov_immediate to handle it properly.  */
	return mode != ptr_mode;
    }

  return aarch64_tls_referenced_p (x);
}
/* Return true if register REGNO is a valid index register.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
aarch64_regno_ok_for_index_p (int regno, bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
	return true;

      if (!reg_renumber)
	return false;

      regno = reg_renumber[regno];
    }
  return GP_REGNUM_P (regno);
}

/* Return true if register REGNO is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
aarch64_regno_ok_for_base_p (int regno, bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
	return true;

      if (!reg_renumber)
	return false;

      regno = reg_renumber[regno];
    }

  /* The fake registers will be eliminated to either the stack or
     hard frame pointer, both of which are usually valid base registers.
     Reload deals with the cases where the eliminated form isn't valid.  */
  return (GP_REGNUM_P (regno)
	  || regno == SP_REGNUM
	  || regno == FRAME_POINTER_REGNUM
	  || regno == ARG_POINTER_REGNUM);
}

/* Return true if X is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

static bool
aarch64_base_register_rtx_p (rtx x, bool strict_p)
{
  if (!strict_p && GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);

  return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
}
/* Return true if address offset is a valid index.  If it is, fill in INFO
   appropriately.  STRICT_P is true if REG_OK_STRICT is in effect.  */

static bool
aarch64_classify_index (struct aarch64_address_info *info, rtx x,
			enum machine_mode mode, bool strict_p)
{
  enum aarch64_address_type type;
  rtx index;
  int shift;

  /* (reg:P) */
  if ((REG_P (x) || GET_CODE (x) == SUBREG)
      && GET_MODE (x) == Pmode)
    {
      type = ADDRESS_REG_REG;
      index = x;
      shift = 0;
    }
  /* (sign_extend:DI (reg:SI)) */
  else if ((GET_CODE (x) == SIGN_EXTEND
	    || GET_CODE (x) == ZERO_EXTEND)
	   && GET_MODE (x) == DImode
	   && GET_MODE (XEXP (x, 0)) == SImode)
    {
      type = (GET_CODE (x) == SIGN_EXTEND)
	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (x, 0);
      shift = 0;
    }
  /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
  else if (GET_CODE (x) == MULT
	   && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
	       || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
	   && GET_MODE (XEXP (x, 0)) == DImode
	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
	   && CONST_INT_P (XEXP (x, 1)))
    {
      type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = exact_log2 (INTVAL (XEXP (x, 1)));
    }
  /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
  else if (GET_CODE (x) == ASHIFT
	   && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
	       || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
	   && GET_MODE (XEXP (x, 0)) == DImode
	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
	   && CONST_INT_P (XEXP (x, 1)))
    {
      type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = INTVAL (XEXP (x, 1));
    }
  /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
  else if ((GET_CODE (x) == SIGN_EXTRACT
	    || GET_CODE (x) == ZERO_EXTRACT)
	   && GET_MODE (x) == DImode
	   && GET_CODE (XEXP (x, 0)) == MULT
	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
	   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
    {
      type = (GET_CODE (x) == SIGN_EXTRACT)
	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
      if (INTVAL (XEXP (x, 1)) != 32 + shift
	  || INTVAL (XEXP (x, 2)) != 0)
	shift = -1;
    }
  /* (and:DI (mult:DI (reg:DI) (const_int scale))
     (const_int 0xffffffff<<shift)) */
  else if (GET_CODE (x) == AND
	   && GET_MODE (x) == DImode
	   && GET_CODE (XEXP (x, 0)) == MULT
	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
	   && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	   && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
      if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
	shift = -1;
    }
  /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
  else if ((GET_CODE (x) == SIGN_EXTRACT
	    || GET_CODE (x) == ZERO_EXTRACT)
	   && GET_MODE (x) == DImode
	   && GET_CODE (XEXP (x, 0)) == ASHIFT
	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
	   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
    {
      type = (GET_CODE (x) == SIGN_EXTRACT)
	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = INTVAL (XEXP (XEXP (x, 0), 1));
      if (INTVAL (XEXP (x, 1)) != 32 + shift
	  || INTVAL (XEXP (x, 2)) != 0)
	shift = -1;
    }
  /* (and:DI (ashift:DI (reg:DI) (const_int shift))
     (const_int 0xffffffff<<shift)) */
  else if (GET_CODE (x) == AND
	   && GET_MODE (x) == DImode
	   && GET_CODE (XEXP (x, 0)) == ASHIFT
	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
	   && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	   && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = INTVAL (XEXP (XEXP (x, 0), 1));
      if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
	shift = -1;
    }
  /* (mult:P (reg:P) (const_int scale)) */
  else if (GET_CODE (x) == MULT
	   && GET_MODE (x) == Pmode
	   && GET_MODE (XEXP (x, 0)) == Pmode
	   && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_REG;
      index = XEXP (x, 0);
      shift = exact_log2 (INTVAL (XEXP (x, 1)));
    }
  /* (ashift:P (reg:P) (const_int shift)) */
  else if (GET_CODE (x) == ASHIFT
	   && GET_MODE (x) == Pmode
	   && GET_MODE (XEXP (x, 0)) == Pmode
	   && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_REG;
      index = XEXP (x, 0);
      shift = INTVAL (XEXP (x, 1));
    }
  else
    return false;

  if (GET_CODE (index) == SUBREG)
    index = SUBREG_REG (index);

  if ((shift == 0 ||
       (shift > 0 && shift <= 3
	&& (1 << shift) == GET_MODE_SIZE (mode)))
      && REG_P (index)
      && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
    {
      info->type = type;
      info->offset = index;
      info->shift = shift;
      return true;
    }

  return false;
}
static inline bool
offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
{
  return (offset >= -64 * GET_MODE_SIZE (mode)
	  && offset < 64 * GET_MODE_SIZE (mode)
	  && offset % GET_MODE_SIZE (mode) == 0);
}

static inline bool
offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
			       HOST_WIDE_INT offset)
{
  return offset >= -256 && offset < 256;
}

static inline bool
offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
{
  return (offset >= 0
	  && offset < 4096 * GET_MODE_SIZE (mode)
	  && offset % GET_MODE_SIZE (mode) == 0);
}
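/* Illustrative summary (not part of the original code): for DImode,
   where GET_MODE_SIZE == 8, the predicates above accept

     offset_7bit_signed_scaled_p:     -512 .. 504 in steps of 8 (LDP/STP)
     offset_9bit_signed_unscaled_p:   -256 .. 255 (LDUR/STUR)
     offset_12bit_unsigned_scaled_p:  0 .. 32760 in steps of 8 (LDR/STR)

   which matches the "-512 to 504" store-pair range noted in the
   prologue and epilogue code above.  */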
/* Return true if X is a valid address for machine mode MODE.  If it is,
   fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
   effect.  OUTER_CODE is PARALLEL for a load/store pair.  */

static bool
aarch64_classify_address (struct aarch64_address_info *info,
			  rtx x, enum machine_mode mode,
			  RTX_CODE outer_code, bool strict_p)
{
  enum rtx_code code = GET_CODE (x);
  rtx op0, op1;
  bool allow_reg_index_p =
    outer_code != PARALLEL && GET_MODE_SIZE (mode) != 16;

  /* Don't support anything other than POST_INC or REG addressing for
     AdvSIMD.  */
  if (aarch64_vector_mode_p (mode)
      && (code != POST_INC && code != REG))
    return false;

  switch (code)
    {
    case REG:
    case SUBREG:
      info->type = ADDRESS_REG_IMM;
      info->base = x;
      info->offset = const0_rtx;
      return aarch64_base_register_rtx_p (x, strict_p);

    case PLUS:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      if (GET_MODE_SIZE (mode) != 0
	  && CONST_INT_P (op1)
	  && aarch64_base_register_rtx_p (op0, strict_p))
	{
	  HOST_WIDE_INT offset = INTVAL (op1);

	  info->type = ADDRESS_REG_IMM;
	  info->base = op0;
	  info->offset = op1;

	  /* TImode and TFmode values are allowed in both pairs of X
	     registers and individual Q registers.  The available
	     address modes are:
	     X,X:   7-bit signed scaled offset
	     Q:     9-bit signed offset
	     We conservatively require an offset representable in either mode.
	   */
	  if (mode == TImode || mode == TFmode)
	    return (offset_7bit_signed_scaled_p (mode, offset)
		    && offset_9bit_signed_unscaled_p (mode, offset));

	  if (outer_code == PARALLEL)
	    return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
		    && offset_7bit_signed_scaled_p (mode, offset));
	  else
	    return (offset_9bit_signed_unscaled_p (mode, offset)
		    || offset_12bit_unsigned_scaled_p (mode, offset));
	}

      if (allow_reg_index_p)
	{
	  /* Look for base + (scaled/extended) index register.  */
	  if (aarch64_base_register_rtx_p (op0, strict_p)
	      && aarch64_classify_index (info, op1, mode, strict_p))
	    {
	      info->base = op0;
	      return true;
	    }
	  if (aarch64_base_register_rtx_p (op1, strict_p)
	      && aarch64_classify_index (info, op0, mode, strict_p))
	    {
	      info->base = op1;
	      return true;
	    }
	}

      return false;

    case POST_INC:
    case POST_DEC:
    case PRE_INC:
    case PRE_DEC:
      info->type = ADDRESS_REG_WB;
      info->base = XEXP (x, 0);
      info->offset = NULL_RTX;
      return aarch64_base_register_rtx_p (info->base, strict_p);

    case POST_MODIFY:
    case PRE_MODIFY:
      info->type = ADDRESS_REG_WB;
      info->base = XEXP (x, 0);
      if (GET_CODE (XEXP (x, 1)) == PLUS
	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
	  && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
	  && aarch64_base_register_rtx_p (info->base, strict_p))
	{
	  HOST_WIDE_INT offset;
	  info->offset = XEXP (XEXP (x, 1), 1);
	  offset = INTVAL (info->offset);

	  /* TImode and TFmode values are allowed in both pairs of X
	     registers and individual Q registers.  The available
	     address modes are:
	     X,X:   7-bit signed scaled offset
	     Q:     9-bit signed offset
	     We conservatively require an offset representable in either mode.
	   */
	  if (mode == TImode || mode == TFmode)
	    return (offset_7bit_signed_scaled_p (mode, offset)
		    && offset_9bit_signed_unscaled_p (mode, offset));

	  if (outer_code == PARALLEL)
	    return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
		    && offset_7bit_signed_scaled_p (mode, offset));
	  else
	    return offset_9bit_signed_unscaled_p (mode, offset);
	}
      return false;

    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      /* load literal: pc-relative constant pool entry.  Only supported
         for SI mode or larger.  */
      info->type = ADDRESS_SYMBOLIC;
      if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
	{
	  rtx sym, addend;

	  split_const (x, &sym, &addend);
	  return (GET_CODE (sym) == LABEL_REF
		  || (GET_CODE (sym) == SYMBOL_REF
		      && CONSTANT_POOL_ADDRESS_P (sym)));
	}
      return false;

    case LO_SUM:
      info->type = ADDRESS_LO_SUM;
      info->base = XEXP (x, 0);
      info->offset = XEXP (x, 1);
      if (allow_reg_index_p
	  && aarch64_base_register_rtx_p (info->base, strict_p))
	{
	  rtx sym, offs;
	  split_const (info->offset, &sym, &offs);
	  if (GET_CODE (sym) == SYMBOL_REF
	      && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
		  == SYMBOL_SMALL_ABSOLUTE))
	    {
	      /* The symbol and offset must be aligned to the access size.  */
	      unsigned int align;
	      unsigned int ref_size;

	      if (CONSTANT_POOL_ADDRESS_P (sym))
		align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
	      else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
		{
		  tree exp = SYMBOL_REF_DECL (sym);
		  align = TYPE_ALIGN (TREE_TYPE (exp));
		  align = CONSTANT_ALIGNMENT (exp, align);
		}
	      else if (SYMBOL_REF_DECL (sym))
		align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
	      else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
		       && SYMBOL_REF_BLOCK (sym) != NULL)
		align = SYMBOL_REF_BLOCK (sym)->alignment;
	      else
		align = BITS_PER_UNIT;

	      ref_size = GET_MODE_SIZE (mode);
	      if (ref_size == 0)
		ref_size = GET_MODE_SIZE (DImode);

	      return ((INTVAL (offs) & (ref_size - 1)) == 0
		      && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
	    }
	}
      return false;

    default:
      return false;
    }
}
bool
aarch64_symbolic_address_p (rtx x)
{
  rtx offset;

  split_const (x, &x, &offset);
  return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
}

/* Classify the base of symbolic expression X, given that X appears in
   context CONTEXT.  */

enum aarch64_symbol_type
aarch64_classify_symbolic_expression (rtx x,
				      enum aarch64_symbol_context context)
{
  rtx offset;

  split_const (x, &x, &offset);
  return aarch64_classify_symbol (x, context);
}


/* Return TRUE if X is a legitimate address for accessing memory in
   mode MODE.  */
static bool
aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x,
				   bool strict_p)
{
  struct aarch64_address_info addr;

  return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
}

/* Return TRUE if X is a legitimate address for accessing memory in
   mode MODE.  OUTER_CODE will be PARALLEL if this is a load/store
   pair operation.  */
bool
aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
			      RTX_CODE outer_code, bool strict_p)
{
  struct aarch64_address_info addr;

  return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
}
/* Return TRUE if rtx X is immediate constant 0.0 */
bool
aarch64_float_const_zero_rtx_p (rtx x)
{
  REAL_VALUE_TYPE r;

  if (GET_MODE (x) == VOIDmode)
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  if (REAL_VALUE_MINUS_ZERO (r))
    return !HONOR_SIGNED_ZEROS (GET_MODE (x));
  return REAL_VALUES_EQUAL (r, dconst0);
}

/* Return the fixed registers used for condition codes.  */

static bool
aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = CC_REGNUM;
  *p2 = INVALID_REGNUM;
  return true;
}
enum machine_mode
aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
{
  /* All floating point compares return CCFP if it is an equality
     comparison, and CCFPE otherwise.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    {
      switch (code)
	{
	case EQ:
	case NE:
	case UNORDERED:
	case ORDERED:
	case UNLT:
	case UNLE:
	case UNGT:
	case UNGE:
	case UNEQ:
	case LTGT:
	  return CCFPmode;

	case LT:
	case LE:
	case GT:
	case GE:
	  return CCFPEmode;

	default:
	  gcc_unreachable ();
	}
    }

  if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
      && y == const0_rtx
      && (code == EQ || code == NE || code == LT || code == GE)
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
	  || GET_CODE (x) == NEG))
    return CC_NZmode;

  /* A compare with a shifted operand.  Because of canonicalization,
     the comparison will have to be swapped when we emit the assembly
     code.  */
  if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
      && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
	  || GET_CODE (x) == LSHIFTRT
	  || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
    return CC_SWPmode;

  /* Similarly for a negated operand, but we can only do this for
     equalities.  */
  if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
      && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
      && (code == EQ || code == NE)
      && GET_CODE (x) == NEG)
    return CC_Zmode;

  /* A compare of a mode narrower than SI mode against zero can be done
     by extending the value in the comparison.  */
  if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
      && y == const0_rtx)
    /* Only use sign-extension if we really need it.  */
    return ((code == GT || code == GE || code == LE || code == LT)
	    ? CC_SESWPmode : CC_ZESWPmode);

  /* For everything else, return CCmode.  */
  return CCmode;
}
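/* Illustrative example (not part of the original code): for
   (compare (plus x y) (const_int 0)) with code EQ the function above
   returns CC_NZmode, allowing a flag-setting "adds" followed by "beq"
   instead of an ADD plus a separate "cmp ..., #0".  */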
static unsigned int
aarch64_get_condition_code (rtx x)
{
  enum machine_mode mode = GET_MODE (XEXP (x, 0));
  enum rtx_code comp_code = GET_CODE (x);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));

  switch (mode)
    {
    case CCFPmode:
    case CCFPEmode:
      switch (comp_code)
	{
	case GE: return AARCH64_GE;
	case GT: return AARCH64_GT;
	case LE: return AARCH64_LS;
	case LT: return AARCH64_MI;
	case NE: return AARCH64_NE;
	case EQ: return AARCH64_EQ;
	case ORDERED: return AARCH64_VC;
	case UNORDERED: return AARCH64_VS;
	case UNLT: return AARCH64_LT;
	case UNLE: return AARCH64_LE;
	case UNGT: return AARCH64_HI;
	case UNGE: return AARCH64_PL;
	default: gcc_unreachable ();
	}
      break;

    case CCmode:
      switch (comp_code)
	{
	case NE: return AARCH64_NE;
	case EQ: return AARCH64_EQ;
	case GE: return AARCH64_GE;
	case GT: return AARCH64_GT;
	case LE: return AARCH64_LE;
	case LT: return AARCH64_LT;
	case GEU: return AARCH64_CS;
	case GTU: return AARCH64_HI;
	case LEU: return AARCH64_LS;
	case LTU: return AARCH64_CC;
	default: gcc_unreachable ();
	}
      break;

    case CC_SWPmode:
    case CC_ZESWPmode:
    case CC_SESWPmode:
      switch (comp_code)
	{
	case NE: return AARCH64_NE;
	case EQ: return AARCH64_EQ;
	case GE: return AARCH64_LE;
	case GT: return AARCH64_LT;
	case LE: return AARCH64_GE;
	case LT: return AARCH64_GT;
	case GEU: return AARCH64_LS;
	case GTU: return AARCH64_CC;
	case LEU: return AARCH64_CS;
	case LTU: return AARCH64_HI;
	default: gcc_unreachable ();
	}
      break;

    case CC_NZmode:
      switch (comp_code)
	{
	case NE: return AARCH64_NE;
	case EQ: return AARCH64_EQ;
	case GE: return AARCH64_PL;
	case LT: return AARCH64_MI;
	default: gcc_unreachable ();
	}
      break;

    case CC_Zmode:
      switch (comp_code)
	{
	case NE: return AARCH64_NE;
	case EQ: return AARCH64_EQ;
	default: gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
      break;
    }
}
static unsigned
bit_count (unsigned HOST_WIDE_INT value)
{
  unsigned count = 0;

  while (value)
    {
      count++;
      value &= value - 1;
    }

  return count;
}
void
aarch64_print_operand (FILE *f, rtx x, char code)
{
  switch (code)
    {
    /* An integer or symbol address without a preceding # sign.  */
    case 'c':
      switch (GET_CODE (x))
	{
	case CONST_INT:
	  fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
	  break;

	case SYMBOL_REF:
	  output_addr_const (f, x);
	  break;

	case CONST:
	  if (GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
	    {
	      output_addr_const (f, x);
	      break;
	    }
	  /* Fall through.  */

	default:
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	}
      break;

    case 'e':
      /* Print the sign/zero-extend size as a character 8->b,
	 16->h, 32->w.  */
      {
	int n;

	if (GET_CODE (x) != CONST_INT
	    || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
	  {
	    output_operand_lossage ("invalid operand for '%%%c'", code);
	    return;
	  }

	switch (n)
	  {
	  case 3:
	    fputc ('b', f);
	    break;
	  case 4:
	    fputc ('h', f);
	    break;
	  case 5:
	    fputc ('w', f);
	    break;
	  default:
	    output_operand_lossage ("invalid operand for '%%%c'", code);
	    return;
	  }
      }
      break;

    case 'p':
      {
	int n;

	/* Print N such that 2^N == X.  */
	if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
	  {
	    output_operand_lossage ("invalid operand for '%%%c'", code);
	    return;
	  }

	asm_fprintf (f, "%d", n);
      }
      break;

    case 'P':
      /* Print the number of non-zero bits in X (a const_int).  */
      if (GET_CODE (x) != CONST_INT)
	{
	  output_operand_lossage ("invalid operand for '%%%c'", code);
	  return;
	}

      asm_fprintf (f, "%u", bit_count (INTVAL (x)));
      break;

    case 'H':
      /* Print the higher numbered register of a pair (TImode) of regs.  */
      if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
	{
	  output_operand_lossage ("invalid operand for '%%%c'", code);
	  return;
	}

      asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
      break;

    case 'm':
      /* Print a condition (eq, ne, etc).  */

      /* CONST_TRUE_RTX means always -- that's the default.  */
      if (x == const_true_rtx)
	return;

      if (!COMPARISON_P (x))
	{
	  output_operand_lossage ("invalid operand for '%%%c'", code);
	  return;
	}

      fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
      break;

    case 'M':
      /* Print the inverse of a condition (eq <-> ne, etc).  */

      /* CONST_TRUE_RTX means never -- that's the default.  */
      if (x == const_true_rtx)
	{
	  fputs ("nv", f);
	  return;
	}

      if (!COMPARISON_P (x))
	{
	  output_operand_lossage ("invalid operand for '%%%c'", code);
	  return;
	}

      fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
				     (aarch64_get_condition_code (x))], f);
      break;

    case 'b':
    case 'h':
    case 's':
    case 'd':
    case 'q':
      /* Print a scalar FP/SIMD register name.  */
      if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
	{
	  output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
	  return;
	}
      asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
      break;

    case 'S':
    case 'T':
    case 'U':
    case 'V':
      /* Print the first FP/SIMD register name in a list.  */
      if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
	{
	  output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
	  return;
	}
      asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
      break;

    case 'X':
      /* Print bottom 16 bits of integer constant in hex.  */
      if (GET_CODE (x) != CONST_INT)
	{
	  output_operand_lossage ("invalid operand for '%%%c'", code);
	  return;
	}
      asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
      break;

    case 'w':
    case 'x':
      /* Print a general register name or the zero register (32-bit or
         64-bit).  */
      if (x == const0_rtx
	  || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
	{
	  asm_fprintf (f, "%czr", code);
	  break;
	}

      if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
	{
	  asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
	  break;
	}

      if (REG_P (x) && REGNO (x) == SP_REGNUM)
	{
	  asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
	  break;
	}

      /* Fall through */

    case 0:
      /* Print a normal operand, if it's a general register, then we
	 assume DImode.  */
      if (x == NULL)
	{
	  output_operand_lossage ("missing operand");
	  return;
	}

      switch (GET_CODE (x))
	{
	case REG:
	  asm_fprintf (f, "%s", reg_names [REGNO (x)]);
	  break;

	case MEM:
	  aarch64_memory_reference_mode = GET_MODE (x);
	  output_address (XEXP (x, 0));
	  break;

	case LABEL_REF:
	case SYMBOL_REF:
	  output_addr_const (asm_out_file, x);
	  break;

	case CONST_INT:
	  asm_fprintf (f, "%wd", INTVAL (x));
	  break;

	case CONST_VECTOR:
	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
	    {
	      gcc_assert (aarch64_const_vec_all_same_int_p (x,
							    HOST_WIDE_INT_MIN,
							    HOST_WIDE_INT_MAX));
	      asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
	    }
	  else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
	    {
	      fputc ('0', f);
	    }
	  else
	    gcc_unreachable ();
	  break;

	case CONST_DOUBLE:
	  /* CONST_DOUBLE can represent a double-width integer.
	     In this case, the mode of x is VOIDmode.  */
	  if (GET_MODE (x) == VOIDmode)
	    ; /* Do Nothing.  */
	  else if (aarch64_float_const_zero_rtx_p (x))
	    {
	      fputc ('0', f);
	      break;
	    }
	  else if (aarch64_float_const_representable_p (x))
	    {
#define buf_size 20
	      char float_buf[buf_size] = {'\0'};
	      REAL_VALUE_TYPE r;
	      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
	      real_to_decimal_for_mode (float_buf, &r,
					buf_size, buf_size,
					1, GET_MODE (x));
	      asm_fprintf (asm_out_file, "%s", float_buf);
	      break;
#undef buf_size
	    }
	  output_operand_lossage ("invalid constant");
	  return;
	default:
	  output_operand_lossage ("invalid operand");
	  return;
	}
      break;

    case 'A':
      if (GET_CODE (x) == HIGH)
	x = XEXP (x, 0);

      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
	{
	case SYMBOL_SMALL_GOT:
	  asm_fprintf (asm_out_file, ":got:");
	  break;

	case SYMBOL_SMALL_TLSGD:
	  asm_fprintf (asm_out_file, ":tlsgd:");
	  break;

	case SYMBOL_SMALL_TLSDESC:
	  asm_fprintf (asm_out_file, ":tlsdesc:");
	  break;

	case SYMBOL_SMALL_GOTTPREL:
	  asm_fprintf (asm_out_file, ":gottprel:");
	  break;

	case SYMBOL_SMALL_TPREL:
	  asm_fprintf (asm_out_file, ":tprel:");
	  break;

	case SYMBOL_TINY_GOT:
	  gcc_unreachable ();
	  break;

	default:
	  break;
	}
      output_addr_const (asm_out_file, x);
      break;

    case 'L':
      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
	{
	case SYMBOL_SMALL_GOT:
	  asm_fprintf (asm_out_file, ":lo12:");
	  break;

	case SYMBOL_SMALL_TLSGD:
	  asm_fprintf (asm_out_file, ":tlsgd_lo12:");
	  break;

	case SYMBOL_SMALL_TLSDESC:
	  asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
	  break;

	case SYMBOL_SMALL_GOTTPREL:
	  asm_fprintf (asm_out_file, ":gottprel_lo12:");
	  break;

	case SYMBOL_SMALL_TPREL:
	  asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
	  break;

	case SYMBOL_TINY_GOT:
	  asm_fprintf (asm_out_file, ":got:");
	  break;

	default:
	  break;
	}
      output_addr_const (asm_out_file, x);
      break;

    case 'G':
      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
	{
	case SYMBOL_SMALL_TPREL:
	  asm_fprintf (asm_out_file, ":tprel_hi12:");
	  break;
	default:
	  break;
	}
      output_addr_const (asm_out_file, x);
      break;

    default:
      output_operand_lossage ("invalid operand prefix '%%%c'", code);
      return;
    }
}
void
aarch64_print_operand_address (FILE *f, rtx x)
{
  struct aarch64_address_info addr;

  if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
				MEM, true))
    switch (addr.type)
      {
      case ADDRESS_REG_IMM:
	if (addr.offset == const0_rtx)
	  asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
	else
	  asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
		       INTVAL (addr.offset));
	return;

      case ADDRESS_REG_REG:
	if (addr.shift == 0)
	  asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
		       reg_names [REGNO (addr.offset)]);
	else
	  asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
		       reg_names [REGNO (addr.offset)], addr.shift);
	return;

      case ADDRESS_REG_UXTW:
	if (addr.shift == 0)
	  asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
		       REGNO (addr.offset) - R0_REGNUM);
	else
	  asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
		       REGNO (addr.offset) - R0_REGNUM, addr.shift);
	return;

      case ADDRESS_REG_SXTW:
	if (addr.shift == 0)
	  asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
		       REGNO (addr.offset) - R0_REGNUM);
	else
	  asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
		       REGNO (addr.offset) - R0_REGNUM, addr.shift);
	return;

      case ADDRESS_REG_WB:
	switch (GET_CODE (x))
	  {
	  case PRE_INC:
	    asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
			 GET_MODE_SIZE (aarch64_memory_reference_mode));
	    return;
	  case POST_INC:
	    asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
			 GET_MODE_SIZE (aarch64_memory_reference_mode));
	    return;
	  case PRE_DEC:
	    asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
			 GET_MODE_SIZE (aarch64_memory_reference_mode));
	    return;
	  case POST_DEC:
	    asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
			 GET_MODE_SIZE (aarch64_memory_reference_mode));
	    return;
	  case PRE_MODIFY:
	    asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
			 INTVAL (addr.offset));
	    return;
	  case POST_MODIFY:
	    asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
			 INTVAL (addr.offset));
	    return;
	  default:
	    break;
	  }
	break;

      case ADDRESS_LO_SUM:
	asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
	output_addr_const (f, addr.offset);
	asm_fprintf (f, "]");
	return;

      case ADDRESS_SYMBOLIC:
	break;
      }

  output_addr_const (f, x);
}
bool
aarch64_label_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == LABEL_REF)
    return true;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
     referencing instruction, but they are constant offsets, not
     symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return false;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}
/* Implement REGNO_REG_CLASS.  */

enum reg_class
aarch64_regno_regclass (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return CORE_REGS;

  if (regno == SP_REGNUM)
    return STACK_REG;

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return POINTER_REGS;

  if (FP_REGNUM_P (regno))
    return FP_LO_REGNUM_P (regno) ?  FP_LO_REGS : FP_REGS;

  return NO_REGS;
}
/* Try a machine-dependent way of reloading an illegitimate address
   operand.  If we find one, push the reload and return the new rtx.  */

rtx
aarch64_legitimize_reload_address (rtx *x_p,
				   enum machine_mode mode,
				   int opnum, int type,
				   int ind_levels ATTRIBUTE_UNUSED)
{
  rtx x = *x_p;

  /* Do not allow mem (plus (reg, const)) if vector mode.  */
  if (aarch64_vector_mode_p (mode)
      && GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))
    {
      rtx orig_rtx = x;
      x = copy_rtx (x);
      push_reload (orig_rtx, NULL_RTX, x_p, NULL,
		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      return x;
    }

  /* We must recognize output that we have already generated ourselves.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && REG_P (XEXP (XEXP (x, 0), 0))
      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
      && CONST_INT_P (XEXP (x, 1)))
    {
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      return x;
    }

  /* We wish to handle large displacements off a base register by splitting
     the addend across an add and the mem insn.  This can cut the number of
     extra insns needed from 3 to 1.  It is only useful for load/store of a
     single register with 12 bit offset field.  */
  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1))
      && HARD_REGISTER_P (XEXP (x, 0))
      && mode != TImode
      && mode != TFmode
      && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
    {
      HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
      HOST_WIDE_INT low = val & 0xfff;
      HOST_WIDE_INT high = val - low;
      HOST_WIDE_INT offs;
      rtx cst;
      enum machine_mode xmode = GET_MODE (x);

      /* In ILP32, xmode can be either DImode or SImode.  */
      gcc_assert (xmode == DImode || xmode == SImode);

      /* Reload non-zero BLKmode offsets.  This is because we cannot ascertain
	 BLKmode alignment.  */
      if (GET_MODE_SIZE (mode) == 0)
	return NULL_RTX;

      offs = low % GET_MODE_SIZE (mode);

      /* Align misaligned offset by adjusting high part to compensate.  */
      if (offs != 0)
	{
	  if (aarch64_uimm12_shift (high + offs))
	    {
	      /* Combine offset and high part.  */
	      high += offs;
	      low -= offs;
	    }
	  else
	    {
	      /* Fit miss-alignment into the low part.  */
	      offs = GET_MODE_SIZE (mode) - offs;
	      low += offs;
	      high = high + (low & 0x1000) - offs;
	      low &= 0xfff;
	    }
	}

      /* Check for overflow.  */
      if (high + low != val)
	return NULL_RTX;

      cst = GEN_INT (high);
      if (!aarch64_uimm12_shift (high))
	cst = force_const_mem (xmode, cst);

      /* Reload high part into base reg, leaving the low part
	 in the mem instruction.
	 Note that replacing this gen_rtx_PLUS with plus_constant is
	 wrong in this case because we rely on the
	 (plus (plus reg c1) c2) structure being preserved so that
	 XEXP (*p, 0) in push_reload below uses the correct term.  */
      x = gen_rtx_PLUS (xmode,
			gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
			GEN_INT (low));

      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      return x;
    }

  return NULL_RTX;
}
static reg_class_t
aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
			  reg_class_t rclass,
			  enum machine_mode mode,
			  secondary_reload_info *sri)
{
  /* Without the TARGET_SIMD instructions we cannot move a Q register
     to a Q register directly.  We need a scratch.  */
  if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
      && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
      && reg_class_subset_p (rclass, FP_REGS))
    {
      if (mode == TFmode)
	sri->icode = CODE_FOR_aarch64_reload_movtf;
      else if (mode == TImode)
	sri->icode = CODE_FOR_aarch64_reload_movti;
      return NO_REGS;
    }

  /* A TFmode or TImode memory access should be handled via an FP_REGS
     because AArch64 has richer addressing modes for LDR/STR instructions
     than LDP/STP instructions.  */
  if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
      && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
    return FP_REGS;

  if (rclass == FP_REGS && (mode == TImode || mode == TFmode)
      && CONSTANT_P (x))
    return CORE_REGS;

  return NO_REGS;
}
static bool
aarch64_can_eliminate (const int from, const int to)
{
  /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
     HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM.  */

  if (frame_pointer_needed)
    {
      if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
	return true;
      if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
	return false;
      if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
	  && !cfun->calls_alloca)
	return true;
      if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
	return true;

      return false;
    }

  return true;
}
HOST_WIDE_INT
aarch64_initial_elimination_offset (unsigned from, unsigned to)
{
  HOST_WIDE_INT frame_size;
  HOST_WIDE_INT offset;

  aarch64_layout_frame ();
  frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
		+ crtl->outgoing_args_size
		+ cfun->machine->saved_varargs_size);

  frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
  offset = frame_size;

  if (to == HARD_FRAME_POINTER_REGNUM)
    {
      if (from == ARG_POINTER_REGNUM)
	return offset - crtl->outgoing_args_size;

      if (from == FRAME_POINTER_REGNUM)
	return cfun->machine->frame.saved_regs_size + get_frame_size ();
    }

  if (to == STACK_POINTER_REGNUM)
    {
      if (from == FRAME_POINTER_REGNUM)
	{
	  HOST_WIDE_INT elim = crtl->outgoing_args_size
			       + cfun->machine->frame.saved_regs_size
			       + get_frame_size ();
	  elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
	  return elim;
	}
    }

  return offset;
}
/* Implement RETURN_ADDR_RTX.  We do not support moving back to a
   previous frame.  */

rtx
aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return const0_rtx;
  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
static void
aarch64_asm_trampoline_template (FILE *f)
{
  if (TARGET_ILP32)
    {
      asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
      asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
    }
  else
    {
      asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
      asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
    }
  asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
  assemble_aligned_integer (4, const0_rtx);
  assemble_aligned_integer (POINTER_BYTES, const0_rtx);
  assemble_aligned_integer (POINTER_BYTES, const0_rtx);
}
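/* Trampoline layout sketch (illustrative, assuming the LP64 ABI where
   POINTER_BYTES == 8 and IP1/static chain are x17/x18):

       offset  0: ldr  x17, .+16     // load the function address
       offset  4: ldr  x18, .+20     // load the static chain value
       offset  8: br   x17
       offset 12: 4 bytes of zero padding
       offset 16: <function address, filled in by the init hook below>
       offset 24: <static chain value>

   The ILP32 variant loads 4-byte words through the w-register forms.  */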
static void
aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;
  const int tramp_code_sz = 16;

  /* Don't need to copy the trailing D-words, we fill those in below.  */
  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
  mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  if (GET_MODE (fnaddr) != ptr_mode)
    fnaddr = convert_memory_address (ptr_mode, fnaddr);
  emit_move_insn (mem, fnaddr);

  mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
  emit_move_insn (mem, chain_value);

  /* XXX We should really define a "clear_cache" pattern and use
     gen_clear_cache().  */
  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
		     LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
		     plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
		     ptr_mode);
}
static unsigned char
aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
{
  switch (regclass)
    {
    case CORE_REGS:
    case POINTER_REGS:
    case GENERAL_REGS:
    case ALL_REGS:
    case FP_REGS:
    case FP_LO_REGS:
      return
	aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
				       (GET_MODE_SIZE (mode) + 7) / 8;
    case STACK_REG:
      return 1;

    case NO_REGS:
      return 0;

    default:
      break;
    }
  gcc_unreachable ();
}
static reg_class_t
aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
{
  if (regclass == POINTER_REGS)
    return GENERAL_REGS;

  if (regclass == STACK_REG)
    {
      if (REG_P (x)
	  && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
	return regclass;

      return NO_REGS;
    }

  /* If it's an integer immediate that MOVI can't handle, then
     FP_REGS is not an option, so we return NO_REGS instead.  */
  if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
      && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
    return NO_REGS;

  /* Register elimination can result in a request for
     SP+constant->FP_REGS.  We cannot support such operations which
     use SP as source and an FP_REG as destination, so reject out
     right now.  */
  if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
    {
      rtx lhs = XEXP (x, 0);

      /* Look through a possible SUBREG introduced by ILP32.  */
      if (GET_CODE (lhs) == SUBREG)
	lhs = SUBREG_REG (lhs);

      gcc_assert (REG_P (lhs));
      gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
				      POINTER_REGS));
      return NO_REGS;
    }

  return regclass;
}
void
aarch64_asm_output_labelref (FILE* f, const char *name)
{
  asm_fprintf (f, "%U%s", name);
}

static void
aarch64_elf_asm_constructor (rtx symbol, int priority)
{
  if (priority == DEFAULT_INIT_PRIORITY)
    default_ctor_section_asm_out_constructor (symbol, priority);
  else
    {
      section *s;
      char buf[18];
      snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
      s = get_section (buf, SECTION_WRITE, NULL);
      switch_to_section (s);
      assemble_align (POINTER_SIZE);
      assemble_aligned_integer (POINTER_BYTES, symbol);
    }
}

static void
aarch64_elf_asm_destructor (rtx symbol, int priority)
{
  if (priority == DEFAULT_INIT_PRIORITY)
    default_dtor_section_asm_out_destructor (symbol, priority);
  else
    {
      section *s;
      char buf[18];
      snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
      s = get_section (buf, SECTION_WRITE, NULL);
      switch_to_section (s);
      assemble_align (POINTER_SIZE);
      assemble_aligned_integer (POINTER_BYTES, symbol);
    }
}
const char*
aarch64_output_casesi (rtx *operands)
{
  char buf[100];
  char label[100];
  rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
  int index;
  static const char *const patterns[4][2] =
  {
    {
      "ldrb\t%w3, [%0,%w1,uxtw]",
      "add\t%3, %4, %w3, sxtb #2"
    },
    {
      "ldrh\t%w3, [%0,%w1,uxtw #1]",
      "add\t%3, %4, %w3, sxth #2"
    },
    {
      "ldr\t%w3, [%0,%w1,uxtw #2]",
      "add\t%3, %4, %w3, sxtw #2"
    },
    /* We assume that DImode is only generated when not optimizing and
       that we don't really need 64-bit address offsets.  That would
       imply an object file with 8GB of code in a single function!  */
    {
      "ldr\t%w3, [%0,%w1,uxtw #2]",
      "add\t%3, %4, %w3, sxtw #2"
    }
  };

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));

  gcc_assert (index >= 0 && index <= 3);

  /* Need to implement table size reduction, by changing the code below.  */
  output_asm_insn (patterns[index][0], operands);
  ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
  snprintf (buf, sizeof (buf),
	    "adr\t%%4, %s", targetm.strip_name_encoding (label));
  output_asm_insn (buf, operands);
  output_asm_insn (patterns[index][1], operands);
  output_asm_insn ("br\t%3", operands);
  assemble_label (asm_out_file, label);
  return "";
}
/* Return size in bits of an arithmetic operand which is shifted/scaled and
   masked such that it is suitable for a UXTB, UXTH, or UXTW extend
   operator.  */

int
aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
{
  if (shift >= 0 && shift <= 3)
    {
      int size;
      for (size = 8; size <= 32; size *= 2)
	{
	  HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
	  if (mask == bits << shift)
	    return size;
	}
    }
  return 0;
}
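/* Illustrative example (not part of the original code):
   aarch64_uxt_size (2, 0x3fc) returns 8 because 0xff << 2 == 0x3fc,
   so (and (ashift x 2) 0x3fc) can be emitted as an extended-register
   operand of the form "add w0, w1, w2, uxtb #2".  */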
static bool
aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
				   const_rtx x ATTRIBUTE_UNUSED)
{
  /* We can't use blocks for constants when we're using a per-function
     constant pool.  */
  return false;
}

static section *
aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
			    rtx x ATTRIBUTE_UNUSED,
			    unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  /* Force all constant pool entries into the current function section.  */
  return function_section (current_function_decl);
}
/* Costs.  */

/* Helper function for rtx cost calculation.  Strip a shift expression
   from X.  Returns the inner operand if successful, or the original
   expression on failure.  */
static rtx
aarch64_strip_shift (rtx x)
{
  rtx op = x;

  /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
     we can convert both to ROR during final output.  */
  if ((GET_CODE (op) == ASHIFT
       || GET_CODE (op) == ASHIFTRT
       || GET_CODE (op) == LSHIFTRT
       || GET_CODE (op) == ROTATERT
       || GET_CODE (op) == ROTATE)
      && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);

  if (GET_CODE (op) == MULT
      && CONST_INT_P (XEXP (op, 1))
      && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
    return XEXP (op, 0);

  return x;
}

/* Helper function for rtx cost calculation.  Strip an extend
   expression from X.  Returns the inner operand if successful, or the
   original expression on failure.  We deal with a number of possible
   canonicalization variations here.  */
static rtx
aarch64_strip_extend (rtx x)
{
  rtx op = x;

  /* Zero and sign extraction of a widened value.  */
  if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
      && XEXP (op, 2) == const0_rtx
      && GET_CODE (XEXP (op, 0)) == MULT
      && aarch64_is_extend_from_extract (GET_MODE (op),
					 XEXP (XEXP (op, 0), 1),
					 XEXP (op, 1)))
    return XEXP (XEXP (op, 0), 0);

  /* It can also be represented (for zero-extend) as an AND with an
     immediate.  */
  if (GET_CODE (op) == AND
      && GET_CODE (XEXP (op, 0)) == MULT
      && CONST_INT_P (XEXP (XEXP (op, 0), 1))
      && CONST_INT_P (XEXP (op, 1))
      && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
			   INTVAL (XEXP (op, 1))) != 0)
    return XEXP (XEXP (op, 0), 0);

  /* Now handle extended register, as this may also have an optional
     left shift by 1..4.  */
  if (GET_CODE (op) == ASHIFT
      && CONST_INT_P (XEXP (op, 1))
      && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
    op = XEXP (op, 0);

  if (GET_CODE (op) == ZERO_EXTEND
      || GET_CODE (op) == SIGN_EXTEND)
    op = XEXP (op, 0);

  if (op != x)
    return op;

  return x;
}
/* Helper function for rtx cost calculation.  Calculate the cost of
   a MULT, which may be part of a multiply-accumulate rtx.  Return
   the calculated cost of the expression, recursing manually in to
   operands where needed.  */

static int
aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
{
  rtx op0, op1;
  const struct cpu_cost_table *extra_cost
    = aarch64_tune_params->insn_extra_cost;
  int cost = 0;
  bool maybe_fma = (outer == PLUS || outer == MINUS);
  enum machine_mode mode = GET_MODE (x);

  gcc_checking_assert (code == MULT);

  op0 = XEXP (x, 0);
  op1 = XEXP (x, 1);

  if (VECTOR_MODE_P (mode))
    mode = GET_MODE_INNER (mode);

  /* Integer multiply/fma.  */
  if (GET_MODE_CLASS (mode) == MODE_INT)
    {
      /* The multiply will be canonicalized as a shift, cost it as such.  */
      if (CONST_INT_P (op1)
	  && exact_log2 (INTVAL (op1)) > 0)
	{
	  if (speed)
	    {
	      if (maybe_fma)
		/* ADD (shifted register).  */
		cost += extra_cost->alu.arith_shift;
	      else
		/* LSL (immediate).  */
		cost += extra_cost->alu.shift;
	    }

	  cost += rtx_cost (op0, GET_CODE (op0), 0, speed);

	  return cost;
	}

      /* Integer multiplies or FMAs have zero/sign extending variants.  */
      if ((GET_CODE (op0) == ZERO_EXTEND
	   && GET_CODE (op1) == ZERO_EXTEND)
	  || (GET_CODE (op0) == SIGN_EXTEND
	      && GET_CODE (op1) == SIGN_EXTEND))
	{
	  cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
		  + rtx_cost (XEXP (op1, 0), MULT, 1, speed);

	  if (speed)
	    {
	      if (maybe_fma)
		/* MADD/SMADDL/UMADDL.  */
		cost += extra_cost->mult[0].extend_add;
	      else
		/* MUL/SMULL/UMULL.  */
		cost += extra_cost->mult[0].extend;
	    }

	  return cost;
	}

      /* This is either an integer multiply or an FMA.  In both cases
	 we want to recurse and cost the operands.  */
      cost += rtx_cost (op0, MULT, 0, speed)
	      + rtx_cost (op1, MULT, 1, speed);

      if (speed)
	{
	  if (maybe_fma)
	    /* MADD.  */
	    cost += extra_cost->mult[mode == DImode].add;
	  else
	    /* MUL.  */
	    cost += extra_cost->mult[mode == DImode].simple;
	}

      return cost;
    }
  else
    {
      if (speed)
	{
	  /* Floating-point FMA can also support negations of the
	     operands.  */
	  if (GET_CODE (op0) == NEG)
	    op0 = XEXP (op0, 0);
	  if (GET_CODE (op1) == NEG)
	    op1 = XEXP (op1, 0);

	  if (maybe_fma)
	    /* FMADD/FNMADD/FNMSUB/FMSUB.  */
	    cost += extra_cost->fp[mode == DFmode].fma;
	  else
	    /* FMUL.  */
	    cost += extra_cost->fp[mode == DFmode].mult;
	}

      cost += rtx_cost (op0, MULT, 0, speed)
	      + rtx_cost (op1, MULT, 1, speed);
      return cost;
    }
}
static int
aarch64_address_cost (rtx x,
		      enum machine_mode mode,
		      addr_space_t as ATTRIBUTE_UNUSED,
		      bool speed)
{
  enum rtx_code c = GET_CODE (x);
  const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
  struct aarch64_address_info info;
  int cost = 0;

  if (!aarch64_classify_address (&info, x, mode, c, false))
    {
      if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
	{
	  /* This is a CONST or SYMBOL ref which will be split
	     in a different way depending on the code model in use.
	     Cost it through the generic infrastructure.  */
	  int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
	  /* Divide through by the cost of one instruction to
	     bring it to the same units as the address costs.  */
	  cost_symbol_ref /= COSTS_N_INSNS (1);
	  /* The cost is then the cost of preparing the address,
	     followed by an immediate (possibly 0) offset.  */
	  return cost_symbol_ref + addr_cost->imm_offset;
	}
      else
	{
	  /* This is most likely a jump table from a case
	     statement.  */
	  return addr_cost->register_offset;
	}
    }

  switch (info.type)
    {
    case ADDRESS_LO_SUM:
    case ADDRESS_SYMBOLIC:
    case ADDRESS_REG_IMM:
      cost += addr_cost->imm_offset;
      break;

    case ADDRESS_REG_WB:
      if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
	cost += addr_cost->pre_modify;
      else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
	cost += addr_cost->post_modify;
      break;

    case ADDRESS_REG_REG:
      cost += addr_cost->register_offset;
      break;

    case ADDRESS_REG_UXTW:
    case ADDRESS_REG_SXTW:
      cost += addr_cost->register_extend;
      break;
    }

  if (info.shift > 0)
    {
      /* For the sake of calculating the cost of the shifted register
	 component, we can treat same sized modes in the same way.  */
      switch (GET_MODE_BITSIZE (mode))
	{
	case 16:
	  cost += addr_cost->addr_scale_costs.hi;
	  break;

	case 32:
	  cost += addr_cost->addr_scale_costs.si;
	  break;

	case 64:
	  cost += addr_cost->addr_scale_costs.di;
	  break;

	/* We can't tell, or this is a 128-bit vector.  */
	default:
	  cost += addr_cost->addr_scale_costs.ti;
	  break;
	}
    }

  return cost;
}
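/* A worked example (illustrative only): the register-offset address
   used by "ldr w0, [x1, w2, sxtw #2]" classifies as ADDRESS_REG_SXTW
   with a non-zero shift, so in SImode its cost is

     addr_cost->register_extend + addr_cost->addr_scale_costs.si

   i.e. the extended-register component plus the 32-bit scaling
   component.  */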
/* Calculate the cost of calculating X, storing it in *COST.  Result
   is true if the total cost of the operation has now been calculated.  */
static bool
aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
		   int param ATTRIBUTE_UNUSED, int *cost, bool speed)
{
  rtx op0, op1;
  const struct cpu_cost_table *extra_cost
    = aarch64_tune_params->insn_extra_cost;
  enum machine_mode mode = GET_MODE (x);

  /* By default, assume that everything has equivalent cost to the
     cheapest instruction.  Any additional costs are applied as a delta
     above this default.  */
  *cost = COSTS_N_INSNS (1);

  /* TODO: The cost infrastructure currently does not handle
     vector operations.  Assume that all vector operations
     are equally expensive.  */
  if (VECTOR_MODE_P (mode))
    {
      if (speed)
	*cost += extra_cost->vect.alu;
      return true;
    }

  switch (code)
    {
    case SET:
      /* The cost depends entirely on the operands to SET.  */
      *cost = 0;
      op0 = SET_DEST (x);
      op1 = SET_SRC (x);

      switch (GET_CODE (op0))
	{
	case MEM:
	  if (speed)
	    {
	      rtx address = XEXP (op0, 0);
	      if (GET_MODE_CLASS (mode) == MODE_INT)
		*cost += extra_cost->ldst.store;
	      else if (mode == SFmode)
		*cost += extra_cost->ldst.storef;
	      else if (mode == DFmode)
		*cost += extra_cost->ldst.stored;

	      *cost +=
		COSTS_N_INSNS (aarch64_address_cost (address, mode,
						     0, speed));
	    }

	  *cost += rtx_cost (op1, SET, 1, speed);
	  return true;

	case SUBREG:
	  if (! REG_P (SUBREG_REG (op0)))
	    *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);

	  /* Fall through.  */
	case REG:
	  /* const0_rtx is in general free, but we will use an
	     instruction to set a register to 0.  */
	  if (REG_P (op1) || op1 == const0_rtx)
	    {
	      /* The cost is 1 per register copied.  */
	      int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
			      / UNITS_PER_WORD;
	      *cost = COSTS_N_INSNS (n_minus_1 + 1);
	    }
	  else
	    /* Cost is just the cost of the RHS of the set.  */
	    *cost += rtx_cost (op1, SET, 1, speed);
	  return true;

	case ZERO_EXTRACT:
	case SIGN_EXTRACT:
	  /* Bit-field insertion.  Strip any redundant widening of
	     the RHS to meet the width of the target.  */
	  if (GET_CODE (op1) == SUBREG)
	    op1 = SUBREG_REG (op1);
	  if ((GET_CODE (op1) == ZERO_EXTEND
	       || GET_CODE (op1) == SIGN_EXTEND)
	      && GET_CODE (XEXP (op0, 1)) == CONST_INT
	      && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
		  >= INTVAL (XEXP (op0, 1))))
	    op1 = XEXP (op1, 0);

	  if (CONST_INT_P (op1))
	    {
	      /* MOV immediate is assumed to always be cheap.  */
	      *cost = COSTS_N_INSNS (1);
	    }
	  else
	    {
	      /* BFM.  */
	      if (speed)
		*cost += extra_cost->alu.bfi;
	      *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
	    }

	  return true;

	default:
	  /* We can't make sense of this, assume default cost.  */
	  *cost = COSTS_N_INSNS (1);
	  break;
	}
      return false;
    case CONST_INT:
      /* If an instruction can incorporate a constant within the
	 instruction, the instruction's expression avoids calling
	 rtx_cost() on the constant.  If rtx_cost() is called on a
	 constant, then it is usually because the constant must be
	 moved into a register by one or more instructions.

	 The exception is constant 0, which can be expressed
	 as XZR/WZR and is therefore free.  The exception to this is
	 if we have (set (reg) (const0_rtx)) in which case we must cost
	 the move.  However, we can catch that when we cost the SET, so
	 we don't need to consider that here.  */
      if (x == const0_rtx)
	*cost = 0;
      else
	{
	  /* To an approximation, building any other constant is
	     proportionally expensive to the number of instructions
	     required to build that constant.  This is true whether we
	     are compiling for SPEED or otherwise.  */
	  *cost = COSTS_N_INSNS (aarch64_build_constant (0,
							 INTVAL (x),
							 false));
	}
      return true;

    case CONST_DOUBLE:
      if (speed)
	{
	  /* mov[df,sf]_aarch64.  */
	  if (aarch64_float_const_representable_p (x))
	    /* FMOV (scalar immediate).  */
	    *cost += extra_cost->fp[mode == DFmode].fpconst;
	  else if (!aarch64_float_const_zero_rtx_p (x))
	    {
	      /* This will be a load from memory.  */
	      if (mode == DFmode)
		*cost += extra_cost->ldst.loadd;
	      else
		*cost += extra_cost->ldst.loadf;
	    }
	  else
	    {
	      /* Otherwise this is +0.0.  We get this using MOVI d0, #0
		 or MOV v0.s[0], wzr - neither of which are modeled by the
		 cost tables.  Just use the default cost.  */
	    }
	}

      return true;
    case MEM:
      if (speed)
	{
	  /* For loads we want the base cost of a load, plus an
	     approximation for the additional cost of the addressing
	     mode.  */
	  rtx address = XEXP (x, 0);
	  if (GET_MODE_CLASS (mode) == MODE_INT)
	    *cost += extra_cost->ldst.load;
	  else if (mode == SFmode)
	    *cost += extra_cost->ldst.loadf;
	  else if (mode == DFmode)
	    *cost += extra_cost->ldst.loadd;

	  *cost +=
	    COSTS_N_INSNS (aarch64_address_cost (address, mode,
						 0, speed));
	}

      return true;
    case NEG:
      op0 = XEXP (x, 0);

      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	{
	  if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
	      || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
	    {
	      /* CSETM.  */
	      *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
	      return true;
	    }

	  /* Cost this as SUB wzr, X.  */
	  op0 = CONST0_RTX (GET_MODE (x));
	  op1 = XEXP (x, 0);
	  goto cost_minus;
	}

      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	{
	  /* Support (neg(fma...)) as a single instruction only if
	     sign of zeros is unimportant.  This matches the decision
	     making in aarch64.md.  */
	  if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
	    {
	      /* FNMADD.  */
	      *cost = rtx_cost (op0, NEG, 0, speed);
	      return true;
	    }

	  /* FNEG.  */
	  if (speed)
	    *cost += extra_cost->fp[mode == DFmode].neg;
	  return false;
	}

      return false;
    case COMPARE:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (op1 == const0_rtx
	  && GET_CODE (op0) == AND)
	{
	  x = op0;
	  goto cost_logic;
	}

      /* Comparisons can work if the order is swapped.
	 Canonicalization puts the more complex operation first, but
	 we want it in op1.  */
      if (! (REG_P (op0)
	     || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
	{
	  op0 = XEXP (x, 1);
	  op1 = XEXP (x, 0);
	}
      goto cost_minus;
    case MINUS:
      {
	op0 = XEXP (x, 0);
	op1 = XEXP (x, 1);

cost_minus:
	/* Detect valid immediates.  */
	if ((GET_MODE_CLASS (mode) == MODE_INT
	     || (GET_MODE_CLASS (mode) == MODE_CC
		 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
	    && CONST_INT_P (op1)
	    && aarch64_uimm12_shift (INTVAL (op1)))
	  {
	    *cost += rtx_cost (op0, MINUS, 0, speed);

	    if (speed)
	      /* SUB(S) (immediate).  */
	      *cost += extra_cost->alu.arith;
	    return true;
	  }

	rtx new_op1 = aarch64_strip_extend (op1);

	/* Cost this as an FMA-alike operation.  */
	if ((GET_CODE (new_op1) == MULT
	     || GET_CODE (new_op1) == ASHIFT)
	    && code != COMPARE)
	  {
	    *cost += aarch64_rtx_mult_cost (new_op1, MULT,
					    (enum rtx_code) code,
					    speed);
	    *cost += rtx_cost (op0, MINUS, 0, speed);
	    return true;
	  }

	*cost += rtx_cost (new_op1, MINUS, 1, speed);

	if (speed)
	  {
	    if (GET_MODE_CLASS (mode) == MODE_INT)
	      /* SUB(S).  */
	      *cost += extra_cost->alu.arith;
	    else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	      /* FSUB.  */
	      *cost += extra_cost->fp[mode == DFmode].addsub;
	  }
	return true;
      }
    case PLUS:
      {
	rtx new_op0;

	op0 = XEXP (x, 0);
	op1 = XEXP (x, 1);

	if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
	    || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
	  {
	    /* CSINC.  */
	    *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
	    *cost += rtx_cost (op1, PLUS, 1, speed);
	    return true;
	  }

	if (GET_MODE_CLASS (mode) == MODE_INT
	    && CONST_INT_P (op1)
	    && aarch64_uimm12_shift (INTVAL (op1)))
	  {
	    *cost += rtx_cost (op0, PLUS, 0, speed);

	    if (speed)
	      /* ADD (immediate).  */
	      *cost += extra_cost->alu.arith;
	    return true;
	  }

	/* Strip any extend, leave shifts behind as we will
	   cost them through mult_cost.  */
	new_op0 = aarch64_strip_extend (op0);

	if (GET_CODE (new_op0) == MULT
	    || GET_CODE (new_op0) == ASHIFT)
	  {
	    *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
					    speed);
	    *cost += rtx_cost (op1, PLUS, 1, speed);
	    return true;
	  }

	*cost += (rtx_cost (new_op0, PLUS, 0, speed)
		  + rtx_cost (op1, PLUS, 1, speed));

	if (speed)
	  {
	    if (GET_MODE_CLASS (mode) == MODE_INT)
	      /* ADD.  */
	      *cost += extra_cost->alu.arith;
	    else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	      /* FADD.  */
	      *cost += extra_cost->fp[mode == DFmode].addsub;
	  }
	return true;
      }
    case BSWAP:
      *cost = COSTS_N_INSNS (1);

      if (speed)
	*cost += extra_cost->alu.rev;

      return false;

    case IOR:
      if (aarch_rev16_p (x))
	{
	  *cost = COSTS_N_INSNS (1);

	  if (speed)
	    *cost += extra_cost->alu.rev;

	  return true;
	}
      /* Fall through.  */
    case XOR:
    case AND:
    cost_logic:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (code == AND
	  && GET_CODE (op0) == MULT
	  && CONST_INT_P (XEXP (op0, 1))
	  && CONST_INT_P (op1)
	  && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
			       INTVAL (op1)) != 0)
	{
	  /* This is a UBFM/SBFM.  */
	  *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
	  if (speed)
	    *cost += extra_cost->alu.bfx;
	  return true;
	}

      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	{
	  /* We possibly get the immediate for free, this is not
	     modelled.  */
	  if (CONST_INT_P (op1)
	      && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
	    {
	      *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);

	      if (speed)
		*cost += extra_cost->alu.logical;

	      return true;
	    }
	  else
	    {
	      rtx new_op0 = op0;

	      /* Handle ORN, EON, or BIC.  */
	      if (GET_CODE (op0) == NOT)
		op0 = XEXP (op0, 0);

	      new_op0 = aarch64_strip_shift (op0);

	      /* If we had a shift on op0 then this is a logical-shift-
		 by-register/immediate operation.  Otherwise, this is just
		 a logical operation.  */
	      if (speed)
		{
		  if (new_op0 != op0)
		    {
		      /* Shift by immediate.  */
		      if (CONST_INT_P (XEXP (op0, 1)))
			*cost += extra_cost->alu.log_shift;
		      else
			*cost += extra_cost->alu.log_shift_reg;
		    }
		  else
		    *cost += extra_cost->alu.logical;
		}

	      /* In both cases we want to cost both operands.  */
	      *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
		       + rtx_cost (op1, (enum rtx_code) code, 1, speed);

	      return true;
	    }
	}
      return false;

    case NOT:
      /* MVN.  */
      if (speed)
	*cost += extra_cost->alu.logical;

      /* The logical instruction could have the shifted register form,
	 but the cost is the same if the shift is processed as a separate
	 instruction, so we don't bother with it here.  */
      return false;
    case ZERO_EXTEND:
      op0 = XEXP (x, 0);

      /* If a value is written in SI mode, then zero extended to DI
	 mode, the operation will in general be free as a write to
	 a 'w' register implicitly zeroes the upper bits of an 'x'
	 register.  However, if this is

	   (set (reg) (zero_extend (reg)))

	 we must cost the explicit register move.  */
      if (mode == DImode
	  && GET_MODE (op0) == SImode
	  && outer == SET)
	{
	  int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);

	  if (!op_cost && speed)
	    /* MOV.  */
	    *cost += extra_cost->alu.extend;
	  else
	    /* Free, the cost is that of the SI mode operation.  */
	    *cost = op_cost;

	  return true;
	}
      else if (MEM_P (XEXP (x, 0)))
	{
	  /* All loads can zero extend to any size for free.  */
	  *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
	  return true;
	}

      /* UXTB/UXTH.  */
      if (speed)
	*cost += extra_cost->alu.extend;

      return false;
    case SIGN_EXTEND:
      if (MEM_P (XEXP (x, 0)))
	{
	  /* LDRSH.  */
	  if (speed)
	    {
	      rtx address = XEXP (XEXP (x, 0), 0);
	      *cost += extra_cost->ldst.load_sign_extend;

	      *cost +=
		COSTS_N_INSNS (aarch64_address_cost (address, mode,
						     0, speed));
	    }
	  return true;
	}

      if (speed)
	*cost += extra_cost->alu.extend;
      return false;
    case ROTATE:
    case ROTATERT:
    case LSHIFTRT:
    case ASHIFTRT:
    case ASHIFT:
      if (!CONST_INT_P (XEXP (x, 1)))
	*cost += COSTS_N_INSNS (2);

      /* Shifting by a register often takes an extra cycle.  */
      if (speed && !CONST_INT_P (XEXP (x, 1)))
	*cost += extra_cost->alu.arith_shift_reg;

      *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
      return true;
    case HIGH:
      if (!CONSTANT_P (XEXP (x, 0)))
	*cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
      return true;

    case LO_SUM:
      if (!CONSTANT_P (XEXP (x, 1)))
	*cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
      *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
      return true;

    case ZERO_EXTRACT:
    case SIGN_EXTRACT:
      *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
      return true;
    case MULT:
      *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
      /* aarch64_rtx_mult_cost always handles recursion to its
	 operands.  */
      return true;

    case MOD:
    case UMOD:
      *cost = COSTS_N_INSNS (2);

      if (speed)
	{
	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	    *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
		      + extra_cost->mult[GET_MODE (x) == DImode].idiv);
	  else if (GET_MODE (x) == DFmode)
	    *cost += (extra_cost->fp[1].mult
		      + extra_cost->fp[1].div);
	  else if (GET_MODE (x) == SFmode)
	    *cost += (extra_cost->fp[0].mult
		      + extra_cost->fp[0].div);
	}
      return false;  /* All arguments need to be in registers.  */

    case DIV:
    case UDIV:
    case SQRT:
      *cost = COSTS_N_INSNS (1);

      if (speed)
	{
	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	    *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv;
	  else if (GET_MODE (x) == DFmode)
	    *cost += extra_cost->fp[1].div;
	  else if (GET_MODE (x) == SFmode)
	    *cost += extra_cost->fp[0].div;
	}
      return false;  /* All arguments need to be in registers.  */

    default:
      break;
    }
  return false;
}
/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
   calculated for X.  This cost is stored in *COST.  Returns true
   if the total cost of X was calculated.  */
static bool
aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
			   int param, int *cost, bool speed)
{
  bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      print_rtl_single (dump_file, x);
      fprintf (dump_file, "\n%s cost: %d (%s)\n",
	       speed ? "Hot" : "Cold",
	       *cost, result ? "final" : "partial");
    }

  return result;
}
static int
aarch64_register_move_cost (enum machine_mode mode,
			    reg_class_t from_i, reg_class_t to_i)
{
  enum reg_class from = (enum reg_class) from_i;
  enum reg_class to = (enum reg_class) to_i;
  const struct cpu_regmove_cost *regmove_cost
    = aarch64_tune_params->regmove_cost;

  /* Moving between GPR and stack cost is the same as GP2GP.  */
  if ((from == GENERAL_REGS && to == STACK_REG)
      || (to == GENERAL_REGS && from == STACK_REG))
    return regmove_cost->GP2GP;

  /* To/From the stack register, we move via the gprs.  */
  if (to == STACK_REG || from == STACK_REG)
    return aarch64_register_move_cost (mode, from, GENERAL_REGS)
	    + aarch64_register_move_cost (mode, GENERAL_REGS, to);

  if (from == GENERAL_REGS && to == GENERAL_REGS)
    return regmove_cost->GP2GP;
  else if (from == GENERAL_REGS)
    return regmove_cost->GP2FP;
  else if (to == GENERAL_REGS)
    return regmove_cost->FP2GP;

  /* When AdvSIMD instructions are disabled it is not possible to move
     a 128-bit value directly between Q registers.  This is handled in
     secondary reload.  A general register is used as a scratch to move
     the upper DI value and the lower DI value is moved directly,
     hence the cost is the sum of three moves.  */
  if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 128)
    return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;

  return regmove_cost->FP2FP;
}
static int
aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
			  reg_class_t rclass ATTRIBUTE_UNUSED,
			  bool in ATTRIBUTE_UNUSED)
{
  return aarch64_tune_params->memmov_cost;
}

/* Return the number of instructions that can be issued per cycle.  */
static int
aarch64_sched_issue_rate (void)
{
  return aarch64_tune_params->issue_rate;
}
/* Vectorizer cost model target hooks.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				    tree vectype,
				    int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
    case scalar_stmt:
      return aarch64_tune_params->vec_costs->scalar_stmt_cost;

    case scalar_load:
      return aarch64_tune_params->vec_costs->scalar_load_cost;

    case scalar_store:
      return aarch64_tune_params->vec_costs->scalar_store_cost;

    case vector_stmt:
      return aarch64_tune_params->vec_costs->vec_stmt_cost;

    case vector_load:
      return aarch64_tune_params->vec_costs->vec_align_load_cost;

    case vector_store:
      return aarch64_tune_params->vec_costs->vec_store_cost;

    case vec_to_scalar:
      return aarch64_tune_params->vec_costs->vec_to_scalar_cost;

    case scalar_to_vec:
      return aarch64_tune_params->vec_costs->scalar_to_vec_cost;

    case unaligned_load:
      return aarch64_tune_params->vec_costs->vec_unalign_load_cost;

    case unaligned_store:
      return aarch64_tune_params->vec_costs->vec_unalign_store_cost;

    case cond_branch_taken:
      return aarch64_tune_params->vec_costs->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;

    case vec_perm:
    case vec_promote_demote:
      return aarch64_tune_params->vec_costs->vec_stmt_cost;

    case vec_construct:
      elements = TYPE_VECTOR_SUBPARTS (vectype);
      return elements / 2 + 1;

    default:
      gcc_unreachable ();
    }
}
/* Implement targetm.vectorize.add_stmt_cost.  */
static unsigned
aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
		       struct _stmt_vec_info *stmt_info, int misalign,
		       enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost =
	    aarch64_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
	 vectorized are weighted more heavily.  The value here is
	 a function (linear for now) of the loop nest level.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
	{
	  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
	  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
	  unsigned nest_level = loop_depth (loop);

	  count *= nest_level;
	}

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}
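/* A worked example (illustrative only): a statement with a base cost
   of 1 that sits in a loop nested one level inside the loop being
   vectorized has nest_level == 2, so count is doubled and the
   statement contributes 2 * 1 = 2 to the vect_body bucket of the
   cost array.  */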
static void initialize_aarch64_code_model (void);

/* Parse the architecture extension string.  */
static void
aarch64_parse_extension (char *str)
{
  /* The extension string is parsed left to right.  */
  const struct aarch64_option_extension *opt = NULL;

  /* Flag to say whether we are adding or removing an extension.  */
  int adding_ext = -1;

  while (str != NULL && *str != 0)
    {
      char *ext;
      size_t len;

      str++;
      ext = strchr (str, '+');

      if (ext != NULL)
	len = ext - str;
      else
	len = strlen (str);

      if (len >= 2 && strncmp (str, "no", 2) == 0)
	{
	  adding_ext = 0;
	  len -= 2;
	  str += 2;
	}
      else if (len > 0)
	adding_ext = 1;

      if (len == 0)
	{
	  error ("missing feature modifier after %qs", "+no");
	  return;
	}

      /* Scan over the extensions table trying to find an exact match.  */
      for (opt = all_extensions; opt->name != NULL; opt++)
	{
	  if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
	    {
	      /* Add or remove the extension.  */
	      if (adding_ext)
		aarch64_isa_flags |= opt->flags_on;
	      else
		aarch64_isa_flags &= ~(opt->flags_off);
	      break;
	    }
	}

      if (opt->name == NULL)
	{
	  /* Extension not found in list.  */
	  error ("unknown feature modifier %qs", str);
	  return;
	}

      str = ext;
    }

  return;
}
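/* For illustration (assuming the standard extension names such as
   "crypto" and "simd" from aarch64-option-extensions.def): parsing
   "+crypto+nosimd" first ORs in the flags switched on by "crypto",
   then clears the flags switched off by "nosimd".  A bare "+no" with
   nothing after it is rejected with the "missing feature modifier"
   error above.  */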
/* Parse the ARCH string.  */
static void
aarch64_parse_arch (void)
{
  char *ext;
  const struct processor *arch;
  char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
  size_t len;

  strcpy (str, aarch64_arch_string);

  ext = strchr (str, '+');

  if (ext != NULL)
    len = ext - str;
  else
    len = strlen (str);

  if (len == 0)
    {
      error ("missing arch name in -march=%qs", str);
      return;
    }

  /* Loop through the list of supported ARCHs to find a match.  */
  for (arch = all_architectures; arch->name != NULL; arch++)
    {
      if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
	{
	  selected_arch = arch;
	  aarch64_isa_flags = selected_arch->flags;

	  if (!selected_cpu)
	    selected_cpu = &all_cores[selected_arch->core];

	  if (ext != NULL)
	    {
	      /* ARCH string contains at least one extension.  */
	      aarch64_parse_extension (ext);
	    }

	  if (strcmp (selected_arch->arch, selected_cpu->arch))
	    {
	      warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
		       selected_cpu->name, selected_arch->name);
	    }

	  return;
	}
    }

  /* ARCH name not found in list.  */
  error ("unknown value %qs for -march", str);
  return;
}
/* Parse the CPU string.  */
static void
aarch64_parse_cpu (void)
{
  char *ext;
  const struct processor *cpu;
  char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
  size_t len;

  strcpy (str, aarch64_cpu_string);

  ext = strchr (str, '+');

  if (ext != NULL)
    len = ext - str;
  else
    len = strlen (str);

  if (len == 0)
    {
      error ("missing cpu name in -mcpu=%qs", str);
      return;
    }

  /* Loop through the list of supported CPUs to find a match.  */
  for (cpu = all_cores; cpu->name != NULL; cpu++)
    {
      if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
	{
	  selected_cpu = cpu;
	  selected_tune = cpu;
	  aarch64_isa_flags = selected_cpu->flags;

	  if (ext != NULL)
	    {
	      /* CPU string contains at least one extension.  */
	      aarch64_parse_extension (ext);
	    }

	  return;
	}
    }

  /* CPU name not found in list.  */
  error ("unknown value %qs for -mcpu", str);
  return;
}
/* Parse the TUNE string.  */
static void
aarch64_parse_tune (void)
{
  const struct processor *cpu;
  char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
  strcpy (str, aarch64_tune_string);

  /* Loop through the list of supported CPUs to find a match.  */
  for (cpu = all_cores; cpu->name != NULL; cpu++)
    {
      if (strcmp (cpu->name, str) == 0)
	{
	  selected_tune = cpu;
	  return;
	}
    }

  /* CPU name not found in list.  */
  error ("unknown value %qs for -mtune", str);
  return;
}
/* Implement TARGET_OPTION_OVERRIDE.  */
static void
aarch64_override_options (void)
{
  /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
     If either of -march or -mtune is given, they override their
     respective component of -mcpu.

     So, first parse AARCH64_CPU_STRING, then the others, be careful
     with -march as, if -mcpu is not present on the command line, march
     must set a sensible default CPU.  */
  if (aarch64_cpu_string)
    {
      aarch64_parse_cpu ();
    }

  if (aarch64_arch_string)
    {
      aarch64_parse_arch ();
    }

  if (aarch64_tune_string)
    {
      aarch64_parse_tune ();
    }

#ifndef HAVE_AS_MABI_OPTION
  /* The compiler may have been configured with 2.23.* binutils, which does
     not have support for ILP32.  */
  if (TARGET_ILP32)
    error ("Assembler does not support -mabi=ilp32");
#endif

  initialize_aarch64_code_model ();

  aarch64_build_bitmask_table ();

  /* This target defaults to strict volatile bitfields.  */
  if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
    flag_strict_volatile_bitfields = 1;

  /* If the user did not specify a processor, choose the default
     one for them.  This will be the CPU set during configuration using
     --with-cpu, otherwise it is "generic".  */
  if (!selected_cpu)
    {
      selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
      aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
    }

  gcc_assert (selected_cpu);

  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  if (!selected_tune)
    selected_tune = &all_cores[selected_cpu->core];

  aarch64_tune_flags = selected_tune->flags;
  aarch64_tune = selected_tune->core;
  aarch64_tune_params = selected_tune->tune;

  aarch64_override_options_after_change ();
}
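/* For illustration of the precedence rules above (hypothetical command
   lines): "-mcpu=cortex-a57 -mtune=cortex-a53" keeps the architecture
   and ISA flags implied by cortex-a57 but selects the tuning tables of
   cortex-a53, while combining -mcpu with a conflicting -march triggers
   the "conflicts with" warning emitted in aarch64_parse_arch.  */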
/* Implement targetm.override_options_after_change.  */
static void
aarch64_override_options_after_change (void)
{
  if (flag_omit_frame_pointer)
    flag_omit_leaf_frame_pointer = false;
  else if (flag_omit_leaf_frame_pointer)
    flag_omit_frame_pointer = true;
}

static struct machine_function *
aarch64_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_alloc_cleared_machine_function ();
  return machine;
}

void
aarch64_init_expanders (void)
{
  init_machine_status = aarch64_init_machine_status;
}
/* A checking mechanism for the implementation of the various code models.  */
static void
initialize_aarch64_code_model (void)
{
   if (flag_pic)
     {
       switch (aarch64_cmodel_var)
	 {
	 case AARCH64_CMODEL_TINY:
	   aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
	   break;
	 case AARCH64_CMODEL_SMALL:
	   aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
	   break;
	 case AARCH64_CMODEL_LARGE:
	   sorry ("code model %qs with -f%s", "large",
		  flag_pic > 1 ? "PIC" : "pic");
	 default:
	   gcc_unreachable ();
	 }
     }
   else
     aarch64_cmodel = aarch64_cmodel_var;
}
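/* For illustration: compiling with "-mcmodel=small -fpic" lands in the
   AARCH64_CMODEL_SMALL case above and silently selects
   AARCH64_CMODEL_SMALL_PIC, whereas "-mcmodel=large -fpic" is rejected
   with the sorry () above, as no PIC variant of the large model is
   implemented.  */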
/* Return true if SYMBOL_REF X binds locally.  */

static bool
aarch64_symbol_binds_local_p (const_rtx x)
{
  return (SYMBOL_REF_DECL (x)
	  ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
	  : SYMBOL_REF_LOCAL_P (x));
}

/* Return true if SYMBOL_REF X is thread local.  */
static bool
aarch64_tls_symbol_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  if (GET_CODE (x) != SYMBOL_REF)
    return false;

  return SYMBOL_REF_TLS_MODEL (x) != 0;
}
/* Classify a TLS symbol into one of the TLS kinds.  */
enum aarch64_symbol_type
aarch64_classify_tls_symbol (rtx x)
{
  enum tls_model tls_kind = tls_symbolic_operand_type (x);

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
    case TLS_MODEL_LOCAL_DYNAMIC:
      return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;

    case TLS_MODEL_INITIAL_EXEC:
      return SYMBOL_SMALL_GOTTPREL;

    case TLS_MODEL_LOCAL_EXEC:
      return SYMBOL_SMALL_TPREL;

    case TLS_MODEL_EMULATED:
    case TLS_MODEL_NONE:
      return SYMBOL_FORCE_TO_MEM;

    default:
      gcc_unreachable ();
    }
}
/* Return the method that should be used to access SYMBOL_REF or
   LABEL_REF X in context CONTEXT.  */

enum aarch64_symbol_type
aarch64_classify_symbol (rtx x,
			 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
{
  if (GET_CODE (x) == LABEL_REF)
    {
      switch (aarch64_cmodel)
	{
	case AARCH64_CMODEL_LARGE:
	  return SYMBOL_FORCE_TO_MEM;

	case AARCH64_CMODEL_TINY_PIC:
	case AARCH64_CMODEL_TINY:
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL_PIC:
	case AARCH64_CMODEL_SMALL:
	  return SYMBOL_SMALL_ABSOLUTE;

	default:
	  gcc_unreachable ();
	}
    }

  if (GET_CODE (x) == SYMBOL_REF)
    {
      if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
	return SYMBOL_FORCE_TO_MEM;

      if (aarch64_tls_symbol_p (x))
	return aarch64_classify_tls_symbol (x);

      switch (aarch64_cmodel)
	{
	case AARCH64_CMODEL_TINY:
	  if (SYMBOL_REF_WEAK (x))
	    return SYMBOL_FORCE_TO_MEM;
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL:
	  if (SYMBOL_REF_WEAK (x))
	    return SYMBOL_FORCE_TO_MEM;
	  return SYMBOL_SMALL_ABSOLUTE;

	case AARCH64_CMODEL_TINY_PIC:
	  if (!aarch64_symbol_binds_local_p (x))
	    return SYMBOL_TINY_GOT;
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL_PIC:
	  if (!aarch64_symbol_binds_local_p (x))
	    return SYMBOL_SMALL_GOT;
	  return SYMBOL_SMALL_ABSOLUTE;

	default:
	  gcc_unreachable ();
	}
    }

  /* By default push everything into the constant pool.  */
  return SYMBOL_FORCE_TO_MEM;
}
bool
aarch64_constant_address_p (rtx x)
{
  return (CONSTANT_P (x) && memory_address_p (DImode, x));
}

bool
aarch64_legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return false;

  return true;
}
/* Return true if X holds either a quarter-precision or
   floating-point +0.0 constant.  */
static bool
aarch64_valid_floating_const (enum machine_mode mode, rtx x)
{
  if (!CONST_DOUBLE_P (x))
    return false;

  /* TODO: We could handle moving 0.0 to a TFmode register,
     but first we would like to refactor the movtf_aarch64
     to be more amicable to split moves properly and
     correctly gate on TARGET_SIMD.  For now - reject all
     constants which are not to SFmode or DFmode registers.  */
  if (!(mode == SFmode || mode == DFmode))
    return false;

  if (aarch64_float_const_zero_rtx_p (x))
    return true;
  return aarch64_float_const_representable_p (x);
}

static bool
aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
{
  /* Do not allow vector struct mode constants.  We could support
     0 and -1 easily, but they need support in aarch64-simd.md.  */
  if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
    return false;

  /* This could probably go away because
     we now decompose CONST_INTs according to expand_mov_immediate.  */
  if ((GET_CODE (x) == CONST_VECTOR
       && aarch64_simd_valid_immediate (x, mode, false, NULL))
      || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
    return !targetm.cannot_force_const_mem (mode, x);

  if (GET_CODE (x) == HIGH
      && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
    return true;

  return aarch64_constant_address_p (x);
}
rtx
aarch64_load_tp (rtx target)
{
  if (!target
      || GET_MODE (target) != Pmode
      || !register_operand (target, Pmode))
    target = gen_reg_rtx (Pmode);

  /* Can return in any reg.  */
  emit_insn (gen_aarch64_load_tp_hard (target));
  return target;
}

/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;
/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
   Return the type to use as __builtin_va_list.

   AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:

   struct __va_list
   {
     void *__stack;
     void *__gr_top;
     void *__vr_top;
     int   __gr_offs;
     int   __vr_offs;
   };  */

static tree
aarch64_build_builtin_va_list (void)
{
  tree va_list_name;
  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;

  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;

  /* Create the fields.  */
  f_stack = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__stack"),
			ptr_type_node);
  f_grtop = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__gr_top"),
			ptr_type_node);
  f_vrtop = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__vr_top"),
			ptr_type_node);
  f_groff = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__gr_offs"),
			integer_type_node);
  f_vroff = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__vr_offs"),
			integer_type_node);

  DECL_ARTIFICIAL (f_stack) = 1;
  DECL_ARTIFICIAL (f_grtop) = 1;
  DECL_ARTIFICIAL (f_vrtop) = 1;
  DECL_ARTIFICIAL (f_groff) = 1;
  DECL_ARTIFICIAL (f_vroff) = 1;

  DECL_FIELD_CONTEXT (f_stack) = va_list_type;
  DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
  DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
  DECL_FIELD_CONTEXT (f_groff) = va_list_type;
  DECL_FIELD_CONTEXT (f_vroff) = va_list_type;

  TYPE_FIELDS (va_list_type) = f_stack;
  DECL_CHAIN (f_stack) = f_grtop;
  DECL_CHAIN (f_grtop) = f_vrtop;
  DECL_CHAIN (f_vrtop) = f_groff;
  DECL_CHAIN (f_groff) = f_vroff;

  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
{
  const CUMULATIVE_ARGS *cum;
  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
  tree stack, grtop, vrtop, groff, vroff;
  tree t;
  int gr_save_area_size;
  int vr_save_area_size;
  int vr_offset;

  cum = &crtl->args.info;
  gr_save_area_size
    = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
  vr_save_area_size
    = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;

  if (TARGET_GENERAL_REGS_ONLY)
    {
      if (cum->aapcs_nvrn > 0)
	sorry ("%qs and floating point or vector arguments",
	       "-mgeneral-regs-only");
      vr_save_area_size = 0;
    }

  f_stack = TYPE_FIELDS (va_list_type_node);
  f_grtop = DECL_CHAIN (f_stack);
  f_vrtop = DECL_CHAIN (f_grtop);
  f_groff = DECL_CHAIN (f_vrtop);
  f_vroff = DECL_CHAIN (f_groff);

  stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
		  NULL_TREE);
  grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
		  NULL_TREE);
  vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
		  NULL_TREE);
  groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
		  NULL_TREE);
  vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
		  NULL_TREE);

  /* Emit code to initialize STACK, which points to the next varargs stack
     argument.  CUM->AAPCS_STACK_SIZE gives the number of stack words used
     by named arguments.  STACK is 8-byte aligned.  */
  t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
  if (cum->aapcs_stack_size > 0)
    t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
  t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize GRTOP, the top of the GR save area.
     virtual_incoming_args_rtx should have been 16 byte aligned.  */
  t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
  t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize VRTOP, the top of the VR save area.
     This address is gr_save_area_bytes below GRTOP, rounded
     down to the next 16-byte boundary.  */
  t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
  vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
				STACK_BOUNDARY / BITS_PER_UNIT);

  if (vr_offset)
    t = fold_build_pointer_plus_hwi (t, -vr_offset);
  t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize GROFF, the offset from GRTOP of the
     next GPR argument.  */
  t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
	      build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Likewise emit code to initialize VROFF, the offset from FTOP
     of the next VR argument.  */
  t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
	      build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			      gimple_seq *post_p ATTRIBUTE_UNUSED)
{
  tree addr;
  bool indirect_p;
  bool is_ha;		/* is HFA or HVA.  */
  bool dw_align;	/* double-word align.  */
  enum machine_mode ag_mode = VOIDmode;
  int nregs;
  enum machine_mode mode;

  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
  tree stack, f_top, f_off, off, arg, roundup, on_stack;
  HOST_WIDE_INT size, rsize, adjust, align;
  tree t, u, cond1, cond2;

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);

  mode = TYPE_MODE (type);

  f_stack = TYPE_FIELDS (va_list_type_node);
  f_grtop = DECL_CHAIN (f_stack);
  f_vrtop = DECL_CHAIN (f_grtop);
  f_groff = DECL_CHAIN (f_vrtop);
  f_vroff = DECL_CHAIN (f_groff);

  stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
		  f_stack, NULL_TREE);
  size = int_size_in_bytes (type);
  align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;

  dw_align = false;
  adjust = 0;
  if (aarch64_vfp_is_call_or_return_candidate (mode,
					       type,
					       &ag_mode,
					       &nregs,
					       &is_ha))
    {
      /* TYPE passed in fp/simd registers.  */
      if (TARGET_GENERAL_REGS_ONLY)
	sorry ("%qs and floating point or vector arguments",
	       "-mgeneral-regs-only");

      f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
		      unshare_expr (valist), f_vrtop, NULL_TREE);
      f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
		      unshare_expr (valist), f_vroff, NULL_TREE);

      rsize = nregs * UNITS_PER_VREG;

      if (is_ha)
	{
	  if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
	    adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
	}
      else if (BLOCK_REG_PADDING (mode, type, 1) == downward
	       && size < UNITS_PER_VREG)
	{
	  adjust = UNITS_PER_VREG - size;
	}
    }
  else
    {
      /* TYPE passed in general registers.  */
      f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
		      unshare_expr (valist), f_grtop, NULL_TREE);
      f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
		      unshare_expr (valist), f_groff, NULL_TREE);
      rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
      nregs = rsize / UNITS_PER_WORD;

      if (align > 8)
	dw_align = true;

      if (BLOCK_REG_PADDING (mode, type, 1) == downward
	  && size < UNITS_PER_WORD)
	{
	  adjust = UNITS_PER_WORD - size;
	}
    }

  /* Get a local temporary for the field value.  */
  off = get_initialized_tmp_var (f_off, pre_p, NULL);

  /* Emit code to branch if off >= 0.  */
  t = build2 (GE_EXPR, boolean_type_node, off,
	      build_int_cst (TREE_TYPE (off), 0));
  cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);

  if (dw_align)
    {
      /* Emit: offs = (offs + 15) & -16.  */
      t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
		  build_int_cst (TREE_TYPE (off), 15));
      t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
		  build_int_cst (TREE_TYPE (off), -16));
      roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
    }
  else
    roundup = NULL;

  /* Update ap.__[g|v]r_offs  */
  t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
	      build_int_cst (TREE_TYPE (off), rsize));
  t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);

  /* String up.  */
  if (roundup)
    t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);

  /* [cond2] if (ap.__[g|v]r_offs > 0)  */
  u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
	      build_int_cst (TREE_TYPE (f_off), 0));
  cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);

  /* String up: make sure the assignment happens before the use.  */
  t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
  COND_EXPR_ELSE (cond1) = t;

  /* Prepare the trees handling the argument that is passed on the stack;
     the top level node will store in ON_STACK.  */
  arg = get_initialized_tmp_var (stack, pre_p, NULL);
  if (align > 8)
    {
      /* if (alignof(type) > 8) (arg = arg + 15) & -16;  */
      t = fold_convert (intDI_type_node, arg);
      t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), 15));
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -16));
      t = fold_convert (TREE_TYPE (arg), t);
      roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
    }
  else
    roundup = NULL;
  /* Advance ap.__stack  */
  t = fold_convert (intDI_type_node, arg);
  t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
	      build_int_cst (TREE_TYPE (t), size + 7));
  t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
	      build_int_cst (TREE_TYPE (t), -8));
  t = fold_convert (TREE_TYPE (arg), t);
  t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
  /* String up roundup and advance.  */
  if (roundup)
    t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
  /* String up with arg */
  on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
  /* Big-endianness related address adjustment.  */
  if (BLOCK_REG_PADDING (mode, type, 1) == downward
      && size < UNITS_PER_WORD)
    {
      t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
		  size_int (UNITS_PER_WORD - size));
      on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
    }

  COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
  COND_EXPR_THEN (cond2) = unshare_expr (on_stack);

  /* Adjustment to OFFSET in the case of BIG_ENDIAN.  */
  t = off;
  if (adjust)
    t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
		build_int_cst (TREE_TYPE (off), adjust));

  t = fold_convert (sizetype, t);
  t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);

  if (is_ha)
    {
      /* type ha; // treat as "struct {ftype field[n];}"
         ... [computing offs]
         for (i = 0; i <nregs; ++i, offs += 16)
	   ha.field[i] = *((ftype *)(ap.__vr_top + offs));
	 return ha;  */
      int i;
      tree tmp_ha, field_t, field_ptr_t;

      /* Declare a local variable.  */
      tmp_ha = create_tmp_var_raw (type, "ha");
      gimple_add_tmp_var (tmp_ha);

      /* Establish the base type.  */
      switch (ag_mode)
	{
	case SFmode:
	  field_t = float_type_node;
	  field_ptr_t = float_ptr_type_node;
	  break;
	case DFmode:
	  field_t = double_type_node;
	  field_ptr_t = double_ptr_type_node;
	  break;
	case TFmode:
	  field_t = long_double_type_node;
	  field_ptr_t = long_double_ptr_type_node;
	  break;
/* The half precision and quad precision are not fully supported yet.  Enable
   the following code after the support is complete.  Need to find the correct
   type node for __fp16 *.  */
#if 0
	case HFmode:
	  field_t = float_type_node;
	  field_ptr_t = float_ptr_type_node;
	  break;
#endif
	case V2SImode:
	case V4SImode:
	  {
	    tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
	    field_t = build_vector_type_for_mode (innertype, ag_mode);
	    field_ptr_t = build_pointer_type (field_t);
	  }
	  break;
	default:
	  gcc_assert (0);
	}

      /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area  */
      tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
      addr = t;
      t = fold_convert (field_ptr_t, addr);
      t = build2 (MODIFY_EXPR, field_t,
		  build1 (INDIRECT_REF, field_t, tmp_ha),
		  build1 (INDIRECT_REF, field_t, t));

      /* ha.field[i] = *((field_ptr_t)vr_saved_area + i)  */
      for (i = 1; i < nregs; ++i)
	{
	  addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
	  u = fold_convert (field_ptr_t, addr);
	  u = build2 (MODIFY_EXPR, field_t,
		      build2 (MEM_REF, field_t, tmp_ha,
			      build_int_cst (field_ptr_t,
					     (i *
					      int_size_in_bytes (field_t)))),
		      build1 (INDIRECT_REF, field_t, u));
	  t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
	}

      u = fold_convert (TREE_TYPE (f_top), tmp_ha);
      t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
    }

  COND_EXPR_ELSE (cond2) = t;
  addr = fold_convert (build_pointer_type (type), cond1);
  addr = build_va_arg_indirect_ref (addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);

  return addr;
}
/* Implement TARGET_SETUP_INCOMING_VARARGS.  */
static void
aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
				tree type, int *pretend_size ATTRIBUTE_UNUSED,
				int no_rtl)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  CUMULATIVE_ARGS local_cum;
  int gr_saved, vr_saved;

  /* The caller has advanced CUM up to, but not beyond, the last named
     argument.  Advance a local copy of CUM past the last "real" named
     argument, to find out how many registers are left over.  */
  local_cum = *cum;
  aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);

  /* Found out how many registers we need to save.  */
  gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
  vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;

  if (TARGET_GENERAL_REGS_ONLY)
    {
      if (local_cum.aapcs_nvrn > 0)
	sorry ("%qs and floating point or vector arguments",
	       "-mgeneral-regs-only");
      vr_saved = 0;
    }

  if (!no_rtl)
    {
      if (gr_saved > 0)
	{
	  rtx ptr, mem;

	  /* virtual_incoming_args_rtx should have been 16-byte aligned.  */
	  ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
			       - gr_saved * UNITS_PER_WORD);
	  mem = gen_frame_mem (BLKmode, ptr);
	  set_mem_alias_set (mem, get_varargs_alias_set ());

	  move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
			       mem, gr_saved);
	}
      if (vr_saved > 0)
	{
	  /* We can't use move_block_from_reg, because it will use
	     the wrong mode, storing D regs only.  */
	  enum machine_mode mode = TImode;
	  int off, i;

	  /* Set OFF to the offset from virtual_incoming_args_rtx of
	     the first vector register.  The VR save area lies below
	     the GR one, and is aligned to 16 bytes.  */
	  off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
				   STACK_BOUNDARY / BITS_PER_UNIT);
	  off -= vr_saved * UNITS_PER_VREG;

	  for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
	    {
	      rtx ptr, mem;

	      ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
	      mem = gen_frame_mem (mode, ptr);
	      set_mem_alias_set (mem, get_varargs_alias_set ());
	      aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
	      off += UNITS_PER_VREG;
	    }
	}
    }

  /* We don't save the size into *PRETEND_SIZE because we want to avoid
     any complication of having crtl->args.pretend_args_size changed.  */
  cfun->machine->saved_varargs_size
    = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
			 STACK_BOUNDARY / BITS_PER_UNIT)
       + vr_saved * UNITS_PER_VREG);
}
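/* A worked example (illustrative only): for "int f (int a, ...)" one
   GPR is consumed by the named argument, so gr_saved = 8 - 1 = 7 and
   vr_saved = 8.  The GR save area occupies 7 * UNITS_PER_WORD = 56
   bytes, rounded up to 64 for the 16-byte STACK_BOUNDARY, and the VR
   save area adds 8 * UNITS_PER_VREG = 128 bytes below it, giving a
   saved_varargs_size of 192.  */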
static void
aarch64_conditional_register_usage (void)
{
  int i;
  if (!TARGET_FLOAT)
    {
      for (i = V0_REGNUM; i <= V31_REGNUM; i++)
	{
	  fixed_regs[i] = 1;
	  call_used_regs[i] = 1;
	}
    }
}
/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.  */

static int
aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
{
  enum machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode && mode != TFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode && mode != TFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
	 and 128-bit vector types.  */
      size = int_size_in_bytes (type);
      switch (size)
	{
	case 8:
	  mode = V2SImode;
	  break;
	case 16:
	  mode = V4SImode;
	  break;
	default:
	  return -1;
	}

      if (*modep == VOIDmode)
	*modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
	 equivalent for the purposes of being homogeneous aggregates
	 if they are the same size.  */
      if (*modep == mode)
	return 1;

      break;

    case ARRAY_TYPE:
      {
	int count;
	tree index = TYPE_DOMAIN (type);

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
	if (count == -1
	    || !index
	    || !TYPE_MAX_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
	    || !TYPE_MIN_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
	    || count < 0)
	  return -1;

	count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
		  - tree_to_uhwi (TYPE_MIN_VALUE (index)));

	/* There must be no padding.  */
	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case RECORD_TYPE:
      {
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count += sub_count;
	  }

	/* There must be no padding.  */
	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
	/* These aren't very interesting except in a degenerate case.  */
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count = count > sub_count ? count : sub_count;
	  }

	/* There must be no padding.  */
	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    default:
      break;
    }

  return -1;
}
/* Return true if we use LRA instead of reload pass.  */
static bool
aarch64_lra_p (void)
{
  return aarch64_lra_flag;
}
/* Return TRUE if the type, as described by TYPE and MODE, is a composite
   type as described in AAPCS64 \S 4.3.  This includes aggregate, union and
   array types.  The C99 floating-point complex types are also considered
   as composite types, according to AAPCS64 \S 7.1.1.  The complex integer
   types, which are GCC extensions and out of the scope of AAPCS64, are
   treated as composite types here as well.

   Note that MODE itself is not sufficient in determining whether a type
   is such a composite type or not.  This is because
   stor-layout.c:compute_record_mode may have already changed the MODE
   (BLKmode) of a RECORD_TYPE TYPE to some other mode.  For example, a
   structure with only one field may have its MODE set to the mode of the
   field.  Also an integer mode whose size matches the size of the
   RECORD_TYPE type may be used to substitute the original mode
   (i.e. BLKmode) in certain circumstances.  In other words, MODE cannot be
   solely relied on.  */

static bool
aarch64_composite_type_p (const_tree type,
			  enum machine_mode mode)
{
  if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
    return true;

  if (mode == BLKmode
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return true;

  return false;
}
/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
   type as described in AAPCS64 \S 4.1.2.

   See the comment above aarch64_composite_type_p for the notes on MODE.  */

static bool
aarch64_short_vector_p (const_tree type,
			enum machine_mode mode)
{
  HOST_WIDE_INT size = -1;

  if (type && TREE_CODE (type) == VECTOR_TYPE)
    size = int_size_in_bytes (type);
  else if (!aarch64_composite_type_p (type, mode)
	   && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	       || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
    size = GET_MODE_SIZE (mode);

  return (size == 8 || size == 16) ? true : false;
}
/* Return TRUE if an argument, whose type is described by TYPE and MODE,
   shall be passed or returned in simd/fp register(s) (providing these
   parameter passing registers are available).

   Upon successful return, *COUNT returns the number of needed registers,
   *BASE_MODE returns the mode of the individual register and when IS_HA
   is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
   floating-point aggregate or a homogeneous short-vector aggregate.  */

static bool
aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
					 const_tree type,
					 enum machine_mode *base_mode,
					 int *count,
					 bool *is_ha)
{
  enum machine_mode new_mode = VOIDmode;
  bool composite_p = aarch64_composite_type_p (type, mode);

  if (is_ha != NULL) *is_ha = false;

  if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
      || aarch64_short_vector_p (type, mode))
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      if (is_ha != NULL) *is_ha = true;
      *count = 2;
      new_mode = GET_MODE_INNER (mode);
    }
  else if (type && composite_p)
    {
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);

      if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
	{
	  if (is_ha != NULL) *is_ha = true;
	  *count = ag_count;
	}
      else
	return false;
    }
  else
    return false;

  *base_mode = new_mode;
  return true;
}
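/* For illustration: "struct { float x, y, z; }" walks
   aapcs_vfp_sub_candidate to a count of 3 with *modep == SFmode, so it
   is a homogeneous floating-point aggregate (*is_ha = true) passed in
   three consecutive S registers, while "struct { float x; int y; }"
   fails the walk and falls back to the general-register rules.  */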
/* Implement TARGET_STRUCT_VALUE_RTX.  */

static rtx
aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
			  int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
}
6888 aarch64_vector_mode_supported_p (enum machine_mode mode
)
6891 && (mode
== V4SImode
|| mode
== V8HImode
6892 || mode
== V16QImode
|| mode
== V2DImode
6893 || mode
== V2SImode
|| mode
== V4HImode
6894 || mode
== V8QImode
|| mode
== V2SFmode
6895 || mode
== V4SFmode
|| mode
== V2DFmode
))
/* Return appropriate SIMD container
   for MODE within a vector of WIDTH bits.  */
static enum machine_mode
aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
{
  gcc_assert (width == 64 || width == 128);
  if (TARGET_SIMD)
    {
      if (width == 128)
	switch (mode)
	  {
	  case DFmode: return V2DFmode;
	  case SFmode: return V4SFmode;
	  case SImode: return V4SImode;
	  case HImode: return V8HImode;
	  case QImode: return V16QImode;
	  case DImode: return V2DImode;
	  default: break;
	  }
      else
	switch (mode)
	  {
	  case SFmode: return V2SFmode;
	  case SImode: return V2SImode;
	  case HImode: return V4HImode;
	  case QImode: return V8QImode;
	  default: break;
	  }
    }
  return word_mode;
}

/* Return 128-bit container as the preferred SIMD mode for MODE.  */
static enum machine_mode
aarch64_preferred_simd_mode (enum machine_mode mode)
{
  return aarch64_simd_container_mode (mode, 128);
}

/* Return the bitmask of possible vector sizes for the vectorizer
   to iterate over.  */
static unsigned int
aarch64_autovectorize_vector_sizes (void)
{
  return (16 | 8);
}
/* A table to help perform AArch64-specific name mangling for AdvSIMD
   vector types in order to conform to the AAPCS64 (see "Procedure
   Call Standard for the ARM 64-bit Architecture", Appendix A).  To
   qualify for emission with the mangled names defined in that document,
   a vector type must not only be of the correct mode but also be
   composed of AdvSIMD vector element types (e.g.
   _builtin_aarch64_simd_qi); these types are registered by
   aarch64_init_simd_builtins ().  In other words, vector types defined
   in other ways e.g. via vector_size attribute will get default
   mangled names.  */
typedef struct
{
  enum machine_mode mode;
  const char *element_type_name;
  const char *mangled_name;
} aarch64_simd_mangle_map_entry;

static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
  /* 64-bit containerized types.  */
  { V8QImode,  "__builtin_aarch64_simd_qi",     "10__Int8x8_t" },
  { V8QImode,  "__builtin_aarch64_simd_uqi",    "11__Uint8x8_t" },
  { V4HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x4_t" },
  { V4HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x4_t" },
  { V2SImode,  "__builtin_aarch64_simd_si",     "11__Int32x2_t" },
  { V2SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x2_t" },
  { V2SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x2_t" },
  { V8QImode,  "__builtin_aarch64_simd_poly8",  "11__Poly8x8_t" },
  { V4HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
  /* 128-bit containerized types.  */
  { V16QImode, "__builtin_aarch64_simd_qi",     "11__Int8x16_t" },
  { V16QImode, "__builtin_aarch64_simd_uqi",    "12__Uint8x16_t" },
  { V8HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x8_t" },
  { V8HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x8_t" },
  { V4SImode,  "__builtin_aarch64_simd_si",     "11__Int32x4_t" },
  { V4SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x4_t" },
  { V2DImode,  "__builtin_aarch64_simd_di",     "11__Int64x2_t" },
  { V2DImode,  "__builtin_aarch64_simd_udi",    "12__Uint64x2_t" },
  { V4SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x4_t" },
  { V2DFmode,  "__builtin_aarch64_simd_df",     "13__Float64x2_t" },
  { V16QImode, "__builtin_aarch64_simd_poly8",  "12__Poly8x16_t" },
  { V8HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
  { V2DImode,  "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
  { VOIDmode, NULL, NULL }
};
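/* For illustration: with the table above, a C++ function
   "void f (int8x8_t);" mangles as "_Z1f10__Int8x8_t", using the vendor
   extended type name required by AAPCS64 Appendix A, rather than the
   default vector-type mangling.  */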
/* Implement TARGET_MANGLE_TYPE.  */

static const char *
aarch64_mangle_type (const_tree type)
{
  /* The AArch64 ABI documents say that "__va_list" has to be
     mangled as if it is in the "std" namespace.  */
  if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Check the mode of the vector type, and the name of the vector
     element type, against the table.  */
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;

      while (pos->mode != VOIDmode)
	{
	  tree elt_type = TREE_TYPE (type);

	  if (pos->mode == TYPE_MODE (type)
	      && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
	      && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
			  pos->element_type_name))
	    return pos->mangled_name;

	  pos++;
	}
    }

  /* Use the default mangling.  */
  return NULL;
}
/* Return the equivalent letter for size.  */
static char
sizetochar (int size)
{
  switch (size)
    {
    case 64: return 'd';
    case 32: return 's';
    case 16: return 'h';
    case 8 : return 'b';
    default: gcc_unreachable ();
    }
}
/* Return true iff x is a uniform vector of floating-point
   constants, and the constant can be represented in
   quarter-precision form.  Note, as aarch64_float_const_representable
   rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0.  */
static bool
aarch64_vect_float_const_representable_p (rtx x)
{
  int i = 0;
  REAL_VALUE_TYPE r0, ri;
  rtx x0, xi;

  if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
    return false;

  x0 = CONST_VECTOR_ELT (x, 0);
  if (!CONST_DOUBLE_P (x0))
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);

  for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
    {
      xi = CONST_VECTOR_ELT (x, i);
      if (!CONST_DOUBLE_P (xi))
	return false;

      REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
      if (!REAL_VALUES_EQUAL (r0, ri))
	return false;
    }

  return aarch64_float_const_representable_p (x0);
}
/* Return true for valid and false for invalid.  */

bool
aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
			      struct simd_immediate_info *info)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG)	\
  matches = 1;						\
  for (i = 0; i < idx; i += (STRIDE))			\
    if (!(TEST))					\
      matches = 0;					\
  if (matches)						\
    {							\
      immtype = (CLASS);				\
      elsize = (ELSIZE);				\
      eshift = (SHIFT);					\
      emvn = (NEG);					\
      break;						\
    }

  unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
  unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
  unsigned char bytes[16];
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;
  int eshift, emvn;

  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      if (! (aarch64_simd_imm_zero_p (op, mode)
	     || aarch64_vect_float_const_representable_p (op)))
	return false;

      if (info)
	{
	  info->value = CONST_VECTOR_ELT (op, 0);
	  info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
	  info->mvn = false;
	  info->shift = 0;
	}

      return true;
    }

  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      /* The vector is provided in gcc endian-neutral fashion.  For aarch64_be,
	 it must be laid out in the vector register in reverse order.  */
      rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
      unsigned HOST_WIDE_INT elpart;
      unsigned int part, parts;

      if (GET_CODE (el) == CONST_INT)
	{
	  elpart = INTVAL (el);
	  parts = 1;
	}
      else if (GET_CODE (el) == CONST_DOUBLE)
	{
	  elpart = CONST_DOUBLE_LOW (el);
	  parts = 2;
	}
      else
	gcc_unreachable ();

      for (part = 0; part < parts; part++)
	{
	  unsigned int byte;
	  for (byte = 0; byte < innersize; byte++)
	    {
	      bytes[idx++] = (elpart & 0xff) ^ invmask;
	      elpart >>= BITS_PER_UNIT;
	    }
	  if (GET_CODE (el) == CONST_DOUBLE)
	    elpart = CONST_DOUBLE_HIGH (el);
	}
    }

  /* Sanity check.  */
  gcc_assert (idx == GET_MODE_SIZE (mode));

  do
    {
      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);

      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);

      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);

      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
	     && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);

      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);

      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);

      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);

      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);

      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);

      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
	     && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);

      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);

      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);

      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);

      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);

      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);

      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);

      CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);

      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
	     && bytes[i] == bytes[(i + 8) % idx], 0, 0);
    }
  while (0);

  if (immtype == -1)
    return false;

  if (info)
    {
      info->element_width = elsize;
      info->mvn = emvn != 0;
      info->shift = eshift;

      unsigned HOST_WIDE_INT imm = 0;

      if (immtype >= 12 && immtype <= 15)
	info->msl = true;

      /* Un-invert bytes of recognized vector, if necessary.  */
      if (invmask != 0)
	for (i = 0; i < idx; i++)
	  bytes[i] ^= invmask;

      if (immtype == 17)
	{
	  /* FIXME: Broken on 32-bit H_W_I hosts.  */
	  gcc_assert (sizeof (HOST_WIDE_INT) == 8);

	  for (i = 0; i < 8; i++)
	    imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
	      << (i * BITS_PER_UNIT);

	  info->value = GEN_INT (imm);
	}
      else
	{
	  for (i = 0; i < elsize / BITS_PER_UNIT; i++)
	    imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

	  /* Construct 'abcdefgh' because the assembler cannot handle
	     generic constants.  */
	  if (info->mvn)
	    imm = ~imm;
	  imm = (imm >> info->shift) & 0xff;
	  info->value = GEN_INT (imm);
	}
    }

  return true;
#undef CHECK
}
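
/* Worked example (illustrative): a V4SImode constant whose elements are
   all 0x0000ab00 splats to the byte pattern { 00, ab, 00, 00 } repeated
   four times.  CHECK (4, 32, 1, ...) above matches it, yielding immtype 1,
   a 32-bit element width, a shift of 8 and no MVN, from which the caller
   can emit "movi Vd.4s, 0xab, lsl 8".  */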
static bool
aarch64_const_vec_all_same_int_p (rtx x,
				  HOST_WIDE_INT minval,
				  HOST_WIDE_INT maxval)
{
  HOST_WIDE_INT firstval;
  int count, i;

  if (GET_CODE (x) != CONST_VECTOR
      || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
    return false;

  firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
  if (firstval < minval || firstval > maxval)
    return false;

  count = CONST_VECTOR_NUNITS (x);
  for (i = 1; i < count; i++)
    if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
      return false;

  return true;
}
/* Check if immediate shift constants are within range.  */
bool
aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
{
  int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
  if (left)
    return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
  else
    return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
}
/* Return true if X is a uniform vector where all elements
   are either the floating-point constant 0.0 or the
   integer constant 0.  */
bool
aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
{
  return x == CONST0_RTX (mode);
}
bool
aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT imm = INTVAL (x);
  int i;

  for (i = 0; i < 8; i++)
    {
      unsigned int byte = imm & 0xff;
      if (byte != 0xff && byte != 0)
	return false;
      imm >>= 8;
    }

  return true;
}
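
/* For example (illustrative): 0xff00ffff00ff0000 passes because every one
   of its eight bytes is either 0x00 or 0xff, whereas 0x0000000000000100 is
   rejected as soon as the loop sees the 0x01 byte.  */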
bool
aarch64_mov_operand_p (rtx x,
		       enum aarch64_symbol_context context,
		       enum machine_mode mode)
{
  if (GET_CODE (x) == HIGH
      && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
    return true;

  if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
    return true;

  if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
    return true;

  return aarch64_classify_symbolic_expression (x, context)
    == SYMBOL_TINY_ABSOLUTE;
}
/* Return a const_int vector of VAL.  */
rtx
aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits);
  int i;

  for (i = 0; i < nunits; i++)
    RTVEC_ELT (v, i) = GEN_INT (val);

  return gen_rtx_CONST_VECTOR (mode, v);
}
/* Check OP is a legal scalar immediate for the MOVI instruction.  */
bool
aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
{
  enum machine_mode vmode;

  gcc_assert (!VECTOR_MODE_P (mode));
  vmode = aarch64_preferred_simd_mode (mode);
  rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
  return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
}
/* Construct and return a PARALLEL RTX vector.  */
rtx
aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits / 2);
  int base = high ? nunits / 2 : 0;
  rtx t1;
  int i;

  for (i = 0; i < nunits / 2; i++)
    RTVEC_ELT (v, i) = GEN_INT (base + i);

  t1 = gen_rtx_PARALLEL (mode, v);
  return t1;
}
/* Bounds-check lanes.  Ensure OPERAND lies between LOW (inclusive) and
   HIGH (exclusive).  */
void
aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  HOST_WIDE_INT lane;
  gcc_assert (GET_CODE (operand) == CONST_INT);
  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    error ("lane out of range");
}

void
aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  gcc_assert (GET_CODE (operand) == CONST_INT);
  HOST_WIDE_INT lane = INTVAL (operand);

  if (lane < low || lane >= high)
    error ("constant out of range");
}
/* Emit code to reinterpret one AdvSIMD type as another,
   without altering bits.  */
void
aarch64_simd_reinterpret (rtx dest, rtx src)
{
  emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
}
/* Emit code to place an AdvSIMD pair result in memory locations (with equal
   registers).  */
void
aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
				    rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
				    rtx op1)
{
  rtx mem = gen_rtx_MEM (mode, destaddr);
  rtx tmp1 = gen_reg_rtx (mode);
  rtx tmp2 = gen_reg_rtx (mode);

  emit_insn (intfn (tmp1, op1, tmp2));

  emit_move_insn (mem, tmp1);
  mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
  emit_move_insn (mem, tmp2);
}
/* Return TRUE if OP is a valid vector addressing mode.  */
bool
aarch64_simd_mem_operand_p (rtx op)
{
  return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
			|| GET_CODE (XEXP (op, 0)) == REG);
}
/* Set up OPERANDS for a register copy from SRC to DEST, taking care
   not to early-clobber SRC registers in the process.

   We assume that the operands described by SRC and DEST represent a
   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
   number of components into which the copy has been decomposed.  */
void
aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
				rtx *src, unsigned int count)
{
  unsigned int i;

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || REGNO (operands[0]) < REGNO (operands[1]))
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[i];
	  operands[2 * i + 1] = src[i];
	}
    }
  else
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[count - i - 1];
	  operands[2 * i + 1] = src[count - i - 1];
	}
    }
}
/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
   one of VSTRUCT modes: OI, CI or XI.  */
int
aarch64_simd_attr_length_move (rtx insn)
{
  enum machine_mode mode;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
    {
      mode = GET_MODE (recog_data.operand[0]);
      switch (mode)
	{
	case OImode:
	  return 8;
	case CImode:
	  return 12;
	case XImode:
	  return 16;
	default:
	  gcc_unreachable ();
	}
    }
  return 4;
}
/* Implement target hook TARGET_VECTOR_ALIGNMENT.  The AAPCS64 sets the maximum
   alignment of a vector to 128 bits.  */
static HOST_WIDE_INT
aarch64_simd_vector_alignment (const_tree type)
{
  HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
  return MIN (align, 128);
}

/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE.  */
static bool
aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
{
  if (is_packed)
    return false;

  /* We guarantee alignment for vectors up to 128-bits.  */
  if (tree_int_cst_compare (TYPE_SIZE (type),
			    bitsize_int (BIGGEST_ALIGNMENT)) > 0)
    return false;

  /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned.  */
  return true;
}
/* If VALS is a vector constant that can be loaded into a register
   using DUP, generate instructions to do so and return an RTX to
   assign to the register.  Otherwise return NULL_RTX.  */
static rtx
aarch64_simd_dup_constant (rtx vals)
{
  enum machine_mode mode = GET_MODE (vals);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  bool all_same = true;
  rtx x;
  int i;

  if (GET_CODE (vals) != CONST_VECTOR)
    return NULL_RTX;

  for (i = 1; i < n_elts; ++i)
    {
      x = CONST_VECTOR_ELT (vals, i);
      if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
	all_same = false;
    }

  if (!all_same)
    return NULL_RTX;

  /* We can load this constant by using DUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */
  x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
  return gen_rtx_VEC_DUPLICATE (mode, x);
}
/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that can not be converted into a CONST_VECTOR.  */
static rtx
aarch64_simd_make_constant (rtx vals)
{
  enum machine_mode mode = GET_MODE (vals);
  rtx const_dup;
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);
  int n_const = 0;
  int i;

  if (GET_CODE (vals) == CONST_VECTOR)
    const_vec = vals;
  else if (GET_CODE (vals) == PARALLEL)
    {
      /* A CONST_VECTOR must contain only CONST_INTs and
	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
	 Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	    n_const++;
	}
      if (n_const == n_elts)
	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    }
  else
    gcc_unreachable ();

  if (const_vec != NULL_RTX
      && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
    /* Load using MOVI/MVNI.  */
    return const_vec;
  else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
    /* Loaded using DUP.  */
    return const_dup;
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  We can not take advantage of single-cycle
       LD1 because we need a PC-relative addressing mode.  */
    return const_vec;
  else
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We can not construct an initializer.  */
    return NULL_RTX;
}
void
aarch64_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;
  rtx x, mem;
  int i;

  x = XVECEXP (vals, 0, 0);
  if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
    n_var = 1, one_var = 0;

  for (i = 1; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
	++n_var, one_var = i;

      if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (n_var == 0)
    {
      rtx constant = aarch64_simd_make_constant (vals);
      if (constant != NULL_RTX)
	{
	  emit_move_insn (target, constant);
	  return;
	}
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same)
    {
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);
      rtx index = GEN_INT (one_var);
      enum insn_code icode;

      /* Load constant part of vector, substitute neighboring value for
	 varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
      aarch64_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
      icode = optab_handler (vec_set_optab, mode);
      gcc_assert (icode != CODE_FOR_nothing);
      emit_insn (GEN_FCN (icode) (target, x, index));
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
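
/* Strategy summary (illustrative): { x, x, x, x } with variable x becomes
   a single DUP from a general register; { x, 1, 2, 3 } first materializes
   the constant { 1, 1, 2, 3 } (the variable slot borrows its neighbour's
   value via one_var ^ 1) and then inserts x into lane 0 with vec_set;
   anything with more variable lanes is assembled in a stack temporary and
   loaded whole.  */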
static unsigned HOST_WIDE_INT
aarch64_shift_truncation_mask (enum machine_mode mode)
{
  return
    (aarch64_vector_mode_supported_p (mode)
     || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
}
#ifndef TLS_SECTION_ASM_FLAG
#define TLS_SECTION_ASM_FLAG 'T'
#endif

void
aarch64_elf_asm_named_section (const char *name, unsigned int flags,
			       tree decl ATTRIBUTE_UNUSED)
{
  char flagchars[10], *f = flagchars;

  /* If we have already declared this section, we can use an
     abbreviated form to switch back to it -- unless this section is
     part of a COMDAT group, in which case GAS requires the full
     declaration every time.  */
  if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
      && (flags & SECTION_DECLARED))
    {
      fprintf (asm_out_file, "\t.section\t%s\n", name);
      return;
    }

  if (!(flags & SECTION_DEBUG))
    *f++ = 'a';
  if (flags & SECTION_WRITE)
    *f++ = 'w';
  if (flags & SECTION_CODE)
    *f++ = 'x';
  if (flags & SECTION_SMALL)
    *f++ = 's';
  if (flags & SECTION_MERGE)
    *f++ = 'M';
  if (flags & SECTION_STRINGS)
    *f++ = 'S';
  if (flags & SECTION_TLS)
    *f++ = TLS_SECTION_ASM_FLAG;
  if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
    *f++ = 'G';
  *f = '\0';

  fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);

  if (!(flags & SECTION_NOTYPE))
    {
      const char *type;
      const char *format;

      if (flags & SECTION_BSS)
	type = "nobits";
      else
	type = "progbits";

#ifdef TYPE_OPERAND_FMT
      format = "," TYPE_OPERAND_FMT;
#else
      format = ",@%s";
#endif

      fprintf (asm_out_file, format, type);

      if (flags & SECTION_ENTSIZE)
	fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
      if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
	{
	  if (TREE_CODE (decl) == IDENTIFIER_NODE)
	    fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
	  else
	    fprintf (asm_out_file, ",%s,comdat",
		     IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
	}
    }

  putc ('\n', asm_out_file);
}
/* Select a format to encode pointers in exception handling data.  */
int
aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
{
  int type;
  switch (aarch64_cmodel)
    {
    case AARCH64_CMODEL_TINY:
    case AARCH64_CMODEL_TINY_PIC:
    case AARCH64_CMODEL_SMALL:
    case AARCH64_CMODEL_SMALL_PIC:
      /* text+got+data < 4Gb.  4-byte signed relocs are sufficient
	 for everything.  */
      type = DW_EH_PE_sdata4;
      break;
    default:
      /* No assumptions here.  8-byte relocs required.  */
      type = DW_EH_PE_sdata8;
      break;
    }
  return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
}
/* Emit load exclusive.  */

static void
aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
			     rtx mem, rtx model_rtx)
{
  rtx (*gen) (rtx, rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_aarch64_load_exclusiveqi; break;
    case HImode: gen = gen_aarch64_load_exclusivehi; break;
    case SImode: gen = gen_aarch64_load_exclusivesi; break;
    case DImode: gen = gen_aarch64_load_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, model_rtx));
}
/* Emit store exclusive.  */

static void
aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
			      rtx rval, rtx mem, rtx model_rtx)
{
  rtx (*gen) (rtx, rtx, rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_aarch64_store_exclusiveqi; break;
    case HImode: gen = gen_aarch64_store_exclusivehi; break;
    case SImode: gen = gen_aarch64_store_exclusivesi; break;
    case DImode: gen = gen_aarch64_store_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (bval, rval, mem, model_rtx));
}
/* Mark the previous jump instruction as unlikely.  */

static void
aarch64_emit_unlikely_jump (rtx insn)
{
  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;

  insn = emit_jump_insn (insn);
  add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
}
/* Expand a compare and swap pattern.  */

void
aarch64_expand_compare_and_swap (rtx operands[])
{
  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  enum machine_mode mode, cmp_mode;
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);
  cmp_mode = mode;

  /* Normally the succ memory model must be stronger than fail, but in the
     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */

  if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
      && INTVAL (mod_s) == MEMMODEL_RELEASE)
    mod_s = GEN_INT (MEMMODEL_ACQ_REL);

  switch (mode)
    {
    case QImode:
    case HImode:
      /* For short modes, we're going to perform the comparison in SImode,
	 so do the zero-extension now.  */
      cmp_mode = SImode;
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* Fall through.  */

    case SImode:
    case DImode:
      /* Force the value into a register if needed.  */
      if (!aarch64_plus_operand (oldval, mode))
	oldval = force_reg (cmp_mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  switch (mode)
    {
    case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
    case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
    case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
    case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  x = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_EQ (SImode, x, const0_rtx);
  emit_insn (gen_rtx_SET (VOIDmode, bval, x));
}
/* Split a compare and swap pattern.  */

void
aarch64_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, scratch;
  enum machine_mode mode;
  bool is_weak;
  rtx label1, label2, x, cond;

  rval = operands[0];
  mem = operands[1];
  oldval = operands[2];
  newval = operands[3];
  is_weak = (operands[4] != const0_rtx);
  scratch = operands[7];
  mode = GET_MODE (mem);

  label1 = NULL_RTX;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);

  cond = aarch64_gen_compare_reg (NE, rval, oldval);
  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
  aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));

  aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);

  if (!is_weak)
    {
      x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
      aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
    }
  else
    {
      cond = gen_rtx_REG (CCmode, CC_REGNUM);
      x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, cond, x));
    }

  emit_label (label2);
}
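
/* Schematic of the emitted sequence for a strong compare-and-swap
   (illustrative only; exact mnemonics depend on the memory model):

	.L1:	ldaxr	w0, [mem]		; load-exclusive
		cmp	w0, w_oldval
		b.ne	.L2			; unlikely: value mismatch
		stlxr	w_scratch, w_newval, [mem]
		cbnz	w_scratch, .L1		; unlikely: lost reservation
	.L2:

   For a weak CAS the retry loop label is dropped and the store-exclusive
   status in SCRATCH is instead compared against zero to set the flags.  */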
/* Split an atomic operation.  */

void
aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
			 rtx value, rtx model_rtx, rtx cond)
{
  enum machine_mode mode = GET_MODE (mem);
  enum machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx label, x;

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
	{
	  value = GEN_INT (-INTVAL (value));
	  code = PLUS;
	}
      /* Fall through.  */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;
    }

  aarch64_emit_store_exclusive (mode, cond, mem,
				gen_lowpart (mode, new_out), model_rtx);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
  aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
}
static void
aarch64_print_extension (void)
{
  const struct aarch64_option_extension *opt = NULL;

  for (opt = all_extensions; opt->name != NULL; opt++)
    if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
      asm_fprintf (asm_out_file, "+%s", opt->name);

  asm_fprintf (asm_out_file, "\n");
}

static void
aarch64_start_file (void)
{
  if (selected_arch)
    {
      asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
      aarch64_print_extension ();
    }
  else if (selected_cpu)
    {
      const char *truncated_name
	    = aarch64_rewrite_selected_cpu (selected_cpu->name);
      asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
      aarch64_print_extension ();
    }
  default_file_start ();
}
/* Target hook for c_mode_for_suffix.  */
static enum machine_mode
aarch64_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;

  return VOIDmode;
}

/* We can only represent floating point constants which will fit in
   "quarter-precision" values.  These values are characterised by
   a sign bit, a 4-bit mantissa and a 3-bit exponent.  And are given
   by:

   (-1)^s * (n/16) * 2^r

   Where:
     's' is the sign bit.
     'n' is an integer in the range 16 <= n <= 31.
     'r' is an integer in the range -3 <= r <= 4.  */
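
/* Worked examples (illustrative): 0.25 = (16/16) * 2^-2 and
   31.0 = (31/16) * 2^4 are both representable, so the encodable range is
   0.125 .. 31.0 in magnitude; a value such as 0.1 has no (n, r) pair and
   must be loaded from the constant pool instead.  */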
/* Return true iff X can be represented by a quarter-precision
   floating point immediate operand X.  Note, we cannot represent 0.0.  */
bool
aarch64_float_const_representable_p (rtx x)
{
  /* This represents our current view of how many bits
     make up the mantissa.  */
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
  int exponent;
  unsigned HOST_WIDE_INT mantissa, mask;
  REAL_VALUE_TYPE r, m;
  bool fail;

  if (!CONST_DOUBLE_P (x))
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* We cannot represent infinities, NaNs or +/-zero.  We won't
     know if we have +zero until we analyse the mantissa, but we
     can reject the other invalid values.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
      || REAL_VALUE_MINUS_ZERO (r))
    return false;

  /* Extract exponent.  */
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);

  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     m1 holds the low part of the mantissa, m2 the high part.
     WARNING: If we ever have a representation using more than 2 * H_W_I - 1
     bits for the mantissa, this can fail (low bits will be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);

  /* If the low part of the mantissa has bits set we cannot represent
     the value.  */
  if (w.elt (0) != 0)
    return false;
  /* We have rejected the lower HOST_WIDE_INT, so update our
     understanding of how many bits lie in the mantissa and
     look only at the high HOST_WIDE_INT.  */
  mantissa = w.elt (1);
  point_pos -= HOST_BITS_PER_WIDE_INT;

  /* We can only represent values with a mantissa of the form 1.xxxx.  */
  mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return false;

  /* Having filtered unrepresentable values, we may now remove all
     but the highest 5 bits.  */
  mantissa >>= point_pos - 5;

  /* We cannot represent the value 0.0, so reject it.  This is handled
     elsewhere.  */
  if (mantissa == 0)
    return false;

  /* Then, as bit 4 is always set, we can mask it off, leaving
     the mantissa in the range [0, 15].  */
  mantissa &= ~(1 << 4);
  gcc_assert (mantissa <= 15);

  /* GCC internally does not use IEEE754-like encoding (where normalized
     significands are in the range [1, 2).  GCC uses [0.5, 1) (see real.c).
     Our mantissa values are shifted 4 places to the left relative to
     normalized IEEE754 so we must modify the exponent returned by REAL_EXP
     by 5 places to correct for GCC's representation.  */
  exponent = 5 - exponent;

  return (exponent >= 0 && exponent <= 7);
}
char*
aarch64_output_simd_mov_immediate (rtx const_vector,
				   enum machine_mode mode,
				   unsigned width)
{
  bool is_valid;
  static char templ[40];
  const char *mnemonic;
  const char *shift_op;
  unsigned int lane_count = 0;
  char element_char;

  struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };

  /* This will return true to show const_vector is legal for use as either
     an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
     also update INFO to show how the immediate should be generated.  */
  is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
  gcc_assert (is_valid);

  element_char = sizetochar (info.element_width);
  lane_count = width / info.element_width;

  mode = GET_MODE_INNER (mode);
  if (mode == SFmode || mode == DFmode)
    {
      gcc_assert (info.shift == 0 && ! info.mvn);
      if (aarch64_float_const_zero_rtx_p (info.value))
	info.value = GEN_INT (0);
      else
	{
#define buf_size 20
	  REAL_VALUE_TYPE r;
	  REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
	  char float_buf[buf_size] = {'\0'};
	  real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
#undef buf_size

	  if (lane_count == 1)
	    snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
	  else
	    snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
		      lane_count, element_char, float_buf);
	  return templ;
	}
    }

  mnemonic = info.mvn ? "mvni" : "movi";
  shift_op = info.msl ? "msl" : "lsl";

  if (lane_count == 1)
    snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
	      mnemonic, UINTVAL (info.value));
  else if (info.shift)
    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
	      ", %s %d", mnemonic, lane_count, element_char,
	      UINTVAL (info.value), shift_op, info.shift);
  else
    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
	      mnemonic, lane_count, element_char, UINTVAL (info.value));
  return templ;
}
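
/* Example outputs (illustrative): the all-0x0000ab00 V4SImode vector takes
   the shift branch and prints as "movi\t%0.4s, 0xab, lsl 8"; an all-ones
   integer vector is recognized via MVN and prints as "mvni\t%0.4s, 0x0";
   a uniform 1.0 V4SFmode vector takes the floating-point branch and prints
   an "fmov\t%0.4s, 1.0e+0"-style move.  */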
char*
aarch64_output_scalar_simd_mov_immediate (rtx immediate,
					  enum machine_mode mode)
{
  enum machine_mode vmode;

  gcc_assert (!VECTOR_MODE_P (mode));
  vmode = aarch64_simd_container_mode (mode, 64);
  rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
  return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
}
/* Split operands into moves from op[1] + op[2] into op[0].  */

void
aarch64_split_combinev16qi (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  enum machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
  rtx destlo, desthi;

  gcc_assert (halfmode == V16QImode);

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
			       GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  */
  if (reg_overlap_mentioned_p (operands[2], destlo)
      && reg_overlap_mentioned_p (operands[1], desthi))
    {
      emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
      emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
      emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
    }
  else if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
	 is in the right place already.  */
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
    }
}
/* vec_perm support.  */

#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  enum machine_mode vmode;
  unsigned char nelt;
  bool one_vector_p;
  bool testing_p;
};

/* Generate a variable permutation.  */

static void
aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_SIMD);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
	{
	  /* Expand the argument to a V16QI mode by duplicating it.  */
	  rtx pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
	  emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
	}
      else
	{
	  emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
	}
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
	{
	  pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
	  emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
	}
      else
	{
	  pair = gen_reg_rtx (OImode);
	  emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
	  emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
	}
    }
}
void
aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx rmask[MAX_VECT_LEN], mask;

  gcc_checking_assert (!BYTES_BIG_ENDIAN);

  /* The TBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
  for (i = 0; i < nelt; ++i)
    rmask[i] = mask;
  mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  aarch64_expand_vec_perm_1 (target, op0, op1, sel);
}
/* Recognize patterns suitable for the TRN instructions.  */
static bool
aarch64_evpc_trn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
	return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }
  out = d->target;

  if (odd)
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_trn2v16qi; break;
	case V8QImode: gen = gen_aarch64_trn2v8qi; break;
	case V8HImode: gen = gen_aarch64_trn2v8hi; break;
	case V4HImode: gen = gen_aarch64_trn2v4hi; break;
	case V4SImode: gen = gen_aarch64_trn2v4si; break;
	case V2SImode: gen = gen_aarch64_trn2v2si; break;
	case V2DImode: gen = gen_aarch64_trn2v2di; break;
	case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
	case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
	case V2DFmode: gen = gen_aarch64_trn2v2df; break;
	default:
	  return false;
	}
    }
  else
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_trn1v16qi; break;
	case V8QImode: gen = gen_aarch64_trn1v8qi; break;
	case V8HImode: gen = gen_aarch64_trn1v8hi; break;
	case V4HImode: gen = gen_aarch64_trn1v4hi; break;
	case V4SImode: gen = gen_aarch64_trn1v4si; break;
	case V2SImode: gen = gen_aarch64_trn1v2si; break;
	case V2DImode: gen = gen_aarch64_trn1v2di; break;
	case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
	case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
	case V2DFmode: gen = gen_aarch64_trn1v2df; break;
	default:
	  return false;
	}
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
/* Recognize patterns suitable for the UZP instructions.  */
static bool
aarch64_evpc_uzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt = (i * 2 + odd) & mask;
      if (d->perm[i] != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }
  out = d->target;

  if (odd)
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
	case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
	case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
	case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
	case V4SImode: gen = gen_aarch64_uzp2v4si; break;
	case V2SImode: gen = gen_aarch64_uzp2v2si; break;
	case V2DImode: gen = gen_aarch64_uzp2v2di; break;
	case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
	case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
	case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
	default:
	  return false;
	}
    }
  else
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
	case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
	case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
	case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
	case V4SImode: gen = gen_aarch64_uzp1v4si; break;
	case V2SImode: gen = gen_aarch64_uzp1v2si; break;
	case V2DImode: gen = gen_aarch64_uzp1v2di; break;
	case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
	case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
	case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
	default:
	  return false;
	}
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
/* Recognize patterns suitable for the ZIP instructions.  */
static bool
aarch64_evpc_zip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  high = nelt / 2;
  if (d->perm[0] == high)
    /* Do Nothing.  */
    ;
  else if (d->perm[0] == 0)
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt = (i + high) & mask;
      if (d->perm[i * 2] != elt)
	return false;
      elt = (elt + nelt) & mask;
      if (d->perm[i * 2 + 1] != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      high = !high;
    }
  out = d->target;

  if (high)
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_zip2v16qi; break;
	case V8QImode: gen = gen_aarch64_zip2v8qi; break;
	case V8HImode: gen = gen_aarch64_zip2v8hi; break;
	case V4HImode: gen = gen_aarch64_zip2v4hi; break;
	case V4SImode: gen = gen_aarch64_zip2v4si; break;
	case V2SImode: gen = gen_aarch64_zip2v2si; break;
	case V2DImode: gen = gen_aarch64_zip2v2di; break;
	case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
	case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
	case V2DFmode: gen = gen_aarch64_zip2v2df; break;
	default:
	  return false;
	}
    }
  else
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_zip1v16qi; break;
	case V8QImode: gen = gen_aarch64_zip1v8qi; break;
	case V8HImode: gen = gen_aarch64_zip1v8hi; break;
	case V4HImode: gen = gen_aarch64_zip1v4hi; break;
	case V4SImode: gen = gen_aarch64_zip1v4si; break;
	case V2SImode: gen = gen_aarch64_zip1v2si; break;
	case V2DImode: gen = gen_aarch64_zip1v2di; break;
	case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
	case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
	case V2DFmode: gen = gen_aarch64_zip1v2df; break;
	default:
	  return false;
	}
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
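
/* Worked example (illustrative): on V4SImode, the selector { 0, 4, 1, 5 }
   interleaves the low halves of the two inputs and is matched above with
   high == 0, so ZIP1 is emitted; { 2, 6, 3, 7 } interleaves the high
   halves and selects ZIP2.  */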
static bool
aarch64_evpc_dup (struct expand_vec_perm_d *d)
{
  rtx (*gen) (rtx, rtx, rtx);
  rtx out = d->target;
  rtx in0;
  enum machine_mode vmode = d->vmode;
  unsigned int i, elt, nelt = d->nelt;
  rtx lane;

  /* TODO: This may not be big-endian safe.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; i++)
    {
      if (elt != d->perm[i])
	return false;
    }

  /* The generic preparation in aarch64_expand_vec_perm_const_1
     swaps the operand order and the permute indices if it finds
     d->perm[0] to be in the second operand.  Thus, we can always
     use d->op0 and need not do any extra arithmetic to get the
     correct lane number.  */
  in0 = d->op0;
  lane = GEN_INT (elt);

  switch (vmode)
    {
    case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
    case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
    case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
    case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
    case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
    case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
    case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
    case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
    case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
    case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
    default:
      return false;
    }

  emit_insn (gen (out, in0, lane));
  return true;
}
static bool
aarch64_evpc_tbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  enum machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    {
      int nunits = GET_MODE_NUNITS (vmode);

      /* If big-endian and two vectors we end up with a weird mixed-endian
	 mode on NEON.  Reverse the index within each word but not the word
	 itself.  */
      rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
					   : d->perm[i]);
    }
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
static bool
aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;
      rtx x;

      gcc_assert (nelt == (nelt & -nelt));
      for (i = 0; i < nelt; ++i)
	d->perm[i] ^= nelt; /* Keep the same index, but in the other vector.  */

      x = d->op0;
      d->op0 = d->op1;
      d->op1 = x;
    }

  if (TARGET_SIMD)
    {
      if (aarch64_evpc_zip (d))
	return true;
      else if (aarch64_evpc_uzp (d))
	return true;
      else if (aarch64_evpc_trn (d))
	return true;
      else if (aarch64_evpc_dup (d))
	return true;
      return aarch64_evpc_tbl (d);
    }

  return false;
}
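
/* Worked example (illustrative): the V4SImode selector { 6, 7, 4, 5 }
   references only the second operand, so the loop above rewrites it to
   { 2, 3, 0, 1 } and swaps op0 with op1 before the matchers run; the
   matchers therefore never have to handle a sequence that starts in the
   second vector.  */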
/* Expand a vec_perm_const pattern.  */

bool
aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
	break;

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      /* Fall Through.  */
    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return aarch64_expand_vec_perm_const_1 (&d);
}
static bool
aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
				     const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Calculate whether all elements are in one vector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* If all elements are from the second vector, reindex as if from the
     first vector.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to a single vector.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = aarch64_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
/* Implement target hook CANNOT_CHANGE_MODE_CLASS.  */
bool
aarch64_cannot_change_mode_class (enum machine_mode from,
				  enum machine_mode to,
				  enum reg_class rclass)
{
  /* Full-reg subregs are allowed on general regs or any class if they are
     the same size.  */
  if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
      || !reg_classes_intersect_p (FP_REGS, rclass))
    return false;

  /* Limited combinations of subregs are safe on FPREGs.  Particularly,
     1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
     2. Scalar to Scalar for integer modes or same size float modes.
     3. Vector to Vector modes.
     4. On little-endian only, Vector-Structure to Vector modes.  */
  if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
    {
      if (aarch64_vector_mode_supported_p (from)
	  && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
	return false;

      if (GET_MODE_NUNITS (from) == 1
	  && GET_MODE_NUNITS (to) == 1
	  && (GET_MODE_CLASS (from) == MODE_INT
	      || from == to))
	return false;

      if (aarch64_vector_mode_supported_p (from)
	  && aarch64_vector_mode_supported_p (to))
	return false;

      /* Within a vector structure straddling multiple vector registers
	 we are in a mixed-endian representation.  As such, we can't
	 easily change modes for BYTES_BIG_ENDIAN.  Otherwise, we can
	 switch between vectors and vector structures cheaply.  */
      if (!BYTES_BIG_ENDIAN
	  && ((aarch64_vector_mode_supported_p (from)
	       && aarch64_vect_struct_mode_p (to))
	      || (aarch64_vector_mode_supported_p (to)
		  && aarch64_vect_struct_mode_p (from))))
	return false;
    }

  return true;
}
/* Implement MODES_TIEABLE_P.  */

bool
aarch64_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
    return true;

  /* We specifically want to allow elements of "structure" modes to
     be tieable to the structure.  This more general condition allows
     other rarer situations too.  */
  if (TARGET_SIMD
      && aarch64_vector_mode_p (mode1)
      && aarch64_vector_mode_p (mode2))
    return true;

  return false;
}
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST aarch64_address_cost

/* This hook determines whether unnamed bitfields affect the alignment
   of the containing structure.  The hook returns true if the structure
   should inherit the alignment requirements of an unnamed bitfield's
   type.  */
#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
  hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START aarch64_start_file

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk

#undef TARGET_ASM_SELECT_RTX_SECTION
#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list

#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE aarch64_can_eliminate

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage

/* Only the least significant bit is used for initialization guard
   variables.  */
#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix

#ifdef TARGET_BIG_ENDIAN_DEFAULT
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
#endif

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL aarch64_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN aarch64_fold_builtin

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG aarch64_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance

#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE aarch64_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS aarch64_init_builtins

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode

#undef TARGET_LRA_P
#define TARGET_LRA_P aarch64_lra_p

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE aarch64_mangle_type

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

/* This target hook should return true if accesses to volatile bitfields
   should use the narrowest mode possible.  It should return false if these
   accesses should use the bitfield container type.  */
#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE aarch64_override_options

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  aarch64_override_options_after_change

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB aarch64_return_in_msb

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS aarch64_rtx_costs

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p

#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  aarch64_builtin_vectorization_cost

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  aarch64_builtin_vectorized_function

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  aarch64_autovectorize_vector_sizes

/* Section anchor support.  */

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -256

/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
   byte offset; we can do much more for larger data types, but have no way
   to determine the size of the access.  We assume accesses are aligned.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  aarch64_simd_vector_alignment_reachable

/* vec_perm support.  */

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  aarch64_vectorize_vec_perm_const_ok

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs

#undef TARGET_FLAGS_REGNUM
#define TARGET_FLAGS_REGNUM CC_REGNUM

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-aarch64.h"