1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2014 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
25 #include "insn-codes.h"
27 #include "insn-attr.h"
29 #include "stringpool.h"
30 #include "stor-layout.h"
35 #include "hard-reg-set.h"
41 #include "target-def.h"
42 #include "targhooks.h"
47 #include "langhooks.h"
48 #include "diagnostic-core.h"
49 #include "pointer-set.h"
50 #include "hash-table.h"
52 #include "basic-block.h"
53 #include "tree-ssa-alias.h"
54 #include "internal-fn.h"
55 #include "gimple-fold.h"
57 #include "gimple-expr.h"
64 #include "tree-vectorizer.h"
65 #include "config/arm/aarch-cost-tables.h"
68 /* Defined for convenience. */
69 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
71 /* Classifies an address.
74 A simple base register plus immediate offset.
77 A base register indexed by immediate offset with writeback.
80 A base register indexed by (optionally scaled) register.
83 A base register indexed by (optionally scaled) zero-extended register.
86 A base register indexed by (optionally scaled) sign-extended register.
89 A LO_SUM rtx with a base register and "LO12" symbol relocation.
92 A constant symbolic address, in pc-relative literal pool. */
94 enum aarch64_address_type
{
104 struct aarch64_address_info
{
105 enum aarch64_address_type type
;
109 enum aarch64_symbol_type symbol_type
;
112 struct simd_immediate_info
121 /* The current code model. */
122 enum aarch64_code_model aarch64_cmodel
;
125 #undef TARGET_HAVE_TLS
126 #define TARGET_HAVE_TLS 1
129 static bool aarch64_lra_p (void);
130 static bool aarch64_composite_type_p (const_tree
, enum machine_mode
);
131 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode
,
133 enum machine_mode
*, int *,
135 static void aarch64_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
136 static void aarch64_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
137 static void aarch64_override_options_after_change (void);
138 static bool aarch64_vector_mode_supported_p (enum machine_mode
);
139 static unsigned bit_count (unsigned HOST_WIDE_INT
);
140 static bool aarch64_const_vec_all_same_int_p (rtx
,
141 HOST_WIDE_INT
, HOST_WIDE_INT
);
143 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
144 const unsigned char *sel
);
145 static int aarch64_address_cost (rtx
, enum machine_mode
, addr_space_t
, bool);
147 /* The processor for which instructions should be scheduled. */
148 enum aarch64_processor aarch64_tune
= cortexa53
;
150 /* The current tuning set. */
151 const struct tune_params
*aarch64_tune_params
;
153 /* Mask to specify which instructions we are allowed to generate. */
154 unsigned long aarch64_isa_flags
= 0;
156 /* Mask to specify which instruction scheduling options should be used. */
157 unsigned long aarch64_tune_flags
= 0;
159 /* Tuning parameters. */
161 #if HAVE_DESIGNATED_INITIALIZERS
162 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
164 #define NAMED_PARAM(NAME, VAL) (VAL)
167 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
171 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
174 static const struct cpu_addrcost_table generic_addrcost_table
=
176 #if HAVE_DESIGNATED_INITIALIZERS
185 NAMED_PARAM (pre_modify
, 0),
186 NAMED_PARAM (post_modify
, 0),
187 NAMED_PARAM (register_offset
, 0),
188 NAMED_PARAM (register_extend
, 0),
189 NAMED_PARAM (imm_offset
, 0)
192 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
195 static const struct cpu_addrcost_table cortexa57_addrcost_table
=
197 #if HAVE_DESIGNATED_INITIALIZERS
206 NAMED_PARAM (pre_modify
, 0),
207 NAMED_PARAM (post_modify
, 0),
208 NAMED_PARAM (register_offset
, 0),
209 NAMED_PARAM (register_extend
, 0),
210 NAMED_PARAM (imm_offset
, 0),
213 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
216 static const struct cpu_regmove_cost generic_regmove_cost
=
218 NAMED_PARAM (GP2GP
, 1),
219 NAMED_PARAM (GP2FP
, 2),
220 NAMED_PARAM (FP2GP
, 2),
221 /* We currently do not provide direct support for TFmode Q->Q move.
222 Therefore we need to raise the cost above 2 in order to have
223 reload handle the situation. */
224 NAMED_PARAM (FP2FP
, 4)
227 /* Generic costs for vector insn classes. */
228 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
231 static const struct cpu_vector_cost generic_vector_cost
=
233 NAMED_PARAM (scalar_stmt_cost
, 1),
234 NAMED_PARAM (scalar_load_cost
, 1),
235 NAMED_PARAM (scalar_store_cost
, 1),
236 NAMED_PARAM (vec_stmt_cost
, 1),
237 NAMED_PARAM (vec_to_scalar_cost
, 1),
238 NAMED_PARAM (scalar_to_vec_cost
, 1),
239 NAMED_PARAM (vec_align_load_cost
, 1),
240 NAMED_PARAM (vec_unalign_load_cost
, 1),
241 NAMED_PARAM (vec_unalign_store_cost
, 1),
242 NAMED_PARAM (vec_store_cost
, 1),
243 NAMED_PARAM (cond_taken_branch_cost
, 3),
244 NAMED_PARAM (cond_not_taken_branch_cost
, 1)
247 /* Generic costs for vector insn classes. */
248 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
251 static const struct cpu_vector_cost cortexa57_vector_cost
=
253 NAMED_PARAM (scalar_stmt_cost
, 1),
254 NAMED_PARAM (scalar_load_cost
, 4),
255 NAMED_PARAM (scalar_store_cost
, 1),
256 NAMED_PARAM (vec_stmt_cost
, 3),
257 NAMED_PARAM (vec_to_scalar_cost
, 8),
258 NAMED_PARAM (scalar_to_vec_cost
, 8),
259 NAMED_PARAM (vec_align_load_cost
, 5),
260 NAMED_PARAM (vec_unalign_load_cost
, 5),
261 NAMED_PARAM (vec_unalign_store_cost
, 1),
262 NAMED_PARAM (vec_store_cost
, 1),
263 NAMED_PARAM (cond_taken_branch_cost
, 1),
264 NAMED_PARAM (cond_not_taken_branch_cost
, 1)
267 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
270 static const struct tune_params generic_tunings
=
272 &cortexa57_extra_costs
,
273 &generic_addrcost_table
,
274 &generic_regmove_cost
,
275 &generic_vector_cost
,
276 NAMED_PARAM (memmov_cost
, 4),
277 NAMED_PARAM (issue_rate
, 2)
280 static const struct tune_params cortexa53_tunings
=
282 &cortexa53_extra_costs
,
283 &generic_addrcost_table
,
284 &generic_regmove_cost
,
285 &generic_vector_cost
,
286 NAMED_PARAM (memmov_cost
, 4),
287 NAMED_PARAM (issue_rate
, 2)
290 static const struct tune_params cortexa57_tunings
=
292 &cortexa57_extra_costs
,
293 &cortexa57_addrcost_table
,
294 &generic_regmove_cost
,
295 &cortexa57_vector_cost
,
296 NAMED_PARAM (memmov_cost
, 4),
297 NAMED_PARAM (issue_rate
, 3)
300 /* A processor implementing AArch64. */
303 const char *const name
;
304 enum aarch64_processor core
;
306 const unsigned long flags
;
307 const struct tune_params
*const tune
;
310 /* Processor cores implementing AArch64. */
311 static const struct processor all_cores
[] =
313 #define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
314 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
315 #include "aarch64-cores.def"
317 {"generic", cortexa53
, "8", AARCH64_FL_FPSIMD
| AARCH64_FL_FOR_ARCH8
, &generic_tunings
},
318 {NULL
, aarch64_none
, NULL
, 0, NULL
}
321 /* Architectures implementing AArch64. */
322 static const struct processor all_architectures
[] =
324 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
325 {NAME, CORE, #ARCH, FLAGS, NULL},
326 #include "aarch64-arches.def"
328 {NULL
, aarch64_none
, NULL
, 0, NULL
}
331 /* Target specification. These are populated as commandline arguments
332 are processed, or NULL if not specified. */
333 static const struct processor
*selected_arch
;
334 static const struct processor
*selected_cpu
;
335 static const struct processor
*selected_tune
;
337 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
339 /* An ISA extension in the co-processor and main instruction set space. */
340 struct aarch64_option_extension
342 const char *const name
;
343 const unsigned long flags_on
;
344 const unsigned long flags_off
;
347 /* ISA extensions in AArch64. */
348 static const struct aarch64_option_extension all_extensions
[] =
350 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
351 {NAME, FLAGS_ON, FLAGS_OFF},
352 #include "aarch64-option-extensions.def"
353 #undef AARCH64_OPT_EXTENSION
357 /* Used to track the size of an address when generating a pre/post
358 increment address. */
359 static enum machine_mode aarch64_memory_reference_mode
;
361 /* Used to force GTY into this file. */
362 static GTY(()) int gty_dummy
;
364 /* A table of valid AArch64 "bitmask immediate" values for
365 logical instructions. */
367 #define AARCH64_NUM_BITMASKS 5334
368 static unsigned HOST_WIDE_INT aarch64_bitmasks
[AARCH64_NUM_BITMASKS
];
370 typedef enum aarch64_cond_code
372 AARCH64_EQ
= 0, AARCH64_NE
, AARCH64_CS
, AARCH64_CC
, AARCH64_MI
, AARCH64_PL
,
373 AARCH64_VS
, AARCH64_VC
, AARCH64_HI
, AARCH64_LS
, AARCH64_GE
, AARCH64_LT
,
374 AARCH64_GT
, AARCH64_LE
, AARCH64_AL
, AARCH64_NV
378 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
380 /* The condition codes of the processor, and the inverse function. */
381 static const char * const aarch64_condition_codes
[] =
383 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
384 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
387 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
389 aarch64_dbx_register_number (unsigned regno
)
391 if (GP_REGNUM_P (regno
))
392 return AARCH64_DWARF_R0
+ regno
- R0_REGNUM
;
393 else if (regno
== SP_REGNUM
)
394 return AARCH64_DWARF_SP
;
395 else if (FP_REGNUM_P (regno
))
396 return AARCH64_DWARF_V0
+ regno
- V0_REGNUM
;
398 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
399 equivalent DWARF register. */
400 return DWARF_FRAME_REGISTERS
;
403 /* Return TRUE if MODE is any of the large INT modes. */
405 aarch64_vect_struct_mode_p (enum machine_mode mode
)
407 return mode
== OImode
|| mode
== CImode
|| mode
== XImode
;
410 /* Return TRUE if MODE is any of the vector modes. */
412 aarch64_vector_mode_p (enum machine_mode mode
)
414 return aarch64_vector_mode_supported_p (mode
)
415 || aarch64_vect_struct_mode_p (mode
);
418 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
420 aarch64_array_mode_supported_p (enum machine_mode mode
,
421 unsigned HOST_WIDE_INT nelems
)
424 && AARCH64_VALID_SIMD_QREG_MODE (mode
)
425 && (nelems
>= 2 && nelems
<= 4))
431 /* Implement HARD_REGNO_NREGS. */
434 aarch64_hard_regno_nregs (unsigned regno
, enum machine_mode mode
)
436 switch (aarch64_regno_regclass (regno
))
440 return (GET_MODE_SIZE (mode
) + UNITS_PER_VREG
- 1) / UNITS_PER_VREG
;
442 return (GET_MODE_SIZE (mode
) + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
447 /* Implement HARD_REGNO_MODE_OK. */
450 aarch64_hard_regno_mode_ok (unsigned regno
, enum machine_mode mode
)
452 if (GET_MODE_CLASS (mode
) == MODE_CC
)
453 return regno
== CC_REGNUM
;
455 if (regno
== SP_REGNUM
)
456 /* The purpose of comparing with ptr_mode is to support the
457 global register variable associated with the stack pointer
458 register via the syntax of asm ("wsp") in ILP32. */
459 return mode
== Pmode
|| mode
== ptr_mode
;
461 if (regno
== FRAME_POINTER_REGNUM
|| regno
== ARG_POINTER_REGNUM
)
462 return mode
== Pmode
;
464 if (GP_REGNUM_P (regno
) && ! aarch64_vect_struct_mode_p (mode
))
467 if (FP_REGNUM_P (regno
))
469 if (aarch64_vect_struct_mode_p (mode
))
471 (regno
+ aarch64_hard_regno_nregs (regno
, mode
) - 1) <= V31_REGNUM
;
479 /* Implement HARD_REGNO_CALLER_SAVE_MODE. */
481 aarch64_hard_regno_caller_save_mode (unsigned regno
, unsigned nregs
,
482 enum machine_mode mode
)
484 /* Handle modes that fit within single registers. */
485 if (nregs
== 1 && GET_MODE_SIZE (mode
) <= 16)
487 if (GET_MODE_SIZE (mode
) >= 4)
492 /* Fall back to generic for multi-reg and very large modes. */
494 return choose_hard_reg_mode (regno
, nregs
, false);
497 /* Return true if calls to DECL should be treated as
498 long-calls (ie called via a register). */
500 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED
)
505 /* Return true if calls to symbol-ref SYM should be treated as
506 long-calls (ie called via a register). */
508 aarch64_is_long_call_p (rtx sym
)
510 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym
));
513 /* Return true if the offsets to a zero/sign-extract operation
514 represent an expression that matches an extend operation. The
515 operands represent the paramters from
517 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
519 aarch64_is_extend_from_extract (enum machine_mode mode
, rtx mult_imm
,
522 HOST_WIDE_INT mult_val
, extract_val
;
524 if (! CONST_INT_P (mult_imm
) || ! CONST_INT_P (extract_imm
))
527 mult_val
= INTVAL (mult_imm
);
528 extract_val
= INTVAL (extract_imm
);
531 && extract_val
< GET_MODE_BITSIZE (mode
)
532 && exact_log2 (extract_val
& ~7) > 0
533 && (extract_val
& 7) <= 4
534 && mult_val
== (1 << (extract_val
& 7)))
540 /* Emit an insn that's a simple single-set. Both the operands must be
541 known to be valid. */
543 emit_set_insn (rtx x
, rtx y
)
545 return emit_insn (gen_rtx_SET (VOIDmode
, x
, y
));
548 /* X and Y are two things to compare using CODE. Emit the compare insn and
549 return the rtx for register 0 in the proper mode. */
551 aarch64_gen_compare_reg (RTX_CODE code
, rtx x
, rtx y
)
553 enum machine_mode mode
= SELECT_CC_MODE (code
, x
, y
);
554 rtx cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
556 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
560 /* Build the SYMBOL_REF for __tls_get_addr. */
562 static GTY(()) rtx tls_get_addr_libfunc
;
565 aarch64_tls_get_addr (void)
567 if (!tls_get_addr_libfunc
)
568 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
569 return tls_get_addr_libfunc
;
572 /* Return the TLS model to use for ADDR. */
574 static enum tls_model
575 tls_symbolic_operand_type (rtx addr
)
577 enum tls_model tls_kind
= TLS_MODEL_NONE
;
580 if (GET_CODE (addr
) == CONST
)
582 split_const (addr
, &sym
, &addend
);
583 if (GET_CODE (sym
) == SYMBOL_REF
)
584 tls_kind
= SYMBOL_REF_TLS_MODEL (sym
);
586 else if (GET_CODE (addr
) == SYMBOL_REF
)
587 tls_kind
= SYMBOL_REF_TLS_MODEL (addr
);
592 /* We'll allow lo_sum's in addresses in our legitimate addresses
593 so that combine would take care of combining addresses where
594 necessary, but for generation purposes, we'll generate the address
597 tmp = hi (symbol_ref); adrp x1, foo
598 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
602 adrp x1, :got:foo adrp tmp, :tlsgd:foo
603 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
607 Load TLS symbol, depending on TLS mechanism and TLS access model.
609 Global Dynamic - Traditional TLS:
611 add dest, tmp, #:tlsgd_lo12:imm
614 Global Dynamic - TLS Descriptors:
615 adrp dest, :tlsdesc:imm
616 ldr tmp, [dest, #:tlsdesc_lo12:imm]
617 add dest, dest, #:tlsdesc_lo12:imm
624 adrp tmp, :gottprel:imm
625 ldr dest, [tmp, #:gottprel_lo12:imm]
630 add t0, tp, #:tprel_hi12:imm
631 add t0, #:tprel_lo12_nc:imm
635 aarch64_load_symref_appropriately (rtx dest
, rtx imm
,
636 enum aarch64_symbol_type type
)
640 case SYMBOL_SMALL_ABSOLUTE
:
642 /* In ILP32, the mode of dest can be either SImode or DImode. */
644 enum machine_mode mode
= GET_MODE (dest
);
646 gcc_assert (mode
== Pmode
|| mode
== ptr_mode
);
648 if (can_create_pseudo_p ())
649 tmp_reg
= gen_reg_rtx (mode
);
651 emit_move_insn (tmp_reg
, gen_rtx_HIGH (mode
, imm
));
652 emit_insn (gen_add_losym (dest
, tmp_reg
, imm
));
656 case SYMBOL_TINY_ABSOLUTE
:
657 emit_insn (gen_rtx_SET (Pmode
, dest
, imm
));
660 case SYMBOL_SMALL_GOT
:
662 /* In ILP32, the mode of dest can be either SImode or DImode,
663 while the got entry is always of SImode size. The mode of
664 dest depends on how dest is used: if dest is assigned to a
665 pointer (e.g. in the memory), it has SImode; it may have
666 DImode if dest is dereferenced to access the memeory.
667 This is why we have to handle three different ldr_got_small
668 patterns here (two patterns for ILP32). */
670 enum machine_mode mode
= GET_MODE (dest
);
672 if (can_create_pseudo_p ())
673 tmp_reg
= gen_reg_rtx (mode
);
675 emit_move_insn (tmp_reg
, gen_rtx_HIGH (mode
, imm
));
676 if (mode
== ptr_mode
)
679 emit_insn (gen_ldr_got_small_di (dest
, tmp_reg
, imm
));
681 emit_insn (gen_ldr_got_small_si (dest
, tmp_reg
, imm
));
685 gcc_assert (mode
== Pmode
);
686 emit_insn (gen_ldr_got_small_sidi (dest
, tmp_reg
, imm
));
692 case SYMBOL_SMALL_TLSGD
:
695 rtx result
= gen_rtx_REG (Pmode
, R0_REGNUM
);
698 emit_call_insn (gen_tlsgd_small (result
, imm
));
699 insns
= get_insns ();
702 RTL_CONST_CALL_P (insns
) = 1;
703 emit_libcall_block (insns
, dest
, result
, imm
);
707 case SYMBOL_SMALL_TLSDESC
:
709 enum machine_mode mode
= GET_MODE (dest
);
710 rtx x0
= gen_rtx_REG (mode
, R0_REGNUM
);
713 gcc_assert (mode
== Pmode
|| mode
== ptr_mode
);
715 /* In ILP32, the got entry is always of SImode size. Unlike
716 small GOT, the dest is fixed at reg 0. */
718 emit_insn (gen_tlsdesc_small_si (imm
));
720 emit_insn (gen_tlsdesc_small_di (imm
));
721 tp
= aarch64_load_tp (NULL
);
724 tp
= gen_lowpart (mode
, tp
);
726 emit_insn (gen_rtx_SET (mode
, dest
, gen_rtx_PLUS (mode
, tp
, x0
)));
727 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
731 case SYMBOL_SMALL_GOTTPREL
:
733 /* In ILP32, the mode of dest can be either SImode or DImode,
734 while the got entry is always of SImode size. The mode of
735 dest depends on how dest is used: if dest is assigned to a
736 pointer (e.g. in the memory), it has SImode; it may have
737 DImode if dest is dereferenced to access the memeory.
738 This is why we have to handle three different tlsie_small
739 patterns here (two patterns for ILP32). */
740 enum machine_mode mode
= GET_MODE (dest
);
741 rtx tmp_reg
= gen_reg_rtx (mode
);
742 rtx tp
= aarch64_load_tp (NULL
);
744 if (mode
== ptr_mode
)
747 emit_insn (gen_tlsie_small_di (tmp_reg
, imm
));
750 emit_insn (gen_tlsie_small_si (tmp_reg
, imm
));
751 tp
= gen_lowpart (mode
, tp
);
756 gcc_assert (mode
== Pmode
);
757 emit_insn (gen_tlsie_small_sidi (tmp_reg
, imm
));
760 emit_insn (gen_rtx_SET (mode
, dest
, gen_rtx_PLUS (mode
, tp
, tmp_reg
)));
761 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
765 case SYMBOL_SMALL_TPREL
:
767 rtx tp
= aarch64_load_tp (NULL
);
768 emit_insn (gen_tlsle_small (dest
, tp
, imm
));
769 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
773 case SYMBOL_TINY_GOT
:
774 emit_insn (gen_ldr_got_tiny (dest
, imm
));
782 /* Emit a move from SRC to DEST. Assume that the move expanders can
783 handle all moves if !can_create_pseudo_p (). The distinction is
784 important because, unlike emit_move_insn, the move expanders know
785 how to force Pmode objects into the constant pool even when the
786 constant pool address is not itself legitimate. */
788 aarch64_emit_move (rtx dest
, rtx src
)
790 return (can_create_pseudo_p ()
791 ? emit_move_insn (dest
, src
)
792 : emit_move_insn_1 (dest
, src
));
795 /* Split a 128-bit move operation into two 64-bit move operations,
796 taking care to handle partial overlap of register to register
797 copies. Special cases are needed when moving between GP regs and
798 FP regs. SRC can be a register, constant or memory; DST a register
799 or memory. If either operand is memory it must not have any side
802 aarch64_split_128bit_move (rtx dst
, rtx src
)
807 enum machine_mode mode
= GET_MODE (dst
);
809 gcc_assert (mode
== TImode
|| mode
== TFmode
);
810 gcc_assert (!(side_effects_p (src
) || side_effects_p (dst
)));
811 gcc_assert (mode
== GET_MODE (src
) || GET_MODE (src
) == VOIDmode
);
813 if (REG_P (dst
) && REG_P (src
))
815 int src_regno
= REGNO (src
);
816 int dst_regno
= REGNO (dst
);
818 /* Handle FP <-> GP regs. */
819 if (FP_REGNUM_P (dst_regno
) && GP_REGNUM_P (src_regno
))
821 src_lo
= gen_lowpart (word_mode
, src
);
822 src_hi
= gen_highpart (word_mode
, src
);
826 emit_insn (gen_aarch64_movtilow_di (dst
, src_lo
));
827 emit_insn (gen_aarch64_movtihigh_di (dst
, src_hi
));
831 emit_insn (gen_aarch64_movtflow_di (dst
, src_lo
));
832 emit_insn (gen_aarch64_movtfhigh_di (dst
, src_hi
));
836 else if (GP_REGNUM_P (dst_regno
) && FP_REGNUM_P (src_regno
))
838 dst_lo
= gen_lowpart (word_mode
, dst
);
839 dst_hi
= gen_highpart (word_mode
, dst
);
843 emit_insn (gen_aarch64_movdi_tilow (dst_lo
, src
));
844 emit_insn (gen_aarch64_movdi_tihigh (dst_hi
, src
));
848 emit_insn (gen_aarch64_movdi_tflow (dst_lo
, src
));
849 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi
, src
));
855 dst_lo
= gen_lowpart (word_mode
, dst
);
856 dst_hi
= gen_highpart (word_mode
, dst
);
857 src_lo
= gen_lowpart (word_mode
, src
);
858 src_hi
= gen_highpart_mode (word_mode
, mode
, src
);
860 /* At most one pairing may overlap. */
861 if (reg_overlap_mentioned_p (dst_lo
, src_hi
))
863 aarch64_emit_move (dst_hi
, src_hi
);
864 aarch64_emit_move (dst_lo
, src_lo
);
868 aarch64_emit_move (dst_lo
, src_lo
);
869 aarch64_emit_move (dst_hi
, src_hi
);
874 aarch64_split_128bit_move_p (rtx dst
, rtx src
)
876 return (! REG_P (src
)
877 || ! (FP_REGNUM_P (REGNO (dst
)) && FP_REGNUM_P (REGNO (src
))));
880 /* Split a complex SIMD combine. */
883 aarch64_split_simd_combine (rtx dst
, rtx src1
, rtx src2
)
885 enum machine_mode src_mode
= GET_MODE (src1
);
886 enum machine_mode dst_mode
= GET_MODE (dst
);
888 gcc_assert (VECTOR_MODE_P (dst_mode
));
890 if (REG_P (dst
) && REG_P (src1
) && REG_P (src2
))
892 rtx (*gen
) (rtx
, rtx
, rtx
);
897 gen
= gen_aarch64_simd_combinev8qi
;
900 gen
= gen_aarch64_simd_combinev4hi
;
903 gen
= gen_aarch64_simd_combinev2si
;
906 gen
= gen_aarch64_simd_combinev2sf
;
909 gen
= gen_aarch64_simd_combinedi
;
912 gen
= gen_aarch64_simd_combinedf
;
918 emit_insn (gen (dst
, src1
, src2
));
923 /* Split a complex SIMD move. */
926 aarch64_split_simd_move (rtx dst
, rtx src
)
928 enum machine_mode src_mode
= GET_MODE (src
);
929 enum machine_mode dst_mode
= GET_MODE (dst
);
931 gcc_assert (VECTOR_MODE_P (dst_mode
));
933 if (REG_P (dst
) && REG_P (src
))
935 rtx (*gen
) (rtx
, rtx
);
937 gcc_assert (VECTOR_MODE_P (src_mode
));
942 gen
= gen_aarch64_split_simd_movv16qi
;
945 gen
= gen_aarch64_split_simd_movv8hi
;
948 gen
= gen_aarch64_split_simd_movv4si
;
951 gen
= gen_aarch64_split_simd_movv2di
;
954 gen
= gen_aarch64_split_simd_movv4sf
;
957 gen
= gen_aarch64_split_simd_movv2df
;
963 emit_insn (gen (dst
, src
));
969 aarch64_force_temporary (enum machine_mode mode
, rtx x
, rtx value
)
971 if (can_create_pseudo_p ())
972 return force_reg (mode
, value
);
975 x
= aarch64_emit_move (x
, value
);
982 aarch64_add_offset (enum machine_mode mode
, rtx temp
, rtx reg
, HOST_WIDE_INT offset
)
984 if (!aarch64_plus_immediate (GEN_INT (offset
), mode
))
987 /* Load the full offset into a register. This
988 might be improvable in the future. */
989 high
= GEN_INT (offset
);
991 high
= aarch64_force_temporary (mode
, temp
, high
);
992 reg
= aarch64_force_temporary (mode
, temp
,
993 gen_rtx_PLUS (mode
, high
, reg
));
995 return plus_constant (mode
, reg
, offset
);
999 aarch64_expand_mov_immediate (rtx dest
, rtx imm
)
1001 enum machine_mode mode
= GET_MODE (dest
);
1002 unsigned HOST_WIDE_INT mask
;
1005 unsigned HOST_WIDE_INT val
;
1008 int one_match
, zero_match
;
1010 gcc_assert (mode
== SImode
|| mode
== DImode
);
1012 /* Check on what type of symbol it is. */
1013 if (GET_CODE (imm
) == SYMBOL_REF
1014 || GET_CODE (imm
) == LABEL_REF
1015 || GET_CODE (imm
) == CONST
)
1017 rtx mem
, base
, offset
;
1018 enum aarch64_symbol_type sty
;
1020 /* If we have (const (plus symbol offset)), separate out the offset
1021 before we start classifying the symbol. */
1022 split_const (imm
, &base
, &offset
);
1024 sty
= aarch64_classify_symbol (base
, SYMBOL_CONTEXT_ADR
);
1027 case SYMBOL_FORCE_TO_MEM
:
1028 if (offset
!= const0_rtx
1029 && targetm
.cannot_force_const_mem (mode
, imm
))
1031 gcc_assert (can_create_pseudo_p ());
1032 base
= aarch64_force_temporary (mode
, dest
, base
);
1033 base
= aarch64_add_offset (mode
, NULL
, base
, INTVAL (offset
));
1034 aarch64_emit_move (dest
, base
);
1037 mem
= force_const_mem (ptr_mode
, imm
);
1039 if (mode
!= ptr_mode
)
1040 mem
= gen_rtx_ZERO_EXTEND (mode
, mem
);
1041 emit_insn (gen_rtx_SET (VOIDmode
, dest
, mem
));
1044 case SYMBOL_SMALL_TLSGD
:
1045 case SYMBOL_SMALL_TLSDESC
:
1046 case SYMBOL_SMALL_GOTTPREL
:
1047 case SYMBOL_SMALL_GOT
:
1048 case SYMBOL_TINY_GOT
:
1049 if (offset
!= const0_rtx
)
1051 gcc_assert(can_create_pseudo_p ());
1052 base
= aarch64_force_temporary (mode
, dest
, base
);
1053 base
= aarch64_add_offset (mode
, NULL
, base
, INTVAL (offset
));
1054 aarch64_emit_move (dest
, base
);
1059 case SYMBOL_SMALL_TPREL
:
1060 case SYMBOL_SMALL_ABSOLUTE
:
1061 case SYMBOL_TINY_ABSOLUTE
:
1062 aarch64_load_symref_appropriately (dest
, imm
, sty
);
1070 if (CONST_INT_P (imm
) && aarch64_move_imm (INTVAL (imm
), mode
))
1072 emit_insn (gen_rtx_SET (VOIDmode
, dest
, imm
));
1076 if (!CONST_INT_P (imm
))
1078 if (GET_CODE (imm
) == HIGH
)
1079 emit_insn (gen_rtx_SET (VOIDmode
, dest
, imm
));
1082 rtx mem
= force_const_mem (mode
, imm
);
1084 emit_insn (gen_rtx_SET (VOIDmode
, dest
, mem
));
1092 /* We know we can't do this in 1 insn, and we must be able to do it
1093 in two; so don't mess around looking for sequences that don't buy
1095 emit_insn (gen_rtx_SET (VOIDmode
, dest
, GEN_INT (INTVAL (imm
) & 0xffff)));
1096 emit_insn (gen_insv_immsi (dest
, GEN_INT (16),
1097 GEN_INT ((INTVAL (imm
) >> 16) & 0xffff)));
1101 /* Remaining cases are all for DImode. */
1104 subtargets
= optimize
&& can_create_pseudo_p ();
1110 for (i
= 0; i
< 64; i
+= 16, mask
<<= 16)
1112 if ((val
& mask
) == 0)
1114 else if ((val
& mask
) == mask
)
1121 for (i
= 0; i
< 64; i
+= 16, mask
<<= 16)
1123 if ((val
& mask
) != mask
)
1125 emit_insn (gen_rtx_SET (VOIDmode
, dest
, GEN_INT (val
| mask
)));
1126 emit_insn (gen_insv_immdi (dest
, GEN_INT (i
),
1127 GEN_INT ((val
>> i
) & 0xffff)));
1134 if (zero_match
== 2)
1135 goto simple_sequence
;
1137 mask
= 0x0ffff0000UL
;
1138 for (i
= 16; i
< 64; i
+= 16, mask
<<= 16)
1140 HOST_WIDE_INT comp
= mask
& ~(mask
- 1);
1142 if (aarch64_uimm12_shift (val
- (val
& mask
)))
1144 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1146 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
, GEN_INT (val
& mask
)));
1147 emit_insn (gen_adddi3 (dest
, subtarget
,
1148 GEN_INT (val
- (val
& mask
))));
1151 else if (aarch64_uimm12_shift (-(val
- ((val
+ comp
) & mask
))))
1153 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1155 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1156 GEN_INT ((val
+ comp
) & mask
)));
1157 emit_insn (gen_adddi3 (dest
, subtarget
,
1158 GEN_INT (val
- ((val
+ comp
) & mask
))));
1161 else if (aarch64_uimm12_shift (val
- ((val
- comp
) | ~mask
)))
1163 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1165 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1166 GEN_INT ((val
- comp
) | ~mask
)));
1167 emit_insn (gen_adddi3 (dest
, subtarget
,
1168 GEN_INT (val
- ((val
- comp
) | ~mask
))));
1171 else if (aarch64_uimm12_shift (-(val
- (val
| ~mask
))))
1173 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1175 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1176 GEN_INT (val
| ~mask
)));
1177 emit_insn (gen_adddi3 (dest
, subtarget
,
1178 GEN_INT (val
- (val
| ~mask
))));
1183 /* See if we can do it by arithmetically combining two
1185 for (i
= 0; i
< AARCH64_NUM_BITMASKS
; i
++)
1190 if (aarch64_uimm12_shift (val
- aarch64_bitmasks
[i
])
1191 || aarch64_uimm12_shift (-val
+ aarch64_bitmasks
[i
]))
1193 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1194 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1195 GEN_INT (aarch64_bitmasks
[i
])));
1196 emit_insn (gen_adddi3 (dest
, subtarget
,
1197 GEN_INT (val
- aarch64_bitmasks
[i
])));
1201 for (j
= 0; j
< 64; j
+= 16, mask
<<= 16)
1203 if ((aarch64_bitmasks
[i
] & ~mask
) == (val
& ~mask
))
1205 emit_insn (gen_rtx_SET (VOIDmode
, dest
,
1206 GEN_INT (aarch64_bitmasks
[i
])));
1207 emit_insn (gen_insv_immdi (dest
, GEN_INT (j
),
1208 GEN_INT ((val
>> j
) & 0xffff)));
1214 /* See if we can do it by logically combining two immediates. */
1215 for (i
= 0; i
< AARCH64_NUM_BITMASKS
; i
++)
1217 if ((aarch64_bitmasks
[i
] & val
) == aarch64_bitmasks
[i
])
1221 for (j
= i
+ 1; j
< AARCH64_NUM_BITMASKS
; j
++)
1222 if (val
== (aarch64_bitmasks
[i
] | aarch64_bitmasks
[j
]))
1224 subtarget
= subtargets
? gen_reg_rtx (mode
) : dest
;
1225 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1226 GEN_INT (aarch64_bitmasks
[i
])));
1227 emit_insn (gen_iordi3 (dest
, subtarget
,
1228 GEN_INT (aarch64_bitmasks
[j
])));
1232 else if ((val
& aarch64_bitmasks
[i
]) == val
)
1236 for (j
= i
+ 1; j
< AARCH64_NUM_BITMASKS
; j
++)
1237 if (val
== (aarch64_bitmasks
[j
] & aarch64_bitmasks
[i
]))
1240 subtarget
= subtargets
? gen_reg_rtx (mode
) : dest
;
1241 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1242 GEN_INT (aarch64_bitmasks
[j
])));
1243 emit_insn (gen_anddi3 (dest
, subtarget
,
1244 GEN_INT (aarch64_bitmasks
[i
])));
1253 for (i
= 0; i
< 64; i
+= 16, mask
<<= 16)
1255 if ((val
& mask
) != 0)
1259 emit_insn (gen_rtx_SET (VOIDmode
, dest
,
1260 GEN_INT (val
& mask
)));
1264 emit_insn (gen_insv_immdi (dest
, GEN_INT (i
),
1265 GEN_INT ((val
>> i
) & 0xffff)));
1271 aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED
,
1272 tree exp ATTRIBUTE_UNUSED
)
1274 /* Currently, always true. */
1278 /* Implement TARGET_PASS_BY_REFERENCE. */
1281 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED
,
1282 enum machine_mode mode
,
1284 bool named ATTRIBUTE_UNUSED
)
1287 enum machine_mode dummymode
;
1290 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1291 size
= (mode
== BLKmode
&& type
)
1292 ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1294 /* Aggregates are passed by reference based on their size. */
1295 if (type
&& AGGREGATE_TYPE_P (type
))
1297 size
= int_size_in_bytes (type
);
1300 /* Variable sized arguments are always returned by reference. */
1304 /* Can this be a candidate to be passed in fp/simd register(s)? */
1305 if (aarch64_vfp_is_call_or_return_candidate (mode
, type
,
1310 /* Arguments which are variable sized or larger than 2 registers are
1311 passed by reference unless they are a homogenous floating point
1313 return size
> 2 * UNITS_PER_WORD
;
1316 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1318 aarch64_return_in_msb (const_tree valtype
)
1320 enum machine_mode dummy_mode
;
1323 /* Never happens in little-endian mode. */
1324 if (!BYTES_BIG_ENDIAN
)
1327 /* Only composite types smaller than or equal to 16 bytes can
1328 be potentially returned in registers. */
1329 if (!aarch64_composite_type_p (valtype
, TYPE_MODE (valtype
))
1330 || int_size_in_bytes (valtype
) <= 0
1331 || int_size_in_bytes (valtype
) > 16)
1334 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1335 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1336 is always passed/returned in the least significant bits of fp/simd
1338 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype
), valtype
,
1339 &dummy_mode
, &dummy_int
, NULL
))
1345 /* Implement TARGET_FUNCTION_VALUE.
1346 Define how to find the value returned by a function. */
1349 aarch64_function_value (const_tree type
, const_tree func
,
1350 bool outgoing ATTRIBUTE_UNUSED
)
1352 enum machine_mode mode
;
1355 enum machine_mode ag_mode
;
1357 mode
= TYPE_MODE (type
);
1358 if (INTEGRAL_TYPE_P (type
))
1359 mode
= promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
1361 if (aarch64_return_in_msb (type
))
1363 HOST_WIDE_INT size
= int_size_in_bytes (type
);
1365 if (size
% UNITS_PER_WORD
!= 0)
1367 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
1368 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
1372 if (aarch64_vfp_is_call_or_return_candidate (mode
, type
,
1373 &ag_mode
, &count
, NULL
))
1375 if (!aarch64_composite_type_p (type
, mode
))
1377 gcc_assert (count
== 1 && mode
== ag_mode
);
1378 return gen_rtx_REG (mode
, V0_REGNUM
);
1385 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
1386 for (i
= 0; i
< count
; i
++)
1388 rtx tmp
= gen_rtx_REG (ag_mode
, V0_REGNUM
+ i
);
1389 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
1390 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
1391 XVECEXP (par
, 0, i
) = tmp
;
1397 return gen_rtx_REG (mode
, R0_REGNUM
);
1400 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1401 Return true if REGNO is the number of a hard register in which the values
1402 of called function may come back. */
1405 aarch64_function_value_regno_p (const unsigned int regno
)
1407 /* Maximum of 16 bytes can be returned in the general registers. Examples
1408 of 16-byte return values are: 128-bit integers and 16-byte small
1409 structures (excluding homogeneous floating-point aggregates). */
1410 if (regno
== R0_REGNUM
|| regno
== R1_REGNUM
)
1413 /* Up to four fp/simd registers can return a function value, e.g. a
1414 homogeneous floating-point aggregate having four members. */
1415 if (regno
>= V0_REGNUM
&& regno
< V0_REGNUM
+ HA_MAX_NUM_FLDS
)
1416 return !TARGET_GENERAL_REGS_ONLY
;
1421 /* Implement TARGET_RETURN_IN_MEMORY.
1423 If the type T of the result of a function is such that
1425 would require that arg be passed as a value in a register (or set of
1426 registers) according to the parameter passing rules, then the result
1427 is returned in the same registers as would be used for such an
1431 aarch64_return_in_memory (const_tree type
, const_tree fndecl ATTRIBUTE_UNUSED
)
1434 enum machine_mode ag_mode
;
1437 if (!AGGREGATE_TYPE_P (type
)
1438 && TREE_CODE (type
) != COMPLEX_TYPE
1439 && TREE_CODE (type
) != VECTOR_TYPE
)
1440 /* Simple scalar types always returned in registers. */
1443 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type
),
1450 /* Types larger than 2 registers returned in memory. */
1451 size
= int_size_in_bytes (type
);
1452 return (size
< 0 || size
> 2 * UNITS_PER_WORD
);
1456 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v
, enum machine_mode mode
,
1457 const_tree type
, int *nregs
)
1459 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
1460 return aarch64_vfp_is_call_or_return_candidate (mode
,
1462 &pcum
->aapcs_vfp_rmode
,
1467 /* Given MODE and TYPE of a function argument, return the alignment in
1468 bits. The idea is to suppress any stronger alignment requested by
1469 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1470 This is a helper function for local use only. */
1473 aarch64_function_arg_alignment (enum machine_mode mode
, const_tree type
)
1475 unsigned int alignment
;
1479 if (!integer_zerop (TYPE_SIZE (type
)))
1481 if (TYPE_MODE (type
) == mode
)
1482 alignment
= TYPE_ALIGN (type
);
1484 alignment
= GET_MODE_ALIGNMENT (mode
);
1490 alignment
= GET_MODE_ALIGNMENT (mode
);
1495 /* Layout a function argument according to the AAPCS64 rules. The rule
1496 numbers refer to the rule numbers in the AAPCS64. */
1499 aarch64_layout_arg (cumulative_args_t pcum_v
, enum machine_mode mode
,
1501 bool named ATTRIBUTE_UNUSED
)
1503 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
1504 int ncrn
, nvrn
, nregs
;
1505 bool allocate_ncrn
, allocate_nvrn
;
1508 /* We need to do this once per argument. */
1509 if (pcum
->aapcs_arg_processed
)
1512 pcum
->aapcs_arg_processed
= true;
1514 /* Size in bytes, rounded to the nearest multiple of 8 bytes. */
1516 = AARCH64_ROUND_UP (type
? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
),
1519 allocate_ncrn
= (type
) ? !(FLOAT_TYPE_P (type
)) : !FLOAT_MODE_P (mode
);
1520 allocate_nvrn
= aarch64_vfp_is_call_candidate (pcum_v
,
1525 /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1526 The following code thus handles passing by SIMD/FP registers first. */
1528 nvrn
= pcum
->aapcs_nvrn
;
1530 /* C1 - C5 for floating point, homogenous floating point aggregates (HFA)
1531 and homogenous short-vector aggregates (HVA). */
1534 if (nvrn
+ nregs
<= NUM_FP_ARG_REGS
)
1536 pcum
->aapcs_nextnvrn
= nvrn
+ nregs
;
1537 if (!aarch64_composite_type_p (type
, mode
))
1539 gcc_assert (nregs
== 1);
1540 pcum
->aapcs_reg
= gen_rtx_REG (mode
, V0_REGNUM
+ nvrn
);
1546 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nregs
));
1547 for (i
= 0; i
< nregs
; i
++)
1549 rtx tmp
= gen_rtx_REG (pcum
->aapcs_vfp_rmode
,
1550 V0_REGNUM
+ nvrn
+ i
);
1551 tmp
= gen_rtx_EXPR_LIST
1553 GEN_INT (i
* GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
)));
1554 XVECEXP (par
, 0, i
) = tmp
;
1556 pcum
->aapcs_reg
= par
;
1562 /* C.3 NSRN is set to 8. */
1563 pcum
->aapcs_nextnvrn
= NUM_FP_ARG_REGS
;
1568 ncrn
= pcum
->aapcs_ncrn
;
1569 nregs
= size
/ UNITS_PER_WORD
;
1571 /* C6 - C9. though the sign and zero extension semantics are
1572 handled elsewhere. This is the case where the argument fits
1573 entirely general registers. */
1574 if (allocate_ncrn
&& (ncrn
+ nregs
<= NUM_ARG_REGS
))
1576 unsigned int alignment
= aarch64_function_arg_alignment (mode
, type
);
1578 gcc_assert (nregs
== 0 || nregs
== 1 || nregs
== 2);
1580 /* C.8 if the argument has an alignment of 16 then the NGRN is
1581 rounded up to the next even number. */
1582 if (nregs
== 2 && alignment
== 16 * BITS_PER_UNIT
&& ncrn
% 2)
1585 gcc_assert (ncrn
+ nregs
<= NUM_ARG_REGS
);
1587 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1588 A reg is still generated for it, but the caller should be smart
1589 enough not to use it. */
1590 if (nregs
== 0 || nregs
== 1 || GET_MODE_CLASS (mode
) == MODE_INT
)
1592 pcum
->aapcs_reg
= gen_rtx_REG (mode
, R0_REGNUM
+ ncrn
);
1599 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nregs
));
1600 for (i
= 0; i
< nregs
; i
++)
1602 rtx tmp
= gen_rtx_REG (word_mode
, R0_REGNUM
+ ncrn
+ i
);
1603 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
1604 GEN_INT (i
* UNITS_PER_WORD
));
1605 XVECEXP (par
, 0, i
) = tmp
;
1607 pcum
->aapcs_reg
= par
;
1610 pcum
->aapcs_nextncrn
= ncrn
+ nregs
;
1615 pcum
->aapcs_nextncrn
= NUM_ARG_REGS
;
1617 /* The argument is passed on stack; record the needed number of words for
1618 this argument and align the total size if necessary. */
1620 pcum
->aapcs_stack_words
= size
/ UNITS_PER_WORD
;
1621 if (aarch64_function_arg_alignment (mode
, type
) == 16 * BITS_PER_UNIT
)
1622 pcum
->aapcs_stack_size
= AARCH64_ROUND_UP (pcum
->aapcs_stack_size
,
1623 16 / UNITS_PER_WORD
);
1627 /* Implement TARGET_FUNCTION_ARG. */
1630 aarch64_function_arg (cumulative_args_t pcum_v
, enum machine_mode mode
,
1631 const_tree type
, bool named
)
1633 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
1634 gcc_assert (pcum
->pcs_variant
== ARM_PCS_AAPCS64
);
1636 if (mode
== VOIDmode
)
1639 aarch64_layout_arg (pcum_v
, mode
, type
, named
);
1640 return pcum
->aapcs_reg
;
1644 aarch64_init_cumulative_args (CUMULATIVE_ARGS
*pcum
,
1645 const_tree fntype ATTRIBUTE_UNUSED
,
1646 rtx libname ATTRIBUTE_UNUSED
,
1647 const_tree fndecl ATTRIBUTE_UNUSED
,
1648 unsigned n_named ATTRIBUTE_UNUSED
)
1650 pcum
->aapcs_ncrn
= 0;
1651 pcum
->aapcs_nvrn
= 0;
1652 pcum
->aapcs_nextncrn
= 0;
1653 pcum
->aapcs_nextnvrn
= 0;
1654 pcum
->pcs_variant
= ARM_PCS_AAPCS64
;
1655 pcum
->aapcs_reg
= NULL_RTX
;
1656 pcum
->aapcs_arg_processed
= false;
1657 pcum
->aapcs_stack_words
= 0;
1658 pcum
->aapcs_stack_size
= 0;
1664 aarch64_function_arg_advance (cumulative_args_t pcum_v
,
1665 enum machine_mode mode
,
1669 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
1670 if (pcum
->pcs_variant
== ARM_PCS_AAPCS64
)
1672 aarch64_layout_arg (pcum_v
, mode
, type
, named
);
1673 gcc_assert ((pcum
->aapcs_reg
!= NULL_RTX
)
1674 != (pcum
->aapcs_stack_words
!= 0));
1675 pcum
->aapcs_arg_processed
= false;
1676 pcum
->aapcs_ncrn
= pcum
->aapcs_nextncrn
;
1677 pcum
->aapcs_nvrn
= pcum
->aapcs_nextnvrn
;
1678 pcum
->aapcs_stack_size
+= pcum
->aapcs_stack_words
;
1679 pcum
->aapcs_stack_words
= 0;
1680 pcum
->aapcs_reg
= NULL_RTX
;
1685 aarch64_function_arg_regno_p (unsigned regno
)
1687 return ((GP_REGNUM_P (regno
) && regno
< R0_REGNUM
+ NUM_ARG_REGS
)
1688 || (FP_REGNUM_P (regno
) && regno
< V0_REGNUM
+ NUM_FP_ARG_REGS
));
1691 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1692 PARM_BOUNDARY bits of alignment, but will be given anything up
1693 to STACK_BOUNDARY bits if the type requires it. This makes sure
1694 that both before and after the layout of each argument, the Next
1695 Stacked Argument Address (NSAA) will have a minimum alignment of
1699 aarch64_function_arg_boundary (enum machine_mode mode
, const_tree type
)
1701 unsigned int alignment
= aarch64_function_arg_alignment (mode
, type
);
1703 if (alignment
< PARM_BOUNDARY
)
1704 alignment
= PARM_BOUNDARY
;
1705 if (alignment
> STACK_BOUNDARY
)
1706 alignment
= STACK_BOUNDARY
;
1710 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1712 Return true if an argument passed on the stack should be padded upwards,
1713 i.e. if the least-significant byte of the stack slot has useful data.
1715 Small aggregate types are placed in the lowest memory address.
1717 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1720 aarch64_pad_arg_upward (enum machine_mode mode
, const_tree type
)
1722 /* On little-endian targets, the least significant byte of every stack
1723 argument is passed at the lowest byte address of the stack slot. */
1724 if (!BYTES_BIG_ENDIAN
)
1727 /* Otherwise, integral, floating-point and pointer types are padded downward:
1728 the least significant byte of a stack argument is passed at the highest
1729 byte address of the stack slot. */
1731 ? (INTEGRAL_TYPE_P (type
) || SCALAR_FLOAT_TYPE_P (type
)
1732 || POINTER_TYPE_P (type
))
1733 : (SCALAR_INT_MODE_P (mode
) || SCALAR_FLOAT_MODE_P (mode
)))
1736 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1740 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1742 It specifies padding for the last (may also be the only)
1743 element of a block move between registers and memory. If
1744 assuming the block is in the memory, padding upward means that
1745 the last element is padded after its highest significant byte,
1746 while in downward padding, the last element is padded at the
1747 its least significant byte side.
1749 Small aggregates and small complex types are always padded
1752 We don't need to worry about homogeneous floating-point or
1753 short-vector aggregates; their move is not affected by the
1754 padding direction determined here. Regardless of endianness,
1755 each element of such an aggregate is put in the least
1756 significant bits of a fp/simd register.
1758 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1759 register has useful data, and return the opposite if the most
1760 significant byte does. */
1763 aarch64_pad_reg_upward (enum machine_mode mode
, const_tree type
,
1764 bool first ATTRIBUTE_UNUSED
)
1767 /* Small composite types are always padded upward. */
1768 if (BYTES_BIG_ENDIAN
&& aarch64_composite_type_p (type
, mode
))
1770 HOST_WIDE_INT size
= (type
? int_size_in_bytes (type
)
1771 : GET_MODE_SIZE (mode
));
1772 if (size
< 2 * UNITS_PER_WORD
)
1776 /* Otherwise, use the default padding. */
1777 return !BYTES_BIG_ENDIAN
;
1780 static enum machine_mode
1781 aarch64_libgcc_cmp_return_mode (void)
1787 aarch64_frame_pointer_required (void)
1789 /* If the function contains dynamic stack allocations, we need to
1790 use the frame pointer to access the static parts of the frame. */
1791 if (cfun
->calls_alloca
)
1794 /* In aarch64_override_options_after_change
1795 flag_omit_leaf_frame_pointer turns off the frame pointer by
1796 default. Turn it back on now if we've not got a leaf
1798 if (flag_omit_leaf_frame_pointer
1799 && (!crtl
->is_leaf
|| df_regs_ever_live_p (LR_REGNUM
)))
1805 /* Mark the registers that need to be saved by the callee and calculate
1806 the size of the callee-saved registers area and frame record (both FP
1807 and LR may be omitted). */
1809 aarch64_layout_frame (void)
1811 HOST_WIDE_INT offset
= 0;
1814 if (reload_completed
&& cfun
->machine
->frame
.laid_out
)
1817 #define SLOT_NOT_REQUIRED (-2)
1818 #define SLOT_REQUIRED (-1)
1820 cfun
->machine
->frame
.wb_candidate1
= FIRST_PSEUDO_REGISTER
;
1821 cfun
->machine
->frame
.wb_candidate2
= FIRST_PSEUDO_REGISTER
;
1823 /* First mark all the registers that really need to be saved... */
1824 for (regno
= R0_REGNUM
; regno
<= R30_REGNUM
; regno
++)
1825 cfun
->machine
->frame
.reg_offset
[regno
] = SLOT_NOT_REQUIRED
;
1827 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
1828 cfun
->machine
->frame
.reg_offset
[regno
] = SLOT_NOT_REQUIRED
;
1830 /* ... that includes the eh data registers (if needed)... */
1831 if (crtl
->calls_eh_return
)
1832 for (regno
= 0; EH_RETURN_DATA_REGNO (regno
) != INVALID_REGNUM
; regno
++)
1833 cfun
->machine
->frame
.reg_offset
[EH_RETURN_DATA_REGNO (regno
)]
1836 /* ... and any callee saved register that dataflow says is live. */
1837 for (regno
= R0_REGNUM
; regno
<= R30_REGNUM
; regno
++)
1838 if (df_regs_ever_live_p (regno
)
1839 && !call_used_regs
[regno
])
1840 cfun
->machine
->frame
.reg_offset
[regno
] = SLOT_REQUIRED
;
1842 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
1843 if (df_regs_ever_live_p (regno
)
1844 && !call_used_regs
[regno
])
1845 cfun
->machine
->frame
.reg_offset
[regno
] = SLOT_REQUIRED
;
1847 if (frame_pointer_needed
)
1849 /* FP and LR are placed in the linkage record. */
1850 cfun
->machine
->frame
.reg_offset
[R29_REGNUM
] = 0;
1851 cfun
->machine
->frame
.wb_candidate1
= R29_REGNUM
;
1852 cfun
->machine
->frame
.reg_offset
[R30_REGNUM
] = UNITS_PER_WORD
;
1853 cfun
->machine
->frame
.wb_candidate2
= R30_REGNUM
;
1854 cfun
->machine
->frame
.hardfp_offset
= 2 * UNITS_PER_WORD
;
1855 offset
+= 2 * UNITS_PER_WORD
;
1858 /* Now assign stack slots for them. */
1859 for (regno
= R0_REGNUM
; regno
<= R30_REGNUM
; regno
++)
1860 if (cfun
->machine
->frame
.reg_offset
[regno
] == SLOT_REQUIRED
)
1862 cfun
->machine
->frame
.reg_offset
[regno
] = offset
;
1863 if (cfun
->machine
->frame
.wb_candidate1
== FIRST_PSEUDO_REGISTER
)
1864 cfun
->machine
->frame
.wb_candidate1
= regno
;
1865 else if (cfun
->machine
->frame
.wb_candidate2
== FIRST_PSEUDO_REGISTER
)
1866 cfun
->machine
->frame
.wb_candidate2
= regno
;
1867 offset
+= UNITS_PER_WORD
;
1870 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
1871 if (cfun
->machine
->frame
.reg_offset
[regno
] == SLOT_REQUIRED
)
1873 cfun
->machine
->frame
.reg_offset
[regno
] = offset
;
1874 if (cfun
->machine
->frame
.wb_candidate1
== FIRST_PSEUDO_REGISTER
)
1875 cfun
->machine
->frame
.wb_candidate1
= regno
;
1876 else if (cfun
->machine
->frame
.wb_candidate2
== FIRST_PSEUDO_REGISTER
1877 && cfun
->machine
->frame
.wb_candidate1
>= V0_REGNUM
)
1878 cfun
->machine
->frame
.wb_candidate2
= regno
;
1879 offset
+= UNITS_PER_WORD
;
1882 cfun
->machine
->frame
.padding0
=
1883 (AARCH64_ROUND_UP (offset
, STACK_BOUNDARY
/ BITS_PER_UNIT
) - offset
);
1884 offset
= AARCH64_ROUND_UP (offset
, STACK_BOUNDARY
/ BITS_PER_UNIT
);
1886 cfun
->machine
->frame
.saved_regs_size
= offset
;
1888 cfun
->machine
->frame
.hard_fp_offset
1889 = AARCH64_ROUND_UP (cfun
->machine
->frame
.saved_varargs_size
1891 + cfun
->machine
->frame
.saved_regs_size
,
1892 STACK_BOUNDARY
/ BITS_PER_UNIT
);
1894 cfun
->machine
->frame
.frame_size
1895 = AARCH64_ROUND_UP (cfun
->machine
->frame
.hard_fp_offset
1896 + crtl
->outgoing_args_size
,
1897 STACK_BOUNDARY
/ BITS_PER_UNIT
);
1899 cfun
->machine
->frame
.laid_out
= true;
1902 /* Make the last instruction frame-related and note that it performs
1903 the operation described by FRAME_PATTERN. */
1906 aarch64_set_frame_expr (rtx frame_pattern
)
1910 insn
= get_last_insn ();
1911 RTX_FRAME_RELATED_P (insn
) = 1;
1912 RTX_FRAME_RELATED_P (frame_pattern
) = 1;
1913 REG_NOTES (insn
) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
1919 aarch64_register_saved_on_entry (int regno
)
1921 return cfun
->machine
->frame
.reg_offset
[regno
] >= 0;
/* Return the first register in [REGNO, LIMIT] that has a save slot,
   or LIMIT + 1 if there is none.  */
static unsigned
aarch64_next_callee_save (unsigned regno, unsigned limit)
{
  while (regno <= limit && !aarch64_register_saved_on_entry (regno))
    regno++;
  return regno;
}
1933 aarch64_pushwb_single_reg (enum machine_mode mode
, unsigned regno
,
1934 HOST_WIDE_INT adjustment
)
1936 rtx base_rtx
= stack_pointer_rtx
;
1939 reg
= gen_rtx_REG (mode
, regno
);
1940 mem
= gen_rtx_PRE_MODIFY (Pmode
, base_rtx
,
1941 plus_constant (Pmode
, base_rtx
, -adjustment
));
1942 mem
= gen_rtx_MEM (mode
, mem
);
1944 insn
= emit_move_insn (mem
, reg
);
1945 RTX_FRAME_RELATED_P (insn
) = 1;
1949 aarch64_popwb_single_reg (enum machine_mode mode
, unsigned regno
,
1950 HOST_WIDE_INT adjustment
)
1952 rtx base_rtx
= stack_pointer_rtx
;
1955 reg
= gen_rtx_REG (mode
, regno
);
1956 mem
= gen_rtx_POST_MODIFY (Pmode
, base_rtx
,
1957 plus_constant (Pmode
, base_rtx
, adjustment
));
1958 mem
= gen_rtx_MEM (mode
, mem
);
1960 insn
= emit_move_insn (reg
, mem
);
1961 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
1962 RTX_FRAME_RELATED_P (insn
) = 1;
1966 aarch64_gen_storewb_pair (enum machine_mode mode
, rtx base
, rtx reg
, rtx reg2
,
1967 HOST_WIDE_INT adjustment
)
1972 return gen_storewb_pairdi_di (base
, base
, reg
, reg2
,
1973 GEN_INT (-adjustment
),
1974 GEN_INT (UNITS_PER_WORD
- adjustment
));
1976 return gen_storewb_pairdf_di (base
, base
, reg
, reg2
,
1977 GEN_INT (-adjustment
),
1978 GEN_INT (UNITS_PER_WORD
- adjustment
));
1985 aarch64_pushwb_pair_reg (enum machine_mode mode
, unsigned regno1
,
1986 unsigned regno2
, HOST_WIDE_INT adjustment
)
1989 rtx reg1
= gen_rtx_REG (mode
, regno1
);
1990 rtx reg2
= gen_rtx_REG (mode
, regno2
);
1992 insn
= emit_insn (aarch64_gen_storewb_pair (mode
, stack_pointer_rtx
, reg1
,
1994 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 2)) = 1;
1996 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 1)) = 1;
1997 RTX_FRAME_RELATED_P (insn
) = 1;
2001 aarch64_gen_loadwb_pair (enum machine_mode mode
, rtx base
, rtx reg
, rtx reg2
,
2002 HOST_WIDE_INT adjustment
)
2007 return gen_loadwb_pairdi_di (base
, base
, reg
, reg2
, GEN_INT (adjustment
),
2008 GEN_INT (adjustment
+ UNITS_PER_WORD
));
2010 return gen_loadwb_pairdf_di (base
, base
, reg
, reg2
, GEN_INT (adjustment
),
2011 GEN_INT (adjustment
+ UNITS_PER_WORD
));
2018 aarch64_popwb_pair_reg (enum machine_mode mode
, unsigned regno1
,
2019 unsigned regno2
, HOST_WIDE_INT adjustment
, rtx cfa
)
2022 rtx reg1
= gen_rtx_REG (mode
, regno1
);
2023 rtx reg2
= gen_rtx_REG (mode
, regno2
);
2025 insn
= emit_insn (aarch64_gen_loadwb_pair (mode
, stack_pointer_rtx
, reg1
,
2027 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 2)) = 1;
2028 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 1)) = 1;
2029 RTX_FRAME_RELATED_P (insn
) = 1;
2032 add_reg_note (insn
, REG_CFA_ADJUST_CFA
,
2033 (gen_rtx_SET (Pmode
, stack_pointer_rtx
,
2034 plus_constant (Pmode
, cfa
, adjustment
))));
2036 add_reg_note (insn
, REG_CFA_RESTORE
, reg1
);
2037 add_reg_note (insn
, REG_CFA_RESTORE
, reg2
);
2041 aarch64_gen_store_pair (enum machine_mode mode
, rtx mem1
, rtx reg1
, rtx mem2
,
2047 return gen_store_pairdi (mem1
, reg1
, mem2
, reg2
);
2050 return gen_store_pairdf (mem1
, reg1
, mem2
, reg2
);
2058 aarch64_gen_load_pair (enum machine_mode mode
, rtx reg1
, rtx mem1
, rtx reg2
,
2064 return gen_load_pairdi (reg1
, mem1
, reg2
, mem2
);
2067 return gen_load_pairdf (reg1
, mem1
, reg2
, mem2
);
2076 aarch64_save_callee_saves (enum machine_mode mode
, HOST_WIDE_INT start_offset
,
2077 unsigned start
, unsigned limit
, bool skip_wb
)
2080 rtx (*gen_mem_ref
) (enum machine_mode
, rtx
) = (frame_pointer_needed
2081 ? gen_frame_mem
: gen_rtx_MEM
);
2085 for (regno
= aarch64_next_callee_save (start
, limit
);
2087 regno
= aarch64_next_callee_save (regno
+ 1, limit
))
2090 HOST_WIDE_INT offset
;
2093 && (regno
== cfun
->machine
->frame
.wb_candidate1
2094 || regno
== cfun
->machine
->frame
.wb_candidate2
))
2097 reg
= gen_rtx_REG (mode
, regno
);
2098 offset
= start_offset
+ cfun
->machine
->frame
.reg_offset
[regno
];
2099 mem
= gen_mem_ref (mode
, plus_constant (Pmode
, stack_pointer_rtx
,
2102 regno2
= aarch64_next_callee_save (regno
+ 1, limit
);
2105 && ((cfun
->machine
->frame
.reg_offset
[regno
] + UNITS_PER_WORD
)
2106 == cfun
->machine
->frame
.reg_offset
[regno2
]))
2109 rtx reg2
= gen_rtx_REG (mode
, regno2
);
2112 offset
= start_offset
+ cfun
->machine
->frame
.reg_offset
[regno2
];
2113 mem2
= gen_mem_ref (mode
, plus_constant (Pmode
, stack_pointer_rtx
,
2115 insn
= emit_insn (aarch64_gen_store_pair (mode
, mem
, reg
, mem2
,
2118 /* The first part of a frame-related parallel insn is
2119 always assumed to be relevant to the frame
2120 calculations; subsequent parts, are only
2121 frame-related if explicitly marked. */
2122 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 1)) = 1;
2126 insn
= emit_move_insn (mem
, reg
);
2128 RTX_FRAME_RELATED_P (insn
) = 1;
2133 aarch64_restore_callee_saves (enum machine_mode mode
,
2134 HOST_WIDE_INT start_offset
, unsigned start
,
2135 unsigned limit
, bool skip_wb
)
2138 rtx base_rtx
= stack_pointer_rtx
;
2139 rtx (*gen_mem_ref
) (enum machine_mode
, rtx
) = (frame_pointer_needed
2140 ? gen_frame_mem
: gen_rtx_MEM
);
2143 HOST_WIDE_INT offset
;
2145 for (regno
= aarch64_next_callee_save (start
, limit
);
2147 regno
= aarch64_next_callee_save (regno
+ 1, limit
))
2152 && (regno
== cfun
->machine
->frame
.wb_candidate1
2153 || regno
== cfun
->machine
->frame
.wb_candidate2
))
2156 reg
= gen_rtx_REG (mode
, regno
);
2157 offset
= start_offset
+ cfun
->machine
->frame
.reg_offset
[regno
];
2158 mem
= gen_mem_ref (mode
, plus_constant (Pmode
, base_rtx
, offset
));
2160 regno2
= aarch64_next_callee_save (regno
+ 1, limit
);
2163 && ((cfun
->machine
->frame
.reg_offset
[regno
] + UNITS_PER_WORD
)
2164 == cfun
->machine
->frame
.reg_offset
[regno2
]))
2166 rtx reg2
= gen_rtx_REG (mode
, regno2
);
2169 offset
= start_offset
+ cfun
->machine
->frame
.reg_offset
[regno2
];
2170 mem2
= gen_mem_ref (mode
, plus_constant (Pmode
, base_rtx
, offset
));
2171 insn
= emit_insn (aarch64_gen_load_pair (mode
, reg
, mem
, reg2
,
2173 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
2174 add_reg_note (insn
, REG_CFA_RESTORE
, reg2
);
2176 /* The first part of a frame-related parallel insn is
2177 always assumed to be relevant to the frame
2178 calculations; subsequent parts, are only
2179 frame-related if explicitly marked. */
2180 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 1)) = 1;
2185 insn
= emit_move_insn (reg
, mem
);
2186 add_reg_note (insn
, REG_CFA_RESTORE
, reg
);
2189 RTX_FRAME_RELATED_P (insn
) = 1;
2193 /* AArch64 stack frames generated by this compiler look like:
2195 +-------------------------------+
2197 | incoming stack arguments |
2199 +-------------------------------+
2200 | | <-- incoming stack pointer (aligned)
2201 | callee-allocated save area |
2202 | for register varargs |
2204 +-------------------------------+
2205 | local variables | <-- frame_pointer_rtx
2207 +-------------------------------+
2209 +-------------------------------+ |
2210 | callee-saved registers | | frame.saved_regs_size
2211 +-------------------------------+ |
2213 +-------------------------------+ |
2214 | FP' | / <- hard_frame_pointer_rtx (aligned)
2215 +-------------------------------+
2216 | dynamic allocation |
2217 +-------------------------------+
2219 +-------------------------------+
2220 | outgoing stack arguments | <-- arg_pointer
2222 +-------------------------------+
2223 | | <-- stack_pointer_rtx (aligned)
2225 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
2226 but leave frame_pointer_rtx and hard_frame_pointer_rtx
2229 /* Generate the prologue instructions for entry into a function.
2230 Establish the stack frame by decreasing the stack pointer with a
2231 properly calculated size and, if necessary, create a frame record
2232 filled with the values of LR and previous frame pointer. The
2233 current FP is also set up if it is in use. */
2236 aarch64_expand_prologue (void)
2238 /* sub sp, sp, #<frame_size>
2239 stp {fp, lr}, [sp, #<frame_size> - 16]
2240 add fp, sp, #<frame_size> - hardfp_offset
2241 stp {cs_reg}, [fp, #-16] etc.
2243 sub sp, sp, <final_adjustment_if_any>
2245 HOST_WIDE_INT frame_size
, offset
;
2246 HOST_WIDE_INT fp_offset
; /* Offset from hard FP to SP. */
2249 aarch64_layout_frame ();
2251 if (flag_stack_usage_info
)
2252 current_function_static_stack_size
= cfun
->machine
->frame
.frame_size
;
2254 frame_size
= cfun
->machine
->frame
.frame_size
;
2255 offset
= cfun
->machine
->frame
.frame_size
;
2257 fp_offset
= cfun
->machine
->frame
.frame_size
2258 - cfun
->machine
->frame
.hard_fp_offset
;
2260 /* Store pairs and load pairs have a range only -512 to 504. */
2263 /* When the frame has a large size, an initial decrease is done on
2264 the stack pointer to jump over the callee-allocated save area for
2265 register varargs, the local variable area and/or the callee-saved
2266 register area. This will allow the pre-index write-back
2267 store pair instructions to be used for setting up the stack frame
2269 offset
= cfun
->machine
->frame
.hard_fp_offset
;
2271 offset
= cfun
->machine
->frame
.saved_regs_size
;
2273 frame_size
-= (offset
+ crtl
->outgoing_args_size
);
2276 if (frame_size
>= 0x1000000)
2278 rtx op0
= gen_rtx_REG (Pmode
, IP0_REGNUM
);
2279 emit_move_insn (op0
, GEN_INT (-frame_size
));
2280 emit_insn (gen_add2_insn (stack_pointer_rtx
, op0
));
2281 aarch64_set_frame_expr (gen_rtx_SET
2282 (Pmode
, stack_pointer_rtx
,
2283 plus_constant (Pmode
,
2287 else if (frame_size
> 0)
2289 if ((frame_size
& 0xfff) != frame_size
)
2291 insn
= emit_insn (gen_add2_insn
2293 GEN_INT (-(frame_size
2294 & ~(HOST_WIDE_INT
)0xfff))));
2295 RTX_FRAME_RELATED_P (insn
) = 1;
2297 if ((frame_size
& 0xfff) != 0)
2299 insn
= emit_insn (gen_add2_insn
2301 GEN_INT (-(frame_size
2302 & (HOST_WIDE_INT
)0xfff))));
2303 RTX_FRAME_RELATED_P (insn
) = 1;
2312 bool skip_wb
= false;
2314 if (frame_pointer_needed
)
2320 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
,
2321 GEN_INT (-offset
)));
2322 RTX_FRAME_RELATED_P (insn
) = 1;
2323 aarch64_set_frame_expr (gen_rtx_SET
2324 (Pmode
, stack_pointer_rtx
,
2325 gen_rtx_MINUS (Pmode
, stack_pointer_rtx
,
2326 GEN_INT (offset
))));
2328 aarch64_save_callee_saves (DImode
, fp_offset
, R29_REGNUM
,
2332 aarch64_pushwb_pair_reg (DImode
, R29_REGNUM
, R30_REGNUM
, offset
);
2334 /* Set up frame pointer to point to the location of the
2335 previous frame pointer on the stack. */
2336 insn
= emit_insn (gen_add3_insn (hard_frame_pointer_rtx
,
2338 GEN_INT (fp_offset
)));
2339 aarch64_set_frame_expr (gen_rtx_SET
2340 (Pmode
, hard_frame_pointer_rtx
,
2341 plus_constant (Pmode
,
2344 RTX_FRAME_RELATED_P (insn
) = 1;
2345 insn
= emit_insn (gen_stack_tie (stack_pointer_rtx
,
2346 hard_frame_pointer_rtx
));
2350 unsigned reg1
= cfun
->machine
->frame
.wb_candidate1
;
2351 unsigned reg2
= cfun
->machine
->frame
.wb_candidate2
;
2354 || reg1
== FIRST_PSEUDO_REGISTER
2355 || (reg2
== FIRST_PSEUDO_REGISTER
2358 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
,
2359 GEN_INT (-offset
)));
2360 RTX_FRAME_RELATED_P (insn
) = 1;
2364 enum machine_mode mode1
= (reg1
<= R30_REGNUM
) ? DImode
: DFmode
;
2368 if (reg2
== FIRST_PSEUDO_REGISTER
)
2369 aarch64_pushwb_single_reg (mode1
, reg1
, offset
);
2371 aarch64_pushwb_pair_reg (mode1
, reg1
, reg2
, offset
);
2375 aarch64_save_callee_saves (DImode
, fp_offset
, R0_REGNUM
, R30_REGNUM
,
2377 aarch64_save_callee_saves (DFmode
, fp_offset
, V0_REGNUM
, V31_REGNUM
,
2381 /* when offset >= 512,
2382 sub sp, sp, #<outgoing_args_size> */
2383 if (frame_size
> -1)
2385 if (crtl
->outgoing_args_size
> 0)
2387 insn
= emit_insn (gen_add2_insn
2389 GEN_INT (- crtl
->outgoing_args_size
)));
2390 RTX_FRAME_RELATED_P (insn
) = 1;
2395 /* Generate the epilogue instructions for returning from a function. */
2397 aarch64_expand_epilogue (bool for_sibcall
)
2399 HOST_WIDE_INT frame_size
, offset
;
2400 HOST_WIDE_INT fp_offset
;
2404 aarch64_layout_frame ();
2406 offset
= frame_size
= cfun
->machine
->frame
.frame_size
;
2407 fp_offset
= cfun
->machine
->frame
.frame_size
2408 - cfun
->machine
->frame
.hard_fp_offset
;
2410 cfa_reg
= frame_pointer_needed
? hard_frame_pointer_rtx
: stack_pointer_rtx
;
2412 /* Store pairs and load pairs have a range only -512 to 504. */
2415 offset
= cfun
->machine
->frame
.hard_fp_offset
;
2417 offset
= cfun
->machine
->frame
.saved_regs_size
;
2419 frame_size
-= (offset
+ crtl
->outgoing_args_size
);
2421 if (!frame_pointer_needed
&& crtl
->outgoing_args_size
> 0)
2423 insn
= emit_insn (gen_add2_insn
2425 GEN_INT (crtl
->outgoing_args_size
)));
2426 RTX_FRAME_RELATED_P (insn
) = 1;
2432 /* If there were outgoing arguments or we've done dynamic stack
2433 allocation, then restore the stack pointer from the frame
2434 pointer. This is at most one insn and more efficient than using
2435 GCC's internal mechanism. */
2436 if (frame_pointer_needed
2437 && (crtl
->outgoing_args_size
|| cfun
->calls_alloca
))
2439 insn
= emit_insn (gen_add3_insn (stack_pointer_rtx
,
2440 hard_frame_pointer_rtx
,
2442 offset
= offset
- fp_offset
;
2443 RTX_FRAME_RELATED_P (insn
) = 1;
2444 /* As SP is set to (FP - fp_offset), according to the rules in
2445 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2446 from the value of SP from now on. */
2447 cfa_reg
= stack_pointer_rtx
;
2452 unsigned reg1
= cfun
->machine
->frame
.wb_candidate1
;
2453 unsigned reg2
= cfun
->machine
->frame
.wb_candidate2
;
2454 bool skip_wb
= true;
2456 if (frame_pointer_needed
)
2459 || reg1
== FIRST_PSEUDO_REGISTER
2460 || (reg2
== FIRST_PSEUDO_REGISTER
2464 aarch64_restore_callee_saves (DImode
, fp_offset
, R0_REGNUM
, R30_REGNUM
,
2466 aarch64_restore_callee_saves (DFmode
, fp_offset
, V0_REGNUM
, V31_REGNUM
,
2471 enum machine_mode mode1
= (reg1
<= R30_REGNUM
) ? DImode
: DFmode
;
2473 if (reg2
== FIRST_PSEUDO_REGISTER
)
2474 aarch64_popwb_single_reg (mode1
, reg1
, offset
);
2477 if (reg1
!= HARD_FRAME_POINTER_REGNUM
)
2480 aarch64_popwb_pair_reg (mode1
, reg1
, reg2
, offset
, cfa_reg
);
2485 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
,
2487 RTX_FRAME_RELATED_P (insn
) = 1;
2491 /* Stack adjustment for exception handler. */
2492 if (crtl
->calls_eh_return
)
2494 /* We need to unwind the stack by the offset computed by
2495 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2496 based on SP. Ideally we would update the SP and define the
2497 CFA along the lines of:
2499 SP = SP + EH_RETURN_STACKADJ_RTX
2500 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2502 However the dwarf emitter only understands a constant
2505 The solution chosen here is to use the otherwise unused IP0
2506 as a temporary register to hold the current SP value. The
2507 CFA is described using IP0 then SP is modified. */
2509 rtx ip0
= gen_rtx_REG (DImode
, IP0_REGNUM
);
2511 insn
= emit_move_insn (ip0
, stack_pointer_rtx
);
2512 add_reg_note (insn
, REG_CFA_DEF_CFA
, ip0
);
2513 RTX_FRAME_RELATED_P (insn
) = 1;
2515 emit_insn (gen_add2_insn (stack_pointer_rtx
, EH_RETURN_STACKADJ_RTX
));
2517 /* Ensure the assignment to IP0 does not get optimized away. */
2521 if (frame_size
> -1)
2523 if (frame_size
>= 0x1000000)
2525 rtx op0
= gen_rtx_REG (Pmode
, IP0_REGNUM
);
2526 emit_move_insn (op0
, GEN_INT (frame_size
));
2527 emit_insn (gen_add2_insn (stack_pointer_rtx
, op0
));
2528 aarch64_set_frame_expr (gen_rtx_SET
2529 (Pmode
, stack_pointer_rtx
,
2530 plus_constant (Pmode
,
2534 else if (frame_size
> 0)
2536 if ((frame_size
& 0xfff) != 0)
2538 insn
= emit_insn (gen_add2_insn
2540 GEN_INT ((frame_size
2541 & (HOST_WIDE_INT
) 0xfff))));
2542 RTX_FRAME_RELATED_P (insn
) = 1;
2544 if ((frame_size
& 0xfff) != frame_size
)
2546 insn
= emit_insn (gen_add2_insn
2548 GEN_INT ((frame_size
2549 & ~ (HOST_WIDE_INT
) 0xfff))));
2550 RTX_FRAME_RELATED_P (insn
) = 1;
2554 aarch64_set_frame_expr (gen_rtx_SET (Pmode
, stack_pointer_rtx
,
2555 plus_constant (Pmode
,
2560 emit_use (gen_rtx_REG (DImode
, LR_REGNUM
));
2562 emit_jump_insn (ret_rtx
);
2565 /* Return the place to copy the exception unwinding return address to.
2566 This will probably be a stack slot, but could (in theory be the
2567 return register). */
2569 aarch64_final_eh_return_addr (void)
2571 HOST_WIDE_INT fp_offset
;
2573 aarch64_layout_frame ();
2575 fp_offset
= cfun
->machine
->frame
.frame_size
2576 - cfun
->machine
->frame
.hard_fp_offset
;
2578 if (cfun
->machine
->frame
.reg_offset
[LR_REGNUM
] < 0)
2579 return gen_rtx_REG (DImode
, LR_REGNUM
);
2581 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2582 result in a store to save LR introduced by builtin_eh_return () being
2583 incorrectly deleted because the alias is not detected.
2584 So in the calculation of the address to copy the exception unwinding
2585 return address to, we note 2 cases.
2586 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2587 we return a SP-relative location since all the addresses are SP-relative
2588 in this case. This prevents the store from being optimized away.
2589 If the fp_offset is not 0, then the addresses will be FP-relative and
2590 therefore we return a FP-relative location. */
2592 if (frame_pointer_needed
)
2595 return gen_frame_mem (DImode
,
2596 plus_constant (Pmode
, hard_frame_pointer_rtx
, UNITS_PER_WORD
));
2598 return gen_frame_mem (DImode
,
2599 plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
));
2602 /* If FP is not needed, we calculate the location of LR, which would be
2603 at the top of the saved registers block. */
2605 return gen_frame_mem (DImode
,
2606 plus_constant (Pmode
,
2609 + cfun
->machine
->frame
.saved_regs_size
2610 - 2 * UNITS_PER_WORD
));
2613 /* Possibly output code to build up a constant in a register. For
2614 the benefit of the costs infrastructure, returns the number of
2615 instructions which would be emitted. GENERATE inhibits or
2616 enables code generation. */
2619 aarch64_build_constant (int regnum
, HOST_WIDE_INT val
, bool generate
)
2623 if (aarch64_bitmask_imm (val
, DImode
))
2626 emit_move_insn (gen_rtx_REG (Pmode
, regnum
), GEN_INT (val
));
2634 HOST_WIDE_INT valp
= val
>> 16;
2638 for (i
= 16; i
< 64; i
+= 16)
2640 valm
= (valp
& 0xffff);
2651 /* zcount contains the number of additional MOVK instructions
2652 required if the constant is built up with an initial MOVZ instruction,
2653 while ncount is the number of MOVK instructions required if starting
2654 with a MOVN instruction. Choose the sequence that yields the fewest
2655 number of instructions, preferring MOVZ instructions when they are both
2657 if (ncount
< zcount
)
2660 emit_move_insn (gen_rtx_REG (Pmode
, regnum
),
2661 GEN_INT (val
| ~(HOST_WIDE_INT
) 0xffff));
2668 emit_move_insn (gen_rtx_REG (Pmode
, regnum
),
2669 GEN_INT (val
& 0xffff));
2676 for (i
= 16; i
< 64; i
+= 16)
2678 if ((val
& 0xffff) != tval
)
2681 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode
, regnum
),
2683 GEN_INT (val
& 0xffff)));
2693 aarch64_add_constant (int regnum
, int scratchreg
, HOST_WIDE_INT delta
)
2695 HOST_WIDE_INT mdelta
= delta
;
2696 rtx this_rtx
= gen_rtx_REG (Pmode
, regnum
);
2697 rtx scratch_rtx
= gen_rtx_REG (Pmode
, scratchreg
);
2702 if (mdelta
>= 4096 * 4096)
2704 (void) aarch64_build_constant (scratchreg
, delta
, true);
2705 emit_insn (gen_add3_insn (this_rtx
, this_rtx
, scratch_rtx
));
2707 else if (mdelta
> 0)
2711 emit_insn (gen_rtx_SET (Pmode
, scratch_rtx
, GEN_INT (mdelta
/ 4096)));
2712 rtx shift
= gen_rtx_ASHIFT (Pmode
, scratch_rtx
, GEN_INT (12));
2714 emit_insn (gen_rtx_SET (Pmode
, this_rtx
,
2715 gen_rtx_MINUS (Pmode
, this_rtx
, shift
)));
2717 emit_insn (gen_rtx_SET (Pmode
, this_rtx
,
2718 gen_rtx_PLUS (Pmode
, this_rtx
, shift
)));
2720 if (mdelta
% 4096 != 0)
2722 scratch_rtx
= GEN_INT ((delta
< 0 ? -1 : 1) * (mdelta
% 4096));
2723 emit_insn (gen_rtx_SET (Pmode
, this_rtx
,
2724 gen_rtx_PLUS (Pmode
, this_rtx
, scratch_rtx
)));
2729 /* Output code to add DELTA to the first argument, and then jump
2730 to FUNCTION. Used for C++ multiple inheritance. */
2732 aarch64_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
2733 HOST_WIDE_INT delta
,
2734 HOST_WIDE_INT vcall_offset
,
2737 /* The this pointer is always in x0. Note that this differs from
2738 Arm where the this pointer maybe bumped to r1 if r0 is required
2739 to return a pointer to an aggregate. On AArch64 a result value
2740 pointer will be in x8. */
2741 int this_regno
= R0_REGNUM
;
2742 rtx this_rtx
, temp0
, temp1
, addr
, insn
, funexp
;
2744 reload_completed
= 1;
2745 emit_note (NOTE_INSN_PROLOGUE_END
);
2747 if (vcall_offset
== 0)
2748 aarch64_add_constant (this_regno
, IP1_REGNUM
, delta
);
2751 gcc_assert ((vcall_offset
& (POINTER_BYTES
- 1)) == 0);
2753 this_rtx
= gen_rtx_REG (Pmode
, this_regno
);
2754 temp0
= gen_rtx_REG (Pmode
, IP0_REGNUM
);
2755 temp1
= gen_rtx_REG (Pmode
, IP1_REGNUM
);
2760 if (delta
>= -256 && delta
< 256)
2761 addr
= gen_rtx_PRE_MODIFY (Pmode
, this_rtx
,
2762 plus_constant (Pmode
, this_rtx
, delta
));
2764 aarch64_add_constant (this_regno
, IP1_REGNUM
, delta
);
2767 if (Pmode
== ptr_mode
)
2768 aarch64_emit_move (temp0
, gen_rtx_MEM (ptr_mode
, addr
));
2770 aarch64_emit_move (temp0
,
2771 gen_rtx_ZERO_EXTEND (Pmode
,
2772 gen_rtx_MEM (ptr_mode
, addr
)));
2774 if (vcall_offset
>= -256 && vcall_offset
< 4096 * POINTER_BYTES
)
2775 addr
= plus_constant (Pmode
, temp0
, vcall_offset
);
2778 (void) aarch64_build_constant (IP1_REGNUM
, vcall_offset
, true);
2779 addr
= gen_rtx_PLUS (Pmode
, temp0
, temp1
);
2782 if (Pmode
== ptr_mode
)
2783 aarch64_emit_move (temp1
, gen_rtx_MEM (ptr_mode
,addr
));
2785 aarch64_emit_move (temp1
,
2786 gen_rtx_SIGN_EXTEND (Pmode
,
2787 gen_rtx_MEM (ptr_mode
, addr
)));
2789 emit_insn (gen_add2_insn (this_rtx
, temp1
));
2792 /* Generate a tail call to the target function. */
2793 if (!TREE_USED (function
))
2795 assemble_external (function
);
2796 TREE_USED (function
) = 1;
2798 funexp
= XEXP (DECL_RTL (function
), 0);
2799 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
2800 insn
= emit_call_insn (gen_sibcall (funexp
, const0_rtx
, NULL_RTX
));
2801 SIBLING_CALL_P (insn
) = 1;
2803 insn
= get_insns ();
2804 shorten_branches (insn
);
2805 final_start_function (insn
, file
, 1);
2806 final (insn
, file
, 1);
2807 final_end_function ();
2809 /* Stop pretending to be a post-reload pass. */
2810 reload_completed
= 0;
2814 aarch64_tls_operand_p_1 (rtx
*x
, void *data ATTRIBUTE_UNUSED
)
2816 if (GET_CODE (*x
) == SYMBOL_REF
)
2817 return SYMBOL_REF_TLS_MODEL (*x
) != 0;
2819 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2820 TLS offsets, not real symbol references. */
2821 if (GET_CODE (*x
) == UNSPEC
2822 && XINT (*x
, 1) == UNSPEC_TLS
)
2829 aarch64_tls_referenced_p (rtx x
)
2831 if (!TARGET_HAVE_TLS
)
2834 return for_each_rtx (&x
, aarch64_tls_operand_p_1
, NULL
);
2839 aarch64_bitmasks_cmp (const void *i1
, const void *i2
)
2841 const unsigned HOST_WIDE_INT
*imm1
= (const unsigned HOST_WIDE_INT
*) i1
;
2842 const unsigned HOST_WIDE_INT
*imm2
= (const unsigned HOST_WIDE_INT
*) i2
;
2853 aarch64_build_bitmask_table (void)
2855 unsigned HOST_WIDE_INT mask
, imm
;
2856 unsigned int log_e
, e
, s
, r
;
2857 unsigned int nimms
= 0;
2859 for (log_e
= 1; log_e
<= 6; log_e
++)
2863 mask
= ~(HOST_WIDE_INT
) 0;
2865 mask
= ((HOST_WIDE_INT
) 1 << e
) - 1;
2866 for (s
= 1; s
< e
; s
++)
2868 for (r
= 0; r
< e
; r
++)
2870 /* set s consecutive bits to 1 (s < 64) */
2871 imm
= ((unsigned HOST_WIDE_INT
)1 << s
) - 1;
2872 /* rotate right by r */
2874 imm
= ((imm
>> r
) | (imm
<< (e
- r
))) & mask
;
2875 /* replicate the constant depending on SIMD size */
2877 case 1: imm
|= (imm
<< 2);
2878 case 2: imm
|= (imm
<< 4);
2879 case 3: imm
|= (imm
<< 8);
2880 case 4: imm
|= (imm
<< 16);
2881 case 5: imm
|= (imm
<< 32);
2887 gcc_assert (nimms
< AARCH64_NUM_BITMASKS
);
2888 aarch64_bitmasks
[nimms
++] = imm
;
2893 gcc_assert (nimms
== AARCH64_NUM_BITMASKS
);
2894 qsort (aarch64_bitmasks
, nimms
, sizeof (aarch64_bitmasks
[0]),
2895 aarch64_bitmasks_cmp
);
2899 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2900 a left shift of 0 or 12 bits. */
2902 aarch64_uimm12_shift (HOST_WIDE_INT val
)
2904 return ((val
& (((HOST_WIDE_INT
) 0xfff) << 0)) == val
2905 || (val
& (((HOST_WIDE_INT
) 0xfff) << 12)) == val
2910 /* Return true if val is an immediate that can be loaded into a
2911 register by a MOVZ instruction. */
2913 aarch64_movw_imm (HOST_WIDE_INT val
, enum machine_mode mode
)
2915 if (GET_MODE_SIZE (mode
) > 4)
2917 if ((val
& (((HOST_WIDE_INT
) 0xffff) << 32)) == val
2918 || (val
& (((HOST_WIDE_INT
) 0xffff) << 48)) == val
)
2923 /* Ignore sign extension. */
2924 val
&= (HOST_WIDE_INT
) 0xffffffff;
2926 return ((val
& (((HOST_WIDE_INT
) 0xffff) << 0)) == val
2927 || (val
& (((HOST_WIDE_INT
) 0xffff) << 16)) == val
);
2931 /* Return true if val is a valid bitmask immediate. */
2933 aarch64_bitmask_imm (HOST_WIDE_INT val
, enum machine_mode mode
)
2935 if (GET_MODE_SIZE (mode
) < 8)
2937 /* Replicate bit pattern. */
2938 val
&= (HOST_WIDE_INT
) 0xffffffff;
2941 return bsearch (&val
, aarch64_bitmasks
, AARCH64_NUM_BITMASKS
,
2942 sizeof (aarch64_bitmasks
[0]), aarch64_bitmasks_cmp
) != NULL
;
2946 /* Return true if val is an immediate that can be loaded into a
2947 register in a single instruction. */
2949 aarch64_move_imm (HOST_WIDE_INT val
, enum machine_mode mode
)
2951 if (aarch64_movw_imm (val
, mode
) || aarch64_movw_imm (~val
, mode
))
2953 return aarch64_bitmask_imm (val
, mode
);
2957 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
2961 if (GET_CODE (x
) == HIGH
)
2964 split_const (x
, &base
, &offset
);
2965 if (GET_CODE (base
) == SYMBOL_REF
|| GET_CODE (base
) == LABEL_REF
)
2967 if (aarch64_classify_symbol (base
, SYMBOL_CONTEXT_ADR
)
2968 != SYMBOL_FORCE_TO_MEM
)
2971 /* Avoid generating a 64-bit relocation in ILP32; leave
2972 to aarch64_expand_mov_immediate to handle it properly. */
2973 return mode
!= ptr_mode
;
2976 return aarch64_tls_referenced_p (x
);
2979 /* Return true if register REGNO is a valid index register.
2980 STRICT_P is true if REG_OK_STRICT is in effect. */
2983 aarch64_regno_ok_for_index_p (int regno
, bool strict_p
)
2985 if (!HARD_REGISTER_NUM_P (regno
))
2993 regno
= reg_renumber
[regno
];
2995 return GP_REGNUM_P (regno
);
2998 /* Return true if register REGNO is a valid base register for mode MODE.
2999 STRICT_P is true if REG_OK_STRICT is in effect. */
3002 aarch64_regno_ok_for_base_p (int regno
, bool strict_p
)
3004 if (!HARD_REGISTER_NUM_P (regno
))
3012 regno
= reg_renumber
[regno
];
3015 /* The fake registers will be eliminated to either the stack or
3016 hard frame pointer, both of which are usually valid base registers.
3017 Reload deals with the cases where the eliminated form isn't valid. */
3018 return (GP_REGNUM_P (regno
)
3019 || regno
== SP_REGNUM
3020 || regno
== FRAME_POINTER_REGNUM
3021 || regno
== ARG_POINTER_REGNUM
);
3024 /* Return true if X is a valid base register for mode MODE.
3025 STRICT_P is true if REG_OK_STRICT is in effect. */
3028 aarch64_base_register_rtx_p (rtx x
, bool strict_p
)
3030 if (!strict_p
&& GET_CODE (x
) == SUBREG
)
3033 return (REG_P (x
) && aarch64_regno_ok_for_base_p (REGNO (x
), strict_p
));
3036 /* Return true if address offset is a valid index. If it is, fill in INFO
3037 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
3040 aarch64_classify_index (struct aarch64_address_info
*info
, rtx x
,
3041 enum machine_mode mode
, bool strict_p
)
3043 enum aarch64_address_type type
;
3048 if ((REG_P (x
) || GET_CODE (x
) == SUBREG
)
3049 && GET_MODE (x
) == Pmode
)
3051 type
= ADDRESS_REG_REG
;
3055 /* (sign_extend:DI (reg:SI)) */
3056 else if ((GET_CODE (x
) == SIGN_EXTEND
3057 || GET_CODE (x
) == ZERO_EXTEND
)
3058 && GET_MODE (x
) == DImode
3059 && GET_MODE (XEXP (x
, 0)) == SImode
)
3061 type
= (GET_CODE (x
) == SIGN_EXTEND
)
3062 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
3063 index
= XEXP (x
, 0);
3066 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3067 else if (GET_CODE (x
) == MULT
3068 && (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
3069 || GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
3070 && GET_MODE (XEXP (x
, 0)) == DImode
3071 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
3072 && CONST_INT_P (XEXP (x
, 1)))
3074 type
= (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
3075 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
3076 index
= XEXP (XEXP (x
, 0), 0);
3077 shift
= exact_log2 (INTVAL (XEXP (x
, 1)));
3079 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3080 else if (GET_CODE (x
) == ASHIFT
3081 && (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
3082 || GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
3083 && GET_MODE (XEXP (x
, 0)) == DImode
3084 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
3085 && CONST_INT_P (XEXP (x
, 1)))
3087 type
= (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
3088 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
3089 index
= XEXP (XEXP (x
, 0), 0);
3090 shift
= INTVAL (XEXP (x
, 1));
3092 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3093 else if ((GET_CODE (x
) == SIGN_EXTRACT
3094 || GET_CODE (x
) == ZERO_EXTRACT
)
3095 && GET_MODE (x
) == DImode
3096 && GET_CODE (XEXP (x
, 0)) == MULT
3097 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
3098 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
3100 type
= (GET_CODE (x
) == SIGN_EXTRACT
)
3101 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
3102 index
= XEXP (XEXP (x
, 0), 0);
3103 shift
= exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)));
3104 if (INTVAL (XEXP (x
, 1)) != 32 + shift
3105 || INTVAL (XEXP (x
, 2)) != 0)
3108 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3109 (const_int 0xffffffff<<shift)) */
3110 else if (GET_CODE (x
) == AND
3111 && GET_MODE (x
) == DImode
3112 && GET_CODE (XEXP (x
, 0)) == MULT
3113 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
3114 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
3115 && CONST_INT_P (XEXP (x
, 1)))
3117 type
= ADDRESS_REG_UXTW
;
3118 index
= XEXP (XEXP (x
, 0), 0);
3119 shift
= exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)));
3120 if (INTVAL (XEXP (x
, 1)) != (HOST_WIDE_INT
)0xffffffff << shift
)
3123 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3124 else if ((GET_CODE (x
) == SIGN_EXTRACT
3125 || GET_CODE (x
) == ZERO_EXTRACT
)
3126 && GET_MODE (x
) == DImode
3127 && GET_CODE (XEXP (x
, 0)) == ASHIFT
3128 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
3129 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
3131 type
= (GET_CODE (x
) == SIGN_EXTRACT
)
3132 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
3133 index
= XEXP (XEXP (x
, 0), 0);
3134 shift
= INTVAL (XEXP (XEXP (x
, 0), 1));
3135 if (INTVAL (XEXP (x
, 1)) != 32 + shift
3136 || INTVAL (XEXP (x
, 2)) != 0)
3139 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3140 (const_int 0xffffffff<<shift)) */
3141 else if (GET_CODE (x
) == AND
3142 && GET_MODE (x
) == DImode
3143 && GET_CODE (XEXP (x
, 0)) == ASHIFT
3144 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
3145 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
3146 && CONST_INT_P (XEXP (x
, 1)))
3148 type
= ADDRESS_REG_UXTW
;
3149 index
= XEXP (XEXP (x
, 0), 0);
3150 shift
= INTVAL (XEXP (XEXP (x
, 0), 1));
3151 if (INTVAL (XEXP (x
, 1)) != (HOST_WIDE_INT
)0xffffffff << shift
)
3154 /* (mult:P (reg:P) (const_int scale)) */
3155 else if (GET_CODE (x
) == MULT
3156 && GET_MODE (x
) == Pmode
3157 && GET_MODE (XEXP (x
, 0)) == Pmode
3158 && CONST_INT_P (XEXP (x
, 1)))
3160 type
= ADDRESS_REG_REG
;
3161 index
= XEXP (x
, 0);
3162 shift
= exact_log2 (INTVAL (XEXP (x
, 1)));
3164 /* (ashift:P (reg:P) (const_int shift)) */
3165 else if (GET_CODE (x
) == ASHIFT
3166 && GET_MODE (x
) == Pmode
3167 && GET_MODE (XEXP (x
, 0)) == Pmode
3168 && CONST_INT_P (XEXP (x
, 1)))
3170 type
= ADDRESS_REG_REG
;
3171 index
= XEXP (x
, 0);
3172 shift
= INTVAL (XEXP (x
, 1));
3177 if (GET_CODE (index
) == SUBREG
)
3178 index
= SUBREG_REG (index
);
3181 (shift
> 0 && shift
<= 3
3182 && (1 << shift
) == GET_MODE_SIZE (mode
)))
3184 && aarch64_regno_ok_for_index_p (REGNO (index
), strict_p
))
3187 info
->offset
= index
;
3188 info
->shift
= shift
;
3196 offset_7bit_signed_scaled_p (enum machine_mode mode
, HOST_WIDE_INT offset
)
3198 return (offset
>= -64 * GET_MODE_SIZE (mode
)
3199 && offset
< 64 * GET_MODE_SIZE (mode
)
3200 && offset
% GET_MODE_SIZE (mode
) == 0);
3204 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
3205 HOST_WIDE_INT offset
)
3207 return offset
>= -256 && offset
< 256;
3211 offset_12bit_unsigned_scaled_p (enum machine_mode mode
, HOST_WIDE_INT offset
)
3214 && offset
< 4096 * GET_MODE_SIZE (mode
)
3215 && offset
% GET_MODE_SIZE (mode
) == 0);
3218 /* Return true if X is a valid address for machine mode MODE. If it is,
3219 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3220 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3223 aarch64_classify_address (struct aarch64_address_info
*info
,
3224 rtx x
, enum machine_mode mode
,
3225 RTX_CODE outer_code
, bool strict_p
)
3227 enum rtx_code code
= GET_CODE (x
);
3229 bool allow_reg_index_p
=
3230 outer_code
!= PARALLEL
&& (GET_MODE_SIZE (mode
) != 16
3231 || aarch64_vector_mode_supported_p (mode
));
3232 /* Don't support anything other than POST_INC or REG addressing for
3234 if (aarch64_vect_struct_mode_p (mode
)
3235 && (code
!= POST_INC
&& code
!= REG
))
3242 info
->type
= ADDRESS_REG_IMM
;
3244 info
->offset
= const0_rtx
;
3245 return aarch64_base_register_rtx_p (x
, strict_p
);
3250 if (GET_MODE_SIZE (mode
) != 0
3251 && CONST_INT_P (op1
)
3252 && aarch64_base_register_rtx_p (op0
, strict_p
))
3254 HOST_WIDE_INT offset
= INTVAL (op1
);
3256 info
->type
= ADDRESS_REG_IMM
;
3260 /* TImode and TFmode values are allowed in both pairs of X
3261 registers and individual Q registers. The available
3263 X,X: 7-bit signed scaled offset
3264 Q: 9-bit signed offset
3265 We conservatively require an offset representable in either mode.
3267 if (mode
== TImode
|| mode
== TFmode
)
3268 return (offset_7bit_signed_scaled_p (mode
, offset
)
3269 && offset_9bit_signed_unscaled_p (mode
, offset
));
3271 if (outer_code
== PARALLEL
)
3272 return ((GET_MODE_SIZE (mode
) == 4 || GET_MODE_SIZE (mode
) == 8)
3273 && offset_7bit_signed_scaled_p (mode
, offset
));
3275 return (offset_9bit_signed_unscaled_p (mode
, offset
)
3276 || offset_12bit_unsigned_scaled_p (mode
, offset
));
3279 if (allow_reg_index_p
)
3281 /* Look for base + (scaled/extended) index register. */
3282 if (aarch64_base_register_rtx_p (op0
, strict_p
)
3283 && aarch64_classify_index (info
, op1
, mode
, strict_p
))
3288 if (aarch64_base_register_rtx_p (op1
, strict_p
)
3289 && aarch64_classify_index (info
, op0
, mode
, strict_p
))
3302 info
->type
= ADDRESS_REG_WB
;
3303 info
->base
= XEXP (x
, 0);
3304 info
->offset
= NULL_RTX
;
3305 return aarch64_base_register_rtx_p (info
->base
, strict_p
);
3309 info
->type
= ADDRESS_REG_WB
;
3310 info
->base
= XEXP (x
, 0);
3311 if (GET_CODE (XEXP (x
, 1)) == PLUS
3312 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
3313 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), info
->base
)
3314 && aarch64_base_register_rtx_p (info
->base
, strict_p
))
3316 HOST_WIDE_INT offset
;
3317 info
->offset
= XEXP (XEXP (x
, 1), 1);
3318 offset
= INTVAL (info
->offset
);
3320 /* TImode and TFmode values are allowed in both pairs of X
3321 registers and individual Q registers. The available
3323 X,X: 7-bit signed scaled offset
3324 Q: 9-bit signed offset
3325 We conservatively require an offset representable in either mode.
3327 if (mode
== TImode
|| mode
== TFmode
)
3328 return (offset_7bit_signed_scaled_p (mode
, offset
)
3329 && offset_9bit_signed_unscaled_p (mode
, offset
));
3331 if (outer_code
== PARALLEL
)
3332 return ((GET_MODE_SIZE (mode
) == 4 || GET_MODE_SIZE (mode
) == 8)
3333 && offset_7bit_signed_scaled_p (mode
, offset
));
3335 return offset_9bit_signed_unscaled_p (mode
, offset
);
3342 /* load literal: pc-relative constant pool entry. Only supported
3343 for SI mode or larger. */
3344 info
->type
= ADDRESS_SYMBOLIC
;
3345 if (outer_code
!= PARALLEL
&& GET_MODE_SIZE (mode
) >= 4)
3349 split_const (x
, &sym
, &addend
);
3350 return (GET_CODE (sym
) == LABEL_REF
3351 || (GET_CODE (sym
) == SYMBOL_REF
3352 && CONSTANT_POOL_ADDRESS_P (sym
)));
3357 info
->type
= ADDRESS_LO_SUM
;
3358 info
->base
= XEXP (x
, 0);
3359 info
->offset
= XEXP (x
, 1);
3360 if (allow_reg_index_p
3361 && aarch64_base_register_rtx_p (info
->base
, strict_p
))
3364 split_const (info
->offset
, &sym
, &offs
);
3365 if (GET_CODE (sym
) == SYMBOL_REF
3366 && (aarch64_classify_symbol (sym
, SYMBOL_CONTEXT_MEM
)
3367 == SYMBOL_SMALL_ABSOLUTE
))
3369 /* The symbol and offset must be aligned to the access size. */
3371 unsigned int ref_size
;
3373 if (CONSTANT_POOL_ADDRESS_P (sym
))
3374 align
= GET_MODE_ALIGNMENT (get_pool_mode (sym
));
3375 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym
))
3377 tree exp
= SYMBOL_REF_DECL (sym
);
3378 align
= TYPE_ALIGN (TREE_TYPE (exp
));
3379 align
= CONSTANT_ALIGNMENT (exp
, align
);
3381 else if (SYMBOL_REF_DECL (sym
))
3382 align
= DECL_ALIGN (SYMBOL_REF_DECL (sym
));
3383 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym
)
3384 && SYMBOL_REF_BLOCK (sym
) != NULL
)
3385 align
= SYMBOL_REF_BLOCK (sym
)->alignment
;
3387 align
= BITS_PER_UNIT
;
3389 ref_size
= GET_MODE_SIZE (mode
);
3391 ref_size
= GET_MODE_SIZE (DImode
);
3393 return ((INTVAL (offs
) & (ref_size
- 1)) == 0
3394 && ((align
/ BITS_PER_UNIT
) & (ref_size
- 1)) == 0);
3405 aarch64_symbolic_address_p (rtx x
)
3409 split_const (x
, &x
, &offset
);
3410 return GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
;
3413 /* Classify the base of symbolic expression X, given that X appears in
3416 enum aarch64_symbol_type
3417 aarch64_classify_symbolic_expression (rtx x
,
3418 enum aarch64_symbol_context context
)
3422 split_const (x
, &x
, &offset
);
3423 return aarch64_classify_symbol (x
, context
);
3427 /* Return TRUE if X is a legitimate address for accessing memory in
3430 aarch64_legitimate_address_hook_p (enum machine_mode mode
, rtx x
, bool strict_p
)
3432 struct aarch64_address_info addr
;
3434 return aarch64_classify_address (&addr
, x
, mode
, MEM
, strict_p
);
3437 /* Return TRUE if X is a legitimate address for accessing memory in
3438 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3441 aarch64_legitimate_address_p (enum machine_mode mode
, rtx x
,
3442 RTX_CODE outer_code
, bool strict_p
)
3444 struct aarch64_address_info addr
;
3446 return aarch64_classify_address (&addr
, x
, mode
, outer_code
, strict_p
);
3449 /* Return TRUE if rtx X is immediate constant 0.0 */
3451 aarch64_float_const_zero_rtx_p (rtx x
)
3455 if (GET_MODE (x
) == VOIDmode
)
3458 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
3459 if (REAL_VALUE_MINUS_ZERO (r
))
3460 return !HONOR_SIGNED_ZEROS (GET_MODE (x
));
3461 return REAL_VALUES_EQUAL (r
, dconst0
);
3464 /* Return the fixed registers used for condition codes. */
3467 aarch64_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
3470 *p2
= INVALID_REGNUM
;
3475 aarch64_select_cc_mode (RTX_CODE code
, rtx x
, rtx y
)
3477 /* All floating point compares return CCFP if it is an equality
3478 comparison, and CCFPE otherwise. */
3479 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
3506 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
3508 && (code
== EQ
|| code
== NE
|| code
== LT
|| code
== GE
)
3509 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
|| GET_CODE (x
) == AND
3510 || GET_CODE (x
) == NEG
))
3513 /* A compare with a shifted operand. Because of canonicalization,
3514 the comparison will have to be swapped when we emit the assembly
3516 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
3517 && (GET_CODE (y
) == REG
|| GET_CODE (y
) == SUBREG
)
3518 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
3519 || GET_CODE (x
) == LSHIFTRT
3520 || GET_CODE (x
) == ZERO_EXTEND
|| GET_CODE (x
) == SIGN_EXTEND
))
3523 /* Similarly for a negated operand, but we can only do this for
3525 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
3526 && (GET_CODE (y
) == REG
|| GET_CODE (y
) == SUBREG
)
3527 && (code
== EQ
|| code
== NE
)
3528 && GET_CODE (x
) == NEG
)
3531 /* A compare of a mode narrower than SI mode against zero can be done
3532 by extending the value in the comparison. */
3533 if ((GET_MODE (x
) == QImode
|| GET_MODE (x
) == HImode
)
3535 /* Only use sign-extension if we really need it. */
3536 return ((code
== GT
|| code
== GE
|| code
== LE
|| code
== LT
)
3537 ? CC_SESWPmode
: CC_ZESWPmode
);
3539 /* For everything else, return CCmode. */
3544 aarch64_get_condition_code (rtx x
)
3546 enum machine_mode mode
= GET_MODE (XEXP (x
, 0));
3547 enum rtx_code comp_code
= GET_CODE (x
);
3549 if (GET_MODE_CLASS (mode
) != MODE_CC
)
3550 mode
= SELECT_CC_MODE (comp_code
, XEXP (x
, 0), XEXP (x
, 1));
3558 case GE
: return AARCH64_GE
;
3559 case GT
: return AARCH64_GT
;
3560 case LE
: return AARCH64_LS
;
3561 case LT
: return AARCH64_MI
;
3562 case NE
: return AARCH64_NE
;
3563 case EQ
: return AARCH64_EQ
;
3564 case ORDERED
: return AARCH64_VC
;
3565 case UNORDERED
: return AARCH64_VS
;
3566 case UNLT
: return AARCH64_LT
;
3567 case UNLE
: return AARCH64_LE
;
3568 case UNGT
: return AARCH64_HI
;
3569 case UNGE
: return AARCH64_PL
;
3570 default: gcc_unreachable ();
3577 case NE
: return AARCH64_NE
;
3578 case EQ
: return AARCH64_EQ
;
3579 case GE
: return AARCH64_GE
;
3580 case GT
: return AARCH64_GT
;
3581 case LE
: return AARCH64_LE
;
3582 case LT
: return AARCH64_LT
;
3583 case GEU
: return AARCH64_CS
;
3584 case GTU
: return AARCH64_HI
;
3585 case LEU
: return AARCH64_LS
;
3586 case LTU
: return AARCH64_CC
;
3587 default: gcc_unreachable ();
3596 case NE
: return AARCH64_NE
;
3597 case EQ
: return AARCH64_EQ
;
3598 case GE
: return AARCH64_LE
;
3599 case GT
: return AARCH64_LT
;
3600 case LE
: return AARCH64_GE
;
3601 case LT
: return AARCH64_GT
;
3602 case GEU
: return AARCH64_LS
;
3603 case GTU
: return AARCH64_CC
;
3604 case LEU
: return AARCH64_CS
;
3605 case LTU
: return AARCH64_HI
;
3606 default: gcc_unreachable ();
3613 case NE
: return AARCH64_NE
;
3614 case EQ
: return AARCH64_EQ
;
3615 case GE
: return AARCH64_PL
;
3616 case LT
: return AARCH64_MI
;
3617 default: gcc_unreachable ();
3624 case NE
: return AARCH64_NE
;
3625 case EQ
: return AARCH64_EQ
;
3626 default: gcc_unreachable ();
3637 bit_count (unsigned HOST_WIDE_INT value
)
3651 aarch64_print_operand (FILE *f
, rtx x
, char code
)
3655 /* An integer or symbol address without a preceding # sign. */
3657 switch (GET_CODE (x
))
3660 fprintf (f
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
3664 output_addr_const (f
, x
);
3668 if (GET_CODE (XEXP (x
, 0)) == PLUS
3669 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
3671 output_addr_const (f
, x
);
3677 output_operand_lossage ("Unsupported operand for code '%c'", code
);
3682 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3686 if (GET_CODE (x
) != CONST_INT
3687 || (n
= exact_log2 (INTVAL (x
) & ~7)) <= 0)
3689 output_operand_lossage ("invalid operand for '%%%c'", code
);
3705 output_operand_lossage ("invalid operand for '%%%c'", code
);
3715 /* Print N such that 2^N == X. */
3716 if (GET_CODE (x
) != CONST_INT
|| (n
= exact_log2 (INTVAL (x
))) < 0)
3718 output_operand_lossage ("invalid operand for '%%%c'", code
);
3722 asm_fprintf (f
, "%d", n
);
3727 /* Print the number of non-zero bits in X (a const_int). */
3728 if (GET_CODE (x
) != CONST_INT
)
3730 output_operand_lossage ("invalid operand for '%%%c'", code
);
3734 asm_fprintf (f
, "%u", bit_count (INTVAL (x
)));
3738 /* Print the higher numbered register of a pair (TImode) of regs. */
3739 if (GET_CODE (x
) != REG
|| !GP_REGNUM_P (REGNO (x
) + 1))
3741 output_operand_lossage ("invalid operand for '%%%c'", code
);
3745 asm_fprintf (f
, "%s", reg_names
[REGNO (x
) + 1]);
3749 /* Print a condition (eq, ne, etc). */
3751 /* CONST_TRUE_RTX means always -- that's the default. */
3752 if (x
== const_true_rtx
)
3755 if (!COMPARISON_P (x
))
3757 output_operand_lossage ("invalid operand for '%%%c'", code
);
3761 fputs (aarch64_condition_codes
[aarch64_get_condition_code (x
)], f
);
3765 /* Print the inverse of a condition (eq <-> ne, etc). */
3767 /* CONST_TRUE_RTX means never -- that's the default. */
3768 if (x
== const_true_rtx
)
3774 if (!COMPARISON_P (x
))
3776 output_operand_lossage ("invalid operand for '%%%c'", code
);
3780 fputs (aarch64_condition_codes
[AARCH64_INVERSE_CONDITION_CODE
3781 (aarch64_get_condition_code (x
))], f
);
3789 /* Print a scalar FP/SIMD register name. */
3790 if (!REG_P (x
) || !FP_REGNUM_P (REGNO (x
)))
3792 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code
);
3795 asm_fprintf (f
, "%c%d", code
, REGNO (x
) - V0_REGNUM
);
3802 /* Print the first FP/SIMD register name in a list. */
3803 if (!REG_P (x
) || !FP_REGNUM_P (REGNO (x
)))
3805 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code
);
3808 asm_fprintf (f
, "v%d", REGNO (x
) - V0_REGNUM
+ (code
- 'S'));
3812 /* Print bottom 16 bits of integer constant in hex. */
3813 if (GET_CODE (x
) != CONST_INT
)
3815 output_operand_lossage ("invalid operand for '%%%c'", code
);
3818 asm_fprintf (f
, "0x%wx", UINTVAL (x
) & 0xffff);
3823 /* Print a general register name or the zero register (32-bit or
3826 || (CONST_DOUBLE_P (x
) && aarch64_float_const_zero_rtx_p (x
)))
3828 asm_fprintf (f
, "%czr", code
);
3832 if (REG_P (x
) && GP_REGNUM_P (REGNO (x
)))
3834 asm_fprintf (f
, "%c%d", code
, REGNO (x
) - R0_REGNUM
);
3838 if (REG_P (x
) && REGNO (x
) == SP_REGNUM
)
3840 asm_fprintf (f
, "%ssp", code
== 'w' ? "w" : "");
3847 /* Print a normal operand, if it's a general register, then we
3851 output_operand_lossage ("missing operand");
3855 switch (GET_CODE (x
))
3858 asm_fprintf (f
, "%s", reg_names
[REGNO (x
)]);
3862 aarch64_memory_reference_mode
= GET_MODE (x
);
3863 output_address (XEXP (x
, 0));
3868 output_addr_const (asm_out_file
, x
);
3872 asm_fprintf (f
, "%wd", INTVAL (x
));
3876 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_VECTOR_INT
)
3878 gcc_assert (aarch64_const_vec_all_same_int_p (x
,
3880 HOST_WIDE_INT_MAX
));
3881 asm_fprintf (f
, "%wd", INTVAL (CONST_VECTOR_ELT (x
, 0)));
3883 else if (aarch64_simd_imm_zero_p (x
, GET_MODE (x
)))
3892 /* CONST_DOUBLE can represent a double-width integer.
3893 In this case, the mode of x is VOIDmode. */
3894 if (GET_MODE (x
) == VOIDmode
)
3896 else if (aarch64_float_const_zero_rtx_p (x
))
3901 else if (aarch64_float_const_representable_p (x
))
3904 char float_buf
[buf_size
] = {'\0'};
3906 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
3907 real_to_decimal_for_mode (float_buf
, &r
,
3910 asm_fprintf (asm_out_file
, "%s", float_buf
);
3914 output_operand_lossage ("invalid constant");
3917 output_operand_lossage ("invalid operand");
3923 if (GET_CODE (x
) == HIGH
)
3926 switch (aarch64_classify_symbolic_expression (x
, SYMBOL_CONTEXT_ADR
))
3928 case SYMBOL_SMALL_GOT
:
3929 asm_fprintf (asm_out_file
, ":got:");
3932 case SYMBOL_SMALL_TLSGD
:
3933 asm_fprintf (asm_out_file
, ":tlsgd:");
3936 case SYMBOL_SMALL_TLSDESC
:
3937 asm_fprintf (asm_out_file
, ":tlsdesc:");
3940 case SYMBOL_SMALL_GOTTPREL
:
3941 asm_fprintf (asm_out_file
, ":gottprel:");
3944 case SYMBOL_SMALL_TPREL
:
3945 asm_fprintf (asm_out_file
, ":tprel:");
3948 case SYMBOL_TINY_GOT
:
3955 output_addr_const (asm_out_file
, x
);
3959 switch (aarch64_classify_symbolic_expression (x
, SYMBOL_CONTEXT_ADR
))
3961 case SYMBOL_SMALL_GOT
:
3962 asm_fprintf (asm_out_file
, ":lo12:");
3965 case SYMBOL_SMALL_TLSGD
:
3966 asm_fprintf (asm_out_file
, ":tlsgd_lo12:");
3969 case SYMBOL_SMALL_TLSDESC
:
3970 asm_fprintf (asm_out_file
, ":tlsdesc_lo12:");
3973 case SYMBOL_SMALL_GOTTPREL
:
3974 asm_fprintf (asm_out_file
, ":gottprel_lo12:");
3977 case SYMBOL_SMALL_TPREL
:
3978 asm_fprintf (asm_out_file
, ":tprel_lo12_nc:");
3981 case SYMBOL_TINY_GOT
:
3982 asm_fprintf (asm_out_file
, ":got:");
3988 output_addr_const (asm_out_file
, x
);
3993 switch (aarch64_classify_symbolic_expression (x
, SYMBOL_CONTEXT_ADR
))
3995 case SYMBOL_SMALL_TPREL
:
3996 asm_fprintf (asm_out_file
, ":tprel_hi12:");
4001 output_addr_const (asm_out_file
, x
);
4005 output_operand_lossage ("invalid operand prefix '%%%c'", code
);
4011 aarch64_print_operand_address (FILE *f
, rtx x
)
4013 struct aarch64_address_info addr
;
4015 if (aarch64_classify_address (&addr
, x
, aarch64_memory_reference_mode
,
4019 case ADDRESS_REG_IMM
:
4020 if (addr
.offset
== const0_rtx
)
4021 asm_fprintf (f
, "[%s]", reg_names
[REGNO (addr
.base
)]);
4023 asm_fprintf (f
, "[%s, %wd]", reg_names
[REGNO (addr
.base
)],
4024 INTVAL (addr
.offset
));
4027 case ADDRESS_REG_REG
:
4028 if (addr
.shift
== 0)
4029 asm_fprintf (f
, "[%s, %s]", reg_names
[REGNO (addr
.base
)],
4030 reg_names
[REGNO (addr
.offset
)]);
4032 asm_fprintf (f
, "[%s, %s, lsl %u]", reg_names
[REGNO (addr
.base
)],
4033 reg_names
[REGNO (addr
.offset
)], addr
.shift
);
4036 case ADDRESS_REG_UXTW
:
4037 if (addr
.shift
== 0)
4038 asm_fprintf (f
, "[%s, w%d, uxtw]", reg_names
[REGNO (addr
.base
)],
4039 REGNO (addr
.offset
) - R0_REGNUM
);
4041 asm_fprintf (f
, "[%s, w%d, uxtw %u]", reg_names
[REGNO (addr
.base
)],
4042 REGNO (addr
.offset
) - R0_REGNUM
, addr
.shift
);
4045 case ADDRESS_REG_SXTW
:
4046 if (addr
.shift
== 0)
4047 asm_fprintf (f
, "[%s, w%d, sxtw]", reg_names
[REGNO (addr
.base
)],
4048 REGNO (addr
.offset
) - R0_REGNUM
);
4050 asm_fprintf (f
, "[%s, w%d, sxtw %u]", reg_names
[REGNO (addr
.base
)],
4051 REGNO (addr
.offset
) - R0_REGNUM
, addr
.shift
);
4054 case ADDRESS_REG_WB
:
4055 switch (GET_CODE (x
))
4058 asm_fprintf (f
, "[%s, %d]!", reg_names
[REGNO (addr
.base
)],
4059 GET_MODE_SIZE (aarch64_memory_reference_mode
));
4062 asm_fprintf (f
, "[%s], %d", reg_names
[REGNO (addr
.base
)],
4063 GET_MODE_SIZE (aarch64_memory_reference_mode
));
4066 asm_fprintf (f
, "[%s, -%d]!", reg_names
[REGNO (addr
.base
)],
4067 GET_MODE_SIZE (aarch64_memory_reference_mode
));
4070 asm_fprintf (f
, "[%s], -%d", reg_names
[REGNO (addr
.base
)],
4071 GET_MODE_SIZE (aarch64_memory_reference_mode
));
4074 asm_fprintf (f
, "[%s, %wd]!", reg_names
[REGNO (addr
.base
)],
4075 INTVAL (addr
.offset
));
4078 asm_fprintf (f
, "[%s], %wd", reg_names
[REGNO (addr
.base
)],
4079 INTVAL (addr
.offset
));
4086 case ADDRESS_LO_SUM
:
4087 asm_fprintf (f
, "[%s, #:lo12:", reg_names
[REGNO (addr
.base
)]);
4088 output_addr_const (f
, addr
.offset
);
4089 asm_fprintf (f
, "]");
4092 case ADDRESS_SYMBOLIC
:
4096 output_addr_const (f
, x
);
4100 aarch64_label_mentioned_p (rtx x
)
4105 if (GET_CODE (x
) == LABEL_REF
)
4108 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4109 referencing instruction, but they are constant offsets, not
4111 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
4114 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
4115 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
4121 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
4122 if (aarch64_label_mentioned_p (XVECEXP (x
, i
, j
)))
4125 else if (fmt
[i
] == 'e' && aarch64_label_mentioned_p (XEXP (x
, i
)))
4132 /* Implement REGNO_REG_CLASS. */
4135 aarch64_regno_regclass (unsigned regno
)
4137 if (GP_REGNUM_P (regno
))
4140 if (regno
== SP_REGNUM
)
4143 if (regno
== FRAME_POINTER_REGNUM
4144 || regno
== ARG_POINTER_REGNUM
)
4145 return POINTER_REGS
;
4147 if (FP_REGNUM_P (regno
))
4148 return FP_LO_REGNUM_P (regno
) ? FP_LO_REGS
: FP_REGS
;
4153 /* Try a machine-dependent way of reloading an illegitimate address
4154 operand. If we find one, push the reload and return the new rtx. */
4157 aarch64_legitimize_reload_address (rtx
*x_p
,
4158 enum machine_mode mode
,
4159 int opnum
, int type
,
4160 int ind_levels ATTRIBUTE_UNUSED
)
4164 /* Do not allow mem (plus (reg, const)) if vector struct mode. */
4165 if (aarch64_vect_struct_mode_p (mode
)
4166 && GET_CODE (x
) == PLUS
4167 && REG_P (XEXP (x
, 0))
4168 && CONST_INT_P (XEXP (x
, 1)))
4172 push_reload (orig_rtx
, NULL_RTX
, x_p
, NULL
,
4173 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
4174 opnum
, (enum reload_type
) type
);
4178 /* We must recognize output that we have already generated ourselves. */
4179 if (GET_CODE (x
) == PLUS
4180 && GET_CODE (XEXP (x
, 0)) == PLUS
4181 && REG_P (XEXP (XEXP (x
, 0), 0))
4182 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
4183 && CONST_INT_P (XEXP (x
, 1)))
4185 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
4186 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
4187 opnum
, (enum reload_type
) type
);
4191 /* We wish to handle large displacements off a base register by splitting
4192 the addend across an add and the mem insn. This can cut the number of
4193 extra insns needed from 3 to 1. It is only useful for load/store of a
4194 single register with 12 bit offset field. */
4195 if (GET_CODE (x
) == PLUS
4196 && REG_P (XEXP (x
, 0))
4197 && CONST_INT_P (XEXP (x
, 1))
4198 && HARD_REGISTER_P (XEXP (x
, 0))
4201 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x
, 0)), true))
4203 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
4204 HOST_WIDE_INT low
= val
& 0xfff;
4205 HOST_WIDE_INT high
= val
- low
;
4208 enum machine_mode xmode
= GET_MODE (x
);
4210 /* In ILP32, xmode can be either DImode or SImode. */
4211 gcc_assert (xmode
== DImode
|| xmode
== SImode
);
4213 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4214 BLKmode alignment. */
4215 if (GET_MODE_SIZE (mode
) == 0)
4218 offs
= low
% GET_MODE_SIZE (mode
);
4220 /* Align misaligned offset by adjusting high part to compensate. */
4223 if (aarch64_uimm12_shift (high
+ offs
))
4232 offs
= GET_MODE_SIZE (mode
) - offs
;
4234 high
= high
+ (low
& 0x1000) - offs
;
4239 /* Check for overflow. */
4240 if (high
+ low
!= val
)
4243 cst
= GEN_INT (high
);
4244 if (!aarch64_uimm12_shift (high
))
4245 cst
= force_const_mem (xmode
, cst
);
4247 /* Reload high part into base reg, leaving the low part
4248 in the mem instruction.
4249 Note that replacing this gen_rtx_PLUS with plus_constant is
4250 wrong in this case because we rely on the
4251 (plus (plus reg c1) c2) structure being preserved so that
4252 XEXP (*p, 0) in push_reload below uses the correct term. */
4253 x
= gen_rtx_PLUS (xmode
,
4254 gen_rtx_PLUS (xmode
, XEXP (x
, 0), cst
),
4257 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
4258 BASE_REG_CLASS
, xmode
, VOIDmode
, 0, 0,
4259 opnum
, (enum reload_type
) type
);
4268 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED
, rtx x
,
4270 enum machine_mode mode
,
4271 secondary_reload_info
*sri
)
4273 /* Without the TARGET_SIMD instructions we cannot move a Q register
4274 to a Q register directly. We need a scratch. */
4275 if (REG_P (x
) && (mode
== TFmode
|| mode
== TImode
) && mode
== GET_MODE (x
)
4276 && FP_REGNUM_P (REGNO (x
)) && !TARGET_SIMD
4277 && reg_class_subset_p (rclass
, FP_REGS
))
4280 sri
->icode
= CODE_FOR_aarch64_reload_movtf
;
4281 else if (mode
== TImode
)
4282 sri
->icode
= CODE_FOR_aarch64_reload_movti
;
4286 /* A TFmode or TImode memory access should be handled via an FP_REGS
4287 because AArch64 has richer addressing modes for LDR/STR instructions
4288 than LDP/STP instructions. */
4289 if (!TARGET_GENERAL_REGS_ONLY
&& rclass
== CORE_REGS
4290 && GET_MODE_SIZE (mode
) == 16 && MEM_P (x
))
4293 if (rclass
== FP_REGS
&& (mode
== TImode
|| mode
== TFmode
) && CONSTANT_P(x
))
4300 aarch64_can_eliminate (const int from
, const int to
)
4302 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4303 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4305 if (frame_pointer_needed
)
4307 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4309 if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
4311 if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
4312 && !cfun
->calls_alloca
)
4314 if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4324 aarch64_initial_elimination_offset (unsigned from
, unsigned to
)
4326 aarch64_layout_frame ();
4328 if (to
== HARD_FRAME_POINTER_REGNUM
)
4330 if (from
== ARG_POINTER_REGNUM
)
4331 return cfun
->machine
->frame
.frame_size
- crtl
->outgoing_args_size
;
4333 if (from
== FRAME_POINTER_REGNUM
)
4334 return (cfun
->machine
->frame
.hard_fp_offset
4335 - cfun
->machine
->frame
.saved_varargs_size
);
4338 if (to
== STACK_POINTER_REGNUM
)
4340 if (from
== FRAME_POINTER_REGNUM
)
4341 return (cfun
->machine
->frame
.frame_size
4342 - cfun
->machine
->frame
.saved_varargs_size
);
4345 return cfun
->machine
->frame
.frame_size
;
4348 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4352 aarch64_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
4356 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
4361 aarch64_asm_trampoline_template (FILE *f
)
4365 asm_fprintf (f
, "\tldr\tw%d, .+16\n", IP1_REGNUM
- R0_REGNUM
);
4366 asm_fprintf (f
, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM
- R0_REGNUM
);
4370 asm_fprintf (f
, "\tldr\t%s, .+16\n", reg_names
[IP1_REGNUM
]);
4371 asm_fprintf (f
, "\tldr\t%s, .+20\n", reg_names
[STATIC_CHAIN_REGNUM
]);
4373 asm_fprintf (f
, "\tbr\t%s\n", reg_names
[IP1_REGNUM
]);
4374 assemble_aligned_integer (4, const0_rtx
);
4375 assemble_aligned_integer (POINTER_BYTES
, const0_rtx
);
4376 assemble_aligned_integer (POINTER_BYTES
, const0_rtx
);
4380 aarch64_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
4382 rtx fnaddr
, mem
, a_tramp
;
4383 const int tramp_code_sz
= 16;
4385 /* Don't need to copy the trailing D-words, we fill those in below. */
4386 emit_block_move (m_tramp
, assemble_trampoline_template (),
4387 GEN_INT (tramp_code_sz
), BLOCK_OP_NORMAL
);
4388 mem
= adjust_address (m_tramp
, ptr_mode
, tramp_code_sz
);
4389 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
4390 if (GET_MODE (fnaddr
) != ptr_mode
)
4391 fnaddr
= convert_memory_address (ptr_mode
, fnaddr
);
4392 emit_move_insn (mem
, fnaddr
);
4394 mem
= adjust_address (m_tramp
, ptr_mode
, tramp_code_sz
+ POINTER_BYTES
);
4395 emit_move_insn (mem
, chain_value
);
4397 /* XXX We should really define a "clear_cache" pattern and use
4398 gen_clear_cache(). */
4399 a_tramp
= XEXP (m_tramp
, 0);
4400 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
4401 LCT_NORMAL
, VOIDmode
, 2, a_tramp
, ptr_mode
,
4402 plus_constant (ptr_mode
, a_tramp
, TRAMPOLINE_SIZE
),
4406 static unsigned char
4407 aarch64_class_max_nregs (reg_class_t regclass
, enum machine_mode mode
)
4411 case CALLER_SAVE_REGS
:
4419 aarch64_vector_mode_p (mode
) ? (GET_MODE_SIZE (mode
) + 15) / 16 :
4420 (GET_MODE_SIZE (mode
) + 7) / 8;
4434 aarch64_preferred_reload_class (rtx x
, reg_class_t regclass
)
4436 if (regclass
== POINTER_REGS
)
4437 return GENERAL_REGS
;
4439 if (regclass
== STACK_REG
)
4442 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x
)), POINTER_REGS
))
4448 /* If it's an integer immediate that MOVI can't handle, then
4449 FP_REGS is not an option, so we return NO_REGS instead. */
4450 if (CONST_INT_P (x
) && reg_class_subset_p (regclass
, FP_REGS
)
4451 && !aarch64_simd_imm_scalar_p (x
, GET_MODE (x
)))
4454 /* Register eliminiation can result in a request for
4455 SP+constant->FP_REGS. We cannot support such operations which
4456 use SP as source and an FP_REG as destination, so reject out
4458 if (! reg_class_subset_p (regclass
, GENERAL_REGS
) && GET_CODE (x
) == PLUS
)
4460 rtx lhs
= XEXP (x
, 0);
4462 /* Look through a possible SUBREG introduced by ILP32. */
4463 if (GET_CODE (lhs
) == SUBREG
)
4464 lhs
= SUBREG_REG (lhs
);
4466 gcc_assert (REG_P (lhs
));
4467 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs
)),
/* Print a label reference NAME to F, letting %U apply the
   user-label prefix.  */
void
aarch64_asm_output_labelref (FILE* f, const char *name)
{
  asm_fprintf (f, "%U%s", name);
}
4482 aarch64_elf_asm_constructor (rtx symbol
, int priority
)
4484 if (priority
== DEFAULT_INIT_PRIORITY
)
4485 default_ctor_section_asm_out_constructor (symbol
, priority
);
4490 snprintf (buf
, sizeof (buf
), ".init_array.%.5u", priority
);
4491 s
= get_section (buf
, SECTION_WRITE
, NULL
);
4492 switch_to_section (s
);
4493 assemble_align (POINTER_SIZE
);
4494 assemble_aligned_integer (POINTER_BYTES
, symbol
);
4499 aarch64_elf_asm_destructor (rtx symbol
, int priority
)
4501 if (priority
== DEFAULT_INIT_PRIORITY
)
4502 default_dtor_section_asm_out_destructor (symbol
, priority
);
4507 snprintf (buf
, sizeof (buf
), ".fini_array.%.5u", priority
);
4508 s
= get_section (buf
, SECTION_WRITE
, NULL
);
4509 switch_to_section (s
);
4510 assemble_align (POINTER_SIZE
);
4511 assemble_aligned_integer (POINTER_BYTES
, symbol
);
4516 aarch64_output_casesi (rtx
*operands
)
4520 rtx diff_vec
= PATTERN (NEXT_INSN (operands
[2]));
4522 static const char *const patterns
[4][2] =
4525 "ldrb\t%w3, [%0,%w1,uxtw]",
4526 "add\t%3, %4, %w3, sxtb #2"
4529 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4530 "add\t%3, %4, %w3, sxth #2"
4533 "ldr\t%w3, [%0,%w1,uxtw #2]",
4534 "add\t%3, %4, %w3, sxtw #2"
4536 /* We assume that DImode is only generated when not optimizing and
4537 that we don't really need 64-bit address offsets. That would
4538 imply an object file with 8GB of code in a single function! */
4540 "ldr\t%w3, [%0,%w1,uxtw #2]",
4541 "add\t%3, %4, %w3, sxtw #2"
4545 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
4547 index
= exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec
)));
4549 gcc_assert (index
>= 0 && index
<= 3);
4551 /* Need to implement table size reduction, by chaning the code below. */
4552 output_asm_insn (patterns
[index
][0], operands
);
4553 ASM_GENERATE_INTERNAL_LABEL (label
, "Lrtx", CODE_LABEL_NUMBER (operands
[2]));
4554 snprintf (buf
, sizeof (buf
),
4555 "adr\t%%4, %s", targetm
.strip_name_encoding (label
));
4556 output_asm_insn (buf
, operands
);
4557 output_asm_insn (patterns
[index
][1], operands
);
4558 output_asm_insn ("br\t%3", operands
);
4559 assemble_label (asm_out_file
, label
);
4564 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4565 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4569 aarch64_uxt_size (int shift
, HOST_WIDE_INT mask
)
4571 if (shift
>= 0 && shift
<= 3)
4574 for (size
= 8; size
<= 32; size
*= 2)
4576 HOST_WIDE_INT bits
= ((HOST_WIDE_INT
)1U << size
) - 1;
4577 if (mask
== bits
<< shift
)
4585 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
4586 const_rtx x ATTRIBUTE_UNUSED
)
4588 /* We can't use blocks for constants when we're using a per-function
4594 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED
,
4595 rtx x ATTRIBUTE_UNUSED
,
4596 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED
)
4598 /* Force all constant pool entries into the current function section. */
4599 return function_section (current_function_decl
);
4605 /* Helper function for rtx cost calculation. Strip a shift expression
4606 from X. Returns the inner operand if successful, or the original
4607 expression on failure. */
4609 aarch64_strip_shift (rtx x
)
4613 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
4614 we can convert both to ROR during final output. */
4615 if ((GET_CODE (op
) == ASHIFT
4616 || GET_CODE (op
) == ASHIFTRT
4617 || GET_CODE (op
) == LSHIFTRT
4618 || GET_CODE (op
) == ROTATERT
4619 || GET_CODE (op
) == ROTATE
)
4620 && CONST_INT_P (XEXP (op
, 1)))
4621 return XEXP (op
, 0);
4623 if (GET_CODE (op
) == MULT
4624 && CONST_INT_P (XEXP (op
, 1))
4625 && ((unsigned) exact_log2 (INTVAL (XEXP (op
, 1)))) < 64)
4626 return XEXP (op
, 0);
4631 /* Helper function for rtx cost calculation. Strip an extend
4632 expression from X. Returns the inner operand if successful, or the
4633 original expression on failure. We deal with a number of possible
4634 canonicalization variations here. */
4636 aarch64_strip_extend (rtx x
)
4640 /* Zero and sign extraction of a widened value. */
4641 if ((GET_CODE (op
) == ZERO_EXTRACT
|| GET_CODE (op
) == SIGN_EXTRACT
)
4642 && XEXP (op
, 2) == const0_rtx
4643 && GET_CODE (XEXP (op
, 0)) == MULT
4644 && aarch64_is_extend_from_extract (GET_MODE (op
), XEXP (XEXP (op
, 0), 1),
4646 return XEXP (XEXP (op
, 0), 0);
4648 /* It can also be represented (for zero-extend) as an AND with an
4650 if (GET_CODE (op
) == AND
4651 && GET_CODE (XEXP (op
, 0)) == MULT
4652 && CONST_INT_P (XEXP (XEXP (op
, 0), 1))
4653 && CONST_INT_P (XEXP (op
, 1))
4654 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op
, 0), 1))),
4655 INTVAL (XEXP (op
, 1))) != 0)
4656 return XEXP (XEXP (op
, 0), 0);
4658 /* Now handle extended register, as this may also have an optional
4659 left shift by 1..4. */
4660 if (GET_CODE (op
) == ASHIFT
4661 && CONST_INT_P (XEXP (op
, 1))
4662 && ((unsigned HOST_WIDE_INT
) INTVAL (XEXP (op
, 1))) <= 4)
4665 if (GET_CODE (op
) == ZERO_EXTEND
4666 || GET_CODE (op
) == SIGN_EXTEND
)
4675 /* Helper function for rtx cost calculation. Calculate the cost of
4676 a MULT, which may be part of a multiply-accumulate rtx. Return
4677 the calculated cost of the expression, recursing manually in to
4678 operands where needed. */
4681 aarch64_rtx_mult_cost (rtx x
, int code
, int outer
, bool speed
)
4684 const struct cpu_cost_table
*extra_cost
4685 = aarch64_tune_params
->insn_extra_cost
;
4687 bool maybe_fma
= (outer
== PLUS
|| outer
== MINUS
);
4688 enum machine_mode mode
= GET_MODE (x
);
4690 gcc_checking_assert (code
== MULT
);
4695 if (VECTOR_MODE_P (mode
))
4696 mode
= GET_MODE_INNER (mode
);
4698 /* Integer multiply/fma. */
4699 if (GET_MODE_CLASS (mode
) == MODE_INT
)
4701 /* The multiply will be canonicalized as a shift, cost it as such. */
4702 if (CONST_INT_P (op1
)
4703 && exact_log2 (INTVAL (op1
)) > 0)
4708 /* ADD (shifted register). */
4709 cost
+= extra_cost
->alu
.arith_shift
;
4711 /* LSL (immediate). */
4712 cost
+= extra_cost
->alu
.shift
;
4715 cost
+= rtx_cost (op0
, GET_CODE (op0
), 0, speed
);
4720 /* Integer multiplies or FMAs have zero/sign extending variants. */
4721 if ((GET_CODE (op0
) == ZERO_EXTEND
4722 && GET_CODE (op1
) == ZERO_EXTEND
)
4723 || (GET_CODE (op0
) == SIGN_EXTEND
4724 && GET_CODE (op1
) == SIGN_EXTEND
))
4726 cost
+= rtx_cost (XEXP (op0
, 0), MULT
, 0, speed
)
4727 + rtx_cost (XEXP (op1
, 0), MULT
, 1, speed
);
4732 /* MADD/SMADDL/UMADDL. */
4733 cost
+= extra_cost
->mult
[0].extend_add
;
4735 /* MUL/SMULL/UMULL. */
4736 cost
+= extra_cost
->mult
[0].extend
;
4742 /* This is either an integer multiply or an FMA. In both cases
4743 we want to recurse and cost the operands. */
4744 cost
+= rtx_cost (op0
, MULT
, 0, speed
)
4745 + rtx_cost (op1
, MULT
, 1, speed
);
4751 cost
+= extra_cost
->mult
[mode
== DImode
].add
;
4754 cost
+= extra_cost
->mult
[mode
== DImode
].simple
;
4763 /* Floating-point FMA/FMUL can also support negations of the
4765 if (GET_CODE (op0
) == NEG
)
4766 op0
= XEXP (op0
, 0);
4767 if (GET_CODE (op1
) == NEG
)
4768 op1
= XEXP (op1
, 0);
4771 /* FMADD/FNMADD/FNMSUB/FMSUB. */
4772 cost
+= extra_cost
->fp
[mode
== DFmode
].fma
;
4775 cost
+= extra_cost
->fp
[mode
== DFmode
].mult
;
4778 cost
+= rtx_cost (op0
, MULT
, 0, speed
)
4779 + rtx_cost (op1
, MULT
, 1, speed
);
4785 aarch64_address_cost (rtx x
,
4786 enum machine_mode mode
,
4787 addr_space_t as ATTRIBUTE_UNUSED
,
4790 enum rtx_code c
= GET_CODE (x
);
4791 const struct cpu_addrcost_table
*addr_cost
= aarch64_tune_params
->addr_cost
;
4792 struct aarch64_address_info info
;
4796 if (!aarch64_classify_address (&info
, x
, mode
, c
, false))
4798 if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
)
4800 /* This is a CONST or SYMBOL ref which will be split
4801 in a different way depending on the code model in use.
4802 Cost it through the generic infrastructure. */
4803 int cost_symbol_ref
= rtx_cost (x
, MEM
, 1, speed
);
4804 /* Divide through by the cost of one instruction to
4805 bring it to the same units as the address costs. */
4806 cost_symbol_ref
/= COSTS_N_INSNS (1);
4807 /* The cost is then the cost of preparing the address,
4808 followed by an immediate (possibly 0) offset. */
4809 return cost_symbol_ref
+ addr_cost
->imm_offset
;
4813 /* This is most likely a jump table from a case
4815 return addr_cost
->register_offset
;
4821 case ADDRESS_LO_SUM
:
4822 case ADDRESS_SYMBOLIC
:
4823 case ADDRESS_REG_IMM
:
4824 cost
+= addr_cost
->imm_offset
;
4827 case ADDRESS_REG_WB
:
4828 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== PRE_MODIFY
)
4829 cost
+= addr_cost
->pre_modify
;
4830 else if (c
== POST_INC
|| c
== POST_DEC
|| c
== POST_MODIFY
)
4831 cost
+= addr_cost
->post_modify
;
4837 case ADDRESS_REG_REG
:
4838 cost
+= addr_cost
->register_offset
;
4841 case ADDRESS_REG_UXTW
:
4842 case ADDRESS_REG_SXTW
:
4843 cost
+= addr_cost
->register_extend
;
4853 /* For the sake of calculating the cost of the shifted register
4854 component, we can treat same sized modes in the same way. */
4855 switch (GET_MODE_BITSIZE (mode
))
4858 cost
+= addr_cost
->addr_scale_costs
.hi
;
4862 cost
+= addr_cost
->addr_scale_costs
.si
;
4866 cost
+= addr_cost
->addr_scale_costs
.di
;
4869 /* We can't tell, or this is a 128-bit vector. */
4871 cost
+= addr_cost
->addr_scale_costs
.ti
;
4879 /* Return true if the RTX X in mode MODE is a zero or sign extract
4880 usable in an ADD or SUB (extended register) instruction. */
4882 aarch64_rtx_arith_op_extract_p (rtx x
, enum machine_mode mode
)
4884 /* Catch add with a sign extract.
4885 This is add_<optab><mode>_multp2. */
4886 if (GET_CODE (x
) == SIGN_EXTRACT
4887 || GET_CODE (x
) == ZERO_EXTRACT
)
4889 rtx op0
= XEXP (x
, 0);
4890 rtx op1
= XEXP (x
, 1);
4891 rtx op2
= XEXP (x
, 2);
4893 if (GET_CODE (op0
) == MULT
4894 && CONST_INT_P (op1
)
4895 && op2
== const0_rtx
4896 && CONST_INT_P (XEXP (op0
, 1))
4897 && aarch64_is_extend_from_extract (mode
,
4909 aarch64_frint_unspec_p (unsigned int u
)
4927 /* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
4928 storing it in *COST. Result is true if the total cost of the operation
4929 has now been calculated. */
4931 aarch64_if_then_else_costs (rtx op0
, rtx op1
, rtx op2
, int *cost
, bool speed
)
4935 enum rtx_code cmpcode
;
4937 if (COMPARISON_P (op0
))
4939 inner
= XEXP (op0
, 0);
4940 comparator
= XEXP (op0
, 1);
4941 cmpcode
= GET_CODE (op0
);
4946 comparator
= const0_rtx
;
4950 if (GET_CODE (op1
) == PC
|| GET_CODE (op2
) == PC
)
4952 /* Conditional branch. */
4953 if (GET_MODE_CLASS (GET_MODE (inner
)) == MODE_CC
)
4957 if (cmpcode
== NE
|| cmpcode
== EQ
)
4959 if (comparator
== const0_rtx
)
4961 /* TBZ/TBNZ/CBZ/CBNZ. */
4962 if (GET_CODE (inner
) == ZERO_EXTRACT
)
4964 *cost
+= rtx_cost (XEXP (inner
, 0), ZERO_EXTRACT
,
4968 *cost
+= rtx_cost (inner
, cmpcode
, 0, speed
);
4973 else if (cmpcode
== LT
|| cmpcode
== GE
)
4976 if (comparator
== const0_rtx
)
4981 else if (GET_MODE_CLASS (GET_MODE (inner
)) == MODE_CC
)
4983 /* It's a conditional operation based on the status flags,
4984 so it must be some flavor of CSEL. */
4986 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
4987 if (GET_CODE (op1
) == NEG
4988 || GET_CODE (op1
) == NOT
4989 || (GET_CODE (op1
) == PLUS
&& XEXP (op1
, 1) == const1_rtx
))
4990 op1
= XEXP (op1
, 0);
4992 *cost
+= rtx_cost (op1
, IF_THEN_ELSE
, 1, speed
);
4993 *cost
+= rtx_cost (op2
, IF_THEN_ELSE
, 2, speed
);
4997 /* We don't know what this is, cost all operands. */
5001 /* Calculate the cost of calculating X, storing it in *COST. Result
5002 is true if the total cost of the operation has now been calculated. */
5004 aarch64_rtx_costs (rtx x
, int code
, int outer ATTRIBUTE_UNUSED
,
5005 int param ATTRIBUTE_UNUSED
, int *cost
, bool speed
)
5008 const struct cpu_cost_table
*extra_cost
5009 = aarch64_tune_params
->insn_extra_cost
;
5010 enum machine_mode mode
= GET_MODE (x
);
5012 /* By default, assume that everything has equivalent cost to the
5013 cheapest instruction. Any additional costs are applied as a delta
5014 above this default. */
5015 *cost
= COSTS_N_INSNS (1);
5017 /* TODO: The cost infrastructure currently does not handle
5018 vector operations. Assume that all vector operations
5019 are equally expensive. */
5020 if (VECTOR_MODE_P (mode
))
5023 *cost
+= extra_cost
->vect
.alu
;
5030 /* The cost depends entirely on the operands to SET. */
5035 switch (GET_CODE (op0
))
5040 rtx address
= XEXP (op0
, 0);
5041 if (GET_MODE_CLASS (mode
) == MODE_INT
)
5042 *cost
+= extra_cost
->ldst
.store
;
5043 else if (mode
== SFmode
)
5044 *cost
+= extra_cost
->ldst
.storef
;
5045 else if (mode
== DFmode
)
5046 *cost
+= extra_cost
->ldst
.stored
;
5049 COSTS_N_INSNS (aarch64_address_cost (address
, mode
,
5053 *cost
+= rtx_cost (op1
, SET
, 1, speed
);
5057 if (! REG_P (SUBREG_REG (op0
)))
5058 *cost
+= rtx_cost (SUBREG_REG (op0
), SET
, 0, speed
);
5062 /* const0_rtx is in general free, but we will use an
5063 instruction to set a register to 0. */
5064 if (REG_P (op1
) || op1
== const0_rtx
)
5066 /* The cost is 1 per register copied. */
5067 int n_minus_1
= (GET_MODE_SIZE (GET_MODE (op0
)) - 1)
5069 *cost
= COSTS_N_INSNS (n_minus_1
+ 1);
5072 /* Cost is just the cost of the RHS of the set. */
5073 *cost
+= rtx_cost (op1
, SET
, 1, speed
);
5078 /* Bit-field insertion. Strip any redundant widening of
5079 the RHS to meet the width of the target. */
5080 if (GET_CODE (op1
) == SUBREG
)
5081 op1
= SUBREG_REG (op1
);
5082 if ((GET_CODE (op1
) == ZERO_EXTEND
5083 || GET_CODE (op1
) == SIGN_EXTEND
)
5084 && GET_CODE (XEXP (op0
, 1)) == CONST_INT
5085 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1
, 0)))
5086 >= INTVAL (XEXP (op0
, 1))))
5087 op1
= XEXP (op1
, 0);
5089 if (CONST_INT_P (op1
))
5091 /* MOV immediate is assumed to always be cheap. */
5092 *cost
= COSTS_N_INSNS (1);
5098 *cost
+= extra_cost
->alu
.bfi
;
5099 *cost
+= rtx_cost (op1
, (enum rtx_code
) code
, 1, speed
);
5105 /* We can't make sense of this, assume default cost. */
5106 *cost
= COSTS_N_INSNS (1);
5112 /* If an instruction can incorporate a constant within the
5113 instruction, the instruction's expression avoids calling
5114 rtx_cost() on the constant. If rtx_cost() is called on a
5115 constant, then it is usually because the constant must be
5116 moved into a register by one or more instructions.
5118 The exception is constant 0, which can be expressed
5119 as XZR/WZR and is therefore free. The exception to this is
5120 if we have (set (reg) (const0_rtx)) in which case we must cost
5121 the move. However, we can catch that when we cost the SET, so
5122 we don't need to consider that here. */
5123 if (x
== const0_rtx
)
5127 /* To an approximation, building any other constant is
5128 proportionally expensive to the number of instructions
5129 required to build that constant. This is true whether we
5130 are compiling for SPEED or otherwise. */
5131 *cost
= COSTS_N_INSNS (aarch64_build_constant (0,
5140 /* mov[df,sf]_aarch64. */
5141 if (aarch64_float_const_representable_p (x
))
5142 /* FMOV (scalar immediate). */
5143 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
5144 else if (!aarch64_float_const_zero_rtx_p (x
))
5146 /* This will be a load from memory. */
5148 *cost
+= extra_cost
->ldst
.loadd
;
5150 *cost
+= extra_cost
->ldst
.loadf
;
5153 /* Otherwise this is +0.0. We get this using MOVI d0, #0
5154 or MOV v0.s[0], wzr - neither of which are modeled by the
5155 cost tables. Just use the default cost. */
5165 /* For loads we want the base cost of a load, plus an
5166 approximation for the additional cost of the addressing
5168 rtx address
= XEXP (x
, 0);
5169 if (GET_MODE_CLASS (mode
) == MODE_INT
)
5170 *cost
+= extra_cost
->ldst
.load
;
5171 else if (mode
== SFmode
)
5172 *cost
+= extra_cost
->ldst
.loadf
;
5173 else if (mode
== DFmode
)
5174 *cost
+= extra_cost
->ldst
.loadd
;
5177 COSTS_N_INSNS (aarch64_address_cost (address
, mode
,
5186 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
5188 if (GET_RTX_CLASS (GET_CODE (op0
)) == RTX_COMPARE
5189 || GET_RTX_CLASS (GET_CODE (op0
)) == RTX_COMM_COMPARE
)
5192 *cost
+= rtx_cost (XEXP (op0
, 0), NEG
, 0, speed
);
5196 /* Cost this as SUB wzr, X. */
5197 op0
= CONST0_RTX (GET_MODE (x
));
5202 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
5204 /* Support (neg(fma...)) as a single instruction only if
5205 sign of zeros is unimportant. This matches the decision
5206 making in aarch64.md. */
5207 if (GET_CODE (op0
) == FMA
&& !HONOR_SIGNED_ZEROS (GET_MODE (op0
)))
5210 *cost
= rtx_cost (op0
, NEG
, 0, speed
);
5215 *cost
+= extra_cost
->fp
[mode
== DFmode
].neg
;
5224 *cost
+= extra_cost
->alu
.clz
;
5232 if (op1
== const0_rtx
5233 && GET_CODE (op0
) == AND
)
5239 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
)
5241 /* TODO: A write to the CC flags possibly costs extra, this
5242 needs encoding in the cost tables. */
5244 /* CC_ZESWPmode supports zero extend for free. */
5245 if (GET_MODE (x
) == CC_ZESWPmode
&& GET_CODE (op0
) == ZERO_EXTEND
)
5246 op0
= XEXP (op0
, 0);
5249 if (GET_CODE (op0
) == AND
)
5255 if (GET_CODE (op0
) == PLUS
)
5257 /* ADDS (and CMN alias). */
5262 if (GET_CODE (op0
) == MINUS
)
5269 if (GET_CODE (op1
) == NEG
)
5273 *cost
+= extra_cost
->alu
.arith
;
5275 *cost
+= rtx_cost (op0
, COMPARE
, 0, speed
);
5276 *cost
+= rtx_cost (XEXP (op1
, 0), NEG
, 1, speed
);
5282 Compare can freely swap the order of operands, and
5283 canonicalization puts the more complex operation first.
5284 But the integer MINUS logic expects the shift/extend
5285 operation in op1. */
5287 || (GET_CODE (op0
) == SUBREG
&& REG_P (SUBREG_REG (op0
)))))
5295 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
5299 *cost
+= extra_cost
->fp
[mode
== DFmode
].compare
;
5301 if (CONST_DOUBLE_P (op1
) && aarch64_float_const_zero_rtx_p (op1
))
5303 /* FCMP supports constant 0.0 for no extra cost. */
5317 /* Detect valid immediates. */
5318 if ((GET_MODE_CLASS (mode
) == MODE_INT
5319 || (GET_MODE_CLASS (mode
) == MODE_CC
5320 && GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
))
5321 && CONST_INT_P (op1
)
5322 && aarch64_uimm12_shift (INTVAL (op1
)))
5324 *cost
+= rtx_cost (op0
, MINUS
, 0, speed
);
5327 /* SUB(S) (immediate). */
5328 *cost
+= extra_cost
->alu
.arith
;
5333 /* Look for SUB (extended register). */
5334 if (aarch64_rtx_arith_op_extract_p (op1
, mode
))
5337 *cost
+= extra_cost
->alu
.arith_shift
;
5339 *cost
+= rtx_cost (XEXP (XEXP (op1
, 0), 0),
5340 (enum rtx_code
) GET_CODE (op1
),
5345 rtx new_op1
= aarch64_strip_extend (op1
);
5347 /* Cost this as an FMA-alike operation. */
5348 if ((GET_CODE (new_op1
) == MULT
5349 || GET_CODE (new_op1
) == ASHIFT
)
5352 *cost
+= aarch64_rtx_mult_cost (new_op1
, MULT
,
5353 (enum rtx_code
) code
,
5355 *cost
+= rtx_cost (op0
, MINUS
, 0, speed
);
5359 *cost
+= rtx_cost (new_op1
, MINUS
, 1, speed
);
5363 if (GET_MODE_CLASS (mode
) == MODE_INT
)
5365 *cost
+= extra_cost
->alu
.arith
;
5366 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5368 *cost
+= extra_cost
->fp
[mode
== DFmode
].addsub
;
5381 if (GET_RTX_CLASS (GET_CODE (op0
)) == RTX_COMPARE
5382 || GET_RTX_CLASS (GET_CODE (op0
)) == RTX_COMM_COMPARE
)
5385 *cost
+= rtx_cost (XEXP (op0
, 0), PLUS
, 0, speed
);
5386 *cost
+= rtx_cost (op1
, PLUS
, 1, speed
);
5390 if (GET_MODE_CLASS (mode
) == MODE_INT
5391 && CONST_INT_P (op1
)
5392 && aarch64_uimm12_shift (INTVAL (op1
)))
5394 *cost
+= rtx_cost (op0
, PLUS
, 0, speed
);
5397 /* ADD (immediate). */
5398 *cost
+= extra_cost
->alu
.arith
;
5402 /* Look for ADD (extended register). */
5403 if (aarch64_rtx_arith_op_extract_p (op0
, mode
))
5406 *cost
+= extra_cost
->alu
.arith_shift
;
5408 *cost
+= rtx_cost (XEXP (XEXP (op0
, 0), 0),
5409 (enum rtx_code
) GET_CODE (op0
),
5414 /* Strip any extend, leave shifts behind as we will
5415 cost them through mult_cost. */
5416 new_op0
= aarch64_strip_extend (op0
);
5418 if (GET_CODE (new_op0
) == MULT
5419 || GET_CODE (new_op0
) == ASHIFT
)
5421 *cost
+= aarch64_rtx_mult_cost (new_op0
, MULT
, PLUS
,
5423 *cost
+= rtx_cost (op1
, PLUS
, 1, speed
);
5427 *cost
+= (rtx_cost (new_op0
, PLUS
, 0, speed
)
5428 + rtx_cost (op1
, PLUS
, 1, speed
));
5432 if (GET_MODE_CLASS (mode
) == MODE_INT
)
5434 *cost
+= extra_cost
->alu
.arith
;
5435 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5437 *cost
+= extra_cost
->fp
[mode
== DFmode
].addsub
;
5443 *cost
= COSTS_N_INSNS (1);
5446 *cost
+= extra_cost
->alu
.rev
;
5451 if (aarch_rev16_p (x
))
5453 *cost
= COSTS_N_INSNS (1);
5456 *cost
+= extra_cost
->alu
.rev
;
5468 && GET_CODE (op0
) == MULT
5469 && CONST_INT_P (XEXP (op0
, 1))
5470 && CONST_INT_P (op1
)
5471 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0
, 1))),
5474 /* This is a UBFM/SBFM. */
5475 *cost
+= rtx_cost (XEXP (op0
, 0), ZERO_EXTRACT
, 0, speed
);
5477 *cost
+= extra_cost
->alu
.bfx
;
5481 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
5483 /* We possibly get the immediate for free, this is not
5485 if (CONST_INT_P (op1
)
5486 && aarch64_bitmask_imm (INTVAL (op1
), GET_MODE (x
)))
5488 *cost
+= rtx_cost (op0
, (enum rtx_code
) code
, 0, speed
);
5491 *cost
+= extra_cost
->alu
.logical
;
5499 /* Handle ORN, EON, or BIC. */
5500 if (GET_CODE (op0
) == NOT
)
5501 op0
= XEXP (op0
, 0);
5503 new_op0
= aarch64_strip_shift (op0
);
5505 /* If we had a shift on op0 then this is a logical-shift-
5506 by-register/immediate operation. Otherwise, this is just
5507 a logical operation. */
5512 /* Shift by immediate. */
5513 if (CONST_INT_P (XEXP (op0
, 1)))
5514 *cost
+= extra_cost
->alu
.log_shift
;
5516 *cost
+= extra_cost
->alu
.log_shift_reg
;
5519 *cost
+= extra_cost
->alu
.logical
;
5522 /* In both cases we want to cost both operands. */
5523 *cost
+= rtx_cost (new_op0
, (enum rtx_code
) code
, 0, speed
)
5524 + rtx_cost (op1
, (enum rtx_code
) code
, 1, speed
);
5534 *cost
+= extra_cost
->alu
.logical
;
5536 /* The logical instruction could have the shifted register form,
5537 but the cost is the same if the shift is processed as a separate
5538 instruction, so we don't bother with it here. */
5544 /* If a value is written in SI mode, then zero extended to DI
5545 mode, the operation will in general be free as a write to
5546 a 'w' register implicitly zeroes the upper bits of an 'x'
5547 register. However, if this is
5549 (set (reg) (zero_extend (reg)))
5551 we must cost the explicit register move. */
5553 && GET_MODE (op0
) == SImode
5556 int op_cost
= rtx_cost (XEXP (x
, 0), ZERO_EXTEND
, 0, speed
);
5558 if (!op_cost
&& speed
)
5560 *cost
+= extra_cost
->alu
.extend
;
5562 /* Free, the cost is that of the SI mode operation. */
5567 else if (MEM_P (XEXP (x
, 0)))
5569 /* All loads can zero extend to any size for free. */
5570 *cost
= rtx_cost (XEXP (x
, 0), ZERO_EXTEND
, param
, speed
);
5576 *cost
+= extra_cost
->alu
.extend
;
5581 if (MEM_P (XEXP (x
, 0)))
5586 rtx address
= XEXP (XEXP (x
, 0), 0);
5587 *cost
+= extra_cost
->ldst
.load_sign_extend
;
5590 COSTS_N_INSNS (aarch64_address_cost (address
, mode
,
5597 *cost
+= extra_cost
->alu
.extend
;
5604 if (CONST_INT_P (op1
))
5606 /* LSL (immediate), UBMF, UBFIZ and friends. These are all
5609 *cost
+= extra_cost
->alu
.shift
;
5611 /* We can incorporate zero/sign extend for free. */
5612 if (GET_CODE (op0
) == ZERO_EXTEND
5613 || GET_CODE (op0
) == SIGN_EXTEND
)
5614 op0
= XEXP (op0
, 0);
5616 *cost
+= rtx_cost (op0
, ASHIFT
, 0, speed
);
5623 *cost
+= extra_cost
->alu
.shift_reg
;
5625 return false; /* All arguments need to be in registers. */
5635 if (CONST_INT_P (op1
))
5637 /* ASR (immediate) and friends. */
5639 *cost
+= extra_cost
->alu
.shift
;
5641 *cost
+= rtx_cost (op0
, (enum rtx_code
) code
, 0, speed
);
5647 /* ASR (register) and friends. */
5649 *cost
+= extra_cost
->alu
.shift_reg
;
5651 return false; /* All arguments need to be in registers. */
5656 if (aarch64_cmodel
== AARCH64_CMODEL_LARGE
)
5660 *cost
+= extra_cost
->ldst
.load
;
5662 else if (aarch64_cmodel
== AARCH64_CMODEL_SMALL
5663 || aarch64_cmodel
== AARCH64_CMODEL_SMALL_PIC
)
5665 /* ADRP, followed by ADD. */
5666 *cost
+= COSTS_N_INSNS (1);
5668 *cost
+= 2 * extra_cost
->alu
.arith
;
5670 else if (aarch64_cmodel
== AARCH64_CMODEL_TINY
5671 || aarch64_cmodel
== AARCH64_CMODEL_TINY_PIC
)
5675 *cost
+= extra_cost
->alu
.arith
;
5680 /* One extra load instruction, after accessing the GOT. */
5681 *cost
+= COSTS_N_INSNS (1);
5683 *cost
+= extra_cost
->ldst
.load
;
5689 /* ADRP/ADD (immediate). */
5691 *cost
+= extra_cost
->alu
.arith
;
5698 *cost
+= extra_cost
->alu
.bfx
;
5700 /* We can trust that the immediates used will be correct (there
5701 are no by-register forms), so we need only cost op0. */
5702 *cost
+= rtx_cost (XEXP (x
, 0), (enum rtx_code
) code
, 0, speed
);
5706 *cost
+= aarch64_rtx_mult_cost (x
, MULT
, 0, speed
);
5707 /* aarch64_rtx_mult_cost always handles recursion to its
5715 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
5716 *cost
+= (extra_cost
->mult
[GET_MODE (x
) == DImode
].add
5717 + extra_cost
->mult
[GET_MODE (x
) == DImode
].idiv
);
5718 else if (GET_MODE (x
) == DFmode
)
5719 *cost
+= (extra_cost
->fp
[1].mult
5720 + extra_cost
->fp
[1].div
);
5721 else if (GET_MODE (x
) == SFmode
)
5722 *cost
+= (extra_cost
->fp
[0].mult
5723 + extra_cost
->fp
[0].div
);
5725 return false; /* All arguments need to be in registers. */
5732 if (GET_MODE_CLASS (mode
) == MODE_INT
)
5733 /* There is no integer SQRT, so only DIV and UDIV can get
5735 *cost
+= extra_cost
->mult
[mode
== DImode
].idiv
;
5737 *cost
+= extra_cost
->fp
[mode
== DFmode
].div
;
5739 return false; /* All arguments need to be in registers. */
5742 return aarch64_if_then_else_costs (XEXP (x
, 0), XEXP (x
, 1),
5743 XEXP (x
, 2), cost
, speed
);
5756 return false; /* All arguments must be in registers. */
5764 *cost
+= extra_cost
->fp
[mode
== DFmode
].fma
;
5766 /* FMSUB, FNMADD, and FNMSUB are free. */
5767 if (GET_CODE (op0
) == NEG
)
5768 op0
= XEXP (op0
, 0);
5770 if (GET_CODE (op2
) == NEG
)
5771 op2
= XEXP (op2
, 0);
5773 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
5774 and the by-element operand as operand 0. */
5775 if (GET_CODE (op1
) == NEG
)
5776 op1
= XEXP (op1
, 0);
5778 /* Catch vector-by-element operations. The by-element operand can
5779 either be (vec_duplicate (vec_select (x))) or just
5780 (vec_select (x)), depending on whether we are multiplying by
5781 a vector or a scalar.
5783 Canonicalization is not very good in these cases, FMA4 will put the
5784 by-element operand as operand 0, FNMA4 will have it as operand 1. */
5785 if (GET_CODE (op0
) == VEC_DUPLICATE
)
5786 op0
= XEXP (op0
, 0);
5787 else if (GET_CODE (op1
) == VEC_DUPLICATE
)
5788 op1
= XEXP (op1
, 0);
5790 if (GET_CODE (op0
) == VEC_SELECT
)
5791 op0
= XEXP (op0
, 0);
5792 else if (GET_CODE (op1
) == VEC_SELECT
)
5793 op1
= XEXP (op1
, 0);
5795 /* If the remaining parameters are not registers,
5796 get the cost to put them into registers. */
5797 *cost
+= rtx_cost (op0
, FMA
, 0, speed
);
5798 *cost
+= rtx_cost (op1
, FMA
, 1, speed
);
5799 *cost
+= rtx_cost (op2
, FMA
, 2, speed
);
5804 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
5807 case FLOAT_TRUNCATE
:
5809 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
5815 /* Strip the rounding part. They will all be implemented
5816 by the fcvt* family of instructions anyway. */
5817 if (GET_CODE (x
) == UNSPEC
)
5819 unsigned int uns_code
= XINT (x
, 1);
5821 if (uns_code
== UNSPEC_FRINTA
5822 || uns_code
== UNSPEC_FRINTM
5823 || uns_code
== UNSPEC_FRINTN
5824 || uns_code
== UNSPEC_FRINTP
5825 || uns_code
== UNSPEC_FRINTZ
)
5826 x
= XVECEXP (x
, 0, 0);
5830 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].toint
;
5832 *cost
+= rtx_cost (x
, (enum rtx_code
) code
, 0, speed
);
5836 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5838 /* FABS and FNEG are analogous. */
5840 *cost
+= extra_cost
->fp
[mode
== DFmode
].neg
;
5844 /* Integer ABS will either be split to
5845 two arithmetic instructions, or will be an ABS
5846 (scalar), which we don't model. */
5847 *cost
= COSTS_N_INSNS (2);
5849 *cost
+= 2 * extra_cost
->alu
.arith
;
5857 /* FMAXNM/FMINNM/FMAX/FMIN.
5858 TODO: This may not be accurate for all implementations, but
5859 we do not model this in the cost tables. */
5860 *cost
+= extra_cost
->fp
[mode
== DFmode
].addsub
;
5865 /* The floating point round to integer frint* instructions. */
5866 if (aarch64_frint_unspec_p (XINT (x
, 1)))
5869 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
5874 if (XINT (x
, 1) == UNSPEC_RBIT
)
5877 *cost
+= extra_cost
->alu
.rev
;
5885 /* Decompose <su>muldi3_highpart. */
5886 if (/* (truncate:DI */
5889 && GET_MODE (XEXP (x
, 0)) == TImode
5890 && GET_CODE (XEXP (x
, 0)) == LSHIFTRT
5892 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
5893 /* (ANY_EXTEND:TI (reg:DI))
5894 (ANY_EXTEND:TI (reg:DI))) */
5895 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
5896 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == ZERO_EXTEND
)
5897 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
5898 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
))
5899 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x
, 0), 0), 0), 0)) == DImode
5900 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x
, 0), 0), 1), 0)) == DImode
5901 /* (const_int 64) */
5902 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
5903 && UINTVAL (XEXP (XEXP (x
, 0), 1)) == 64)
5907 *cost
+= extra_cost
->mult
[mode
== DImode
].extend
;
5908 *cost
+= rtx_cost (XEXP (XEXP (XEXP (XEXP (x
, 0), 0), 0), 0),
5910 *cost
+= rtx_cost (XEXP (XEXP (XEXP (XEXP (x
, 0), 0), 1), 0),
5920 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5922 "\nFailed to cost RTX. Assuming default cost.\n");
5927 /* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
5928 calculated for X. This cost is stored in *COST. Returns true
5929 if the total cost of X was calculated. */
5931 aarch64_rtx_costs_wrapper (rtx x
, int code
, int outer
,
5932 int param
, int *cost
, bool speed
)
5934 bool result
= aarch64_rtx_costs (x
, code
, outer
, param
, cost
, speed
);
5936 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5938 print_rtl_single (dump_file
, x
);
5939 fprintf (dump_file
, "\n%s cost: %d (%s)\n",
5940 speed
? "Hot" : "Cold",
5941 *cost
, result
? "final" : "partial");
5948 aarch64_register_move_cost (enum machine_mode mode
,
5949 reg_class_t from_i
, reg_class_t to_i
)
5951 enum reg_class from
= (enum reg_class
) from_i
;
5952 enum reg_class to
= (enum reg_class
) to_i
;
5953 const struct cpu_regmove_cost
*regmove_cost
5954 = aarch64_tune_params
->regmove_cost
;
5956 /* Moving between GPR and stack cost is the same as GP2GP. */
5957 if ((from
== GENERAL_REGS
&& to
== STACK_REG
)
5958 || (to
== GENERAL_REGS
&& from
== STACK_REG
))
5959 return regmove_cost
->GP2GP
;
5961 /* To/From the stack register, we move via the gprs. */
5962 if (to
== STACK_REG
|| from
== STACK_REG
)
5963 return aarch64_register_move_cost (mode
, from
, GENERAL_REGS
)
5964 + aarch64_register_move_cost (mode
, GENERAL_REGS
, to
);
5966 if (from
== GENERAL_REGS
&& to
== GENERAL_REGS
)
5967 return regmove_cost
->GP2GP
;
5968 else if (from
== GENERAL_REGS
)
5969 return regmove_cost
->GP2FP
;
5970 else if (to
== GENERAL_REGS
)
5971 return regmove_cost
->FP2GP
;
5973 /* When AdvSIMD instructions are disabled it is not possible to move
5974 a 128-bit value directly between Q registers. This is handled in
5975 secondary reload. A general register is used as a scratch to move
5976 the upper DI value and the lower DI value is moved directly,
5977 hence the cost is the sum of three moves. */
5978 if (! TARGET_SIMD
&& GET_MODE_SIZE (mode
) == 128)
5979 return regmove_cost
->GP2FP
+ regmove_cost
->FP2GP
+ regmove_cost
->FP2FP
;
5981 return regmove_cost
->FP2FP
;
5985 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED
,
5986 reg_class_t rclass ATTRIBUTE_UNUSED
,
5987 bool in ATTRIBUTE_UNUSED
)
5989 return aarch64_tune_params
->memmov_cost
;
5992 /* Return the number of instructions that can be issued per cycle. */
5994 aarch64_sched_issue_rate (void)
5996 return aarch64_tune_params
->issue_rate
;
5999 /* Vectorizer cost model target hooks. */
6001 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6003 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
6005 int misalign ATTRIBUTE_UNUSED
)
6009 switch (type_of_cost
)
6012 return aarch64_tune_params
->vec_costs
->scalar_stmt_cost
;
6015 return aarch64_tune_params
->vec_costs
->scalar_load_cost
;
6018 return aarch64_tune_params
->vec_costs
->scalar_store_cost
;
6021 return aarch64_tune_params
->vec_costs
->vec_stmt_cost
;
6024 return aarch64_tune_params
->vec_costs
->vec_align_load_cost
;
6027 return aarch64_tune_params
->vec_costs
->vec_store_cost
;
6030 return aarch64_tune_params
->vec_costs
->vec_to_scalar_cost
;
6033 return aarch64_tune_params
->vec_costs
->scalar_to_vec_cost
;
6035 case unaligned_load
:
6036 return aarch64_tune_params
->vec_costs
->vec_unalign_load_cost
;
6038 case unaligned_store
:
6039 return aarch64_tune_params
->vec_costs
->vec_unalign_store_cost
;
6041 case cond_branch_taken
:
6042 return aarch64_tune_params
->vec_costs
->cond_taken_branch_cost
;
6044 case cond_branch_not_taken
:
6045 return aarch64_tune_params
->vec_costs
->cond_not_taken_branch_cost
;
6048 case vec_promote_demote
:
6049 return aarch64_tune_params
->vec_costs
->vec_stmt_cost
;
6052 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
6053 return elements
/ 2 + 1;
6060 /* Implement targetm.vectorize.add_stmt_cost. */
6062 aarch64_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
6063 struct _stmt_vec_info
*stmt_info
, int misalign
,
6064 enum vect_cost_model_location where
)
6066 unsigned *cost
= (unsigned *) data
;
6067 unsigned retval
= 0;
6069 if (flag_vect_cost_model
)
6071 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
6073 aarch64_builtin_vectorization_cost (kind
, vectype
, misalign
);
6075 /* Statements in an inner loop relative to the loop being
6076 vectorized are weighted more heavily. The value here is
6077 a function (linear for now) of the loop nest level. */
6078 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
6080 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6081 struct loop
*loop
= LOOP_VINFO_LOOP (loop_info
);
6082 unsigned nest_level
= loop_depth (loop
);
6084 count
*= nest_level
;
6087 retval
= (unsigned) (count
* stmt_cost
);
6088 cost
[where
] += retval
;
6094 static void initialize_aarch64_code_model (void);
6096 /* Parse the architecture extension string. */
6099 aarch64_parse_extension (char *str
)
6101 /* The extension string is parsed left to right. */
6102 const struct aarch64_option_extension
*opt
= NULL
;
6104 /* Flag to say whether we are adding or removing an extension. */
6105 int adding_ext
= -1;
6107 while (str
!= NULL
&& *str
!= 0)
6113 ext
= strchr (str
, '+');
6120 if (len
>= 2 && strncmp (str
, "no", 2) == 0)
6131 error ("missing feature modifier after %qs", "+no");
6135 /* Scan over the extensions table trying to find an exact match. */
6136 for (opt
= all_extensions
; opt
->name
!= NULL
; opt
++)
6138 if (strlen (opt
->name
) == len
&& strncmp (opt
->name
, str
, len
) == 0)
6140 /* Add or remove the extension. */
6142 aarch64_isa_flags
|= opt
->flags_on
;
6144 aarch64_isa_flags
&= ~(opt
->flags_off
);
6149 if (opt
->name
== NULL
)
6151 /* Extension not found in list. */
6152 error ("unknown feature modifier %qs", str
);
6162 /* Parse the ARCH string. */
6165 aarch64_parse_arch (void)
6168 const struct processor
*arch
;
6169 char *str
= (char *) alloca (strlen (aarch64_arch_string
) + 1);
6172 strcpy (str
, aarch64_arch_string
);
6174 ext
= strchr (str
, '+');
6183 error ("missing arch name in -march=%qs", str
);
6187 /* Loop through the list of supported ARCHs to find a match. */
6188 for (arch
= all_architectures
; arch
->name
!= NULL
; arch
++)
6190 if (strlen (arch
->name
) == len
&& strncmp (arch
->name
, str
, len
) == 0)
6192 selected_arch
= arch
;
6193 aarch64_isa_flags
= selected_arch
->flags
;
6196 selected_cpu
= &all_cores
[selected_arch
->core
];
6200 /* ARCH string contains at least one extension. */
6201 aarch64_parse_extension (ext
);
6204 if (strcmp (selected_arch
->arch
, selected_cpu
->arch
))
6206 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
6207 selected_cpu
->name
, selected_arch
->name
);
6214 /* ARCH name not found in list. */
6215 error ("unknown value %qs for -march", str
);
6219 /* Parse the CPU string. */
6222 aarch64_parse_cpu (void)
6225 const struct processor
*cpu
;
6226 char *str
= (char *) alloca (strlen (aarch64_cpu_string
) + 1);
6229 strcpy (str
, aarch64_cpu_string
);
6231 ext
= strchr (str
, '+');
6240 error ("missing cpu name in -mcpu=%qs", str
);
6244 /* Loop through the list of supported CPUs to find a match. */
6245 for (cpu
= all_cores
; cpu
->name
!= NULL
; cpu
++)
6247 if (strlen (cpu
->name
) == len
&& strncmp (cpu
->name
, str
, len
) == 0)
6250 selected_tune
= cpu
;
6251 aarch64_isa_flags
= selected_cpu
->flags
;
6255 /* CPU string contains at least one extension. */
6256 aarch64_parse_extension (ext
);
6263 /* CPU name not found in list. */
6264 error ("unknown value %qs for -mcpu", str
);
6268 /* Parse the TUNE string. */
6271 aarch64_parse_tune (void)
6273 const struct processor
*cpu
;
6274 char *str
= (char *) alloca (strlen (aarch64_tune_string
) + 1);
6275 strcpy (str
, aarch64_tune_string
);
6277 /* Loop through the list of supported CPUs to find a match. */
6278 for (cpu
= all_cores
; cpu
->name
!= NULL
; cpu
++)
6280 if (strcmp (cpu
->name
, str
) == 0)
6282 selected_tune
= cpu
;
6287 /* CPU name not found in list. */
6288 error ("unknown value %qs for -mtune", str
);
6293 /* Implement TARGET_OPTION_OVERRIDE. */
6296 aarch64_override_options (void)
6298 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
6299 If either of -march or -mtune is given, they override their
6300 respective component of -mcpu.
6302 So, first parse AARCH64_CPU_STRING, then the others, be careful
6303 with -march as, if -mcpu is not present on the command line, march
6304 must set a sensible default CPU. */
6305 if (aarch64_cpu_string
)
6307 aarch64_parse_cpu ();
6310 if (aarch64_arch_string
)
6312 aarch64_parse_arch ();
6315 if (aarch64_tune_string
)
6317 aarch64_parse_tune ();
6320 #ifndef HAVE_AS_MABI_OPTION
6321 /* The compiler may have been configured with 2.23.* binutils, which does
6322 not have support for ILP32. */
6324 error ("Assembler does not support -mabi=ilp32");
6327 initialize_aarch64_code_model ();
6329 aarch64_build_bitmask_table ();
6331 /* This target defaults to strict volatile bitfields. */
6332 if (flag_strict_volatile_bitfields
< 0 && abi_version_at_least (2))
6333 flag_strict_volatile_bitfields
= 1;
6335 /* If the user did not specify a processor, choose the default
6336 one for them. This will be the CPU set during configuration using
6337 --with-cpu, otherwise it is "generic". */
6340 selected_cpu
= &all_cores
[TARGET_CPU_DEFAULT
& 0x3f];
6341 aarch64_isa_flags
= TARGET_CPU_DEFAULT
>> 6;
6344 gcc_assert (selected_cpu
);
6346 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
6348 selected_tune
= &all_cores
[selected_cpu
->core
];
6350 aarch64_tune_flags
= selected_tune
->flags
;
6351 aarch64_tune
= selected_tune
->core
;
6352 aarch64_tune_params
= selected_tune
->tune
;
6354 aarch64_override_options_after_change ();
6357 /* Implement targetm.override_options_after_change. */
6360 aarch64_override_options_after_change (void)
6362 if (flag_omit_frame_pointer
)
6363 flag_omit_leaf_frame_pointer
= false;
6364 else if (flag_omit_leaf_frame_pointer
)
6365 flag_omit_frame_pointer
= true;
/* Allocate a fresh, zero-initialized per-function machine_function
   structure (installed as init_machine_status).  */
static struct machine_function *
aarch64_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_alloc_cleared_machine_function ();
  return machine;
}
6377 aarch64_init_expanders (void)
6379 init_machine_status
= aarch64_init_machine_status
;
6382 /* A checking mechanism for the implementation of the various code models. */
6384 initialize_aarch64_code_model (void)
6388 switch (aarch64_cmodel_var
)
6390 case AARCH64_CMODEL_TINY
:
6391 aarch64_cmodel
= AARCH64_CMODEL_TINY_PIC
;
6393 case AARCH64_CMODEL_SMALL
:
6394 aarch64_cmodel
= AARCH64_CMODEL_SMALL_PIC
;
6396 case AARCH64_CMODEL_LARGE
:
6397 sorry ("code model %qs with -f%s", "large",
6398 flag_pic
> 1 ? "PIC" : "pic");
6404 aarch64_cmodel
= aarch64_cmodel_var
;
6407 /* Return true if SYMBOL_REF X binds locally. */
6410 aarch64_symbol_binds_local_p (const_rtx x
)
6412 return (SYMBOL_REF_DECL (x
)
6413 ? targetm
.binds_local_p (SYMBOL_REF_DECL (x
))
6414 : SYMBOL_REF_LOCAL_P (x
));
6417 /* Return true if SYMBOL_REF X is thread local */
6419 aarch64_tls_symbol_p (rtx x
)
6421 if (! TARGET_HAVE_TLS
)
6424 if (GET_CODE (x
) != SYMBOL_REF
)
6427 return SYMBOL_REF_TLS_MODEL (x
) != 0;
6430 /* Classify a TLS symbol into one of the TLS kinds. */
6431 enum aarch64_symbol_type
6432 aarch64_classify_tls_symbol (rtx x
)
6434 enum tls_model tls_kind
= tls_symbolic_operand_type (x
);
6438 case TLS_MODEL_GLOBAL_DYNAMIC
:
6439 case TLS_MODEL_LOCAL_DYNAMIC
:
6440 return TARGET_TLS_DESC
? SYMBOL_SMALL_TLSDESC
: SYMBOL_SMALL_TLSGD
;
6442 case TLS_MODEL_INITIAL_EXEC
:
6443 return SYMBOL_SMALL_GOTTPREL
;
6445 case TLS_MODEL_LOCAL_EXEC
:
6446 return SYMBOL_SMALL_TPREL
;
6448 case TLS_MODEL_EMULATED
:
6449 case TLS_MODEL_NONE
:
6450 return SYMBOL_FORCE_TO_MEM
;
6457 /* Return the method that should be used to access SYMBOL_REF or
6458 LABEL_REF X in context CONTEXT. */
6460 enum aarch64_symbol_type
6461 aarch64_classify_symbol (rtx x
,
6462 enum aarch64_symbol_context context ATTRIBUTE_UNUSED
)
6464 if (GET_CODE (x
) == LABEL_REF
)
6466 switch (aarch64_cmodel
)
6468 case AARCH64_CMODEL_LARGE
:
6469 return SYMBOL_FORCE_TO_MEM
;
6471 case AARCH64_CMODEL_TINY_PIC
:
6472 case AARCH64_CMODEL_TINY
:
6473 return SYMBOL_TINY_ABSOLUTE
;
6475 case AARCH64_CMODEL_SMALL_PIC
:
6476 case AARCH64_CMODEL_SMALL
:
6477 return SYMBOL_SMALL_ABSOLUTE
;
6484 if (GET_CODE (x
) == SYMBOL_REF
)
6486 if (aarch64_cmodel
== AARCH64_CMODEL_LARGE
)
6487 return SYMBOL_FORCE_TO_MEM
;
6489 if (aarch64_tls_symbol_p (x
))
6490 return aarch64_classify_tls_symbol (x
);
6492 switch (aarch64_cmodel
)
6494 case AARCH64_CMODEL_TINY
:
6495 if (SYMBOL_REF_WEAK (x
))
6496 return SYMBOL_FORCE_TO_MEM
;
6497 return SYMBOL_TINY_ABSOLUTE
;
6499 case AARCH64_CMODEL_SMALL
:
6500 if (SYMBOL_REF_WEAK (x
))
6501 return SYMBOL_FORCE_TO_MEM
;
6502 return SYMBOL_SMALL_ABSOLUTE
;
6504 case AARCH64_CMODEL_TINY_PIC
:
6505 if (!aarch64_symbol_binds_local_p (x
))
6506 return SYMBOL_TINY_GOT
;
6507 return SYMBOL_TINY_ABSOLUTE
;
6509 case AARCH64_CMODEL_SMALL_PIC
:
6510 if (!aarch64_symbol_binds_local_p (x
))
6511 return SYMBOL_SMALL_GOT
;
6512 return SYMBOL_SMALL_ABSOLUTE
;
6519 /* By default push everything into the constant pool. */
6520 return SYMBOL_FORCE_TO_MEM
;
6524 aarch64_constant_address_p (rtx x
)
6526 return (CONSTANT_P (x
) && memory_address_p (DImode
, x
));
6530 aarch64_legitimate_pic_operand_p (rtx x
)
6532 if (GET_CODE (x
) == SYMBOL_REF
6533 || (GET_CODE (x
) == CONST
6534 && GET_CODE (XEXP (x
, 0)) == PLUS
6535 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
6541 /* Return true if X holds either a quarter-precision or
6542 floating-point +0.0 constant. */
6544 aarch64_valid_floating_const (enum machine_mode mode
, rtx x
)
6546 if (!CONST_DOUBLE_P (x
))
6549 /* TODO: We could handle moving 0.0 to a TFmode register,
6550 but first we would like to refactor the movtf_aarch64
6551 to be more amicable to split moves properly and
6552 correctly gate on TARGET_SIMD. For now - reject all
6553 constants which are not to SFmode or DFmode registers. */
6554 if (!(mode
== SFmode
|| mode
== DFmode
))
6557 if (aarch64_float_const_zero_rtx_p (x
))
6559 return aarch64_float_const_representable_p (x
);
6563 aarch64_legitimate_constant_p (enum machine_mode mode
, rtx x
)
6565 /* Do not allow vector struct mode constants. We could support
6566 0 and -1 easily, but they need support in aarch64-simd.md. */
6567 if (TARGET_SIMD
&& aarch64_vect_struct_mode_p (mode
))
6570 /* This could probably go away because
6571 we now decompose CONST_INTs according to expand_mov_immediate. */
6572 if ((GET_CODE (x
) == CONST_VECTOR
6573 && aarch64_simd_valid_immediate (x
, mode
, false, NULL
))
6574 || CONST_INT_P (x
) || aarch64_valid_floating_const (mode
, x
))
6575 return !targetm
.cannot_force_const_mem (mode
, x
);
6577 if (GET_CODE (x
) == HIGH
6578 && aarch64_valid_symref (XEXP (x
, 0), GET_MODE (XEXP (x
, 0))))
6581 return aarch64_constant_address_p (x
);
6585 aarch64_load_tp (rtx target
)
6588 || GET_MODE (target
) != Pmode
6589 || !register_operand (target
, Pmode
))
6590 target
= gen_reg_rtx (Pmode
);
6592 /* Can return in any reg. */
6593 emit_insn (gen_aarch64_load_tp_hard (target
));
6597 /* On AAPCS systems, this is the "struct __va_list". */
6598 static GTY(()) tree va_list_type
;
6600 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
6601 Return the type to use as __builtin_va_list.
6603 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
6615 aarch64_build_builtin_va_list (void)
6618 tree f_stack
, f_grtop
, f_vrtop
, f_groff
, f_vroff
;
6620 /* Create the type. */
6621 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
6622 /* Give it the required name. */
6623 va_list_name
= build_decl (BUILTINS_LOCATION
,
6625 get_identifier ("__va_list"),
6627 DECL_ARTIFICIAL (va_list_name
) = 1;
6628 TYPE_NAME (va_list_type
) = va_list_name
;
6629 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
6631 /* Create the fields. */
6632 f_stack
= build_decl (BUILTINS_LOCATION
,
6633 FIELD_DECL
, get_identifier ("__stack"),
6635 f_grtop
= build_decl (BUILTINS_LOCATION
,
6636 FIELD_DECL
, get_identifier ("__gr_top"),
6638 f_vrtop
= build_decl (BUILTINS_LOCATION
,
6639 FIELD_DECL
, get_identifier ("__vr_top"),
6641 f_groff
= build_decl (BUILTINS_LOCATION
,
6642 FIELD_DECL
, get_identifier ("__gr_offs"),
6644 f_vroff
= build_decl (BUILTINS_LOCATION
,
6645 FIELD_DECL
, get_identifier ("__vr_offs"),
6648 DECL_ARTIFICIAL (f_stack
) = 1;
6649 DECL_ARTIFICIAL (f_grtop
) = 1;
6650 DECL_ARTIFICIAL (f_vrtop
) = 1;
6651 DECL_ARTIFICIAL (f_groff
) = 1;
6652 DECL_ARTIFICIAL (f_vroff
) = 1;
6654 DECL_FIELD_CONTEXT (f_stack
) = va_list_type
;
6655 DECL_FIELD_CONTEXT (f_grtop
) = va_list_type
;
6656 DECL_FIELD_CONTEXT (f_vrtop
) = va_list_type
;
6657 DECL_FIELD_CONTEXT (f_groff
) = va_list_type
;
6658 DECL_FIELD_CONTEXT (f_vroff
) = va_list_type
;
6660 TYPE_FIELDS (va_list_type
) = f_stack
;
6661 DECL_CHAIN (f_stack
) = f_grtop
;
6662 DECL_CHAIN (f_grtop
) = f_vrtop
;
6663 DECL_CHAIN (f_vrtop
) = f_groff
;
6664 DECL_CHAIN (f_groff
) = f_vroff
;
6666 /* Compute its layout. */
6667 layout_type (va_list_type
);
6669 return va_list_type
;
6672 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
6674 aarch64_expand_builtin_va_start (tree valist
, rtx nextarg ATTRIBUTE_UNUSED
)
6676 const CUMULATIVE_ARGS
*cum
;
6677 tree f_stack
, f_grtop
, f_vrtop
, f_groff
, f_vroff
;
6678 tree stack
, grtop
, vrtop
, groff
, vroff
;
6680 int gr_save_area_size
;
6681 int vr_save_area_size
;
6684 cum
= &crtl
->args
.info
;
6686 = (NUM_ARG_REGS
- cum
->aapcs_ncrn
) * UNITS_PER_WORD
;
6688 = (NUM_FP_ARG_REGS
- cum
->aapcs_nvrn
) * UNITS_PER_VREG
;
6690 if (TARGET_GENERAL_REGS_ONLY
)
6692 if (cum
->aapcs_nvrn
> 0)
6693 sorry ("%qs and floating point or vector arguments",
6694 "-mgeneral-regs-only");
6695 vr_save_area_size
= 0;
6698 f_stack
= TYPE_FIELDS (va_list_type_node
);
6699 f_grtop
= DECL_CHAIN (f_stack
);
6700 f_vrtop
= DECL_CHAIN (f_grtop
);
6701 f_groff
= DECL_CHAIN (f_vrtop
);
6702 f_vroff
= DECL_CHAIN (f_groff
);
6704 stack
= build3 (COMPONENT_REF
, TREE_TYPE (f_stack
), valist
, f_stack
,
6706 grtop
= build3 (COMPONENT_REF
, TREE_TYPE (f_grtop
), valist
, f_grtop
,
6708 vrtop
= build3 (COMPONENT_REF
, TREE_TYPE (f_vrtop
), valist
, f_vrtop
,
6710 groff
= build3 (COMPONENT_REF
, TREE_TYPE (f_groff
), valist
, f_groff
,
6712 vroff
= build3 (COMPONENT_REF
, TREE_TYPE (f_vroff
), valist
, f_vroff
,
6715 /* Emit code to initialize STACK, which points to the next varargs stack
6716 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
6717 by named arguments. STACK is 8-byte aligned. */
6718 t
= make_tree (TREE_TYPE (stack
), virtual_incoming_args_rtx
);
6719 if (cum
->aapcs_stack_size
> 0)
6720 t
= fold_build_pointer_plus_hwi (t
, cum
->aapcs_stack_size
* UNITS_PER_WORD
);
6721 t
= build2 (MODIFY_EXPR
, TREE_TYPE (stack
), stack
, t
);
6722 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6724 /* Emit code to initialize GRTOP, the top of the GR save area.
6725 virtual_incoming_args_rtx should have been 16 byte aligned. */
6726 t
= make_tree (TREE_TYPE (grtop
), virtual_incoming_args_rtx
);
6727 t
= build2 (MODIFY_EXPR
, TREE_TYPE (grtop
), grtop
, t
);
6728 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6730 /* Emit code to initialize VRTOP, the top of the VR save area.
6731 This address is gr_save_area_bytes below GRTOP, rounded
6732 down to the next 16-byte boundary. */
6733 t
= make_tree (TREE_TYPE (vrtop
), virtual_incoming_args_rtx
);
6734 vr_offset
= AARCH64_ROUND_UP (gr_save_area_size
,
6735 STACK_BOUNDARY
/ BITS_PER_UNIT
);
6738 t
= fold_build_pointer_plus_hwi (t
, -vr_offset
);
6739 t
= build2 (MODIFY_EXPR
, TREE_TYPE (vrtop
), vrtop
, t
);
6740 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6742 /* Emit code to initialize GROFF, the offset from GRTOP of the
6743 next GPR argument. */
6744 t
= build2 (MODIFY_EXPR
, TREE_TYPE (groff
), groff
,
6745 build_int_cst (TREE_TYPE (groff
), -gr_save_area_size
));
6746 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6748 /* Likewise emit code to initialize VROFF, the offset from FTOP
6749 of the next VR argument. */
6750 t
= build2 (MODIFY_EXPR
, TREE_TYPE (vroff
), vroff
,
6751 build_int_cst (TREE_TYPE (vroff
), -vr_save_area_size
));
6752 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6755 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
6758 aarch64_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
6759 gimple_seq
*post_p ATTRIBUTE_UNUSED
)
6763 bool is_ha
; /* is HFA or HVA. */
6764 bool dw_align
; /* double-word align. */
6765 enum machine_mode ag_mode
= VOIDmode
;
6767 enum machine_mode mode
;
6769 tree f_stack
, f_grtop
, f_vrtop
, f_groff
, f_vroff
;
6770 tree stack
, f_top
, f_off
, off
, arg
, roundup
, on_stack
;
6771 HOST_WIDE_INT size
, rsize
, adjust
, align
;
6772 tree t
, u
, cond1
, cond2
;
6774 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
6776 type
= build_pointer_type (type
);
6778 mode
= TYPE_MODE (type
);
6780 f_stack
= TYPE_FIELDS (va_list_type_node
);
6781 f_grtop
= DECL_CHAIN (f_stack
);
6782 f_vrtop
= DECL_CHAIN (f_grtop
);
6783 f_groff
= DECL_CHAIN (f_vrtop
);
6784 f_vroff
= DECL_CHAIN (f_groff
);
6786 stack
= build3 (COMPONENT_REF
, TREE_TYPE (f_stack
), unshare_expr (valist
),
6787 f_stack
, NULL_TREE
);
6788 size
= int_size_in_bytes (type
);
6789 align
= aarch64_function_arg_alignment (mode
, type
) / BITS_PER_UNIT
;
6793 if (aarch64_vfp_is_call_or_return_candidate (mode
,
6799 /* TYPE passed in fp/simd registers. */
6800 if (TARGET_GENERAL_REGS_ONLY
)
6801 sorry ("%qs and floating point or vector arguments",
6802 "-mgeneral-regs-only");
6804 f_top
= build3 (COMPONENT_REF
, TREE_TYPE (f_vrtop
),
6805 unshare_expr (valist
), f_vrtop
, NULL_TREE
);
6806 f_off
= build3 (COMPONENT_REF
, TREE_TYPE (f_vroff
),
6807 unshare_expr (valist
), f_vroff
, NULL_TREE
);
6809 rsize
= nregs
* UNITS_PER_VREG
;
6813 if (BYTES_BIG_ENDIAN
&& GET_MODE_SIZE (ag_mode
) < UNITS_PER_VREG
)
6814 adjust
= UNITS_PER_VREG
- GET_MODE_SIZE (ag_mode
);
6816 else if (BLOCK_REG_PADDING (mode
, type
, 1) == downward
6817 && size
< UNITS_PER_VREG
)
6819 adjust
= UNITS_PER_VREG
- size
;
6824 /* TYPE passed in general registers. */
6825 f_top
= build3 (COMPONENT_REF
, TREE_TYPE (f_grtop
),
6826 unshare_expr (valist
), f_grtop
, NULL_TREE
);
6827 f_off
= build3 (COMPONENT_REF
, TREE_TYPE (f_groff
),
6828 unshare_expr (valist
), f_groff
, NULL_TREE
);
6829 rsize
= (size
+ UNITS_PER_WORD
- 1) & -UNITS_PER_WORD
;
6830 nregs
= rsize
/ UNITS_PER_WORD
;
6835 if (BLOCK_REG_PADDING (mode
, type
, 1) == downward
6836 && size
< UNITS_PER_WORD
)
6838 adjust
= UNITS_PER_WORD
- size
;
6842 /* Get a local temporary for the field value. */
6843 off
= get_initialized_tmp_var (f_off
, pre_p
, NULL
);
6845 /* Emit code to branch if off >= 0. */
6846 t
= build2 (GE_EXPR
, boolean_type_node
, off
,
6847 build_int_cst (TREE_TYPE (off
), 0));
6848 cond1
= build3 (COND_EXPR
, ptr_type_node
, t
, NULL_TREE
, NULL_TREE
);
6852 /* Emit: offs = (offs + 15) & -16. */
6853 t
= build2 (PLUS_EXPR
, TREE_TYPE (off
), off
,
6854 build_int_cst (TREE_TYPE (off
), 15));
6855 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (off
), t
,
6856 build_int_cst (TREE_TYPE (off
), -16));
6857 roundup
= build2 (MODIFY_EXPR
, TREE_TYPE (off
), off
, t
);
6862 /* Update ap.__[g|v]r_offs */
6863 t
= build2 (PLUS_EXPR
, TREE_TYPE (off
), off
,
6864 build_int_cst (TREE_TYPE (off
), rsize
));
6865 t
= build2 (MODIFY_EXPR
, TREE_TYPE (f_off
), unshare_expr (f_off
), t
);
6869 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), roundup
, t
);
6871 /* [cond2] if (ap.__[g|v]r_offs > 0) */
6872 u
= build2 (GT_EXPR
, boolean_type_node
, unshare_expr (f_off
),
6873 build_int_cst (TREE_TYPE (f_off
), 0));
6874 cond2
= build3 (COND_EXPR
, ptr_type_node
, u
, NULL_TREE
, NULL_TREE
);
6876 /* String up: make sure the assignment happens before the use. */
6877 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (cond2
), t
, cond2
);
6878 COND_EXPR_ELSE (cond1
) = t
;
6880 /* Prepare the trees handling the argument that is passed on the stack;
6881 the top level node will store in ON_STACK. */
6882 arg
= get_initialized_tmp_var (stack
, pre_p
, NULL
);
6885 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
6886 t
= fold_convert (intDI_type_node
, arg
);
6887 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
6888 build_int_cst (TREE_TYPE (t
), 15));
6889 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
6890 build_int_cst (TREE_TYPE (t
), -16));
6891 t
= fold_convert (TREE_TYPE (arg
), t
);
6892 roundup
= build2 (MODIFY_EXPR
, TREE_TYPE (arg
), arg
, t
);
6896 /* Advance ap.__stack */
6897 t
= fold_convert (intDI_type_node
, arg
);
6898 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
6899 build_int_cst (TREE_TYPE (t
), size
+ 7));
6900 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
6901 build_int_cst (TREE_TYPE (t
), -8));
6902 t
= fold_convert (TREE_TYPE (arg
), t
);
6903 t
= build2 (MODIFY_EXPR
, TREE_TYPE (stack
), unshare_expr (stack
), t
);
6904 /* String up roundup and advance. */
6906 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), roundup
, t
);
6907 /* String up with arg */
6908 on_stack
= build2 (COMPOUND_EXPR
, TREE_TYPE (arg
), t
, arg
);
6909 /* Big-endianness related address adjustment. */
6910 if (BLOCK_REG_PADDING (mode
, type
, 1) == downward
6911 && size
< UNITS_PER_WORD
)
6913 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (arg
), arg
,
6914 size_int (UNITS_PER_WORD
- size
));
6915 on_stack
= build2 (COMPOUND_EXPR
, TREE_TYPE (arg
), on_stack
, t
);
6918 COND_EXPR_THEN (cond1
) = unshare_expr (on_stack
);
6919 COND_EXPR_THEN (cond2
) = unshare_expr (on_stack
);
6921 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
6924 t
= build2 (PREINCREMENT_EXPR
, TREE_TYPE (off
), off
,
6925 build_int_cst (TREE_TYPE (off
), adjust
));
6927 t
= fold_convert (sizetype
, t
);
6928 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (f_top
), f_top
, t
);
6932 /* type ha; // treat as "struct {ftype field[n];}"
6933 ... [computing offs]
6934 for (i = 0; i <nregs; ++i, offs += 16)
6935 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
6938 tree tmp_ha
, field_t
, field_ptr_t
;
6940 /* Declare a local variable. */
6941 tmp_ha
= create_tmp_var_raw (type
, "ha");
6942 gimple_add_tmp_var (tmp_ha
);
6944 /* Establish the base type. */
6948 field_t
= float_type_node
;
6949 field_ptr_t
= float_ptr_type_node
;
6952 field_t
= double_type_node
;
6953 field_ptr_t
= double_ptr_type_node
;
6956 field_t
= long_double_type_node
;
6957 field_ptr_t
= long_double_ptr_type_node
;
6959 /* The half precision and quad precision are not fully supported yet. Enable
6960 the following code after the support is complete. Need to find the correct
6961 type node for __fp16 *. */
6964 field_t
= float_type_node
;
6965 field_ptr_t
= float_ptr_type_node
;
6971 tree innertype
= make_signed_type (GET_MODE_PRECISION (SImode
));
6972 field_t
= build_vector_type_for_mode (innertype
, ag_mode
);
6973 field_ptr_t
= build_pointer_type (field_t
);
6980 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area */
6981 tmp_ha
= build1 (ADDR_EXPR
, field_ptr_t
, tmp_ha
);
6983 t
= fold_convert (field_ptr_t
, addr
);
6984 t
= build2 (MODIFY_EXPR
, field_t
,
6985 build1 (INDIRECT_REF
, field_t
, tmp_ha
),
6986 build1 (INDIRECT_REF
, field_t
, t
));
6988 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
6989 for (i
= 1; i
< nregs
; ++i
)
6991 addr
= fold_build_pointer_plus_hwi (addr
, UNITS_PER_VREG
);
6992 u
= fold_convert (field_ptr_t
, addr
);
6993 u
= build2 (MODIFY_EXPR
, field_t
,
6994 build2 (MEM_REF
, field_t
, tmp_ha
,
6995 build_int_cst (field_ptr_t
,
6997 int_size_in_bytes (field_t
)))),
6998 build1 (INDIRECT_REF
, field_t
, u
));
6999 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), t
, u
);
7002 u
= fold_convert (TREE_TYPE (f_top
), tmp_ha
);
7003 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (f_top
), t
, u
);
7006 COND_EXPR_ELSE (cond2
) = t
;
7007 addr
= fold_convert (build_pointer_type (type
), cond1
);
7008 addr
= build_va_arg_indirect_ref (addr
);
7011 addr
= build_va_arg_indirect_ref (addr
);
7016 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
7019 aarch64_setup_incoming_varargs (cumulative_args_t cum_v
, enum machine_mode mode
,
7020 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
7023 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7024 CUMULATIVE_ARGS local_cum
;
7025 int gr_saved
, vr_saved
;
7027 /* The caller has advanced CUM up to, but not beyond, the last named
7028 argument. Advance a local copy of CUM past the last "real" named
7029 argument, to find out how many registers are left over. */
7031 aarch64_function_arg_advance (pack_cumulative_args(&local_cum
), mode
, type
, true);
7033 /* Found out how many registers we need to save. */
7034 gr_saved
= NUM_ARG_REGS
- local_cum
.aapcs_ncrn
;
7035 vr_saved
= NUM_FP_ARG_REGS
- local_cum
.aapcs_nvrn
;
7037 if (TARGET_GENERAL_REGS_ONLY
)
7039 if (local_cum
.aapcs_nvrn
> 0)
7040 sorry ("%qs and floating point or vector arguments",
7041 "-mgeneral-regs-only");
7051 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
7052 ptr
= plus_constant (Pmode
, virtual_incoming_args_rtx
,
7053 - gr_saved
* UNITS_PER_WORD
);
7054 mem
= gen_frame_mem (BLKmode
, ptr
);
7055 set_mem_alias_set (mem
, get_varargs_alias_set ());
7057 move_block_from_reg (local_cum
.aapcs_ncrn
+ R0_REGNUM
,
7062 /* We can't use move_block_from_reg, because it will use
7063 the wrong mode, storing D regs only. */
7064 enum machine_mode mode
= TImode
;
7067 /* Set OFF to the offset from virtual_incoming_args_rtx of
7068 the first vector register. The VR save area lies below
7069 the GR one, and is aligned to 16 bytes. */
7070 off
= -AARCH64_ROUND_UP (gr_saved
* UNITS_PER_WORD
,
7071 STACK_BOUNDARY
/ BITS_PER_UNIT
);
7072 off
-= vr_saved
* UNITS_PER_VREG
;
7074 for (i
= local_cum
.aapcs_nvrn
; i
< NUM_FP_ARG_REGS
; ++i
)
7078 ptr
= plus_constant (Pmode
, virtual_incoming_args_rtx
, off
);
7079 mem
= gen_frame_mem (mode
, ptr
);
7080 set_mem_alias_set (mem
, get_varargs_alias_set ());
7081 aarch64_emit_move (mem
, gen_rtx_REG (mode
, V0_REGNUM
+ i
));
7082 off
+= UNITS_PER_VREG
;
7087 /* We don't save the size into *PRETEND_SIZE because we want to avoid
7088 any complication of having crtl->args.pretend_args_size changed. */
7089 cfun
->machine
->frame
.saved_varargs_size
7090 = (AARCH64_ROUND_UP (gr_saved
* UNITS_PER_WORD
,
7091 STACK_BOUNDARY
/ BITS_PER_UNIT
)
7092 + vr_saved
* UNITS_PER_VREG
);
7096 aarch64_conditional_register_usage (void)
7101 for (i
= V0_REGNUM
; i
<= V31_REGNUM
; i
++)
7104 call_used_regs
[i
] = 1;
7109 /* Walk down the type tree of TYPE counting consecutive base elements.
7110 If *MODEP is VOIDmode, then set it to the first valid floating point
7111 type. If a non-floating point type is found, or if a floating point
7112 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
7113 otherwise return the count in the sub-tree. */
7115 aapcs_vfp_sub_candidate (const_tree type
, enum machine_mode
*modep
)
7117 enum machine_mode mode
;
7120 switch (TREE_CODE (type
))
7123 mode
= TYPE_MODE (type
);
7124 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= TFmode
)
7127 if (*modep
== VOIDmode
)
7136 mode
= TYPE_MODE (TREE_TYPE (type
));
7137 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= TFmode
)
7140 if (*modep
== VOIDmode
)
7149 /* Use V2SImode and V4SImode as representatives of all 64-bit
7150 and 128-bit vector types. */
7151 size
= int_size_in_bytes (type
);
7164 if (*modep
== VOIDmode
)
7167 /* Vector modes are considered to be opaque: two vectors are
7168 equivalent for the purposes of being homogeneous aggregates
7169 if they are the same size. */
7178 tree index
= TYPE_DOMAIN (type
);
7180 /* Can't handle incomplete types. */
7181 if (!COMPLETE_TYPE_P (type
))
7184 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
7187 || !TYPE_MAX_VALUE (index
)
7188 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
7189 || !TYPE_MIN_VALUE (index
)
7190 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
7194 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
7195 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
7197 /* There must be no padding. */
7198 if (!tree_fits_uhwi_p (TYPE_SIZE (type
))
7199 || ((HOST_WIDE_INT
) tree_to_uhwi (TYPE_SIZE (type
))
7200 != count
* GET_MODE_BITSIZE (*modep
)))
7212 /* Can't handle incomplete types. */
7213 if (!COMPLETE_TYPE_P (type
))
7216 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
7218 if (TREE_CODE (field
) != FIELD_DECL
)
7221 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
7227 /* There must be no padding. */
7228 if (!tree_fits_uhwi_p (TYPE_SIZE (type
))
7229 || ((HOST_WIDE_INT
) tree_to_uhwi (TYPE_SIZE (type
))
7230 != count
* GET_MODE_BITSIZE (*modep
)))
7237 case QUAL_UNION_TYPE
:
7239 /* These aren't very interesting except in a degenerate case. */
7244 /* Can't handle incomplete types. */
7245 if (!COMPLETE_TYPE_P (type
))
7248 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
7250 if (TREE_CODE (field
) != FIELD_DECL
)
7253 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
7256 count
= count
> sub_count
? count
: sub_count
;
7259 /* There must be no padding. */
7260 if (!tree_fits_uhwi_p (TYPE_SIZE (type
))
7261 || ((HOST_WIDE_INT
) tree_to_uhwi (TYPE_SIZE (type
))
7262 != count
* GET_MODE_BITSIZE (*modep
)))
7275 /* Return true if we use LRA instead of reload pass. */
7277 aarch64_lra_p (void)
7279 return aarch64_lra_flag
;
7282 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
7283 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
7284 array types. The C99 floating-point complex types are also considered
7285 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
7286 types, which are GCC extensions and out of the scope of AAPCS64, are
7287 treated as composite types here as well.
7289 Note that MODE itself is not sufficient in determining whether a type
7290 is such a composite type or not. This is because
7291 stor-layout.c:compute_record_mode may have already changed the MODE
7292 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
7293 structure with only one field may have its MODE set to the mode of the
7294 field. Also an integer mode whose size matches the size of the
7295 RECORD_TYPE type may be used to substitute the original mode
7296 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
7297 solely relied on. */
7300 aarch64_composite_type_p (const_tree type
,
7301 enum machine_mode mode
)
7303 if (type
&& (AGGREGATE_TYPE_P (type
) || TREE_CODE (type
) == COMPLEX_TYPE
))
7307 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
7308 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
7314 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
7315 type as described in AAPCS64 \S 4.1.2.
7317 See the comment above aarch64_composite_type_p for the notes on MODE. */
7320 aarch64_short_vector_p (const_tree type
,
7321 enum machine_mode mode
)
7323 HOST_WIDE_INT size
= -1;
7325 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
7326 size
= int_size_in_bytes (type
);
7327 else if (!aarch64_composite_type_p (type
, mode
)
7328 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
7329 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
))
7330 size
= GET_MODE_SIZE (mode
);
7332 return (size
== 8 || size
== 16) ? true : false;
7335 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
7336 shall be passed or returned in simd/fp register(s) (providing these
7337 parameter passing registers are available).
7339 Upon successful return, *COUNT returns the number of needed registers,
7340 *BASE_MODE returns the mode of the individual register and when IS_HAF
7341 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
7342 floating-point aggregate or a homogeneous short-vector aggregate. */
7345 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode
,
7347 enum machine_mode
*base_mode
,
7351 enum machine_mode new_mode
= VOIDmode
;
7352 bool composite_p
= aarch64_composite_type_p (type
, mode
);
7354 if (is_ha
!= NULL
) *is_ha
= false;
7356 if ((!composite_p
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7357 || aarch64_short_vector_p (type
, mode
))
7362 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
7364 if (is_ha
!= NULL
) *is_ha
= true;
7366 new_mode
= GET_MODE_INNER (mode
);
7368 else if (type
&& composite_p
)
7370 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
7372 if (ag_count
> 0 && ag_count
<= HA_MAX_NUM_FLDS
)
7374 if (is_ha
!= NULL
) *is_ha
= true;
7383 *base_mode
= new_mode
;
7387 /* Implement TARGET_STRUCT_VALUE_RTX. */
7390 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED
,
7391 int incoming ATTRIBUTE_UNUSED
)
7393 return gen_rtx_REG (Pmode
, AARCH64_STRUCT_VALUE_REGNUM
);
7396 /* Implements target hook vector_mode_supported_p. */
7398 aarch64_vector_mode_supported_p (enum machine_mode mode
)
7401 && (mode
== V4SImode
|| mode
== V8HImode
7402 || mode
== V16QImode
|| mode
== V2DImode
7403 || mode
== V2SImode
|| mode
== V4HImode
7404 || mode
== V8QImode
|| mode
== V2SFmode
7405 || mode
== V4SFmode
|| mode
== V2DFmode
7406 || mode
== V1DFmode
))
7412 /* Return appropriate SIMD container
7413 for MODE within a vector of WIDTH bits. */
7414 static enum machine_mode
7415 aarch64_simd_container_mode (enum machine_mode mode
, unsigned width
)
7417 gcc_assert (width
== 64 || width
== 128);
7456 /* Return 128-bit container as the preferred SIMD mode for MODE. */
7457 static enum machine_mode
7458 aarch64_preferred_simd_mode (enum machine_mode mode
)
7460 return aarch64_simd_container_mode (mode
, 128);
/* Return the bitmask of possible vector sizes for the vectorizer
   to iterate over: both 16-byte and 8-byte vectors are supported.  */
static unsigned int
aarch64_autovectorize_vector_sizes (void)
{
  return (16 | 8);
}
7471 /* A table to help perform AArch64-specific name mangling for AdvSIMD
7472 vector types in order to conform to the AAPCS64 (see "Procedure
7473 Call Standard for the ARM 64-bit Architecture", Appendix A). To
7474 qualify for emission with the mangled names defined in that document,
7475 a vector type must not only be of the correct mode but also be
7476 composed of AdvSIMD vector element types (e.g.
7477 _builtin_aarch64_simd_qi); these types are registered by
7478 aarch64_init_simd_builtins (). In other words, vector types defined
7479 in other ways e.g. via vector_size attribute will get default
7483 enum machine_mode mode
;
7484 const char *element_type_name
;
7485 const char *mangled_name
;
7486 } aarch64_simd_mangle_map_entry
;
7488 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map
[] = {
7489 /* 64-bit containerized types. */
7490 { V8QImode
, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
7491 { V8QImode
, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
7492 { V4HImode
, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
7493 { V4HImode
, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
7494 { V2SImode
, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
7495 { V2SImode
, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
7496 { V2SFmode
, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
7497 { V8QImode
, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
7498 { V4HImode
, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
7499 /* 128-bit containerized types. */
7500 { V16QImode
, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
7501 { V16QImode
, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
7502 { V8HImode
, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
7503 { V8HImode
, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
7504 { V4SImode
, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
7505 { V4SImode
, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
7506 { V2DImode
, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
7507 { V2DImode
, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
7508 { V4SFmode
, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
7509 { V2DFmode
, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
7510 { V16QImode
, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
7511 { V8HImode
, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
7512 { V2DImode
, "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
7513 { VOIDmode
, NULL
, NULL
}
7516 /* Implement TARGET_MANGLE_TYPE. */
7519 aarch64_mangle_type (const_tree type
)
7521 /* The AArch64 ABI documents say that "__va_list" has to be
7522 managled as if it is in the "std" namespace. */
7523 if (lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
7524 return "St9__va_list";
7526 /* Check the mode of the vector type, and the name of the vector
7527 element type, against the table. */
7528 if (TREE_CODE (type
) == VECTOR_TYPE
)
7530 aarch64_simd_mangle_map_entry
*pos
= aarch64_simd_mangle_map
;
7532 while (pos
->mode
!= VOIDmode
)
7534 tree elt_type
= TREE_TYPE (type
);
7536 if (pos
->mode
== TYPE_MODE (type
)
7537 && TREE_CODE (TYPE_NAME (elt_type
)) == TYPE_DECL
7538 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type
))),
7539 pos
->element_type_name
))
7540 return pos
->mangled_name
;
7546 /* Use the default mangling. */
/* Return the equivalent letter for size.  */
static char
sizetochar (int size)
{
  switch (size)
    {
    case 64: return 'd';
    case 32: return 's';
    case 16: return 'h';
    case 8 : return 'b';
    default: gcc_unreachable ();
    }
}
7564 /* Return true iff x is a uniform vector of floating-point
7565 constants, and the constant can be represented in
7566 quarter-precision form. Note, as aarch64_float_const_representable
7567 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
7569 aarch64_vect_float_const_representable_p (rtx x
)
7572 REAL_VALUE_TYPE r0
, ri
;
7575 if (GET_MODE_CLASS (GET_MODE (x
)) != MODE_VECTOR_FLOAT
)
7578 x0
= CONST_VECTOR_ELT (x
, 0);
7579 if (!CONST_DOUBLE_P (x0
))
7582 REAL_VALUE_FROM_CONST_DOUBLE (r0
, x0
);
7584 for (i
= 1; i
< CONST_VECTOR_NUNITS (x
); i
++)
7586 xi
= CONST_VECTOR_ELT (x
, i
);
7587 if (!CONST_DOUBLE_P (xi
))
7590 REAL_VALUE_FROM_CONST_DOUBLE (ri
, xi
);
7591 if (!REAL_VALUES_EQUAL (r0
, ri
))
7595 return aarch64_float_const_representable_p (x0
);
7598 /* Return true for valid and false for invalid. */
7600 aarch64_simd_valid_immediate (rtx op
, enum machine_mode mode
, bool inverse
,
7601 struct simd_immediate_info
*info
)
7603 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
7605 for (i = 0; i < idx; i += (STRIDE)) \
7610 immtype = (CLASS); \
7611 elsize = (ELSIZE); \
7617 unsigned int i
, elsize
= 0, idx
= 0, n_elts
= CONST_VECTOR_NUNITS (op
);
7618 unsigned int innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
7619 unsigned char bytes
[16];
7620 int immtype
= -1, matches
;
7621 unsigned int invmask
= inverse
? 0xff : 0;
7624 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
7626 if (! (aarch64_simd_imm_zero_p (op
, mode
)
7627 || aarch64_vect_float_const_representable_p (op
)))
7632 info
->value
= CONST_VECTOR_ELT (op
, 0);
7633 info
->element_width
= GET_MODE_BITSIZE (GET_MODE (info
->value
));
7641 /* Splat vector constant out into a byte vector. */
7642 for (i
= 0; i
< n_elts
; i
++)
7644 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
7645 it must be laid out in the vector register in reverse order. */
7646 rtx el
= CONST_VECTOR_ELT (op
, BYTES_BIG_ENDIAN
? (n_elts
- 1 - i
) : i
);
7647 unsigned HOST_WIDE_INT elpart
;
7648 unsigned int part
, parts
;
7650 if (GET_CODE (el
) == CONST_INT
)
7652 elpart
= INTVAL (el
);
7655 else if (GET_CODE (el
) == CONST_DOUBLE
)
7657 elpart
= CONST_DOUBLE_LOW (el
);
7663 for (part
= 0; part
< parts
; part
++)
7666 for (byte
= 0; byte
< innersize
; byte
++)
7668 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
7669 elpart
>>= BITS_PER_UNIT
;
7671 if (GET_CODE (el
) == CONST_DOUBLE
)
7672 elpart
= CONST_DOUBLE_HIGH (el
);
7677 gcc_assert (idx
== GET_MODE_SIZE (mode
));
7681 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
7682 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 0, 0);
7684 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
7685 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 8, 0);
7687 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
7688 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0, 16, 0);
7690 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
7691 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3], 24, 0);
7693 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0, 0, 0);
7695 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1], 8, 0);
7697 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
7698 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 0, 1);
7700 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
7701 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 8, 1);
7703 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
7704 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff, 16, 1);
7706 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
7707 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3], 24, 1);
7709 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff, 0, 1);
7711 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1], 8, 1);
7713 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
7714 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 8, 0);
7716 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
7717 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 8, 1);
7719 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
7720 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0, 16, 0);
7722 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
7723 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff, 16, 1);
7725 CHECK (1, 8, 16, bytes
[i
] == bytes
[0], 0, 0);
7727 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
7728 && bytes
[i
] == bytes
[(i
+ 8) % idx
], 0, 0);
7737 info
->element_width
= elsize
;
7738 info
->mvn
= emvn
!= 0;
7739 info
->shift
= eshift
;
7741 unsigned HOST_WIDE_INT imm
= 0;
7743 if (immtype
>= 12 && immtype
<= 15)
7746 /* Un-invert bytes of recognized vector, if necessary. */
7748 for (i
= 0; i
< idx
; i
++)
7749 bytes
[i
] ^= invmask
;
7753 /* FIXME: Broken on 32-bit H_W_I hosts. */
7754 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
7756 for (i
= 0; i
< 8; i
++)
7757 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
7758 << (i
* BITS_PER_UNIT
);
7761 info
->value
= GEN_INT (imm
);
7765 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
7766 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
7768 /* Construct 'abcdefgh' because the assembler cannot handle
7769 generic constants. */
7772 imm
= (imm
>> info
->shift
) & 0xff;
7773 info
->value
= GEN_INT (imm
);
7782 aarch64_const_vec_all_same_int_p (rtx x
,
7783 HOST_WIDE_INT minval
,
7784 HOST_WIDE_INT maxval
)
7786 HOST_WIDE_INT firstval
;
7789 if (GET_CODE (x
) != CONST_VECTOR
7790 || GET_MODE_CLASS (GET_MODE (x
)) != MODE_VECTOR_INT
)
7793 firstval
= INTVAL (CONST_VECTOR_ELT (x
, 0));
7794 if (firstval
< minval
|| firstval
> maxval
)
7797 count
= CONST_VECTOR_NUNITS (x
);
7798 for (i
= 1; i
< count
; i
++)
7799 if (INTVAL (CONST_VECTOR_ELT (x
, i
)) != firstval
)
7805 /* Check of immediate shift constants are within range. */
7807 aarch64_simd_shift_imm_p (rtx x
, enum machine_mode mode
, bool left
)
7809 int bit_width
= GET_MODE_UNIT_SIZE (mode
) * BITS_PER_UNIT
;
7811 return aarch64_const_vec_all_same_int_p (x
, 0, bit_width
- 1);
7813 return aarch64_const_vec_all_same_int_p (x
, 1, bit_width
);
7816 /* Return true if X is a uniform vector where all elements
7817 are either the floating-point constant 0.0 or the
7818 integer constant 0. */
7820 aarch64_simd_imm_zero_p (rtx x
, enum machine_mode mode
)
7822 return x
== CONST0_RTX (mode
);
7826 aarch64_simd_imm_scalar_p (rtx x
, enum machine_mode mode ATTRIBUTE_UNUSED
)
7828 HOST_WIDE_INT imm
= INTVAL (x
);
7831 for (i
= 0; i
< 8; i
++)
7833 unsigned int byte
= imm
& 0xff;
7834 if (byte
!= 0xff && byte
!= 0)
7843 aarch64_mov_operand_p (rtx x
,
7844 enum aarch64_symbol_context context
,
7845 enum machine_mode mode
)
7847 if (GET_CODE (x
) == HIGH
7848 && aarch64_valid_symref (XEXP (x
, 0), GET_MODE (XEXP (x
, 0))))
7851 if (CONST_INT_P (x
) && aarch64_move_imm (INTVAL (x
), mode
))
7854 if (GET_CODE (x
) == SYMBOL_REF
&& mode
== DImode
&& CONSTANT_ADDRESS_P (x
))
7857 return aarch64_classify_symbolic_expression (x
, context
)
7858 == SYMBOL_TINY_ABSOLUTE
;
7861 /* Return a const_int vector of VAL. */
7863 aarch64_simd_gen_const_vector_dup (enum machine_mode mode
, int val
)
7865 int nunits
= GET_MODE_NUNITS (mode
);
7866 rtvec v
= rtvec_alloc (nunits
);
7869 for (i
=0; i
< nunits
; i
++)
7870 RTVEC_ELT (v
, i
) = GEN_INT (val
);
7872 return gen_rtx_CONST_VECTOR (mode
, v
);
7875 /* Check OP is a legal scalar immediate for the MOVI instruction. */
7878 aarch64_simd_scalar_immediate_valid_for_move (rtx op
, enum machine_mode mode
)
7880 enum machine_mode vmode
;
7882 gcc_assert (!VECTOR_MODE_P (mode
));
7883 vmode
= aarch64_preferred_simd_mode (mode
);
7884 rtx op_v
= aarch64_simd_gen_const_vector_dup (vmode
, INTVAL (op
));
7885 return aarch64_simd_valid_immediate (op_v
, vmode
, false, NULL
);
7888 /* Construct and return a PARALLEL RTX vector. */
7890 aarch64_simd_vect_par_cnst_half (enum machine_mode mode
, bool high
)
7892 int nunits
= GET_MODE_NUNITS (mode
);
7893 rtvec v
= rtvec_alloc (nunits
/ 2);
7894 int base
= high
? nunits
/ 2 : 0;
7898 for (i
=0; i
< nunits
/ 2; i
++)
7899 RTVEC_ELT (v
, i
) = GEN_INT (base
+ i
);
7901 t1
= gen_rtx_PARALLEL (mode
, v
);
7905 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
7906 HIGH (exclusive). */
7908 aarch64_simd_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
7911 gcc_assert (GET_CODE (operand
) == CONST_INT
);
7912 lane
= INTVAL (operand
);
7914 if (lane
< low
|| lane
>= high
)
7915 error ("lane out of range");
7919 aarch64_simd_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
7921 gcc_assert (GET_CODE (operand
) == CONST_INT
);
7922 HOST_WIDE_INT lane
= INTVAL (operand
);
7924 if (lane
< low
|| lane
>= high
)
7925 error ("constant out of range");
7928 /* Emit code to reinterpret one AdvSIMD type as another,
7929 without altering bits. */
7931 aarch64_simd_reinterpret (rtx dest
, rtx src
)
7933 emit_move_insn (dest
, gen_lowpart (GET_MODE (dest
), src
));
7936 /* Emit code to place a AdvSIMD pair result in memory locations (with equal
7939 aarch64_simd_emit_pair_result_insn (enum machine_mode mode
,
7940 rtx (*intfn
) (rtx
, rtx
, rtx
), rtx destaddr
,
7943 rtx mem
= gen_rtx_MEM (mode
, destaddr
);
7944 rtx tmp1
= gen_reg_rtx (mode
);
7945 rtx tmp2
= gen_reg_rtx (mode
);
7947 emit_insn (intfn (tmp1
, op1
, tmp2
));
7949 emit_move_insn (mem
, tmp1
);
7950 mem
= adjust_address (mem
, mode
, GET_MODE_SIZE (mode
));
7951 emit_move_insn (mem
, tmp2
);
7954 /* Return TRUE if OP is a valid vector addressing mode. */
7956 aarch64_simd_mem_operand_p (rtx op
)
7958 return MEM_P (op
) && (GET_CODE (XEXP (op
, 0)) == POST_INC
7959 || GET_CODE (XEXP (op
, 0)) == REG
);
7962 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
7963 not to early-clobber SRC registers in the process.
7965 We assume that the operands described by SRC and DEST represent a
7966 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
7967 number of components into which the copy has been decomposed. */
7969 aarch64_simd_disambiguate_copy (rtx
*operands
, rtx
*dest
,
7970 rtx
*src
, unsigned int count
)
7974 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
7975 || REGNO (operands
[0]) < REGNO (operands
[1]))
7977 for (i
= 0; i
< count
; i
++)
7979 operands
[2 * i
] = dest
[i
];
7980 operands
[2 * i
+ 1] = src
[i
];
7985 for (i
= 0; i
< count
; i
++)
7987 operands
[2 * i
] = dest
[count
- i
- 1];
7988 operands
[2 * i
+ 1] = src
[count
- i
- 1];
7993 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
7994 one of VSTRUCT modes: OI, CI or XI. */
7996 aarch64_simd_attr_length_move (rtx insn
)
7998 enum machine_mode mode
;
8000 extract_insn_cached (insn
);
8002 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
8004 mode
= GET_MODE (recog_data
.operand
[0]);
8020 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
8021 alignment of a vector to 128 bits. */
8022 static HOST_WIDE_INT
8023 aarch64_simd_vector_alignment (const_tree type
)
8025 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
8026 return MIN (align
, 128);
8029 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
8031 aarch64_simd_vector_alignment_reachable (const_tree type
, bool is_packed
)
8036 /* We guarantee alignment for vectors up to 128-bits. */
8037 if (tree_int_cst_compare (TYPE_SIZE (type
),
8038 bitsize_int (BIGGEST_ALIGNMENT
)) > 0)
8041 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
8045 /* If VALS is a vector constant that can be loaded into a register
8046 using DUP, generate instructions to do so and return an RTX to
8047 assign to the register. Otherwise return NULL_RTX. */
8049 aarch64_simd_dup_constant (rtx vals
)
8051 enum machine_mode mode
= GET_MODE (vals
);
8052 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
8053 int n_elts
= GET_MODE_NUNITS (mode
);
8054 bool all_same
= true;
8058 if (GET_CODE (vals
) != CONST_VECTOR
)
8061 for (i
= 1; i
< n_elts
; ++i
)
8063 x
= CONST_VECTOR_ELT (vals
, i
);
8064 if (!rtx_equal_p (x
, CONST_VECTOR_ELT (vals
, 0)))
8071 /* We can load this constant by using DUP and a constant in a
8072 single ARM register. This will be cheaper than a vector
8074 x
= copy_to_mode_reg (inner_mode
, CONST_VECTOR_ELT (vals
, 0));
8075 return gen_rtx_VEC_DUPLICATE (mode
, x
);
8079 /* Generate code to load VALS, which is a PARALLEL containing only
8080 constants (for vec_init) or CONST_VECTOR, efficiently into a
8081 register. Returns an RTX to copy into the register, or NULL_RTX
8082 for a PARALLEL that can not be converted into a CONST_VECTOR. */
8084 aarch64_simd_make_constant (rtx vals
)
8086 enum machine_mode mode
= GET_MODE (vals
);
8088 rtx const_vec
= NULL_RTX
;
8089 int n_elts
= GET_MODE_NUNITS (mode
);
/* Already a CONST_VECTOR: use it directly.  Otherwise, for a PARALLEL,
   build one only if every element is a valid vector-constant element.  */
8093 if (GET_CODE (vals
) == CONST_VECTOR
)
8095 else if (GET_CODE (vals
) == PARALLEL
)
8097 /* A CONST_VECTOR must contain only CONST_INTs and
8098 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8099 Only store valid constants in a CONST_VECTOR. */
8100 for (i
= 0; i
< n_elts
; ++i
)
8102 rtx x
= XVECEXP (vals
, 0, i
);
8103 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
8106 if (n_const
== n_elts
)
8107 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
/* Prefer, in order: an immediate MOVI/MVNI, a scalar DUP, and finally a
   literal-pool load.  */
8112 if (const_vec
!= NULL_RTX
8113 && aarch64_simd_valid_immediate (const_vec
, mode
, false, NULL
))
8114 /* Load using MOVI/MVNI. */
8116 else if ((const_dup
= aarch64_simd_dup_constant (vals
)) != NULL_RTX
)
8117 /* Loaded using DUP. */
8119 else if (const_vec
!= NULL_RTX
)
8120 /* Load from constant pool. We can not take advantage of single-cycle
8121 LD1 because we need a PC-relative addressing mode. */
8124 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8125 We can not construct an initializer. */
/* Expand a vector initialization: copy the elements described by the
   PARALLEL VALS into vector register TARGET, choosing the cheapest
   strategy (full constant, DUP splat, constant + lane insert, or a
   stack-temporary build).  */
8130 aarch64_expand_vector_init (rtx target
, rtx vals
)
8132 enum machine_mode mode
= GET_MODE (target
);
8133 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
8134 int n_elts
= GET_MODE_NUNITS (mode
);
8135 int n_var
= 0, one_var
= -1;
8136 bool all_same
= true;
/* Count non-constant elements and remember the index of the last one;
   also track whether all elements are identical.  */
8140 x
= XVECEXP (vals
, 0, 0);
8141 if (!CONST_INT_P (x
) && !CONST_DOUBLE_P (x
))
8142 n_var
= 1, one_var
= 0;
8144 for (i
= 1; i
< n_elts
; ++i
)
8146 x
= XVECEXP (vals
, 0, i
);
8147 if (!CONST_INT_P (x
) && !CONST_DOUBLE_P (x
))
8148 ++n_var
, one_var
= i
;
8150 if (!rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
/* All-constant case: let aarch64_simd_make_constant pick the load.  */
8156 rtx constant
= aarch64_simd_make_constant (vals
);
8157 if (constant
!= NULL_RTX
)
8159 emit_move_insn (target
, constant
);
8164 /* Splat a single non-constant element if we can. */
8167 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
8168 aarch64_emit_move (target
, gen_rtx_VEC_DUPLICATE (mode
, x
));
8172 /* One field is non-constant. Load constant then overwrite varying
8173 field. This is more efficient than using the stack. */
8176 rtx copy
= copy_rtx (vals
);
8177 rtx index
= GEN_INT (one_var
);
8178 enum insn_code icode
;
8180 /* Load constant part of vector, substitute neighboring value for
8182 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, one_var
^ 1);
8183 aarch64_expand_vector_init (target
, copy
);
8185 /* Insert variable. */
8186 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
8187 icode
= optab_handler (vec_set_optab
, mode
);
8188 gcc_assert (icode
!= CODE_FOR_nothing
);
8189 emit_insn (GEN_FCN (icode
) (target
, x
, index
));
8193 /* Construct the vector in memory one field at a time
8194 and load the whole vector. */
8195 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
8196 for (i
= 0; i
< n_elts
; i
++)
8197 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
8198 i
* GET_MODE_SIZE (inner_mode
)),
8199 XVECEXP (vals
, 0, i
));
8200 emit_move_insn (target
, mem
);
/* Mask of bits a shift count is truncated to: 0 (no implicit
   truncation) for vector and vector-struct modes, otherwise the
   mode's bit-size minus one.  */
8204 static unsigned HOST_WIDE_INT
8205 aarch64_shift_truncation_mask (enum machine_mode mode
)
8208 (aarch64_vector_mode_supported_p (mode
)
8209 || aarch64_vect_struct_mode_p (mode
)) ? 0 : (GET_MODE_BITSIZE (mode
) - 1);
8212 #ifndef TLS_SECTION_ASM_FLAG
8213 #define TLS_SECTION_ASM_FLAG 'T'
/* Emit the assembly directive for switching to the named ELF section
   NAME with SECTION_* FLAGS, including flag characters, section type
   and, for COMDAT sections, the group name.  */
8217 aarch64_elf_asm_named_section (const char *name
, unsigned int flags
,
8218 tree decl ATTRIBUTE_UNUSED
)
8220 char flagchars
[10], *f
= flagchars
;
8222 /* If we have already declared this section, we can use an
8223 abbreviated form to switch back to it -- unless this section is
8224 part of a COMDAT groups, in which case GAS requires the full
8225 declaration every time. */
8226 if (!(HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
8227 && (flags
& SECTION_DECLARED
))
8229 fprintf (asm_out_file
, "\t.section\t%s\n", name
);
/* Build the "awxsMST" flag-character string from FLAGS.
   NOTE(review): the individual *f++ = ... assignments for most flags
   were dropped by the extraction; only the conditions remain.  */
8233 if (!(flags
& SECTION_DEBUG
))
8235 if (flags
& SECTION_WRITE
)
8237 if (flags
& SECTION_CODE
)
8239 if (flags
& SECTION_SMALL
)
8241 if (flags
& SECTION_MERGE
)
8243 if (flags
& SECTION_STRINGS
)
8245 if (flags
& SECTION_TLS
)
8246 *f
++ = TLS_SECTION_ASM_FLAG
;
8247 if (HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
8251 fprintf (asm_out_file
, "\t.section\t%s,\"%s\"", name
, flagchars
);
/* Append the section type (@progbits/@nobits) unless suppressed.  */
8253 if (!(flags
& SECTION_NOTYPE
))
8258 if (flags
& SECTION_BSS
)
8263 #ifdef TYPE_OPERAND_FMT
8264 format
= "," TYPE_OPERAND_FMT
;
8269 fprintf (asm_out_file
, format
, type
);
/* Mergeable sections carry an entity size operand.  */
8271 if (flags
& SECTION_ENTSIZE
)
8272 fprintf (asm_out_file
, ",%d", flags
& SECTION_ENTSIZE
);
8273 if (HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
8275 if (TREE_CODE (decl
) == IDENTIFIER_NODE
)
8276 fprintf (asm_out_file
, ",%s,comdat", IDENTIFIER_POINTER (decl
));
8278 fprintf (asm_out_file
, ",%s,comdat",
8279 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl
)));
8283 putc ('\n', asm_out_file
);
8286 /* Select a format to encode pointers in exception handling data. */
8288 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED
, int global
)
/* Pick a reloc width based on the code model; global symbols are
   additionally encoded indirectly, and all are PC-relative.  */
8291 switch (aarch64_cmodel
)
8293 case AARCH64_CMODEL_TINY
:
8294 case AARCH64_CMODEL_TINY_PIC
:
8295 case AARCH64_CMODEL_SMALL
:
8296 case AARCH64_CMODEL_SMALL_PIC
:
8297 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
8299 type
= DW_EH_PE_sdata4
;
8302 /* No assumptions here. 8-byte relocs required. */
8303 type
= DW_EH_PE_sdata8
;
8306 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
8309 /* Emit load exclusive. */
/* Emit a load-exclusive of MEM into RVAL, in MODE, with memory-model
   operand MODEL_RTX; dispatches on MODE to the matching generator.  */
8312 aarch64_emit_load_exclusive (enum machine_mode mode
, rtx rval
,
8313 rtx mem
, rtx model_rtx
)
8315 rtx (*gen
) (rtx
, rtx
, rtx
);
8319 case QImode
: gen
= gen_aarch64_load_exclusiveqi
; break;
8320 case HImode
: gen
= gen_aarch64_load_exclusivehi
; break;
8321 case SImode
: gen
= gen_aarch64_load_exclusivesi
; break;
8322 case DImode
: gen
= gen_aarch64_load_exclusivedi
; break;
8327 emit_insn (gen (rval
, mem
, model_rtx
));
8330 /* Emit store exclusive. */
/* Emit a store-exclusive of RVAL to MEM, in MODE; BVAL receives the
   success/failure status.  Dispatches on MODE to the matching
   generator.  */
8333 aarch64_emit_store_exclusive (enum machine_mode mode
, rtx bval
,
8334 rtx rval
, rtx mem
, rtx model_rtx
)
8336 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
8340 case QImode
: gen
= gen_aarch64_store_exclusiveqi
; break;
8341 case HImode
: gen
= gen_aarch64_store_exclusivehi
; break;
8342 case SImode
: gen
= gen_aarch64_store_exclusivesi
; break;
8343 case DImode
: gen
= gen_aarch64_store_exclusivedi
; break;
8348 emit_insn (gen (bval
, rval
, mem
, model_rtx
));
8351 /* Mark the previous jump instruction as unlikely. */
8354 aarch64_emit_unlikely_jump (rtx insn
)
/* REG_BR_PROB_BASE / 100 - 1 is just under a 1% branch probability.  */
8356 int very_unlikely
= REG_BR_PROB_BASE
/ 100 - 1;
8358 insn
= emit_jump_insn (insn
);
8359 add_int_reg_note (insn
, REG_BR_PROB
, very_unlikely
);
8362 /* Expand a compare and swap pattern. */
/* operands: [0] success bool, [1] output value, [2] memory, [3] expected
   old value, [4] new value, [5] weak flag, [6] success memory model,
   [7] failure memory model.  NOTE(review): some assignments (bval, rval,
   mem, cmp_mode) were dropped by the extraction.  */
8365 aarch64_expand_compare_and_swap (rtx operands
[])
8367 rtx bval
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
8368 enum machine_mode mode
, cmp_mode
;
8369 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
8374 oldval
= operands
[3];
8375 newval
= operands
[4];
8376 is_weak
= operands
[5];
8377 mod_s
= operands
[6];
8378 mod_f
= operands
[7];
8379 mode
= GET_MODE (mem
);
8382 /* Normally the succ memory model must be stronger than fail, but in the
8383 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
8384 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
8386 if (INTVAL (mod_f
) == MEMMODEL_ACQUIRE
8387 && INTVAL (mod_s
) == MEMMODEL_RELEASE
)
8388 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
8394 /* For short modes, we're going to perform the comparison in SImode,
8395 so do the zero-extension now. */
8397 rval
= gen_reg_rtx (SImode
);
8398 oldval
= convert_modes (SImode
, mode
, oldval
, true);
8403 /* Force the value into a register if needed. */
8404 if (!aarch64_plus_operand (oldval
, mode
))
8405 oldval
= force_reg (cmp_mode
, oldval
);
8414 case QImode
: gen
= gen_atomic_compare_and_swapqi_1
; break;
8415 case HImode
: gen
= gen_atomic_compare_and_swaphi_1
; break;
8416 case SImode
: gen
= gen_atomic_compare_and_swapsi_1
; break;
8417 case DImode
: gen
= gen_atomic_compare_and_swapdi_1
; break;
8422 emit_insn (gen (rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
/* For the sub-word modes, narrow the SImode result back down.  */
8424 if (mode
== QImode
|| mode
== HImode
)
8425 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
/* Derive the success boolean from the condition-code register.  */
8427 x
= gen_rtx_REG (CCmode
, CC_REGNUM
);
8428 x
= gen_rtx_EQ (SImode
, x
, const0_rtx
);
8429 emit_insn (gen_rtx_SET (VOIDmode
, bval
, x
));
8432 /* Split a compare and swap pattern. */
/* Emit the LL/SC loop for a compare-and-swap after reload:
   label1: ldxr; compare with OLDVAL; bne label2; stxr; retry on failure;
   label2.  NOTE(review): some assignments (rval, mem) and the strong-CAS
   branch-back handling were dropped by the extraction.  */
8435 aarch64_split_compare_and_swap (rtx operands
[])
8437 rtx rval
, mem
, oldval
, newval
, scratch
;
8438 enum machine_mode mode
;
8440 rtx label1
, label2
, x
, cond
;
8444 oldval
= operands
[2];
8445 newval
= operands
[3];
8446 is_weak
= (operands
[4] != const0_rtx
);
8447 scratch
= operands
[7];
8448 mode
= GET_MODE (mem
);
8453 label1
= gen_label_rtx ();
8454 emit_label (label1
);
8456 label2
= gen_label_rtx ();
8458 aarch64_emit_load_exclusive (mode
, rval
, mem
, operands
[5]);
/* If the loaded value differs from OLDVAL, jump straight to the exit.  */
8460 cond
= aarch64_gen_compare_reg (NE
, rval
, oldval
);
8461 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
8462 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
8463 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
8464 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
8466 aarch64_emit_store_exclusive (mode
, scratch
, mem
, newval
, operands
[5]);
/* A non-zero store-exclusive status means we lost the reservation:
   loop back and try again.  */
8470 x
= gen_rtx_NE (VOIDmode
, scratch
, const0_rtx
);
8471 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
8472 gen_rtx_LABEL_REF (Pmode
, label1
), pc_rtx
);
8473 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
/* For the weak variant, expose the store status through the CC reg.  */
8477 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
8478 x
= gen_rtx_COMPARE (CCmode
, scratch
, const0_rtx
);
8479 emit_insn (gen_rtx_SET (VOIDmode
, cond
, x
));
8482 emit_label (label2
);
8485 /* Split an atomic operation. */
/* Emit the LL/SC loop for an atomic RMW op CODE on MEM with operand
   VALUE: load-exclusive into OLD_OUT, compute NEW_OUT, store-exclusive,
   and retry (via COND) until the store succeeds.  */
8488 aarch64_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
8489 rtx value
, rtx model_rtx
, rtx cond
)
8491 enum machine_mode mode
= GET_MODE (mem
);
/* Sub-word operations are widened to SImode for the arithmetic.  */
8492 enum machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
8495 label
= gen_label_rtx ();
8499 new_out
= gen_lowpart (wmode
, new_out
);
8501 old_out
= gen_lowpart (wmode
, old_out
);
8504 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
8506 aarch64_emit_load_exclusive (mode
, old_out
, mem
, model_rtx
);
/* NOT (NAND-style) case: new = ~(old & value), emitted as AND then NOT.  */
8515 x
= gen_rtx_AND (wmode
, old_out
, value
);
8516 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
8517 x
= gen_rtx_NOT (wmode
, new_out
);
8518 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
/* Constant subtrahend: negate and fall through to the generic binop.  */
8522 if (CONST_INT_P (value
))
8524 value
= GEN_INT (-INTVAL (value
));
8530 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
8531 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
8535 aarch64_emit_store_exclusive (mode
, cond
, mem
,
8536 gen_lowpart (mode
, new_out
), model_rtx
);
/* Retry while the store-exclusive status is non-zero.  */
8538 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
8539 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
8540 gen_rtx_LABEL_REF (Pmode
, label
), pc_rtx
);
8541 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
/* Print every ISA extension currently enabled in aarch64_isa_flags to
   the assembler file as "+ext" suffixes, followed by a newline.  */
8545 aarch64_print_extension (void)
8547 const struct aarch64_option_extension
*opt
= NULL
;
8549 for (opt
= all_extensions
; opt
->name
!= NULL
; opt
++)
8550 if ((aarch64_isa_flags
& opt
->flags_on
) == opt
->flags_on
)
8551 asm_fprintf (asm_out_file
, "+%s", opt
->name
);
8553 asm_fprintf (asm_out_file
, "\n");
/* Emit the file preamble: a ".arch" or ".cpu" directive (with extension
   suffixes) depending on whether an architecture or a CPU was selected,
   then the default file start.  */
8557 aarch64_start_file (void)
8561 asm_fprintf (asm_out_file
, "\t.arch %s", selected_arch
->name
);
8562 aarch64_print_extension ();
8564 else if (selected_cpu
)
/* Strip any option suffixes from the CPU name before printing.  */
8566 const char *truncated_name
8567 = aarch64_rewrite_selected_cpu (selected_cpu
->name
);
8568 asm_fprintf (asm_out_file
, "\t.cpu %s", truncated_name
);
8569 aarch64_print_extension ();
8571 default_file_start();
8574 /* Target hook for c_mode_for_suffix. */
/* NOTE(review): the function body was dropped by the extraction; only
   the signature remains.  Presumably maps a literal suffix character to
   a machine mode — confirm against the full source.  */
8575 static enum machine_mode
8576 aarch64_c_mode_for_suffix (char suffix
)
8584 /* We can only represent floating point constants which will fit in
8585 "quarter-precision" values. These values are characterised by
8586 a sign bit, a 4-bit mantissa and a 3-bit exponent. And are given
8589 (-1)^s * (n/16) * 2^r
8592 's' is the sign bit.
8593 'n' is an integer in the range 16 <= n <= 31.
8594 'r' is an integer in the range -3 <= r <= 4. */
8596 /* Return true iff X can be represented by a quarter-precision
8597 floating point immediate operand X. Note, we cannot represent 0.0. */
8599 aarch64_float_const_representable_p (rtx x
)
8601 /* This represents our current view of how many bits
8602 make up the mantissa. */
8603 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
8605 unsigned HOST_WIDE_INT mantissa
, mask
;
8606 HOST_WIDE_INT m1
, m2
;
8607 REAL_VALUE_TYPE r
, m
;
/* Only CONST_DOUBLE floating constants are candidates.  */
8609 if (!CONST_DOUBLE_P (x
))
8612 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8614 /* We cannot represent infinities, NaNs or +/-zero. We won't
8615 know if we have +zero until we analyse the mantissa, but we
8616 can reject the other invalid values. */
8617 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
)
8618 || REAL_VALUE_MINUS_ZERO (r
))
8621 /* Extract exponent. */
8622 r
= real_value_abs (&r
);
8623 exponent
= REAL_EXP (&r
);
8625 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8626 highest (sign) bit, with a fixed binary point at bit point_pos.
8627 m1 holds the low part of the mantissa, m2 the high part.
8628 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
8629 bits for the mantissa, this can fail (low bits will be lost). */
8630 real_ldexp (&m
, &r
, point_pos
- exponent
);
8631 REAL_VALUE_TO_INT (&m1
, &m2
, m
);
8633 /* If the low part of the mantissa has bits set we cannot represent
8637 /* We have rejected the lower HOST_WIDE_INT, so update our
8638 understanding of how many bits lie in the mantissa and
8639 look only at the high HOST_WIDE_INT. */
8641 point_pos
-= HOST_BITS_PER_WIDE_INT
;
8643 /* We can only represent values with a mantissa of the form 1.xxxx. */
8644 mask
= ((unsigned HOST_WIDE_INT
)1 << (point_pos
- 5)) - 1;
8645 if ((mantissa
& mask
) != 0)
8648 /* Having filtered unrepresentable values, we may now remove all
8649 but the highest 5 bits. */
8650 mantissa
>>= point_pos
- 5;
8652 /* We cannot represent the value 0.0, so reject it. This is handled
8657 /* Then, as bit 4 is always set, we can mask it off, leaving
8658 the mantissa in the range [0, 15]. */
8659 mantissa
&= ~(1 << 4);
8660 gcc_assert (mantissa
<= 15);
8662 /* GCC internally does not use IEEE754-like encoding (where normalized
8663 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
8664 Our mantissa values are shifted 4 places to the left relative to
8665 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
8666 by 5 places to correct for GCC's representation. */
8667 exponent
= 5 - exponent
;
/* The adjusted exponent must fit the 3-bit range [-3, 4] of the
   encoding (here tested as [0, 7] after biasing).  */
8669 return (exponent
>= 0 && exponent
<= 7);
/* Return the assembly template (static buffer) for moving the SIMD
   immediate CONST_VECTOR of MODE into a register: FMOV for float
   splats, otherwise MOVI/MVNI with an optional LSL/MSL shift.  */
8673 aarch64_output_simd_mov_immediate (rtx const_vector
,
8674 enum machine_mode mode
,
8678 static char templ
[40];
8679 const char *mnemonic
;
8680 const char *shift_op
;
8681 unsigned int lane_count
= 0;
8684 struct simd_immediate_info info
= { NULL_RTX
, 0, 0, false, false };
8686 /* This will return true to show const_vector is legal for use as either
8687 a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
8688 also update INFO to show how the immediate should be generated. */
8689 is_valid
= aarch64_simd_valid_immediate (const_vector
, mode
, false, &info
);
8690 gcc_assert (is_valid
);
8692 element_char
= sizetochar (info
.element_width
);
8693 lane_count
= width
/ info
.element_width
;
8695 mode
= GET_MODE_INNER (mode
);
8696 if (mode
== SFmode
|| mode
== DFmode
)
8698 gcc_assert (info
.shift
== 0 && ! info
.mvn
);
/* +0.0 is emitted as an integer zero move rather than FMOV.  */
8699 if (aarch64_float_const_zero_rtx_p (info
.value
))
8700 info
.value
= GEN_INT (0);
/* Otherwise print the float value in decimal for an FMOV template.  */
8705 REAL_VALUE_FROM_CONST_DOUBLE (r
, info
.value
);
8706 char float_buf
[buf_size
] = {'\0'};
8707 real_to_decimal_for_mode (float_buf
, &r
, buf_size
, buf_size
, 1, mode
);
8710 if (lane_count
== 1)
8711 snprintf (templ
, sizeof (templ
), "fmov\t%%d0, %s", float_buf
);
8713 snprintf (templ
, sizeof (templ
), "fmov\t%%0.%d%c, %s",
8714 lane_count
, element_char
, float_buf
);
/* Integer path: pick MOVI or MVNI, and MSL vs LSL for the shifter.  */
8719 mnemonic
= info
.mvn
? "mvni" : "movi";
8720 shift_op
= info
.msl
? "msl" : "lsl";
8722 if (lane_count
== 1)
8723 snprintf (templ
, sizeof (templ
), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX
,
8724 mnemonic
, UINTVAL (info
.value
));
8725 else if (info
.shift
)
8726 snprintf (templ
, sizeof (templ
), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
8727 ", %s %d", mnemonic
, lane_count
, element_char
,
8728 UINTVAL (info
.value
), shift_op
, info
.shift
);
8730 snprintf (templ
, sizeof (templ
), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
,
8731 mnemonic
, lane_count
, element_char
, UINTVAL (info
.value
));
/* Return the assembly template for moving scalar immediate IMMEDIATE of
   MODE into a SIMD register, by broadcasting it into a 64-bit vector
   and reusing the vector immediate printer.  */
8736 aarch64_output_scalar_simd_mov_immediate (rtx immediate
,
8737 enum machine_mode mode
)
8739 enum machine_mode vmode
;
8741 gcc_assert (!VECTOR_MODE_P (mode
));
8742 vmode
= aarch64_simd_container_mode (mode
, 64);
8743 rtx v_op
= aarch64_simd_gen_const_vector_dup (vmode
, INTVAL (immediate
));
8744 return aarch64_output_simd_mov_immediate (v_op
, vmode
, 64);
8747 /* Split operands into moves from op[1] + op[2] into op[0]. */
8750 aarch64_split_combinev16qi (rtx operands
[3])
8752 unsigned int dest
= REGNO (operands
[0]);
8753 unsigned int src1
= REGNO (operands
[1]);
8754 unsigned int src2
= REGNO (operands
[2]);
8755 enum machine_mode halfmode
= GET_MODE (operands
[1]);
8756 unsigned int halfregs
= HARD_REGNO_NREGS (src1
, halfmode
);
8759 gcc_assert (halfmode
== V16QImode
);
/* Sources already occupy the destination's low and high halves.  */
8761 if (src1
== dest
&& src2
== dest
+ halfregs
)
8763 /* No-op move. Can't split to nothing; emit something. */
8764 emit_note (NOTE_INSN_DELETED
);
8768 /* Preserve register attributes for variable tracking. */
8769 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
8770 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
8771 GET_MODE_SIZE (halfmode
));
8773 /* Special case of reversed high/low parts. */
8774 if (reg_overlap_mentioned_p (operands
[2], destlo
)
8775 && reg_overlap_mentioned_p (operands
[1], desthi
))
/* Three-XOR swap avoids needing a scratch register.  */
8777 emit_insn (gen_xorv16qi3 (operands
[1], operands
[1], operands
[2]));
8778 emit_insn (gen_xorv16qi3 (operands
[2], operands
[1], operands
[2]));
8779 emit_insn (gen_xorv16qi3 (operands
[1], operands
[1], operands
[2]));
8781 else if (!reg_overlap_mentioned_p (operands
[2], destlo
))
8783 /* Try to avoid unnecessary moves if part of the result
8784 is in the right place already. */
8786 emit_move_insn (destlo
, operands
[1]);
8787 if (src2
!= dest
+ halfregs
)
8788 emit_move_insn (desthi
, operands
[2]);
/* Otherwise move the high half first so it isn't clobbered.  */
8792 if (src2
!= dest
+ halfregs
)
8793 emit_move_insn (desthi
, operands
[2]);
8795 emit_move_insn (destlo
, operands
[1]);
8799 /* vec_perm support. */
8801 #define MAX_VECT_LEN 16
/* Describes one constant vector-permute request for the aarch64_evpc_*
   pattern matchers: destination, the two source operands, the permute
   index array, and the vector mode.  */
8803 struct expand_vec_perm_d
8805 rtx target
, op0
, op1
;
8806 unsigned char perm
[MAX_VECT_LEN
];
8807 enum machine_mode vmode
;
8813 /* Generate a variable permutation. */
/* Emit a TBL-based permute of OP0/OP1 by the byte selector SEL into
   TARGET.  Only V8QI/V16QI modes are handled here; wider element modes
   are lowered to bytes by the caller.  */
8816 aarch64_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
8818 enum machine_mode vmode
= GET_MODE (target
);
8819 bool one_vector_p
= rtx_equal_p (op0
, op1
);
8821 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
8822 gcc_checking_assert (GET_MODE (op0
) == vmode
);
8823 gcc_checking_assert (GET_MODE (op1
) == vmode
);
8824 gcc_checking_assert (GET_MODE (sel
) == vmode
);
8825 gcc_checking_assert (TARGET_SIMD
);
/* Single-source permute: one TBL over a (possibly duplicated) table.  */
8829 if (vmode
== V8QImode
)
8831 /* Expand the argument to a V16QI mode by duplicating it. */
8832 rtx pair
= gen_reg_rtx (V16QImode
);
8833 emit_insn (gen_aarch64_combinev8qi (pair
, op0
, op0
));
8834 emit_insn (gen_aarch64_tbl1v8qi (target
, pair
, sel
));
8838 emit_insn (gen_aarch64_tbl1v16qi (target
, op0
, sel
));
/* Two-source permute: combine the operands into a register pair for a
   one- or two-table TBL.  */
8845 if (vmode
== V8QImode
)
8847 pair
= gen_reg_rtx (V16QImode
);
8848 emit_insn (gen_aarch64_combinev8qi (pair
, op0
, op1
));
8849 emit_insn (gen_aarch64_tbl1v8qi (target
, pair
, sel
));
8853 pair
= gen_reg_rtx (OImode
);
8854 emit_insn (gen_aarch64_combinev16qi (pair
, op0
, op1
));
8855 emit_insn (gen_aarch64_tbl2v16qi (target
, pair
, sel
));
/* Expand a variable vec_perm: clamp SEL to the valid index range (TBL
   returns zero for out-of-range indices rather than wrapping), fix up
   indices for big-endian, then emit the TBL sequence.  */
8861 aarch64_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
8863 enum machine_mode vmode
= GET_MODE (target
);
8864 unsigned int nelt
= GET_MODE_NUNITS (vmode
);
8865 bool one_vector_p
= rtx_equal_p (op0
, op1
);
8868 /* The TBL instruction does not use a modulo index, so we must take care
8869 of that ourselves. */
8870 mask
= aarch64_simd_gen_const_vector_dup (vmode
,
8871 one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
8872 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
8874 /* For big-endian, we also need to reverse the index within the vector
8875 (but not which vector). */
8876 if (BYTES_BIG_ENDIAN
)
8878 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
8880 mask
= aarch64_simd_gen_const_vector_dup (vmode
, nelt
- 1);
8881 sel
= expand_simple_binop (vmode
, XOR
, sel
, mask
,
8882 NULL
, 0, OPTAB_LIB_WIDEN
);
8884 aarch64_expand_vec_perm_1 (target
, op0
, op1
, sel
);
8887 /* Recognize patterns suitable for the TRN instructions. */
/* Match D against a TRN1/TRN2 transpose permutation and emit the insn
   if it fits; D->perm[0] selects between the even (TRN1) and odd (TRN2)
   variant.  */
8889 aarch64_evpc_trn (struct expand_vec_perm_d
*d
)
8891 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
8892 rtx out
, in0
, in1
, x
;
8893 rtx (*gen
) (rtx
, rtx
, rtx
);
8894 enum machine_mode vmode
= d
->vmode
;
8896 if (GET_MODE_UNIT_SIZE (vmode
) > 8)
8899 /* Note that these are little-endian tests.
8900 We correct for big-endian later. */
8901 if (d
->perm
[0] == 0)
8903 else if (d
->perm
[0] == 1)
8907 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
/* A TRN takes element (i + odd) from op0 and (i + nelt + odd) from op1
   for every even position i.  */
8909 for (i
= 0; i
< nelt
; i
+= 2)
8911 if (d
->perm
[i
] != i
+ odd
)
8913 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
/* Big-endian lane numbering is mirrored: swap the inputs.  */
8923 if (BYTES_BIG_ENDIAN
)
8925 x
= in0
, in0
= in1
, in1
= x
;
8934 case V16QImode
: gen
= gen_aarch64_trn2v16qi
; break;
8935 case V8QImode
: gen
= gen_aarch64_trn2v8qi
; break;
8936 case V8HImode
: gen
= gen_aarch64_trn2v8hi
; break;
8937 case V4HImode
: gen
= gen_aarch64_trn2v4hi
; break;
8938 case V4SImode
: gen
= gen_aarch64_trn2v4si
; break;
8939 case V2SImode
: gen
= gen_aarch64_trn2v2si
; break;
8940 case V2DImode
: gen
= gen_aarch64_trn2v2di
; break;
8941 case V4SFmode
: gen
= gen_aarch64_trn2v4sf
; break;
8942 case V2SFmode
: gen
= gen_aarch64_trn2v2sf
; break;
8943 case V2DFmode
: gen
= gen_aarch64_trn2v2df
; break;
8952 case V16QImode
: gen
= gen_aarch64_trn1v16qi
; break;
8953 case V8QImode
: gen
= gen_aarch64_trn1v8qi
; break;
8954 case V8HImode
: gen
= gen_aarch64_trn1v8hi
; break;
8955 case V4HImode
: gen
= gen_aarch64_trn1v4hi
; break;
8956 case V4SImode
: gen
= gen_aarch64_trn1v4si
; break;
8957 case V2SImode
: gen
= gen_aarch64_trn1v2si
; break;
8958 case V2DImode
: gen
= gen_aarch64_trn1v2di
; break;
8959 case V4SFmode
: gen
= gen_aarch64_trn1v4sf
; break;
8960 case V2SFmode
: gen
= gen_aarch64_trn1v2sf
; break;
8961 case V2DFmode
: gen
= gen_aarch64_trn1v2df
; break;
8967 emit_insn (gen (out
, in0
, in1
));
8971 /* Recognize patterns suitable for the UZP instructions. */
/* Match D against a UZP1/UZP2 unzip permutation and emit the insn if it
   fits; D->perm[0] selects between the even (UZP1) and odd (UZP2)
   variant.  */
8973 aarch64_evpc_uzp (struct expand_vec_perm_d
*d
)
8975 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
8976 rtx out
, in0
, in1
, x
;
8977 rtx (*gen
) (rtx
, rtx
, rtx
);
8978 enum machine_mode vmode
= d
->vmode
;
8980 if (GET_MODE_UNIT_SIZE (vmode
) > 8)
8983 /* Note that these are little-endian tests.
8984 We correct for big-endian later. */
8985 if (d
->perm
[0] == 0)
8987 else if (d
->perm
[0] == 1)
8991 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
/* UZP takes every second element, starting at ODD, from the
   concatenation of the two inputs.  */
8993 for (i
= 0; i
< nelt
; i
++)
8995 unsigned elt
= (i
* 2 + odd
) & mask
;
8996 if (d
->perm
[i
] != elt
)
/* Big-endian lane numbering is mirrored: swap the inputs.  */
9006 if (BYTES_BIG_ENDIAN
)
9008 x
= in0
, in0
= in1
, in1
= x
;
9017 case V16QImode
: gen
= gen_aarch64_uzp2v16qi
; break;
9018 case V8QImode
: gen
= gen_aarch64_uzp2v8qi
; break;
9019 case V8HImode
: gen
= gen_aarch64_uzp2v8hi
; break;
9020 case V4HImode
: gen
= gen_aarch64_uzp2v4hi
; break;
9021 case V4SImode
: gen
= gen_aarch64_uzp2v4si
; break;
9022 case V2SImode
: gen
= gen_aarch64_uzp2v2si
; break;
9023 case V2DImode
: gen
= gen_aarch64_uzp2v2di
; break;
9024 case V4SFmode
: gen
= gen_aarch64_uzp2v4sf
; break;
9025 case V2SFmode
: gen
= gen_aarch64_uzp2v2sf
; break;
9026 case V2DFmode
: gen
= gen_aarch64_uzp2v2df
; break;
9035 case V16QImode
: gen
= gen_aarch64_uzp1v16qi
; break;
9036 case V8QImode
: gen
= gen_aarch64_uzp1v8qi
; break;
9037 case V8HImode
: gen
= gen_aarch64_uzp1v8hi
; break;
9038 case V4HImode
: gen
= gen_aarch64_uzp1v4hi
; break;
9039 case V4SImode
: gen
= gen_aarch64_uzp1v4si
; break;
9040 case V2SImode
: gen
= gen_aarch64_uzp1v2si
; break;
9041 case V2DImode
: gen
= gen_aarch64_uzp1v2di
; break;
9042 case V4SFmode
: gen
= gen_aarch64_uzp1v4sf
; break;
9043 case V2SFmode
: gen
= gen_aarch64_uzp1v2sf
; break;
9044 case V2DFmode
: gen
= gen_aarch64_uzp1v2df
; break;
9050 emit_insn (gen (out
, in0
, in1
));
9054 /* Recognize patterns suitable for the ZIP instructions. */
/* Match D against a ZIP1/ZIP2 interleave permutation and emit the insn
   if it fits; D->perm[0] selects between the low (ZIP1) and high (ZIP2)
   halves.  */
9056 aarch64_evpc_zip (struct expand_vec_perm_d
*d
)
9058 unsigned int i
, high
, mask
, nelt
= d
->nelt
;
9059 rtx out
, in0
, in1
, x
;
9060 rtx (*gen
) (rtx
, rtx
, rtx
);
9061 enum machine_mode vmode
= d
->vmode
;
9063 if (GET_MODE_UNIT_SIZE (vmode
) > 8)
9066 /* Note that these are little-endian tests.
9067 We correct for big-endian later. */
9069 if (d
->perm
[0] == high
)
9072 else if (d
->perm
[0] == 0)
9076 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
/* ZIP interleaves pairs: element (i + high) from op0 followed by the
   corresponding element from op1.  */
9078 for (i
= 0; i
< nelt
/ 2; i
++)
9080 unsigned elt
= (i
+ high
) & mask
;
9081 if (d
->perm
[i
* 2] != elt
)
9083 elt
= (elt
+ nelt
) & mask
;
9084 if (d
->perm
[i
* 2 + 1] != elt
)
/* Big-endian lane numbering is mirrored: swap the inputs.  */
9094 if (BYTES_BIG_ENDIAN
)
9096 x
= in0
, in0
= in1
, in1
= x
;
9105 case V16QImode
: gen
= gen_aarch64_zip2v16qi
; break;
9106 case V8QImode
: gen
= gen_aarch64_zip2v8qi
; break;
9107 case V8HImode
: gen
= gen_aarch64_zip2v8hi
; break;
9108 case V4HImode
: gen
= gen_aarch64_zip2v4hi
; break;
9109 case V4SImode
: gen
= gen_aarch64_zip2v4si
; break;
9110 case V2SImode
: gen
= gen_aarch64_zip2v2si
; break;
9111 case V2DImode
: gen
= gen_aarch64_zip2v2di
; break;
9112 case V4SFmode
: gen
= gen_aarch64_zip2v4sf
; break;
9113 case V2SFmode
: gen
= gen_aarch64_zip2v2sf
; break;
9114 case V2DFmode
: gen
= gen_aarch64_zip2v2df
; break;
9123 case V16QImode
: gen
= gen_aarch64_zip1v16qi
; break;
9124 case V8QImode
: gen
= gen_aarch64_zip1v8qi
; break;
9125 case V8HImode
: gen
= gen_aarch64_zip1v8hi
; break;
9126 case V4HImode
: gen
= gen_aarch64_zip1v4hi
; break;
9127 case V4SImode
: gen
= gen_aarch64_zip1v4si
; break;
9128 case V2SImode
: gen
= gen_aarch64_zip1v2si
; break;
9129 case V2DImode
: gen
= gen_aarch64_zip1v2di
; break;
9130 case V4SFmode
: gen
= gen_aarch64_zip1v4sf
; break;
9131 case V2SFmode
: gen
= gen_aarch64_zip1v2sf
; break;
9132 case V2DFmode
: gen
= gen_aarch64_zip1v2df
; break;
9138 emit_insn (gen (out
, in0
, in1
));
9142 /* Recognize patterns for the EXT insn. */
/* Match D against an EXT (extract-from-pair) permutation — indices that
   increase by one from D->perm[0] — and emit the insn if it fits.  */
9145 aarch64_evpc_ext (struct expand_vec_perm_d
*d
)
9147 unsigned int i
, nelt
= d
->nelt
;
9148 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
9151 unsigned int location
= d
->perm
[0]; /* Always < nelt. */
9153 /* Check if the extracted indices are increasing by one. */
9154 for (i
= 1; i
< nelt
; i
++)
9156 unsigned int required
= location
+ i
;
9157 if (d
->one_vector_p
)
9159 /* We'll pass the same vector in twice, so allow indices to wrap. */
9160 required
&= (nelt
- 1);
9162 if (d
->perm
[i
] != required
)
9168 case V16QImode
: gen
= gen_aarch64_extv16qi
; break;
9169 case V8QImode
: gen
= gen_aarch64_extv8qi
; break;
9170 case V4HImode
: gen
= gen_aarch64_extv4hi
; break;
9171 case V8HImode
: gen
= gen_aarch64_extv8hi
; break;
9172 case V2SImode
: gen
= gen_aarch64_extv2si
; break;
9173 case V4SImode
: gen
= gen_aarch64_extv4si
; break;
9174 case V2SFmode
: gen
= gen_aarch64_extv2sf
; break;
9175 case V4SFmode
: gen
= gen_aarch64_extv4sf
; break;
9176 case V2DImode
: gen
= gen_aarch64_extv2di
; break;
9177 case V2DFmode
: gen
= gen_aarch64_extv2df
; break;
9186 /* The case where (location == 0) is a no-op for both big- and little-endian,
9187 and is removed by the mid-end at optimization levels -O1 and higher. */
9189 if (BYTES_BIG_ENDIAN
&& (location
!= 0))
9191 /* After setup, we want the high elements of the first vector (stored
9192 at the LSB end of the register), and the low elements of the second
9193 vector (stored at the MSB end of the register). So swap. */
9197 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
9198 location
= nelt
- location
;
9201 offset
= GEN_INT (location
);
9202 emit_insn (gen (d
->target
, d
->op0
, d
->op1
, offset
));
9206 /* Recognize patterns for the REV insns. */
/* Match D against a single-vector REV64/REV32/REV16 element-reversal
   permutation (reversal within groups of diff+1 elements) and emit the
   insn if it fits.  */
9209 aarch64_evpc_rev (struct expand_vec_perm_d
*d
)
9211 unsigned int i
, j
, diff
, nelt
= d
->nelt
;
9212 rtx (*gen
) (rtx
, rtx
);
/* REV only operates on a single source vector.  */
9214 if (!d
->one_vector_p
)
9223 case V16QImode
: gen
= gen_aarch64_rev64v16qi
; break;
9224 case V8QImode
: gen
= gen_aarch64_rev64v8qi
; break;
9232 case V16QImode
: gen
= gen_aarch64_rev32v16qi
; break;
9233 case V8QImode
: gen
= gen_aarch64_rev32v8qi
; break;
9234 case V8HImode
: gen
= gen_aarch64_rev64v8hi
; break;
9235 case V4HImode
: gen
= gen_aarch64_rev64v4hi
; break;
9243 case V16QImode
: gen
= gen_aarch64_rev16v16qi
; break;
9244 case V8QImode
: gen
= gen_aarch64_rev16v8qi
; break;
9245 case V8HImode
: gen
= gen_aarch64_rev32v8hi
; break;
9246 case V4HImode
: gen
= gen_aarch64_rev32v4hi
; break;
9247 case V4SImode
: gen
= gen_aarch64_rev64v4si
; break;
9248 case V2SImode
: gen
= gen_aarch64_rev64v2si
; break;
9249 case V4SFmode
: gen
= gen_aarch64_rev64v4sf
; break;
9250 case V2SFmode
: gen
= gen_aarch64_rev64v2sf
; break;
/* Verify the permutation reverses each group of diff+1 elements.  */
9259 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
9260 for (j
= 0; j
<= diff
; j
+= 1)
9262 /* This is guaranteed to be true as the value of diff
9263 is 7, 3, 1 and we should have enough elements in the
9264 queue to generate this. Getting a vector mask with a
9265 value of diff other than these values implies that
9266 something is wrong by the time we get here. */
9267 gcc_assert (i
+ j
< nelt
);
9268 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
9276 emit_insn (gen (d
->target
, d
->op0
));
/* Match D against a lane-broadcast permutation (every index equal) and
   emit a DUP-lane insn if it fits.  */
9281 aarch64_evpc_dup (struct expand_vec_perm_d
*d
)
9283 rtx (*gen
) (rtx
, rtx
, rtx
);
9284 rtx out
= d
->target
;
9286 enum machine_mode vmode
= d
->vmode
;
9287 unsigned int i
, elt
, nelt
= d
->nelt
;
9290 /* TODO: This may not be big-endian safe. */
9291 if (BYTES_BIG_ENDIAN
)
/* All indices must name the same source lane.  */
9295 for (i
= 1; i
< nelt
; i
++)
9297 if (elt
!= d
->perm
[i
])
9301 /* The generic preparation in aarch64_expand_vec_perm_const_1
9302 swaps the operand order and the permute indices if it finds
9303 d->perm[0] to be in the second operand. Thus, we can always
9304 use d->op0 and need not do any extra arithmetic to get the
9305 correct lane number. */
9307 lane
= GEN_INT (elt
);
9311 case V16QImode
: gen
= gen_aarch64_dup_lanev16qi
; break;
9312 case V8QImode
: gen
= gen_aarch64_dup_lanev8qi
; break;
9313 case V8HImode
: gen
= gen_aarch64_dup_lanev8hi
; break;
9314 case V4HImode
: gen
= gen_aarch64_dup_lanev4hi
; break;
9315 case V4SImode
: gen
= gen_aarch64_dup_lanev4si
; break;
9316 case V2SImode
: gen
= gen_aarch64_dup_lanev2si
; break;
9317 case V2DImode
: gen
= gen_aarch64_dup_lanev2di
; break;
9318 case V4SFmode
: gen
= gen_aarch64_dup_lanev4sf
; break;
9319 case V2SFmode
: gen
= gen_aarch64_dup_lanev2sf
; break;
9320 case V2DFmode
: gen
= gen_aarch64_dup_lanev2df
; break;
9325 emit_insn (gen (out
, in0
, lane
));
/* Fallback matcher: expand D as a generic TBL permute with a constant
   selector.  Only byte modes are handled; the generic code retries in
   QImode for wider elements.  */
9330 aarch64_evpc_tbl (struct expand_vec_perm_d
*d
)
9332 rtx rperm
[MAX_VECT_LEN
], sel
;
9333 enum machine_mode vmode
= d
->vmode
;
9334 unsigned int i
, nelt
= d
->nelt
;
9339 /* Generic code will try constant permutation twice. Once with the
9340 original mode and again with the elements lowered to QImode.
9341 So wait and don't do the selector expansion ourselves. */
9342 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
/* Materialize the permute indices as a constant selector vector.  */
9345 for (i
= 0; i
< nelt
; ++i
)
9347 int nunits
= GET_MODE_NUNITS (vmode
);
9349 /* If big-endian and two vectors we end up with a weird mixed-endian
9350 mode on NEON. Reverse the index within each word but not the word
9352 rperm
[i
] = GEN_INT (BYTES_BIG_ENDIAN
? d
->perm
[i
] ^ (nunits
- 1)
9355 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
9356 sel
= force_reg (vmode
, sel
);
9358 aarch64_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
9363 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
9365 /* The pattern matching functions above are written to look for a small
9366 number to begin the sequence (0, 1, N/2). If we begin with an index
9367 from the second operand, we can swap the operands. */
9368 if (d
->perm
[0] >= d
->nelt
)
9370 unsigned i
, nelt
= d
->nelt
;
9373 for (i
= 0; i
< nelt
; ++i
)
9374 d
->perm
[i
] = (d
->perm
[i
] + nelt
) & (2 * nelt
- 1);
9383 if (aarch64_evpc_rev (d
))
9385 else if (aarch64_evpc_ext (d
))
9387 else if (aarch64_evpc_zip (d
))
9389 else if (aarch64_evpc_uzp (d
))
9391 else if (aarch64_evpc_trn (d
))
9393 else if (aarch64_evpc_dup (d
))
9395 return aarch64_evpc_tbl (d
);
9400 /* Expand a vec_perm_const pattern. */
9403 aarch64_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
, rtx sel
)
9405 struct expand_vec_perm_d d
;
9412 d
.vmode
= GET_MODE (target
);
9413 gcc_assert (VECTOR_MODE_P (d
.vmode
));
9414 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
9415 d
.testing_p
= false;
9417 for (i
= which
= 0; i
< nelt
; ++i
)
9419 rtx e
= XVECEXP (sel
, 0, i
);
9420 int ei
= INTVAL (e
) & (2 * nelt
- 1);
9421 which
|= (ei
< nelt
? 1 : 2);
9431 d
.one_vector_p
= false;
9432 if (!rtx_equal_p (op0
, op1
))
9435 /* The elements of PERM do not suggest that only the first operand
9436 is used, but both operands are identical. Allow easier matching
9437 of the permutation by folding the permutation into the single
9441 for (i
= 0; i
< nelt
; ++i
)
9442 d
.perm
[i
] &= nelt
- 1;
9444 d
.one_vector_p
= true;
9449 d
.one_vector_p
= true;
9453 return aarch64_expand_vec_perm_const_1 (&d
);
9457 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
9458 const unsigned char *sel
)
9460 struct expand_vec_perm_d d
;
9461 unsigned int i
, nelt
, which
;
9465 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
9467 memcpy (d
.perm
, sel
, nelt
);
9469 /* Calculate whether all elements are in one vector. */
9470 for (i
= which
= 0; i
< nelt
; ++i
)
9472 unsigned char e
= d
.perm
[i
];
9473 gcc_assert (e
< 2 * nelt
);
9474 which
|= (e
< nelt
? 1 : 2);
9477 /* If all elements are from the second vector, reindex as if from the
9480 for (i
= 0; i
< nelt
; ++i
)
9483 /* Check whether the mask can be applied to a single vector. */
9484 d
.one_vector_p
= (which
!= 3);
9486 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
9487 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
9488 if (!d
.one_vector_p
)
9489 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
9492 ret
= aarch64_expand_vec_perm_const_1 (&d
);
9498 /* Implement target hook CANNOT_CHANGE_MODE_CLASS. */
9500 aarch64_cannot_change_mode_class (enum machine_mode from
,
9501 enum machine_mode to
,
9502 enum reg_class rclass
)
9504 /* Full-reg subregs are allowed on general regs or any class if they are
9506 if (GET_MODE_SIZE (from
) == GET_MODE_SIZE (to
)
9507 || !reg_classes_intersect_p (FP_REGS
, rclass
))
9510 /* Limited combinations of subregs are safe on FPREGs. Particularly,
9511 1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
9512 2. Scalar to Scalar for integer modes or same size float modes.
9513 3. Vector to Vector modes.
9514 4. On little-endian only, Vector-Structure to Vector modes. */
9515 if (GET_MODE_SIZE (from
) > GET_MODE_SIZE (to
))
9517 if (aarch64_vector_mode_supported_p (from
)
9518 && GET_MODE_SIZE (GET_MODE_INNER (from
)) == GET_MODE_SIZE (to
))
9521 if (GET_MODE_NUNITS (from
) == 1
9522 && GET_MODE_NUNITS (to
) == 1
9523 && (GET_MODE_CLASS (from
) == MODE_INT
9527 if (aarch64_vector_mode_supported_p (from
)
9528 && aarch64_vector_mode_supported_p (to
))
9531 /* Within an vector structure straddling multiple vector registers
9532 we are in a mixed-endian representation. As such, we can't
9533 easily change modes for BYTES_BIG_ENDIAN. Otherwise, we can
9534 switch between vectors and vector structures cheaply. */
9535 if (!BYTES_BIG_ENDIAN
)
9536 if ((aarch64_vector_mode_supported_p (from
)
9537 && aarch64_vect_struct_mode_p (to
))
9538 || (aarch64_vector_mode_supported_p (to
)
9539 && aarch64_vect_struct_mode_p (from
)))
9546 /* Implement MODES_TIEABLE_P. */
9549 aarch64_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
9551 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
9554 /* We specifically want to allow elements of "structure" modes to
9555 be tieable to the structure. This more general condition allows
9556 other rarer situations too. */
9558 && aarch64_vector_mode_p (mode1
)
9559 && aarch64_vector_mode_p (mode2
))
9565 /* Return a new RTX holding the result of moving POINTER forward by
9569 aarch64_move_pointer (rtx pointer
, int amount
)
9571 rtx next
= plus_constant (Pmode
, XEXP (pointer
, 0), amount
);
9573 return adjust_automodify_address (pointer
, GET_MODE (pointer
),
9577 /* Return a new RTX holding the result of moving POINTER forward by the
9578 size of the mode it points to. */
9581 aarch64_progress_pointer (rtx pointer
)
9583 HOST_WIDE_INT amount
= GET_MODE_SIZE (GET_MODE (pointer
));
9585 return aarch64_move_pointer (pointer
, amount
);
9588 /* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
9592 aarch64_copy_one_block_and_progress_pointers (rtx
*src
, rtx
*dst
,
9593 enum machine_mode mode
)
9595 rtx reg
= gen_reg_rtx (mode
);
9597 /* "Cast" the pointers to the correct mode. */
9598 *src
= adjust_address (*src
, mode
, 0);
9599 *dst
= adjust_address (*dst
, mode
, 0);
9600 /* Emit the memcpy. */
9601 emit_move_insn (reg
, *src
);
9602 emit_move_insn (*dst
, reg
);
9603 /* Move the pointers forward. */
9604 *src
= aarch64_progress_pointer (*src
);
9605 *dst
= aarch64_progress_pointer (*dst
);
9608 /* Expand movmem, as if from a __builtin_memcpy. Return true if
9609 we succeed, otherwise return false. */
9612 aarch64_expand_movmem (rtx
*operands
)
9615 rtx dst
= operands
[0];
9616 rtx src
= operands
[1];
9618 bool speed_p
= !optimize_function_for_size_p (cfun
);
9620 /* When optimizing for size, give a better estimate of the length of a
9621 memcpy call, but use the default otherwise. */
9622 unsigned int max_instructions
= (speed_p
? 15 : AARCH64_CALL_RATIO
) / 2;
9624 /* We can't do anything smart if the amount to copy is not constant. */
9625 if (!CONST_INT_P (operands
[2]))
9628 n
= UINTVAL (operands
[2]);
9630 /* Try to keep the number of instructions low. For cases below 16 bytes we
9631 need to make at most two moves. For cases above 16 bytes it will be one
9632 move for each 16 byte chunk, then at most two additional moves. */
9633 if (((n
/ 16) + (n
% 16 ? 2 : 0)) > max_instructions
)
9636 base
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
9637 dst
= adjust_automodify_address (dst
, VOIDmode
, base
, 0);
9639 base
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
9640 src
= adjust_automodify_address (src
, VOIDmode
, base
, 0);
9642 /* Simple cases. Copy 0-3 bytes, as (if applicable) a 2-byte, then a
9648 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, HImode
);
9653 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, QImode
);
9658 /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second
9659 4-byte chunk, partially overlapping with the previously copied chunk. */
9662 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, SImode
);
9668 src
= aarch64_move_pointer (src
, move
);
9669 dst
= aarch64_move_pointer (dst
, move
);
9670 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, SImode
);
9675 /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of
9676 them, then (if applicable) an 8-byte chunk. */
9681 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, TImode
);
9686 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, DImode
);
9691 /* Finish the final bytes of the copy. We can always do this in one
9692 instruction. We either copy the exact amount we need, or partially
9693 overlap with the previous chunk we copied and copy 8-bytes. */
9697 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, QImode
);
9699 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, HImode
);
9701 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, SImode
);
9706 src
= aarch64_move_pointer (src
, -1);
9707 dst
= aarch64_move_pointer (dst
, -1);
9708 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, SImode
);
9714 src
= aarch64_move_pointer (src
, move
);
9715 dst
= aarch64_move_pointer (dst
, move
);
9716 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, DImode
);
9723 #undef TARGET_ADDRESS_COST
9724 #define TARGET_ADDRESS_COST aarch64_address_cost
9726 /* This hook will determines whether unnamed bitfields affect the alignment
9727 of the containing structure. The hook returns true if the structure
9728 should inherit the alignment requirements of an unnamed bitfield's
9730 #undef TARGET_ALIGN_ANON_BITFIELD
9731 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
9733 #undef TARGET_ASM_ALIGNED_DI_OP
9734 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
9736 #undef TARGET_ASM_ALIGNED_HI_OP
9737 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
9739 #undef TARGET_ASM_ALIGNED_SI_OP
9740 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
9742 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
9743 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
9744 hook_bool_const_tree_hwi_hwi_const_tree_true
9746 #undef TARGET_ASM_FILE_START
9747 #define TARGET_ASM_FILE_START aarch64_start_file
9749 #undef TARGET_ASM_OUTPUT_MI_THUNK
9750 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
9752 #undef TARGET_ASM_SELECT_RTX_SECTION
9753 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
9755 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
9756 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
9758 #undef TARGET_BUILD_BUILTIN_VA_LIST
9759 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
9761 #undef TARGET_CALLEE_COPIES
9762 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
9764 #undef TARGET_CAN_ELIMINATE
9765 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
9767 #undef TARGET_CANNOT_FORCE_CONST_MEM
9768 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
9770 #undef TARGET_CONDITIONAL_REGISTER_USAGE
9771 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
9773 /* Only the least significant bit is used for initialization guard
9775 #undef TARGET_CXX_GUARD_MASK_BIT
9776 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
9778 #undef TARGET_C_MODE_FOR_SUFFIX
9779 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
9781 #ifdef TARGET_BIG_ENDIAN_DEFAULT
9782 #undef TARGET_DEFAULT_TARGET_FLAGS
9783 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
9786 #undef TARGET_CLASS_MAX_NREGS
9787 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
9789 #undef TARGET_BUILTIN_DECL
9790 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
9792 #undef TARGET_EXPAND_BUILTIN
9793 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
9795 #undef TARGET_EXPAND_BUILTIN_VA_START
9796 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
9798 #undef TARGET_FOLD_BUILTIN
9799 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
9801 #undef TARGET_FUNCTION_ARG
9802 #define TARGET_FUNCTION_ARG aarch64_function_arg
9804 #undef TARGET_FUNCTION_ARG_ADVANCE
9805 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
9807 #undef TARGET_FUNCTION_ARG_BOUNDARY
9808 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
9810 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
9811 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
9813 #undef TARGET_FUNCTION_VALUE
9814 #define TARGET_FUNCTION_VALUE aarch64_function_value
9816 #undef TARGET_FUNCTION_VALUE_REGNO_P
9817 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
9819 #undef TARGET_FRAME_POINTER_REQUIRED
9820 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
9822 #undef TARGET_GIMPLE_FOLD_BUILTIN
9823 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
9825 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
9826 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
9828 #undef TARGET_INIT_BUILTINS
9829 #define TARGET_INIT_BUILTINS aarch64_init_builtins
9831 #undef TARGET_LEGITIMATE_ADDRESS_P
9832 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
9834 #undef TARGET_LEGITIMATE_CONSTANT_P
9835 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
9837 #undef TARGET_LIBGCC_CMP_RETURN_MODE
9838 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
9841 #define TARGET_LRA_P aarch64_lra_p
9843 #undef TARGET_MANGLE_TYPE
9844 #define TARGET_MANGLE_TYPE aarch64_mangle_type
9846 #undef TARGET_MEMORY_MOVE_COST
9847 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
9849 #undef TARGET_MUST_PASS_IN_STACK
9850 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
9852 /* This target hook should return true if accesses to volatile bitfields
9853 should use the narrowest mode possible. It should return false if these
9854 accesses should use the bitfield container type. */
9855 #undef TARGET_NARROW_VOLATILE_BITFIELD
9856 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
9858 #undef TARGET_OPTION_OVERRIDE
9859 #define TARGET_OPTION_OVERRIDE aarch64_override_options
9861 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
9862 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
9863 aarch64_override_options_after_change
9865 #undef TARGET_PASS_BY_REFERENCE
9866 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
9868 #undef TARGET_PREFERRED_RELOAD_CLASS
9869 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
9871 #undef TARGET_SECONDARY_RELOAD
9872 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
9874 #undef TARGET_SHIFT_TRUNCATION_MASK
9875 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
9877 #undef TARGET_SETUP_INCOMING_VARARGS
9878 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
9880 #undef TARGET_STRUCT_VALUE_RTX
9881 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
9883 #undef TARGET_REGISTER_MOVE_COST
9884 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
9886 #undef TARGET_RETURN_IN_MEMORY
9887 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
9889 #undef TARGET_RETURN_IN_MSB
9890 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
9892 #undef TARGET_RTX_COSTS
9893 #define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
9895 #undef TARGET_SCHED_ISSUE_RATE
9896 #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
9898 #undef TARGET_TRAMPOLINE_INIT
9899 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
9901 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
9902 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
9904 #undef TARGET_VECTOR_MODE_SUPPORTED_P
9905 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
9907 #undef TARGET_ARRAY_MODE_SUPPORTED_P
9908 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
9910 #undef TARGET_VECTORIZE_ADD_STMT_COST
9911 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
9913 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
9914 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
9915 aarch64_builtin_vectorization_cost
9917 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
9918 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
9920 #undef TARGET_VECTORIZE_BUILTINS
9921 #define TARGET_VECTORIZE_BUILTINS
9923 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
9924 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
9925 aarch64_builtin_vectorized_function
9927 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
9928 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
9929 aarch64_autovectorize_vector_sizes
9931 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
9932 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
9933 aarch64_atomic_assign_expand_fenv
9935 /* Section anchor support. */
9937 #undef TARGET_MIN_ANCHOR_OFFSET
9938 #define TARGET_MIN_ANCHOR_OFFSET -256
9940 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
9941 byte offset; we can do much more for larger data types, but have no way
9942 to determine the size of the access. We assume accesses are aligned. */
9943 #undef TARGET_MAX_ANCHOR_OFFSET
9944 #define TARGET_MAX_ANCHOR_OFFSET 4095
9946 #undef TARGET_VECTOR_ALIGNMENT
9947 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
9949 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
9950 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
9951 aarch64_simd_vector_alignment_reachable
9953 /* vec_perm support. */
9955 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
9956 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
9957 aarch64_vectorize_vec_perm_const_ok
9960 #undef TARGET_FIXED_CONDITION_CODE_REGS
9961 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
9963 struct gcc_target targetm
= TARGET_INITIALIZER
;
9965 #include "gt-aarch64.h"