1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2014 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
25 #include "insn-codes.h"
27 #include "insn-attr.h"
29 #include "stringpool.h"
30 #include "stor-layout.h"
35 #include "hard-reg-set.h"
41 #include "target-def.h"
42 #include "targhooks.h"
47 #include "langhooks.h"
48 #include "diagnostic-core.h"
49 #include "hash-table.h"
51 #include "basic-block.h"
52 #include "tree-ssa-alias.h"
53 #include "internal-fn.h"
54 #include "gimple-fold.h"
56 #include "gimple-expr.h"
63 #include "tree-vectorizer.h"
64 #include "config/arm/aarch-cost-tables.h"
68 /* Defined for convenience. */
69 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
71 /* Classifies an address.
74 A simple base register plus immediate offset.
77 A base register indexed by immediate offset with writeback.
80 A base register indexed by (optionally scaled) register.
83 A base register indexed by (optionally scaled) zero-extended register.
86 A base register indexed by (optionally scaled) sign-extended register.
89 A LO_SUM rtx with a base register and "LO12" symbol relocation.
92 A constant symbolic address, in pc-relative literal pool. */
94 enum aarch64_address_type
{
104 struct aarch64_address_info
{
105 enum aarch64_address_type type
;
109 enum aarch64_symbol_type symbol_type
;
112 struct simd_immediate_info
121 /* The current code model. */
122 enum aarch64_code_model aarch64_cmodel
;
125 #undef TARGET_HAVE_TLS
126 #define TARGET_HAVE_TLS 1
129 static bool aarch64_lra_p (void);
130 static bool aarch64_composite_type_p (const_tree
, enum machine_mode
);
131 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode
,
133 enum machine_mode
*, int *,
135 static void aarch64_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
136 static void aarch64_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
137 static void aarch64_override_options_after_change (void);
138 static bool aarch64_vector_mode_supported_p (enum machine_mode
);
139 static unsigned bit_count (unsigned HOST_WIDE_INT
);
140 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
141 const unsigned char *sel
);
142 static int aarch64_address_cost (rtx
, enum machine_mode
, addr_space_t
, bool);
144 /* The processor for which instructions should be scheduled. */
145 enum aarch64_processor aarch64_tune
= cortexa53
;
147 /* The current tuning set. */
148 const struct tune_params
*aarch64_tune_params
;
150 /* Mask to specify which instructions we are allowed to generate. */
151 unsigned long aarch64_isa_flags
= 0;
153 /* Mask to specify which instruction scheduling options should be used. */
154 unsigned long aarch64_tune_flags
= 0;
156 /* Tuning parameters. */
158 #if HAVE_DESIGNATED_INITIALIZERS
159 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
161 #define NAMED_PARAM(NAME, VAL) (VAL)
164 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
168 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
171 static const struct cpu_addrcost_table generic_addrcost_table
=
173 #if HAVE_DESIGNATED_INITIALIZERS
182 NAMED_PARAM (pre_modify
, 0),
183 NAMED_PARAM (post_modify
, 0),
184 NAMED_PARAM (register_offset
, 0),
185 NAMED_PARAM (register_extend
, 0),
186 NAMED_PARAM (imm_offset
, 0)
189 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
192 static const struct cpu_addrcost_table cortexa57_addrcost_table
=
194 #if HAVE_DESIGNATED_INITIALIZERS
203 NAMED_PARAM (pre_modify
, 0),
204 NAMED_PARAM (post_modify
, 0),
205 NAMED_PARAM (register_offset
, 0),
206 NAMED_PARAM (register_extend
, 0),
207 NAMED_PARAM (imm_offset
, 0),
210 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
213 static const struct cpu_regmove_cost generic_regmove_cost
=
215 NAMED_PARAM (GP2GP
, 1),
216 NAMED_PARAM (GP2FP
, 2),
217 NAMED_PARAM (FP2GP
, 2),
218 NAMED_PARAM (FP2FP
, 2)
221 static const struct cpu_regmove_cost cortexa57_regmove_cost
=
223 NAMED_PARAM (GP2GP
, 1),
224 /* Avoid the use of slow int<->fp moves for spilling by setting
225 their cost higher than memmov_cost. */
226 NAMED_PARAM (GP2FP
, 5),
227 NAMED_PARAM (FP2GP
, 5),
228 NAMED_PARAM (FP2FP
, 2)
231 static const struct cpu_regmove_cost cortexa53_regmove_cost
=
233 NAMED_PARAM (GP2GP
, 1),
234 /* Avoid the use of slow int<->fp moves for spilling by setting
235 their cost higher than memmov_cost. */
236 NAMED_PARAM (GP2FP
, 5),
237 NAMED_PARAM (FP2GP
, 5),
238 NAMED_PARAM (FP2FP
, 2)
241 /* Generic costs for vector insn classes. */
242 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
245 static const struct cpu_vector_cost generic_vector_cost
=
247 NAMED_PARAM (scalar_stmt_cost
, 1),
248 NAMED_PARAM (scalar_load_cost
, 1),
249 NAMED_PARAM (scalar_store_cost
, 1),
250 NAMED_PARAM (vec_stmt_cost
, 1),
251 NAMED_PARAM (vec_to_scalar_cost
, 1),
252 NAMED_PARAM (scalar_to_vec_cost
, 1),
253 NAMED_PARAM (vec_align_load_cost
, 1),
254 NAMED_PARAM (vec_unalign_load_cost
, 1),
255 NAMED_PARAM (vec_unalign_store_cost
, 1),
256 NAMED_PARAM (vec_store_cost
, 1),
257 NAMED_PARAM (cond_taken_branch_cost
, 3),
258 NAMED_PARAM (cond_not_taken_branch_cost
, 1)
261 /* Generic costs for vector insn classes. */
262 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
265 static const struct cpu_vector_cost cortexa57_vector_cost
=
267 NAMED_PARAM (scalar_stmt_cost
, 1),
268 NAMED_PARAM (scalar_load_cost
, 4),
269 NAMED_PARAM (scalar_store_cost
, 1),
270 NAMED_PARAM (vec_stmt_cost
, 3),
271 NAMED_PARAM (vec_to_scalar_cost
, 8),
272 NAMED_PARAM (scalar_to_vec_cost
, 8),
273 NAMED_PARAM (vec_align_load_cost
, 5),
274 NAMED_PARAM (vec_unalign_load_cost
, 5),
275 NAMED_PARAM (vec_unalign_store_cost
, 1),
276 NAMED_PARAM (vec_store_cost
, 1),
277 NAMED_PARAM (cond_taken_branch_cost
, 1),
278 NAMED_PARAM (cond_not_taken_branch_cost
, 1)
281 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
284 static const struct tune_params generic_tunings
=
286 &cortexa57_extra_costs
,
287 &generic_addrcost_table
,
288 &generic_regmove_cost
,
289 &generic_vector_cost
,
290 NAMED_PARAM (memmov_cost
, 4),
291 NAMED_PARAM (issue_rate
, 2)
294 static const struct tune_params cortexa53_tunings
=
296 &cortexa53_extra_costs
,
297 &generic_addrcost_table
,
298 &cortexa53_regmove_cost
,
299 &generic_vector_cost
,
300 NAMED_PARAM (memmov_cost
, 4),
301 NAMED_PARAM (issue_rate
, 2)
304 static const struct tune_params cortexa57_tunings
=
306 &cortexa57_extra_costs
,
307 &cortexa57_addrcost_table
,
308 &cortexa57_regmove_cost
,
309 &cortexa57_vector_cost
,
310 NAMED_PARAM (memmov_cost
, 4),
311 NAMED_PARAM (issue_rate
, 3)
314 /* A processor implementing AArch64. */
317 const char *const name
;
318 enum aarch64_processor core
;
320 const unsigned long flags
;
321 const struct tune_params
*const tune
;
324 /* Processor cores implementing AArch64. */
325 static const struct processor all_cores
[] =
327 #define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
328 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
329 #include "aarch64-cores.def"
331 {"generic", cortexa53
, "8", AARCH64_FL_FPSIMD
| AARCH64_FL_FOR_ARCH8
, &generic_tunings
},
332 {NULL
, aarch64_none
, NULL
, 0, NULL
}
335 /* Architectures implementing AArch64. */
336 static const struct processor all_architectures
[] =
338 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
339 {NAME, CORE, #ARCH, FLAGS, NULL},
340 #include "aarch64-arches.def"
342 {NULL
, aarch64_none
, NULL
, 0, NULL
}
345 /* Target specification. These are populated as commandline arguments
346 are processed, or NULL if not specified. */
347 static const struct processor
*selected_arch
;
348 static const struct processor
*selected_cpu
;
349 static const struct processor
*selected_tune
;
351 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
353 /* An ISA extension in the co-processor and main instruction set space. */
354 struct aarch64_option_extension
356 const char *const name
;
357 const unsigned long flags_on
;
358 const unsigned long flags_off
;
361 /* ISA extensions in AArch64. */
362 static const struct aarch64_option_extension all_extensions
[] =
364 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
365 {NAME, FLAGS_ON, FLAGS_OFF},
366 #include "aarch64-option-extensions.def"
367 #undef AARCH64_OPT_EXTENSION
371 /* Used to track the size of an address when generating a pre/post
372 increment address. */
373 static enum machine_mode aarch64_memory_reference_mode
;
375 /* Used to force GTY into this file. */
376 static GTY(()) int gty_dummy
;
378 /* A table of valid AArch64 "bitmask immediate" values for
379 logical instructions. */
381 #define AARCH64_NUM_BITMASKS 5334
382 static unsigned HOST_WIDE_INT aarch64_bitmasks
[AARCH64_NUM_BITMASKS
];
/* Condition codes in the order the hardware encodes them; the encoding
   of each code's logical inverse differs from it only in bit 0, which
   AARCH64_INVERSE_CONDITION_CODE exploits below.  */
typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
/* Assembler mnemonics for the condition codes, indexed by
   aarch64_cond_code; the inverse of a code is obtained via
   AARCH64_INVERSE_CONDITION_CODE.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
401 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
403 aarch64_dbx_register_number (unsigned regno
)
405 if (GP_REGNUM_P (regno
))
406 return AARCH64_DWARF_R0
+ regno
- R0_REGNUM
;
407 else if (regno
== SP_REGNUM
)
408 return AARCH64_DWARF_SP
;
409 else if (FP_REGNUM_P (regno
))
410 return AARCH64_DWARF_V0
+ regno
- V0_REGNUM
;
412 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
413 equivalent DWARF register. */
414 return DWARF_FRAME_REGISTERS
;
417 /* Return TRUE if MODE is any of the large INT modes. */
419 aarch64_vect_struct_mode_p (enum machine_mode mode
)
421 return mode
== OImode
|| mode
== CImode
|| mode
== XImode
;
424 /* Return TRUE if MODE is any of the vector modes. */
426 aarch64_vector_mode_p (enum machine_mode mode
)
428 return aarch64_vector_mode_supported_p (mode
)
429 || aarch64_vect_struct_mode_p (mode
);
432 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
434 aarch64_array_mode_supported_p (enum machine_mode mode
,
435 unsigned HOST_WIDE_INT nelems
)
438 && AARCH64_VALID_SIMD_QREG_MODE (mode
)
439 && (nelems
>= 2 && nelems
<= 4))
445 /* Implement HARD_REGNO_NREGS. */
448 aarch64_hard_regno_nregs (unsigned regno
, enum machine_mode mode
)
450 switch (aarch64_regno_regclass (regno
))
454 return (GET_MODE_SIZE (mode
) + UNITS_PER_VREG
- 1) / UNITS_PER_VREG
;
456 return (GET_MODE_SIZE (mode
) + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
461 /* Implement HARD_REGNO_MODE_OK. */
464 aarch64_hard_regno_mode_ok (unsigned regno
, enum machine_mode mode
)
466 if (GET_MODE_CLASS (mode
) == MODE_CC
)
467 return regno
== CC_REGNUM
;
469 if (regno
== SP_REGNUM
)
470 /* The purpose of comparing with ptr_mode is to support the
471 global register variable associated with the stack pointer
472 register via the syntax of asm ("wsp") in ILP32. */
473 return mode
== Pmode
|| mode
== ptr_mode
;
475 if (regno
== FRAME_POINTER_REGNUM
|| regno
== ARG_POINTER_REGNUM
)
476 return mode
== Pmode
;
478 if (GP_REGNUM_P (regno
) && ! aarch64_vect_struct_mode_p (mode
))
481 if (FP_REGNUM_P (regno
))
483 if (aarch64_vect_struct_mode_p (mode
))
485 (regno
+ aarch64_hard_regno_nregs (regno
, mode
) - 1) <= V31_REGNUM
;
493 /* Implement HARD_REGNO_CALLER_SAVE_MODE. */
495 aarch64_hard_regno_caller_save_mode (unsigned regno
, unsigned nregs
,
496 enum machine_mode mode
)
498 /* Handle modes that fit within single registers. */
499 if (nregs
== 1 && GET_MODE_SIZE (mode
) <= 16)
501 if (GET_MODE_SIZE (mode
) >= 4)
506 /* Fall back to generic for multi-reg and very large modes. */
508 return choose_hard_reg_mode (regno
, nregs
, false);
511 /* Return true if calls to DECL should be treated as
512 long-calls (ie called via a register). */
514 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED
)
519 /* Return true if calls to symbol-ref SYM should be treated as
520 long-calls (ie called via a register). */
522 aarch64_is_long_call_p (rtx sym
)
524 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym
));
527 /* Return true if the offsets to a zero/sign-extract operation
528 represent an expression that matches an extend operation. The
529 operands represent the paramters from
531 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
533 aarch64_is_extend_from_extract (enum machine_mode mode
, rtx mult_imm
,
536 HOST_WIDE_INT mult_val
, extract_val
;
538 if (! CONST_INT_P (mult_imm
) || ! CONST_INT_P (extract_imm
))
541 mult_val
= INTVAL (mult_imm
);
542 extract_val
= INTVAL (extract_imm
);
545 && extract_val
< GET_MODE_BITSIZE (mode
)
546 && exact_log2 (extract_val
& ~7) > 0
547 && (extract_val
& 7) <= 4
548 && mult_val
== (1 << (extract_val
& 7)))
554 /* Emit an insn that's a simple single-set. Both the operands must be
555 known to be valid. */
557 emit_set_insn (rtx x
, rtx y
)
559 return emit_insn (gen_rtx_SET (VOIDmode
, x
, y
));
562 /* X and Y are two things to compare using CODE. Emit the compare insn and
563 return the rtx for register 0 in the proper mode. */
565 aarch64_gen_compare_reg (RTX_CODE code
, rtx x
, rtx y
)
567 enum machine_mode mode
= SELECT_CC_MODE (code
, x
, y
);
568 rtx cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
570 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
574 /* Build the SYMBOL_REF for __tls_get_addr. */
576 static GTY(()) rtx tls_get_addr_libfunc
;
579 aarch64_tls_get_addr (void)
581 if (!tls_get_addr_libfunc
)
582 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
583 return tls_get_addr_libfunc
;
586 /* Return the TLS model to use for ADDR. */
588 static enum tls_model
589 tls_symbolic_operand_type (rtx addr
)
591 enum tls_model tls_kind
= TLS_MODEL_NONE
;
594 if (GET_CODE (addr
) == CONST
)
596 split_const (addr
, &sym
, &addend
);
597 if (GET_CODE (sym
) == SYMBOL_REF
)
598 tls_kind
= SYMBOL_REF_TLS_MODEL (sym
);
600 else if (GET_CODE (addr
) == SYMBOL_REF
)
601 tls_kind
= SYMBOL_REF_TLS_MODEL (addr
);
606 /* We'll allow lo_sum's in addresses in our legitimate addresses
607 so that combine would take care of combining addresses where
608 necessary, but for generation purposes, we'll generate the address
611 tmp = hi (symbol_ref); adrp x1, foo
612 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
616 adrp x1, :got:foo adrp tmp, :tlsgd:foo
617 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
621 Load TLS symbol, depending on TLS mechanism and TLS access model.
623 Global Dynamic - Traditional TLS:
625 add dest, tmp, #:tlsgd_lo12:imm
628 Global Dynamic - TLS Descriptors:
629 adrp dest, :tlsdesc:imm
630 ldr tmp, [dest, #:tlsdesc_lo12:imm]
631 add dest, dest, #:tlsdesc_lo12:imm
638 adrp tmp, :gottprel:imm
639 ldr dest, [tmp, #:gottprel_lo12:imm]
644 add t0, tp, #:tprel_hi12:imm
645 add t0, #:tprel_lo12_nc:imm
649 aarch64_load_symref_appropriately (rtx dest
, rtx imm
,
650 enum aarch64_symbol_type type
)
654 case SYMBOL_SMALL_ABSOLUTE
:
656 /* In ILP32, the mode of dest can be either SImode or DImode. */
658 enum machine_mode mode
= GET_MODE (dest
);
660 gcc_assert (mode
== Pmode
|| mode
== ptr_mode
);
662 if (can_create_pseudo_p ())
663 tmp_reg
= gen_reg_rtx (mode
);
665 emit_move_insn (tmp_reg
, gen_rtx_HIGH (mode
, imm
));
666 emit_insn (gen_add_losym (dest
, tmp_reg
, imm
));
670 case SYMBOL_TINY_ABSOLUTE
:
671 emit_insn (gen_rtx_SET (Pmode
, dest
, imm
));
674 case SYMBOL_SMALL_GOT
:
676 /* In ILP32, the mode of dest can be either SImode or DImode,
677 while the got entry is always of SImode size. The mode of
678 dest depends on how dest is used: if dest is assigned to a
679 pointer (e.g. in the memory), it has SImode; it may have
680 DImode if dest is dereferenced to access the memeory.
681 This is why we have to handle three different ldr_got_small
682 patterns here (two patterns for ILP32). */
684 enum machine_mode mode
= GET_MODE (dest
);
686 if (can_create_pseudo_p ())
687 tmp_reg
= gen_reg_rtx (mode
);
689 emit_move_insn (tmp_reg
, gen_rtx_HIGH (mode
, imm
));
690 if (mode
== ptr_mode
)
693 emit_insn (gen_ldr_got_small_di (dest
, tmp_reg
, imm
));
695 emit_insn (gen_ldr_got_small_si (dest
, tmp_reg
, imm
));
699 gcc_assert (mode
== Pmode
);
700 emit_insn (gen_ldr_got_small_sidi (dest
, tmp_reg
, imm
));
706 case SYMBOL_SMALL_TLSGD
:
709 rtx result
= gen_rtx_REG (Pmode
, R0_REGNUM
);
712 aarch64_emit_call_insn (gen_tlsgd_small (result
, imm
));
713 insns
= get_insns ();
716 RTL_CONST_CALL_P (insns
) = 1;
717 emit_libcall_block (insns
, dest
, result
, imm
);
721 case SYMBOL_SMALL_TLSDESC
:
723 enum machine_mode mode
= GET_MODE (dest
);
724 rtx x0
= gen_rtx_REG (mode
, R0_REGNUM
);
727 gcc_assert (mode
== Pmode
|| mode
== ptr_mode
);
729 /* In ILP32, the got entry is always of SImode size. Unlike
730 small GOT, the dest is fixed at reg 0. */
732 emit_insn (gen_tlsdesc_small_si (imm
));
734 emit_insn (gen_tlsdesc_small_di (imm
));
735 tp
= aarch64_load_tp (NULL
);
738 tp
= gen_lowpart (mode
, tp
);
740 emit_insn (gen_rtx_SET (mode
, dest
, gen_rtx_PLUS (mode
, tp
, x0
)));
741 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
745 case SYMBOL_SMALL_GOTTPREL
:
747 /* In ILP32, the mode of dest can be either SImode or DImode,
748 while the got entry is always of SImode size. The mode of
749 dest depends on how dest is used: if dest is assigned to a
750 pointer (e.g. in the memory), it has SImode; it may have
751 DImode if dest is dereferenced to access the memeory.
752 This is why we have to handle three different tlsie_small
753 patterns here (two patterns for ILP32). */
754 enum machine_mode mode
= GET_MODE (dest
);
755 rtx tmp_reg
= gen_reg_rtx (mode
);
756 rtx tp
= aarch64_load_tp (NULL
);
758 if (mode
== ptr_mode
)
761 emit_insn (gen_tlsie_small_di (tmp_reg
, imm
));
764 emit_insn (gen_tlsie_small_si (tmp_reg
, imm
));
765 tp
= gen_lowpart (mode
, tp
);
770 gcc_assert (mode
== Pmode
);
771 emit_insn (gen_tlsie_small_sidi (tmp_reg
, imm
));
774 emit_insn (gen_rtx_SET (mode
, dest
, gen_rtx_PLUS (mode
, tp
, tmp_reg
)));
775 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
779 case SYMBOL_SMALL_TPREL
:
781 rtx tp
= aarch64_load_tp (NULL
);
782 emit_insn (gen_tlsle_small (dest
, tp
, imm
));
783 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
787 case SYMBOL_TINY_GOT
:
788 emit_insn (gen_ldr_got_tiny (dest
, imm
));
796 /* Emit a move from SRC to DEST. Assume that the move expanders can
797 handle all moves if !can_create_pseudo_p (). The distinction is
798 important because, unlike emit_move_insn, the move expanders know
799 how to force Pmode objects into the constant pool even when the
800 constant pool address is not itself legitimate. */
802 aarch64_emit_move (rtx dest
, rtx src
)
804 return (can_create_pseudo_p ()
805 ? emit_move_insn (dest
, src
)
806 : emit_move_insn_1 (dest
, src
));
809 /* Split a 128-bit move operation into two 64-bit move operations,
810 taking care to handle partial overlap of register to register
811 copies. Special cases are needed when moving between GP regs and
812 FP regs. SRC can be a register, constant or memory; DST a register
813 or memory. If either operand is memory it must not have any side
816 aarch64_split_128bit_move (rtx dst
, rtx src
)
821 enum machine_mode mode
= GET_MODE (dst
);
823 gcc_assert (mode
== TImode
|| mode
== TFmode
);
824 gcc_assert (!(side_effects_p (src
) || side_effects_p (dst
)));
825 gcc_assert (mode
== GET_MODE (src
) || GET_MODE (src
) == VOIDmode
);
827 if (REG_P (dst
) && REG_P (src
))
829 int src_regno
= REGNO (src
);
830 int dst_regno
= REGNO (dst
);
832 /* Handle FP <-> GP regs. */
833 if (FP_REGNUM_P (dst_regno
) && GP_REGNUM_P (src_regno
))
835 src_lo
= gen_lowpart (word_mode
, src
);
836 src_hi
= gen_highpart (word_mode
, src
);
840 emit_insn (gen_aarch64_movtilow_di (dst
, src_lo
));
841 emit_insn (gen_aarch64_movtihigh_di (dst
, src_hi
));
845 emit_insn (gen_aarch64_movtflow_di (dst
, src_lo
));
846 emit_insn (gen_aarch64_movtfhigh_di (dst
, src_hi
));
850 else if (GP_REGNUM_P (dst_regno
) && FP_REGNUM_P (src_regno
))
852 dst_lo
= gen_lowpart (word_mode
, dst
);
853 dst_hi
= gen_highpart (word_mode
, dst
);
857 emit_insn (gen_aarch64_movdi_tilow (dst_lo
, src
));
858 emit_insn (gen_aarch64_movdi_tihigh (dst_hi
, src
));
862 emit_insn (gen_aarch64_movdi_tflow (dst_lo
, src
));
863 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi
, src
));
869 dst_lo
= gen_lowpart (word_mode
, dst
);
870 dst_hi
= gen_highpart (word_mode
, dst
);
871 src_lo
= gen_lowpart (word_mode
, src
);
872 src_hi
= gen_highpart_mode (word_mode
, mode
, src
);
874 /* At most one pairing may overlap. */
875 if (reg_overlap_mentioned_p (dst_lo
, src_hi
))
877 aarch64_emit_move (dst_hi
, src_hi
);
878 aarch64_emit_move (dst_lo
, src_lo
);
882 aarch64_emit_move (dst_lo
, src_lo
);
883 aarch64_emit_move (dst_hi
, src_hi
);
888 aarch64_split_128bit_move_p (rtx dst
, rtx src
)
890 return (! REG_P (src
)
891 || ! (FP_REGNUM_P (REGNO (dst
)) && FP_REGNUM_P (REGNO (src
))));
894 /* Split a complex SIMD combine. */
897 aarch64_split_simd_combine (rtx dst
, rtx src1
, rtx src2
)
899 enum machine_mode src_mode
= GET_MODE (src1
);
900 enum machine_mode dst_mode
= GET_MODE (dst
);
902 gcc_assert (VECTOR_MODE_P (dst_mode
));
904 if (REG_P (dst
) && REG_P (src1
) && REG_P (src2
))
906 rtx (*gen
) (rtx
, rtx
, rtx
);
911 gen
= gen_aarch64_simd_combinev8qi
;
914 gen
= gen_aarch64_simd_combinev4hi
;
917 gen
= gen_aarch64_simd_combinev2si
;
920 gen
= gen_aarch64_simd_combinev2sf
;
923 gen
= gen_aarch64_simd_combinedi
;
926 gen
= gen_aarch64_simd_combinedf
;
932 emit_insn (gen (dst
, src1
, src2
));
937 /* Split a complex SIMD move. */
940 aarch64_split_simd_move (rtx dst
, rtx src
)
942 enum machine_mode src_mode
= GET_MODE (src
);
943 enum machine_mode dst_mode
= GET_MODE (dst
);
945 gcc_assert (VECTOR_MODE_P (dst_mode
));
947 if (REG_P (dst
) && REG_P (src
))
949 rtx (*gen
) (rtx
, rtx
);
951 gcc_assert (VECTOR_MODE_P (src_mode
));
956 gen
= gen_aarch64_split_simd_movv16qi
;
959 gen
= gen_aarch64_split_simd_movv8hi
;
962 gen
= gen_aarch64_split_simd_movv4si
;
965 gen
= gen_aarch64_split_simd_movv2di
;
968 gen
= gen_aarch64_split_simd_movv4sf
;
971 gen
= gen_aarch64_split_simd_movv2df
;
977 emit_insn (gen (dst
, src
));
983 aarch64_force_temporary (enum machine_mode mode
, rtx x
, rtx value
)
985 if (can_create_pseudo_p ())
986 return force_reg (mode
, value
);
989 x
= aarch64_emit_move (x
, value
);
996 aarch64_add_offset (enum machine_mode mode
, rtx temp
, rtx reg
, HOST_WIDE_INT offset
)
998 if (!aarch64_plus_immediate (GEN_INT (offset
), mode
))
1001 /* Load the full offset into a register. This
1002 might be improvable in the future. */
1003 high
= GEN_INT (offset
);
1005 high
= aarch64_force_temporary (mode
, temp
, high
);
1006 reg
= aarch64_force_temporary (mode
, temp
,
1007 gen_rtx_PLUS (mode
, high
, reg
));
1009 return plus_constant (mode
, reg
, offset
);
1013 aarch64_expand_mov_immediate (rtx dest
, rtx imm
)
1015 enum machine_mode mode
= GET_MODE (dest
);
1016 unsigned HOST_WIDE_INT mask
;
1019 unsigned HOST_WIDE_INT val
;
1022 int one_match
, zero_match
, first_not_ffff_match
;
1024 gcc_assert (mode
== SImode
|| mode
== DImode
);
1026 /* Check on what type of symbol it is. */
1027 if (GET_CODE (imm
) == SYMBOL_REF
1028 || GET_CODE (imm
) == LABEL_REF
1029 || GET_CODE (imm
) == CONST
)
1031 rtx mem
, base
, offset
;
1032 enum aarch64_symbol_type sty
;
1034 /* If we have (const (plus symbol offset)), separate out the offset
1035 before we start classifying the symbol. */
1036 split_const (imm
, &base
, &offset
);
1038 sty
= aarch64_classify_symbol (base
, SYMBOL_CONTEXT_ADR
);
1041 case SYMBOL_FORCE_TO_MEM
:
1042 if (offset
!= const0_rtx
1043 && targetm
.cannot_force_const_mem (mode
, imm
))
1045 gcc_assert (can_create_pseudo_p ());
1046 base
= aarch64_force_temporary (mode
, dest
, base
);
1047 base
= aarch64_add_offset (mode
, NULL
, base
, INTVAL (offset
));
1048 aarch64_emit_move (dest
, base
);
1051 mem
= force_const_mem (ptr_mode
, imm
);
1053 if (mode
!= ptr_mode
)
1054 mem
= gen_rtx_ZERO_EXTEND (mode
, mem
);
1055 emit_insn (gen_rtx_SET (VOIDmode
, dest
, mem
));
1058 case SYMBOL_SMALL_TLSGD
:
1059 case SYMBOL_SMALL_TLSDESC
:
1060 case SYMBOL_SMALL_GOTTPREL
:
1061 case SYMBOL_SMALL_GOT
:
1062 case SYMBOL_TINY_GOT
:
1063 if (offset
!= const0_rtx
)
1065 gcc_assert(can_create_pseudo_p ());
1066 base
= aarch64_force_temporary (mode
, dest
, base
);
1067 base
= aarch64_add_offset (mode
, NULL
, base
, INTVAL (offset
));
1068 aarch64_emit_move (dest
, base
);
1073 case SYMBOL_SMALL_TPREL
:
1074 case SYMBOL_SMALL_ABSOLUTE
:
1075 case SYMBOL_TINY_ABSOLUTE
:
1076 aarch64_load_symref_appropriately (dest
, imm
, sty
);
1084 if (CONST_INT_P (imm
) && aarch64_move_imm (INTVAL (imm
), mode
))
1086 emit_insn (gen_rtx_SET (VOIDmode
, dest
, imm
));
1090 if (!CONST_INT_P (imm
))
1092 if (GET_CODE (imm
) == HIGH
)
1093 emit_insn (gen_rtx_SET (VOIDmode
, dest
, imm
));
1096 rtx mem
= force_const_mem (mode
, imm
);
1098 emit_insn (gen_rtx_SET (VOIDmode
, dest
, mem
));
1106 /* We know we can't do this in 1 insn, and we must be able to do it
1107 in two; so don't mess around looking for sequences that don't buy
1109 emit_insn (gen_rtx_SET (VOIDmode
, dest
, GEN_INT (INTVAL (imm
) & 0xffff)));
1110 emit_insn (gen_insv_immsi (dest
, GEN_INT (16),
1111 GEN_INT ((INTVAL (imm
) >> 16) & 0xffff)));
1115 /* Remaining cases are all for DImode. */
1118 subtargets
= optimize
&& can_create_pseudo_p ();
1123 first_not_ffff_match
= -1;
1125 for (i
= 0; i
< 64; i
+= 16, mask
<<= 16)
1127 if ((val
& mask
) == mask
)
1131 if (first_not_ffff_match
< 0)
1132 first_not_ffff_match
= i
;
1133 if ((val
& mask
) == 0)
1140 /* Set one of the quarters and then insert back into result. */
1141 mask
= 0xffffll
<< first_not_ffff_match
;
1142 emit_insn (gen_rtx_SET (VOIDmode
, dest
, GEN_INT (val
| mask
)));
1143 emit_insn (gen_insv_immdi (dest
, GEN_INT (first_not_ffff_match
),
1144 GEN_INT ((val
>> first_not_ffff_match
)
1149 if (zero_match
== 2)
1150 goto simple_sequence
;
1152 mask
= 0x0ffff0000UL
;
1153 for (i
= 16; i
< 64; i
+= 16, mask
<<= 16)
1155 HOST_WIDE_INT comp
= mask
& ~(mask
- 1);
1157 if (aarch64_uimm12_shift (val
- (val
& mask
)))
1159 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1161 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
, GEN_INT (val
& mask
)));
1162 emit_insn (gen_adddi3 (dest
, subtarget
,
1163 GEN_INT (val
- (val
& mask
))));
1166 else if (aarch64_uimm12_shift (-(val
- ((val
+ comp
) & mask
))))
1168 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1170 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1171 GEN_INT ((val
+ comp
) & mask
)));
1172 emit_insn (gen_adddi3 (dest
, subtarget
,
1173 GEN_INT (val
- ((val
+ comp
) & mask
))));
1176 else if (aarch64_uimm12_shift (val
- ((val
- comp
) | ~mask
)))
1178 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1180 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1181 GEN_INT ((val
- comp
) | ~mask
)));
1182 emit_insn (gen_adddi3 (dest
, subtarget
,
1183 GEN_INT (val
- ((val
- comp
) | ~mask
))));
1186 else if (aarch64_uimm12_shift (-(val
- (val
| ~mask
))))
1188 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1190 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1191 GEN_INT (val
| ~mask
)));
1192 emit_insn (gen_adddi3 (dest
, subtarget
,
1193 GEN_INT (val
- (val
| ~mask
))));
1198 /* See if we can do it by arithmetically combining two
1200 for (i
= 0; i
< AARCH64_NUM_BITMASKS
; i
++)
1205 if (aarch64_uimm12_shift (val
- aarch64_bitmasks
[i
])
1206 || aarch64_uimm12_shift (-val
+ aarch64_bitmasks
[i
]))
1208 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1209 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1210 GEN_INT (aarch64_bitmasks
[i
])));
1211 emit_insn (gen_adddi3 (dest
, subtarget
,
1212 GEN_INT (val
- aarch64_bitmasks
[i
])));
1216 for (j
= 0; j
< 64; j
+= 16, mask
<<= 16)
1218 if ((aarch64_bitmasks
[i
] & ~mask
) == (val
& ~mask
))
1220 emit_insn (gen_rtx_SET (VOIDmode
, dest
,
1221 GEN_INT (aarch64_bitmasks
[i
])));
1222 emit_insn (gen_insv_immdi (dest
, GEN_INT (j
),
1223 GEN_INT ((val
>> j
) & 0xffff)));
1229 /* See if we can do it by logically combining two immediates. */
1230 for (i
= 0; i
< AARCH64_NUM_BITMASKS
; i
++)
1232 if ((aarch64_bitmasks
[i
] & val
) == aarch64_bitmasks
[i
])
1236 for (j
= i
+ 1; j
< AARCH64_NUM_BITMASKS
; j
++)
1237 if (val
== (aarch64_bitmasks
[i
] | aarch64_bitmasks
[j
]))
1239 subtarget
= subtargets
? gen_reg_rtx (mode
) : dest
;
1240 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1241 GEN_INT (aarch64_bitmasks
[i
])));
1242 emit_insn (gen_iordi3 (dest
, subtarget
,
1243 GEN_INT (aarch64_bitmasks
[j
])));
1247 else if ((val
& aarch64_bitmasks
[i
]) == val
)
1251 for (j
= i
+ 1; j
< AARCH64_NUM_BITMASKS
; j
++)
1252 if (val
== (aarch64_bitmasks
[j
] & aarch64_bitmasks
[i
]))
1255 subtarget
= subtargets
? gen_reg_rtx (mode
) : dest
;
1256 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1257 GEN_INT (aarch64_bitmasks
[j
])));
1258 emit_insn (gen_anddi3 (dest
, subtarget
,
1259 GEN_INT (aarch64_bitmasks
[i
])));
1265 if (one_match
> zero_match
)
1267 /* Set either first three quarters or all but the third. */
1268 mask
= 0xffffll
<< (16 - first_not_ffff_match
);
1269 emit_insn (gen_rtx_SET (VOIDmode
, dest
,
1270 GEN_INT (val
| mask
| 0xffffffff00000000ull
)));
1272 /* Now insert other two quarters. */
1273 for (i
= first_not_ffff_match
+ 16, mask
<<= (first_not_ffff_match
<< 1);
1274 i
< 64; i
+= 16, mask
<<= 16)
1276 if ((val
& mask
) != mask
)
1277 emit_insn (gen_insv_immdi (dest
, GEN_INT (i
),
1278 GEN_INT ((val
>> i
) & 0xffff)));
1286 for (i
= 0; i
< 64; i
+= 16, mask
<<= 16)
1288 if ((val
& mask
) != 0)
1292 emit_insn (gen_rtx_SET (VOIDmode
, dest
,
1293 GEN_INT (val
& mask
)));
1297 emit_insn (gen_insv_immdi (dest
, GEN_INT (i
),
1298 GEN_INT ((val
>> i
) & 0xffff)));
1304 aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED
,
1305 tree exp ATTRIBUTE_UNUSED
)
1307 /* Currently, always true. */
1311 /* Implement TARGET_PASS_BY_REFERENCE. */
1314 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED
,
1315 enum machine_mode mode
,
1317 bool named ATTRIBUTE_UNUSED
)
1320 enum machine_mode dummymode
;
1323 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1324 size
= (mode
== BLKmode
&& type
)
1325 ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1327 /* Aggregates are passed by reference based on their size. */
1328 if (type
&& AGGREGATE_TYPE_P (type
))
1330 size
= int_size_in_bytes (type
);
1333 /* Variable sized arguments are always returned by reference. */
1337 /* Can this be a candidate to be passed in fp/simd register(s)? */
1338 if (aarch64_vfp_is_call_or_return_candidate (mode
, type
,
1343 /* Arguments which are variable sized or larger than 2 registers are
1344 passed by reference unless they are a homogenous floating point
1346 return size
> 2 * UNITS_PER_WORD
;
1349 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1351 aarch64_return_in_msb (const_tree valtype
)
1353 enum machine_mode dummy_mode
;
1356 /* Never happens in little-endian mode. */
1357 if (!BYTES_BIG_ENDIAN
)
1360 /* Only composite types smaller than or equal to 16 bytes can
1361 be potentially returned in registers. */
1362 if (!aarch64_composite_type_p (valtype
, TYPE_MODE (valtype
))
1363 || int_size_in_bytes (valtype
) <= 0
1364 || int_size_in_bytes (valtype
) > 16)
1367 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1368 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1369 is always passed/returned in the least significant bits of fp/simd
1371 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype
), valtype
,
1372 &dummy_mode
, &dummy_int
, NULL
))
1378 /* Implement TARGET_FUNCTION_VALUE.
1379 Define how to find the value returned by a function. */
1382 aarch64_function_value (const_tree type
, const_tree func
,
1383 bool outgoing ATTRIBUTE_UNUSED
)
1385 enum machine_mode mode
;
1388 enum machine_mode ag_mode
;
1390 mode
= TYPE_MODE (type
);
1391 if (INTEGRAL_TYPE_P (type
))
1392 mode
= promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
1394 if (aarch64_return_in_msb (type
))
1396 HOST_WIDE_INT size
= int_size_in_bytes (type
);
1398 if (size
% UNITS_PER_WORD
!= 0)
1400 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
1401 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
1405 if (aarch64_vfp_is_call_or_return_candidate (mode
, type
,
1406 &ag_mode
, &count
, NULL
))
1408 if (!aarch64_composite_type_p (type
, mode
))
1410 gcc_assert (count
== 1 && mode
== ag_mode
);
1411 return gen_rtx_REG (mode
, V0_REGNUM
);
1418 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
1419 for (i
= 0; i
< count
; i
++)
1421 rtx tmp
= gen_rtx_REG (ag_mode
, V0_REGNUM
+ i
);
1422 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
1423 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
1424 XVECEXP (par
, 0, i
) = tmp
;
1430 return gen_rtx_REG (mode
, R0_REGNUM
);
1433 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1434 Return true if REGNO is the number of a hard register in which the values
1435 of called function may come back. */
1438 aarch64_function_value_regno_p (const unsigned int regno
)
1440 /* Maximum of 16 bytes can be returned in the general registers. Examples
1441 of 16-byte return values are: 128-bit integers and 16-byte small
1442 structures (excluding homogeneous floating-point aggregates). */
1443 if (regno
== R0_REGNUM
|| regno
== R1_REGNUM
)
1446 /* Up to four fp/simd registers can return a function value, e.g. a
1447 homogeneous floating-point aggregate having four members. */
1448 if (regno
>= V0_REGNUM
&& regno
< V0_REGNUM
+ HA_MAX_NUM_FLDS
)
1449 return !TARGET_GENERAL_REGS_ONLY
;
1454 /* Implement TARGET_RETURN_IN_MEMORY.
1456 If the type T of the result of a function is such that
1458 would require that arg be passed as a value in a register (or set of
1459 registers) according to the parameter passing rules, then the result
1460 is returned in the same registers as would be used for such an
1464 aarch64_return_in_memory (const_tree type
, const_tree fndecl ATTRIBUTE_UNUSED
)
1467 enum machine_mode ag_mode
;
1470 if (!AGGREGATE_TYPE_P (type
)
1471 && TREE_CODE (type
) != COMPLEX_TYPE
1472 && TREE_CODE (type
) != VECTOR_TYPE
)
1473 /* Simple scalar types always returned in registers. */
1476 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type
),
1483 /* Types larger than 2 registers returned in memory. */
1484 size
= int_size_in_bytes (type
);
1485 return (size
< 0 || size
> 2 * UNITS_PER_WORD
);
1489 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v
, enum machine_mode mode
,
1490 const_tree type
, int *nregs
)
1492 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
1493 return aarch64_vfp_is_call_or_return_candidate (mode
,
1495 &pcum
->aapcs_vfp_rmode
,
1500 /* Given MODE and TYPE of a function argument, return the alignment in
1501 bits. The idea is to suppress any stronger alignment requested by
1502 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1503 This is a helper function for local use only. */
1506 aarch64_function_arg_alignment (enum machine_mode mode
, const_tree type
)
1508 unsigned int alignment
;
1512 if (!integer_zerop (TYPE_SIZE (type
)))
1514 if (TYPE_MODE (type
) == mode
)
1515 alignment
= TYPE_ALIGN (type
);
1517 alignment
= GET_MODE_ALIGNMENT (mode
);
1523 alignment
= GET_MODE_ALIGNMENT (mode
);
1528 /* Layout a function argument according to the AAPCS64 rules. The rule
1529 numbers refer to the rule numbers in the AAPCS64. */
1532 aarch64_layout_arg (cumulative_args_t pcum_v
, enum machine_mode mode
,
1534 bool named ATTRIBUTE_UNUSED
)
1536 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
1537 int ncrn
, nvrn
, nregs
;
1538 bool allocate_ncrn
, allocate_nvrn
;
1541 /* We need to do this once per argument. */
1542 if (pcum
->aapcs_arg_processed
)
1545 pcum
->aapcs_arg_processed
= true;
1547 /* Size in bytes, rounded to the nearest multiple of 8 bytes. */
1549 = AARCH64_ROUND_UP (type
? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
),
1552 allocate_ncrn
= (type
) ? !(FLOAT_TYPE_P (type
)) : !FLOAT_MODE_P (mode
);
1553 allocate_nvrn
= aarch64_vfp_is_call_candidate (pcum_v
,
1558 /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1559 The following code thus handles passing by SIMD/FP registers first. */
1561 nvrn
= pcum
->aapcs_nvrn
;
1563 /* C1 - C5 for floating point, homogenous floating point aggregates (HFA)
1564 and homogenous short-vector aggregates (HVA). */
1567 if (nvrn
+ nregs
<= NUM_FP_ARG_REGS
)
1569 pcum
->aapcs_nextnvrn
= nvrn
+ nregs
;
1570 if (!aarch64_composite_type_p (type
, mode
))
1572 gcc_assert (nregs
== 1);
1573 pcum
->aapcs_reg
= gen_rtx_REG (mode
, V0_REGNUM
+ nvrn
);
1579 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nregs
));
1580 for (i
= 0; i
< nregs
; i
++)
1582 rtx tmp
= gen_rtx_REG (pcum
->aapcs_vfp_rmode
,
1583 V0_REGNUM
+ nvrn
+ i
);
1584 tmp
= gen_rtx_EXPR_LIST
1586 GEN_INT (i
* GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
)));
1587 XVECEXP (par
, 0, i
) = tmp
;
1589 pcum
->aapcs_reg
= par
;
1595 /* C.3 NSRN is set to 8. */
1596 pcum
->aapcs_nextnvrn
= NUM_FP_ARG_REGS
;
1601 ncrn
= pcum
->aapcs_ncrn
;
1602 nregs
= size
/ UNITS_PER_WORD
;
1604 /* C6 - C9. though the sign and zero extension semantics are
1605 handled elsewhere. This is the case where the argument fits
1606 entirely general registers. */
1607 if (allocate_ncrn
&& (ncrn
+ nregs
<= NUM_ARG_REGS
))
1609 unsigned int alignment
= aarch64_function_arg_alignment (mode
, type
);
1611 gcc_assert (nregs
== 0 || nregs
== 1 || nregs
== 2);
1613 /* C.8 if the argument has an alignment of 16 then the NGRN is
1614 rounded up to the next even number. */
1615 if (nregs
== 2 && alignment
== 16 * BITS_PER_UNIT
&& ncrn
% 2)
1618 gcc_assert (ncrn
+ nregs
<= NUM_ARG_REGS
);
1620 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1621 A reg is still generated for it, but the caller should be smart
1622 enough not to use it. */
1623 if (nregs
== 0 || nregs
== 1 || GET_MODE_CLASS (mode
) == MODE_INT
)
1625 pcum
->aapcs_reg
= gen_rtx_REG (mode
, R0_REGNUM
+ ncrn
);
1632 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nregs
));
1633 for (i
= 0; i
< nregs
; i
++)
1635 rtx tmp
= gen_rtx_REG (word_mode
, R0_REGNUM
+ ncrn
+ i
);
1636 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
1637 GEN_INT (i
* UNITS_PER_WORD
));
1638 XVECEXP (par
, 0, i
) = tmp
;
1640 pcum
->aapcs_reg
= par
;
1643 pcum
->aapcs_nextncrn
= ncrn
+ nregs
;
1648 pcum
->aapcs_nextncrn
= NUM_ARG_REGS
;
1650 /* The argument is passed on stack; record the needed number of words for
1651 this argument and align the total size if necessary. */
1653 pcum
->aapcs_stack_words
= size
/ UNITS_PER_WORD
;
1654 if (aarch64_function_arg_alignment (mode
, type
) == 16 * BITS_PER_UNIT
)
1655 pcum
->aapcs_stack_size
= AARCH64_ROUND_UP (pcum
->aapcs_stack_size
,
1656 16 / UNITS_PER_WORD
);
1660 /* Implement TARGET_FUNCTION_ARG. */
1663 aarch64_function_arg (cumulative_args_t pcum_v
, enum machine_mode mode
,
1664 const_tree type
, bool named
)
1666 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
1667 gcc_assert (pcum
->pcs_variant
== ARM_PCS_AAPCS64
);
1669 if (mode
== VOIDmode
)
1672 aarch64_layout_arg (pcum_v
, mode
, type
, named
);
1673 return pcum
->aapcs_reg
;
1677 aarch64_init_cumulative_args (CUMULATIVE_ARGS
*pcum
,
1678 const_tree fntype ATTRIBUTE_UNUSED
,
1679 rtx libname ATTRIBUTE_UNUSED
,
1680 const_tree fndecl ATTRIBUTE_UNUSED
,
1681 unsigned n_named ATTRIBUTE_UNUSED
)
1683 pcum
->aapcs_ncrn
= 0;
1684 pcum
->aapcs_nvrn
= 0;
1685 pcum
->aapcs_nextncrn
= 0;
1686 pcum
->aapcs_nextnvrn
= 0;
1687 pcum
->pcs_variant
= ARM_PCS_AAPCS64
;
1688 pcum
->aapcs_reg
= NULL_RTX
;
1689 pcum
->aapcs_arg_processed
= false;
1690 pcum
->aapcs_stack_words
= 0;
1691 pcum
->aapcs_stack_size
= 0;
1697 aarch64_function_arg_advance (cumulative_args_t pcum_v
,
1698 enum machine_mode mode
,
1702 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
1703 if (pcum
->pcs_variant
== ARM_PCS_AAPCS64
)
1705 aarch64_layout_arg (pcum_v
, mode
, type
, named
);
1706 gcc_assert ((pcum
->aapcs_reg
!= NULL_RTX
)
1707 != (pcum
->aapcs_stack_words
!= 0));
1708 pcum
->aapcs_arg_processed
= false;
1709 pcum
->aapcs_ncrn
= pcum
->aapcs_nextncrn
;
1710 pcum
->aapcs_nvrn
= pcum
->aapcs_nextnvrn
;
1711 pcum
->aapcs_stack_size
+= pcum
->aapcs_stack_words
;
1712 pcum
->aapcs_stack_words
= 0;
1713 pcum
->aapcs_reg
= NULL_RTX
;
1718 aarch64_function_arg_regno_p (unsigned regno
)
1720 return ((GP_REGNUM_P (regno
) && regno
< R0_REGNUM
+ NUM_ARG_REGS
)
1721 || (FP_REGNUM_P (regno
) && regno
< V0_REGNUM
+ NUM_FP_ARG_REGS
));
1724 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1725 PARM_BOUNDARY bits of alignment, but will be given anything up
1726 to STACK_BOUNDARY bits if the type requires it. This makes sure
1727 that both before and after the layout of each argument, the Next
1728 Stacked Argument Address (NSAA) will have a minimum alignment of
1732 aarch64_function_arg_boundary (enum machine_mode mode
, const_tree type
)
1734 unsigned int alignment
= aarch64_function_arg_alignment (mode
, type
);
1736 if (alignment
< PARM_BOUNDARY
)
1737 alignment
= PARM_BOUNDARY
;
1738 if (alignment
> STACK_BOUNDARY
)
1739 alignment
= STACK_BOUNDARY
;
1743 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1745 Return true if an argument passed on the stack should be padded upwards,
1746 i.e. if the least-significant byte of the stack slot has useful data.
1748 Small aggregate types are placed in the lowest memory address.
1750 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1753 aarch64_pad_arg_upward (enum machine_mode mode
, const_tree type
)
1755 /* On little-endian targets, the least significant byte of every stack
1756 argument is passed at the lowest byte address of the stack slot. */
1757 if (!BYTES_BIG_ENDIAN
)
1760 /* Otherwise, integral, floating-point and pointer types are padded downward:
1761 the least significant byte of a stack argument is passed at the highest
1762 byte address of the stack slot. */
1764 ? (INTEGRAL_TYPE_P (type
) || SCALAR_FLOAT_TYPE_P (type
)
1765 || POINTER_TYPE_P (type
))
1766 : (SCALAR_INT_MODE_P (mode
) || SCALAR_FLOAT_MODE_P (mode
)))
1769 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1773 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1775 It specifies padding for the last (may also be the only)
1776 element of a block move between registers and memory. If
1777 assuming the block is in the memory, padding upward means that
1778 the last element is padded after its highest significant byte,
1779 while in downward padding, the last element is padded at the
1780 its least significant byte side.
1782 Small aggregates and small complex types are always padded
1785 We don't need to worry about homogeneous floating-point or
1786 short-vector aggregates; their move is not affected by the
1787 padding direction determined here. Regardless of endianness,
1788 each element of such an aggregate is put in the least
1789 significant bits of a fp/simd register.
1791 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1792 register has useful data, and return the opposite if the most
1793 significant byte does. */
1796 aarch64_pad_reg_upward (enum machine_mode mode
, const_tree type
,
1797 bool first ATTRIBUTE_UNUSED
)
1800 /* Small composite types are always padded upward. */
1801 if (BYTES_BIG_ENDIAN
&& aarch64_composite_type_p (type
, mode
))
1803 HOST_WIDE_INT size
= (type
? int_size_in_bytes (type
)
1804 : GET_MODE_SIZE (mode
));
1805 if (size
< 2 * UNITS_PER_WORD
)
1809 /* Otherwise, use the default padding. */
1810 return !BYTES_BIG_ENDIAN
;
1813 static enum machine_mode
1814 aarch64_libgcc_cmp_return_mode (void)
1820 aarch64_frame_pointer_required (void)
1822 /* In aarch64_override_options_after_change
1823 flag_omit_leaf_frame_pointer turns off the frame pointer by
1824 default. Turn it back on now if we've not got a leaf
1826 if (flag_omit_leaf_frame_pointer
1827 && (!crtl
->is_leaf
|| df_regs_ever_live_p (LR_REGNUM
)))
1833 /* Mark the registers that need to be saved by the callee and calculate
1834 the size of the callee-saved registers area and frame record (both FP
1835 and LR may be omitted). */
1837 aarch64_layout_frame (void)
1839 HOST_WIDE_INT offset
= 0;
1842 if (reload_completed
&& cfun
->machine
->frame
.laid_out
)
1845 #define SLOT_NOT_REQUIRED (-2)
1846 #define SLOT_REQUIRED (-1)
1848 cfun
->machine
->frame
.wb_candidate1
= FIRST_PSEUDO_REGISTER
;
1849 cfun
->machine
->frame
.wb_candidate2
= FIRST_PSEUDO_REGISTER
;
1851 /* First mark all the registers that really need to be saved... */
1852 for (regno
= R0_REGNUM
; regno
<= R30_REGNUM
; regno
++)
1853 cfun
->machine
->frame
.reg_offset
[regno
] = SLOT_NOT_REQUIRED
;
1855 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
1856 cfun
->machine
->frame
.reg_offset
[regno
] = SLOT_NOT_REQUIRED
;
1858 /* ... that includes the eh data registers (if needed)... */
1859 if (crtl
->calls_eh_return
)
1860 for (regno
= 0; EH_RETURN_DATA_REGNO (regno
) != INVALID_REGNUM
; regno
++)
1861 cfun
->machine
->frame
.reg_offset
[EH_RETURN_DATA_REGNO (regno
)]
1864 /* ... and any callee saved register that dataflow says is live. */
1865 for (regno
= R0_REGNUM
; regno
<= R30_REGNUM
; regno
++)
1866 if (df_regs_ever_live_p (regno
)
1867 && !call_used_regs
[regno
])
1868 cfun
->machine
->frame
.reg_offset
[regno
] = SLOT_REQUIRED
;
1870 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
1871 if (df_regs_ever_live_p (regno
)
1872 && !call_used_regs
[regno
])
1873 cfun
->machine
->frame
.reg_offset
[regno
] = SLOT_REQUIRED
;
1875 if (frame_pointer_needed
)
1877 /* FP and LR are placed in the linkage record. */
1878 cfun
->machine
->frame
.reg_offset
[R29_REGNUM
] = 0;
1879 cfun
->machine
->frame
.wb_candidate1
= R29_REGNUM
;
1880 cfun
->machine
->frame
.reg_offset
[R30_REGNUM
] = UNITS_PER_WORD
;
1881 cfun
->machine
->frame
.wb_candidate2
= R30_REGNUM
;
1882 cfun
->machine
->frame
.hardfp_offset
= 2 * UNITS_PER_WORD
;
1883 offset
+= 2 * UNITS_PER_WORD
;
1886 /* Now assign stack slots for them. */
1887 for (regno
= R0_REGNUM
; regno
<= R30_REGNUM
; regno
++)
1888 if (cfun
->machine
->frame
.reg_offset
[regno
] == SLOT_REQUIRED
)
1890 cfun
->machine
->frame
.reg_offset
[regno
] = offset
;
1891 if (cfun
->machine
->frame
.wb_candidate1
== FIRST_PSEUDO_REGISTER
)
1892 cfun
->machine
->frame
.wb_candidate1
= regno
;
1893 else if (cfun
->machine
->frame
.wb_candidate2
== FIRST_PSEUDO_REGISTER
)
1894 cfun
->machine
->frame
.wb_candidate2
= regno
;
1895 offset
+= UNITS_PER_WORD
;
1898 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
1899 if (cfun
->machine
->frame
.reg_offset
[regno
] == SLOT_REQUIRED
)
1901 cfun
->machine
->frame
.reg_offset
[regno
] = offset
;
1902 if (cfun
->machine
->frame
.wb_candidate1
== FIRST_PSEUDO_REGISTER
)
1903 cfun
->machine
->frame
.wb_candidate1
= regno
;
1904 else if (cfun
->machine
->frame
.wb_candidate2
== FIRST_PSEUDO_REGISTER
1905 && cfun
->machine
->frame
.wb_candidate1
>= V0_REGNUM
)
1906 cfun
->machine
->frame
.wb_candidate2
= regno
;
1907 offset
+= UNITS_PER_WORD
;
1910 cfun
->machine
->frame
.padding0
=
1911 (AARCH64_ROUND_UP (offset
, STACK_BOUNDARY
/ BITS_PER_UNIT
) - offset
);
1912 offset
= AARCH64_ROUND_UP (offset
, STACK_BOUNDARY
/ BITS_PER_UNIT
);
1914 cfun
->machine
->frame
.saved_regs_size
= offset
;
1916 cfun
->machine
->frame
.hard_fp_offset
1917 = AARCH64_ROUND_UP (cfun
->machine
->frame
.saved_varargs_size
1919 + cfun
->machine
->frame
.saved_regs_size
,
1920 STACK_BOUNDARY
/ BITS_PER_UNIT
);
1922 cfun
->machine
->frame
.frame_size
1923 = AARCH64_ROUND_UP (cfun
->machine
->frame
.hard_fp_offset
1924 + crtl
->outgoing_args_size
,
1925 STACK_BOUNDARY
/ BITS_PER_UNIT
);
1927 cfun
->machine
->frame
.laid_out
= true;
1931 aarch64_register_saved_on_entry (int regno
)
1933 return cfun
->machine
->frame
.reg_offset
[regno
] >= 0;
/* Return the next register at or after REGNO, up to LIMIT, that is saved
   on entry; returns LIMIT + 1 when none remains.  */

static unsigned
aarch64_next_callee_save (unsigned regno, unsigned limit)
{
  while (regno <= limit && !aarch64_register_saved_on_entry (regno))
    regno++;
  return regno;
}
1945 aarch64_pushwb_single_reg (enum machine_mode mode
, unsigned regno
,
1946 HOST_WIDE_INT adjustment
)
1948 rtx base_rtx
= stack_pointer_rtx
;
1951 reg
= gen_rtx_REG (mode
, regno
);
1952 mem
= gen_rtx_PRE_MODIFY (Pmode
, base_rtx
,
1953 plus_constant (Pmode
, base_rtx
, -adjustment
));
1954 mem
= gen_rtx_MEM (mode
, mem
);
1956 insn
= emit_move_insn (mem
, reg
);
1957 RTX_FRAME_RELATED_P (insn
) = 1;
1961 aarch64_gen_storewb_pair (enum machine_mode mode
, rtx base
, rtx reg
, rtx reg2
,
1962 HOST_WIDE_INT adjustment
)
1967 return gen_storewb_pairdi_di (base
, base
, reg
, reg2
,
1968 GEN_INT (-adjustment
),
1969 GEN_INT (UNITS_PER_WORD
- adjustment
));
1971 return gen_storewb_pairdf_di (base
, base
, reg
, reg2
,
1972 GEN_INT (-adjustment
),
1973 GEN_INT (UNITS_PER_WORD
- adjustment
));
1980 aarch64_pushwb_pair_reg (enum machine_mode mode
, unsigned regno1
,
1981 unsigned regno2
, HOST_WIDE_INT adjustment
)
1984 rtx reg1
= gen_rtx_REG (mode
, regno1
);
1985 rtx reg2
= gen_rtx_REG (mode
, regno2
);
1987 insn
= emit_insn (aarch64_gen_storewb_pair (mode
, stack_pointer_rtx
, reg1
,
1989 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 2)) = 1;
1990 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 1)) = 1;
1991 RTX_FRAME_RELATED_P (insn
) = 1;
1995 aarch64_gen_loadwb_pair (enum machine_mode mode
, rtx base
, rtx reg
, rtx reg2
,
1996 HOST_WIDE_INT adjustment
)
2001 return gen_loadwb_pairdi_di (base
, base
, reg
, reg2
, GEN_INT (adjustment
),
2002 GEN_INT (UNITS_PER_WORD
));
2004 return gen_loadwb_pairdf_di (base
, base
, reg
, reg2
, GEN_INT (adjustment
),
2005 GEN_INT (UNITS_PER_WORD
));
2012 aarch64_gen_store_pair (enum machine_mode mode
, rtx mem1
, rtx reg1
, rtx mem2
,
2018 return gen_store_pairdi (mem1
, reg1
, mem2
, reg2
);
2021 return gen_store_pairdf (mem1
, reg1
, mem2
, reg2
);
2029 aarch64_gen_load_pair (enum machine_mode mode
, rtx reg1
, rtx mem1
, rtx reg2
,
2035 return gen_load_pairdi (reg1
, mem1
, reg2
, mem2
);
2038 return gen_load_pairdf (reg1
, mem1
, reg2
, mem2
);
2047 aarch64_save_callee_saves (enum machine_mode mode
, HOST_WIDE_INT start_offset
,
2048 unsigned start
, unsigned limit
, bool skip_wb
)
2051 rtx (*gen_mem_ref
) (enum machine_mode
, rtx
) = (frame_pointer_needed
2052 ? gen_frame_mem
: gen_rtx_MEM
);
2056 for (regno
= aarch64_next_callee_save (start
, limit
);
2058 regno
= aarch64_next_callee_save (regno
+ 1, limit
))
2061 HOST_WIDE_INT offset
;
2064 && (regno
== cfun
->machine
->frame
.wb_candidate1
2065 || regno
== cfun
->machine
->frame
.wb_candidate2
))
2068 reg
= gen_rtx_REG (mode
, regno
);
2069 offset
= start_offset
+ cfun
->machine
->frame
.reg_offset
[regno
];
2070 mem
= gen_mem_ref (mode
, plus_constant (Pmode
, stack_pointer_rtx
,
2073 regno2
= aarch64_next_callee_save (regno
+ 1, limit
);
2076 && ((cfun
->machine
->frame
.reg_offset
[regno
] + UNITS_PER_WORD
)
2077 == cfun
->machine
->frame
.reg_offset
[regno2
]))
2080 rtx reg2
= gen_rtx_REG (mode
, regno2
);
2083 offset
= start_offset
+ cfun
->machine
->frame
.reg_offset
[regno2
];
2084 mem2
= gen_mem_ref (mode
, plus_constant (Pmode
, stack_pointer_rtx
,
2086 insn
= emit_insn (aarch64_gen_store_pair (mode
, mem
, reg
, mem2
,
2089 /* The first part of a frame-related parallel insn is
2090 always assumed to be relevant to the frame
2091 calculations; subsequent parts, are only
2092 frame-related if explicitly marked. */
2093 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 1)) = 1;
2097 insn
= emit_move_insn (mem
, reg
);
2099 RTX_FRAME_RELATED_P (insn
) = 1;
2104 aarch64_restore_callee_saves (enum machine_mode mode
,
2105 HOST_WIDE_INT start_offset
, unsigned start
,
2106 unsigned limit
, bool skip_wb
, rtx
*cfi_ops
)
2108 rtx base_rtx
= stack_pointer_rtx
;
2109 rtx (*gen_mem_ref
) (enum machine_mode
, rtx
) = (frame_pointer_needed
2110 ? gen_frame_mem
: gen_rtx_MEM
);
2113 HOST_WIDE_INT offset
;
2115 for (regno
= aarch64_next_callee_save (start
, limit
);
2117 regno
= aarch64_next_callee_save (regno
+ 1, limit
))
2122 && (regno
== cfun
->machine
->frame
.wb_candidate1
2123 || regno
== cfun
->machine
->frame
.wb_candidate2
))
2126 reg
= gen_rtx_REG (mode
, regno
);
2127 offset
= start_offset
+ cfun
->machine
->frame
.reg_offset
[regno
];
2128 mem
= gen_mem_ref (mode
, plus_constant (Pmode
, base_rtx
, offset
));
2130 regno2
= aarch64_next_callee_save (regno
+ 1, limit
);
2133 && ((cfun
->machine
->frame
.reg_offset
[regno
] + UNITS_PER_WORD
)
2134 == cfun
->machine
->frame
.reg_offset
[regno2
]))
2136 rtx reg2
= gen_rtx_REG (mode
, regno2
);
2139 offset
= start_offset
+ cfun
->machine
->frame
.reg_offset
[regno2
];
2140 mem2
= gen_mem_ref (mode
, plus_constant (Pmode
, base_rtx
, offset
));
2141 emit_insn (aarch64_gen_load_pair (mode
, reg
, mem
, reg2
, mem2
));
2143 *cfi_ops
= alloc_reg_note (REG_CFA_RESTORE
, reg2
, *cfi_ops
);
2147 emit_move_insn (reg
, mem
);
2148 *cfi_ops
= alloc_reg_note (REG_CFA_RESTORE
, reg
, *cfi_ops
);
2152 /* AArch64 stack frames generated by this compiler look like:
2154 +-------------------------------+
2156 | incoming stack arguments |
2158 +-------------------------------+
2159 | | <-- incoming stack pointer (aligned)
2160 | callee-allocated save area |
2161 | for register varargs |
2163 +-------------------------------+
2164 | local variables | <-- frame_pointer_rtx
2166 +-------------------------------+
2168 +-------------------------------+ |
2169 | callee-saved registers | | frame.saved_regs_size
2170 +-------------------------------+ |
2172 +-------------------------------+ |
2173 | FP' | / <- hard_frame_pointer_rtx (aligned)
2174 +-------------------------------+
2175 | dynamic allocation |
2176 +-------------------------------+
2178 +-------------------------------+
2179 | outgoing stack arguments | <-- arg_pointer
2181 +-------------------------------+
2182 | | <-- stack_pointer_rtx (aligned)
2184 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
2185 but leave frame_pointer_rtx and hard_frame_pointer_rtx
2188 /* Generate the prologue instructions for entry into a function.
2189 Establish the stack frame by decreasing the stack pointer with a
2190 properly calculated size and, if necessary, create a frame record
2191 filled with the values of LR and previous frame pointer. The
2192 current FP is also set up if it is in use. */
2195 aarch64_expand_prologue (void)
2197 /* sub sp, sp, #<frame_size>
2198 stp {fp, lr}, [sp, #<frame_size> - 16]
2199 add fp, sp, #<frame_size> - hardfp_offset
2200 stp {cs_reg}, [fp, #-16] etc.
2202 sub sp, sp, <final_adjustment_if_any>
2204 HOST_WIDE_INT frame_size
, offset
;
2205 HOST_WIDE_INT fp_offset
; /* Offset from hard FP to SP. */
2206 HOST_WIDE_INT hard_fp_offset
;
2209 aarch64_layout_frame ();
2211 offset
= frame_size
= cfun
->machine
->frame
.frame_size
;
2212 hard_fp_offset
= cfun
->machine
->frame
.hard_fp_offset
;
2213 fp_offset
= frame_size
- hard_fp_offset
;
2215 if (flag_stack_usage_info
)
2216 current_function_static_stack_size
= frame_size
;
2218 /* Store pairs and load pairs have a range only -512 to 504. */
2221 /* When the frame has a large size, an initial decrease is done on
2222 the stack pointer to jump over the callee-allocated save area for
2223 register varargs, the local variable area and/or the callee-saved
2224 register area. This will allow the pre-index write-back
2225 store pair instructions to be used for setting up the stack frame
2227 offset
= hard_fp_offset
;
2229 offset
= cfun
->machine
->frame
.saved_regs_size
;
2231 frame_size
-= (offset
+ crtl
->outgoing_args_size
);
2234 if (frame_size
>= 0x1000000)
2236 rtx op0
= gen_rtx_REG (Pmode
, IP0_REGNUM
);
2237 emit_move_insn (op0
, GEN_INT (-frame_size
));
2238 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
, op0
));
2240 add_reg_note (insn
, REG_CFA_ADJUST_CFA
,
2241 gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
2242 plus_constant (Pmode
, stack_pointer_rtx
,
2244 RTX_FRAME_RELATED_P (insn
) = 1;
2246 else if (frame_size
> 0)
2248 int hi_ofs
= frame_size
& 0xfff000;
2249 int lo_ofs
= frame_size
& 0x000fff;
2253 insn
= emit_insn (gen_add2_insn
2254 (stack_pointer_rtx
, GEN_INT (-hi_ofs
)));
2255 RTX_FRAME_RELATED_P (insn
) = 1;
2259 insn
= emit_insn (gen_add2_insn
2260 (stack_pointer_rtx
, GEN_INT (-lo_ofs
)));
2261 RTX_FRAME_RELATED_P (insn
) = 1;
2270 bool skip_wb
= false;
2272 if (frame_pointer_needed
)
2278 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
,
2279 GEN_INT (-offset
)));
2280 RTX_FRAME_RELATED_P (insn
) = 1;
2282 aarch64_save_callee_saves (DImode
, fp_offset
, R29_REGNUM
,
2286 aarch64_pushwb_pair_reg (DImode
, R29_REGNUM
, R30_REGNUM
, offset
);
2288 /* Set up frame pointer to point to the location of the
2289 previous frame pointer on the stack. */
2290 insn
= emit_insn (gen_add3_insn (hard_frame_pointer_rtx
,
2292 GEN_INT (fp_offset
)));
2293 RTX_FRAME_RELATED_P (insn
) = 1;
2294 emit_insn (gen_stack_tie (stack_pointer_rtx
, hard_frame_pointer_rtx
));
2298 unsigned reg1
= cfun
->machine
->frame
.wb_candidate1
;
2299 unsigned reg2
= cfun
->machine
->frame
.wb_candidate2
;
2302 || reg1
== FIRST_PSEUDO_REGISTER
2303 || (reg2
== FIRST_PSEUDO_REGISTER
2306 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
,
2307 GEN_INT (-offset
)));
2308 RTX_FRAME_RELATED_P (insn
) = 1;
2312 enum machine_mode mode1
= (reg1
<= R30_REGNUM
) ? DImode
: DFmode
;
2316 if (reg2
== FIRST_PSEUDO_REGISTER
)
2317 aarch64_pushwb_single_reg (mode1
, reg1
, offset
);
2319 aarch64_pushwb_pair_reg (mode1
, reg1
, reg2
, offset
);
2323 aarch64_save_callee_saves (DImode
, fp_offset
, R0_REGNUM
, R30_REGNUM
,
2325 aarch64_save_callee_saves (DFmode
, fp_offset
, V0_REGNUM
, V31_REGNUM
,
2329 /* when offset >= 512,
2330 sub sp, sp, #<outgoing_args_size> */
2331 if (frame_size
> -1)
2333 if (crtl
->outgoing_args_size
> 0)
2335 insn
= emit_insn (gen_add2_insn
2337 GEN_INT (- crtl
->outgoing_args_size
)));
2338 RTX_FRAME_RELATED_P (insn
) = 1;
2343 /* Return TRUE if we can use a simple_return insn.
2345 This function checks whether the callee saved stack is empty, which
2346 means no restore actions are need. The pro_and_epilogue will use
2347 this to check whether shrink-wrapping opt is feasible. */
2350 aarch64_use_return_insn_p (void)
2352 if (!reload_completed
)
2358 aarch64_layout_frame ();
2360 return cfun
->machine
->frame
.frame_size
== 0;
2363 /* Generate the epilogue instructions for returning from a function. */
2365 aarch64_expand_epilogue (bool for_sibcall
)
2367 HOST_WIDE_INT frame_size
, offset
;
2368 HOST_WIDE_INT fp_offset
;
2369 HOST_WIDE_INT hard_fp_offset
;
2372 aarch64_layout_frame ();
2374 offset
= frame_size
= cfun
->machine
->frame
.frame_size
;
2375 hard_fp_offset
= cfun
->machine
->frame
.hard_fp_offset
;
2376 fp_offset
= frame_size
- hard_fp_offset
;
2378 /* Store pairs and load pairs have a range only -512 to 504. */
2381 offset
= hard_fp_offset
;
2383 offset
= cfun
->machine
->frame
.saved_regs_size
;
2385 frame_size
-= (offset
+ crtl
->outgoing_args_size
);
2387 if (!frame_pointer_needed
&& crtl
->outgoing_args_size
> 0)
2389 insn
= emit_insn (gen_add2_insn
2391 GEN_INT (crtl
->outgoing_args_size
)));
2392 RTX_FRAME_RELATED_P (insn
) = 1;
2398 /* If there were outgoing arguments or we've done dynamic stack
2399 allocation, then restore the stack pointer from the frame
2400 pointer. This is at most one insn and more efficient than using
2401 GCC's internal mechanism. */
2402 if (frame_pointer_needed
2403 && (crtl
->outgoing_args_size
|| cfun
->calls_alloca
))
2405 insn
= emit_insn (gen_add3_insn (stack_pointer_rtx
,
2406 hard_frame_pointer_rtx
,
2408 offset
= offset
- fp_offset
;
2413 unsigned reg1
= cfun
->machine
->frame
.wb_candidate1
;
2414 unsigned reg2
= cfun
->machine
->frame
.wb_candidate2
;
2415 bool skip_wb
= true;
2418 if (frame_pointer_needed
)
2421 || reg1
== FIRST_PSEUDO_REGISTER
2422 || (reg2
== FIRST_PSEUDO_REGISTER
2426 aarch64_restore_callee_saves (DImode
, fp_offset
, R0_REGNUM
, R30_REGNUM
,
2428 aarch64_restore_callee_saves (DFmode
, fp_offset
, V0_REGNUM
, V31_REGNUM
,
2433 enum machine_mode mode1
= (reg1
<= R30_REGNUM
) ? DImode
: DFmode
;
2434 rtx rreg1
= gen_rtx_REG (mode1
, reg1
);
2436 cfi_ops
= alloc_reg_note (REG_CFA_RESTORE
, rreg1
, cfi_ops
);
2437 if (reg2
== FIRST_PSEUDO_REGISTER
)
2439 rtx mem
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
2440 mem
= gen_rtx_POST_MODIFY (Pmode
, stack_pointer_rtx
, mem
);
2441 mem
= gen_rtx_MEM (mode1
, mem
);
2442 insn
= emit_move_insn (rreg1
, mem
);
2446 rtx rreg2
= gen_rtx_REG (mode1
, reg2
);
2448 cfi_ops
= alloc_reg_note (REG_CFA_RESTORE
, rreg2
, cfi_ops
);
2449 insn
= emit_insn (aarch64_gen_loadwb_pair
2450 (mode1
, stack_pointer_rtx
, rreg1
,
2456 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
,
2460 /* Reset the CFA to be SP + FRAME_SIZE. */
2461 rtx new_cfa
= stack_pointer_rtx
;
2463 new_cfa
= plus_constant (Pmode
, new_cfa
, frame_size
);
2464 cfi_ops
= alloc_reg_note (REG_CFA_DEF_CFA
, new_cfa
, cfi_ops
);
2465 REG_NOTES (insn
) = cfi_ops
;
2466 RTX_FRAME_RELATED_P (insn
) = 1;
2471 if (frame_size
>= 0x1000000)
2473 rtx op0
= gen_rtx_REG (Pmode
, IP0_REGNUM
);
2474 emit_move_insn (op0
, GEN_INT (frame_size
));
2475 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
, op0
));
2479 int hi_ofs
= frame_size
& 0xfff000;
2480 int lo_ofs
= frame_size
& 0x000fff;
2482 if (hi_ofs
&& lo_ofs
)
2484 insn
= emit_insn (gen_add2_insn
2485 (stack_pointer_rtx
, GEN_INT (hi_ofs
)));
2486 RTX_FRAME_RELATED_P (insn
) = 1;
2487 frame_size
= lo_ofs
;
2489 insn
= emit_insn (gen_add2_insn
2490 (stack_pointer_rtx
, GEN_INT (frame_size
)));
2493 /* Reset the CFA to be SP + 0. */
2494 add_reg_note (insn
, REG_CFA_DEF_CFA
, stack_pointer_rtx
);
2495 RTX_FRAME_RELATED_P (insn
) = 1;
2498 /* Stack adjustment for exception handler. */
2499 if (crtl
->calls_eh_return
)
2501 /* We need to unwind the stack by the offset computed by
2502 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
2503 to be SP; letting the CFA move during this adjustment
2504 is just as correct as retaining the CFA from the body
2505 of the function. Therefore, do nothing special. */
2506 emit_insn (gen_add2_insn (stack_pointer_rtx
, EH_RETURN_STACKADJ_RTX
));
2509 emit_use (gen_rtx_REG (DImode
, LR_REGNUM
));
2511 emit_jump_insn (ret_rtx
);
2514 /* Return the place to copy the exception unwinding return address to.
2515 This will probably be a stack slot, but could (in theory be the
2516 return register). */
2518 aarch64_final_eh_return_addr (void)
2520 HOST_WIDE_INT fp_offset
;
2522 aarch64_layout_frame ();
2524 fp_offset
= cfun
->machine
->frame
.frame_size
2525 - cfun
->machine
->frame
.hard_fp_offset
;
2527 if (cfun
->machine
->frame
.reg_offset
[LR_REGNUM
] < 0)
2528 return gen_rtx_REG (DImode
, LR_REGNUM
);
2530 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2531 result in a store to save LR introduced by builtin_eh_return () being
2532 incorrectly deleted because the alias is not detected.
2533 So in the calculation of the address to copy the exception unwinding
2534 return address to, we note 2 cases.
2535 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2536 we return a SP-relative location since all the addresses are SP-relative
2537 in this case. This prevents the store from being optimized away.
2538 If the fp_offset is not 0, then the addresses will be FP-relative and
2539 therefore we return a FP-relative location. */
2541 if (frame_pointer_needed
)
2544 return gen_frame_mem (DImode
,
2545 plus_constant (Pmode
, hard_frame_pointer_rtx
, UNITS_PER_WORD
));
2547 return gen_frame_mem (DImode
,
2548 plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
));
2551 /* If FP is not needed, we calculate the location of LR, which would be
2552 at the top of the saved registers block. */
2554 return gen_frame_mem (DImode
,
2555 plus_constant (Pmode
,
2558 + cfun
->machine
->frame
.saved_regs_size
2559 - 2 * UNITS_PER_WORD
));
2562 /* Possibly output code to build up a constant in a register. For
2563 the benefit of the costs infrastructure, returns the number of
2564 instructions which would be emitted. GENERATE inhibits or
2565 enables code generation. */
2568 aarch64_build_constant (int regnum
, HOST_WIDE_INT val
, bool generate
)
2572 if (aarch64_bitmask_imm (val
, DImode
))
2575 emit_move_insn (gen_rtx_REG (Pmode
, regnum
), GEN_INT (val
));
2583 HOST_WIDE_INT valp
= val
>> 16;
2587 for (i
= 16; i
< 64; i
+= 16)
2589 valm
= (valp
& 0xffff);
2600 /* zcount contains the number of additional MOVK instructions
2601 required if the constant is built up with an initial MOVZ instruction,
2602 while ncount is the number of MOVK instructions required if starting
2603 with a MOVN instruction. Choose the sequence that yields the fewest
2604 number of instructions, preferring MOVZ instructions when they are both
2606 if (ncount
< zcount
)
2609 emit_move_insn (gen_rtx_REG (Pmode
, regnum
),
2610 GEN_INT (val
| ~(HOST_WIDE_INT
) 0xffff));
2617 emit_move_insn (gen_rtx_REG (Pmode
, regnum
),
2618 GEN_INT (val
& 0xffff));
2625 for (i
= 16; i
< 64; i
+= 16)
2627 if ((val
& 0xffff) != tval
)
2630 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode
, regnum
),
2632 GEN_INT (val
& 0xffff)));
2642 aarch64_add_constant (int regnum
, int scratchreg
, HOST_WIDE_INT delta
)
2644 HOST_WIDE_INT mdelta
= delta
;
2645 rtx this_rtx
= gen_rtx_REG (Pmode
, regnum
);
2646 rtx scratch_rtx
= gen_rtx_REG (Pmode
, scratchreg
);
2651 if (mdelta
>= 4096 * 4096)
2653 (void) aarch64_build_constant (scratchreg
, delta
, true);
2654 emit_insn (gen_add3_insn (this_rtx
, this_rtx
, scratch_rtx
));
2656 else if (mdelta
> 0)
2660 emit_insn (gen_rtx_SET (Pmode
, scratch_rtx
, GEN_INT (mdelta
/ 4096)));
2661 rtx shift
= gen_rtx_ASHIFT (Pmode
, scratch_rtx
, GEN_INT (12));
2663 emit_insn (gen_rtx_SET (Pmode
, this_rtx
,
2664 gen_rtx_MINUS (Pmode
, this_rtx
, shift
)));
2666 emit_insn (gen_rtx_SET (Pmode
, this_rtx
,
2667 gen_rtx_PLUS (Pmode
, this_rtx
, shift
)));
2669 if (mdelta
% 4096 != 0)
2671 scratch_rtx
= GEN_INT ((delta
< 0 ? -1 : 1) * (mdelta
% 4096));
2672 emit_insn (gen_rtx_SET (Pmode
, this_rtx
,
2673 gen_rtx_PLUS (Pmode
, this_rtx
, scratch_rtx
)));
2678 /* Output code to add DELTA to the first argument, and then jump
2679 to FUNCTION. Used for C++ multiple inheritance. */
2681 aarch64_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
2682 HOST_WIDE_INT delta
,
2683 HOST_WIDE_INT vcall_offset
,
2686 /* The this pointer is always in x0. Note that this differs from
2687 Arm where the this pointer maybe bumped to r1 if r0 is required
2688 to return a pointer to an aggregate. On AArch64 a result value
2689 pointer will be in x8. */
2690 int this_regno
= R0_REGNUM
;
2691 rtx this_rtx
, temp0
, temp1
, addr
, funexp
;
2694 reload_completed
= 1;
2695 emit_note (NOTE_INSN_PROLOGUE_END
);
2697 if (vcall_offset
== 0)
2698 aarch64_add_constant (this_regno
, IP1_REGNUM
, delta
);
2701 gcc_assert ((vcall_offset
& (POINTER_BYTES
- 1)) == 0);
2703 this_rtx
= gen_rtx_REG (Pmode
, this_regno
);
2704 temp0
= gen_rtx_REG (Pmode
, IP0_REGNUM
);
2705 temp1
= gen_rtx_REG (Pmode
, IP1_REGNUM
);
2710 if (delta
>= -256 && delta
< 256)
2711 addr
= gen_rtx_PRE_MODIFY (Pmode
, this_rtx
,
2712 plus_constant (Pmode
, this_rtx
, delta
));
2714 aarch64_add_constant (this_regno
, IP1_REGNUM
, delta
);
2717 if (Pmode
== ptr_mode
)
2718 aarch64_emit_move (temp0
, gen_rtx_MEM (ptr_mode
, addr
));
2720 aarch64_emit_move (temp0
,
2721 gen_rtx_ZERO_EXTEND (Pmode
,
2722 gen_rtx_MEM (ptr_mode
, addr
)));
2724 if (vcall_offset
>= -256 && vcall_offset
< 4096 * POINTER_BYTES
)
2725 addr
= plus_constant (Pmode
, temp0
, vcall_offset
);
2728 (void) aarch64_build_constant (IP1_REGNUM
, vcall_offset
, true);
2729 addr
= gen_rtx_PLUS (Pmode
, temp0
, temp1
);
2732 if (Pmode
== ptr_mode
)
2733 aarch64_emit_move (temp1
, gen_rtx_MEM (ptr_mode
,addr
));
2735 aarch64_emit_move (temp1
,
2736 gen_rtx_SIGN_EXTEND (Pmode
,
2737 gen_rtx_MEM (ptr_mode
, addr
)));
2739 emit_insn (gen_add2_insn (this_rtx
, temp1
));
2742 /* Generate a tail call to the target function. */
2743 if (!TREE_USED (function
))
2745 assemble_external (function
);
2746 TREE_USED (function
) = 1;
2748 funexp
= XEXP (DECL_RTL (function
), 0);
2749 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
2750 insn
= emit_call_insn (gen_sibcall (funexp
, const0_rtx
, NULL_RTX
));
2751 SIBLING_CALL_P (insn
) = 1;
2753 insn
= get_insns ();
2754 shorten_branches (insn
);
2755 final_start_function (insn
, file
, 1);
2756 final (insn
, file
, 1);
2757 final_end_function ();
2759 /* Stop pretending to be a post-reload pass. */
2760 reload_completed
= 0;
2764 aarch64_tls_operand_p_1 (rtx
*x
, void *data ATTRIBUTE_UNUSED
)
2766 if (GET_CODE (*x
) == SYMBOL_REF
)
2767 return SYMBOL_REF_TLS_MODEL (*x
) != 0;
2769 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2770 TLS offsets, not real symbol references. */
2771 if (GET_CODE (*x
) == UNSPEC
2772 && XINT (*x
, 1) == UNSPEC_TLS
)
2779 aarch64_tls_referenced_p (rtx x
)
2781 if (!TARGET_HAVE_TLS
)
2784 return for_each_rtx (&x
, aarch64_tls_operand_p_1
, NULL
);
2789 aarch64_bitmasks_cmp (const void *i1
, const void *i2
)
2791 const unsigned HOST_WIDE_INT
*imm1
= (const unsigned HOST_WIDE_INT
*) i1
;
2792 const unsigned HOST_WIDE_INT
*imm2
= (const unsigned HOST_WIDE_INT
*) i2
;
2803 aarch64_build_bitmask_table (void)
2805 unsigned HOST_WIDE_INT mask
, imm
;
2806 unsigned int log_e
, e
, s
, r
;
2807 unsigned int nimms
= 0;
2809 for (log_e
= 1; log_e
<= 6; log_e
++)
2813 mask
= ~(HOST_WIDE_INT
) 0;
2815 mask
= ((HOST_WIDE_INT
) 1 << e
) - 1;
2816 for (s
= 1; s
< e
; s
++)
2818 for (r
= 0; r
< e
; r
++)
2820 /* set s consecutive bits to 1 (s < 64) */
2821 imm
= ((unsigned HOST_WIDE_INT
)1 << s
) - 1;
2822 /* rotate right by r */
2824 imm
= ((imm
>> r
) | (imm
<< (e
- r
))) & mask
;
2825 /* replicate the constant depending on SIMD size */
2827 case 1: imm
|= (imm
<< 2);
2828 case 2: imm
|= (imm
<< 4);
2829 case 3: imm
|= (imm
<< 8);
2830 case 4: imm
|= (imm
<< 16);
2831 case 5: imm
|= (imm
<< 32);
2837 gcc_assert (nimms
< AARCH64_NUM_BITMASKS
);
2838 aarch64_bitmasks
[nimms
++] = imm
;
2843 gcc_assert (nimms
== AARCH64_NUM_BITMASKS
);
2844 qsort (aarch64_bitmasks
, nimms
, sizeof (aarch64_bitmasks
[0]),
2845 aarch64_bitmasks_cmp
);
2849 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2850 a left shift of 0 or 12 bits. */
2852 aarch64_uimm12_shift (HOST_WIDE_INT val
)
2854 return ((val
& (((HOST_WIDE_INT
) 0xfff) << 0)) == val
2855 || (val
& (((HOST_WIDE_INT
) 0xfff) << 12)) == val
2860 /* Return true if val is an immediate that can be loaded into a
2861 register by a MOVZ instruction. */
2863 aarch64_movw_imm (HOST_WIDE_INT val
, enum machine_mode mode
)
2865 if (GET_MODE_SIZE (mode
) > 4)
2867 if ((val
& (((HOST_WIDE_INT
) 0xffff) << 32)) == val
2868 || (val
& (((HOST_WIDE_INT
) 0xffff) << 48)) == val
)
2873 /* Ignore sign extension. */
2874 val
&= (HOST_WIDE_INT
) 0xffffffff;
2876 return ((val
& (((HOST_WIDE_INT
) 0xffff) << 0)) == val
2877 || (val
& (((HOST_WIDE_INT
) 0xffff) << 16)) == val
);
2881 /* Return true if val is a valid bitmask immediate. */
2883 aarch64_bitmask_imm (HOST_WIDE_INT val
, enum machine_mode mode
)
2885 if (GET_MODE_SIZE (mode
) < 8)
2887 /* Replicate bit pattern. */
2888 val
&= (HOST_WIDE_INT
) 0xffffffff;
2891 return bsearch (&val
, aarch64_bitmasks
, AARCH64_NUM_BITMASKS
,
2892 sizeof (aarch64_bitmasks
[0]), aarch64_bitmasks_cmp
) != NULL
;
2896 /* Return true if val is an immediate that can be loaded into a
2897 register in a single instruction. */
2899 aarch64_move_imm (HOST_WIDE_INT val
, enum machine_mode mode
)
2901 if (aarch64_movw_imm (val
, mode
) || aarch64_movw_imm (~val
, mode
))
2903 return aarch64_bitmask_imm (val
, mode
);
2907 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
2911 if (GET_CODE (x
) == HIGH
)
2914 split_const (x
, &base
, &offset
);
2915 if (GET_CODE (base
) == SYMBOL_REF
|| GET_CODE (base
) == LABEL_REF
)
2917 if (aarch64_classify_symbol (base
, SYMBOL_CONTEXT_ADR
)
2918 != SYMBOL_FORCE_TO_MEM
)
2921 /* Avoid generating a 64-bit relocation in ILP32; leave
2922 to aarch64_expand_mov_immediate to handle it properly. */
2923 return mode
!= ptr_mode
;
2926 return aarch64_tls_referenced_p (x
);
2929 /* Return true if register REGNO is a valid index register.
2930 STRICT_P is true if REG_OK_STRICT is in effect. */
2933 aarch64_regno_ok_for_index_p (int regno
, bool strict_p
)
2935 if (!HARD_REGISTER_NUM_P (regno
))
2943 regno
= reg_renumber
[regno
];
2945 return GP_REGNUM_P (regno
);
2948 /* Return true if register REGNO is a valid base register for mode MODE.
2949 STRICT_P is true if REG_OK_STRICT is in effect. */
2952 aarch64_regno_ok_for_base_p (int regno
, bool strict_p
)
2954 if (!HARD_REGISTER_NUM_P (regno
))
2962 regno
= reg_renumber
[regno
];
2965 /* The fake registers will be eliminated to either the stack or
2966 hard frame pointer, both of which are usually valid base registers.
2967 Reload deals with the cases where the eliminated form isn't valid. */
2968 return (GP_REGNUM_P (regno
)
2969 || regno
== SP_REGNUM
2970 || regno
== FRAME_POINTER_REGNUM
2971 || regno
== ARG_POINTER_REGNUM
);
2974 /* Return true if X is a valid base register for mode MODE.
2975 STRICT_P is true if REG_OK_STRICT is in effect. */
2978 aarch64_base_register_rtx_p (rtx x
, bool strict_p
)
2980 if (!strict_p
&& GET_CODE (x
) == SUBREG
)
2983 return (REG_P (x
) && aarch64_regno_ok_for_base_p (REGNO (x
), strict_p
));
2986 /* Return true if address offset is a valid index. If it is, fill in INFO
2987 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2990 aarch64_classify_index (struct aarch64_address_info
*info
, rtx x
,
2991 enum machine_mode mode
, bool strict_p
)
2993 enum aarch64_address_type type
;
2998 if ((REG_P (x
) || GET_CODE (x
) == SUBREG
)
2999 && GET_MODE (x
) == Pmode
)
3001 type
= ADDRESS_REG_REG
;
3005 /* (sign_extend:DI (reg:SI)) */
3006 else if ((GET_CODE (x
) == SIGN_EXTEND
3007 || GET_CODE (x
) == ZERO_EXTEND
)
3008 && GET_MODE (x
) == DImode
3009 && GET_MODE (XEXP (x
, 0)) == SImode
)
3011 type
= (GET_CODE (x
) == SIGN_EXTEND
)
3012 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
3013 index
= XEXP (x
, 0);
3016 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3017 else if (GET_CODE (x
) == MULT
3018 && (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
3019 || GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
3020 && GET_MODE (XEXP (x
, 0)) == DImode
3021 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
3022 && CONST_INT_P (XEXP (x
, 1)))
3024 type
= (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
3025 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
3026 index
= XEXP (XEXP (x
, 0), 0);
3027 shift
= exact_log2 (INTVAL (XEXP (x
, 1)));
3029 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3030 else if (GET_CODE (x
) == ASHIFT
3031 && (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
3032 || GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
3033 && GET_MODE (XEXP (x
, 0)) == DImode
3034 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
3035 && CONST_INT_P (XEXP (x
, 1)))
3037 type
= (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
3038 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
3039 index
= XEXP (XEXP (x
, 0), 0);
3040 shift
= INTVAL (XEXP (x
, 1));
3042 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3043 else if ((GET_CODE (x
) == SIGN_EXTRACT
3044 || GET_CODE (x
) == ZERO_EXTRACT
)
3045 && GET_MODE (x
) == DImode
3046 && GET_CODE (XEXP (x
, 0)) == MULT
3047 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
3048 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
3050 type
= (GET_CODE (x
) == SIGN_EXTRACT
)
3051 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
3052 index
= XEXP (XEXP (x
, 0), 0);
3053 shift
= exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)));
3054 if (INTVAL (XEXP (x
, 1)) != 32 + shift
3055 || INTVAL (XEXP (x
, 2)) != 0)
3058 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3059 (const_int 0xffffffff<<shift)) */
3060 else if (GET_CODE (x
) == AND
3061 && GET_MODE (x
) == DImode
3062 && GET_CODE (XEXP (x
, 0)) == MULT
3063 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
3064 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
3065 && CONST_INT_P (XEXP (x
, 1)))
3067 type
= ADDRESS_REG_UXTW
;
3068 index
= XEXP (XEXP (x
, 0), 0);
3069 shift
= exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)));
3070 if (INTVAL (XEXP (x
, 1)) != (HOST_WIDE_INT
)0xffffffff << shift
)
3073 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3074 else if ((GET_CODE (x
) == SIGN_EXTRACT
3075 || GET_CODE (x
) == ZERO_EXTRACT
)
3076 && GET_MODE (x
) == DImode
3077 && GET_CODE (XEXP (x
, 0)) == ASHIFT
3078 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
3079 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
3081 type
= (GET_CODE (x
) == SIGN_EXTRACT
)
3082 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
3083 index
= XEXP (XEXP (x
, 0), 0);
3084 shift
= INTVAL (XEXP (XEXP (x
, 0), 1));
3085 if (INTVAL (XEXP (x
, 1)) != 32 + shift
3086 || INTVAL (XEXP (x
, 2)) != 0)
3089 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3090 (const_int 0xffffffff<<shift)) */
3091 else if (GET_CODE (x
) == AND
3092 && GET_MODE (x
) == DImode
3093 && GET_CODE (XEXP (x
, 0)) == ASHIFT
3094 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
3095 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
3096 && CONST_INT_P (XEXP (x
, 1)))
3098 type
= ADDRESS_REG_UXTW
;
3099 index
= XEXP (XEXP (x
, 0), 0);
3100 shift
= INTVAL (XEXP (XEXP (x
, 0), 1));
3101 if (INTVAL (XEXP (x
, 1)) != (HOST_WIDE_INT
)0xffffffff << shift
)
3104 /* (mult:P (reg:P) (const_int scale)) */
3105 else if (GET_CODE (x
) == MULT
3106 && GET_MODE (x
) == Pmode
3107 && GET_MODE (XEXP (x
, 0)) == Pmode
3108 && CONST_INT_P (XEXP (x
, 1)))
3110 type
= ADDRESS_REG_REG
;
3111 index
= XEXP (x
, 0);
3112 shift
= exact_log2 (INTVAL (XEXP (x
, 1)));
3114 /* (ashift:P (reg:P) (const_int shift)) */
3115 else if (GET_CODE (x
) == ASHIFT
3116 && GET_MODE (x
) == Pmode
3117 && GET_MODE (XEXP (x
, 0)) == Pmode
3118 && CONST_INT_P (XEXP (x
, 1)))
3120 type
= ADDRESS_REG_REG
;
3121 index
= XEXP (x
, 0);
3122 shift
= INTVAL (XEXP (x
, 1));
3127 if (GET_CODE (index
) == SUBREG
)
3128 index
= SUBREG_REG (index
);
3131 (shift
> 0 && shift
<= 3
3132 && (1 << shift
) == GET_MODE_SIZE (mode
)))
3134 && aarch64_regno_ok_for_index_p (REGNO (index
), strict_p
))
3137 info
->offset
= index
;
3138 info
->shift
= shift
;
3146 aarch64_offset_7bit_signed_scaled_p (enum machine_mode mode
, HOST_WIDE_INT offset
)
3148 return (offset
>= -64 * GET_MODE_SIZE (mode
)
3149 && offset
< 64 * GET_MODE_SIZE (mode
)
3150 && offset
% GET_MODE_SIZE (mode
) == 0);
3154 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
3155 HOST_WIDE_INT offset
)
3157 return offset
>= -256 && offset
< 256;
3161 offset_12bit_unsigned_scaled_p (enum machine_mode mode
, HOST_WIDE_INT offset
)
3164 && offset
< 4096 * GET_MODE_SIZE (mode
)
3165 && offset
% GET_MODE_SIZE (mode
) == 0);
3168 /* Return true if X is a valid address for machine mode MODE. If it is,
3169 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3170 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3173 aarch64_classify_address (struct aarch64_address_info
*info
,
3174 rtx x
, enum machine_mode mode
,
3175 RTX_CODE outer_code
, bool strict_p
)
3177 enum rtx_code code
= GET_CODE (x
);
3179 bool allow_reg_index_p
=
3180 outer_code
!= PARALLEL
&& (GET_MODE_SIZE (mode
) != 16
3181 || aarch64_vector_mode_supported_p (mode
));
3182 /* Don't support anything other than POST_INC or REG addressing for
3184 if (aarch64_vect_struct_mode_p (mode
)
3185 && (code
!= POST_INC
&& code
!= REG
))
3192 info
->type
= ADDRESS_REG_IMM
;
3194 info
->offset
= const0_rtx
;
3195 return aarch64_base_register_rtx_p (x
, strict_p
);
3203 && (op0
== virtual_stack_vars_rtx
3204 || op0
== frame_pointer_rtx
3205 || op0
== arg_pointer_rtx
)
3206 && CONST_INT_P (op1
))
3208 info
->type
= ADDRESS_REG_IMM
;
3215 if (GET_MODE_SIZE (mode
) != 0
3216 && CONST_INT_P (op1
)
3217 && aarch64_base_register_rtx_p (op0
, strict_p
))
3219 HOST_WIDE_INT offset
= INTVAL (op1
);
3221 info
->type
= ADDRESS_REG_IMM
;
3225 /* TImode and TFmode values are allowed in both pairs of X
3226 registers and individual Q registers. The available
3228 X,X: 7-bit signed scaled offset
3229 Q: 9-bit signed offset
3230 We conservatively require an offset representable in either mode.
3232 if (mode
== TImode
|| mode
== TFmode
)
3233 return (aarch64_offset_7bit_signed_scaled_p (mode
, offset
)
3234 && offset_9bit_signed_unscaled_p (mode
, offset
));
3236 if (outer_code
== PARALLEL
)
3237 return ((GET_MODE_SIZE (mode
) == 4 || GET_MODE_SIZE (mode
) == 8)
3238 && aarch64_offset_7bit_signed_scaled_p (mode
, offset
));
3240 return (offset_9bit_signed_unscaled_p (mode
, offset
)
3241 || offset_12bit_unsigned_scaled_p (mode
, offset
));
3244 if (allow_reg_index_p
)
3246 /* Look for base + (scaled/extended) index register. */
3247 if (aarch64_base_register_rtx_p (op0
, strict_p
)
3248 && aarch64_classify_index (info
, op1
, mode
, strict_p
))
3253 if (aarch64_base_register_rtx_p (op1
, strict_p
)
3254 && aarch64_classify_index (info
, op0
, mode
, strict_p
))
3267 info
->type
= ADDRESS_REG_WB
;
3268 info
->base
= XEXP (x
, 0);
3269 info
->offset
= NULL_RTX
;
3270 return aarch64_base_register_rtx_p (info
->base
, strict_p
);
3274 info
->type
= ADDRESS_REG_WB
;
3275 info
->base
= XEXP (x
, 0);
3276 if (GET_CODE (XEXP (x
, 1)) == PLUS
3277 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
3278 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), info
->base
)
3279 && aarch64_base_register_rtx_p (info
->base
, strict_p
))
3281 HOST_WIDE_INT offset
;
3282 info
->offset
= XEXP (XEXP (x
, 1), 1);
3283 offset
= INTVAL (info
->offset
);
3285 /* TImode and TFmode values are allowed in both pairs of X
3286 registers and individual Q registers. The available
3288 X,X: 7-bit signed scaled offset
3289 Q: 9-bit signed offset
3290 We conservatively require an offset representable in either mode.
3292 if (mode
== TImode
|| mode
== TFmode
)
3293 return (aarch64_offset_7bit_signed_scaled_p (mode
, offset
)
3294 && offset_9bit_signed_unscaled_p (mode
, offset
));
3296 if (outer_code
== PARALLEL
)
3297 return ((GET_MODE_SIZE (mode
) == 4 || GET_MODE_SIZE (mode
) == 8)
3298 && aarch64_offset_7bit_signed_scaled_p (mode
, offset
));
3300 return offset_9bit_signed_unscaled_p (mode
, offset
);
3307 /* load literal: pc-relative constant pool entry. Only supported
3308 for SI mode or larger. */
3309 info
->type
= ADDRESS_SYMBOLIC
;
3310 if (outer_code
!= PARALLEL
&& GET_MODE_SIZE (mode
) >= 4)
3314 split_const (x
, &sym
, &addend
);
3315 return (GET_CODE (sym
) == LABEL_REF
3316 || (GET_CODE (sym
) == SYMBOL_REF
3317 && CONSTANT_POOL_ADDRESS_P (sym
)));
3322 info
->type
= ADDRESS_LO_SUM
;
3323 info
->base
= XEXP (x
, 0);
3324 info
->offset
= XEXP (x
, 1);
3325 if (allow_reg_index_p
3326 && aarch64_base_register_rtx_p (info
->base
, strict_p
))
3329 split_const (info
->offset
, &sym
, &offs
);
3330 if (GET_CODE (sym
) == SYMBOL_REF
3331 && (aarch64_classify_symbol (sym
, SYMBOL_CONTEXT_MEM
)
3332 == SYMBOL_SMALL_ABSOLUTE
))
3334 /* The symbol and offset must be aligned to the access size. */
3336 unsigned int ref_size
;
3338 if (CONSTANT_POOL_ADDRESS_P (sym
))
3339 align
= GET_MODE_ALIGNMENT (get_pool_mode (sym
));
3340 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym
))
3342 tree exp
= SYMBOL_REF_DECL (sym
);
3343 align
= TYPE_ALIGN (TREE_TYPE (exp
));
3344 align
= CONSTANT_ALIGNMENT (exp
, align
);
3346 else if (SYMBOL_REF_DECL (sym
))
3347 align
= DECL_ALIGN (SYMBOL_REF_DECL (sym
));
3348 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym
)
3349 && SYMBOL_REF_BLOCK (sym
) != NULL
)
3350 align
= SYMBOL_REF_BLOCK (sym
)->alignment
;
3352 align
= BITS_PER_UNIT
;
3354 ref_size
= GET_MODE_SIZE (mode
);
3356 ref_size
= GET_MODE_SIZE (DImode
);
3358 return ((INTVAL (offs
) & (ref_size
- 1)) == 0
3359 && ((align
/ BITS_PER_UNIT
) & (ref_size
- 1)) == 0);
3370 aarch64_symbolic_address_p (rtx x
)
3374 split_const (x
, &x
, &offset
);
3375 return GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
;
3378 /* Classify the base of symbolic expression X, given that X appears in
3381 enum aarch64_symbol_type
3382 aarch64_classify_symbolic_expression (rtx x
,
3383 enum aarch64_symbol_context context
)
3387 split_const (x
, &x
, &offset
);
3388 return aarch64_classify_symbol (x
, context
);
3392 /* Return TRUE if X is a legitimate address for accessing memory in
3395 aarch64_legitimate_address_hook_p (enum machine_mode mode
, rtx x
, bool strict_p
)
3397 struct aarch64_address_info addr
;
3399 return aarch64_classify_address (&addr
, x
, mode
, MEM
, strict_p
);
3402 /* Return TRUE if X is a legitimate address for accessing memory in
3403 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3406 aarch64_legitimate_address_p (enum machine_mode mode
, rtx x
,
3407 RTX_CODE outer_code
, bool strict_p
)
3409 struct aarch64_address_info addr
;
3411 return aarch64_classify_address (&addr
, x
, mode
, outer_code
, strict_p
);
3414 /* Return TRUE if rtx X is immediate constant 0.0 */
3416 aarch64_float_const_zero_rtx_p (rtx x
)
3420 if (GET_MODE (x
) == VOIDmode
)
3423 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
3424 if (REAL_VALUE_MINUS_ZERO (r
))
3425 return !HONOR_SIGNED_ZEROS (GET_MODE (x
));
3426 return REAL_VALUES_EQUAL (r
, dconst0
);
3429 /* Return the fixed registers used for condition codes. */
3432 aarch64_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
3435 *p2
= INVALID_REGNUM
;
3439 /* Emit call insn with PAT and do aarch64-specific handling. */
3442 aarch64_emit_call_insn (rtx pat
)
3444 rtx insn
= emit_call_insn (pat
);
3446 rtx
*fusage
= &CALL_INSN_FUNCTION_USAGE (insn
);
3447 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP0_REGNUM
));
3448 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP1_REGNUM
));
3452 aarch64_select_cc_mode (RTX_CODE code
, rtx x
, rtx y
)
3454 /* All floating point compares return CCFP if it is an equality
3455 comparison, and CCFPE otherwise. */
3456 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
3483 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
3485 && (code
== EQ
|| code
== NE
|| code
== LT
|| code
== GE
)
3486 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
|| GET_CODE (x
) == AND
3487 || GET_CODE (x
) == NEG
))
3490 /* A compare with a shifted operand. Because of canonicalization,
3491 the comparison will have to be swapped when we emit the assembly
3493 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
3494 && (REG_P (y
) || GET_CODE (y
) == SUBREG
)
3495 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
3496 || GET_CODE (x
) == LSHIFTRT
3497 || GET_CODE (x
) == ZERO_EXTEND
|| GET_CODE (x
) == SIGN_EXTEND
))
3500 /* Similarly for a negated operand, but we can only do this for
3502 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
3503 && (REG_P (y
) || GET_CODE (y
) == SUBREG
)
3504 && (code
== EQ
|| code
== NE
)
3505 && GET_CODE (x
) == NEG
)
3508 /* A compare of a mode narrower than SI mode against zero can be done
3509 by extending the value in the comparison. */
3510 if ((GET_MODE (x
) == QImode
|| GET_MODE (x
) == HImode
)
3512 /* Only use sign-extension if we really need it. */
3513 return ((code
== GT
|| code
== GE
|| code
== LE
|| code
== LT
)
3514 ? CC_SESWPmode
: CC_ZESWPmode
);
3516 /* For everything else, return CCmode. */
3521 aarch64_get_condition_code (rtx x
)
3523 enum machine_mode mode
= GET_MODE (XEXP (x
, 0));
3524 enum rtx_code comp_code
= GET_CODE (x
);
3526 if (GET_MODE_CLASS (mode
) != MODE_CC
)
3527 mode
= SELECT_CC_MODE (comp_code
, XEXP (x
, 0), XEXP (x
, 1));
3535 case GE
: return AARCH64_GE
;
3536 case GT
: return AARCH64_GT
;
3537 case LE
: return AARCH64_LS
;
3538 case LT
: return AARCH64_MI
;
3539 case NE
: return AARCH64_NE
;
3540 case EQ
: return AARCH64_EQ
;
3541 case ORDERED
: return AARCH64_VC
;
3542 case UNORDERED
: return AARCH64_VS
;
3543 case UNLT
: return AARCH64_LT
;
3544 case UNLE
: return AARCH64_LE
;
3545 case UNGT
: return AARCH64_HI
;
3546 case UNGE
: return AARCH64_PL
;
3554 case NE
: return AARCH64_NE
;
3555 case EQ
: return AARCH64_EQ
;
3556 case GE
: return AARCH64_GE
;
3557 case GT
: return AARCH64_GT
;
3558 case LE
: return AARCH64_LE
;
3559 case LT
: return AARCH64_LT
;
3560 case GEU
: return AARCH64_CS
;
3561 case GTU
: return AARCH64_HI
;
3562 case LEU
: return AARCH64_LS
;
3563 case LTU
: return AARCH64_CC
;
3573 case NE
: return AARCH64_NE
;
3574 case EQ
: return AARCH64_EQ
;
3575 case GE
: return AARCH64_LE
;
3576 case GT
: return AARCH64_LT
;
3577 case LE
: return AARCH64_GE
;
3578 case LT
: return AARCH64_GT
;
3579 case GEU
: return AARCH64_LS
;
3580 case GTU
: return AARCH64_CC
;
3581 case LEU
: return AARCH64_CS
;
3582 case LTU
: return AARCH64_HI
;
3590 case NE
: return AARCH64_NE
;
3591 case EQ
: return AARCH64_EQ
;
3592 case GE
: return AARCH64_PL
;
3593 case LT
: return AARCH64_MI
;
3601 case NE
: return AARCH64_NE
;
3602 case EQ
: return AARCH64_EQ
;
3614 aarch64_const_vec_all_same_in_range_p (rtx x
,
3615 HOST_WIDE_INT minval
,
3616 HOST_WIDE_INT maxval
)
3618 HOST_WIDE_INT firstval
;
3621 if (GET_CODE (x
) != CONST_VECTOR
3622 || GET_MODE_CLASS (GET_MODE (x
)) != MODE_VECTOR_INT
)
3625 firstval
= INTVAL (CONST_VECTOR_ELT (x
, 0));
3626 if (firstval
< minval
|| firstval
> maxval
)
3629 count
= CONST_VECTOR_NUNITS (x
);
3630 for (i
= 1; i
< count
; i
++)
3631 if (INTVAL (CONST_VECTOR_ELT (x
, i
)) != firstval
)
3638 aarch64_const_vec_all_same_int_p (rtx x
, HOST_WIDE_INT val
)
3640 return aarch64_const_vec_all_same_in_range_p (x
, val
, val
);
3644 bit_count (unsigned HOST_WIDE_INT value
)
3658 aarch64_print_operand (FILE *f
, rtx x
, char code
)
3662 /* An integer or symbol address without a preceding # sign. */
3664 switch (GET_CODE (x
))
3667 fprintf (f
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
3671 output_addr_const (f
, x
);
3675 if (GET_CODE (XEXP (x
, 0)) == PLUS
3676 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
3678 output_addr_const (f
, x
);
3684 output_operand_lossage ("Unsupported operand for code '%c'", code
);
3689 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3693 if (!CONST_INT_P (x
)
3694 || (n
= exact_log2 (INTVAL (x
) & ~7)) <= 0)
3696 output_operand_lossage ("invalid operand for '%%%c'", code
);
3712 output_operand_lossage ("invalid operand for '%%%c'", code
);
3722 /* Print N such that 2^N == X. */
3723 if (!CONST_INT_P (x
) || (n
= exact_log2 (INTVAL (x
))) < 0)
3725 output_operand_lossage ("invalid operand for '%%%c'", code
);
3729 asm_fprintf (f
, "%d", n
);
3734 /* Print the number of non-zero bits in X (a const_int). */
3735 if (!CONST_INT_P (x
))
3737 output_operand_lossage ("invalid operand for '%%%c'", code
);
3741 asm_fprintf (f
, "%u", bit_count (INTVAL (x
)));
3745 /* Print the higher numbered register of a pair (TImode) of regs. */
3746 if (!REG_P (x
) || !GP_REGNUM_P (REGNO (x
) + 1))
3748 output_operand_lossage ("invalid operand for '%%%c'", code
);
3752 asm_fprintf (f
, "%s", reg_names
[REGNO (x
) + 1]);
3758 /* Print a condition (eq, ne, etc). */
3760 /* CONST_TRUE_RTX means always -- that's the default. */
3761 if (x
== const_true_rtx
)
3764 if (!COMPARISON_P (x
))
3766 output_operand_lossage ("invalid operand for '%%%c'", code
);
3770 cond_code
= aarch64_get_condition_code (x
);
3771 gcc_assert (cond_code
>= 0);
3772 fputs (aarch64_condition_codes
[cond_code
], f
);
3779 /* Print the inverse of a condition (eq <-> ne, etc). */
3781 /* CONST_TRUE_RTX means never -- that's the default. */
3782 if (x
== const_true_rtx
)
3788 if (!COMPARISON_P (x
))
3790 output_operand_lossage ("invalid operand for '%%%c'", code
);
3793 cond_code
= aarch64_get_condition_code (x
);
3794 gcc_assert (cond_code
>= 0);
3795 fputs (aarch64_condition_codes
[AARCH64_INVERSE_CONDITION_CODE
3805 /* Print a scalar FP/SIMD register name. */
3806 if (!REG_P (x
) || !FP_REGNUM_P (REGNO (x
)))
3808 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code
);
3811 asm_fprintf (f
, "%c%d", code
, REGNO (x
) - V0_REGNUM
);
3818 /* Print the first FP/SIMD register name in a list. */
3819 if (!REG_P (x
) || !FP_REGNUM_P (REGNO (x
)))
3821 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code
);
3824 asm_fprintf (f
, "v%d", REGNO (x
) - V0_REGNUM
+ (code
- 'S'));
3828 /* Print bottom 16 bits of integer constant in hex. */
3829 if (!CONST_INT_P (x
))
3831 output_operand_lossage ("invalid operand for '%%%c'", code
);
3834 asm_fprintf (f
, "0x%wx", UINTVAL (x
) & 0xffff);
3839 /* Print a general register name or the zero register (32-bit or
3842 || (CONST_DOUBLE_P (x
) && aarch64_float_const_zero_rtx_p (x
)))
3844 asm_fprintf (f
, "%czr", code
);
3848 if (REG_P (x
) && GP_REGNUM_P (REGNO (x
)))
3850 asm_fprintf (f
, "%c%d", code
, REGNO (x
) - R0_REGNUM
);
3854 if (REG_P (x
) && REGNO (x
) == SP_REGNUM
)
3856 asm_fprintf (f
, "%ssp", code
== 'w' ? "w" : "");
3863 /* Print a normal operand, if it's a general register, then we
3867 output_operand_lossage ("missing operand");
3871 switch (GET_CODE (x
))
3874 asm_fprintf (f
, "%s", reg_names
[REGNO (x
)]);
3878 aarch64_memory_reference_mode
= GET_MODE (x
);
3879 output_address (XEXP (x
, 0));
3884 output_addr_const (asm_out_file
, x
);
3888 asm_fprintf (f
, "%wd", INTVAL (x
));
3892 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_VECTOR_INT
)
3895 aarch64_const_vec_all_same_in_range_p (x
,
3897 HOST_WIDE_INT_MAX
));
3898 asm_fprintf (f
, "%wd", INTVAL (CONST_VECTOR_ELT (x
, 0)));
3900 else if (aarch64_simd_imm_zero_p (x
, GET_MODE (x
)))
3909 /* CONST_DOUBLE can represent a double-width integer.
3910 In this case, the mode of x is VOIDmode. */
3911 if (GET_MODE (x
) == VOIDmode
)
3913 else if (aarch64_float_const_zero_rtx_p (x
))
3918 else if (aarch64_float_const_representable_p (x
))
3921 char float_buf
[buf_size
] = {'\0'};
3923 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
3924 real_to_decimal_for_mode (float_buf
, &r
,
3927 asm_fprintf (asm_out_file
, "%s", float_buf
);
3931 output_operand_lossage ("invalid constant");
3934 output_operand_lossage ("invalid operand");
3940 if (GET_CODE (x
) == HIGH
)
3943 switch (aarch64_classify_symbolic_expression (x
, SYMBOL_CONTEXT_ADR
))
3945 case SYMBOL_SMALL_GOT
:
3946 asm_fprintf (asm_out_file
, ":got:");
3949 case SYMBOL_SMALL_TLSGD
:
3950 asm_fprintf (asm_out_file
, ":tlsgd:");
3953 case SYMBOL_SMALL_TLSDESC
:
3954 asm_fprintf (asm_out_file
, ":tlsdesc:");
3957 case SYMBOL_SMALL_GOTTPREL
:
3958 asm_fprintf (asm_out_file
, ":gottprel:");
3961 case SYMBOL_SMALL_TPREL
:
3962 asm_fprintf (asm_out_file
, ":tprel:");
3965 case SYMBOL_TINY_GOT
:
3972 output_addr_const (asm_out_file
, x
);
3976 switch (aarch64_classify_symbolic_expression (x
, SYMBOL_CONTEXT_ADR
))
3978 case SYMBOL_SMALL_GOT
:
3979 asm_fprintf (asm_out_file
, ":lo12:");
3982 case SYMBOL_SMALL_TLSGD
:
3983 asm_fprintf (asm_out_file
, ":tlsgd_lo12:");
3986 case SYMBOL_SMALL_TLSDESC
:
3987 asm_fprintf (asm_out_file
, ":tlsdesc_lo12:");
3990 case SYMBOL_SMALL_GOTTPREL
:
3991 asm_fprintf (asm_out_file
, ":gottprel_lo12:");
3994 case SYMBOL_SMALL_TPREL
:
3995 asm_fprintf (asm_out_file
, ":tprel_lo12_nc:");
3998 case SYMBOL_TINY_GOT
:
3999 asm_fprintf (asm_out_file
, ":got:");
4005 output_addr_const (asm_out_file
, x
);
4010 switch (aarch64_classify_symbolic_expression (x
, SYMBOL_CONTEXT_ADR
))
4012 case SYMBOL_SMALL_TPREL
:
4013 asm_fprintf (asm_out_file
, ":tprel_hi12:");
4018 output_addr_const (asm_out_file
, x
);
4022 output_operand_lossage ("invalid operand prefix '%%%c'", code
);
4028 aarch64_print_operand_address (FILE *f
, rtx x
)
4030 struct aarch64_address_info addr
;
4032 if (aarch64_classify_address (&addr
, x
, aarch64_memory_reference_mode
,
4036 case ADDRESS_REG_IMM
:
4037 if (addr
.offset
== const0_rtx
)
4038 asm_fprintf (f
, "[%s]", reg_names
[REGNO (addr
.base
)]);
4040 asm_fprintf (f
, "[%s, %wd]", reg_names
[REGNO (addr
.base
)],
4041 INTVAL (addr
.offset
));
4044 case ADDRESS_REG_REG
:
4045 if (addr
.shift
== 0)
4046 asm_fprintf (f
, "[%s, %s]", reg_names
[REGNO (addr
.base
)],
4047 reg_names
[REGNO (addr
.offset
)]);
4049 asm_fprintf (f
, "[%s, %s, lsl %u]", reg_names
[REGNO (addr
.base
)],
4050 reg_names
[REGNO (addr
.offset
)], addr
.shift
);
4053 case ADDRESS_REG_UXTW
:
4054 if (addr
.shift
== 0)
4055 asm_fprintf (f
, "[%s, w%d, uxtw]", reg_names
[REGNO (addr
.base
)],
4056 REGNO (addr
.offset
) - R0_REGNUM
);
4058 asm_fprintf (f
, "[%s, w%d, uxtw %u]", reg_names
[REGNO (addr
.base
)],
4059 REGNO (addr
.offset
) - R0_REGNUM
, addr
.shift
);
4062 case ADDRESS_REG_SXTW
:
4063 if (addr
.shift
== 0)
4064 asm_fprintf (f
, "[%s, w%d, sxtw]", reg_names
[REGNO (addr
.base
)],
4065 REGNO (addr
.offset
) - R0_REGNUM
);
4067 asm_fprintf (f
, "[%s, w%d, sxtw %u]", reg_names
[REGNO (addr
.base
)],
4068 REGNO (addr
.offset
) - R0_REGNUM
, addr
.shift
);
4071 case ADDRESS_REG_WB
:
4072 switch (GET_CODE (x
))
4075 asm_fprintf (f
, "[%s, %d]!", reg_names
[REGNO (addr
.base
)],
4076 GET_MODE_SIZE (aarch64_memory_reference_mode
));
4079 asm_fprintf (f
, "[%s], %d", reg_names
[REGNO (addr
.base
)],
4080 GET_MODE_SIZE (aarch64_memory_reference_mode
));
4083 asm_fprintf (f
, "[%s, -%d]!", reg_names
[REGNO (addr
.base
)],
4084 GET_MODE_SIZE (aarch64_memory_reference_mode
));
4087 asm_fprintf (f
, "[%s], -%d", reg_names
[REGNO (addr
.base
)],
4088 GET_MODE_SIZE (aarch64_memory_reference_mode
));
4091 asm_fprintf (f
, "[%s, %wd]!", reg_names
[REGNO (addr
.base
)],
4092 INTVAL (addr
.offset
));
4095 asm_fprintf (f
, "[%s], %wd", reg_names
[REGNO (addr
.base
)],
4096 INTVAL (addr
.offset
));
4103 case ADDRESS_LO_SUM
:
4104 asm_fprintf (f
, "[%s, #:lo12:", reg_names
[REGNO (addr
.base
)]);
4105 output_addr_const (f
, addr
.offset
);
4106 asm_fprintf (f
, "]");
4109 case ADDRESS_SYMBOLIC
:
4113 output_addr_const (f
, x
);
4117 aarch64_label_mentioned_p (rtx x
)
4122 if (GET_CODE (x
) == LABEL_REF
)
4125 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4126 referencing instruction, but they are constant offsets, not
4128 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
4131 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
4132 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
4138 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
4139 if (aarch64_label_mentioned_p (XVECEXP (x
, i
, j
)))
4142 else if (fmt
[i
] == 'e' && aarch64_label_mentioned_p (XEXP (x
, i
)))
4149 /* Implement REGNO_REG_CLASS. */
4152 aarch64_regno_regclass (unsigned regno
)
4154 if (GP_REGNUM_P (regno
))
4155 return GENERAL_REGS
;
4157 if (regno
== SP_REGNUM
)
4160 if (regno
== FRAME_POINTER_REGNUM
4161 || regno
== ARG_POINTER_REGNUM
)
4162 return POINTER_REGS
;
4164 if (FP_REGNUM_P (regno
))
4165 return FP_LO_REGNUM_P (regno
) ? FP_LO_REGS
: FP_REGS
;
4170 /* Try a machine-dependent way of reloading an illegitimate address
4171 operand. If we find one, push the reload and return the new rtx. */
4174 aarch64_legitimize_reload_address (rtx
*x_p
,
4175 enum machine_mode mode
,
4176 int opnum
, int type
,
4177 int ind_levels ATTRIBUTE_UNUSED
)
4181 /* Do not allow mem (plus (reg, const)) if vector struct mode. */
4182 if (aarch64_vect_struct_mode_p (mode
)
4183 && GET_CODE (x
) == PLUS
4184 && REG_P (XEXP (x
, 0))
4185 && CONST_INT_P (XEXP (x
, 1)))
4189 push_reload (orig_rtx
, NULL_RTX
, x_p
, NULL
,
4190 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
4191 opnum
, (enum reload_type
) type
);
4195 /* We must recognize output that we have already generated ourselves. */
4196 if (GET_CODE (x
) == PLUS
4197 && GET_CODE (XEXP (x
, 0)) == PLUS
4198 && REG_P (XEXP (XEXP (x
, 0), 0))
4199 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
4200 && CONST_INT_P (XEXP (x
, 1)))
4202 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
4203 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
4204 opnum
, (enum reload_type
) type
);
4208 /* We wish to handle large displacements off a base register by splitting
4209 the addend across an add and the mem insn. This can cut the number of
4210 extra insns needed from 3 to 1. It is only useful for load/store of a
4211 single register with 12 bit offset field. */
4212 if (GET_CODE (x
) == PLUS
4213 && REG_P (XEXP (x
, 0))
4214 && CONST_INT_P (XEXP (x
, 1))
4215 && HARD_REGISTER_P (XEXP (x
, 0))
4218 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x
, 0)), true))
4220 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
4221 HOST_WIDE_INT low
= val
& 0xfff;
4222 HOST_WIDE_INT high
= val
- low
;
4225 enum machine_mode xmode
= GET_MODE (x
);
4227 /* In ILP32, xmode can be either DImode or SImode. */
4228 gcc_assert (xmode
== DImode
|| xmode
== SImode
);
4230 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4231 BLKmode alignment. */
4232 if (GET_MODE_SIZE (mode
) == 0)
4235 offs
= low
% GET_MODE_SIZE (mode
);
4237 /* Align misaligned offset by adjusting high part to compensate. */
4240 if (aarch64_uimm12_shift (high
+ offs
))
4249 offs
= GET_MODE_SIZE (mode
) - offs
;
4251 high
= high
+ (low
& 0x1000) - offs
;
4256 /* Check for overflow. */
4257 if (high
+ low
!= val
)
4260 cst
= GEN_INT (high
);
4261 if (!aarch64_uimm12_shift (high
))
4262 cst
= force_const_mem (xmode
, cst
);
4264 /* Reload high part into base reg, leaving the low part
4265 in the mem instruction.
4266 Note that replacing this gen_rtx_PLUS with plus_constant is
4267 wrong in this case because we rely on the
4268 (plus (plus reg c1) c2) structure being preserved so that
4269 XEXP (*p, 0) in push_reload below uses the correct term. */
4270 x
= gen_rtx_PLUS (xmode
,
4271 gen_rtx_PLUS (xmode
, XEXP (x
, 0), cst
),
4274 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
4275 BASE_REG_CLASS
, xmode
, VOIDmode
, 0, 0,
4276 opnum
, (enum reload_type
) type
);
4285 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED
, rtx x
,
4287 enum machine_mode mode
,
4288 secondary_reload_info
*sri
)
4290 /* Without the TARGET_SIMD instructions we cannot move a Q register
4291 to a Q register directly. We need a scratch. */
4292 if (REG_P (x
) && (mode
== TFmode
|| mode
== TImode
) && mode
== GET_MODE (x
)
4293 && FP_REGNUM_P (REGNO (x
)) && !TARGET_SIMD
4294 && reg_class_subset_p (rclass
, FP_REGS
))
4297 sri
->icode
= CODE_FOR_aarch64_reload_movtf
;
4298 else if (mode
== TImode
)
4299 sri
->icode
= CODE_FOR_aarch64_reload_movti
;
4303 /* A TFmode or TImode memory access should be handled via an FP_REGS
4304 because AArch64 has richer addressing modes for LDR/STR instructions
4305 than LDP/STP instructions. */
4306 if (!TARGET_GENERAL_REGS_ONLY
&& rclass
== GENERAL_REGS
4307 && GET_MODE_SIZE (mode
) == 16 && MEM_P (x
))
4310 if (rclass
== FP_REGS
&& (mode
== TImode
|| mode
== TFmode
) && CONSTANT_P(x
))
4311 return GENERAL_REGS
;
4317 aarch64_can_eliminate (const int from
, const int to
)
4319 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4320 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4322 if (frame_pointer_needed
)
4324 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4326 if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
4328 if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
4329 && !cfun
->calls_alloca
)
4331 if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4341 aarch64_initial_elimination_offset (unsigned from
, unsigned to
)
4343 aarch64_layout_frame ();
4345 if (to
== HARD_FRAME_POINTER_REGNUM
)
4347 if (from
== ARG_POINTER_REGNUM
)
4348 return cfun
->machine
->frame
.frame_size
- crtl
->outgoing_args_size
;
4350 if (from
== FRAME_POINTER_REGNUM
)
4351 return (cfun
->machine
->frame
.hard_fp_offset
4352 - cfun
->machine
->frame
.saved_varargs_size
);
4355 if (to
== STACK_POINTER_REGNUM
)
4357 if (from
== FRAME_POINTER_REGNUM
)
4358 return (cfun
->machine
->frame
.frame_size
4359 - cfun
->machine
->frame
.saved_varargs_size
);
4362 return cfun
->machine
->frame
.frame_size
;
4365 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4369 aarch64_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
4373 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
4378 aarch64_asm_trampoline_template (FILE *f
)
4382 asm_fprintf (f
, "\tldr\tw%d, .+16\n", IP1_REGNUM
- R0_REGNUM
);
4383 asm_fprintf (f
, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM
- R0_REGNUM
);
4387 asm_fprintf (f
, "\tldr\t%s, .+16\n", reg_names
[IP1_REGNUM
]);
4388 asm_fprintf (f
, "\tldr\t%s, .+20\n", reg_names
[STATIC_CHAIN_REGNUM
]);
4390 asm_fprintf (f
, "\tbr\t%s\n", reg_names
[IP1_REGNUM
]);
4391 assemble_aligned_integer (4, const0_rtx
);
4392 assemble_aligned_integer (POINTER_BYTES
, const0_rtx
);
4393 assemble_aligned_integer (POINTER_BYTES
, const0_rtx
);
4397 aarch64_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
4399 rtx fnaddr
, mem
, a_tramp
;
4400 const int tramp_code_sz
= 16;
4402 /* Don't need to copy the trailing D-words, we fill those in below. */
4403 emit_block_move (m_tramp
, assemble_trampoline_template (),
4404 GEN_INT (tramp_code_sz
), BLOCK_OP_NORMAL
);
4405 mem
= adjust_address (m_tramp
, ptr_mode
, tramp_code_sz
);
4406 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
4407 if (GET_MODE (fnaddr
) != ptr_mode
)
4408 fnaddr
= convert_memory_address (ptr_mode
, fnaddr
);
4409 emit_move_insn (mem
, fnaddr
);
4411 mem
= adjust_address (m_tramp
, ptr_mode
, tramp_code_sz
+ POINTER_BYTES
);
4412 emit_move_insn (mem
, chain_value
);
4414 /* XXX We should really define a "clear_cache" pattern and use
4415 gen_clear_cache(). */
4416 a_tramp
= XEXP (m_tramp
, 0);
4417 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
4418 LCT_NORMAL
, VOIDmode
, 2, a_tramp
, ptr_mode
,
4419 plus_constant (ptr_mode
, a_tramp
, TRAMPOLINE_SIZE
),
4423 static unsigned char
4424 aarch64_class_max_nregs (reg_class_t regclass
, enum machine_mode mode
)
4428 case CALLER_SAVE_REGS
:
4435 aarch64_vector_mode_p (mode
) ? (GET_MODE_SIZE (mode
) + 15) / 16 :
4436 (GET_MODE_SIZE (mode
) + 7) / 8;
4450 aarch64_preferred_reload_class (rtx x
, reg_class_t regclass
)
4452 if (regclass
== POINTER_REGS
)
4453 return GENERAL_REGS
;
4455 if (regclass
== STACK_REG
)
4458 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x
)), POINTER_REGS
))
4464 /* If it's an integer immediate that MOVI can't handle, then
4465 FP_REGS is not an option, so we return NO_REGS instead. */
4466 if (CONST_INT_P (x
) && reg_class_subset_p (regclass
, FP_REGS
)
4467 && !aarch64_simd_imm_scalar_p (x
, GET_MODE (x
)))
4470 /* Register eliminiation can result in a request for
4471 SP+constant->FP_REGS. We cannot support such operations which
4472 use SP as source and an FP_REG as destination, so reject out
4474 if (! reg_class_subset_p (regclass
, GENERAL_REGS
) && GET_CODE (x
) == PLUS
)
4476 rtx lhs
= XEXP (x
, 0);
4478 /* Look through a possible SUBREG introduced by ILP32. */
4479 if (GET_CODE (lhs
) == SUBREG
)
4480 lhs
= SUBREG_REG (lhs
);
4482 gcc_assert (REG_P (lhs
));
4483 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs
)),
/* Print symbol NAME to file F, applying the user-label prefix (%U).  */

void
aarch64_asm_output_labelref (FILE* f, const char *name)
{
  asm_fprintf (f, "%U%s", name);
}
4498 aarch64_elf_asm_constructor (rtx symbol
, int priority
)
4500 if (priority
== DEFAULT_INIT_PRIORITY
)
4501 default_ctor_section_asm_out_constructor (symbol
, priority
);
4506 snprintf (buf
, sizeof (buf
), ".init_array.%.5u", priority
);
4507 s
= get_section (buf
, SECTION_WRITE
, NULL
);
4508 switch_to_section (s
);
4509 assemble_align (POINTER_SIZE
);
4510 assemble_aligned_integer (POINTER_BYTES
, symbol
);
4515 aarch64_elf_asm_destructor (rtx symbol
, int priority
)
4517 if (priority
== DEFAULT_INIT_PRIORITY
)
4518 default_dtor_section_asm_out_destructor (symbol
, priority
);
4523 snprintf (buf
, sizeof (buf
), ".fini_array.%.5u", priority
);
4524 s
= get_section (buf
, SECTION_WRITE
, NULL
);
4525 switch_to_section (s
);
4526 assemble_align (POINTER_SIZE
);
4527 assemble_aligned_integer (POINTER_BYTES
, symbol
);
4532 aarch64_output_casesi (rtx
*operands
)
4536 rtx diff_vec
= PATTERN (NEXT_INSN (as_a
<rtx_insn
*> (operands
[2])));
4538 static const char *const patterns
[4][2] =
4541 "ldrb\t%w3, [%0,%w1,uxtw]",
4542 "add\t%3, %4, %w3, sxtb #2"
4545 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4546 "add\t%3, %4, %w3, sxth #2"
4549 "ldr\t%w3, [%0,%w1,uxtw #2]",
4550 "add\t%3, %4, %w3, sxtw #2"
4552 /* We assume that DImode is only generated when not optimizing and
4553 that we don't really need 64-bit address offsets. That would
4554 imply an object file with 8GB of code in a single function! */
4556 "ldr\t%w3, [%0,%w1,uxtw #2]",
4557 "add\t%3, %4, %w3, sxtw #2"
4561 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
4563 index
= exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec
)));
4565 gcc_assert (index
>= 0 && index
<= 3);
4567 /* Need to implement table size reduction, by chaning the code below. */
4568 output_asm_insn (patterns
[index
][0], operands
);
4569 ASM_GENERATE_INTERNAL_LABEL (label
, "Lrtx", CODE_LABEL_NUMBER (operands
[2]));
4570 snprintf (buf
, sizeof (buf
),
4571 "adr\t%%4, %s", targetm
.strip_name_encoding (label
));
4572 output_asm_insn (buf
, operands
);
4573 output_asm_insn (patterns
[index
][1], operands
);
4574 output_asm_insn ("br\t%3", operands
);
4575 assemble_label (asm_out_file
, label
);
4580 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4581 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4585 aarch64_uxt_size (int shift
, HOST_WIDE_INT mask
)
4587 if (shift
>= 0 && shift
<= 3)
4590 for (size
= 8; size
<= 32; size
*= 2)
4592 HOST_WIDE_INT bits
= ((HOST_WIDE_INT
)1U << size
) - 1;
4593 if (mask
== bits
<< shift
)
4601 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
4602 const_rtx x ATTRIBUTE_UNUSED
)
4604 /* We can't use blocks for constants when we're using a per-function
4610 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED
,
4611 rtx x ATTRIBUTE_UNUSED
,
4612 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED
)
4614 /* Force all constant pool entries into the current function section. */
4615 return function_section (current_function_decl
);
4621 /* Helper function for rtx cost calculation. Strip a shift expression
4622 from X. Returns the inner operand if successful, or the original
4623 expression on failure. */
4625 aarch64_strip_shift (rtx x
)
4629 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
4630 we can convert both to ROR during final output. */
4631 if ((GET_CODE (op
) == ASHIFT
4632 || GET_CODE (op
) == ASHIFTRT
4633 || GET_CODE (op
) == LSHIFTRT
4634 || GET_CODE (op
) == ROTATERT
4635 || GET_CODE (op
) == ROTATE
)
4636 && CONST_INT_P (XEXP (op
, 1)))
4637 return XEXP (op
, 0);
4639 if (GET_CODE (op
) == MULT
4640 && CONST_INT_P (XEXP (op
, 1))
4641 && ((unsigned) exact_log2 (INTVAL (XEXP (op
, 1)))) < 64)
4642 return XEXP (op
, 0);
4647 /* Helper function for rtx cost calculation. Strip an extend
4648 expression from X. Returns the inner operand if successful, or the
4649 original expression on failure. We deal with a number of possible
4650 canonicalization variations here. */
4652 aarch64_strip_extend (rtx x
)
4656 /* Zero and sign extraction of a widened value. */
4657 if ((GET_CODE (op
) == ZERO_EXTRACT
|| GET_CODE (op
) == SIGN_EXTRACT
)
4658 && XEXP (op
, 2) == const0_rtx
4659 && GET_CODE (XEXP (op
, 0)) == MULT
4660 && aarch64_is_extend_from_extract (GET_MODE (op
), XEXP (XEXP (op
, 0), 1),
4662 return XEXP (XEXP (op
, 0), 0);
4664 /* It can also be represented (for zero-extend) as an AND with an
4666 if (GET_CODE (op
) == AND
4667 && GET_CODE (XEXP (op
, 0)) == MULT
4668 && CONST_INT_P (XEXP (XEXP (op
, 0), 1))
4669 && CONST_INT_P (XEXP (op
, 1))
4670 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op
, 0), 1))),
4671 INTVAL (XEXP (op
, 1))) != 0)
4672 return XEXP (XEXP (op
, 0), 0);
4674 /* Now handle extended register, as this may also have an optional
4675 left shift by 1..4. */
4676 if (GET_CODE (op
) == ASHIFT
4677 && CONST_INT_P (XEXP (op
, 1))
4678 && ((unsigned HOST_WIDE_INT
) INTVAL (XEXP (op
, 1))) <= 4)
4681 if (GET_CODE (op
) == ZERO_EXTEND
4682 || GET_CODE (op
) == SIGN_EXTEND
)
4691 /* Helper function for rtx cost calculation. Calculate the cost of
4692 a MULT, which may be part of a multiply-accumulate rtx. Return
4693 the calculated cost of the expression, recursing manually in to
4694 operands where needed. */
4697 aarch64_rtx_mult_cost (rtx x
, int code
, int outer
, bool speed
)
4700 const struct cpu_cost_table
*extra_cost
4701 = aarch64_tune_params
->insn_extra_cost
;
4703 bool maybe_fma
= (outer
== PLUS
|| outer
== MINUS
);
4704 enum machine_mode mode
= GET_MODE (x
);
4706 gcc_checking_assert (code
== MULT
);
4711 if (VECTOR_MODE_P (mode
))
4712 mode
= GET_MODE_INNER (mode
);
4714 /* Integer multiply/fma. */
4715 if (GET_MODE_CLASS (mode
) == MODE_INT
)
4717 /* The multiply will be canonicalized as a shift, cost it as such. */
4718 if (CONST_INT_P (op1
)
4719 && exact_log2 (INTVAL (op1
)) > 0)
4724 /* ADD (shifted register). */
4725 cost
+= extra_cost
->alu
.arith_shift
;
4727 /* LSL (immediate). */
4728 cost
+= extra_cost
->alu
.shift
;
4731 cost
+= rtx_cost (op0
, GET_CODE (op0
), 0, speed
);
4736 /* Integer multiplies or FMAs have zero/sign extending variants. */
4737 if ((GET_CODE (op0
) == ZERO_EXTEND
4738 && GET_CODE (op1
) == ZERO_EXTEND
)
4739 || (GET_CODE (op0
) == SIGN_EXTEND
4740 && GET_CODE (op1
) == SIGN_EXTEND
))
4742 cost
+= rtx_cost (XEXP (op0
, 0), MULT
, 0, speed
)
4743 + rtx_cost (XEXP (op1
, 0), MULT
, 1, speed
);
4748 /* MADD/SMADDL/UMADDL. */
4749 cost
+= extra_cost
->mult
[0].extend_add
;
4751 /* MUL/SMULL/UMULL. */
4752 cost
+= extra_cost
->mult
[0].extend
;
4758 /* This is either an integer multiply or an FMA. In both cases
4759 we want to recurse and cost the operands. */
4760 cost
+= rtx_cost (op0
, MULT
, 0, speed
)
4761 + rtx_cost (op1
, MULT
, 1, speed
);
4767 cost
+= extra_cost
->mult
[mode
== DImode
].add
;
4770 cost
+= extra_cost
->mult
[mode
== DImode
].simple
;
4779 /* Floating-point FMA/FMUL can also support negations of the
4781 if (GET_CODE (op0
) == NEG
)
4782 op0
= XEXP (op0
, 0);
4783 if (GET_CODE (op1
) == NEG
)
4784 op1
= XEXP (op1
, 0);
4787 /* FMADD/FNMADD/FNMSUB/FMSUB. */
4788 cost
+= extra_cost
->fp
[mode
== DFmode
].fma
;
4791 cost
+= extra_cost
->fp
[mode
== DFmode
].mult
;
4794 cost
+= rtx_cost (op0
, MULT
, 0, speed
)
4795 + rtx_cost (op1
, MULT
, 1, speed
);
4801 aarch64_address_cost (rtx x
,
4802 enum machine_mode mode
,
4803 addr_space_t as ATTRIBUTE_UNUSED
,
4806 enum rtx_code c
= GET_CODE (x
);
4807 const struct cpu_addrcost_table
*addr_cost
= aarch64_tune_params
->addr_cost
;
4808 struct aarch64_address_info info
;
4812 if (!aarch64_classify_address (&info
, x
, mode
, c
, false))
4814 if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
)
4816 /* This is a CONST or SYMBOL ref which will be split
4817 in a different way depending on the code model in use.
4818 Cost it through the generic infrastructure. */
4819 int cost_symbol_ref
= rtx_cost (x
, MEM
, 1, speed
);
4820 /* Divide through by the cost of one instruction to
4821 bring it to the same units as the address costs. */
4822 cost_symbol_ref
/= COSTS_N_INSNS (1);
4823 /* The cost is then the cost of preparing the address,
4824 followed by an immediate (possibly 0) offset. */
4825 return cost_symbol_ref
+ addr_cost
->imm_offset
;
4829 /* This is most likely a jump table from a case
4831 return addr_cost
->register_offset
;
4837 case ADDRESS_LO_SUM
:
4838 case ADDRESS_SYMBOLIC
:
4839 case ADDRESS_REG_IMM
:
4840 cost
+= addr_cost
->imm_offset
;
4843 case ADDRESS_REG_WB
:
4844 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== PRE_MODIFY
)
4845 cost
+= addr_cost
->pre_modify
;
4846 else if (c
== POST_INC
|| c
== POST_DEC
|| c
== POST_MODIFY
)
4847 cost
+= addr_cost
->post_modify
;
4853 case ADDRESS_REG_REG
:
4854 cost
+= addr_cost
->register_offset
;
4857 case ADDRESS_REG_UXTW
:
4858 case ADDRESS_REG_SXTW
:
4859 cost
+= addr_cost
->register_extend
;
4869 /* For the sake of calculating the cost of the shifted register
4870 component, we can treat same sized modes in the same way. */
4871 switch (GET_MODE_BITSIZE (mode
))
4874 cost
+= addr_cost
->addr_scale_costs
.hi
;
4878 cost
+= addr_cost
->addr_scale_costs
.si
;
4882 cost
+= addr_cost
->addr_scale_costs
.di
;
4885 /* We can't tell, or this is a 128-bit vector. */
4887 cost
+= addr_cost
->addr_scale_costs
.ti
;
4895 /* Return true if the RTX X in mode MODE is a zero or sign extract
4896 usable in an ADD or SUB (extended register) instruction. */
4898 aarch64_rtx_arith_op_extract_p (rtx x
, enum machine_mode mode
)
4900 /* Catch add with a sign extract.
4901 This is add_<optab><mode>_multp2. */
4902 if (GET_CODE (x
) == SIGN_EXTRACT
4903 || GET_CODE (x
) == ZERO_EXTRACT
)
4905 rtx op0
= XEXP (x
, 0);
4906 rtx op1
= XEXP (x
, 1);
4907 rtx op2
= XEXP (x
, 2);
4909 if (GET_CODE (op0
) == MULT
4910 && CONST_INT_P (op1
)
4911 && op2
== const0_rtx
4912 && CONST_INT_P (XEXP (op0
, 1))
4913 && aarch64_is_extend_from_extract (mode
,
4925 aarch64_frint_unspec_p (unsigned int u
)
4943 /* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
4944 storing it in *COST. Result is true if the total cost of the operation
4945 has now been calculated. */
4947 aarch64_if_then_else_costs (rtx op0
, rtx op1
, rtx op2
, int *cost
, bool speed
)
4951 enum rtx_code cmpcode
;
4953 if (COMPARISON_P (op0
))
4955 inner
= XEXP (op0
, 0);
4956 comparator
= XEXP (op0
, 1);
4957 cmpcode
= GET_CODE (op0
);
4962 comparator
= const0_rtx
;
4966 if (GET_CODE (op1
) == PC
|| GET_CODE (op2
) == PC
)
4968 /* Conditional branch. */
4969 if (GET_MODE_CLASS (GET_MODE (inner
)) == MODE_CC
)
4973 if (cmpcode
== NE
|| cmpcode
== EQ
)
4975 if (comparator
== const0_rtx
)
4977 /* TBZ/TBNZ/CBZ/CBNZ. */
4978 if (GET_CODE (inner
) == ZERO_EXTRACT
)
4980 *cost
+= rtx_cost (XEXP (inner
, 0), ZERO_EXTRACT
,
4984 *cost
+= rtx_cost (inner
, cmpcode
, 0, speed
);
4989 else if (cmpcode
== LT
|| cmpcode
== GE
)
4992 if (comparator
== const0_rtx
)
4997 else if (GET_MODE_CLASS (GET_MODE (inner
)) == MODE_CC
)
4999 /* It's a conditional operation based on the status flags,
5000 so it must be some flavor of CSEL. */
5002 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
5003 if (GET_CODE (op1
) == NEG
5004 || GET_CODE (op1
) == NOT
5005 || (GET_CODE (op1
) == PLUS
&& XEXP (op1
, 1) == const1_rtx
))
5006 op1
= XEXP (op1
, 0);
5008 *cost
+= rtx_cost (op1
, IF_THEN_ELSE
, 1, speed
);
5009 *cost
+= rtx_cost (op2
, IF_THEN_ELSE
, 2, speed
);
5013 /* We don't know what this is, cost all operands. */
5017 /* Calculate the cost of calculating X, storing it in *COST. Result
5018 is true if the total cost of the operation has now been calculated. */
5020 aarch64_rtx_costs (rtx x
, int code
, int outer ATTRIBUTE_UNUSED
,
5021 int param ATTRIBUTE_UNUSED
, int *cost
, bool speed
)
5024 const struct cpu_cost_table
*extra_cost
5025 = aarch64_tune_params
->insn_extra_cost
;
5026 enum machine_mode mode
= GET_MODE (x
);
5028 /* By default, assume that everything has equivalent cost to the
5029 cheapest instruction. Any additional costs are applied as a delta
5030 above this default. */
5031 *cost
= COSTS_N_INSNS (1);
5033 /* TODO: The cost infrastructure currently does not handle
5034 vector operations. Assume that all vector operations
5035 are equally expensive. */
5036 if (VECTOR_MODE_P (mode
))
5039 *cost
+= extra_cost
->vect
.alu
;
5046 /* The cost depends entirely on the operands to SET. */
5051 switch (GET_CODE (op0
))
5056 rtx address
= XEXP (op0
, 0);
5057 if (GET_MODE_CLASS (mode
) == MODE_INT
)
5058 *cost
+= extra_cost
->ldst
.store
;
5059 else if (mode
== SFmode
)
5060 *cost
+= extra_cost
->ldst
.storef
;
5061 else if (mode
== DFmode
)
5062 *cost
+= extra_cost
->ldst
.stored
;
5065 COSTS_N_INSNS (aarch64_address_cost (address
, mode
,
5069 *cost
+= rtx_cost (op1
, SET
, 1, speed
);
5073 if (! REG_P (SUBREG_REG (op0
)))
5074 *cost
+= rtx_cost (SUBREG_REG (op0
), SET
, 0, speed
);
5078 /* const0_rtx is in general free, but we will use an
5079 instruction to set a register to 0. */
5080 if (REG_P (op1
) || op1
== const0_rtx
)
5082 /* The cost is 1 per register copied. */
5083 int n_minus_1
= (GET_MODE_SIZE (GET_MODE (op0
)) - 1)
5085 *cost
= COSTS_N_INSNS (n_minus_1
+ 1);
5088 /* Cost is just the cost of the RHS of the set. */
5089 *cost
+= rtx_cost (op1
, SET
, 1, speed
);
5094 /* Bit-field insertion. Strip any redundant widening of
5095 the RHS to meet the width of the target. */
5096 if (GET_CODE (op1
) == SUBREG
)
5097 op1
= SUBREG_REG (op1
);
5098 if ((GET_CODE (op1
) == ZERO_EXTEND
5099 || GET_CODE (op1
) == SIGN_EXTEND
)
5100 && CONST_INT_P (XEXP (op0
, 1))
5101 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1
, 0)))
5102 >= INTVAL (XEXP (op0
, 1))))
5103 op1
= XEXP (op1
, 0);
5105 if (CONST_INT_P (op1
))
5107 /* MOV immediate is assumed to always be cheap. */
5108 *cost
= COSTS_N_INSNS (1);
5114 *cost
+= extra_cost
->alu
.bfi
;
5115 *cost
+= rtx_cost (op1
, (enum rtx_code
) code
, 1, speed
);
5121 /* We can't make sense of this, assume default cost. */
5122 *cost
= COSTS_N_INSNS (1);
5128 /* If an instruction can incorporate a constant within the
5129 instruction, the instruction's expression avoids calling
5130 rtx_cost() on the constant. If rtx_cost() is called on a
5131 constant, then it is usually because the constant must be
5132 moved into a register by one or more instructions.
5134 The exception is constant 0, which can be expressed
5135 as XZR/WZR and is therefore free. The exception to this is
5136 if we have (set (reg) (const0_rtx)) in which case we must cost
5137 the move. However, we can catch that when we cost the SET, so
5138 we don't need to consider that here. */
5139 if (x
== const0_rtx
)
5143 /* To an approximation, building any other constant is
5144 proportionally expensive to the number of instructions
5145 required to build that constant. This is true whether we
5146 are compiling for SPEED or otherwise. */
5147 *cost
= COSTS_N_INSNS (aarch64_build_constant (0,
5156 /* mov[df,sf]_aarch64. */
5157 if (aarch64_float_const_representable_p (x
))
5158 /* FMOV (scalar immediate). */
5159 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
5160 else if (!aarch64_float_const_zero_rtx_p (x
))
5162 /* This will be a load from memory. */
5164 *cost
+= extra_cost
->ldst
.loadd
;
5166 *cost
+= extra_cost
->ldst
.loadf
;
5169 /* Otherwise this is +0.0. We get this using MOVI d0, #0
5170 or MOV v0.s[0], wzr - neither of which are modeled by the
5171 cost tables. Just use the default cost. */
5181 /* For loads we want the base cost of a load, plus an
5182 approximation for the additional cost of the addressing
5184 rtx address
= XEXP (x
, 0);
5185 if (GET_MODE_CLASS (mode
) == MODE_INT
)
5186 *cost
+= extra_cost
->ldst
.load
;
5187 else if (mode
== SFmode
)
5188 *cost
+= extra_cost
->ldst
.loadf
;
5189 else if (mode
== DFmode
)
5190 *cost
+= extra_cost
->ldst
.loadd
;
5193 COSTS_N_INSNS (aarch64_address_cost (address
, mode
,
5202 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
5204 if (GET_RTX_CLASS (GET_CODE (op0
)) == RTX_COMPARE
5205 || GET_RTX_CLASS (GET_CODE (op0
)) == RTX_COMM_COMPARE
)
5208 *cost
+= rtx_cost (XEXP (op0
, 0), NEG
, 0, speed
);
5212 /* Cost this as SUB wzr, X. */
5213 op0
= CONST0_RTX (GET_MODE (x
));
5218 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
5220 /* Support (neg(fma...)) as a single instruction only if
5221 sign of zeros is unimportant. This matches the decision
5222 making in aarch64.md. */
5223 if (GET_CODE (op0
) == FMA
&& !HONOR_SIGNED_ZEROS (GET_MODE (op0
)))
5226 *cost
= rtx_cost (op0
, NEG
, 0, speed
);
5231 *cost
+= extra_cost
->fp
[mode
== DFmode
].neg
;
5240 *cost
+= extra_cost
->alu
.clz
;
5248 if (op1
== const0_rtx
5249 && GET_CODE (op0
) == AND
)
5255 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
)
5257 /* TODO: A write to the CC flags possibly costs extra, this
5258 needs encoding in the cost tables. */
5260 /* CC_ZESWPmode supports zero extend for free. */
5261 if (GET_MODE (x
) == CC_ZESWPmode
&& GET_CODE (op0
) == ZERO_EXTEND
)
5262 op0
= XEXP (op0
, 0);
5265 if (GET_CODE (op0
) == AND
)
5271 if (GET_CODE (op0
) == PLUS
)
5273 /* ADDS (and CMN alias). */
5278 if (GET_CODE (op0
) == MINUS
)
5285 if (GET_CODE (op1
) == NEG
)
5289 *cost
+= extra_cost
->alu
.arith
;
5291 *cost
+= rtx_cost (op0
, COMPARE
, 0, speed
);
5292 *cost
+= rtx_cost (XEXP (op1
, 0), NEG
, 1, speed
);
5298 Compare can freely swap the order of operands, and
5299 canonicalization puts the more complex operation first.
5300 But the integer MINUS logic expects the shift/extend
5301 operation in op1. */
5303 || (GET_CODE (op0
) == SUBREG
&& REG_P (SUBREG_REG (op0
)))))
5311 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
5315 *cost
+= extra_cost
->fp
[mode
== DFmode
].compare
;
5317 if (CONST_DOUBLE_P (op1
) && aarch64_float_const_zero_rtx_p (op1
))
5319 /* FCMP supports constant 0.0 for no extra cost. */
5333 /* Detect valid immediates. */
5334 if ((GET_MODE_CLASS (mode
) == MODE_INT
5335 || (GET_MODE_CLASS (mode
) == MODE_CC
5336 && GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
))
5337 && CONST_INT_P (op1
)
5338 && aarch64_uimm12_shift (INTVAL (op1
)))
5340 *cost
+= rtx_cost (op0
, MINUS
, 0, speed
);
5343 /* SUB(S) (immediate). */
5344 *cost
+= extra_cost
->alu
.arith
;
5349 /* Look for SUB (extended register). */
5350 if (aarch64_rtx_arith_op_extract_p (op1
, mode
))
5353 *cost
+= extra_cost
->alu
.arith_shift
;
5355 *cost
+= rtx_cost (XEXP (XEXP (op1
, 0), 0),
5356 (enum rtx_code
) GET_CODE (op1
),
5361 rtx new_op1
= aarch64_strip_extend (op1
);
5363 /* Cost this as an FMA-alike operation. */
5364 if ((GET_CODE (new_op1
) == MULT
5365 || GET_CODE (new_op1
) == ASHIFT
)
5368 *cost
+= aarch64_rtx_mult_cost (new_op1
, MULT
,
5369 (enum rtx_code
) code
,
5371 *cost
+= rtx_cost (op0
, MINUS
, 0, speed
);
5375 *cost
+= rtx_cost (new_op1
, MINUS
, 1, speed
);
5379 if (GET_MODE_CLASS (mode
) == MODE_INT
)
5381 *cost
+= extra_cost
->alu
.arith
;
5382 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5384 *cost
+= extra_cost
->fp
[mode
== DFmode
].addsub
;
5397 if (GET_RTX_CLASS (GET_CODE (op0
)) == RTX_COMPARE
5398 || GET_RTX_CLASS (GET_CODE (op0
)) == RTX_COMM_COMPARE
)
5401 *cost
+= rtx_cost (XEXP (op0
, 0), PLUS
, 0, speed
);
5402 *cost
+= rtx_cost (op1
, PLUS
, 1, speed
);
5406 if (GET_MODE_CLASS (mode
) == MODE_INT
5407 && CONST_INT_P (op1
)
5408 && aarch64_uimm12_shift (INTVAL (op1
)))
5410 *cost
+= rtx_cost (op0
, PLUS
, 0, speed
);
5413 /* ADD (immediate). */
5414 *cost
+= extra_cost
->alu
.arith
;
5418 /* Look for ADD (extended register). */
5419 if (aarch64_rtx_arith_op_extract_p (op0
, mode
))
5422 *cost
+= extra_cost
->alu
.arith_shift
;
5424 *cost
+= rtx_cost (XEXP (XEXP (op0
, 0), 0),
5425 (enum rtx_code
) GET_CODE (op0
),
5430 /* Strip any extend, leave shifts behind as we will
5431 cost them through mult_cost. */
5432 new_op0
= aarch64_strip_extend (op0
);
5434 if (GET_CODE (new_op0
) == MULT
5435 || GET_CODE (new_op0
) == ASHIFT
)
5437 *cost
+= aarch64_rtx_mult_cost (new_op0
, MULT
, PLUS
,
5439 *cost
+= rtx_cost (op1
, PLUS
, 1, speed
);
5443 *cost
+= (rtx_cost (new_op0
, PLUS
, 0, speed
)
5444 + rtx_cost (op1
, PLUS
, 1, speed
));
5448 if (GET_MODE_CLASS (mode
) == MODE_INT
)
5450 *cost
+= extra_cost
->alu
.arith
;
5451 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5453 *cost
+= extra_cost
->fp
[mode
== DFmode
].addsub
;
5459 *cost
= COSTS_N_INSNS (1);
5462 *cost
+= extra_cost
->alu
.rev
;
5467 if (aarch_rev16_p (x
))
5469 *cost
= COSTS_N_INSNS (1);
5472 *cost
+= extra_cost
->alu
.rev
;
5484 && GET_CODE (op0
) == MULT
5485 && CONST_INT_P (XEXP (op0
, 1))
5486 && CONST_INT_P (op1
)
5487 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0
, 1))),
5490 /* This is a UBFM/SBFM. */
5491 *cost
+= rtx_cost (XEXP (op0
, 0), ZERO_EXTRACT
, 0, speed
);
5493 *cost
+= extra_cost
->alu
.bfx
;
5497 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
5499 /* We possibly get the immediate for free, this is not
5501 if (CONST_INT_P (op1
)
5502 && aarch64_bitmask_imm (INTVAL (op1
), GET_MODE (x
)))
5504 *cost
+= rtx_cost (op0
, (enum rtx_code
) code
, 0, speed
);
5507 *cost
+= extra_cost
->alu
.logical
;
5515 /* Handle ORN, EON, or BIC. */
5516 if (GET_CODE (op0
) == NOT
)
5517 op0
= XEXP (op0
, 0);
5519 new_op0
= aarch64_strip_shift (op0
);
5521 /* If we had a shift on op0 then this is a logical-shift-
5522 by-register/immediate operation. Otherwise, this is just
5523 a logical operation. */
5528 /* Shift by immediate. */
5529 if (CONST_INT_P (XEXP (op0
, 1)))
5530 *cost
+= extra_cost
->alu
.log_shift
;
5532 *cost
+= extra_cost
->alu
.log_shift_reg
;
5535 *cost
+= extra_cost
->alu
.logical
;
5538 /* In both cases we want to cost both operands. */
5539 *cost
+= rtx_cost (new_op0
, (enum rtx_code
) code
, 0, speed
)
5540 + rtx_cost (op1
, (enum rtx_code
) code
, 1, speed
);
5550 *cost
+= extra_cost
->alu
.logical
;
5552 /* The logical instruction could have the shifted register form,
5553 but the cost is the same if the shift is processed as a separate
5554 instruction, so we don't bother with it here. */
5560 /* If a value is written in SI mode, then zero extended to DI
5561 mode, the operation will in general be free as a write to
5562 a 'w' register implicitly zeroes the upper bits of an 'x'
5563 register. However, if this is
5565 (set (reg) (zero_extend (reg)))
5567 we must cost the explicit register move. */
5569 && GET_MODE (op0
) == SImode
5572 int op_cost
= rtx_cost (XEXP (x
, 0), ZERO_EXTEND
, 0, speed
);
5574 if (!op_cost
&& speed
)
5576 *cost
+= extra_cost
->alu
.extend
;
5578 /* Free, the cost is that of the SI mode operation. */
5583 else if (MEM_P (XEXP (x
, 0)))
5585 /* All loads can zero extend to any size for free. */
5586 *cost
= rtx_cost (XEXP (x
, 0), ZERO_EXTEND
, param
, speed
);
5592 *cost
+= extra_cost
->alu
.extend
;
5597 if (MEM_P (XEXP (x
, 0)))
5602 rtx address
= XEXP (XEXP (x
, 0), 0);
5603 *cost
+= extra_cost
->ldst
.load_sign_extend
;
5606 COSTS_N_INSNS (aarch64_address_cost (address
, mode
,
5613 *cost
+= extra_cost
->alu
.extend
;
5620 if (CONST_INT_P (op1
))
5622 /* LSL (immediate), UBMF, UBFIZ and friends. These are all
5625 *cost
+= extra_cost
->alu
.shift
;
5627 /* We can incorporate zero/sign extend for free. */
5628 if (GET_CODE (op0
) == ZERO_EXTEND
5629 || GET_CODE (op0
) == SIGN_EXTEND
)
5630 op0
= XEXP (op0
, 0);
5632 *cost
+= rtx_cost (op0
, ASHIFT
, 0, speed
);
5639 *cost
+= extra_cost
->alu
.shift_reg
;
5641 return false; /* All arguments need to be in registers. */
5651 if (CONST_INT_P (op1
))
5653 /* ASR (immediate) and friends. */
5655 *cost
+= extra_cost
->alu
.shift
;
5657 *cost
+= rtx_cost (op0
, (enum rtx_code
) code
, 0, speed
);
5663 /* ASR (register) and friends. */
5665 *cost
+= extra_cost
->alu
.shift_reg
;
5667 return false; /* All arguments need to be in registers. */
5672 if (aarch64_cmodel
== AARCH64_CMODEL_LARGE
)
5676 *cost
+= extra_cost
->ldst
.load
;
5678 else if (aarch64_cmodel
== AARCH64_CMODEL_SMALL
5679 || aarch64_cmodel
== AARCH64_CMODEL_SMALL_PIC
)
5681 /* ADRP, followed by ADD. */
5682 *cost
+= COSTS_N_INSNS (1);
5684 *cost
+= 2 * extra_cost
->alu
.arith
;
5686 else if (aarch64_cmodel
== AARCH64_CMODEL_TINY
5687 || aarch64_cmodel
== AARCH64_CMODEL_TINY_PIC
)
5691 *cost
+= extra_cost
->alu
.arith
;
5696 /* One extra load instruction, after accessing the GOT. */
5697 *cost
+= COSTS_N_INSNS (1);
5699 *cost
+= extra_cost
->ldst
.load
;
5705 /* ADRP/ADD (immediate). */
5707 *cost
+= extra_cost
->alu
.arith
;
5714 *cost
+= extra_cost
->alu
.bfx
;
5716 /* We can trust that the immediates used will be correct (there
5717 are no by-register forms), so we need only cost op0. */
5718 *cost
+= rtx_cost (XEXP (x
, 0), (enum rtx_code
) code
, 0, speed
);
5722 *cost
+= aarch64_rtx_mult_cost (x
, MULT
, 0, speed
);
5723 /* aarch64_rtx_mult_cost always handles recursion to its
5731 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
5732 *cost
+= (extra_cost
->mult
[GET_MODE (x
) == DImode
].add
5733 + extra_cost
->mult
[GET_MODE (x
) == DImode
].idiv
);
5734 else if (GET_MODE (x
) == DFmode
)
5735 *cost
+= (extra_cost
->fp
[1].mult
5736 + extra_cost
->fp
[1].div
);
5737 else if (GET_MODE (x
) == SFmode
)
5738 *cost
+= (extra_cost
->fp
[0].mult
5739 + extra_cost
->fp
[0].div
);
5741 return false; /* All arguments need to be in registers. */
5748 if (GET_MODE_CLASS (mode
) == MODE_INT
)
5749 /* There is no integer SQRT, so only DIV and UDIV can get
5751 *cost
+= extra_cost
->mult
[mode
== DImode
].idiv
;
5753 *cost
+= extra_cost
->fp
[mode
== DFmode
].div
;
5755 return false; /* All arguments need to be in registers. */
5758 return aarch64_if_then_else_costs (XEXP (x
, 0), XEXP (x
, 1),
5759 XEXP (x
, 2), cost
, speed
);
5772 return false; /* All arguments must be in registers. */
5780 *cost
+= extra_cost
->fp
[mode
== DFmode
].fma
;
5782 /* FMSUB, FNMADD, and FNMSUB are free. */
5783 if (GET_CODE (op0
) == NEG
)
5784 op0
= XEXP (op0
, 0);
5786 if (GET_CODE (op2
) == NEG
)
5787 op2
= XEXP (op2
, 0);
5789 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
5790 and the by-element operand as operand 0. */
5791 if (GET_CODE (op1
) == NEG
)
5792 op1
= XEXP (op1
, 0);
5794 /* Catch vector-by-element operations. The by-element operand can
5795 either be (vec_duplicate (vec_select (x))) or just
5796 (vec_select (x)), depending on whether we are multiplying by
5797 a vector or a scalar.
5799 Canonicalization is not very good in these cases, FMA4 will put the
5800 by-element operand as operand 0, FNMA4 will have it as operand 1. */
5801 if (GET_CODE (op0
) == VEC_DUPLICATE
)
5802 op0
= XEXP (op0
, 0);
5803 else if (GET_CODE (op1
) == VEC_DUPLICATE
)
5804 op1
= XEXP (op1
, 0);
5806 if (GET_CODE (op0
) == VEC_SELECT
)
5807 op0
= XEXP (op0
, 0);
5808 else if (GET_CODE (op1
) == VEC_SELECT
)
5809 op1
= XEXP (op1
, 0);
5811 /* If the remaining parameters are not registers,
5812 get the cost to put them into registers. */
5813 *cost
+= rtx_cost (op0
, FMA
, 0, speed
);
5814 *cost
+= rtx_cost (op1
, FMA
, 1, speed
);
5815 *cost
+= rtx_cost (op2
, FMA
, 2, speed
);
5820 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
5823 case FLOAT_TRUNCATE
:
5825 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
5831 /* Strip the rounding part. They will all be implemented
5832 by the fcvt* family of instructions anyway. */
5833 if (GET_CODE (x
) == UNSPEC
)
5835 unsigned int uns_code
= XINT (x
, 1);
5837 if (uns_code
== UNSPEC_FRINTA
5838 || uns_code
== UNSPEC_FRINTM
5839 || uns_code
== UNSPEC_FRINTN
5840 || uns_code
== UNSPEC_FRINTP
5841 || uns_code
== UNSPEC_FRINTZ
)
5842 x
= XVECEXP (x
, 0, 0);
5846 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].toint
;
5848 *cost
+= rtx_cost (x
, (enum rtx_code
) code
, 0, speed
);
5852 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5854 /* FABS and FNEG are analogous. */
5856 *cost
+= extra_cost
->fp
[mode
== DFmode
].neg
;
5860 /* Integer ABS will either be split to
5861 two arithmetic instructions, or will be an ABS
5862 (scalar), which we don't model. */
5863 *cost
= COSTS_N_INSNS (2);
5865 *cost
+= 2 * extra_cost
->alu
.arith
;
5873 /* FMAXNM/FMINNM/FMAX/FMIN.
5874 TODO: This may not be accurate for all implementations, but
5875 we do not model this in the cost tables. */
5876 *cost
+= extra_cost
->fp
[mode
== DFmode
].addsub
;
5881 /* The floating point round to integer frint* instructions. */
5882 if (aarch64_frint_unspec_p (XINT (x
, 1)))
5885 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
5890 if (XINT (x
, 1) == UNSPEC_RBIT
)
5893 *cost
+= extra_cost
->alu
.rev
;
5901 /* Decompose <su>muldi3_highpart. */
5902 if (/* (truncate:DI */
5905 && GET_MODE (XEXP (x
, 0)) == TImode
5906 && GET_CODE (XEXP (x
, 0)) == LSHIFTRT
5908 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
5909 /* (ANY_EXTEND:TI (reg:DI))
5910 (ANY_EXTEND:TI (reg:DI))) */
5911 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
5912 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == ZERO_EXTEND
)
5913 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
5914 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
))
5915 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x
, 0), 0), 0), 0)) == DImode
5916 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x
, 0), 0), 1), 0)) == DImode
5917 /* (const_int 64) */
5918 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
5919 && UINTVAL (XEXP (XEXP (x
, 0), 1)) == 64)
5923 *cost
+= extra_cost
->mult
[mode
== DImode
].extend
;
5924 *cost
+= rtx_cost (XEXP (XEXP (XEXP (XEXP (x
, 0), 0), 0), 0),
5926 *cost
+= rtx_cost (XEXP (XEXP (XEXP (XEXP (x
, 0), 0), 1), 0),
5936 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5938 "\nFailed to cost RTX. Assuming default cost.\n");
5943 /* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
5944 calculated for X. This cost is stored in *COST. Returns true
5945 if the total cost of X was calculated. */
5947 aarch64_rtx_costs_wrapper (rtx x
, int code
, int outer
,
5948 int param
, int *cost
, bool speed
)
5950 bool result
= aarch64_rtx_costs (x
, code
, outer
, param
, cost
, speed
);
5952 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5954 print_rtl_single (dump_file
, x
);
5955 fprintf (dump_file
, "\n%s cost: %d (%s)\n",
5956 speed
? "Hot" : "Cold",
5957 *cost
, result
? "final" : "partial");
5964 aarch64_register_move_cost (enum machine_mode mode
,
5965 reg_class_t from_i
, reg_class_t to_i
)
5967 enum reg_class from
= (enum reg_class
) from_i
;
5968 enum reg_class to
= (enum reg_class
) to_i
;
5969 const struct cpu_regmove_cost
*regmove_cost
5970 = aarch64_tune_params
->regmove_cost
;
5972 /* Caller save and pointer regs are equivalent to GENERAL_REGS. */
5973 if (to
== CALLER_SAVE_REGS
|| to
== POINTER_REGS
)
5976 if (from
== CALLER_SAVE_REGS
|| from
== POINTER_REGS
)
5977 from
= GENERAL_REGS
;
5979 /* Moving between GPR and stack cost is the same as GP2GP. */
5980 if ((from
== GENERAL_REGS
&& to
== STACK_REG
)
5981 || (to
== GENERAL_REGS
&& from
== STACK_REG
))
5982 return regmove_cost
->GP2GP
;
5984 /* To/From the stack register, we move via the gprs. */
5985 if (to
== STACK_REG
|| from
== STACK_REG
)
5986 return aarch64_register_move_cost (mode
, from
, GENERAL_REGS
)
5987 + aarch64_register_move_cost (mode
, GENERAL_REGS
, to
);
5989 if (GET_MODE_SIZE (mode
) == 16)
5991 /* 128-bit operations on general registers require 2 instructions. */
5992 if (from
== GENERAL_REGS
&& to
== GENERAL_REGS
)
5993 return regmove_cost
->GP2GP
* 2;
5994 else if (from
== GENERAL_REGS
)
5995 return regmove_cost
->GP2FP
* 2;
5996 else if (to
== GENERAL_REGS
)
5997 return regmove_cost
->FP2GP
* 2;
5999 /* When AdvSIMD instructions are disabled it is not possible to move
6000 a 128-bit value directly between Q registers. This is handled in
6001 secondary reload. A general register is used as a scratch to move
6002 the upper DI value and the lower DI value is moved directly,
6003 hence the cost is the sum of three moves. */
6005 return regmove_cost
->GP2FP
+ regmove_cost
->FP2GP
+ regmove_cost
->FP2FP
;
6007 return regmove_cost
->FP2FP
;
6010 if (from
== GENERAL_REGS
&& to
== GENERAL_REGS
)
6011 return regmove_cost
->GP2GP
;
6012 else if (from
== GENERAL_REGS
)
6013 return regmove_cost
->GP2FP
;
6014 else if (to
== GENERAL_REGS
)
6015 return regmove_cost
->FP2GP
;
6017 return regmove_cost
->FP2FP
;
6021 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED
,
6022 reg_class_t rclass ATTRIBUTE_UNUSED
,
6023 bool in ATTRIBUTE_UNUSED
)
6025 return aarch64_tune_params
->memmov_cost
;
6028 /* Return the number of instructions that can be issued per cycle. */
6030 aarch64_sched_issue_rate (void)
6032 return aarch64_tune_params
->issue_rate
;
6035 /* Vectorizer cost model target hooks. */
6037 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6039 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
6041 int misalign ATTRIBUTE_UNUSED
)
6045 switch (type_of_cost
)
6048 return aarch64_tune_params
->vec_costs
->scalar_stmt_cost
;
6051 return aarch64_tune_params
->vec_costs
->scalar_load_cost
;
6054 return aarch64_tune_params
->vec_costs
->scalar_store_cost
;
6057 return aarch64_tune_params
->vec_costs
->vec_stmt_cost
;
6060 return aarch64_tune_params
->vec_costs
->vec_align_load_cost
;
6063 return aarch64_tune_params
->vec_costs
->vec_store_cost
;
6066 return aarch64_tune_params
->vec_costs
->vec_to_scalar_cost
;
6069 return aarch64_tune_params
->vec_costs
->scalar_to_vec_cost
;
6071 case unaligned_load
:
6072 return aarch64_tune_params
->vec_costs
->vec_unalign_load_cost
;
6074 case unaligned_store
:
6075 return aarch64_tune_params
->vec_costs
->vec_unalign_store_cost
;
6077 case cond_branch_taken
:
6078 return aarch64_tune_params
->vec_costs
->cond_taken_branch_cost
;
6080 case cond_branch_not_taken
:
6081 return aarch64_tune_params
->vec_costs
->cond_not_taken_branch_cost
;
6084 case vec_promote_demote
:
6085 return aarch64_tune_params
->vec_costs
->vec_stmt_cost
;
6088 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
6089 return elements
/ 2 + 1;
6096 /* Implement targetm.vectorize.add_stmt_cost. */
6098 aarch64_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
6099 struct _stmt_vec_info
*stmt_info
, int misalign
,
6100 enum vect_cost_model_location where
)
6102 unsigned *cost
= (unsigned *) data
;
6103 unsigned retval
= 0;
6105 if (flag_vect_cost_model
)
6107 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
6109 aarch64_builtin_vectorization_cost (kind
, vectype
, misalign
);
6111 /* Statements in an inner loop relative to the loop being
6112 vectorized are weighted more heavily. The value here is
6113 a function (linear for now) of the loop nest level. */
6114 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
6116 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6117 struct loop
*loop
= LOOP_VINFO_LOOP (loop_info
);
6118 unsigned nest_level
= loop_depth (loop
);
6120 count
*= nest_level
;
6123 retval
= (unsigned) (count
* stmt_cost
);
6124 cost
[where
] += retval
;
6130 static void initialize_aarch64_code_model (void);
6132 /* Parse the architecture extension string. */
6135 aarch64_parse_extension (char *str
)
6137 /* The extension string is parsed left to right. */
6138 const struct aarch64_option_extension
*opt
= NULL
;
6140 /* Flag to say whether we are adding or removing an extension. */
6141 int adding_ext
= -1;
6143 while (str
!= NULL
&& *str
!= 0)
6149 ext
= strchr (str
, '+');
6156 if (len
>= 2 && strncmp (str
, "no", 2) == 0)
6167 error ("missing feature modifier after %qs", "+no");
6171 /* Scan over the extensions table trying to find an exact match. */
6172 for (opt
= all_extensions
; opt
->name
!= NULL
; opt
++)
6174 if (strlen (opt
->name
) == len
&& strncmp (opt
->name
, str
, len
) == 0)
6176 /* Add or remove the extension. */
6178 aarch64_isa_flags
|= opt
->flags_on
;
6180 aarch64_isa_flags
&= ~(opt
->flags_off
);
6185 if (opt
->name
== NULL
)
6187 /* Extension not found in list. */
6188 error ("unknown feature modifier %qs", str
);
6198 /* Parse the ARCH string. */
6201 aarch64_parse_arch (void)
6204 const struct processor
*arch
;
6205 char *str
= (char *) alloca (strlen (aarch64_arch_string
) + 1);
6208 strcpy (str
, aarch64_arch_string
);
6210 ext
= strchr (str
, '+');
6219 error ("missing arch name in -march=%qs", str
);
6223 /* Loop through the list of supported ARCHs to find a match. */
6224 for (arch
= all_architectures
; arch
->name
!= NULL
; arch
++)
6226 if (strlen (arch
->name
) == len
&& strncmp (arch
->name
, str
, len
) == 0)
6228 selected_arch
= arch
;
6229 aarch64_isa_flags
= selected_arch
->flags
;
6232 selected_cpu
= &all_cores
[selected_arch
->core
];
6236 /* ARCH string contains at least one extension. */
6237 aarch64_parse_extension (ext
);
6240 if (strcmp (selected_arch
->arch
, selected_cpu
->arch
))
6242 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
6243 selected_cpu
->name
, selected_arch
->name
);
6250 /* ARCH name not found in list. */
6251 error ("unknown value %qs for -march", str
);
6255 /* Parse the CPU string. */
6258 aarch64_parse_cpu (void)
6261 const struct processor
*cpu
;
6262 char *str
= (char *) alloca (strlen (aarch64_cpu_string
) + 1);
6265 strcpy (str
, aarch64_cpu_string
);
6267 ext
= strchr (str
, '+');
6276 error ("missing cpu name in -mcpu=%qs", str
);
6280 /* Loop through the list of supported CPUs to find a match. */
6281 for (cpu
= all_cores
; cpu
->name
!= NULL
; cpu
++)
6283 if (strlen (cpu
->name
) == len
&& strncmp (cpu
->name
, str
, len
) == 0)
6286 selected_tune
= cpu
;
6287 aarch64_isa_flags
= selected_cpu
->flags
;
6291 /* CPU string contains at least one extension. */
6292 aarch64_parse_extension (ext
);
6299 /* CPU name not found in list. */
6300 error ("unknown value %qs for -mcpu", str
);
6304 /* Parse the TUNE string. */
6307 aarch64_parse_tune (void)
6309 const struct processor
*cpu
;
6310 char *str
= (char *) alloca (strlen (aarch64_tune_string
) + 1);
6311 strcpy (str
, aarch64_tune_string
);
6313 /* Loop through the list of supported CPUs to find a match. */
6314 for (cpu
= all_cores
; cpu
->name
!= NULL
; cpu
++)
6316 if (strcmp (cpu
->name
, str
) == 0)
6318 selected_tune
= cpu
;
6323 /* CPU name not found in list. */
6324 error ("unknown value %qs for -mtune", str
);
6329 /* Implement TARGET_OPTION_OVERRIDE. */
6332 aarch64_override_options (void)
6334 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
6335 If either of -march or -mtune is given, they override their
6336 respective component of -mcpu.
6338 So, first parse AARCH64_CPU_STRING, then the others, be careful
6339 with -march as, if -mcpu is not present on the command line, march
6340 must set a sensible default CPU. */
6341 if (aarch64_cpu_string
)
6343 aarch64_parse_cpu ();
6346 if (aarch64_arch_string
)
6348 aarch64_parse_arch ();
6351 if (aarch64_tune_string
)
6353 aarch64_parse_tune ();
6356 #ifndef HAVE_AS_MABI_OPTION
6357 /* The compiler may have been configured with 2.23.* binutils, which does
6358 not have support for ILP32. */
6360 error ("Assembler does not support -mabi=ilp32");
6363 initialize_aarch64_code_model ();
6365 aarch64_build_bitmask_table ();
6367 /* This target defaults to strict volatile bitfields. */
6368 if (flag_strict_volatile_bitfields
< 0 && abi_version_at_least (2))
6369 flag_strict_volatile_bitfields
= 1;
6371 /* If the user did not specify a processor, choose the default
6372 one for them. This will be the CPU set during configuration using
6373 --with-cpu, otherwise it is "generic". */
6376 selected_cpu
= &all_cores
[TARGET_CPU_DEFAULT
& 0x3f];
6377 aarch64_isa_flags
= TARGET_CPU_DEFAULT
>> 6;
6380 gcc_assert (selected_cpu
);
6382 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
6384 selected_tune
= &all_cores
[selected_cpu
->core
];
6386 aarch64_tune_flags
= selected_tune
->flags
;
6387 aarch64_tune
= selected_tune
->core
;
6388 aarch64_tune_params
= selected_tune
->tune
;
6390 aarch64_override_options_after_change ();
6393 /* Implement targetm.override_options_after_change. */
6396 aarch64_override_options_after_change (void)
6398 if (flag_omit_frame_pointer
)
6399 flag_omit_leaf_frame_pointer
= false;
6400 else if (flag_omit_leaf_frame_pointer
)
6401 flag_omit_frame_pointer
= true;
6404 static struct machine_function
*
6405 aarch64_init_machine_status (void)
6407 struct machine_function
*machine
;
6408 machine
= ggc_cleared_alloc
<machine_function
> ();
6413 aarch64_init_expanders (void)
6415 init_machine_status
= aarch64_init_machine_status
;
6418 /* A checking mechanism for the implementation of the various code models. */
6420 initialize_aarch64_code_model (void)
6424 switch (aarch64_cmodel_var
)
6426 case AARCH64_CMODEL_TINY
:
6427 aarch64_cmodel
= AARCH64_CMODEL_TINY_PIC
;
6429 case AARCH64_CMODEL_SMALL
:
6430 aarch64_cmodel
= AARCH64_CMODEL_SMALL_PIC
;
6432 case AARCH64_CMODEL_LARGE
:
6433 sorry ("code model %qs with -f%s", "large",
6434 flag_pic
> 1 ? "PIC" : "pic");
6440 aarch64_cmodel
= aarch64_cmodel_var
;
6443 /* Return true if SYMBOL_REF X binds locally. */
6446 aarch64_symbol_binds_local_p (const_rtx x
)
6448 return (SYMBOL_REF_DECL (x
)
6449 ? targetm
.binds_local_p (SYMBOL_REF_DECL (x
))
6450 : SYMBOL_REF_LOCAL_P (x
));
6453 /* Return true if SYMBOL_REF X is thread local */
6455 aarch64_tls_symbol_p (rtx x
)
6457 if (! TARGET_HAVE_TLS
)
6460 if (GET_CODE (x
) != SYMBOL_REF
)
6463 return SYMBOL_REF_TLS_MODEL (x
) != 0;
6466 /* Classify a TLS symbol into one of the TLS kinds. */
6467 enum aarch64_symbol_type
6468 aarch64_classify_tls_symbol (rtx x
)
6470 enum tls_model tls_kind
= tls_symbolic_operand_type (x
);
6474 case TLS_MODEL_GLOBAL_DYNAMIC
:
6475 case TLS_MODEL_LOCAL_DYNAMIC
:
6476 return TARGET_TLS_DESC
? SYMBOL_SMALL_TLSDESC
: SYMBOL_SMALL_TLSGD
;
6478 case TLS_MODEL_INITIAL_EXEC
:
6479 return SYMBOL_SMALL_GOTTPREL
;
6481 case TLS_MODEL_LOCAL_EXEC
:
6482 return SYMBOL_SMALL_TPREL
;
6484 case TLS_MODEL_EMULATED
:
6485 case TLS_MODEL_NONE
:
6486 return SYMBOL_FORCE_TO_MEM
;
6493 /* Return the method that should be used to access SYMBOL_REF or
6494 LABEL_REF X in context CONTEXT. */
6496 enum aarch64_symbol_type
6497 aarch64_classify_symbol (rtx x
,
6498 enum aarch64_symbol_context context ATTRIBUTE_UNUSED
)
6500 if (GET_CODE (x
) == LABEL_REF
)
6502 switch (aarch64_cmodel
)
6504 case AARCH64_CMODEL_LARGE
:
6505 return SYMBOL_FORCE_TO_MEM
;
6507 case AARCH64_CMODEL_TINY_PIC
:
6508 case AARCH64_CMODEL_TINY
:
6509 return SYMBOL_TINY_ABSOLUTE
;
6511 case AARCH64_CMODEL_SMALL_PIC
:
6512 case AARCH64_CMODEL_SMALL
:
6513 return SYMBOL_SMALL_ABSOLUTE
;
6520 if (GET_CODE (x
) == SYMBOL_REF
)
6522 if (aarch64_cmodel
== AARCH64_CMODEL_LARGE
)
6523 return SYMBOL_FORCE_TO_MEM
;
6525 if (aarch64_tls_symbol_p (x
))
6526 return aarch64_classify_tls_symbol (x
);
6528 switch (aarch64_cmodel
)
6530 case AARCH64_CMODEL_TINY
:
6531 if (SYMBOL_REF_WEAK (x
))
6532 return SYMBOL_FORCE_TO_MEM
;
6533 return SYMBOL_TINY_ABSOLUTE
;
6535 case AARCH64_CMODEL_SMALL
:
6536 if (SYMBOL_REF_WEAK (x
))
6537 return SYMBOL_FORCE_TO_MEM
;
6538 return SYMBOL_SMALL_ABSOLUTE
;
6540 case AARCH64_CMODEL_TINY_PIC
:
6541 if (!aarch64_symbol_binds_local_p (x
))
6542 return SYMBOL_TINY_GOT
;
6543 return SYMBOL_TINY_ABSOLUTE
;
6545 case AARCH64_CMODEL_SMALL_PIC
:
6546 if (!aarch64_symbol_binds_local_p (x
))
6547 return SYMBOL_SMALL_GOT
;
6548 return SYMBOL_SMALL_ABSOLUTE
;
6555 /* By default push everything into the constant pool. */
6556 return SYMBOL_FORCE_TO_MEM
;
6560 aarch64_constant_address_p (rtx x
)
6562 return (CONSTANT_P (x
) && memory_address_p (DImode
, x
));
6566 aarch64_legitimate_pic_operand_p (rtx x
)
6568 if (GET_CODE (x
) == SYMBOL_REF
6569 || (GET_CODE (x
) == CONST
6570 && GET_CODE (XEXP (x
, 0)) == PLUS
6571 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
6577 /* Return true if X holds either a quarter-precision or
6578 floating-point +0.0 constant. */
6580 aarch64_valid_floating_const (enum machine_mode mode
, rtx x
)
6582 if (!CONST_DOUBLE_P (x
))
6585 /* TODO: We could handle moving 0.0 to a TFmode register,
6586 but first we would like to refactor the movtf_aarch64
6587 to be more amicable to split moves properly and
6588 correctly gate on TARGET_SIMD. For now - reject all
6589 constants which are not to SFmode or DFmode registers. */
6590 if (!(mode
== SFmode
|| mode
== DFmode
))
6593 if (aarch64_float_const_zero_rtx_p (x
))
6595 return aarch64_float_const_representable_p (x
);
6599 aarch64_legitimate_constant_p (enum machine_mode mode
, rtx x
)
6601 /* Do not allow vector struct mode constants. We could support
6602 0 and -1 easily, but they need support in aarch64-simd.md. */
6603 if (TARGET_SIMD
&& aarch64_vect_struct_mode_p (mode
))
6606 /* This could probably go away because
6607 we now decompose CONST_INTs according to expand_mov_immediate. */
6608 if ((GET_CODE (x
) == CONST_VECTOR
6609 && aarch64_simd_valid_immediate (x
, mode
, false, NULL
))
6610 || CONST_INT_P (x
) || aarch64_valid_floating_const (mode
, x
))
6611 return !targetm
.cannot_force_const_mem (mode
, x
);
6613 if (GET_CODE (x
) == HIGH
6614 && aarch64_valid_symref (XEXP (x
, 0), GET_MODE (XEXP (x
, 0))))
6617 return aarch64_constant_address_p (x
);
6621 aarch64_load_tp (rtx target
)
6624 || GET_MODE (target
) != Pmode
6625 || !register_operand (target
, Pmode
))
6626 target
= gen_reg_rtx (Pmode
);
6628 /* Can return in any reg. */
6629 emit_insn (gen_aarch64_load_tp_hard (target
));
6633 /* On AAPCS systems, this is the "struct __va_list". */
6634 static GTY(()) tree va_list_type
;
6636 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
6637 Return the type to use as __builtin_va_list.
6639 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
6651 aarch64_build_builtin_va_list (void)
6654 tree f_stack
, f_grtop
, f_vrtop
, f_groff
, f_vroff
;
6656 /* Create the type. */
6657 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
6658 /* Give it the required name. */
6659 va_list_name
= build_decl (BUILTINS_LOCATION
,
6661 get_identifier ("__va_list"),
6663 DECL_ARTIFICIAL (va_list_name
) = 1;
6664 TYPE_NAME (va_list_type
) = va_list_name
;
6665 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
6667 /* Create the fields. */
6668 f_stack
= build_decl (BUILTINS_LOCATION
,
6669 FIELD_DECL
, get_identifier ("__stack"),
6671 f_grtop
= build_decl (BUILTINS_LOCATION
,
6672 FIELD_DECL
, get_identifier ("__gr_top"),
6674 f_vrtop
= build_decl (BUILTINS_LOCATION
,
6675 FIELD_DECL
, get_identifier ("__vr_top"),
6677 f_groff
= build_decl (BUILTINS_LOCATION
,
6678 FIELD_DECL
, get_identifier ("__gr_offs"),
6680 f_vroff
= build_decl (BUILTINS_LOCATION
,
6681 FIELD_DECL
, get_identifier ("__vr_offs"),
6684 DECL_ARTIFICIAL (f_stack
) = 1;
6685 DECL_ARTIFICIAL (f_grtop
) = 1;
6686 DECL_ARTIFICIAL (f_vrtop
) = 1;
6687 DECL_ARTIFICIAL (f_groff
) = 1;
6688 DECL_ARTIFICIAL (f_vroff
) = 1;
6690 DECL_FIELD_CONTEXT (f_stack
) = va_list_type
;
6691 DECL_FIELD_CONTEXT (f_grtop
) = va_list_type
;
6692 DECL_FIELD_CONTEXT (f_vrtop
) = va_list_type
;
6693 DECL_FIELD_CONTEXT (f_groff
) = va_list_type
;
6694 DECL_FIELD_CONTEXT (f_vroff
) = va_list_type
;
6696 TYPE_FIELDS (va_list_type
) = f_stack
;
6697 DECL_CHAIN (f_stack
) = f_grtop
;
6698 DECL_CHAIN (f_grtop
) = f_vrtop
;
6699 DECL_CHAIN (f_vrtop
) = f_groff
;
6700 DECL_CHAIN (f_groff
) = f_vroff
;
6702 /* Compute its layout. */
6703 layout_type (va_list_type
);
6705 return va_list_type
;
6708 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
6710 aarch64_expand_builtin_va_start (tree valist
, rtx nextarg ATTRIBUTE_UNUSED
)
6712 const CUMULATIVE_ARGS
*cum
;
6713 tree f_stack
, f_grtop
, f_vrtop
, f_groff
, f_vroff
;
6714 tree stack
, grtop
, vrtop
, groff
, vroff
;
6716 int gr_save_area_size
;
6717 int vr_save_area_size
;
6720 cum
= &crtl
->args
.info
;
6722 = (NUM_ARG_REGS
- cum
->aapcs_ncrn
) * UNITS_PER_WORD
;
6724 = (NUM_FP_ARG_REGS
- cum
->aapcs_nvrn
) * UNITS_PER_VREG
;
6726 if (TARGET_GENERAL_REGS_ONLY
)
6728 if (cum
->aapcs_nvrn
> 0)
6729 sorry ("%qs and floating point or vector arguments",
6730 "-mgeneral-regs-only");
6731 vr_save_area_size
= 0;
6734 f_stack
= TYPE_FIELDS (va_list_type_node
);
6735 f_grtop
= DECL_CHAIN (f_stack
);
6736 f_vrtop
= DECL_CHAIN (f_grtop
);
6737 f_groff
= DECL_CHAIN (f_vrtop
);
6738 f_vroff
= DECL_CHAIN (f_groff
);
6740 stack
= build3 (COMPONENT_REF
, TREE_TYPE (f_stack
), valist
, f_stack
,
6742 grtop
= build3 (COMPONENT_REF
, TREE_TYPE (f_grtop
), valist
, f_grtop
,
6744 vrtop
= build3 (COMPONENT_REF
, TREE_TYPE (f_vrtop
), valist
, f_vrtop
,
6746 groff
= build3 (COMPONENT_REF
, TREE_TYPE (f_groff
), valist
, f_groff
,
6748 vroff
= build3 (COMPONENT_REF
, TREE_TYPE (f_vroff
), valist
, f_vroff
,
6751 /* Emit code to initialize STACK, which points to the next varargs stack
6752 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
6753 by named arguments. STACK is 8-byte aligned. */
6754 t
= make_tree (TREE_TYPE (stack
), virtual_incoming_args_rtx
);
6755 if (cum
->aapcs_stack_size
> 0)
6756 t
= fold_build_pointer_plus_hwi (t
, cum
->aapcs_stack_size
* UNITS_PER_WORD
);
6757 t
= build2 (MODIFY_EXPR
, TREE_TYPE (stack
), stack
, t
);
6758 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6760 /* Emit code to initialize GRTOP, the top of the GR save area.
6761 virtual_incoming_args_rtx should have been 16 byte aligned. */
6762 t
= make_tree (TREE_TYPE (grtop
), virtual_incoming_args_rtx
);
6763 t
= build2 (MODIFY_EXPR
, TREE_TYPE (grtop
), grtop
, t
);
6764 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6766 /* Emit code to initialize VRTOP, the top of the VR save area.
6767 This address is gr_save_area_bytes below GRTOP, rounded
6768 down to the next 16-byte boundary. */
6769 t
= make_tree (TREE_TYPE (vrtop
), virtual_incoming_args_rtx
);
6770 vr_offset
= AARCH64_ROUND_UP (gr_save_area_size
,
6771 STACK_BOUNDARY
/ BITS_PER_UNIT
);
6774 t
= fold_build_pointer_plus_hwi (t
, -vr_offset
);
6775 t
= build2 (MODIFY_EXPR
, TREE_TYPE (vrtop
), vrtop
, t
);
6776 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6778 /* Emit code to initialize GROFF, the offset from GRTOP of the
6779 next GPR argument. */
6780 t
= build2 (MODIFY_EXPR
, TREE_TYPE (groff
), groff
,
6781 build_int_cst (TREE_TYPE (groff
), -gr_save_area_size
));
6782 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6784 /* Likewise emit code to initialize VROFF, the offset from FTOP
6785 of the next VR argument. */
6786 t
= build2 (MODIFY_EXPR
, TREE_TYPE (vroff
), vroff
,
6787 build_int_cst (TREE_TYPE (vroff
), -vr_save_area_size
));
6788 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6791 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
6794 aarch64_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
6795 gimple_seq
*post_p ATTRIBUTE_UNUSED
)
6799 bool is_ha
; /* is HFA or HVA. */
6800 bool dw_align
; /* double-word align. */
6801 enum machine_mode ag_mode
= VOIDmode
;
6803 enum machine_mode mode
;
6805 tree f_stack
, f_grtop
, f_vrtop
, f_groff
, f_vroff
;
6806 tree stack
, f_top
, f_off
, off
, arg
, roundup
, on_stack
;
6807 HOST_WIDE_INT size
, rsize
, adjust
, align
;
6808 tree t
, u
, cond1
, cond2
;
6810 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
6812 type
= build_pointer_type (type
);
6814 mode
= TYPE_MODE (type
);
6816 f_stack
= TYPE_FIELDS (va_list_type_node
);
6817 f_grtop
= DECL_CHAIN (f_stack
);
6818 f_vrtop
= DECL_CHAIN (f_grtop
);
6819 f_groff
= DECL_CHAIN (f_vrtop
);
6820 f_vroff
= DECL_CHAIN (f_groff
);
6822 stack
= build3 (COMPONENT_REF
, TREE_TYPE (f_stack
), unshare_expr (valist
),
6823 f_stack
, NULL_TREE
);
6824 size
= int_size_in_bytes (type
);
6825 align
= aarch64_function_arg_alignment (mode
, type
) / BITS_PER_UNIT
;
6829 if (aarch64_vfp_is_call_or_return_candidate (mode
,
6835 /* TYPE passed in fp/simd registers. */
6836 if (TARGET_GENERAL_REGS_ONLY
)
6837 sorry ("%qs and floating point or vector arguments",
6838 "-mgeneral-regs-only");
6840 f_top
= build3 (COMPONENT_REF
, TREE_TYPE (f_vrtop
),
6841 unshare_expr (valist
), f_vrtop
, NULL_TREE
);
6842 f_off
= build3 (COMPONENT_REF
, TREE_TYPE (f_vroff
),
6843 unshare_expr (valist
), f_vroff
, NULL_TREE
);
6845 rsize
= nregs
* UNITS_PER_VREG
;
6849 if (BYTES_BIG_ENDIAN
&& GET_MODE_SIZE (ag_mode
) < UNITS_PER_VREG
)
6850 adjust
= UNITS_PER_VREG
- GET_MODE_SIZE (ag_mode
);
6852 else if (BLOCK_REG_PADDING (mode
, type
, 1) == downward
6853 && size
< UNITS_PER_VREG
)
6855 adjust
= UNITS_PER_VREG
- size
;
6860 /* TYPE passed in general registers. */
6861 f_top
= build3 (COMPONENT_REF
, TREE_TYPE (f_grtop
),
6862 unshare_expr (valist
), f_grtop
, NULL_TREE
);
6863 f_off
= build3 (COMPONENT_REF
, TREE_TYPE (f_groff
),
6864 unshare_expr (valist
), f_groff
, NULL_TREE
);
6865 rsize
= (size
+ UNITS_PER_WORD
- 1) & -UNITS_PER_WORD
;
6866 nregs
= rsize
/ UNITS_PER_WORD
;
6871 if (BLOCK_REG_PADDING (mode
, type
, 1) == downward
6872 && size
< UNITS_PER_WORD
)
6874 adjust
= UNITS_PER_WORD
- size
;
6878 /* Get a local temporary for the field value. */
6879 off
= get_initialized_tmp_var (f_off
, pre_p
, NULL
);
6881 /* Emit code to branch if off >= 0. */
6882 t
= build2 (GE_EXPR
, boolean_type_node
, off
,
6883 build_int_cst (TREE_TYPE (off
), 0));
6884 cond1
= build3 (COND_EXPR
, ptr_type_node
, t
, NULL_TREE
, NULL_TREE
);
6888 /* Emit: offs = (offs + 15) & -16. */
6889 t
= build2 (PLUS_EXPR
, TREE_TYPE (off
), off
,
6890 build_int_cst (TREE_TYPE (off
), 15));
6891 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (off
), t
,
6892 build_int_cst (TREE_TYPE (off
), -16));
6893 roundup
= build2 (MODIFY_EXPR
, TREE_TYPE (off
), off
, t
);
6898 /* Update ap.__[g|v]r_offs */
6899 t
= build2 (PLUS_EXPR
, TREE_TYPE (off
), off
,
6900 build_int_cst (TREE_TYPE (off
), rsize
));
6901 t
= build2 (MODIFY_EXPR
, TREE_TYPE (f_off
), unshare_expr (f_off
), t
);
6905 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), roundup
, t
);
6907 /* [cond2] if (ap.__[g|v]r_offs > 0) */
6908 u
= build2 (GT_EXPR
, boolean_type_node
, unshare_expr (f_off
),
6909 build_int_cst (TREE_TYPE (f_off
), 0));
6910 cond2
= build3 (COND_EXPR
, ptr_type_node
, u
, NULL_TREE
, NULL_TREE
);
6912 /* String up: make sure the assignment happens before the use. */
6913 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (cond2
), t
, cond2
);
6914 COND_EXPR_ELSE (cond1
) = t
;
6916 /* Prepare the trees handling the argument that is passed on the stack;
6917 the top level node will store in ON_STACK. */
6918 arg
= get_initialized_tmp_var (stack
, pre_p
, NULL
);
6921 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
6922 t
= fold_convert (intDI_type_node
, arg
);
6923 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
6924 build_int_cst (TREE_TYPE (t
), 15));
6925 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
6926 build_int_cst (TREE_TYPE (t
), -16));
6927 t
= fold_convert (TREE_TYPE (arg
), t
);
6928 roundup
= build2 (MODIFY_EXPR
, TREE_TYPE (arg
), arg
, t
);
6932 /* Advance ap.__stack */
6933 t
= fold_convert (intDI_type_node
, arg
);
6934 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
6935 build_int_cst (TREE_TYPE (t
), size
+ 7));
6936 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
6937 build_int_cst (TREE_TYPE (t
), -8));
6938 t
= fold_convert (TREE_TYPE (arg
), t
);
6939 t
= build2 (MODIFY_EXPR
, TREE_TYPE (stack
), unshare_expr (stack
), t
);
6940 /* String up roundup and advance. */
6942 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), roundup
, t
);
6943 /* String up with arg */
6944 on_stack
= build2 (COMPOUND_EXPR
, TREE_TYPE (arg
), t
, arg
);
6945 /* Big-endianness related address adjustment. */
6946 if (BLOCK_REG_PADDING (mode
, type
, 1) == downward
6947 && size
< UNITS_PER_WORD
)
6949 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (arg
), arg
,
6950 size_int (UNITS_PER_WORD
- size
));
6951 on_stack
= build2 (COMPOUND_EXPR
, TREE_TYPE (arg
), on_stack
, t
);
6954 COND_EXPR_THEN (cond1
) = unshare_expr (on_stack
);
6955 COND_EXPR_THEN (cond2
) = unshare_expr (on_stack
);
6957 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
6960 t
= build2 (PREINCREMENT_EXPR
, TREE_TYPE (off
), off
,
6961 build_int_cst (TREE_TYPE (off
), adjust
));
6963 t
= fold_convert (sizetype
, t
);
6964 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (f_top
), f_top
, t
);
6968 /* type ha; // treat as "struct {ftype field[n];}"
6969 ... [computing offs]
6970 for (i = 0; i <nregs; ++i, offs += 16)
6971 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
6974 tree tmp_ha
, field_t
, field_ptr_t
;
6976 /* Declare a local variable. */
6977 tmp_ha
= create_tmp_var_raw (type
, "ha");
6978 gimple_add_tmp_var (tmp_ha
);
6980 /* Establish the base type. */
6984 field_t
= float_type_node
;
6985 field_ptr_t
= float_ptr_type_node
;
6988 field_t
= double_type_node
;
6989 field_ptr_t
= double_ptr_type_node
;
6992 field_t
= long_double_type_node
;
6993 field_ptr_t
= long_double_ptr_type_node
;
6995 /* The half precision and quad precision are not fully supported yet. Enable
6996 the following code after the support is complete. Need to find the correct
6997 type node for __fp16 *. */
7000 field_t
= float_type_node
;
7001 field_ptr_t
= float_ptr_type_node
;
7007 tree innertype
= make_signed_type (GET_MODE_PRECISION (SImode
));
7008 field_t
= build_vector_type_for_mode (innertype
, ag_mode
);
7009 field_ptr_t
= build_pointer_type (field_t
);
7016 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area */
7017 tmp_ha
= build1 (ADDR_EXPR
, field_ptr_t
, tmp_ha
);
7019 t
= fold_convert (field_ptr_t
, addr
);
7020 t
= build2 (MODIFY_EXPR
, field_t
,
7021 build1 (INDIRECT_REF
, field_t
, tmp_ha
),
7022 build1 (INDIRECT_REF
, field_t
, t
));
7024 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
7025 for (i
= 1; i
< nregs
; ++i
)
7027 addr
= fold_build_pointer_plus_hwi (addr
, UNITS_PER_VREG
);
7028 u
= fold_convert (field_ptr_t
, addr
);
7029 u
= build2 (MODIFY_EXPR
, field_t
,
7030 build2 (MEM_REF
, field_t
, tmp_ha
,
7031 build_int_cst (field_ptr_t
,
7033 int_size_in_bytes (field_t
)))),
7034 build1 (INDIRECT_REF
, field_t
, u
));
7035 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), t
, u
);
7038 u
= fold_convert (TREE_TYPE (f_top
), tmp_ha
);
7039 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (f_top
), t
, u
);
7042 COND_EXPR_ELSE (cond2
) = t
;
7043 addr
= fold_convert (build_pointer_type (type
), cond1
);
7044 addr
= build_va_arg_indirect_ref (addr
);
7047 addr
= build_va_arg_indirect_ref (addr
);
7052 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
7055 aarch64_setup_incoming_varargs (cumulative_args_t cum_v
, enum machine_mode mode
,
7056 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
7059 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
7060 CUMULATIVE_ARGS local_cum
;
7061 int gr_saved
, vr_saved
;
7063 /* The caller has advanced CUM up to, but not beyond, the last named
7064 argument. Advance a local copy of CUM past the last "real" named
7065 argument, to find out how many registers are left over. */
7067 aarch64_function_arg_advance (pack_cumulative_args(&local_cum
), mode
, type
, true);
7069 /* Found out how many registers we need to save. */
7070 gr_saved
= NUM_ARG_REGS
- local_cum
.aapcs_ncrn
;
7071 vr_saved
= NUM_FP_ARG_REGS
- local_cum
.aapcs_nvrn
;
7073 if (TARGET_GENERAL_REGS_ONLY
)
7075 if (local_cum
.aapcs_nvrn
> 0)
7076 sorry ("%qs and floating point or vector arguments",
7077 "-mgeneral-regs-only");
7087 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
7088 ptr
= plus_constant (Pmode
, virtual_incoming_args_rtx
,
7089 - gr_saved
* UNITS_PER_WORD
);
7090 mem
= gen_frame_mem (BLKmode
, ptr
);
7091 set_mem_alias_set (mem
, get_varargs_alias_set ());
7093 move_block_from_reg (local_cum
.aapcs_ncrn
+ R0_REGNUM
,
7098 /* We can't use move_block_from_reg, because it will use
7099 the wrong mode, storing D regs only. */
7100 enum machine_mode mode
= TImode
;
7103 /* Set OFF to the offset from virtual_incoming_args_rtx of
7104 the first vector register. The VR save area lies below
7105 the GR one, and is aligned to 16 bytes. */
7106 off
= -AARCH64_ROUND_UP (gr_saved
* UNITS_PER_WORD
,
7107 STACK_BOUNDARY
/ BITS_PER_UNIT
);
7108 off
-= vr_saved
* UNITS_PER_VREG
;
7110 for (i
= local_cum
.aapcs_nvrn
; i
< NUM_FP_ARG_REGS
; ++i
)
7114 ptr
= plus_constant (Pmode
, virtual_incoming_args_rtx
, off
);
7115 mem
= gen_frame_mem (mode
, ptr
);
7116 set_mem_alias_set (mem
, get_varargs_alias_set ());
7117 aarch64_emit_move (mem
, gen_rtx_REG (mode
, V0_REGNUM
+ i
));
7118 off
+= UNITS_PER_VREG
;
7123 /* We don't save the size into *PRETEND_SIZE because we want to avoid
7124 any complication of having crtl->args.pretend_args_size changed. */
7125 cfun
->machine
->frame
.saved_varargs_size
7126 = (AARCH64_ROUND_UP (gr_saved
* UNITS_PER_WORD
,
7127 STACK_BOUNDARY
/ BITS_PER_UNIT
)
7128 + vr_saved
* UNITS_PER_VREG
);
7132 aarch64_conditional_register_usage (void)
7137 for (i
= V0_REGNUM
; i
<= V31_REGNUM
; i
++)
7140 call_used_regs
[i
] = 1;
7145 /* Walk down the type tree of TYPE counting consecutive base elements.
7146 If *MODEP is VOIDmode, then set it to the first valid floating point
7147 type. If a non-floating point type is found, or if a floating point
7148 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
7149 otherwise return the count in the sub-tree. */
7151 aapcs_vfp_sub_candidate (const_tree type
, enum machine_mode
*modep
)
7153 enum machine_mode mode
;
7156 switch (TREE_CODE (type
))
7159 mode
= TYPE_MODE (type
);
7160 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= TFmode
)
7163 if (*modep
== VOIDmode
)
7172 mode
= TYPE_MODE (TREE_TYPE (type
));
7173 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= TFmode
)
7176 if (*modep
== VOIDmode
)
7185 /* Use V2SImode and V4SImode as representatives of all 64-bit
7186 and 128-bit vector types. */
7187 size
= int_size_in_bytes (type
);
7200 if (*modep
== VOIDmode
)
7203 /* Vector modes are considered to be opaque: two vectors are
7204 equivalent for the purposes of being homogeneous aggregates
7205 if they are the same size. */
7214 tree index
= TYPE_DOMAIN (type
);
7216 /* Can't handle incomplete types nor sizes that are not
7218 if (!COMPLETE_TYPE_P (type
)
7219 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
7222 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
7225 || !TYPE_MAX_VALUE (index
)
7226 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
7227 || !TYPE_MIN_VALUE (index
)
7228 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
7232 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
7233 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
7235 /* There must be no padding. */
7236 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
7248 /* Can't handle incomplete types nor sizes that are not
7250 if (!COMPLETE_TYPE_P (type
)
7251 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
7254 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
7256 if (TREE_CODE (field
) != FIELD_DECL
)
7259 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
7265 /* There must be no padding. */
7266 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
7273 case QUAL_UNION_TYPE
:
7275 /* These aren't very interesting except in a degenerate case. */
7280 /* Can't handle incomplete types nor sizes that are not
7282 if (!COMPLETE_TYPE_P (type
)
7283 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
7286 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
7288 if (TREE_CODE (field
) != FIELD_DECL
)
7291 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
7294 count
= count
> sub_count
? count
: sub_count
;
7297 /* There must be no padding. */
7298 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
7311 /* Return true if we use LRA instead of reload pass. */
7313 aarch64_lra_p (void)
7315 return aarch64_lra_flag
;
7318 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
7319 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
7320 array types. The C99 floating-point complex types are also considered
7321 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
7322 types, which are GCC extensions and out of the scope of AAPCS64, are
7323 treated as composite types here as well.
7325 Note that MODE itself is not sufficient in determining whether a type
7326 is such a composite type or not. This is because
7327 stor-layout.c:compute_record_mode may have already changed the MODE
7328 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
7329 structure with only one field may have its MODE set to the mode of the
7330 field. Also an integer mode whose size matches the size of the
7331 RECORD_TYPE type may be used to substitute the original mode
7332 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
7333 solely relied on. */
7336 aarch64_composite_type_p (const_tree type
,
7337 enum machine_mode mode
)
7339 if (type
&& (AGGREGATE_TYPE_P (type
) || TREE_CODE (type
) == COMPLEX_TYPE
))
7343 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
7344 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
7350 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
7351 type as described in AAPCS64 \S 4.1.2.
7353 See the comment above aarch64_composite_type_p for the notes on MODE. */
7356 aarch64_short_vector_p (const_tree type
,
7357 enum machine_mode mode
)
7359 HOST_WIDE_INT size
= -1;
7361 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
7362 size
= int_size_in_bytes (type
);
7363 else if (!aarch64_composite_type_p (type
, mode
)
7364 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
7365 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
))
7366 size
= GET_MODE_SIZE (mode
);
7368 return (size
== 8 || size
== 16) ? true : false;
7371 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
7372 shall be passed or returned in simd/fp register(s) (providing these
7373 parameter passing registers are available).
7375 Upon successful return, *COUNT returns the number of needed registers,
7376 *BASE_MODE returns the mode of the individual register and when IS_HAF
7377 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
7378 floating-point aggregate or a homogeneous short-vector aggregate. */
7381 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode
,
7383 enum machine_mode
*base_mode
,
7387 enum machine_mode new_mode
= VOIDmode
;
7388 bool composite_p
= aarch64_composite_type_p (type
, mode
);
7390 if (is_ha
!= NULL
) *is_ha
= false;
7392 if ((!composite_p
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7393 || aarch64_short_vector_p (type
, mode
))
7398 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
7400 if (is_ha
!= NULL
) *is_ha
= true;
7402 new_mode
= GET_MODE_INNER (mode
);
7404 else if (type
&& composite_p
)
7406 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
7408 if (ag_count
> 0 && ag_count
<= HA_MAX_NUM_FLDS
)
7410 if (is_ha
!= NULL
) *is_ha
= true;
7419 *base_mode
= new_mode
;
7423 /* Implement TARGET_STRUCT_VALUE_RTX. */
7426 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED
,
7427 int incoming ATTRIBUTE_UNUSED
)
7429 return gen_rtx_REG (Pmode
, AARCH64_STRUCT_VALUE_REGNUM
);
7432 /* Implements target hook vector_mode_supported_p. */
7434 aarch64_vector_mode_supported_p (enum machine_mode mode
)
7437 && (mode
== V4SImode
|| mode
== V8HImode
7438 || mode
== V16QImode
|| mode
== V2DImode
7439 || mode
== V2SImode
|| mode
== V4HImode
7440 || mode
== V8QImode
|| mode
== V2SFmode
7441 || mode
== V4SFmode
|| mode
== V2DFmode
7442 || mode
== V1DFmode
))
7448 /* Return appropriate SIMD container
7449 for MODE within a vector of WIDTH bits. */
7450 static enum machine_mode
7451 aarch64_simd_container_mode (enum machine_mode mode
, unsigned width
)
7453 gcc_assert (width
== 64 || width
== 128);
7492 /* Return 128-bit container as the preferred SIMD mode for MODE. */
7493 static enum machine_mode
7494 aarch64_preferred_simd_mode (enum machine_mode mode
)
7496 return aarch64_simd_container_mode (mode
, 128);
7499 /* Return the bitmask of possible vector sizes for the vectorizer
7502 aarch64_autovectorize_vector_sizes (void)
7507 /* A table to help perform AArch64-specific name mangling for AdvSIMD
7508 vector types in order to conform to the AAPCS64 (see "Procedure
7509 Call Standard for the ARM 64-bit Architecture", Appendix A). To
7510 qualify for emission with the mangled names defined in that document,
7511 a vector type must not only be of the correct mode but also be
7512 composed of AdvSIMD vector element types (e.g.
7513 _builtin_aarch64_simd_qi); these types are registered by
7514 aarch64_init_simd_builtins (). In other words, vector types defined
7515 in other ways e.g. via vector_size attribute will get default
7519 enum machine_mode mode
;
7520 const char *element_type_name
;
7521 const char *mangled_name
;
7522 } aarch64_simd_mangle_map_entry
;
7524 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map
[] = {
7525 /* 64-bit containerized types. */
7526 { V8QImode
, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
7527 { V8QImode
, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
7528 { V4HImode
, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
7529 { V4HImode
, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
7530 { V2SImode
, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
7531 { V2SImode
, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
7532 { V2SFmode
, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
7533 { DImode
, "__builtin_aarch64_simd_di", "11__Int64x1_t" },
7534 { DImode
, "__builtin_aarch64_simd_udi", "12__Uint64x1_t" },
7535 { V1DFmode
, "__builtin_aarch64_simd_df", "13__Float64x1_t" },
7536 { V8QImode
, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
7537 { V4HImode
, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
7538 /* 128-bit containerized types. */
7539 { V16QImode
, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
7540 { V16QImode
, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
7541 { V8HImode
, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
7542 { V8HImode
, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
7543 { V4SImode
, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
7544 { V4SImode
, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
7545 { V2DImode
, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
7546 { V2DImode
, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
7547 { V4SFmode
, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
7548 { V2DFmode
, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
7549 { V16QImode
, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
7550 { V8HImode
, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
7551 { V2DImode
, "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
7552 { VOIDmode
, NULL
, NULL
}
7555 /* Implement TARGET_MANGLE_TYPE. */
7558 aarch64_mangle_type (const_tree type
)
7560 /* The AArch64 ABI documents say that "__va_list" has to be
7561 managled as if it is in the "std" namespace. */
7562 if (lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
7563 return "St9__va_list";
7565 /* Check the mode of the vector type, and the name of the vector
7566 element type, against the table. */
7567 if (TREE_CODE (type
) == VECTOR_TYPE
)
7569 aarch64_simd_mangle_map_entry
*pos
= aarch64_simd_mangle_map
;
7571 while (pos
->mode
!= VOIDmode
)
7573 tree elt_type
= TREE_TYPE (type
);
7575 if (pos
->mode
== TYPE_MODE (type
)
7576 && TREE_CODE (TYPE_NAME (elt_type
)) == TYPE_DECL
7577 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type
))),
7578 pos
->element_type_name
))
7579 return pos
->mangled_name
;
7585 /* Use the default mangling. */
/* Return the AdvSIMD assembler width letter for an element of SIZE bits;
   any size other than 8/16/32/64 is a caller bug.  */
static char
sizetochar (int size)
{
  switch (size)
    {
    case 8:  return 'b';
    case 16: return 'h';
    case 32: return 's';
    case 64: return 'd';
    default: gcc_unreachable ();
    }
}
7603 /* Return true iff x is a uniform vector of floating-point
7604 constants, and the constant can be represented in
7605 quarter-precision form. Note, as aarch64_float_const_representable
7606 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
7608 aarch64_vect_float_const_representable_p (rtx x
)
7611 REAL_VALUE_TYPE r0
, ri
;
7614 if (GET_MODE_CLASS (GET_MODE (x
)) != MODE_VECTOR_FLOAT
)
7617 x0
= CONST_VECTOR_ELT (x
, 0);
7618 if (!CONST_DOUBLE_P (x0
))
7621 REAL_VALUE_FROM_CONST_DOUBLE (r0
, x0
);
7623 for (i
= 1; i
< CONST_VECTOR_NUNITS (x
); i
++)
7625 xi
= CONST_VECTOR_ELT (x
, i
);
7626 if (!CONST_DOUBLE_P (xi
))
7629 REAL_VALUE_FROM_CONST_DOUBLE (ri
, xi
);
7630 if (!REAL_VALUES_EQUAL (r0
, ri
))
7634 return aarch64_float_const_representable_p (x0
);
7637 /* Return true for valid and false for invalid. */
7639 aarch64_simd_valid_immediate (rtx op
, enum machine_mode mode
, bool inverse
,
7640 struct simd_immediate_info
*info
)
7642 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
7644 for (i = 0; i < idx; i += (STRIDE)) \
7649 immtype = (CLASS); \
7650 elsize = (ELSIZE); \
7656 unsigned int i
, elsize
= 0, idx
= 0, n_elts
= CONST_VECTOR_NUNITS (op
);
7657 unsigned int innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
7658 unsigned char bytes
[16];
7659 int immtype
= -1, matches
;
7660 unsigned int invmask
= inverse
? 0xff : 0;
7663 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
7665 if (! (aarch64_simd_imm_zero_p (op
, mode
)
7666 || aarch64_vect_float_const_representable_p (op
)))
7671 info
->value
= CONST_VECTOR_ELT (op
, 0);
7672 info
->element_width
= GET_MODE_BITSIZE (GET_MODE (info
->value
));
7680 /* Splat vector constant out into a byte vector. */
7681 for (i
= 0; i
< n_elts
; i
++)
7683 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
7684 it must be laid out in the vector register in reverse order. */
7685 rtx el
= CONST_VECTOR_ELT (op
, BYTES_BIG_ENDIAN
? (n_elts
- 1 - i
) : i
);
7686 unsigned HOST_WIDE_INT elpart
;
7687 unsigned int part
, parts
;
7689 if (CONST_INT_P (el
))
7691 elpart
= INTVAL (el
);
7694 else if (GET_CODE (el
) == CONST_DOUBLE
)
7696 elpart
= CONST_DOUBLE_LOW (el
);
7702 for (part
= 0; part
< parts
; part
++)
7705 for (byte
= 0; byte
< innersize
; byte
++)
7707 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
7708 elpart
>>= BITS_PER_UNIT
;
7710 if (GET_CODE (el
) == CONST_DOUBLE
)
7711 elpart
= CONST_DOUBLE_HIGH (el
);
7716 gcc_assert (idx
== GET_MODE_SIZE (mode
));
7720 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
7721 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 0, 0);
7723 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
7724 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 8, 0);
7726 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
7727 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0, 16, 0);
7729 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
7730 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3], 24, 0);
7732 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0, 0, 0);
7734 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1], 8, 0);
7736 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
7737 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 0, 1);
7739 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
7740 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 8, 1);
7742 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
7743 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff, 16, 1);
7745 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
7746 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3], 24, 1);
7748 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff, 0, 1);
7750 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1], 8, 1);
7752 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
7753 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 8, 0);
7755 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
7756 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 8, 1);
7758 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
7759 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0, 16, 0);
7761 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
7762 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff, 16, 1);
7764 CHECK (1, 8, 16, bytes
[i
] == bytes
[0], 0, 0);
7766 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
7767 && bytes
[i
] == bytes
[(i
+ 8) % idx
], 0, 0);
7776 info
->element_width
= elsize
;
7777 info
->mvn
= emvn
!= 0;
7778 info
->shift
= eshift
;
7780 unsigned HOST_WIDE_INT imm
= 0;
7782 if (immtype
>= 12 && immtype
<= 15)
7785 /* Un-invert bytes of recognized vector, if necessary. */
7787 for (i
= 0; i
< idx
; i
++)
7788 bytes
[i
] ^= invmask
;
7792 /* FIXME: Broken on 32-bit H_W_I hosts. */
7793 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
7795 for (i
= 0; i
< 8; i
++)
7796 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
7797 << (i
* BITS_PER_UNIT
);
7800 info
->value
= GEN_INT (imm
);
7804 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
7805 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
7807 /* Construct 'abcdefgh' because the assembler cannot handle
7808 generic constants. */
7811 imm
= (imm
>> info
->shift
) & 0xff;
7812 info
->value
= GEN_INT (imm
);
7820 /* Check of immediate shift constants are within range. */
7822 aarch64_simd_shift_imm_p (rtx x
, enum machine_mode mode
, bool left
)
7824 int bit_width
= GET_MODE_UNIT_SIZE (mode
) * BITS_PER_UNIT
;
7826 return aarch64_const_vec_all_same_in_range_p (x
, 0, bit_width
- 1);
7828 return aarch64_const_vec_all_same_in_range_p (x
, 1, bit_width
);
7831 /* Return true if X is a uniform vector where all elements
7832 are either the floating-point constant 0.0 or the
7833 integer constant 0. */
7835 aarch64_simd_imm_zero_p (rtx x
, enum machine_mode mode
)
7837 return x
== CONST0_RTX (mode
);
7841 aarch64_simd_imm_scalar_p (rtx x
, enum machine_mode mode ATTRIBUTE_UNUSED
)
7843 HOST_WIDE_INT imm
= INTVAL (x
);
7846 for (i
= 0; i
< 8; i
++)
7848 unsigned int byte
= imm
& 0xff;
7849 if (byte
!= 0xff && byte
!= 0)
7858 aarch64_mov_operand_p (rtx x
,
7859 enum aarch64_symbol_context context
,
7860 enum machine_mode mode
)
7862 if (GET_CODE (x
) == HIGH
7863 && aarch64_valid_symref (XEXP (x
, 0), GET_MODE (XEXP (x
, 0))))
7866 if (CONST_INT_P (x
) && aarch64_move_imm (INTVAL (x
), mode
))
7869 if (GET_CODE (x
) == SYMBOL_REF
&& mode
== DImode
&& CONSTANT_ADDRESS_P (x
))
7872 return aarch64_classify_symbolic_expression (x
, context
)
7873 == SYMBOL_TINY_ABSOLUTE
;
7876 /* Return a const_int vector of VAL. */
7878 aarch64_simd_gen_const_vector_dup (enum machine_mode mode
, int val
)
7880 int nunits
= GET_MODE_NUNITS (mode
);
7881 rtvec v
= rtvec_alloc (nunits
);
7884 for (i
=0; i
< nunits
; i
++)
7885 RTVEC_ELT (v
, i
) = GEN_INT (val
);
7887 return gen_rtx_CONST_VECTOR (mode
, v
);
7890 /* Check OP is a legal scalar immediate for the MOVI instruction. */
7893 aarch64_simd_scalar_immediate_valid_for_move (rtx op
, enum machine_mode mode
)
7895 enum machine_mode vmode
;
7897 gcc_assert (!VECTOR_MODE_P (mode
));
7898 vmode
= aarch64_preferred_simd_mode (mode
);
7899 rtx op_v
= aarch64_simd_gen_const_vector_dup (vmode
, INTVAL (op
));
7900 return aarch64_simd_valid_immediate (op_v
, vmode
, false, NULL
);
7903 /* Construct and return a PARALLEL RTX vector with elements numbering the
7904 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
7905 the vector - from the perspective of the architecture. This does not
7906 line up with GCC's perspective on lane numbers, so we end up with
7907 different masks depending on our target endian-ness. The diagram
7908 below may help. We must draw the distinction when building masks
7909 which select one half of the vector. An instruction selecting
7910 architectural low-lanes for a big-endian target, must be described using
7911 a mask selecting GCC high-lanes.
7913 Big-Endian Little-Endian
7916 | x | x | x | x | | x | x | x | x |
7917 Architecture 3 2 1 0 3 2 1 0
7919 Low Mask: { 2, 3 } { 0, 1 }
7920 High Mask: { 0, 1 } { 2, 3 }
7924 aarch64_simd_vect_par_cnst_half (enum machine_mode mode
, bool high
)
7926 int nunits
= GET_MODE_NUNITS (mode
);
7927 rtvec v
= rtvec_alloc (nunits
/ 2);
7928 int high_base
= nunits
/ 2;
7934 if (BYTES_BIG_ENDIAN
)
7935 base
= high
? low_base
: high_base
;
7937 base
= high
? high_base
: low_base
;
7939 for (i
= 0; i
< nunits
/ 2; i
++)
7940 RTVEC_ELT (v
, i
) = GEN_INT (base
+ i
);
7942 t1
= gen_rtx_PARALLEL (mode
, v
);
7946 /* Check OP for validity as a PARALLEL RTX vector with elements
7947 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
7948 from the perspective of the architecture. See the diagram above
7949 aarch64_simd_vect_par_cnst_half for more details. */
7952 aarch64_simd_check_vect_par_cnst_half (rtx op
, enum machine_mode mode
,
7955 rtx ideal
= aarch64_simd_vect_par_cnst_half (mode
, high
);
7956 HOST_WIDE_INT count_op
= XVECLEN (op
, 0);
7957 HOST_WIDE_INT count_ideal
= XVECLEN (ideal
, 0);
7960 if (!VECTOR_MODE_P (mode
))
7963 if (count_op
!= count_ideal
)
7966 for (i
= 0; i
< count_ideal
; i
++)
7968 rtx elt_op
= XVECEXP (op
, 0, i
);
7969 rtx elt_ideal
= XVECEXP (ideal
, 0, i
);
7971 if (!CONST_INT_P (elt_op
)
7972 || INTVAL (elt_ideal
) != INTVAL (elt_op
))
7978 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
7979 HIGH (exclusive). */
7981 aarch64_simd_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
7984 gcc_assert (CONST_INT_P (operand
));
7985 lane
= INTVAL (operand
);
7987 if (lane
< low
|| lane
>= high
)
7988 error ("lane out of range");
7991 /* Emit code to place a AdvSIMD pair result in memory locations (with equal
7994 aarch64_simd_emit_pair_result_insn (enum machine_mode mode
,
7995 rtx (*intfn
) (rtx
, rtx
, rtx
), rtx destaddr
,
7998 rtx mem
= gen_rtx_MEM (mode
, destaddr
);
7999 rtx tmp1
= gen_reg_rtx (mode
);
8000 rtx tmp2
= gen_reg_rtx (mode
);
8002 emit_insn (intfn (tmp1
, op1
, tmp2
));
8004 emit_move_insn (mem
, tmp1
);
8005 mem
= adjust_address (mem
, mode
, GET_MODE_SIZE (mode
));
8006 emit_move_insn (mem
, tmp2
);
8009 /* Return TRUE if OP is a valid vector addressing mode. */
8011 aarch64_simd_mem_operand_p (rtx op
)
8013 return MEM_P (op
) && (GET_CODE (XEXP (op
, 0)) == POST_INC
8014 || REG_P (XEXP (op
, 0)));
8017 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
8018 not to early-clobber SRC registers in the process.
8020 We assume that the operands described by SRC and DEST represent a
8021 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
8022 number of components into which the copy has been decomposed. */
8024 aarch64_simd_disambiguate_copy (rtx
*operands
, rtx
*dest
,
8025 rtx
*src
, unsigned int count
)
8029 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
8030 || REGNO (operands
[0]) < REGNO (operands
[1]))
8032 for (i
= 0; i
< count
; i
++)
8034 operands
[2 * i
] = dest
[i
];
8035 operands
[2 * i
+ 1] = src
[i
];
8040 for (i
= 0; i
< count
; i
++)
8042 operands
[2 * i
] = dest
[count
- i
- 1];
8043 operands
[2 * i
+ 1] = src
[count
- i
- 1];
8048 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
8049 one of VSTRUCT modes: OI, CI or XI. */
8051 aarch64_simd_attr_length_move (rtx_insn
*insn
)
8053 enum machine_mode mode
;
8055 extract_insn_cached (insn
);
8057 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
8059 mode
= GET_MODE (recog_data
.operand
[0]);
8075 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
8076 alignment of a vector to 128 bits. */
8077 static HOST_WIDE_INT
8078 aarch64_simd_vector_alignment (const_tree type
)
8080 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
8081 return MIN (align
, 128);
8084 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
8086 aarch64_simd_vector_alignment_reachable (const_tree type
, bool is_packed
)
8091 /* We guarantee alignment for vectors up to 128-bits. */
8092 if (tree_int_cst_compare (TYPE_SIZE (type
),
8093 bitsize_int (BIGGEST_ALIGNMENT
)) > 0)
8096 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
8100 /* If VALS is a vector constant that can be loaded into a register
8101 using DUP, generate instructions to do so and return an RTX to
8102 assign to the register. Otherwise return NULL_RTX. */
8104 aarch64_simd_dup_constant (rtx vals
)
8106 enum machine_mode mode
= GET_MODE (vals
);
8107 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
8108 int n_elts
= GET_MODE_NUNITS (mode
);
8109 bool all_same
= true;
8113 if (GET_CODE (vals
) != CONST_VECTOR
)
8116 for (i
= 1; i
< n_elts
; ++i
)
8118 x
= CONST_VECTOR_ELT (vals
, i
);
8119 if (!rtx_equal_p (x
, CONST_VECTOR_ELT (vals
, 0)))
8126 /* We can load this constant by using DUP and a constant in a
8127 single ARM register. This will be cheaper than a vector
8129 x
= copy_to_mode_reg (inner_mode
, CONST_VECTOR_ELT (vals
, 0));
8130 return gen_rtx_VEC_DUPLICATE (mode
, x
);
8134 /* Generate code to load VALS, which is a PARALLEL containing only
8135 constants (for vec_init) or CONST_VECTOR, efficiently into a
8136 register. Returns an RTX to copy into the register, or NULL_RTX
8137 for a PARALLEL that can not be converted into a CONST_VECTOR. */
8139 aarch64_simd_make_constant (rtx vals
)
8141 enum machine_mode mode
= GET_MODE (vals
);
8143 rtx const_vec
= NULL_RTX
;
8144 int n_elts
= GET_MODE_NUNITS (mode
);
8148 if (GET_CODE (vals
) == CONST_VECTOR
)
8150 else if (GET_CODE (vals
) == PARALLEL
)
8152 /* A CONST_VECTOR must contain only CONST_INTs and
8153 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8154 Only store valid constants in a CONST_VECTOR. */
8155 for (i
= 0; i
< n_elts
; ++i
)
8157 rtx x
= XVECEXP (vals
, 0, i
);
8158 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
8161 if (n_const
== n_elts
)
8162 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
8167 if (const_vec
!= NULL_RTX
8168 && aarch64_simd_valid_immediate (const_vec
, mode
, false, NULL
))
8169 /* Load using MOVI/MVNI. */
8171 else if ((const_dup
= aarch64_simd_dup_constant (vals
)) != NULL_RTX
)
8172 /* Loaded using DUP. */
8174 else if (const_vec
!= NULL_RTX
)
8175 /* Load from constant pool. We can not take advantage of single-cycle
8176 LD1 because we need a PC-relative addressing mode. */
8179 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8180 We can not construct an initializer. */
8185 aarch64_expand_vector_init (rtx target
, rtx vals
)
8187 enum machine_mode mode
= GET_MODE (target
);
8188 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
8189 int n_elts
= GET_MODE_NUNITS (mode
);
8190 int n_var
= 0, one_var
= -1;
8191 bool all_same
= true;
8195 x
= XVECEXP (vals
, 0, 0);
8196 if (!CONST_INT_P (x
) && !CONST_DOUBLE_P (x
))
8197 n_var
= 1, one_var
= 0;
8199 for (i
= 1; i
< n_elts
; ++i
)
8201 x
= XVECEXP (vals
, 0, i
);
8202 if (!CONST_INT_P (x
) && !CONST_DOUBLE_P (x
))
8203 ++n_var
, one_var
= i
;
8205 if (!rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
8211 rtx constant
= aarch64_simd_make_constant (vals
);
8212 if (constant
!= NULL_RTX
)
8214 emit_move_insn (target
, constant
);
8219 /* Splat a single non-constant element if we can. */
8222 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
8223 aarch64_emit_move (target
, gen_rtx_VEC_DUPLICATE (mode
, x
));
8227 /* One field is non-constant. Load constant then overwrite varying
8228 field. This is more efficient than using the stack. */
8231 rtx copy
= copy_rtx (vals
);
8232 rtx index
= GEN_INT (one_var
);
8233 enum insn_code icode
;
8235 /* Load constant part of vector, substitute neighboring value for
8237 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, one_var
^ 1);
8238 aarch64_expand_vector_init (target
, copy
);
8240 /* Insert variable. */
8241 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
8242 icode
= optab_handler (vec_set_optab
, mode
);
8243 gcc_assert (icode
!= CODE_FOR_nothing
);
8244 emit_insn (GEN_FCN (icode
) (target
, x
, index
));
8248 /* Construct the vector in memory one field at a time
8249 and load the whole vector. */
8250 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
8251 for (i
= 0; i
< n_elts
; i
++)
8252 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
8253 i
* GET_MODE_SIZE (inner_mode
)),
8254 XVECEXP (vals
, 0, i
));
8255 emit_move_insn (target
, mem
);
8259 static unsigned HOST_WIDE_INT
8260 aarch64_shift_truncation_mask (enum machine_mode mode
)
8263 (aarch64_vector_mode_supported_p (mode
)
8264 || aarch64_vect_struct_mode_p (mode
)) ? 0 : (GET_MODE_BITSIZE (mode
) - 1);
8267 #ifndef TLS_SECTION_ASM_FLAG
8268 #define TLS_SECTION_ASM_FLAG 'T'
8272 aarch64_elf_asm_named_section (const char *name
, unsigned int flags
,
8273 tree decl ATTRIBUTE_UNUSED
)
8275 char flagchars
[10], *f
= flagchars
;
8277 /* If we have already declared this section, we can use an
8278 abbreviated form to switch back to it -- unless this section is
8279 part of a COMDAT groups, in which case GAS requires the full
8280 declaration every time. */
8281 if (!(HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
8282 && (flags
& SECTION_DECLARED
))
8284 fprintf (asm_out_file
, "\t.section\t%s\n", name
);
8288 if (!(flags
& SECTION_DEBUG
))
8290 if (flags
& SECTION_WRITE
)
8292 if (flags
& SECTION_CODE
)
8294 if (flags
& SECTION_SMALL
)
8296 if (flags
& SECTION_MERGE
)
8298 if (flags
& SECTION_STRINGS
)
8300 if (flags
& SECTION_TLS
)
8301 *f
++ = TLS_SECTION_ASM_FLAG
;
8302 if (HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
8306 fprintf (asm_out_file
, "\t.section\t%s,\"%s\"", name
, flagchars
);
8308 if (!(flags
& SECTION_NOTYPE
))
8313 if (flags
& SECTION_BSS
)
8318 #ifdef TYPE_OPERAND_FMT
8319 format
= "," TYPE_OPERAND_FMT
;
8324 fprintf (asm_out_file
, format
, type
);
8326 if (flags
& SECTION_ENTSIZE
)
8327 fprintf (asm_out_file
, ",%d", flags
& SECTION_ENTSIZE
);
8328 if (HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
8330 if (TREE_CODE (decl
) == IDENTIFIER_NODE
)
8331 fprintf (asm_out_file
, ",%s,comdat", IDENTIFIER_POINTER (decl
));
8333 fprintf (asm_out_file
, ",%s,comdat",
8334 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl
)));
8338 putc ('\n', asm_out_file
);
8341 /* Select a format to encode pointers in exception handling data. */
8343 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED
, int global
)
8346 switch (aarch64_cmodel
)
8348 case AARCH64_CMODEL_TINY
:
8349 case AARCH64_CMODEL_TINY_PIC
:
8350 case AARCH64_CMODEL_SMALL
:
8351 case AARCH64_CMODEL_SMALL_PIC
:
8352 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
8354 type
= DW_EH_PE_sdata4
;
8357 /* No assumptions here. 8-byte relocs required. */
8358 type
= DW_EH_PE_sdata8
;
8361 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
8364 /* Emit load exclusive. */
8367 aarch64_emit_load_exclusive (enum machine_mode mode
, rtx rval
,
8368 rtx mem
, rtx model_rtx
)
8370 rtx (*gen
) (rtx
, rtx
, rtx
);
8374 case QImode
: gen
= gen_aarch64_load_exclusiveqi
; break;
8375 case HImode
: gen
= gen_aarch64_load_exclusivehi
; break;
8376 case SImode
: gen
= gen_aarch64_load_exclusivesi
; break;
8377 case DImode
: gen
= gen_aarch64_load_exclusivedi
; break;
8382 emit_insn (gen (rval
, mem
, model_rtx
));
8385 /* Emit store exclusive. */
8388 aarch64_emit_store_exclusive (enum machine_mode mode
, rtx bval
,
8389 rtx rval
, rtx mem
, rtx model_rtx
)
8391 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
8395 case QImode
: gen
= gen_aarch64_store_exclusiveqi
; break;
8396 case HImode
: gen
= gen_aarch64_store_exclusivehi
; break;
8397 case SImode
: gen
= gen_aarch64_store_exclusivesi
; break;
8398 case DImode
: gen
= gen_aarch64_store_exclusivedi
; break;
8403 emit_insn (gen (bval
, rval
, mem
, model_rtx
));
8406 /* Mark the previous jump instruction as unlikely. */
8409 aarch64_emit_unlikely_jump (rtx insn
)
8411 int very_unlikely
= REG_BR_PROB_BASE
/ 100 - 1;
8413 insn
= emit_jump_insn (insn
);
8414 add_int_reg_note (insn
, REG_BR_PROB
, very_unlikely
);
8417 /* Expand a compare and swap pattern. */
8420 aarch64_expand_compare_and_swap (rtx operands
[])
8422 rtx bval
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
8423 enum machine_mode mode
, cmp_mode
;
8424 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
8429 oldval
= operands
[3];
8430 newval
= operands
[4];
8431 is_weak
= operands
[5];
8432 mod_s
= operands
[6];
8433 mod_f
= operands
[7];
8434 mode
= GET_MODE (mem
);
8437 /* Normally the succ memory model must be stronger than fail, but in the
8438 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
8439 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
8441 if (INTVAL (mod_f
) == MEMMODEL_ACQUIRE
8442 && INTVAL (mod_s
) == MEMMODEL_RELEASE
)
8443 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
8449 /* For short modes, we're going to perform the comparison in SImode,
8450 so do the zero-extension now. */
8452 rval
= gen_reg_rtx (SImode
);
8453 oldval
= convert_modes (SImode
, mode
, oldval
, true);
8458 /* Force the value into a register if needed. */
8459 if (!aarch64_plus_operand (oldval
, mode
))
8460 oldval
= force_reg (cmp_mode
, oldval
);
8469 case QImode
: gen
= gen_atomic_compare_and_swapqi_1
; break;
8470 case HImode
: gen
= gen_atomic_compare_and_swaphi_1
; break;
8471 case SImode
: gen
= gen_atomic_compare_and_swapsi_1
; break;
8472 case DImode
: gen
= gen_atomic_compare_and_swapdi_1
; break;
8477 emit_insn (gen (rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
8479 if (mode
== QImode
|| mode
== HImode
)
8480 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
8482 x
= gen_rtx_REG (CCmode
, CC_REGNUM
);
8483 x
= gen_rtx_EQ (SImode
, x
, const0_rtx
);
8484 emit_insn (gen_rtx_SET (VOIDmode
, bval
, x
));
8487 /* Split a compare and swap pattern. */
8490 aarch64_split_compare_and_swap (rtx operands
[])
8492 rtx rval
, mem
, oldval
, newval
, scratch
;
8493 enum machine_mode mode
;
8495 rtx_code_label
*label1
, *label2
;
8500 oldval
= operands
[2];
8501 newval
= operands
[3];
8502 is_weak
= (operands
[4] != const0_rtx
);
8503 scratch
= operands
[7];
8504 mode
= GET_MODE (mem
);
8509 label1
= gen_label_rtx ();
8510 emit_label (label1
);
8512 label2
= gen_label_rtx ();
8514 aarch64_emit_load_exclusive (mode
, rval
, mem
, operands
[5]);
8516 cond
= aarch64_gen_compare_reg (NE
, rval
, oldval
);
8517 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
8518 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
8519 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
8520 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
8522 aarch64_emit_store_exclusive (mode
, scratch
, mem
, newval
, operands
[5]);
8526 x
= gen_rtx_NE (VOIDmode
, scratch
, const0_rtx
);
8527 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
8528 gen_rtx_LABEL_REF (Pmode
, label1
), pc_rtx
);
8529 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
8533 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
8534 x
= gen_rtx_COMPARE (CCmode
, scratch
, const0_rtx
);
8535 emit_insn (gen_rtx_SET (VOIDmode
, cond
, x
));
8538 emit_label (label2
);
8541 /* Split an atomic operation. */
8544 aarch64_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
8545 rtx value
, rtx model_rtx
, rtx cond
)
8547 enum machine_mode mode
= GET_MODE (mem
);
8548 enum machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
8549 rtx_code_label
*label
;
8552 label
= gen_label_rtx ();
8556 new_out
= gen_lowpart (wmode
, new_out
);
8558 old_out
= gen_lowpart (wmode
, old_out
);
8561 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
8563 aarch64_emit_load_exclusive (mode
, old_out
, mem
, model_rtx
);
8572 x
= gen_rtx_AND (wmode
, old_out
, value
);
8573 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
8574 x
= gen_rtx_NOT (wmode
, new_out
);
8575 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
8579 if (CONST_INT_P (value
))
8581 value
= GEN_INT (-INTVAL (value
));
8587 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
8588 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
8592 aarch64_emit_store_exclusive (mode
, cond
, mem
,
8593 gen_lowpart (mode
, new_out
), model_rtx
);
8595 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
8596 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
8597 gen_rtx_LABEL_REF (Pmode
, label
), pc_rtx
);
8598 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
8602 aarch64_print_extension (void)
8604 const struct aarch64_option_extension
*opt
= NULL
;
8606 for (opt
= all_extensions
; opt
->name
!= NULL
; opt
++)
8607 if ((aarch64_isa_flags
& opt
->flags_on
) == opt
->flags_on
)
8608 asm_fprintf (asm_out_file
, "+%s", opt
->name
);
8610 asm_fprintf (asm_out_file
, "\n");
8614 aarch64_start_file (void)
8618 asm_fprintf (asm_out_file
, "\t.arch %s", selected_arch
->name
);
8619 aarch64_print_extension ();
8621 else if (selected_cpu
)
8623 const char *truncated_name
8624 = aarch64_rewrite_selected_cpu (selected_cpu
->name
);
8625 asm_fprintf (asm_out_file
, "\t.cpu %s", truncated_name
);
8626 aarch64_print_extension ();
8628 default_file_start();
8631 /* Target hook for c_mode_for_suffix. */
8632 static enum machine_mode
8633 aarch64_c_mode_for_suffix (char suffix
)
8641 /* We can only represent floating point constants which will fit in
8642 "quarter-precision" values. These values are characterised by
8643 a sign bit, a 4-bit mantissa and a 3-bit exponent. And are given
8646 (-1)^s * (n/16) * 2^r
8649 's' is the sign bit.
8650 'n' is an integer in the range 16 <= n <= 31.
8651 'r' is an integer in the range -3 <= r <= 4. */
8653 /* Return true iff X can be represented by a quarter-precision
8654 floating point immediate operand X. Note, we cannot represent 0.0. */
8656 aarch64_float_const_representable_p (rtx x
)
8658 /* This represents our current view of how many bits
8659 make up the mantissa. */
8660 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
8662 unsigned HOST_WIDE_INT mantissa
, mask
;
8663 REAL_VALUE_TYPE r
, m
;
8666 if (!CONST_DOUBLE_P (x
))
8669 if (GET_MODE (x
) == VOIDmode
)
8672 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8674 /* We cannot represent infinities, NaNs or +/-zero. We won't
8675 know if we have +zero until we analyse the mantissa, but we
8676 can reject the other invalid values. */
8677 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
)
8678 || REAL_VALUE_MINUS_ZERO (r
))
8681 /* Extract exponent. */
8682 r
= real_value_abs (&r
);
8683 exponent
= REAL_EXP (&r
);
8685 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8686 highest (sign) bit, with a fixed binary point at bit point_pos.
8687 m1 holds the low part of the mantissa, m2 the high part.
8688 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
8689 bits for the mantissa, this can fail (low bits will be lost). */
8690 real_ldexp (&m
, &r
, point_pos
- exponent
);
8691 wide_int w
= real_to_integer (&m
, &fail
, HOST_BITS_PER_WIDE_INT
* 2);
8693 /* If the low part of the mantissa has bits set we cannot represent
8697 /* We have rejected the lower HOST_WIDE_INT, so update our
8698 understanding of how many bits lie in the mantissa and
8699 look only at the high HOST_WIDE_INT. */
8700 mantissa
= w
.elt (1);
8701 point_pos
-= HOST_BITS_PER_WIDE_INT
;
8703 /* We can only represent values with a mantissa of the form 1.xxxx. */
8704 mask
= ((unsigned HOST_WIDE_INT
)1 << (point_pos
- 5)) - 1;
8705 if ((mantissa
& mask
) != 0)
8708 /* Having filtered unrepresentable values, we may now remove all
8709 but the highest 5 bits. */
8710 mantissa
>>= point_pos
- 5;
8712 /* We cannot represent the value 0.0, so reject it. This is handled
8717 /* Then, as bit 4 is always set, we can mask it off, leaving
8718 the mantissa in the range [0, 15]. */
8719 mantissa
&= ~(1 << 4);
8720 gcc_assert (mantissa
<= 15);
8722 /* GCC internally does not use IEEE754-like encoding (where normalized
8723 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
8724 Our mantissa values are shifted 4 places to the left relative to
8725 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
8726 by 5 places to correct for GCC's representation. */
8727 exponent
= 5 - exponent
;
8729 return (exponent
>= 0 && exponent
<= 7);
8733 aarch64_output_simd_mov_immediate (rtx const_vector
,
8734 enum machine_mode mode
,
8738 static char templ
[40];
8739 const char *mnemonic
;
8740 const char *shift_op
;
8741 unsigned int lane_count
= 0;
8744 struct simd_immediate_info info
= { NULL_RTX
, 0, 0, false, false };
8746 /* This will return true to show const_vector is legal for use as either
8747 a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
8748 also update INFO to show how the immediate should be generated. */
8749 is_valid
= aarch64_simd_valid_immediate (const_vector
, mode
, false, &info
);
8750 gcc_assert (is_valid
);
8752 element_char
= sizetochar (info
.element_width
);
8753 lane_count
= width
/ info
.element_width
;
8755 mode
= GET_MODE_INNER (mode
);
8756 if (mode
== SFmode
|| mode
== DFmode
)
8758 gcc_assert (info
.shift
== 0 && ! info
.mvn
);
8759 if (aarch64_float_const_zero_rtx_p (info
.value
))
8760 info
.value
= GEN_INT (0);
8765 REAL_VALUE_FROM_CONST_DOUBLE (r
, info
.value
);
8766 char float_buf
[buf_size
] = {'\0'};
8767 real_to_decimal_for_mode (float_buf
, &r
, buf_size
, buf_size
, 1, mode
);
8770 if (lane_count
== 1)
8771 snprintf (templ
, sizeof (templ
), "fmov\t%%d0, %s", float_buf
);
8773 snprintf (templ
, sizeof (templ
), "fmov\t%%0.%d%c, %s",
8774 lane_count
, element_char
, float_buf
);
8779 mnemonic
= info
.mvn
? "mvni" : "movi";
8780 shift_op
= info
.msl
? "msl" : "lsl";
8782 if (lane_count
== 1)
8783 snprintf (templ
, sizeof (templ
), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX
,
8784 mnemonic
, UINTVAL (info
.value
));
8785 else if (info
.shift
)
8786 snprintf (templ
, sizeof (templ
), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
8787 ", %s %d", mnemonic
, lane_count
, element_char
,
8788 UINTVAL (info
.value
), shift_op
, info
.shift
);
8790 snprintf (templ
, sizeof (templ
), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
,
8791 mnemonic
, lane_count
, element_char
, UINTVAL (info
.value
));
8796 aarch64_output_scalar_simd_mov_immediate (rtx immediate
,
8797 enum machine_mode mode
)
8799 enum machine_mode vmode
;
8801 gcc_assert (!VECTOR_MODE_P (mode
));
8802 vmode
= aarch64_simd_container_mode (mode
, 64);
8803 rtx v_op
= aarch64_simd_gen_const_vector_dup (vmode
, INTVAL (immediate
));
8804 return aarch64_output_simd_mov_immediate (v_op
, vmode
, 64);
8807 /* Split operands into moves from op[1] + op[2] into op[0]. */
8810 aarch64_split_combinev16qi (rtx operands
[3])
8812 unsigned int dest
= REGNO (operands
[0]);
8813 unsigned int src1
= REGNO (operands
[1]);
8814 unsigned int src2
= REGNO (operands
[2]);
8815 enum machine_mode halfmode
= GET_MODE (operands
[1]);
8816 unsigned int halfregs
= HARD_REGNO_NREGS (src1
, halfmode
);
8819 gcc_assert (halfmode
== V16QImode
);
8821 if (src1
== dest
&& src2
== dest
+ halfregs
)
8823 /* No-op move. Can't split to nothing; emit something. */
8824 emit_note (NOTE_INSN_DELETED
);
8828 /* Preserve register attributes for variable tracking. */
8829 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
8830 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
8831 GET_MODE_SIZE (halfmode
));
8833 /* Special case of reversed high/low parts. */
8834 if (reg_overlap_mentioned_p (operands
[2], destlo
)
8835 && reg_overlap_mentioned_p (operands
[1], desthi
))
8837 emit_insn (gen_xorv16qi3 (operands
[1], operands
[1], operands
[2]));
8838 emit_insn (gen_xorv16qi3 (operands
[2], operands
[1], operands
[2]));
8839 emit_insn (gen_xorv16qi3 (operands
[1], operands
[1], operands
[2]));
8841 else if (!reg_overlap_mentioned_p (operands
[2], destlo
))
8843 /* Try to avoid unnecessary moves if part of the result
8844 is in the right place already. */
8846 emit_move_insn (destlo
, operands
[1]);
8847 if (src2
!= dest
+ halfregs
)
8848 emit_move_insn (desthi
, operands
[2]);
8852 if (src2
!= dest
+ halfregs
)
8853 emit_move_insn (desthi
, operands
[2]);
8855 emit_move_insn (destlo
, operands
[1]);
8859 /* vec_perm support. */
8861 #define MAX_VECT_LEN 16
8863 struct expand_vec_perm_d
8865 rtx target
, op0
, op1
;
8866 unsigned char perm
[MAX_VECT_LEN
];
8867 enum machine_mode vmode
;
8873 /* Generate a variable permutation. */
8876 aarch64_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
8878 enum machine_mode vmode
= GET_MODE (target
);
8879 bool one_vector_p
= rtx_equal_p (op0
, op1
);
8881 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
8882 gcc_checking_assert (GET_MODE (op0
) == vmode
);
8883 gcc_checking_assert (GET_MODE (op1
) == vmode
);
8884 gcc_checking_assert (GET_MODE (sel
) == vmode
);
8885 gcc_checking_assert (TARGET_SIMD
);
8889 if (vmode
== V8QImode
)
8891 /* Expand the argument to a V16QI mode by duplicating it. */
8892 rtx pair
= gen_reg_rtx (V16QImode
);
8893 emit_insn (gen_aarch64_combinev8qi (pair
, op0
, op0
));
8894 emit_insn (gen_aarch64_tbl1v8qi (target
, pair
, sel
));
8898 emit_insn (gen_aarch64_tbl1v16qi (target
, op0
, sel
));
8905 if (vmode
== V8QImode
)
8907 pair
= gen_reg_rtx (V16QImode
);
8908 emit_insn (gen_aarch64_combinev8qi (pair
, op0
, op1
));
8909 emit_insn (gen_aarch64_tbl1v8qi (target
, pair
, sel
));
8913 pair
= gen_reg_rtx (OImode
);
8914 emit_insn (gen_aarch64_combinev16qi (pair
, op0
, op1
));
8915 emit_insn (gen_aarch64_tbl2v16qi (target
, pair
, sel
));
8921 aarch64_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
8923 enum machine_mode vmode
= GET_MODE (target
);
8924 unsigned int nelt
= GET_MODE_NUNITS (vmode
);
8925 bool one_vector_p
= rtx_equal_p (op0
, op1
);
8928 /* The TBL instruction does not use a modulo index, so we must take care
8929 of that ourselves. */
8930 mask
= aarch64_simd_gen_const_vector_dup (vmode
,
8931 one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
8932 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
8934 /* For big-endian, we also need to reverse the index within the vector
8935 (but not which vector). */
8936 if (BYTES_BIG_ENDIAN
)
8938 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
8940 mask
= aarch64_simd_gen_const_vector_dup (vmode
, nelt
- 1);
8941 sel
= expand_simple_binop (vmode
, XOR
, sel
, mask
,
8942 NULL
, 0, OPTAB_LIB_WIDEN
);
8944 aarch64_expand_vec_perm_1 (target
, op0
, op1
, sel
);
8947 /* Recognize patterns suitable for the TRN instructions. */
8949 aarch64_evpc_trn (struct expand_vec_perm_d
*d
)
8951 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
8952 rtx out
, in0
, in1
, x
;
8953 rtx (*gen
) (rtx
, rtx
, rtx
);
8954 enum machine_mode vmode
= d
->vmode
;
8956 if (GET_MODE_UNIT_SIZE (vmode
) > 8)
8959 /* Note that these are little-endian tests.
8960 We correct for big-endian later. */
8961 if (d
->perm
[0] == 0)
8963 else if (d
->perm
[0] == 1)
8967 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
8969 for (i
= 0; i
< nelt
; i
+= 2)
8971 if (d
->perm
[i
] != i
+ odd
)
8973 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
8983 if (BYTES_BIG_ENDIAN
)
8985 x
= in0
, in0
= in1
, in1
= x
;
8994 case V16QImode
: gen
= gen_aarch64_trn2v16qi
; break;
8995 case V8QImode
: gen
= gen_aarch64_trn2v8qi
; break;
8996 case V8HImode
: gen
= gen_aarch64_trn2v8hi
; break;
8997 case V4HImode
: gen
= gen_aarch64_trn2v4hi
; break;
8998 case V4SImode
: gen
= gen_aarch64_trn2v4si
; break;
8999 case V2SImode
: gen
= gen_aarch64_trn2v2si
; break;
9000 case V2DImode
: gen
= gen_aarch64_trn2v2di
; break;
9001 case V4SFmode
: gen
= gen_aarch64_trn2v4sf
; break;
9002 case V2SFmode
: gen
= gen_aarch64_trn2v2sf
; break;
9003 case V2DFmode
: gen
= gen_aarch64_trn2v2df
; break;
9012 case V16QImode
: gen
= gen_aarch64_trn1v16qi
; break;
9013 case V8QImode
: gen
= gen_aarch64_trn1v8qi
; break;
9014 case V8HImode
: gen
= gen_aarch64_trn1v8hi
; break;
9015 case V4HImode
: gen
= gen_aarch64_trn1v4hi
; break;
9016 case V4SImode
: gen
= gen_aarch64_trn1v4si
; break;
9017 case V2SImode
: gen
= gen_aarch64_trn1v2si
; break;
9018 case V2DImode
: gen
= gen_aarch64_trn1v2di
; break;
9019 case V4SFmode
: gen
= gen_aarch64_trn1v4sf
; break;
9020 case V2SFmode
: gen
= gen_aarch64_trn1v2sf
; break;
9021 case V2DFmode
: gen
= gen_aarch64_trn1v2df
; break;
9027 emit_insn (gen (out
, in0
, in1
));
9031 /* Recognize patterns suitable for the UZP instructions. */
9033 aarch64_evpc_uzp (struct expand_vec_perm_d
*d
)
9035 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
9036 rtx out
, in0
, in1
, x
;
9037 rtx (*gen
) (rtx
, rtx
, rtx
);
9038 enum machine_mode vmode
= d
->vmode
;
9040 if (GET_MODE_UNIT_SIZE (vmode
) > 8)
9043 /* Note that these are little-endian tests.
9044 We correct for big-endian later. */
9045 if (d
->perm
[0] == 0)
9047 else if (d
->perm
[0] == 1)
9051 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
9053 for (i
= 0; i
< nelt
; i
++)
9055 unsigned elt
= (i
* 2 + odd
) & mask
;
9056 if (d
->perm
[i
] != elt
)
9066 if (BYTES_BIG_ENDIAN
)
9068 x
= in0
, in0
= in1
, in1
= x
;
9077 case V16QImode
: gen
= gen_aarch64_uzp2v16qi
; break;
9078 case V8QImode
: gen
= gen_aarch64_uzp2v8qi
; break;
9079 case V8HImode
: gen
= gen_aarch64_uzp2v8hi
; break;
9080 case V4HImode
: gen
= gen_aarch64_uzp2v4hi
; break;
9081 case V4SImode
: gen
= gen_aarch64_uzp2v4si
; break;
9082 case V2SImode
: gen
= gen_aarch64_uzp2v2si
; break;
9083 case V2DImode
: gen
= gen_aarch64_uzp2v2di
; break;
9084 case V4SFmode
: gen
= gen_aarch64_uzp2v4sf
; break;
9085 case V2SFmode
: gen
= gen_aarch64_uzp2v2sf
; break;
9086 case V2DFmode
: gen
= gen_aarch64_uzp2v2df
; break;
9095 case V16QImode
: gen
= gen_aarch64_uzp1v16qi
; break;
9096 case V8QImode
: gen
= gen_aarch64_uzp1v8qi
; break;
9097 case V8HImode
: gen
= gen_aarch64_uzp1v8hi
; break;
9098 case V4HImode
: gen
= gen_aarch64_uzp1v4hi
; break;
9099 case V4SImode
: gen
= gen_aarch64_uzp1v4si
; break;
9100 case V2SImode
: gen
= gen_aarch64_uzp1v2si
; break;
9101 case V2DImode
: gen
= gen_aarch64_uzp1v2di
; break;
9102 case V4SFmode
: gen
= gen_aarch64_uzp1v4sf
; break;
9103 case V2SFmode
: gen
= gen_aarch64_uzp1v2sf
; break;
9104 case V2DFmode
: gen
= gen_aarch64_uzp1v2df
; break;
9110 emit_insn (gen (out
, in0
, in1
));
9114 /* Recognize patterns suitable for the ZIP instructions. */
9116 aarch64_evpc_zip (struct expand_vec_perm_d
*d
)
9118 unsigned int i
, high
, mask
, nelt
= d
->nelt
;
9119 rtx out
, in0
, in1
, x
;
9120 rtx (*gen
) (rtx
, rtx
, rtx
);
9121 enum machine_mode vmode
= d
->vmode
;
9123 if (GET_MODE_UNIT_SIZE (vmode
) > 8)
9126 /* Note that these are little-endian tests.
9127 We correct for big-endian later. */
9129 if (d
->perm
[0] == high
)
9132 else if (d
->perm
[0] == 0)
9136 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
9138 for (i
= 0; i
< nelt
/ 2; i
++)
9140 unsigned elt
= (i
+ high
) & mask
;
9141 if (d
->perm
[i
* 2] != elt
)
9143 elt
= (elt
+ nelt
) & mask
;
9144 if (d
->perm
[i
* 2 + 1] != elt
)
9154 if (BYTES_BIG_ENDIAN
)
9156 x
= in0
, in0
= in1
, in1
= x
;
9165 case V16QImode
: gen
= gen_aarch64_zip2v16qi
; break;
9166 case V8QImode
: gen
= gen_aarch64_zip2v8qi
; break;
9167 case V8HImode
: gen
= gen_aarch64_zip2v8hi
; break;
9168 case V4HImode
: gen
= gen_aarch64_zip2v4hi
; break;
9169 case V4SImode
: gen
= gen_aarch64_zip2v4si
; break;
9170 case V2SImode
: gen
= gen_aarch64_zip2v2si
; break;
9171 case V2DImode
: gen
= gen_aarch64_zip2v2di
; break;
9172 case V4SFmode
: gen
= gen_aarch64_zip2v4sf
; break;
9173 case V2SFmode
: gen
= gen_aarch64_zip2v2sf
; break;
9174 case V2DFmode
: gen
= gen_aarch64_zip2v2df
; break;
9183 case V16QImode
: gen
= gen_aarch64_zip1v16qi
; break;
9184 case V8QImode
: gen
= gen_aarch64_zip1v8qi
; break;
9185 case V8HImode
: gen
= gen_aarch64_zip1v8hi
; break;
9186 case V4HImode
: gen
= gen_aarch64_zip1v4hi
; break;
9187 case V4SImode
: gen
= gen_aarch64_zip1v4si
; break;
9188 case V2SImode
: gen
= gen_aarch64_zip1v2si
; break;
9189 case V2DImode
: gen
= gen_aarch64_zip1v2di
; break;
9190 case V4SFmode
: gen
= gen_aarch64_zip1v4sf
; break;
9191 case V2SFmode
: gen
= gen_aarch64_zip1v2sf
; break;
9192 case V2DFmode
: gen
= gen_aarch64_zip1v2df
; break;
9198 emit_insn (gen (out
, in0
, in1
));
9202 /* Recognize patterns for the EXT insn. */
9205 aarch64_evpc_ext (struct expand_vec_perm_d
*d
)
9207 unsigned int i
, nelt
= d
->nelt
;
9208 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
9211 unsigned int location
= d
->perm
[0]; /* Always < nelt. */
9213 /* Check if the extracted indices are increasing by one. */
9214 for (i
= 1; i
< nelt
; i
++)
9216 unsigned int required
= location
+ i
;
9217 if (d
->one_vector_p
)
9219 /* We'll pass the same vector in twice, so allow indices to wrap. */
9220 required
&= (nelt
- 1);
9222 if (d
->perm
[i
] != required
)
9228 case V16QImode
: gen
= gen_aarch64_extv16qi
; break;
9229 case V8QImode
: gen
= gen_aarch64_extv8qi
; break;
9230 case V4HImode
: gen
= gen_aarch64_extv4hi
; break;
9231 case V8HImode
: gen
= gen_aarch64_extv8hi
; break;
9232 case V2SImode
: gen
= gen_aarch64_extv2si
; break;
9233 case V4SImode
: gen
= gen_aarch64_extv4si
; break;
9234 case V2SFmode
: gen
= gen_aarch64_extv2sf
; break;
9235 case V4SFmode
: gen
= gen_aarch64_extv4sf
; break;
9236 case V2DImode
: gen
= gen_aarch64_extv2di
; break;
9237 case V2DFmode
: gen
= gen_aarch64_extv2df
; break;
9246 /* The case where (location == 0) is a no-op for both big- and little-endian,
9247 and is removed by the mid-end at optimization levels -O1 and higher. */
9249 if (BYTES_BIG_ENDIAN
&& (location
!= 0))
9251 /* After setup, we want the high elements of the first vector (stored
9252 at the LSB end of the register), and the low elements of the second
9253 vector (stored at the MSB end of the register). So swap. */
9257 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
9258 location
= nelt
- location
;
9261 offset
= GEN_INT (location
);
9262 emit_insn (gen (d
->target
, d
->op0
, d
->op1
, offset
));
9266 /* Recognize patterns for the REV insns. */
9269 aarch64_evpc_rev (struct expand_vec_perm_d
*d
)
9271 unsigned int i
, j
, diff
, nelt
= d
->nelt
;
9272 rtx (*gen
) (rtx
, rtx
);
9274 if (!d
->one_vector_p
)
9283 case V16QImode
: gen
= gen_aarch64_rev64v16qi
; break;
9284 case V8QImode
: gen
= gen_aarch64_rev64v8qi
; break;
9292 case V16QImode
: gen
= gen_aarch64_rev32v16qi
; break;
9293 case V8QImode
: gen
= gen_aarch64_rev32v8qi
; break;
9294 case V8HImode
: gen
= gen_aarch64_rev64v8hi
; break;
9295 case V4HImode
: gen
= gen_aarch64_rev64v4hi
; break;
9303 case V16QImode
: gen
= gen_aarch64_rev16v16qi
; break;
9304 case V8QImode
: gen
= gen_aarch64_rev16v8qi
; break;
9305 case V8HImode
: gen
= gen_aarch64_rev32v8hi
; break;
9306 case V4HImode
: gen
= gen_aarch64_rev32v4hi
; break;
9307 case V4SImode
: gen
= gen_aarch64_rev64v4si
; break;
9308 case V2SImode
: gen
= gen_aarch64_rev64v2si
; break;
9309 case V4SFmode
: gen
= gen_aarch64_rev64v4sf
; break;
9310 case V2SFmode
: gen
= gen_aarch64_rev64v2sf
; break;
9319 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
9320 for (j
= 0; j
<= diff
; j
+= 1)
9322 /* This is guaranteed to be true as the value of diff
9323 is 7, 3, 1 and we should have enough elements in the
9324 queue to generate this. Getting a vector mask with a
9325 value of diff other than these values implies that
9326 something is wrong by the time we get here. */
9327 gcc_assert (i
+ j
< nelt
);
9328 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
9336 emit_insn (gen (d
->target
, d
->op0
));
9341 aarch64_evpc_dup (struct expand_vec_perm_d
*d
)
9343 rtx (*gen
) (rtx
, rtx
, rtx
);
9344 rtx out
= d
->target
;
9346 enum machine_mode vmode
= d
->vmode
;
9347 unsigned int i
, elt
, nelt
= d
->nelt
;
9351 for (i
= 1; i
< nelt
; i
++)
9353 if (elt
!= d
->perm
[i
])
9357 /* The generic preparation in aarch64_expand_vec_perm_const_1
9358 swaps the operand order and the permute indices if it finds
9359 d->perm[0] to be in the second operand. Thus, we can always
9360 use d->op0 and need not do any extra arithmetic to get the
9361 correct lane number. */
9363 lane
= GEN_INT (elt
); /* The pattern corrects for big-endian. */
9367 case V16QImode
: gen
= gen_aarch64_dup_lanev16qi
; break;
9368 case V8QImode
: gen
= gen_aarch64_dup_lanev8qi
; break;
9369 case V8HImode
: gen
= gen_aarch64_dup_lanev8hi
; break;
9370 case V4HImode
: gen
= gen_aarch64_dup_lanev4hi
; break;
9371 case V4SImode
: gen
= gen_aarch64_dup_lanev4si
; break;
9372 case V2SImode
: gen
= gen_aarch64_dup_lanev2si
; break;
9373 case V2DImode
: gen
= gen_aarch64_dup_lanev2di
; break;
9374 case V4SFmode
: gen
= gen_aarch64_dup_lanev4sf
; break;
9375 case V2SFmode
: gen
= gen_aarch64_dup_lanev2sf
; break;
9376 case V2DFmode
: gen
= gen_aarch64_dup_lanev2df
; break;
9381 emit_insn (gen (out
, in0
, lane
));
9386 aarch64_evpc_tbl (struct expand_vec_perm_d
*d
)
9388 rtx rperm
[MAX_VECT_LEN
], sel
;
9389 enum machine_mode vmode
= d
->vmode
;
9390 unsigned int i
, nelt
= d
->nelt
;
9395 /* Generic code will try constant permutation twice. Once with the
9396 original mode and again with the elements lowered to QImode.
9397 So wait and don't do the selector expansion ourselves. */
9398 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
9401 for (i
= 0; i
< nelt
; ++i
)
9403 int nunits
= GET_MODE_NUNITS (vmode
);
9405 /* If big-endian and two vectors we end up with a weird mixed-endian
9406 mode on NEON. Reverse the index within each word but not the word
9408 rperm
[i
] = GEN_INT (BYTES_BIG_ENDIAN
? d
->perm
[i
] ^ (nunits
- 1)
9411 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
9412 sel
= force_reg (vmode
, sel
);
9414 aarch64_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
9419 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
9421 /* The pattern matching functions above are written to look for a small
9422 number to begin the sequence (0, 1, N/2). If we begin with an index
9423 from the second operand, we can swap the operands. */
9424 if (d
->perm
[0] >= d
->nelt
)
9426 unsigned i
, nelt
= d
->nelt
;
9429 gcc_assert (nelt
== (nelt
& -nelt
));
9430 for (i
= 0; i
< nelt
; ++i
)
9431 d
->perm
[i
] ^= nelt
; /* Keep the same index, but in the other vector. */
9440 if (aarch64_evpc_rev (d
))
9442 else if (aarch64_evpc_ext (d
))
9444 else if (aarch64_evpc_dup (d
))
9446 else if (aarch64_evpc_zip (d
))
9448 else if (aarch64_evpc_uzp (d
))
9450 else if (aarch64_evpc_trn (d
))
9452 return aarch64_evpc_tbl (d
);
9457 /* Expand a vec_perm_const pattern. */
9460 aarch64_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
, rtx sel
)
9462 struct expand_vec_perm_d d
;
9469 d
.vmode
= GET_MODE (target
);
9470 gcc_assert (VECTOR_MODE_P (d
.vmode
));
9471 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
9472 d
.testing_p
= false;
9474 for (i
= which
= 0; i
< nelt
; ++i
)
9476 rtx e
= XVECEXP (sel
, 0, i
);
9477 int ei
= INTVAL (e
) & (2 * nelt
- 1);
9478 which
|= (ei
< nelt
? 1 : 2);
9488 d
.one_vector_p
= false;
9489 if (!rtx_equal_p (op0
, op1
))
9492 /* The elements of PERM do not suggest that only the first operand
9493 is used, but both operands are identical. Allow easier matching
9494 of the permutation by folding the permutation into the single
9498 for (i
= 0; i
< nelt
; ++i
)
9499 d
.perm
[i
] &= nelt
- 1;
9501 d
.one_vector_p
= true;
9506 d
.one_vector_p
= true;
9510 return aarch64_expand_vec_perm_const_1 (&d
);
9514 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
9515 const unsigned char *sel
)
9517 struct expand_vec_perm_d d
;
9518 unsigned int i
, nelt
, which
;
9522 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
9524 memcpy (d
.perm
, sel
, nelt
);
9526 /* Calculate whether all elements are in one vector. */
9527 for (i
= which
= 0; i
< nelt
; ++i
)
9529 unsigned char e
= d
.perm
[i
];
9530 gcc_assert (e
< 2 * nelt
);
9531 which
|= (e
< nelt
? 1 : 2);
9534 /* If all elements are from the second vector, reindex as if from the
9537 for (i
= 0; i
< nelt
; ++i
)
9540 /* Check whether the mask can be applied to a single vector. */
9541 d
.one_vector_p
= (which
!= 3);
9543 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
9544 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
9545 if (!d
.one_vector_p
)
9546 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
9549 ret
= aarch64_expand_vec_perm_const_1 (&d
);
9555 /* Implement target hook CANNOT_CHANGE_MODE_CLASS. */
9557 aarch64_cannot_change_mode_class (enum machine_mode from
,
9558 enum machine_mode to
,
9559 enum reg_class rclass
)
9561 /* Full-reg subregs are allowed on general regs or any class if they are
9563 if (GET_MODE_SIZE (from
) == GET_MODE_SIZE (to
)
9564 || !reg_classes_intersect_p (FP_REGS
, rclass
))
9567 /* Limited combinations of subregs are safe on FPREGs. Particularly,
9568 1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
9569 2. Scalar to Scalar for integer modes or same size float modes.
9570 3. Vector to Vector modes.
9571 4. On little-endian only, Vector-Structure to Vector modes. */
9572 if (GET_MODE_SIZE (from
) > GET_MODE_SIZE (to
))
9574 if (aarch64_vector_mode_supported_p (from
)
9575 && GET_MODE_SIZE (GET_MODE_INNER (from
)) == GET_MODE_SIZE (to
))
9578 if (GET_MODE_NUNITS (from
) == 1
9579 && GET_MODE_NUNITS (to
) == 1
9580 && (GET_MODE_CLASS (from
) == MODE_INT
9584 if (aarch64_vector_mode_supported_p (from
)
9585 && aarch64_vector_mode_supported_p (to
))
9588 /* Within an vector structure straddling multiple vector registers
9589 we are in a mixed-endian representation. As such, we can't
9590 easily change modes for BYTES_BIG_ENDIAN. Otherwise, we can
9591 switch between vectors and vector structures cheaply. */
9592 if (!BYTES_BIG_ENDIAN
)
9593 if ((aarch64_vector_mode_supported_p (from
)
9594 && aarch64_vect_struct_mode_p (to
))
9595 || (aarch64_vector_mode_supported_p (to
)
9596 && aarch64_vect_struct_mode_p (from
)))
9603 /* Implement MODES_TIEABLE_P. */
9606 aarch64_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
9608 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
9611 /* We specifically want to allow elements of "structure" modes to
9612 be tieable to the structure. This more general condition allows
9613 other rarer situations too. */
9615 && aarch64_vector_mode_p (mode1
)
9616 && aarch64_vector_mode_p (mode2
))
9622 /* Return a new RTX holding the result of moving POINTER forward by
9626 aarch64_move_pointer (rtx pointer
, int amount
)
9628 rtx next
= plus_constant (Pmode
, XEXP (pointer
, 0), amount
);
9630 return adjust_automodify_address (pointer
, GET_MODE (pointer
),
9634 /* Return a new RTX holding the result of moving POINTER forward by the
9635 size of the mode it points to. */
9638 aarch64_progress_pointer (rtx pointer
)
9640 HOST_WIDE_INT amount
= GET_MODE_SIZE (GET_MODE (pointer
));
9642 return aarch64_move_pointer (pointer
, amount
);
9645 /* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
9649 aarch64_copy_one_block_and_progress_pointers (rtx
*src
, rtx
*dst
,
9650 enum machine_mode mode
)
9652 rtx reg
= gen_reg_rtx (mode
);
9654 /* "Cast" the pointers to the correct mode. */
9655 *src
= adjust_address (*src
, mode
, 0);
9656 *dst
= adjust_address (*dst
, mode
, 0);
9657 /* Emit the memcpy. */
9658 emit_move_insn (reg
, *src
);
9659 emit_move_insn (*dst
, reg
);
9660 /* Move the pointers forward. */
9661 *src
= aarch64_progress_pointer (*src
);
9662 *dst
= aarch64_progress_pointer (*dst
);
9665 /* Expand movmem, as if from a __builtin_memcpy. Return true if
9666 we succeed, otherwise return false. */
9669 aarch64_expand_movmem (rtx
*operands
)
9672 rtx dst
= operands
[0];
9673 rtx src
= operands
[1];
9675 bool speed_p
= !optimize_function_for_size_p (cfun
);
9677 /* When optimizing for size, give a better estimate of the length of a
9678 memcpy call, but use the default otherwise. */
9679 unsigned int max_instructions
= (speed_p
? 15 : AARCH64_CALL_RATIO
) / 2;
9681 /* We can't do anything smart if the amount to copy is not constant. */
9682 if (!CONST_INT_P (operands
[2]))
9685 n
= UINTVAL (operands
[2]);
9687 /* Try to keep the number of instructions low. For cases below 16 bytes we
9688 need to make at most two moves. For cases above 16 bytes it will be one
9689 move for each 16 byte chunk, then at most two additional moves. */
9690 if (((n
/ 16) + (n
% 16 ? 2 : 0)) > max_instructions
)
9693 base
= copy_to_mode_reg (Pmode
, XEXP (dst
, 0));
9694 dst
= adjust_automodify_address (dst
, VOIDmode
, base
, 0);
9696 base
= copy_to_mode_reg (Pmode
, XEXP (src
, 0));
9697 src
= adjust_automodify_address (src
, VOIDmode
, base
, 0);
9699 /* Simple cases. Copy 0-3 bytes, as (if applicable) a 2-byte, then a
9705 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, HImode
);
9710 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, QImode
);
9715 /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second
9716 4-byte chunk, partially overlapping with the previously copied chunk. */
9719 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, SImode
);
9725 src
= aarch64_move_pointer (src
, move
);
9726 dst
= aarch64_move_pointer (dst
, move
);
9727 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, SImode
);
9732 /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of
9733 them, then (if applicable) an 8-byte chunk. */
9738 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, TImode
);
9743 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, DImode
);
9748 /* Finish the final bytes of the copy. We can always do this in one
9749 instruction. We either copy the exact amount we need, or partially
9750 overlap with the previous chunk we copied and copy 8-bytes. */
9754 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, QImode
);
9756 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, HImode
);
9758 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, SImode
);
9763 src
= aarch64_move_pointer (src
, -1);
9764 dst
= aarch64_move_pointer (dst
, -1);
9765 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, SImode
);
9771 src
= aarch64_move_pointer (src
, move
);
9772 dst
= aarch64_move_pointer (dst
, move
);
9773 aarch64_copy_one_block_and_progress_pointers (&src
, &dst
, DImode
);
9780 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
9782 static unsigned HOST_WIDE_INT
9783 aarch64_asan_shadow_offset (void)
9785 return (HOST_WIDE_INT_1
<< 36);
9788 #undef TARGET_ADDRESS_COST
9789 #define TARGET_ADDRESS_COST aarch64_address_cost
9791 /* This hook will determines whether unnamed bitfields affect the alignment
9792 of the containing structure. The hook returns true if the structure
9793 should inherit the alignment requirements of an unnamed bitfield's
9795 #undef TARGET_ALIGN_ANON_BITFIELD
9796 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
9798 #undef TARGET_ASM_ALIGNED_DI_OP
9799 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
9801 #undef TARGET_ASM_ALIGNED_HI_OP
9802 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
9804 #undef TARGET_ASM_ALIGNED_SI_OP
9805 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
9807 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
9808 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
9809 hook_bool_const_tree_hwi_hwi_const_tree_true
9811 #undef TARGET_ASM_FILE_START
9812 #define TARGET_ASM_FILE_START aarch64_start_file
9814 #undef TARGET_ASM_OUTPUT_MI_THUNK
9815 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
9817 #undef TARGET_ASM_SELECT_RTX_SECTION
9818 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
9820 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
9821 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
9823 #undef TARGET_BUILD_BUILTIN_VA_LIST
9824 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
9826 #undef TARGET_CALLEE_COPIES
9827 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
9829 #undef TARGET_CAN_ELIMINATE
9830 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
9832 #undef TARGET_CANNOT_FORCE_CONST_MEM
9833 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
9835 #undef TARGET_CONDITIONAL_REGISTER_USAGE
9836 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
9838 /* Only the least significant bit is used for initialization guard
9840 #undef TARGET_CXX_GUARD_MASK_BIT
9841 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
9843 #undef TARGET_C_MODE_FOR_SUFFIX
9844 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
9846 #ifdef TARGET_BIG_ENDIAN_DEFAULT
9847 #undef TARGET_DEFAULT_TARGET_FLAGS
9848 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
9851 #undef TARGET_CLASS_MAX_NREGS
9852 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
9854 #undef TARGET_BUILTIN_DECL
9855 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
9857 #undef TARGET_EXPAND_BUILTIN
9858 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
9860 #undef TARGET_EXPAND_BUILTIN_VA_START
9861 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
9863 #undef TARGET_FOLD_BUILTIN
9864 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
9866 #undef TARGET_FUNCTION_ARG
9867 #define TARGET_FUNCTION_ARG aarch64_function_arg
9869 #undef TARGET_FUNCTION_ARG_ADVANCE
9870 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
9872 #undef TARGET_FUNCTION_ARG_BOUNDARY
9873 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
9875 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
9876 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
9878 #undef TARGET_FUNCTION_VALUE
9879 #define TARGET_FUNCTION_VALUE aarch64_function_value
9881 #undef TARGET_FUNCTION_VALUE_REGNO_P
9882 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
9884 #undef TARGET_FRAME_POINTER_REQUIRED
9885 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
9887 #undef TARGET_GIMPLE_FOLD_BUILTIN
9888 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
9890 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
9891 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
9893 #undef TARGET_INIT_BUILTINS
9894 #define TARGET_INIT_BUILTINS aarch64_init_builtins
9896 #undef TARGET_LEGITIMATE_ADDRESS_P
9897 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
9899 #undef TARGET_LEGITIMATE_CONSTANT_P
9900 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
9902 #undef TARGET_LIBGCC_CMP_RETURN_MODE
9903 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
9906 #define TARGET_LRA_P aarch64_lra_p
9908 #undef TARGET_MANGLE_TYPE
9909 #define TARGET_MANGLE_TYPE aarch64_mangle_type
9911 #undef TARGET_MEMORY_MOVE_COST
9912 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
9914 #undef TARGET_MUST_PASS_IN_STACK
9915 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
9917 /* This target hook should return true if accesses to volatile bitfields
9918 should use the narrowest mode possible. It should return false if these
9919 accesses should use the bitfield container type. */
9920 #undef TARGET_NARROW_VOLATILE_BITFIELD
9921 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
9923 #undef TARGET_OPTION_OVERRIDE
9924 #define TARGET_OPTION_OVERRIDE aarch64_override_options
9926 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
9927 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
9928 aarch64_override_options_after_change
9930 #undef TARGET_PASS_BY_REFERENCE
9931 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
9933 #undef TARGET_PREFERRED_RELOAD_CLASS
9934 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
9936 #undef TARGET_SECONDARY_RELOAD
9937 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
9939 #undef TARGET_SHIFT_TRUNCATION_MASK
9940 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
9942 #undef TARGET_SETUP_INCOMING_VARARGS
9943 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
9945 #undef TARGET_STRUCT_VALUE_RTX
9946 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
9948 #undef TARGET_REGISTER_MOVE_COST
9949 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
9951 #undef TARGET_RETURN_IN_MEMORY
9952 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
9954 #undef TARGET_RETURN_IN_MSB
9955 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
9957 #undef TARGET_RTX_COSTS
9958 #define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
9960 #undef TARGET_SCHED_ISSUE_RATE
9961 #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
9963 #undef TARGET_TRAMPOLINE_INIT
9964 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
9966 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
9967 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
9969 #undef TARGET_VECTOR_MODE_SUPPORTED_P
9970 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
9972 #undef TARGET_ARRAY_MODE_SUPPORTED_P
9973 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
9975 #undef TARGET_VECTORIZE_ADD_STMT_COST
9976 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
9978 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
9979 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
9980 aarch64_builtin_vectorization_cost
9982 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
9983 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
9985 #undef TARGET_VECTORIZE_BUILTINS
9986 #define TARGET_VECTORIZE_BUILTINS
9988 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
9989 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
9990 aarch64_builtin_vectorized_function
9992 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
9993 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
9994 aarch64_autovectorize_vector_sizes
9996 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
9997 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
9998 aarch64_atomic_assign_expand_fenv
10000 /* Section anchor support. */
10002 #undef TARGET_MIN_ANCHOR_OFFSET
10003 #define TARGET_MIN_ANCHOR_OFFSET -256
10005 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
10006 byte offset; we can do much more for larger data types, but have no way
10007 to determine the size of the access. We assume accesses are aligned. */
10008 #undef TARGET_MAX_ANCHOR_OFFSET
10009 #define TARGET_MAX_ANCHOR_OFFSET 4095
10011 #undef TARGET_VECTOR_ALIGNMENT
10012 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
10014 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
10015 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
10016 aarch64_simd_vector_alignment_reachable
10018 /* vec_perm support. */
10020 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
10021 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
10022 aarch64_vectorize_vec_perm_const_ok
10025 #undef TARGET_FIXED_CONDITION_CODE_REGS
10026 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
10028 #undef TARGET_FLAGS_REGNUM
10029 #define TARGET_FLAGS_REGNUM CC_REGNUM
10031 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
10032 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
10034 #undef TARGET_ASAN_SHADOW_OFFSET
10035 #define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset
10037 struct gcc_target targetm
= TARGET_INITIALIZER
;
10039 #include "gt-aarch64.h"