/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2014 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "hard-reg-set.h"
#include "target-def.h"
#include "targhooks.h"
#include "langhooks.h"
#include "diagnostic-core.h"
#include "pointer-set.h"
#include "hash-table.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "gimple-expr.h"
#include "tree-vectorizer.h"
#include "config/arm/aarch-cost-tables.h"

/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
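/* Note: under LP64 this evaluates to 8; under ILP32 it evaluates to 4.  */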
/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC
       A constant symbolic address, in pc-relative literal pool.  */

enum aarch64_address_type
{
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};

struct aarch64_address_info
{
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};

struct simd_immediate_info;

/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
static bool aarch64_lra_p (void);
static bool aarch64_composite_type_p (const_tree, enum machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
						     const_tree,
						     enum machine_mode *, int *,
						     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (enum machine_mode);
static unsigned bit_count (unsigned HOST_WIDE_INT);
static bool aarch64_const_vec_all_same_int_p (rtx,
					      HOST_WIDE_INT, HOST_WIDE_INT);
static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
						 const unsigned char *sel);
static int aarch64_address_cost (rtx, enum machine_mode, addr_space_t, bool);
/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = cortexa53;

/* The current tuning set.  */
const struct tune_params *aarch64_tune_params;

/* Mask to specify which instructions we are allowed to generate.  */
unsigned long aarch64_isa_flags = 0;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;
/* Tuning parameters.  */

#if HAVE_DESIGNATED_INITIALIZERS
#define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
#else
#define NAMED_PARAM(NAME, VAL) (VAL)
#endif
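/* For example, NAMED_PARAM (memmov_cost, 4) expands to ".memmov_cost = 4"
   when designated initializers are available and to plain "4" otherwise, so
   the cost tables below rely on field order when built by old host
   compilers.  */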
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_addrcost_table generic_addrcost_table =
{
#if HAVE_DESIGNATED_INITIALIZERS
#endif
  NAMED_PARAM (pre_modify, 0),
  NAMED_PARAM (post_modify, 0),
  NAMED_PARAM (register_offset, 0),
  NAMED_PARAM (register_extend, 0),
  NAMED_PARAM (imm_offset, 0)
};

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_addrcost_table cortexa57_addrcost_table =
{
#if HAVE_DESIGNATED_INITIALIZERS
#endif
  NAMED_PARAM (pre_modify, 0),
  NAMED_PARAM (post_modify, 0),
  NAMED_PARAM (register_offset, 0),
  NAMED_PARAM (register_extend, 0),
  NAMED_PARAM (imm_offset, 0),
};
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_regmove_cost generic_regmove_cost =
{
  NAMED_PARAM (GP2GP, 1),
  NAMED_PARAM (GP2FP, 2),
  NAMED_PARAM (FP2GP, 2),
  /* We currently do not provide direct support for TFmode Q->Q move.
     Therefore we need to raise the cost above 2 in order to have
     reload handle the situation.  */
  NAMED_PARAM (FP2FP, 4)
};
/* Generic costs for vector insn classes.  */
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_vector_cost generic_vector_cost =
{
  NAMED_PARAM (scalar_stmt_cost, 1),
  NAMED_PARAM (scalar_load_cost, 1),
  NAMED_PARAM (scalar_store_cost, 1),
  NAMED_PARAM (vec_stmt_cost, 1),
  NAMED_PARAM (vec_to_scalar_cost, 1),
  NAMED_PARAM (scalar_to_vec_cost, 1),
  NAMED_PARAM (vec_align_load_cost, 1),
  NAMED_PARAM (vec_unalign_load_cost, 1),
  NAMED_PARAM (vec_unalign_store_cost, 1),
  NAMED_PARAM (vec_store_cost, 1),
  NAMED_PARAM (cond_taken_branch_cost, 3),
  NAMED_PARAM (cond_not_taken_branch_cost, 1)
};
/* Costs for vector insn classes for Cortex-A57.  */
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_vector_cost cortexa57_vector_cost =
{
  NAMED_PARAM (scalar_stmt_cost, 1),
  NAMED_PARAM (scalar_load_cost, 4),
  NAMED_PARAM (scalar_store_cost, 1),
  NAMED_PARAM (vec_stmt_cost, 3),
  NAMED_PARAM (vec_to_scalar_cost, 8),
  NAMED_PARAM (scalar_to_vec_cost, 8),
  NAMED_PARAM (vec_align_load_cost, 5),
  NAMED_PARAM (vec_unalign_load_cost, 5),
  NAMED_PARAM (vec_unalign_store_cost, 1),
  NAMED_PARAM (vec_store_cost, 1),
  NAMED_PARAM (cond_taken_branch_cost, 1),
  NAMED_PARAM (cond_not_taken_branch_cost, 1)
};
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct tune_params generic_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  NAMED_PARAM (memmov_cost, 4),
  NAMED_PARAM (issue_rate, 2)
};

static const struct tune_params cortexa53_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  NAMED_PARAM (memmov_cost, 4),
  NAMED_PARAM (issue_rate, 2)
};

static const struct tune_params cortexa57_tunings =
{
  &cortexa57_extra_costs,
  &cortexa57_addrcost_table,
  &generic_regmove_cost,
  &cortexa57_vector_cost,
  NAMED_PARAM (memmov_cost, 4),
  NAMED_PARAM (issue_rate, 3)
};
/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};
/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
#include "aarch64-cores.def"
#undef AARCH64_CORE
  {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8,
   &generic_tunings},
  {NULL, aarch64_none, NULL, 0, NULL}
};

/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, FLAGS, NULL},
#include "aarch64-arches.def"
#undef AARCH64_ARCH
  {NULL, aarch64_none, NULL, 0, NULL}
};
/* Target specification.  These are populated as commandline arguments
   are processed, or NULL if not specified.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};

/* ISA extensions in AArch64.  */
static const struct aarch64_option_extension all_extensions[] =
{
#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
  {NAME, FLAGS_ON, FLAGS_OFF},
#include "aarch64-option-extensions.def"
#undef AARCH64_OPT_EXTENSION
  {NULL, 0, 0}
};
/* Used to track the size of an address when generating a pre/post
   increment address.  */
static enum machine_mode aarch64_memory_reference_mode;

/* Used to force GTY into this file.  */
static GTY(()) int gty_dummy;

/* A table of valid AArch64 "bitmask immediate" values for
   logical instructions.  */

#define AARCH64_NUM_BITMASKS  5334
static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
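/* AArch64 logical instructions (AND, ORR, EOR) only accept immediates that
   encode a rotated run of ones replicated across the register in 2-, 4-, 8-,
   16-, 32- or 64-bit elements: 0x00ff00ff00ff00ff is such a value, 0x1234 is
   not.  The table above caches all 5334 distinct encodable 64-bit values so
   that the immediate-move expander below can search them.  */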
typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
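/* Flipping the low bit yields the logical inverse because the codes above are
   laid out in inverse pairs: EQ (0) <-> NE (1), CS (2) <-> CC (3), ...,
   GE (10) <-> LT (11), GT (12) <-> LE (13).  */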
/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return AARCH64_DWARF_R0 + regno - R0_REGNUM;
  else if (regno == SP_REGNUM)
    return AARCH64_DWARF_SP;
  else if (FP_REGNUM_P (regno))
    return AARCH64_DWARF_V0 + regno - V0_REGNUM;

  /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
     equivalent DWARF register.  */
  return DWARF_FRAME_REGISTERS;
}
/* Return TRUE if MODE is any of the large INT modes.  */
static bool
aarch64_vect_struct_mode_p (enum machine_mode mode)
{
  return mode == OImode || mode == CImode || mode == XImode;
}

/* Return TRUE if MODE is any of the vector modes.  */
static bool
aarch64_vector_mode_p (enum machine_mode mode)
{
  return aarch64_vector_mode_supported_p (mode)
	 || aarch64_vect_struct_mode_p (mode);
}
/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
static bool
aarch64_array_mode_supported_p (enum machine_mode mode,
				unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_SIMD
      && AARCH64_VALID_SIMD_QREG_MODE (mode)
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}
/* Implement HARD_REGNO_NREGS.  */
int
aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
{
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
    default:
      return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    }
  gcc_unreachable ();
}
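/* Note on the function above: a 16-byte TImode value therefore needs two
   X registers (UNITS_PER_WORD is 8) but only one V register (UNITS_PER_VREG
   is 16), while SImode fits in a single register of either kind.  */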
/* Implement HARD_REGNO_MODE_OK.  */
int
aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return regno == CC_REGNUM;

  if (regno == SP_REGNUM)
    /* The purpose of comparing with ptr_mode is to support the
       global register variable associated with the stack pointer
       register via the syntax of asm ("wsp") in ILP32.  */
    return mode == Pmode || mode == ptr_mode;

  if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
    return mode == Pmode;

  if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
    return 1;

  if (FP_REGNUM_P (regno))
    {
      if (aarch64_vect_struct_mode_p (mode))
	return
	  (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
      else
	return 1;
    }

  return 0;
}
/* Implement HARD_REGNO_CALLER_SAVE_MODE.  */
enum machine_mode
aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
				     enum machine_mode mode)
{
  /* Handle modes that fit within single registers.  */
  if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
    {
      if (GET_MODE_SIZE (mode) >= 4)
	return mode;
      else
	return SImode;
    }
  /* Fall back to generic for multi-reg and very large modes.  */
  else
    return choose_hard_reg_mode (regno, nregs, false);
}
/* Return true if calls to DECL should be treated as
   long-calls (i.e. called via a register).  */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
  return false;
}

/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (i.e. called via a register).  */
bool
aarch64_is_long_call_p (rtx sym)
{
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}
/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

     (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
bool
aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
				rtx extract_imm)
{
  HOST_WIDE_INT mult_val, extract_val;

  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
    return false;

  mult_val = INTVAL (mult_imm);
  extract_val = INTVAL (extract_imm);

  if (extract_val > 8
      && extract_val < GET_MODE_BITSIZE (mode)
      && exact_log2 (extract_val & ~7) > 0
      && (extract_val & 7) <= 4
      && mult_val == (1 << (extract_val & 7)))
    return true;

  return false;
}
/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
  enum machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
  return cc_reg;
}
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

rtx
aarch64_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}
/* Return the TLS model to use for ADDR.  */
static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;
  rtx sym, addend;

  if (GET_CODE (addr) == CONST)
    {
      split_const (addr, &sym, &addend);
      if (GET_CODE (sym) == SYMBOL_REF)
	tls_kind = SYMBOL_REF_TLS_MODEL (sym);
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}
/* We'll allow lo_sum's in addresses in our legitimate addresses
   so that combine would take care of combining addresses where
   necessary, but for generation purposes, we'll generate the address
   as:

	RTL                               Absolute
	tmp = hi (symbol_ref);            adrp  x1, foo
	dest = lo_sum (tmp, symbol_ref);  add dest, x1, :lo_12:foo

	PIC                               TLS
	adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
	ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo
					  bl   __tls_get_addr

   Load TLS symbol, depending on TLS mechanism and TLS access model.

   Global Dynamic - Traditional TLS:
	adrp tmp, :tlsgd:imm
	add  dest, tmp, #:tlsgd_lo12:imm
	bl   __tls_get_addr

   Global Dynamic - TLS Descriptors:
	adrp dest, :tlsdesc:imm
	ldr  tmp, [dest, #:tlsdesc_lo12:imm]
	add  dest, dest, #:tlsdesc_lo12:imm
	blr  tmp

   Initial Exec:
	adrp tmp, :gottprel:imm
	ldr  dest, [tmp, #:gottprel_lo12:imm]

   Local Exec:
	add  t0, tp, #:tprel_hi12:imm
	add  t0, #:tprel_lo12_nc:imm
*/
static void
aarch64_load_symref_appropriately (rtx dest, rtx imm,
				   enum aarch64_symbol_type type)
{
  switch (type)
    {
    case SYMBOL_SMALL_ABSOLUTE:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode.  */
	rtx tmp_reg = dest;
	enum machine_mode mode = GET_MODE (dest);

	gcc_assert (mode == Pmode || mode == ptr_mode);

	if (can_create_pseudo_p ())
	  tmp_reg = gen_reg_rtx (mode);

	emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
	emit_insn (gen_add_losym (dest, tmp_reg, imm));
	return;
      }

    case SYMBOL_TINY_ABSOLUTE:
      emit_insn (gen_rtx_SET (Pmode, dest, imm));
      return;

    case SYMBOL_SMALL_GOT:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the got entry is always of SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in the memory), it has SImode; it may have
	   DImode if dest is dereferenced to access the memory.
	   This is why we have to handle three different ldr_got_small
	   patterns here (two patterns for ILP32).  */
	rtx tmp_reg = dest;
	enum machine_mode mode = GET_MODE (dest);

	if (can_create_pseudo_p ())
	  tmp_reg = gen_reg_rtx (mode);

	emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
	    else
	      emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
	  }
	else
	  {
	    gcc_assert (mode == Pmode);
	    emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
	  }
	return;
      }

    case SYMBOL_SMALL_TLSGD:
      {
	rtx insns;
	rtx result = gen_rtx_REG (Pmode, R0_REGNUM);

	start_sequence ();
	emit_call_insn (gen_tlsgd_small (result, imm));
	insns = get_insns ();
	end_sequence ();

	RTL_CONST_CALL_P (insns) = 1;
	emit_libcall_block (insns, dest, result, imm);
	return;
      }

    case SYMBOL_SMALL_TLSDESC:
      {
	enum machine_mode mode = GET_MODE (dest);
	rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
	rtx tp;

	gcc_assert (mode == Pmode || mode == ptr_mode);

	/* In ILP32, the got entry is always of SImode size.  Unlike
	   small GOT, the dest is fixed at reg 0.  */
	if (TARGET_ILP32)
	  emit_insn (gen_tlsdesc_small_si (imm));
	else
	  emit_insn (gen_tlsdesc_small_di (imm));
	tp = aarch64_load_tp (NULL);

	if (mode != Pmode)
	  tp = gen_lowpart (mode, tp);

	emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_SMALL_GOTTPREL:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the got entry is always of SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in the memory), it has SImode; it may have
	   DImode if dest is dereferenced to access the memory.
	   This is why we have to handle three different tlsie_small
	   patterns here (two patterns for ILP32).  */
	enum machine_mode mode = GET_MODE (dest);
	rtx tmp_reg = gen_reg_rtx (mode);
	rtx tp = aarch64_load_tp (NULL);

	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      emit_insn (gen_tlsie_small_di (tmp_reg, imm));
	    else
	      {
		emit_insn (gen_tlsie_small_si (tmp_reg, imm));
		tp = gen_lowpart (mode, tp);
	      }
	  }
	else
	  {
	    gcc_assert (mode == Pmode);
	    emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
	  }

	emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_SMALL_TPREL:
      {
	rtx tp = aarch64_load_tp (NULL);
	emit_insn (gen_tlsle_small (dest, tp, imm));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_TINY_GOT:
      emit_insn (gen_ldr_got_tiny (dest, imm));
      return;

    default:
      gcc_unreachable ();
    }
}
/* Emit a move from SRC to DEST.  Assume that the move expanders can
   handle all moves if !can_create_pseudo_p ().  The distinction is
   important because, unlike emit_move_insn, the move expanders know
   how to force Pmode objects into the constant pool even when the
   constant pool address is not itself legitimate.  */
static rtx
aarch64_emit_move (rtx dest, rtx src)
{
  return (can_create_pseudo_p ()
	  ? emit_move_insn (dest, src)
	  : emit_move_insn_1 (dest, src));
}
/* Split a 128-bit move operation into two 64-bit move operations,
   taking care to handle partial overlap of register to register
   copies.  Special cases are needed when moving between GP regs and
   FP regs.  SRC can be a register, constant or memory; DST a register
   or memory.  If either operand is memory it must not have any side
   effects.  */
void
aarch64_split_128bit_move (rtx dst, rtx src)
{
  rtx dst_lo, dst_hi;
  rtx src_lo, src_hi;

  enum machine_mode mode = GET_MODE (dst);

  gcc_assert (mode == TImode || mode == TFmode);
  gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
  gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);

  if (REG_P (dst) && REG_P (src))
    {
      int src_regno = REGNO (src);
      int dst_regno = REGNO (dst);

      /* Handle FP <-> GP regs.  */
      if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
	{
	  src_lo = gen_lowpart (word_mode, src);
	  src_hi = gen_highpart (word_mode, src);

	  if (mode == TImode)
	    {
	      emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
	      emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
	    }
	  else
	    {
	      emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
	      emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
	    }
	  return;
	}
      else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
	{
	  dst_lo = gen_lowpart (word_mode, dst);
	  dst_hi = gen_highpart (word_mode, dst);

	  if (mode == TImode)
	    {
	      emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
	      emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
	    }
	  else
	    {
	      emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
	      emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
	    }
	  return;
	}
    }

  dst_lo = gen_lowpart (word_mode, dst);
  dst_hi = gen_highpart (word_mode, dst);
  src_lo = gen_lowpart (word_mode, src);
  src_hi = gen_highpart_mode (word_mode, mode, src);

  /* At most one pairing may overlap.  */
  if (reg_overlap_mentioned_p (dst_lo, src_hi))
    {
      aarch64_emit_move (dst_hi, src_hi);
      aarch64_emit_move (dst_lo, src_lo);
    }
  else
    {
      aarch64_emit_move (dst_lo, src_lo);
      aarch64_emit_move (dst_hi, src_hi);
    }
}
bool
aarch64_split_128bit_move_p (rtx dst, rtx src)
{
  return (! REG_P (src)
	  || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
}
/* Split a complex SIMD combine.  */

void
aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
{
  enum machine_mode src_mode = GET_MODE (src1);
  enum machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src1) && REG_P (src2))
    {
      rtx (*gen) (rtx, rtx, rtx);

      switch (src_mode)
	{
	case V8QImode:
	  gen = gen_aarch64_simd_combinev8qi;
	  break;
	case V4HImode:
	  gen = gen_aarch64_simd_combinev4hi;
	  break;
	case V2SImode:
	  gen = gen_aarch64_simd_combinev2si;
	  break;
	case V2SFmode:
	  gen = gen_aarch64_simd_combinev2sf;
	  break;
	case DImode:
	  gen = gen_aarch64_simd_combinedi;
	  break;
	case DFmode:
	  gen = gen_aarch64_simd_combinedf;
	  break;
	default:
	  gcc_unreachable ();
	}

      emit_insn (gen (dst, src1, src2));
    }
  else
    gcc_unreachable ();
}
/* Split a complex SIMD move.  */

void
aarch64_split_simd_move (rtx dst, rtx src)
{
  enum machine_mode src_mode = GET_MODE (src);
  enum machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src))
    {
      rtx (*gen) (rtx, rtx);

      gcc_assert (VECTOR_MODE_P (src_mode));

      switch (src_mode)
	{
	case V16QImode:
	  gen = gen_aarch64_split_simd_movv16qi;
	  break;
	case V8HImode:
	  gen = gen_aarch64_split_simd_movv8hi;
	  break;
	case V4SImode:
	  gen = gen_aarch64_split_simd_movv4si;
	  break;
	case V2DImode:
	  gen = gen_aarch64_split_simd_movv2di;
	  break;
	case V4SFmode:
	  gen = gen_aarch64_split_simd_movv4sf;
	  break;
	case V2DFmode:
	  gen = gen_aarch64_split_simd_movv2df;
	  break;
	default:
	  gcc_unreachable ();
	}

      emit_insn (gen (dst, src));
    }
}
static rtx
aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
{
  if (can_create_pseudo_p ())
    return force_reg (mode, value);
  else
    {
      x = aarch64_emit_move (x, value);
      return x;
    }
}
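/* Add OFFSET to REG, using TEMP as a scratch register when the offset is not
   a valid immediate for a single add instruction; the result is returned as
   an rtx expression.  */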
static rtx
aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg,
		    HOST_WIDE_INT offset)
{
  if (!aarch64_plus_immediate (GEN_INT (offset), mode))
    {
      rtx high;
      /* Load the full offset into a register.  This
	 might be improvable in the future.  */
      high = GEN_INT (offset);
      offset = 0;
      high = aarch64_force_temporary (mode, temp, high);
      reg = aarch64_force_temporary (mode, temp,
				     gen_rtx_PLUS (mode, high, reg));
    }
  return plus_constant (mode, reg, offset);
}
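/* Expand a move of constant IMM (an integer immediate, symbol, label or
   const expression) into integer register DEST, splitting it into MOV/MOVK,
   add, logical or literal-pool sequences as needed.  */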
void
aarch64_expand_mov_immediate (rtx dest, rtx imm)
{
  enum machine_mode mode = GET_MODE (dest);
  unsigned HOST_WIDE_INT mask;
  int i;
  bool first;
  unsigned HOST_WIDE_INT val;
  bool subtargets;
  rtx subtarget;
  int one_match, zero_match;

  gcc_assert (mode == SImode || mode == DImode);

  /* Check on what type of symbol it is.  */
  if (GET_CODE (imm) == SYMBOL_REF
      || GET_CODE (imm) == LABEL_REF
      || GET_CODE (imm) == CONST)
    {
      rtx mem, base, offset;
      enum aarch64_symbol_type sty;

      /* If we have (const (plus symbol offset)), separate out the offset
	 before we start classifying the symbol.  */
      split_const (imm, &base, &offset);

      sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
      switch (sty)
	{
	case SYMBOL_FORCE_TO_MEM:
	  if (offset != const0_rtx
	      && targetm.cannot_force_const_mem (mode, imm))
	    {
	      gcc_assert (can_create_pseudo_p ());
	      base = aarch64_force_temporary (mode, dest, base);
	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
	      aarch64_emit_move (dest, base);
	      return;
	    }
	  mem = force_const_mem (ptr_mode, imm);
	  if (mode != ptr_mode)
	    mem = gen_rtx_ZERO_EXTEND (mode, mem);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
	  return;

	case SYMBOL_SMALL_TLSGD:
	case SYMBOL_SMALL_TLSDESC:
	case SYMBOL_SMALL_GOTTPREL:
	case SYMBOL_SMALL_GOT:
	case SYMBOL_TINY_GOT:
	  if (offset != const0_rtx)
	    {
	      gcc_assert (can_create_pseudo_p ());
	      base = aarch64_force_temporary (mode, dest, base);
	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
	      aarch64_emit_move (dest, base);
	      return;
	    }
	  /* FALLTHRU */

	case SYMBOL_SMALL_TPREL:
	case SYMBOL_SMALL_ABSOLUTE:
	case SYMBOL_TINY_ABSOLUTE:
	  aarch64_load_symref_appropriately (dest, imm, sty);
	  return;

	default:
	  gcc_unreachable ();
	}
    }

  if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
    {
      emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
      return;
    }
  if (!CONST_INT_P (imm))
    {
      if (GET_CODE (imm) == HIGH)
	emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
      else
	{
	  rtx mem = force_const_mem (mode, imm);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
	}

      return;
    }

  if (mode == SImode)
    {
      /* We know we can't do this in 1 insn, and we must be able to do it
	 in two; so don't mess around looking for sequences that don't buy
	 us anything.  */
      emit_insn (gen_rtx_SET (VOIDmode, dest,
			      GEN_INT (INTVAL (imm) & 0xffff)));
      emit_insn (gen_insv_immsi (dest, GEN_INT (16),
				 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
      return;
    }

  /* Remaining cases are all for DImode.  */

  val = INTVAL (imm);
  subtargets = optimize && can_create_pseudo_p ();

  one_match = 0;
  zero_match = 0;
  mask = 0xffff;

  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) == 0)
	zero_match++;
      else if ((val & mask) == mask)
	one_match++;
    }

  if (one_match == 2)
    {
      mask = 0xffff;
      for (i = 0; i < 64; i += 16, mask <<= 16)
	{
	  if ((val & mask) != mask)
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
	      emit_insn (gen_insv_immdi (dest, GEN_INT (i),
					 GEN_INT ((val >> i) & 0xffff)));
	      return;
	    }
	}
    }

  if (zero_match == 2)
    goto simple_sequence;

  mask = 0x0ffff0000UL;
  for (i = 16; i < 64; i += 16, mask <<= 16)
    {
      HOST_WIDE_INT comp = mask & ~(mask - 1);

      if (aarch64_uimm12_shift (val - (val & mask)))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT (val & mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - (val & mask))));
	  return;
	}
      else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT ((val + comp) & mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - ((val + comp) & mask))));
	  return;
	}
      else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT ((val - comp) | ~mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - ((val - comp) | ~mask))));
	  return;
	}
      else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT (val | ~mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - (val | ~mask))));
	  return;
	}
    }

  /* See if we can do it by arithmetically combining two
     immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      int j;
      mask = 0xffff;

      if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
	  || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT (aarch64_bitmasks[i])));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - aarch64_bitmasks[i])));
	  return;
	}

      for (j = 0; j < 64; j += 16, mask <<= 16)
	{
	  if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, dest,
				      GEN_INT (aarch64_bitmasks[i])));
	      emit_insn (gen_insv_immdi (dest, GEN_INT (j),
					 GEN_INT ((val >> j) & 0xffff)));
	      return;
	    }
	}
    }

  /* See if we can do it by logically combining two immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
	{
	  int j;

	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
	    if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
	      {
		subtarget = subtargets ? gen_reg_rtx (mode) : dest;
		emit_insn (gen_rtx_SET (VOIDmode, subtarget,
					GEN_INT (aarch64_bitmasks[i])));
		emit_insn (gen_iordi3 (dest, subtarget,
				       GEN_INT (aarch64_bitmasks[j])));
		return;
	      }
	}
      else if ((val & aarch64_bitmasks[i]) == val)
	{
	  int j;

	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
	    if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
	      {
		subtarget = subtargets ? gen_reg_rtx (mode) : dest;
		emit_insn (gen_rtx_SET (VOIDmode, subtarget,
					GEN_INT (aarch64_bitmasks[j])));
		emit_insn (gen_anddi3 (dest, subtarget,
				       GEN_INT (aarch64_bitmasks[i])));
		return;
	      }
	}
    }

 simple_sequence:
  first = true;
  mask = 0xffff;
  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) != 0)
	{
	  if (first)
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, dest,
				      GEN_INT (val & mask)));
	      first = false;
	    }
	  else
	    emit_insn (gen_insv_immdi (dest, GEN_INT (i),
				       GEN_INT ((val >> i) & 0xffff)));
	}
    }
}
static bool
aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  /* Indirect calls are not currently supported.  */
  if (decl == NULL)
    return false;

  /* Cannot tail-call to long-calls, since these are outside of the
     range of a branch instruction (we could handle this if we added
     support for indirect tail-calls).  */
  if (aarch64_decl_is_long_call_p (decl))
    return false;

  return true;
}

/* Implement TARGET_PASS_BY_REFERENCE.  */

static bool
aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
			   enum machine_mode mode,
			   const_tree type,
			   bool named ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  enum machine_mode dummymode;
  int nregs;

  /* GET_MODE_SIZE (BLKmode) is useless since it is 0.  */
  size = (mode == BLKmode && type)
	 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);

  /* Aggregates are passed by reference based on their size.  */
  if (type && AGGREGATE_TYPE_P (type))
    size = int_size_in_bytes (type);

  /* Variable sized arguments are always passed by reference.  */
  if (size < 0)
    return true;

  /* Can this be a candidate to be passed in fp/simd register(s)?  */
  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
					       &dummymode, &nregs, NULL))
    return false;

  /* Arguments which are variable sized or larger than 2 registers are
     passed by reference unless they are a homogeneous floating point
     aggregate.  */
  return size > 2 * UNITS_PER_WORD;
}
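/* Under these rules a 32-byte plain struct argument is copied out and passed
   by reference, a 16-byte struct still travels in a register pair, and a
   four-member HFA of doubles is passed in four SIMD/FP registers.  */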
/* Return TRUE if VALTYPE is padded to its least significant bits.  */
static bool
aarch64_return_in_msb (const_tree valtype)
{
  int dummy_int;
  enum machine_mode dummy_mode;

  /* Never happens in little-endian mode.  */
  if (!BYTES_BIG_ENDIAN)
    return false;

  /* Only composite types smaller than or equal to 16 bytes can
     be potentially returned in registers.  */
  if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
      || int_size_in_bytes (valtype) <= 0
      || int_size_in_bytes (valtype) > 16)
    return false;

  /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
     or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
     is always passed/returned in the least significant bits of fp/simd
     register(s).  */
  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
					       &dummy_mode, &dummy_int, NULL))
    return false;

  return true;
}
/* Implement TARGET_FUNCTION_VALUE.
   Define how to find the value returned by a function.  */

static rtx
aarch64_function_value (const_tree type, const_tree func,
			bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode mode;
  int unsignedp;
  int count;
  enum machine_mode ag_mode;

  mode = TYPE_MODE (type);
  if (INTEGRAL_TYPE_P (type))
    mode = promote_function_mode (type, mode, &unsignedp, func, 1);

  if (aarch64_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);

      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
	}
    }

  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
					       &ag_mode, &count, NULL))
    {
      if (!aarch64_composite_type_p (type, mode))
	{
	  gcc_assert (count == 1 && mode == ag_mode);
	  return gen_rtx_REG (mode, V0_REGNUM);
	}
      else
	{
	  int i;
	  rtx par;

	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
	  for (i = 0; i < count; i++)
	    {
	      rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				       GEN_INT (i * GET_MODE_SIZE (ag_mode)));
	      XVECEXP (par, 0, i) = tmp;
	    }
	  return par;
	}
    }
  else
    return gen_rtx_REG (mode, R0_REGNUM);
}
/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
   Return true if REGNO is the number of a hard register in which the values
   of called function may come back.  */

static bool
aarch64_function_value_regno_p (const unsigned int regno)
{
  /* Maximum of 16 bytes can be returned in the general registers.  Examples
     of 16-byte return values are: 128-bit integers and 16-byte small
     structures (excluding homogeneous floating-point aggregates).  */
  if (regno == R0_REGNUM || regno == R1_REGNUM)
    return true;

  /* Up to four fp/simd registers can return a function value, e.g. a
     homogeneous floating-point aggregate having four members.  */
  if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
    return !TARGET_GENERAL_REGS_ONLY;

  return false;
}
/* Implement TARGET_RETURN_IN_MEMORY.

   If the type T of the result of a function is such that
     void func (T arg)
   would require that arg be passed as a value in a register (or set of
   registers) according to the parameter passing rules, then the result
   is returned in the same registers as would be used for such an
   argument.  */

static bool
aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  enum machine_mode ag_mode;
  int count;

  if (!AGGREGATE_TYPE_P (type)
      && TREE_CODE (type) != COMPLEX_TYPE
      && TREE_CODE (type) != VECTOR_TYPE)
    /* Simple scalar types always returned in registers.  */
    return false;

  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
					       type,
					       &ag_mode,
					       &count,
					       NULL))
    return false;

  /* Types larger than 2 registers returned in memory.  */
  size = int_size_in_bytes (type);
  return (size < 0 || size > 2 * UNITS_PER_WORD);
}
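/* So a 16-byte struct of two longs comes back in x0/x1 and a two-member HFA
   of doubles in d0/d1, while a 24-byte struct is returned in memory (per
   AAPCS64 the caller passes the result address in x8).  */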
static bool
aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
			       const_tree type, int *nregs)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  return aarch64_vfp_is_call_or_return_candidate (mode,
						   type,
						   &pcum->aapcs_vfp_rmode,
						   nregs,
						   NULL);
}
/* Given MODE and TYPE of a function argument, return the alignment in
   bits.  The idea is to suppress any stronger alignment requested by
   the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
   This is a helper function for local use only.  */

static unsigned int
aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
{
  unsigned int alignment;

  if (type)
    {
      if (!integer_zerop (TYPE_SIZE (type)))
	{
	  if (TYPE_MODE (type) == mode)
	    alignment = TYPE_ALIGN (type);
	  else
	    alignment = GET_MODE_ALIGNMENT (mode);
	}
      else
	alignment = 0;
    }
  else
    alignment = GET_MODE_ALIGNMENT (mode);

  return alignment;
}
/* Layout a function argument according to the AAPCS64 rules.  The rule
   numbers refer to the rule numbers in the AAPCS64.  */

static void
aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
		    const_tree type,
		    bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int ncrn, nvrn, nregs;
  bool allocate_ncrn, allocate_nvrn;
  HOST_WIDE_INT size;

  /* We need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  /* Size in bytes, rounded to the nearest multiple of 8 bytes.  */
  size
    = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
			UNITS_PER_WORD);

  allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
  allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
						 mode,
						 type,
						 &nregs);

  /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
     The following code thus handles passing by SIMD/FP registers first.  */

  nvrn = pcum->aapcs_nvrn;

  /* C1 - C5 for floating point, homogeneous floating point aggregates (HFA)
     and homogeneous short-vector aggregates (HVA).  */
  if (allocate_nvrn)
    {
      if (nvrn + nregs <= NUM_FP_ARG_REGS)
	{
	  pcum->aapcs_nextnvrn = nvrn + nregs;
	  if (!aarch64_composite_type_p (type, mode))
	    {
	      gcc_assert (nregs == 1);
	      pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
	    }
	  else
	    {
	      rtx par;
	      int i;

	      par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
	      for (i = 0; i < nregs; i++)
		{
		  rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
					 V0_REGNUM + nvrn + i);
		  tmp = gen_rtx_EXPR_LIST
		    (VOIDmode, tmp,
		     GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
		  XVECEXP (par, 0, i) = tmp;
		}
	      pcum->aapcs_reg = par;
	    }
	  return;
	}
      else
	{
	  /* C.3 NSRN is set to 8.  */
	  pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
	  goto on_stack;
	}
    }

  ncrn = pcum->aapcs_ncrn;
  nregs = size / UNITS_PER_WORD;

  /* C6 - C9.  though the sign and zero extension semantics are
     handled elsewhere.  This is the case where the argument fits
     entirely in general registers.  */
  if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
    {
      unsigned int alignment = aarch64_function_arg_alignment (mode, type);

      gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);

      /* C.8 if the argument has an alignment of 16 then the NGRN is
	 rounded up to the next even number.  */
      if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
	{
	  ++ncrn;
	  gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
	}
      /* NREGS can be 0 when e.g. an empty structure is to be passed.
	 A reg is still generated for it, but the caller should be smart
	 enough not to use it.  */
      if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
	{
	  pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
	}
      else
	{
	  rtx par;
	  int i;

	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
	  for (i = 0; i < nregs; i++)
	    {
	      rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				       GEN_INT (i * UNITS_PER_WORD));
	      XVECEXP (par, 0, i) = tmp;
	    }
	  pcum->aapcs_reg = par;
	}

      pcum->aapcs_nextncrn = ncrn + nregs;
      return;
    }

  pcum->aapcs_nextncrn = NUM_ARG_REGS;

  /* The argument is passed on stack; record the needed number of words for
     this argument and align the total size if necessary.  */
 on_stack:
  pcum->aapcs_stack_words = size / UNITS_PER_WORD;
  if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
    pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
					       16 / UNITS_PER_WORD);
  return;
}
/* Implement TARGET_FUNCTION_ARG.  */

static rtx
aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
		      const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);

  if (mode == VOIDmode)
    return NULL_RTX;

  aarch64_layout_arg (pcum_v, mode, type, named);
  return pcum->aapcs_reg;
}
void
aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
			      const_tree fntype ATTRIBUTE_UNUSED,
			      rtx libname ATTRIBUTE_UNUSED,
			      const_tree fndecl ATTRIBUTE_UNUSED,
			      unsigned n_named ATTRIBUTE_UNUSED)
{
  pcum->aapcs_ncrn = 0;
  pcum->aapcs_nvrn = 0;
  pcum->aapcs_nextncrn = 0;
  pcum->aapcs_nextnvrn = 0;
  pcum->pcs_variant = ARM_PCS_AAPCS64;
  pcum->aapcs_reg = NULL_RTX;
  pcum->aapcs_arg_processed = false;
  pcum->aapcs_stack_words = 0;
  pcum->aapcs_stack_size = 0;

  return;
}
static void
aarch64_function_arg_advance (cumulative_args_t pcum_v,
			      enum machine_mode mode,
			      const_tree type,
			      bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  if (pcum->pcs_variant == ARM_PCS_AAPCS64)
    {
      aarch64_layout_arg (pcum_v, mode, type, named);
      gcc_assert ((pcum->aapcs_reg != NULL_RTX)
		  != (pcum->aapcs_stack_words != 0));
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
      pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
      pcum->aapcs_stack_size += pcum->aapcs_stack_words;
      pcum->aapcs_stack_words = 0;
      pcum->aapcs_reg = NULL_RTX;
    }
}
bool
aarch64_function_arg_regno_p (unsigned regno)
{
  return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
	  || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
}
/* Implement FUNCTION_ARG_BOUNDARY.  Every parameter gets at least
   PARM_BOUNDARY bits of alignment, but will be given anything up
   to STACK_BOUNDARY bits if the type requires it.  This makes sure
   that both before and after the layout of each argument, the Next
   Stacked Argument Address (NSAA) will have a minimum alignment of
   8 bytes.  */

static unsigned int
aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
{
  unsigned int alignment = aarch64_function_arg_alignment (mode, type);

  if (alignment < PARM_BOUNDARY)
    alignment = PARM_BOUNDARY;
  if (alignment > STACK_BOUNDARY)
    alignment = STACK_BOUNDARY;
  return alignment;
}
/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).

   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte of the stack slot has useful data.

   Small aggregate types are placed in the lowest memory address.

   The related parameter passing rules are B.4, C.3, C.5 and C.14.  */

bool
aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
{
  /* On little-endian targets, the least significant byte of every stack
     argument is passed at the lowest byte address of the stack slot.  */
  if (!BYTES_BIG_ENDIAN)
    return true;

  /* Otherwise, integral, floating-point and pointer types are padded downward:
     the least significant byte of a stack argument is passed at the highest
     byte address of the stack slot.  */
  if (type
      ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
	 || POINTER_TYPE_P (type))
      : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
    return false;

  /* Everything else padded upward, i.e. data in first byte of stack slot.  */
  return true;
}
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).

   It specifies padding for the last (may also be the only)
   element of a block move between registers and memory.  If
   assuming the block is in the memory, padding upward means that
   the last element is padded after its highest significant byte,
   while in downward padding, the last element is padded at its
   least significant byte side.

   Small aggregates and small complex types are always padded
   upwards.

   We don't need to worry about homogeneous floating-point or
   short-vector aggregates; their move is not affected by the
   padding direction determined here.  Regardless of endianness,
   each element of such an aggregate is put in the least
   significant bits of a fp/simd register.

   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */

bool
aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
			bool first ATTRIBUTE_UNUSED)
{

  /* Small composite types are always padded upward.  */
  if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
    {
      HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
			    : GET_MODE_SIZE (mode));
      if (size < 2 * UNITS_PER_WORD)
	return true;
    }

  /* Otherwise, use the default padding.  */
  return !BYTES_BIG_ENDIAN;
}
static enum machine_mode
aarch64_libgcc_cmp_return_mode (void)
{
  return SImode;
}

static bool
aarch64_frame_pointer_required (void)
{
  /* If the function contains dynamic stack allocations, we need to
     use the frame pointer to access the static parts of the frame.  */
  if (cfun->calls_alloca)
    return true;

  /* In aarch64_override_options_after_change
     flag_omit_leaf_frame_pointer turns off the frame pointer by
     default.  Turn it back on now if we've not got a leaf
     function.  */
  if (flag_omit_leaf_frame_pointer
      && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
    return true;

  return false;
}
/* Mark the registers that need to be saved by the callee and calculate
   the size of the callee-saved registers area and frame record (both FP
   and LR may be omitted).  */
static void
aarch64_layout_frame (void)
{
  HOST_WIDE_INT offset = 0;
  int regno;

  if (reload_completed && cfun->machine->frame.laid_out)
    return;

  /* First mark all the registers that really need to be saved...  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    cfun->machine->frame.reg_offset[regno] = -1;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    cfun->machine->frame.reg_offset[regno] = -1;

  /* ... that includes the eh data registers (if needed)...  */
  if (crtl->calls_eh_return)
    for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
      cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;

  /* ... and any callee saved register that dataflow says is live.  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    if (df_regs_ever_live_p (regno)
	&& !call_used_regs[regno])
      cfun->machine->frame.reg_offset[regno] = 0;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    if (df_regs_ever_live_p (regno)
	&& !call_used_regs[regno])
      cfun->machine->frame.reg_offset[regno] = 0;

  if (frame_pointer_needed)
    {
      cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
      cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
      cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
    }

  /* Now assign stack slots for them.  */
  for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
    if (cfun->machine->frame.reg_offset[regno] != -1)
      {
	cfun->machine->frame.reg_offset[regno] = offset;
	offset += UNITS_PER_WORD;
      }

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    if (cfun->machine->frame.reg_offset[regno] != -1)
      {
	cfun->machine->frame.reg_offset[regno] = offset;
	offset += UNITS_PER_WORD;
      }

  if (frame_pointer_needed)
    {
      cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
      offset += UNITS_PER_WORD;
    }

  if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
    {
      cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
      offset += UNITS_PER_WORD;
    }

  cfun->machine->frame.padding0 =
    (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
  offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);

  cfun->machine->frame.saved_regs_size = offset;
  cfun->machine->frame.laid_out = true;
}
/* Make the last instruction frame-related and note that it performs
   the operation described by FRAME_PATTERN.  */

static void
aarch64_set_frame_expr (rtx frame_pattern)
{
  rtx insn;

  insn = get_last_insn ();
  RTX_FRAME_RELATED_P (insn) = 1;
  RTX_FRAME_RELATED_P (frame_pattern) = 1;
  REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
				      frame_pattern,
				      REG_NOTES (insn));
}

static bool
aarch64_register_saved_on_entry (int regno)
{
  return cfun->machine->frame.reg_offset[regno] != -1;
}
static void
aarch64_save_or_restore_fprs (int start_offset, int increment,
			      bool restore, rtx base_rtx)
{
  unsigned regno;
  unsigned regno2;
  rtx insn;
  rtx (*gen_mem_ref)(enum machine_mode, rtx)
    = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    {
      if (aarch64_register_saved_on_entry (regno))
	{
	  rtx mem;
	  mem = gen_mem_ref (DFmode,
			     plus_constant (Pmode,
					    base_rtx,
					    start_offset));

	  for (regno2 = regno + 1;
	       regno2 <= V31_REGNUM
	       && !aarch64_register_saved_on_entry (regno2);
	       regno2++)
	    {
	      /* Empty loop.  */
	    }
	  if (regno2 <= V31_REGNUM &&
	      aarch64_register_saved_on_entry (regno2))
	    {
	      rtx mem2;
	      /* Next highest register to be saved.  */
	      mem2 = gen_mem_ref (DFmode,
				  plus_constant (Pmode,
						 base_rtx,
						 start_offset + increment));
	      if (restore == false)
		{
		  insn = emit_insn
		    ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
					mem2, gen_rtx_REG (DFmode, regno2)));

		}
	      else
		{
		  insn = emit_insn
		    ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
				       gen_rtx_REG (DFmode, regno2), mem2));

		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DFmode, regno));
		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DFmode, regno2));
		}

	      /* The first part of a frame-related parallel insn is
		 always assumed to be relevant to the frame
		 calculations; subsequent parts are only
		 frame-related if explicitly marked.  */
	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
	      regno = regno2;
	      start_offset += increment * 2;
	    }
	  else
	    {
	      if (restore == false)
		insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
	      else
		{
		  insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DFmode, regno));
		}
	      start_offset += increment;
	    }
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
/* OFFSET is the offset from the stack pointer at which the saves and
   restores have to happen.  */
static void
aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
					       bool restore)
{
  rtx insn;
  rtx base_rtx = stack_pointer_rtx;
  HOST_WIDE_INT start_offset = offset;
  HOST_WIDE_INT increment = UNITS_PER_WORD;
  rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
  unsigned limit = (frame_pointer_needed)? R28_REGNUM : R30_REGNUM;
  unsigned regno;
  unsigned regno2;

  for (regno = R0_REGNUM; regno <= limit; regno++)
    {
      if (aarch64_register_saved_on_entry (regno))
	{
	  rtx mem;
	  mem = gen_mem_ref (Pmode,
			     plus_constant (Pmode,
					    base_rtx,
					    start_offset));

	  for (regno2 = regno + 1;
	       regno2 <= limit
	       && !aarch64_register_saved_on_entry (regno2);
	       regno2++)
	    {
	      /* Empty loop.  */
	    }
	  if (regno2 <= limit &&
	      aarch64_register_saved_on_entry (regno2))
	    {
	      rtx mem2;
	      /* Next highest register to be saved.  */
	      mem2 = gen_mem_ref (Pmode,
				  plus_constant (Pmode,
						 base_rtx,
						 start_offset + increment));
	      if (restore == false)
		{
		  insn = emit_insn
		    ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
					mem2, gen_rtx_REG (DImode, regno2)));

		}
	      else
		{
		  insn = emit_insn
		    ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
				       gen_rtx_REG (DImode, regno2), mem2));

		  add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
		  add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
		}

	      /* The first part of a frame-related parallel insn is
		 always assumed to be relevant to the frame
		 calculations; subsequent parts are only
		 frame-related if explicitly marked.  */
	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
	      regno = regno2;
	      start_offset += increment * 2;
	    }
	  else
	    {
	      if (restore == false)
		insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
	      else
		{
		  insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
		  add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
		}
	      start_offset += increment;
	    }
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
}
/* AArch64 stack frames generated by this compiler look like:

	+-------------------------------+
	|                               |
	|  incoming stack arguments     |
	|                               |
	+-------------------------------+
	|                               | <-- incoming stack pointer (aligned)
	|  callee-allocated save area   |
	|  for register varargs         |
	|                               |
	+-------------------------------+
	|  local variables              | <-- frame_pointer_rtx
	|                               |
	+-------------------------------+  \
	|  callee-saved registers       |  | frame.saved_regs_size
	+-------------------------------+  |
	|  LR'                          |  |
	+-------------------------------+  |
	|  FP'                          | / <- hard_frame_pointer_rtx (aligned)
	+-------------------------------+
	|  dynamic allocation           |
	+-------------------------------+
	|                               |
	+-------------------------------+
	|  outgoing stack arguments     | <-- arg_pointer
	|                               |
	+-------------------------------+
	|                               | <-- stack_pointer_rtx (aligned)

   Dynamic stack allocations via alloca() decrease stack_pointer_rtx
   but leave frame_pointer_rtx and hard_frame_pointer_rtx
   unchanged.  */
/* Generate the prologue instructions for entry into a function.
   Establish the stack frame by decreasing the stack pointer with a
   properly calculated size and, if necessary, create a frame record
   filled with the values of LR and previous frame pointer.  The
   current FP is also set up if it is in use.  */

void
aarch64_expand_prologue (void)
{
  /* sub sp, sp, #<frame_size>
     stp {fp, lr}, [sp, #<frame_size> - 16]
     add fp, sp, #<frame_size> - hardfp_offset
     stp {cs_reg}, [fp, #-16] etc.

     sub sp, sp, <final_adjustment_if_any>
  */
  HOST_WIDE_INT original_frame_size;	/* local variables + vararg save */
  HOST_WIDE_INT frame_size, offset;
  HOST_WIDE_INT fp_offset;		/* FP offset from SP */
  rtx insn;

  aarch64_layout_frame ();
  original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
  gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
	      && (cfun->stdarg || !cfun->machine->saved_varargs_size));
  frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
		+ crtl->outgoing_args_size);
  offset = frame_size = AARCH64_ROUND_UP (frame_size,
					  STACK_BOUNDARY / BITS_PER_UNIT);

  if (flag_stack_usage_info)
    current_function_static_stack_size = frame_size;

  fp_offset = (offset
	       - original_frame_size
	       - cfun->machine->frame.saved_regs_size);

  /* Store pairs and load pairs have a range of only -512 to 504.  */
  if (offset >= 512)
    {
      /* When the frame has a large size, an initial decrease is done on
	 the stack pointer to jump over the callee-allocated save area for
	 register varargs, the local variable area and/or the callee-saved
	 register area.  This will allow the pre-index write-back
	 store pair instructions to be used for setting up the stack frame
	 efficiently.  */
      offset = original_frame_size + cfun->machine->frame.saved_regs_size;
      if (offset >= 512)
	offset = cfun->machine->frame.saved_regs_size;

      frame_size -= (offset + crtl->outgoing_args_size);
      fp_offset = 0;

      if (frame_size >= 0x1000000)
	{
	  rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
	  emit_move_insn (op0, GEN_INT (-frame_size));
	  emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
	  aarch64_set_frame_expr (gen_rtx_SET
				  (Pmode, stack_pointer_rtx,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  -frame_size)));
	}
      else if (frame_size > 0)
	{
	  if ((frame_size & 0xfff) != frame_size)
	    {
	      insn = emit_insn (gen_add2_insn
				(stack_pointer_rtx,
				 GEN_INT (-(frame_size
					    & ~(HOST_WIDE_INT)0xfff))));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	  if ((frame_size & 0xfff) != 0)
	    {
	      insn = emit_insn (gen_add2_insn
				(stack_pointer_rtx,
				 GEN_INT (-(frame_size
					    & (HOST_WIDE_INT)0xfff))));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}
    }
  else
    frame_size = -1;

  /* Save the frame pointer and lr if the frame pointer is needed
     first.  Make the frame pointer point to the location of the
     old frame pointer on the stack.  */
  if (frame_pointer_needed)
    {
      rtx mem_fp, mem_lr;

      if (fp_offset)
	{
	  insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					   GEN_INT (-offset)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  aarch64_set_frame_expr (gen_rtx_SET
				  (Pmode, stack_pointer_rtx,
				   gen_rtx_MINUS (Pmode,
						  stack_pointer_rtx,
						  GEN_INT (offset))));
	  mem_fp = gen_frame_mem (DImode,
				  plus_constant (Pmode,
						 stack_pointer_rtx,
						 fp_offset));
	  mem_lr = gen_frame_mem (DImode,
				  plus_constant (Pmode,
						 stack_pointer_rtx,
						 fp_offset
						 + UNITS_PER_WORD));
	  insn = emit_insn (gen_store_pairdi (mem_fp,
					      hard_frame_pointer_rtx,
					      mem_lr,
					      gen_rtx_REG (DImode,
							   LR_REGNUM)));
	}
      else
	{
	  insn = emit_insn (gen_storewb_pairdi_di
			    (stack_pointer_rtx, stack_pointer_rtx,
			     hard_frame_pointer_rtx,
			     gen_rtx_REG (DImode, LR_REGNUM),
			     GEN_INT (-offset),
			     GEN_INT (GET_MODE_SIZE (DImode) - offset)));
	  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
	}

      /* The first part of a frame-related parallel insn is always
	 assumed to be relevant to the frame calculations;
	 subsequent parts are only frame-related if explicitly
	 marked.  */
      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Set up frame pointer to point to the location of the
	 previous frame pointer on the stack.  */
      insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
				       stack_pointer_rtx,
				       GEN_INT (fp_offset)));
      aarch64_set_frame_expr (gen_rtx_SET
			      (Pmode, hard_frame_pointer_rtx,
			       plus_constant (Pmode,
					      stack_pointer_rtx,
					      fp_offset)));
      RTX_FRAME_RELATED_P (insn) = 1;
      insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
				       hard_frame_pointer_rtx));
    }
  else
    {
      insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
				       GEN_INT (-offset)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  aarch64_save_or_restore_callee_save_registers
    (fp_offset + cfun->machine->frame.hardfp_offset, 0);

  /* when offset >= 512,
     sub sp, sp, #<outgoing_args_size> */
  if (frame_size > -1)
    {
      if (crtl->outgoing_args_size > 0)
	{
	  insn = emit_insn (gen_add2_insn
			    (stack_pointer_rtx,
			     GEN_INT (- crtl->outgoing_args_size)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
2298 /* Generate the epilogue instructions for returning from a function. */
2300 aarch64_expand_epilogue (bool for_sibcall
)
2302 HOST_WIDE_INT original_frame_size
, frame_size
, offset
;
2303 HOST_WIDE_INT fp_offset
;
2307 aarch64_layout_frame ();
2308 original_frame_size
= get_frame_size () + cfun
->machine
->saved_varargs_size
;
2309 frame_size
= (original_frame_size
+ cfun
->machine
->frame
.saved_regs_size
2310 + crtl
->outgoing_args_size
);
2311 offset
= frame_size
= AARCH64_ROUND_UP (frame_size
,
2312 STACK_BOUNDARY
/ BITS_PER_UNIT
);
2315 - original_frame_size
2316 - cfun
->machine
->frame
.saved_regs_size
);
2318 cfa_reg
= frame_pointer_needed
? hard_frame_pointer_rtx
: stack_pointer_rtx
;
  /* Store pairs and load pairs have an offset range of only -512 to 504.  */
2323 offset
= original_frame_size
+ cfun
->machine
->frame
.saved_regs_size
;
2325 offset
= cfun
->machine
->frame
.saved_regs_size
;
2327 frame_size
-= (offset
+ crtl
->outgoing_args_size
);
2329 if (!frame_pointer_needed
&& crtl
->outgoing_args_size
> 0)
2331 insn
= emit_insn (gen_add2_insn
2333 GEN_INT (crtl
->outgoing_args_size
)));
2334 RTX_FRAME_RELATED_P (insn
) = 1;
2340 /* If there were outgoing arguments or we've done dynamic stack
2341 allocation, then restore the stack pointer from the frame
2342 pointer. This is at most one insn and more efficient than using
2343 GCC's internal mechanism. */
2344 if (frame_pointer_needed
2345 && (crtl
->outgoing_args_size
|| cfun
->calls_alloca
))
2347 insn
= emit_insn (gen_add3_insn (stack_pointer_rtx
,
2348 hard_frame_pointer_rtx
,
2349 GEN_INT (- fp_offset
)));
2350 RTX_FRAME_RELATED_P (insn
) = 1;
2351 /* As SP is set to (FP - fp_offset), according to the rules in
2352 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2353 from the value of SP from now on. */
2354 cfa_reg
= stack_pointer_rtx
;
2357 aarch64_save_or_restore_callee_save_registers
2358 (fp_offset
+ cfun
->machine
->frame
.hardfp_offset
, 1);
2360 /* Restore the frame pointer and lr if the frame pointer is needed. */
2363 if (frame_pointer_needed
)
2369 mem_fp
= gen_frame_mem (DImode
,
2370 plus_constant (Pmode
,
2373 mem_lr
= gen_frame_mem (DImode
,
2374 plus_constant (Pmode
,
2378 insn
= emit_insn (gen_load_pairdi (hard_frame_pointer_rtx
,
2380 gen_rtx_REG (DImode
,
2386 insn
= emit_insn (gen_loadwb_pairdi_di
2389 hard_frame_pointer_rtx
,
2390 gen_rtx_REG (DImode
, LR_REGNUM
),
2392 GEN_INT (GET_MODE_SIZE (DImode
) + offset
)));
2393 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 2)) = 1;
2394 add_reg_note (insn
, REG_CFA_ADJUST_CFA
,
2395 (gen_rtx_SET (Pmode
, stack_pointer_rtx
,
2396 plus_constant (Pmode
, cfa_reg
,
	  /* The first part of a frame-related parallel insn
	     is always assumed to be relevant to the frame
	     calculations; subsequent parts are only
	     frame-related if explicitly marked.  */
2404 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 1)) = 1;
2405 RTX_FRAME_RELATED_P (insn
) = 1;
2406 add_reg_note (insn
, REG_CFA_RESTORE
, hard_frame_pointer_rtx
);
2407 add_reg_note (insn
, REG_CFA_RESTORE
,
2408 gen_rtx_REG (DImode
, LR_REGNUM
));
2412 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
,
2414 RTX_FRAME_RELATED_P (insn
) = 1;
2419 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
,
2421 RTX_FRAME_RELATED_P (insn
) = 1;
2425 /* Stack adjustment for exception handler. */
2426 if (crtl
->calls_eh_return
)
2428 /* We need to unwind the stack by the offset computed by
2429 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2430 based on SP. Ideally we would update the SP and define the
2431 CFA along the lines of:
2433 SP = SP + EH_RETURN_STACKADJ_RTX
2434 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
	 However the dwarf emitter only understands a constant register
	 offset.
2439 The solution chosen here is to use the otherwise unused IP0
2440 as a temporary register to hold the current SP value. The
2441 CFA is described using IP0 then SP is modified. */
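      /* Illustrative note (not from the original source): the sequence
	 emitted below is effectively

	     mov	x16, sp		// IP0 := current SP; CFA note on IP0
	     add	sp, sp, x<adj>	// <adj> stands for EH_RETURN_STACKADJ_RTX

	 so the CFA stays well defined (via IP0/x16) while SP moves by a
	 run-time amount that a constant-offset CFA note could not
	 describe.  */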
2443 rtx ip0
= gen_rtx_REG (DImode
, IP0_REGNUM
);
2445 insn
= emit_move_insn (ip0
, stack_pointer_rtx
);
2446 add_reg_note (insn
, REG_CFA_DEF_CFA
, ip0
);
2447 RTX_FRAME_RELATED_P (insn
) = 1;
2449 emit_insn (gen_add2_insn (stack_pointer_rtx
, EH_RETURN_STACKADJ_RTX
));
2451 /* Ensure the assignment to IP0 does not get optimized away. */
2455 if (frame_size
> -1)
2457 if (frame_size
>= 0x1000000)
2459 rtx op0
= gen_rtx_REG (Pmode
, IP0_REGNUM
);
2460 emit_move_insn (op0
, GEN_INT (frame_size
));
2461 emit_insn (gen_add2_insn (stack_pointer_rtx
, op0
));
2462 aarch64_set_frame_expr (gen_rtx_SET
2463 (Pmode
, stack_pointer_rtx
,
2464 plus_constant (Pmode
,
2468 else if (frame_size
> 0)
2470 if ((frame_size
& 0xfff) != 0)
2472 insn
= emit_insn (gen_add2_insn
2474 GEN_INT ((frame_size
2475 & (HOST_WIDE_INT
) 0xfff))));
2476 RTX_FRAME_RELATED_P (insn
) = 1;
2478 if ((frame_size
& 0xfff) != frame_size
)
2480 insn
= emit_insn (gen_add2_insn
2482 GEN_INT ((frame_size
2483 & ~ (HOST_WIDE_INT
) 0xfff))));
2484 RTX_FRAME_RELATED_P (insn
) = 1;
2488 aarch64_set_frame_expr (gen_rtx_SET (Pmode
, stack_pointer_rtx
,
2489 plus_constant (Pmode
,
2494 emit_use (gen_rtx_REG (DImode
, LR_REGNUM
));
2496 emit_jump_insn (ret_rtx
);
/* Return the place to copy the exception unwinding return address to.
   This will probably be a stack slot, but could (in theory) be the
   return register.  */
2503 aarch64_final_eh_return_addr (void)
2505 HOST_WIDE_INT original_frame_size
, frame_size
, offset
, fp_offset
;
2506 aarch64_layout_frame ();
2507 original_frame_size
= get_frame_size () + cfun
->machine
->saved_varargs_size
;
2508 frame_size
= (original_frame_size
+ cfun
->machine
->frame
.saved_regs_size
2509 + crtl
->outgoing_args_size
);
2510 offset
= frame_size
= AARCH64_ROUND_UP (frame_size
,
2511 STACK_BOUNDARY
/ BITS_PER_UNIT
);
2513 - original_frame_size
2514 - cfun
->machine
->frame
.saved_regs_size
;
2516 if (cfun
->machine
->frame
.reg_offset
[LR_REGNUM
] < 0)
2517 return gen_rtx_REG (DImode
, LR_REGNUM
);
2519 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2520 result in a store to save LR introduced by builtin_eh_return () being
2521 incorrectly deleted because the alias is not detected.
2522 So in the calculation of the address to copy the exception unwinding
2523 return address to, we note 2 cases.
2524 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2525 we return a SP-relative location since all the addresses are SP-relative
2526 in this case. This prevents the store from being optimized away.
2527 If the fp_offset is not 0, then the addresses will be FP-relative and
2528 therefore we return a FP-relative location. */
2530 if (frame_pointer_needed
)
2533 return gen_frame_mem (DImode
,
2534 plus_constant (Pmode
, hard_frame_pointer_rtx
, UNITS_PER_WORD
));
2536 return gen_frame_mem (DImode
,
2537 plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
));
2540 /* If FP is not needed, we calculate the location of LR, which would be
2541 at the top of the saved registers block. */
2543 return gen_frame_mem (DImode
,
2544 plus_constant (Pmode
,
2547 + cfun
->machine
->frame
.saved_regs_size
2548 - 2 * UNITS_PER_WORD
));
2551 /* Possibly output code to build up a constant in a register. For
2552 the benefit of the costs infrastructure, returns the number of
2553 instructions which would be emitted. GENERATE inhibits or
2554 enables code generation. */
2557 aarch64_build_constant (int regnum
, HOST_WIDE_INT val
, bool generate
)
2561 if (aarch64_bitmask_imm (val
, DImode
))
2564 emit_move_insn (gen_rtx_REG (Pmode
, regnum
), GEN_INT (val
));
2572 HOST_WIDE_INT valp
= val
>> 16;
2576 for (i
= 16; i
< 64; i
+= 16)
2578 valm
= (valp
& 0xffff);
  /* zcount contains the number of additional MOVK instructions
     required if the constant is built up with an initial MOVZ instruction,
     while ncount is the number of MOVK instructions required if starting
     with a MOVN instruction.  Choose the sequence that needs the fewer
     instructions, preferring MOVZ instructions when both counts are the
     same.  */
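  /* Illustrative example (not from the original source): to build
     0x0000123400005678 the MOVZ-based sequence is shorter,

	 movz	x0, #0x5678
	 movk	x0, #0x1234, lsl #32

     whereas an all-ones-background value such as 0xffffffffffff1234 is
     cheaper starting from MOVN:

	 movn	x0, #0xedcb		// ~0xedcb == 0xffffffffffff1234

     zcount/ncount simply count the MOVKs each starting point needs.  */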
2595 if (ncount
< zcount
)
2598 emit_move_insn (gen_rtx_REG (Pmode
, regnum
),
2599 GEN_INT (val
| ~(HOST_WIDE_INT
) 0xffff));
2606 emit_move_insn (gen_rtx_REG (Pmode
, regnum
),
2607 GEN_INT (val
& 0xffff));
2614 for (i
= 16; i
< 64; i
+= 16)
2616 if ((val
& 0xffff) != tval
)
2619 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode
, regnum
),
2621 GEN_INT (val
& 0xffff)));
2631 aarch64_add_constant (int regnum
, int scratchreg
, HOST_WIDE_INT delta
)
2633 HOST_WIDE_INT mdelta
= delta
;
2634 rtx this_rtx
= gen_rtx_REG (Pmode
, regnum
);
2635 rtx scratch_rtx
= gen_rtx_REG (Pmode
, scratchreg
);
2640 if (mdelta
>= 4096 * 4096)
2642 (void) aarch64_build_constant (scratchreg
, delta
, true);
2643 emit_insn (gen_add3_insn (this_rtx
, this_rtx
, scratch_rtx
));
2645 else if (mdelta
> 0)
2649 emit_insn (gen_rtx_SET (Pmode
, scratch_rtx
, GEN_INT (mdelta
/ 4096)));
2650 rtx shift
= gen_rtx_ASHIFT (Pmode
, scratch_rtx
, GEN_INT (12));
2652 emit_insn (gen_rtx_SET (Pmode
, this_rtx
,
2653 gen_rtx_MINUS (Pmode
, this_rtx
, shift
)));
2655 emit_insn (gen_rtx_SET (Pmode
, this_rtx
,
2656 gen_rtx_PLUS (Pmode
, this_rtx
, shift
)));
2658 if (mdelta
% 4096 != 0)
2660 scratch_rtx
= GEN_INT ((delta
< 0 ? -1 : 1) * (mdelta
% 4096));
2661 emit_insn (gen_rtx_SET (Pmode
, this_rtx
,
2662 gen_rtx_PLUS (Pmode
, this_rtx
, scratch_rtx
)));
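/* Illustrative example (not from the original source): for a call such as
   aarch64_add_constant (0, 1, 10000) (x0 = regnum, x1 = scratchreg, chosen
   for this sketch) the code above splits the delta as 10000 = 2 * 4096 + 1808
   and emits, in effect,

	mov	x1, #2
	add	x0, x0, x1, lsl #12	// x0 += 8192
	add	x0, x0, #1808		// x0 += 1808

   while deltas of 4096 * 4096 or more are built in full in the scratch
   register and added with a plain register-register ADD.  */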
2667 /* Output code to add DELTA to the first argument, and then jump
2668 to FUNCTION. Used for C++ multiple inheritance. */
2670 aarch64_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
2671 HOST_WIDE_INT delta
,
2672 HOST_WIDE_INT vcall_offset
,
  /* The this pointer is always in x0.  Note that this differs from
     Arm where the this pointer may be bumped to r1 if r0 is required
     to return a pointer to an aggregate.  On AArch64 a result value
     pointer will be in x8.  */
2679 int this_regno
= R0_REGNUM
;
2680 rtx this_rtx
, temp0
, temp1
, addr
, insn
, funexp
;
2682 reload_completed
= 1;
2683 emit_note (NOTE_INSN_PROLOGUE_END
);
2685 if (vcall_offset
== 0)
2686 aarch64_add_constant (this_regno
, IP1_REGNUM
, delta
);
2689 gcc_assert ((vcall_offset
& (POINTER_BYTES
- 1)) == 0);
2691 this_rtx
= gen_rtx_REG (Pmode
, this_regno
);
2692 temp0
= gen_rtx_REG (Pmode
, IP0_REGNUM
);
2693 temp1
= gen_rtx_REG (Pmode
, IP1_REGNUM
);
2698 if (delta
>= -256 && delta
< 256)
2699 addr
= gen_rtx_PRE_MODIFY (Pmode
, this_rtx
,
2700 plus_constant (Pmode
, this_rtx
, delta
));
2702 aarch64_add_constant (this_regno
, IP1_REGNUM
, delta
);
2705 if (Pmode
== ptr_mode
)
2706 aarch64_emit_move (temp0
, gen_rtx_MEM (ptr_mode
, addr
));
2708 aarch64_emit_move (temp0
,
2709 gen_rtx_ZERO_EXTEND (Pmode
,
2710 gen_rtx_MEM (ptr_mode
, addr
)));
2712 if (vcall_offset
>= -256 && vcall_offset
< 4096 * POINTER_BYTES
)
2713 addr
= plus_constant (Pmode
, temp0
, vcall_offset
);
2716 (void) aarch64_build_constant (IP1_REGNUM
, vcall_offset
, true);
2717 addr
= gen_rtx_PLUS (Pmode
, temp0
, temp1
);
2720 if (Pmode
== ptr_mode
)
2721 aarch64_emit_move (temp1
, gen_rtx_MEM (ptr_mode
,addr
));
2723 aarch64_emit_move (temp1
,
2724 gen_rtx_SIGN_EXTEND (Pmode
,
2725 gen_rtx_MEM (ptr_mode
, addr
)));
2727 emit_insn (gen_add2_insn (this_rtx
, temp1
));
2730 /* Generate a tail call to the target function. */
2731 if (!TREE_USED (function
))
2733 assemble_external (function
);
2734 TREE_USED (function
) = 1;
2736 funexp
= XEXP (DECL_RTL (function
), 0);
2737 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
2738 insn
= emit_call_insn (gen_sibcall (funexp
, const0_rtx
, NULL_RTX
));
2739 SIBLING_CALL_P (insn
) = 1;
2741 insn
= get_insns ();
2742 shorten_branches (insn
);
2743 final_start_function (insn
, file
, 1);
2744 final (insn
, file
, 1);
2745 final_end_function ();
2747 /* Stop pretending to be a post-reload pass. */
2748 reload_completed
= 0;
static int
aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  if (GET_CODE (*x) == SYMBOL_REF)
    return SYMBOL_REF_TLS_MODEL (*x) != 0;

  /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
     TLS offsets, not real symbol references.  */
  if (GET_CODE (*x) == UNSPEC
      && XINT (*x, 1) == UNSPEC_TLS)
    return -1;

  return 0;
}

static bool
aarch64_tls_referenced_p (rtx x)
{
  if (!TARGET_HAVE_TLS)
    return false;

  return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
}
2777 aarch64_bitmasks_cmp (const void *i1
, const void *i2
)
2779 const unsigned HOST_WIDE_INT
*imm1
= (const unsigned HOST_WIDE_INT
*) i1
;
2780 const unsigned HOST_WIDE_INT
*imm2
= (const unsigned HOST_WIDE_INT
*) i2
;
2791 aarch64_build_bitmask_table (void)
2793 unsigned HOST_WIDE_INT mask
, imm
;
2794 unsigned int log_e
, e
, s
, r
;
2795 unsigned int nimms
= 0;
2797 for (log_e
= 1; log_e
<= 6; log_e
++)
2801 mask
= ~(HOST_WIDE_INT
) 0;
2803 mask
= ((HOST_WIDE_INT
) 1 << e
) - 1;
2804 for (s
= 1; s
< e
; s
++)
2806 for (r
= 0; r
< e
; r
++)
2808 /* set s consecutive bits to 1 (s < 64) */
2809 imm
= ((unsigned HOST_WIDE_INT
)1 << s
) - 1;
2810 /* rotate right by r */
2812 imm
= ((imm
>> r
) | (imm
<< (e
- r
))) & mask
;
2813 /* replicate the constant depending on SIMD size */
2815 case 1: imm
|= (imm
<< 2);
2816 case 2: imm
|= (imm
<< 4);
2817 case 3: imm
|= (imm
<< 8);
2818 case 4: imm
|= (imm
<< 16);
2819 case 5: imm
|= (imm
<< 32);
2825 gcc_assert (nimms
< AARCH64_NUM_BITMASKS
);
2826 aarch64_bitmasks
[nimms
++] = imm
;
2831 gcc_assert (nimms
== AARCH64_NUM_BITMASKS
);
2832 qsort (aarch64_bitmasks
, nimms
, sizeof (aarch64_bitmasks
[0]),
2833 aarch64_bitmasks_cmp
);
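/* Illustrative example (not from the original source): with element size
   e == 8, s == 3 set bits and rotation r == 1, the table-building loop
   above forms

	imm = (1 << 3) - 1			-> 0x07
	imm rotated right by 1 within 8 bits	-> 0x83
	imm replicated to 64 bits		-> 0x8383838383838383

   and that replicated value is what lands in aarch64_bitmasks[] and is
   later found by the bsearch in aarch64_bitmask_imm.  */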
/* Return true if val can be encoded as a 12-bit unsigned immediate with
   a left shift of 0 or 12 bits.  */
bool
aarch64_uimm12_shift (HOST_WIDE_INT val)
{
  return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
	  || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val);
}

/* Return true if val is an immediate that can be loaded into a
   register by a MOVZ instruction.  */
static bool
aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
{
  if (GET_MODE_SIZE (mode) > 4)
    {
      if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
	  || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
	return 1;
    }
  else
    {
      /* Ignore sign extension.  */
      val &= (HOST_WIDE_INT) 0xffffffff;
    }
  return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
	  || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
}
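/* Illustrative examples (not from the original source):
   aarch64_uimm12_shift accepts 0xabc and 0xabc000 (a 12-bit immediate,
   optionally shifted left by 12, as used by ADD/SUB immediate) but
   rejects 0xabc0.  aarch64_movw_imm accepts a single aligned 16-bit
   chunk such as 0x12340000 or, for 64-bit modes, 0x5678000000000000.  */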
/* Return true if val is a valid bitmask immediate.  */
bool
aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
{
  if (GET_MODE_SIZE (mode) < 8)
    {
      /* Replicate bit pattern.  */
      val &= (HOST_WIDE_INT) 0xffffffff;
      val |= val << 32;
    }
  return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
		  sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
}

/* Return true if val is an immediate that can be loaded into a
   register in a single instruction.  */
bool
aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
{
  if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
    return 1;
  return aarch64_bitmask_imm (val, mode);
}
2895 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
2899 if (GET_CODE (x
) == HIGH
)
2902 split_const (x
, &base
, &offset
);
2903 if (GET_CODE (base
) == SYMBOL_REF
|| GET_CODE (base
) == LABEL_REF
)
2905 if (aarch64_classify_symbol (base
, SYMBOL_CONTEXT_ADR
)
2906 != SYMBOL_FORCE_TO_MEM
)
	  /* Avoid generating a 64-bit relocation in ILP32; leave it
	     to aarch64_expand_mov_immediate to handle properly.  */
2911 return mode
!= ptr_mode
;
2914 return aarch64_tls_referenced_p (x
);
2917 /* Return true if register REGNO is a valid index register.
2918 STRICT_P is true if REG_OK_STRICT is in effect. */
2921 aarch64_regno_ok_for_index_p (int regno
, bool strict_p
)
2923 if (!HARD_REGISTER_NUM_P (regno
))
2931 regno
= reg_renumber
[regno
];
2933 return GP_REGNUM_P (regno
);
2936 /* Return true if register REGNO is a valid base register for mode MODE.
2937 STRICT_P is true if REG_OK_STRICT is in effect. */
2940 aarch64_regno_ok_for_base_p (int regno
, bool strict_p
)
2942 if (!HARD_REGISTER_NUM_P (regno
))
2950 regno
= reg_renumber
[regno
];
2953 /* The fake registers will be eliminated to either the stack or
2954 hard frame pointer, both of which are usually valid base registers.
2955 Reload deals with the cases where the eliminated form isn't valid. */
2956 return (GP_REGNUM_P (regno
)
2957 || regno
== SP_REGNUM
2958 || regno
== FRAME_POINTER_REGNUM
2959 || regno
== ARG_POINTER_REGNUM
);
2962 /* Return true if X is a valid base register for mode MODE.
2963 STRICT_P is true if REG_OK_STRICT is in effect. */
2966 aarch64_base_register_rtx_p (rtx x
, bool strict_p
)
2968 if (!strict_p
&& GET_CODE (x
) == SUBREG
)
2971 return (REG_P (x
) && aarch64_regno_ok_for_base_p (REGNO (x
), strict_p
));
2974 /* Return true if address offset is a valid index. If it is, fill in INFO
2975 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2978 aarch64_classify_index (struct aarch64_address_info
*info
, rtx x
,
2979 enum machine_mode mode
, bool strict_p
)
2981 enum aarch64_address_type type
;
2986 if ((REG_P (x
) || GET_CODE (x
) == SUBREG
)
2987 && GET_MODE (x
) == Pmode
)
2989 type
= ADDRESS_REG_REG
;
2993 /* (sign_extend:DI (reg:SI)) */
2994 else if ((GET_CODE (x
) == SIGN_EXTEND
2995 || GET_CODE (x
) == ZERO_EXTEND
)
2996 && GET_MODE (x
) == DImode
2997 && GET_MODE (XEXP (x
, 0)) == SImode
)
2999 type
= (GET_CODE (x
) == SIGN_EXTEND
)
3000 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
3001 index
= XEXP (x
, 0);
3004 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3005 else if (GET_CODE (x
) == MULT
3006 && (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
3007 || GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
3008 && GET_MODE (XEXP (x
, 0)) == DImode
3009 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
3010 && CONST_INT_P (XEXP (x
, 1)))
3012 type
= (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
3013 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
3014 index
= XEXP (XEXP (x
, 0), 0);
3015 shift
= exact_log2 (INTVAL (XEXP (x
, 1)));
3017 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3018 else if (GET_CODE (x
) == ASHIFT
3019 && (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
3020 || GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
3021 && GET_MODE (XEXP (x
, 0)) == DImode
3022 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
3023 && CONST_INT_P (XEXP (x
, 1)))
3025 type
= (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
3026 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
3027 index
= XEXP (XEXP (x
, 0), 0);
3028 shift
= INTVAL (XEXP (x
, 1));
3030 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3031 else if ((GET_CODE (x
) == SIGN_EXTRACT
3032 || GET_CODE (x
) == ZERO_EXTRACT
)
3033 && GET_MODE (x
) == DImode
3034 && GET_CODE (XEXP (x
, 0)) == MULT
3035 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
3036 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
3038 type
= (GET_CODE (x
) == SIGN_EXTRACT
)
3039 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
3040 index
= XEXP (XEXP (x
, 0), 0);
3041 shift
= exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)));
3042 if (INTVAL (XEXP (x
, 1)) != 32 + shift
3043 || INTVAL (XEXP (x
, 2)) != 0)
3046 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3047 (const_int 0xffffffff<<shift)) */
3048 else if (GET_CODE (x
) == AND
3049 && GET_MODE (x
) == DImode
3050 && GET_CODE (XEXP (x
, 0)) == MULT
3051 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
3052 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
3053 && CONST_INT_P (XEXP (x
, 1)))
3055 type
= ADDRESS_REG_UXTW
;
3056 index
= XEXP (XEXP (x
, 0), 0);
3057 shift
= exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)));
3058 if (INTVAL (XEXP (x
, 1)) != (HOST_WIDE_INT
)0xffffffff << shift
)
3061 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3062 else if ((GET_CODE (x
) == SIGN_EXTRACT
3063 || GET_CODE (x
) == ZERO_EXTRACT
)
3064 && GET_MODE (x
) == DImode
3065 && GET_CODE (XEXP (x
, 0)) == ASHIFT
3066 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
3067 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
3069 type
= (GET_CODE (x
) == SIGN_EXTRACT
)
3070 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
3071 index
= XEXP (XEXP (x
, 0), 0);
3072 shift
= INTVAL (XEXP (XEXP (x
, 0), 1));
3073 if (INTVAL (XEXP (x
, 1)) != 32 + shift
3074 || INTVAL (XEXP (x
, 2)) != 0)
3077 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3078 (const_int 0xffffffff<<shift)) */
3079 else if (GET_CODE (x
) == AND
3080 && GET_MODE (x
) == DImode
3081 && GET_CODE (XEXP (x
, 0)) == ASHIFT
3082 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
3083 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
3084 && CONST_INT_P (XEXP (x
, 1)))
3086 type
= ADDRESS_REG_UXTW
;
3087 index
= XEXP (XEXP (x
, 0), 0);
3088 shift
= INTVAL (XEXP (XEXP (x
, 0), 1));
3089 if (INTVAL (XEXP (x
, 1)) != (HOST_WIDE_INT
)0xffffffff << shift
)
3092 /* (mult:P (reg:P) (const_int scale)) */
3093 else if (GET_CODE (x
) == MULT
3094 && GET_MODE (x
) == Pmode
3095 && GET_MODE (XEXP (x
, 0)) == Pmode
3096 && CONST_INT_P (XEXP (x
, 1)))
3098 type
= ADDRESS_REG_REG
;
3099 index
= XEXP (x
, 0);
3100 shift
= exact_log2 (INTVAL (XEXP (x
, 1)));
3102 /* (ashift:P (reg:P) (const_int shift)) */
3103 else if (GET_CODE (x
) == ASHIFT
3104 && GET_MODE (x
) == Pmode
3105 && GET_MODE (XEXP (x
, 0)) == Pmode
3106 && CONST_INT_P (XEXP (x
, 1)))
3108 type
= ADDRESS_REG_REG
;
3109 index
= XEXP (x
, 0);
3110 shift
= INTVAL (XEXP (x
, 1));
3115 if (GET_CODE (index
) == SUBREG
)
3116 index
= SUBREG_REG (index
);
3119 (shift
> 0 && shift
<= 3
3120 && (1 << shift
) == GET_MODE_SIZE (mode
)))
3122 && aarch64_regno_ok_for_index_p (REGNO (index
), strict_p
))
3125 info
->offset
= index
;
3126 info
->shift
= shift
;
static inline bool
offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
{
  return (offset >= -64 * GET_MODE_SIZE (mode)
	  && offset < 64 * GET_MODE_SIZE (mode)
	  && offset % GET_MODE_SIZE (mode) == 0);
}

static inline bool
offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
			       HOST_WIDE_INT offset)
{
  return offset >= -256 && offset < 256;
}

static inline bool
offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
{
  return (offset >= 0
	  && offset < 4096 * GET_MODE_SIZE (mode)
	  && offset % GET_MODE_SIZE (mode) == 0);
}
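/* Illustrative note (not from the original source): for DImode (8-byte)
   accesses the three predicates above correspond to
     LDP/STP   (7-bit signed, scaled)    : -512 .. 504 in steps of 8
     LDUR/STUR (9-bit signed, unscaled)  : -256 .. 255
     LDR/STR   (12-bit unsigned, scaled) : 0 .. 32760 in steps of 8.  */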
3156 /* Return true if X is a valid address for machine mode MODE. If it is,
3157 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3158 effect. OUTER_CODE is PARALLEL for a load/store pair. */
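/* Illustrative note (not from the original source): the recognizer below
   accepts addresses that print as, for example,
     [x0]			plain base register
     [x0, #16]			base plus immediate offset
     [x0, x1, lsl #3]		base plus (scaled) register index
     [x0, w1, sxtw #2]		base plus extended 32-bit index
     [x0, #16]!  /  [x0], #16	pre/post-indexed writeback
     [x0, #:lo12:sym]		LO_SUM low-12-bit relocation
   plus pc-relative literal-pool references.  Register numbers here are
   placeholders.  */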
3161 aarch64_classify_address (struct aarch64_address_info
*info
,
3162 rtx x
, enum machine_mode mode
,
3163 RTX_CODE outer_code
, bool strict_p
)
3165 enum rtx_code code
= GET_CODE (x
);
3167 bool allow_reg_index_p
=
3168 outer_code
!= PARALLEL
&& (GET_MODE_SIZE (mode
) != 16
3169 || aarch64_vector_mode_supported_p (mode
));
  /* Don't support anything other than POST_INC or REG addressing for
     AdvSIMD struct modes.  */
3172 if (aarch64_vect_struct_mode_p (mode
)
3173 && (code
!= POST_INC
&& code
!= REG
))
3180 info
->type
= ADDRESS_REG_IMM
;
3182 info
->offset
= const0_rtx
;
3183 return aarch64_base_register_rtx_p (x
, strict_p
);
3188 if (GET_MODE_SIZE (mode
) != 0
3189 && CONST_INT_P (op1
)
3190 && aarch64_base_register_rtx_p (op0
, strict_p
))
3192 HOST_WIDE_INT offset
= INTVAL (op1
);
3194 info
->type
= ADDRESS_REG_IMM
;
	  /* TImode and TFmode values are allowed in both pairs of X
	     registers and individual Q registers.  The available
	     address modes are:
	     X,X: 7-bit signed scaled offset
	     Q:   9-bit signed offset
	     We conservatively require an offset representable in either
	     mode.  */
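	  /* Illustrative example (not from the original source): for TImode
	     the scaled 7-bit range alone would be -1024..1008 in steps of 16
	     and the unscaled 9-bit range -256..255, so the check below
	     effectively allows -256..240 in steps of 16.  */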
3205 if (mode
== TImode
|| mode
== TFmode
)
3206 return (offset_7bit_signed_scaled_p (mode
, offset
)
3207 && offset_9bit_signed_unscaled_p (mode
, offset
));
3209 if (outer_code
== PARALLEL
)
3210 return ((GET_MODE_SIZE (mode
) == 4 || GET_MODE_SIZE (mode
) == 8)
3211 && offset_7bit_signed_scaled_p (mode
, offset
));
3213 return (offset_9bit_signed_unscaled_p (mode
, offset
)
3214 || offset_12bit_unsigned_scaled_p (mode
, offset
));
3217 if (allow_reg_index_p
)
3219 /* Look for base + (scaled/extended) index register. */
3220 if (aarch64_base_register_rtx_p (op0
, strict_p
)
3221 && aarch64_classify_index (info
, op1
, mode
, strict_p
))
3226 if (aarch64_base_register_rtx_p (op1
, strict_p
)
3227 && aarch64_classify_index (info
, op0
, mode
, strict_p
))
3240 info
->type
= ADDRESS_REG_WB
;
3241 info
->base
= XEXP (x
, 0);
3242 info
->offset
= NULL_RTX
;
3243 return aarch64_base_register_rtx_p (info
->base
, strict_p
);
3247 info
->type
= ADDRESS_REG_WB
;
3248 info
->base
= XEXP (x
, 0);
3249 if (GET_CODE (XEXP (x
, 1)) == PLUS
3250 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
3251 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), info
->base
)
3252 && aarch64_base_register_rtx_p (info
->base
, strict_p
))
3254 HOST_WIDE_INT offset
;
3255 info
->offset
= XEXP (XEXP (x
, 1), 1);
3256 offset
= INTVAL (info
->offset
);
	  /* TImode and TFmode values are allowed in both pairs of X
	     registers and individual Q registers.  The available
	     address modes are:
	     X,X: 7-bit signed scaled offset
	     Q:   9-bit signed offset
	     We conservatively require an offset representable in either
	     mode.  */
3265 if (mode
== TImode
|| mode
== TFmode
)
3266 return (offset_7bit_signed_scaled_p (mode
, offset
)
3267 && offset_9bit_signed_unscaled_p (mode
, offset
));
3269 if (outer_code
== PARALLEL
)
3270 return ((GET_MODE_SIZE (mode
) == 4 || GET_MODE_SIZE (mode
) == 8)
3271 && offset_7bit_signed_scaled_p (mode
, offset
));
3273 return offset_9bit_signed_unscaled_p (mode
, offset
);
3280 /* load literal: pc-relative constant pool entry. Only supported
3281 for SI mode or larger. */
3282 info
->type
= ADDRESS_SYMBOLIC
;
3283 if (outer_code
!= PARALLEL
&& GET_MODE_SIZE (mode
) >= 4)
3287 split_const (x
, &sym
, &addend
);
3288 return (GET_CODE (sym
) == LABEL_REF
3289 || (GET_CODE (sym
) == SYMBOL_REF
3290 && CONSTANT_POOL_ADDRESS_P (sym
)));
3295 info
->type
= ADDRESS_LO_SUM
;
3296 info
->base
= XEXP (x
, 0);
3297 info
->offset
= XEXP (x
, 1);
3298 if (allow_reg_index_p
3299 && aarch64_base_register_rtx_p (info
->base
, strict_p
))
3302 split_const (info
->offset
, &sym
, &offs
);
3303 if (GET_CODE (sym
) == SYMBOL_REF
3304 && (aarch64_classify_symbol (sym
, SYMBOL_CONTEXT_MEM
)
3305 == SYMBOL_SMALL_ABSOLUTE
))
3307 /* The symbol and offset must be aligned to the access size. */
3309 unsigned int ref_size
;
3311 if (CONSTANT_POOL_ADDRESS_P (sym
))
3312 align
= GET_MODE_ALIGNMENT (get_pool_mode (sym
));
3313 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym
))
3315 tree exp
= SYMBOL_REF_DECL (sym
);
3316 align
= TYPE_ALIGN (TREE_TYPE (exp
));
3317 align
= CONSTANT_ALIGNMENT (exp
, align
);
3319 else if (SYMBOL_REF_DECL (sym
))
3320 align
= DECL_ALIGN (SYMBOL_REF_DECL (sym
));
3321 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym
)
3322 && SYMBOL_REF_BLOCK (sym
) != NULL
)
3323 align
= SYMBOL_REF_BLOCK (sym
)->alignment
;
3325 align
= BITS_PER_UNIT
;
3327 ref_size
= GET_MODE_SIZE (mode
);
3329 ref_size
= GET_MODE_SIZE (DImode
);
3331 return ((INTVAL (offs
) & (ref_size
- 1)) == 0
3332 && ((align
/ BITS_PER_UNIT
) & (ref_size
- 1)) == 0);
3343 aarch64_symbolic_address_p (rtx x
)
3347 split_const (x
, &x
, &offset
);
3348 return GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
;
/* Classify the base of symbolic expression X, given that X appears in
   context CONTEXT.  */
3354 enum aarch64_symbol_type
3355 aarch64_classify_symbolic_expression (rtx x
,
3356 enum aarch64_symbol_context context
)
3360 split_const (x
, &x
, &offset
);
3361 return aarch64_classify_symbol (x
, context
);
/* Return TRUE if X is a legitimate address for accessing memory in
   mode MODE.  */
3368 aarch64_legitimate_address_hook_p (enum machine_mode mode
, rtx x
, bool strict_p
)
3370 struct aarch64_address_info addr
;
3372 return aarch64_classify_address (&addr
, x
, mode
, MEM
, strict_p
);
/* Return TRUE if X is a legitimate address for accessing memory in
   mode MODE.  OUTER_CODE will be PARALLEL if this is a load/store
   pair.  */
3379 aarch64_legitimate_address_p (enum machine_mode mode
, rtx x
,
3380 RTX_CODE outer_code
, bool strict_p
)
3382 struct aarch64_address_info addr
;
3384 return aarch64_classify_address (&addr
, x
, mode
, outer_code
, strict_p
);
3387 /* Return TRUE if rtx X is immediate constant 0.0 */
3389 aarch64_float_const_zero_rtx_p (rtx x
)
3393 if (GET_MODE (x
) == VOIDmode
)
3396 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
3397 if (REAL_VALUE_MINUS_ZERO (r
))
3398 return !HONOR_SIGNED_ZEROS (GET_MODE (x
));
3399 return REAL_VALUES_EQUAL (r
, dconst0
);
3402 /* Return the fixed registers used for condition codes. */
3405 aarch64_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
3408 *p2
= INVALID_REGNUM
;
3413 aarch64_select_cc_mode (RTX_CODE code
, rtx x
, rtx y
)
3415 /* All floating point compares return CCFP if it is an equality
3416 comparison, and CCFPE otherwise. */
3417 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
3444 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
3446 && (code
== EQ
|| code
== NE
|| code
== LT
|| code
== GE
)
3447 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
|| GET_CODE (x
) == AND
3448 || GET_CODE (x
) == NEG
))
  /* A compare with a shifted operand.  Because of canonicalization,
     the comparison will have to be swapped when we emit the assembly
     code.  */
3454 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
3455 && (GET_CODE (y
) == REG
|| GET_CODE (y
) == SUBREG
)
3456 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
3457 || GET_CODE (x
) == LSHIFTRT
3458 || GET_CODE (x
) == ZERO_EXTEND
|| GET_CODE (x
) == SIGN_EXTEND
))
  /* Similarly for a negated operand, but we can only do this for
     equalities.  */
3463 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
3464 && (GET_CODE (y
) == REG
|| GET_CODE (y
) == SUBREG
)
3465 && (code
== EQ
|| code
== NE
)
3466 && GET_CODE (x
) == NEG
)
3469 /* A compare of a mode narrower than SI mode against zero can be done
3470 by extending the value in the comparison. */
3471 if ((GET_MODE (x
) == QImode
|| GET_MODE (x
) == HImode
)
3473 /* Only use sign-extension if we really need it. */
3474 return ((code
== GT
|| code
== GE
|| code
== LE
|| code
== LT
)
3475 ? CC_SESWPmode
: CC_ZESWPmode
);
3477 /* For everything else, return CCmode. */
3482 aarch64_get_condition_code (rtx x
)
3484 enum machine_mode mode
= GET_MODE (XEXP (x
, 0));
3485 enum rtx_code comp_code
= GET_CODE (x
);
3487 if (GET_MODE_CLASS (mode
) != MODE_CC
)
3488 mode
= SELECT_CC_MODE (comp_code
, XEXP (x
, 0), XEXP (x
, 1));
3496 case GE
: return AARCH64_GE
;
3497 case GT
: return AARCH64_GT
;
3498 case LE
: return AARCH64_LS
;
3499 case LT
: return AARCH64_MI
;
3500 case NE
: return AARCH64_NE
;
3501 case EQ
: return AARCH64_EQ
;
3502 case ORDERED
: return AARCH64_VC
;
3503 case UNORDERED
: return AARCH64_VS
;
3504 case UNLT
: return AARCH64_LT
;
3505 case UNLE
: return AARCH64_LE
;
3506 case UNGT
: return AARCH64_HI
;
3507 case UNGE
: return AARCH64_PL
;
3508 default: gcc_unreachable ();
3515 case NE
: return AARCH64_NE
;
3516 case EQ
: return AARCH64_EQ
;
3517 case GE
: return AARCH64_GE
;
3518 case GT
: return AARCH64_GT
;
3519 case LE
: return AARCH64_LE
;
3520 case LT
: return AARCH64_LT
;
3521 case GEU
: return AARCH64_CS
;
3522 case GTU
: return AARCH64_HI
;
3523 case LEU
: return AARCH64_LS
;
3524 case LTU
: return AARCH64_CC
;
3525 default: gcc_unreachable ();
3534 case NE
: return AARCH64_NE
;
3535 case EQ
: return AARCH64_EQ
;
3536 case GE
: return AARCH64_LE
;
3537 case GT
: return AARCH64_LT
;
3538 case LE
: return AARCH64_GE
;
3539 case LT
: return AARCH64_GT
;
3540 case GEU
: return AARCH64_LS
;
3541 case GTU
: return AARCH64_CC
;
3542 case LEU
: return AARCH64_CS
;
3543 case LTU
: return AARCH64_HI
;
3544 default: gcc_unreachable ();
3551 case NE
: return AARCH64_NE
;
3552 case EQ
: return AARCH64_EQ
;
3553 case GE
: return AARCH64_PL
;
3554 case LT
: return AARCH64_MI
;
3555 default: gcc_unreachable ();
3562 case NE
: return AARCH64_NE
;
3563 case EQ
: return AARCH64_EQ
;
3564 default: gcc_unreachable ();
3575 bit_count (unsigned HOST_WIDE_INT value
)
3589 aarch64_print_operand (FILE *f
, rtx x
, char code
)
3593 /* An integer or symbol address without a preceding # sign. */
3595 switch (GET_CODE (x
))
3598 fprintf (f
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
3602 output_addr_const (f
, x
);
3606 if (GET_CODE (XEXP (x
, 0)) == PLUS
3607 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
3609 output_addr_const (f
, x
);
3615 output_operand_lossage ("Unsupported operand for code '%c'", code
);
3620 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3624 if (GET_CODE (x
) != CONST_INT
3625 || (n
= exact_log2 (INTVAL (x
) & ~7)) <= 0)
3627 output_operand_lossage ("invalid operand for '%%%c'", code
);
3643 output_operand_lossage ("invalid operand for '%%%c'", code
);
3653 /* Print N such that 2^N == X. */
3654 if (GET_CODE (x
) != CONST_INT
|| (n
= exact_log2 (INTVAL (x
))) < 0)
3656 output_operand_lossage ("invalid operand for '%%%c'", code
);
3660 asm_fprintf (f
, "%d", n
);
3665 /* Print the number of non-zero bits in X (a const_int). */
3666 if (GET_CODE (x
) != CONST_INT
)
3668 output_operand_lossage ("invalid operand for '%%%c'", code
);
3672 asm_fprintf (f
, "%u", bit_count (INTVAL (x
)));
3676 /* Print the higher numbered register of a pair (TImode) of regs. */
3677 if (GET_CODE (x
) != REG
|| !GP_REGNUM_P (REGNO (x
) + 1))
3679 output_operand_lossage ("invalid operand for '%%%c'", code
);
3683 asm_fprintf (f
, "%s", reg_names
[REGNO (x
) + 1]);
3687 /* Print a condition (eq, ne, etc). */
3689 /* CONST_TRUE_RTX means always -- that's the default. */
3690 if (x
== const_true_rtx
)
3693 if (!COMPARISON_P (x
))
3695 output_operand_lossage ("invalid operand for '%%%c'", code
);
3699 fputs (aarch64_condition_codes
[aarch64_get_condition_code (x
)], f
);
3703 /* Print the inverse of a condition (eq <-> ne, etc). */
3705 /* CONST_TRUE_RTX means never -- that's the default. */
3706 if (x
== const_true_rtx
)
3712 if (!COMPARISON_P (x
))
3714 output_operand_lossage ("invalid operand for '%%%c'", code
);
3718 fputs (aarch64_condition_codes
[AARCH64_INVERSE_CONDITION_CODE
3719 (aarch64_get_condition_code (x
))], f
);
3727 /* Print a scalar FP/SIMD register name. */
3728 if (!REG_P (x
) || !FP_REGNUM_P (REGNO (x
)))
3730 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code
);
3733 asm_fprintf (f
, "%c%d", code
, REGNO (x
) - V0_REGNUM
);
3740 /* Print the first FP/SIMD register name in a list. */
3741 if (!REG_P (x
) || !FP_REGNUM_P (REGNO (x
)))
3743 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code
);
3746 asm_fprintf (f
, "v%d", REGNO (x
) - V0_REGNUM
+ (code
- 'S'));
3750 /* Print bottom 16 bits of integer constant in hex. */
3751 if (GET_CODE (x
) != CONST_INT
)
3753 output_operand_lossage ("invalid operand for '%%%c'", code
);
3756 asm_fprintf (f
, "0x%wx", UINTVAL (x
) & 0xffff);
      /* Print a general register name or the zero register (32-bit or
	 64-bit).  */
3764 || (CONST_DOUBLE_P (x
) && aarch64_float_const_zero_rtx_p (x
)))
3766 asm_fprintf (f
, "%czr", code
);
3770 if (REG_P (x
) && GP_REGNUM_P (REGNO (x
)))
3772 asm_fprintf (f
, "%c%d", code
, REGNO (x
) - R0_REGNUM
);
3776 if (REG_P (x
) && REGNO (x
) == SP_REGNUM
)
3778 asm_fprintf (f
, "%ssp", code
== 'w' ? "w" : "");
3785 /* Print a normal operand, if it's a general register, then we
3789 output_operand_lossage ("missing operand");
3793 switch (GET_CODE (x
))
3796 asm_fprintf (f
, "%s", reg_names
[REGNO (x
)]);
3800 aarch64_memory_reference_mode
= GET_MODE (x
);
3801 output_address (XEXP (x
, 0));
3806 output_addr_const (asm_out_file
, x
);
3810 asm_fprintf (f
, "%wd", INTVAL (x
));
3814 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_VECTOR_INT
)
3816 gcc_assert (aarch64_const_vec_all_same_int_p (x
,
3818 HOST_WIDE_INT_MAX
));
3819 asm_fprintf (f
, "%wd", INTVAL (CONST_VECTOR_ELT (x
, 0)));
3821 else if (aarch64_simd_imm_zero_p (x
, GET_MODE (x
)))
3830 /* CONST_DOUBLE can represent a double-width integer.
3831 In this case, the mode of x is VOIDmode. */
3832 if (GET_MODE (x
) == VOIDmode
)
3834 else if (aarch64_float_const_zero_rtx_p (x
))
3839 else if (aarch64_float_const_representable_p (x
))
3842 char float_buf
[buf_size
] = {'\0'};
3844 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
3845 real_to_decimal_for_mode (float_buf
, &r
,
3848 asm_fprintf (asm_out_file
, "%s", float_buf
);
3852 output_operand_lossage ("invalid constant");
3855 output_operand_lossage ("invalid operand");
3861 if (GET_CODE (x
) == HIGH
)
3864 switch (aarch64_classify_symbolic_expression (x
, SYMBOL_CONTEXT_ADR
))
3866 case SYMBOL_SMALL_GOT
:
3867 asm_fprintf (asm_out_file
, ":got:");
3870 case SYMBOL_SMALL_TLSGD
:
3871 asm_fprintf (asm_out_file
, ":tlsgd:");
3874 case SYMBOL_SMALL_TLSDESC
:
3875 asm_fprintf (asm_out_file
, ":tlsdesc:");
3878 case SYMBOL_SMALL_GOTTPREL
:
3879 asm_fprintf (asm_out_file
, ":gottprel:");
3882 case SYMBOL_SMALL_TPREL
:
3883 asm_fprintf (asm_out_file
, ":tprel:");
3886 case SYMBOL_TINY_GOT
:
3893 output_addr_const (asm_out_file
, x
);
3897 switch (aarch64_classify_symbolic_expression (x
, SYMBOL_CONTEXT_ADR
))
3899 case SYMBOL_SMALL_GOT
:
3900 asm_fprintf (asm_out_file
, ":lo12:");
3903 case SYMBOL_SMALL_TLSGD
:
3904 asm_fprintf (asm_out_file
, ":tlsgd_lo12:");
3907 case SYMBOL_SMALL_TLSDESC
:
3908 asm_fprintf (asm_out_file
, ":tlsdesc_lo12:");
3911 case SYMBOL_SMALL_GOTTPREL
:
3912 asm_fprintf (asm_out_file
, ":gottprel_lo12:");
3915 case SYMBOL_SMALL_TPREL
:
3916 asm_fprintf (asm_out_file
, ":tprel_lo12_nc:");
3919 case SYMBOL_TINY_GOT
:
3920 asm_fprintf (asm_out_file
, ":got:");
3926 output_addr_const (asm_out_file
, x
);
3931 switch (aarch64_classify_symbolic_expression (x
, SYMBOL_CONTEXT_ADR
))
3933 case SYMBOL_SMALL_TPREL
:
3934 asm_fprintf (asm_out_file
, ":tprel_hi12:");
3939 output_addr_const (asm_out_file
, x
);
3943 output_operand_lossage ("invalid operand prefix '%%%c'", code
);
3949 aarch64_print_operand_address (FILE *f
, rtx x
)
3951 struct aarch64_address_info addr
;
3953 if (aarch64_classify_address (&addr
, x
, aarch64_memory_reference_mode
,
3957 case ADDRESS_REG_IMM
:
3958 if (addr
.offset
== const0_rtx
)
3959 asm_fprintf (f
, "[%s]", reg_names
[REGNO (addr
.base
)]);
3961 asm_fprintf (f
, "[%s, %wd]", reg_names
[REGNO (addr
.base
)],
3962 INTVAL (addr
.offset
));
3965 case ADDRESS_REG_REG
:
3966 if (addr
.shift
== 0)
3967 asm_fprintf (f
, "[%s, %s]", reg_names
[REGNO (addr
.base
)],
3968 reg_names
[REGNO (addr
.offset
)]);
3970 asm_fprintf (f
, "[%s, %s, lsl %u]", reg_names
[REGNO (addr
.base
)],
3971 reg_names
[REGNO (addr
.offset
)], addr
.shift
);
3974 case ADDRESS_REG_UXTW
:
3975 if (addr
.shift
== 0)
3976 asm_fprintf (f
, "[%s, w%d, uxtw]", reg_names
[REGNO (addr
.base
)],
3977 REGNO (addr
.offset
) - R0_REGNUM
);
3979 asm_fprintf (f
, "[%s, w%d, uxtw %u]", reg_names
[REGNO (addr
.base
)],
3980 REGNO (addr
.offset
) - R0_REGNUM
, addr
.shift
);
3983 case ADDRESS_REG_SXTW
:
3984 if (addr
.shift
== 0)
3985 asm_fprintf (f
, "[%s, w%d, sxtw]", reg_names
[REGNO (addr
.base
)],
3986 REGNO (addr
.offset
) - R0_REGNUM
);
3988 asm_fprintf (f
, "[%s, w%d, sxtw %u]", reg_names
[REGNO (addr
.base
)],
3989 REGNO (addr
.offset
) - R0_REGNUM
, addr
.shift
);
3992 case ADDRESS_REG_WB
:
3993 switch (GET_CODE (x
))
3996 asm_fprintf (f
, "[%s, %d]!", reg_names
[REGNO (addr
.base
)],
3997 GET_MODE_SIZE (aarch64_memory_reference_mode
));
4000 asm_fprintf (f
, "[%s], %d", reg_names
[REGNO (addr
.base
)],
4001 GET_MODE_SIZE (aarch64_memory_reference_mode
));
4004 asm_fprintf (f
, "[%s, -%d]!", reg_names
[REGNO (addr
.base
)],
4005 GET_MODE_SIZE (aarch64_memory_reference_mode
));
4008 asm_fprintf (f
, "[%s], -%d", reg_names
[REGNO (addr
.base
)],
4009 GET_MODE_SIZE (aarch64_memory_reference_mode
));
4012 asm_fprintf (f
, "[%s, %wd]!", reg_names
[REGNO (addr
.base
)],
4013 INTVAL (addr
.offset
));
4016 asm_fprintf (f
, "[%s], %wd", reg_names
[REGNO (addr
.base
)],
4017 INTVAL (addr
.offset
));
4024 case ADDRESS_LO_SUM
:
4025 asm_fprintf (f
, "[%s, #:lo12:", reg_names
[REGNO (addr
.base
)]);
4026 output_addr_const (f
, addr
.offset
);
4027 asm_fprintf (f
, "]");
4030 case ADDRESS_SYMBOLIC
:
4034 output_addr_const (f
, x
);
4038 aarch64_label_mentioned_p (rtx x
)
4043 if (GET_CODE (x
) == LABEL_REF
)
  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
     referencing instruction, but they are constant offsets, not
     real label references.  */
4049 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
4052 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
4053 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
4059 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
4060 if (aarch64_label_mentioned_p (XVECEXP (x
, i
, j
)))
4063 else if (fmt
[i
] == 'e' && aarch64_label_mentioned_p (XEXP (x
, i
)))
4070 /* Implement REGNO_REG_CLASS. */
4073 aarch64_regno_regclass (unsigned regno
)
4075 if (GP_REGNUM_P (regno
))
4078 if (regno
== SP_REGNUM
)
4081 if (regno
== FRAME_POINTER_REGNUM
4082 || regno
== ARG_POINTER_REGNUM
)
4083 return POINTER_REGS
;
4085 if (FP_REGNUM_P (regno
))
4086 return FP_LO_REGNUM_P (regno
) ? FP_LO_REGS
: FP_REGS
;
4091 /* Try a machine-dependent way of reloading an illegitimate address
4092 operand. If we find one, push the reload and return the new rtx. */
4095 aarch64_legitimize_reload_address (rtx
*x_p
,
4096 enum machine_mode mode
,
4097 int opnum
, int type
,
4098 int ind_levels ATTRIBUTE_UNUSED
)
4102 /* Do not allow mem (plus (reg, const)) if vector struct mode. */
4103 if (aarch64_vect_struct_mode_p (mode
)
4104 && GET_CODE (x
) == PLUS
4105 && REG_P (XEXP (x
, 0))
4106 && CONST_INT_P (XEXP (x
, 1)))
4110 push_reload (orig_rtx
, NULL_RTX
, x_p
, NULL
,
4111 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
4112 opnum
, (enum reload_type
) type
);
4116 /* We must recognize output that we have already generated ourselves. */
4117 if (GET_CODE (x
) == PLUS
4118 && GET_CODE (XEXP (x
, 0)) == PLUS
4119 && REG_P (XEXP (XEXP (x
, 0), 0))
4120 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
4121 && CONST_INT_P (XEXP (x
, 1)))
4123 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
4124 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
4125 opnum
, (enum reload_type
) type
);
4129 /* We wish to handle large displacements off a base register by splitting
4130 the addend across an add and the mem insn. This can cut the number of
4131 extra insns needed from 3 to 1. It is only useful for load/store of a
4132 single register with 12 bit offset field. */
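      /* Illustrative example (not from the original source): for a DImode
	 load at (plus (reg x0) (const_int 0x12340)) the code below splits
	 the constant as high = 0x12000, low = 0x340, reloads x0 + 0x12000
	 into the base register and leaves something like

	     ldr	x1, [<base>, #0x340]

	 in the memory reference: one extra ADD instead of a full constant
	 build.  x0/x1 and <base> are placeholders for this sketch.  */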
4133 if (GET_CODE (x
) == PLUS
4134 && REG_P (XEXP (x
, 0))
4135 && CONST_INT_P (XEXP (x
, 1))
4136 && HARD_REGISTER_P (XEXP (x
, 0))
4139 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x
, 0)), true))
4141 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
4142 HOST_WIDE_INT low
= val
& 0xfff;
4143 HOST_WIDE_INT high
= val
- low
;
4146 enum machine_mode xmode
= GET_MODE (x
);
4148 /* In ILP32, xmode can be either DImode or SImode. */
4149 gcc_assert (xmode
== DImode
|| xmode
== SImode
);
4151 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4152 BLKmode alignment. */
4153 if (GET_MODE_SIZE (mode
) == 0)
4156 offs
= low
% GET_MODE_SIZE (mode
);
4158 /* Align misaligned offset by adjusting high part to compensate. */
4161 if (aarch64_uimm12_shift (high
+ offs
))
4170 offs
= GET_MODE_SIZE (mode
) - offs
;
4172 high
= high
+ (low
& 0x1000) - offs
;
4177 /* Check for overflow. */
4178 if (high
+ low
!= val
)
4181 cst
= GEN_INT (high
);
4182 if (!aarch64_uimm12_shift (high
))
4183 cst
= force_const_mem (xmode
, cst
);
4185 /* Reload high part into base reg, leaving the low part
4186 in the mem instruction.
4187 Note that replacing this gen_rtx_PLUS with plus_constant is
4188 wrong in this case because we rely on the
4189 (plus (plus reg c1) c2) structure being preserved so that
4190 XEXP (*p, 0) in push_reload below uses the correct term. */
4191 x
= gen_rtx_PLUS (xmode
,
4192 gen_rtx_PLUS (xmode
, XEXP (x
, 0), cst
),
4195 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
4196 BASE_REG_CLASS
, xmode
, VOIDmode
, 0, 0,
4197 opnum
, (enum reload_type
) type
);
4206 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED
, rtx x
,
4208 enum machine_mode mode
,
4209 secondary_reload_info
*sri
)
4211 /* Without the TARGET_SIMD instructions we cannot move a Q register
4212 to a Q register directly. We need a scratch. */
4213 if (REG_P (x
) && (mode
== TFmode
|| mode
== TImode
) && mode
== GET_MODE (x
)
4214 && FP_REGNUM_P (REGNO (x
)) && !TARGET_SIMD
4215 && reg_class_subset_p (rclass
, FP_REGS
))
4218 sri
->icode
= CODE_FOR_aarch64_reload_movtf
;
4219 else if (mode
== TImode
)
4220 sri
->icode
= CODE_FOR_aarch64_reload_movti
;
4224 /* A TFmode or TImode memory access should be handled via an FP_REGS
4225 because AArch64 has richer addressing modes for LDR/STR instructions
4226 than LDP/STP instructions. */
4227 if (!TARGET_GENERAL_REGS_ONLY
&& rclass
== CORE_REGS
4228 && GET_MODE_SIZE (mode
) == 16 && MEM_P (x
))
4231 if (rclass
== FP_REGS
&& (mode
== TImode
|| mode
== TFmode
) && CONSTANT_P(x
))
4238 aarch64_can_eliminate (const int from
, const int to
)
4240 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4241 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4243 if (frame_pointer_needed
)
4245 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4247 if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
4249 if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
4250 && !cfun
->calls_alloca
)
4252 if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4262 aarch64_initial_elimination_offset (unsigned from
, unsigned to
)
4264 HOST_WIDE_INT frame_size
;
4265 HOST_WIDE_INT offset
;
4267 aarch64_layout_frame ();
4268 frame_size
= (get_frame_size () + cfun
->machine
->frame
.saved_regs_size
4269 + crtl
->outgoing_args_size
4270 + cfun
->machine
->saved_varargs_size
);
4272 frame_size
= AARCH64_ROUND_UP (frame_size
, STACK_BOUNDARY
/ BITS_PER_UNIT
);
4273 offset
= frame_size
;
4275 if (to
== HARD_FRAME_POINTER_REGNUM
)
4277 if (from
== ARG_POINTER_REGNUM
)
4278 return offset
- crtl
->outgoing_args_size
;
4280 if (from
== FRAME_POINTER_REGNUM
)
4281 return cfun
->machine
->frame
.saved_regs_size
+ get_frame_size ();
4284 if (to
== STACK_POINTER_REGNUM
)
4286 if (from
== FRAME_POINTER_REGNUM
)
4288 HOST_WIDE_INT elim
= crtl
->outgoing_args_size
4289 + cfun
->machine
->frame
.saved_regs_size
4290 + get_frame_size ();
4291 elim
= AARCH64_ROUND_UP (elim
, STACK_BOUNDARY
/ BITS_PER_UNIT
);
/* Implement RETURN_ADDR_RTX.  We do not support moving back to a
   previous frame.  */

rtx
aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return const0_rtx;
  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
4313 aarch64_asm_trampoline_template (FILE *f
)
4317 asm_fprintf (f
, "\tldr\tw%d, .+16\n", IP1_REGNUM
- R0_REGNUM
);
4318 asm_fprintf (f
, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM
- R0_REGNUM
);
4322 asm_fprintf (f
, "\tldr\t%s, .+16\n", reg_names
[IP1_REGNUM
]);
4323 asm_fprintf (f
, "\tldr\t%s, .+20\n", reg_names
[STATIC_CHAIN_REGNUM
]);
4325 asm_fprintf (f
, "\tbr\t%s\n", reg_names
[IP1_REGNUM
]);
4326 assemble_aligned_integer (4, const0_rtx
);
4327 assemble_aligned_integer (POINTER_BYTES
, const0_rtx
);
4328 assemble_aligned_integer (POINTER_BYTES
, const0_rtx
);
4332 aarch64_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
4334 rtx fnaddr
, mem
, a_tramp
;
4335 const int tramp_code_sz
= 16;
4337 /* Don't need to copy the trailing D-words, we fill those in below. */
4338 emit_block_move (m_tramp
, assemble_trampoline_template (),
4339 GEN_INT (tramp_code_sz
), BLOCK_OP_NORMAL
);
4340 mem
= adjust_address (m_tramp
, ptr_mode
, tramp_code_sz
);
4341 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
4342 if (GET_MODE (fnaddr
) != ptr_mode
)
4343 fnaddr
= convert_memory_address (ptr_mode
, fnaddr
);
4344 emit_move_insn (mem
, fnaddr
);
4346 mem
= adjust_address (m_tramp
, ptr_mode
, tramp_code_sz
+ POINTER_BYTES
);
4347 emit_move_insn (mem
, chain_value
);
4349 /* XXX We should really define a "clear_cache" pattern and use
4350 gen_clear_cache(). */
4351 a_tramp
= XEXP (m_tramp
, 0);
4352 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
4353 LCT_NORMAL
, VOIDmode
, 2, a_tramp
, ptr_mode
,
4354 plus_constant (ptr_mode
, a_tramp
, TRAMPOLINE_SIZE
),
4358 static unsigned char
4359 aarch64_class_max_nregs (reg_class_t regclass
, enum machine_mode mode
)
4370 aarch64_vector_mode_p (mode
) ? (GET_MODE_SIZE (mode
) + 15) / 16 :
4371 (GET_MODE_SIZE (mode
) + 7) / 8;
4385 aarch64_preferred_reload_class (rtx x
, reg_class_t regclass
)
4387 if (regclass
== POINTER_REGS
)
4388 return GENERAL_REGS
;
4390 if (regclass
== STACK_REG
)
4393 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x
)), POINTER_REGS
))
4399 /* If it's an integer immediate that MOVI can't handle, then
4400 FP_REGS is not an option, so we return NO_REGS instead. */
4401 if (CONST_INT_P (x
) && reg_class_subset_p (regclass
, FP_REGS
)
4402 && !aarch64_simd_imm_scalar_p (x
, GET_MODE (x
)))
  /* Register elimination can result in a request for
     SP+constant->FP_REGS.  We cannot support such operations which
     use SP as source and an FP_REG as destination, so reject out
     of hand.  */
4409 if (! reg_class_subset_p (regclass
, GENERAL_REGS
) && GET_CODE (x
) == PLUS
)
4411 rtx lhs
= XEXP (x
, 0);
4413 /* Look through a possible SUBREG introduced by ILP32. */
4414 if (GET_CODE (lhs
) == SUBREG
)
4415 lhs
= SUBREG_REG (lhs
);
4417 gcc_assert (REG_P (lhs
));
4418 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs
)),
void
aarch64_asm_output_labelref (FILE* f, const char *name)
{
  asm_fprintf (f, "%U%s", name);
}
4433 aarch64_elf_asm_constructor (rtx symbol
, int priority
)
4435 if (priority
== DEFAULT_INIT_PRIORITY
)
4436 default_ctor_section_asm_out_constructor (symbol
, priority
);
4441 snprintf (buf
, sizeof (buf
), ".init_array.%.5u", priority
);
4442 s
= get_section (buf
, SECTION_WRITE
, NULL
);
4443 switch_to_section (s
);
4444 assemble_align (POINTER_SIZE
);
4445 assemble_aligned_integer (POINTER_BYTES
, symbol
);
4450 aarch64_elf_asm_destructor (rtx symbol
, int priority
)
4452 if (priority
== DEFAULT_INIT_PRIORITY
)
4453 default_dtor_section_asm_out_destructor (symbol
, priority
);
4458 snprintf (buf
, sizeof (buf
), ".fini_array.%.5u", priority
);
4459 s
= get_section (buf
, SECTION_WRITE
, NULL
);
4460 switch_to_section (s
);
4461 assemble_align (POINTER_SIZE
);
4462 assemble_aligned_integer (POINTER_BYTES
, symbol
);
4467 aarch64_output_casesi (rtx
*operands
)
4471 rtx diff_vec
= PATTERN (NEXT_INSN (operands
[2]));
4473 static const char *const patterns
[4][2] =
4476 "ldrb\t%w3, [%0,%w1,uxtw]",
4477 "add\t%3, %4, %w3, sxtb #2"
4480 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4481 "add\t%3, %4, %w3, sxth #2"
4484 "ldr\t%w3, [%0,%w1,uxtw #2]",
4485 "add\t%3, %4, %w3, sxtw #2"
4487 /* We assume that DImode is only generated when not optimizing and
4488 that we don't really need 64-bit address offsets. That would
4489 imply an object file with 8GB of code in a single function! */
4491 "ldr\t%w3, [%0,%w1,uxtw #2]",
4492 "add\t%3, %4, %w3, sxtw #2"
4496 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
4498 index
= exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec
)));
4500 gcc_assert (index
>= 0 && index
<= 3);
  /* Need to implement table size reduction, by changing the code below.  */
4503 output_asm_insn (patterns
[index
][0], operands
);
4504 ASM_GENERATE_INTERNAL_LABEL (label
, "Lrtx", CODE_LABEL_NUMBER (operands
[2]));
4505 snprintf (buf
, sizeof (buf
),
4506 "adr\t%%4, %s", targetm
.strip_name_encoding (label
));
4507 output_asm_insn (buf
, operands
);
4508 output_asm_insn (patterns
[index
][1], operands
);
4509 output_asm_insn ("br\t%3", operands
);
4510 assemble_label (asm_out_file
, label
);
/* Return size in bits of an arithmetic operand which is shifted/scaled and
   masked such that it is suitable for a UXTB, UXTH, or UXTW extend
   operator.  */
int
aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
{
  if (shift >= 0 && shift <= 3)
    {
      int size;
      for (size = 8; size <= 32; size *= 2)
	{
	  HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
	  if (mask == bits << shift)
	    return size;
	}
    }
  return 0;
}
4536 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
4537 const_rtx x ATTRIBUTE_UNUSED
)
  /* We can't use blocks for constants when we're using a per-function
     constant pool.  */
4545 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED
,
4546 rtx x ATTRIBUTE_UNUSED
,
4547 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED
)
4549 /* Force all constant pool entries into the current function section. */
4550 return function_section (current_function_decl
);
/* Helper function for rtx cost calculation.  Strip a shift expression
   from X.  Returns the inner operand if successful, or the original
   expression on failure.  */
static rtx
aarch64_strip_shift (rtx x)
{
  rtx op = x;

  /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
     we can convert both to ROR during final output.  */
  if ((GET_CODE (op) == ASHIFT
       || GET_CODE (op) == ASHIFTRT
       || GET_CODE (op) == LSHIFTRT
       || GET_CODE (op) == ROTATERT
       || GET_CODE (op) == ROTATE)
      && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);

  if (GET_CODE (op) == MULT
      && CONST_INT_P (XEXP (op, 1))
      && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
    return XEXP (op, 0);

  return x;
}
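/* Illustrative example (not from the original source):
   aarch64_strip_shift turns (ashift (reg x1) (const_int 3)) or
   (mult (reg x1) (const_int 8)) into (reg x1), so the caller can cost
   the shifted operand of an arithmetic instruction as a bare register.  */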
4582 /* Helper function for rtx cost calculation. Strip an extend
4583 expression from X. Returns the inner operand if successful, or the
4584 original expression on failure. We deal with a number of possible
4585 canonicalization variations here. */
4587 aarch64_strip_extend (rtx x
)
4591 /* Zero and sign extraction of a widened value. */
4592 if ((GET_CODE (op
) == ZERO_EXTRACT
|| GET_CODE (op
) == SIGN_EXTRACT
)
4593 && XEXP (op
, 2) == const0_rtx
4594 && GET_CODE (XEXP (op
, 0)) == MULT
4595 && aarch64_is_extend_from_extract (GET_MODE (op
), XEXP (XEXP (op
, 0), 1),
4597 return XEXP (XEXP (op
, 0), 0);
  /* It can also be represented (for zero-extend) as an AND with an
     immediate.  */
4601 if (GET_CODE (op
) == AND
4602 && GET_CODE (XEXP (op
, 0)) == MULT
4603 && CONST_INT_P (XEXP (XEXP (op
, 0), 1))
4604 && CONST_INT_P (XEXP (op
, 1))
4605 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op
, 0), 1))),
4606 INTVAL (XEXP (op
, 1))) != 0)
4607 return XEXP (XEXP (op
, 0), 0);
4609 /* Now handle extended register, as this may also have an optional
4610 left shift by 1..4. */
4611 if (GET_CODE (op
) == ASHIFT
4612 && CONST_INT_P (XEXP (op
, 1))
4613 && ((unsigned HOST_WIDE_INT
) INTVAL (XEXP (op
, 1))) <= 4)
4616 if (GET_CODE (op
) == ZERO_EXTEND
4617 || GET_CODE (op
) == SIGN_EXTEND
)
4626 /* Helper function for rtx cost calculation. Calculate the cost of
4627 a MULT, which may be part of a multiply-accumulate rtx. Return
4628 the calculated cost of the expression, recursing manually in to
4629 operands where needed. */
4632 aarch64_rtx_mult_cost (rtx x
, int code
, int outer
, bool speed
)
4635 const struct cpu_cost_table
*extra_cost
4636 = aarch64_tune_params
->insn_extra_cost
;
4638 bool maybe_fma
= (outer
== PLUS
|| outer
== MINUS
);
4639 enum machine_mode mode
= GET_MODE (x
);
4641 gcc_checking_assert (code
== MULT
);
4646 if (VECTOR_MODE_P (mode
))
4647 mode
= GET_MODE_INNER (mode
);
4649 /* Integer multiply/fma. */
4650 if (GET_MODE_CLASS (mode
) == MODE_INT
)
4652 /* The multiply will be canonicalized as a shift, cost it as such. */
4653 if (CONST_INT_P (op1
)
4654 && exact_log2 (INTVAL (op1
)) > 0)
4659 /* ADD (shifted register). */
4660 cost
+= extra_cost
->alu
.arith_shift
;
4662 /* LSL (immediate). */
4663 cost
+= extra_cost
->alu
.shift
;
4666 cost
+= rtx_cost (op0
, GET_CODE (op0
), 0, speed
);
4671 /* Integer multiplies or FMAs have zero/sign extending variants. */
4672 if ((GET_CODE (op0
) == ZERO_EXTEND
4673 && GET_CODE (op1
) == ZERO_EXTEND
)
4674 || (GET_CODE (op0
) == SIGN_EXTEND
4675 && GET_CODE (op1
) == SIGN_EXTEND
))
4677 cost
+= rtx_cost (XEXP (op0
, 0), MULT
, 0, speed
)
4678 + rtx_cost (XEXP (op1
, 0), MULT
, 1, speed
);
4683 /* MADD/SMADDL/UMADDL. */
4684 cost
+= extra_cost
->mult
[0].extend_add
;
4686 /* MUL/SMULL/UMULL. */
4687 cost
+= extra_cost
->mult
[0].extend
;
4693 /* This is either an integer multiply or an FMA. In both cases
4694 we want to recurse and cost the operands. */
4695 cost
+= rtx_cost (op0
, MULT
, 0, speed
)
4696 + rtx_cost (op1
, MULT
, 1, speed
);
4702 cost
+= extra_cost
->mult
[mode
== DImode
].add
;
4705 cost
+= extra_cost
->mult
[mode
== DImode
].simple
;
      /* Floating-point FMA/FMUL can also support negations of the
	 operands.  */
4716 if (GET_CODE (op0
) == NEG
)
4717 op0
= XEXP (op0
, 0);
4718 if (GET_CODE (op1
) == NEG
)
4719 op1
= XEXP (op1
, 0);
4722 /* FMADD/FNMADD/FNMSUB/FMSUB. */
4723 cost
+= extra_cost
->fp
[mode
== DFmode
].fma
;
4726 cost
+= extra_cost
->fp
[mode
== DFmode
].mult
;
4729 cost
+= rtx_cost (op0
, MULT
, 0, speed
)
4730 + rtx_cost (op1
, MULT
, 1, speed
);
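/* Illustrative note (added commentary, not from the original source):
   for an expression such as

     (plus:DI (mult:DI (reg:DI) (reg:DI)) (reg:DI))

   the caller passes OUTER == PLUS, so MAYBE_FMA is true above and the
   multiply is costed as the MADD variant (extra_cost->mult[1].add)
   plus the cost of getting its operands into registers, rather than
   as a standalone MUL (extra_cost->mult[1].simple).  */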
static int
aarch64_address_cost (rtx x,
                      enum machine_mode mode,
                      addr_space_t as ATTRIBUTE_UNUSED,
                      bool speed)
{
  enum rtx_code c = GET_CODE (x);
  const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
  struct aarch64_address_info info;
  int cost = 0;
  info.shift = 0;

  if (!aarch64_classify_address (&info, x, mode, c, false))
    {
      if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
        {
          /* This is a CONST or SYMBOL ref which will be split
             in a different way depending on the code model in use.
             Cost it through the generic infrastructure.  */
          int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
          /* Divide through by the cost of one instruction to
             bring it to the same units as the address costs.  */
          cost_symbol_ref /= COSTS_N_INSNS (1);
          /* The cost is then the cost of preparing the address,
             followed by an immediate (possibly 0) offset.  */
          return cost_symbol_ref + addr_cost->imm_offset;
        }
      else
        {
          /* This is most likely a jump table from a case
             statement.  */
          return addr_cost->register_offset;
        }
    }
  switch (info.type)
    {
      case ADDRESS_LO_SUM:
      case ADDRESS_SYMBOLIC:
      case ADDRESS_REG_IMM:
        cost += addr_cost->imm_offset;
        break;

      case ADDRESS_REG_WB:
        if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
          cost += addr_cost->pre_modify;
        else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
          cost += addr_cost->post_modify;
        else
          gcc_unreachable ();
        break;

      case ADDRESS_REG_REG:
        cost += addr_cost->register_offset;
        break;

      case ADDRESS_REG_UXTW:
      case ADDRESS_REG_SXTW:
        cost += addr_cost->register_extend;
        break;

      default:
        gcc_unreachable ();
    }

  if (info.shift > 0)
    {
      /* For the sake of calculating the cost of the shifted register
         component, we can treat same sized modes in the same way.  */
      switch (GET_MODE_BITSIZE (mode))
        {
          case 16:
            cost += addr_cost->addr_scale_costs.hi;
            break;

          case 32:
            cost += addr_cost->addr_scale_costs.si;
            break;

          case 64:
            cost += addr_cost->addr_scale_costs.di;
            break;

          /* We can't tell, or this is a 128-bit vector.  */
          default:
            cost += addr_cost->addr_scale_costs.ti;
            break;
        }
    }

  return cost;
}
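/* Illustrative note (added commentary, not from the original source):
   under the classification used above, addresses cost roughly as

     (plus:DI (reg:DI) (const_int 16))             -> imm_offset
     (plus:DI (reg:DI) (reg:DI))                   -> register_offset
     (plus:DI (reg:DI) (sign_extend:DI (reg:SI)))  -> register_extend

   with an extra addr_scale_costs component, chosen by the access
   size, whenever the index register is scaled (info.shift > 0).  */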
/* Return true if the RTX X in mode MODE is a zero or sign extract
   usable in an ADD or SUB (extended register) instruction.  */
static bool
aarch64_rtx_arith_op_extract_p (rtx x, enum machine_mode mode)
{
  /* Catch add with a sign extract.
     This is add_<optab><mode>_multp2.  */
  if (GET_CODE (x) == SIGN_EXTRACT
      || GET_CODE (x) == ZERO_EXTRACT)
    {
      rtx op0 = XEXP (x, 0);
      rtx op1 = XEXP (x, 1);
      rtx op2 = XEXP (x, 2);

      if (GET_CODE (op0) == MULT
          && CONST_INT_P (op1)
          && op2 == const0_rtx
          && CONST_INT_P (XEXP (op0, 1))
          && aarch64_is_extend_from_extract (mode,
                                             XEXP (op0, 1),
                                             op1))
        return true;
    }

  return false;
}
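/* Illustrative note (added commentary, not from the original source):
   the shape matched by aarch64_rtx_arith_op_extract_p corresponds to
   instructions like

     add  x0, x1, w2, sxtw #2

   where the second source operand is a sign- or zero-extended
   register scaled by a small power of two.  */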
/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
   storing it in *COST.  Result is true if the total cost of the operation
   has now been calculated.  */
static bool
aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
{
  rtx inner;
  rtx comparator;
  enum rtx_code cmpcode;

  if (COMPARISON_P (op0))
    {
      inner = XEXP (op0, 0);
      comparator = XEXP (op0, 1);
      cmpcode = GET_CODE (op0);
    }
  else
    {
      inner = op0;
      comparator = const0_rtx;
      cmpcode = NE;
    }

  if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
    {
      /* Conditional branch.  */
      if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
        return true;
      else
        {
          if (cmpcode == NE || cmpcode == EQ)
            {
              if (comparator == const0_rtx)
                {
                  /* TBZ/TBNZ/CBZ/CBNZ.  */
                  if (GET_CODE (inner) == ZERO_EXTRACT)
                    /* TBZ/TBNZ.  */
                    *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT,
                                       0, speed);
                  else
                    /* CBZ/CBNZ.  */
                    *cost += rtx_cost (inner, cmpcode, 0, speed);

                  return true;
                }
            }
          else if (cmpcode == LT || cmpcode == GE)
            {
              /* TBZ/TBNZ.  */
              if (comparator == const0_rtx)
                return true;
            }
        }
    }
  else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
    {
      /* It's a conditional operation based on the status flags,
         so it must be some flavor of CSEL.  */

      /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL.  */
      if (GET_CODE (op1) == NEG
          || GET_CODE (op1) == NOT
          || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
        op1 = XEXP (op1, 0);

      *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed);
      *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed);
      return true;
    }

  /* We don't know what this is, cost all operands.  */
  return false;
}
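/* Illustrative note (added commentary, not from the original source):
   the branches above distinguish, for example,

     (set (pc) (if_then_else (eq (reg:DI) (const_int 0))
                             (label_ref ...) (pc)))

   which is costed like a CBZ on the inner register, from

     (set (reg:DI) (if_then_else (eq (reg:CC cc) (const_int 0))
                                 (reg:DI) (reg:DI)))

   which is some flavour of CSEL and costs its two value operands.  */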
/* Calculate the cost of calculating X, storing it in *COST.  Result
   is true if the total cost of the operation has now been calculated.  */
static bool
aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
                   int param ATTRIBUTE_UNUSED, int *cost, bool speed)
{
  rtx op0, op1, op2;
  const struct cpu_cost_table *extra_cost
    = aarch64_tune_params->insn_extra_cost;
  enum machine_mode mode = GET_MODE (x);

  /* By default, assume that everything has equivalent cost to the
     cheapest instruction.  Any additional costs are applied as a delta
     above this default.  */
  *cost = COSTS_N_INSNS (1);

  /* TODO: The cost infrastructure currently does not handle
     vector operations.  Assume that all vector operations
     are equally expensive.  */
  if (VECTOR_MODE_P (mode))
    {
      if (speed)
        *cost += extra_cost->vect.alu;
      return true;
    }

  switch (code)
    {
    case SET:
4962 /* The cost depends entirely on the operands to SET. */
4967 switch (GET_CODE (op0
))
4972 rtx address
= XEXP (op0
, 0);
4973 if (GET_MODE_CLASS (mode
) == MODE_INT
)
4974 *cost
+= extra_cost
->ldst
.store
;
4975 else if (mode
== SFmode
)
4976 *cost
+= extra_cost
->ldst
.storef
;
4977 else if (mode
== DFmode
)
4978 *cost
+= extra_cost
->ldst
.stored
;
4981 COSTS_N_INSNS (aarch64_address_cost (address
, mode
,
4985 *cost
+= rtx_cost (op1
, SET
, 1, speed
);
4989 if (! REG_P (SUBREG_REG (op0
)))
4990 *cost
+= rtx_cost (SUBREG_REG (op0
), SET
, 0, speed
);
4994 /* const0_rtx is in general free, but we will use an
4995 instruction to set a register to 0. */
4996 if (REG_P (op1
) || op1
== const0_rtx
)
4998 /* The cost is 1 per register copied. */
4999 int n_minus_1
= (GET_MODE_SIZE (GET_MODE (op0
)) - 1)
5001 *cost
= COSTS_N_INSNS (n_minus_1
+ 1);
5004 /* Cost is just the cost of the RHS of the set. */
5005 *cost
+= rtx_cost (op1
, SET
, 1, speed
);
5010 /* Bit-field insertion. Strip any redundant widening of
5011 the RHS to meet the width of the target. */
5012 if (GET_CODE (op1
) == SUBREG
)
5013 op1
= SUBREG_REG (op1
);
5014 if ((GET_CODE (op1
) == ZERO_EXTEND
5015 || GET_CODE (op1
) == SIGN_EXTEND
)
5016 && GET_CODE (XEXP (op0
, 1)) == CONST_INT
5017 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1
, 0)))
5018 >= INTVAL (XEXP (op0
, 1))))
5019 op1
= XEXP (op1
, 0);
5021 if (CONST_INT_P (op1
))
5023 /* MOV immediate is assumed to always be cheap. */
5024 *cost
= COSTS_N_INSNS (1);
5030 *cost
+= extra_cost
->alu
.bfi
;
5031 *cost
+= rtx_cost (op1
, (enum rtx_code
) code
, 1, speed
);
5037 /* We can't make sense of this, assume default cost. */
5038 *cost
= COSTS_N_INSNS (1);
5044 /* If an instruction can incorporate a constant within the
5045 instruction, the instruction's expression avoids calling
5046 rtx_cost() on the constant. If rtx_cost() is called on a
5047 constant, then it is usually because the constant must be
5048 moved into a register by one or more instructions.
5050 The exception is constant 0, which can be expressed
5051 as XZR/WZR and is therefore free. The exception to this is
5052 if we have (set (reg) (const0_rtx)) in which case we must cost
5053 the move. However, we can catch that when we cost the SET, so
5054 we don't need to consider that here. */
5055 if (x
== const0_rtx
)
5059 /* To an approximation, building any other constant is
5060 proportionally expensive to the number of instructions
5061 required to build that constant. This is true whether we
5062 are compiling for SPEED or otherwise. */
5063 *cost
= COSTS_N_INSNS (aarch64_build_constant (0,
5072 /* mov[df,sf]_aarch64. */
5073 if (aarch64_float_const_representable_p (x
))
5074 /* FMOV (scalar immediate). */
5075 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
5076 else if (!aarch64_float_const_zero_rtx_p (x
))
5078 /* This will be a load from memory. */
5080 *cost
+= extra_cost
->ldst
.loadd
;
5082 *cost
+= extra_cost
->ldst
.loadf
;
5085 /* Otherwise this is +0.0. We get this using MOVI d0, #0
5086 or MOV v0.s[0], wzr - neither of which are modeled by the
5087 cost tables. Just use the default cost. */
5097 /* For loads we want the base cost of a load, plus an
5098 approximation for the additional cost of the addressing
5100 rtx address
= XEXP (x
, 0);
5101 if (GET_MODE_CLASS (mode
) == MODE_INT
)
5102 *cost
+= extra_cost
->ldst
.load
;
5103 else if (mode
== SFmode
)
5104 *cost
+= extra_cost
->ldst
.loadf
;
5105 else if (mode
== DFmode
)
5106 *cost
+= extra_cost
->ldst
.loadd
;
5109 COSTS_N_INSNS (aarch64_address_cost (address
, mode
,
5118 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
5120 if (GET_RTX_CLASS (GET_CODE (op0
)) == RTX_COMPARE
5121 || GET_RTX_CLASS (GET_CODE (op0
)) == RTX_COMM_COMPARE
)
5124 *cost
+= rtx_cost (XEXP (op0
, 0), NEG
, 0, speed
);
5128 /* Cost this as SUB wzr, X. */
5129 op0
= CONST0_RTX (GET_MODE (x
));
5134 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
5136 /* Support (neg(fma...)) as a single instruction only if
5137 sign of zeros is unimportant. This matches the decision
5138 making in aarch64.md. */
5139 if (GET_CODE (op0
) == FMA
&& !HONOR_SIGNED_ZEROS (GET_MODE (op0
)))
5142 *cost
= rtx_cost (op0
, NEG
, 0, speed
);
5147 *cost
+= extra_cost
->fp
[mode
== DFmode
].neg
;
5157 if (op1
== const0_rtx
5158 && GET_CODE (op0
) == AND
)
5164 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
)
5166 /* TODO: A write to the CC flags possibly costs extra, this
5167 needs encoding in the cost tables. */
5169 /* CC_ZESWPmode supports zero extend for free. */
5170 if (GET_MODE (x
) == CC_ZESWPmode
&& GET_CODE (op0
) == ZERO_EXTEND
)
5171 op0
= XEXP (op0
, 0);
5174 if (GET_CODE (op0
) == AND
)
5180 if (GET_CODE (op0
) == PLUS
)
5182 /* ADDS (and CMN alias). */
5187 if (GET_CODE (op0
) == MINUS
)
5194 if (GET_CODE (op1
) == NEG
)
5198 *cost
+= extra_cost
->alu
.arith
;
5200 *cost
+= rtx_cost (op0
, COMPARE
, 0, speed
);
5201 *cost
+= rtx_cost (XEXP (op1
, 0), NEG
, 1, speed
);
5207 Compare can freely swap the order of operands, and
5208 canonicalization puts the more complex operation first.
5209 But the integer MINUS logic expects the shift/extend
5210 operation in op1. */
5212 || (GET_CODE (op0
) == SUBREG
&& REG_P (SUBREG_REG (op0
)))))
5220 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
5224 *cost
+= extra_cost
->fp
[mode
== DFmode
].compare
;
5226 if (CONST_DOUBLE_P (op1
) && aarch64_float_const_zero_rtx_p (op1
))
5228 /* FCMP supports constant 0.0 for no extra cost. */
5242 /* Detect valid immediates. */
5243 if ((GET_MODE_CLASS (mode
) == MODE_INT
5244 || (GET_MODE_CLASS (mode
) == MODE_CC
5245 && GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
))
5246 && CONST_INT_P (op1
)
5247 && aarch64_uimm12_shift (INTVAL (op1
)))
5249 *cost
+= rtx_cost (op0
, MINUS
, 0, speed
);
5252 /* SUB(S) (immediate). */
5253 *cost
+= extra_cost
->alu
.arith
;
5258 /* Look for SUB (extended register). */
5259 if (aarch64_rtx_arith_op_extract_p (op1
, mode
))
5262 *cost
+= extra_cost
->alu
.arith_shift
;
5264 *cost
+= rtx_cost (XEXP (XEXP (op1
, 0), 0),
5265 (enum rtx_code
) GET_CODE (op1
),
5270 rtx new_op1
= aarch64_strip_extend (op1
);
5272 /* Cost this as an FMA-alike operation. */
5273 if ((GET_CODE (new_op1
) == MULT
5274 || GET_CODE (new_op1
) == ASHIFT
)
5277 *cost
+= aarch64_rtx_mult_cost (new_op1
, MULT
,
5278 (enum rtx_code
) code
,
5280 *cost
+= rtx_cost (op0
, MINUS
, 0, speed
);
5284 *cost
+= rtx_cost (new_op1
, MINUS
, 1, speed
);
5288 if (GET_MODE_CLASS (mode
) == MODE_INT
)
5290 *cost
+= extra_cost
->alu
.arith
;
5291 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5293 *cost
+= extra_cost
->fp
[mode
== DFmode
].addsub
;
5306 if (GET_RTX_CLASS (GET_CODE (op0
)) == RTX_COMPARE
5307 || GET_RTX_CLASS (GET_CODE (op0
)) == RTX_COMM_COMPARE
)
5310 *cost
+= rtx_cost (XEXP (op0
, 0), PLUS
, 0, speed
);
5311 *cost
+= rtx_cost (op1
, PLUS
, 1, speed
);
5315 if (GET_MODE_CLASS (mode
) == MODE_INT
5316 && CONST_INT_P (op1
)
5317 && aarch64_uimm12_shift (INTVAL (op1
)))
5319 *cost
+= rtx_cost (op0
, PLUS
, 0, speed
);
5322 /* ADD (immediate). */
5323 *cost
+= extra_cost
->alu
.arith
;
5327 /* Look for ADD (extended register). */
5328 if (aarch64_rtx_arith_op_extract_p (op0
, mode
))
5331 *cost
+= extra_cost
->alu
.arith_shift
;
5333 *cost
+= rtx_cost (XEXP (XEXP (op0
, 0), 0),
5334 (enum rtx_code
) GET_CODE (op0
),
5339 /* Strip any extend, leave shifts behind as we will
5340 cost them through mult_cost. */
5341 new_op0
= aarch64_strip_extend (op0
);
5343 if (GET_CODE (new_op0
) == MULT
5344 || GET_CODE (new_op0
) == ASHIFT
)
5346 *cost
+= aarch64_rtx_mult_cost (new_op0
, MULT
, PLUS
,
5348 *cost
+= rtx_cost (op1
, PLUS
, 1, speed
);
5352 *cost
+= (rtx_cost (new_op0
, PLUS
, 0, speed
)
5353 + rtx_cost (op1
, PLUS
, 1, speed
));
5357 if (GET_MODE_CLASS (mode
) == MODE_INT
)
5359 *cost
+= extra_cost
->alu
.arith
;
5360 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5362 *cost
+= extra_cost
->fp
[mode
== DFmode
].addsub
;
5368 *cost
= COSTS_N_INSNS (1);
5371 *cost
+= extra_cost
->alu
.rev
;
5376 if (aarch_rev16_p (x
))
5378 *cost
= COSTS_N_INSNS (1);
5381 *cost
+= extra_cost
->alu
.rev
;
5393 && GET_CODE (op0
) == MULT
5394 && CONST_INT_P (XEXP (op0
, 1))
5395 && CONST_INT_P (op1
)
5396 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0
, 1))),
5399 /* This is a UBFM/SBFM. */
5400 *cost
+= rtx_cost (XEXP (op0
, 0), ZERO_EXTRACT
, 0, speed
);
5402 *cost
+= extra_cost
->alu
.bfx
;
5406 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
5408 /* We possibly get the immediate for free, this is not
5410 if (CONST_INT_P (op1
)
5411 && aarch64_bitmask_imm (INTVAL (op1
), GET_MODE (x
)))
5413 *cost
+= rtx_cost (op0
, (enum rtx_code
) code
, 0, speed
);
5416 *cost
+= extra_cost
->alu
.logical
;
5424 /* Handle ORN, EON, or BIC. */
5425 if (GET_CODE (op0
) == NOT
)
5426 op0
= XEXP (op0
, 0);
5428 new_op0
= aarch64_strip_shift (op0
);
5430 /* If we had a shift on op0 then this is a logical-shift-
5431 by-register/immediate operation. Otherwise, this is just
5432 a logical operation. */
5437 /* Shift by immediate. */
5438 if (CONST_INT_P (XEXP (op0
, 1)))
5439 *cost
+= extra_cost
->alu
.log_shift
;
5441 *cost
+= extra_cost
->alu
.log_shift_reg
;
5444 *cost
+= extra_cost
->alu
.logical
;
5447 /* In both cases we want to cost both operands. */
5448 *cost
+= rtx_cost (new_op0
, (enum rtx_code
) code
, 0, speed
)
5449 + rtx_cost (op1
, (enum rtx_code
) code
, 1, speed
);
5459 *cost
+= extra_cost
->alu
.logical
;
5461 /* The logical instruction could have the shifted register form,
5462 but the cost is the same if the shift is processed as a separate
5463 instruction, so we don't bother with it here. */
5469 /* If a value is written in SI mode, then zero extended to DI
5470 mode, the operation will in general be free as a write to
5471 a 'w' register implicitly zeroes the upper bits of an 'x'
5472 register. However, if this is
5474 (set (reg) (zero_extend (reg)))
5476 we must cost the explicit register move. */
5478 && GET_MODE (op0
) == SImode
5481 int op_cost
= rtx_cost (XEXP (x
, 0), ZERO_EXTEND
, 0, speed
);
5483 if (!op_cost
&& speed
)
5485 *cost
+= extra_cost
->alu
.extend
;
5487 /* Free, the cost is that of the SI mode operation. */
5492 else if (MEM_P (XEXP (x
, 0)))
5494 /* All loads can zero extend to any size for free. */
5495 *cost
= rtx_cost (XEXP (x
, 0), ZERO_EXTEND
, param
, speed
);
5501 *cost
+= extra_cost
->alu
.extend
;
5506 if (MEM_P (XEXP (x
, 0)))
5511 rtx address
= XEXP (XEXP (x
, 0), 0);
5512 *cost
+= extra_cost
->ldst
.load_sign_extend
;
5515 COSTS_N_INSNS (aarch64_address_cost (address
, mode
,
5522 *cost
+= extra_cost
->alu
.extend
;
5529 if (CONST_INT_P (op1
))
5531 /* LSL (immediate), UBMF, UBFIZ and friends. These are all
5534 *cost
+= extra_cost
->alu
.shift
;
5536 /* We can incorporate zero/sign extend for free. */
5537 if (GET_CODE (op0
) == ZERO_EXTEND
5538 || GET_CODE (op0
) == SIGN_EXTEND
)
5539 op0
= XEXP (op0
, 0);
5541 *cost
+= rtx_cost (op0
, ASHIFT
, 0, speed
);
5548 *cost
+= extra_cost
->alu
.shift_reg
;
5550 return false; /* All arguments need to be in registers. */
5560 if (CONST_INT_P (op1
))
5562 /* ASR (immediate) and friends. */
5564 *cost
+= extra_cost
->alu
.shift
;
5566 *cost
+= rtx_cost (op0
, (enum rtx_code
) code
, 0, speed
);
5572 /* ASR (register) and friends. */
5574 *cost
+= extra_cost
->alu
.shift_reg
;
5576 return false; /* All arguments need to be in registers. */
5581 if (aarch64_cmodel
== AARCH64_CMODEL_LARGE
)
5585 *cost
+= extra_cost
->ldst
.load
;
5587 else if (aarch64_cmodel
== AARCH64_CMODEL_SMALL
5588 || aarch64_cmodel
== AARCH64_CMODEL_SMALL_PIC
)
5590 /* ADRP, followed by ADD. */
5591 *cost
+= COSTS_N_INSNS (1);
5593 *cost
+= 2 * extra_cost
->alu
.arith
;
5595 else if (aarch64_cmodel
== AARCH64_CMODEL_TINY
5596 || aarch64_cmodel
== AARCH64_CMODEL_TINY_PIC
)
5600 *cost
+= extra_cost
->alu
.arith
;
5605 /* One extra load instruction, after accessing the GOT. */
5606 *cost
+= COSTS_N_INSNS (1);
5608 *cost
+= extra_cost
->ldst
.load
;
5614 /* ADRP/ADD (immediate). */
5616 *cost
+= extra_cost
->alu
.arith
;
5623 *cost
+= extra_cost
->alu
.bfx
;
5625 /* We can trust that the immediates used will be correct (there
5626 are no by-register forms), so we need only cost op0. */
5627 *cost
+= rtx_cost (XEXP (x
, 0), (enum rtx_code
) code
, 0, speed
);
5631 *cost
+= aarch64_rtx_mult_cost (x
, MULT
, 0, speed
);
5632 /* aarch64_rtx_mult_cost always handles recursion to its
5640 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
5641 *cost
+= (extra_cost
->mult
[GET_MODE (x
) == DImode
].add
5642 + extra_cost
->mult
[GET_MODE (x
) == DImode
].idiv
);
5643 else if (GET_MODE (x
) == DFmode
)
5644 *cost
+= (extra_cost
->fp
[1].mult
5645 + extra_cost
->fp
[1].div
);
5646 else if (GET_MODE (x
) == SFmode
)
5647 *cost
+= (extra_cost
->fp
[0].mult
5648 + extra_cost
->fp
[0].div
);
5650 return false; /* All arguments need to be in registers. */
5657 if (GET_MODE_CLASS (mode
) == MODE_INT
)
5658 /* There is no integer SQRT, so only DIV and UDIV can get
5660 *cost
+= extra_cost
->mult
[mode
== DImode
].idiv
;
5662 *cost
+= extra_cost
->fp
[mode
== DFmode
].div
;
5664 return false; /* All arguments need to be in registers. */
5667 return aarch64_if_then_else_costs (XEXP (x
, 0), XEXP (x
, 1),
5668 XEXP (x
, 2), cost
, speed
);
5681 return false; /* All arguments must be in registers. */
5689 *cost
+= extra_cost
->fp
[mode
== DFmode
].fma
;
5691 /* FMSUB, FNMADD, and FNMSUB are free. */
5692 if (GET_CODE (op0
) == NEG
)
5693 op0
= XEXP (op0
, 0);
5695 if (GET_CODE (op2
) == NEG
)
5696 op2
= XEXP (op2
, 0);
5698 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
5699 and the by-element operand as operand 0. */
5700 if (GET_CODE (op1
) == NEG
)
5701 op1
= XEXP (op1
, 0);
5703 /* Catch vector-by-element operations. The by-element operand can
5704 either be (vec_duplicate (vec_select (x))) or just
5705 (vec_select (x)), depending on whether we are multiplying by
5706 a vector or a scalar.
5708 Canonicalization is not very good in these cases, FMA4 will put the
5709 by-element operand as operand 0, FNMA4 will have it as operand 1. */
5710 if (GET_CODE (op0
) == VEC_DUPLICATE
)
5711 op0
= XEXP (op0
, 0);
5712 else if (GET_CODE (op1
) == VEC_DUPLICATE
)
5713 op1
= XEXP (op1
, 0);
5715 if (GET_CODE (op0
) == VEC_SELECT
)
5716 op0
= XEXP (op0
, 0);
5717 else if (GET_CODE (op1
) == VEC_SELECT
)
5718 op1
= XEXP (op1
, 0);
5720 /* If the remaining parameters are not registers,
5721 get the cost to put them into registers. */
5722 *cost
+= rtx_cost (op0
, FMA
, 0, speed
);
5723 *cost
+= rtx_cost (op1
, FMA
, 1, speed
);
5724 *cost
+= rtx_cost (op2
, FMA
, 2, speed
);
5729 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
5732 case FLOAT_TRUNCATE
:
5734 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
5738 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5740 /* FABS and FNEG are analogous. */
5742 *cost
+= extra_cost
->fp
[mode
== DFmode
].neg
;
5746 /* Integer ABS will either be split to
5747 two arithmetic instructions, or will be an ABS
5748 (scalar), which we don't model. */
5749 *cost
= COSTS_N_INSNS (2);
5751 *cost
+= 2 * extra_cost
->alu
.arith
;
5759 /* FMAXNM/FMINNM/FMAX/FMIN.
5760 TODO: This may not be accurate for all implementations, but
5761 we do not model this in the cost tables. */
5762 *cost
+= extra_cost
->fp
[mode
== DFmode
].addsub
;
5768 /* Decompose <su>muldi3_highpart. */
5769 if (/* (truncate:DI */
5772 && GET_MODE (XEXP (x
, 0)) == TImode
5773 && GET_CODE (XEXP (x
, 0)) == LSHIFTRT
5775 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
5776 /* (ANY_EXTEND:TI (reg:DI))
5777 (ANY_EXTEND:TI (reg:DI))) */
5778 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
5779 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == ZERO_EXTEND
)
5780 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
5781 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
))
5782 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x
, 0), 0), 0), 0)) == DImode
5783 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x
, 0), 0), 1), 0)) == DImode
5784 /* (const_int 64) */
5785 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
5786 && UINTVAL (XEXP (XEXP (x
, 0), 1)) == 64)
5790 *cost
+= extra_cost
->mult
[mode
== DImode
].extend
;
5791 *cost
+= rtx_cost (XEXP (XEXP (XEXP (XEXP (x
, 0), 0), 0), 0),
5793 *cost
+= rtx_cost (XEXP (XEXP (XEXP (XEXP (x
, 0), 0), 1), 0),
5800 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5802 "\nFailed to cost RTX. Assuming default cost.\n");
/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
   calculated for X.  This cost is stored in *COST.  Returns true
   if the total cost of X was calculated.  */
static bool
aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
                           int param, int *cost, bool speed)
{
  bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      print_rtl_single (dump_file, x);
      fprintf (dump_file, "\n%s cost: %d (%s)\n",
               speed ? "Hot" : "Cold",
               *cost, result ? "final" : "partial");
    }

  return result;
}
static int
aarch64_register_move_cost (enum machine_mode mode,
                            reg_class_t from_i, reg_class_t to_i)
{
  enum reg_class from = (enum reg_class) from_i;
  enum reg_class to = (enum reg_class) to_i;
  const struct cpu_regmove_cost *regmove_cost
    = aarch64_tune_params->regmove_cost;

  /* Moving between GPR and stack cost is the same as GP2GP.  */
  if ((from == GENERAL_REGS && to == STACK_REG)
      || (to == GENERAL_REGS && from == STACK_REG))
    return regmove_cost->GP2GP;

  /* To/From the stack register, we move via the gprs.  */
  if (to == STACK_REG || from == STACK_REG)
    return aarch64_register_move_cost (mode, from, GENERAL_REGS)
            + aarch64_register_move_cost (mode, GENERAL_REGS, to);

  if (from == GENERAL_REGS && to == GENERAL_REGS)
    return regmove_cost->GP2GP;
  else if (from == GENERAL_REGS)
    return regmove_cost->GP2FP;
  else if (to == GENERAL_REGS)
    return regmove_cost->FP2GP;

  /* When AdvSIMD instructions are disabled it is not possible to move
     a 128-bit value directly between Q registers.  This is handled in
     secondary reload.  A general register is used as a scratch to move
     the upper DI value and the lower DI value is moved directly,
     hence the cost is the sum of three moves.  */
  if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 128)
    return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;

  return regmove_cost->FP2FP;
}
static int
aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
                          reg_class_t rclass ATTRIBUTE_UNUSED,
                          bool in ATTRIBUTE_UNUSED)
{
  return aarch64_tune_params->memmov_cost;
}

/* Return the number of instructions that can be issued per cycle.  */
static int
aarch64_sched_issue_rate (void)
{
  return aarch64_tune_params->issue_rate;
}
/* Vectorizer cost model target hooks.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                    tree vectype,
                                    int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
      case scalar_stmt:
        return aarch64_tune_params->vec_costs->scalar_stmt_cost;

      case scalar_load:
        return aarch64_tune_params->vec_costs->scalar_load_cost;

      case scalar_store:
        return aarch64_tune_params->vec_costs->scalar_store_cost;

      case vector_stmt:
        return aarch64_tune_params->vec_costs->vec_stmt_cost;

      case vector_load:
        return aarch64_tune_params->vec_costs->vec_align_load_cost;

      case vector_store:
        return aarch64_tune_params->vec_costs->vec_store_cost;

      case vec_to_scalar:
        return aarch64_tune_params->vec_costs->vec_to_scalar_cost;

      case scalar_to_vec:
        return aarch64_tune_params->vec_costs->scalar_to_vec_cost;

      case unaligned_load:
        return aarch64_tune_params->vec_costs->vec_unalign_load_cost;

      case unaligned_store:
        return aarch64_tune_params->vec_costs->vec_unalign_store_cost;

      case cond_branch_taken:
        return aarch64_tune_params->vec_costs->cond_taken_branch_cost;

      case cond_branch_not_taken:
        return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;

      case vec_perm:
      case vec_promote_demote:
        return aarch64_tune_params->vec_costs->vec_stmt_cost;

      case vec_construct:
        elements = TYPE_VECTOR_SUBPARTS (vectype);
        return elements / 2 + 1;

      default:
        gcc_unreachable ();
    }
}
/* Implement targetm.vectorize.add_stmt_cost.  */
static unsigned
aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
                       struct _stmt_vec_info *stmt_info, int misalign,
                       enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost =
            aarch64_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
         vectorized are weighted more heavily.  The value here is
         a function (linear for now) of the loop nest level.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
        {
          loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
          struct loop *loop = LOOP_VINFO_LOOP (loop_info);
          unsigned nest_level = loop_depth (loop);

          count *= nest_level;
        }

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}
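/* Illustrative note (added commentary, not from the original source):
   as a worked example of the weighting above, a vector statement of
   cost 3 occurring with COUNT == 2 in a loop nested two levels inside
   the loop being vectorized contributes 2 * 2 * 3 = 12 to
   cost[vect_body].  The numbers are examples only.  */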
5976 static void initialize_aarch64_code_model (void);
5978 /* Parse the architecture extension string. */
5981 aarch64_parse_extension (char *str
)
5983 /* The extension string is parsed left to right. */
5984 const struct aarch64_option_extension
*opt
= NULL
;
5986 /* Flag to say whether we are adding or removing an extension. */
5987 int adding_ext
= -1;
5989 while (str
!= NULL
&& *str
!= 0)
5995 ext
= strchr (str
, '+');
6002 if (len
>= 2 && strncmp (str
, "no", 2) == 0)
6013 error ("missing feature modifier after %qs", "+no");
6017 /* Scan over the extensions table trying to find an exact match. */
6018 for (opt
= all_extensions
; opt
->name
!= NULL
; opt
++)
6020 if (strlen (opt
->name
) == len
&& strncmp (opt
->name
, str
, len
) == 0)
6022 /* Add or remove the extension. */
6024 aarch64_isa_flags
|= opt
->flags_on
;
6026 aarch64_isa_flags
&= ~(opt
->flags_off
);
6031 if (opt
->name
== NULL
)
6033 /* Extension not found in list. */
6034 error ("unknown feature modifier %qs", str
);
6044 /* Parse the ARCH string. */
6047 aarch64_parse_arch (void)
6050 const struct processor
*arch
;
6051 char *str
= (char *) alloca (strlen (aarch64_arch_string
) + 1);
6054 strcpy (str
, aarch64_arch_string
);
6056 ext
= strchr (str
, '+');
6065 error ("missing arch name in -march=%qs", str
);
6069 /* Loop through the list of supported ARCHs to find a match. */
6070 for (arch
= all_architectures
; arch
->name
!= NULL
; arch
++)
6072 if (strlen (arch
->name
) == len
&& strncmp (arch
->name
, str
, len
) == 0)
6074 selected_arch
= arch
;
6075 aarch64_isa_flags
= selected_arch
->flags
;
6078 selected_cpu
= &all_cores
[selected_arch
->core
];
6082 /* ARCH string contains at least one extension. */
6083 aarch64_parse_extension (ext
);
6086 if (strcmp (selected_arch
->arch
, selected_cpu
->arch
))
6088 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
6089 selected_cpu
->name
, selected_arch
->name
);
6096 /* ARCH name not found in list. */
6097 error ("unknown value %qs for -march", str
);
6101 /* Parse the CPU string. */
6104 aarch64_parse_cpu (void)
6107 const struct processor
*cpu
;
6108 char *str
= (char *) alloca (strlen (aarch64_cpu_string
) + 1);
6111 strcpy (str
, aarch64_cpu_string
);
6113 ext
= strchr (str
, '+');
6122 error ("missing cpu name in -mcpu=%qs", str
);
6126 /* Loop through the list of supported CPUs to find a match. */
6127 for (cpu
= all_cores
; cpu
->name
!= NULL
; cpu
++)
6129 if (strlen (cpu
->name
) == len
&& strncmp (cpu
->name
, str
, len
) == 0)
6132 selected_tune
= cpu
;
6133 aarch64_isa_flags
= selected_cpu
->flags
;
6137 /* CPU string contains at least one extension. */
6138 aarch64_parse_extension (ext
);
6145 /* CPU name not found in list. */
6146 error ("unknown value %qs for -mcpu", str
);
/* Parse the TUNE string.  */

static void
aarch64_parse_tune (void)
{
  const struct processor *cpu;
  char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
  strcpy (str, aarch64_tune_string);

  /* Loop through the list of supported CPUs to find a match.  */
  for (cpu = all_cores; cpu->name != NULL; cpu++)
    {
      if (strcmp (cpu->name, str) == 0)
        {
          selected_tune = cpu;
          return;
        }
    }

  /* CPU name not found in list.  */
  error ("unknown value %qs for -mtune", str);
  return;
}
/* Implement TARGET_OPTION_OVERRIDE.  */

static void
aarch64_override_options (void)
{
  /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
     If either of -march or -mtune is given, they override their
     respective component of -mcpu.

     So, first parse AARCH64_CPU_STRING, then the others, be careful
     with -march as, if -mcpu is not present on the command line, march
     must set a sensible default CPU.  (See the illustrative note after
     this function.)  */
6187 if (aarch64_cpu_string
)
6189 aarch64_parse_cpu ();
6192 if (aarch64_arch_string
)
6194 aarch64_parse_arch ();
6197 if (aarch64_tune_string
)
6199 aarch64_parse_tune ();
6202 #ifndef HAVE_AS_MABI_OPTION
6203 /* The compiler may have been configured with 2.23.* binutils, which does
6204 not have support for ILP32. */
6206 error ("Assembler does not support -mabi=ilp32");
6209 initialize_aarch64_code_model ();
6211 aarch64_build_bitmask_table ();
6213 /* This target defaults to strict volatile bitfields. */
6214 if (flag_strict_volatile_bitfields
< 0 && abi_version_at_least (2))
6215 flag_strict_volatile_bitfields
= 1;
6217 /* If the user did not specify a processor, choose the default
6218 one for them. This will be the CPU set during configuration using
6219 --with-cpu, otherwise it is "generic". */
6222 selected_cpu
= &all_cores
[TARGET_CPU_DEFAULT
& 0x3f];
6223 aarch64_isa_flags
= TARGET_CPU_DEFAULT
>> 6;
6226 gcc_assert (selected_cpu
);
6228 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
6230 selected_tune
= &all_cores
[selected_cpu
->core
];
6232 aarch64_tune_flags
= selected_tune
->flags
;
6233 aarch64_tune
= selected_tune
->core
;
6234 aarch64_tune_params
= selected_tune
->tune
;
  aarch64_override_options_after_change ();
}
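/* Illustrative note (added commentary, not from the original source):
   the parsing order above means that, for example,

     -mcpu=cortex-a57 -mtune=cortex-a53

   first selects the architecture and ISA flags implied by -mcpu and
   then overrides only the tuning target with -mtune, matching the
   "-mcpu is shorthand for -march + -mtune" rule described at the top
   of aarch64_override_options.  The CPU names are examples only.  */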
/* Implement targetm.override_options_after_change.  */
static void
aarch64_override_options_after_change (void)
{
  if (flag_omit_frame_pointer)
    flag_omit_leaf_frame_pointer = false;
  else if (flag_omit_leaf_frame_pointer)
    flag_omit_frame_pointer = true;
}
static struct machine_function *
aarch64_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_alloc_cleared_machine_function ();
  return machine;
}

void
aarch64_init_expanders (void)
{
  init_machine_status = aarch64_init_machine_status;
}
/* A checking mechanism for the implementation of the various code models.  */
static void
initialize_aarch64_code_model (void)
{
  if (flag_pic)
    {
      switch (aarch64_cmodel_var)
        {
        case AARCH64_CMODEL_TINY:
          aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
          break;
        case AARCH64_CMODEL_SMALL:
          aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
          break;
        case AARCH64_CMODEL_LARGE:
          sorry ("code model %qs with -f%s", "large",
                 flag_pic > 1 ? "PIC" : "pic");
        default:
          gcc_unreachable ();
        }
    }
  else
    aarch64_cmodel = aarch64_cmodel_var;
}
/* Return true if SYMBOL_REF X binds locally.  */

static bool
aarch64_symbol_binds_local_p (const_rtx x)
{
  return (SYMBOL_REF_DECL (x)
          ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
          : SYMBOL_REF_LOCAL_P (x));
}

/* Return true if SYMBOL_REF X is thread local */
static bool
aarch64_tls_symbol_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  if (GET_CODE (x) != SYMBOL_REF)
    return false;

  return SYMBOL_REF_TLS_MODEL (x) != 0;
}
/* Classify a TLS symbol into one of the TLS kinds.  */
enum aarch64_symbol_type
aarch64_classify_tls_symbol (rtx x)
{
  enum tls_model tls_kind = tls_symbolic_operand_type (x);

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
    case TLS_MODEL_LOCAL_DYNAMIC:
      return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;

    case TLS_MODEL_INITIAL_EXEC:
      return SYMBOL_SMALL_GOTTPREL;

    case TLS_MODEL_LOCAL_EXEC:
      return SYMBOL_SMALL_TPREL;

    case TLS_MODEL_EMULATED:
    case TLS_MODEL_NONE:
      return SYMBOL_FORCE_TO_MEM;

    default:
      gcc_unreachable ();
    }
}
/* Return the method that should be used to access SYMBOL_REF or
   LABEL_REF X in context CONTEXT.  */

enum aarch64_symbol_type
aarch64_classify_symbol (rtx x,
                         enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
{
  if (GET_CODE (x) == LABEL_REF)
    {
      switch (aarch64_cmodel)
        {
        case AARCH64_CMODEL_LARGE:
          return SYMBOL_FORCE_TO_MEM;

        case AARCH64_CMODEL_TINY_PIC:
        case AARCH64_CMODEL_TINY:
          return SYMBOL_TINY_ABSOLUTE;

        case AARCH64_CMODEL_SMALL_PIC:
        case AARCH64_CMODEL_SMALL:
          return SYMBOL_SMALL_ABSOLUTE;

        default:
          gcc_unreachable ();
        }
    }

  if (GET_CODE (x) == SYMBOL_REF)
    {
      if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
        return SYMBOL_FORCE_TO_MEM;

      if (aarch64_tls_symbol_p (x))
        return aarch64_classify_tls_symbol (x);

      switch (aarch64_cmodel)
        {
        case AARCH64_CMODEL_TINY:
          if (SYMBOL_REF_WEAK (x))
            return SYMBOL_FORCE_TO_MEM;
          return SYMBOL_TINY_ABSOLUTE;

        case AARCH64_CMODEL_SMALL:
          if (SYMBOL_REF_WEAK (x))
            return SYMBOL_FORCE_TO_MEM;
          return SYMBOL_SMALL_ABSOLUTE;

        case AARCH64_CMODEL_TINY_PIC:
          if (!aarch64_symbol_binds_local_p (x))
            return SYMBOL_TINY_GOT;
          return SYMBOL_TINY_ABSOLUTE;

        case AARCH64_CMODEL_SMALL_PIC:
          if (!aarch64_symbol_binds_local_p (x))
            return SYMBOL_SMALL_GOT;
          return SYMBOL_SMALL_ABSOLUTE;

        default:
          gcc_unreachable ();
        }
    }

  /* By default push everything into the constant pool.  */
  return SYMBOL_FORCE_TO_MEM;
}
bool
aarch64_constant_address_p (rtx x)
{
  return (CONSTANT_P (x) && memory_address_p (DImode, x));
}

bool
aarch64_legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
          && GET_CODE (XEXP (x, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
     return false;

  return true;
}
/* Return true if X holds either a quarter-precision or
     floating-point +0.0 constant.  */
static bool
aarch64_valid_floating_const (enum machine_mode mode, rtx x)
{
  if (!CONST_DOUBLE_P (x))
    return false;

  /* TODO: We could handle moving 0.0 to a TFmode register,
     but first we would like to refactor the movtf_aarch64
     to be more amicable to split moves properly and
     correctly gate on TARGET_SIMD.  For now - reject all
     constants which are not to SFmode or DFmode registers.  */
  if (!(mode == SFmode || mode == DFmode))
    return false;

  if (aarch64_float_const_zero_rtx_p (x))
    return true;
  return aarch64_float_const_representable_p (x);
}
static bool
aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
{
  /* Do not allow vector struct mode constants.  We could support
     0 and -1 easily, but they need support in aarch64-simd.md.  */
  if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
    return false;

  /* This could probably go away because
     we now decompose CONST_INTs according to expand_mov_immediate.  */
  if ((GET_CODE (x) == CONST_VECTOR
       && aarch64_simd_valid_immediate (x, mode, false, NULL))
      || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
        return !targetm.cannot_force_const_mem (mode, x);

  if (GET_CODE (x) == HIGH
      && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
    return true;

  return aarch64_constant_address_p (x);
}
static rtx
aarch64_load_tp (rtx target)
{
  if (!target
      || GET_MODE (target) != Pmode
      || !register_operand (target, Pmode))
    target = gen_reg_rtx (Pmode);

  /* Can return in any reg.  */
  emit_insn (gen_aarch64_load_tp_hard (target));
  return target;
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
   Return the type to use as __builtin_va_list.

   AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:

   struct  __va_list
   {
     void *__stack;
     void *__gr_top;
     void *__vr_top;
     int   __gr_offs;
     int   __vr_offs;
   };  */

static tree
aarch64_build_builtin_va_list (void)
{
  tree va_list_name;
  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6502 /* Create the type. */
6503 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
6504 /* Give it the required name. */
6505 va_list_name
= build_decl (BUILTINS_LOCATION
,
6507 get_identifier ("__va_list"),
6509 DECL_ARTIFICIAL (va_list_name
) = 1;
6510 TYPE_NAME (va_list_type
) = va_list_name
;
6511 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
6513 /* Create the fields. */
6514 f_stack
= build_decl (BUILTINS_LOCATION
,
6515 FIELD_DECL
, get_identifier ("__stack"),
6517 f_grtop
= build_decl (BUILTINS_LOCATION
,
6518 FIELD_DECL
, get_identifier ("__gr_top"),
6520 f_vrtop
= build_decl (BUILTINS_LOCATION
,
6521 FIELD_DECL
, get_identifier ("__vr_top"),
6523 f_groff
= build_decl (BUILTINS_LOCATION
,
6524 FIELD_DECL
, get_identifier ("__gr_offs"),
6526 f_vroff
= build_decl (BUILTINS_LOCATION
,
6527 FIELD_DECL
, get_identifier ("__vr_offs"),
6530 DECL_ARTIFICIAL (f_stack
) = 1;
6531 DECL_ARTIFICIAL (f_grtop
) = 1;
6532 DECL_ARTIFICIAL (f_vrtop
) = 1;
6533 DECL_ARTIFICIAL (f_groff
) = 1;
6534 DECL_ARTIFICIAL (f_vroff
) = 1;
6536 DECL_FIELD_CONTEXT (f_stack
) = va_list_type
;
6537 DECL_FIELD_CONTEXT (f_grtop
) = va_list_type
;
6538 DECL_FIELD_CONTEXT (f_vrtop
) = va_list_type
;
6539 DECL_FIELD_CONTEXT (f_groff
) = va_list_type
;
6540 DECL_FIELD_CONTEXT (f_vroff
) = va_list_type
;
6542 TYPE_FIELDS (va_list_type
) = f_stack
;
6543 DECL_CHAIN (f_stack
) = f_grtop
;
6544 DECL_CHAIN (f_grtop
) = f_vrtop
;
6545 DECL_CHAIN (f_vrtop
) = f_groff
;
6546 DECL_CHAIN (f_groff
) = f_vroff
;
6548 /* Compute its layout. */
6549 layout_type (va_list_type
);
6551 return va_list_type
;
6554 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
6556 aarch64_expand_builtin_va_start (tree valist
, rtx nextarg ATTRIBUTE_UNUSED
)
6558 const CUMULATIVE_ARGS
*cum
;
6559 tree f_stack
, f_grtop
, f_vrtop
, f_groff
, f_vroff
;
6560 tree stack
, grtop
, vrtop
, groff
, vroff
;
6562 int gr_save_area_size
;
6563 int vr_save_area_size
;
6566 cum
= &crtl
->args
.info
;
6568 = (NUM_ARG_REGS
- cum
->aapcs_ncrn
) * UNITS_PER_WORD
;
6570 = (NUM_FP_ARG_REGS
- cum
->aapcs_nvrn
) * UNITS_PER_VREG
;
6572 if (TARGET_GENERAL_REGS_ONLY
)
6574 if (cum
->aapcs_nvrn
> 0)
6575 sorry ("%qs and floating point or vector arguments",
6576 "-mgeneral-regs-only");
6577 vr_save_area_size
= 0;
6580 f_stack
= TYPE_FIELDS (va_list_type_node
);
6581 f_grtop
= DECL_CHAIN (f_stack
);
6582 f_vrtop
= DECL_CHAIN (f_grtop
);
6583 f_groff
= DECL_CHAIN (f_vrtop
);
6584 f_vroff
= DECL_CHAIN (f_groff
);
6586 stack
= build3 (COMPONENT_REF
, TREE_TYPE (f_stack
), valist
, f_stack
,
6588 grtop
= build3 (COMPONENT_REF
, TREE_TYPE (f_grtop
), valist
, f_grtop
,
6590 vrtop
= build3 (COMPONENT_REF
, TREE_TYPE (f_vrtop
), valist
, f_vrtop
,
6592 groff
= build3 (COMPONENT_REF
, TREE_TYPE (f_groff
), valist
, f_groff
,
6594 vroff
= build3 (COMPONENT_REF
, TREE_TYPE (f_vroff
), valist
, f_vroff
,
6597 /* Emit code to initialize STACK, which points to the next varargs stack
6598 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
6599 by named arguments. STACK is 8-byte aligned. */
6600 t
= make_tree (TREE_TYPE (stack
), virtual_incoming_args_rtx
);
6601 if (cum
->aapcs_stack_size
> 0)
6602 t
= fold_build_pointer_plus_hwi (t
, cum
->aapcs_stack_size
* UNITS_PER_WORD
);
6603 t
= build2 (MODIFY_EXPR
, TREE_TYPE (stack
), stack
, t
);
6604 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6606 /* Emit code to initialize GRTOP, the top of the GR save area.
6607 virtual_incoming_args_rtx should have been 16 byte aligned. */
6608 t
= make_tree (TREE_TYPE (grtop
), virtual_incoming_args_rtx
);
6609 t
= build2 (MODIFY_EXPR
, TREE_TYPE (grtop
), grtop
, t
);
6610 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6612 /* Emit code to initialize VRTOP, the top of the VR save area.
6613 This address is gr_save_area_bytes below GRTOP, rounded
6614 down to the next 16-byte boundary. */
6615 t
= make_tree (TREE_TYPE (vrtop
), virtual_incoming_args_rtx
);
6616 vr_offset
= AARCH64_ROUND_UP (gr_save_area_size
,
6617 STACK_BOUNDARY
/ BITS_PER_UNIT
);
6620 t
= fold_build_pointer_plus_hwi (t
, -vr_offset
);
6621 t
= build2 (MODIFY_EXPR
, TREE_TYPE (vrtop
), vrtop
, t
);
6622 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6624 /* Emit code to initialize GROFF, the offset from GRTOP of the
6625 next GPR argument. */
6626 t
= build2 (MODIFY_EXPR
, TREE_TYPE (groff
), groff
,
6627 build_int_cst (TREE_TYPE (groff
), -gr_save_area_size
));
6628 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6630 /* Likewise emit code to initialize VROFF, the offset from FTOP
6631 of the next VR argument. */
6632 t
= build2 (MODIFY_EXPR
, TREE_TYPE (vroff
), vroff
,
6633 build_int_cst (TREE_TYPE (vroff
), -vr_save_area_size
));
6634 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6637 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
6640 aarch64_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
6641 gimple_seq
*post_p ATTRIBUTE_UNUSED
)
6645 bool is_ha
; /* is HFA or HVA. */
6646 bool dw_align
; /* double-word align. */
6647 enum machine_mode ag_mode
= VOIDmode
;
6649 enum machine_mode mode
;
6651 tree f_stack
, f_grtop
, f_vrtop
, f_groff
, f_vroff
;
6652 tree stack
, f_top
, f_off
, off
, arg
, roundup
, on_stack
;
6653 HOST_WIDE_INT size
, rsize
, adjust
, align
;
6654 tree t
, u
, cond1
, cond2
;
6656 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
6658 type
= build_pointer_type (type
);
6660 mode
= TYPE_MODE (type
);
6662 f_stack
= TYPE_FIELDS (va_list_type_node
);
6663 f_grtop
= DECL_CHAIN (f_stack
);
6664 f_vrtop
= DECL_CHAIN (f_grtop
);
6665 f_groff
= DECL_CHAIN (f_vrtop
);
6666 f_vroff
= DECL_CHAIN (f_groff
);
6668 stack
= build3 (COMPONENT_REF
, TREE_TYPE (f_stack
), unshare_expr (valist
),
6669 f_stack
, NULL_TREE
);
6670 size
= int_size_in_bytes (type
);
6671 align
= aarch64_function_arg_alignment (mode
, type
) / BITS_PER_UNIT
;
6675 if (aarch64_vfp_is_call_or_return_candidate (mode
,
6681 /* TYPE passed in fp/simd registers. */
6682 if (TARGET_GENERAL_REGS_ONLY
)
6683 sorry ("%qs and floating point or vector arguments",
6684 "-mgeneral-regs-only");
6686 f_top
= build3 (COMPONENT_REF
, TREE_TYPE (f_vrtop
),
6687 unshare_expr (valist
), f_vrtop
, NULL_TREE
);
6688 f_off
= build3 (COMPONENT_REF
, TREE_TYPE (f_vroff
),
6689 unshare_expr (valist
), f_vroff
, NULL_TREE
);
6691 rsize
= nregs
* UNITS_PER_VREG
;
6695 if (BYTES_BIG_ENDIAN
&& GET_MODE_SIZE (ag_mode
) < UNITS_PER_VREG
)
6696 adjust
= UNITS_PER_VREG
- GET_MODE_SIZE (ag_mode
);
6698 else if (BLOCK_REG_PADDING (mode
, type
, 1) == downward
6699 && size
< UNITS_PER_VREG
)
6701 adjust
= UNITS_PER_VREG
- size
;
6706 /* TYPE passed in general registers. */
6707 f_top
= build3 (COMPONENT_REF
, TREE_TYPE (f_grtop
),
6708 unshare_expr (valist
), f_grtop
, NULL_TREE
);
6709 f_off
= build3 (COMPONENT_REF
, TREE_TYPE (f_groff
),
6710 unshare_expr (valist
), f_groff
, NULL_TREE
);
6711 rsize
= (size
+ UNITS_PER_WORD
- 1) & -UNITS_PER_WORD
;
6712 nregs
= rsize
/ UNITS_PER_WORD
;
6717 if (BLOCK_REG_PADDING (mode
, type
, 1) == downward
6718 && size
< UNITS_PER_WORD
)
6720 adjust
= UNITS_PER_WORD
- size
;
6724 /* Get a local temporary for the field value. */
6725 off
= get_initialized_tmp_var (f_off
, pre_p
, NULL
);
6727 /* Emit code to branch if off >= 0. */
6728 t
= build2 (GE_EXPR
, boolean_type_node
, off
,
6729 build_int_cst (TREE_TYPE (off
), 0));
6730 cond1
= build3 (COND_EXPR
, ptr_type_node
, t
, NULL_TREE
, NULL_TREE
);
6734 /* Emit: offs = (offs + 15) & -16. */
6735 t
= build2 (PLUS_EXPR
, TREE_TYPE (off
), off
,
6736 build_int_cst (TREE_TYPE (off
), 15));
6737 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (off
), t
,
6738 build_int_cst (TREE_TYPE (off
), -16));
6739 roundup
= build2 (MODIFY_EXPR
, TREE_TYPE (off
), off
, t
);
6744 /* Update ap.__[g|v]r_offs */
6745 t
= build2 (PLUS_EXPR
, TREE_TYPE (off
), off
,
6746 build_int_cst (TREE_TYPE (off
), rsize
));
6747 t
= build2 (MODIFY_EXPR
, TREE_TYPE (f_off
), unshare_expr (f_off
), t
);
6751 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), roundup
, t
);
6753 /* [cond2] if (ap.__[g|v]r_offs > 0) */
6754 u
= build2 (GT_EXPR
, boolean_type_node
, unshare_expr (f_off
),
6755 build_int_cst (TREE_TYPE (f_off
), 0));
6756 cond2
= build3 (COND_EXPR
, ptr_type_node
, u
, NULL_TREE
, NULL_TREE
);
6758 /* String up: make sure the assignment happens before the use. */
6759 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (cond2
), t
, cond2
);
6760 COND_EXPR_ELSE (cond1
) = t
;
6762 /* Prepare the trees handling the argument that is passed on the stack;
6763 the top level node will store in ON_STACK. */
6764 arg
= get_initialized_tmp_var (stack
, pre_p
, NULL
);
6767 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
6768 t
= fold_convert (intDI_type_node
, arg
);
6769 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
6770 build_int_cst (TREE_TYPE (t
), 15));
6771 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
6772 build_int_cst (TREE_TYPE (t
), -16));
6773 t
= fold_convert (TREE_TYPE (arg
), t
);
6774 roundup
= build2 (MODIFY_EXPR
, TREE_TYPE (arg
), arg
, t
);
6778 /* Advance ap.__stack */
6779 t
= fold_convert (intDI_type_node
, arg
);
6780 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
6781 build_int_cst (TREE_TYPE (t
), size
+ 7));
6782 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
6783 build_int_cst (TREE_TYPE (t
), -8));
6784 t
= fold_convert (TREE_TYPE (arg
), t
);
6785 t
= build2 (MODIFY_EXPR
, TREE_TYPE (stack
), unshare_expr (stack
), t
);
6786 /* String up roundup and advance. */
6788 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), roundup
, t
);
6789 /* String up with arg */
6790 on_stack
= build2 (COMPOUND_EXPR
, TREE_TYPE (arg
), t
, arg
);
6791 /* Big-endianness related address adjustment. */
6792 if (BLOCK_REG_PADDING (mode
, type
, 1) == downward
6793 && size
< UNITS_PER_WORD
)
6795 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (arg
), arg
,
6796 size_int (UNITS_PER_WORD
- size
));
6797 on_stack
= build2 (COMPOUND_EXPR
, TREE_TYPE (arg
), on_stack
, t
);
6800 COND_EXPR_THEN (cond1
) = unshare_expr (on_stack
);
6801 COND_EXPR_THEN (cond2
) = unshare_expr (on_stack
);
6803 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
6806 t
= build2 (PREINCREMENT_EXPR
, TREE_TYPE (off
), off
,
6807 build_int_cst (TREE_TYPE (off
), adjust
));
6809 t
= fold_convert (sizetype
, t
);
6810 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (f_top
), f_top
, t
);
6814 /* type ha; // treat as "struct {ftype field[n];}"
6815 ... [computing offs]
6816 for (i = 0; i <nregs; ++i, offs += 16)
6817 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
6820 tree tmp_ha
, field_t
, field_ptr_t
;
6822 /* Declare a local variable. */
6823 tmp_ha
= create_tmp_var_raw (type
, "ha");
6824 gimple_add_tmp_var (tmp_ha
);
6826 /* Establish the base type. */
6830 field_t
= float_type_node
;
6831 field_ptr_t
= float_ptr_type_node
;
6834 field_t
= double_type_node
;
6835 field_ptr_t
= double_ptr_type_node
;
6838 field_t
= long_double_type_node
;
6839 field_ptr_t
= long_double_ptr_type_node
;
6841 /* The half precision and quad precision are not fully supported yet. Enable
6842 the following code after the support is complete. Need to find the correct
6843 type node for __fp16 *. */
6846 field_t
= float_type_node
;
6847 field_ptr_t
= float_ptr_type_node
;
6853 tree innertype
= make_signed_type (GET_MODE_PRECISION (SImode
));
6854 field_t
= build_vector_type_for_mode (innertype
, ag_mode
);
6855 field_ptr_t
= build_pointer_type (field_t
);
6862 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area */
6863 tmp_ha
= build1 (ADDR_EXPR
, field_ptr_t
, tmp_ha
);
6865 t
= fold_convert (field_ptr_t
, addr
);
6866 t
= build2 (MODIFY_EXPR
, field_t
,
6867 build1 (INDIRECT_REF
, field_t
, tmp_ha
),
6868 build1 (INDIRECT_REF
, field_t
, t
));
6870 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
6871 for (i
= 1; i
< nregs
; ++i
)
6873 addr
= fold_build_pointer_plus_hwi (addr
, UNITS_PER_VREG
);
6874 u
= fold_convert (field_ptr_t
, addr
);
6875 u
= build2 (MODIFY_EXPR
, field_t
,
6876 build2 (MEM_REF
, field_t
, tmp_ha
,
6877 build_int_cst (field_ptr_t
,
6879 int_size_in_bytes (field_t
)))),
6880 build1 (INDIRECT_REF
, field_t
, u
));
6881 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), t
, u
);
6884 u
= fold_convert (TREE_TYPE (f_top
), tmp_ha
);
6885 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (f_top
), t
, u
);
6888 COND_EXPR_ELSE (cond2
) = t
;
6889 addr
= fold_convert (build_pointer_type (type
), cond1
);
6890 addr
= build_va_arg_indirect_ref (addr
);
6893 addr
= build_va_arg_indirect_ref (addr
);
6898 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
6901 aarch64_setup_incoming_varargs (cumulative_args_t cum_v
, enum machine_mode mode
,
6902 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
6905 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
6906 CUMULATIVE_ARGS local_cum
;
6907 int gr_saved
, vr_saved
;
6909 /* The caller has advanced CUM up to, but not beyond, the last named
6910 argument. Advance a local copy of CUM past the last "real" named
6911 argument, to find out how many registers are left over. */
6913 aarch64_function_arg_advance (pack_cumulative_args(&local_cum
), mode
, type
, true);
6915 /* Found out how many registers we need to save. */
6916 gr_saved
= NUM_ARG_REGS
- local_cum
.aapcs_ncrn
;
6917 vr_saved
= NUM_FP_ARG_REGS
- local_cum
.aapcs_nvrn
;
6919 if (TARGET_GENERAL_REGS_ONLY
)
6921 if (local_cum
.aapcs_nvrn
> 0)
6922 sorry ("%qs and floating point or vector arguments",
6923 "-mgeneral-regs-only");
6933 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
6934 ptr
= plus_constant (Pmode
, virtual_incoming_args_rtx
,
6935 - gr_saved
* UNITS_PER_WORD
);
6936 mem
= gen_frame_mem (BLKmode
, ptr
);
6937 set_mem_alias_set (mem
, get_varargs_alias_set ());
6939 move_block_from_reg (local_cum
.aapcs_ncrn
+ R0_REGNUM
,
6944 /* We can't use move_block_from_reg, because it will use
6945 the wrong mode, storing D regs only. */
6946 enum machine_mode mode
= TImode
;
6949 /* Set OFF to the offset from virtual_incoming_args_rtx of
6950 the first vector register. The VR save area lies below
6951 the GR one, and is aligned to 16 bytes. */
6952 off
= -AARCH64_ROUND_UP (gr_saved
* UNITS_PER_WORD
,
6953 STACK_BOUNDARY
/ BITS_PER_UNIT
);
6954 off
-= vr_saved
* UNITS_PER_VREG
;
6956 for (i
= local_cum
.aapcs_nvrn
; i
< NUM_FP_ARG_REGS
; ++i
)
6960 ptr
= plus_constant (Pmode
, virtual_incoming_args_rtx
, off
);
6961 mem
= gen_frame_mem (mode
, ptr
);
6962 set_mem_alias_set (mem
, get_varargs_alias_set ());
6963 aarch64_emit_move (mem
, gen_rtx_REG (mode
, V0_REGNUM
+ i
));
6964 off
+= UNITS_PER_VREG
;
6969 /* We don't save the size into *PRETEND_SIZE because we want to avoid
6970 any complication of having crtl->args.pretend_args_size changed. */
6971 cfun
->machine
->saved_varargs_size
6972 = (AARCH64_ROUND_UP (gr_saved
* UNITS_PER_WORD
,
6973 STACK_BOUNDARY
/ BITS_PER_UNIT
)
6974 + vr_saved
* UNITS_PER_VREG
);
static void
aarch64_conditional_register_usage (void)
{
  int i;
  if (!TARGET_FLOAT)
    {
      for (i = V0_REGNUM; i <= V31_REGNUM; i++)
        {
          fixed_regs[i] = 1;
          call_used_regs[i] = 1;
        }
    }
}
6991 /* Walk down the type tree of TYPE counting consecutive base elements.
6992 If *MODEP is VOIDmode, then set it to the first valid floating point
6993 type. If a non-floating point type is found, or if a floating point
6994 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6995 otherwise return the count in the sub-tree. */
6997 aapcs_vfp_sub_candidate (const_tree type
, enum machine_mode
*modep
)
6999 enum machine_mode mode
;
7002 switch (TREE_CODE (type
))
7005 mode
= TYPE_MODE (type
);
7006 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= TFmode
)
7009 if (*modep
== VOIDmode
)
7018 mode
= TYPE_MODE (TREE_TYPE (type
));
7019 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= TFmode
)
7022 if (*modep
== VOIDmode
)
7031 /* Use V2SImode and V4SImode as representatives of all 64-bit
7032 and 128-bit vector types. */
7033 size
= int_size_in_bytes (type
);
7046 if (*modep
== VOIDmode
)
7049 /* Vector modes are considered to be opaque: two vectors are
7050 equivalent for the purposes of being homogeneous aggregates
7051 if they are the same size. */
7060 tree index
= TYPE_DOMAIN (type
);
7062 /* Can't handle incomplete types. */
7063 if (!COMPLETE_TYPE_P (type
))
7066 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
7069 || !TYPE_MAX_VALUE (index
)
7070 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
7071 || !TYPE_MIN_VALUE (index
)
7072 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
7076 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
7077 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
7079 /* There must be no padding. */
7080 if (!tree_fits_uhwi_p (TYPE_SIZE (type
))
7081 || ((HOST_WIDE_INT
) tree_to_uhwi (TYPE_SIZE (type
))
7082 != count
* GET_MODE_BITSIZE (*modep
)))
7094 /* Can't handle incomplete types. */
7095 if (!COMPLETE_TYPE_P (type
))
7098 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
7100 if (TREE_CODE (field
) != FIELD_DECL
)
7103 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
7109 /* There must be no padding. */
7110 if (!tree_fits_uhwi_p (TYPE_SIZE (type
))
7111 || ((HOST_WIDE_INT
) tree_to_uhwi (TYPE_SIZE (type
))
7112 != count
* GET_MODE_BITSIZE (*modep
)))
7119 case QUAL_UNION_TYPE
:
7121 /* These aren't very interesting except in a degenerate case. */
7126 /* Can't handle incomplete types. */
7127 if (!COMPLETE_TYPE_P (type
))
7130 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
7132 if (TREE_CODE (field
) != FIELD_DECL
)
7135 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
7138 count
= count
> sub_count
? count
: sub_count
;
7141 /* There must be no padding. */
7142 if (!tree_fits_uhwi_p (TYPE_SIZE (type
))
7143 || ((HOST_WIDE_INT
) tree_to_uhwi (TYPE_SIZE (type
))
7144 != count
* GET_MODE_BITSIZE (*modep
)))
/* Return true if we use LRA instead of reload pass.  */
static bool
aarch64_lra_p (void)
{
  return aarch64_lra_flag;
}
/* Return TRUE if the type, as described by TYPE and MODE, is a composite
   type as described in AAPCS64 \S 4.3.  This includes aggregate, union and
   array types.  The C99 floating-point complex types are also considered
   as composite types, according to AAPCS64 \S 7.1.1.  The complex integer
   types, which are GCC extensions and out of the scope of AAPCS64, are
   treated as composite types here as well.

   Note that MODE itself is not sufficient in determining whether a type
   is such a composite type or not.  This is because
   stor-layout.c:compute_record_mode may have already changed the MODE
   (BLKmode) of a RECORD_TYPE TYPE to some other mode.  For example, a
   structure with only one field may have its MODE set to the mode of the
   field.  Also an integer mode whose size matches the size of the
   RECORD_TYPE type may be used to substitute the original mode
   (i.e. BLKmode) in certain circumstances.  In other words, MODE cannot be
   solely relied on.  */

static bool
aarch64_composite_type_p (const_tree type,
                          enum machine_mode mode)
{
  if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
    return true;

  if (mode == BLKmode
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return true;

  return false;
}
/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
   type as described in AAPCS64 \S 4.1.2.

   See the comment above aarch64_composite_type_p for the notes on MODE.  */

static bool
aarch64_short_vector_p (const_tree type,
			enum machine_mode mode)
{
  HOST_WIDE_INT size = -1;

  if (type && TREE_CODE (type) == VECTOR_TYPE)
    size = int_size_in_bytes (type);
  else if (!aarch64_composite_type_p (type, mode)
	   && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	       || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
    size = GET_MODE_SIZE (mode);

  return (size == 8 || size == 16) ? true : false;
}
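/* As an illustration, both int32x2_t (V2SImode, 8 bytes) and int32x4_t
   (V4SImode, 16 bytes) qualify as AAPCS64 short vectors under the size test
   above, whereas a 32-byte generic vector_size type does not.  */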
/* Return TRUE if an argument, whose type is described by TYPE and MODE,
   shall be passed or returned in simd/fp register(s) (providing these
   parameter passing registers are available).

   Upon successful return, *COUNT returns the number of needed registers,
   *BASE_MODE returns the mode of the individual register and when IS_HA
   is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
   floating-point aggregate or a homogeneous short-vector aggregate.  */

static bool
aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
					 const_tree type,
					 enum machine_mode *base_mode,
					 int *count,
					 bool *is_ha)
{
  enum machine_mode new_mode = VOIDmode;
  bool composite_p = aarch64_composite_type_p (type, mode);

  if (is_ha != NULL) *is_ha = false;

  if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
      || aarch64_short_vector_p (type, mode))
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      if (is_ha != NULL) *is_ha = true;
      *count = 2;
      new_mode = GET_MODE_INNER (mode);
    }
  else if (type && composite_p)
    {
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);

      if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
	{
	  if (is_ha != NULL) *is_ha = true;
	  *count = ag_count;
	}
      else
	return false;
    }
  else
    return false;

  *base_mode = new_mode;
  return true;
}
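/* For example, for a type such as "struct { double x, y; }" the walk above
   recognises a homogeneous floating-point aggregate: *base_mode becomes
   DFmode, *count becomes 2 and *is_ha is set, so the argument is eligible
   for two consecutive FP/SIMD registers when they are available.  */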
/* Implement TARGET_STRUCT_VALUE_RTX.  */

static rtx
aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
			  int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
}

/* Implements target hook vector_mode_supported_p.  */
static bool
aarch64_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SIMD
      && (mode == V4SImode  || mode == V8HImode
	  || mode == V16QImode || mode == V2DImode
	  || mode == V2SImode  || mode == V4HImode
	  || mode == V8QImode || mode == V2SFmode
	  || mode == V4SFmode || mode == V2DFmode
	  || mode == V1DFmode))
    return true;

  return false;
}

/* Return appropriate SIMD container
   for MODE within a vector of WIDTH bits.  */
static enum machine_mode
aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
{
  gcc_assert (width == 64 || width == 128);

/* Return 128-bit container as the preferred SIMD mode for MODE.  */
static enum machine_mode
aarch64_preferred_simd_mode (enum machine_mode mode)
{
  return aarch64_simd_container_mode (mode, 128);
}

/* Return the bitmask of possible vector sizes for the vectorizer
   to iterate over.  */
static unsigned int
aarch64_autovectorize_vector_sizes (void)
{
  return (16 | 8);
}
/* A table to help perform AArch64-specific name mangling for AdvSIMD
   vector types in order to conform to the AAPCS64 (see "Procedure
   Call Standard for the ARM 64-bit Architecture", Appendix A).  To
   qualify for emission with the mangled names defined in that document,
   a vector type must not only be of the correct mode but also be
   composed of AdvSIMD vector element types (e.g.
   __builtin_aarch64_simd_qi); these types are registered by
   aarch64_init_simd_builtins ().  In other words, vector types defined
   in other ways e.g. via vector_size attribute will get default
   mangled names.  */
typedef struct
{
  enum machine_mode mode;
  const char *element_type_name;
  const char *mangled_name;
} aarch64_simd_mangle_map_entry;

static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
  /* 64-bit containerized types.  */
  { V8QImode,  "__builtin_aarch64_simd_qi",     "10__Int8x8_t" },
  { V8QImode,  "__builtin_aarch64_simd_uqi",    "11__Uint8x8_t" },
  { V4HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x4_t" },
  { V4HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x4_t" },
  { V2SImode,  "__builtin_aarch64_simd_si",     "11__Int32x2_t" },
  { V2SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x2_t" },
  { V2SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x2_t" },
  { V8QImode,  "__builtin_aarch64_simd_poly8",  "11__Poly8x8_t" },
  { V4HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
  /* 128-bit containerized types.  */
  { V16QImode, "__builtin_aarch64_simd_qi",     "11__Int8x16_t" },
  { V16QImode, "__builtin_aarch64_simd_uqi",    "12__Uint8x16_t" },
  { V8HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x8_t" },
  { V8HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x8_t" },
  { V4SImode,  "__builtin_aarch64_simd_si",     "11__Int32x4_t" },
  { V4SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x4_t" },
  { V2DImode,  "__builtin_aarch64_simd_di",     "11__Int64x2_t" },
  { V2DImode,  "__builtin_aarch64_simd_udi",    "12__Uint64x2_t" },
  { V4SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x4_t" },
  { V2DFmode,  "__builtin_aarch64_simd_df",     "13__Float64x2_t" },
  { V16QImode, "__builtin_aarch64_simd_poly8",  "12__Poly8x16_t" },
  { V8HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
  { V2DImode,  "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
  { VOIDmode, NULL, NULL }
};
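/* For example, a V4SFmode vector whose elements were registered as
   __builtin_aarch64_simd_sf is mangled as "13__Float32x4_t", which is how
   the arm_neon.h type float32x4_t appears in C++ symbol names.  */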
/* Implement TARGET_MANGLE_TYPE.  */

static const char *
aarch64_mangle_type (const_tree type)
{
  /* The AArch64 ABI documents say that "__va_list" has to be
     mangled as if it is in the "std" namespace.  */
  if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Check the mode of the vector type, and the name of the vector
     element type, against the table.  */
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;

      while (pos->mode != VOIDmode)
	{
	  tree elt_type = TREE_TYPE (type);

	  if (pos->mode == TYPE_MODE (type)
	      && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
	      && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
			  pos->element_type_name))
	    return pos->mangled_name;

	  pos++;
	}
    }

  /* Use the default mangling.  */
  return NULL;
}

/* Return the equivalent letter for size.  */
static char
sizetochar (int size)
{
  switch (size)
    {
    case 64: return 'd';
    case 32: return 's';
    case 16: return 'h';
    case 8 : return 'b';
    default: gcc_unreachable ();
    }
}
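/* For example, sizetochar (32) yields 's', matching the ".4s"/".2s" lane
   suffixes used when printing AdvSIMD operands.  */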
/* Return true iff x is a uniform vector of floating-point
   constants, and the constant can be represented in
   quarter-precision form.  Note, as aarch64_float_const_representable
   rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0.  */
static bool
aarch64_vect_float_const_representable_p (rtx x)
{
  int i;
  REAL_VALUE_TYPE r0, ri;
  rtx x0, xi;

  if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
    return false;

  x0 = CONST_VECTOR_ELT (x, 0);
  if (!CONST_DOUBLE_P (x0))
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);

  for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
    {
      xi = CONST_VECTOR_ELT (x, i);
      if (!CONST_DOUBLE_P (xi))
	return false;

      REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
      if (!REAL_VALUES_EQUAL (r0, ri))
	return false;
    }

  return aarch64_float_const_representable_p (x0);
}
/* Return true for valid and false for invalid.  */

static bool
aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
			      struct simd_immediate_info *info)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG)	\
  matches = 1;						\
  for (i = 0; i < idx; i += (STRIDE))			\
    if (!(TEST))					\
      matches = 0;					\
  if (matches)						\
    {							\
      immtype = (CLASS);				\
      elsize = (ELSIZE);				\
      eshift = (SHIFT);					\
      emvn = (NEG);					\
      break;						\
    }

  unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
  unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
  unsigned char bytes[16];
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;
  int eshift, emvn;

  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      if (! (aarch64_simd_imm_zero_p (op, mode)
	     || aarch64_vect_float_const_representable_p (op)))
	return false;

      if (info)
	{
	  info->value = CONST_VECTOR_ELT (op, 0);
	  info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
	}

      return true;
    }

  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      /* The vector is provided in gcc endian-neutral fashion.  For aarch64_be,
	 it must be laid out in the vector register in reverse order.  */
      rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
      unsigned HOST_WIDE_INT elpart;
      unsigned int part, parts;

      if (GET_CODE (el) == CONST_INT)
	{
	  elpart = INTVAL (el);
	  parts = 1;
	}
      else if (GET_CODE (el) == CONST_DOUBLE)
	{
	  elpart = CONST_DOUBLE_LOW (el);
	  parts = 2;
	}
      else
	gcc_unreachable ();

      for (part = 0; part < parts; part++)
	{
	  unsigned int byte;
	  for (byte = 0; byte < innersize; byte++)
	    {
	      bytes[idx++] = (elpart & 0xff) ^ invmask;
	      elpart >>= BITS_PER_UNIT;
	    }
	  if (GET_CODE (el) == CONST_DOUBLE)
	    elpart = CONST_DOUBLE_HIGH (el);
	}
    }

  gcc_assert (idx == GET_MODE_SIZE (mode));

  do
    {
      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);

      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);

      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);

      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
	     && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);

      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);

      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);

      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);

      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);

      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);

      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
	     && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);

      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);

      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);

      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);

      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);

      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);

      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);

      CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);

      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
	     && bytes[i] == bytes[(i + 8) % idx], 0, 0);
    }
  while (0);

  if (immtype == -1)
    return false;

  if (info)
    {
      info->element_width = elsize;
      info->mvn = emvn != 0;
      info->shift = eshift;

      unsigned HOST_WIDE_INT imm = 0;

      if (immtype >= 12 && immtype <= 15)
	info->msl = true;

      /* Un-invert bytes of recognized vector, if necessary.  */
      if (invmask != 0)
	for (i = 0; i < idx; i++)
	  bytes[i] ^= invmask;

      if (immtype == 17)
	{
	  /* FIXME: Broken on 32-bit H_W_I hosts.  */
	  gcc_assert (sizeof (HOST_WIDE_INT) == 8);

	  for (i = 0; i < 8; i++)
	    imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
	      << (i * BITS_PER_UNIT);

	  info->value = GEN_INT (imm);
	}
      else
	{
	  for (i = 0; i < elsize / BITS_PER_UNIT; i++)
	    imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

	  /* Construct 'abcdefgh' because the assembler cannot handle
	     generic constants.  */
	  imm = (imm >> info->shift) & 0xff;
	  info->value = GEN_INT (imm);
	}
    }

  return true;
#undef CHECK
}
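/* As an illustration, a V4SImode constant whose elements are all 0x00ab0000
   matches the (4, 32, ..., 16, 0) pattern above: INFO then describes 32-bit
   elements with value 0xab and a left shift of 16, which can be emitted as a
   single MOVI with a shifted immediate.  */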
static bool
aarch64_const_vec_all_same_int_p (rtx x,
				  HOST_WIDE_INT minval,
				  HOST_WIDE_INT maxval)
{
  HOST_WIDE_INT firstval;
  int count, i;

  if (GET_CODE (x) != CONST_VECTOR
      || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
    return false;

  firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
  if (firstval < minval || firstval > maxval)
    return false;

  count = CONST_VECTOR_NUNITS (x);
  for (i = 1; i < count; i++)
    if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
      return false;

  return true;
}

/* Check if immediate shift constants are within range.  */
bool
aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
{
  int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
  if (left)
    return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
  else
    return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
}
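/* For example, with V4HImode (16-bit elements) this accepts left-shift
   counts in [0, 15] and right-shift counts in [1, 16], matching what the
   AdvSIMD shift-immediate encodings allow.  */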
/* Return true if X is a uniform vector where all elements
   are either the floating-point constant 0.0 or the
   integer constant 0.  */
bool
aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
{
  return x == CONST0_RTX (mode);
}

bool
aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT imm = INTVAL (x);
  int i;

  for (i = 0; i < 8; i++)
    {
      unsigned int byte = imm & 0xff;
      if (byte != 0xff && byte != 0)
	return false;
      imm >>= 8;
    }

  return true;
}

bool
aarch64_mov_operand_p (rtx x,
		       enum aarch64_symbol_context context,
		       enum machine_mode mode)
{
  if (GET_CODE (x) == HIGH
      && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
    return true;

  if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
    return true;

  if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
    return true;

  return aarch64_classify_symbolic_expression (x, context)
    == SYMBOL_TINY_ABSOLUTE;
}

/* Return a const_int vector of VAL.  */
rtx
aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits);
  int i;

  for (i = 0; i < nunits; i++)
    RTVEC_ELT (v, i) = GEN_INT (val);

  return gen_rtx_CONST_VECTOR (mode, v);
}

/* Check OP is a legal scalar immediate for the MOVI instruction.  */

bool
aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
{
  enum machine_mode vmode;

  gcc_assert (!VECTOR_MODE_P (mode));
  vmode = aarch64_preferred_simd_mode (mode);
  rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
  return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
}

/* Construct and return a PARALLEL RTX vector.  */
rtx
aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits / 2);
  int base = high ? nunits / 2 : 0;
  rtx t1;
  int i;

  for (i = 0; i < nunits / 2; i++)
    RTVEC_ELT (v, i) = GEN_INT (base + i);

  t1 = gen_rtx_PARALLEL (mode, v);
  return t1;
}

/* Bounds-check lanes.  Ensure OPERAND lies between LOW (inclusive) and
   HIGH (exclusive).  */
void
aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  HOST_WIDE_INT lane;
  gcc_assert (GET_CODE (operand) == CONST_INT);
  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    error ("lane out of range");
}

void
aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  gcc_assert (GET_CODE (operand) == CONST_INT);
  HOST_WIDE_INT lane = INTVAL (operand);

  if (lane < low || lane >= high)
    error ("constant out of range");
}

/* Emit code to reinterpret one AdvSIMD type as another,
   without altering bits.  */
void
aarch64_simd_reinterpret (rtx dest, rtx src)
{
  emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
}

/* Emit code to place a AdvSIMD pair result in memory locations (with equal
   registers).  */
void
aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
				    rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
				    rtx op1)
{
  rtx mem = gen_rtx_MEM (mode, destaddr);
  rtx tmp1 = gen_reg_rtx (mode);
  rtx tmp2 = gen_reg_rtx (mode);

  emit_insn (intfn (tmp1, op1, tmp2));

  emit_move_insn (mem, tmp1);
  mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
  emit_move_insn (mem, tmp2);
}

/* Return TRUE if OP is a valid vector addressing mode.  */
bool
aarch64_simd_mem_operand_p (rtx op)
{
  return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
			|| GET_CODE (XEXP (op, 0)) == REG);
}

/* Set up OPERANDS for a register copy from SRC to DEST, taking care
   not to early-clobber SRC registers in the process.

   We assume that the operands described by SRC and DEST represent a
   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
   number of components into which the copy has been decomposed.  */
void
aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
				rtx *src, unsigned int count)
{
  unsigned int i;

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || REGNO (operands[0]) < REGNO (operands[1]))
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[i];
	  operands[2 * i + 1] = src[i];
	}
    }
  else
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[count - i - 1];
	  operands[2 * i + 1] = src[count - i - 1];
	}
    }
}
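/* For example, when copying the pair q1:q2 into q2:q3 the ranges overlap and
   the destination starts at the higher register number, so the else branch
   above orders the moves q2->q3 first and q1->q2 second, ensuring q2 is read
   before it is overwritten.  */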
7875 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
7876 one of VSTRUCT modes: OI, CI or XI. */
7878 aarch64_simd_attr_length_move (rtx insn
)
7880 enum machine_mode mode
;
7882 extract_insn_cached (insn
);
7884 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
7886 mode
= GET_MODE (recog_data
.operand
[0]);
7902 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
7903 alignment of a vector to 128 bits. */
7904 static HOST_WIDE_INT
7905 aarch64_simd_vector_alignment (const_tree type
)
7907 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
7908 return MIN (align
, 128);
7911 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
7913 aarch64_simd_vector_alignment_reachable (const_tree type
, bool is_packed
)
7918 /* We guarantee alignment for vectors up to 128-bits. */
7919 if (tree_int_cst_compare (TYPE_SIZE (type
),
7920 bitsize_int (BIGGEST_ALIGNMENT
)) > 0)
7923 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
7927 /* If VALS is a vector constant that can be loaded into a register
7928 using DUP, generate instructions to do so and return an RTX to
7929 assign to the register. Otherwise return NULL_RTX. */
7931 aarch64_simd_dup_constant (rtx vals
)
7933 enum machine_mode mode
= GET_MODE (vals
);
7934 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
7935 int n_elts
= GET_MODE_NUNITS (mode
);
7936 bool all_same
= true;
7940 if (GET_CODE (vals
) != CONST_VECTOR
)
7943 for (i
= 1; i
< n_elts
; ++i
)
7945 x
= CONST_VECTOR_ELT (vals
, i
);
7946 if (!rtx_equal_p (x
, CONST_VECTOR_ELT (vals
, 0)))
7953 /* We can load this constant by using DUP and a constant in a
7954 single ARM register. This will be cheaper than a vector
7956 x
= copy_to_mode_reg (inner_mode
, CONST_VECTOR_ELT (vals
, 0));
7957 return gen_rtx_VEC_DUPLICATE (mode
, x
);
7961 /* Generate code to load VALS, which is a PARALLEL containing only
7962 constants (for vec_init) or CONST_VECTOR, efficiently into a
7963 register. Returns an RTX to copy into the register, or NULL_RTX
7964 for a PARALLEL that can not be converted into a CONST_VECTOR. */
7966 aarch64_simd_make_constant (rtx vals
)
7968 enum machine_mode mode
= GET_MODE (vals
);
7970 rtx const_vec
= NULL_RTX
;
7971 int n_elts
= GET_MODE_NUNITS (mode
);
7975 if (GET_CODE (vals
) == CONST_VECTOR
)
7977 else if (GET_CODE (vals
) == PARALLEL
)
7979 /* A CONST_VECTOR must contain only CONST_INTs and
7980 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
7981 Only store valid constants in a CONST_VECTOR. */
7982 for (i
= 0; i
< n_elts
; ++i
)
7984 rtx x
= XVECEXP (vals
, 0, i
);
7985 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
7988 if (n_const
== n_elts
)
7989 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
7994 if (const_vec
!= NULL_RTX
7995 && aarch64_simd_valid_immediate (const_vec
, mode
, false, NULL
))
7996 /* Load using MOVI/MVNI. */
7998 else if ((const_dup
= aarch64_simd_dup_constant (vals
)) != NULL_RTX
)
7999 /* Loaded using DUP. */
8001 else if (const_vec
!= NULL_RTX
)
8002 /* Load from constant pool. We can not take advantage of single-cycle
8003 LD1 because we need a PC-relative addressing mode. */
8006 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8007 We can not construct an initializer. */
8012 aarch64_expand_vector_init (rtx target
, rtx vals
)
8014 enum machine_mode mode
= GET_MODE (target
);
8015 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
8016 int n_elts
= GET_MODE_NUNITS (mode
);
8017 int n_var
= 0, one_var
= -1;
8018 bool all_same
= true;
8022 x
= XVECEXP (vals
, 0, 0);
8023 if (!CONST_INT_P (x
) && !CONST_DOUBLE_P (x
))
8024 n_var
= 1, one_var
= 0;
8026 for (i
= 1; i
< n_elts
; ++i
)
8028 x
= XVECEXP (vals
, 0, i
);
8029 if (!CONST_INT_P (x
) && !CONST_DOUBLE_P (x
))
8030 ++n_var
, one_var
= i
;
8032 if (!rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
8038 rtx constant
= aarch64_simd_make_constant (vals
);
8039 if (constant
!= NULL_RTX
)
8041 emit_move_insn (target
, constant
);
8046 /* Splat a single non-constant element if we can. */
8049 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
8050 aarch64_emit_move (target
, gen_rtx_VEC_DUPLICATE (mode
, x
));
8054 /* One field is non-constant. Load constant then overwrite varying
8055 field. This is more efficient than using the stack. */
8058 rtx copy
= copy_rtx (vals
);
8059 rtx index
= GEN_INT (one_var
);
8060 enum insn_code icode
;
8062 /* Load constant part of vector, substitute neighboring value for
8064 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, one_var
^ 1);
8065 aarch64_expand_vector_init (target
, copy
);
8067 /* Insert variable. */
8068 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
8069 icode
= optab_handler (vec_set_optab
, mode
);
8070 gcc_assert (icode
!= CODE_FOR_nothing
);
8071 emit_insn (GEN_FCN (icode
) (target
, x
, index
));
8075 /* Construct the vector in memory one field at a time
8076 and load the whole vector. */
8077 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
8078 for (i
= 0; i
< n_elts
; i
++)
8079 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
8080 i
* GET_MODE_SIZE (inner_mode
)),
8081 XVECEXP (vals
, 0, i
));
8082 emit_move_insn (target
, mem
);
8086 static unsigned HOST_WIDE_INT
8087 aarch64_shift_truncation_mask (enum machine_mode mode
)
8090 (aarch64_vector_mode_supported_p (mode
)
8091 || aarch64_vect_struct_mode_p (mode
)) ? 0 : (GET_MODE_BITSIZE (mode
) - 1);
8094 #ifndef TLS_SECTION_ASM_FLAG
8095 #define TLS_SECTION_ASM_FLAG 'T'
8099 aarch64_elf_asm_named_section (const char *name
, unsigned int flags
,
8100 tree decl ATTRIBUTE_UNUSED
)
8102 char flagchars
[10], *f
= flagchars
;
8104 /* If we have already declared this section, we can use an
8105 abbreviated form to switch back to it -- unless this section is
8106 part of a COMDAT groups, in which case GAS requires the full
8107 declaration every time. */
8108 if (!(HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
8109 && (flags
& SECTION_DECLARED
))
8111 fprintf (asm_out_file
, "\t.section\t%s\n", name
);
8115 if (!(flags
& SECTION_DEBUG
))
8117 if (flags
& SECTION_WRITE
)
8119 if (flags
& SECTION_CODE
)
8121 if (flags
& SECTION_SMALL
)
8123 if (flags
& SECTION_MERGE
)
8125 if (flags
& SECTION_STRINGS
)
8127 if (flags
& SECTION_TLS
)
8128 *f
++ = TLS_SECTION_ASM_FLAG
;
8129 if (HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
8133 fprintf (asm_out_file
, "\t.section\t%s,\"%s\"", name
, flagchars
);
8135 if (!(flags
& SECTION_NOTYPE
))
8140 if (flags
& SECTION_BSS
)
8145 #ifdef TYPE_OPERAND_FMT
8146 format
= "," TYPE_OPERAND_FMT
;
8151 fprintf (asm_out_file
, format
, type
);
8153 if (flags
& SECTION_ENTSIZE
)
8154 fprintf (asm_out_file
, ",%d", flags
& SECTION_ENTSIZE
);
8155 if (HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
8157 if (TREE_CODE (decl
) == IDENTIFIER_NODE
)
8158 fprintf (asm_out_file
, ",%s,comdat", IDENTIFIER_POINTER (decl
));
8160 fprintf (asm_out_file
, ",%s,comdat",
8161 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl
)));
8165 putc ('\n', asm_out_file
);
8168 /* Select a format to encode pointers in exception handling data. */
8170 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED
, int global
)
8173 switch (aarch64_cmodel
)
8175 case AARCH64_CMODEL_TINY
:
8176 case AARCH64_CMODEL_TINY_PIC
:
8177 case AARCH64_CMODEL_SMALL
:
8178 case AARCH64_CMODEL_SMALL_PIC
:
8179 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
8181 type
= DW_EH_PE_sdata4
;
8184 /* No assumptions here. 8-byte relocs required. */
8185 type
= DW_EH_PE_sdata8
;
8188 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
8191 /* Emit load exclusive. */
8194 aarch64_emit_load_exclusive (enum machine_mode mode
, rtx rval
,
8195 rtx mem
, rtx model_rtx
)
8197 rtx (*gen
) (rtx
, rtx
, rtx
);
8201 case QImode
: gen
= gen_aarch64_load_exclusiveqi
; break;
8202 case HImode
: gen
= gen_aarch64_load_exclusivehi
; break;
8203 case SImode
: gen
= gen_aarch64_load_exclusivesi
; break;
8204 case DImode
: gen
= gen_aarch64_load_exclusivedi
; break;
8209 emit_insn (gen (rval
, mem
, model_rtx
));
8212 /* Emit store exclusive. */
8215 aarch64_emit_store_exclusive (enum machine_mode mode
, rtx bval
,
8216 rtx rval
, rtx mem
, rtx model_rtx
)
8218 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
8222 case QImode
: gen
= gen_aarch64_store_exclusiveqi
; break;
8223 case HImode
: gen
= gen_aarch64_store_exclusivehi
; break;
8224 case SImode
: gen
= gen_aarch64_store_exclusivesi
; break;
8225 case DImode
: gen
= gen_aarch64_store_exclusivedi
; break;
8230 emit_insn (gen (bval
, rval
, mem
, model_rtx
));
8233 /* Mark the previous jump instruction as unlikely. */
8236 aarch64_emit_unlikely_jump (rtx insn
)
8238 int very_unlikely
= REG_BR_PROB_BASE
/ 100 - 1;
8240 insn
= emit_jump_insn (insn
);
8241 add_int_reg_note (insn
, REG_BR_PROB
, very_unlikely
);
8244 /* Expand a compare and swap pattern. */
8247 aarch64_expand_compare_and_swap (rtx operands
[])
8249 rtx bval
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
8250 enum machine_mode mode
, cmp_mode
;
8251 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
8256 oldval
= operands
[3];
8257 newval
= operands
[4];
8258 is_weak
= operands
[5];
8259 mod_s
= operands
[6];
8260 mod_f
= operands
[7];
8261 mode
= GET_MODE (mem
);
8264 /* Normally the succ memory model must be stronger than fail, but in the
8265 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
8266 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
8268 if (INTVAL (mod_f
) == MEMMODEL_ACQUIRE
8269 && INTVAL (mod_s
) == MEMMODEL_RELEASE
)
8270 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
8276 /* For short modes, we're going to perform the comparison in SImode,
8277 so do the zero-extension now. */
8279 rval
= gen_reg_rtx (SImode
);
8280 oldval
= convert_modes (SImode
, mode
, oldval
, true);
8285 /* Force the value into a register if needed. */
8286 if (!aarch64_plus_operand (oldval
, mode
))
8287 oldval
= force_reg (cmp_mode
, oldval
);
8296 case QImode
: gen
= gen_atomic_compare_and_swapqi_1
; break;
8297 case HImode
: gen
= gen_atomic_compare_and_swaphi_1
; break;
8298 case SImode
: gen
= gen_atomic_compare_and_swapsi_1
; break;
8299 case DImode
: gen
= gen_atomic_compare_and_swapdi_1
; break;
8304 emit_insn (gen (rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
8306 if (mode
== QImode
|| mode
== HImode
)
8307 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
8309 x
= gen_rtx_REG (CCmode
, CC_REGNUM
);
8310 x
= gen_rtx_EQ (SImode
, x
, const0_rtx
);
8311 emit_insn (gen_rtx_SET (VOIDmode
, bval
, x
));
8314 /* Split a compare and swap pattern. */
8317 aarch64_split_compare_and_swap (rtx operands
[])
8319 rtx rval
, mem
, oldval
, newval
, scratch
;
8320 enum machine_mode mode
;
8322 rtx label1
, label2
, x
, cond
;
8326 oldval
= operands
[2];
8327 newval
= operands
[3];
8328 is_weak
= (operands
[4] != const0_rtx
);
8329 scratch
= operands
[7];
8330 mode
= GET_MODE (mem
);
8335 label1
= gen_label_rtx ();
8336 emit_label (label1
);
8338 label2
= gen_label_rtx ();
8340 aarch64_emit_load_exclusive (mode
, rval
, mem
, operands
[5]);
8342 cond
= aarch64_gen_compare_reg (NE
, rval
, oldval
);
8343 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
8344 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
8345 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
8346 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
8348 aarch64_emit_store_exclusive (mode
, scratch
, mem
, newval
, operands
[5]);
8352 x
= gen_rtx_NE (VOIDmode
, scratch
, const0_rtx
);
8353 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
8354 gen_rtx_LABEL_REF (Pmode
, label1
), pc_rtx
);
8355 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
8359 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
8360 x
= gen_rtx_COMPARE (CCmode
, scratch
, const0_rtx
);
8361 emit_insn (gen_rtx_SET (VOIDmode
, cond
, x
));
8364 emit_label (label2
);
8367 /* Split an atomic operation. */
8370 aarch64_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
8371 rtx value
, rtx model_rtx
, rtx cond
)
8373 enum machine_mode mode
= GET_MODE (mem
);
8374 enum machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
8377 label
= gen_label_rtx ();
8381 new_out
= gen_lowpart (wmode
, new_out
);
8383 old_out
= gen_lowpart (wmode
, old_out
);
8386 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
8388 aarch64_emit_load_exclusive (mode
, old_out
, mem
, model_rtx
);
8397 x
= gen_rtx_AND (wmode
, old_out
, value
);
8398 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
8399 x
= gen_rtx_NOT (wmode
, new_out
);
8400 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
8404 if (CONST_INT_P (value
))
8406 value
= GEN_INT (-INTVAL (value
));
8412 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
8413 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
8417 aarch64_emit_store_exclusive (mode
, cond
, mem
,
8418 gen_lowpart (mode
, new_out
), model_rtx
);
8420 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
8421 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
8422 gen_rtx_LABEL_REF (Pmode
, label
), pc_rtx
);
8423 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
8427 aarch64_print_extension (void)
8429 const struct aarch64_option_extension
*opt
= NULL
;
8431 for (opt
= all_extensions
; opt
->name
!= NULL
; opt
++)
8432 if ((aarch64_isa_flags
& opt
->flags_on
) == opt
->flags_on
)
8433 asm_fprintf (asm_out_file
, "+%s", opt
->name
);
8435 asm_fprintf (asm_out_file
, "\n");
8439 aarch64_start_file (void)
8443 asm_fprintf (asm_out_file
, "\t.arch %s", selected_arch
->name
);
8444 aarch64_print_extension ();
8446 else if (selected_cpu
)
8448 const char *truncated_name
8449 = aarch64_rewrite_selected_cpu (selected_cpu
->name
);
8450 asm_fprintf (asm_out_file
, "\t.cpu %s", truncated_name
);
8451 aarch64_print_extension ();
8453 default_file_start();
8456 /* Target hook for c_mode_for_suffix. */
8457 static enum machine_mode
8458 aarch64_c_mode_for_suffix (char suffix
)
8466 /* We can only represent floating point constants which will fit in
8467 "quarter-precision" values. These values are characterised by
8468 a sign bit, a 4-bit mantissa and a 3-bit exponent. And are given
8471 (-1)^s * (n/16) * 2^r
8474 's' is the sign bit.
8475 'n' is an integer in the range 16 <= n <= 31.
8476 'r' is an integer in the range -3 <= r <= 4. */
8478 /* Return true iff X can be represented by a quarter-precision
8479 floating point immediate operand X. Note, we cannot represent 0.0. */
8481 aarch64_float_const_representable_p (rtx x
)
8483 /* This represents our current view of how many bits
8484 make up the mantissa. */
8485 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
8487 unsigned HOST_WIDE_INT mantissa
, mask
;
8488 HOST_WIDE_INT m1
, m2
;
8489 REAL_VALUE_TYPE r
, m
;
8491 if (!CONST_DOUBLE_P (x
))
8494 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
8496 /* We cannot represent infinities, NaNs or +/-zero. We won't
8497 know if we have +zero until we analyse the mantissa, but we
8498 can reject the other invalid values. */
8499 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
)
8500 || REAL_VALUE_MINUS_ZERO (r
))
8503 /* Extract exponent. */
8504 r
= real_value_abs (&r
);
8505 exponent
= REAL_EXP (&r
);
8507 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8508 highest (sign) bit, with a fixed binary point at bit point_pos.
8509 m1 holds the low part of the mantissa, m2 the high part.
8510 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
8511 bits for the mantissa, this can fail (low bits will be lost). */
8512 real_ldexp (&m
, &r
, point_pos
- exponent
);
8513 REAL_VALUE_TO_INT (&m1
, &m2
, m
);
8515 /* If the low part of the mantissa has bits set we cannot represent
8519 /* We have rejected the lower HOST_WIDE_INT, so update our
8520 understanding of how many bits lie in the mantissa and
8521 look only at the high HOST_WIDE_INT. */
8523 point_pos
-= HOST_BITS_PER_WIDE_INT
;
8525 /* We can only represent values with a mantissa of the form 1.xxxx. */
8526 mask
= ((unsigned HOST_WIDE_INT
)1 << (point_pos
- 5)) - 1;
8527 if ((mantissa
& mask
) != 0)
8530 /* Having filtered unrepresentable values, we may now remove all
8531 but the highest 5 bits. */
8532 mantissa
>>= point_pos
- 5;
8534 /* We cannot represent the value 0.0, so reject it. This is handled
8539 /* Then, as bit 4 is always set, we can mask it off, leaving
8540 the mantissa in the range [0, 15]. */
8541 mantissa
&= ~(1 << 4);
8542 gcc_assert (mantissa
<= 15);
8544 /* GCC internally does not use IEEE754-like encoding (where normalized
8545 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
8546 Our mantissa values are shifted 4 places to the left relative to
8547 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
8548 by 5 places to correct for GCC's representation. */
8549 exponent
= 5 - exponent
;
8551 return (exponent
>= 0 && exponent
<= 7);
8555 aarch64_output_simd_mov_immediate (rtx const_vector
,
8556 enum machine_mode mode
,
8560 static char templ
[40];
8561 const char *mnemonic
;
8562 const char *shift_op
;
8563 unsigned int lane_count
= 0;
8566 struct simd_immediate_info info
= { NULL_RTX
, 0, 0, false, false };
8568 /* This will return true to show const_vector is legal for use as either
8569 a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
8570 also update INFO to show how the immediate should be generated. */
8571 is_valid
= aarch64_simd_valid_immediate (const_vector
, mode
, false, &info
);
8572 gcc_assert (is_valid
);
8574 element_char
= sizetochar (info
.element_width
);
8575 lane_count
= width
/ info
.element_width
;
8577 mode
= GET_MODE_INNER (mode
);
8578 if (mode
== SFmode
|| mode
== DFmode
)
8580 gcc_assert (info
.shift
== 0 && ! info
.mvn
);
8581 if (aarch64_float_const_zero_rtx_p (info
.value
))
8582 info
.value
= GEN_INT (0);
8587 REAL_VALUE_FROM_CONST_DOUBLE (r
, info
.value
);
8588 char float_buf
[buf_size
] = {'\0'};
8589 real_to_decimal_for_mode (float_buf
, &r
, buf_size
, buf_size
, 1, mode
);
8592 if (lane_count
== 1)
8593 snprintf (templ
, sizeof (templ
), "fmov\t%%d0, %s", float_buf
);
8595 snprintf (templ
, sizeof (templ
), "fmov\t%%0.%d%c, %s",
8596 lane_count
, element_char
, float_buf
);
8601 mnemonic
= info
.mvn
? "mvni" : "movi";
8602 shift_op
= info
.msl
? "msl" : "lsl";
8604 if (lane_count
== 1)
8605 snprintf (templ
, sizeof (templ
), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX
,
8606 mnemonic
, UINTVAL (info
.value
));
8607 else if (info
.shift
)
8608 snprintf (templ
, sizeof (templ
), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
8609 ", %s %d", mnemonic
, lane_count
, element_char
,
8610 UINTVAL (info
.value
), shift_op
, info
.shift
);
8612 snprintf (templ
, sizeof (templ
), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
,
8613 mnemonic
, lane_count
, element_char
, UINTVAL (info
.value
));
8618 aarch64_output_scalar_simd_mov_immediate (rtx immediate
,
8619 enum machine_mode mode
)
8621 enum machine_mode vmode
;
8623 gcc_assert (!VECTOR_MODE_P (mode
));
8624 vmode
= aarch64_simd_container_mode (mode
, 64);
8625 rtx v_op
= aarch64_simd_gen_const_vector_dup (vmode
, INTVAL (immediate
));
8626 return aarch64_output_simd_mov_immediate (v_op
, vmode
, 64);
8629 /* Split operands into moves from op[1] + op[2] into op[0]. */
8632 aarch64_split_combinev16qi (rtx operands
[3])
8634 unsigned int dest
= REGNO (operands
[0]);
8635 unsigned int src1
= REGNO (operands
[1]);
8636 unsigned int src2
= REGNO (operands
[2]);
8637 enum machine_mode halfmode
= GET_MODE (operands
[1]);
8638 unsigned int halfregs
= HARD_REGNO_NREGS (src1
, halfmode
);
8641 gcc_assert (halfmode
== V16QImode
);
8643 if (src1
== dest
&& src2
== dest
+ halfregs
)
8645 /* No-op move. Can't split to nothing; emit something. */
8646 emit_note (NOTE_INSN_DELETED
);
8650 /* Preserve register attributes for variable tracking. */
8651 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
8652 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
8653 GET_MODE_SIZE (halfmode
));
8655 /* Special case of reversed high/low parts. */
8656 if (reg_overlap_mentioned_p (operands
[2], destlo
)
8657 && reg_overlap_mentioned_p (operands
[1], desthi
))
8659 emit_insn (gen_xorv16qi3 (operands
[1], operands
[1], operands
[2]));
8660 emit_insn (gen_xorv16qi3 (operands
[2], operands
[1], operands
[2]));
8661 emit_insn (gen_xorv16qi3 (operands
[1], operands
[1], operands
[2]));
8663 else if (!reg_overlap_mentioned_p (operands
[2], destlo
))
8665 /* Try to avoid unnecessary moves if part of the result
8666 is in the right place already. */
8668 emit_move_insn (destlo
, operands
[1]);
8669 if (src2
!= dest
+ halfregs
)
8670 emit_move_insn (desthi
, operands
[2]);
8674 if (src2
!= dest
+ halfregs
)
8675 emit_move_insn (desthi
, operands
[2]);
8677 emit_move_insn (destlo
, operands
[1]);
8681 /* vec_perm support. */
8683 #define MAX_VECT_LEN 16
8685 struct expand_vec_perm_d
8687 rtx target
, op0
, op1
;
8688 unsigned char perm
[MAX_VECT_LEN
];
8689 enum machine_mode vmode
;
8695 /* Generate a variable permutation. */
8698 aarch64_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
8700 enum machine_mode vmode
= GET_MODE (target
);
8701 bool one_vector_p
= rtx_equal_p (op0
, op1
);
8703 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
8704 gcc_checking_assert (GET_MODE (op0
) == vmode
);
8705 gcc_checking_assert (GET_MODE (op1
) == vmode
);
8706 gcc_checking_assert (GET_MODE (sel
) == vmode
);
8707 gcc_checking_assert (TARGET_SIMD
);
8711 if (vmode
== V8QImode
)
8713 /* Expand the argument to a V16QI mode by duplicating it. */
8714 rtx pair
= gen_reg_rtx (V16QImode
);
8715 emit_insn (gen_aarch64_combinev8qi (pair
, op0
, op0
));
8716 emit_insn (gen_aarch64_tbl1v8qi (target
, pair
, sel
));
8720 emit_insn (gen_aarch64_tbl1v16qi (target
, op0
, sel
));
8727 if (vmode
== V8QImode
)
8729 pair
= gen_reg_rtx (V16QImode
);
8730 emit_insn (gen_aarch64_combinev8qi (pair
, op0
, op1
));
8731 emit_insn (gen_aarch64_tbl1v8qi (target
, pair
, sel
));
8735 pair
= gen_reg_rtx (OImode
);
8736 emit_insn (gen_aarch64_combinev16qi (pair
, op0
, op1
));
8737 emit_insn (gen_aarch64_tbl2v16qi (target
, pair
, sel
));
8743 aarch64_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
8745 enum machine_mode vmode
= GET_MODE (target
);
8746 unsigned int i
, nelt
= GET_MODE_NUNITS (vmode
);
8747 bool one_vector_p
= rtx_equal_p (op0
, op1
);
8748 rtx rmask
[MAX_VECT_LEN
], mask
;
8750 gcc_checking_assert (!BYTES_BIG_ENDIAN
);
8752 /* The TBL instruction does not use a modulo index, so we must take care
8753 of that ourselves. */
8754 mask
= GEN_INT (one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
8755 for (i
= 0; i
< nelt
; ++i
)
8757 mask
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rmask
));
8758 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
8760 aarch64_expand_vec_perm_1 (target
, op0
, op1
, sel
);
8763 /* Recognize patterns suitable for the TRN instructions. */
8765 aarch64_evpc_trn (struct expand_vec_perm_d
*d
)
8767 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
8768 rtx out
, in0
, in1
, x
;
8769 rtx (*gen
) (rtx
, rtx
, rtx
);
8770 enum machine_mode vmode
= d
->vmode
;
8772 if (GET_MODE_UNIT_SIZE (vmode
) > 8)
8775 /* Note that these are little-endian tests.
8776 We correct for big-endian later. */
8777 if (d
->perm
[0] == 0)
8779 else if (d
->perm
[0] == 1)
8783 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
8785 for (i
= 0; i
< nelt
; i
+= 2)
8787 if (d
->perm
[i
] != i
+ odd
)
8789 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
8799 if (BYTES_BIG_ENDIAN
)
8801 x
= in0
, in0
= in1
, in1
= x
;
8810 case V16QImode
: gen
= gen_aarch64_trn2v16qi
; break;
8811 case V8QImode
: gen
= gen_aarch64_trn2v8qi
; break;
8812 case V8HImode
: gen
= gen_aarch64_trn2v8hi
; break;
8813 case V4HImode
: gen
= gen_aarch64_trn2v4hi
; break;
8814 case V4SImode
: gen
= gen_aarch64_trn2v4si
; break;
8815 case V2SImode
: gen
= gen_aarch64_trn2v2si
; break;
8816 case V2DImode
: gen
= gen_aarch64_trn2v2di
; break;
8817 case V4SFmode
: gen
= gen_aarch64_trn2v4sf
; break;
8818 case V2SFmode
: gen
= gen_aarch64_trn2v2sf
; break;
8819 case V2DFmode
: gen
= gen_aarch64_trn2v2df
; break;
8828 case V16QImode
: gen
= gen_aarch64_trn1v16qi
; break;
8829 case V8QImode
: gen
= gen_aarch64_trn1v8qi
; break;
8830 case V8HImode
: gen
= gen_aarch64_trn1v8hi
; break;
8831 case V4HImode
: gen
= gen_aarch64_trn1v4hi
; break;
8832 case V4SImode
: gen
= gen_aarch64_trn1v4si
; break;
8833 case V2SImode
: gen
= gen_aarch64_trn1v2si
; break;
8834 case V2DImode
: gen
= gen_aarch64_trn1v2di
; break;
8835 case V4SFmode
: gen
= gen_aarch64_trn1v4sf
; break;
8836 case V2SFmode
: gen
= gen_aarch64_trn1v2sf
; break;
8837 case V2DFmode
: gen
= gen_aarch64_trn1v2df
; break;
8843 emit_insn (gen (out
, in0
, in1
));
8847 /* Recognize patterns suitable for the UZP instructions. */
8849 aarch64_evpc_uzp (struct expand_vec_perm_d
*d
)
8851 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
8852 rtx out
, in0
, in1
, x
;
8853 rtx (*gen
) (rtx
, rtx
, rtx
);
8854 enum machine_mode vmode
= d
->vmode
;
8856 if (GET_MODE_UNIT_SIZE (vmode
) > 8)
8859 /* Note that these are little-endian tests.
8860 We correct for big-endian later. */
8861 if (d
->perm
[0] == 0)
8863 else if (d
->perm
[0] == 1)
8867 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
8869 for (i
= 0; i
< nelt
; i
++)
8871 unsigned elt
= (i
* 2 + odd
) & mask
;
8872 if (d
->perm
[i
] != elt
)
8882 if (BYTES_BIG_ENDIAN
)
8884 x
= in0
, in0
= in1
, in1
= x
;
8893 case V16QImode
: gen
= gen_aarch64_uzp2v16qi
; break;
8894 case V8QImode
: gen
= gen_aarch64_uzp2v8qi
; break;
8895 case V8HImode
: gen
= gen_aarch64_uzp2v8hi
; break;
8896 case V4HImode
: gen
= gen_aarch64_uzp2v4hi
; break;
8897 case V4SImode
: gen
= gen_aarch64_uzp2v4si
; break;
8898 case V2SImode
: gen
= gen_aarch64_uzp2v2si
; break;
8899 case V2DImode
: gen
= gen_aarch64_uzp2v2di
; break;
8900 case V4SFmode
: gen
= gen_aarch64_uzp2v4sf
; break;
8901 case V2SFmode
: gen
= gen_aarch64_uzp2v2sf
; break;
8902 case V2DFmode
: gen
= gen_aarch64_uzp2v2df
; break;
8911 case V16QImode
: gen
= gen_aarch64_uzp1v16qi
; break;
8912 case V8QImode
: gen
= gen_aarch64_uzp1v8qi
; break;
8913 case V8HImode
: gen
= gen_aarch64_uzp1v8hi
; break;
8914 case V4HImode
: gen
= gen_aarch64_uzp1v4hi
; break;
8915 case V4SImode
: gen
= gen_aarch64_uzp1v4si
; break;
8916 case V2SImode
: gen
= gen_aarch64_uzp1v2si
; break;
8917 case V2DImode
: gen
= gen_aarch64_uzp1v2di
; break;
8918 case V4SFmode
: gen
= gen_aarch64_uzp1v4sf
; break;
8919 case V2SFmode
: gen
= gen_aarch64_uzp1v2sf
; break;
8920 case V2DFmode
: gen
= gen_aarch64_uzp1v2df
; break;
8926 emit_insn (gen (out
, in0
, in1
));
8930 /* Recognize patterns suitable for the ZIP instructions. */
8932 aarch64_evpc_zip (struct expand_vec_perm_d
*d
)
8934 unsigned int i
, high
, mask
, nelt
= d
->nelt
;
8935 rtx out
, in0
, in1
, x
;
8936 rtx (*gen
) (rtx
, rtx
, rtx
);
8937 enum machine_mode vmode
= d
->vmode
;
8939 if (GET_MODE_UNIT_SIZE (vmode
) > 8)
8942 /* Note that these are little-endian tests.
8943 We correct for big-endian later. */
8945 if (d
->perm
[0] == high
)
8948 else if (d
->perm
[0] == 0)
8952 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
8954 for (i
= 0; i
< nelt
/ 2; i
++)
8956 unsigned elt
= (i
+ high
) & mask
;
8957 if (d
->perm
[i
* 2] != elt
)
8959 elt
= (elt
+ nelt
) & mask
;
8960 if (d
->perm
[i
* 2 + 1] != elt
)
8970 if (BYTES_BIG_ENDIAN
)
8972 x
= in0
, in0
= in1
, in1
= x
;
8981 case V16QImode
: gen
= gen_aarch64_zip2v16qi
; break;
8982 case V8QImode
: gen
= gen_aarch64_zip2v8qi
; break;
8983 case V8HImode
: gen
= gen_aarch64_zip2v8hi
; break;
8984 case V4HImode
: gen
= gen_aarch64_zip2v4hi
; break;
8985 case V4SImode
: gen
= gen_aarch64_zip2v4si
; break;
8986 case V2SImode
: gen
= gen_aarch64_zip2v2si
; break;
8987 case V2DImode
: gen
= gen_aarch64_zip2v2di
; break;
8988 case V4SFmode
: gen
= gen_aarch64_zip2v4sf
; break;
8989 case V2SFmode
: gen
= gen_aarch64_zip2v2sf
; break;
8990 case V2DFmode
: gen
= gen_aarch64_zip2v2df
; break;
8999 case V16QImode
: gen
= gen_aarch64_zip1v16qi
; break;
9000 case V8QImode
: gen
= gen_aarch64_zip1v8qi
; break;
9001 case V8HImode
: gen
= gen_aarch64_zip1v8hi
; break;
9002 case V4HImode
: gen
= gen_aarch64_zip1v4hi
; break;
9003 case V4SImode
: gen
= gen_aarch64_zip1v4si
; break;
9004 case V2SImode
: gen
= gen_aarch64_zip1v2si
; break;
9005 case V2DImode
: gen
= gen_aarch64_zip1v2di
; break;
9006 case V4SFmode
: gen
= gen_aarch64_zip1v4sf
; break;
9007 case V2SFmode
: gen
= gen_aarch64_zip1v2sf
; break;
9008 case V2DFmode
: gen
= gen_aarch64_zip1v2df
; break;
9014 emit_insn (gen (out
, in0
, in1
));
9018 /* Recognize patterns for the EXT insn. */
9021 aarch64_evpc_ext (struct expand_vec_perm_d
*d
)
9023 unsigned int i
, nelt
= d
->nelt
;
9024 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
9027 unsigned int location
= d
->perm
[0]; /* Always < nelt. */
9029 /* Check if the extracted indices are increasing by one. */
9030 for (i
= 1; i
< nelt
; i
++)
9032 unsigned int required
= location
+ i
;
9033 if (d
->one_vector_p
)
9035 /* We'll pass the same vector in twice, so allow indices to wrap. */
9036 required
&= (nelt
- 1);
9038 if (d
->perm
[i
] != required
)
9044 case V16QImode
: gen
= gen_aarch64_extv16qi
; break;
9045 case V8QImode
: gen
= gen_aarch64_extv8qi
; break;
9046 case V4HImode
: gen
= gen_aarch64_extv4hi
; break;
9047 case V8HImode
: gen
= gen_aarch64_extv8hi
; break;
9048 case V2SImode
: gen
= gen_aarch64_extv2si
; break;
9049 case V4SImode
: gen
= gen_aarch64_extv4si
; break;
9050 case V2SFmode
: gen
= gen_aarch64_extv2sf
; break;
9051 case V4SFmode
: gen
= gen_aarch64_extv4sf
; break;
9052 case V2DImode
: gen
= gen_aarch64_extv2di
; break;
9053 case V2DFmode
: gen
= gen_aarch64_extv2df
; break;
9062 /* The case where (location == 0) is a no-op for both big- and little-endian,
9063 and is removed by the mid-end at optimization levels -O1 and higher. */
9065 if (BYTES_BIG_ENDIAN
&& (location
!= 0))
9067 /* After setup, we want the high elements of the first vector (stored
9068 at the LSB end of the register), and the low elements of the second
9069 vector (stored at the MSB end of the register). So swap. */
9073 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
9074 location
= nelt
- location
;
9077 offset
= GEN_INT (location
);
9078 emit_insn (gen (d
->target
, d
->op0
, d
->op1
, offset
));
9083 aarch64_evpc_dup (struct expand_vec_perm_d
*d
)
9085 rtx (*gen
) (rtx
, rtx
, rtx
);
9086 rtx out
= d
->target
;
9088 enum machine_mode vmode
= d
->vmode
;
9089 unsigned int i
, elt
, nelt
= d
->nelt
;
9092 /* TODO: This may not be big-endian safe. */
9093 if (BYTES_BIG_ENDIAN
)
9097 for (i
= 1; i
< nelt
; i
++)
9099 if (elt
!= d
->perm
[i
])
9103 /* The generic preparation in aarch64_expand_vec_perm_const_1
9104 swaps the operand order and the permute indices if it finds
9105 d->perm[0] to be in the second operand. Thus, we can always
9106 use d->op0 and need not do any extra arithmetic to get the
9107 correct lane number. */
9109 lane
= GEN_INT (elt
);
9113 case V16QImode
: gen
= gen_aarch64_dup_lanev16qi
; break;
9114 case V8QImode
: gen
= gen_aarch64_dup_lanev8qi
; break;
9115 case V8HImode
: gen
= gen_aarch64_dup_lanev8hi
; break;
9116 case V4HImode
: gen
= gen_aarch64_dup_lanev4hi
; break;
9117 case V4SImode
: gen
= gen_aarch64_dup_lanev4si
; break;
9118 case V2SImode
: gen
= gen_aarch64_dup_lanev2si
; break;
9119 case V2DImode
: gen
= gen_aarch64_dup_lanev2di
; break;
9120 case V4SFmode
: gen
= gen_aarch64_dup_lanev4sf
; break;
9121 case V2SFmode
: gen
= gen_aarch64_dup_lanev2sf
; break;
9122 case V2DFmode
: gen
= gen_aarch64_dup_lanev2df
; break;
9127 emit_insn (gen (out
, in0
, lane
));
9132 aarch64_evpc_tbl (struct expand_vec_perm_d
*d
)
9134 rtx rperm
[MAX_VECT_LEN
], sel
;
9135 enum machine_mode vmode
= d
->vmode
;
9136 unsigned int i
, nelt
= d
->nelt
;
9141 /* Generic code will try constant permutation twice. Once with the
9142 original mode and again with the elements lowered to QImode.
9143 So wait and don't do the selector expansion ourselves. */
9144 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
9147 for (i
= 0; i
< nelt
; ++i
)
9149 int nunits
= GET_MODE_NUNITS (vmode
);
9151 /* If big-endian and two vectors we end up with a weird mixed-endian
9152 mode on NEON. Reverse the index within each word but not the word
9154 rperm
[i
] = GEN_INT (BYTES_BIG_ENDIAN
? d
->perm
[i
] ^ (nunits
- 1)
9157 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
9158 sel
= force_reg (vmode
, sel
);
9160 aarch64_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
9165 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
9167 /* The pattern matching functions above are written to look for a small
9168 number to begin the sequence (0, 1, N/2). If we begin with an index
9169 from the second operand, we can swap the operands. */
9170 if (d
->perm
[0] >= d
->nelt
)
9172 unsigned i
, nelt
= d
->nelt
;
9175 for (i
= 0; i
< nelt
; ++i
)
9176 d
->perm
[i
] = (d
->perm
[i
] + nelt
) & (2 * nelt
- 1);
9185 if (aarch64_evpc_ext (d
))
9187 else if (aarch64_evpc_zip (d
))
9189 else if (aarch64_evpc_uzp (d
))
9191 else if (aarch64_evpc_trn (d
))
9193 else if (aarch64_evpc_dup (d
))
9195 return aarch64_evpc_tbl (d
);
9200 /* Expand a vec_perm_const pattern. */
9203 aarch64_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
, rtx sel
)
9205 struct expand_vec_perm_d d
;
9212 d
.vmode
= GET_MODE (target
);
9213 gcc_assert (VECTOR_MODE_P (d
.vmode
));
9214 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
9215 d
.testing_p
= false;
9217 for (i
= which
= 0; i
< nelt
; ++i
)
9219 rtx e
= XVECEXP (sel
, 0, i
);
9220 int ei
= INTVAL (e
) & (2 * nelt
- 1);
9221 which
|= (ei
< nelt
? 1 : 2);
9231 d
.one_vector_p
= false;
9232 if (!rtx_equal_p (op0
, op1
))
9235 /* The elements of PERM do not suggest that only the first operand
9236 is used, but both operands are identical. Allow easier matching
9237 of the permutation by folding the permutation into the single
9241 for (i
= 0; i
< nelt
; ++i
)
9242 d
.perm
[i
] &= nelt
- 1;
9244 d
.one_vector_p
= true;
9249 d
.one_vector_p
= true;
9253 return aarch64_expand_vec_perm_const_1 (&d
);
9257 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
9258 const unsigned char *sel
)
9260 struct expand_vec_perm_d d
;
9261 unsigned int i
, nelt
, which
;
9265 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
9267 memcpy (d
.perm
, sel
, nelt
);
9269 /* Calculate whether all elements are in one vector. */
9270 for (i
= which
= 0; i
< nelt
; ++i
)
9272 unsigned char e
= d
.perm
[i
];
9273 gcc_assert (e
< 2 * nelt
);
9274 which
|= (e
< nelt
? 1 : 2);
9277 /* If all elements are from the second vector, reindex as if from the
9280 for (i
= 0; i
< nelt
; ++i
)
9283 /* Check whether the mask can be applied to a single vector. */
9284 d
.one_vector_p
= (which
!= 3);
9286 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
9287 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
9288 if (!d
.one_vector_p
)
9289 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
9292 ret
= aarch64_expand_vec_perm_const_1 (&d
);
9298 /* Implement target hook CANNOT_CHANGE_MODE_CLASS. */
9300 aarch64_cannot_change_mode_class (enum machine_mode from
,
9301 enum machine_mode to
,
9302 enum reg_class rclass
)
9304 /* Full-reg subregs are allowed on general regs or any class if they are
9306 if (GET_MODE_SIZE (from
) == GET_MODE_SIZE (to
)
9307 || !reg_classes_intersect_p (FP_REGS
, rclass
))
9310 /* Limited combinations of subregs are safe on FPREGs. Particularly,
9311 1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
9312 2. Scalar to Scalar for integer modes or same size float modes.
9313 3. Vector to Vector modes.
9314 4. On little-endian only, Vector-Structure to Vector modes. */
9315 if (GET_MODE_SIZE (from
) > GET_MODE_SIZE (to
))
9317 if (aarch64_vector_mode_supported_p (from
)
9318 && GET_MODE_SIZE (GET_MODE_INNER (from
)) == GET_MODE_SIZE (to
))
9321 if (GET_MODE_NUNITS (from
) == 1
9322 && GET_MODE_NUNITS (to
) == 1
9323 && (GET_MODE_CLASS (from
) == MODE_INT
9327 if (aarch64_vector_mode_supported_p (from
)
9328 && aarch64_vector_mode_supported_p (to
))
9331 /* Within an vector structure straddling multiple vector registers
9332 we are in a mixed-endian representation. As such, we can't
9333 easily change modes for BYTES_BIG_ENDIAN. Otherwise, we can
9334 switch between vectors and vector structures cheaply. */
9335 if (!BYTES_BIG_ENDIAN
)
9336 if ((aarch64_vector_mode_supported_p (from
)
9337 && aarch64_vect_struct_mode_p (to
))
9338 || (aarch64_vector_mode_supported_p (to
)
9339 && aarch64_vect_struct_mode_p (from
)))
9346 /* Implement MODES_TIEABLE_P. */
9349 aarch64_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
9351 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
9354 /* We specifically want to allow elements of "structure" modes to
9355 be tieable to the structure. This more general condition allows
9356 other rarer situations too. */
9358 && aarch64_vector_mode_p (mode1
)
9359 && aarch64_vector_mode_p (mode2
))
9365 #undef TARGET_ADDRESS_COST
9366 #define TARGET_ADDRESS_COST aarch64_address_cost
9368 /* This hook will determines whether unnamed bitfields affect the alignment
9369 of the containing structure. The hook returns true if the structure
9370 should inherit the alignment requirements of an unnamed bitfield's
9372 #undef TARGET_ALIGN_ANON_BITFIELD
9373 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
9375 #undef TARGET_ASM_ALIGNED_DI_OP
9376 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
9378 #undef TARGET_ASM_ALIGNED_HI_OP
9379 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
9381 #undef TARGET_ASM_ALIGNED_SI_OP
9382 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
9384 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
9385 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
9386 hook_bool_const_tree_hwi_hwi_const_tree_true
9388 #undef TARGET_ASM_FILE_START
9389 #define TARGET_ASM_FILE_START aarch64_start_file
9391 #undef TARGET_ASM_OUTPUT_MI_THUNK
9392 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
9394 #undef TARGET_ASM_SELECT_RTX_SECTION
9395 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
9397 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
9398 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
9400 #undef TARGET_BUILD_BUILTIN_VA_LIST
9401 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
9403 #undef TARGET_CALLEE_COPIES
9404 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
9406 #undef TARGET_CAN_ELIMINATE
9407 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
9409 #undef TARGET_CANNOT_FORCE_CONST_MEM
9410 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
9412 #undef TARGET_CONDITIONAL_REGISTER_USAGE
9413 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
9415 /* Only the least significant bit is used for initialization guard
9417 #undef TARGET_CXX_GUARD_MASK_BIT
9418 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
9420 #undef TARGET_C_MODE_FOR_SUFFIX
9421 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
9423 #ifdef TARGET_BIG_ENDIAN_DEFAULT
9424 #undef TARGET_DEFAULT_TARGET_FLAGS
9425 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL aarch64_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN aarch64_fold_builtin

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG aarch64_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance

#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE aarch64_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS aarch64_init_builtins

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode

#undef TARGET_LRA_P
#define TARGET_LRA_P aarch64_lra_p

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE aarch64_mangle_type

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

/* This target hook should return true if accesses to volatile bitfields
   should use the narrowest mode possible.  It should return false if
   these accesses should use the bitfield container type.  */
#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
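
/* Illustrative sketch (hypothetical type, never compiled): with
   TARGET_NARROW_VOLATILE_BITFIELD returning false, a read of the field
   below uses the width of its declared container type, i.e. a 32-bit
   access, rather than the narrowest mode covering the bitfield.  */
#if 0
struct volatile_bitfield_example
{
  volatile int flag : 1;  /* Accessed as a full "int" container.  */
};
#endif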

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE aarch64_override_options

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  aarch64_override_options_after_change

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB aarch64_return_in_msb

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p

#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  aarch64_builtin_vectorization_cost

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  aarch64_builtin_vectorized_function

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  aarch64_autovectorize_vector_sizes

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
  aarch64_atomic_assign_expand_fenv

/* Section anchor support.  */

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -256

/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
   byte offset; we can do much more for larger data types, but have no way
   to determine the size of the access.  We assume accesses are aligned.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095
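
/* Illustrative sketch (hypothetical helper, never compiled): section
   anchors are only useful for objects whose byte offset from the anchor
   lies in [TARGET_MIN_ANCHOR_OFFSET, TARGET_MAX_ANCHOR_OFFSET].  The
   range [-256, 4095] chosen above matches the signed 9-bit unscaled and
   unsigned 12-bit immediate offsets of AArch64 byte loads and stores.  */
#if 0
static bool
anchor_offset_in_range_p (HOST_WIDE_INT offset)
{
  return offset >= -256 && offset <= 4095;
}
#endif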

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  aarch64_simd_vector_alignment_reachable

/* vec_perm support.  */

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  aarch64_vectorize_vec_perm_const_ok

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-aarch64.h"