1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2014 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
25 #include "insn-codes.h"
27 #include "insn-attr.h"
29 #include "stringpool.h"
30 #include "stor-layout.h"
35 #include "hard-reg-set.h"
41 #include "target-def.h"
42 #include "targhooks.h"
47 #include "langhooks.h"
48 #include "diagnostic-core.h"
49 #include "pointer-set.h"
50 #include "hash-table.h"
52 #include "basic-block.h"
53 #include "tree-ssa-alias.h"
54 #include "internal-fn.h"
55 #include "gimple-fold.h"
57 #include "gimple-expr.h"
64 #include "tree-vectorizer.h"
65 #include "config/arm/aarch-cost-tables.h"
68 /* Defined for convenience. */
69 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
71 /* Classifies an address.
74 A simple base register plus immediate offset.
77 A base register indexed by immediate offset with writeback.
80 A base register indexed by (optionally scaled) register.
83 A base register indexed by (optionally scaled) zero-extended register.
86 A base register indexed by (optionally scaled) sign-extended register.
89 A LO_SUM rtx with a base register and "LO12" symbol relocation.
92 A constant symbolic address, in pc-relative literal pool. */
94 enum aarch64_address_type
{
104 struct aarch64_address_info
{
105 enum aarch64_address_type type
;
109 enum aarch64_symbol_type symbol_type
;
112 struct simd_immediate_info
121 /* The current code model. */
122 enum aarch64_code_model aarch64_cmodel
;
125 #undef TARGET_HAVE_TLS
126 #define TARGET_HAVE_TLS 1
129 static bool aarch64_lra_p (void);
130 static bool aarch64_composite_type_p (const_tree
, enum machine_mode
);
131 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode
,
133 enum machine_mode
*, int *,
135 static void aarch64_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
136 static void aarch64_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
137 static void aarch64_override_options_after_change (void);
138 static bool aarch64_vector_mode_supported_p (enum machine_mode
);
139 static unsigned bit_count (unsigned HOST_WIDE_INT
);
140 static bool aarch64_const_vec_all_same_int_p (rtx
,
141 HOST_WIDE_INT
, HOST_WIDE_INT
);
143 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
144 const unsigned char *sel
);
146 /* The processor for which instructions should be scheduled. */
147 enum aarch64_processor aarch64_tune
= cortexa53
;
149 /* The current tuning set. */
150 const struct tune_params
*aarch64_tune_params
;
152 /* Mask to specify which instructions we are allowed to generate. */
153 unsigned long aarch64_isa_flags
= 0;
155 /* Mask to specify which instruction scheduling options should be used. */
156 unsigned long aarch64_tune_flags
= 0;
158 /* Tuning parameters. */
160 #if HAVE_DESIGNATED_INITIALIZERS
161 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
163 #define NAMED_PARAM(NAME, VAL) (VAL)
166 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
170 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
173 static const struct cpu_addrcost_table generic_addrcost_table
=
175 #if HAVE_DESIGNATED_INITIALIZERS
184 NAMED_PARAM (pre_modify
, 0),
185 NAMED_PARAM (post_modify
, 0),
186 NAMED_PARAM (register_offset
, 0),
187 NAMED_PARAM (register_extend
, 0),
188 NAMED_PARAM (imm_offset
, 0)
191 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
194 static const struct cpu_addrcost_table cortexa57_addrcost_table
=
196 #if HAVE_DESIGNATED_INITIALIZERS
205 NAMED_PARAM (pre_modify
, 0),
206 NAMED_PARAM (post_modify
, 0),
207 NAMED_PARAM (register_offset
, 0),
208 NAMED_PARAM (register_extend
, 0),
209 NAMED_PARAM (imm_offset
, 0),
212 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
215 static const struct cpu_regmove_cost generic_regmove_cost
=
217 NAMED_PARAM (GP2GP
, 1),
218 NAMED_PARAM (GP2FP
, 2),
219 NAMED_PARAM (FP2GP
, 2),
220 /* We currently do not provide direct support for TFmode Q->Q move.
221 Therefore we need to raise the cost above 2 in order to have
222 reload handle the situation. */
223 NAMED_PARAM (FP2FP
, 4)
226 /* Generic costs for vector insn classes. */
227 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
230 static const struct cpu_vector_cost generic_vector_cost
=
232 NAMED_PARAM (scalar_stmt_cost
, 1),
233 NAMED_PARAM (scalar_load_cost
, 1),
234 NAMED_PARAM (scalar_store_cost
, 1),
235 NAMED_PARAM (vec_stmt_cost
, 1),
236 NAMED_PARAM (vec_to_scalar_cost
, 1),
237 NAMED_PARAM (scalar_to_vec_cost
, 1),
238 NAMED_PARAM (vec_align_load_cost
, 1),
239 NAMED_PARAM (vec_unalign_load_cost
, 1),
240 NAMED_PARAM (vec_unalign_store_cost
, 1),
241 NAMED_PARAM (vec_store_cost
, 1),
242 NAMED_PARAM (cond_taken_branch_cost
, 3),
243 NAMED_PARAM (cond_not_taken_branch_cost
, 1)
246 /* Generic costs for vector insn classes. */
247 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
250 static const struct cpu_vector_cost cortexa57_vector_cost
=
252 NAMED_PARAM (scalar_stmt_cost
, 1),
253 NAMED_PARAM (scalar_load_cost
, 4),
254 NAMED_PARAM (scalar_store_cost
, 1),
255 NAMED_PARAM (vec_stmt_cost
, 3),
256 NAMED_PARAM (vec_to_scalar_cost
, 8),
257 NAMED_PARAM (scalar_to_vec_cost
, 8),
258 NAMED_PARAM (vec_align_load_cost
, 5),
259 NAMED_PARAM (vec_unalign_load_cost
, 5),
260 NAMED_PARAM (vec_unalign_store_cost
, 1),
261 NAMED_PARAM (vec_store_cost
, 1),
262 NAMED_PARAM (cond_taken_branch_cost
, 1),
263 NAMED_PARAM (cond_not_taken_branch_cost
, 1)
266 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
269 static const struct tune_params generic_tunings
=
271 &cortexa57_extra_costs
,
272 &generic_addrcost_table
,
273 &generic_regmove_cost
,
274 &generic_vector_cost
,
275 NAMED_PARAM (memmov_cost
, 4),
276 NAMED_PARAM (issue_rate
, 2)
279 static const struct tune_params cortexa53_tunings
=
281 &cortexa53_extra_costs
,
282 &generic_addrcost_table
,
283 &generic_regmove_cost
,
284 &generic_vector_cost
,
285 NAMED_PARAM (memmov_cost
, 4),
286 NAMED_PARAM (issue_rate
, 2)
289 static const struct tune_params cortexa57_tunings
=
291 &cortexa57_extra_costs
,
292 &cortexa57_addrcost_table
,
293 &generic_regmove_cost
,
294 &cortexa57_vector_cost
,
295 NAMED_PARAM (memmov_cost
, 4),
296 NAMED_PARAM (issue_rate
, 3)
299 /* A processor implementing AArch64. */
302 const char *const name
;
303 enum aarch64_processor core
;
305 const unsigned long flags
;
306 const struct tune_params
*const tune
;
309 /* Processor cores implementing AArch64. */
310 static const struct processor all_cores
[] =
312 #define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
313 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
314 #include "aarch64-cores.def"
316 {"generic", cortexa53
, "8", AARCH64_FL_FPSIMD
| AARCH64_FL_FOR_ARCH8
, &generic_tunings
},
317 {NULL
, aarch64_none
, NULL
, 0, NULL
}
320 /* Architectures implementing AArch64. */
321 static const struct processor all_architectures
[] =
323 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
324 {NAME, CORE, #ARCH, FLAGS, NULL},
325 #include "aarch64-arches.def"
327 {NULL
, aarch64_none
, NULL
, 0, NULL
}
330 /* Target specification. These are populated as commandline arguments
331 are processed, or NULL if not specified. */
332 static const struct processor
*selected_arch
;
333 static const struct processor
*selected_cpu
;
334 static const struct processor
*selected_tune
;
336 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
338 /* An ISA extension in the co-processor and main instruction set space. */
339 struct aarch64_option_extension
341 const char *const name
;
342 const unsigned long flags_on
;
343 const unsigned long flags_off
;
346 /* ISA extensions in AArch64. */
347 static const struct aarch64_option_extension all_extensions
[] =
349 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
350 {NAME, FLAGS_ON, FLAGS_OFF},
351 #include "aarch64-option-extensions.def"
352 #undef AARCH64_OPT_EXTENSION
356 /* Used to track the size of an address when generating a pre/post
357 increment address. */
358 static enum machine_mode aarch64_memory_reference_mode
;
360 /* Used to force GTY into this file. */
361 static GTY(()) int gty_dummy
;
363 /* A table of valid AArch64 "bitmask immediate" values for
364 logical instructions. */
366 #define AARCH64_NUM_BITMASKS 5334
367 static unsigned HOST_WIDE_INT aarch64_bitmasks
[AARCH64_NUM_BITMASKS
];
369 typedef enum aarch64_cond_code
371 AARCH64_EQ
= 0, AARCH64_NE
, AARCH64_CS
, AARCH64_CC
, AARCH64_MI
, AARCH64_PL
,
372 AARCH64_VS
, AARCH64_VC
, AARCH64_HI
, AARCH64_LS
, AARCH64_GE
, AARCH64_LT
,
373 AARCH64_GT
, AARCH64_LE
, AARCH64_AL
, AARCH64_NV
377 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
379 /* The condition codes of the processor, and the inverse function. */
380 static const char * const aarch64_condition_codes
[] =
382 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
383 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
386 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
388 aarch64_dbx_register_number (unsigned regno
)
390 if (GP_REGNUM_P (regno
))
391 return AARCH64_DWARF_R0
+ regno
- R0_REGNUM
;
392 else if (regno
== SP_REGNUM
)
393 return AARCH64_DWARF_SP
;
394 else if (FP_REGNUM_P (regno
))
395 return AARCH64_DWARF_V0
+ regno
- V0_REGNUM
;
397 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
398 equivalent DWARF register. */
399 return DWARF_FRAME_REGISTERS
;
402 /* Return TRUE if MODE is any of the large INT modes. */
404 aarch64_vect_struct_mode_p (enum machine_mode mode
)
406 return mode
== OImode
|| mode
== CImode
|| mode
== XImode
;
409 /* Return TRUE if MODE is any of the vector modes. */
411 aarch64_vector_mode_p (enum machine_mode mode
)
413 return aarch64_vector_mode_supported_p (mode
)
414 || aarch64_vect_struct_mode_p (mode
);
417 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
419 aarch64_array_mode_supported_p (enum machine_mode mode
,
420 unsigned HOST_WIDE_INT nelems
)
423 && AARCH64_VALID_SIMD_QREG_MODE (mode
)
424 && (nelems
>= 2 && nelems
<= 4))
430 /* Implement HARD_REGNO_NREGS. */
433 aarch64_hard_regno_nregs (unsigned regno
, enum machine_mode mode
)
435 switch (aarch64_regno_regclass (regno
))
439 return (GET_MODE_SIZE (mode
) + UNITS_PER_VREG
- 1) / UNITS_PER_VREG
;
441 return (GET_MODE_SIZE (mode
) + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
446 /* Implement HARD_REGNO_MODE_OK. */
449 aarch64_hard_regno_mode_ok (unsigned regno
, enum machine_mode mode
)
451 if (GET_MODE_CLASS (mode
) == MODE_CC
)
452 return regno
== CC_REGNUM
;
454 if (regno
== SP_REGNUM
)
455 /* The purpose of comparing with ptr_mode is to support the
456 global register variable associated with the stack pointer
457 register via the syntax of asm ("wsp") in ILP32. */
458 return mode
== Pmode
|| mode
== ptr_mode
;
460 if (regno
== FRAME_POINTER_REGNUM
|| regno
== ARG_POINTER_REGNUM
)
461 return mode
== Pmode
;
463 if (GP_REGNUM_P (regno
) && ! aarch64_vect_struct_mode_p (mode
))
466 if (FP_REGNUM_P (regno
))
468 if (aarch64_vect_struct_mode_p (mode
))
470 (regno
+ aarch64_hard_regno_nregs (regno
, mode
) - 1) <= V31_REGNUM
;
478 /* Implement HARD_REGNO_CALLER_SAVE_MODE. */
480 aarch64_hard_regno_caller_save_mode (unsigned regno
, unsigned nregs
,
481 enum machine_mode mode
)
483 /* Handle modes that fit within single registers. */
484 if (nregs
== 1 && GET_MODE_SIZE (mode
) <= 16)
486 if (GET_MODE_SIZE (mode
) >= 4)
491 /* Fall back to generic for multi-reg and very large modes. */
493 return choose_hard_reg_mode (regno
, nregs
, false);
496 /* Return true if calls to DECL should be treated as
497 long-calls (ie called via a register). */
499 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED
)
504 /* Return true if calls to symbol-ref SYM should be treated as
505 long-calls (ie called via a register). */
507 aarch64_is_long_call_p (rtx sym
)
509 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym
));
512 /* Return true if the offsets to a zero/sign-extract operation
513 represent an expression that matches an extend operation. The
514 operands represent the parameters from
516 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
518 aarch64_is_extend_from_extract (enum machine_mode mode
, rtx mult_imm
,
521 HOST_WIDE_INT mult_val
, extract_val
;
523 if (! CONST_INT_P (mult_imm
) || ! CONST_INT_P (extract_imm
))
526 mult_val
= INTVAL (mult_imm
);
527 extract_val
= INTVAL (extract_imm
);
530 && extract_val
< GET_MODE_BITSIZE (mode
)
531 && exact_log2 (extract_val
& ~7) > 0
532 && (extract_val
& 7) <= 4
533 && mult_val
== (1 << (extract_val
& 7)))
539 /* Emit an insn that's a simple single-set. Both the operands must be
540 known to be valid. */
542 emit_set_insn (rtx x
, rtx y
)
544 return emit_insn (gen_rtx_SET (VOIDmode
, x
, y
));
547 /* X and Y are two things to compare using CODE. Emit the compare insn and
548 return the rtx for register 0 in the proper mode. */
550 aarch64_gen_compare_reg (RTX_CODE code
, rtx x
, rtx y
)
552 enum machine_mode mode
= SELECT_CC_MODE (code
, x
, y
);
553 rtx cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
555 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
559 /* Build the SYMBOL_REF for __tls_get_addr. */
561 static GTY(()) rtx tls_get_addr_libfunc
;
564 aarch64_tls_get_addr (void)
566 if (!tls_get_addr_libfunc
)
567 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
568 return tls_get_addr_libfunc
;
571 /* Return the TLS model to use for ADDR. */
573 static enum tls_model
574 tls_symbolic_operand_type (rtx addr
)
576 enum tls_model tls_kind
= TLS_MODEL_NONE
;
579 if (GET_CODE (addr
) == CONST
)
581 split_const (addr
, &sym
, &addend
);
582 if (GET_CODE (sym
) == SYMBOL_REF
)
583 tls_kind
= SYMBOL_REF_TLS_MODEL (sym
);
585 else if (GET_CODE (addr
) == SYMBOL_REF
)
586 tls_kind
= SYMBOL_REF_TLS_MODEL (addr
);
591 /* We'll allow lo_sum's in addresses in our legitimate addresses
592 so that combine would take care of combining addresses where
593 necessary, but for generation purposes, we'll generate the address
596 tmp = hi (symbol_ref); adrp x1, foo
597 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
601 adrp x1, :got:foo adrp tmp, :tlsgd:foo
602 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
606 Load TLS symbol, depending on TLS mechanism and TLS access model.
608 Global Dynamic - Traditional TLS:
610 add dest, tmp, #:tlsgd_lo12:imm
613 Global Dynamic - TLS Descriptors:
614 adrp dest, :tlsdesc:imm
615 ldr tmp, [dest, #:tlsdesc_lo12:imm]
616 add dest, dest, #:tlsdesc_lo12:imm
623 adrp tmp, :gottprel:imm
624 ldr dest, [tmp, #:gottprel_lo12:imm]
629 add t0, tp, #:tprel_hi12:imm
630 add t0, #:tprel_lo12_nc:imm
634 aarch64_load_symref_appropriately (rtx dest
, rtx imm
,
635 enum aarch64_symbol_type type
)
639 case SYMBOL_SMALL_ABSOLUTE
:
641 /* In ILP32, the mode of dest can be either SImode or DImode. */
643 enum machine_mode mode
= GET_MODE (dest
);
645 gcc_assert (mode
== Pmode
|| mode
== ptr_mode
);
647 if (can_create_pseudo_p ())
648 tmp_reg
= gen_reg_rtx (mode
);
650 emit_move_insn (tmp_reg
, gen_rtx_HIGH (mode
, imm
));
651 emit_insn (gen_add_losym (dest
, tmp_reg
, imm
));
655 case SYMBOL_TINY_ABSOLUTE
:
656 emit_insn (gen_rtx_SET (Pmode
, dest
, imm
));
659 case SYMBOL_SMALL_GOT
:
661 /* In ILP32, the mode of dest can be either SImode or DImode,
662 while the got entry is always of SImode size. The mode of
663 dest depends on how dest is used: if dest is assigned to a
664 pointer (e.g. in the memory), it has SImode; it may have
665 DImode if dest is dereferenced to access the memory.
666 This is why we have to handle three different ldr_got_small
667 patterns here (two patterns for ILP32). */
669 enum machine_mode mode
= GET_MODE (dest
);
671 if (can_create_pseudo_p ())
672 tmp_reg
= gen_reg_rtx (mode
);
674 emit_move_insn (tmp_reg
, gen_rtx_HIGH (mode
, imm
));
675 if (mode
== ptr_mode
)
678 emit_insn (gen_ldr_got_small_di (dest
, tmp_reg
, imm
));
680 emit_insn (gen_ldr_got_small_si (dest
, tmp_reg
, imm
));
684 gcc_assert (mode
== Pmode
);
685 emit_insn (gen_ldr_got_small_sidi (dest
, tmp_reg
, imm
));
691 case SYMBOL_SMALL_TLSGD
:
694 rtx result
= gen_rtx_REG (Pmode
, R0_REGNUM
);
697 emit_call_insn (gen_tlsgd_small (result
, imm
));
698 insns
= get_insns ();
701 RTL_CONST_CALL_P (insns
) = 1;
702 emit_libcall_block (insns
, dest
, result
, imm
);
706 case SYMBOL_SMALL_TLSDESC
:
708 enum machine_mode mode
= GET_MODE (dest
);
709 rtx x0
= gen_rtx_REG (mode
, R0_REGNUM
);
712 gcc_assert (mode
== Pmode
|| mode
== ptr_mode
);
714 /* In ILP32, the got entry is always of SImode size. Unlike
715 small GOT, the dest is fixed at reg 0. */
717 emit_insn (gen_tlsdesc_small_si (imm
));
719 emit_insn (gen_tlsdesc_small_di (imm
));
720 tp
= aarch64_load_tp (NULL
);
723 tp
= gen_lowpart (mode
, tp
);
725 emit_insn (gen_rtx_SET (mode
, dest
, gen_rtx_PLUS (mode
, tp
, x0
)));
726 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
730 case SYMBOL_SMALL_GOTTPREL
:
732 /* In ILP32, the mode of dest can be either SImode or DImode,
733 while the got entry is always of SImode size. The mode of
734 dest depends on how dest is used: if dest is assigned to a
735 pointer (e.g. in the memory), it has SImode; it may have
736 DImode if dest is dereferenced to access the memory.
737 This is why we have to handle three different tlsie_small
738 patterns here (two patterns for ILP32). */
739 enum machine_mode mode
= GET_MODE (dest
);
740 rtx tmp_reg
= gen_reg_rtx (mode
);
741 rtx tp
= aarch64_load_tp (NULL
);
743 if (mode
== ptr_mode
)
746 emit_insn (gen_tlsie_small_di (tmp_reg
, imm
));
749 emit_insn (gen_tlsie_small_si (tmp_reg
, imm
));
750 tp
= gen_lowpart (mode
, tp
);
755 gcc_assert (mode
== Pmode
);
756 emit_insn (gen_tlsie_small_sidi (tmp_reg
, imm
));
759 emit_insn (gen_rtx_SET (mode
, dest
, gen_rtx_PLUS (mode
, tp
, tmp_reg
)));
760 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
764 case SYMBOL_SMALL_TPREL
:
766 rtx tp
= aarch64_load_tp (NULL
);
767 emit_insn (gen_tlsle_small (dest
, tp
, imm
));
768 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
772 case SYMBOL_TINY_GOT
:
773 emit_insn (gen_ldr_got_tiny (dest
, imm
));
781 /* Emit a move from SRC to DEST. Assume that the move expanders can
782 handle all moves if !can_create_pseudo_p (). The distinction is
783 important because, unlike emit_move_insn, the move expanders know
784 how to force Pmode objects into the constant pool even when the
785 constant pool address is not itself legitimate. */
787 aarch64_emit_move (rtx dest
, rtx src
)
789 return (can_create_pseudo_p ()
790 ? emit_move_insn (dest
, src
)
791 : emit_move_insn_1 (dest
, src
));
794 /* Split a 128-bit move operation into two 64-bit move operations,
795 taking care to handle partial overlap of register to register
796 copies. Special cases are needed when moving between GP regs and
797 FP regs. SRC can be a register, constant or memory; DST a register
798 or memory. If either operand is memory it must not have any side
801 aarch64_split_128bit_move (rtx dst
, rtx src
)
806 enum machine_mode mode
= GET_MODE (dst
);
808 gcc_assert (mode
== TImode
|| mode
== TFmode
);
809 gcc_assert (!(side_effects_p (src
) || side_effects_p (dst
)));
810 gcc_assert (mode
== GET_MODE (src
) || GET_MODE (src
) == VOIDmode
);
812 if (REG_P (dst
) && REG_P (src
))
814 int src_regno
= REGNO (src
);
815 int dst_regno
= REGNO (dst
);
817 /* Handle FP <-> GP regs. */
818 if (FP_REGNUM_P (dst_regno
) && GP_REGNUM_P (src_regno
))
820 src_lo
= gen_lowpart (word_mode
, src
);
821 src_hi
= gen_highpart (word_mode
, src
);
825 emit_insn (gen_aarch64_movtilow_di (dst
, src_lo
));
826 emit_insn (gen_aarch64_movtihigh_di (dst
, src_hi
));
830 emit_insn (gen_aarch64_movtflow_di (dst
, src_lo
));
831 emit_insn (gen_aarch64_movtfhigh_di (dst
, src_hi
));
835 else if (GP_REGNUM_P (dst_regno
) && FP_REGNUM_P (src_regno
))
837 dst_lo
= gen_lowpart (word_mode
, dst
);
838 dst_hi
= gen_highpart (word_mode
, dst
);
842 emit_insn (gen_aarch64_movdi_tilow (dst_lo
, src
));
843 emit_insn (gen_aarch64_movdi_tihigh (dst_hi
, src
));
847 emit_insn (gen_aarch64_movdi_tflow (dst_lo
, src
));
848 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi
, src
));
854 dst_lo
= gen_lowpart (word_mode
, dst
);
855 dst_hi
= gen_highpart (word_mode
, dst
);
856 src_lo
= gen_lowpart (word_mode
, src
);
857 src_hi
= gen_highpart_mode (word_mode
, mode
, src
);
859 /* At most one pairing may overlap. */
860 if (reg_overlap_mentioned_p (dst_lo
, src_hi
))
862 aarch64_emit_move (dst_hi
, src_hi
);
863 aarch64_emit_move (dst_lo
, src_lo
);
867 aarch64_emit_move (dst_lo
, src_lo
);
868 aarch64_emit_move (dst_hi
, src_hi
);
873 aarch64_split_128bit_move_p (rtx dst
, rtx src
)
875 return (! REG_P (src
)
876 || ! (FP_REGNUM_P (REGNO (dst
)) && FP_REGNUM_P (REGNO (src
))));
879 /* Split a complex SIMD combine. */
882 aarch64_split_simd_combine (rtx dst
, rtx src1
, rtx src2
)
884 enum machine_mode src_mode
= GET_MODE (src1
);
885 enum machine_mode dst_mode
= GET_MODE (dst
);
887 gcc_assert (VECTOR_MODE_P (dst_mode
));
889 if (REG_P (dst
) && REG_P (src1
) && REG_P (src2
))
891 rtx (*gen
) (rtx
, rtx
, rtx
);
896 gen
= gen_aarch64_simd_combinev8qi
;
899 gen
= gen_aarch64_simd_combinev4hi
;
902 gen
= gen_aarch64_simd_combinev2si
;
905 gen
= gen_aarch64_simd_combinev2sf
;
908 gen
= gen_aarch64_simd_combinedi
;
911 gen
= gen_aarch64_simd_combinedf
;
917 emit_insn (gen (dst
, src1
, src2
));
922 /* Split a complex SIMD move. */
925 aarch64_split_simd_move (rtx dst
, rtx src
)
927 enum machine_mode src_mode
= GET_MODE (src
);
928 enum machine_mode dst_mode
= GET_MODE (dst
);
930 gcc_assert (VECTOR_MODE_P (dst_mode
));
932 if (REG_P (dst
) && REG_P (src
))
934 rtx (*gen
) (rtx
, rtx
);
936 gcc_assert (VECTOR_MODE_P (src_mode
));
941 gen
= gen_aarch64_split_simd_movv16qi
;
944 gen
= gen_aarch64_split_simd_movv8hi
;
947 gen
= gen_aarch64_split_simd_movv4si
;
950 gen
= gen_aarch64_split_simd_movv2di
;
953 gen
= gen_aarch64_split_simd_movv4sf
;
956 gen
= gen_aarch64_split_simd_movv2df
;
962 emit_insn (gen (dst
, src
));
968 aarch64_force_temporary (enum machine_mode mode
, rtx x
, rtx value
)
970 if (can_create_pseudo_p ())
971 return force_reg (mode
, value
);
974 x
= aarch64_emit_move (x
, value
);
981 aarch64_add_offset (enum machine_mode mode
, rtx temp
, rtx reg
, HOST_WIDE_INT offset
)
983 if (!aarch64_plus_immediate (GEN_INT (offset
), mode
))
986 /* Load the full offset into a register. This
987 might be improvable in the future. */
988 high
= GEN_INT (offset
);
990 high
= aarch64_force_temporary (mode
, temp
, high
);
991 reg
= aarch64_force_temporary (mode
, temp
,
992 gen_rtx_PLUS (mode
, high
, reg
));
994 return plus_constant (mode
, reg
, offset
);
998 aarch64_expand_mov_immediate (rtx dest
, rtx imm
)
1000 enum machine_mode mode
= GET_MODE (dest
);
1001 unsigned HOST_WIDE_INT mask
;
1004 unsigned HOST_WIDE_INT val
;
1007 int one_match
, zero_match
;
1009 gcc_assert (mode
== SImode
|| mode
== DImode
);
1011 /* Check on what type of symbol it is. */
1012 if (GET_CODE (imm
) == SYMBOL_REF
1013 || GET_CODE (imm
) == LABEL_REF
1014 || GET_CODE (imm
) == CONST
)
1016 rtx mem
, base
, offset
;
1017 enum aarch64_symbol_type sty
;
1019 /* If we have (const (plus symbol offset)), separate out the offset
1020 before we start classifying the symbol. */
1021 split_const (imm
, &base
, &offset
);
1023 sty
= aarch64_classify_symbol (base
, SYMBOL_CONTEXT_ADR
);
1026 case SYMBOL_FORCE_TO_MEM
:
1027 if (offset
!= const0_rtx
1028 && targetm
.cannot_force_const_mem (mode
, imm
))
1030 gcc_assert (can_create_pseudo_p ());
1031 base
= aarch64_force_temporary (mode
, dest
, base
);
1032 base
= aarch64_add_offset (mode
, NULL
, base
, INTVAL (offset
));
1033 aarch64_emit_move (dest
, base
);
1036 mem
= force_const_mem (ptr_mode
, imm
);
1038 if (mode
!= ptr_mode
)
1039 mem
= gen_rtx_ZERO_EXTEND (mode
, mem
);
1040 emit_insn (gen_rtx_SET (VOIDmode
, dest
, mem
));
1043 case SYMBOL_SMALL_TLSGD
:
1044 case SYMBOL_SMALL_TLSDESC
:
1045 case SYMBOL_SMALL_GOTTPREL
:
1046 case SYMBOL_SMALL_GOT
:
1047 case SYMBOL_TINY_GOT
:
1048 if (offset
!= const0_rtx
)
1050 gcc_assert(can_create_pseudo_p ());
1051 base
= aarch64_force_temporary (mode
, dest
, base
);
1052 base
= aarch64_add_offset (mode
, NULL
, base
, INTVAL (offset
));
1053 aarch64_emit_move (dest
, base
);
1058 case SYMBOL_SMALL_TPREL
:
1059 case SYMBOL_SMALL_ABSOLUTE
:
1060 case SYMBOL_TINY_ABSOLUTE
:
1061 aarch64_load_symref_appropriately (dest
, imm
, sty
);
1069 if (CONST_INT_P (imm
) && aarch64_move_imm (INTVAL (imm
), mode
))
1071 emit_insn (gen_rtx_SET (VOIDmode
, dest
, imm
));
1075 if (!CONST_INT_P (imm
))
1077 if (GET_CODE (imm
) == HIGH
)
1078 emit_insn (gen_rtx_SET (VOIDmode
, dest
, imm
));
1081 rtx mem
= force_const_mem (mode
, imm
);
1083 emit_insn (gen_rtx_SET (VOIDmode
, dest
, mem
));
1091 /* We know we can't do this in 1 insn, and we must be able to do it
1092 in two; so don't mess around looking for sequences that don't buy
1094 emit_insn (gen_rtx_SET (VOIDmode
, dest
, GEN_INT (INTVAL (imm
) & 0xffff)));
1095 emit_insn (gen_insv_immsi (dest
, GEN_INT (16),
1096 GEN_INT ((INTVAL (imm
) >> 16) & 0xffff)));
1100 /* Remaining cases are all for DImode. */
1103 subtargets
= optimize
&& can_create_pseudo_p ();
1109 for (i
= 0; i
< 64; i
+= 16, mask
<<= 16)
1111 if ((val
& mask
) == 0)
1113 else if ((val
& mask
) == mask
)
1120 for (i
= 0; i
< 64; i
+= 16, mask
<<= 16)
1122 if ((val
& mask
) != mask
)
1124 emit_insn (gen_rtx_SET (VOIDmode
, dest
, GEN_INT (val
| mask
)));
1125 emit_insn (gen_insv_immdi (dest
, GEN_INT (i
),
1126 GEN_INT ((val
>> i
) & 0xffff)));
1133 if (zero_match
== 2)
1134 goto simple_sequence
;
1136 mask
= 0x0ffff0000UL
;
1137 for (i
= 16; i
< 64; i
+= 16, mask
<<= 16)
1139 HOST_WIDE_INT comp
= mask
& ~(mask
- 1);
1141 if (aarch64_uimm12_shift (val
- (val
& mask
)))
1143 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1145 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
, GEN_INT (val
& mask
)));
1146 emit_insn (gen_adddi3 (dest
, subtarget
,
1147 GEN_INT (val
- (val
& mask
))));
1150 else if (aarch64_uimm12_shift (-(val
- ((val
+ comp
) & mask
))))
1152 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1154 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1155 GEN_INT ((val
+ comp
) & mask
)));
1156 emit_insn (gen_adddi3 (dest
, subtarget
,
1157 GEN_INT (val
- ((val
+ comp
) & mask
))));
1160 else if (aarch64_uimm12_shift (val
- ((val
- comp
) | ~mask
)))
1162 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1164 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1165 GEN_INT ((val
- comp
) | ~mask
)));
1166 emit_insn (gen_adddi3 (dest
, subtarget
,
1167 GEN_INT (val
- ((val
- comp
) | ~mask
))));
1170 else if (aarch64_uimm12_shift (-(val
- (val
| ~mask
))))
1172 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1174 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1175 GEN_INT (val
| ~mask
)));
1176 emit_insn (gen_adddi3 (dest
, subtarget
,
1177 GEN_INT (val
- (val
| ~mask
))));
1182 /* See if we can do it by arithmetically combining two
1184 for (i
= 0; i
< AARCH64_NUM_BITMASKS
; i
++)
1189 if (aarch64_uimm12_shift (val
- aarch64_bitmasks
[i
])
1190 || aarch64_uimm12_shift (-val
+ aarch64_bitmasks
[i
]))
1192 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1193 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1194 GEN_INT (aarch64_bitmasks
[i
])));
1195 emit_insn (gen_adddi3 (dest
, subtarget
,
1196 GEN_INT (val
- aarch64_bitmasks
[i
])));
1200 for (j
= 0; j
< 64; j
+= 16, mask
<<= 16)
1202 if ((aarch64_bitmasks
[i
] & ~mask
) == (val
& ~mask
))
1204 emit_insn (gen_rtx_SET (VOIDmode
, dest
,
1205 GEN_INT (aarch64_bitmasks
[i
])));
1206 emit_insn (gen_insv_immdi (dest
, GEN_INT (j
),
1207 GEN_INT ((val
>> j
) & 0xffff)));
1213 /* See if we can do it by logically combining two immediates. */
1214 for (i
= 0; i
< AARCH64_NUM_BITMASKS
; i
++)
1216 if ((aarch64_bitmasks
[i
] & val
) == aarch64_bitmasks
[i
])
1220 for (j
= i
+ 1; j
< AARCH64_NUM_BITMASKS
; j
++)
1221 if (val
== (aarch64_bitmasks
[i
] | aarch64_bitmasks
[j
]))
1223 subtarget
= subtargets
? gen_reg_rtx (mode
) : dest
;
1224 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1225 GEN_INT (aarch64_bitmasks
[i
])));
1226 emit_insn (gen_iordi3 (dest
, subtarget
,
1227 GEN_INT (aarch64_bitmasks
[j
])));
1231 else if ((val
& aarch64_bitmasks
[i
]) == val
)
1235 for (j
= i
+ 1; j
< AARCH64_NUM_BITMASKS
; j
++)
1236 if (val
== (aarch64_bitmasks
[j
] & aarch64_bitmasks
[i
]))
1239 subtarget
= subtargets
? gen_reg_rtx (mode
) : dest
;
1240 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1241 GEN_INT (aarch64_bitmasks
[j
])));
1242 emit_insn (gen_anddi3 (dest
, subtarget
,
1243 GEN_INT (aarch64_bitmasks
[i
])));
1252 for (i
= 0; i
< 64; i
+= 16, mask
<<= 16)
1254 if ((val
& mask
) != 0)
1258 emit_insn (gen_rtx_SET (VOIDmode
, dest
,
1259 GEN_INT (val
& mask
)));
1263 emit_insn (gen_insv_immdi (dest
, GEN_INT (i
),
1264 GEN_INT ((val
>> i
) & 0xffff)));
1270 aarch64_function_ok_for_sibcall (tree decl
, tree exp ATTRIBUTE_UNUSED
)
1272 /* Indirect calls are not currently supported. */
1276 /* Cannot tail-call to long-calls, since these are outside of the
1277 range of a branch instruction (we could handle this if we added
1278 support for indirect tail-calls). */
1279 if (aarch64_decl_is_long_call_p (decl
))
1285 /* Implement TARGET_PASS_BY_REFERENCE. */
1288 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED
,
1289 enum machine_mode mode
,
1291 bool named ATTRIBUTE_UNUSED
)
1294 enum machine_mode dummymode
;
1297 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1298 size
= (mode
== BLKmode
&& type
)
1299 ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1301 /* Aggregates are passed by reference based on their size. */
1302 if (type
&& AGGREGATE_TYPE_P (type
))
1304 size
= int_size_in_bytes (type
);
1307 /* Variable sized arguments are always returned by reference. */
1311 /* Can this be a candidate to be passed in fp/simd register(s)? */
1312 if (aarch64_vfp_is_call_or_return_candidate (mode
, type
,
1317 /* Arguments which are variable sized or larger than 2 registers are
1318 passed by reference unless they are a homogeneous floating point
1320 return size
> 2 * UNITS_PER_WORD
;
1323 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1325 aarch64_return_in_msb (const_tree valtype
)
1327 enum machine_mode dummy_mode
;
1330 /* Never happens in little-endian mode. */
1331 if (!BYTES_BIG_ENDIAN
)
1334 /* Only composite types smaller than or equal to 16 bytes can
1335 be potentially returned in registers. */
1336 if (!aarch64_composite_type_p (valtype
, TYPE_MODE (valtype
))
1337 || int_size_in_bytes (valtype
) <= 0
1338 || int_size_in_bytes (valtype
) > 16)
1341 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1342 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1343 is always passed/returned in the least significant bits of fp/simd
1345 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype
), valtype
,
1346 &dummy_mode
, &dummy_int
, NULL
))
1352 /* Implement TARGET_FUNCTION_VALUE.
1353 Define how to find the value returned by a function. */
1356 aarch64_function_value (const_tree type
, const_tree func
,
1357 bool outgoing ATTRIBUTE_UNUSED
)
1359 enum machine_mode mode
;
1362 enum machine_mode ag_mode
;
1364 mode
= TYPE_MODE (type
);
1365 if (INTEGRAL_TYPE_P (type
))
1366 mode
= promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
1368 if (aarch64_return_in_msb (type
))
1370 HOST_WIDE_INT size
= int_size_in_bytes (type
);
1372 if (size
% UNITS_PER_WORD
!= 0)
1374 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
1375 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
1379 if (aarch64_vfp_is_call_or_return_candidate (mode
, type
,
1380 &ag_mode
, &count
, NULL
))
1382 if (!aarch64_composite_type_p (type
, mode
))
1384 gcc_assert (count
== 1 && mode
== ag_mode
);
1385 return gen_rtx_REG (mode
, V0_REGNUM
);
1392 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
1393 for (i
= 0; i
< count
; i
++)
1395 rtx tmp
= gen_rtx_REG (ag_mode
, V0_REGNUM
+ i
);
1396 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
1397 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
1398 XVECEXP (par
, 0, i
) = tmp
;
1404 return gen_rtx_REG (mode
, R0_REGNUM
);
1407 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1408 Return true if REGNO is the number of a hard register in which the values
1409 of called function may come back. */
1412 aarch64_function_value_regno_p (const unsigned int regno
)
1414 /* Maximum of 16 bytes can be returned in the general registers. Examples
1415 of 16-byte return values are: 128-bit integers and 16-byte small
1416 structures (excluding homogeneous floating-point aggregates). */
1417 if (regno
== R0_REGNUM
|| regno
== R1_REGNUM
)
1420 /* Up to four fp/simd registers can return a function value, e.g. a
1421 homogeneous floating-point aggregate having four members. */
1422 if (regno
>= V0_REGNUM
&& regno
< V0_REGNUM
+ HA_MAX_NUM_FLDS
)
1423 return !TARGET_GENERAL_REGS_ONLY
;
1428 /* Implement TARGET_RETURN_IN_MEMORY.
1430 If the type T of the result of a function is such that
1432 would require that arg be passed as a value in a register (or set of
1433 registers) according to the parameter passing rules, then the result
1434 is returned in the same registers as would be used for such an
1438 aarch64_return_in_memory (const_tree type
, const_tree fndecl ATTRIBUTE_UNUSED
)
1441 enum machine_mode ag_mode
;
1444 if (!AGGREGATE_TYPE_P (type
)
1445 && TREE_CODE (type
) != COMPLEX_TYPE
1446 && TREE_CODE (type
) != VECTOR_TYPE
)
1447 /* Simple scalar types always returned in registers. */
1450 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type
),
1457 /* Types larger than 2 registers returned in memory. */
1458 size
= int_size_in_bytes (type
);
1459 return (size
< 0 || size
> 2 * UNITS_PER_WORD
);
1463 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v
, enum machine_mode mode
,
1464 const_tree type
, int *nregs
)
1466 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
1467 return aarch64_vfp_is_call_or_return_candidate (mode
,
1469 &pcum
->aapcs_vfp_rmode
,
1474 /* Given MODE and TYPE of a function argument, return the alignment in
1475 bits. The idea is to suppress any stronger alignment requested by
1476 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1477 This is a helper function for local use only. */
1480 aarch64_function_arg_alignment (enum machine_mode mode
, const_tree type
)
1482 unsigned int alignment
;
1486 if (!integer_zerop (TYPE_SIZE (type
)))
1488 if (TYPE_MODE (type
) == mode
)
1489 alignment
= TYPE_ALIGN (type
);
1491 alignment
= GET_MODE_ALIGNMENT (mode
);
1497 alignment
= GET_MODE_ALIGNMENT (mode
);
1502 /* Layout a function argument according to the AAPCS64 rules. The rule
1503 numbers refer to the rule numbers in the AAPCS64. */
1506 aarch64_layout_arg (cumulative_args_t pcum_v
, enum machine_mode mode
,
1508 bool named ATTRIBUTE_UNUSED
)
1510 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
1511 int ncrn
, nvrn
, nregs
;
1512 bool allocate_ncrn
, allocate_nvrn
;
1514 /* We need to do this once per argument. */
1515 if (pcum
->aapcs_arg_processed
)
1518 pcum
->aapcs_arg_processed
= true;
1520 allocate_ncrn
= (type
) ? !(FLOAT_TYPE_P (type
)) : !FLOAT_MODE_P (mode
);
1521 allocate_nvrn
= aarch64_vfp_is_call_candidate (pcum_v
,
1526 /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1527 The following code thus handles passing by SIMD/FP registers first. */
1529 nvrn
= pcum
->aapcs_nvrn
;
1531 /* C1 - C5 for floating point, homogenous floating point aggregates (HFA)
1532 and homogenous short-vector aggregates (HVA). */
1535 if (nvrn
+ nregs
<= NUM_FP_ARG_REGS
)
1537 pcum
->aapcs_nextnvrn
= nvrn
+ nregs
;
1538 if (!aarch64_composite_type_p (type
, mode
))
1540 gcc_assert (nregs
== 1);
1541 pcum
->aapcs_reg
= gen_rtx_REG (mode
, V0_REGNUM
+ nvrn
);
1547 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nregs
));
1548 for (i
= 0; i
< nregs
; i
++)
1550 rtx tmp
= gen_rtx_REG (pcum
->aapcs_vfp_rmode
,
1551 V0_REGNUM
+ nvrn
+ i
);
1552 tmp
= gen_rtx_EXPR_LIST
1554 GEN_INT (i
* GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
)));
1555 XVECEXP (par
, 0, i
) = tmp
;
1557 pcum
->aapcs_reg
= par
;
1563 /* C.3 NSRN is set to 8. */
1564 pcum
->aapcs_nextnvrn
= NUM_FP_ARG_REGS
;
1569 ncrn
= pcum
->aapcs_ncrn
;
1570 nregs
= ((type
? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
))
1571 + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
1574 /* C6 - C9. though the sign and zero extension semantics are
1575 handled elsewhere. This is the case where the argument fits
1576 entirely general registers. */
1577 if (allocate_ncrn
&& (ncrn
+ nregs
<= NUM_ARG_REGS
))
1579 unsigned int alignment
= aarch64_function_arg_alignment (mode
, type
);
1581 gcc_assert (nregs
== 0 || nregs
== 1 || nregs
== 2);
1583 /* C.8 if the argument has an alignment of 16 then the NGRN is
1584 rounded up to the next even number. */
1585 if (nregs
== 2 && alignment
== 16 * BITS_PER_UNIT
&& ncrn
% 2)
1588 gcc_assert (ncrn
+ nregs
<= NUM_ARG_REGS
);
1590 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1591 A reg is still generated for it, but the caller should be smart
1592 enough not to use it. */
1593 if (nregs
== 0 || nregs
== 1 || GET_MODE_CLASS (mode
) == MODE_INT
)
1595 pcum
->aapcs_reg
= gen_rtx_REG (mode
, R0_REGNUM
+ ncrn
);
1602 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nregs
));
1603 for (i
= 0; i
< nregs
; i
++)
1605 rtx tmp
= gen_rtx_REG (word_mode
, R0_REGNUM
+ ncrn
+ i
);
1606 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
1607 GEN_INT (i
* UNITS_PER_WORD
));
1608 XVECEXP (par
, 0, i
) = tmp
;
1610 pcum
->aapcs_reg
= par
;
1613 pcum
->aapcs_nextncrn
= ncrn
+ nregs
;
1618 pcum
->aapcs_nextncrn
= NUM_ARG_REGS
;
1620 /* The argument is passed on stack; record the needed number of words for
1621 this argument (we can re-use NREGS) and align the total size if
1624 pcum
->aapcs_stack_words
= nregs
;
1625 if (aarch64_function_arg_alignment (mode
, type
) == 16 * BITS_PER_UNIT
)
1626 pcum
->aapcs_stack_size
= AARCH64_ROUND_UP (pcum
->aapcs_stack_size
,
1627 16 / UNITS_PER_WORD
) + 1;
1631 /* Implement TARGET_FUNCTION_ARG. */
1634 aarch64_function_arg (cumulative_args_t pcum_v
, enum machine_mode mode
,
1635 const_tree type
, bool named
)
1637 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
1638 gcc_assert (pcum
->pcs_variant
== ARM_PCS_AAPCS64
);
1640 if (mode
== VOIDmode
)
1643 aarch64_layout_arg (pcum_v
, mode
, type
, named
);
1644 return pcum
->aapcs_reg
;
1648 aarch64_init_cumulative_args (CUMULATIVE_ARGS
*pcum
,
1649 const_tree fntype ATTRIBUTE_UNUSED
,
1650 rtx libname ATTRIBUTE_UNUSED
,
1651 const_tree fndecl ATTRIBUTE_UNUSED
,
1652 unsigned n_named ATTRIBUTE_UNUSED
)
1654 pcum
->aapcs_ncrn
= 0;
1655 pcum
->aapcs_nvrn
= 0;
1656 pcum
->aapcs_nextncrn
= 0;
1657 pcum
->aapcs_nextnvrn
= 0;
1658 pcum
->pcs_variant
= ARM_PCS_AAPCS64
;
1659 pcum
->aapcs_reg
= NULL_RTX
;
1660 pcum
->aapcs_arg_processed
= false;
1661 pcum
->aapcs_stack_words
= 0;
1662 pcum
->aapcs_stack_size
= 0;
1668 aarch64_function_arg_advance (cumulative_args_t pcum_v
,
1669 enum machine_mode mode
,
1673 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
1674 if (pcum
->pcs_variant
== ARM_PCS_AAPCS64
)
1676 aarch64_layout_arg (pcum_v
, mode
, type
, named
);
1677 gcc_assert ((pcum
->aapcs_reg
!= NULL_RTX
)
1678 != (pcum
->aapcs_stack_words
!= 0));
1679 pcum
->aapcs_arg_processed
= false;
1680 pcum
->aapcs_ncrn
= pcum
->aapcs_nextncrn
;
1681 pcum
->aapcs_nvrn
= pcum
->aapcs_nextnvrn
;
1682 pcum
->aapcs_stack_size
+= pcum
->aapcs_stack_words
;
1683 pcum
->aapcs_stack_words
= 0;
1684 pcum
->aapcs_reg
= NULL_RTX
;
1689 aarch64_function_arg_regno_p (unsigned regno
)
1691 return ((GP_REGNUM_P (regno
) && regno
< R0_REGNUM
+ NUM_ARG_REGS
)
1692 || (FP_REGNUM_P (regno
) && regno
< V0_REGNUM
+ NUM_FP_ARG_REGS
));
1695 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1696 PARM_BOUNDARY bits of alignment, but will be given anything up
1697 to STACK_BOUNDARY bits if the type requires it. This makes sure
1698 that both before and after the layout of each argument, the Next
1699 Stacked Argument Address (NSAA) will have a minimum alignment of
1703 aarch64_function_arg_boundary (enum machine_mode mode
, const_tree type
)
1705 unsigned int alignment
= aarch64_function_arg_alignment (mode
, type
);
1707 if (alignment
< PARM_BOUNDARY
)
1708 alignment
= PARM_BOUNDARY
;
1709 if (alignment
> STACK_BOUNDARY
)
1710 alignment
= STACK_BOUNDARY
;
1714 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1716 Return true if an argument passed on the stack should be padded upwards,
1717 i.e. if the least-significant byte of the stack slot has useful data.
1719 Small aggregate types are placed in the lowest memory address.
1721 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1724 aarch64_pad_arg_upward (enum machine_mode mode
, const_tree type
)
1726 /* On little-endian targets, the least significant byte of every stack
1727 argument is passed at the lowest byte address of the stack slot. */
1728 if (!BYTES_BIG_ENDIAN
)
1731 /* Otherwise, integral, floating-point and pointer types are padded downward:
1732 the least significant byte of a stack argument is passed at the highest
1733 byte address of the stack slot. */
1735 ? (INTEGRAL_TYPE_P (type
) || SCALAR_FLOAT_TYPE_P (type
)
1736 || POINTER_TYPE_P (type
))
1737 : (SCALAR_INT_MODE_P (mode
) || SCALAR_FLOAT_MODE_P (mode
)))
1740 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1744 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1746 It specifies padding for the last (may also be the only)
1747 element of a block move between registers and memory. If
1748 assuming the block is in the memory, padding upward means that
1749 the last element is padded after its highest significant byte,
1750 while in downward padding, the last element is padded at the
1751 its least significant byte side.
1753 Small aggregates and small complex types are always padded
1756 We don't need to worry about homogeneous floating-point or
1757 short-vector aggregates; their move is not affected by the
1758 padding direction determined here. Regardless of endianness,
1759 each element of such an aggregate is put in the least
1760 significant bits of a fp/simd register.
1762 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1763 register has useful data, and return the opposite if the most
1764 significant byte does. */
1767 aarch64_pad_reg_upward (enum machine_mode mode
, const_tree type
,
1768 bool first ATTRIBUTE_UNUSED
)
1771 /* Small composite types are always padded upward. */
1772 if (BYTES_BIG_ENDIAN
&& aarch64_composite_type_p (type
, mode
))
1774 HOST_WIDE_INT size
= (type
? int_size_in_bytes (type
)
1775 : GET_MODE_SIZE (mode
));
1776 if (size
< 2 * UNITS_PER_WORD
)
1780 /* Otherwise, use the default padding. */
1781 return !BYTES_BIG_ENDIAN
;
1784 static enum machine_mode
1785 aarch64_libgcc_cmp_return_mode (void)
1791 aarch64_frame_pointer_required (void)
1793 /* If the function contains dynamic stack allocations, we need to
1794 use the frame pointer to access the static parts of the frame. */
1795 if (cfun
->calls_alloca
)
1798 /* In aarch64_override_options_after_change
1799 flag_omit_leaf_frame_pointer turns off the frame pointer by
1800 default. Turn it back on now if we've not got a leaf
1802 if (flag_omit_leaf_frame_pointer
1803 && (!crtl
->is_leaf
|| df_regs_ever_live_p (LR_REGNUM
)))
1809 /* Mark the registers that need to be saved by the callee and calculate
1810 the size of the callee-saved registers area and frame record (both FP
1811 and LR may be omitted). */
1813 aarch64_layout_frame (void)
1815 HOST_WIDE_INT offset
= 0;
1818 if (reload_completed
&& cfun
->machine
->frame
.laid_out
)
1821 /* First mark all the registers that really need to be saved... */
1822 for (regno
= R0_REGNUM
; regno
<= R30_REGNUM
; regno
++)
1823 cfun
->machine
->frame
.reg_offset
[regno
] = -1;
1825 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
1826 cfun
->machine
->frame
.reg_offset
[regno
] = -1;
1828 /* ... that includes the eh data registers (if needed)... */
1829 if (crtl
->calls_eh_return
)
1830 for (regno
= 0; EH_RETURN_DATA_REGNO (regno
) != INVALID_REGNUM
; regno
++)
1831 cfun
->machine
->frame
.reg_offset
[EH_RETURN_DATA_REGNO (regno
)] = 0;
1833 /* ... and any callee saved register that dataflow says is live. */
1834 for (regno
= R0_REGNUM
; regno
<= R30_REGNUM
; regno
++)
1835 if (df_regs_ever_live_p (regno
)
1836 && !call_used_regs
[regno
])
1837 cfun
->machine
->frame
.reg_offset
[regno
] = 0;
1839 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
1840 if (df_regs_ever_live_p (regno
)
1841 && !call_used_regs
[regno
])
1842 cfun
->machine
->frame
.reg_offset
[regno
] = 0;
1844 if (frame_pointer_needed
)
1846 cfun
->machine
->frame
.reg_offset
[R30_REGNUM
] = 0;
1847 cfun
->machine
->frame
.reg_offset
[R29_REGNUM
] = 0;
1848 cfun
->machine
->frame
.hardfp_offset
= 2 * UNITS_PER_WORD
;
1851 /* Now assign stack slots for them. */
1852 for (regno
= R0_REGNUM
; regno
<= R28_REGNUM
; regno
++)
1853 if (cfun
->machine
->frame
.reg_offset
[regno
] != -1)
1855 cfun
->machine
->frame
.reg_offset
[regno
] = offset
;
1856 offset
+= UNITS_PER_WORD
;
1859 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
1860 if (cfun
->machine
->frame
.reg_offset
[regno
] != -1)
1862 cfun
->machine
->frame
.reg_offset
[regno
] = offset
;
1863 offset
+= UNITS_PER_WORD
;
1866 if (frame_pointer_needed
)
1868 cfun
->machine
->frame
.reg_offset
[R29_REGNUM
] = offset
;
1869 offset
+= UNITS_PER_WORD
;
1872 if (cfun
->machine
->frame
.reg_offset
[R30_REGNUM
] != -1)
1874 cfun
->machine
->frame
.reg_offset
[R30_REGNUM
] = offset
;
1875 offset
+= UNITS_PER_WORD
;
1878 cfun
->machine
->frame
.padding0
=
1879 (AARCH64_ROUND_UP (offset
, STACK_BOUNDARY
/ BITS_PER_UNIT
) - offset
);
1880 offset
= AARCH64_ROUND_UP (offset
, STACK_BOUNDARY
/ BITS_PER_UNIT
);
1882 cfun
->machine
->frame
.saved_regs_size
= offset
;
1883 cfun
->machine
->frame
.laid_out
= true;
1886 /* Make the last instruction frame-related and note that it performs
1887 the operation described by FRAME_PATTERN. */
1890 aarch64_set_frame_expr (rtx frame_pattern
)
1894 insn
= get_last_insn ();
1895 RTX_FRAME_RELATED_P (insn
) = 1;
1896 RTX_FRAME_RELATED_P (frame_pattern
) = 1;
1897 REG_NOTES (insn
) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
1903 aarch64_register_saved_on_entry (int regno
)
1905 return cfun
->machine
->frame
.reg_offset
[regno
] != -1;
1910 aarch64_save_or_restore_fprs (int start_offset
, int increment
,
1911 bool restore
, rtx base_rtx
)
1917 rtx (*gen_mem_ref
)(enum machine_mode
, rtx
)
1918 = (frame_pointer_needed
)? gen_frame_mem
: gen_rtx_MEM
;
1921 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
1923 if (aarch64_register_saved_on_entry (regno
))
1926 mem
= gen_mem_ref (DFmode
,
1927 plus_constant (Pmode
,
1931 for (regno2
= regno
+ 1;
1932 regno2
<= V31_REGNUM
1933 && !aarch64_register_saved_on_entry (regno2
);
1938 if (regno2
<= V31_REGNUM
&&
1939 aarch64_register_saved_on_entry (regno2
))
1942 /* Next highest register to be saved. */
1943 mem2
= gen_mem_ref (DFmode
,
1947 start_offset
+ increment
));
1948 if (restore
== false)
1951 ( gen_store_pairdf (mem
, gen_rtx_REG (DFmode
, regno
),
1952 mem2
, gen_rtx_REG (DFmode
, regno2
)));
1958 ( gen_load_pairdf (gen_rtx_REG (DFmode
, regno
), mem
,
1959 gen_rtx_REG (DFmode
, regno2
), mem2
));
1961 add_reg_note (insn
, REG_CFA_RESTORE
,
1962 gen_rtx_REG (DFmode
, regno
));
1963 add_reg_note (insn
, REG_CFA_RESTORE
,
1964 gen_rtx_REG (DFmode
, regno2
));
1967 /* The first part of a frame-related parallel insn
1968 is always assumed to be relevant to the frame
1969 calculations; subsequent parts, are only
1970 frame-related if explicitly marked. */
1971 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 1)) = 1;
1973 start_offset
+= increment
* 2;
1977 if (restore
== false)
1978 insn
= emit_move_insn (mem
, gen_rtx_REG (DFmode
, regno
));
1981 insn
= emit_move_insn (gen_rtx_REG (DFmode
, regno
), mem
);
1982 add_reg_note (insn
, REG_CFA_RESTORE
,
1983 gen_rtx_REG (DImode
, regno
));
1985 start_offset
+= increment
;
1987 RTX_FRAME_RELATED_P (insn
) = 1;
1994 /* offset from the stack pointer of where the saves and
1995 restore's have to happen. */
1997 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset
,
2001 rtx base_rtx
= stack_pointer_rtx
;
2002 HOST_WIDE_INT start_offset
= offset
;
2003 HOST_WIDE_INT increment
= UNITS_PER_WORD
;
2004 rtx (*gen_mem_ref
)(enum machine_mode
, rtx
) = (frame_pointer_needed
)? gen_frame_mem
: gen_rtx_MEM
;
2005 unsigned limit
= (frame_pointer_needed
)? R28_REGNUM
: R30_REGNUM
;
2009 for (regno
= R0_REGNUM
; regno
<= limit
; regno
++)
2011 if (aarch64_register_saved_on_entry (regno
))
2014 mem
= gen_mem_ref (Pmode
,
2015 plus_constant (Pmode
,
2019 for (regno2
= regno
+ 1;
2021 && !aarch64_register_saved_on_entry (regno2
);
2026 if (regno2
<= limit
&&
2027 aarch64_register_saved_on_entry (regno2
))
2030 /* Next highest register to be saved. */
2031 mem2
= gen_mem_ref (Pmode
,
2035 start_offset
+ increment
));
2036 if (restore
== false)
2039 ( gen_store_pairdi (mem
, gen_rtx_REG (DImode
, regno
),
2040 mem2
, gen_rtx_REG (DImode
, regno2
)));
2046 ( gen_load_pairdi (gen_rtx_REG (DImode
, regno
), mem
,
2047 gen_rtx_REG (DImode
, regno2
), mem2
));
2049 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (DImode
, regno
));
2050 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (DImode
, regno2
));
2053 /* The first part of a frame-related parallel insn
2054 is always assumed to be relevant to the frame
2055 calculations; subsequent parts, are only
2056 frame-related if explicitly marked. */
2057 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0,
2060 start_offset
+= increment
* 2;
2064 if (restore
== false)
2065 insn
= emit_move_insn (mem
, gen_rtx_REG (DImode
, regno
));
2068 insn
= emit_move_insn (gen_rtx_REG (DImode
, regno
), mem
);
2069 add_reg_note (insn
, REG_CFA_RESTORE
, gen_rtx_REG (DImode
, regno
));
2071 start_offset
+= increment
;
2073 RTX_FRAME_RELATED_P (insn
) = 1;
2077 aarch64_save_or_restore_fprs (start_offset
, increment
, restore
, base_rtx
);
2081 /* AArch64 stack frames generated by this compiler look like:
2083 +-------------------------------+
2085 | incoming stack arguments |
2087 +-------------------------------+ <-- arg_pointer_rtx
2089 | callee-allocated save area |
2090 | for register varargs |
2092 +-------------------------------+ <-- frame_pointer_rtx
2096 +-------------------------------+
2098 +-------------------------------+ |
2101 | callee-saved registers | | frame.saved_regs_size
2103 +-------------------------------+ |
2105 +-------------------------------+ |
2107 P +-------------------------------+ <-- hard_frame_pointer_rtx
2108 | dynamic allocation |
2109 +-------------------------------+
2111 | outgoing stack arguments |
2113 +-------------------------------+ <-- stack_pointer_rtx
2115 Dynamic stack allocations such as alloca insert data at point P.
2116 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
2117 hard_frame_pointer_rtx unchanged. */
2119 /* Generate the prologue instructions for entry into a function.
2120 Establish the stack frame by decreasing the stack pointer with a
2121 properly calculated size and, if necessary, create a frame record
2122 filled with the values of LR and previous frame pointer. The
2123 current FP is also set up if it is in use. */
2126 aarch64_expand_prologue (void)
2128 /* sub sp, sp, #<frame_size>
2129 stp {fp, lr}, [sp, #<frame_size> - 16]
2130 add fp, sp, #<frame_size> - hardfp_offset
2131 stp {cs_reg}, [fp, #-16] etc.
2133 sub sp, sp, <final_adjustment_if_any>
2135 HOST_WIDE_INT original_frame_size
; /* local variables + vararg save */
2136 HOST_WIDE_INT frame_size
, offset
;
2137 HOST_WIDE_INT fp_offset
; /* FP offset from SP */
2140 aarch64_layout_frame ();
2141 original_frame_size
= get_frame_size () + cfun
->machine
->saved_varargs_size
;
2142 gcc_assert ((!cfun
->machine
->saved_varargs_size
|| cfun
->stdarg
)
2143 && (cfun
->stdarg
|| !cfun
->machine
->saved_varargs_size
));
2144 frame_size
= (original_frame_size
+ cfun
->machine
->frame
.saved_regs_size
2145 + crtl
->outgoing_args_size
);
2146 offset
= frame_size
= AARCH64_ROUND_UP (frame_size
,
2147 STACK_BOUNDARY
/ BITS_PER_UNIT
);
2149 if (flag_stack_usage_info
)
2150 current_function_static_stack_size
= frame_size
;
2153 - original_frame_size
2154 - cfun
->machine
->frame
.saved_regs_size
);
2156 /* Store pairs and load pairs have a range only -512 to 504. */
2159 /* When the frame has a large size, an initial decrease is done on
2160 the stack pointer to jump over the callee-allocated save area for
2161 register varargs, the local variable area and/or the callee-saved
2162 register area. This will allow the pre-index write-back
2163 store pair instructions to be used for setting up the stack frame
2165 offset
= original_frame_size
+ cfun
->machine
->frame
.saved_regs_size
;
2167 offset
= cfun
->machine
->frame
.saved_regs_size
;
2169 frame_size
-= (offset
+ crtl
->outgoing_args_size
);
2172 if (frame_size
>= 0x1000000)
2174 rtx op0
= gen_rtx_REG (Pmode
, IP0_REGNUM
);
2175 emit_move_insn (op0
, GEN_INT (-frame_size
));
2176 emit_insn (gen_add2_insn (stack_pointer_rtx
, op0
));
2177 aarch64_set_frame_expr (gen_rtx_SET
2178 (Pmode
, stack_pointer_rtx
,
2179 plus_constant (Pmode
,
2183 else if (frame_size
> 0)
2185 if ((frame_size
& 0xfff) != frame_size
)
2187 insn
= emit_insn (gen_add2_insn
2189 GEN_INT (-(frame_size
2190 & ~(HOST_WIDE_INT
)0xfff))));
2191 RTX_FRAME_RELATED_P (insn
) = 1;
2193 if ((frame_size
& 0xfff) != 0)
2195 insn
= emit_insn (gen_add2_insn
2197 GEN_INT (-(frame_size
2198 & (HOST_WIDE_INT
)0xfff))));
2199 RTX_FRAME_RELATED_P (insn
) = 1;
2208 /* Save the frame pointer and lr if the frame pointer is needed
2209 first. Make the frame pointer point to the location of the
2210 old frame pointer on the stack. */
2211 if (frame_pointer_needed
)
2217 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
,
2218 GEN_INT (-offset
)));
2219 RTX_FRAME_RELATED_P (insn
) = 1;
2220 aarch64_set_frame_expr (gen_rtx_SET
2221 (Pmode
, stack_pointer_rtx
,
2222 gen_rtx_MINUS (Pmode
,
2224 GEN_INT (offset
))));
2225 mem_fp
= gen_frame_mem (DImode
,
2226 plus_constant (Pmode
,
2229 mem_lr
= gen_frame_mem (DImode
,
2230 plus_constant (Pmode
,
2234 insn
= emit_insn (gen_store_pairdi (mem_fp
,
2235 hard_frame_pointer_rtx
,
2237 gen_rtx_REG (DImode
,
2242 insn
= emit_insn (gen_storewb_pairdi_di
2243 (stack_pointer_rtx
, stack_pointer_rtx
,
2244 hard_frame_pointer_rtx
,
2245 gen_rtx_REG (DImode
, LR_REGNUM
),
2247 GEN_INT (GET_MODE_SIZE (DImode
) - offset
)));
2248 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 2)) = 1;
2251 /* The first part of a frame-related parallel insn is always
2252 assumed to be relevant to the frame calculations;
2253 subsequent parts, are only frame-related if explicitly
2255 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 1)) = 1;
2256 RTX_FRAME_RELATED_P (insn
) = 1;
2258 /* Set up frame pointer to point to the location of the
2259 previous frame pointer on the stack. */
2260 insn
= emit_insn (gen_add3_insn (hard_frame_pointer_rtx
,
2262 GEN_INT (fp_offset
)));
2263 aarch64_set_frame_expr (gen_rtx_SET
2264 (Pmode
, hard_frame_pointer_rtx
,
2265 plus_constant (Pmode
,
2268 RTX_FRAME_RELATED_P (insn
) = 1;
2269 insn
= emit_insn (gen_stack_tie (stack_pointer_rtx
,
2270 hard_frame_pointer_rtx
));
2274 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
,
2275 GEN_INT (-offset
)));
2276 RTX_FRAME_RELATED_P (insn
) = 1;
2279 aarch64_save_or_restore_callee_save_registers
2280 (fp_offset
+ cfun
->machine
->frame
.hardfp_offset
, 0);
2283 /* when offset >= 512,
2284 sub sp, sp, #<outgoing_args_size> */
2285 if (frame_size
> -1)
2287 if (crtl
->outgoing_args_size
> 0)
2289 insn
= emit_insn (gen_add2_insn
2291 GEN_INT (- crtl
->outgoing_args_size
)));
2292 RTX_FRAME_RELATED_P (insn
) = 1;
2297 /* Generate the epilogue instructions for returning from a function. */
2299 aarch64_expand_epilogue (bool for_sibcall
)
2301 HOST_WIDE_INT original_frame_size
, frame_size
, offset
;
2302 HOST_WIDE_INT fp_offset
;
2306 aarch64_layout_frame ();
2307 original_frame_size
= get_frame_size () + cfun
->machine
->saved_varargs_size
;
2308 frame_size
= (original_frame_size
+ cfun
->machine
->frame
.saved_regs_size
2309 + crtl
->outgoing_args_size
);
2310 offset
= frame_size
= AARCH64_ROUND_UP (frame_size
,
2311 STACK_BOUNDARY
/ BITS_PER_UNIT
);
2314 - original_frame_size
2315 - cfun
->machine
->frame
.saved_regs_size
);
2317 cfa_reg
= frame_pointer_needed
? hard_frame_pointer_rtx
: stack_pointer_rtx
;
2319 /* Store pairs and load pairs have a range only -512 to 504. */
2322 offset
= original_frame_size
+ cfun
->machine
->frame
.saved_regs_size
;
2324 offset
= cfun
->machine
->frame
.saved_regs_size
;
2326 frame_size
-= (offset
+ crtl
->outgoing_args_size
);
2328 if (!frame_pointer_needed
&& crtl
->outgoing_args_size
> 0)
2330 insn
= emit_insn (gen_add2_insn
2332 GEN_INT (crtl
->outgoing_args_size
)));
2333 RTX_FRAME_RELATED_P (insn
) = 1;
2339 /* If there were outgoing arguments or we've done dynamic stack
2340 allocation, then restore the stack pointer from the frame
2341 pointer. This is at most one insn and more efficient than using
2342 GCC's internal mechanism. */
2343 if (frame_pointer_needed
2344 && (crtl
->outgoing_args_size
|| cfun
->calls_alloca
))
2346 insn
= emit_insn (gen_add3_insn (stack_pointer_rtx
,
2347 hard_frame_pointer_rtx
,
2348 GEN_INT (- fp_offset
)));
2349 RTX_FRAME_RELATED_P (insn
) = 1;
2350 /* As SP is set to (FP - fp_offset), according to the rules in
2351 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2352 from the value of SP from now on. */
2353 cfa_reg
= stack_pointer_rtx
;
2356 aarch64_save_or_restore_callee_save_registers
2357 (fp_offset
+ cfun
->machine
->frame
.hardfp_offset
, 1);
2359 /* Restore the frame pointer and lr if the frame pointer is needed. */
2362 if (frame_pointer_needed
)
2368 mem_fp
= gen_frame_mem (DImode
,
2369 plus_constant (Pmode
,
2372 mem_lr
= gen_frame_mem (DImode
,
2373 plus_constant (Pmode
,
2377 insn
= emit_insn (gen_load_pairdi (hard_frame_pointer_rtx
,
2379 gen_rtx_REG (DImode
,
2385 insn
= emit_insn (gen_loadwb_pairdi_di
2388 hard_frame_pointer_rtx
,
2389 gen_rtx_REG (DImode
, LR_REGNUM
),
2391 GEN_INT (GET_MODE_SIZE (DImode
) + offset
)));
2392 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 2)) = 1;
2393 add_reg_note (insn
, REG_CFA_ADJUST_CFA
,
2394 (gen_rtx_SET (Pmode
, stack_pointer_rtx
,
2395 plus_constant (Pmode
, cfa_reg
,
2399 /* The first part of a frame-related parallel insn
2400 is always assumed to be relevant to the frame
2401 calculations; subsequent parts, are only
2402 frame-related if explicitly marked. */
2403 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 1)) = 1;
2404 RTX_FRAME_RELATED_P (insn
) = 1;
2405 add_reg_note (insn
, REG_CFA_RESTORE
, hard_frame_pointer_rtx
);
2406 add_reg_note (insn
, REG_CFA_RESTORE
,
2407 gen_rtx_REG (DImode
, LR_REGNUM
));
2411 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
,
2413 RTX_FRAME_RELATED_P (insn
) = 1;
2418 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
,
2420 RTX_FRAME_RELATED_P (insn
) = 1;
2424 /* Stack adjustment for exception handler. */
2425 if (crtl
->calls_eh_return
)
2427 /* We need to unwind the stack by the offset computed by
2428 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2429 based on SP. Ideally we would update the SP and define the
2430 CFA along the lines of:
2432 SP = SP + EH_RETURN_STACKADJ_RTX
2433 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2435 However the dwarf emitter only understands a constant
2438 The solution chosen here is to use the otherwise unused IP0
2439 as a temporary register to hold the current SP value. The
2440 CFA is described using IP0 then SP is modified. */
2442 rtx ip0
= gen_rtx_REG (DImode
, IP0_REGNUM
);
2444 insn
= emit_move_insn (ip0
, stack_pointer_rtx
);
2445 add_reg_note (insn
, REG_CFA_DEF_CFA
, ip0
);
2446 RTX_FRAME_RELATED_P (insn
) = 1;
2448 emit_insn (gen_add2_insn (stack_pointer_rtx
, EH_RETURN_STACKADJ_RTX
));
2450 /* Ensure the assignment to IP0 does not get optimized away. */
2454 if (frame_size
> -1)
2456 if (frame_size
>= 0x1000000)
2458 rtx op0
= gen_rtx_REG (Pmode
, IP0_REGNUM
);
2459 emit_move_insn (op0
, GEN_INT (frame_size
));
2460 emit_insn (gen_add2_insn (stack_pointer_rtx
, op0
));
2461 aarch64_set_frame_expr (gen_rtx_SET
2462 (Pmode
, stack_pointer_rtx
,
2463 plus_constant (Pmode
,
2467 else if (frame_size
> 0)
2469 if ((frame_size
& 0xfff) != 0)
2471 insn
= emit_insn (gen_add2_insn
2473 GEN_INT ((frame_size
2474 & (HOST_WIDE_INT
) 0xfff))));
2475 RTX_FRAME_RELATED_P (insn
) = 1;
2477 if ((frame_size
& 0xfff) != frame_size
)
2479 insn
= emit_insn (gen_add2_insn
2481 GEN_INT ((frame_size
2482 & ~ (HOST_WIDE_INT
) 0xfff))));
2483 RTX_FRAME_RELATED_P (insn
) = 1;
2487 aarch64_set_frame_expr (gen_rtx_SET (Pmode
, stack_pointer_rtx
,
2488 plus_constant (Pmode
,
2493 emit_use (gen_rtx_REG (DImode
, LR_REGNUM
));
2495 emit_jump_insn (ret_rtx
);
2498 /* Return the place to copy the exception unwinding return address to.
2499 This will probably be a stack slot, but could (in theory be the
2500 return register). */
2502 aarch64_final_eh_return_addr (void)
/* NOTE(review): this chunk is a lossy extraction; declarations/braces from
   the original file are missing.  Tokens below are kept byte-identical.  */
2504 HOST_WIDE_INT original_frame_size
, frame_size
, offset
, fp_offset
;
/* Lay out the frame first so the reg_offset/saved_regs_size fields read
   below are valid.  */
2505 aarch64_layout_frame ();
2506 original_frame_size
= get_frame_size () + cfun
->machine
->saved_varargs_size
;
/* Whole frame = locals + varargs save area + callee-saved registers +
   outgoing argument area, rounded up to the stack alignment.  */
2507 frame_size
= (original_frame_size
+ cfun
->machine
->frame
.saved_regs_size
2508 + crtl
->outgoing_args_size
);
2509 offset
= frame_size
= AARCH64_ROUND_UP (frame_size
,
2510 STACK_BOUNDARY
/ BITS_PER_UNIT
);
2512 - original_frame_size
2513 - cfun
->machine
->frame
.saved_regs_size
;
/* LR not saved on the stack (negative offset): it is still in its
   register, so the unwind return address goes straight there.  */
2515 if (cfun
->machine
->frame
.reg_offset
[LR_REGNUM
] < 0)
2516 return gen_rtx_REG (DImode
, LR_REGNUM
);
2518 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2519 result in a store to save LR introduced by builtin_eh_return () being
2520 incorrectly deleted because the alias is not detected.
2521 So in the calculation of the address to copy the exception unwinding
2522 return address to, we note 2 cases.
2523 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2524 we return a SP-relative location since all the addresses are SP-relative
2525 in this case. This prevents the store from being optimized away.
2526 If the fp_offset is not 0, then the addresses will be FP-relative and
2527 therefore we return a FP-relative location. */
2529 if (frame_pointer_needed
)
2532 return gen_frame_mem (DImode
,
2533 plus_constant (Pmode
, hard_frame_pointer_rtx
, UNITS_PER_WORD
));
2535 return gen_frame_mem (DImode
,
2536 plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
));
2539 /* If FP is not needed, we calculate the location of LR, which would be
2540 at the top of the saved registers block. */
2542 return gen_frame_mem (DImode
,
2543 plus_constant (Pmode
,
2546 + cfun
->machine
->frame
.saved_regs_size
2547 - 2 * UNITS_PER_WORD
));
2550 /* Possibly output code to build up a constant in a register. For
2551 the benefit of the costs infrastructure, returns the number of
2552 instructions which would be emitted. GENERATE inhibits or
2553 enables code generation. */
2556 aarch64_build_constant (int regnum
, HOST_WIDE_INT val
, bool generate
)
/* Fast path: VAL is a valid logical (bitmask) immediate, so one move
   suffices.  */
2560 if (aarch64_bitmask_imm (val
, DImode
))
2563 emit_move_insn (gen_rtx_REG (Pmode
, regnum
), GEN_INT (val
))
;
/* Otherwise count, 16 bits at a time, how many MOVK instructions would
   follow an initial MOVZ versus an initial MOVN.  */
2571 HOST_WIDE_INT valp
= val
>> 16;
2575 for (i
= 16; i
< 64; i
+= 16)
2577 valm
= (valp
& 0xffff);
2588 /* zcount contains the number of additional MOVK instructions
2589 required if the constant is built up with an initial MOVZ instruction,
2590 while ncount is the number of MOVK instructions required if starting
2591 with a MOVN instruction. Choose the sequence that yields the fewest
2592 number of instructions, preferring MOVZ instructions when they are both
2594 if (ncount
< zcount
)
/* MOVN start: seed the register with VAL's low 16 bits over all-ones.  */
2597 emit_move_insn (gen_rtx_REG (Pmode
, regnum
),
2598 GEN_INT (val
| ~(HOST_WIDE_INT
) 0xffff));
/* MOVZ start: seed with just the low 16 bits.  */
2605 emit_move_insn (gen_rtx_REG (Pmode
, regnum
),
2606 GEN_INT (val
& 0xffff));
/* Patch each remaining 16-bit field that differs from the seed value
   (tval) with a MOVK-style insert.  */
2613 for (i
= 16; i
< 64; i
+= 16)
2615 if ((val
& 0xffff) != tval
)
2618 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode
, regnum
),
2620 GEN_INT (val
& 0xffff)));
/* Add DELTA to register REGNUM, clobbering SCRATCHREG when DELTA is too
   large for immediate forms.  NOTE(review): extraction is lossy here
   (e.g. the negative-delta handling of mdelta is not visible).  */
2630 aarch64_add_constant (int regnum
, int scratchreg
, HOST_WIDE_INT delta
)
2632 HOST_WIDE_INT mdelta
= delta
;
2633 rtx this_rtx
= gen_rtx_REG (Pmode
, regnum
);
2634 rtx scratch_rtx
= gen_rtx_REG (Pmode
, scratchreg
);
/* Magnitude beyond 24 bits: materialise DELTA in the scratch register
   and use a register-register add.  */
2639 if (mdelta
>= 4096 * 4096)
2641 (void) aarch64_build_constant (scratchreg
, delta
, true);
2642 emit_insn (gen_add3_insn (this_rtx
, this_rtx
, scratch_rtx
));
/* Otherwise handle the high 12-bit chunk via a shifted-by-12 add/sub.  */
2644 else if (mdelta
> 0)
2648 emit_insn (gen_rtx_SET (Pmode
, scratch_rtx
, GEN_INT (mdelta
/ 4096)));
2649 rtx shift
= gen_rtx_ASHIFT (Pmode
, scratch_rtx
, GEN_INT (12));
2651 emit_insn (gen_rtx_SET (Pmode
, this_rtx
,
2652 gen_rtx_MINUS (Pmode
, this_rtx
, shift
)));
2654 emit_insn (gen_rtx_SET (Pmode
, this_rtx
,
2655 gen_rtx_PLUS (Pmode
, this_rtx
, shift
)));
/* Finally apply the low 12-bit remainder, signed per DELTA's sign.  */
2657 if (mdelta
% 4096 != 0)
2659 scratch_rtx
= GEN_INT ((delta
< 0 ? -1 : 1) * (mdelta
% 4096));
2660 emit_insn (gen_rtx_SET (Pmode
, this_rtx
,
2661 gen_rtx_PLUS (Pmode
, this_rtx
, scratch_rtx
)));
2666 /* Output code to add DELTA to the first argument, and then jump
2667 to FUNCTION. Used for C++ multiple inheritance. */
2669 aarch64_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
2670 HOST_WIDE_INT delta
,
2671 HOST_WIDE_INT vcall_offset
,
2674 /* The this pointer is always in x0. Note that this differs from
2675 Arm where the this pointer maybe bumped to r1 if r0 is required
2676 to return a pointer to an aggregate. On AArch64 a result value
2677 pointer will be in x8. */
2678 int this_regno
= R0_REGNUM
;
2679 rtx this_rtx
, temp0
, temp1
, addr
, insn
, funexp
;
/* Pretend to be a post-reload pass: the thunk is emitted directly with
   hard registers (undone at the end of the function).  */
2681 reload_completed
= 1;
2682 emit_note (NOTE_INSN_PROLOGUE_END
);
/* No vcall offset: just bump `this' by DELTA, using IP1 as scratch.  */
2684 if (vcall_offset
== 0)
2685 aarch64_add_constant (this_regno
, IP1_REGNUM
, delta
);
2688 gcc_assert ((vcall_offset
& (POINTER_BYTES
- 1)) == 0);
2690 this_rtx
= gen_rtx_REG (Pmode
, this_regno
);
2691 temp0
= gen_rtx_REG (Pmode
, IP0_REGNUM
);
2692 temp1
= gen_rtx_REG (Pmode
, IP1_REGNUM
);
/* Small DELTA can be folded into the vtable-pointer load as a
   pre-modify address; otherwise add it separately first.  */
2697 if (delta
>= -256 && delta
< 256)
2698 addr
= gen_rtx_PRE_MODIFY (Pmode
, this_rtx
,
2699 plus_constant (Pmode
, this_rtx
, delta
));
2701 aarch64_add_constant (this_regno
, IP1_REGNUM
, delta
);
/* Load the vtable pointer; zero-extend when pointers are narrower than
   Pmode (ILP32).  */
2704 if (Pmode
== ptr_mode
)
2705 aarch64_emit_move (temp0
, gen_rtx_MEM (ptr_mode
, addr
));
2707 aarch64_emit_move (temp0
,
2708 gen_rtx_ZERO_EXTEND (Pmode
,
2709 gen_rtx_MEM (ptr_mode
, addr
)));
/* Form the address of the vtable slot, building VCALL_OFFSET in IP1
   when it does not fit an addressing-mode immediate.  */
2711 if (vcall_offset
>= -256 && vcall_offset
< 4096 * POINTER_BYTES
)
2712 addr
= plus_constant (Pmode
, temp0
, vcall_offset
);
2715 (void) aarch64_build_constant (IP1_REGNUM
, vcall_offset
, true);
2716 addr
= gen_rtx_PLUS (Pmode
, temp0
, temp1
);
2719 if (Pmode
== ptr_mode
)
2720 aarch64_emit_move (temp1
, gen_rtx_MEM (ptr_mode
,addr
));
2722 aarch64_emit_move (temp1
,
2723 gen_rtx_SIGN_EXTEND (Pmode
,
2724 gen_rtx_MEM (ptr_mode
, addr
)));
/* Apply the loaded adjustment to `this'.  */
2726 emit_insn (gen_add2_insn (this_rtx
, temp1
));
2729 /* Generate a tail call to the target function. */
2730 if (!TREE_USED (function
))
2732 assemble_external (function
);
2733 TREE_USED (function
) = 1;
2735 funexp
= XEXP (DECL_RTL (function
), 0);
2736 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
2737 insn
= emit_call_insn (gen_sibcall (funexp
, const0_rtx
, NULL_RTX
));
2738 SIBLING_CALL_P (insn
) = 1;
/* Run final on the generated insns to print the thunk's assembly.  */
2740 insn
= get_insns ();
2741 shorten_branches (insn
);
2742 final_start_function (insn
, file
, 1);
2743 final (insn
, file
, 1);
2744 final_end_function ();
2746 /* Stop pretending to be a post-reload pass. */
2747 reload_completed
= 0;
/* for_each_rtx callback: report (nonzero) any SYMBOL_REF carrying a TLS
   model; skip UNSPEC_TLS operands entirely.  */
2751 aarch64_tls_operand_p_1 (rtx
*x
, void *data ATTRIBUTE_UNUSED
)
2753 if (GET_CODE (*x
) == SYMBOL_REF
)
2754 return SYMBOL_REF_TLS_MODEL (*x
) != 0;
2756 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2757 TLS offsets, not real symbol references. */
2758 if (GET_CODE (*x
) == UNSPEC
2759 && XINT (*x
, 1) == UNSPEC_TLS
)
/* Return true if X contains a reference to a thread-local symbol;
   trivially false when the target lacks TLS support.  */
2766 aarch64_tls_referenced_p (rtx x
)
2768 if (!TARGET_HAVE_TLS
)
2771 return for_each_rtx (&x
, aarch64_tls_operand_p_1
, NULL
);
/* qsort/bsearch comparator for entries of aarch64_bitmasks[].
   NOTE(review): the comparison/return statements are not visible in this
   extraction -- presumably a three-way unsigned compare of *imm1/*imm2.  */
2776 aarch64_bitmasks_cmp (const void *i1
, const void *i2
)
2778 const unsigned HOST_WIDE_INT
*imm1
= (const unsigned HOST_WIDE_INT
*) i1
;
2779 const unsigned HOST_WIDE_INT
*imm2
= (const unsigned HOST_WIDE_INT
*) i2
;
/* Enumerate every valid AArch64 logical-immediate pattern (element size
   e = 2^log_e, s consecutive set bits rotated right by r, replicated to
   64 bits) into aarch64_bitmasks[], then sort it so aarch64_bitmask_imm
   can bsearch it.  */
2790 aarch64_build_bitmask_table (void)
2792 unsigned HOST_WIDE_INT mask
, imm
;
2793 unsigned int log_e
, e
, s
, r
;
2794 unsigned int nimms
= 0;
2796 for (log_e
= 1; log_e
<= 6; log_e
++)
2800 mask
= ~(HOST_WIDE_INT
) 0;
2802 mask
= ((HOST_WIDE_INT
) 1 << e
) - 1;
2803 for (s
= 1; s
< e
; s
++)
2805 for (r
= 0; r
< e
; r
++)
2807 /* set s consecutive bits to 1 (s < 64) */
2808 imm
= ((unsigned HOST_WIDE_INT
)1 << s
) - 1;
2809 /* rotate right by r */
2811 imm
= ((imm
>> r
) | (imm
<< (e
- r
))) & mask
;
2812 /* replicate the constant depending on SIMD size */
/* Deliberate fall-through: each case doubles the replicated width
   until the full 64 bits are covered.  */
2814 case 1: imm
|= (imm
<< 2);
2815 case 2: imm
|= (imm
<< 4);
2816 case 3: imm
|= (imm
<< 8);
2817 case 4: imm
|= (imm
<< 16);
2818 case 5: imm
|= (imm
<< 32);
2824 gcc_assert (nimms
< AARCH64_NUM_BITMASKS
);
2825 aarch64_bitmasks
[nimms
++] = imm
;
/* The enumeration must produce exactly the table's declared size.  */
2830 gcc_assert (nimms
== AARCH64_NUM_BITMASKS
);
2831 qsort (aarch64_bitmasks
, nimms
, sizeof (aarch64_bitmasks
[0]),
2832 aarch64_bitmasks_cmp
);
2836 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2837 a left shift of 0 or 12 bits. */
2839 aarch64_uimm12_shift (HOST_WIDE_INT val
)
/* Masking and comparing back to VAL proves no bits lie outside the
   candidate 12-bit field.  */
2841 return ((val
& (((HOST_WIDE_INT
) 0xfff) << 0)) == val
2842 || (val
& (((HOST_WIDE_INT
) 0xfff) << 12)) == val
2847 /* Return true if val is an immediate that can be loaded into a
2848 register by a MOVZ instruction. */
2850 aarch64_movw_imm (HOST_WIDE_INT val
, enum machine_mode mode
)
/* 64-bit modes additionally allow the 16-bit field at shifts 32/48.  */
2852 if (GET_MODE_SIZE (mode
) > 4)
2854 if ((val
& (((HOST_WIDE_INT
) 0xffff) << 32)) == val
2855 || (val
& (((HOST_WIDE_INT
) 0xffff) << 48)) == val
)
2860 /* Ignore sign extension. */
2861 val
&= (HOST_WIDE_INT
) 0xffffffff;
/* Field at shift 0 or 16 works in every mode.  */
2863 return ((val
& (((HOST_WIDE_INT
) 0xffff) << 0)) == val
2864 || (val
& (((HOST_WIDE_INT
) 0xffff) << 16)) == val
);
2868 /* Return true if val is a valid bitmask immediate. */
2870 aarch64_bitmask_imm (HOST_WIDE_INT val
, enum machine_mode mode
)
/* Narrow modes: truncate to 32 bits so the value matches the replicated
   patterns stored in the (64-bit) table.  */
2872 if (GET_MODE_SIZE (mode
) < 8)
2874 /* Replicate bit pattern. */
2875 val
&= (HOST_WIDE_INT
) 0xffffffff;
/* Binary search of the precomputed, sorted logical-immediate table.  */
2878 return bsearch (&val
, aarch64_bitmasks
, AARCH64_NUM_BITMASKS
,
2879 sizeof (aarch64_bitmasks
[0]), aarch64_bitmasks_cmp
) != NULL
;
2883 /* Return true if val is an immediate that can be loaded into a
2884 register in a single instruction. */
2886 aarch64_move_imm (HOST_WIDE_INT val
, enum machine_mode mode
)
/* MOVZ on VAL, MOVN on ~VAL, or a logical (bitmask) immediate.  */
2888 if (aarch64_movw_imm (val
, mode
) || aarch64_movw_imm (~val
, mode
))
2890 return aarch64_bitmask_imm (val
, mode
);
/* Implement TARGET_CANNOT_FORCE_CONST_MEM: true when X must not be
   spilled to the constant pool (HIGH parts, most symbols, TLS refs).  */
2894 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
2898 if (GET_CODE (x
) == HIGH
)
2901 split_const (x
, &base
, &offset
);
2902 if (GET_CODE (base
) == SYMBOL_REF
|| GET_CODE (base
) == LABEL_REF
)
2904 if (aarch64_classify_symbol (base
, SYMBOL_CONTEXT_ADR
)
2905 != SYMBOL_FORCE_TO_MEM
)
2908 /* Avoid generating a 64-bit relocation in ILP32; leave
2909 to aarch64_expand_mov_immediate to handle it properly. */
2910 return mode
!= ptr_mode
;
/* TLS references always need specialised sequences, never the pool.  */
2913 return aarch64_tls_referenced_p (x
);
2916 /* Return true if register REGNO is a valid index register.
2917 STRICT_P is true if REG_OK_STRICT is in effect. */
2920 aarch64_regno_ok_for_index_p (int regno
, bool strict_p
)
2922 if (!HARD_REGISTER_NUM_P (regno
))
/* Pseudo in strict mode: look up its assigned hard register.  */
2930 regno
= reg_renumber
[regno
];
2932 return GP_REGNUM_P (regno
);
2935 /* Return true if register REGNO is a valid base register for mode MODE.
2936 STRICT_P is true if REG_OK_STRICT is in effect. */
2939 aarch64_regno_ok_for_base_p (int regno
, bool strict_p
)
2941 if (!HARD_REGISTER_NUM_P (regno
))
/* Pseudo in strict mode: look up its assigned hard register.  */
2949 regno
= reg_renumber
[regno
];
2952 /* The fake registers will be eliminated to either the stack or
2953 hard frame pointer, both of which are usually valid base registers.
2954 Reload deals with the cases where the eliminated form isn't valid. */
2955 return (GP_REGNUM_P (regno
)
2956 || regno
== SP_REGNUM
2957 || regno
== FRAME_POINTER_REGNUM
2958 || regno
== ARG_POINTER_REGNUM
);
2961 /* Return true if X is a valid base register for mode MODE.
2962 STRICT_P is true if REG_OK_STRICT is in effect. */
2965 aarch64_base_register_rtx_p (rtx x
, bool strict_p
)
/* In non-strict mode a SUBREG of a register is acceptable too.  */
2967 if (!strict_p
&& GET_CODE (x
) == SUBREG
)
2970 return (REG_P (x
) && aarch64_regno_ok_for_base_p (REGNO (x
), strict_p
));
2973 /* Return true if address offset is a valid index. If it is, fill in INFO
2974 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2977 aarch64_classify_index (struct aarch64_address_info
*info
, rtx x
,
2978 enum machine_mode mode
, bool strict_p
)
2980 enum aarch64_address_type type
;
/* Each arm below recognises one RTL shape for an (optionally scaled,
   optionally sign/zero-extended) index register, recording the index
   REG and the left-shift amount.  (reg:P)  */
2985 if ((REG_P (x
) || GET_CODE (x
) == SUBREG
)
2986 && GET_MODE (x
) == Pmode
)
2988 type
= ADDRESS_REG_REG
;
2992 /* (sign_extend:DI (reg:SI)) */
2993 else if ((GET_CODE (x
) == SIGN_EXTEND
2994 || GET_CODE (x
) == ZERO_EXTEND
)
2995 && GET_MODE (x
) == DImode
2996 && GET_MODE (XEXP (x
, 0)) == SImode
)
2998 type
= (GET_CODE (x
) == SIGN_EXTEND
)
2999 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
3000 index
= XEXP (x
, 0);
3003 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3004 else if (GET_CODE (x
) == MULT
3005 && (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
3006 || GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
3007 && GET_MODE (XEXP (x
, 0)) == DImode
3008 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
3009 && CONST_INT_P (XEXP (x
, 1)))
3011 type
= (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
3012 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
3013 index
= XEXP (XEXP (x
, 0), 0);
3014 shift
= exact_log2 (INTVAL (XEXP (x
, 1)));
3016 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3017 else if (GET_CODE (x
) == ASHIFT
3018 && (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
3019 || GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
3020 && GET_MODE (XEXP (x
, 0)) == DImode
3021 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
3022 && CONST_INT_P (XEXP (x
, 1)))
3024 type
= (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
3025 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
3026 index
= XEXP (XEXP (x
, 0), 0);
3027 shift
= INTVAL (XEXP (x
, 1));
3029 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3030 else if ((GET_CODE (x
) == SIGN_EXTRACT
3031 || GET_CODE (x
) == ZERO_EXTRACT
)
3032 && GET_MODE (x
) == DImode
3033 && GET_CODE (XEXP (x
, 0)) == MULT
3034 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
3035 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
3037 type
= (GET_CODE (x
) == SIGN_EXTRACT
)
3038 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
3039 index
= XEXP (XEXP (x
, 0), 0);
3040 shift
= exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)));
/* The extract must take exactly the low 32+shift bits at position 0
   for this to be an extend-then-scale of a 32-bit value.  */
3041 if (INTVAL (XEXP (x
, 1)) != 32 + shift
3042 || INTVAL (XEXP (x
, 2)) != 0)
3045 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3046 (const_int 0xffffffff<<shift)) */
3047 else if (GET_CODE (x
) == AND
3048 && GET_MODE (x
) == DImode
3049 && GET_CODE (XEXP (x
, 0)) == MULT
3050 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
3051 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
3052 && CONST_INT_P (XEXP (x
, 1)))
3054 type
= ADDRESS_REG_UXTW
;
3055 index
= XEXP (XEXP (x
, 0), 0);
3056 shift
= exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)));
3057 if (INTVAL (XEXP (x
, 1)) != (HOST_WIDE_INT
)0xffffffff << shift
)
3060 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3061 else if ((GET_CODE (x
) == SIGN_EXTRACT
3062 || GET_CODE (x
) == ZERO_EXTRACT
)
3063 && GET_MODE (x
) == DImode
3064 && GET_CODE (XEXP (x
, 0)) == ASHIFT
3065 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
3066 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
3068 type
= (GET_CODE (x
) == SIGN_EXTRACT
)
3069 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
3070 index
= XEXP (XEXP (x
, 0), 0);
3071 shift
= INTVAL (XEXP (XEXP (x
, 0), 1));
3072 if (INTVAL (XEXP (x
, 1)) != 32 + shift
3073 || INTVAL (XEXP (x
, 2)) != 0)
3076 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3077 (const_int 0xffffffff<<shift)) */
3078 else if (GET_CODE (x
) == AND
3079 && GET_MODE (x
) == DImode
3080 && GET_CODE (XEXP (x
, 0)) == ASHIFT
3081 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
3082 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
3083 && CONST_INT_P (XEXP (x
, 1)))
3085 type
= ADDRESS_REG_UXTW
;
3086 index
= XEXP (XEXP (x
, 0), 0);
3087 shift
= INTVAL (XEXP (XEXP (x
, 0), 1));
3088 if (INTVAL (XEXP (x
, 1)) != (HOST_WIDE_INT
)0xffffffff << shift
)
3091 /* (mult:P (reg:P) (const_int scale)) */
3092 else if (GET_CODE (x
) == MULT
3093 && GET_MODE (x
) == Pmode
3094 && GET_MODE (XEXP (x
, 0)) == Pmode
3095 && CONST_INT_P (XEXP (x
, 1)))
3097 type
= ADDRESS_REG_REG
;
3098 index
= XEXP (x
, 0);
3099 shift
= exact_log2 (INTVAL (XEXP (x
, 1)));
3101 /* (ashift:P (reg:P) (const_int shift)) */
3102 else if (GET_CODE (x
) == ASHIFT
3103 && GET_MODE (x
) == Pmode
3104 && GET_MODE (XEXP (x
, 0)) == Pmode
3105 && CONST_INT_P (XEXP (x
, 1)))
3107 type
= ADDRESS_REG_REG
;
3108 index
= XEXP (x
, 0);
3109 shift
= INTVAL (XEXP (x
, 1));
3114 if (GET_CODE (index
) == SUBREG
)
3115 index
= SUBREG_REG (index
);
/* The shift, when present, must match the access size (scale by the
   mode size) and the index must be a valid index register.  */
3118 (shift
> 0 && shift
<= 3
3119 && (1 << shift
) == GET_MODE_SIZE (mode
)))
3121 && aarch64_regno_ok_for_index_p (REGNO (index
), strict_p
))
3124 info
->offset
= index
;
3125 info
->shift
= shift
;
/* True if OFFSET fits a signed 7-bit field scaled by the mode size
   (the load/store-pair offset range) and is a multiple of that size.  */
3133 offset_7bit_signed_scaled_p (enum machine_mode mode
, HOST_WIDE_INT offset
)
3135 return (offset
>= -64 * GET_MODE_SIZE (mode
)
3136 && offset
< 64 * GET_MODE_SIZE (mode
)
3137 && offset
% GET_MODE_SIZE (mode
) == 0);
/* True if OFFSET fits the signed 9-bit unscaled addressing range
   [-256, 256); the mode is irrelevant for unscaled offsets.  */
3141 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
3142 HOST_WIDE_INT offset
)
3144 return offset
>= -256 && offset
< 256;
/* True if OFFSET fits an unsigned 12-bit field scaled by the mode size.
   NOTE(review): the lower-bound (offset >= 0) check was lost in
   extraction.  */
3148 offset_12bit_unsigned_scaled_p (enum machine_mode mode
, HOST_WIDE_INT offset
)
3151 && offset
< 4096 * GET_MODE_SIZE (mode
)
3152 && offset
% GET_MODE_SIZE (mode
) == 0);
3155 /* Return true if X is a valid address for machine mode MODE. If it is,
3156 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3157 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3160 aarch64_classify_address (struct aarch64_address_info
*info
,
3161 rtx x
, enum machine_mode mode
,
3162 RTX_CODE outer_code
, bool strict_p
)
3164 enum rtx_code code
= GET_CODE (x
);
/* Register-index addressing is not available for load/store pairs or
   128-bit accesses.  */
3166 bool allow_reg_index_p
=
3167 outer_code
!= PARALLEL
&& GET_MODE_SIZE(mode
) != 16;
3169 /* Don't support anything other than POST_INC or REG addressing for
3171 if (aarch64_vector_mode_p (mode
)
3172 && (code
!= POST_INC
&& code
!= REG
))
/* Plain register base: offset 0.  */
3179 info
->type
= ADDRESS_REG_IMM
;
3181 info
->offset
= const0_rtx
;
3182 return aarch64_base_register_rtx_p (x
, strict_p
);
/* Base register plus constant-integer offset.  */
3187 if (GET_MODE_SIZE (mode
) != 0
3188 && CONST_INT_P (op1
)
3189 && aarch64_base_register_rtx_p (op0
, strict_p
))
3191 HOST_WIDE_INT offset
= INTVAL (op1
);
3193 info
->type
= ADDRESS_REG_IMM
;
3197 /* TImode and TFmode values are allowed in both pairs of X
3198 registers and individual Q registers. The available
3200 X,X: 7-bit signed scaled offset
3201 Q: 9-bit signed offset
3202 We conservatively require an offset representable in either mode.
3204 if (mode
== TImode
|| mode
== TFmode
)
3205 return (offset_7bit_signed_scaled_p (mode
, offset
)
3206 && offset_9bit_signed_unscaled_p (mode
, offset
));
3208 if (outer_code
== PARALLEL
)
3209 return ((GET_MODE_SIZE (mode
) == 4 || GET_MODE_SIZE (mode
) == 8)
3210 && offset_7bit_signed_scaled_p (mode
, offset
));
3212 return (offset_9bit_signed_unscaled_p (mode
, offset
)
3213 || offset_12bit_unsigned_scaled_p (mode
, offset
));
3216 if (allow_reg_index_p
)
3218 /* Look for base + (scaled/extended) index register. */
3219 if (aarch64_base_register_rtx_p (op0
, strict_p
)
3220 && aarch64_classify_index (info
, op1
, mode
, strict_p
))
/* Also accept the commuted form: index + base.  */
3225 if (aarch64_base_register_rtx_p (op1
, strict_p
)
3226 && aarch64_classify_index (info
, op0
, mode
, strict_p
))
/* Simple auto-inc/dec writeback: offset implied by the mode size.  */
3239 info
->type
= ADDRESS_REG_WB
;
3240 info
->base
= XEXP (x
, 0);
3241 info
->offset
= NULL_RTX
;
3242 return aarch64_base_register_rtx_p (info
->base
, strict_p
);
/* PRE/POST_MODIFY writeback: base updated by an explicit constant.  */
3246 info
->type
= ADDRESS_REG_WB
;
3247 info
->base
= XEXP (x
, 0);
3248 if (GET_CODE (XEXP (x
, 1)) == PLUS
3249 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
3250 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), info
->base
)
3251 && aarch64_base_register_rtx_p (info
->base
, strict_p
))
3253 HOST_WIDE_INT offset
;
3254 info
->offset
= XEXP (XEXP (x
, 1), 1);
3255 offset
= INTVAL (info
->offset
);
3257 /* TImode and TFmode values are allowed in both pairs of X
3258 registers and individual Q registers. The available
3260 X,X: 7-bit signed scaled offset
3261 Q: 9-bit signed offset
3262 We conservatively require an offset representable in either mode.
3264 if (mode
== TImode
|| mode
== TFmode
)
3265 return (offset_7bit_signed_scaled_p (mode
, offset
)
3266 && offset_9bit_signed_unscaled_p (mode
, offset
));
3268 if (outer_code
== PARALLEL
)
3269 return ((GET_MODE_SIZE (mode
) == 4 || GET_MODE_SIZE (mode
) == 8)
3270 && offset_7bit_signed_scaled_p (mode
, offset
));
3272 return offset_9bit_signed_unscaled_p (mode
, offset
);
3279 /* load literal: pc-relative constant pool entry. Only supported
3280 for SI mode or larger. */
3281 info
->type
= ADDRESS_SYMBOLIC
;
3282 if (outer_code
!= PARALLEL
&& GET_MODE_SIZE (mode
) >= 4)
3286 split_const (x
, &sym
, &addend
);
3287 return (GET_CODE (sym
) == LABEL_REF
3288 || (GET_CODE (sym
) == SYMBOL_REF
3289 && CONSTANT_POOL_ADDRESS_P (sym
)));
/* LO_SUM: base register plus a :lo12: relocation of a small-absolute
   symbol.  */
3294 info
->type
= ADDRESS_LO_SUM
;
3295 info
->base
= XEXP (x
, 0);
3296 info
->offset
= XEXP (x
, 1);
3297 if (allow_reg_index_p
3298 && aarch64_base_register_rtx_p (info
->base
, strict_p
))
3301 split_const (info
->offset
, &sym
, &offs
);
3302 if (GET_CODE (sym
) == SYMBOL_REF
3303 && (aarch64_classify_symbol (sym
, SYMBOL_CONTEXT_MEM
)
3304 == SYMBOL_SMALL_ABSOLUTE
))
3306 /* The symbol and offset must be aligned to the access size. */
3308 unsigned int ref_size
;
3310 if (CONSTANT_POOL_ADDRESS_P (sym
))
3311 align
= GET_MODE_ALIGNMENT (get_pool_mode (sym
));
3312 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym
))
3314 tree exp
= SYMBOL_REF_DECL (sym
);
3315 align
= TYPE_ALIGN (TREE_TYPE (exp
));
3316 align
= CONSTANT_ALIGNMENT (exp
, align
);
3318 else if (SYMBOL_REF_DECL (sym
))
3319 align
= DECL_ALIGN (SYMBOL_REF_DECL (sym
));
3320 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym
)
3321 && SYMBOL_REF_BLOCK (sym
) != NULL
)
3322 align
= SYMBOL_REF_BLOCK (sym
)->alignment
;
3324 align
= BITS_PER_UNIT
;
3326 ref_size
= GET_MODE_SIZE (mode
);
3328 ref_size
= GET_MODE_SIZE (DImode
);
/* Both the constant offset and the symbol's alignment must be
   multiples of the access size.  */
3330 return ((INTVAL (offs
) & (ref_size
- 1)) == 0
3331 && ((align
/ BITS_PER_UNIT
) & (ref_size
- 1)) == 0);
/* True if X, ignoring any constant offset, is a symbol or label ref.  */
3342 aarch64_symbolic_address_p (rtx x
)
3346 split_const (x
, &x
, &offset
);
3347 return GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
;
3350 /* Classify the base of symbolic expression X, given that X appears in
3353 enum aarch64_symbol_type
3354 aarch64_classify_symbolic_expression (rtx x
,
3355 enum aarch64_symbol_context context
)
/* Strip any constant addend, then classify the bare symbol.  */
3359 split_const (x
, &x
, &offset
);
3360 return aarch64_classify_symbol (x
, context
);
3364 /* Return TRUE if X is a legitimate address for accessing memory in
3367 aarch64_legitimate_address_hook_p (enum machine_mode mode
, rtx x
, bool strict_p
)
/* Target-hook wrapper: classify with MEM context, discard the details.  */
3369 struct aarch64_address_info addr
;
3371 return aarch64_classify_address (&addr
, x
, mode
, MEM
, strict_p
);
3374 /* Return TRUE if X is a legitimate address for accessing memory in
3375 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3378 aarch64_legitimate_address_p (enum machine_mode mode
, rtx x
,
3379 RTX_CODE outer_code
, bool strict_p
)
/* Like the hook variant, but lets the caller select the outer context
   (PARALLEL for load/store pairs).  */
3381 struct aarch64_address_info addr
;
3383 return aarch64_classify_address (&addr
, x
, mode
, outer_code
, strict_p
);
3386 /* Return TRUE if rtx X is immediate constant 0.0 */
3388 aarch64_float_const_zero_rtx_p (rtx x
)
3392 if (GET_MODE (x
) == VOIDmode
)
3395 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
/* -0.0 counts as zero only when signed zeros need not be honored.  */
3396 if (REAL_VALUE_MINUS_ZERO (r
))
3397 return !HONOR_SIGNED_ZEROS (GET_MODE (x
));
3398 return REAL_VALUES_EQUAL (r
, dconst0
);
3401 /* Return the fixed registers used for condition codes. */
3404 aarch64_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
/* NOTE(review): the *p1 assignment is not visible in this extraction --
   presumably CC_REGNUM; there is no second CC register.  */
3407 *p2
= INVALID_REGNUM
;
/* Implement SELECT_CC_MODE: pick the CC mode that records exactly the
   flag information a comparison of X with Y (under CODE) produces.  */
3412 aarch64_select_cc_mode (RTX_CODE code
, rtx x
, rtx y
)
3414 /* All floating point compares return CCFP if it is an equality
3415 comparison, and CCFPE otherwise. */
3416 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
/* Flag-setting arithmetic (adds/subs/ands/negs) against a comparison
   that only needs N and Z.  */
3443 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
3445 && (code
== EQ
|| code
== NE
|| code
== LT
|| code
== GE
)
3446 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
|| GET_CODE (x
) == AND
3447 || GET_CODE (x
) == NEG
))
3450 /* A compare with a shifted operand. Because of canonicalization,
3451 the comparison will have to be swapped when we emit the assembly
3453 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
3454 && (GET_CODE (y
) == REG
|| GET_CODE (y
) == SUBREG
)
3455 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
3456 || GET_CODE (x
) == LSHIFTRT
3457 || GET_CODE (x
) == ZERO_EXTEND
|| GET_CODE (x
) == SIGN_EXTEND
))
3460 /* Similarly for a negated operand, but we can only do this for
3462 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
3463 && (GET_CODE (y
) == REG
|| GET_CODE (y
) == SUBREG
)
3464 && (code
== EQ
|| code
== NE
)
3465 && GET_CODE (x
) == NEG
)
3468 /* A compare of a mode narrower than SI mode against zero can be done
3469 by extending the value in the comparison. */
3470 if ((GET_MODE (x
) == QImode
|| GET_MODE (x
) == HImode
)
3472 /* Only use sign-extension if we really need it. */
3473 return ((code
== GT
|| code
== GE
|| code
== LE
|| code
== LT
)
3474 ? CC_SESWPmode
: CC_ZESWPmode
);
3476 /* For everything else, return CCmode. */
/* Map comparison rtx X to the AArch64 condition code to emit, keyed by
   the CC mode of its first operand (the case tables below correspond to
   the different CC modes; presumably CCFP/CCFPE, CC, the swapped modes,
   CC_NZ and CC_Z -- the switch headers were lost in extraction).  */
3481 aarch64_get_condition_code (rtx x
)
3483 enum machine_mode mode
= GET_MODE (XEXP (x
, 0));
3484 enum rtx_code comp_code
= GET_CODE (x
);
/* If the operand is not already a CC register, compute the mode the
   comparison would select.  */
3486 if (GET_MODE_CLASS (mode
) != MODE_CC
)
3487 mode
= SELECT_CC_MODE (comp_code
, XEXP (x
, 0), XEXP (x
, 1));
/* Floating-point mapping: note LE/LT use LS/MI so unordered results
   are handled correctly.  */
3495 case GE
: return AARCH64_GE
;
3496 case GT
: return AARCH64_GT
;
3497 case LE
: return AARCH64_LS
;
3498 case LT
: return AARCH64_MI
;
3499 case NE
: return AARCH64_NE
;
3500 case EQ
: return AARCH64_EQ
;
3501 case ORDERED
: return AARCH64_VC
;
3502 case UNORDERED
: return AARCH64_VS
;
3503 case UNLT
: return AARCH64_LT
;
3504 case UNLE
: return AARCH64_LE
;
3505 case UNGT
: return AARCH64_HI
;
3506 case UNGE
: return AARCH64_PL
;
3507 default: gcc_unreachable ();
/* Plain integer mapping.  */
3514 case NE
: return AARCH64_NE
;
3515 case EQ
: return AARCH64_EQ
;
3516 case GE
: return AARCH64_GE
;
3517 case GT
: return AARCH64_GT
;
3518 case LE
: return AARCH64_LE
;
3519 case LT
: return AARCH64_LT
;
3520 case GEU
: return AARCH64_CS
;
3521 case GTU
: return AARCH64_HI
;
3522 case LEU
: return AARCH64_LS
;
3523 case LTU
: return AARCH64_CC
;
3524 default: gcc_unreachable ();
/* Swapped-operand mapping: each ordering condition is mirrored.  */
3533 case NE
: return AARCH64_NE
;
3534 case EQ
: return AARCH64_EQ
;
3535 case GE
: return AARCH64_LE
;
3536 case GT
: return AARCH64_LT
;
3537 case LE
: return AARCH64_GE
;
3538 case LT
: return AARCH64_GT
;
3539 case GEU
: return AARCH64_LS
;
3540 case GTU
: return AARCH64_CC
;
3541 case LEU
: return AARCH64_CS
;
3542 case LTU
: return AARCH64_HI
;
3543 default: gcc_unreachable ();
/* N/Z-only mapping: GE/LT test just the sign flag (PL/MI).  */
3550 case NE
: return AARCH64_NE
;
3551 case EQ
: return AARCH64_EQ
;
3552 case GE
: return AARCH64_PL
;
3553 case LT
: return AARCH64_MI
;
3554 default: gcc_unreachable ();
/* Z-only mapping: only equality is meaningful.  */
3561 case NE
: return AARCH64_NE
;
3562 case EQ
: return AARCH64_EQ
;
3563 default: gcc_unreachable ();
/* Return the number of set bits in VALUE (used by the '%t' operand
   printer below).  NOTE(review): the function body was lost in
   extraction -- verify against the upstream file.  */
3574 bit_count (unsigned HOST_WIDE_INT value
)
/* Implement TARGET_PRINT_OPERAND: print operand X to stream F according
   to the operand-modifier CODE used in the output templates.  The case
   labels of the outer switch on CODE were lost in extraction; the
   comments retained below identify each modifier's purpose.  */
3588 aarch64_print_operand (FILE *f
, rtx x
, char code
)
3592 /* An integer or symbol address without a preceding # sign. */
3594 switch (GET_CODE (x
))
3597 fprintf (f
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
3601 output_addr_const (f
, x
);
3605 if (GET_CODE (XEXP (x
, 0)) == PLUS
3606 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
3608 output_addr_const (f
, x
);
3614 output_operand_lossage ("Unsupported operand for code '%c'", code
);
3619 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3623 if (GET_CODE (x
) != CONST_INT
3624 || (n
= exact_log2 (INTVAL (x
) & ~7)) <= 0)
3626 output_operand_lossage ("invalid operand for '%%%c'", code
);
3642 output_operand_lossage ("invalid operand for '%%%c'", code
);
3652 /* Print N such that 2^N == X. */
3653 if (GET_CODE (x
) != CONST_INT
|| (n
= exact_log2 (INTVAL (x
))) < 0)
3655 output_operand_lossage ("invalid operand for '%%%c'", code
);
3659 asm_fprintf (f
, "%d", n
);
3664 /* Print the number of non-zero bits in X (a const_int). */
3665 if (GET_CODE (x
) != CONST_INT
)
3667 output_operand_lossage ("invalid operand for '%%%c'", code
);
3671 asm_fprintf (f
, "%u", bit_count (INTVAL (x
)));
3675 /* Print the higher numbered register of a pair (TImode) of regs. */
3676 if (GET_CODE (x
) != REG
|| !GP_REGNUM_P (REGNO (x
) + 1))
3678 output_operand_lossage ("invalid operand for '%%%c'", code
);
3682 asm_fprintf (f
, "%s", reg_names
[REGNO (x
) + 1]);
3686 /* Print a condition (eq, ne, etc). */
3688 /* CONST_TRUE_RTX means always -- that's the default. */
3689 if (x
== const_true_rtx
)
3692 if (!COMPARISON_P (x
))
3694 output_operand_lossage ("invalid operand for '%%%c'", code
);
3698 fputs (aarch64_condition_codes
[aarch64_get_condition_code (x
)], f
);
3702 /* Print the inverse of a condition (eq <-> ne, etc). */
3704 /* CONST_TRUE_RTX means never -- that's the default. */
3705 if (x
== const_true_rtx
)
3711 if (!COMPARISON_P (x
))
3713 output_operand_lossage ("invalid operand for '%%%c'", code
);
3717 fputs (aarch64_condition_codes
[AARCH64_INVERSE_CONDITION_CODE
3718 (aarch64_get_condition_code (x
))], f
);
3726 /* Print a scalar FP/SIMD register name. */
3727 if (!REG_P (x
) || !FP_REGNUM_P (REGNO (x
)))
3729 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code
);
3732 asm_fprintf (f
, "%c%d", code
, REGNO (x
) - V0_REGNUM
);
3739 /* Print the first FP/SIMD register name in a list. */
3740 if (!REG_P (x
) || !FP_REGNUM_P (REGNO (x
)))
3742 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code
);
3745 asm_fprintf (f
, "v%d", REGNO (x
) - V0_REGNUM
+ (code
- 'S'));
3749 /* Print bottom 16 bits of integer constant in hex. */
3750 if (GET_CODE (x
) != CONST_INT
)
3752 output_operand_lossage ("invalid operand for '%%%c'", code
);
3755 asm_fprintf (f
, "0x%wx", UINTVAL (x
) & 0xffff);
3760 /* Print a general register name or the zero register (32-bit or
3763 || (CONST_DOUBLE_P (x
) && aarch64_float_const_zero_rtx_p (x
)))
3765 asm_fprintf (f
, "%czr", code
);
3769 if (REG_P (x
) && GP_REGNUM_P (REGNO (x
)))
3771 asm_fprintf (f
, "%c%d", code
, REGNO (x
) - R0_REGNUM
);
3775 if (REG_P (x
) && REGNO (x
) == SP_REGNUM
)
3777 asm_fprintf (f
, "%ssp", code
== 'w' ? "w" : "");
3784 /* Print a normal operand, if it's a general register, then we
3788 output_operand_lossage ("missing operand");
3792 switch (GET_CODE (x
))
3795 asm_fprintf (f
, "%s", reg_names
[REGNO (x
)]);
/* Remember the access mode so the address printer can pick the right
   offset ranges/auto-inc sizes.  */
3799 aarch64_memory_reference_mode
= GET_MODE (x
);
3800 output_address (XEXP (x
, 0));
3805 output_addr_const (asm_out_file
, x
);
3809 asm_fprintf (f
, "%wd", INTVAL (x
));
3813 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_VECTOR_INT
)
3815 gcc_assert (aarch64_const_vec_all_same_int_p (x
,
3817 HOST_WIDE_INT_MAX
));
3818 asm_fprintf (f
, "%wd", INTVAL (CONST_VECTOR_ELT (x
, 0)));
3820 else if (aarch64_simd_imm_zero_p (x
, GET_MODE (x
)))
3829 /* CONST_DOUBLE can represent a double-width integer.
3830 In this case, the mode of x is VOIDmode. */
3831 if (GET_MODE (x
) == VOIDmode
)
3833 else if (aarch64_float_const_zero_rtx_p (x
))
3838 else if (aarch64_float_const_representable_p (x
))
3841 char float_buf
[buf_size
] = {'\0'};
3843 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
3844 real_to_decimal_for_mode (float_buf
, &r
,
3847 asm_fprintf (asm_out_file
, "%s", float_buf
);
3851 output_operand_lossage ("invalid constant");
3854 output_operand_lossage ("invalid operand");
/* Print the relocation prefix for the HIGH/ADRP part of a symbol.  */
3860 if (GET_CODE (x
) == HIGH
)
3863 switch (aarch64_classify_symbolic_expression (x
, SYMBOL_CONTEXT_ADR
))
3865 case SYMBOL_SMALL_GOT
:
3866 asm_fprintf (asm_out_file
, ":got:");
3869 case SYMBOL_SMALL_TLSGD
:
3870 asm_fprintf (asm_out_file
, ":tlsgd:");
3873 case SYMBOL_SMALL_TLSDESC
:
3874 asm_fprintf (asm_out_file
, ":tlsdesc:");
3877 case SYMBOL_SMALL_GOTTPREL
:
3878 asm_fprintf (asm_out_file
, ":gottprel:");
3881 case SYMBOL_SMALL_TPREL
:
3882 asm_fprintf (asm_out_file
, ":tprel:");
3885 case SYMBOL_TINY_GOT
:
3892 output_addr_const (asm_out_file
, x
);
/* Print the :lo12:-style relocation prefix for the low 12 bits.  */
3896 switch (aarch64_classify_symbolic_expression (x
, SYMBOL_CONTEXT_ADR
))
3898 case SYMBOL_SMALL_GOT
:
3899 asm_fprintf (asm_out_file
, ":lo12:");
3902 case SYMBOL_SMALL_TLSGD
:
3903 asm_fprintf (asm_out_file
, ":tlsgd_lo12:");
3906 case SYMBOL_SMALL_TLSDESC
:
3907 asm_fprintf (asm_out_file
, ":tlsdesc_lo12:");
3910 case SYMBOL_SMALL_GOTTPREL
:
3911 asm_fprintf (asm_out_file
, ":gottprel_lo12:");
3914 case SYMBOL_SMALL_TPREL
:
3915 asm_fprintf (asm_out_file
, ":tprel_lo12_nc:");
3918 case SYMBOL_TINY_GOT
:
3919 asm_fprintf (asm_out_file
, ":got:");
3925 output_addr_const (asm_out_file
, x
);
/* Print the :tprel_hi12: prefix for TLS high-12 relocations.  */
3930 switch (aarch64_classify_symbolic_expression (x
, SYMBOL_CONTEXT_ADR
))
3932 case SYMBOL_SMALL_TPREL
:
3933 asm_fprintf (asm_out_file
, ":tprel_hi12:");
3938 output_addr_const (asm_out_file
, x
);
3942 output_operand_lossage ("invalid operand prefix '%%%c'", code
);
/* Implement TARGET_PRINT_OPERAND_ADDRESS: classify address X (using the
   mode recorded by the MEM case of the operand printer) and print the
   matching AArch64 assembly addressing form.  */
3948 aarch64_print_operand_address (FILE *f
, rtx x
)
3950 struct aarch64_address_info addr
;
3952 if (aarch64_classify_address (&addr
, x
, aarch64_memory_reference_mode
,
3956 case ADDRESS_REG_IMM
:
3957 if (addr
.offset
== const0_rtx
)
3958 asm_fprintf (f
, "[%s]", reg_names
[REGNO (addr
.base
)]);
3960 asm_fprintf (f
, "[%s, %wd]", reg_names
[REGNO (addr
.base
)],
3961 INTVAL (addr
.offset
));
3964 case ADDRESS_REG_REG
:
3965 if (addr
.shift
== 0)
3966 asm_fprintf (f
, "[%s, %s]", reg_names
[REGNO (addr
.base
)],
3967 reg_names
[REGNO (addr
.offset
)]);
3969 asm_fprintf (f
, "[%s, %s, lsl %u]", reg_names
[REGNO (addr
.base
)],
3970 reg_names
[REGNO (addr
.offset
)], addr
.shift
);
3973 case ADDRESS_REG_UXTW
:
3974 if (addr
.shift
== 0)
3975 asm_fprintf (f
, "[%s, w%d, uxtw]", reg_names
[REGNO (addr
.base
)],
3976 REGNO (addr
.offset
) - R0_REGNUM
);
3978 asm_fprintf (f
, "[%s, w%d, uxtw %u]", reg_names
[REGNO (addr
.base
)],
3979 REGNO (addr
.offset
) - R0_REGNUM
, addr
.shift
);
3982 case ADDRESS_REG_SXTW
:
3983 if (addr
.shift
== 0)
3984 asm_fprintf (f
, "[%s, w%d, sxtw]", reg_names
[REGNO (addr
.base
)],
3985 REGNO (addr
.offset
) - R0_REGNUM
);
3987 asm_fprintf (f
, "[%s, w%d, sxtw %u]", reg_names
[REGNO (addr
.base
)],
3988 REGNO (addr
.offset
) - R0_REGNUM
, addr
.shift
);
/* Writeback forms: the inner switch on the rtx code distinguishes
   pre/post increment/decrement/modify (case labels lost in
   extraction); offsets are implied by the access size except for the
   explicit-modify forms at the end.  */
3991 case ADDRESS_REG_WB
:
3992 switch (GET_CODE (x
))
3995 asm_fprintf (f
, "[%s, %d]!", reg_names
[REGNO (addr
.base
)],
3996 GET_MODE_SIZE (aarch64_memory_reference_mode
));
3999 asm_fprintf (f
, "[%s], %d", reg_names
[REGNO (addr
.base
)],
4000 GET_MODE_SIZE (aarch64_memory_reference_mode
));
4003 asm_fprintf (f
, "[%s, -%d]!", reg_names
[REGNO (addr
.base
)],
4004 GET_MODE_SIZE (aarch64_memory_reference_mode
));
4007 asm_fprintf (f
, "[%s], -%d", reg_names
[REGNO (addr
.base
)],
4008 GET_MODE_SIZE (aarch64_memory_reference_mode
));
4011 asm_fprintf (f
, "[%s, %wd]!", reg_names
[REGNO (addr
.base
)],
4012 INTVAL (addr
.offset
));
4015 asm_fprintf (f
, "[%s], %wd", reg_names
[REGNO (addr
.base
)],
4016 INTVAL (addr
.offset
));
4023 case ADDRESS_LO_SUM
:
4024 asm_fprintf (f
, "[%s, #:lo12:", reg_names
[REGNO (addr
.base
)]);
4025 output_addr_const (f
, addr
.offset
);
4026 asm_fprintf (f
, "]");
4029 case ADDRESS_SYMBOLIC
:
/* Fallback: print the raw address expression.  */
4033 output_addr_const (f
, x
);
4037 aarch64_label_mentioned_p (rtx x
)
4042 if (GET_CODE (x
) == LABEL_REF
)
4045 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4046 referencing instruction, but they are constant offsets, not
4048 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
4051 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
4052 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
4058 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
4059 if (aarch64_label_mentioned_p (XVECEXP (x
, i
, j
)))
4062 else if (fmt
[i
] == 'e' && aarch64_label_mentioned_p (XEXP (x
, i
)))
4069 /* Implement REGNO_REG_CLASS. */
4072 aarch64_regno_regclass (unsigned regno
)
4074 if (GP_REGNUM_P (regno
))
4077 if (regno
== SP_REGNUM
)
4080 if (regno
== FRAME_POINTER_REGNUM
4081 || regno
== ARG_POINTER_REGNUM
)
4082 return POINTER_REGS
;
4084 if (FP_REGNUM_P (regno
))
4085 return FP_LO_REGNUM_P (regno
) ? FP_LO_REGS
: FP_REGS
;
4090 /* Try a machine-dependent way of reloading an illegitimate address
4091 operand. If we find one, push the reload and return the new rtx. */
4094 aarch64_legitimize_reload_address (rtx
*x_p
,
4095 enum machine_mode mode
,
4096 int opnum
, int type
,
4097 int ind_levels ATTRIBUTE_UNUSED
)
4101 /* Do not allow mem (plus (reg, const)) if vector mode. */
4102 if (aarch64_vector_mode_p (mode
)
4103 && GET_CODE (x
) == PLUS
4104 && REG_P (XEXP (x
, 0))
4105 && CONST_INT_P (XEXP (x
, 1)))
4109 push_reload (orig_rtx
, NULL_RTX
, x_p
, NULL
,
4110 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
4111 opnum
, (enum reload_type
) type
);
4115 /* We must recognize output that we have already generated ourselves. */
4116 if (GET_CODE (x
) == PLUS
4117 && GET_CODE (XEXP (x
, 0)) == PLUS
4118 && REG_P (XEXP (XEXP (x
, 0), 0))
4119 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
4120 && CONST_INT_P (XEXP (x
, 1)))
4122 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
4123 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
4124 opnum
, (enum reload_type
) type
);
4128 /* We wish to handle large displacements off a base register by splitting
4129 the addend across an add and the mem insn. This can cut the number of
4130 extra insns needed from 3 to 1. It is only useful for load/store of a
4131 single register with 12 bit offset field. */
4132 if (GET_CODE (x
) == PLUS
4133 && REG_P (XEXP (x
, 0))
4134 && CONST_INT_P (XEXP (x
, 1))
4135 && HARD_REGISTER_P (XEXP (x
, 0))
4138 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x
, 0)), true))
4140 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
4141 HOST_WIDE_INT low
= val
& 0xfff;
4142 HOST_WIDE_INT high
= val
- low
;
4145 enum machine_mode xmode
= GET_MODE (x
);
4147 /* In ILP32, xmode can be either DImode or SImode. */
4148 gcc_assert (xmode
== DImode
|| xmode
== SImode
);
4150 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4151 BLKmode alignment. */
4152 if (GET_MODE_SIZE (mode
) == 0)
4155 offs
= low
% GET_MODE_SIZE (mode
);
4157 /* Align misaligned offset by adjusting high part to compensate. */
4160 if (aarch64_uimm12_shift (high
+ offs
))
4169 offs
= GET_MODE_SIZE (mode
) - offs
;
4171 high
= high
+ (low
& 0x1000) - offs
;
4176 /* Check for overflow. */
4177 if (high
+ low
!= val
)
4180 cst
= GEN_INT (high
);
4181 if (!aarch64_uimm12_shift (high
))
4182 cst
= force_const_mem (xmode
, cst
);
4184 /* Reload high part into base reg, leaving the low part
4185 in the mem instruction.
4186 Note that replacing this gen_rtx_PLUS with plus_constant is
4187 wrong in this case because we rely on the
4188 (plus (plus reg c1) c2) structure being preserved so that
4189 XEXP (*p, 0) in push_reload below uses the correct term. */
4190 x
= gen_rtx_PLUS (xmode
,
4191 gen_rtx_PLUS (xmode
, XEXP (x
, 0), cst
),
4194 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
4195 BASE_REG_CLASS
, xmode
, VOIDmode
, 0, 0,
4196 opnum
, (enum reload_type
) type
);
4205 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED
, rtx x
,
4207 enum machine_mode mode
,
4208 secondary_reload_info
*sri
)
4210 /* Without the TARGET_SIMD instructions we cannot move a Q register
4211 to a Q register directly. We need a scratch. */
4212 if (REG_P (x
) && (mode
== TFmode
|| mode
== TImode
) && mode
== GET_MODE (x
)
4213 && FP_REGNUM_P (REGNO (x
)) && !TARGET_SIMD
4214 && reg_class_subset_p (rclass
, FP_REGS
))
4217 sri
->icode
= CODE_FOR_aarch64_reload_movtf
;
4218 else if (mode
== TImode
)
4219 sri
->icode
= CODE_FOR_aarch64_reload_movti
;
4223 /* A TFmode or TImode memory access should be handled via an FP_REGS
4224 because AArch64 has richer addressing modes for LDR/STR instructions
4225 than LDP/STP instructions. */
4226 if (!TARGET_GENERAL_REGS_ONLY
&& rclass
== CORE_REGS
4227 && GET_MODE_SIZE (mode
) == 16 && MEM_P (x
))
4230 if (rclass
== FP_REGS
&& (mode
== TImode
|| mode
== TFmode
) && CONSTANT_P(x
))
4237 aarch64_can_eliminate (const int from
, const int to
)
4239 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4240 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4242 if (frame_pointer_needed
)
4244 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4246 if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
4248 if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
4249 && !cfun
->calls_alloca
)
4251 if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4261 aarch64_initial_elimination_offset (unsigned from
, unsigned to
)
4263 HOST_WIDE_INT frame_size
;
4264 HOST_WIDE_INT offset
;
4266 aarch64_layout_frame ();
4267 frame_size
= (get_frame_size () + cfun
->machine
->frame
.saved_regs_size
4268 + crtl
->outgoing_args_size
4269 + cfun
->machine
->saved_varargs_size
);
4271 frame_size
= AARCH64_ROUND_UP (frame_size
, STACK_BOUNDARY
/ BITS_PER_UNIT
);
4272 offset
= frame_size
;
4274 if (to
== HARD_FRAME_POINTER_REGNUM
)
4276 if (from
== ARG_POINTER_REGNUM
)
4277 return offset
- crtl
->outgoing_args_size
;
4279 if (from
== FRAME_POINTER_REGNUM
)
4280 return cfun
->machine
->frame
.saved_regs_size
+ get_frame_size ();
4283 if (to
== STACK_POINTER_REGNUM
)
4285 if (from
== FRAME_POINTER_REGNUM
)
4287 HOST_WIDE_INT elim
= crtl
->outgoing_args_size
4288 + cfun
->machine
->frame
.saved_regs_size
4289 + get_frame_size ();
4290 elim
= AARCH64_ROUND_UP (elim
, STACK_BOUNDARY
/ BITS_PER_UNIT
);
4299 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4303 aarch64_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
4307 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
4312 aarch64_asm_trampoline_template (FILE *f
)
4316 asm_fprintf (f
, "\tldr\tw%d, .+16\n", IP1_REGNUM
- R0_REGNUM
);
4317 asm_fprintf (f
, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM
- R0_REGNUM
);
4321 asm_fprintf (f
, "\tldr\t%s, .+16\n", reg_names
[IP1_REGNUM
]);
4322 asm_fprintf (f
, "\tldr\t%s, .+20\n", reg_names
[STATIC_CHAIN_REGNUM
]);
4324 asm_fprintf (f
, "\tbr\t%s\n", reg_names
[IP1_REGNUM
]);
4325 assemble_aligned_integer (4, const0_rtx
);
4326 assemble_aligned_integer (POINTER_BYTES
, const0_rtx
);
4327 assemble_aligned_integer (POINTER_BYTES
, const0_rtx
);
4331 aarch64_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
4333 rtx fnaddr
, mem
, a_tramp
;
4334 const int tramp_code_sz
= 16;
4336 /* Don't need to copy the trailing D-words, we fill those in below. */
4337 emit_block_move (m_tramp
, assemble_trampoline_template (),
4338 GEN_INT (tramp_code_sz
), BLOCK_OP_NORMAL
);
4339 mem
= adjust_address (m_tramp
, ptr_mode
, tramp_code_sz
);
4340 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
4341 if (GET_MODE (fnaddr
) != ptr_mode
)
4342 fnaddr
= convert_memory_address (ptr_mode
, fnaddr
);
4343 emit_move_insn (mem
, fnaddr
);
4345 mem
= adjust_address (m_tramp
, ptr_mode
, tramp_code_sz
+ POINTER_BYTES
);
4346 emit_move_insn (mem
, chain_value
);
4348 /* XXX We should really define a "clear_cache" pattern and use
4349 gen_clear_cache(). */
4350 a_tramp
= XEXP (m_tramp
, 0);
4351 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
4352 LCT_NORMAL
, VOIDmode
, 2, a_tramp
, ptr_mode
,
4353 plus_constant (ptr_mode
, a_tramp
, TRAMPOLINE_SIZE
),
4357 static unsigned char
4358 aarch64_class_max_nregs (reg_class_t regclass
, enum machine_mode mode
)
4369 aarch64_vector_mode_p (mode
) ? (GET_MODE_SIZE (mode
) + 15) / 16 :
4370 (GET_MODE_SIZE (mode
) + 7) / 8;
4384 aarch64_preferred_reload_class (rtx x
, reg_class_t regclass
)
4386 if (regclass
== POINTER_REGS
)
4387 return GENERAL_REGS
;
4389 if (regclass
== STACK_REG
)
4392 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x
)), POINTER_REGS
))
4398 /* If it's an integer immediate that MOVI can't handle, then
4399 FP_REGS is not an option, so we return NO_REGS instead. */
4400 if (CONST_INT_P (x
) && reg_class_subset_p (regclass
, FP_REGS
)
4401 && !aarch64_simd_imm_scalar_p (x
, GET_MODE (x
)))
4404 /* Register eliminiation can result in a request for
4405 SP+constant->FP_REGS. We cannot support such operations which
4406 use SP as source and an FP_REG as destination, so reject out
4408 if (! reg_class_subset_p (regclass
, GENERAL_REGS
) && GET_CODE (x
) == PLUS
)
4410 rtx lhs
= XEXP (x
, 0);
4412 /* Look through a possible SUBREG introduced by ILP32. */
4413 if (GET_CODE (lhs
) == SUBREG
)
4414 lhs
= SUBREG_REG (lhs
);
4416 gcc_assert (REG_P (lhs
));
4417 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs
)),
/* Implement ASM_OUTPUT_LABELREF.  Print NAME with the user-label
   prefix (%U) to stream F.  */
static void
aarch64_asm_output_labelref (FILE* f, const char *name)
{
  asm_fprintf (f, "%U%s", name);
}
4432 aarch64_elf_asm_constructor (rtx symbol
, int priority
)
4434 if (priority
== DEFAULT_INIT_PRIORITY
)
4435 default_ctor_section_asm_out_constructor (symbol
, priority
);
4440 snprintf (buf
, sizeof (buf
), ".init_array.%.5u", priority
);
4441 s
= get_section (buf
, SECTION_WRITE
, NULL
);
4442 switch_to_section (s
);
4443 assemble_align (POINTER_SIZE
);
4444 assemble_aligned_integer (POINTER_BYTES
, symbol
);
4449 aarch64_elf_asm_destructor (rtx symbol
, int priority
)
4451 if (priority
== DEFAULT_INIT_PRIORITY
)
4452 default_dtor_section_asm_out_destructor (symbol
, priority
);
4457 snprintf (buf
, sizeof (buf
), ".fini_array.%.5u", priority
);
4458 s
= get_section (buf
, SECTION_WRITE
, NULL
);
4459 switch_to_section (s
);
4460 assemble_align (POINTER_SIZE
);
4461 assemble_aligned_integer (POINTER_BYTES
, symbol
);
4466 aarch64_output_casesi (rtx
*operands
)
4470 rtx diff_vec
= PATTERN (NEXT_INSN (operands
[2]));
4472 static const char *const patterns
[4][2] =
4475 "ldrb\t%w3, [%0,%w1,uxtw]",
4476 "add\t%3, %4, %w3, sxtb #2"
4479 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4480 "add\t%3, %4, %w3, sxth #2"
4483 "ldr\t%w3, [%0,%w1,uxtw #2]",
4484 "add\t%3, %4, %w3, sxtw #2"
4486 /* We assume that DImode is only generated when not optimizing and
4487 that we don't really need 64-bit address offsets. That would
4488 imply an object file with 8GB of code in a single function! */
4490 "ldr\t%w3, [%0,%w1,uxtw #2]",
4491 "add\t%3, %4, %w3, sxtw #2"
4495 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
4497 index
= exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec
)));
4499 gcc_assert (index
>= 0 && index
<= 3);
4501 /* Need to implement table size reduction, by chaning the code below. */
4502 output_asm_insn (patterns
[index
][0], operands
);
4503 ASM_GENERATE_INTERNAL_LABEL (label
, "Lrtx", CODE_LABEL_NUMBER (operands
[2]));
4504 snprintf (buf
, sizeof (buf
),
4505 "adr\t%%4, %s", targetm
.strip_name_encoding (label
));
4506 output_asm_insn (buf
, operands
);
4507 output_asm_insn (patterns
[index
][1], operands
);
4508 output_asm_insn ("br\t%3", operands
);
4509 assemble_label (asm_out_file
, label
);
4514 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4515 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4519 aarch64_uxt_size (int shift
, HOST_WIDE_INT mask
)
4521 if (shift
>= 0 && shift
<= 3)
4524 for (size
= 8; size
<= 32; size
*= 2)
4526 HOST_WIDE_INT bits
= ((HOST_WIDE_INT
)1U << size
) - 1;
4527 if (mask
== bits
<< shift
)
4535 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
4536 const_rtx x ATTRIBUTE_UNUSED
)
4538 /* We can't use blocks for constants when we're using a per-function
4544 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED
,
4545 rtx x ATTRIBUTE_UNUSED
,
4546 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED
)
4548 /* Force all constant pool entries into the current function section. */
4549 return function_section (current_function_decl
);
4555 /* Helper function for rtx cost calculation. Strip a shift expression
4556 from X. Returns the inner operand if successful, or the original
4557 expression on failure. */
4559 aarch64_strip_shift (rtx x
)
4563 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
4564 we can convert both to ROR during final output. */
4565 if ((GET_CODE (op
) == ASHIFT
4566 || GET_CODE (op
) == ASHIFTRT
4567 || GET_CODE (op
) == LSHIFTRT
4568 || GET_CODE (op
) == ROTATERT
4569 || GET_CODE (op
) == ROTATE
)
4570 && CONST_INT_P (XEXP (op
, 1)))
4571 return XEXP (op
, 0);
4573 if (GET_CODE (op
) == MULT
4574 && CONST_INT_P (XEXP (op
, 1))
4575 && ((unsigned) exact_log2 (INTVAL (XEXP (op
, 1)))) < 64)
4576 return XEXP (op
, 0);
4581 /* Helper function for rtx cost calculation. Strip an extend
4582 expression from X. Returns the inner operand if successful, or the
4583 original expression on failure. We deal with a number of possible
4584 canonicalization variations here. */
4586 aarch64_strip_extend (rtx x
)
4590 /* Zero and sign extraction of a widened value. */
4591 if ((GET_CODE (op
) == ZERO_EXTRACT
|| GET_CODE (op
) == SIGN_EXTRACT
)
4592 && XEXP (op
, 2) == const0_rtx
4593 && GET_CODE (XEXP (op
, 0)) == MULT
4594 && aarch64_is_extend_from_extract (GET_MODE (op
), XEXP (XEXP (op
, 0), 1),
4596 return XEXP (XEXP (op
, 0), 0);
4598 /* It can also be represented (for zero-extend) as an AND with an
4600 if (GET_CODE (op
) == AND
4601 && GET_CODE (XEXP (op
, 0)) == MULT
4602 && CONST_INT_P (XEXP (XEXP (op
, 0), 1))
4603 && CONST_INT_P (XEXP (op
, 1))
4604 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op
, 0), 1))),
4605 INTVAL (XEXP (op
, 1))) != 0)
4606 return XEXP (XEXP (op
, 0), 0);
4608 /* Now handle extended register, as this may also have an optional
4609 left shift by 1..4. */
4610 if (GET_CODE (op
) == ASHIFT
4611 && CONST_INT_P (XEXP (op
, 1))
4612 && ((unsigned HOST_WIDE_INT
) INTVAL (XEXP (op
, 1))) <= 4)
4615 if (GET_CODE (op
) == ZERO_EXTEND
4616 || GET_CODE (op
) == SIGN_EXTEND
)
4625 /* Helper function for rtx cost calculation. Calculate the cost of
4626 a MULT, which may be part of a multiply-accumulate rtx. Return
4627 the calculated cost of the expression, recursing manually in to
4628 operands where needed. */
4631 aarch64_rtx_mult_cost (rtx x
, int code
, int outer
, bool speed
)
4634 const struct cpu_cost_table
*extra_cost
4635 = aarch64_tune_params
->insn_extra_cost
;
4637 bool maybe_fma
= (outer
== PLUS
|| outer
== MINUS
);
4638 enum machine_mode mode
= GET_MODE (x
);
4640 gcc_checking_assert (code
== MULT
);
4645 if (VECTOR_MODE_P (mode
))
4646 mode
= GET_MODE_INNER (mode
);
4648 /* Integer multiply/fma. */
4649 if (GET_MODE_CLASS (mode
) == MODE_INT
)
4651 /* The multiply will be canonicalized as a shift, cost it as such. */
4652 if (CONST_INT_P (op1
)
4653 && exact_log2 (INTVAL (op1
)) > 0)
4658 /* ADD (shifted register). */
4659 cost
+= extra_cost
->alu
.arith_shift
;
4661 /* LSL (immediate). */
4662 cost
+= extra_cost
->alu
.shift
;
4665 cost
+= rtx_cost (op0
, GET_CODE (op0
), 0, speed
);
4670 /* Integer multiplies or FMAs have zero/sign extending variants. */
4671 if ((GET_CODE (op0
) == ZERO_EXTEND
4672 && GET_CODE (op1
) == ZERO_EXTEND
)
4673 || (GET_CODE (op0
) == SIGN_EXTEND
4674 && GET_CODE (op1
) == SIGN_EXTEND
))
4676 cost
+= rtx_cost (XEXP (op0
, 0), MULT
, 0, speed
)
4677 + rtx_cost (XEXP (op1
, 0), MULT
, 1, speed
);
4682 /* MADD/SMADDL/UMADDL. */
4683 cost
+= extra_cost
->mult
[0].extend_add
;
4685 /* MUL/SMULL/UMULL. */
4686 cost
+= extra_cost
->mult
[0].extend
;
4692 /* This is either an integer multiply or an FMA. In both cases
4693 we want to recurse and cost the operands. */
4694 cost
+= rtx_cost (op0
, MULT
, 0, speed
)
4695 + rtx_cost (op1
, MULT
, 1, speed
);
4701 cost
+= extra_cost
->mult
[mode
== DImode
].add
;
4704 cost
+= extra_cost
->mult
[mode
== DImode
].simple
;
4713 /* Floating-point FMA can also support negations of the
4715 if (GET_CODE (op0
) == NEG
)
4718 op0
= XEXP (op0
, 0);
4720 if (GET_CODE (op1
) == NEG
)
4723 op1
= XEXP (op1
, 0);
4727 /* FMADD/FNMADD/FNMSUB/FMSUB. */
4728 cost
+= extra_cost
->fp
[mode
== DFmode
].fma
;
4731 cost
+= extra_cost
->fp
[mode
== DFmode
].mult
;
4734 cost
+= rtx_cost (op0
, MULT
, 0, speed
)
4735 + rtx_cost (op1
, MULT
, 1, speed
);
4741 aarch64_address_cost (rtx x
,
4742 enum machine_mode mode
,
4743 addr_space_t as ATTRIBUTE_UNUSED
,
4746 enum rtx_code c
= GET_CODE (x
);
4747 const struct cpu_addrcost_table
*addr_cost
= aarch64_tune_params
->addr_cost
;
4748 struct aarch64_address_info info
;
4752 if (!aarch64_classify_address (&info
, x
, mode
, c
, false))
4754 if (GET_CODE (x
) == CONST
|| GET_CODE (x
) == SYMBOL_REF
)
4756 /* This is a CONST or SYMBOL ref which will be split
4757 in a different way depending on the code model in use.
4758 Cost it through the generic infrastructure. */
4759 int cost_symbol_ref
= rtx_cost (x
, MEM
, 1, speed
);
4760 /* Divide through by the cost of one instruction to
4761 bring it to the same units as the address costs. */
4762 cost_symbol_ref
/= COSTS_N_INSNS (1);
4763 /* The cost is then the cost of preparing the address,
4764 followed by an immediate (possibly 0) offset. */
4765 return cost_symbol_ref
+ addr_cost
->imm_offset
;
4769 /* This is most likely a jump table from a case
4771 return addr_cost
->register_offset
;
4777 case ADDRESS_LO_SUM
:
4778 case ADDRESS_SYMBOLIC
:
4779 case ADDRESS_REG_IMM
:
4780 cost
+= addr_cost
->imm_offset
;
4783 case ADDRESS_REG_WB
:
4784 if (c
== PRE_INC
|| c
== PRE_DEC
|| c
== PRE_MODIFY
)
4785 cost
+= addr_cost
->pre_modify
;
4786 else if (c
== POST_INC
|| c
== POST_DEC
|| c
== POST_MODIFY
)
4787 cost
+= addr_cost
->post_modify
;
4793 case ADDRESS_REG_REG
:
4794 cost
+= addr_cost
->register_offset
;
4797 case ADDRESS_REG_UXTW
:
4798 case ADDRESS_REG_SXTW
:
4799 cost
+= addr_cost
->register_extend
;
4809 /* For the sake of calculating the cost of the shifted register
4810 component, we can treat same sized modes in the same way. */
4811 switch (GET_MODE_BITSIZE (mode
))
4814 cost
+= addr_cost
->addr_scale_costs
.hi
;
4818 cost
+= addr_cost
->addr_scale_costs
.si
;
4822 cost
+= addr_cost
->addr_scale_costs
.di
;
4825 /* We can't tell, or this is a 128-bit vector. */
4827 cost
+= addr_cost
->addr_scale_costs
.ti
;
4835 /* Calculate the cost of calculating X, storing it in *COST. Result
4836 is true if the total cost of the operation has now been calculated. */
4838 aarch64_rtx_costs (rtx x
, int code
, int outer ATTRIBUTE_UNUSED
,
4839 int param ATTRIBUTE_UNUSED
, int *cost
, bool speed
)
4842 const struct cpu_cost_table
*extra_cost
4843 = aarch64_tune_params
->insn_extra_cost
;
4844 enum machine_mode mode
= GET_MODE (x
);
4846 /* By default, assume that everything has equivalent cost to the
4847 cheapest instruction. Any additional costs are applied as a delta
4848 above this default. */
4849 *cost
= COSTS_N_INSNS (1);
4851 /* TODO: The cost infrastructure currently does not handle
4852 vector operations. Assume that all vector operations
4853 are equally expensive. */
4854 if (VECTOR_MODE_P (mode
))
4857 *cost
+= extra_cost
->vect
.alu
;
4864 /* The cost depends entirely on the operands to SET. */
4869 switch (GET_CODE (op0
))
4873 *cost
+= extra_cost
->ldst
.store
;
4875 *cost
+= rtx_cost (op1
, SET
, 1, speed
);
4879 if (! REG_P (SUBREG_REG (op0
)))
4880 *cost
+= rtx_cost (SUBREG_REG (op0
), SET
, 0, speed
);
4884 /* const0_rtx is in general free, but we will use an
4885 instruction to set a register to 0. */
4886 if (REG_P (op1
) || op1
== const0_rtx
)
4888 /* The cost is 1 per register copied. */
4889 int n_minus_1
= (GET_MODE_SIZE (GET_MODE (op0
)) - 1)
4891 *cost
= COSTS_N_INSNS (n_minus_1
+ 1);
4894 /* Cost is just the cost of the RHS of the set. */
4895 *cost
+= rtx_cost (op1
, SET
, 1, speed
);
4900 /* Bit-field insertion. Strip any redundant widening of
4901 the RHS to meet the width of the target. */
4902 if (GET_CODE (op1
) == SUBREG
)
4903 op1
= SUBREG_REG (op1
);
4904 if ((GET_CODE (op1
) == ZERO_EXTEND
4905 || GET_CODE (op1
) == SIGN_EXTEND
)
4906 && GET_CODE (XEXP (op0
, 1)) == CONST_INT
4907 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1
, 0)))
4908 >= INTVAL (XEXP (op0
, 1))))
4909 op1
= XEXP (op1
, 0);
4911 if (CONST_INT_P (op1
))
4913 /* MOV immediate is assumed to always be cheap. */
4914 *cost
= COSTS_N_INSNS (1);
4920 *cost
+= extra_cost
->alu
.bfi
;
4921 *cost
+= rtx_cost (op1
, (enum rtx_code
) code
, 1, speed
);
4927 /* We can't make sense of this, assume default cost. */
4928 *cost
= COSTS_N_INSNS (1);
4934 /* If an instruction can incorporate a constant within the
4935 instruction, the instruction's expression avoids calling
4936 rtx_cost() on the constant. If rtx_cost() is called on a
4937 constant, then it is usually because the constant must be
4938 moved into a register by one or more instructions.
4940 The exception is constant 0, which can be expressed
4941 as XZR/WZR and is therefore free. The exception to this is
4942 if we have (set (reg) (const0_rtx)) in which case we must cost
4943 the move. However, we can catch that when we cost the SET, so
4944 we don't need to consider that here. */
4945 if (x
== const0_rtx
)
4949 /* To an approximation, building any other constant is
4950 proportionally expensive to the number of instructions
4951 required to build that constant. This is true whether we
4952 are compiling for SPEED or otherwise. */
4953 *cost
= COSTS_N_INSNS (aarch64_build_constant (0,
4962 /* mov[df,sf]_aarch64. */
4963 if (aarch64_float_const_representable_p (x
))
4964 /* FMOV (scalar immediate). */
4965 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
4966 else if (!aarch64_float_const_zero_rtx_p (x
))
4968 /* This will be a load from memory. */
4970 *cost
+= extra_cost
->ldst
.loadd
;
4972 *cost
+= extra_cost
->ldst
.loadf
;
4975 /* Otherwise this is +0.0. We get this using MOVI d0, #0
4976 or MOV v0.s[0], wzr - neither of which are modeled by the
4977 cost tables. Just use the default cost. */
4986 *cost
+= extra_cost
->ldst
.load
;
4993 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
4995 if (GET_RTX_CLASS (GET_CODE (op0
)) == RTX_COMPARE
4996 || GET_RTX_CLASS (GET_CODE (op0
)) == RTX_COMM_COMPARE
)
4999 *cost
+= rtx_cost (XEXP (op0
, 0), NEG
, 0, speed
);
5003 /* Cost this as SUB wzr, X. */
5004 op0
= CONST0_RTX (GET_MODE (x
));
5009 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
5011 /* Support (neg(fma...)) as a single instruction only if
5012 sign of zeros is unimportant. This matches the decision
5013 making in aarch64.md. */
5014 if (GET_CODE (op0
) == FMA
&& !HONOR_SIGNED_ZEROS (GET_MODE (op0
)))
5017 *cost
= rtx_cost (op0
, NEG
, 0, speed
);
5022 *cost
+= extra_cost
->fp
[mode
== DFmode
].neg
;
5032 if (op1
== const0_rtx
5033 && GET_CODE (op0
) == AND
)
5039 /* Comparisons can work if the order is swapped.
5040 Canonicalization puts the more complex operation first, but
5041 we want it in op1. */
5043 || (GET_CODE (op0
) == SUBREG
&& REG_P (SUBREG_REG (op0
)))))
5056 /* Detect valid immediates. */
5057 if ((GET_MODE_CLASS (mode
) == MODE_INT
5058 || (GET_MODE_CLASS (mode
) == MODE_CC
5059 && GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
))
5060 && CONST_INT_P (op1
)
5061 && aarch64_uimm12_shift (INTVAL (op1
)))
5063 *cost
+= rtx_cost (op0
, MINUS
, 0, speed
);
5066 /* SUB(S) (immediate). */
5067 *cost
+= extra_cost
->alu
.arith
;
5072 rtx new_op1
= aarch64_strip_extend (op1
);
5074 /* Cost this as an FMA-alike operation. */
5075 if ((GET_CODE (new_op1
) == MULT
5076 || GET_CODE (new_op1
) == ASHIFT
)
5079 *cost
+= aarch64_rtx_mult_cost (new_op1
, MULT
,
5080 (enum rtx_code
) code
,
5082 *cost
+= rtx_cost (op0
, MINUS
, 0, speed
);
5086 *cost
+= rtx_cost (new_op1
, MINUS
, 1, speed
);
5090 if (GET_MODE_CLASS (mode
) == MODE_INT
)
5092 *cost
+= extra_cost
->alu
.arith
;
5093 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5095 *cost
+= extra_cost
->fp
[mode
== DFmode
].addsub
;
5107 if (GET_RTX_CLASS (GET_CODE (op0
)) == RTX_COMPARE
5108 || GET_RTX_CLASS (GET_CODE (op0
)) == RTX_COMM_COMPARE
)
5111 *cost
+= rtx_cost (XEXP (op0
, 0), PLUS
, 0, speed
);
5112 *cost
+= rtx_cost (op1
, PLUS
, 1, speed
);
5116 if (GET_MODE_CLASS (mode
) == MODE_INT
5117 && CONST_INT_P (op1
)
5118 && aarch64_uimm12_shift (INTVAL (op1
)))
5120 *cost
+= rtx_cost (op0
, PLUS
, 0, speed
);
5123 /* ADD (immediate). */
5124 *cost
+= extra_cost
->alu
.arith
;
5128 /* Strip any extend, leave shifts behind as we will
5129 cost them through mult_cost. */
5130 new_op0
= aarch64_strip_extend (op0
);
5132 if (GET_CODE (new_op0
) == MULT
5133 || GET_CODE (new_op0
) == ASHIFT
)
5135 *cost
+= aarch64_rtx_mult_cost (new_op0
, MULT
, PLUS
,
5137 *cost
+= rtx_cost (op1
, PLUS
, 1, speed
);
5141 *cost
+= (rtx_cost (new_op0
, PLUS
, 0, speed
)
5142 + rtx_cost (op1
, PLUS
, 1, speed
));
5146 if (GET_MODE_CLASS (mode
) == MODE_INT
)
5148 *cost
+= extra_cost
->alu
.arith
;
5149 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
5151 *cost
+= extra_cost
->fp
[mode
== DFmode
].addsub
;
5157 *cost
= COSTS_N_INSNS (1);
5160 *cost
+= extra_cost
->alu
.rev
;
5165 if (aarch_rev16_p (x
))
5167 *cost
= COSTS_N_INSNS (1);
5170 *cost
+= extra_cost
->alu
.rev
;
5181 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
5183 if (CONST_INT_P (op1
)
5184 && aarch64_bitmask_imm (INTVAL (op1
), GET_MODE (x
)))
5186 *cost
+= rtx_cost (op0
, AND
, 0, speed
);
5190 if (GET_CODE (op0
) == NOT
)
5191 op0
= XEXP (op0
, 0);
5192 op0
= aarch64_strip_shift (op0
);
5193 *cost
+= (rtx_cost (op0
, AND
, 0, speed
)
5194 + rtx_cost (op1
, AND
, 1, speed
));
5201 if ((GET_MODE (x
) == DImode
5202 && GET_MODE (XEXP (x
, 0)) == SImode
)
5203 || GET_CODE (XEXP (x
, 0)) == MEM
)
5205 *cost
+= rtx_cost (XEXP (x
, 0), ZERO_EXTEND
, 0, speed
);
5211 if (GET_CODE (XEXP (x
, 0)) == MEM
)
5213 *cost
+= rtx_cost (XEXP (x
, 0), SIGN_EXTEND
, 0, speed
);
5219 if (!CONST_INT_P (XEXP (x
, 1)))
5220 *cost
+= COSTS_N_INSNS (2);
5227 /* Shifting by a register often takes an extra cycle. */
5228 if (speed
&& !CONST_INT_P (XEXP (x
, 1)))
5229 *cost
+= extra_cost
->alu
.arith_shift_reg
;
5231 *cost
+= rtx_cost (XEXP (x
, 0), ASHIFT
, 0, speed
);
5235 if (!CONSTANT_P (XEXP (x
, 0)))
5236 *cost
+= rtx_cost (XEXP (x
, 0), HIGH
, 0, speed
);
5240 if (!CONSTANT_P (XEXP (x
, 1)))
5241 *cost
+= rtx_cost (XEXP (x
, 1), LO_SUM
, 1, speed
);
5242 *cost
+= rtx_cost (XEXP (x
, 0), LO_SUM
, 0, speed
);
5247 *cost
+= rtx_cost (XEXP (x
, 0), ZERO_EXTRACT
, 0, speed
);
5251 *cost
+= aarch64_rtx_mult_cost (x
, MULT
, 0, speed
);
5252 /* aarch64_rtx_mult_cost always handles recursion to its
5258 *cost
= COSTS_N_INSNS (2);
5261 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
5262 *cost
+= (extra_cost
->mult
[GET_MODE (x
) == DImode
].add
5263 + extra_cost
->mult
[GET_MODE (x
) == DImode
].idiv
);
5264 else if (GET_MODE (x
) == DFmode
)
5265 *cost
+= (extra_cost
->fp
[1].mult
5266 + extra_cost
->fp
[1].div
);
5267 else if (GET_MODE (x
) == SFmode
)
5268 *cost
+= (extra_cost
->fp
[0].mult
5269 + extra_cost
->fp
[0].div
);
5271 return false; /* All arguments need to be in registers. */
5275 *cost
= COSTS_N_INSNS (1);
5278 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
5279 *cost
+= extra_cost
->mult
[GET_MODE (x
) == DImode
].idiv
;
5280 else if (GET_MODE (x
) == DFmode
)
5281 *cost
+= extra_cost
->fp
[1].div
;
5282 else if (GET_MODE (x
) == SFmode
)
5283 *cost
+= extra_cost
->fp
[0].div
;
5285 return false; /* All arguments need to be in registers. */
5293 /* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
5294 calculated for X. This cost is stored in *COST. Returns true
5295 if the total cost of X was calculated. */
5297 aarch64_rtx_costs_wrapper (rtx x
, int code
, int outer
,
5298 int param
, int *cost
, bool speed
)
5300 bool result
= aarch64_rtx_costs (x
, code
, outer
, param
, cost
, speed
);
5302 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5304 print_rtl_single (dump_file
, x
);
5305 fprintf (dump_file
, "\n%s cost: %d (%s)\n",
5306 speed
? "Hot" : "Cold",
5307 *cost
, result
? "final" : "partial");
5314 aarch64_register_move_cost (enum machine_mode mode
,
5315 reg_class_t from_i
, reg_class_t to_i
)
5317 enum reg_class from
= (enum reg_class
) from_i
;
5318 enum reg_class to
= (enum reg_class
) to_i
;
5319 const struct cpu_regmove_cost
*regmove_cost
5320 = aarch64_tune_params
->regmove_cost
;
5322 /* Moving between GPR and stack cost is the same as GP2GP. */
5323 if ((from
== GENERAL_REGS
&& to
== STACK_REG
)
5324 || (to
== GENERAL_REGS
&& from
== STACK_REG
))
5325 return regmove_cost
->GP2GP
;
5327 /* To/From the stack register, we move via the gprs. */
5328 if (to
== STACK_REG
|| from
== STACK_REG
)
5329 return aarch64_register_move_cost (mode
, from
, GENERAL_REGS
)
5330 + aarch64_register_move_cost (mode
, GENERAL_REGS
, to
);
5332 if (from
== GENERAL_REGS
&& to
== GENERAL_REGS
)
5333 return regmove_cost
->GP2GP
;
5334 else if (from
== GENERAL_REGS
)
5335 return regmove_cost
->GP2FP
;
5336 else if (to
== GENERAL_REGS
)
5337 return regmove_cost
->FP2GP
;
5339 /* When AdvSIMD instructions are disabled it is not possible to move
5340 a 128-bit value directly between Q registers. This is handled in
5341 secondary reload. A general register is used as a scratch to move
5342 the upper DI value and the lower DI value is moved directly,
5343 hence the cost is the sum of three moves. */
5344 if (! TARGET_SIMD
&& GET_MODE_SIZE (mode
) == 128)
5345 return regmove_cost
->GP2FP
+ regmove_cost
->FP2GP
+ regmove_cost
->FP2FP
;
5347 return regmove_cost
->FP2FP
;
5351 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED
,
5352 reg_class_t rclass ATTRIBUTE_UNUSED
,
5353 bool in ATTRIBUTE_UNUSED
)
5355 return aarch64_tune_params
->memmov_cost
;
5358 /* Return the number of instructions that can be issued per cycle. */
5360 aarch64_sched_issue_rate (void)
5362 return aarch64_tune_params
->issue_rate
;
5365 /* Vectorizer cost model target hooks. */
5367 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5369 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
5371 int misalign ATTRIBUTE_UNUSED
)
5375 switch (type_of_cost
)
5378 return aarch64_tune_params
->vec_costs
->scalar_stmt_cost
;
5381 return aarch64_tune_params
->vec_costs
->scalar_load_cost
;
5384 return aarch64_tune_params
->vec_costs
->scalar_store_cost
;
5387 return aarch64_tune_params
->vec_costs
->vec_stmt_cost
;
5390 return aarch64_tune_params
->vec_costs
->vec_align_load_cost
;
5393 return aarch64_tune_params
->vec_costs
->vec_store_cost
;
5396 return aarch64_tune_params
->vec_costs
->vec_to_scalar_cost
;
5399 return aarch64_tune_params
->vec_costs
->scalar_to_vec_cost
;
5401 case unaligned_load
:
5402 return aarch64_tune_params
->vec_costs
->vec_unalign_load_cost
;
5404 case unaligned_store
:
5405 return aarch64_tune_params
->vec_costs
->vec_unalign_store_cost
;
5407 case cond_branch_taken
:
5408 return aarch64_tune_params
->vec_costs
->cond_taken_branch_cost
;
5410 case cond_branch_not_taken
:
5411 return aarch64_tune_params
->vec_costs
->cond_not_taken_branch_cost
;
5414 case vec_promote_demote
:
5415 return aarch64_tune_params
->vec_costs
->vec_stmt_cost
;
5418 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
5419 return elements
/ 2 + 1;
5426 /* Implement targetm.vectorize.add_stmt_cost. */
5428 aarch64_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
5429 struct _stmt_vec_info
*stmt_info
, int misalign
,
5430 enum vect_cost_model_location where
)
5432 unsigned *cost
= (unsigned *) data
;
5433 unsigned retval
= 0;
5435 if (flag_vect_cost_model
)
5437 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
5439 aarch64_builtin_vectorization_cost (kind
, vectype
, misalign
);
5441 /* Statements in an inner loop relative to the loop being
5442 vectorized are weighted more heavily. The value here is
5443 a function (linear for now) of the loop nest level. */
5444 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
5446 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5447 struct loop
*loop
= LOOP_VINFO_LOOP (loop_info
);
5448 unsigned nest_level
= loop_depth (loop
);
5450 count
*= nest_level
;
5453 retval
= (unsigned) (count
* stmt_cost
);
5454 cost
[where
] += retval
;
5460 static void initialize_aarch64_code_model (void);
5462 /* Parse the architecture extension string. */
5465 aarch64_parse_extension (char *str
)
5467 /* The extension string is parsed left to right. */
5468 const struct aarch64_option_extension
*opt
= NULL
;
5470 /* Flag to say whether we are adding or removing an extension. */
5471 int adding_ext
= -1;
5473 while (str
!= NULL
&& *str
!= 0)
5479 ext
= strchr (str
, '+');
5486 if (len
>= 2 && strncmp (str
, "no", 2) == 0)
5497 error ("missing feature modifier after %qs", "+no");
5501 /* Scan over the extensions table trying to find an exact match. */
5502 for (opt
= all_extensions
; opt
->name
!= NULL
; opt
++)
5504 if (strlen (opt
->name
) == len
&& strncmp (opt
->name
, str
, len
) == 0)
5506 /* Add or remove the extension. */
5508 aarch64_isa_flags
|= opt
->flags_on
;
5510 aarch64_isa_flags
&= ~(opt
->flags_off
);
5515 if (opt
->name
== NULL
)
5517 /* Extension not found in list. */
5518 error ("unknown feature modifier %qs", str
);
5528 /* Parse the ARCH string. */
5531 aarch64_parse_arch (void)
5534 const struct processor
*arch
;
5535 char *str
= (char *) alloca (strlen (aarch64_arch_string
) + 1);
5538 strcpy (str
, aarch64_arch_string
);
5540 ext
= strchr (str
, '+');
5549 error ("missing arch name in -march=%qs", str
);
5553 /* Loop through the list of supported ARCHs to find a match. */
5554 for (arch
= all_architectures
; arch
->name
!= NULL
; arch
++)
5556 if (strlen (arch
->name
) == len
&& strncmp (arch
->name
, str
, len
) == 0)
5558 selected_arch
= arch
;
5559 aarch64_isa_flags
= selected_arch
->flags
;
5562 selected_cpu
= &all_cores
[selected_arch
->core
];
5566 /* ARCH string contains at least one extension. */
5567 aarch64_parse_extension (ext
);
5570 if (strcmp (selected_arch
->arch
, selected_cpu
->arch
))
5572 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
5573 selected_cpu
->name
, selected_arch
->name
);
5580 /* ARCH name not found in list. */
5581 error ("unknown value %qs for -march", str
);
5585 /* Parse the CPU string. */
5588 aarch64_parse_cpu (void)
5591 const struct processor
*cpu
;
5592 char *str
= (char *) alloca (strlen (aarch64_cpu_string
) + 1);
5595 strcpy (str
, aarch64_cpu_string
);
5597 ext
= strchr (str
, '+');
5606 error ("missing cpu name in -mcpu=%qs", str
);
5610 /* Loop through the list of supported CPUs to find a match. */
5611 for (cpu
= all_cores
; cpu
->name
!= NULL
; cpu
++)
5613 if (strlen (cpu
->name
) == len
&& strncmp (cpu
->name
, str
, len
) == 0)
5616 selected_tune
= cpu
;
5617 aarch64_isa_flags
= selected_cpu
->flags
;
5621 /* CPU string contains at least one extension. */
5622 aarch64_parse_extension (ext
);
5629 /* CPU name not found in list. */
5630 error ("unknown value %qs for -mcpu", str
);
5634 /* Parse the TUNE string. */
5637 aarch64_parse_tune (void)
5639 const struct processor
*cpu
;
5640 char *str
= (char *) alloca (strlen (aarch64_tune_string
) + 1);
5641 strcpy (str
, aarch64_tune_string
);
5643 /* Loop through the list of supported CPUs to find a match. */
5644 for (cpu
= all_cores
; cpu
->name
!= NULL
; cpu
++)
5646 if (strcmp (cpu
->name
, str
) == 0)
5648 selected_tune
= cpu
;
5653 /* CPU name not found in list. */
5654 error ("unknown value %qs for -mtune", str
);
5659 /* Implement TARGET_OPTION_OVERRIDE. */
5662 aarch64_override_options (void)
5664 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
5665 If either of -march or -mtune is given, they override their
5666 respective component of -mcpu.
5668 So, first parse AARCH64_CPU_STRING, then the others, be careful
5669 with -march as, if -mcpu is not present on the command line, march
5670 must set a sensible default CPU. */
5671 if (aarch64_cpu_string
)
5673 aarch64_parse_cpu ();
5676 if (aarch64_arch_string
)
5678 aarch64_parse_arch ();
5681 if (aarch64_tune_string
)
5683 aarch64_parse_tune ();
5686 #ifndef HAVE_AS_MABI_OPTION
5687 /* The compiler may have been configured with 2.23.* binutils, which does
5688 not have support for ILP32. */
5690 error ("Assembler does not support -mabi=ilp32");
5693 initialize_aarch64_code_model ();
5695 aarch64_build_bitmask_table ();
5697 /* This target defaults to strict volatile bitfields. */
5698 if (flag_strict_volatile_bitfields
< 0 && abi_version_at_least (2))
5699 flag_strict_volatile_bitfields
= 1;
5701 /* If the user did not specify a processor, choose the default
5702 one for them. This will be the CPU set during configuration using
5703 --with-cpu, otherwise it is "generic". */
5706 selected_cpu
= &all_cores
[TARGET_CPU_DEFAULT
& 0x3f];
5707 aarch64_isa_flags
= TARGET_CPU_DEFAULT
>> 6;
5710 gcc_assert (selected_cpu
);
5712 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
5714 selected_tune
= &all_cores
[selected_cpu
->core
];
5716 aarch64_tune_flags
= selected_tune
->flags
;
5717 aarch64_tune
= selected_tune
->core
;
5718 aarch64_tune_params
= selected_tune
->tune
;
5720 aarch64_override_options_after_change ();
5723 /* Implement targetm.override_options_after_change. */
5726 aarch64_override_options_after_change (void)
5728 if (flag_omit_frame_pointer
)
5729 flag_omit_leaf_frame_pointer
= false;
5730 else if (flag_omit_leaf_frame_pointer
)
5731 flag_omit_frame_pointer
= true;
/* Allocate a fresh, zero-initialized per-function machine_function
   record (installed via init_machine_status).  */
static struct machine_function *
aarch64_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_alloc_cleared_machine_function ();
  return machine;
}
5743 aarch64_init_expanders (void)
5745 init_machine_status
= aarch64_init_machine_status
;
5748 /* A checking mechanism for the implementation of the various code models. */
5750 initialize_aarch64_code_model (void)
5754 switch (aarch64_cmodel_var
)
5756 case AARCH64_CMODEL_TINY
:
5757 aarch64_cmodel
= AARCH64_CMODEL_TINY_PIC
;
5759 case AARCH64_CMODEL_SMALL
:
5760 aarch64_cmodel
= AARCH64_CMODEL_SMALL_PIC
;
5762 case AARCH64_CMODEL_LARGE
:
5763 sorry ("code model %qs with -f%s", "large",
5764 flag_pic
> 1 ? "PIC" : "pic");
5770 aarch64_cmodel
= aarch64_cmodel_var
;
5773 /* Return true if SYMBOL_REF X binds locally. */
5776 aarch64_symbol_binds_local_p (const_rtx x
)
5778 return (SYMBOL_REF_DECL (x
)
5779 ? targetm
.binds_local_p (SYMBOL_REF_DECL (x
))
5780 : SYMBOL_REF_LOCAL_P (x
));
5783 /* Return true if SYMBOL_REF X is thread local */
5785 aarch64_tls_symbol_p (rtx x
)
5787 if (! TARGET_HAVE_TLS
)
5790 if (GET_CODE (x
) != SYMBOL_REF
)
5793 return SYMBOL_REF_TLS_MODEL (x
) != 0;
5796 /* Classify a TLS symbol into one of the TLS kinds. */
5797 enum aarch64_symbol_type
5798 aarch64_classify_tls_symbol (rtx x
)
5800 enum tls_model tls_kind
= tls_symbolic_operand_type (x
);
5804 case TLS_MODEL_GLOBAL_DYNAMIC
:
5805 case TLS_MODEL_LOCAL_DYNAMIC
:
5806 return TARGET_TLS_DESC
? SYMBOL_SMALL_TLSDESC
: SYMBOL_SMALL_TLSGD
;
5808 case TLS_MODEL_INITIAL_EXEC
:
5809 return SYMBOL_SMALL_GOTTPREL
;
5811 case TLS_MODEL_LOCAL_EXEC
:
5812 return SYMBOL_SMALL_TPREL
;
5814 case TLS_MODEL_EMULATED
:
5815 case TLS_MODEL_NONE
:
5816 return SYMBOL_FORCE_TO_MEM
;
5823 /* Return the method that should be used to access SYMBOL_REF or
5824 LABEL_REF X in context CONTEXT. */
5826 enum aarch64_symbol_type
5827 aarch64_classify_symbol (rtx x
,
5828 enum aarch64_symbol_context context ATTRIBUTE_UNUSED
)
5830 if (GET_CODE (x
) == LABEL_REF
)
5832 switch (aarch64_cmodel
)
5834 case AARCH64_CMODEL_LARGE
:
5835 return SYMBOL_FORCE_TO_MEM
;
5837 case AARCH64_CMODEL_TINY_PIC
:
5838 case AARCH64_CMODEL_TINY
:
5839 return SYMBOL_TINY_ABSOLUTE
;
5841 case AARCH64_CMODEL_SMALL_PIC
:
5842 case AARCH64_CMODEL_SMALL
:
5843 return SYMBOL_SMALL_ABSOLUTE
;
5850 if (GET_CODE (x
) == SYMBOL_REF
)
5852 if (aarch64_cmodel
== AARCH64_CMODEL_LARGE
)
5853 return SYMBOL_FORCE_TO_MEM
;
5855 if (aarch64_tls_symbol_p (x
))
5856 return aarch64_classify_tls_symbol (x
);
5858 switch (aarch64_cmodel
)
5860 case AARCH64_CMODEL_TINY
:
5861 if (SYMBOL_REF_WEAK (x
))
5862 return SYMBOL_FORCE_TO_MEM
;
5863 return SYMBOL_TINY_ABSOLUTE
;
5865 case AARCH64_CMODEL_SMALL
:
5866 if (SYMBOL_REF_WEAK (x
))
5867 return SYMBOL_FORCE_TO_MEM
;
5868 return SYMBOL_SMALL_ABSOLUTE
;
5870 case AARCH64_CMODEL_TINY_PIC
:
5871 if (!aarch64_symbol_binds_local_p (x
))
5872 return SYMBOL_TINY_GOT
;
5873 return SYMBOL_TINY_ABSOLUTE
;
5875 case AARCH64_CMODEL_SMALL_PIC
:
5876 if (!aarch64_symbol_binds_local_p (x
))
5877 return SYMBOL_SMALL_GOT
;
5878 return SYMBOL_SMALL_ABSOLUTE
;
5885 /* By default push everything into the constant pool. */
5886 return SYMBOL_FORCE_TO_MEM
;
5890 aarch64_constant_address_p (rtx x
)
5892 return (CONSTANT_P (x
) && memory_address_p (DImode
, x
));
5896 aarch64_legitimate_pic_operand_p (rtx x
)
5898 if (GET_CODE (x
) == SYMBOL_REF
5899 || (GET_CODE (x
) == CONST
5900 && GET_CODE (XEXP (x
, 0)) == PLUS
5901 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
))
5907 /* Return true if X holds either a quarter-precision or
5908 floating-point +0.0 constant. */
5910 aarch64_valid_floating_const (enum machine_mode mode
, rtx x
)
5912 if (!CONST_DOUBLE_P (x
))
5915 /* TODO: We could handle moving 0.0 to a TFmode register,
5916 but first we would like to refactor the movtf_aarch64
5917 to be more amicable to split moves properly and
5918 correctly gate on TARGET_SIMD. For now - reject all
5919 constants which are not to SFmode or DFmode registers. */
5920 if (!(mode
== SFmode
|| mode
== DFmode
))
5923 if (aarch64_float_const_zero_rtx_p (x
))
5925 return aarch64_float_const_representable_p (x
);
5929 aarch64_legitimate_constant_p (enum machine_mode mode
, rtx x
)
5931 /* Do not allow vector struct mode constants. We could support
5932 0 and -1 easily, but they need support in aarch64-simd.md. */
5933 if (TARGET_SIMD
&& aarch64_vect_struct_mode_p (mode
))
5936 /* This could probably go away because
5937 we now decompose CONST_INTs according to expand_mov_immediate. */
5938 if ((GET_CODE (x
) == CONST_VECTOR
5939 && aarch64_simd_valid_immediate (x
, mode
, false, NULL
))
5940 || CONST_INT_P (x
) || aarch64_valid_floating_const (mode
, x
))
5941 return !targetm
.cannot_force_const_mem (mode
, x
);
5943 if (GET_CODE (x
) == HIGH
5944 && aarch64_valid_symref (XEXP (x
, 0), GET_MODE (XEXP (x
, 0))))
5947 return aarch64_constant_address_p (x
);
5951 aarch64_load_tp (rtx target
)
5954 || GET_MODE (target
) != Pmode
5955 || !register_operand (target
, Pmode
))
5956 target
= gen_reg_rtx (Pmode
);
5958 /* Can return in any reg. */
5959 emit_insn (gen_aarch64_load_tp_hard (target
));
5963 /* On AAPCS systems, this is the "struct __va_list". */
5964 static GTY(()) tree va_list_type
;
5966 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5967 Return the type to use as __builtin_va_list.
5969 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5981 aarch64_build_builtin_va_list (void)
5984 tree f_stack
, f_grtop
, f_vrtop
, f_groff
, f_vroff
;
5986 /* Create the type. */
5987 va_list_type
= lang_hooks
.types
.make_type (RECORD_TYPE
);
5988 /* Give it the required name. */
5989 va_list_name
= build_decl (BUILTINS_LOCATION
,
5991 get_identifier ("__va_list"),
5993 DECL_ARTIFICIAL (va_list_name
) = 1;
5994 TYPE_NAME (va_list_type
) = va_list_name
;
5995 TYPE_STUB_DECL (va_list_type
) = va_list_name
;
5997 /* Create the fields. */
5998 f_stack
= build_decl (BUILTINS_LOCATION
,
5999 FIELD_DECL
, get_identifier ("__stack"),
6001 f_grtop
= build_decl (BUILTINS_LOCATION
,
6002 FIELD_DECL
, get_identifier ("__gr_top"),
6004 f_vrtop
= build_decl (BUILTINS_LOCATION
,
6005 FIELD_DECL
, get_identifier ("__vr_top"),
6007 f_groff
= build_decl (BUILTINS_LOCATION
,
6008 FIELD_DECL
, get_identifier ("__gr_offs"),
6010 f_vroff
= build_decl (BUILTINS_LOCATION
,
6011 FIELD_DECL
, get_identifier ("__vr_offs"),
6014 DECL_ARTIFICIAL (f_stack
) = 1;
6015 DECL_ARTIFICIAL (f_grtop
) = 1;
6016 DECL_ARTIFICIAL (f_vrtop
) = 1;
6017 DECL_ARTIFICIAL (f_groff
) = 1;
6018 DECL_ARTIFICIAL (f_vroff
) = 1;
6020 DECL_FIELD_CONTEXT (f_stack
) = va_list_type
;
6021 DECL_FIELD_CONTEXT (f_grtop
) = va_list_type
;
6022 DECL_FIELD_CONTEXT (f_vrtop
) = va_list_type
;
6023 DECL_FIELD_CONTEXT (f_groff
) = va_list_type
;
6024 DECL_FIELD_CONTEXT (f_vroff
) = va_list_type
;
6026 TYPE_FIELDS (va_list_type
) = f_stack
;
6027 DECL_CHAIN (f_stack
) = f_grtop
;
6028 DECL_CHAIN (f_grtop
) = f_vrtop
;
6029 DECL_CHAIN (f_vrtop
) = f_groff
;
6030 DECL_CHAIN (f_groff
) = f_vroff
;
6032 /* Compute its layout. */
6033 layout_type (va_list_type
);
6035 return va_list_type
;
6038 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
6040 aarch64_expand_builtin_va_start (tree valist
, rtx nextarg ATTRIBUTE_UNUSED
)
6042 const CUMULATIVE_ARGS
*cum
;
6043 tree f_stack
, f_grtop
, f_vrtop
, f_groff
, f_vroff
;
6044 tree stack
, grtop
, vrtop
, groff
, vroff
;
6046 int gr_save_area_size
;
6047 int vr_save_area_size
;
6050 cum
= &crtl
->args
.info
;
6052 = (NUM_ARG_REGS
- cum
->aapcs_ncrn
) * UNITS_PER_WORD
;
6054 = (NUM_FP_ARG_REGS
- cum
->aapcs_nvrn
) * UNITS_PER_VREG
;
6056 if (TARGET_GENERAL_REGS_ONLY
)
6058 if (cum
->aapcs_nvrn
> 0)
6059 sorry ("%qs and floating point or vector arguments",
6060 "-mgeneral-regs-only");
6061 vr_save_area_size
= 0;
6064 f_stack
= TYPE_FIELDS (va_list_type_node
);
6065 f_grtop
= DECL_CHAIN (f_stack
);
6066 f_vrtop
= DECL_CHAIN (f_grtop
);
6067 f_groff
= DECL_CHAIN (f_vrtop
);
6068 f_vroff
= DECL_CHAIN (f_groff
);
6070 stack
= build3 (COMPONENT_REF
, TREE_TYPE (f_stack
), valist
, f_stack
,
6072 grtop
= build3 (COMPONENT_REF
, TREE_TYPE (f_grtop
), valist
, f_grtop
,
6074 vrtop
= build3 (COMPONENT_REF
, TREE_TYPE (f_vrtop
), valist
, f_vrtop
,
6076 groff
= build3 (COMPONENT_REF
, TREE_TYPE (f_groff
), valist
, f_groff
,
6078 vroff
= build3 (COMPONENT_REF
, TREE_TYPE (f_vroff
), valist
, f_vroff
,
6081 /* Emit code to initialize STACK, which points to the next varargs stack
6082 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
6083 by named arguments. STACK is 8-byte aligned. */
6084 t
= make_tree (TREE_TYPE (stack
), virtual_incoming_args_rtx
);
6085 if (cum
->aapcs_stack_size
> 0)
6086 t
= fold_build_pointer_plus_hwi (t
, cum
->aapcs_stack_size
* UNITS_PER_WORD
);
6087 t
= build2 (MODIFY_EXPR
, TREE_TYPE (stack
), stack
, t
);
6088 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6090 /* Emit code to initialize GRTOP, the top of the GR save area.
6091 virtual_incoming_args_rtx should have been 16 byte aligned. */
6092 t
= make_tree (TREE_TYPE (grtop
), virtual_incoming_args_rtx
);
6093 t
= build2 (MODIFY_EXPR
, TREE_TYPE (grtop
), grtop
, t
);
6094 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6096 /* Emit code to initialize VRTOP, the top of the VR save area.
6097 This address is gr_save_area_bytes below GRTOP, rounded
6098 down to the next 16-byte boundary. */
6099 t
= make_tree (TREE_TYPE (vrtop
), virtual_incoming_args_rtx
);
6100 vr_offset
= AARCH64_ROUND_UP (gr_save_area_size
,
6101 STACK_BOUNDARY
/ BITS_PER_UNIT
);
6104 t
= fold_build_pointer_plus_hwi (t
, -vr_offset
);
6105 t
= build2 (MODIFY_EXPR
, TREE_TYPE (vrtop
), vrtop
, t
);
6106 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6108 /* Emit code to initialize GROFF, the offset from GRTOP of the
6109 next GPR argument. */
6110 t
= build2 (MODIFY_EXPR
, TREE_TYPE (groff
), groff
,
6111 build_int_cst (TREE_TYPE (groff
), -gr_save_area_size
));
6112 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6114 /* Likewise emit code to initialize VROFF, the offset from FTOP
6115 of the next VR argument. */
6116 t
= build2 (MODIFY_EXPR
, TREE_TYPE (vroff
), vroff
,
6117 build_int_cst (TREE_TYPE (vroff
), -vr_save_area_size
));
6118 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
6121 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
6124 aarch64_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
6125 gimple_seq
*post_p ATTRIBUTE_UNUSED
)
6129 bool is_ha
; /* is HFA or HVA. */
6130 bool dw_align
; /* double-word align. */
6131 enum machine_mode ag_mode
= VOIDmode
;
6133 enum machine_mode mode
;
6135 tree f_stack
, f_grtop
, f_vrtop
, f_groff
, f_vroff
;
6136 tree stack
, f_top
, f_off
, off
, arg
, roundup
, on_stack
;
6137 HOST_WIDE_INT size
, rsize
, adjust
, align
;
6138 tree t
, u
, cond1
, cond2
;
6140 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
6142 type
= build_pointer_type (type
);
6144 mode
= TYPE_MODE (type
);
6146 f_stack
= TYPE_FIELDS (va_list_type_node
);
6147 f_grtop
= DECL_CHAIN (f_stack
);
6148 f_vrtop
= DECL_CHAIN (f_grtop
);
6149 f_groff
= DECL_CHAIN (f_vrtop
);
6150 f_vroff
= DECL_CHAIN (f_groff
);
6152 stack
= build3 (COMPONENT_REF
, TREE_TYPE (f_stack
), unshare_expr (valist
),
6153 f_stack
, NULL_TREE
);
6154 size
= int_size_in_bytes (type
);
6155 align
= aarch64_function_arg_alignment (mode
, type
) / BITS_PER_UNIT
;
6159 if (aarch64_vfp_is_call_or_return_candidate (mode
,
6165 /* TYPE passed in fp/simd registers. */
6166 if (TARGET_GENERAL_REGS_ONLY
)
6167 sorry ("%qs and floating point or vector arguments",
6168 "-mgeneral-regs-only");
6170 f_top
= build3 (COMPONENT_REF
, TREE_TYPE (f_vrtop
),
6171 unshare_expr (valist
), f_vrtop
, NULL_TREE
);
6172 f_off
= build3 (COMPONENT_REF
, TREE_TYPE (f_vroff
),
6173 unshare_expr (valist
), f_vroff
, NULL_TREE
);
6175 rsize
= nregs
* UNITS_PER_VREG
;
6179 if (BYTES_BIG_ENDIAN
&& GET_MODE_SIZE (ag_mode
) < UNITS_PER_VREG
)
6180 adjust
= UNITS_PER_VREG
- GET_MODE_SIZE (ag_mode
);
6182 else if (BLOCK_REG_PADDING (mode
, type
, 1) == downward
6183 && size
< UNITS_PER_VREG
)
6185 adjust
= UNITS_PER_VREG
- size
;
6190 /* TYPE passed in general registers. */
6191 f_top
= build3 (COMPONENT_REF
, TREE_TYPE (f_grtop
),
6192 unshare_expr (valist
), f_grtop
, NULL_TREE
);
6193 f_off
= build3 (COMPONENT_REF
, TREE_TYPE (f_groff
),
6194 unshare_expr (valist
), f_groff
, NULL_TREE
);
6195 rsize
= (size
+ UNITS_PER_WORD
- 1) & -UNITS_PER_WORD
;
6196 nregs
= rsize
/ UNITS_PER_WORD
;
6201 if (BLOCK_REG_PADDING (mode
, type
, 1) == downward
6202 && size
< UNITS_PER_WORD
)
6204 adjust
= UNITS_PER_WORD
- size
;
6208 /* Get a local temporary for the field value. */
6209 off
= get_initialized_tmp_var (f_off
, pre_p
, NULL
);
6211 /* Emit code to branch if off >= 0. */
6212 t
= build2 (GE_EXPR
, boolean_type_node
, off
,
6213 build_int_cst (TREE_TYPE (off
), 0));
6214 cond1
= build3 (COND_EXPR
, ptr_type_node
, t
, NULL_TREE
, NULL_TREE
);
6218 /* Emit: offs = (offs + 15) & -16. */
6219 t
= build2 (PLUS_EXPR
, TREE_TYPE (off
), off
,
6220 build_int_cst (TREE_TYPE (off
), 15));
6221 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (off
), t
,
6222 build_int_cst (TREE_TYPE (off
), -16));
6223 roundup
= build2 (MODIFY_EXPR
, TREE_TYPE (off
), off
, t
);
6228 /* Update ap.__[g|v]r_offs */
6229 t
= build2 (PLUS_EXPR
, TREE_TYPE (off
), off
,
6230 build_int_cst (TREE_TYPE (off
), rsize
));
6231 t
= build2 (MODIFY_EXPR
, TREE_TYPE (f_off
), unshare_expr (f_off
), t
);
6235 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), roundup
, t
);
6237 /* [cond2] if (ap.__[g|v]r_offs > 0) */
6238 u
= build2 (GT_EXPR
, boolean_type_node
, unshare_expr (f_off
),
6239 build_int_cst (TREE_TYPE (f_off
), 0));
6240 cond2
= build3 (COND_EXPR
, ptr_type_node
, u
, NULL_TREE
, NULL_TREE
);
6242 /* String up: make sure the assignment happens before the use. */
6243 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (cond2
), t
, cond2
);
6244 COND_EXPR_ELSE (cond1
) = t
;
6246 /* Prepare the trees handling the argument that is passed on the stack;
6247 the top level node will store in ON_STACK. */
6248 arg
= get_initialized_tmp_var (stack
, pre_p
, NULL
);
6251 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
6252 t
= fold_convert (intDI_type_node
, arg
);
6253 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
6254 build_int_cst (TREE_TYPE (t
), 15));
6255 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
6256 build_int_cst (TREE_TYPE (t
), -16));
6257 t
= fold_convert (TREE_TYPE (arg
), t
);
6258 roundup
= build2 (MODIFY_EXPR
, TREE_TYPE (arg
), arg
, t
);
6262 /* Advance ap.__stack */
6263 t
= fold_convert (intDI_type_node
, arg
);
6264 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
6265 build_int_cst (TREE_TYPE (t
), size
+ 7));
6266 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
6267 build_int_cst (TREE_TYPE (t
), -8));
6268 t
= fold_convert (TREE_TYPE (arg
), t
);
6269 t
= build2 (MODIFY_EXPR
, TREE_TYPE (stack
), unshare_expr (stack
), t
);
6270 /* String up roundup and advance. */
6272 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), roundup
, t
);
6273 /* String up with arg */
6274 on_stack
= build2 (COMPOUND_EXPR
, TREE_TYPE (arg
), t
, arg
);
6275 /* Big-endianness related address adjustment. */
6276 if (BLOCK_REG_PADDING (mode
, type
, 1) == downward
6277 && size
< UNITS_PER_WORD
)
6279 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (arg
), arg
,
6280 size_int (UNITS_PER_WORD
- size
));
6281 on_stack
= build2 (COMPOUND_EXPR
, TREE_TYPE (arg
), on_stack
, t
);
6284 COND_EXPR_THEN (cond1
) = unshare_expr (on_stack
);
6285 COND_EXPR_THEN (cond2
) = unshare_expr (on_stack
);
6287 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
6290 t
= build2 (PREINCREMENT_EXPR
, TREE_TYPE (off
), off
,
6291 build_int_cst (TREE_TYPE (off
), adjust
));
6293 t
= fold_convert (sizetype
, t
);
6294 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (f_top
), f_top
, t
);
6298 /* type ha; // treat as "struct {ftype field[n];}"
6299 ... [computing offs]
6300 for (i = 0; i <nregs; ++i, offs += 16)
6301 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
6304 tree tmp_ha
, field_t
, field_ptr_t
;
6306 /* Declare a local variable. */
6307 tmp_ha
= create_tmp_var_raw (type
, "ha");
6308 gimple_add_tmp_var (tmp_ha
);
6310 /* Establish the base type. */
6314 field_t
= float_type_node
;
6315 field_ptr_t
= float_ptr_type_node
;
6318 field_t
= double_type_node
;
6319 field_ptr_t
= double_ptr_type_node
;
6322 field_t
= long_double_type_node
;
6323 field_ptr_t
= long_double_ptr_type_node
;
6325 /* The half precision and quad precision are not fully supported yet. Enable
6326 the following code after the support is complete. Need to find the correct
6327 type node for __fp16 *. */
6330 field_t
= float_type_node
;
6331 field_ptr_t
= float_ptr_type_node
;
6337 tree innertype
= make_signed_type (GET_MODE_PRECISION (SImode
));
6338 field_t
= build_vector_type_for_mode (innertype
, ag_mode
);
6339 field_ptr_t
= build_pointer_type (field_t
);
6346 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area */
6347 tmp_ha
= build1 (ADDR_EXPR
, field_ptr_t
, tmp_ha
);
6349 t
= fold_convert (field_ptr_t
, addr
);
6350 t
= build2 (MODIFY_EXPR
, field_t
,
6351 build1 (INDIRECT_REF
, field_t
, tmp_ha
),
6352 build1 (INDIRECT_REF
, field_t
, t
));
6354 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
6355 for (i
= 1; i
< nregs
; ++i
)
6357 addr
= fold_build_pointer_plus_hwi (addr
, UNITS_PER_VREG
);
6358 u
= fold_convert (field_ptr_t
, addr
);
6359 u
= build2 (MODIFY_EXPR
, field_t
,
6360 build2 (MEM_REF
, field_t
, tmp_ha
,
6361 build_int_cst (field_ptr_t
,
6363 int_size_in_bytes (field_t
)))),
6364 build1 (INDIRECT_REF
, field_t
, u
));
6365 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), t
, u
);
6368 u
= fold_convert (TREE_TYPE (f_top
), tmp_ha
);
6369 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (f_top
), t
, u
);
6372 COND_EXPR_ELSE (cond2
) = t
;
6373 addr
= fold_convert (build_pointer_type (type
), cond1
);
6374 addr
= build_va_arg_indirect_ref (addr
);
6377 addr
= build_va_arg_indirect_ref (addr
);
6382 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
6385 aarch64_setup_incoming_varargs (cumulative_args_t cum_v
, enum machine_mode mode
,
6386 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
6389 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
6390 CUMULATIVE_ARGS local_cum
;
6391 int gr_saved
, vr_saved
;
6393 /* The caller has advanced CUM up to, but not beyond, the last named
6394 argument. Advance a local copy of CUM past the last "real" named
6395 argument, to find out how many registers are left over. */
6397 aarch64_function_arg_advance (pack_cumulative_args(&local_cum
), mode
, type
, true);
6399 /* Found out how many registers we need to save. */
6400 gr_saved
= NUM_ARG_REGS
- local_cum
.aapcs_ncrn
;
6401 vr_saved
= NUM_FP_ARG_REGS
- local_cum
.aapcs_nvrn
;
6403 if (TARGET_GENERAL_REGS_ONLY
)
6405 if (local_cum
.aapcs_nvrn
> 0)
6406 sorry ("%qs and floating point or vector arguments",
6407 "-mgeneral-regs-only");
6417 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
6418 ptr
= plus_constant (Pmode
, virtual_incoming_args_rtx
,
6419 - gr_saved
* UNITS_PER_WORD
);
6420 mem
= gen_frame_mem (BLKmode
, ptr
);
6421 set_mem_alias_set (mem
, get_varargs_alias_set ());
6423 move_block_from_reg (local_cum
.aapcs_ncrn
+ R0_REGNUM
,
6428 /* We can't use move_block_from_reg, because it will use
6429 the wrong mode, storing D regs only. */
6430 enum machine_mode mode
= TImode
;
6433 /* Set OFF to the offset from virtual_incoming_args_rtx of
6434 the first vector register. The VR save area lies below
6435 the GR one, and is aligned to 16 bytes. */
6436 off
= -AARCH64_ROUND_UP (gr_saved
* UNITS_PER_WORD
,
6437 STACK_BOUNDARY
/ BITS_PER_UNIT
);
6438 off
-= vr_saved
* UNITS_PER_VREG
;
6440 for (i
= local_cum
.aapcs_nvrn
; i
< NUM_FP_ARG_REGS
; ++i
)
6444 ptr
= plus_constant (Pmode
, virtual_incoming_args_rtx
, off
);
6445 mem
= gen_frame_mem (mode
, ptr
);
6446 set_mem_alias_set (mem
, get_varargs_alias_set ());
6447 aarch64_emit_move (mem
, gen_rtx_REG (mode
, V0_REGNUM
+ i
));
6448 off
+= UNITS_PER_VREG
;
6453 /* We don't save the size into *PRETEND_SIZE because we want to avoid
6454 any complication of having crtl->args.pretend_args_size changed. */
6455 cfun
->machine
->saved_varargs_size
6456 = (AARCH64_ROUND_UP (gr_saved
* UNITS_PER_WORD
,
6457 STACK_BOUNDARY
/ BITS_PER_UNIT
)
6458 + vr_saved
* UNITS_PER_VREG
);
6462 aarch64_conditional_register_usage (void)
6467 for (i
= V0_REGNUM
; i
<= V31_REGNUM
; i
++)
6470 call_used_regs
[i
] = 1;
6475 /* Walk down the type tree of TYPE counting consecutive base elements.
6476 If *MODEP is VOIDmode, then set it to the first valid floating point
6477 type. If a non-floating point type is found, or if a floating point
6478 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6479 otherwise return the count in the sub-tree. */
6481 aapcs_vfp_sub_candidate (const_tree type
, enum machine_mode
*modep
)
6483 enum machine_mode mode
;
6486 switch (TREE_CODE (type
))
6489 mode
= TYPE_MODE (type
);
6490 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= TFmode
)
6493 if (*modep
== VOIDmode
)
6502 mode
= TYPE_MODE (TREE_TYPE (type
));
6503 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= TFmode
)
6506 if (*modep
== VOIDmode
)
6515 /* Use V2SImode and V4SImode as representatives of all 64-bit
6516 and 128-bit vector types. */
6517 size
= int_size_in_bytes (type
);
6530 if (*modep
== VOIDmode
)
6533 /* Vector modes are considered to be opaque: two vectors are
6534 equivalent for the purposes of being homogeneous aggregates
6535 if they are the same size. */
6544 tree index
= TYPE_DOMAIN (type
);
6546 /* Can't handle incomplete types nor sizes that are not
6548 if (!COMPLETE_TYPE_P (type
)
6549 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
6552 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
6555 || !TYPE_MAX_VALUE (index
)
6556 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
6557 || !TYPE_MIN_VALUE (index
)
6558 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
6562 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
6563 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
6565 /* There must be no padding. */
6566 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
6578 /* Can't handle incomplete types nor sizes that are not
6580 if (!COMPLETE_TYPE_P (type
)
6581 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
6584 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
6586 if (TREE_CODE (field
) != FIELD_DECL
)
6589 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
6595 /* There must be no padding. */
6596 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
6603 case QUAL_UNION_TYPE
:
6605 /* These aren't very interesting except in a degenerate case. */
6610 /* Can't handle incomplete types nor sizes that are not
6612 if (!COMPLETE_TYPE_P (type
)
6613 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
6616 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
6618 if (TREE_CODE (field
) != FIELD_DECL
)
6621 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
6624 count
= count
> sub_count
? count
: sub_count
;
6627 /* There must be no padding. */
6628 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
6641 /* Return true if we use LRA instead of reload pass. */
6643 aarch64_lra_p (void)
6645 return aarch64_lra_flag
;
6648 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
6649 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
6650 array types. The C99 floating-point complex types are also considered
6651 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
6652 types, which are GCC extensions and out of the scope of AAPCS64, are
6653 treated as composite types here as well.
6655 Note that MODE itself is not sufficient in determining whether a type
6656 is such a composite type or not. This is because
6657 stor-layout.c:compute_record_mode may have already changed the MODE
6658 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
6659 structure with only one field may have its MODE set to the mode of the
6660 field. Also an integer mode whose size matches the size of the
6661 RECORD_TYPE type may be used to substitute the original mode
6662 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
6663 solely relied on. */
6666 aarch64_composite_type_p (const_tree type
,
6667 enum machine_mode mode
)
6669 if (type
&& (AGGREGATE_TYPE_P (type
) || TREE_CODE (type
) == COMPLEX_TYPE
))
6673 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
6674 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
6680 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
6681 type as described in AAPCS64 \S 4.1.2.
6683 See the comment above aarch64_composite_type_p for the notes on MODE. */
6686 aarch64_short_vector_p (const_tree type
,
6687 enum machine_mode mode
)
6689 HOST_WIDE_INT size
= -1;
6691 if (type
&& TREE_CODE (type
) == VECTOR_TYPE
)
6692 size
= int_size_in_bytes (type
);
6693 else if (!aarch64_composite_type_p (type
, mode
)
6694 && (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
6695 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
))
6696 size
= GET_MODE_SIZE (mode
);
6698 return (size
== 8 || size
== 16) ? true : false;
6701 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
6702 shall be passed or returned in simd/fp register(s) (providing these
6703 parameter passing registers are available).
6705 Upon successful return, *COUNT returns the number of needed registers,
6706 *BASE_MODE returns the mode of the individual register and when IS_HAF
6707 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
6708 floating-point aggregate or a homogeneous short-vector aggregate. */
6711 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode
,
6713 enum machine_mode
*base_mode
,
6717 enum machine_mode new_mode
= VOIDmode
;
6718 bool composite_p
= aarch64_composite_type_p (type
, mode
);
6720 if (is_ha
!= NULL
) *is_ha
= false;
6722 if ((!composite_p
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
)
6723 || aarch64_short_vector_p (type
, mode
))
6728 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
6730 if (is_ha
!= NULL
) *is_ha
= true;
6732 new_mode
= GET_MODE_INNER (mode
);
6734 else if (type
&& composite_p
)
6736 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
6738 if (ag_count
> 0 && ag_count
<= HA_MAX_NUM_FLDS
)
6740 if (is_ha
!= NULL
) *is_ha
= true;
6749 *base_mode
= new_mode
;
6753 /* Implement TARGET_STRUCT_VALUE_RTX. */
6756 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED
,
6757 int incoming ATTRIBUTE_UNUSED
)
6759 return gen_rtx_REG (Pmode
, AARCH64_STRUCT_VALUE_REGNUM
);
6762 /* Implements target hook vector_mode_supported_p. */
6764 aarch64_vector_mode_supported_p (enum machine_mode mode
)
6767 && (mode
== V4SImode
|| mode
== V8HImode
6768 || mode
== V16QImode
|| mode
== V2DImode
6769 || mode
== V2SImode
|| mode
== V4HImode
6770 || mode
== V8QImode
|| mode
== V2SFmode
6771 || mode
== V4SFmode
|| mode
== V2DFmode
))
6777 /* Return appropriate SIMD container
6778 for MODE within a vector of WIDTH bits. */
6779 static enum machine_mode
6780 aarch64_simd_container_mode (enum machine_mode mode
, unsigned width
)
6782 gcc_assert (width
== 64 || width
== 128);
6821 /* Return 128-bit container as the preferred SIMD mode for MODE. */
6822 static enum machine_mode
6823 aarch64_preferred_simd_mode (enum machine_mode mode
)
6825 return aarch64_simd_container_mode (mode
, 128);
/* Return the bitmask of possible vector sizes for the vectorizer
   to iterate over.  */
static unsigned int
aarch64_autovectorize_vector_sizes (void)
{
  /* Both 128-bit and 64-bit AdvSIMD vectors are available.  */
  return (16 | 8);
}
6836 /* A table to help perform AArch64-specific name mangling for AdvSIMD
6837 vector types in order to conform to the AAPCS64 (see "Procedure
6838 Call Standard for the ARM 64-bit Architecture", Appendix A). To
6839 qualify for emission with the mangled names defined in that document,
6840 a vector type must not only be of the correct mode but also be
6841 composed of AdvSIMD vector element types (e.g.
6842 _builtin_aarch64_simd_qi); these types are registered by
6843 aarch64_init_simd_builtins (). In other words, vector types defined
6844 in other ways e.g. via vector_size attribute will get default
6848 enum machine_mode mode
;
6849 const char *element_type_name
;
6850 const char *mangled_name
;
6851 } aarch64_simd_mangle_map_entry
;
6853 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map
[] = {
6854 /* 64-bit containerized types. */
6855 { V8QImode
, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
6856 { V8QImode
, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
6857 { V4HImode
, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
6858 { V4HImode
, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
6859 { V2SImode
, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
6860 { V2SImode
, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
6861 { V2SFmode
, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
6862 { V8QImode
, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
6863 { V4HImode
, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6864 /* 128-bit containerized types. */
6865 { V16QImode
, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
6866 { V16QImode
, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
6867 { V8HImode
, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
6868 { V8HImode
, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
6869 { V4SImode
, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
6870 { V4SImode
, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
6871 { V2DImode
, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
6872 { V2DImode
, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
6873 { V4SFmode
, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
6874 { V2DFmode
, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6875 { V16QImode
, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6876 { V8HImode
, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6877 { V2DImode
, "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
6878 { VOIDmode
, NULL
, NULL
}
6881 /* Implement TARGET_MANGLE_TYPE. */
6884 aarch64_mangle_type (const_tree type
)
6886 /* The AArch64 ABI documents say that "__va_list" has to be
6887 managled as if it is in the "std" namespace. */
6888 if (lang_hooks
.types_compatible_p (CONST_CAST_TREE (type
), va_list_type
))
6889 return "St9__va_list";
6891 /* Check the mode of the vector type, and the name of the vector
6892 element type, against the table. */
6893 if (TREE_CODE (type
) == VECTOR_TYPE
)
6895 aarch64_simd_mangle_map_entry
*pos
= aarch64_simd_mangle_map
;
6897 while (pos
->mode
!= VOIDmode
)
6899 tree elt_type
= TREE_TYPE (type
);
6901 if (pos
->mode
== TYPE_MODE (type
)
6902 && TREE_CODE (TYPE_NAME (elt_type
)) == TYPE_DECL
6903 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type
))),
6904 pos
->element_type_name
))
6905 return pos
->mangled_name
;
6911 /* Use the default mangling. */
/* Return the equivalent letter for size: the AdvSIMD element-size
   suffix used in assembly ('b'/'h'/'s'/'d' for 8/16/32/64 bits).  */
static char
sizetochar (int size)
{
  switch (size)
    {
    case 64: return 'd';
    case 32: return 's';
    case 16: return 'h';
    case 8 : return 'b';
    default: gcc_unreachable ();
    }
}
6929 /* Return true iff x is a uniform vector of floating-point
6930 constants, and the constant can be represented in
6931 quarter-precision form. Note, as aarch64_float_const_representable
6932 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
6934 aarch64_vect_float_const_representable_p (rtx x
)
6937 REAL_VALUE_TYPE r0
, ri
;
6940 if (GET_MODE_CLASS (GET_MODE (x
)) != MODE_VECTOR_FLOAT
)
6943 x0
= CONST_VECTOR_ELT (x
, 0);
6944 if (!CONST_DOUBLE_P (x0
))
6947 REAL_VALUE_FROM_CONST_DOUBLE (r0
, x0
);
6949 for (i
= 1; i
< CONST_VECTOR_NUNITS (x
); i
++)
6951 xi
= CONST_VECTOR_ELT (x
, i
);
6952 if (!CONST_DOUBLE_P (xi
))
6955 REAL_VALUE_FROM_CONST_DOUBLE (ri
, xi
);
6956 if (!REAL_VALUES_EQUAL (r0
, ri
))
6960 return aarch64_float_const_representable_p (x0
);
6963 /* Return true for valid and false for invalid. */
6965 aarch64_simd_valid_immediate (rtx op
, enum machine_mode mode
, bool inverse
,
6966 struct simd_immediate_info
*info
)
6968 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6970 for (i = 0; i < idx; i += (STRIDE)) \
6975 immtype = (CLASS); \
6976 elsize = (ELSIZE); \
6982 unsigned int i
, elsize
= 0, idx
= 0, n_elts
= CONST_VECTOR_NUNITS (op
);
6983 unsigned int innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
6984 unsigned char bytes
[16];
6985 int immtype
= -1, matches
;
6986 unsigned int invmask
= inverse
? 0xff : 0;
6989 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
6991 if (! (aarch64_simd_imm_zero_p (op
, mode
)
6992 || aarch64_vect_float_const_representable_p (op
)))
6997 info
->value
= CONST_VECTOR_ELT (op
, 0);
6998 info
->element_width
= GET_MODE_BITSIZE (GET_MODE (info
->value
));
7006 /* Splat vector constant out into a byte vector. */
7007 for (i
= 0; i
< n_elts
; i
++)
7009 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
7010 it must be laid out in the vector register in reverse order. */
7011 rtx el
= CONST_VECTOR_ELT (op
, BYTES_BIG_ENDIAN
? (n_elts
- 1 - i
) : i
);
7012 unsigned HOST_WIDE_INT elpart
;
7013 unsigned int part
, parts
;
7015 if (GET_CODE (el
) == CONST_INT
)
7017 elpart
= INTVAL (el
);
7020 else if (GET_CODE (el
) == CONST_DOUBLE
)
7022 elpart
= CONST_DOUBLE_LOW (el
);
7028 for (part
= 0; part
< parts
; part
++)
7031 for (byte
= 0; byte
< innersize
; byte
++)
7033 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
7034 elpart
>>= BITS_PER_UNIT
;
7036 if (GET_CODE (el
) == CONST_DOUBLE
)
7037 elpart
= CONST_DOUBLE_HIGH (el
);
7042 gcc_assert (idx
== GET_MODE_SIZE (mode
));
7046 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
7047 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 0, 0);
7049 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
7050 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 8, 0);
7052 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
7053 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0, 16, 0);
7055 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
7056 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3], 24, 0);
7058 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0, 0, 0);
7060 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1], 8, 0);
7062 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
7063 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 0, 1);
7065 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
7066 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 8, 1);
7068 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
7069 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff, 16, 1);
7071 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
7072 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3], 24, 1);
7074 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff, 0, 1);
7076 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1], 8, 1);
7078 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
7079 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 8, 0);
7081 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
7082 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 8, 1);
7084 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
7085 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0, 16, 0);
7087 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
7088 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff, 16, 1);
7090 CHECK (1, 8, 16, bytes
[i
] == bytes
[0], 0, 0);
7092 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
7093 && bytes
[i
] == bytes
[(i
+ 8) % idx
], 0, 0);
7102 info
->element_width
= elsize
;
7103 info
->mvn
= emvn
!= 0;
7104 info
->shift
= eshift
;
7106 unsigned HOST_WIDE_INT imm
= 0;
7108 if (immtype
>= 12 && immtype
<= 15)
7111 /* Un-invert bytes of recognized vector, if necessary. */
7113 for (i
= 0; i
< idx
; i
++)
7114 bytes
[i
] ^= invmask
;
7118 /* FIXME: Broken on 32-bit H_W_I hosts. */
7119 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
7121 for (i
= 0; i
< 8; i
++)
7122 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
7123 << (i
* BITS_PER_UNIT
);
7126 info
->value
= GEN_INT (imm
);
7130 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
7131 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
7133 /* Construct 'abcdefgh' because the assembler cannot handle
7134 generic constants. */
7137 imm
= (imm
>> info
->shift
) & 0xff;
7138 info
->value
= GEN_INT (imm
);
7147 aarch64_const_vec_all_same_int_p (rtx x
,
7148 HOST_WIDE_INT minval
,
7149 HOST_WIDE_INT maxval
)
7151 HOST_WIDE_INT firstval
;
7154 if (GET_CODE (x
) != CONST_VECTOR
7155 || GET_MODE_CLASS (GET_MODE (x
)) != MODE_VECTOR_INT
)
7158 firstval
= INTVAL (CONST_VECTOR_ELT (x
, 0));
7159 if (firstval
< minval
|| firstval
> maxval
)
7162 count
= CONST_VECTOR_NUNITS (x
);
7163 for (i
= 1; i
< count
; i
++)
7164 if (INTVAL (CONST_VECTOR_ELT (x
, i
)) != firstval
)
7170 /* Check of immediate shift constants are within range. */
7172 aarch64_simd_shift_imm_p (rtx x
, enum machine_mode mode
, bool left
)
7174 int bit_width
= GET_MODE_UNIT_SIZE (mode
) * BITS_PER_UNIT
;
7176 return aarch64_const_vec_all_same_int_p (x
, 0, bit_width
- 1);
7178 return aarch64_const_vec_all_same_int_p (x
, 1, bit_width
);
7181 /* Return true if X is a uniform vector where all elements
7182 are either the floating-point constant 0.0 or the
7183 integer constant 0. */
7185 aarch64_simd_imm_zero_p (rtx x
, enum machine_mode mode
)
7187 return x
== CONST0_RTX (mode
);
7191 aarch64_simd_imm_scalar_p (rtx x
, enum machine_mode mode ATTRIBUTE_UNUSED
)
7193 HOST_WIDE_INT imm
= INTVAL (x
);
7196 for (i
= 0; i
< 8; i
++)
7198 unsigned int byte
= imm
& 0xff;
7199 if (byte
!= 0xff && byte
!= 0)
7208 aarch64_mov_operand_p (rtx x
,
7209 enum aarch64_symbol_context context
,
7210 enum machine_mode mode
)
7212 if (GET_CODE (x
) == HIGH
7213 && aarch64_valid_symref (XEXP (x
, 0), GET_MODE (XEXP (x
, 0))))
7216 if (CONST_INT_P (x
) && aarch64_move_imm (INTVAL (x
), mode
))
7219 if (GET_CODE (x
) == SYMBOL_REF
&& mode
== DImode
&& CONSTANT_ADDRESS_P (x
))
7222 return aarch64_classify_symbolic_expression (x
, context
)
7223 == SYMBOL_TINY_ABSOLUTE
;
7226 /* Return a const_int vector of VAL. */
7228 aarch64_simd_gen_const_vector_dup (enum machine_mode mode
, int val
)
7230 int nunits
= GET_MODE_NUNITS (mode
);
7231 rtvec v
= rtvec_alloc (nunits
);
7234 for (i
=0; i
< nunits
; i
++)
7235 RTVEC_ELT (v
, i
) = GEN_INT (val
);
7237 return gen_rtx_CONST_VECTOR (mode
, v
);
7240 /* Check OP is a legal scalar immediate for the MOVI instruction. */
7243 aarch64_simd_scalar_immediate_valid_for_move (rtx op
, enum machine_mode mode
)
7245 enum machine_mode vmode
;
7247 gcc_assert (!VECTOR_MODE_P (mode
));
7248 vmode
= aarch64_preferred_simd_mode (mode
);
7249 rtx op_v
= aarch64_simd_gen_const_vector_dup (vmode
, INTVAL (op
));
7250 return aarch64_simd_valid_immediate (op_v
, vmode
, false, NULL
);
7253 /* Construct and return a PARALLEL RTX vector. */
7255 aarch64_simd_vect_par_cnst_half (enum machine_mode mode
, bool high
)
7257 int nunits
= GET_MODE_NUNITS (mode
);
7258 rtvec v
= rtvec_alloc (nunits
/ 2);
7259 int base
= high
? nunits
/ 2 : 0;
7263 for (i
=0; i
< nunits
/ 2; i
++)
7264 RTVEC_ELT (v
, i
) = GEN_INT (base
+ i
);
7266 t1
= gen_rtx_PARALLEL (mode
, v
);
7270 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
7271 HIGH (exclusive). */
7273 aarch64_simd_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
7276 gcc_assert (GET_CODE (operand
) == CONST_INT
);
7277 lane
= INTVAL (operand
);
7279 if (lane
< low
|| lane
>= high
)
7280 error ("lane out of range");
7284 aarch64_simd_const_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
)
7286 gcc_assert (GET_CODE (operand
) == CONST_INT
);
7287 HOST_WIDE_INT lane
= INTVAL (operand
);
7289 if (lane
< low
|| lane
>= high
)
7290 error ("constant out of range");
7293 /* Emit code to reinterpret one AdvSIMD type as another,
7294 without altering bits. */
7296 aarch64_simd_reinterpret (rtx dest
, rtx src
)
7298 emit_move_insn (dest
, gen_lowpart (GET_MODE (dest
), src
));
7301 /* Emit code to place a AdvSIMD pair result in memory locations (with equal
7304 aarch64_simd_emit_pair_result_insn (enum machine_mode mode
,
7305 rtx (*intfn
) (rtx
, rtx
, rtx
), rtx destaddr
,
7308 rtx mem
= gen_rtx_MEM (mode
, destaddr
);
7309 rtx tmp1
= gen_reg_rtx (mode
);
7310 rtx tmp2
= gen_reg_rtx (mode
);
7312 emit_insn (intfn (tmp1
, op1
, tmp2
));
7314 emit_move_insn (mem
, tmp1
);
7315 mem
= adjust_address (mem
, mode
, GET_MODE_SIZE (mode
));
7316 emit_move_insn (mem
, tmp2
);
7319 /* Return TRUE if OP is a valid vector addressing mode. */
7321 aarch64_simd_mem_operand_p (rtx op
)
7323 return MEM_P (op
) && (GET_CODE (XEXP (op
, 0)) == POST_INC
7324 || GET_CODE (XEXP (op
, 0)) == REG
);
7327 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
7328 not to early-clobber SRC registers in the process.
7330 We assume that the operands described by SRC and DEST represent a
7331 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
7332 number of components into which the copy has been decomposed. */
7334 aarch64_simd_disambiguate_copy (rtx
*operands
, rtx
*dest
,
7335 rtx
*src
, unsigned int count
)
7339 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
7340 || REGNO (operands
[0]) < REGNO (operands
[1]))
7342 for (i
= 0; i
< count
; i
++)
7344 operands
[2 * i
] = dest
[i
];
7345 operands
[2 * i
+ 1] = src
[i
];
7350 for (i
= 0; i
< count
; i
++)
7352 operands
[2 * i
] = dest
[count
- i
- 1];
7353 operands
[2 * i
+ 1] = src
[count
- i
- 1];
7358 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
7359 one of VSTRUCT modes: OI, CI or XI. */
7361 aarch64_simd_attr_length_move (rtx insn
)
7363 enum machine_mode mode
;
7365 extract_insn_cached (insn
);
7367 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
7369 mode
= GET_MODE (recog_data
.operand
[0]);
7385 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
7386 alignment of a vector to 128 bits. */
7387 static HOST_WIDE_INT
7388 aarch64_simd_vector_alignment (const_tree type
)
7390 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
7391 return MIN (align
, 128);
7394 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
7396 aarch64_simd_vector_alignment_reachable (const_tree type
, bool is_packed
)
7401 /* We guarantee alignment for vectors up to 128-bits. */
7402 if (tree_int_cst_compare (TYPE_SIZE (type
),
7403 bitsize_int (BIGGEST_ALIGNMENT
)) > 0)
7406 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
7410 /* If VALS is a vector constant that can be loaded into a register
7411 using DUP, generate instructions to do so and return an RTX to
7412 assign to the register. Otherwise return NULL_RTX. */
7414 aarch64_simd_dup_constant (rtx vals
)
7416 enum machine_mode mode
= GET_MODE (vals
);
7417 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
7418 int n_elts
= GET_MODE_NUNITS (mode
);
7419 bool all_same
= true;
7423 if (GET_CODE (vals
) != CONST_VECTOR
)
7426 for (i
= 1; i
< n_elts
; ++i
)
7428 x
= CONST_VECTOR_ELT (vals
, i
);
7429 if (!rtx_equal_p (x
, CONST_VECTOR_ELT (vals
, 0)))
7436 /* We can load this constant by using DUP and a constant in a
7437 single ARM register. This will be cheaper than a vector
7439 x
= copy_to_mode_reg (inner_mode
, CONST_VECTOR_ELT (vals
, 0));
7440 return gen_rtx_VEC_DUPLICATE (mode
, x
);
7444 /* Generate code to load VALS, which is a PARALLEL containing only
7445 constants (for vec_init) or CONST_VECTOR, efficiently into a
7446 register. Returns an RTX to copy into the register, or NULL_RTX
7447 for a PARALLEL that can not be converted into a CONST_VECTOR. */
7449 aarch64_simd_make_constant (rtx vals
)
7451 enum machine_mode mode
= GET_MODE (vals
);
7453 rtx const_vec
= NULL_RTX
;
7454 int n_elts
= GET_MODE_NUNITS (mode
);
7458 if (GET_CODE (vals
) == CONST_VECTOR
)
7460 else if (GET_CODE (vals
) == PARALLEL
)
7462 /* A CONST_VECTOR must contain only CONST_INTs and
7463 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
7464 Only store valid constants in a CONST_VECTOR. */
7465 for (i
= 0; i
< n_elts
; ++i
)
7467 rtx x
= XVECEXP (vals
, 0, i
);
7468 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
7471 if (n_const
== n_elts
)
7472 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
7477 if (const_vec
!= NULL_RTX
7478 && aarch64_simd_valid_immediate (const_vec
, mode
, false, NULL
))
7479 /* Load using MOVI/MVNI. */
7481 else if ((const_dup
= aarch64_simd_dup_constant (vals
)) != NULL_RTX
)
7482 /* Loaded using DUP. */
7484 else if (const_vec
!= NULL_RTX
)
7485 /* Load from constant pool. We can not take advantage of single-cycle
7486 LD1 because we need a PC-relative addressing mode. */
7489 /* A PARALLEL containing something not valid inside CONST_VECTOR.
7490 We can not construct an initializer. */
7495 aarch64_expand_vector_init (rtx target
, rtx vals
)
7497 enum machine_mode mode
= GET_MODE (target
);
7498 enum machine_mode inner_mode
= GET_MODE_INNER (mode
);
7499 int n_elts
= GET_MODE_NUNITS (mode
);
7500 int n_var
= 0, one_var
= -1;
7501 bool all_same
= true;
7505 x
= XVECEXP (vals
, 0, 0);
7506 if (!CONST_INT_P (x
) && !CONST_DOUBLE_P (x
))
7507 n_var
= 1, one_var
= 0;
7509 for (i
= 1; i
< n_elts
; ++i
)
7511 x
= XVECEXP (vals
, 0, i
);
7512 if (!CONST_INT_P (x
) && !CONST_DOUBLE_P (x
))
7513 ++n_var
, one_var
= i
;
7515 if (!rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
7521 rtx constant
= aarch64_simd_make_constant (vals
);
7522 if (constant
!= NULL_RTX
)
7524 emit_move_insn (target
, constant
);
7529 /* Splat a single non-constant element if we can. */
7532 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
7533 aarch64_emit_move (target
, gen_rtx_VEC_DUPLICATE (mode
, x
));
7537 /* One field is non-constant. Load constant then overwrite varying
7538 field. This is more efficient than using the stack. */
7541 rtx copy
= copy_rtx (vals
);
7542 rtx index
= GEN_INT (one_var
);
7543 enum insn_code icode
;
7545 /* Load constant part of vector, substitute neighboring value for
7547 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, one_var
^ 1);
7548 aarch64_expand_vector_init (target
, copy
);
7550 /* Insert variable. */
7551 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
7552 icode
= optab_handler (vec_set_optab
, mode
);
7553 gcc_assert (icode
!= CODE_FOR_nothing
);
7554 emit_insn (GEN_FCN (icode
) (target
, x
, index
));
7558 /* Construct the vector in memory one field at a time
7559 and load the whole vector. */
7560 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
7561 for (i
= 0; i
< n_elts
; i
++)
7562 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
7563 i
* GET_MODE_SIZE (inner_mode
)),
7564 XVECEXP (vals
, 0, i
));
7565 emit_move_insn (target
, mem
);
7569 static unsigned HOST_WIDE_INT
7570 aarch64_shift_truncation_mask (enum machine_mode mode
)
7573 (aarch64_vector_mode_supported_p (mode
)
7574 || aarch64_vect_struct_mode_p (mode
)) ? 0 : (GET_MODE_BITSIZE (mode
) - 1);
7577 #ifndef TLS_SECTION_ASM_FLAG
7578 #define TLS_SECTION_ASM_FLAG 'T'
7582 aarch64_elf_asm_named_section (const char *name
, unsigned int flags
,
7583 tree decl ATTRIBUTE_UNUSED
)
7585 char flagchars
[10], *f
= flagchars
;
7587 /* If we have already declared this section, we can use an
7588 abbreviated form to switch back to it -- unless this section is
7589 part of a COMDAT groups, in which case GAS requires the full
7590 declaration every time. */
7591 if (!(HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
7592 && (flags
& SECTION_DECLARED
))
7594 fprintf (asm_out_file
, "\t.section\t%s\n", name
);
7598 if (!(flags
& SECTION_DEBUG
))
7600 if (flags
& SECTION_WRITE
)
7602 if (flags
& SECTION_CODE
)
7604 if (flags
& SECTION_SMALL
)
7606 if (flags
& SECTION_MERGE
)
7608 if (flags
& SECTION_STRINGS
)
7610 if (flags
& SECTION_TLS
)
7611 *f
++ = TLS_SECTION_ASM_FLAG
;
7612 if (HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
7616 fprintf (asm_out_file
, "\t.section\t%s,\"%s\"", name
, flagchars
);
7618 if (!(flags
& SECTION_NOTYPE
))
7623 if (flags
& SECTION_BSS
)
7628 #ifdef TYPE_OPERAND_FMT
7629 format
= "," TYPE_OPERAND_FMT
;
7634 fprintf (asm_out_file
, format
, type
);
7636 if (flags
& SECTION_ENTSIZE
)
7637 fprintf (asm_out_file
, ",%d", flags
& SECTION_ENTSIZE
);
7638 if (HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
7640 if (TREE_CODE (decl
) == IDENTIFIER_NODE
)
7641 fprintf (asm_out_file
, ",%s,comdat", IDENTIFIER_POINTER (decl
));
7643 fprintf (asm_out_file
, ",%s,comdat",
7644 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl
)));
7648 putc ('\n', asm_out_file
);
7651 /* Select a format to encode pointers in exception handling data. */
7653 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED
, int global
)
7656 switch (aarch64_cmodel
)
7658 case AARCH64_CMODEL_TINY
:
7659 case AARCH64_CMODEL_TINY_PIC
:
7660 case AARCH64_CMODEL_SMALL
:
7661 case AARCH64_CMODEL_SMALL_PIC
:
7662 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
7664 type
= DW_EH_PE_sdata4
;
7667 /* No assumptions here. 8-byte relocs required. */
7668 type
= DW_EH_PE_sdata8
;
7671 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
7674 /* Emit load exclusive. */
7677 aarch64_emit_load_exclusive (enum machine_mode mode
, rtx rval
,
7678 rtx mem
, rtx model_rtx
)
7680 rtx (*gen
) (rtx
, rtx
, rtx
);
7684 case QImode
: gen
= gen_aarch64_load_exclusiveqi
; break;
7685 case HImode
: gen
= gen_aarch64_load_exclusivehi
; break;
7686 case SImode
: gen
= gen_aarch64_load_exclusivesi
; break;
7687 case DImode
: gen
= gen_aarch64_load_exclusivedi
; break;
7692 emit_insn (gen (rval
, mem
, model_rtx
));
7695 /* Emit store exclusive. */
7698 aarch64_emit_store_exclusive (enum machine_mode mode
, rtx bval
,
7699 rtx rval
, rtx mem
, rtx model_rtx
)
7701 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
7705 case QImode
: gen
= gen_aarch64_store_exclusiveqi
; break;
7706 case HImode
: gen
= gen_aarch64_store_exclusivehi
; break;
7707 case SImode
: gen
= gen_aarch64_store_exclusivesi
; break;
7708 case DImode
: gen
= gen_aarch64_store_exclusivedi
; break;
7713 emit_insn (gen (bval
, rval
, mem
, model_rtx
));
7716 /* Mark the previous jump instruction as unlikely. */
7719 aarch64_emit_unlikely_jump (rtx insn
)
7721 int very_unlikely
= REG_BR_PROB_BASE
/ 100 - 1;
7723 insn
= emit_jump_insn (insn
);
7724 add_int_reg_note (insn
, REG_BR_PROB
, very_unlikely
);
7727 /* Expand a compare and swap pattern. */
7730 aarch64_expand_compare_and_swap (rtx operands
[])
7732 rtx bval
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
7733 enum machine_mode mode
, cmp_mode
;
7734 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
7739 oldval
= operands
[3];
7740 newval
= operands
[4];
7741 is_weak
= operands
[5];
7742 mod_s
= operands
[6];
7743 mod_f
= operands
[7];
7744 mode
= GET_MODE (mem
);
7747 /* Normally the succ memory model must be stronger than fail, but in the
7748 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
7749 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
7751 if (INTVAL (mod_f
) == MEMMODEL_ACQUIRE
7752 && INTVAL (mod_s
) == MEMMODEL_RELEASE
)
7753 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
7759 /* For short modes, we're going to perform the comparison in SImode,
7760 so do the zero-extension now. */
7762 rval
= gen_reg_rtx (SImode
);
7763 oldval
= convert_modes (SImode
, mode
, oldval
, true);
7768 /* Force the value into a register if needed. */
7769 if (!aarch64_plus_operand (oldval
, mode
))
7770 oldval
= force_reg (cmp_mode
, oldval
);
7779 case QImode
: gen
= gen_atomic_compare_and_swapqi_1
; break;
7780 case HImode
: gen
= gen_atomic_compare_and_swaphi_1
; break;
7781 case SImode
: gen
= gen_atomic_compare_and_swapsi_1
; break;
7782 case DImode
: gen
= gen_atomic_compare_and_swapdi_1
; break;
7787 emit_insn (gen (rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
7789 if (mode
== QImode
|| mode
== HImode
)
7790 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
7792 x
= gen_rtx_REG (CCmode
, CC_REGNUM
);
7793 x
= gen_rtx_EQ (SImode
, x
, const0_rtx
);
7794 emit_insn (gen_rtx_SET (VOIDmode
, bval
, x
));
7797 /* Split a compare and swap pattern. */
7800 aarch64_split_compare_and_swap (rtx operands
[])
7802 rtx rval
, mem
, oldval
, newval
, scratch
;
7803 enum machine_mode mode
;
7805 rtx label1
, label2
, x
, cond
;
7809 oldval
= operands
[2];
7810 newval
= operands
[3];
7811 is_weak
= (operands
[4] != const0_rtx
);
7812 scratch
= operands
[7];
7813 mode
= GET_MODE (mem
);
7818 label1
= gen_label_rtx ();
7819 emit_label (label1
);
7821 label2
= gen_label_rtx ();
7823 aarch64_emit_load_exclusive (mode
, rval
, mem
, operands
[5]);
7825 cond
= aarch64_gen_compare_reg (NE
, rval
, oldval
);
7826 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
7827 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
7828 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
7829 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
7831 aarch64_emit_store_exclusive (mode
, scratch
, mem
, newval
, operands
[5]);
7835 x
= gen_rtx_NE (VOIDmode
, scratch
, const0_rtx
);
7836 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
7837 gen_rtx_LABEL_REF (Pmode
, label1
), pc_rtx
);
7838 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
7842 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
7843 x
= gen_rtx_COMPARE (CCmode
, scratch
, const0_rtx
);
7844 emit_insn (gen_rtx_SET (VOIDmode
, cond
, x
));
7847 emit_label (label2
);
7850 /* Split an atomic operation. */
7853 aarch64_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
7854 rtx value
, rtx model_rtx
, rtx cond
)
7856 enum machine_mode mode
= GET_MODE (mem
);
7857 enum machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
7860 label
= gen_label_rtx ();
7864 new_out
= gen_lowpart (wmode
, new_out
);
7866 old_out
= gen_lowpart (wmode
, old_out
);
7869 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
7871 aarch64_emit_load_exclusive (mode
, old_out
, mem
, model_rtx
);
7880 x
= gen_rtx_AND (wmode
, old_out
, value
);
7881 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
7882 x
= gen_rtx_NOT (wmode
, new_out
);
7883 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
7887 if (CONST_INT_P (value
))
7889 value
= GEN_INT (-INTVAL (value
));
7895 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
7896 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
7900 aarch64_emit_store_exclusive (mode
, cond
, mem
,
7901 gen_lowpart (mode
, new_out
), model_rtx
);
7903 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
7904 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
7905 gen_rtx_LABEL_REF (Pmode
, label
), pc_rtx
);
7906 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
7910 aarch64_print_extension (void)
7912 const struct aarch64_option_extension
*opt
= NULL
;
7914 for (opt
= all_extensions
; opt
->name
!= NULL
; opt
++)
7915 if ((aarch64_isa_flags
& opt
->flags_on
) == opt
->flags_on
)
7916 asm_fprintf (asm_out_file
, "+%s", opt
->name
);
7918 asm_fprintf (asm_out_file
, "\n");
7922 aarch64_start_file (void)
7926 asm_fprintf (asm_out_file
, "\t.arch %s", selected_arch
->name
);
7927 aarch64_print_extension ();
7929 else if (selected_cpu
)
7931 const char *truncated_name
7932 = aarch64_rewrite_selected_cpu (selected_cpu
->name
);
7933 asm_fprintf (asm_out_file
, "\t.cpu %s", truncated_name
);
7934 aarch64_print_extension ();
7936 default_file_start();
7939 /* Target hook for c_mode_for_suffix. */
7940 static enum machine_mode
7941 aarch64_c_mode_for_suffix (char suffix
)
7949 /* We can only represent floating point constants which will fit in
7950 "quarter-precision" values. These values are characterised by
7951 a sign bit, a 4-bit mantissa and a 3-bit exponent. And are given
7954 (-1)^s * (n/16) * 2^r
7957 's' is the sign bit.
7958 'n' is an integer in the range 16 <= n <= 31.
7959 'r' is an integer in the range -3 <= r <= 4. */
7961 /* Return true iff X can be represented by a quarter-precision
7962 floating point immediate operand X. Note, we cannot represent 0.0. */
7964 aarch64_float_const_representable_p (rtx x
)
7966 /* This represents our current view of how many bits
7967 make up the mantissa. */
7968 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
7970 unsigned HOST_WIDE_INT mantissa
, mask
;
7971 REAL_VALUE_TYPE r
, m
;
7974 if (!CONST_DOUBLE_P (x
))
7977 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
7979 /* We cannot represent infinities, NaNs or +/-zero. We won't
7980 know if we have +zero until we analyse the mantissa, but we
7981 can reject the other invalid values. */
7982 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
)
7983 || REAL_VALUE_MINUS_ZERO (r
))
7986 /* Extract exponent. */
7987 r
= real_value_abs (&r
);
7988 exponent
= REAL_EXP (&r
);
7990 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7991 highest (sign) bit, with a fixed binary point at bit point_pos.
7992 m1 holds the low part of the mantissa, m2 the high part.
7993 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
7994 bits for the mantissa, this can fail (low bits will be lost). */
7995 real_ldexp (&m
, &r
, point_pos
- exponent
);
7996 wide_int w
= real_to_integer (&m
, &fail
, HOST_BITS_PER_WIDE_INT
* 2);
7998 /* If the low part of the mantissa has bits set we cannot represent
8002 /* We have rejected the lower HOST_WIDE_INT, so update our
8003 understanding of how many bits lie in the mantissa and
8004 look only at the high HOST_WIDE_INT. */
8005 mantissa
= w
.elt (1);
8006 point_pos
-= HOST_BITS_PER_WIDE_INT
;
8008 /* We can only represent values with a mantissa of the form 1.xxxx. */
8009 mask
= ((unsigned HOST_WIDE_INT
)1 << (point_pos
- 5)) - 1;
8010 if ((mantissa
& mask
) != 0)
8013 /* Having filtered unrepresentable values, we may now remove all
8014 but the highest 5 bits. */
8015 mantissa
>>= point_pos
- 5;
8017 /* We cannot represent the value 0.0, so reject it. This is handled
8022 /* Then, as bit 4 is always set, we can mask it off, leaving
8023 the mantissa in the range [0, 15]. */
8024 mantissa
&= ~(1 << 4);
8025 gcc_assert (mantissa
<= 15);
8027 /* GCC internally does not use IEEE754-like encoding (where normalized
8028 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
8029 Our mantissa values are shifted 4 places to the left relative to
8030 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
8031 by 5 places to correct for GCC's representation. */
8032 exponent
= 5 - exponent
;
8034 return (exponent
>= 0 && exponent
<= 7);
8038 aarch64_output_simd_mov_immediate (rtx const_vector
,
8039 enum machine_mode mode
,
8043 static char templ
[40];
8044 const char *mnemonic
;
8045 const char *shift_op
;
8046 unsigned int lane_count
= 0;
8049 struct simd_immediate_info info
= { NULL_RTX
, 0, 0, false, false };
8051 /* This will return true to show const_vector is legal for use as either
8052 a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
8053 also update INFO to show how the immediate should be generated. */
8054 is_valid
= aarch64_simd_valid_immediate (const_vector
, mode
, false, &info
);
8055 gcc_assert (is_valid
);
8057 element_char
= sizetochar (info
.element_width
);
8058 lane_count
= width
/ info
.element_width
;
8060 mode
= GET_MODE_INNER (mode
);
8061 if (mode
== SFmode
|| mode
== DFmode
)
8063 gcc_assert (info
.shift
== 0 && ! info
.mvn
);
8064 if (aarch64_float_const_zero_rtx_p (info
.value
))
8065 info
.value
= GEN_INT (0);
8070 REAL_VALUE_FROM_CONST_DOUBLE (r
, info
.value
);
8071 char float_buf
[buf_size
] = {'\0'};
8072 real_to_decimal_for_mode (float_buf
, &r
, buf_size
, buf_size
, 1, mode
);
8075 if (lane_count
== 1)
8076 snprintf (templ
, sizeof (templ
), "fmov\t%%d0, %s", float_buf
);
8078 snprintf (templ
, sizeof (templ
), "fmov\t%%0.%d%c, %s",
8079 lane_count
, element_char
, float_buf
);
8084 mnemonic
= info
.mvn
? "mvni" : "movi";
8085 shift_op
= info
.msl
? "msl" : "lsl";
8087 if (lane_count
== 1)
8088 snprintf (templ
, sizeof (templ
), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX
,
8089 mnemonic
, UINTVAL (info
.value
));
8090 else if (info
.shift
)
8091 snprintf (templ
, sizeof (templ
), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
8092 ", %s %d", mnemonic
, lane_count
, element_char
,
8093 UINTVAL (info
.value
), shift_op
, info
.shift
);
8095 snprintf (templ
, sizeof (templ
), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
,
8096 mnemonic
, lane_count
, element_char
, UINTVAL (info
.value
));
8101 aarch64_output_scalar_simd_mov_immediate (rtx immediate
,
8102 enum machine_mode mode
)
8104 enum machine_mode vmode
;
8106 gcc_assert (!VECTOR_MODE_P (mode
));
8107 vmode
= aarch64_simd_container_mode (mode
, 64);
8108 rtx v_op
= aarch64_simd_gen_const_vector_dup (vmode
, INTVAL (immediate
));
8109 return aarch64_output_simd_mov_immediate (v_op
, vmode
, 64);
8112 /* Split operands into moves from op[1] + op[2] into op[0]. */
8115 aarch64_split_combinev16qi (rtx operands
[3])
8117 unsigned int dest
= REGNO (operands
[0]);
8118 unsigned int src1
= REGNO (operands
[1]);
8119 unsigned int src2
= REGNO (operands
[2]);
8120 enum machine_mode halfmode
= GET_MODE (operands
[1]);
8121 unsigned int halfregs
= HARD_REGNO_NREGS (src1
, halfmode
);
8124 gcc_assert (halfmode
== V16QImode
);
8126 if (src1
== dest
&& src2
== dest
+ halfregs
)
8128 /* No-op move. Can't split to nothing; emit something. */
8129 emit_note (NOTE_INSN_DELETED
);
8133 /* Preserve register attributes for variable tracking. */
8134 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
8135 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
8136 GET_MODE_SIZE (halfmode
));
8138 /* Special case of reversed high/low parts. */
8139 if (reg_overlap_mentioned_p (operands
[2], destlo
)
8140 && reg_overlap_mentioned_p (operands
[1], desthi
))
8142 emit_insn (gen_xorv16qi3 (operands
[1], operands
[1], operands
[2]));
8143 emit_insn (gen_xorv16qi3 (operands
[2], operands
[1], operands
[2]));
8144 emit_insn (gen_xorv16qi3 (operands
[1], operands
[1], operands
[2]));
8146 else if (!reg_overlap_mentioned_p (operands
[2], destlo
))
8148 /* Try to avoid unnecessary moves if part of the result
8149 is in the right place already. */
8151 emit_move_insn (destlo
, operands
[1]);
8152 if (src2
!= dest
+ halfregs
)
8153 emit_move_insn (desthi
, operands
[2]);
8157 if (src2
!= dest
+ halfregs
)
8158 emit_move_insn (desthi
, operands
[2]);
8160 emit_move_insn (destlo
, operands
[1]);
8164 /* vec_perm support. */
8166 #define MAX_VECT_LEN 16
8168 struct expand_vec_perm_d
8170 rtx target
, op0
, op1
;
8171 unsigned char perm
[MAX_VECT_LEN
];
8172 enum machine_mode vmode
;
8178 /* Generate a variable permutation. */
8181 aarch64_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
8183 enum machine_mode vmode
= GET_MODE (target
);
8184 bool one_vector_p
= rtx_equal_p (op0
, op1
);
8186 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
8187 gcc_checking_assert (GET_MODE (op0
) == vmode
);
8188 gcc_checking_assert (GET_MODE (op1
) == vmode
);
8189 gcc_checking_assert (GET_MODE (sel
) == vmode
);
8190 gcc_checking_assert (TARGET_SIMD
);
8194 if (vmode
== V8QImode
)
8196 /* Expand the argument to a V16QI mode by duplicating it. */
8197 rtx pair
= gen_reg_rtx (V16QImode
);
8198 emit_insn (gen_aarch64_combinev8qi (pair
, op0
, op0
));
8199 emit_insn (gen_aarch64_tbl1v8qi (target
, pair
, sel
));
8203 emit_insn (gen_aarch64_tbl1v16qi (target
, op0
, sel
));
8210 if (vmode
== V8QImode
)
8212 pair
= gen_reg_rtx (V16QImode
);
8213 emit_insn (gen_aarch64_combinev8qi (pair
, op0
, op1
));
8214 emit_insn (gen_aarch64_tbl1v8qi (target
, pair
, sel
));
8218 pair
= gen_reg_rtx (OImode
);
8219 emit_insn (gen_aarch64_combinev16qi (pair
, op0
, op1
));
8220 emit_insn (gen_aarch64_tbl2v16qi (target
, pair
, sel
));
8226 aarch64_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
8228 enum machine_mode vmode
= GET_MODE (target
);
8229 unsigned int i
, nelt
= GET_MODE_NUNITS (vmode
);
8230 bool one_vector_p
= rtx_equal_p (op0
, op1
);
8231 rtx rmask
[MAX_VECT_LEN
], mask
;
8233 gcc_checking_assert (!BYTES_BIG_ENDIAN
);
8235 /* The TBL instruction does not use a modulo index, so we must take care
8236 of that ourselves. */
8237 mask
= GEN_INT (one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
8238 for (i
= 0; i
< nelt
; ++i
)
8240 mask
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rmask
));
8241 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
8243 aarch64_expand_vec_perm_1 (target
, op0
, op1
, sel
);
8246 /* Recognize patterns suitable for the TRN instructions. */
8248 aarch64_evpc_trn (struct expand_vec_perm_d
*d
)
8250 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
8251 rtx out
, in0
, in1
, x
;
8252 rtx (*gen
) (rtx
, rtx
, rtx
);
8253 enum machine_mode vmode
= d
->vmode
;
8255 if (GET_MODE_UNIT_SIZE (vmode
) > 8)
8258 /* Note that these are little-endian tests.
8259 We correct for big-endian later. */
8260 if (d
->perm
[0] == 0)
8262 else if (d
->perm
[0] == 1)
8266 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
8268 for (i
= 0; i
< nelt
; i
+= 2)
8270 if (d
->perm
[i
] != i
+ odd
)
8272 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
8282 if (BYTES_BIG_ENDIAN
)
8284 x
= in0
, in0
= in1
, in1
= x
;
8293 case V16QImode
: gen
= gen_aarch64_trn2v16qi
; break;
8294 case V8QImode
: gen
= gen_aarch64_trn2v8qi
; break;
8295 case V8HImode
: gen
= gen_aarch64_trn2v8hi
; break;
8296 case V4HImode
: gen
= gen_aarch64_trn2v4hi
; break;
8297 case V4SImode
: gen
= gen_aarch64_trn2v4si
; break;
8298 case V2SImode
: gen
= gen_aarch64_trn2v2si
; break;
8299 case V2DImode
: gen
= gen_aarch64_trn2v2di
; break;
8300 case V4SFmode
: gen
= gen_aarch64_trn2v4sf
; break;
8301 case V2SFmode
: gen
= gen_aarch64_trn2v2sf
; break;
8302 case V2DFmode
: gen
= gen_aarch64_trn2v2df
; break;
8311 case V16QImode
: gen
= gen_aarch64_trn1v16qi
; break;
8312 case V8QImode
: gen
= gen_aarch64_trn1v8qi
; break;
8313 case V8HImode
: gen
= gen_aarch64_trn1v8hi
; break;
8314 case V4HImode
: gen
= gen_aarch64_trn1v4hi
; break;
8315 case V4SImode
: gen
= gen_aarch64_trn1v4si
; break;
8316 case V2SImode
: gen
= gen_aarch64_trn1v2si
; break;
8317 case V2DImode
: gen
= gen_aarch64_trn1v2di
; break;
8318 case V4SFmode
: gen
= gen_aarch64_trn1v4sf
; break;
8319 case V2SFmode
: gen
= gen_aarch64_trn1v2sf
; break;
8320 case V2DFmode
: gen
= gen_aarch64_trn1v2df
; break;
8326 emit_insn (gen (out
, in0
, in1
));
8330 /* Recognize patterns suitable for the UZP instructions. */
8332 aarch64_evpc_uzp (struct expand_vec_perm_d
*d
)
8334 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
8335 rtx out
, in0
, in1
, x
;
8336 rtx (*gen
) (rtx
, rtx
, rtx
);
8337 enum machine_mode vmode
= d
->vmode
;
8339 if (GET_MODE_UNIT_SIZE (vmode
) > 8)
8342 /* Note that these are little-endian tests.
8343 We correct for big-endian later. */
8344 if (d
->perm
[0] == 0)
8346 else if (d
->perm
[0] == 1)
8350 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
8352 for (i
= 0; i
< nelt
; i
++)
8354 unsigned elt
= (i
* 2 + odd
) & mask
;
8355 if (d
->perm
[i
] != elt
)
8365 if (BYTES_BIG_ENDIAN
)
8367 x
= in0
, in0
= in1
, in1
= x
;
8376 case V16QImode
: gen
= gen_aarch64_uzp2v16qi
; break;
8377 case V8QImode
: gen
= gen_aarch64_uzp2v8qi
; break;
8378 case V8HImode
: gen
= gen_aarch64_uzp2v8hi
; break;
8379 case V4HImode
: gen
= gen_aarch64_uzp2v4hi
; break;
8380 case V4SImode
: gen
= gen_aarch64_uzp2v4si
; break;
8381 case V2SImode
: gen
= gen_aarch64_uzp2v2si
; break;
8382 case V2DImode
: gen
= gen_aarch64_uzp2v2di
; break;
8383 case V4SFmode
: gen
= gen_aarch64_uzp2v4sf
; break;
8384 case V2SFmode
: gen
= gen_aarch64_uzp2v2sf
; break;
8385 case V2DFmode
: gen
= gen_aarch64_uzp2v2df
; break;
8394 case V16QImode
: gen
= gen_aarch64_uzp1v16qi
; break;
8395 case V8QImode
: gen
= gen_aarch64_uzp1v8qi
; break;
8396 case V8HImode
: gen
= gen_aarch64_uzp1v8hi
; break;
8397 case V4HImode
: gen
= gen_aarch64_uzp1v4hi
; break;
8398 case V4SImode
: gen
= gen_aarch64_uzp1v4si
; break;
8399 case V2SImode
: gen
= gen_aarch64_uzp1v2si
; break;
8400 case V2DImode
: gen
= gen_aarch64_uzp1v2di
; break;
8401 case V4SFmode
: gen
= gen_aarch64_uzp1v4sf
; break;
8402 case V2SFmode
: gen
= gen_aarch64_uzp1v2sf
; break;
8403 case V2DFmode
: gen
= gen_aarch64_uzp1v2df
; break;
8409 emit_insn (gen (out
, in0
, in1
));
8413 /* Recognize patterns suitable for the ZIP instructions. */
8415 aarch64_evpc_zip (struct expand_vec_perm_d
*d
)
8417 unsigned int i
, high
, mask
, nelt
= d
->nelt
;
8418 rtx out
, in0
, in1
, x
;
8419 rtx (*gen
) (rtx
, rtx
, rtx
);
8420 enum machine_mode vmode
= d
->vmode
;
8422 if (GET_MODE_UNIT_SIZE (vmode
) > 8)
8425 /* Note that these are little-endian tests.
8426 We correct for big-endian later. */
8428 if (d
->perm
[0] == high
)
8431 else if (d
->perm
[0] == 0)
8435 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
8437 for (i
= 0; i
< nelt
/ 2; i
++)
8439 unsigned elt
= (i
+ high
) & mask
;
8440 if (d
->perm
[i
* 2] != elt
)
8442 elt
= (elt
+ nelt
) & mask
;
8443 if (d
->perm
[i
* 2 + 1] != elt
)
8453 if (BYTES_BIG_ENDIAN
)
8455 x
= in0
, in0
= in1
, in1
= x
;
8464 case V16QImode
: gen
= gen_aarch64_zip2v16qi
; break;
8465 case V8QImode
: gen
= gen_aarch64_zip2v8qi
; break;
8466 case V8HImode
: gen
= gen_aarch64_zip2v8hi
; break;
8467 case V4HImode
: gen
= gen_aarch64_zip2v4hi
; break;
8468 case V4SImode
: gen
= gen_aarch64_zip2v4si
; break;
8469 case V2SImode
: gen
= gen_aarch64_zip2v2si
; break;
8470 case V2DImode
: gen
= gen_aarch64_zip2v2di
; break;
8471 case V4SFmode
: gen
= gen_aarch64_zip2v4sf
; break;
8472 case V2SFmode
: gen
= gen_aarch64_zip2v2sf
; break;
8473 case V2DFmode
: gen
= gen_aarch64_zip2v2df
; break;
8482 case V16QImode
: gen
= gen_aarch64_zip1v16qi
; break;
8483 case V8QImode
: gen
= gen_aarch64_zip1v8qi
; break;
8484 case V8HImode
: gen
= gen_aarch64_zip1v8hi
; break;
8485 case V4HImode
: gen
= gen_aarch64_zip1v4hi
; break;
8486 case V4SImode
: gen
= gen_aarch64_zip1v4si
; break;
8487 case V2SImode
: gen
= gen_aarch64_zip1v2si
; break;
8488 case V2DImode
: gen
= gen_aarch64_zip1v2di
; break;
8489 case V4SFmode
: gen
= gen_aarch64_zip1v4sf
; break;
8490 case V2SFmode
: gen
= gen_aarch64_zip1v2sf
; break;
8491 case V2DFmode
: gen
= gen_aarch64_zip1v2df
; break;
8497 emit_insn (gen (out
, in0
, in1
));
8502 aarch64_evpc_dup (struct expand_vec_perm_d
*d
)
8504 rtx (*gen
) (rtx
, rtx
, rtx
);
8505 rtx out
= d
->target
;
8507 enum machine_mode vmode
= d
->vmode
;
8508 unsigned int i
, elt
, nelt
= d
->nelt
;
8511 /* TODO: This may not be big-endian safe. */
8512 if (BYTES_BIG_ENDIAN
)
8516 for (i
= 1; i
< nelt
; i
++)
8518 if (elt
!= d
->perm
[i
])
8522 /* The generic preparation in aarch64_expand_vec_perm_const_1
8523 swaps the operand order and the permute indices if it finds
8524 d->perm[0] to be in the second operand. Thus, we can always
8525 use d->op0 and need not do any extra arithmetic to get the
8526 correct lane number. */
8528 lane
= GEN_INT (elt
);
8532 case V16QImode
: gen
= gen_aarch64_dup_lanev16qi
; break;
8533 case V8QImode
: gen
= gen_aarch64_dup_lanev8qi
; break;
8534 case V8HImode
: gen
= gen_aarch64_dup_lanev8hi
; break;
8535 case V4HImode
: gen
= gen_aarch64_dup_lanev4hi
; break;
8536 case V4SImode
: gen
= gen_aarch64_dup_lanev4si
; break;
8537 case V2SImode
: gen
= gen_aarch64_dup_lanev2si
; break;
8538 case V2DImode
: gen
= gen_aarch64_dup_lanev2di
; break;
8539 case V4SFmode
: gen
= gen_aarch64_dup_lanev4sf
; break;
8540 case V2SFmode
: gen
= gen_aarch64_dup_lanev2sf
; break;
8541 case V2DFmode
: gen
= gen_aarch64_dup_lanev2df
; break;
8546 emit_insn (gen (out
, in0
, lane
));
8551 aarch64_evpc_tbl (struct expand_vec_perm_d
*d
)
8553 rtx rperm
[MAX_VECT_LEN
], sel
;
8554 enum machine_mode vmode
= d
->vmode
;
8555 unsigned int i
, nelt
= d
->nelt
;
8560 /* Generic code will try constant permutation twice. Once with the
8561 original mode and again with the elements lowered to QImode.
8562 So wait and don't do the selector expansion ourselves. */
8563 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
8566 for (i
= 0; i
< nelt
; ++i
)
8568 int nunits
= GET_MODE_NUNITS (vmode
);
8570 /* If big-endian and two vectors we end up with a weird mixed-endian
8571 mode on NEON. Reverse the index within each word but not the word
8573 rperm
[i
] = GEN_INT (BYTES_BIG_ENDIAN
? d
->perm
[i
] ^ (nunits
- 1)
8576 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
8577 sel
= force_reg (vmode
, sel
);
8579 aarch64_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
8584 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
8586 /* The pattern matching functions above are written to look for a small
8587 number to begin the sequence (0, 1, N/2). If we begin with an index
8588 from the second operand, we can swap the operands. */
8589 if (d
->perm
[0] >= d
->nelt
)
8591 unsigned i
, nelt
= d
->nelt
;
8594 gcc_assert (nelt
== (nelt
& -nelt
));
8595 for (i
= 0; i
< nelt
; ++i
)
8596 d
->perm
[i
] ^= nelt
; /* Keep the same index, but in the other vector. */
8605 if (aarch64_evpc_zip (d
))
8607 else if (aarch64_evpc_uzp (d
))
8609 else if (aarch64_evpc_trn (d
))
8611 else if (aarch64_evpc_dup (d
))
8613 return aarch64_evpc_tbl (d
);
8618 /* Expand a vec_perm_const pattern. */
8621 aarch64_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
, rtx sel
)
8623 struct expand_vec_perm_d d
;
8630 d
.vmode
= GET_MODE (target
);
8631 gcc_assert (VECTOR_MODE_P (d
.vmode
));
8632 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
8633 d
.testing_p
= false;
8635 for (i
= which
= 0; i
< nelt
; ++i
)
8637 rtx e
= XVECEXP (sel
, 0, i
);
8638 int ei
= INTVAL (e
) & (2 * nelt
- 1);
8639 which
|= (ei
< nelt
? 1 : 2);
8649 d
.one_vector_p
= false;
8650 if (!rtx_equal_p (op0
, op1
))
8653 /* The elements of PERM do not suggest that only the first operand
8654 is used, but both operands are identical. Allow easier matching
8655 of the permutation by folding the permutation into the single
8659 for (i
= 0; i
< nelt
; ++i
)
8660 d
.perm
[i
] &= nelt
- 1;
8662 d
.one_vector_p
= true;
8667 d
.one_vector_p
= true;
8671 return aarch64_expand_vec_perm_const_1 (&d
);
8675 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode
,
8676 const unsigned char *sel
)
8678 struct expand_vec_perm_d d
;
8679 unsigned int i
, nelt
, which
;
8683 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
8685 memcpy (d
.perm
, sel
, nelt
);
8687 /* Calculate whether all elements are in one vector. */
8688 for (i
= which
= 0; i
< nelt
; ++i
)
8690 unsigned char e
= d
.perm
[i
];
8691 gcc_assert (e
< 2 * nelt
);
8692 which
|= (e
< nelt
? 1 : 2);
8695 /* If all elements are from the second vector, reindex as if from the
8698 for (i
= 0; i
< nelt
; ++i
)
8701 /* Check whether the mask can be applied to a single vector. */
8702 d
.one_vector_p
= (which
!= 3);
8704 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
8705 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
8706 if (!d
.one_vector_p
)
8707 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
8710 ret
= aarch64_expand_vec_perm_const_1 (&d
);
8716 /* Implement target hook CANNOT_CHANGE_MODE_CLASS. */
8718 aarch64_cannot_change_mode_class (enum machine_mode from
,
8719 enum machine_mode to
,
8720 enum reg_class rclass
)
8722 /* Full-reg subregs are allowed on general regs or any class if they are
8724 if (GET_MODE_SIZE (from
) == GET_MODE_SIZE (to
)
8725 || !reg_classes_intersect_p (FP_REGS
, rclass
))
8728 /* Limited combinations of subregs are safe on FPREGs. Particularly,
8729 1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
8730 2. Scalar to Scalar for integer modes or same size float modes.
8731 3. Vector to Vector modes.
8732 4. On little-endian only, Vector-Structure to Vector modes. */
8733 if (GET_MODE_SIZE (from
) > GET_MODE_SIZE (to
))
8735 if (aarch64_vector_mode_supported_p (from
)
8736 && GET_MODE_SIZE (GET_MODE_INNER (from
)) == GET_MODE_SIZE (to
))
8739 if (GET_MODE_NUNITS (from
) == 1
8740 && GET_MODE_NUNITS (to
) == 1
8741 && (GET_MODE_CLASS (from
) == MODE_INT
8745 if (aarch64_vector_mode_supported_p (from
)
8746 && aarch64_vector_mode_supported_p (to
))
8749 /* Within an vector structure straddling multiple vector registers
8750 we are in a mixed-endian representation. As such, we can't
8751 easily change modes for BYTES_BIG_ENDIAN. Otherwise, we can
8752 switch between vectors and vector structures cheaply. */
8753 if (!BYTES_BIG_ENDIAN
)
8754 if ((aarch64_vector_mode_supported_p (from
)
8755 && aarch64_vect_struct_mode_p (to
))
8756 || (aarch64_vector_mode_supported_p (to
)
8757 && aarch64_vect_struct_mode_p (from
)))
8764 /* Implement MODES_TIEABLE_P. */
8767 aarch64_modes_tieable_p (enum machine_mode mode1
, enum machine_mode mode2
)
8769 if (GET_MODE_CLASS (mode1
) == GET_MODE_CLASS (mode2
))
8772 /* We specifically want to allow elements of "structure" modes to
8773 be tieable to the structure. This more general condition allows
8774 other rarer situations too. */
8776 && aarch64_vector_mode_p (mode1
)
8777 && aarch64_vector_mode_p (mode2
))
8783 #undef TARGET_ADDRESS_COST
8784 #define TARGET_ADDRESS_COST aarch64_address_cost
8786 /* This hook will determines whether unnamed bitfields affect the alignment
8787 of the containing structure. The hook returns true if the structure
8788 should inherit the alignment requirements of an unnamed bitfield's
8790 #undef TARGET_ALIGN_ANON_BITFIELD
8791 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
8793 #undef TARGET_ASM_ALIGNED_DI_OP
8794 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
8796 #undef TARGET_ASM_ALIGNED_HI_OP
8797 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
8799 #undef TARGET_ASM_ALIGNED_SI_OP
8800 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
8802 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
8803 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
8804 hook_bool_const_tree_hwi_hwi_const_tree_true
8806 #undef TARGET_ASM_FILE_START
8807 #define TARGET_ASM_FILE_START aarch64_start_file
8809 #undef TARGET_ASM_OUTPUT_MI_THUNK
8810 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
8812 #undef TARGET_ASM_SELECT_RTX_SECTION
8813 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
8815 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
8816 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
8818 #undef TARGET_BUILD_BUILTIN_VA_LIST
8819 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
8821 #undef TARGET_CALLEE_COPIES
8822 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
8824 #undef TARGET_CAN_ELIMINATE
8825 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
8827 #undef TARGET_CANNOT_FORCE_CONST_MEM
8828 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
8830 #undef TARGET_CONDITIONAL_REGISTER_USAGE
8831 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
8833 /* Only the least significant bit is used for initialization guard
8835 #undef TARGET_CXX_GUARD_MASK_BIT
8836 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
8838 #undef TARGET_C_MODE_FOR_SUFFIX
8839 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
8841 #ifdef TARGET_BIG_ENDIAN_DEFAULT
8842 #undef TARGET_DEFAULT_TARGET_FLAGS
8843 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
8846 #undef TARGET_CLASS_MAX_NREGS
8847 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
8849 #undef TARGET_BUILTIN_DECL
8850 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
8852 #undef TARGET_EXPAND_BUILTIN
8853 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
8855 #undef TARGET_EXPAND_BUILTIN_VA_START
8856 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
8858 #undef TARGET_FOLD_BUILTIN
8859 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
8861 #undef TARGET_FUNCTION_ARG
8862 #define TARGET_FUNCTION_ARG aarch64_function_arg
8864 #undef TARGET_FUNCTION_ARG_ADVANCE
8865 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
8867 #undef TARGET_FUNCTION_ARG_BOUNDARY
8868 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
8870 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
8871 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
8873 #undef TARGET_FUNCTION_VALUE
8874 #define TARGET_FUNCTION_VALUE aarch64_function_value
8876 #undef TARGET_FUNCTION_VALUE_REGNO_P
8877 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
8879 #undef TARGET_FRAME_POINTER_REQUIRED
8880 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
8882 #undef TARGET_GIMPLE_FOLD_BUILTIN
8883 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
8885 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
8886 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
8888 #undef TARGET_INIT_BUILTINS
8889 #define TARGET_INIT_BUILTINS aarch64_init_builtins
8891 #undef TARGET_LEGITIMATE_ADDRESS_P
8892 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
8894 #undef TARGET_LEGITIMATE_CONSTANT_P
8895 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
8897 #undef TARGET_LIBGCC_CMP_RETURN_MODE
8898 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
8901 #define TARGET_LRA_P aarch64_lra_p
8903 #undef TARGET_MANGLE_TYPE
8904 #define TARGET_MANGLE_TYPE aarch64_mangle_type
8906 #undef TARGET_MEMORY_MOVE_COST
8907 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
8909 #undef TARGET_MUST_PASS_IN_STACK
8910 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
8912 /* This target hook should return true if accesses to volatile bitfields
8913 should use the narrowest mode possible. It should return false if these
8914 accesses should use the bitfield container type. */
8915 #undef TARGET_NARROW_VOLATILE_BITFIELD
8916 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
8918 #undef TARGET_OPTION_OVERRIDE
8919 #define TARGET_OPTION_OVERRIDE aarch64_override_options
8921 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
8922 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
8923 aarch64_override_options_after_change
8925 #undef TARGET_PASS_BY_REFERENCE
8926 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
8928 #undef TARGET_PREFERRED_RELOAD_CLASS
8929 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
8931 #undef TARGET_SECONDARY_RELOAD
8932 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
8934 #undef TARGET_SHIFT_TRUNCATION_MASK
8935 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
8937 #undef TARGET_SETUP_INCOMING_VARARGS
8938 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
8940 #undef TARGET_STRUCT_VALUE_RTX
8941 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
8943 #undef TARGET_REGISTER_MOVE_COST
8944 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
8946 #undef TARGET_RETURN_IN_MEMORY
8947 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
8949 #undef TARGET_RETURN_IN_MSB
8950 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
8952 #undef TARGET_RTX_COSTS
8953 #define TARGET_RTX_COSTS aarch64_rtx_costs
8955 #undef TARGET_SCHED_ISSUE_RATE
8956 #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
8958 #undef TARGET_TRAMPOLINE_INIT
8959 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
8961 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
8962 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
8964 #undef TARGET_VECTOR_MODE_SUPPORTED_P
8965 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
8967 #undef TARGET_ARRAY_MODE_SUPPORTED_P
8968 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
8970 #undef TARGET_VECTORIZE_ADD_STMT_COST
8971 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
8973 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
8974 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
8975 aarch64_builtin_vectorization_cost
8977 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
8978 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
8980 #undef TARGET_VECTORIZE_BUILTINS
8981 #define TARGET_VECTORIZE_BUILTINS
8983 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
8984 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
8985 aarch64_builtin_vectorized_function
8987 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
8988 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
8989 aarch64_autovectorize_vector_sizes
8991 /* Section anchor support. */
8993 #undef TARGET_MIN_ANCHOR_OFFSET
8994 #define TARGET_MIN_ANCHOR_OFFSET -256
8996 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
8997 byte offset; we can do much more for larger data types, but have no way
8998 to determine the size of the access. We assume accesses are aligned. */
8999 #undef TARGET_MAX_ANCHOR_OFFSET
9000 #define TARGET_MAX_ANCHOR_OFFSET 4095
9002 #undef TARGET_VECTOR_ALIGNMENT
9003 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
9005 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
9006 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
9007 aarch64_simd_vector_alignment_reachable
9009 /* vec_perm support. */
9011 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
9012 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
9013 aarch64_vectorize_vec_perm_const_ok
9016 #undef TARGET_FIXED_CONDITION_CODE_REGS
9017 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
9019 #undef TARGET_FLAGS_REGNUM
9020 #define TARGET_FLAGS_REGNUM CC_REGNUM
9022 struct gcc_target targetm
= TARGET_INITIALIZER
;
9024 #include "gt-aarch64.h"