/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2013 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
23 #include "coretypes.h"
25 #include "insn-codes.h"
27 #include "insn-attr.h"
29 #include "stringpool.h"
30 #include "stor-layout.h"
35 #include "hard-reg-set.h"
41 #include "target-def.h"
42 #include "targhooks.h"
47 #include "langhooks.h"
48 #include "diagnostic-core.h"
49 #include "pointer-set.h"
50 #include "hash-table.h"
52 #include "basic-block.h"
53 #include "tree-ssa-alias.h"
54 #include "internal-fn.h"
55 #include "gimple-fold.h"
57 #include "gimple-expr.h"
64 #include "tree-vectorizer.h"
65 #include "config/arm/aarch-cost-tables.h"
/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
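/* Illustrative values (not from the original source): POINTER_BYTES is
   8 under the default LP64 ABI (POINTER_SIZE == 64) and 4 under ILP32
   (POINTER_SIZE == 32).  */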
/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC
       A constant symbolic address, in pc-relative literal pool.  */

enum aarch64_address_type {
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};

struct aarch64_address_info {
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};
struct simd_immediate_info
{
  rtx value;
  int shift;
  int element_width;
  bool mvn;
  bool msl;
};
/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1

static bool aarch64_lra_p (void);
static bool aarch64_composite_type_p (const_tree, enum machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
						     const_tree,
						     enum machine_mode *, int *,
						     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (enum machine_mode);
static unsigned bit_count (unsigned HOST_WIDE_INT);
static bool aarch64_const_vec_all_same_int_p (rtx,
					      HOST_WIDE_INT, HOST_WIDE_INT);
static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
						 const unsigned char *sel);
/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = cortexa53;

/* The current tuning set.  */
const struct tune_params *aarch64_tune_params;

/* Mask to specify which instructions we are allowed to generate.  */
unsigned long aarch64_isa_flags = 0;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;
/* Tuning parameters.  */

#if HAVE_DESIGNATED_INITIALIZERS
#define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
#else
#define NAMED_PARAM(NAME, VAL) (VAL)
#endif

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_addrcost_table generic_addrcost_table =
{
  NAMED_PARAM (pre_modify, 0),
  NAMED_PARAM (post_modify, 0),
  NAMED_PARAM (register_offset, 0),
  NAMED_PARAM (register_extend, 0),
  NAMED_PARAM (imm_offset, 0)
};
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_regmove_cost generic_regmove_cost =
{
  NAMED_PARAM (GP2GP, 1),
  NAMED_PARAM (GP2FP, 2),
  NAMED_PARAM (FP2GP, 2),
  /* We currently do not provide direct support for TFmode Q->Q move.
     Therefore we need to raise the cost above 2 in order to have
     reload handle the situation.  */
  NAMED_PARAM (FP2FP, 4)
};
/* Generic costs for vector insn classes.  */
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct cpu_vector_cost generic_vector_cost =
{
  NAMED_PARAM (scalar_stmt_cost, 1),
  NAMED_PARAM (scalar_load_cost, 1),
  NAMED_PARAM (scalar_store_cost, 1),
  NAMED_PARAM (vec_stmt_cost, 1),
  NAMED_PARAM (vec_to_scalar_cost, 1),
  NAMED_PARAM (scalar_to_vec_cost, 1),
  NAMED_PARAM (vec_align_load_cost, 1),
  NAMED_PARAM (vec_unalign_load_cost, 1),
  NAMED_PARAM (vec_unalign_store_cost, 1),
  NAMED_PARAM (vec_store_cost, 1),
  NAMED_PARAM (cond_taken_branch_cost, 3),
  NAMED_PARAM (cond_not_taken_branch_cost, 1)
};
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
static const struct tune_params generic_tunings =
{
  &generic_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  NAMED_PARAM (memmov_cost, 4)
};

static const struct tune_params cortexa53_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  NAMED_PARAM (memmov_cost, 4)
};
/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor core;
  const char *arch;
  const unsigned long flags;
  const struct tune_params *const tune;
};

/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
#include "aarch64-cores.def"
#undef AARCH64_CORE
  {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8,
   &generic_tunings},
  {NULL, aarch64_none, NULL, 0, NULL}
};
/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, FLAGS, NULL},
#include "aarch64-arches.def"
#undef AARCH64_ARCH
  {NULL, aarch64_none, NULL, 0, NULL}
};
/* Target specification.  These are populated as commandline arguments
   are processed, or NULL if not specified.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};

/* ISA extensions in AArch64.  */
static const struct aarch64_option_extension all_extensions[] =
{
#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
  {NAME, FLAGS_ON, FLAGS_OFF},
#include "aarch64-option-extensions.def"
#undef AARCH64_OPT_EXTENSION
  {NULL, 0, 0}
};
/* Used to track the size of an address when generating a pre/post
   increment address.  */
static enum machine_mode aarch64_memory_reference_mode;

/* Used to force GTY into this file.  */
static GTY(()) int gty_dummy;

/* A table of valid AArch64 "bitmask immediate" values for
   logical instructions.  */

#define AARCH64_NUM_BITMASKS  5334
static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
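/* Illustrative note (not from the original source): a "bitmask
   immediate" is a run of ones, replicated across the register at a
   power-of-two element size and then rotated.  For 64-bit logical
   instructions, examples of encodable values are

     0x00000000000000ff   one 8-bit run of ones
     0x00ff00ff00ff00ff   8-bit run replicated at 16-bit granularity
     0x0000ffff0000ffff   16-bit run replicated at 32-bit granularity
     0xfffffffffffffffe   63-bit run, rotated

   while 0x0 and 0xffffffffffffffff are not encodable.  Enumerating all
   element sizes, run lengths and rotations gives the 5334 distinct
   64-bit values stored in aarch64_bitmasks above.  */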
/* Did we set flag_omit_frame_pointer just so
   aarch64_frame_pointer_required would be called?  */
static bool faked_omit_frame_pointer;
typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
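/* Worked example (not from the original source): the encodings above
   place each condition next to its inverse, so flipping the low bit
   inverts the test, e.g.

     AARCH64_EQ (0)  ^ 1 == AARCH64_NE (1)
     AARCH64_GE (10) ^ 1 == AARCH64_LT (11)

   which is exactly what AARCH64_INVERSE_CONDITION_CODE relies on.  */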
/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return AARCH64_DWARF_R0 + regno - R0_REGNUM;
  else if (regno == SP_REGNUM)
    return AARCH64_DWARF_SP;
  else if (FP_REGNUM_P (regno))
    return AARCH64_DWARF_V0 + regno - V0_REGNUM;

  /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
     equivalent DWARF register.  */
  return DWARF_FRAME_REGISTERS;
}
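/* For reference (not from the original source), the AArch64 DWARF
   numbering used above is

     x0-x30  ->  0-30    (AARCH64_DWARF_R0 == 0)
     sp      ->  31      (AARCH64_DWARF_SP == 31)
     v0-v31  ->  64-95   (AARCH64_DWARF_V0 == 64)

   so, for example, x19 maps to DWARF register 19 and v8 maps to 72.  */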
/* Return TRUE if MODE is any of the large INT modes.  */
static bool
aarch64_vect_struct_mode_p (enum machine_mode mode)
{
  return mode == OImode || mode == CImode || mode == XImode;
}

/* Return TRUE if MODE is any of the vector modes.  */
static bool
aarch64_vector_mode_p (enum machine_mode mode)
{
  return aarch64_vector_mode_supported_p (mode)
	 || aarch64_vect_struct_mode_p (mode);
}
/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
static bool
aarch64_array_mode_supported_p (enum machine_mode mode,
				unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_SIMD
      && AARCH64_VALID_SIMD_QREG_MODE (mode)
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}
/* Implement HARD_REGNO_NREGS.  */

int
aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
{
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
    default:
      return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    }
  gcc_unreachable ();
}
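/* Worked example (not from the original source), with UNITS_PER_WORD
   == 8 and UNITS_PER_VREG == 16: a TImode value (16 bytes) occupies
   two X-registers but only one V-register, while an OImode value
   (32 bytes) occupies two V-registers.  */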
/* Implement HARD_REGNO_MODE_OK.  */

int
aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return regno == CC_REGNUM;

  if (regno == SP_REGNUM)
    /* The purpose of comparing with ptr_mode is to support the
       global register variable associated with the stack pointer
       register via the syntax of asm ("wsp") in ILP32.  */
    return mode == Pmode || mode == ptr_mode;

  if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
    return mode == Pmode;

  if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
    return 1;

  if (FP_REGNUM_P (regno))
    {
      if (aarch64_vect_struct_mode_p (mode))
	return
	  (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
      else
	return 1;
    }

  return 0;
}
/* Return true if calls to DECL should be treated as
   long-calls (i.e. called via a register).  */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
  return false;
}

/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (i.e. called via a register).  */
bool
aarch64_is_long_call_p (rtx sym)
{
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}
/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

   (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)).  */
bool
aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
				rtx extract_imm)
{
  HOST_WIDE_INT mult_val, extract_val;

  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
    return false;

  mult_val = INTVAL (mult_imm);
  extract_val = INTVAL (extract_imm);

  if (extract_val > 8
      && extract_val < GET_MODE_BITSIZE (mode)
      && exact_log2 (extract_val & ~7) > 0
      && (extract_val & 7) <= 4
      && mult_val == (1 << (extract_val & 7)))
    return true;

  return false;
}
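/* Worked example (not from the original source): MULT_IMM == 4 and
   EXTRACT_IMM == 34 satisfy the checks above (34 & ~7 == 32 is a power
   of two, 34 & 7 == 2, and 4 == 1 << 2).  A 34-bit extract of
   (reg * 4) is equivalent to zero-extending the low 32 bits of REG and
   shifting left by 2, i.e. the "uxtw #2" extended-register form.  */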
/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
  enum machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
  return cc_reg;
}
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

rtx
aarch64_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}
/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;
  rtx sym, addend;

  if (GET_CODE (addr) == CONST)
    {
      split_const (addr, &sym, &addend);
      if (GET_CODE (sym) == SYMBOL_REF)
	tls_kind = SYMBOL_REF_TLS_MODEL (sym);
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}
/* We'll allow lo_sum's in addresses in our legitimate addresses
   so that combine would take care of combining addresses where
   necessary, but for generation purposes, we'll generate the address
   as:

   RTL                                Absolute
   tmp = hi (symbol_ref);             adrp  x1, foo
   dest = lo_sum (tmp, symbol_ref);   add   dest, x1, :lo_12:foo

   PIC                                TLS
   adrp x1, :got:foo                  adrp  tmp, :tlsgd:foo
   ldr  x1, [:got_lo12:foo]           add   dest, tmp, :tlsgd_lo12:foo
                                      bl    __tls_get_addr

   Load TLS symbol, depending on TLS mechanism and TLS access model.

   Global Dynamic - Traditional TLS:
   adrp tmp, :tlsgd:imm
   add  dest, tmp, #:tlsgd_lo12:imm
   bl   __tls_get_addr

   Global Dynamic - TLS Descriptors:
   adrp dest, :tlsdesc:imm
   ldr  tmp, [dest, #:tlsdesc_lo12:imm]
   add  dest, dest, #:tlsdesc_lo12:imm
   blr  tmp

   Initial Exec:
   mrs  tp, tpidr_el0
   adrp tmp, :gottprel:imm
   ldr  dest, [tmp, #:gottprel_lo12:imm]
   add  dest, dest, tp

   Local Exec:
   mrs  tp, tpidr_el0
   add  t0, tp, #:tprel_hi12:imm
   add  t0, #:tprel_lo12_nc:imm
*/
static void
aarch64_load_symref_appropriately (rtx dest, rtx imm,
				   enum aarch64_symbol_type type)
{
  switch (type)
    {
    case SYMBOL_SMALL_ABSOLUTE:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode.  */
	rtx tmp_reg = dest;
	enum machine_mode mode = GET_MODE (dest);

	gcc_assert (mode == Pmode || mode == ptr_mode);

	if (can_create_pseudo_p ())
	  tmp_reg = gen_reg_rtx (mode);

	emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
	emit_insn (gen_add_losym (dest, tmp_reg, imm));
	return;
      }

    case SYMBOL_TINY_ABSOLUTE:
      emit_insn (gen_rtx_SET (Pmode, dest, imm));
      return;

    case SYMBOL_SMALL_GOT:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the got entry is always of SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in the memory), it has SImode; it may have
	   DImode if dest is dereferenced to access the memory.
	   This is why we have to handle three different ldr_got_small
	   patterns here (two patterns for ILP32).  */
	rtx tmp_reg = dest;
	enum machine_mode mode = GET_MODE (dest);

	if (can_create_pseudo_p ())
	  tmp_reg = gen_reg_rtx (mode);

	emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
	    else
	      emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
	  }
	else
	  {
	    gcc_assert (mode == Pmode);
	    emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
	  }
	return;
      }

    case SYMBOL_SMALL_TLSGD:
      {
	rtx insns;
	rtx result = gen_rtx_REG (Pmode, R0_REGNUM);

	start_sequence ();
	emit_call_insn (gen_tlsgd_small (result, imm));
	insns = get_insns ();
	end_sequence ();

	RTL_CONST_CALL_P (insns) = 1;
	emit_libcall_block (insns, dest, result, imm);
	return;
      }

    case SYMBOL_SMALL_TLSDESC:
      {
	rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
	rtx tp;

	emit_insn (gen_tlsdesc_small (imm));
	tp = aarch64_load_tp (NULL);
	emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_SMALL_GOTTPREL:
      {
	rtx tmp_reg = gen_reg_rtx (Pmode);
	rtx tp = aarch64_load_tp (NULL);
	emit_insn (gen_tlsie_small (tmp_reg, imm));
	emit_insn (gen_rtx_SET (Pmode, dest,
				gen_rtx_PLUS (Pmode, tp, tmp_reg)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_SMALL_TPREL:
      {
	rtx tp = aarch64_load_tp (NULL);
	emit_insn (gen_tlsle_small (dest, tp, imm));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_TINY_GOT:
      emit_insn (gen_ldr_got_tiny (dest, imm));
      return;

    default:
      gcc_unreachable ();
    }
}
/* Emit a move from SRC to DEST.  Assume that the move expanders can
   handle all moves if !can_create_pseudo_p ().  The distinction is
   important because, unlike emit_move_insn, the move expanders know
   how to force Pmode objects into the constant pool even when the
   constant pool address is not itself legitimate.  */
static rtx
aarch64_emit_move (rtx dest, rtx src)
{
  return (can_create_pseudo_p ()
	  ? emit_move_insn (dest, src)
	  : emit_move_insn_1 (dest, src));
}
/* Split a 128-bit move operation into two 64-bit move operations.  */
void
aarch64_split_128bit_move (rtx dst, rtx src)
{
  rtx low_dst;

  enum machine_mode src_mode = GET_MODE (src);
  enum machine_mode dst_mode = GET_MODE (dst);
  int src_regno = REGNO (src);
  int dst_regno = REGNO (dst);

  gcc_assert (dst_mode == TImode || dst_mode == TFmode);

  if (REG_P (dst) && REG_P (src))
    {
      gcc_assert (src_mode == TImode || src_mode == TFmode);

      /* Handle r -> w, w -> r.  */
      if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
	{
	  if (src_mode == TImode)
	    {
	      emit_insn
		(gen_aarch64_movtilow_di (dst, gen_lowpart (word_mode, src)));
	      emit_insn
		(gen_aarch64_movtihigh_di (dst, gen_highpart (word_mode, src)));
	    }
	  else
	    {
	      emit_insn
		(gen_aarch64_movtflow_di (dst, gen_lowpart (word_mode, src)));
	      emit_insn
		(gen_aarch64_movtfhigh_di (dst, gen_highpart (word_mode, src)));
	    }
	  return;
	}
      else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
	{
	  if (src_mode == TImode)
	    {
	      emit_insn
		(gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst), src));
	      emit_insn
		(gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst), src));
	    }
	  else
	    {
	      emit_insn
		(gen_aarch64_movdi_tflow (gen_lowpart (word_mode, dst), src));
	      emit_insn
		(gen_aarch64_movdi_tfhigh (gen_highpart (word_mode, dst), src));
	    }
	  return;
	}
      /* Fall through to r -> r cases.  */
    }

  if (dst_mode == TImode)
    {
      /* Handle r -> r, taking care of possible overlap.  */
      low_dst = gen_lowpart (word_mode, dst);
      if (REG_P (low_dst)
	  && reg_overlap_mentioned_p (low_dst, src))
	{
	  aarch64_emit_move (gen_highpart (word_mode, dst),
			     gen_highpart_mode (word_mode, TImode, src));
	  aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
	}
      else
	{
	  aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
	  aarch64_emit_move (gen_highpart (word_mode, dst),
			     gen_highpart_mode (word_mode, TImode, src));
	}
    }
  else
    {
      emit_move_insn (gen_rtx_REG (DFmode, dst_regno),
		      gen_rtx_REG (DFmode, src_regno));
      emit_move_insn (gen_rtx_REG (DFmode, dst_regno + 1),
		      gen_rtx_REG (DFmode, src_regno + 1));
    }
}
bool
aarch64_split_128bit_move_p (rtx dst, rtx src)
{
  return (! REG_P (src)
	  || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
}
/* Split a complex SIMD combine.  */

void
aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
{
  enum machine_mode src_mode = GET_MODE (src1);
  enum machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src1) && REG_P (src2))
    {
      rtx (*gen) (rtx, rtx, rtx);

      switch (src_mode)
	{
	case V8QImode:
	  gen = gen_aarch64_simd_combinev8qi;
	  break;
	case V4HImode:
	  gen = gen_aarch64_simd_combinev4hi;
	  break;
	case V2SImode:
	  gen = gen_aarch64_simd_combinev2si;
	  break;
	case V2SFmode:
	  gen = gen_aarch64_simd_combinev2sf;
	  break;
	case DImode:
	  gen = gen_aarch64_simd_combinedi;
	  break;
	case DFmode:
	  gen = gen_aarch64_simd_combinedf;
	  break;
	default:
	  gcc_unreachable ();
	}

      emit_insn (gen (dst, src1, src2));
      return;
    }
}
/* Split a complex SIMD move.  */

void
aarch64_split_simd_move (rtx dst, rtx src)
{
  enum machine_mode src_mode = GET_MODE (src);
  enum machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src))
    {
      rtx (*gen) (rtx, rtx);

      gcc_assert (VECTOR_MODE_P (src_mode));

      switch (src_mode)
	{
	case V16QImode:
	  gen = gen_aarch64_split_simd_movv16qi;
	  break;
	case V8HImode:
	  gen = gen_aarch64_split_simd_movv8hi;
	  break;
	case V4SImode:
	  gen = gen_aarch64_split_simd_movv4si;
	  break;
	case V2DImode:
	  gen = gen_aarch64_split_simd_movv2di;
	  break;
	case V4SFmode:
	  gen = gen_aarch64_split_simd_movv4sf;
	  break;
	case V2DFmode:
	  gen = gen_aarch64_split_simd_movv2df;
	  break;
	default:
	  gcc_unreachable ();
	}

      emit_insn (gen (dst, src));
      return;
    }
}
/* Force VALUE into a register, using X as the scratch if a new pseudo
   cannot be created.  */
static rtx
aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
{
  if (can_create_pseudo_p ())
    return force_reg (mode, value);
  else
    {
      x = aarch64_emit_move (x, value);
      return x;
    }
}
/* Return an rtx equivalent to REG + OFFSET, using TEMP as a scratch
   register if the offset is not a valid add immediate.  */
static rtx
aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg,
		    HOST_WIDE_INT offset)
{
  if (!aarch64_plus_immediate (GEN_INT (offset), mode))
    {
      rtx high;
      /* Load the full offset into a register.  This
	 might be improvable in the future.  */
      high = GEN_INT (offset);
      offset = 0;
      high = aarch64_force_temporary (mode, temp, high);
      reg = aarch64_force_temporary (mode, temp,
				     gen_rtx_PLUS (mode, high, reg));
    }
  return plus_constant (mode, reg, offset);
}
void
aarch64_expand_mov_immediate (rtx dest, rtx imm)
{
  enum machine_mode mode = GET_MODE (dest);
  unsigned HOST_WIDE_INT mask;
  int i;
  bool first;
  unsigned HOST_WIDE_INT val;
  bool subtargets;
  rtx subtarget;
  int one_match, zero_match;

  gcc_assert (mode == SImode || mode == DImode);

  /* Check on what type of symbol it is.  */
  if (GET_CODE (imm) == SYMBOL_REF
      || GET_CODE (imm) == LABEL_REF
      || GET_CODE (imm) == CONST)
    {
      rtx mem, base, offset;
      enum aarch64_symbol_type sty;

      /* If we have (const (plus symbol offset)), separate out the offset
	 before we start classifying the symbol.  */
      split_const (imm, &base, &offset);

      sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
      switch (sty)
	{
	case SYMBOL_FORCE_TO_MEM:
	  if (offset != const0_rtx
	      && targetm.cannot_force_const_mem (mode, imm))
	    {
	      gcc_assert (can_create_pseudo_p ());
	      base = aarch64_force_temporary (mode, dest, base);
	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
	      aarch64_emit_move (dest, base);
	      return;
	    }
	  mem = force_const_mem (ptr_mode, imm);
	  gcc_assert (mem);
	  if (mode != ptr_mode)
	    mem = gen_rtx_ZERO_EXTEND (mode, mem);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
	  return;

	case SYMBOL_SMALL_TLSGD:
	case SYMBOL_SMALL_TLSDESC:
	case SYMBOL_SMALL_GOTTPREL:
	case SYMBOL_SMALL_GOT:
	case SYMBOL_TINY_GOT:
	  if (offset != const0_rtx)
	    {
	      gcc_assert (can_create_pseudo_p ());
	      base = aarch64_force_temporary (mode, dest, base);
	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
	      aarch64_emit_move (dest, base);
	      return;
	    }
	  /* FALLTHRU */

	case SYMBOL_SMALL_TPREL:
	case SYMBOL_SMALL_ABSOLUTE:
	case SYMBOL_TINY_ABSOLUTE:
	  aarch64_load_symref_appropriately (dest, imm, sty);
	  return;

	default:
	  gcc_unreachable ();
	}
    }

  if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
    {
      emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
      return;
    }

  if (!CONST_INT_P (imm))
    {
      if (GET_CODE (imm) == HIGH)
	emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
      else
	{
	  rtx mem = force_const_mem (mode, imm);
	  gcc_assert (mem);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
	}

      return;
    }

  if (mode == SImode)
    {
      /* We know we can't do this in 1 insn, and we must be able to do it
	 in two; so don't mess around looking for sequences that don't buy
	 us anything.  */
      emit_insn (gen_rtx_SET (VOIDmode, dest,
			      GEN_INT (INTVAL (imm) & 0xffff)));
      emit_insn (gen_insv_immsi (dest, GEN_INT (16),
				 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
      return;
    }
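  /* Worked example for the SImode path above (not from the original
     source): for imm == 0x12345678 the two instructions emitted
     correspond to

	 mov	w0, #0x5678
	 movk	w0, #0x1234, lsl #16

     i.e. a MOVZ of the low halfword followed by a MOVK inserting the
     high halfword.  */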
  /* Remaining cases are all for DImode.  */

  val = INTVAL (imm);
  subtargets = optimize && can_create_pseudo_p ();

  one_match = 0;
  zero_match = 0;
  mask = 0xffff;

  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) == 0)
	zero_match++;
      else if ((val & mask) == mask)
	one_match++;
    }

  if (one_match == 2)
    {
      mask = 0xffff;
      for (i = 0; i < 64; i += 16, mask <<= 16)
	{
	  if ((val & mask) != mask)
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
	      emit_insn (gen_insv_immdi (dest, GEN_INT (i),
					 GEN_INT ((val >> i) & 0xffff)));
	      return;
	    }
	}
      gcc_unreachable ();
    }

  if (zero_match == 2)
    goto simple_sequence;

  mask = 0x0ffff0000UL;
  for (i = 16; i < 64; i += 16, mask <<= 16)
    {
      HOST_WIDE_INT comp = mask & ~(mask - 1);

      if (aarch64_uimm12_shift (val - (val & mask)))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT (val & mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - (val & mask))));
	  return;
	}
      else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT ((val + comp) & mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - ((val + comp) & mask))));
	  return;
	}
      else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT ((val - comp) | ~mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - ((val - comp) | ~mask))));
	  return;
	}
      else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;

	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT (val | ~mask)));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - (val | ~mask))));
	  return;
	}
    }

  /* See if we can do it by arithmetically combining two
     immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      int j;
      mask = 0xffff;

      if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
	  || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
	{
	  subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
	  emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				  GEN_INT (aarch64_bitmasks[i])));
	  emit_insn (gen_adddi3 (dest, subtarget,
				 GEN_INT (val - aarch64_bitmasks[i])));
	  return;
	}

      for (j = 0; j < 64; j += 16, mask <<= 16)
	{
	  if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, dest,
				      GEN_INT (aarch64_bitmasks[i])));
	      emit_insn (gen_insv_immdi (dest, GEN_INT (j),
					 GEN_INT ((val >> j) & 0xffff)));
	      return;
	    }
	}
    }

  /* See if we can do it by logically combining two immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
	{
	  int j;

	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
	    if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
	      {
		subtarget = subtargets ? gen_reg_rtx (mode) : dest;
		emit_insn (gen_rtx_SET (VOIDmode, subtarget,
					GEN_INT (aarch64_bitmasks[i])));
		emit_insn (gen_iordi3 (dest, subtarget,
				       GEN_INT (aarch64_bitmasks[j])));
		return;
	      }
	}
      else if ((val & aarch64_bitmasks[i]) == val)
	{
	  int j;

	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
	    if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
	      {
		subtarget = subtargets ? gen_reg_rtx (mode) : dest;
		emit_insn (gen_rtx_SET (VOIDmode, subtarget,
					GEN_INT (aarch64_bitmasks[j])));
		emit_insn (gen_anddi3 (dest, subtarget,
				       GEN_INT (aarch64_bitmasks[i])));
		return;
	      }
	}
    }

 simple_sequence:
  first = true;
  mask = 0xffff;
  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) != 0)
	{
	  if (first)
	    {
	      emit_insn (gen_rtx_SET (VOIDmode, dest,
				      GEN_INT (val & mask)));
	      first = false;
	    }
	  else
	    emit_insn (gen_insv_immdi (dest, GEN_INT (i),
				       GEN_INT ((val >> i) & 0xffff)));
	}
    }
}
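/* Illustrative note on the DImode handling above (not from the
   original source): when none of the shortcut paths apply, the closing
   loop emits one MOV/MOVK per non-zero 16-bit chunk, e.g. building
   0x123456789abc as

	mov	x0, #0x9abc
	movk	x0, #0x5678, lsl #16
	movk	x0, #0x1234, lsl #32

   while values that are mostly all-ones, or that differ from a valid
   bitmask immediate by a shifted 12-bit addend, are caught by the
   earlier two-instruction paths.  */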
/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL.  */
static bool
aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  /* Indirect calls are not currently supported.  */
  if (decl == NULL)
    return false;

  /* Cannot tail-call to long-calls, since these are outside of the
     range of a branch instruction (we could handle this if we added
     support for indirect tail-calls).  */
  if (aarch64_decl_is_long_call_p (decl))
    return false;

  return true;
}
/* Implement TARGET_PASS_BY_REFERENCE.  */

static bool
aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
			   enum machine_mode mode,
			   const_tree type,
			   bool named ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  enum machine_mode dummymode;
  int nregs;

  /* GET_MODE_SIZE (BLKmode) is useless since it is 0.  */
  size = (mode == BLKmode && type)
	 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);

  if (type)
    {
      /* Arrays are always passed by reference.  */
      if (TREE_CODE (type) == ARRAY_TYPE)
	return true;
      /* Other aggregates based on their size.  */
      if (AGGREGATE_TYPE_P (type))
	size = int_size_in_bytes (type);
    }

  /* Variable sized arguments are always returned by reference.  */
  if (size < 0)
    return true;

  /* Can this be a candidate to be passed in fp/simd register(s)?  */
  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
					       &dummymode, &nregs, NULL))
    return false;

  /* Arguments which are variable sized or larger than 2 registers are
     passed by reference unless they are a homogeneous floating point
     aggregate.  */
  return size > 2 * UNITS_PER_WORD;
}
/* Return TRUE if VALTYPE is padded to its least significant bits.  */
static bool
aarch64_return_in_msb (const_tree valtype)
{
  enum machine_mode dummy_mode;
  int dummy_int;

  /* Never happens in little-endian mode.  */
  if (!BYTES_BIG_ENDIAN)
    return false;

  /* Only composite types smaller than or equal to 16 bytes can
     be potentially returned in registers.  */
  if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
      || int_size_in_bytes (valtype) <= 0
      || int_size_in_bytes (valtype) > 16)
    return false;

  /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
     or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
     is always passed/returned in the least significant bits of fp/simd
     register(s).  */
  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
					       &dummy_mode, &dummy_int, NULL))
    return false;

  return true;
}
/* Implement TARGET_FUNCTION_VALUE.
   Define how to find the value returned by a function.  */

static rtx
aarch64_function_value (const_tree type, const_tree func,
			bool outgoing ATTRIBUTE_UNUSED)
{
  enum machine_mode mode;
  int unsignedp;
  int count;
  enum machine_mode ag_mode;

  mode = TYPE_MODE (type);
  if (INTEGRAL_TYPE_P (type))
    mode = promote_function_mode (type, mode, &unsignedp, func, 1);

  if (aarch64_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);

      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
	}
    }

  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
					       &ag_mode, &count, NULL))
    {
      if (!aarch64_composite_type_p (type, mode))
	{
	  gcc_assert (count == 1 && mode == ag_mode);
	  return gen_rtx_REG (mode, V0_REGNUM);
	}
      else
	{
	  int i;
	  rtx par;

	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
	  for (i = 0; i < count; i++)
	    {
	      rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				       GEN_INT (i * GET_MODE_SIZE (ag_mode)));
	      XVECEXP (par, 0, i) = tmp;
	    }
	  return par;
	}
    }
  else
    return gen_rtx_REG (mode, R0_REGNUM);
}
/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
   Return true if REGNO is the number of a hard register in which the values
   of called function may come back.  */

static bool
aarch64_function_value_regno_p (const unsigned int regno)
{
  /* Maximum of 16 bytes can be returned in the general registers.  Examples
     of 16-byte return values are: 128-bit integers and 16-byte small
     structures (excluding homogeneous floating-point aggregates).  */
  if (regno == R0_REGNUM || regno == R1_REGNUM)
    return true;

  /* Up to four fp/simd registers can return a function value, e.g. a
     homogeneous floating-point aggregate having four members.  */
  if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
    return !TARGET_GENERAL_REGS_ONLY;

  return false;
}
/* Implement TARGET_RETURN_IN_MEMORY.

   If the type T of the result of a function is such that
     void func (T arg)
   would require that arg be passed as a value in a register (or set of
   registers) according to the parameter passing rules, then the result
   is returned in the same registers as would be used for such an
   argument.  */

static bool
aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  enum machine_mode ag_mode;
  int count;

  if (!AGGREGATE_TYPE_P (type)
      && TREE_CODE (type) != COMPLEX_TYPE
      && TREE_CODE (type) != VECTOR_TYPE)
    /* Simple scalar types are always returned in registers.  */
    return false;

  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
					       type,
					       &ag_mode,
					       &count,
					       NULL))
    return false;

  /* Types larger than 2 registers are returned in memory.  */
  size = int_size_in_bytes (type);
  return (size < 0 || size > 2 * UNITS_PER_WORD);
}
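/* Illustrative outcomes of the rules implemented above (not from the
   original source):

     struct { long x, y; }           (16 bytes)  -> returned in x0/x1
     struct { long x, y, z; }        (24 bytes)  -> returned in memory
     struct { double a, b, c, d; }   (an HFA)    -> returned in v0-v3  */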
/* Return true if an argument of mode MODE and type TYPE is a candidate
   to be passed in SIMD/FP registers; *NREGS is set to the number of
   registers required.  */
static bool
aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
			       const_tree type, int *nregs)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  return aarch64_vfp_is_call_or_return_candidate (mode,
						  type,
						  &pcum->aapcs_vfp_rmode,
						  nregs,
						  NULL);
}
/* Given MODE and TYPE of a function argument, return the alignment in
   bits.  The idea is to suppress any stronger alignment requested by
   the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
   This is a helper function for local use only.  */

static unsigned int
aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
{
  unsigned int alignment;

  if (type)
    {
      if (!integer_zerop (TYPE_SIZE (type)))
	{
	  if (TYPE_MODE (type) == mode)
	    alignment = TYPE_ALIGN (type);
	  else
	    alignment = GET_MODE_ALIGNMENT (mode);
	}
      else
	alignment = 0;
    }
  else
    alignment = GET_MODE_ALIGNMENT (mode);

  return alignment;
}
/* Layout a function argument according to the AAPCS64 rules.  The rule
   numbers refer to the rule numbers in the AAPCS64.  */

static void
aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
		    const_tree type,
		    bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int ncrn, nvrn, nregs;
  bool allocate_ncrn, allocate_nvrn;

  /* We need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
  allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
						 mode,
						 type,
						 &nregs);

  /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
     The following code thus handles passing by SIMD/FP registers first.  */

  nvrn = pcum->aapcs_nvrn;

  /* C1 - C5 for floating point, homogeneous floating point aggregates (HFA)
     and homogeneous short-vector aggregates (HVA).  */
  if (allocate_nvrn)
    {
      if (nvrn + nregs <= NUM_FP_ARG_REGS)
	{
	  pcum->aapcs_nextnvrn = nvrn + nregs;
	  if (!aarch64_composite_type_p (type, mode))
	    {
	      gcc_assert (nregs == 1);
	      pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
	    }
	  else
	    {
	      rtx par;
	      int i;

	      par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
	      for (i = 0; i < nregs; i++)
		{
		  rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
					 V0_REGNUM + nvrn + i);
		  tmp = gen_rtx_EXPR_LIST
		    (VOIDmode, tmp,
		     GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
		  XVECEXP (par, 0, i) = tmp;
		}
	      pcum->aapcs_reg = par;
	    }
	  return;
	}
      else
	{
	  /* C.3 NSRN is set to 8.  */
	  pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
	  goto on_stack;
	}
    }

  ncrn = pcum->aapcs_ncrn;
  nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
	   + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  /* C6 - C9, though the sign and zero extension semantics are
     handled elsewhere.  This is the case where the argument fits
     entirely in general registers.  */
  if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
    {
      unsigned int alignment = aarch64_function_arg_alignment (mode, type);

      gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);

      /* C.8 if the argument has an alignment of 16 then the NGRN is
	 rounded up to the next even number.  */
      if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
	{
	  ++ncrn;
	  gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
	}

      /* NREGS can be 0 when e.g. an empty structure is to be passed.
	 A reg is still generated for it, but the caller should be smart
	 enough not to use it.  */
      if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
	{
	  pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
	}
      else
	{
	  rtx par;
	  int i;

	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
	  for (i = 0; i < nregs; i++)
	    {
	      rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				       GEN_INT (i * UNITS_PER_WORD));
	      XVECEXP (par, 0, i) = tmp;
	    }
	  pcum->aapcs_reg = par;
	}

      pcum->aapcs_nextncrn = ncrn + nregs;
      return;
    }

  /* C.11  */
  pcum->aapcs_nextncrn = NUM_ARG_REGS;

  /* The argument is passed on stack; record the needed number of words for
     this argument (we can re-use NREGS) and align the total size if
     necessary.  */
on_stack:
  pcum->aapcs_stack_words = nregs;
  if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
    pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
					       16 / UNITS_PER_WORD) + 1;
  return;
}
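/* Worked example of rule C.8 as handled above (not from the original
   source): for "void f (int a, __int128 b)", A is passed in w0; B
   needs two registers and has 16-byte alignment, so the NGRN is
   rounded up from 1 to 2 and B is passed in x2/x3, leaving x1
   unused.  */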
/* Implement TARGET_FUNCTION_ARG.  */

static rtx
aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
		      const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);

  if (mode == VOIDmode)
    return NULL_RTX;

  aarch64_layout_arg (pcum_v, mode, type, named);
  return pcum->aapcs_reg;
}
void
aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
			      const_tree fntype ATTRIBUTE_UNUSED,
			      rtx libname ATTRIBUTE_UNUSED,
			      const_tree fndecl ATTRIBUTE_UNUSED,
			      unsigned n_named ATTRIBUTE_UNUSED)
{
  pcum->aapcs_ncrn = 0;
  pcum->aapcs_nvrn = 0;
  pcum->aapcs_nextncrn = 0;
  pcum->aapcs_nextnvrn = 0;
  pcum->pcs_variant = ARM_PCS_AAPCS64;
  pcum->aapcs_reg = NULL_RTX;
  pcum->aapcs_arg_processed = false;
  pcum->aapcs_stack_words = 0;
  pcum->aapcs_stack_size = 0;

  return;
}
static void
aarch64_function_arg_advance (cumulative_args_t pcum_v,
			      enum machine_mode mode,
			      const_tree type,
			      bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  if (pcum->pcs_variant == ARM_PCS_AAPCS64)
    {
      aarch64_layout_arg (pcum_v, mode, type, named);
      gcc_assert ((pcum->aapcs_reg != NULL_RTX)
		  != (pcum->aapcs_stack_words != 0));
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
      pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
      pcum->aapcs_stack_size += pcum->aapcs_stack_words;
      pcum->aapcs_stack_words = 0;
      pcum->aapcs_reg = NULL_RTX;
    }
}
bool
aarch64_function_arg_regno_p (unsigned regno)
{
  return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
	  || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
}
/* Implement FUNCTION_ARG_BOUNDARY.  Every parameter gets at least
   PARM_BOUNDARY bits of alignment, but will be given anything up
   to STACK_BOUNDARY bits if the type requires it.  This makes sure
   that both before and after the layout of each argument, the Next
   Stacked Argument Address (NSAA) will have a minimum alignment of
   8 bytes.  */

static unsigned int
aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
{
  unsigned int alignment = aarch64_function_arg_alignment (mode, type);

  if (alignment < PARM_BOUNDARY)
    alignment = PARM_BOUNDARY;
  if (alignment > STACK_BOUNDARY)
    alignment = STACK_BOUNDARY;
  return alignment;
}
/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).

   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte of the stack slot has useful data.

   Small aggregate types are placed in the lowest memory address.

   The related parameter passing rules are B.4, C.3, C.5 and C.14.  */

bool
aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
{
  /* On little-endian targets, the least significant byte of every stack
     argument is passed at the lowest byte address of the stack slot.  */
  if (!BYTES_BIG_ENDIAN)
    return true;

  /* Otherwise, integral, floating-point and pointer types are padded downward:
     the least significant byte of a stack argument is passed at the highest
     byte address of the stack slot.  */
  if (type
      ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
	 || POINTER_TYPE_P (type))
      : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
    return false;

  /* Everything else padded upward, i.e. data in first byte of stack slot.  */
  return true;
}
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).

   It specifies padding for the last (may also be the only)
   element of a block move between registers and memory.  If
   assuming the block is in the memory, padding upward means that
   the last element is padded after its highest significant byte,
   while in downward padding, the last element is padded at its
   least significant byte side.

   Small aggregates and small complex types are always padded
   upwards.

   We don't need to worry about homogeneous floating-point or
   short-vector aggregates; their move is not affected by the
   padding direction determined here.  Regardless of endianness,
   each element of such an aggregate is put in the least
   significant bits of a fp/simd register.

   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */

bool
aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
			bool first ATTRIBUTE_UNUSED)
{
  /* Small composite types are always padded upward.  */
  if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
    {
      HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
			    : GET_MODE_SIZE (mode));
      if (size < 2 * UNITS_PER_WORD)
	return true;
    }

  /* Otherwise, use the default padding.  */
  return !BYTES_BIG_ENDIAN;
}
static enum machine_mode
aarch64_libgcc_cmp_return_mode (void)
{
  return SImode;
}
static bool
aarch64_frame_pointer_required (void)
{
  /* If the function contains dynamic stack allocations, we need to
     use the frame pointer to access the static parts of the frame.  */
  if (cfun->calls_alloca)
    return true;

  /* We may have turned flag_omit_frame_pointer on in order to have this
     function called; if we did, we also set the 'faked_omit_frame_pointer'
     flag and we'll check it here.
     If we really did set flag_omit_frame_pointer normally, then we return
     false (no frame pointer required) in all cases.  */

  if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
    return false;
  else if (flag_omit_leaf_frame_pointer)
    return !crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM);
  return true;
}
/* Mark the registers that need to be saved by the callee and calculate
   the size of the callee-saved registers area and frame record (both FP
   and LR may be omitted).  */
static void
aarch64_layout_frame (void)
{
  HOST_WIDE_INT offset = 0;
  int regno;

  if (reload_completed && cfun->machine->frame.laid_out)
    return;

  cfun->machine->frame.fp_lr_offset = 0;

  /* First mark all the registers that really need to be saved...  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    cfun->machine->frame.reg_offset[regno] = -1;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    cfun->machine->frame.reg_offset[regno] = -1;

  /* ... that includes the eh data registers (if needed)...  */
  if (crtl->calls_eh_return)
    for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
      cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;

  /* ... and any callee saved register that dataflow says is live.  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    if (df_regs_ever_live_p (regno)
	&& !call_used_regs[regno])
      cfun->machine->frame.reg_offset[regno] = 0;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    if (df_regs_ever_live_p (regno)
	&& !call_used_regs[regno])
      cfun->machine->frame.reg_offset[regno] = 0;

  if (frame_pointer_needed)
    {
      cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
      cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
      cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
    }

  /* Now assign stack slots for them.  */
  for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
    if (cfun->machine->frame.reg_offset[regno] != -1)
      {
	cfun->machine->frame.reg_offset[regno] = offset;
	offset += UNITS_PER_WORD;
      }

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    if (cfun->machine->frame.reg_offset[regno] != -1)
      {
	cfun->machine->frame.reg_offset[regno] = offset;
	offset += UNITS_PER_WORD;
      }

  if (frame_pointer_needed)
    {
      cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
      offset += UNITS_PER_WORD;
      cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
    }

  if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
    {
      cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
      offset += UNITS_PER_WORD;
      cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
    }

  cfun->machine->frame.padding0 =
    (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
  offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);

  cfun->machine->frame.saved_regs_size = offset;
  cfun->machine->frame.laid_out = true;
}
/* Make the last instruction frame-related and note that it performs
   the operation described by FRAME_PATTERN.  */

static void
aarch64_set_frame_expr (rtx frame_pattern)
{
  rtx insn;

  insn = get_last_insn ();
  RTX_FRAME_RELATED_P (insn) = 1;
  RTX_FRAME_RELATED_P (frame_pattern) = 1;
  REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
				      frame_pattern,
				      REG_NOTES (insn));
}
static bool
aarch64_register_saved_on_entry (int regno)
{
  return cfun->machine->frame.reg_offset[regno] != -1;
}
static void
aarch64_save_or_restore_fprs (int start_offset, int increment,
			      bool restore, rtx base_rtx)
{
  unsigned regno;
  unsigned regno2;
  rtx insn;
  rtx (*gen_mem_ref)(enum machine_mode, rtx)
    = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    {
      if (aarch64_register_saved_on_entry (regno))
	{
	  rtx mem;
	  mem = gen_mem_ref (DFmode,
			     plus_constant (Pmode,
					    base_rtx,
					    start_offset));

	  for (regno2 = regno + 1;
	       regno2 <= V31_REGNUM
		 && !aarch64_register_saved_on_entry (regno2);
	       regno2++)
	    {
	      /* Empty loop.  */
	    }
	  if (regno2 <= V31_REGNUM &&
	      aarch64_register_saved_on_entry (regno2))
	    {
	      rtx mem2;
	      /* Next highest register to be saved.  */
	      mem2 = gen_mem_ref (DFmode,
				  plus_constant (Pmode,
						 base_rtx,
						 start_offset + increment));
	      if (restore == false)
		{
		  insn = emit_insn
		    ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
					mem2, gen_rtx_REG (DFmode, regno2)));
		}
	      else
		{
		  insn = emit_insn
		    ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
				       gen_rtx_REG (DFmode, regno2), mem2));

		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DFmode, regno));
		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DFmode, regno2));
		}

	      /* The first part of a frame-related parallel insn
		 is always assumed to be relevant to the frame
		 calculations; subsequent parts are only
		 frame-related if explicitly marked.  */
	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
	      regno = regno2;
	      start_offset += increment * 2;
	    }
	  else
	    {
	      if (restore == false)
		insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
	      else
		{
		  insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DImode, regno));
		}
	      start_offset += increment;
	    }
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
/* OFFSET is the offset from the stack pointer at which the saves and
   restores have to happen.  */
static void
aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
					       bool restore)
{
  rtx insn;
  rtx base_rtx = stack_pointer_rtx;
  HOST_WIDE_INT start_offset = offset;
  HOST_WIDE_INT increment = UNITS_PER_WORD;
  rtx (*gen_mem_ref)(enum machine_mode, rtx)
    = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
  unsigned limit = (frame_pointer_needed)? R28_REGNUM : R30_REGNUM;
  unsigned regno;
  unsigned regno2;

  for (regno = R0_REGNUM; regno <= limit; regno++)
    {
      if (aarch64_register_saved_on_entry (regno))
	{
	  rtx mem;
	  mem = gen_mem_ref (Pmode,
			     plus_constant (Pmode,
					    base_rtx,
					    start_offset));

	  for (regno2 = regno + 1;
	       regno2 <= limit
		 && !aarch64_register_saved_on_entry (regno2);
	       regno2++)
	    {
	      /* Empty loop.  */
	    }
	  if (regno2 <= limit &&
	      aarch64_register_saved_on_entry (regno2))
	    {
	      rtx mem2;
	      /* Next highest register to be saved.  */
	      mem2 = gen_mem_ref (Pmode,
				  plus_constant (Pmode,
						 base_rtx,
						 start_offset + increment));
	      if (restore == false)
		{
		  insn = emit_insn
		    ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
					mem2, gen_rtx_REG (DImode, regno2)));
		}
	      else
		{
		  insn = emit_insn
		    ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
				       gen_rtx_REG (DImode, regno2), mem2));

		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DImode, regno));
		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DImode, regno2));
		}

	      /* The first part of a frame-related parallel insn
		 is always assumed to be relevant to the frame
		 calculations; subsequent parts are only
		 frame-related if explicitly marked.  */
	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
					    1)) = 1;
	      regno = regno2;
	      start_offset += increment * 2;
	    }
	  else
	    {
	      if (restore == false)
		insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
	      else
		{
		  insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
		  add_reg_note (insn, REG_CFA_RESTORE,
				gen_rtx_REG (DImode, regno));
		}
	      start_offset += increment;
	    }
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
}
/* AArch64 stack frames generated by this compiler look like:

	+-------------------------------+
	|  incoming stack arguments     |
	+-------------------------------+ <-- arg_pointer_rtx
	|  callee-allocated save area   |
	|  for register varargs         |
	+-------------------------------+
	|  local variables              |
	+-------------------------------+ <-- frame_pointer_rtx
	|  callee-saved registers       |
	+-------------------------------+
	|  LR'                          |
	+-------------------------------+
	|  FP'                          |
      P +-------------------------------+ <-- hard_frame_pointer_rtx
	|  dynamic allocation           |
	+-------------------------------+
	|  outgoing stack arguments     |
	+-------------------------------+ <-- stack_pointer_rtx

   Dynamic stack allocations such as alloca insert data at point P.
   They decrease stack_pointer_rtx but leave frame_pointer_rtx and
   hard_frame_pointer_rtx unchanged.  */

/* Generate the prologue instructions for entry into a function.
   Establish the stack frame by decreasing the stack pointer with a
   properly calculated size and, if necessary, create a frame record
   filled with the values of LR and previous frame pointer.  The
   current FP is also set up if it is in use.  */
void
aarch64_expand_prologue (void)
{
  /* sub sp, sp, #<frame_size>
     stp {fp, lr}, [sp, #<frame_size> - 16]
     add fp, sp, #<frame_size> - hardfp_offset
     stp {cs_reg}, [fp, #-16] etc.

     sub sp, sp, <final_adjustment_if_any>
  */
  HOST_WIDE_INT original_frame_size;	/* local variables + vararg save */
  HOST_WIDE_INT frame_size, offset;
  HOST_WIDE_INT fp_offset;		/* FP offset from SP */
  rtx insn;

  aarch64_layout_frame ();
  original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
  gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
	      && (cfun->stdarg || !cfun->machine->saved_varargs_size));
  frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
		+ crtl->outgoing_args_size);
  offset = frame_size = AARCH64_ROUND_UP (frame_size,
					  STACK_BOUNDARY / BITS_PER_UNIT);

  if (flag_stack_usage_info)
    current_function_static_stack_size = frame_size;

  fp_offset = (offset
	       - original_frame_size
	       - cfun->machine->frame.saved_regs_size);

  /* Store pairs and load pairs have a range only -512 to 504.  */
  if (offset >= 512)
    {
      /* When the frame has a large size, an initial decrease is done on
	 the stack pointer to jump over the callee-allocated save area for
	 register varargs, the local variable area and/or the callee-saved
	 register area.  This will allow the pre-index write-back
	 store pair instructions to be used for setting up the stack frame
	 efficiently.  */
      offset = original_frame_size + cfun->machine->frame.saved_regs_size;
      if (offset >= 512)
	offset = cfun->machine->frame.saved_regs_size;

      frame_size -= (offset + crtl->outgoing_args_size);
      fp_offset = 0;

      if (frame_size >= 0x1000000)
	{
	  rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
	  emit_move_insn (op0, GEN_INT (-frame_size));
	  emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
	  aarch64_set_frame_expr (gen_rtx_SET
				  (Pmode, stack_pointer_rtx,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  -frame_size)));
	}
      else if (frame_size > 0)
	{
	  if ((frame_size & 0xfff) != frame_size)
	    {
	      insn = emit_insn (gen_add2_insn
				(stack_pointer_rtx,
				 GEN_INT (-(frame_size
					    & ~(HOST_WIDE_INT)0xfff))));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	  if ((frame_size & 0xfff) != 0)
	    {
	      insn = emit_insn (gen_add2_insn
				(stack_pointer_rtx,
				 GEN_INT (-(frame_size
					    & (HOST_WIDE_INT)0xfff))));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}
    }
  else
    frame_size = -1;

  if (offset > 0)
    {
      /* Save the frame pointer and lr if the frame pointer is needed
	 first.  Make the frame pointer point to the location of the
	 old frame pointer on the stack.  */
      if (frame_pointer_needed)
	{
	  rtx mem_fp, mem_lr;

	  if (fp_offset)
	    {
	      insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					       GEN_INT (-offset)));
	      RTX_FRAME_RELATED_P (insn) = 1;
	      aarch64_set_frame_expr (gen_rtx_SET
				      (Pmode, stack_pointer_rtx,
				       gen_rtx_MINUS (Pmode,
						      stack_pointer_rtx,
						      GEN_INT (offset))));
	      mem_fp = gen_frame_mem (DImode,
				      plus_constant (Pmode,
						     stack_pointer_rtx,
						     fp_offset));
	      mem_lr = gen_frame_mem (DImode,
				      plus_constant (Pmode,
						     stack_pointer_rtx,
						     fp_offset
						     + UNITS_PER_WORD));
	      insn = emit_insn (gen_store_pairdi (mem_fp,
						  hard_frame_pointer_rtx,
						  mem_lr,
						  gen_rtx_REG (DImode,
							       LR_REGNUM)));
	    }
	  else
	    {
	      insn = emit_insn (gen_storewb_pairdi_di
				(stack_pointer_rtx, stack_pointer_rtx,
				 hard_frame_pointer_rtx,
				 gen_rtx_REG (DImode, LR_REGNUM),
				 GEN_INT (-offset),
				 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
	    }

	  /* The first part of a frame-related parallel insn is always
	     assumed to be relevant to the frame calculations;
	     subsequent parts are only frame-related if explicitly
	     marked.  */
	  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
	  RTX_FRAME_RELATED_P (insn) = 1;

	  /* Set up frame pointer to point to the location of the
	     previous frame pointer on the stack.  */
	  insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
					   stack_pointer_rtx,
					   GEN_INT (fp_offset)));
	  aarch64_set_frame_expr (gen_rtx_SET
				  (Pmode, hard_frame_pointer_rtx,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  fp_offset)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
					   hard_frame_pointer_rtx));
	}
      else
	{
	  insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					   GEN_INT (-offset)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

      aarch64_save_or_restore_callee_save_registers
	(fp_offset + cfun->machine->frame.hardfp_offset, 0);
    }

  /* when offset >= 512,
     sub sp, sp, #<outgoing_args_size> */
  if (frame_size > -1)
    {
      if (crtl->outgoing_args_size > 0)
	{
	  insn = emit_insn (gen_add2_insn
			    (stack_pointer_rtx,
			     GEN_INT (- crtl->outgoing_args_size)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
/* Generate the epilogue instructions for returning from a function.  */
void
aarch64_expand_epilogue (bool for_sibcall)
{
  HOST_WIDE_INT original_frame_size, frame_size, offset;
  HOST_WIDE_INT fp_offset;
  rtx insn;
  rtx cfa_reg;

  aarch64_layout_frame ();
  original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
  frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
		+ crtl->outgoing_args_size);
  offset = frame_size = AARCH64_ROUND_UP (frame_size,
					  STACK_BOUNDARY / BITS_PER_UNIT);

  fp_offset = (offset
	       - original_frame_size
	       - cfun->machine->frame.saved_regs_size);

  cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;

  /* Store pairs and load pairs have a range only -512 to 504.  */
  if (offset >= 512)
    {
      offset = original_frame_size + cfun->machine->frame.saved_regs_size;
      if (offset >= 512)
	offset = cfun->machine->frame.saved_regs_size;

      frame_size -= (offset + crtl->outgoing_args_size);
      fp_offset = 0;
      if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
	{
	  insn = emit_insn (gen_add2_insn
			    (stack_pointer_rtx,
			     GEN_INT (crtl->outgoing_args_size)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
  else
    frame_size = -1;

  /* If there were outgoing arguments or we've done dynamic stack
     allocation, then restore the stack pointer from the frame
     pointer.  This is at most one insn and more efficient than using
     GCC's internal mechanism.  */
  if (frame_pointer_needed
      && (crtl->outgoing_args_size || cfun->calls_alloca))
    {
      insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
				       hard_frame_pointer_rtx,
				       GEN_INT (- fp_offset)));
      RTX_FRAME_RELATED_P (insn) = 1;
      /* As SP is set to (FP - fp_offset), according to the rules in
	 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
	 from the value of SP from now on.  */
      cfa_reg = stack_pointer_rtx;
    }

  aarch64_save_or_restore_callee_save_registers
    (fp_offset + cfun->machine->frame.hardfp_offset, 1);

  /* Restore the frame pointer and lr if the frame pointer is needed.  */
  if (offset > 0)
    {
      if (frame_pointer_needed)
	{
	  rtx mem_fp, mem_lr;

	  if (fp_offset)
	    {
	      mem_fp = gen_frame_mem (DImode,
				      plus_constant (Pmode,
						     stack_pointer_rtx,
						     fp_offset));
	      mem_lr = gen_frame_mem (DImode,
				      plus_constant (Pmode,
						     stack_pointer_rtx,
						     fp_offset
						     + UNITS_PER_WORD));
	      insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
						 mem_fp,
						 gen_rtx_REG (DImode,
							      LR_REGNUM),
						 mem_lr));
	    }
	  else
	    {
	      insn = emit_insn (gen_loadwb_pairdi_di
				(stack_pointer_rtx,
				 stack_pointer_rtx,
				 hard_frame_pointer_rtx,
				 gen_rtx_REG (DImode, LR_REGNUM),
				 GEN_INT (offset),
				 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
	      RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
	      add_reg_note (insn, REG_CFA_ADJUST_CFA,
			    (gen_rtx_SET (Pmode, stack_pointer_rtx,
					  plus_constant (Pmode, cfa_reg,
							 offset))));
	    }

	  /* The first part of a frame-related parallel insn
	     is always assumed to be relevant to the frame
	     calculations; subsequent parts are only
	     frame-related if explicitly marked.  */
	  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
	  add_reg_note (insn, REG_CFA_RESTORE,
			gen_rtx_REG (DImode, LR_REGNUM));

	  if (fp_offset)
	    {
	      insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					       GEN_INT (offset)));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}
      else
	{
	  insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					   GEN_INT (offset)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  /* Stack adjustment for exception handler.  */
  if (crtl->calls_eh_return)
    {
      /* We need to unwind the stack by the offset computed by
	 EH_RETURN_STACKADJ_RTX.  However, at this point the CFA is
	 based on SP.  Ideally we would update the SP and define the
	 CFA along the lines of:

	 SP = SP + EH_RETURN_STACKADJ_RTX
	 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)

	 However the dwarf emitter only understands a constant
	 register offset.

	 The solution chosen here is to use the otherwise unused IP0
	 as a temporary register to hold the current SP value.  The
	 CFA is described using IP0 then SP is modified.  */

      rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);

      insn = emit_move_insn (ip0, stack_pointer_rtx);
      add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
      RTX_FRAME_RELATED_P (insn) = 1;

      emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));

      /* Ensure the assignment to IP0 does not get optimized away.  */
      emit_use (ip0);
    }

  if (frame_size > -1)
    {
      if (frame_size >= 0x1000000)
	{
	  rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
	  emit_move_insn (op0, GEN_INT (frame_size));
	  emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
	  aarch64_set_frame_expr (gen_rtx_SET
				  (Pmode, stack_pointer_rtx,
				   plus_constant (Pmode,
						  stack_pointer_rtx,
						  frame_size)));
	}
      else if (frame_size > 0)
	{
	  if ((frame_size & 0xfff) != 0)
	    {
	      insn = emit_insn (gen_add2_insn
				(stack_pointer_rtx,
				 GEN_INT ((frame_size
					   & (HOST_WIDE_INT) 0xfff))));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	  if ((frame_size & 0xfff) != frame_size)
	    {
	      insn = emit_insn (gen_add2_insn
				(stack_pointer_rtx,
				 GEN_INT ((frame_size
					   & ~ (HOST_WIDE_INT) 0xfff))));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}

      aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
					   plus_constant (Pmode,
							  stack_pointer_rtx,
							  offset)));
    }

  emit_use (gen_rtx_REG (DImode, LR_REGNUM));
  if (!for_sibcall)
    emit_jump_insn (ret_rtx);
}
/* Return the place to copy the exception unwinding return address to.
   This will probably be a stack slot, but could (in theory) be the
   return register.  */
rtx
aarch64_final_eh_return_addr (void)
{
  HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
  aarch64_layout_frame ();
  original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
  frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
                + crtl->outgoing_args_size);
  offset = frame_size = AARCH64_ROUND_UP (frame_size,
                                          STACK_BOUNDARY / BITS_PER_UNIT);
  fp_offset = offset
              - original_frame_size
              - cfun->machine->frame.saved_regs_size;

  if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
    return gen_rtx_REG (DImode, LR_REGNUM);

  /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
     result in a store to save LR introduced by builtin_eh_return () being
     incorrectly deleted because the alias is not detected.
     So in the calculation of the address to copy the exception unwinding
     return address to, we note 2 cases.
     If FP is needed and the fp_offset is 0, it means that SP = FP and hence
     we return a SP-relative location since all the addresses are SP-relative
     in this case.  This prevents the store from being optimized away.
     If the fp_offset is not 0, then the addresses will be FP-relative and
     therefore we return a FP-relative location.  */

  if (frame_pointer_needed)
    {
      if (fp_offset)
        return gen_frame_mem (DImode,
                              plus_constant (Pmode, hard_frame_pointer_rtx,
                                             UNITS_PER_WORD));
      else
        return gen_frame_mem (DImode,
                              plus_constant (Pmode, stack_pointer_rtx,
                                             UNITS_PER_WORD));
    }

  /* If FP is not needed, we calculate the location of LR, which would be
     at the top of the saved registers block.  */

  return gen_frame_mem (DImode,
                        plus_constant (Pmode,
                                       stack_pointer_rtx,
                                       fp_offset
                                       + cfun->machine->frame.saved_regs_size
                                       - 2 * UNITS_PER_WORD));
}
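
/* On LP64, UNITS_PER_WORD is 8 bytes, so the FP-relative case above resolves
   to the LR save slot at [x29, #8], the word immediately above the saved
   frame pointer (FP and LR are saved as a pair).  */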
/* Output code to build up a constant in a register.  */
static void
aarch64_build_constant (int regnum, HOST_WIDE_INT val)
{
  if (aarch64_bitmask_imm (val, DImode))
    emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
  else
    {
      int i;
      int ncount = 0;
      int zcount = 0;
      HOST_WIDE_INT valp = val >> 16;
      HOST_WIDE_INT valm;
      HOST_WIDE_INT tval;

      for (i = 16; i < 64; i += 16)
        {
          valm = (valp & 0xffff);

          if (valm != 0)
            ++ zcount;

          if (valm != 0xffff)
            ++ ncount;

          valp >>= 16;
        }

      /* zcount contains the number of additional MOVK instructions
         required if the constant is built up with an initial MOVZ instruction,
         while ncount is the number of MOVK instructions required if starting
         with a MOVN instruction.  Choose the sequence that yields the fewest
         number of instructions, preferring MOVZ instructions when they are both
         the same.  */
      if (ncount < zcount)
        {
          emit_move_insn (gen_rtx_REG (Pmode, regnum),
                          GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
          tval = 0xffff;
        }
      else
        {
          emit_move_insn (gen_rtx_REG (Pmode, regnum),
                          GEN_INT (val & 0xffff));
          tval = 0;
        }

      val >>= 16;

      for (i = 16; i < 64; i += 16)
        {
          if ((val & 0xffff) != tval)
            emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
                                       GEN_INT (i), GEN_INT (val & 0xffff)));
          val >>= 16;
        }
    }
}
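
/* Worked example (illustrative): for val == 0x0000123400005678 the counting
   loop above finds zcount == 1 and ncount == 3, so the MOVZ sequence is
   chosen and the emitted code is roughly
       mov   xN, #0x5678
       movk  xN, #0x1234, lsl #32
   whereas a value such as 0xffffffffffff1234 gives ncount == 0 and is
   started with a single MOVN instead.  */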
static void
aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
{
  HOST_WIDE_INT mdelta = delta;
  rtx this_rtx = gen_rtx_REG (Pmode, regnum);
  rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);

  if (mdelta < 0)
    mdelta = -mdelta;

  if (mdelta >= 4096 * 4096)
    {
      aarch64_build_constant (scratchreg, delta);
      emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
    }
  else if (mdelta > 0)
    {
      if (mdelta >= 4096)
        {
          emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
          rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
          if (delta < 0)
            emit_insn (gen_rtx_SET (Pmode, this_rtx,
                                    gen_rtx_MINUS (Pmode, this_rtx, shift)));
          else
            emit_insn (gen_rtx_SET (Pmode, this_rtx,
                                    gen_rtx_PLUS (Pmode, this_rtx, shift)));
        }
      if (mdelta % 4096 != 0)
        {
          scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
          emit_insn (gen_rtx_SET (Pmode, this_rtx,
                                  gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
        }
    }
}
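
/* Worked example (illustrative): delta == 20000 is split as
   20000 / 4096 == 4 and 20000 % 4096 == 3616, so the code above emits
       mov  <scratch>, #4
       add  <reg>, <reg>, <scratch>, lsl #12    // adds 16384
       add  <reg>, <reg>, #3616
   keeping each immediate within the 12-bit add/sub range.  */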
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */
static void
aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
                         HOST_WIDE_INT delta,
                         HOST_WIDE_INT vcall_offset,
                         tree function)
{
  /* The this pointer is always in x0.  Note that this differs from
     Arm where the this pointer may be bumped to r1 if r0 is required
     to return a pointer to an aggregate.  On AArch64 a result value
     pointer will be in x8.  */
  int this_regno = R0_REGNUM;
  rtx this_rtx, temp0, temp1, addr, insn, funexp;

  reload_completed = 1;
  emit_note (NOTE_INSN_PROLOGUE_END);

  if (vcall_offset == 0)
    aarch64_add_constant (this_regno, IP1_REGNUM, delta);
  else
    {
      gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);

      this_rtx = gen_rtx_REG (Pmode, this_regno);
      temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
      temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);

      addr = this_rtx;
      if (delta != 0)
        {
          if (delta >= -256 && delta < 256)
            addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
                                       plus_constant (Pmode, this_rtx, delta));
          else
            aarch64_add_constant (this_regno, IP1_REGNUM, delta);
        }

      if (Pmode == ptr_mode)
        aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
      else
        aarch64_emit_move (temp0,
                           gen_rtx_ZERO_EXTEND (Pmode,
                                                gen_rtx_MEM (ptr_mode, addr)));

      if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
        addr = plus_constant (Pmode, temp0, vcall_offset);
      else
        {
          aarch64_build_constant (IP1_REGNUM, vcall_offset);
          addr = gen_rtx_PLUS (Pmode, temp0, temp1);
        }

      if (Pmode == ptr_mode)
        aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode, addr));
      else
        aarch64_emit_move (temp1,
                           gen_rtx_SIGN_EXTEND (Pmode,
                                                gen_rtx_MEM (ptr_mode, addr)));

      emit_insn (gen_add2_insn (this_rtx, temp1));
    }

  /* Generate a tail call to the target function.  */
  if (!TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
  SIBLING_CALL_P (insn) = 1;

  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  /* Stop pretending to be a post-reload pass.  */
  reload_completed = 0;
}
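
/* For a small DELTA and small VCALL_OFFSET the thunk body produced above is
   roughly (illustrative only):
       ldr  x16, [x0, #delta]!         // adjust this, load the vtable pointer
       ldr  x17, [x16, #vcall_offset]  // load the vcall adjustment
       add  x0, x0, x17                // apply it to this
       b    <function>                 // tail call
   Larger offsets fall back to aarch64_add_constant/aarch64_build_constant.  */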
static int
aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
{
  if (GET_CODE (*x) == SYMBOL_REF)
    return SYMBOL_REF_TLS_MODEL (*x) != 0;

  /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
     TLS offsets, not real symbol references.  */
  if (GET_CODE (*x) == UNSPEC
      && XINT (*x, 1) == UNSPEC_TLS)
    return -1;

  return 0;
}

static bool
aarch64_tls_referenced_p (rtx x)
{
  if (!TARGET_HAVE_TLS)
    return false;

  return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
}
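
/* Example: a reference to a `__thread int t;` variable produces a SYMBOL_REF
   whose SYMBOL_REF_TLS_MODEL is non-zero, so aarch64_tls_referenced_p returns
   true and callers such as aarch64_cannot_force_const_mem refuse to push the
   constant into the literal pool.  */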
static int
aarch64_bitmasks_cmp (const void *i1, const void *i2)
{
  const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
  const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;

  if (*imm1 < *imm2)
    return -1;
  if (*imm1 > *imm2)
    return +1;
  return 0;
}

static void
aarch64_build_bitmask_table (void)
{
  unsigned HOST_WIDE_INT mask, imm;
  unsigned int log_e, e, s, r;
  unsigned int nimms = 0;

  for (log_e = 1; log_e <= 6; log_e++)
    {
      e = 1 << log_e;
      if (e == 64)
        mask = ~(HOST_WIDE_INT) 0;
      else
        mask = ((HOST_WIDE_INT) 1 << e) - 1;
      for (s = 1; s < e; s++)
        {
          for (r = 0; r < e; r++)
            {
              /* set s consecutive bits to 1 (s < 64) */
              imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
              /* rotate right by r */
              if (r != 0)
                imm = ((imm >> r) | (imm << (e - r))) & mask;
              /* replicate the constant depending on SIMD size */
              switch (log_e)
                {
                case 1: imm |= (imm <<  2);
                case 2: imm |= (imm <<  4);
                case 3: imm |= (imm <<  8);
                case 4: imm |= (imm << 16);
                case 5: imm |= (imm << 32);
                case 6:
                  break;
                default:
                  gcc_unreachable ();
                }
              gcc_assert (nimms < AARCH64_NUM_BITMASKS);
              aarch64_bitmasks[nimms++] = imm;
            }
        }
    }

  gcc_assert (nimms == AARCH64_NUM_BITMASKS);
  qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
         aarch64_bitmasks_cmp);
}
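
/* Worked example (illustrative): with log_e == 3 (e == 8), s == 3, r == 1
   the element is ((1 << 3) - 1) == 0b00000111, rotated right by one bit to
   0b10000011 (0x83), then replicated through the case fall-through up to
   64 bits, giving the bitmask immediate 0x8383838383838383.  */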
/* Return true if val can be encoded as a 12-bit unsigned immediate with
   a left shift of 0 or 12 bits.  */
bool
aarch64_uimm12_shift (HOST_WIDE_INT val)
{
  return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
          || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val);
}

/* Return true if val is an immediate that can be loaded into a
   register by a MOVZ instruction.  */
static bool
aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
{
  if (GET_MODE_SIZE (mode) > 4)
    {
      if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
          || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
        return 1;
    }
  else
    {
      /* Ignore sign extension.  */
      val &= (HOST_WIDE_INT) 0xffffffff;
    }
  return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
          || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
}

/* Return true if val is a valid bitmask immediate.  */
bool
aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
{
  if (GET_MODE_SIZE (mode) < 8)
    {
      /* Replicate bit pattern.  */
      val &= (HOST_WIDE_INT) 0xffffffff;
      val |= val << 32;
    }
  return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
                  sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
}

/* Return true if val is an immediate that can be loaded into a
   register in a single instruction.  */
bool
aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
{
  if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
    return 1;
  return aarch64_bitmask_imm (val, mode);
}
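
/* Illustrative DImode examples: 0x00000000ffff0000 is accepted by
   aarch64_movw_imm (a single MOVZ ..., lsl #16); 0xffffffff0000ffff is
   accepted through the ~val check in aarch64_move_imm (a single MOVN);
   0x5555555555555555 is found in the bitmask table (a bitmask-immediate
   MOV/ORR); while 0x0000000000012345 fails all three and needs a
   MOVZ/MOVK pair.  */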
static bool
aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  if (GET_CODE (x) == HIGH)
    return true;

  split_const (x, &base, &offset);
  if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
    {
      if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
          != SYMBOL_FORCE_TO_MEM)
        return true;
      else
        /* Avoid generating a 64-bit relocation in ILP32; leave
           to aarch64_expand_mov_immediate to handle it properly.  */
        return mode != ptr_mode;
    }

  return aarch64_tls_referenced_p (x);
}
/* Return true if register REGNO is a valid index register.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
aarch64_regno_ok_for_index_p (int regno, bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
        return true;

      if (!reg_renumber)
        return false;

      regno = reg_renumber[regno];
    }
  return GP_REGNUM_P (regno);
}

/* Return true if register REGNO is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
aarch64_regno_ok_for_base_p (int regno, bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
        return true;

      if (!reg_renumber)
        return false;

      regno = reg_renumber[regno];
    }

  /* The fake registers will be eliminated to either the stack or
     hard frame pointer, both of which are usually valid base registers.
     Reload deals with the cases where the eliminated form isn't valid.  */
  return (GP_REGNUM_P (regno)
          || regno == SP_REGNUM
          || regno == FRAME_POINTER_REGNUM
          || regno == ARG_POINTER_REGNUM);
}

/* Return true if X is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

static bool
aarch64_base_register_rtx_p (rtx x, bool strict_p)
{
  if (!strict_p && GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);

  return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
}
2855 /* Return true if address offset is a valid index. If it is, fill in INFO
2856 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2859 aarch64_classify_index (struct aarch64_address_info
*info
, rtx x
,
2860 enum machine_mode mode
, bool strict_p
)
2862 enum aarch64_address_type type
;
2867 if ((REG_P (x
) || GET_CODE (x
) == SUBREG
)
2868 && GET_MODE (x
) == Pmode
)
2870 type
= ADDRESS_REG_REG
;
2874 /* (sign_extend:DI (reg:SI)) */
2875 else if ((GET_CODE (x
) == SIGN_EXTEND
2876 || GET_CODE (x
) == ZERO_EXTEND
)
2877 && GET_MODE (x
) == DImode
2878 && GET_MODE (XEXP (x
, 0)) == SImode
)
2880 type
= (GET_CODE (x
) == SIGN_EXTEND
)
2881 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
2882 index
= XEXP (x
, 0);
2885 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2886 else if (GET_CODE (x
) == MULT
2887 && (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
2888 || GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
2889 && GET_MODE (XEXP (x
, 0)) == DImode
2890 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
2891 && CONST_INT_P (XEXP (x
, 1)))
2893 type
= (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
2894 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
2895 index
= XEXP (XEXP (x
, 0), 0);
2896 shift
= exact_log2 (INTVAL (XEXP (x
, 1)));
2898 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2899 else if (GET_CODE (x
) == ASHIFT
2900 && (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
2901 || GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
2902 && GET_MODE (XEXP (x
, 0)) == DImode
2903 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
2904 && CONST_INT_P (XEXP (x
, 1)))
2906 type
= (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
2907 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
2908 index
= XEXP (XEXP (x
, 0), 0);
2909 shift
= INTVAL (XEXP (x
, 1));
2911 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2912 else if ((GET_CODE (x
) == SIGN_EXTRACT
2913 || GET_CODE (x
) == ZERO_EXTRACT
)
2914 && GET_MODE (x
) == DImode
2915 && GET_CODE (XEXP (x
, 0)) == MULT
2916 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
2917 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
2919 type
= (GET_CODE (x
) == SIGN_EXTRACT
)
2920 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
2921 index
= XEXP (XEXP (x
, 0), 0);
2922 shift
= exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)));
2923 if (INTVAL (XEXP (x
, 1)) != 32 + shift
2924 || INTVAL (XEXP (x
, 2)) != 0)
2927 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2928 (const_int 0xffffffff<<shift)) */
2929 else if (GET_CODE (x
) == AND
2930 && GET_MODE (x
) == DImode
2931 && GET_CODE (XEXP (x
, 0)) == MULT
2932 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
2933 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
2934 && CONST_INT_P (XEXP (x
, 1)))
2936 type
= ADDRESS_REG_UXTW
;
2937 index
= XEXP (XEXP (x
, 0), 0);
2938 shift
= exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)));
2939 if (INTVAL (XEXP (x
, 1)) != (HOST_WIDE_INT
)0xffffffff << shift
)
2942 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2943 else if ((GET_CODE (x
) == SIGN_EXTRACT
2944 || GET_CODE (x
) == ZERO_EXTRACT
)
2945 && GET_MODE (x
) == DImode
2946 && GET_CODE (XEXP (x
, 0)) == ASHIFT
2947 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
2948 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
2950 type
= (GET_CODE (x
) == SIGN_EXTRACT
)
2951 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
2952 index
= XEXP (XEXP (x
, 0), 0);
2953 shift
= INTVAL (XEXP (XEXP (x
, 0), 1));
2954 if (INTVAL (XEXP (x
, 1)) != 32 + shift
2955 || INTVAL (XEXP (x
, 2)) != 0)
2958 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2959 (const_int 0xffffffff<<shift)) */
2960 else if (GET_CODE (x
) == AND
2961 && GET_MODE (x
) == DImode
2962 && GET_CODE (XEXP (x
, 0)) == ASHIFT
2963 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
2964 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
2965 && CONST_INT_P (XEXP (x
, 1)))
2967 type
= ADDRESS_REG_UXTW
;
2968 index
= XEXP (XEXP (x
, 0), 0);
2969 shift
= INTVAL (XEXP (XEXP (x
, 0), 1));
2970 if (INTVAL (XEXP (x
, 1)) != (HOST_WIDE_INT
)0xffffffff << shift
)
2973 /* (mult:P (reg:P) (const_int scale)) */
2974 else if (GET_CODE (x
) == MULT
2975 && GET_MODE (x
) == Pmode
2976 && GET_MODE (XEXP (x
, 0)) == Pmode
2977 && CONST_INT_P (XEXP (x
, 1)))
2979 type
= ADDRESS_REG_REG
;
2980 index
= XEXP (x
, 0);
2981 shift
= exact_log2 (INTVAL (XEXP (x
, 1)));
2983 /* (ashift:P (reg:P) (const_int shift)) */
2984 else if (GET_CODE (x
) == ASHIFT
2985 && GET_MODE (x
) == Pmode
2986 && GET_MODE (XEXP (x
, 0)) == Pmode
2987 && CONST_INT_P (XEXP (x
, 1)))
2989 type
= ADDRESS_REG_REG
;
2990 index
= XEXP (x
, 0);
2991 shift
= INTVAL (XEXP (x
, 1));
2996 if (GET_CODE (index
) == SUBREG
)
2997 index
= SUBREG_REG (index
);
3000 (shift
> 0 && shift
<= 3
3001 && (1 << shift
) == GET_MODE_SIZE (mode
)))
3003 && aarch64_regno_ok_for_index_p (REGNO (index
), strict_p
))
3006 info
->offset
= index
;
3007 info
->shift
= shift
;
static bool
offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
{
  return (offset >= -64 * GET_MODE_SIZE (mode)
          && offset < 64 * GET_MODE_SIZE (mode)
          && offset % GET_MODE_SIZE (mode) == 0);
}

static bool
offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
                               HOST_WIDE_INT offset)
{
  return offset >= -256 && offset < 256;
}

static bool
offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
{
  return (offset >= 0
          && offset < 4096 * GET_MODE_SIZE (mode)
          && offset % GET_MODE_SIZE (mode) == 0);
}
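
/* For DImode (8-byte) accesses these predicates correspond to the ranges
   [-512, 504] in steps of 8 for the load/store-pair style offsets,
   [-256, 255] for the unscaled 9-bit form, and [0, 32760] in steps of 8
   for the scaled 12-bit unsigned form.  */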
3037 /* Return true if X is a valid address for machine mode MODE. If it is,
3038 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3039 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3042 aarch64_classify_address (struct aarch64_address_info
*info
,
3043 rtx x
, enum machine_mode mode
,
3044 RTX_CODE outer_code
, bool strict_p
)
3046 enum rtx_code code
= GET_CODE (x
);
3048 bool allow_reg_index_p
=
3049 outer_code
!= PARALLEL
&& GET_MODE_SIZE(mode
) != 16;
3051 /* Don't support anything other than POST_INC or REG addressing for
3053 if (aarch64_vector_mode_p (mode
)
3054 && (code
!= POST_INC
&& code
!= REG
))
3061 info
->type
= ADDRESS_REG_IMM
;
3063 info
->offset
= const0_rtx
;
3064 return aarch64_base_register_rtx_p (x
, strict_p
);
3069 if (GET_MODE_SIZE (mode
) != 0
3070 && CONST_INT_P (op1
)
3071 && aarch64_base_register_rtx_p (op0
, strict_p
))
3073 HOST_WIDE_INT offset
= INTVAL (op1
);
3075 info
->type
= ADDRESS_REG_IMM
;
3079 /* TImode and TFmode values are allowed in both pairs of X
3080 registers and individual Q registers. The available
3082 X,X: 7-bit signed scaled offset
3083 Q: 9-bit signed offset
3084 We conservatively require an offset representable in either mode.
3086 if (mode
== TImode
|| mode
== TFmode
)
3087 return (offset_7bit_signed_scaled_p (mode
, offset
)
3088 && offset_9bit_signed_unscaled_p (mode
, offset
));
3090 if (outer_code
== PARALLEL
)
3091 return ((GET_MODE_SIZE (mode
) == 4 || GET_MODE_SIZE (mode
) == 8)
3092 && offset_7bit_signed_scaled_p (mode
, offset
));
3094 return (offset_9bit_signed_unscaled_p (mode
, offset
)
3095 || offset_12bit_unsigned_scaled_p (mode
, offset
));
3098 if (allow_reg_index_p
)
3100 /* Look for base + (scaled/extended) index register. */
3101 if (aarch64_base_register_rtx_p (op0
, strict_p
)
3102 && aarch64_classify_index (info
, op1
, mode
, strict_p
))
3107 if (aarch64_base_register_rtx_p (op1
, strict_p
)
3108 && aarch64_classify_index (info
, op0
, mode
, strict_p
))
3121 info
->type
= ADDRESS_REG_WB
;
3122 info
->base
= XEXP (x
, 0);
3123 info
->offset
= NULL_RTX
;
3124 return aarch64_base_register_rtx_p (info
->base
, strict_p
);
3128 info
->type
= ADDRESS_REG_WB
;
3129 info
->base
= XEXP (x
, 0);
3130 if (GET_CODE (XEXP (x
, 1)) == PLUS
3131 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
3132 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), info
->base
)
3133 && aarch64_base_register_rtx_p (info
->base
, strict_p
))
3135 HOST_WIDE_INT offset
;
3136 info
->offset
= XEXP (XEXP (x
, 1), 1);
3137 offset
= INTVAL (info
->offset
);
3139 /* TImode and TFmode values are allowed in both pairs of X
3140 registers and individual Q registers. The available
3142 X,X: 7-bit signed scaled offset
3143 Q: 9-bit signed offset
3144 We conservatively require an offset representable in either mode.
3146 if (mode
== TImode
|| mode
== TFmode
)
3147 return (offset_7bit_signed_scaled_p (mode
, offset
)
3148 && offset_9bit_signed_unscaled_p (mode
, offset
));
3150 if (outer_code
== PARALLEL
)
3151 return ((GET_MODE_SIZE (mode
) == 4 || GET_MODE_SIZE (mode
) == 8)
3152 && offset_7bit_signed_scaled_p (mode
, offset
));
3154 return offset_9bit_signed_unscaled_p (mode
, offset
);
3161 /* load literal: pc-relative constant pool entry. Only supported
3162 for SI mode or larger. */
3163 info
->type
= ADDRESS_SYMBOLIC
;
3164 if (outer_code
!= PARALLEL
&& GET_MODE_SIZE (mode
) >= 4)
3168 split_const (x
, &sym
, &addend
);
3169 return (GET_CODE (sym
) == LABEL_REF
3170 || (GET_CODE (sym
) == SYMBOL_REF
3171 && CONSTANT_POOL_ADDRESS_P (sym
)));
3176 info
->type
= ADDRESS_LO_SUM
;
3177 info
->base
= XEXP (x
, 0);
3178 info
->offset
= XEXP (x
, 1);
3179 if (allow_reg_index_p
3180 && aarch64_base_register_rtx_p (info
->base
, strict_p
))
3183 split_const (info
->offset
, &sym
, &offs
);
3184 if (GET_CODE (sym
) == SYMBOL_REF
3185 && (aarch64_classify_symbol (sym
, SYMBOL_CONTEXT_MEM
)
3186 == SYMBOL_SMALL_ABSOLUTE
))
3188 /* The symbol and offset must be aligned to the access size. */
3190 unsigned int ref_size
;
3192 if (CONSTANT_POOL_ADDRESS_P (sym
))
3193 align
= GET_MODE_ALIGNMENT (get_pool_mode (sym
));
3194 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym
))
3196 tree exp
= SYMBOL_REF_DECL (sym
);
3197 align
= TYPE_ALIGN (TREE_TYPE (exp
));
3198 align
= CONSTANT_ALIGNMENT (exp
, align
);
3200 else if (SYMBOL_REF_DECL (sym
))
3201 align
= DECL_ALIGN (SYMBOL_REF_DECL (sym
));
3203 align
= BITS_PER_UNIT
;
3205 ref_size
= GET_MODE_SIZE (mode
);
3207 ref_size
= GET_MODE_SIZE (DImode
);
3209 return ((INTVAL (offs
) & (ref_size
- 1)) == 0
3210 && ((align
/ BITS_PER_UNIT
) & (ref_size
- 1)) == 0);
bool
aarch64_symbolic_address_p (rtx x)
{
  rtx offset;

  split_const (x, &x, &offset);
  return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
}

/* Classify the base of symbolic expression X, given that X appears in
   context CONTEXT.  */

enum aarch64_symbol_type
aarch64_classify_symbolic_expression (rtx x,
                                      enum aarch64_symbol_context context)
{
  rtx offset;

  split_const (x, &x, &offset);
  return aarch64_classify_symbol (x, context);
}

/* Return TRUE if X is a legitimate address for accessing memory in
   mode MODE.  */
static bool
aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
{
  struct aarch64_address_info addr;

  return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
}

/* Return TRUE if X is a legitimate address for accessing memory in
   mode MODE.  OUTER_CODE will be PARALLEL if this is a load/store
   pair operation.  */
bool
aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
                              RTX_CODE outer_code, bool strict_p)
{
  struct aarch64_address_info addr;

  return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
}
/* Return TRUE if rtx X is immediate constant 0.0 */
bool
aarch64_float_const_zero_rtx_p (rtx x)
{
  REAL_VALUE_TYPE r;

  if (GET_MODE (x) == VOIDmode)
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  if (REAL_VALUE_MINUS_ZERO (r))
    return !HONOR_SIGNED_ZEROS (GET_MODE (x));
  return REAL_VALUES_EQUAL (r, dconst0);
}

/* Return the fixed registers used for condition codes.  */

static bool
aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = CC_REGNUM;
  *p2 = INVALID_REGNUM;
  return true;
}
3291 aarch64_select_cc_mode (RTX_CODE code
, rtx x
, rtx y
)
3293 /* All floating point compares return CCFP if it is an equality
3294 comparison, and CCFPE otherwise. */
3295 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
3322 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
3324 && (code
== EQ
|| code
== NE
|| code
== LT
|| code
== GE
)
3325 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
|| GET_CODE (x
) == AND
3326 || GET_CODE (x
) == NEG
))
3329 /* A compare with a shifted or negated operand. Because of canonicalization,
3330 the comparison will have to be swapped when we emit the assembly
3332 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
3333 && (GET_CODE (y
) == REG
|| GET_CODE (y
) == SUBREG
)
3334 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
3335 || GET_CODE (x
) == LSHIFTRT
3336 || GET_CODE (x
) == ZERO_EXTEND
|| GET_CODE (x
) == SIGN_EXTEND
3337 || GET_CODE (x
) == NEG
))
3340 /* A compare of a mode narrower than SI mode against zero can be done
3341 by extending the value in the comparison. */
3342 if ((GET_MODE (x
) == QImode
|| GET_MODE (x
) == HImode
)
3344 /* Only use sign-extension if we really need it. */
3345 return ((code
== GT
|| code
== GE
|| code
== LE
|| code
== LT
)
3346 ? CC_SESWPmode
: CC_ZESWPmode
);
3348 /* For everything else, return CCmode. */
3353 aarch64_get_condition_code (rtx x
)
3355 enum machine_mode mode
= GET_MODE (XEXP (x
, 0));
3356 enum rtx_code comp_code
= GET_CODE (x
);
3358 if (GET_MODE_CLASS (mode
) != MODE_CC
)
3359 mode
= SELECT_CC_MODE (comp_code
, XEXP (x
, 0), XEXP (x
, 1));
3367 case GE
: return AARCH64_GE
;
3368 case GT
: return AARCH64_GT
;
3369 case LE
: return AARCH64_LS
;
3370 case LT
: return AARCH64_MI
;
3371 case NE
: return AARCH64_NE
;
3372 case EQ
: return AARCH64_EQ
;
3373 case ORDERED
: return AARCH64_VC
;
3374 case UNORDERED
: return AARCH64_VS
;
3375 case UNLT
: return AARCH64_LT
;
3376 case UNLE
: return AARCH64_LE
;
3377 case UNGT
: return AARCH64_HI
;
3378 case UNGE
: return AARCH64_PL
;
3379 default: gcc_unreachable ();
3386 case NE
: return AARCH64_NE
;
3387 case EQ
: return AARCH64_EQ
;
3388 case GE
: return AARCH64_GE
;
3389 case GT
: return AARCH64_GT
;
3390 case LE
: return AARCH64_LE
;
3391 case LT
: return AARCH64_LT
;
3392 case GEU
: return AARCH64_CS
;
3393 case GTU
: return AARCH64_HI
;
3394 case LEU
: return AARCH64_LS
;
3395 case LTU
: return AARCH64_CC
;
3396 default: gcc_unreachable ();
3405 case NE
: return AARCH64_NE
;
3406 case EQ
: return AARCH64_EQ
;
3407 case GE
: return AARCH64_LE
;
3408 case GT
: return AARCH64_LT
;
3409 case LE
: return AARCH64_GE
;
3410 case LT
: return AARCH64_GT
;
3411 case GEU
: return AARCH64_LS
;
3412 case GTU
: return AARCH64_CC
;
3413 case LEU
: return AARCH64_CS
;
3414 case LTU
: return AARCH64_HI
;
3415 default: gcc_unreachable ();
3422 case NE
: return AARCH64_NE
;
3423 case EQ
: return AARCH64_EQ
;
3424 case GE
: return AARCH64_PL
;
3425 case LT
: return AARCH64_MI
;
3426 default: gcc_unreachable ();
3437 bit_count (unsigned HOST_WIDE_INT value
)
3451 aarch64_print_operand (FILE *f
, rtx x
, char code
)
3455 /* An integer or symbol address without a preceding # sign. */
3457 switch (GET_CODE (x
))
3460 fprintf (f
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
3464 output_addr_const (f
, x
);
3468 if (GET_CODE (XEXP (x
, 0)) == PLUS
3469 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
3471 output_addr_const (f
, x
);
3477 output_operand_lossage ("Unsupported operand for code '%c'", code
);
3482 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3486 if (GET_CODE (x
) != CONST_INT
3487 || (n
= exact_log2 (INTVAL (x
) & ~7)) <= 0)
3489 output_operand_lossage ("invalid operand for '%%%c'", code
);
3505 output_operand_lossage ("invalid operand for '%%%c'", code
);
3515 /* Print N such that 2^N == X. */
3516 if (GET_CODE (x
) != CONST_INT
|| (n
= exact_log2 (INTVAL (x
))) < 0)
3518 output_operand_lossage ("invalid operand for '%%%c'", code
);
3522 asm_fprintf (f
, "%d", n
);
3527 /* Print the number of non-zero bits in X (a const_int). */
3528 if (GET_CODE (x
) != CONST_INT
)
3530 output_operand_lossage ("invalid operand for '%%%c'", code
);
3534 asm_fprintf (f
, "%u", bit_count (INTVAL (x
)));
3538 /* Print the higher numbered register of a pair (TImode) of regs. */
3539 if (GET_CODE (x
) != REG
|| !GP_REGNUM_P (REGNO (x
) + 1))
3541 output_operand_lossage ("invalid operand for '%%%c'", code
);
3545 asm_fprintf (f
, "%s", reg_names
[REGNO (x
) + 1]);
3549 /* Print a condition (eq, ne, etc). */
3551 /* CONST_TRUE_RTX means always -- that's the default. */
3552 if (x
== const_true_rtx
)
3555 if (!COMPARISON_P (x
))
3557 output_operand_lossage ("invalid operand for '%%%c'", code
);
3561 fputs (aarch64_condition_codes
[aarch64_get_condition_code (x
)], f
);
3565 /* Print the inverse of a condition (eq <-> ne, etc). */
3567 /* CONST_TRUE_RTX means never -- that's the default. */
3568 if (x
== const_true_rtx
)
3574 if (!COMPARISON_P (x
))
3576 output_operand_lossage ("invalid operand for '%%%c'", code
);
3580 fputs (aarch64_condition_codes
[AARCH64_INVERSE_CONDITION_CODE
3581 (aarch64_get_condition_code (x
))], f
);
3589 /* Print a scalar FP/SIMD register name. */
3590 if (!REG_P (x
) || !FP_REGNUM_P (REGNO (x
)))
3592 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code
);
3595 asm_fprintf (f
, "%c%d", code
, REGNO (x
) - V0_REGNUM
);
3602 /* Print the first FP/SIMD register name in a list. */
3603 if (!REG_P (x
) || !FP_REGNUM_P (REGNO (x
)))
3605 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code
);
3608 asm_fprintf (f
, "v%d", REGNO (x
) - V0_REGNUM
+ (code
- 'S'));
3612 /* Print bottom 16 bits of integer constant in hex. */
3613 if (GET_CODE (x
) != CONST_INT
)
3615 output_operand_lossage ("invalid operand for '%%%c'", code
);
3618 asm_fprintf (f
, "0x%wx", UINTVAL (x
) & 0xffff);
3623 /* Print a general register name or the zero register (32-bit or
3626 || (CONST_DOUBLE_P (x
) && aarch64_float_const_zero_rtx_p (x
)))
3628 asm_fprintf (f
, "%czr", code
);
3632 if (REG_P (x
) && GP_REGNUM_P (REGNO (x
)))
3634 asm_fprintf (f
, "%c%d", code
, REGNO (x
) - R0_REGNUM
);
3638 if (REG_P (x
) && REGNO (x
) == SP_REGNUM
)
3640 asm_fprintf (f
, "%ssp", code
== 'w' ? "w" : "");
3647 /* Print a normal operand, if it's a general register, then we
3651 output_operand_lossage ("missing operand");
3655 switch (GET_CODE (x
))
3658 asm_fprintf (f
, "%s", reg_names
[REGNO (x
)]);
3662 aarch64_memory_reference_mode
= GET_MODE (x
);
3663 output_address (XEXP (x
, 0));
3668 output_addr_const (asm_out_file
, x
);
3672 asm_fprintf (f
, "%wd", INTVAL (x
));
3676 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_VECTOR_INT
)
3678 gcc_assert (aarch64_const_vec_all_same_int_p (x
,
3680 HOST_WIDE_INT_MAX
));
3681 asm_fprintf (f
, "%wd", INTVAL (CONST_VECTOR_ELT (x
, 0)));
3683 else if (aarch64_simd_imm_zero_p (x
, GET_MODE (x
)))
3692 /* CONST_DOUBLE can represent a double-width integer.
3693 In this case, the mode of x is VOIDmode. */
3694 if (GET_MODE (x
) == VOIDmode
)
3696 else if (aarch64_float_const_zero_rtx_p (x
))
3701 else if (aarch64_float_const_representable_p (x
))
3704 char float_buf
[buf_size
] = {'\0'};
3706 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
3707 real_to_decimal_for_mode (float_buf
, &r
,
3710 asm_fprintf (asm_out_file
, "%s", float_buf
);
3714 output_operand_lossage ("invalid constant");
3717 output_operand_lossage ("invalid operand");
3723 if (GET_CODE (x
) == HIGH
)
3726 switch (aarch64_classify_symbolic_expression (x
, SYMBOL_CONTEXT_ADR
))
3728 case SYMBOL_SMALL_GOT
:
3729 asm_fprintf (asm_out_file
, ":got:");
3732 case SYMBOL_SMALL_TLSGD
:
3733 asm_fprintf (asm_out_file
, ":tlsgd:");
3736 case SYMBOL_SMALL_TLSDESC
:
3737 asm_fprintf (asm_out_file
, ":tlsdesc:");
3740 case SYMBOL_SMALL_GOTTPREL
:
3741 asm_fprintf (asm_out_file
, ":gottprel:");
3744 case SYMBOL_SMALL_TPREL
:
3745 asm_fprintf (asm_out_file
, ":tprel:");
3748 case SYMBOL_TINY_GOT
:
3755 output_addr_const (asm_out_file
, x
);
3759 switch (aarch64_classify_symbolic_expression (x
, SYMBOL_CONTEXT_ADR
))
3761 case SYMBOL_SMALL_GOT
:
3762 asm_fprintf (asm_out_file
, ":lo12:");
3765 case SYMBOL_SMALL_TLSGD
:
3766 asm_fprintf (asm_out_file
, ":tlsgd_lo12:");
3769 case SYMBOL_SMALL_TLSDESC
:
3770 asm_fprintf (asm_out_file
, ":tlsdesc_lo12:");
3773 case SYMBOL_SMALL_GOTTPREL
:
3774 asm_fprintf (asm_out_file
, ":gottprel_lo12:");
3777 case SYMBOL_SMALL_TPREL
:
3778 asm_fprintf (asm_out_file
, ":tprel_lo12_nc:");
3781 case SYMBOL_TINY_GOT
:
3782 asm_fprintf (asm_out_file
, ":got:");
3788 output_addr_const (asm_out_file
, x
);
3793 switch (aarch64_classify_symbolic_expression (x
, SYMBOL_CONTEXT_ADR
))
3795 case SYMBOL_SMALL_TPREL
:
3796 asm_fprintf (asm_out_file
, ":tprel_hi12:");
3801 output_addr_const (asm_out_file
, x
);
3805 output_operand_lossage ("invalid operand prefix '%%%c'", code
);
3811 aarch64_print_operand_address (FILE *f
, rtx x
)
3813 struct aarch64_address_info addr
;
3815 if (aarch64_classify_address (&addr
, x
, aarch64_memory_reference_mode
,
3819 case ADDRESS_REG_IMM
:
3820 if (addr
.offset
== const0_rtx
)
3821 asm_fprintf (f
, "[%s]", reg_names
[REGNO (addr
.base
)]);
3823 asm_fprintf (f
, "[%s,%wd]", reg_names
[REGNO (addr
.base
)],
3824 INTVAL (addr
.offset
));
3827 case ADDRESS_REG_REG
:
3828 if (addr
.shift
== 0)
3829 asm_fprintf (f
, "[%s,%s]", reg_names
[REGNO (addr
.base
)],
3830 reg_names
[REGNO (addr
.offset
)]);
3832 asm_fprintf (f
, "[%s,%s,lsl %u]", reg_names
[REGNO (addr
.base
)],
3833 reg_names
[REGNO (addr
.offset
)], addr
.shift
);
3836 case ADDRESS_REG_UXTW
:
3837 if (addr
.shift
== 0)
3838 asm_fprintf (f
, "[%s,w%d,uxtw]", reg_names
[REGNO (addr
.base
)],
3839 REGNO (addr
.offset
) - R0_REGNUM
);
3841 asm_fprintf (f
, "[%s,w%d,uxtw %u]", reg_names
[REGNO (addr
.base
)],
3842 REGNO (addr
.offset
) - R0_REGNUM
, addr
.shift
);
3845 case ADDRESS_REG_SXTW
:
3846 if (addr
.shift
== 0)
3847 asm_fprintf (f
, "[%s,w%d,sxtw]", reg_names
[REGNO (addr
.base
)],
3848 REGNO (addr
.offset
) - R0_REGNUM
);
3850 asm_fprintf (f
, "[%s,w%d,sxtw %u]", reg_names
[REGNO (addr
.base
)],
3851 REGNO (addr
.offset
) - R0_REGNUM
, addr
.shift
);
3854 case ADDRESS_REG_WB
:
3855 switch (GET_CODE (x
))
3858 asm_fprintf (f
, "[%s,%d]!", reg_names
[REGNO (addr
.base
)],
3859 GET_MODE_SIZE (aarch64_memory_reference_mode
));
3862 asm_fprintf (f
, "[%s],%d", reg_names
[REGNO (addr
.base
)],
3863 GET_MODE_SIZE (aarch64_memory_reference_mode
));
3866 asm_fprintf (f
, "[%s,-%d]!", reg_names
[REGNO (addr
.base
)],
3867 GET_MODE_SIZE (aarch64_memory_reference_mode
));
3870 asm_fprintf (f
, "[%s],-%d", reg_names
[REGNO (addr
.base
)],
3871 GET_MODE_SIZE (aarch64_memory_reference_mode
));
3874 asm_fprintf (f
, "[%s,%wd]!", reg_names
[REGNO (addr
.base
)],
3875 INTVAL (addr
.offset
));
3878 asm_fprintf (f
, "[%s],%wd", reg_names
[REGNO (addr
.base
)],
3879 INTVAL (addr
.offset
));
3886 case ADDRESS_LO_SUM
:
3887 asm_fprintf (f
, "[%s,#:lo12:", reg_names
[REGNO (addr
.base
)]);
3888 output_addr_const (f
, addr
.offset
);
3889 asm_fprintf (f
, "]");
3892 case ADDRESS_SYMBOLIC
:
3896 output_addr_const (f
, x
);
3900 aarch64_label_mentioned_p (rtx x
)
3905 if (GET_CODE (x
) == LABEL_REF
)
3908 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3909 referencing instruction, but they are constant offsets, not
3911 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
3914 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
3915 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
3921 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
3922 if (aarch64_label_mentioned_p (XVECEXP (x
, i
, j
)))
3925 else if (fmt
[i
] == 'e' && aarch64_label_mentioned_p (XEXP (x
, i
)))
3932 /* Implement REGNO_REG_CLASS. */
3935 aarch64_regno_regclass (unsigned regno
)
3937 if (GP_REGNUM_P (regno
))
3940 if (regno
== SP_REGNUM
)
3943 if (regno
== FRAME_POINTER_REGNUM
3944 || regno
== ARG_POINTER_REGNUM
)
3945 return POINTER_REGS
;
3947 if (FP_REGNUM_P (regno
))
3948 return FP_LO_REGNUM_P (regno
) ? FP_LO_REGS
: FP_REGS
;
3953 /* Try a machine-dependent way of reloading an illegitimate address
3954 operand. If we find one, push the reload and return the new rtx. */
3957 aarch64_legitimize_reload_address (rtx
*x_p
,
3958 enum machine_mode mode
,
3959 int opnum
, int type
,
3960 int ind_levels ATTRIBUTE_UNUSED
)
3964 /* Do not allow mem (plus (reg, const)) if vector mode. */
3965 if (aarch64_vector_mode_p (mode
)
3966 && GET_CODE (x
) == PLUS
3967 && REG_P (XEXP (x
, 0))
3968 && CONST_INT_P (XEXP (x
, 1)))
3972 push_reload (orig_rtx
, NULL_RTX
, x_p
, NULL
,
3973 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
3974 opnum
, (enum reload_type
) type
);
3978 /* We must recognize output that we have already generated ourselves. */
3979 if (GET_CODE (x
) == PLUS
3980 && GET_CODE (XEXP (x
, 0)) == PLUS
3981 && REG_P (XEXP (XEXP (x
, 0), 0))
3982 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
3983 && CONST_INT_P (XEXP (x
, 1)))
3985 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
3986 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
3987 opnum
, (enum reload_type
) type
);
3991 /* We wish to handle large displacements off a base register by splitting
3992 the addend across an add and the mem insn. This can cut the number of
3993 extra insns needed from 3 to 1. It is only useful for load/store of a
3994 single register with 12 bit offset field. */
3995 if (GET_CODE (x
) == PLUS
3996 && REG_P (XEXP (x
, 0))
3997 && CONST_INT_P (XEXP (x
, 1))
3998 && HARD_REGISTER_P (XEXP (x
, 0))
4001 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x
, 0)), true))
4003 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
4004 HOST_WIDE_INT low
= val
& 0xfff;
4005 HOST_WIDE_INT high
= val
- low
;
4008 enum machine_mode xmode
= GET_MODE (x
);
4010 /* In ILP32, xmode can be either DImode or SImode. */
4011 gcc_assert (xmode
== DImode
|| xmode
== SImode
);
4013 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4014 BLKmode alignment. */
4015 if (GET_MODE_SIZE (mode
) == 0)
4018 offs
= low
% GET_MODE_SIZE (mode
);
4020 /* Align misaligned offset by adjusting high part to compensate. */
4023 if (aarch64_uimm12_shift (high
+ offs
))
4032 offs
= GET_MODE_SIZE (mode
) - offs
;
4034 high
= high
+ (low
& 0x1000) - offs
;
4039 /* Check for overflow. */
4040 if (high
+ low
!= val
)
4043 cst
= GEN_INT (high
);
4044 if (!aarch64_uimm12_shift (high
))
4045 cst
= force_const_mem (xmode
, cst
);
4047 /* Reload high part into base reg, leaving the low part
4048 in the mem instruction.
4049 Note that replacing this gen_rtx_PLUS with plus_constant is
4050 wrong in this case because we rely on the
4051 (plus (plus reg c1) c2) structure being preserved so that
4052 XEXP (*p, 0) in push_reload below uses the correct term. */
4053 x
= gen_rtx_PLUS (xmode
,
4054 gen_rtx_PLUS (xmode
, XEXP (x
, 0), cst
),
4057 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
4058 BASE_REG_CLASS
, xmode
, VOIDmode
, 0, 0,
4059 opnum
, (enum reload_type
) type
);
4068 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED
, rtx x
,
4070 enum machine_mode mode
,
4071 secondary_reload_info
*sri
)
4073 /* Without the TARGET_SIMD instructions we cannot move a Q register
4074 to a Q register directly. We need a scratch. */
4075 if (REG_P (x
) && (mode
== TFmode
|| mode
== TImode
) && mode
== GET_MODE (x
)
4076 && FP_REGNUM_P (REGNO (x
)) && !TARGET_SIMD
4077 && reg_class_subset_p (rclass
, FP_REGS
))
4080 sri
->icode
= CODE_FOR_aarch64_reload_movtf
;
4081 else if (mode
== TImode
)
4082 sri
->icode
= CODE_FOR_aarch64_reload_movti
;
4086 /* A TFmode or TImode memory access should be handled via an FP_REGS
4087 because AArch64 has richer addressing modes for LDR/STR instructions
4088 than LDP/STP instructions. */
4089 if (!TARGET_GENERAL_REGS_ONLY
&& rclass
== CORE_REGS
4090 && GET_MODE_SIZE (mode
) == 16 && MEM_P (x
))
4093 if (rclass
== FP_REGS
&& (mode
== TImode
|| mode
== TFmode
) && CONSTANT_P(x
))
4100 aarch64_can_eliminate (const int from
, const int to
)
4102 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4103 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4105 if (frame_pointer_needed
)
4107 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4109 if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
4111 if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
4112 && !cfun
->calls_alloca
)
4114 if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4120 /* If we decided that we didn't need a leaf frame pointer but then used
4121 LR in the function, then we'll want a frame pointer after all, so
4122 prevent this elimination to ensure a frame pointer is used.
4124 NOTE: the original value of flag_omit_frame_pointer gets trashed
4125 IFF flag_omit_leaf_frame_pointer is true, so we check the value
4126 of faked_omit_frame_pointer here (which is true when we always
4127 wish to keep non-leaf frame pointers but only wish to keep leaf frame
4128 pointers when LR is clobbered). */
4129 if (to
== STACK_POINTER_REGNUM
4130 && df_regs_ever_live_p (LR_REGNUM
)
4131 && faked_omit_frame_pointer
)
4139 aarch64_initial_elimination_offset (unsigned from
, unsigned to
)
4141 HOST_WIDE_INT frame_size
;
4142 HOST_WIDE_INT offset
;
4144 aarch64_layout_frame ();
4145 frame_size
= (get_frame_size () + cfun
->machine
->frame
.saved_regs_size
4146 + crtl
->outgoing_args_size
4147 + cfun
->machine
->saved_varargs_size
);
4149 frame_size
= AARCH64_ROUND_UP (frame_size
, STACK_BOUNDARY
/ BITS_PER_UNIT
);
4150 offset
= frame_size
;
4152 if (to
== HARD_FRAME_POINTER_REGNUM
)
4154 if (from
== ARG_POINTER_REGNUM
)
4155 return offset
- crtl
->outgoing_args_size
;
4157 if (from
== FRAME_POINTER_REGNUM
)
4158 return cfun
->machine
->frame
.saved_regs_size
+ get_frame_size ();
4161 if (to
== STACK_POINTER_REGNUM
)
4163 if (from
== FRAME_POINTER_REGNUM
)
4165 HOST_WIDE_INT elim
= crtl
->outgoing_args_size
4166 + cfun
->machine
->frame
.saved_regs_size
4168 - cfun
->machine
->frame
.fp_lr_offset
;
4169 elim
= AARCH64_ROUND_UP (elim
, STACK_BOUNDARY
/ BITS_PER_UNIT
);
4178 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4182 aarch64_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
4186 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
4191 aarch64_asm_trampoline_template (FILE *f
)
4195 asm_fprintf (f
, "\tldr\tw%d, .+16\n", IP1_REGNUM
- R0_REGNUM
);
4196 asm_fprintf (f
, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM
- R0_REGNUM
);
4200 asm_fprintf (f
, "\tldr\t%s, .+16\n", reg_names
[IP1_REGNUM
]);
4201 asm_fprintf (f
, "\tldr\t%s, .+20\n", reg_names
[STATIC_CHAIN_REGNUM
]);
4203 asm_fprintf (f
, "\tbr\t%s\n", reg_names
[IP1_REGNUM
]);
4204 assemble_aligned_integer (4, const0_rtx
);
4205 assemble_aligned_integer (POINTER_BYTES
, const0_rtx
);
4206 assemble_aligned_integer (POINTER_BYTES
, const0_rtx
);
4210 aarch64_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
4212 rtx fnaddr
, mem
, a_tramp
;
4213 const int tramp_code_sz
= 16;
4215 /* Don't need to copy the trailing D-words, we fill those in below. */
4216 emit_block_move (m_tramp
, assemble_trampoline_template (),
4217 GEN_INT (tramp_code_sz
), BLOCK_OP_NORMAL
);
4218 mem
= adjust_address (m_tramp
, ptr_mode
, tramp_code_sz
);
4219 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
4220 if (GET_MODE (fnaddr
) != ptr_mode
)
4221 fnaddr
= convert_memory_address (ptr_mode
, fnaddr
);
4222 emit_move_insn (mem
, fnaddr
);
4224 mem
= adjust_address (m_tramp
, ptr_mode
, tramp_code_sz
+ POINTER_BYTES
);
4225 emit_move_insn (mem
, chain_value
);
4227 /* XXX We should really define a "clear_cache" pattern and use
4228 gen_clear_cache(). */
4229 a_tramp
= XEXP (m_tramp
, 0);
4230 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
4231 LCT_NORMAL
, VOIDmode
, 2, a_tramp
, ptr_mode
,
4232 plus_constant (ptr_mode
, a_tramp
, TRAMPOLINE_SIZE
),
4236 static unsigned char
4237 aarch64_class_max_nregs (reg_class_t regclass
, enum machine_mode mode
)
4248 aarch64_vector_mode_p (mode
) ? (GET_MODE_SIZE (mode
) + 15) / 16 :
4249 (GET_MODE_SIZE (mode
) + 7) / 8;
4263 aarch64_preferred_reload_class (rtx x
, reg_class_t regclass
)
4265 if (regclass
== POINTER_REGS
)
4266 return GENERAL_REGS
;
4268 if (regclass
== STACK_REG
)
4271 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x
)), POINTER_REGS
))
4277 /* If it's an integer immediate that MOVI can't handle, then
4278 FP_REGS is not an option, so we return NO_REGS instead. */
4279 if (CONST_INT_P (x
) && reg_class_subset_p (regclass
, FP_REGS
)
4280 && !aarch64_simd_imm_scalar_p (x
, GET_MODE (x
)))
4283 /* Register eliminiation can result in a request for
4284 SP+constant->FP_REGS. We cannot support such operations which
4285 use SP as source and an FP_REG as destination, so reject out
4287 if (! reg_class_subset_p (regclass
, GENERAL_REGS
) && GET_CODE (x
) == PLUS
)
4289 rtx lhs
= XEXP (x
, 0);
4291 /* Look through a possible SUBREG introduced by ILP32. */
4292 if (GET_CODE (lhs
) == SUBREG
)
4293 lhs
= SUBREG_REG (lhs
);
4295 gcc_assert (REG_P (lhs
));
4296 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs
)),
static void
aarch64_asm_output_labelref (FILE* f, const char *name)
{
  asm_fprintf (f, "%U%s", name);
}

static void
aarch64_elf_asm_constructor (rtx symbol, int priority)
{
  if (priority == DEFAULT_INIT_PRIORITY)
    default_ctor_section_asm_out_constructor (symbol, priority);
  else
    {
      section *s;
      char buf[18];

      snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
      s = get_section (buf, SECTION_WRITE, NULL);
      switch_to_section (s);
      assemble_align (POINTER_SIZE);
      assemble_aligned_integer (POINTER_BYTES, symbol);
    }
}

static void
aarch64_elf_asm_destructor (rtx symbol, int priority)
{
  if (priority == DEFAULT_INIT_PRIORITY)
    default_dtor_section_asm_out_destructor (symbol, priority);
  else
    {
      section *s;
      char buf[18];

      snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
      s = get_section (buf, SECTION_WRITE, NULL);
      switch_to_section (s);
      assemble_align (POINTER_SIZE);
      assemble_aligned_integer (POINTER_BYTES, symbol);
    }
}
4345 aarch64_output_casesi (rtx
*operands
)
4349 rtx diff_vec
= PATTERN (NEXT_INSN (operands
[2]));
4351 static const char *const patterns
[4][2] =
4354 "ldrb\t%w3, [%0,%w1,uxtw]",
4355 "add\t%3, %4, %w3, sxtb #2"
4358 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4359 "add\t%3, %4, %w3, sxth #2"
4362 "ldr\t%w3, [%0,%w1,uxtw #2]",
4363 "add\t%3, %4, %w3, sxtw #2"
4365 /* We assume that DImode is only generated when not optimizing and
4366 that we don't really need 64-bit address offsets. That would
4367 imply an object file with 8GB of code in a single function! */
4369 "ldr\t%w3, [%0,%w1,uxtw #2]",
4370 "add\t%3, %4, %w3, sxtw #2"
4374 gcc_assert (GET_CODE (diff_vec
) == ADDR_DIFF_VEC
);
4376 index
= exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec
)));
4378 gcc_assert (index
>= 0 && index
<= 3);
4380 /* Need to implement table size reduction, by chaning the code below. */
4381 output_asm_insn (patterns
[index
][0], operands
);
4382 ASM_GENERATE_INTERNAL_LABEL (label
, "Lrtx", CODE_LABEL_NUMBER (operands
[2]));
4383 snprintf (buf
, sizeof (buf
),
4384 "adr\t%%4, %s", targetm
.strip_name_encoding (label
));
4385 output_asm_insn (buf
, operands
);
4386 output_asm_insn (patterns
[index
][1], operands
);
4387 output_asm_insn ("br\t%3", operands
);
4388 assemble_label (asm_out_file
, label
);
4393 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4394 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4398 aarch64_uxt_size (int shift
, HOST_WIDE_INT mask
)
4400 if (shift
>= 0 && shift
<= 3)
4403 for (size
= 8; size
<= 32; size
*= 2)
4405 HOST_WIDE_INT bits
= ((HOST_WIDE_INT
)1U << size
) - 1;
4406 if (mask
== bits
<< shift
)
4414 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED
,
4415 const_rtx x ATTRIBUTE_UNUSED
)
4417 /* We can't use blocks for constants when we're using a per-function
4423 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED
,
4424 rtx x ATTRIBUTE_UNUSED
,
4425 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED
)
4427 /* Force all constant pool entries into the current function section. */
4428 return function_section (current_function_decl
);
4434 /* Helper function for rtx cost calculation. Strip a shift expression
4435 from X. Returns the inner operand if successful, or the original
4436 expression on failure. */
4438 aarch64_strip_shift (rtx x
)
4442 if ((GET_CODE (op
) == ASHIFT
4443 || GET_CODE (op
) == ASHIFTRT
4444 || GET_CODE (op
) == LSHIFTRT
)
4445 && CONST_INT_P (XEXP (op
, 1)))
4446 return XEXP (op
, 0);
4448 if (GET_CODE (op
) == MULT
4449 && CONST_INT_P (XEXP (op
, 1))
4450 && ((unsigned) exact_log2 (INTVAL (XEXP (op
, 1)))) < 64)
4451 return XEXP (op
, 0);
4456 /* Helper function for rtx cost calculation. Strip a shift or extend
4457 expression from X. Returns the inner operand if successful, or the
4458 original expression on failure. We deal with a number of possible
4459 canonicalization variations here. */
4461 aarch64_strip_shift_or_extend (rtx x
)
4465 /* Zero and sign extraction of a widened value. */
4466 if ((GET_CODE (op
) == ZERO_EXTRACT
|| GET_CODE (op
) == SIGN_EXTRACT
)
4467 && XEXP (op
, 2) == const0_rtx
4468 && aarch64_is_extend_from_extract (GET_MODE (op
), XEXP (XEXP (op
, 0), 1),
4470 return XEXP (XEXP (op
, 0), 0);
4472 /* It can also be represented (for zero-extend) as an AND with an
4474 if (GET_CODE (op
) == AND
4475 && GET_CODE (XEXP (op
, 0)) == MULT
4476 && CONST_INT_P (XEXP (XEXP (op
, 0), 1))
4477 && CONST_INT_P (XEXP (op
, 1))
4478 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op
, 0), 1))),
4479 INTVAL (XEXP (op
, 1))) != 0)
4480 return XEXP (XEXP (op
, 0), 0);
4482 /* Now handle extended register, as this may also have an optional
4483 left shift by 1..4. */
4484 if (GET_CODE (op
) == ASHIFT
4485 && CONST_INT_P (XEXP (op
, 1))
4486 && ((unsigned HOST_WIDE_INT
) INTVAL (XEXP (op
, 1))) <= 4)
4489 if (GET_CODE (op
) == ZERO_EXTEND
4490 || GET_CODE (op
) == SIGN_EXTEND
)
4496 return aarch64_strip_shift (x
);
4499 /* Calculate the cost of calculating X, storing it in *COST. Result
4500 is true if the total cost of the operation has now been calculated. */
4502 aarch64_rtx_costs (rtx x
, int code
, int outer ATTRIBUTE_UNUSED
,
4503 int param ATTRIBUTE_UNUSED
, int *cost
, bool speed
)
4506 const struct cpu_cost_table
*extra_cost
4507 = aarch64_tune_params
->insn_extra_cost
;
4515 switch (GET_CODE (op0
))
4519 *cost
+= extra_cost
->ldst
.store
;
4521 if (op1
!= const0_rtx
)
4522 *cost
+= rtx_cost (op1
, SET
, 1, speed
);
4526 if (! REG_P (SUBREG_REG (op0
)))
4527 *cost
+= rtx_cost (SUBREG_REG (op0
), SET
, 0, speed
);
4530 /* Cost is just the cost of the RHS of the set. */
4531 *cost
+= rtx_cost (op1
, SET
, 1, true);
4534 case ZERO_EXTRACT
: /* Bit-field insertion. */
4536 /* Strip any redundant widening of the RHS to meet the width of
4538 if (GET_CODE (op1
) == SUBREG
)
4539 op1
= SUBREG_REG (op1
);
4540 if ((GET_CODE (op1
) == ZERO_EXTEND
4541 || GET_CODE (op1
) == SIGN_EXTEND
)
4542 && GET_CODE (XEXP (op0
, 1)) == CONST_INT
4543 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1
, 0)))
4544 >= INTVAL (XEXP (op0
, 1))))
4545 op1
= XEXP (op1
, 0);
4546 *cost
+= rtx_cost (op1
, SET
, 1, speed
);
4556 *cost
+= extra_cost
->ldst
.load
;
4561 op0
= CONST0_RTX (GET_MODE (x
));
4569 if (op1
== const0_rtx
4570 && GET_CODE (op0
) == AND
)
4576 /* Comparisons can work if the order is swapped.
4577 Canonicalization puts the more complex operation first, but
4578 we want it in op1. */
4580 || (GET_CODE (op0
) == SUBREG
&& REG_P (SUBREG_REG (op0
)))))
      /* MINUS.  */
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
	  || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
	      && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
	{
	  if (op0 != const0_rtx)
	    *cost += rtx_cost (op0, MINUS, 0, speed);

	  if (CONST_INT_P (op1))
	    {
	      if (!aarch64_uimm12_shift (INTVAL (op1)))
		*cost += rtx_cost (op1, MINUS, 1, speed);
	    }
	  else
	    {
	      op1 = aarch64_strip_shift_or_extend (op1);
	      *cost += rtx_cost (op1, MINUS, 1, speed);
	    }
	}

      /* PLUS.  */
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	{
	  if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
	    *cost += rtx_cost (op0, PLUS, 0, speed);
	  else
	    {
	      rtx new_op0 = aarch64_strip_shift_or_extend (op0);

	      if (new_op0 == op0
		  && GET_CODE (op0) == MULT)
		{
		  if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
		       && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
		      || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
			  && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
		    {
		      /* Multiply-extend-accumulate.  */
		      *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0, speed)
				+ rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1, speed)
				+ rtx_cost (op1, PLUS, 1, speed));
		      *cost +=
			extra_cost->mult[GET_MODE (x) == DImode].extend_add;
		    }
		  else
		    {
		      /* Multiply-accumulate.  */
		      *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
				+ rtx_cost (XEXP (op0, 1), MULT, 1, speed)
				+ rtx_cost (op1, PLUS, 1, speed));

		      *cost += extra_cost->mult[GET_MODE (x) == DImode].add;
		    }
		}
	      else
		*cost += (rtx_cost (new_op0, PLUS, 0, speed)
			  + rtx_cost (op1, PLUS, 1, speed));
	    }
	}

      /* AND.  */
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	{
	  if (CONST_INT_P (op1)
	      && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
	    *cost += rtx_cost (op0, AND, 0, speed);
	  else
	    {
	      if (GET_CODE (op0) == NOT)
		op0 = XEXP (op0, 0);
	      op0 = aarch64_strip_shift (op0);
	      *cost += (rtx_cost (op0, AND, 0, speed)
			+ rtx_cost (op1, AND, 1, speed));
	    }
	}

      /* ZERO_EXTEND.  */
      if ((GET_MODE (x) == DImode
	   && GET_MODE (XEXP (x, 0)) == SImode)
	  || GET_CODE (XEXP (x, 0)) == MEM)
	*cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);

      /* SIGN_EXTEND.  */
      if (GET_CODE (XEXP (x, 0)) == MEM)
	*cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);

      /* Rotates and non-immediate shifts.  */
      if (!CONST_INT_P (XEXP (x, 1)))
	*cost += COSTS_N_INSNS (2);

      /* ASHIFT.  */
      /* Shifting by a register often takes an extra cycle.  */
      if (speed && !CONST_INT_P (XEXP (x, 1)))
	*cost += extra_cost->alu.arith_shift_reg;

      *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);

      /* HIGH.  */
      if (!CONSTANT_P (XEXP (x, 0)))
	*cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);

      /* LO_SUM.  */
      if (!CONSTANT_P (XEXP (x, 1)))
	*cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
      *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);

      /* ZERO_EXTRACT.  */
      *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);

      /* MULT.  */
      *cost = COSTS_N_INSNS (1);
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	{
	  if (CONST_INT_P (op1)
	      && exact_log2 (INTVAL (op1)) > 0)
	    {
	      *cost += rtx_cost (op0, ASHIFT, 0, speed);
	      return true;
	    }

	  if ((GET_CODE (op0) == ZERO_EXTEND
	       && GET_CODE (op1) == ZERO_EXTEND)
	      || (GET_CODE (op0) == SIGN_EXTEND
		  && GET_CODE (op1) == SIGN_EXTEND))
	    {
	      *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
			+ rtx_cost (XEXP (op1, 0), MULT, 1, speed));
	      *cost += extra_cost->mult[GET_MODE (x) == DImode].extend;
	      return true;
	    }

	  *cost += extra_cost->mult[GET_MODE (x) == DImode].simple;
	}
      else if (GET_MODE (x) == DFmode)
	*cost += extra_cost->fp[1].mult;
      else if (GET_MODE (x) == SFmode)
	*cost += extra_cost->fp[0].mult;

      return false;  /* All arguments need to be in registers.  */

      /* Integer or FP modulo.  */
      *cost = COSTS_N_INSNS (2);
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	*cost += (extra_cost->mult[GET_MODE (x) == DImode].add
		  + extra_cost->mult[GET_MODE (x) == DImode].idiv);
      else if (GET_MODE (x) == DFmode)
	*cost += (extra_cost->fp[1].mult
		  + extra_cost->fp[1].div);
      else if (GET_MODE (x) == SFmode)
	*cost += (extra_cost->fp[0].mult
		  + extra_cost->fp[0].div);

      return false;  /* All arguments need to be in registers.  */

      /* Integer or FP division.  */
      *cost = COSTS_N_INSNS (1);
      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	*cost += extra_cost->mult[GET_MODE (x) == DImode].idiv;
      else if (GET_MODE (x) == DFmode)
	*cost += extra_cost->fp[1].div;
      else if (GET_MODE (x) == SFmode)
	*cost += extra_cost->fp[0].div;

      return false;  /* All arguments need to be in registers.  */
aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
		      enum machine_mode mode ATTRIBUTE_UNUSED,
		      addr_space_t as ATTRIBUTE_UNUSED,
		      bool speed ATTRIBUTE_UNUSED)
{
  enum rtx_code c = GET_CODE (x);
  const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;

  if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
    return addr_cost->pre_modify;

  if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
    return addr_cost->post_modify;

  if (c == PLUS)
    {
      if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	return addr_cost->imm_offset;
      else if (GET_CODE (XEXP (x, 0)) == MULT
	       || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	       || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	return addr_cost->register_extend;

      return addr_cost->register_offset;
    }
  else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
    return addr_cost->imm_offset;
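
/* For example, an address of the form [x0, #16] is charged imm_offset,
   [x0, w1, sxtw #2] (a scaled, extended index) is charged register_extend,
   a plain register index such as [x0, x1] is charged register_offset, and
   pre/post increment forms use the pre_modify/post_modify entries.  */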
aarch64_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
			    reg_class_t from, reg_class_t to)
{
  const struct cpu_regmove_cost *regmove_cost
    = aarch64_tune_params->regmove_cost;

  if (from == GENERAL_REGS && to == GENERAL_REGS)
    return regmove_cost->GP2GP;
  else if (from == GENERAL_REGS)
    return regmove_cost->GP2FP;
  else if (to == GENERAL_REGS)
    return regmove_cost->FP2GP;

  /* When AdvSIMD instructions are disabled it is not possible to move
     a 128-bit value directly between Q registers.  This is handled in
     secondary reload.  A general register is used as a scratch to move
     the upper DI value and the lower DI value is moved directly,
     hence the cost is the sum of three moves.  */

  if (! TARGET_SIMD && GET_MODE_SIZE (from) == 128 && GET_MODE_SIZE (to) == 128)
    return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;

  return regmove_cost->FP2FP;
aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
			  reg_class_t rclass ATTRIBUTE_UNUSED,
			  bool in ATTRIBUTE_UNUSED)
{
  return aarch64_tune_params->memmov_cost;
}
/* Vectorizer cost model target hooks.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				    tree vectype,
				    int misalign ATTRIBUTE_UNUSED)
{
  switch (type_of_cost)
    {
    case scalar_stmt:
      return aarch64_tune_params->vec_costs->scalar_stmt_cost;

    case scalar_load:
      return aarch64_tune_params->vec_costs->scalar_load_cost;

    case scalar_store:
      return aarch64_tune_params->vec_costs->scalar_store_cost;

    case vector_stmt:
      return aarch64_tune_params->vec_costs->vec_stmt_cost;

    case vector_load:
      return aarch64_tune_params->vec_costs->vec_align_load_cost;

    case vector_store:
      return aarch64_tune_params->vec_costs->vec_store_cost;

    case vec_to_scalar:
      return aarch64_tune_params->vec_costs->vec_to_scalar_cost;

    case scalar_to_vec:
      return aarch64_tune_params->vec_costs->scalar_to_vec_cost;

    case unaligned_load:
      return aarch64_tune_params->vec_costs->vec_unalign_load_cost;

    case unaligned_store:
      return aarch64_tune_params->vec_costs->vec_unalign_store_cost;

    case cond_branch_taken:
      return aarch64_tune_params->vec_costs->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;

    case vec_promote_demote:
      return aarch64_tune_params->vec_costs->vec_stmt_cost;

    case vec_construct:
      elements = TYPE_VECTOR_SUBPARTS (vectype);
      return elements / 2 + 1;
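
/* For example, building a V4SF vector from individual scalars
   (vec_construct) is costed as TYPE_VECTOR_SUBPARTS / 2 + 1
   = 4 / 2 + 1 = 3 units.  */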
/* Implement targetm.vectorize.add_stmt_cost.  */
aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
		       struct _stmt_vec_info *stmt_info, int misalign,
		       enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost =
	aarch64_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
	 vectorized are weighted more heavily.  The value here is
	 a function (linear for now) of the loop nest level.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
	{
	  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
	  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
	  unsigned nest_level = loop_depth (loop);

	  count *= nest_level;
	}

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

static void initialize_aarch64_code_model (void);
/* Parse the architecture extension string.  */
aarch64_parse_extension (char *str)
{
  /* The extension string is parsed left to right.  */
  const struct aarch64_option_extension *opt = NULL;

  /* Flag to say whether we are adding or removing an extension.  */
  int adding_ext = -1;

  while (str != NULL && *str != 0)
    {
      ext = strchr (str, '+');

      if (len >= 2 && strncmp (str, "no", 2) == 0)
	adding_ext = 0;

      error ("missing feature modifier after %qs", "+no");

      /* Scan over the extensions table trying to find an exact match.  */
      for (opt = all_extensions; opt->name != NULL; opt++)
	{
	  if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
	    {
	      /* Add or remove the extension.  */
	      if (adding_ext)
		aarch64_isa_flags |= opt->flags_on;
	      else
		aarch64_isa_flags &= ~(opt->flags_off);
	    }
	}

      if (opt->name == NULL)
	{
	  /* Extension not found in list.  */
	  error ("unknown feature modifier %qs", str);
	}
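
/* For example, "-march=armv8-a+crypto+nofp" hands "+crypto+nofp" to this
   parser: "crypto" switches its feature flags on, then the "no" prefix on
   "fp" switches the FP flags (and the features that depend on them) back
   off.  A bare trailing "+no" is rejected with the error above.  */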
/* Parse the ARCH string.  */
aarch64_parse_arch (void)
{
  const struct processor *arch;
  char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);

  strcpy (str, aarch64_arch_string);

  ext = strchr (str, '+');

  error ("missing arch name in -march=%qs", str);

  /* Loop through the list of supported ARCHs to find a match.  */
  for (arch = all_architectures; arch->name != NULL; arch++)
    {
      if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
	{
	  selected_arch = arch;
	  aarch64_isa_flags = selected_arch->flags;
	  selected_cpu = &all_cores[selected_arch->core];

	  /* ARCH string contains at least one extension.  */
	  aarch64_parse_extension (ext);
	}
    }

  /* ARCH name not found in list.  */
  error ("unknown value %qs for -march", str);
/* Parse the CPU string.  */
aarch64_parse_cpu (void)
{
  const struct processor *cpu;
  char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);

  strcpy (str, aarch64_cpu_string);

  ext = strchr (str, '+');

  error ("missing cpu name in -mcpu=%qs", str);

  /* Loop through the list of supported CPUs to find a match.  */
  for (cpu = all_cores; cpu->name != NULL; cpu++)
    {
      if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
	{
	  selected_cpu = cpu;
	  aarch64_isa_flags = selected_cpu->flags;

	  /* CPU string contains at least one extension.  */
	  aarch64_parse_extension (ext);
	}
    }

  /* CPU name not found in list.  */
  error ("unknown value %qs for -mcpu", str);
/* Parse the TUNE string.  */
aarch64_parse_tune (void)
{
  const struct processor *cpu;
  char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
  strcpy (str, aarch64_tune_string);

  /* Loop through the list of supported CPUs to find a match.  */
  for (cpu = all_cores; cpu->name != NULL; cpu++)
    {
      if (strcmp (cpu->name, str) == 0)
	{
	  selected_tune = cpu;
	}
    }

  /* CPU name not found in list.  */
  error ("unknown value %qs for -mtune", str);
/* Implement TARGET_OPTION_OVERRIDE.  */
aarch64_override_options (void)
{
  /* march wins over mcpu, so when march is defined, mcpu takes the same value,
     otherwise march remains undefined.  mtune can be used with either march or
     mcpu.  */

  if (aarch64_arch_string)
    {
      aarch64_parse_arch ();
      aarch64_cpu_string = NULL;
    }

  if (aarch64_cpu_string)
    {
      aarch64_parse_cpu ();
      selected_arch = NULL;
    }

  if (aarch64_tune_string)
    {
      aarch64_parse_tune ();
    }

#ifndef HAVE_AS_MABI_OPTION
  /* The compiler may have been configured with 2.23.* binutils, which does
     not have support for ILP32.  */
  error ("Assembler does not support -mabi=ilp32");
#endif

  initialize_aarch64_code_model ();

  aarch64_build_bitmask_table ();

  /* This target defaults to strict volatile bitfields.  */
  if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
    flag_strict_volatile_bitfields = 1;

  /* If the user did not specify a processor, choose the default
     one for them.  This will be the CPU set during configuration using
     --with-cpu, otherwise it is "cortex-a53".  */
  selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
  aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;

  gcc_assert (selected_cpu);

  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
  selected_tune = &all_cores[selected_cpu->core];

  aarch64_tune_flags = selected_tune->flags;
  aarch64_tune = selected_tune->core;
  aarch64_tune_params = selected_tune->tune;

  aarch64_override_options_after_change ();
}
/* Implement targetm.override_options_after_change.  */
aarch64_override_options_after_change (void)
{
  faked_omit_frame_pointer = false;

  /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
     that aarch64_frame_pointer_required will be called.  We need to remember
     whether flag_omit_frame_pointer was turned on normally or just faked.  */

  if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
    {
      flag_omit_frame_pointer = true;
      faked_omit_frame_pointer = true;
    }
}

static struct machine_function *
aarch64_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_alloc_cleared_machine_function ();
  return machine;
}

aarch64_init_expanders (void)
{
  init_machine_status = aarch64_init_machine_status;
}
/* A checking mechanism for the implementation of the various code models.  */
initialize_aarch64_code_model (void)
{
  if (flag_pic)
    {
      switch (aarch64_cmodel_var)
	{
	case AARCH64_CMODEL_TINY:
	  aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
	  break;
	case AARCH64_CMODEL_SMALL:
	  aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
	  break;
	case AARCH64_CMODEL_LARGE:
	  sorry ("code model %qs with -f%s", "large",
		 flag_pic > 1 ? "PIC" : "pic");
	}
    }
  else
    aarch64_cmodel = aarch64_cmodel_var;
}
/* Return true if SYMBOL_REF X binds locally.  */
aarch64_symbol_binds_local_p (const_rtx x)
{
  return (SYMBOL_REF_DECL (x)
	  ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
	  : SYMBOL_REF_LOCAL_P (x));
}

/* Return true if SYMBOL_REF X is thread local.  */
aarch64_tls_symbol_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  if (GET_CODE (x) != SYMBOL_REF)
    return false;

  return SYMBOL_REF_TLS_MODEL (x) != 0;
}

/* Classify a TLS symbol into one of the TLS kinds.  */
enum aarch64_symbol_type
aarch64_classify_tls_symbol (rtx x)
{
  enum tls_model tls_kind = tls_symbolic_operand_type (x);

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
    case TLS_MODEL_LOCAL_DYNAMIC:
      return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;

    case TLS_MODEL_INITIAL_EXEC:
      return SYMBOL_SMALL_GOTTPREL;

    case TLS_MODEL_LOCAL_EXEC:
      return SYMBOL_SMALL_TPREL;

    case TLS_MODEL_EMULATED:
    case TLS_MODEL_NONE:
      return SYMBOL_FORCE_TO_MEM;
    }
}
/* Return the method that should be used to access SYMBOL_REF or
   LABEL_REF X in context CONTEXT.  */

enum aarch64_symbol_type
aarch64_classify_symbol (rtx x,
			 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
{
  if (GET_CODE (x) == LABEL_REF)
    {
      switch (aarch64_cmodel)
	{
	case AARCH64_CMODEL_LARGE:
	  return SYMBOL_FORCE_TO_MEM;

	case AARCH64_CMODEL_TINY_PIC:
	case AARCH64_CMODEL_TINY:
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL_PIC:
	case AARCH64_CMODEL_SMALL:
	  return SYMBOL_SMALL_ABSOLUTE;
	}
    }

  if (GET_CODE (x) == SYMBOL_REF)
    {
      if (aarch64_cmodel == AARCH64_CMODEL_LARGE
	  || CONSTANT_POOL_ADDRESS_P (x))
	return SYMBOL_FORCE_TO_MEM;

      if (aarch64_tls_symbol_p (x))
	return aarch64_classify_tls_symbol (x);

      switch (aarch64_cmodel)
	{
	case AARCH64_CMODEL_TINY:
	  if (SYMBOL_REF_WEAK (x))
	    return SYMBOL_FORCE_TO_MEM;
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL:
	  if (SYMBOL_REF_WEAK (x))
	    return SYMBOL_FORCE_TO_MEM;
	  return SYMBOL_SMALL_ABSOLUTE;

	case AARCH64_CMODEL_TINY_PIC:
	  if (!aarch64_symbol_binds_local_p (x))
	    return SYMBOL_TINY_GOT;
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL_PIC:
	  if (!aarch64_symbol_binds_local_p (x))
	    return SYMBOL_SMALL_GOT;
	  return SYMBOL_SMALL_ABSOLUTE;
	}
    }

  /* By default push everything into the constant pool.  */
  return SYMBOL_FORCE_TO_MEM;
}
aarch64_constant_address_p (rtx x)
{
  return (CONSTANT_P (x) && memory_address_p (DImode, x));
}

aarch64_legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return false;

  return true;
}

/* Return true if X holds either a quarter-precision or
   floating-point +0.0 constant.  */
aarch64_valid_floating_const (enum machine_mode mode, rtx x)
{
  if (!CONST_DOUBLE_P (x))
    return false;

  /* TODO: We could handle moving 0.0 to a TFmode register,
     but first we would like to refactor the movtf_aarch64
     to be more amicable to split moves properly and
     correctly gate on TARGET_SIMD.  For now - reject all
     constants which are not to SFmode or DFmode registers.  */
  if (!(mode == SFmode || mode == DFmode))
    return false;

  if (aarch64_float_const_zero_rtx_p (x))
    return true;

  return aarch64_float_const_representable_p (x);
}
aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
{
  /* Do not allow vector struct mode constants.  We could support
     0 and -1 easily, but they need support in aarch64-simd.md.  */
  if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
    return false;

  /* This could probably go away because
     we now decompose CONST_INTs according to expand_mov_immediate.  */
  if ((GET_CODE (x) == CONST_VECTOR
       && aarch64_simd_valid_immediate (x, mode, false, NULL))
      || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
    return !targetm.cannot_force_const_mem (mode, x);

  if (GET_CODE (x) == HIGH
      && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
    return true;

  return aarch64_constant_address_p (x);
}

aarch64_load_tp (rtx target)
{
  if (!target
      || GET_MODE (target) != Pmode
      || !register_operand (target, Pmode))
    target = gen_reg_rtx (Pmode);

  /* Can return in any reg.  */
  emit_insn (gen_aarch64_load_tp_hard (target));
  return target;
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
   Return the type to use as __builtin_va_list.

   AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:

     struct __va_list
     {
       void *__stack;
       void *__gr_top;
       void *__vr_top;
       int   __gr_offs;
       int   __vr_offs;
     };  */

aarch64_build_builtin_va_list (void)
{
  tree va_list_name;
  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;

  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;

  /* Create the fields.  */
  f_stack = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__stack"),
			ptr_type_node);
  f_grtop = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__gr_top"),
			ptr_type_node);
  f_vrtop = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__vr_top"),
			ptr_type_node);
  f_groff = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__gr_offs"),
			integer_type_node);
  f_vroff = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__vr_offs"),
			integer_type_node);

  DECL_ARTIFICIAL (f_stack) = 1;
  DECL_ARTIFICIAL (f_grtop) = 1;
  DECL_ARTIFICIAL (f_vrtop) = 1;
  DECL_ARTIFICIAL (f_groff) = 1;
  DECL_ARTIFICIAL (f_vroff) = 1;

  DECL_FIELD_CONTEXT (f_stack) = va_list_type;
  DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
  DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
  DECL_FIELD_CONTEXT (f_groff) = va_list_type;
  DECL_FIELD_CONTEXT (f_vroff) = va_list_type;

  TYPE_FIELDS (va_list_type) = f_stack;
  DECL_CHAIN (f_stack) = f_grtop;
  DECL_CHAIN (f_grtop) = f_vrtop;
  DECL_CHAIN (f_vrtop) = f_groff;
  DECL_CHAIN (f_groff) = f_vroff;

  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
{
  const CUMULATIVE_ARGS *cum;
  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
  tree stack, grtop, vrtop, groff, vroff;
  tree t;
  int gr_save_area_size;
  int vr_save_area_size;
  int vr_offset;

  cum = &crtl->args.info;
  gr_save_area_size
    = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
  vr_save_area_size
    = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;

  if (TARGET_GENERAL_REGS_ONLY)
    {
      if (cum->aapcs_nvrn > 0)
	sorry ("%qs and floating point or vector arguments",
	       "-mgeneral-regs-only");
      vr_save_area_size = 0;
    }

  f_stack = TYPE_FIELDS (va_list_type_node);
  f_grtop = DECL_CHAIN (f_stack);
  f_vrtop = DECL_CHAIN (f_grtop);
  f_groff = DECL_CHAIN (f_vrtop);
  f_vroff = DECL_CHAIN (f_groff);

  stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
		  NULL_TREE);
  grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
		  NULL_TREE);
  vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
		  NULL_TREE);
  groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
		  NULL_TREE);
  vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
		  NULL_TREE);

  /* Emit code to initialize STACK, which points to the next varargs stack
     argument.  CUM->AAPCS_STACK_SIZE gives the number of stack words used
     by named arguments.  STACK is 8-byte aligned.  */
  t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
  if (cum->aapcs_stack_size > 0)
    t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
  t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize GRTOP, the top of the GR save area.
     virtual_incoming_args_rtx should have been 16 byte aligned.  */
  t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
  t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize VRTOP, the top of the VR save area.
     This address is gr_save_area_bytes below GRTOP, rounded
     down to the next 16-byte boundary.  */
  t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
  vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
				STACK_BOUNDARY / BITS_PER_UNIT);

  t = fold_build_pointer_plus_hwi (t, -vr_offset);
  t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize GROFF, the offset from GRTOP of the
     next GPR argument.  */
  t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
	      build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Likewise emit code to initialize VROFF, the offset from FTOP
     of the next VR argument.  */
  t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
	      build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			      gimple_seq *post_p ATTRIBUTE_UNUSED)
{
  tree addr;
  bool indirect_p;
  bool is_ha;		/* is HFA or HVA.  */
  bool dw_align;	/* double-word align.  */
  enum machine_mode ag_mode = VOIDmode;
  int nregs;
  enum machine_mode mode;

  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
  tree stack, f_top, f_off, off, arg, roundup, on_stack;
  HOST_WIDE_INT size, rsize, adjust, align;
  tree t, u, cond1, cond2;

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);

  mode = TYPE_MODE (type);

  f_stack = TYPE_FIELDS (va_list_type_node);
  f_grtop = DECL_CHAIN (f_stack);
  f_vrtop = DECL_CHAIN (f_grtop);
  f_groff = DECL_CHAIN (f_vrtop);
  f_vroff = DECL_CHAIN (f_groff);

  stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
		  f_stack, NULL_TREE);
  size = int_size_in_bytes (type);
  align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;

  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
					       &ag_mode, &nregs, &is_ha))
    {
      /* TYPE passed in fp/simd registers.  */
      if (TARGET_GENERAL_REGS_ONLY)
	sorry ("%qs and floating point or vector arguments",
	       "-mgeneral-regs-only");

      f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
		      unshare_expr (valist), f_vrtop, NULL_TREE);
      f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
		      unshare_expr (valist), f_vroff, NULL_TREE);

      rsize = nregs * UNITS_PER_VREG;

      if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
	adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
      else if (BLOCK_REG_PADDING (mode, type, 1) == downward
	       && size < UNITS_PER_VREG)
	{
	  adjust = UNITS_PER_VREG - size;
	}
    }
  else
    {
      /* TYPE passed in general registers.  */
      f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
		      unshare_expr (valist), f_grtop, NULL_TREE);
      f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
		      unshare_expr (valist), f_groff, NULL_TREE);
      rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
      nregs = rsize / UNITS_PER_WORD;

      if (BLOCK_REG_PADDING (mode, type, 1) == downward
	  && size < UNITS_PER_WORD)
	{
	  adjust = UNITS_PER_WORD - size;
	}
    }

  /* Get a local temporary for the field value.  */
  off = get_initialized_tmp_var (f_off, pre_p, NULL);

  /* Emit code to branch if off >= 0.  */
  t = build2 (GE_EXPR, boolean_type_node, off,
	      build_int_cst (TREE_TYPE (off), 0));
  cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);

  if (dw_align)
    {
      /* Emit: offs = (offs + 15) & -16.  */
      t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
		  build_int_cst (TREE_TYPE (off), 15));
      t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
		  build_int_cst (TREE_TYPE (off), -16));
      roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
    }

  /* Update ap.__[g|v]r_offs  */
  t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
	      build_int_cst (TREE_TYPE (off), rsize));
  t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);

  t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);

  /* [cond2] if (ap.__[g|v]r_offs > 0)  */
  u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
	      build_int_cst (TREE_TYPE (f_off), 0));
  cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);

  /* String up: make sure the assignment happens before the use.  */
  t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
  COND_EXPR_ELSE (cond1) = t;

  /* Prepare the trees handling the argument that is passed on the stack;
     the top level node will store in ON_STACK.  */
  arg = get_initialized_tmp_var (stack, pre_p, NULL);

  /* if (alignof(type) > 8) (arg = arg + 15) & -16;  */
  t = fold_convert (intDI_type_node, arg);
  t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
	      build_int_cst (TREE_TYPE (t), 15));
  t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
	      build_int_cst (TREE_TYPE (t), -16));
  t = fold_convert (TREE_TYPE (arg), t);
  roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);

  /* Advance ap.__stack  */
  t = fold_convert (intDI_type_node, arg);
  t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
	      build_int_cst (TREE_TYPE (t), size + 7));
  t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
	      build_int_cst (TREE_TYPE (t), -8));
  t = fold_convert (TREE_TYPE (arg), t);
  t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
  /* String up roundup and advance.  */
  t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
  /* String up with arg  */
  on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
  /* Big-endianness related address adjustment.  */
  if (BLOCK_REG_PADDING (mode, type, 1) == downward
      && size < UNITS_PER_WORD)
    {
      t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
		  size_int (UNITS_PER_WORD - size));
      on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
    }

  COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
  COND_EXPR_THEN (cond2) = unshare_expr (on_stack);

  /* Adjustment to OFFSET in the case of BIG_ENDIAN.  */
  t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
	      build_int_cst (TREE_TYPE (off), adjust));

  t = fold_convert (sizetype, t);
  t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);

  if (is_ha)
    {
      /* type ha; // treat as "struct {ftype field[n];}"
	 ... [computing offs]
	 for (i = 0; i <nregs; ++i, offs += 16)
	   ha.field[i] = *((ftype *)(ap.__vr_top + offs));
	 return ha;  */
      int i;
      tree tmp_ha, field_t, field_ptr_t;

      /* Declare a local variable.  */
      tmp_ha = create_tmp_var_raw (type, "ha");
      gimple_add_tmp_var (tmp_ha);

      /* Establish the base type.  */
      switch (ag_mode)
	{
	case SFmode:
	  field_t = float_type_node;
	  field_ptr_t = float_ptr_type_node;
	  break;
	case DFmode:
	  field_t = double_type_node;
	  field_ptr_t = double_ptr_type_node;
	  break;
	case TFmode:
	  field_t = long_double_type_node;
	  field_ptr_t = long_double_ptr_type_node;
	  break;
/* The half precision and quad precision are not fully supported yet.  Enable
   the following code after the support is complete.  Need to find the correct
   type node for __fp16 *.  */
#if 0
	case HFmode:
	  field_t = float_type_node;
	  field_ptr_t = float_ptr_type_node;
	  break;
#endif
	case V2SImode:
	case V4SImode:
	  {
	    tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
	    field_t = build_vector_type_for_mode (innertype, ag_mode);
	    field_ptr_t = build_pointer_type (field_t);
	  }
	  break;
	}

      /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area  */
      tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);

      t = fold_convert (field_ptr_t, addr);
      t = build2 (MODIFY_EXPR, field_t,
		  build1 (INDIRECT_REF, field_t, tmp_ha),
		  build1 (INDIRECT_REF, field_t, t));

      /* ha.field[i] = *((field_ptr_t)vr_saved_area + i)  */
      for (i = 1; i < nregs; ++i)
	{
	  addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
	  u = fold_convert (field_ptr_t, addr);
	  u = build2 (MODIFY_EXPR, field_t,
		      build2 (MEM_REF, field_t, tmp_ha,
			      build_int_cst (field_ptr_t,
					     (i *
					      int_size_in_bytes (field_t)))),
		      build1 (INDIRECT_REF, field_t, u));
	  t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
	}

      u = fold_convert (TREE_TYPE (f_top), tmp_ha);
      t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
    }

  COND_EXPR_ELSE (cond2) = t;
  addr = fold_convert (build_pointer_type (type), cond1);
  addr = build_va_arg_indirect_ref (addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);
/* Implement TARGET_SETUP_INCOMING_VARARGS.  */
aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
				tree type, int *pretend_size ATTRIBUTE_UNUSED,
				int no_rtl)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  CUMULATIVE_ARGS local_cum;
  int gr_saved, vr_saved;

  /* The caller has advanced CUM up to, but not beyond, the last named
     argument.  Advance a local copy of CUM past the last "real" named
     argument, to find out how many registers are left over.  */
  local_cum = *cum;
  aarch64_function_arg_advance (pack_cumulative_args (&local_cum),
				mode, type, true);

  /* Found out how many registers we need to save.  */
  gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
  vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;

  if (TARGET_GENERAL_REGS_ONLY)
    {
      if (local_cum.aapcs_nvrn > 0)
	sorry ("%qs and floating point or vector arguments",
	       "-mgeneral-regs-only");
    }

  rtx ptr, mem;
  int off, i;

  /* virtual_incoming_args_rtx should have been 16-byte aligned.  */
  ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
		       - gr_saved * UNITS_PER_WORD);
  mem = gen_frame_mem (BLKmode, ptr);
  set_mem_alias_set (mem, get_varargs_alias_set ());

  move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
		       mem, gr_saved);

  /* We can't use move_block_from_reg, because it will use
     the wrong mode, storing D regs only.  */
  enum machine_mode mode = TImode;

  /* Set OFF to the offset from virtual_incoming_args_rtx of
     the first vector register.  The VR save area lies below
     the GR one, and is aligned to 16 bytes.  */
  off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
			   STACK_BOUNDARY / BITS_PER_UNIT);
  off -= vr_saved * UNITS_PER_VREG;

  for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
    {
      ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
      mem = gen_frame_mem (mode, ptr);
      set_mem_alias_set (mem, get_varargs_alias_set ());
      aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
      off += UNITS_PER_VREG;
    }

  /* We don't save the size into *PRETEND_SIZE because we want to avoid
     any complication of having crtl->args.pretend_args_size changed.  */
  cfun->machine->saved_varargs_size
    = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
			 STACK_BOUNDARY / BITS_PER_UNIT)
       + vr_saved * UNITS_PER_VREG);
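
/* For example, with three unnamed general registers and two unnamed vector
   registers left, the saved area is AARCH64_ROUND_UP (3 * 8, 16) + 2 * 16
   = 32 + 32 = 64 bytes: the GR block is padded up to the 16-byte stack
   boundary and the VR block sits immediately below it.  */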
aarch64_conditional_register_usage (void)
{
  int i;

  for (i = V0_REGNUM; i <= V31_REGNUM; i++)
    {
      call_used_regs[i] = 1;
    }
}
/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.  */

aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
{
  enum machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode && mode != TFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode && mode != TFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
	 and 128-bit vector types.  */
      size = int_size_in_bytes (type);
      switch (size)
	{
	case 8:
	  mode = V2SImode;
	  break;
	case 16:
	  mode = V4SImode;
	  break;
	default:
	  return -1;
	}

      if (*modep == VOIDmode)
	*modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
	 equivalent for the purposes of being homogeneous aggregates
	 if they are the same size.  */
      if (*modep == mode)
	return 1;

      break;

    case ARRAY_TYPE:
      {
	int count;
	tree index = TYPE_DOMAIN (type);

	/* Can't handle incomplete types.  */
	if (!COMPLETE_TYPE_P (type))
	  return -1;

	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
	if (count == -1
	    || !index
	    || !TYPE_MAX_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
	    || !TYPE_MIN_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index)))
	  return -1;

	count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
		  - tree_to_uhwi (TYPE_MIN_VALUE (index)));

	/* There must be no padding.  */
	if (!tree_fits_uhwi_p (TYPE_SIZE (type))
	    || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
		!= count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case RECORD_TYPE:
      {
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types.  */
	if (!COMPLETE_TYPE_P (type))
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count += sub_count;
	  }

	/* There must be no padding.  */
	if (!tree_fits_uhwi_p (TYPE_SIZE (type))
	    || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
		!= count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
	/* These aren't very interesting except in a degenerate case.  */
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types.  */
	if (!COMPLETE_TYPE_P (type))
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count = count > sub_count ? count : sub_count;
	  }

	/* There must be no padding.  */
	if (!tree_fits_uhwi_p (TYPE_SIZE (type))
	    || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
		!= count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }
    }

  return -1;
}
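
/* For example, walking

     struct rgb { float r, g, b; };

   sets *MODEP to SFmode and returns 3 (three consecutive SFmode elements,
   no padding), so the type is a homogeneous floating-point aggregate;
   adding an int member would make the walk return -1.  */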
/* Return true if we use LRA instead of reload pass.  */
aarch64_lra_p (void)
{
  return aarch64_lra_flag;
}

/* Return TRUE if the type, as described by TYPE and MODE, is a composite
   type as described in AAPCS64 \S 4.3.  This includes aggregate, union and
   array types.  The C99 floating-point complex types are also considered
   as composite types, according to AAPCS64 \S 7.1.1.  The complex integer
   types, which are GCC extensions and out of the scope of AAPCS64, are
   treated as composite types here as well.

   Note that MODE itself is not sufficient in determining whether a type
   is such a composite type or not.  This is because
   stor-layout.c:compute_record_mode may have already changed the MODE
   (BLKmode) of a RECORD_TYPE TYPE to some other mode.  For example, a
   structure with only one field may have its MODE set to the mode of the
   field.  Also an integer mode whose size matches the size of the
   RECORD_TYPE type may be used to substitute the original mode
   (i.e. BLKmode) in certain circumstances.  In other words, MODE cannot be
   solely relied on.  */

aarch64_composite_type_p (const_tree type,
			  enum machine_mode mode)
{
  if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
    return true;

  if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return true;

  return false;
}

/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
   type as described in AAPCS64 \S 4.1.2.

   See the comment above aarch64_composite_type_p for the notes on MODE.  */

aarch64_short_vector_p (const_tree type,
			enum machine_mode mode)
{
  HOST_WIDE_INT size = -1;

  if (type && TREE_CODE (type) == VECTOR_TYPE)
    size = int_size_in_bytes (type);
  else if (!aarch64_composite_type_p (type, mode)
	   && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	       || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
    size = GET_MODE_SIZE (mode);

  return (size == 8 || size == 16) ? true : false;
}
/* Return TRUE if an argument, whose type is described by TYPE and MODE,
   shall be passed or returned in simd/fp register(s) (providing these
   parameter passing registers are available).

   Upon successful return, *COUNT returns the number of needed registers,
   *BASE_MODE returns the mode of the individual register and when IS_HA
   is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
   floating-point aggregate or a homogeneous short-vector aggregate.  */

aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
					 const_tree type,
					 enum machine_mode *base_mode,
					 int *count,
					 bool *is_ha)
{
  enum machine_mode new_mode = VOIDmode;
  bool composite_p = aarch64_composite_type_p (type, mode);

  if (is_ha != NULL) *is_ha = false;

  if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
      || aarch64_short_vector_p (type, mode))
    {
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      if (is_ha != NULL) *is_ha = true;
      new_mode = GET_MODE_INNER (mode);
    }
  else if (type && composite_p)
    {
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);

      if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
	{
	  if (is_ha != NULL) *is_ha = true;
	  *count = ag_count;
	}
      else
	return false;
    }
  else
    return false;

  *base_mode = new_mode;
  return true;
}

/* Implement TARGET_STRUCT_VALUE_RTX.  */

aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
			  int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
}
/* Implements target hook vector_mode_supported_p.  */
aarch64_vector_mode_supported_p (enum machine_mode mode)
{
  if (TARGET_SIMD
      && (mode == V4SImode  || mode == V8HImode
	  || mode == V16QImode || mode == V2DImode
	  || mode == V2SImode  || mode == V4HImode
	  || mode == V8QImode || mode == V2SFmode
	  || mode == V4SFmode || mode == V2DFmode))
    return true;

  return false;
}

/* Return appropriate SIMD container
   for MODE within a vector of WIDTH bits.  */
static enum machine_mode
aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
{
  gcc_assert (width == 64 || width == 128);

/* Return 128-bit container as the preferred SIMD mode for MODE.  */
static enum machine_mode
aarch64_preferred_simd_mode (enum machine_mode mode)
{
  return aarch64_simd_container_mode (mode, 128);
}

/* Return the bitmask of possible vector sizes for the vectorizer
   to iterate over.  */
aarch64_autovectorize_vector_sizes (void)
/* A table to help perform AArch64-specific name mangling for AdvSIMD
   vector types in order to conform to the AAPCS64 (see "Procedure
   Call Standard for the ARM 64-bit Architecture", Appendix A).  To
   qualify for emission with the mangled names defined in that document,
   a vector type must not only be of the correct mode but also be
   composed of AdvSIMD vector element types (e.g.
   _builtin_aarch64_simd_qi); these types are registered by
   aarch64_init_simd_builtins ().  In other words, vector types defined
   in other ways e.g. via vector_size attribute will get default
   mangled names.  */
typedef struct
{
  enum machine_mode mode;
  const char *element_type_name;
  const char *mangled_name;
} aarch64_simd_mangle_map_entry;

static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
  /* 64-bit containerized types.  */
  { V8QImode,  "__builtin_aarch64_simd_qi",     "10__Int8x8_t" },
  { V8QImode,  "__builtin_aarch64_simd_uqi",    "11__Uint8x8_t" },
  { V4HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x4_t" },
  { V4HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x4_t" },
  { V2SImode,  "__builtin_aarch64_simd_si",     "11__Int32x2_t" },
  { V2SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x2_t" },
  { V2SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x2_t" },
  { V8QImode,  "__builtin_aarch64_simd_poly8",  "11__Poly8x8_t" },
  { V4HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
  /* 128-bit containerized types.  */
  { V16QImode, "__builtin_aarch64_simd_qi",     "11__Int8x16_t" },
  { V16QImode, "__builtin_aarch64_simd_uqi",    "12__Uint8x16_t" },
  { V8HImode,  "__builtin_aarch64_simd_hi",     "11__Int16x8_t" },
  { V8HImode,  "__builtin_aarch64_simd_uhi",    "12__Uint16x8_t" },
  { V4SImode,  "__builtin_aarch64_simd_si",     "11__Int32x4_t" },
  { V4SImode,  "__builtin_aarch64_simd_usi",    "12__Uint32x4_t" },
  { V2DImode,  "__builtin_aarch64_simd_di",     "11__Int64x2_t" },
  { V2DImode,  "__builtin_aarch64_simd_udi",    "12__Uint64x2_t" },
  { V4SFmode,  "__builtin_aarch64_simd_sf",     "13__Float32x4_t" },
  { V2DFmode,  "__builtin_aarch64_simd_df",     "13__Float64x2_t" },
  { V16QImode, "__builtin_aarch64_simd_poly8",  "12__Poly8x16_t" },
  { V8HImode,  "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
  { VOIDmode, NULL, NULL }
};

/* Implement TARGET_MANGLE_TYPE.  */

aarch64_mangle_type (const_tree type)
{
  /* The AArch64 ABI documents say that "__va_list" has to be
     mangled as if it is in the "std" namespace.  */
  if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Check the mode of the vector type, and the name of the vector
     element type, against the table.  */
  if (TREE_CODE (type) == VECTOR_TYPE)
    {
      aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;

      while (pos->mode != VOIDmode)
	{
	  tree elt_type = TREE_TYPE (type);

	  if (pos->mode == TYPE_MODE (type)
	      && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
	      && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
			  pos->element_type_name))
	    return pos->mangled_name;

	  pos++;
	}
    }

  /* Use the default mangling.  */
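
/* For example, an int8x8_t argument (V8QImode with element type
   __builtin_aarch64_simd_qi) is mangled using the table entry
   "10__Int8x8_t", while a same-sized vector created with the vector_size
   attribute falls through to the default mangling.  */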
/* Return the equivalent letter for size.  */
sizetochar (int size)
{
  switch (size)
    {
    case 64: return 'd';
    case 32: return 's';
    case 16: return 'h';
    case 8 : return 'b';
    default: gcc_unreachable ();
    }
}

/* Return true iff x is a uniform vector of floating-point
   constants, and the constant can be represented in
   quarter-precision form.  Note, as aarch64_float_const_representable
   rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0.  */
aarch64_vect_float_const_representable_p (rtx x)
{
  int i;
  REAL_VALUE_TYPE r0, ri;
  rtx x0, xi;

  if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
    return false;

  x0 = CONST_VECTOR_ELT (x, 0);
  if (!CONST_DOUBLE_P (x0))
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);

  for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
    {
      xi = CONST_VECTOR_ELT (x, i);
      if (!CONST_DOUBLE_P (xi))
	return false;

      REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
      if (!REAL_VALUES_EQUAL (r0, ri))
	return false;
    }

  return aarch64_float_const_representable_p (x0);
}
/* Return true for valid and false for invalid.  */
aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
			      struct simd_immediate_info *info)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG)	\
  matches = 1;						\
  for (i = 0; i < idx; i += (STRIDE))			\
    if (!(TEST))					\
      matches = 0;					\
  if (matches)						\
    {							\
      immtype = (CLASS);				\
      elsize = (ELSIZE);				\
      eshift = (SHIFT);					\
      emvn = (NEG);					\
      break;						\
    }

  unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
  unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
  unsigned char bytes[16];
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;
  int eshift, emvn;

  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      if (! (aarch64_simd_imm_zero_p (op, mode)
	     || aarch64_vect_float_const_representable_p (op)))
	return false;

      if (info)
	{
	  info->value = CONST_VECTOR_ELT (op, 0);
	  info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
	}

      return true;
    }

  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = CONST_VECTOR_ELT (op, i);
      unsigned HOST_WIDE_INT elpart;
      unsigned int part, parts;

      if (GET_CODE (el) == CONST_INT)
	{
	  elpart = INTVAL (el);
	  parts = 1;
	}
      else if (GET_CODE (el) == CONST_DOUBLE)
	{
	  elpart = CONST_DOUBLE_LOW (el);
	  parts = 2;
	}
      else
	gcc_unreachable ();

      for (part = 0; part < parts; part++)
	{
	  unsigned int byte;
	  for (byte = 0; byte < innersize; byte++)
	    {
	      bytes[idx++] = (elpart & 0xff) ^ invmask;
	      elpart >>= BITS_PER_UNIT;
	    }
	  if (GET_CODE (el) == CONST_DOUBLE)
	    elpart = CONST_DOUBLE_HIGH (el);
	}
    }

  /* Sanity check.  */
  gcc_assert (idx == GET_MODE_SIZE (mode));

  do
    {
      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);

      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);

      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);

      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
	     && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);

      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);

      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);

      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);

      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);

      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);

      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
	     && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);

      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);

      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);

      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);

      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);

      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);

      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);

      CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);

      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
	     && bytes[i] == bytes[(i + 8) % idx], 0, 0);
    }
  while (0);

  if (info)
    {
      info->element_width = elsize;
      info->mvn = emvn != 0;
      info->shift = eshift;

      unsigned HOST_WIDE_INT imm = 0;

      if (immtype >= 12 && immtype <= 15)
	info->msl = true;

      /* Un-invert bytes of recognized vector, if necessary.  */
      if (invmask != 0)
	for (i = 0; i < idx; i++)
	  bytes[i] ^= invmask;

      if (immtype == 17)
	{
	  /* FIXME: Broken on 32-bit H_W_I hosts.  */
	  gcc_assert (sizeof (HOST_WIDE_INT) == 8);

	  for (i = 0; i < 8; i++)
	    imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
	      << (i * BITS_PER_UNIT);

	  info->value = GEN_INT (imm);
	}
      else
	{
	  for (i = 0; i < elsize / BITS_PER_UNIT; i++)
	    imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

	  /* Construct 'abcdefgh' because the assembler cannot handle
	     generic constants.  */
	  if (info->shift)
	    imm = (imm >> info->shift) & 0xff;
	  info->value = GEN_INT (imm);
	}
    }

  return true;
}
aarch64_const_vec_all_same_int_p (rtx x,
				  HOST_WIDE_INT minval,
				  HOST_WIDE_INT maxval)
{
  HOST_WIDE_INT firstval;
  int count, i;

  if (GET_CODE (x) != CONST_VECTOR
      || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
    return false;

  firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
  if (firstval < minval || firstval > maxval)
    return false;

  count = CONST_VECTOR_NUNITS (x);
  for (i = 1; i < count; i++)
    if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
      return false;

  return true;
}

/* Check if immediate shift constants are within range.  */
aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
{
  int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;

  if (left)
    return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
  else
    return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
}
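
/* For 32-bit elements this accepts 0..31 for left shifts and 1..32 for
   right shifts, matching the immediate ranges of the SHL and SSHR/USHR
   instructions.  */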
/* Return true if X is a uniform vector where all elements
   are either the floating-point constant 0.0 or the
   integer constant 0.  */
aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
{
  return x == CONST0_RTX (mode);
}
aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT imm = INTVAL (x);
  int i;

  for (i = 0; i < 8; i++)
    {
      unsigned int byte = imm & 0xff;
      if (byte != 0xff && byte != 0)
	return false;
    }

  return true;
}

aarch64_mov_operand_p (rtx x,
		       enum aarch64_symbol_context context,
		       enum machine_mode mode)
{
  if (GET_CODE (x) == HIGH
      && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
    return true;

  if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
    return true;

  if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
    return true;

  return aarch64_classify_symbolic_expression (x, context)
    == SYMBOL_TINY_ABSOLUTE;
}

/* Return a const_int vector of VAL.  */
aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits);
  int i;

  for (i = 0; i < nunits; i++)
    RTVEC_ELT (v, i) = GEN_INT (val);

  return gen_rtx_CONST_VECTOR (mode, v);
}
/* Check OP is a legal scalar immediate for the MOVI instruction.  */
aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
{
  enum machine_mode vmode;

  gcc_assert (!VECTOR_MODE_P (mode));
  vmode = aarch64_preferred_simd_mode (mode);
  rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
  return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
}

/* Construct and return a PARALLEL RTX vector.  */
aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits / 2);
  int base = high ? nunits / 2 : 0;
  rtx t1;
  int i;

  for (i = 0; i < nunits / 2; i++)
    RTVEC_ELT (v, i) = GEN_INT (base + i);

  t1 = gen_rtx_PARALLEL (mode, v);
  return t1;
}
/* Bounds-check lanes.  Ensure OPERAND lies between LOW (inclusive) and
   HIGH (exclusive).  */
aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  HOST_WIDE_INT lane;

  gcc_assert (GET_CODE (operand) == CONST_INT);
  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    error ("lane out of range");
}

aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  gcc_assert (GET_CODE (operand) == CONST_INT);
  HOST_WIDE_INT lane = INTVAL (operand);

  if (lane < low || lane >= high)
    error ("constant out of range");
}

/* Emit code to reinterpret one AdvSIMD type as another,
   without altering bits.  */
aarch64_simd_reinterpret (rtx dest, rtx src)
{
  emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
}

/* Emit code to place a AdvSIMD pair result in memory locations (with equal
   registers).  */
aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
				    rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
				    rtx op1)
{
  rtx mem = gen_rtx_MEM (mode, destaddr);
  rtx tmp1 = gen_reg_rtx (mode);
  rtx tmp2 = gen_reg_rtx (mode);

  emit_insn (intfn (tmp1, op1, tmp2));

  emit_move_insn (mem, tmp1);
  mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
  emit_move_insn (mem, tmp2);
}
/* Return TRUE if OP is a valid vector addressing mode.  */
aarch64_simd_mem_operand_p (rtx op)
{
  return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
			|| GET_CODE (XEXP (op, 0)) == REG);
}

/* Set up OPERANDS for a register copy from SRC to DEST, taking care
   not to early-clobber SRC registers in the process.

   We assume that the operands described by SRC and DEST represent a
   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
   number of components into which the copy has been decomposed.  */
aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
				rtx *src, unsigned int count)
{
  unsigned int i;

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || REGNO (operands[0]) < REGNO (operands[1]))
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[i];
	  operands[2 * i + 1] = src[i];
	}
    }
  else
    {
      for (i = 0; i < count; i++)
	{
	  operands[2 * i] = dest[count - i - 1];
	  operands[2 * i + 1] = src[count - i - 1];
- 1];
6867 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6868 one of VSTRUCT modes: OI, CI or XI. */
6870 aarch64_simd_attr_length_move (rtx insn
)
6872 enum machine_mode mode
;
6874 extract_insn_cached (insn
);
6876 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
6878 mode
= GET_MODE (recog_data
.operand
[0]);
6894 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
6895 alignment of a vector to 128 bits. */
6896 static HOST_WIDE_INT
6897 aarch64_simd_vector_alignment (const_tree type
)
6899 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
6900 return MIN (align
, 128);
6903 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
6905 aarch64_simd_vector_alignment_reachable (const_tree type
, bool is_packed
)
6910 /* We guarantee alignment for vectors up to 128-bits. */
6911 if (tree_int_cst_compare (TYPE_SIZE (type
),
6912 bitsize_int (BIGGEST_ALIGNMENT
)) > 0)
6915 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
/* If VALS is a vector constant that can be loaded into a register
   using DUP, generate instructions to do so and return an RTX to
   assign to the register.  Otherwise return NULL_RTX.  */
aarch64_simd_dup_constant (rtx vals)
{
  enum machine_mode mode = GET_MODE (vals);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  bool all_same = true;
  rtx x;
  int i;

  if (GET_CODE (vals) != CONST_VECTOR)
    return NULL_RTX;

  for (i = 1; i < n_elts; ++i)
    {
      x = CONST_VECTOR_ELT (vals, i);
      if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
	all_same = false;
    }

  if (!all_same)
    return NULL_RTX;

  /* We can load this constant by using DUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */
  x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
  return gen_rtx_VEC_DUPLICATE (mode, x);
}

/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that can not be converted into a CONST_VECTOR.  */
aarch64_simd_make_constant (rtx vals)
{
  enum machine_mode mode = GET_MODE (vals);
  rtx const_dup;
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);
  int n_const = 0;
  int i;

  if (GET_CODE (vals) == CONST_VECTOR)
    const_vec = vals;
  else if (GET_CODE (vals) == PARALLEL)
    {
      /* A CONST_VECTOR must contain only CONST_INTs and
	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
	 Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	    n_const++;
	}
      if (n_const == n_elts)
	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    }

  if (const_vec != NULL_RTX
      && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
    /* Load using MOVI/MVNI.  */
    return const_vec;
  else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
    /* Loaded using DUP.  */
    return const_dup;
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  We can not take advantage of single-cycle
       LD1 because we need a PC-relative addressing mode.  */
    return const_vec;
  else
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We can not construct an initializer.  */
    return NULL_RTX;
}
void
aarch64_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  enum machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;
  rtx x, mem;
  int i;

  x = XVECEXP (vals, 0, 0);
  if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
    n_var = 1, one_var = 0;

  for (i = 1; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
        ++n_var, one_var = i;

      if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
        all_same = false;
    }

  if (n_var == 0)
    {
      rtx constant = aarch64_simd_make_constant (vals);
      if (constant != NULL_RTX)
        {
          emit_move_insn (target, constant);
          return;
        }
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same)
    {
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);
      rtx index = GEN_INT (one_var);
      enum insn_code icode;

      /* Load constant part of vector, substitute neighboring value for
         varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
      aarch64_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
      icode = optab_handler (vec_set_optab, mode);
      gcc_assert (icode != CODE_FOR_nothing);
      emit_insn (GEN_FCN (icode) (target, x, index));
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
                                       i * GET_MODE_SIZE (inner_mode)),
                    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}

static unsigned HOST_WIDE_INT
aarch64_shift_truncation_mask (enum machine_mode mode)
{
  return
    (aarch64_vector_mode_supported_p (mode)
     || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
}

#ifndef TLS_SECTION_ASM_FLAG
#define TLS_SECTION_ASM_FLAG 'T'
#endif

void
aarch64_elf_asm_named_section (const char *name, unsigned int flags,
                               tree decl ATTRIBUTE_UNUSED)
{
  char flagchars[10], *f = flagchars;

  /* If we have already declared this section, we can use an
     abbreviated form to switch back to it -- unless this section is
     part of a COMDAT groups, in which case GAS requires the full
     declaration every time.  */
  if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
      && (flags & SECTION_DECLARED))
    {
      fprintf (asm_out_file, "\t.section\t%s\n", name);
      return;
    }

  if (!(flags & SECTION_DEBUG))
    *f++ = 'a';
  if (flags & SECTION_WRITE)
    *f++ = 'w';
  if (flags & SECTION_CODE)
    *f++ = 'x';
  if (flags & SECTION_SMALL)
    *f++ = 's';
  if (flags & SECTION_MERGE)
    *f++ = 'M';
  if (flags & SECTION_STRINGS)
    *f++ = 'S';
  if (flags & SECTION_TLS)
    *f++ = TLS_SECTION_ASM_FLAG;
  if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
    *f++ = 'G';
  *f = '\0';

  fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);

  if (!(flags & SECTION_NOTYPE))
    {
      const char *type;
      const char *format;

      if (flags & SECTION_BSS)
        type = "nobits";
      else
        type = "progbits";

#ifdef TYPE_OPERAND_FMT
      format = "," TYPE_OPERAND_FMT;
#else
      format = ",@%s";
#endif

      fprintf (asm_out_file, format, type);

      if (flags & SECTION_ENTSIZE)
        fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
      if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
        {
          if (TREE_CODE (decl) == IDENTIFIER_NODE)
            fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
          else
            fprintf (asm_out_file, ",%s,comdat",
                     IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
        }
    }

  putc ('\n', asm_out_file);
}

/* Select a format to encode pointers in exception handling data.  */
int
aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
{
  int type;
  switch (aarch64_cmodel)
    {
    case AARCH64_CMODEL_TINY:
    case AARCH64_CMODEL_TINY_PIC:
    case AARCH64_CMODEL_SMALL:
    case AARCH64_CMODEL_SMALL_PIC:
      /* text+got+data < 4Gb.  4-byte signed relocs are sufficient
         for everything.  */
      type = DW_EH_PE_sdata4;
      break;
    default:
      /* No assumptions here.  8-byte relocs required.  */
      type = DW_EH_PE_sdata8;
      break;
    }
  return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
}

/* Emit load exclusive.  */

static void
aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
                             rtx mem, rtx model_rtx)
{
  rtx (*gen) (rtx, rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_aarch64_load_exclusiveqi; break;
    case HImode: gen = gen_aarch64_load_exclusivehi; break;
    case SImode: gen = gen_aarch64_load_exclusivesi; break;
    case DImode: gen = gen_aarch64_load_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, model_rtx));
}

/* Emit store exclusive.  */

static void
aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
                              rtx rval, rtx mem, rtx model_rtx)
{
  rtx (*gen) (rtx, rtx, rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_aarch64_store_exclusiveqi; break;
    case HImode: gen = gen_aarch64_store_exclusivehi; break;
    case SImode: gen = gen_aarch64_store_exclusivesi; break;
    case DImode: gen = gen_aarch64_store_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (bval, rval, mem, model_rtx));
}

/* Mark the previous jump instruction as unlikely.  */

static void
aarch64_emit_unlikely_jump (rtx insn)
{
  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;

  insn = emit_jump_insn (insn);
  add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
}

/* Expand a compare and swap pattern.  */

void
aarch64_expand_compare_and_swap (rtx operands[])
{
  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  enum machine_mode mode, cmp_mode;
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);
  cmp_mode = mode;

  /* Normally the succ memory model must be stronger than fail, but in the
     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */

  if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
      && INTVAL (mod_s) == MEMMODEL_RELEASE)
    mod_s = GEN_INT (MEMMODEL_ACQ_REL);

  switch (mode)
    {
    case QImode:
    case HImode:
      /* For short modes, we're going to perform the comparison in SImode,
         so do the zero-extension now.  */
      cmp_mode = SImode;
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* Fall through.  */

    case SImode:
    case DImode:
      /* Force the value into a register if needed.  */
      if (!aarch64_plus_operand (oldval, mode))
        oldval = force_reg (cmp_mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  switch (mode)
    {
    case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
    case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
    case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
    case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  x = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_EQ (SImode, x, const0_rtx);
  emit_insn (gen_rtx_SET (VOIDmode, bval, x));
}

/* Split a compare and swap pattern.  */

void
aarch64_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, scratch;
  enum machine_mode mode;
  bool is_weak;
  rtx label1, label2, x, cond;

  rval = operands[0];
  mem = operands[1];
  oldval = operands[2];
  newval = operands[3];
  is_weak = (operands[4] != const0_rtx);
  scratch = operands[7];
  mode = GET_MODE (mem);

  label1 = NULL_RTX;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);

  cond = aarch64_gen_compare_reg (NE, rval, oldval);
  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
                            gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
  aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));

  aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);

  if (!is_weak)
    {
      x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
                                gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
      aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
    }
  else
    {
      cond = gen_rtx_REG (CCmode, CC_REGNUM);
      x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, cond, x));
    }

  emit_label (label2);
}

/* Split an atomic operation.  */

void
aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
                         rtx value, rtx model_rtx, rtx cond)
{
  enum machine_mode mode = GET_MODE (mem);
  enum machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx label, x;

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
        {
          value = GEN_INT (-INTVAL (value));
          code = PLUS;
        }
      /* Fall through.  */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;
    }

  aarch64_emit_store_exclusive (mode, cond, mem,
                                gen_lowpart (mode, new_out), model_rtx);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
                            gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
  aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
}

static void
aarch64_print_extension (void)
{
  const struct aarch64_option_extension *opt = NULL;

  for (opt = all_extensions; opt->name != NULL; opt++)
    if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
      asm_fprintf (asm_out_file, "+%s", opt->name);

  asm_fprintf (asm_out_file, "\n");
}

static void
aarch64_start_file (void)
{
  if (selected_arch)
    {
      asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
      aarch64_print_extension ();
    }
  else if (selected_cpu)
    {
      asm_fprintf (asm_out_file, "\t.cpu %s", selected_cpu->name);
      aarch64_print_extension ();
    }
  default_file_start();
}

/* Target hook for c_mode_for_suffix.  */
static enum machine_mode
aarch64_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;

  return VOIDmode;
}

/* We can only represent floating point constants which will fit in
   "quarter-precision" values.  These values are characterised by
   a sign bit, a 4-bit mantissa and a 3-bit exponent.  And are given
   by:

   (-1)^s * (n/16) * 2^r

   Where:
     's' is the sign bit.
     'n' is an integer in the range 16 <= n <= 31.
     'r' is an integer in the range -3 <= r <= 4.  */

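/* Worked examples of the encoding above (illustrative):
     1.0  = (-1)^0 * (16/16) * 2^0     (s = 0, n = 16, r = 0)
     -0.5 = (-1)^1 * (16/16) * 2^-1    (s = 1, n = 16, r = -1)
     31.0 = (-1)^0 * (31/16) * 2^4     is the largest representable value,
   while 0.0 and any value needing more than four mantissa bits
   (e.g. 0.1) fall outside the representable set.  */
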
/* Return true iff X can be represented by a quarter-precision
   floating point immediate operand X.  Note, we cannot represent 0.0.  */
bool
aarch64_float_const_representable_p (rtx x)
{
  /* This represents our current view of how many bits
     make up the mantissa.  */
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
  int exponent;
  unsigned HOST_WIDE_INT mantissa, mask;
  HOST_WIDE_INT m1, m2;
  REAL_VALUE_TYPE r, m;

  if (!CONST_DOUBLE_P (x))
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* We cannot represent infinities, NaNs or +/-zero.  We won't
     know if we have +zero until we analyse the mantissa, but we
     can reject the other invalid values.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
      || REAL_VALUE_MINUS_ZERO (r))
    return false;

  /* Extract exponent.  */
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);

  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     m1 holds the low part of the mantissa, m2 the high part.
     WARNING: If we ever have a representation using more than 2 * H_W_I - 1
     bits for the mantissa, this can fail (low bits will be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  REAL_VALUE_TO_INT (&m1, &m2, m);

  /* If the low part of the mantissa has bits set we cannot represent
     the value.  */
  if (m1 != 0)
    return false;
  /* We have rejected the lower HOST_WIDE_INT, so update our
     understanding of how many bits lie in the mantissa and
     look only at the high HOST_WIDE_INT.  */
  mantissa = m2;
  point_pos -= HOST_BITS_PER_WIDE_INT;

  /* We can only represent values with a mantissa of the form 1.xxxx.  */
  mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return false;

  /* Having filtered unrepresentable values, we may now remove all
     but the highest 5 bits.  */
  mantissa >>= point_pos - 5;

  /* We cannot represent the value 0.0, so reject it.  This is handled
     elsewhere.  */
  if (mantissa == 0)
    return false;

  /* Then, as bit 4 is always set, we can mask it off, leaving
     the mantissa in the range [0, 15].  */
  mantissa &= ~(1 << 4);
  gcc_assert (mantissa <= 15);

  /* GCC internally does not use IEEE754-like encoding (where normalized
     significands are in the range [1, 2).  GCC uses [0.5, 1) (see real.c).
     Our mantissa values are shifted 4 places to the left relative to
     normalized IEEE754 so we must modify the exponent returned by REAL_EXP
     by 5 places to correct for GCC's representation.  */
  exponent = 5 - exponent;

  return (exponent >= 0 && exponent <= 7);
}

char*
aarch64_output_simd_mov_immediate (rtx const_vector,
                                   enum machine_mode mode,
                                   unsigned width)
{
  bool is_valid;
  static char templ[40];
  const char *mnemonic;
  const char *shift_op;
  unsigned int lane_count = 0;
  char element_char;

  struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };

  /* This will return true to show const_vector is legal for use as either
     a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
     also update INFO to show how the immediate should be generated.  */
  is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
  gcc_assert (is_valid);

  element_char = sizetochar (info.element_width);
  lane_count = width / info.element_width;

  mode = GET_MODE_INNER (mode);
  if (mode == SFmode || mode == DFmode)
    {
      gcc_assert (info.shift == 0 && ! info.mvn);
      if (aarch64_float_const_zero_rtx_p (info.value))
        info.value = GEN_INT (0);
      else
        {
#define buf_size 20
          REAL_VALUE_TYPE r;
          REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
          char float_buf[buf_size] = {'\0'};
          real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
#undef buf_size

          if (lane_count == 1)
            snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
          else
            snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
                      lane_count, element_char, float_buf);
          return templ;
        }
    }

  mnemonic = info.mvn ? "mvni" : "movi";
  shift_op = info.msl ? "msl" : "lsl";

  if (lane_count == 1)
    snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
              mnemonic, UINTVAL (info.value));
  else if (info.shift)
    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
              ", %s %d", mnemonic, lane_count, element_char,
              UINTVAL (info.value), shift_op, info.shift);
  else
    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
              mnemonic, lane_count, element_char, UINTVAL (info.value));
  return templ;
}

char*
aarch64_output_scalar_simd_mov_immediate (rtx immediate,
                                          enum machine_mode mode)
{
  enum machine_mode vmode;

  gcc_assert (!VECTOR_MODE_P (mode));
  vmode = aarch64_simd_container_mode (mode, 64);
  rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
  return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
}

/* Split operands into moves from op[1] + op[2] into op[0].  */

void
aarch64_split_combinev16qi (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  enum machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
  rtx destlo, desthi;

  gcc_assert (halfmode == V16QImode);

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
                               GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  */
  if (reg_overlap_mentioned_p (operands[2], destlo)
      && reg_overlap_mentioned_p (operands[1], desthi))
    {
      emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
      emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
      emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
    }
  else if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
         is in the right place already.  */
      if (src1 != dest)
        emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
        emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
        emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
        emit_move_insn (destlo, operands[1]);
    }
}

/* vec_perm support.  */

#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  enum machine_mode vmode;
  unsigned char nelt;
  bool one_vector_p;
  bool testing_p;
};

/* Generate a variable permutation.  */

static void
aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_SIMD);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
        {
          /* Expand the argument to a V16QI mode by duplicating it.  */
          rtx pair = gen_reg_rtx (V16QImode);
          emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
          emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
        }
      else
        {
          emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
        }
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
        {
          pair = gen_reg_rtx (V16QImode);
          emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
          emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
        }
      else
        {
          pair = gen_reg_rtx (OImode);
          emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
          emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
        }
    }
}

void
aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  enum machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx rmask[MAX_VECT_LEN], mask;

  gcc_checking_assert (!BYTES_BIG_ENDIAN);

  /* The TBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
  for (i = 0; i < nelt; ++i)
    rmask[i] = mask;
  mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  aarch64_expand_vec_perm_1 (target, op0, op1, sel);
}

/* Recognize patterns suitable for the TRN instructions.  */
static bool
aarch64_evpc_trn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
        return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
        return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }
  out = d->target;

  if (odd)
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_trn2v16qi; break;
        case V8QImode: gen = gen_aarch64_trn2v8qi; break;
        case V8HImode: gen = gen_aarch64_trn2v8hi; break;
        case V4HImode: gen = gen_aarch64_trn2v4hi; break;
        case V4SImode: gen = gen_aarch64_trn2v4si; break;
        case V2SImode: gen = gen_aarch64_trn2v2si; break;
        case V2DImode: gen = gen_aarch64_trn2v2di; break;
        case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
        case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
        case V2DFmode: gen = gen_aarch64_trn2v2df; break;
        default:
          return false;
        }
    }
  else
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_trn1v16qi; break;
        case V8QImode: gen = gen_aarch64_trn1v8qi; break;
        case V8HImode: gen = gen_aarch64_trn1v8hi; break;
        case V4HImode: gen = gen_aarch64_trn1v4hi; break;
        case V4SImode: gen = gen_aarch64_trn1v4si; break;
        case V2SImode: gen = gen_aarch64_trn1v2si; break;
        case V2DImode: gen = gen_aarch64_trn1v2di; break;
        case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
        case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
        case V2DFmode: gen = gen_aarch64_trn1v2df; break;
        default:
          return false;
        }
    }

  emit_insn (gen (out, in0, in1));
  return true;
}

/* Recognize patterns suitable for the UZP instructions.  */
static bool
aarch64_evpc_uzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt = (i * 2 + odd) & mask;
      if (d->perm[i] != elt)
        return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }
  out = d->target;

  if (odd)
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
        case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
        case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
        case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
        case V4SImode: gen = gen_aarch64_uzp2v4si; break;
        case V2SImode: gen = gen_aarch64_uzp2v2si; break;
        case V2DImode: gen = gen_aarch64_uzp2v2di; break;
        case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
        case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
        case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
        default:
          return false;
        }
    }
  else
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
        case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
        case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
        case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
        case V4SImode: gen = gen_aarch64_uzp1v4si; break;
        case V2SImode: gen = gen_aarch64_uzp1v2si; break;
        case V2DImode: gen = gen_aarch64_uzp1v2di; break;
        case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
        case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
        case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
        default:
          return false;
        }
    }

  emit_insn (gen (out, in0, in1));
  return true;
}

/* Recognize patterns suitable for the ZIP instructions.  */
static bool
aarch64_evpc_zip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  enum machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  high = nelt / 2;
  if (d->perm[0] == high)
    /* Do Nothing.  */
    ;
  else if (d->perm[0] == 0)
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt = (i + high) & mask;
      if (d->perm[i * 2] != elt)
        return false;
      elt = (elt + nelt) & mask;
      if (d->perm[i * 2 + 1] != elt)
        return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      high = !high;
    }
  out = d->target;

  if (high)
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_zip2v16qi; break;
        case V8QImode: gen = gen_aarch64_zip2v8qi; break;
        case V8HImode: gen = gen_aarch64_zip2v8hi; break;
        case V4HImode: gen = gen_aarch64_zip2v4hi; break;
        case V4SImode: gen = gen_aarch64_zip2v4si; break;
        case V2SImode: gen = gen_aarch64_zip2v2si; break;
        case V2DImode: gen = gen_aarch64_zip2v2di; break;
        case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
        case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
        case V2DFmode: gen = gen_aarch64_zip2v2df; break;
        default:
          return false;
        }
    }
  else
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_zip1v16qi; break;
        case V8QImode: gen = gen_aarch64_zip1v8qi; break;
        case V8HImode: gen = gen_aarch64_zip1v8hi; break;
        case V4HImode: gen = gen_aarch64_zip1v4hi; break;
        case V4SImode: gen = gen_aarch64_zip1v4si; break;
        case V2SImode: gen = gen_aarch64_zip1v2si; break;
        case V2DImode: gen = gen_aarch64_zip1v2di; break;
        case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
        case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
        case V2DFmode: gen = gen_aarch64_zip1v2df; break;
        default:
          return false;
        }
    }

  emit_insn (gen (out, in0, in1));
  return true;
}

static bool
aarch64_evpc_dup (struct expand_vec_perm_d *d)
{
  rtx (*gen) (rtx, rtx, rtx);
  rtx out = d->target;
  rtx in0;
  enum machine_mode vmode = d->vmode;
  unsigned int i, elt, nelt = d->nelt;
  rtx lane;

  /* TODO: This may not be big-endian safe.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; i++)
    {
      if (elt != d->perm[i])
        return false;
    }

  /* The generic preparation in aarch64_expand_vec_perm_const_1
     swaps the operand order and the permute indices if it finds
     d->perm[0] to be in the second operand.  Thus, we can always
     use d->op0 and need not do any extra arithmetic to get the
     correct lane number.  */
  in0 = d->op0;
  lane = GEN_INT (elt);

  switch (vmode)
    {
    case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
    case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
    case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
    case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
    case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
    case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
    case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
    case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
    case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
    case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
    default:
      return false;
    }

  emit_insn (gen (out, in0, lane));
  return true;
}

static bool
aarch64_evpc_tbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  enum machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  /* TODO: ARM's TBL indexing is little-endian.  In order to handle GCC's
     numbering of elements for big-endian, we must reverse the order.  */
  if (BYTES_BIG_ENDIAN)
    return false;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (d->perm[i]);
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}

static bool
aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;
      rtx x;

      for (i = 0; i < nelt; ++i)
        d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);

      x = d->op0;
      d->op0 = d->op1;
      d->op1 = x;
    }

  if (TARGET_SIMD)
    {
      if (aarch64_evpc_zip (d))
        return true;
      else if (aarch64_evpc_uzp (d))
        return true;
      else if (aarch64_evpc_trn (d))
        return true;
      else if (aarch64_evpc_dup (d))
        return true;
      return aarch64_evpc_tbl (d);
    }

  return false;
}

/* Expand a vec_perm_const pattern.  */

bool
aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
        break;

      /* The elements of PERM do not suggest that only the first operand
         is used, but both operands are identical.  Allow easier matching
         of the permutation by folding the permutation into the single
         input vector.  */
      /* Fall Through.  */
    case 2:
      for (i = 0; i < nelt; ++i)
        d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return aarch64_expand_vec_perm_const_1 (&d);
}

static bool
aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
                                     const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Calculate whether all elements are in one vector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* If all elements are from the second vector, reindex as if from the
     first vector.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to a single vector.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = aarch64_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}

#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST aarch64_address_cost

/* This hook determines whether unnamed bitfields affect the alignment
   of the containing structure.  The hook returns true if the structure
   should inherit the alignment requirements of an unnamed bitfield's
   type.  */
#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
  hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START aarch64_start_file

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk

#undef TARGET_ASM_SELECT_RTX_SECTION
#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list

#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE aarch64_can_eliminate

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage

/* Only the least significant bit is used for initialization guard
   variables.  */
#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix

#ifdef TARGET_BIG_ENDIAN_DEFAULT
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
#endif

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL aarch64_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN aarch64_fold_builtin

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG aarch64_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance

#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE aarch64_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS aarch64_init_builtins

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode

#undef TARGET_LRA_P
#define TARGET_LRA_P aarch64_lra_p

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE aarch64_mangle_type

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

/* This target hook should return true if accesses to volatile bitfields
   should use the narrowest mode possible.  It should return false if these
   accesses should use the bitfield container type.  */
#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE aarch64_override_options

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  aarch64_override_options_after_change

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB aarch64_return_in_msb

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS aarch64_rtx_costs

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p

#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  aarch64_builtin_vectorization_cost

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  aarch64_builtin_vectorized_function

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  aarch64_autovectorize_vector_sizes

/* Section anchor support.  */

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -256

/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
   byte offset; we can do much more for larger data types, but have no way
   to determine the size of the access.  We assume accesses are aligned.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  aarch64_simd_vector_alignment_reachable

/* vec_perm support.  */

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  aarch64_vectorize_vec_perm_const_ok

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-aarch64.h"