/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2015 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "coretypes.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "double-int.h"
#include "fold-const.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "dominance.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "hard-reg-set.h"
#include "statistics.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "target-def.h"
#include "targhooks.h"
#include "langhooks.h"
#include "diagnostic-core.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "gimple-expr.h"
#include "tree-vectorizer.h"
#include "aarch64-cost-tables.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "cortex-a57-fma-steering.h"

/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
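/* Illustrative arithmetic (editor's note, not in the original source):
   under the usual LP64 configuration, POINTER_SIZE is 64 and
   BITS_PER_UNIT is 8, so POINTER_BYTES is 64 / 8 == 8; under ILP32 it
   is 32 / 8 == 4.  */
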
/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC
       A constant symbolic address, in pc-relative literal pool.  */

enum aarch64_address_type {
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};

struct aarch64_address_info {
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};

struct simd_immediate_info
{
  rtx value;
  int shift;
  int element_width;
  bool mvn;
  bool msl;
};

/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif

static bool aarch64_composite_type_p (const_tree, machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
						     const_tree,
						     machine_mode *, int *,
						     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (machine_mode);
static unsigned bit_count (unsigned HOST_WIDE_INT);
static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
						 const unsigned char *sel);
static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);

/* Major revision number of the ARM Architecture implemented by the
   target.  */
unsigned aarch64_architecture_version;

/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = cortexa53;

/* The current tuning set.  */
const struct tune_params *aarch64_tune_params;

/* Mask to specify which instructions we are allowed to generate.  */
unsigned long aarch64_isa_flags = 0;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;

/* Tuning parameters.  */

static const struct cpu_addrcost_table generic_addrcost_table =
{
    {
      0, /* hi  */
      0, /* si  */
      0, /* di  */
      0, /* ti  */
    },
  0, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  0, /* register_extend  */
  0 /* imm_offset  */
};

static const struct cpu_addrcost_table cortexa57_addrcost_table =
{
    {
      1, /* hi  */
      0, /* si  */
      0, /* di  */
      1, /* ti  */
    },
  0, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  0, /* register_extend  */
  0 /* imm_offset  */
};

static const struct cpu_addrcost_table xgene1_addrcost_table =
{
    {
      1, /* hi  */
      0, /* si  */
      0, /* di  */
      1, /* ti  */
    },
  1, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  1, /* register_extend  */
  0 /* imm_offset  */
};

static const struct cpu_regmove_cost generic_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost cortexa57_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost cortexa53_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost thunderx_regmove_cost =
{
  2, /* GP2GP  */
  2, /* GP2FP  */
  6, /* FP2GP  */
  4 /* FP2FP  */
};

static const struct cpu_regmove_cost xgene1_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  8, /* GP2FP  */
  8, /* FP2GP  */
  2 /* FP2FP  */
};

/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost generic_vector_cost =
{
  1, /* scalar_stmt_cost  */
  1, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  1, /* vec_stmt_cost  */
  1, /* vec_to_scalar_cost  */
  1, /* scalar_to_vec_cost  */
  1, /* vec_align_load_cost  */
  1, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  3, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Cortex-A57 costs for vector insn classes.  */
static const struct cpu_vector_cost cortexa57_vector_cost =
{
  1, /* scalar_stmt_cost  */
  4, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  3, /* vec_stmt_cost  */
  8, /* vec_to_scalar_cost  */
  8, /* scalar_to_vec_cost  */
  5, /* vec_align_load_cost  */
  5, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  1, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* X-Gene 1 costs for vector insn classes.  */
static const struct cpu_vector_cost xgene1_vector_cost =
{
  1, /* scalar_stmt_cost  */
  5, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  2, /* vec_stmt_cost  */
  4, /* vec_to_scalar_cost  */
  4, /* scalar_to_vec_cost  */
  10, /* vec_align_load_cost  */
  10, /* vec_unalign_load_cost  */
  2, /* vec_unalign_store_cost  */
  2, /* vec_store_cost  */
  2, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

#define AARCH64_FUSE_NOTHING	(0)
#define AARCH64_FUSE_MOV_MOVK	(1 << 0)
#define AARCH64_FUSE_ADRP_ADD	(1 << 1)
#define AARCH64_FUSE_MOVK_MOVK	(1 << 2)
#define AARCH64_FUSE_ADRP_LDR	(1 << 3)
#define AARCH64_FUSE_CMP_BRANCH	(1 << 4)

static const struct tune_params generic_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  4, /* memmov_cost  */
  2, /* issue_rate  */
  AARCH64_FUSE_NOTHING, /* fuseable_ops  */
  8,  /* function_align.  */
  8,  /* jump_align.  */
  4,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1   /* vec_reassoc_width.  */
};

static const struct tune_params cortexa53_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  4, /* memmov_cost  */
  2, /* issue_rate  */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fuseable_ops  */
  8,  /* function_align.  */
  8,  /* jump_align.  */
  4,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1   /* vec_reassoc_width.  */
};

static const struct tune_params cortexa57_tunings =
{
  &cortexa57_extra_costs,
  &cortexa57_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  4, /* memmov_cost  */
  3, /* issue_rate  */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fuseable_ops  */
  16, /* function_align.  */
  8,  /* jump_align.  */
  4,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1   /* vec_reassoc_width.  */
};

static const struct tune_params thunderx_tunings =
{
  &thunderx_extra_costs,
  &generic_addrcost_table,
  &thunderx_regmove_cost,
  &generic_vector_cost,
  6, /* memmov_cost  */
  2, /* issue_rate  */
  AARCH64_FUSE_CMP_BRANCH, /* fuseable_ops  */
  8,  /* function_align.  */
  8,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1   /* vec_reassoc_width.  */
};

static const struct tune_params xgene1_tunings =
{
  &xgene1_extra_costs,
  &xgene1_addrcost_table,
  &xgene1_regmove_cost,
  &generic_vector_cost,
  6, /* memmov_cost  */
  4, /* issue_rate  */
  AARCH64_FUSE_NOTHING, /* fuseable_ops  */
  16, /* function_align.  */
  8,  /* jump_align.  */
  16, /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1   /* vec_reassoc_width.  */
};

/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor core;
  const char *arch;
  unsigned architecture_version;
  const unsigned long flags;
  const struct tune_params *const tune;
};

/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
  {NAME, SCHED, #ARCH, ARCH, FLAGS, &COSTS##_tunings},
#include "aarch64-cores.def"
#undef AARCH64_CORE
  {"generic", cortexa53, "8", 8, AARCH64_FL_FOR_ARCH8, &generic_tunings},
  {NULL, aarch64_none, NULL, 0, 0, NULL}
};

/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, ARCH, FLAGS, NULL},
#include "aarch64-arches.def"
#undef AARCH64_ARCH
  {NULL, aarch64_none, NULL, 0, 0, NULL}
};

/* Target specification.  These are populated as command-line arguments
   are processed, or NULL if not specified.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)

/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};

/* ISA extensions in AArch64.  */
static const struct aarch64_option_extension all_extensions[] =
{
#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF, FEATURE_STRING) \
  {NAME, FLAGS_ON, FLAGS_OFF},
#include "aarch64-option-extensions.def"
#undef AARCH64_OPT_EXTENSION
  {NULL, 0, 0}
};

/* Used to track the size of an address when generating a pre/post
   increment address.  */
static machine_mode aarch64_memory_reference_mode;

/* A table of valid AArch64 "bitmask immediate" values for
   logical instructions.  */

#define AARCH64_NUM_BITMASKS  5334
static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
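/* Illustrative note (editor's sketch, not from the original source):
   AArch64 logical instructions encode an immediate as a contiguous run
   of ones, rotated, and replicated across the register at an element
   size of 2, 4, 8, 16, 32 or 64 bits.  For example,
   0x00ff00ff00ff00ff (eight ones replicated every 16 bits) is
   encodable, while an arbitrary constant such as 0x0000000000abcdef is
   not; the table above enumerates the encodable 64-bit values.  */
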
typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
} aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))

/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
static unsigned
aarch64_min_divisions_for_recip_mul (enum machine_mode mode ATTRIBUTE_UNUSED)
{
  return 2;
}

static int
aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED,
			     enum machine_mode mode)
{
  if (VECTOR_MODE_P (mode))
    return aarch64_tune_params->vec_reassoc_width;
  if (INTEGRAL_MODE_P (mode))
    return aarch64_tune_params->int_reassoc_width;
  if (FLOAT_MODE_P (mode))
    return aarch64_tune_params->fp_reassoc_width;
  return 1;
}

/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
   if (GP_REGNUM_P (regno))
     return AARCH64_DWARF_R0 + regno - R0_REGNUM;
   else if (regno == SP_REGNUM)
     return AARCH64_DWARF_SP;
   else if (FP_REGNUM_P (regno))
     return AARCH64_DWARF_V0 + regno - V0_REGNUM;

   /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
      equivalent DWARF register.  */
   return DWARF_FRAME_REGISTERS;
}

/* Return TRUE if MODE is any of the large INT modes.  */
static bool
aarch64_vect_struct_mode_p (machine_mode mode)
{
  return mode == OImode || mode == CImode || mode == XImode;
}

/* Return TRUE if MODE is any of the vector modes.  */
static bool
aarch64_vector_mode_p (machine_mode mode)
{
  return aarch64_vector_mode_supported_p (mode)
	 || aarch64_vect_struct_mode_p (mode);
}

/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
static bool
aarch64_array_mode_supported_p (machine_mode mode,
				unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_SIMD
      && AARCH64_VALID_SIMD_QREG_MODE (mode)
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}

/* Implement HARD_REGNO_NREGS.  */
int
aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
{
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
    default:
      return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    }
  gcc_unreachable ();
}
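/* Worked example (editor's illustration, assuming the usual AArch64
   UNITS_PER_VREG of 16 and UNITS_PER_WORD of 8): a 16-byte vector mode
   in an FP register needs (16 + 16 - 1) / 16 == 1 register, while a
   16-byte TImode value in general registers needs (16 + 8 - 1) / 8 == 2;
   the rounding-up division also makes an 8-byte mode count as a single
   register in either class.  */
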
/* Implement HARD_REGNO_MODE_OK.  */
int
aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return regno == CC_REGNUM;

  if (regno == SP_REGNUM)
    /* The purpose of comparing with ptr_mode is to support the
       global register variable associated with the stack pointer
       register via the syntax of asm ("wsp") in ILP32.  */
    return mode == Pmode || mode == ptr_mode;

  if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
    return mode == Pmode;

  if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
    return 1;

  if (FP_REGNUM_P (regno))
    {
      if (aarch64_vect_struct_mode_p (mode))
	return
	  (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
      else
	return 1;
    }

  return 0;
}

/* Implement HARD_REGNO_CALLER_SAVE_MODE.  */
machine_mode
aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
				     machine_mode mode)
{
  /* Handle modes that fit within single registers.  */
  if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
    {
      if (GET_MODE_SIZE (mode) >= 4)
	return mode;
      else
	return SImode;
    }
  /* Fall back to generic for multi-reg and very large modes.  */
  else
    return choose_hard_reg_mode (regno, nregs, false);
}

/* Return true if calls to DECL should be treated as
   long-calls (i.e. called via a register).  */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
  return false;
}

/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (i.e. called via a register).  */
bool
aarch64_is_long_call_p (rtx sym)
{
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}

/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

   (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
bool
aarch64_is_extend_from_extract (machine_mode mode, rtx mult_imm,
				rtx extract_imm)
{
  HOST_WIDE_INT mult_val, extract_val;

  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
    return false;

  mult_val = INTVAL (mult_imm);
  extract_val = INTVAL (extract_imm);

  if (extract_val > 8
      && extract_val < GET_MODE_BITSIZE (mode)
      && exact_log2 (extract_val & ~7) > 0
      && (extract_val & 7) <= 4
      && mult_val == (1 << (extract_val & 7)))
    return true;

  return false;
}
/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}

/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
  machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
  return cc_reg;
}

/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

rtx
aarch64_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}

/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;
  rtx sym, addend;

  if (GET_CODE (addr) == CONST)
    {
      split_const (addr, &sym, &addend);
      if (GET_CODE (sym) == SYMBOL_REF)
	tls_kind = SYMBOL_REF_TLS_MODEL (sym);
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}

/* We'll allow lo_sum's in addresses in our legitimate addresses
   so that combine would take care of combining addresses where
   necessary, but for generation purposes, we'll generate the address
   as:

   RTL                               Absolute
   tmp = hi (symbol_ref);            adrp  x1, foo
   dest = lo_sum (tmp, symbol_ref);  add dest, x1, :lo_12:foo

   PIC                               TLS
   adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
   ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo
                                     bl   __tls_get_addr

   Load TLS symbol, depending on TLS mechanism and TLS access model.

   Global Dynamic - Traditional TLS:
   adrp tmp, :tlsgd:imm
   add  dest, tmp, #:tlsgd_lo12:imm
   bl   __tls_get_addr

   Global Dynamic - TLS Descriptors:
   adrp dest, :tlsdesc:imm
   ldr  tmp, [dest, #:tlsdesc_lo12:imm]
   add  dest, dest, #:tlsdesc_lo12:imm
   blr  tmp

   Initial Exec:
   mrs  tp, tpidr_el0
   adrp tmp, :gottprel:imm
   ldr  dest, [tmp, #:gottprel_lo12:imm]
   add  dest, dest, tp

   Local Exec:
   mrs  tp, tpidr_el0
   add  t0, tp, #:tprel_hi12:imm, lsl #12
   add  t0, t0, #:tprel_lo12_nc:imm
*/

static void
aarch64_load_symref_appropriately (rtx dest, rtx imm,
				   enum aarch64_symbol_type type)
{
  switch (type)
    {
    case SYMBOL_SMALL_ABSOLUTE:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode.  */
	rtx tmp_reg = dest;
	machine_mode mode = GET_MODE (dest);

	gcc_assert (mode == Pmode || mode == ptr_mode);

	if (can_create_pseudo_p ())
	  tmp_reg = gen_reg_rtx (mode);

	emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
	emit_insn (gen_add_losym (dest, tmp_reg, imm));
	return;
      }

    case SYMBOL_TINY_ABSOLUTE:
      emit_insn (gen_rtx_SET (Pmode, dest, imm));
      return;

    case SYMBOL_SMALL_GOT:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the got entry is always of SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in the memory), it has SImode; it may have
	   DImode if dest is dereferenced to access the memory.
	   This is why we have to handle three different ldr_got_small
	   patterns here (two patterns for ILP32).  */
	rtx tmp_reg = dest;
	machine_mode mode = GET_MODE (dest);

	if (can_create_pseudo_p ())
	  tmp_reg = gen_reg_rtx (mode);

	emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
	    else
	      emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
	  }
	else
	  {
	    gcc_assert (mode == Pmode);
	    emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
	  }
	return;
      }

    case SYMBOL_SMALL_TLSGD:
      {
	rtx_insn *insns;
	rtx result = gen_rtx_REG (Pmode, R0_REGNUM);

	start_sequence ();
	aarch64_emit_call_insn (gen_tlsgd_small (result, imm));
	insns = get_insns ();
	end_sequence ();

	RTL_CONST_CALL_P (insns) = 1;
	emit_libcall_block (insns, dest, result, imm);
	return;
      }

    case SYMBOL_SMALL_TLSDESC:
      {
	machine_mode mode = GET_MODE (dest);
	rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
	rtx tp;

	gcc_assert (mode == Pmode || mode == ptr_mode);

	/* In ILP32, the got entry is always of SImode size.  Unlike
	   small GOT, the dest is fixed at reg 0.  */
	if (TARGET_ILP32)
	  emit_insn (gen_tlsdesc_small_si (imm));
	else
	  emit_insn (gen_tlsdesc_small_di (imm));
	tp = aarch64_load_tp (NULL);

	if (mode != Pmode)
	  tp = gen_lowpart (mode, tp);

	emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_SMALL_GOTTPREL:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the got entry is always of SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in the memory), it has SImode; it may have
	   DImode if dest is dereferenced to access the memory.
	   This is why we have to handle three different tlsie_small
	   patterns here (two patterns for ILP32).  */
	machine_mode mode = GET_MODE (dest);
	rtx tmp_reg = gen_reg_rtx (mode);
	rtx tp = aarch64_load_tp (NULL);

	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      emit_insn (gen_tlsie_small_di (tmp_reg, imm));
	    else
	      {
		emit_insn (gen_tlsie_small_si (tmp_reg, imm));
		tp = gen_lowpart (mode, tp);
	      }
	  }
	else
	  {
	    gcc_assert (mode == Pmode);
	    emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
	  }

	emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_SMALL_TPREL:
      {
	rtx tp = aarch64_load_tp (NULL);

	if (GET_MODE (dest) != Pmode)
	  tp = gen_lowpart (GET_MODE (dest), tp);

	emit_insn (gen_tlsle_small (dest, tp, imm));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_TINY_GOT:
      emit_insn (gen_ldr_got_tiny (dest, imm));
      return;

    default:
      gcc_unreachable ();
    }
}

/* Emit a move from SRC to DEST.  Assume that the move expanders can
   handle all moves if !can_create_pseudo_p ().  The distinction is
   important because, unlike emit_move_insn, the move expanders know
   how to force Pmode objects into the constant pool even when the
   constant pool address is not itself legitimate.  */
static rtx
aarch64_emit_move (rtx dest, rtx src)
{
  return (can_create_pseudo_p ()
	  ? emit_move_insn (dest, src)
	  : emit_move_insn_1 (dest, src));
}

/* Split a 128-bit move operation into two 64-bit move operations,
   taking care to handle partial overlap of register to register
   copies.  Special cases are needed when moving between GP regs and
   FP regs.  SRC can be a register, constant or memory; DST a register
   or memory.  If either operand is memory it must not have any side
   effects.  */
void
aarch64_split_128bit_move (rtx dst, rtx src)
{
  rtx dst_lo, dst_hi;
  rtx src_lo, src_hi;

  machine_mode mode = GET_MODE (dst);

  gcc_assert (mode == TImode || mode == TFmode);
  gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
  gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);

  if (REG_P (dst) && REG_P (src))
    {
      int src_regno = REGNO (src);
      int dst_regno = REGNO (dst);

      /* Handle FP <-> GP regs.  */
      if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
	{
	  src_lo = gen_lowpart (word_mode, src);
	  src_hi = gen_highpart (word_mode, src);

	  if (mode == TImode)
	    {
	      emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
	      emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
	    }
	  else
	    {
	      emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
	      emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
	    }
	  return;
	}
      else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
	{
	  dst_lo = gen_lowpart (word_mode, dst);
	  dst_hi = gen_highpart (word_mode, dst);

	  if (mode == TImode)
	    {
	      emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
	      emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
	    }
	  else
	    {
	      emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
	      emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
	    }
	  return;
	}
    }

  dst_lo = gen_lowpart (word_mode, dst);
  dst_hi = gen_highpart (word_mode, dst);
  src_lo = gen_lowpart (word_mode, src);
  src_hi = gen_highpart_mode (word_mode, mode, src);

  /* At most one pairing may overlap.  */
  if (reg_overlap_mentioned_p (dst_lo, src_hi))
    {
      aarch64_emit_move (dst_hi, src_hi);
      aarch64_emit_move (dst_lo, src_lo);
    }
  else
    {
      aarch64_emit_move (dst_lo, src_lo);
      aarch64_emit_move (dst_hi, src_hi);
    }
}

bool
aarch64_split_128bit_move_p (rtx dst, rtx src)
{
  return (! REG_P (src)
	  || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
}
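/* Illustrative note (editor's sketch): for a TImode copy between
   overlapping GP register pairs, only one of the two 64-bit halves can
   clash.  Copying (x0,x1) to (x1,x2), dst_lo x1 overlaps src_hi x1, so
   the high half is moved first; copying (x1,x2) to (x0,x1), dst_lo x0
   is clear of src_hi x2, so the low half can safely go first.  */
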
/* Split a complex SIMD combine.  */

void
aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
{
  machine_mode src_mode = GET_MODE (src1);
  machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src1) && REG_P (src2))
    {
      rtx (*gen) (rtx, rtx, rtx);

      switch (src_mode)
	{
	case V8QImode:
	  gen = gen_aarch64_simd_combinev8qi;
	  break;
	case V4HImode:
	  gen = gen_aarch64_simd_combinev4hi;
	  break;
	case V2SImode:
	  gen = gen_aarch64_simd_combinev2si;
	  break;
	case V2SFmode:
	  gen = gen_aarch64_simd_combinev2sf;
	  break;
	case DImode:
	  gen = gen_aarch64_simd_combinedi;
	  break;
	case DFmode:
	  gen = gen_aarch64_simd_combinedf;
	  break;
	default:
	  gcc_unreachable ();
	}

      emit_insn (gen (dst, src1, src2));
      return;
    }
}

/* Split a complex SIMD move.  */

void
aarch64_split_simd_move (rtx dst, rtx src)
{
  machine_mode src_mode = GET_MODE (src);
  machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src))
    {
      rtx (*gen) (rtx, rtx);

      gcc_assert (VECTOR_MODE_P (src_mode));

      switch (src_mode)
	{
	case V16QImode:
	  gen = gen_aarch64_split_simd_movv16qi;
	  break;
	case V8HImode:
	  gen = gen_aarch64_split_simd_movv8hi;
	  break;
	case V4SImode:
	  gen = gen_aarch64_split_simd_movv4si;
	  break;
	case V2DImode:
	  gen = gen_aarch64_split_simd_movv2di;
	  break;
	case V4SFmode:
	  gen = gen_aarch64_split_simd_movv4sf;
	  break;
	case V2DFmode:
	  gen = gen_aarch64_split_simd_movv2df;
	  break;
	default:
	  gcc_unreachable ();
	}

      emit_insn (gen (dst, src));
      return;
    }
}

static rtx
aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
{
  if (can_create_pseudo_p ())
    return force_reg (mode, value);
  else
    {
      x = aarch64_emit_move (x, value);
      return x;
    }
}

static rtx
aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
{
  if (!aarch64_plus_immediate (GEN_INT (offset), mode))
    {
      rtx high;
      /* Load the full offset into a register.  This
	 might be improvable in the future.  */
      high = GEN_INT (offset);
      offset = 0;
      high = aarch64_force_temporary (mode, temp, high);
      reg = aarch64_force_temporary (mode, temp,
				     gen_rtx_PLUS (mode, high, reg));
    }
  return plus_constant (mode, reg, offset);
}

static int
aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
				machine_mode mode)
{
  unsigned HOST_WIDE_INT mask;
  int i;
  bool first;
  unsigned HOST_WIDE_INT val;
  bool subtargets;
  rtx subtarget;
  int one_match, zero_match, first_not_ffff_match;
  int num_insns = 0;

  if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
    {
      if (generate)
	emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
      num_insns++;
      return num_insns;
    }

  if (mode == SImode)
    {
      /* We know we can't do this in 1 insn, and we must be able to do it
	 in two; so don't mess around looking for sequences that don't buy
	 us anything.  */
      if (generate)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, dest,
				  GEN_INT (INTVAL (imm) & 0xffff)));
	  emit_insn (gen_insv_immsi (dest, GEN_INT (16),
				     GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
	}
      num_insns += 2;
      return num_insns;
    }

  /* Remaining cases are all for DImode.  */

  val = INTVAL (imm);
  subtargets = optimize && can_create_pseudo_p ();

  one_match = 0;
  zero_match = 0;
  mask = 0xffff;
  first_not_ffff_match = -1;

  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) == mask)
	one_match++;
      else
	{
	  if (first_not_ffff_match < 0)
	    first_not_ffff_match = i;
	  if ((val & mask) == 0)
	    zero_match++;
	}
    }

  if (one_match == 2)
    {
      /* Set one of the quarters and then insert back into result.  */
      mask = 0xffffll << first_not_ffff_match;
      if (generate)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
	  emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
				     GEN_INT ((val >> first_not_ffff_match)
					      & 0xffff)));
	}
      num_insns += 2;
      return num_insns;
    }

  if (zero_match == 2)
    goto simple_sequence;

  mask = 0x0ffff0000UL;
  for (i = 16; i < 64; i += 16, mask <<= 16)
    {
      HOST_WIDE_INT comp = mask & ~(mask - 1);

      if (aarch64_uimm12_shift (val - (val & mask)))
	{
	  if (generate)
	    {
	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
	      emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				      GEN_INT (val & mask)));
	      emit_insn (gen_adddi3 (dest, subtarget,
				     GEN_INT (val - (val & mask))));
	    }
	  num_insns += 2;
	  return num_insns;
	}
      else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
	{
	  if (generate)
	    {
	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
	      emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				      GEN_INT ((val + comp) & mask)));
	      emit_insn (gen_adddi3 (dest, subtarget,
				     GEN_INT (val - ((val + comp) & mask))));
	    }
	  num_insns += 2;
	  return num_insns;
	}
      else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
	{
	  if (generate)
	    {
	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
	      emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				      GEN_INT ((val - comp) | ~mask)));
	      emit_insn (gen_adddi3 (dest, subtarget,
				     GEN_INT (val - ((val - comp) | ~mask))));
	    }
	  num_insns += 2;
	  return num_insns;
	}
      else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
	{
	  if (generate)
	    {
	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
	      emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				      GEN_INT (val | ~mask)));
	      emit_insn (gen_adddi3 (dest, subtarget,
				     GEN_INT (val - (val | ~mask))));
	    }
	  num_insns += 2;
	  return num_insns;
	}
    }

  /* See if we can do it by arithmetically combining two
     immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      int j;
      mask = 0xffff;

      if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
	  || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
	{
	  if (generate)
	    {
	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
	      emit_insn (gen_rtx_SET (VOIDmode, subtarget,
				      GEN_INT (aarch64_bitmasks[i])));
	      emit_insn (gen_adddi3 (dest, subtarget,
				     GEN_INT (val - aarch64_bitmasks[i])));
	    }
	  num_insns += 2;
	  return num_insns;
	}

      for (j = 0; j < 64; j += 16, mask <<= 16)
	{
	  if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
	    {
	      if (generate)
		{
		  emit_insn (gen_rtx_SET (VOIDmode, dest,
					  GEN_INT (aarch64_bitmasks[i])));
		  emit_insn (gen_insv_immdi (dest, GEN_INT (j),
					     GEN_INT ((val >> j) & 0xffff)));
		}
	      num_insns += 2;
	      return num_insns;
	    }
	}
    }

  /* See if we can do it by logically combining two immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
	{
	  int j;

	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
	    if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
	      {
		if (generate)
		  {
		    subtarget = subtargets ? gen_reg_rtx (mode) : dest;
		    emit_insn (gen_rtx_SET (VOIDmode, subtarget,
					    GEN_INT (aarch64_bitmasks[i])));
		    emit_insn (gen_iordi3 (dest, subtarget,
					   GEN_INT (aarch64_bitmasks[j])));
		  }
		num_insns += 2;
		return num_insns;
	      }
	}
      else if ((val & aarch64_bitmasks[i]) == val)
	{
	  int j;

	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
	    if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
	      {
		if (generate)
		  {
		    subtarget = subtargets ? gen_reg_rtx (mode) : dest;
		    emit_insn (gen_rtx_SET (VOIDmode, subtarget,
					    GEN_INT (aarch64_bitmasks[j])));
		    emit_insn (gen_anddi3 (dest, subtarget,
					   GEN_INT (aarch64_bitmasks[i])));
		  }
		num_insns += 2;
		return num_insns;
	      }
	}
    }

  if (one_match > zero_match)
    {
      /* Set either first three quarters or all but the third.  */
      mask = 0xffffll << (16 - first_not_ffff_match);
      if (generate)
	emit_insn (gen_rtx_SET (VOIDmode, dest,
				GEN_INT (val | mask | 0xffffffff00000000ull)));
      num_insns++;

      /* Now insert other two quarters.  */
      for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
	   i < 64; i += 16, mask <<= 16)
	{
	  if ((val & mask) != mask)
	    {
	      if (generate)
		emit_insn (gen_insv_immdi (dest, GEN_INT (i),
					   GEN_INT ((val >> i) & 0xffff)));
	      num_insns++;
	    }
	}
      return num_insns;
    }

 simple_sequence:
  first = true;
  mask = 0xffff;
  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) != 0)
	{
	  if (first)
	    {
	      if (generate)
		emit_insn (gen_rtx_SET (VOIDmode, dest,
					GEN_INT (val & mask)));
	      num_insns++;
	      first = false;
	    }
	  else
	    {
	      if (generate)
		emit_insn (gen_insv_immdi (dest, GEN_INT (i),
					   GEN_INT ((val >> i) & 0xffff)));
	      num_insns++;
	    }
	}
    }

  return num_insns;
}
void
aarch64_expand_mov_immediate (rtx dest, rtx imm)
{
  machine_mode mode = GET_MODE (dest);

  gcc_assert (mode == SImode || mode == DImode);

  /* Check on what type of symbol it is.  */
  if (GET_CODE (imm) == SYMBOL_REF
      || GET_CODE (imm) == LABEL_REF
      || GET_CODE (imm) == CONST)
    {
      rtx mem, base, offset;
      enum aarch64_symbol_type sty;

      /* If we have (const (plus symbol offset)), separate out the offset
	 before we start classifying the symbol.  */
      split_const (imm, &base, &offset);

      sty = aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR);
      switch (sty)
	{
	case SYMBOL_FORCE_TO_MEM:
	  if (offset != const0_rtx
	      && targetm.cannot_force_const_mem (mode, imm))
	    {
	      gcc_assert (can_create_pseudo_p ());
	      base = aarch64_force_temporary (mode, dest, base);
	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
	      aarch64_emit_move (dest, base);
	      return;
	    }

	  mem = force_const_mem (ptr_mode, imm);
	  gcc_assert (mem);
	  if (mode != ptr_mode)
	    mem = gen_rtx_ZERO_EXTEND (mode, mem);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
	  return;

	case SYMBOL_SMALL_TLSGD:
	case SYMBOL_SMALL_TLSDESC:
	case SYMBOL_SMALL_GOTTPREL:
	case SYMBOL_SMALL_GOT:
	case SYMBOL_TINY_GOT:
	  if (offset != const0_rtx)
	    {
	      gcc_assert (can_create_pseudo_p ());
	      base = aarch64_force_temporary (mode, dest, base);
	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
	      aarch64_emit_move (dest, base);
	      return;
	    }
	  /* FALLTHRU */

	case SYMBOL_SMALL_TPREL:
	case SYMBOL_SMALL_ABSOLUTE:
	case SYMBOL_TINY_ABSOLUTE:
	  aarch64_load_symref_appropriately (dest, imm, sty);
	  return;

	default:
	  gcc_unreachable ();
	}
    }

  if (!CONST_INT_P (imm))
    {
      if (GET_CODE (imm) == HIGH)
	emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
      else
	{
	  rtx mem = force_const_mem (mode, imm);
	  gcc_assert (mem);
	  emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
	}

      return;
    }

  aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));
}

static bool
aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
				 tree exp ATTRIBUTE_UNUSED)
{
  /* Currently, always true.  */
  return true;
}

/* Implement TARGET_PASS_BY_REFERENCE.  */

static bool
aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
			   machine_mode mode,
			   const_tree type,
			   bool named ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  machine_mode dummymode;
  int nregs;

  /* GET_MODE_SIZE (BLKmode) is useless since it is 0.  */
  size = (mode == BLKmode && type)
    ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);

  /* Aggregates are passed by reference based on their size.  */
  if (type && AGGREGATE_TYPE_P (type))
    size = int_size_in_bytes (type);

  /* Variable sized arguments are always returned by reference.  */
  if (size < 0)
    return true;

  /* Can this be a candidate to be passed in fp/simd register(s)?  */
  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
					       &dummymode, &nregs,
					       NULL))
    return false;

  /* Arguments which are variable sized or larger than 2 registers are
     passed by reference unless they are a homogeneous floating point
     aggregate.  */
  return size > 2 * UNITS_PER_WORD;
}

/* Return TRUE if VALTYPE is padded to its least significant bits.  */
static bool
aarch64_return_in_msb (const_tree valtype)
{
  machine_mode dummy_mode;
  int dummy_int;

  /* Never happens in little-endian mode.  */
  if (!BYTES_BIG_ENDIAN)
    return false;

  /* Only composite types smaller than or equal to 16 bytes can
     be potentially returned in registers.  */
  if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
      || int_size_in_bytes (valtype) <= 0
      || int_size_in_bytes (valtype) > 16)
    return false;

  /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
     or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
     is always passed/returned in the least significant bits of fp/simd
     register(s).  */
  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
					       &dummy_mode, &dummy_int, NULL))
    return false;

  return true;
}

/* Implement TARGET_FUNCTION_VALUE.
   Define how to find the value returned by a function.  */

static rtx
aarch64_function_value (const_tree type, const_tree func,
			bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  int unsignedp;
  int count;
  machine_mode ag_mode;

  mode = TYPE_MODE (type);
  if (INTEGRAL_TYPE_P (type))
    mode = promote_function_mode (type, mode, &unsignedp, func, 1);

  if (aarch64_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);

      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
	}
    }

  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
					       &ag_mode, &count, NULL))
    {
      if (!aarch64_composite_type_p (type, mode))
	{
	  gcc_assert (count == 1 && mode == ag_mode);
	  return gen_rtx_REG (mode, V0_REGNUM);
	}
      else
	{
	  int i;
	  rtx par;

	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
	  for (i = 0; i < count; i++)
	    {
	      rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				       GEN_INT (i * GET_MODE_SIZE (ag_mode)));
	      XVECEXP (par, 0, i) = tmp;
	    }
	  return par;
	}
    }
  else
    return gen_rtx_REG (mode, R0_REGNUM);
}

/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
   Return true if REGNO is the number of a hard register in which the values
   of called function may come back.  */

static bool
aarch64_function_value_regno_p (const unsigned int regno)
{
  /* Maximum of 16 bytes can be returned in the general registers.  Examples
     of 16-byte return values are: 128-bit integers and 16-byte small
     structures (excluding homogeneous floating-point aggregates).  */
  if (regno == R0_REGNUM || regno == R1_REGNUM)
    return true;

  /* Up to four fp/simd registers can return a function value, e.g. a
     homogeneous floating-point aggregate having four members.  */
  if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
    return !TARGET_GENERAL_REGS_ONLY;

  return false;
}

/* Implement TARGET_RETURN_IN_MEMORY.

   If the type T of the result of a function is such that
     void func (T arg)
   would require that arg be passed as a value in a register (or set of
   registers) according to the parameter passing rules, then the result
   is returned in the same registers as would be used for such an
   argument.  */

static bool
aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  machine_mode ag_mode;
  int count;

  if (!AGGREGATE_TYPE_P (type)
      && TREE_CODE (type) != COMPLEX_TYPE
      && TREE_CODE (type) != VECTOR_TYPE)
    /* Simple scalar types always returned in registers.  */
    return false;

  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
					       type,
					       &ag_mode,
					       &count,
					       NULL))
    return false;

  /* Types larger than 2 registers returned in memory.  */
  size = int_size_in_bytes (type);
  return (size < 0 || size > 2 * UNITS_PER_WORD);
}

static bool
aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
			       const_tree type, int *nregs)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  return aarch64_vfp_is_call_or_return_candidate (mode,
						  type,
						  &pcum->aapcs_vfp_rmode,
						  nregs,
						  NULL);
}

/* Given MODE and TYPE of a function argument, return the alignment in
   bits.  The idea is to suppress any stronger alignment requested by
   the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
   This is a helper function for local use only.  */

static unsigned int
aarch64_function_arg_alignment (machine_mode mode, const_tree type)
{
  unsigned int alignment;

  if (type)
    {
      if (!integer_zerop (TYPE_SIZE (type)))
	{
	  if (TYPE_MODE (type) == mode)
	    alignment = TYPE_ALIGN (type);
	  else
	    alignment = GET_MODE_ALIGNMENT (mode);
	}
      else
	alignment = 0;
    }
  else
    alignment = GET_MODE_ALIGNMENT (mode);

  return alignment;
}

/* Layout a function argument according to the AAPCS64 rules.  The rule
   numbers refer to the rule numbers in the AAPCS64.  */

static void
aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
		    const_tree type,
		    bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int ncrn, nvrn, nregs;
  bool allocate_ncrn, allocate_nvrn;
  HOST_WIDE_INT size;

  /* We need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  /* Size in bytes, rounded to the nearest multiple of 8 bytes.  */
  size
    = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
			UNITS_PER_WORD);

  allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
  allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
						 mode,
						 type,
						 &nregs);

  /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
     The following code thus handles passing by SIMD/FP registers first.  */

  nvrn = pcum->aapcs_nvrn;

  /* C1 - C5 for floating point, homogeneous floating point aggregates (HFA)
     and homogeneous short-vector aggregates (HVA).  */
  if (allocate_nvrn)
    {
      if (nvrn + nregs <= NUM_FP_ARG_REGS)
	{
	  pcum->aapcs_nextnvrn = nvrn + nregs;
	  if (!aarch64_composite_type_p (type, mode))
	    {
	      gcc_assert (nregs == 1);
	      pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
	    }
	  else
	    {
	      rtx par;
	      int i;

	      par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
	      for (i = 0; i < nregs; i++)
		{
		  rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
					 V0_REGNUM + nvrn + i);
		  tmp = gen_rtx_EXPR_LIST
		    (VOIDmode, tmp,
		     GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
		  XVECEXP (par, 0, i) = tmp;
		}
	      pcum->aapcs_reg = par;
	    }
	  return;
	}
      else
	{
	  /* C.3 NSRN is set to 8.  */
	  pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
	  goto on_stack;
	}
    }

  ncrn = pcum->aapcs_ncrn;
  nregs = size / UNITS_PER_WORD;

  /* C6 - C9, though the sign and zero extension semantics are
     handled elsewhere.  This is the case where the argument fits
     entirely in general registers.  */
  if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
    {
      unsigned int alignment = aarch64_function_arg_alignment (mode, type);

      gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);

      /* C.8 if the argument has an alignment of 16 then the NGRN is
	 rounded up to the next even number.  */
      if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
	{
	  ++ncrn;
	  gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
	}
      /* NREGS can be 0 when e.g. an empty structure is to be passed.
	 A reg is still generated for it, but the caller should be smart
	 enough not to use it.  */
      if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
	pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
      else
	{
	  rtx par;
	  int i;

	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
	  for (i = 0; i < nregs; i++)
	    {
	      rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				       GEN_INT (i * UNITS_PER_WORD));
	      XVECEXP (par, 0, i) = tmp;
	    }
	  pcum->aapcs_reg = par;
	}

      pcum->aapcs_nextncrn = ncrn + nregs;
      return;
    }

  /* C.11  */
  pcum->aapcs_nextncrn = NUM_ARG_REGS;

  /* The argument is passed on stack; record the needed number of words for
     this argument and align the total size if necessary.  */
on_stack:
  pcum->aapcs_stack_words = size / UNITS_PER_WORD;
  if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
    pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
					       16 / UNITS_PER_WORD);
  return;
}
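/* Worked example (editor's illustration): passing a 16-byte,
   16-byte-aligned structure when x0 is already taken (ncrn == 1):
   rule C.8 above bumps ncrn to 2, so the argument goes in the even
   register pair x2/x3 and x1 is left unused, keeping the double-word
   aligned value on an even register boundary.  */
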
/* Implement TARGET_FUNCTION_ARG.  */

static rtx
aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
		      const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);

  if (mode == VOIDmode)
    return NULL_RTX;

  aarch64_layout_arg (pcum_v, mode, type, named);
  return pcum->aapcs_reg;
}

void
aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
			      const_tree fntype ATTRIBUTE_UNUSED,
			      rtx libname ATTRIBUTE_UNUSED,
			      const_tree fndecl ATTRIBUTE_UNUSED,
			      unsigned n_named ATTRIBUTE_UNUSED)
{
  pcum->aapcs_ncrn = 0;
  pcum->aapcs_nvrn = 0;
  pcum->aapcs_nextncrn = 0;
  pcum->aapcs_nextnvrn = 0;
  pcum->pcs_variant = ARM_PCS_AAPCS64;
  pcum->aapcs_reg = NULL_RTX;
  pcum->aapcs_arg_processed = false;
  pcum->aapcs_stack_words = 0;
  pcum->aapcs_stack_size = 0;

  return;
}

static void
aarch64_function_arg_advance (cumulative_args_t pcum_v,
			      machine_mode mode,
			      const_tree type,
			      bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  if (pcum->pcs_variant == ARM_PCS_AAPCS64)
    {
      aarch64_layout_arg (pcum_v, mode, type, named);
      gcc_assert ((pcum->aapcs_reg != NULL_RTX)
		  != (pcum->aapcs_stack_words != 0));
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
      pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
      pcum->aapcs_stack_size += pcum->aapcs_stack_words;
      pcum->aapcs_stack_words = 0;
      pcum->aapcs_reg = NULL_RTX;
    }
}

bool
aarch64_function_arg_regno_p (unsigned regno)
{
  return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
	  || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
}

/* Implement FUNCTION_ARG_BOUNDARY.  Every parameter gets at least
   PARM_BOUNDARY bits of alignment, but will be given anything up
   to STACK_BOUNDARY bits if the type requires it.  This makes sure
   that both before and after the layout of each argument, the Next
   Stacked Argument Address (NSAA) will have a minimum alignment of
   8 bytes.  */

static unsigned int
aarch64_function_arg_boundary (machine_mode mode, const_tree type)
{
  unsigned int alignment = aarch64_function_arg_alignment (mode, type);

  if (alignment < PARM_BOUNDARY)
    alignment = PARM_BOUNDARY;
  if (alignment > STACK_BOUNDARY)
    alignment = STACK_BOUNDARY;
  return alignment;
}
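/* Worked example (editor's illustration, assuming the usual AArch64
   PARM_BOUNDARY of 64 and STACK_BOUNDARY of 128): a char (alignment 8)
   is bumped up to 64, a 32-byte over-aligned struct (alignment 256) is
   clamped down to 128, and a 16-byte vector (alignment 128) passes
   through unchanged.  */
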
/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).

   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte of the stack slot has useful data.

   Small aggregate types are placed in the lowest memory address.

   The related parameter passing rules are B.4, C.3, C.5 and C.14.  */

bool
aarch64_pad_arg_upward (machine_mode mode, const_tree type)
{
  /* On little-endian targets, the least significant byte of every stack
     argument is passed at the lowest byte address of the stack slot.  */
  if (!BYTES_BIG_ENDIAN)
    return true;

  /* Otherwise, integral, floating-point and pointer types are padded downward:
     the least significant byte of a stack argument is passed at the highest
     byte address of the stack slot.  */
  if (type
      ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
	 || POINTER_TYPE_P (type))
      : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
    return false;

  /* Everything else padded upward, i.e. data in first byte of stack slot.  */
  return true;
}

/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).

   It specifies padding for the last (may also be the only)
   element of a block move between registers and memory.  If
   assuming the block is in the memory, padding upward means that
   the last element is padded after its highest significant byte,
   while in downward padding, the last element is padded at its
   least significant byte side.

   Small aggregates and small complex types are always padded
   upwards.

   We don't need to worry about homogeneous floating-point or
   short-vector aggregates; their move is not affected by the
   padding direction determined here.  Regardless of endianness,
   each element of such an aggregate is put in the least
   significant bits of a fp/simd register.

   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */

bool
aarch64_pad_reg_upward (machine_mode mode, const_tree type,
			bool first ATTRIBUTE_UNUSED)
{
  /* Small composite types are always padded upward.  */
  if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
    {
      HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
			    : GET_MODE_SIZE (mode));
      if (size < 2 * UNITS_PER_WORD)
	return true;
    }

  /* Otherwise, use the default padding.  */
  return !BYTES_BIG_ENDIAN;
}

static machine_mode
aarch64_libgcc_cmp_return_mode (void)
{
  return SImode;
}

static bool
aarch64_frame_pointer_required (void)
{
  /* In aarch64_override_options_after_change
     flag_omit_leaf_frame_pointer turns off the frame pointer by
     default.  Turn it back on now if we've not got a leaf
     function.  */
  if (flag_omit_leaf_frame_pointer
      && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
    return true;

  return false;
}

/* Mark the registers that need to be saved by the callee and calculate
   the size of the callee-saved registers area and frame record (both FP
   and LR may be omitted).  */
static void
aarch64_layout_frame (void)
{
  HOST_WIDE_INT offset = 0;
  int regno;

  if (reload_completed && cfun->machine->frame.laid_out)
    return;

#define SLOT_NOT_REQUIRED (-2)
#define SLOT_REQUIRED     (-1)

  cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER;
  cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER;

  /* First mark all the registers that really need to be saved...  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;

  /* ... that includes the eh data registers (if needed)...  */
  if (crtl->calls_eh_return)
    for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
      cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
	= SLOT_REQUIRED;

  /* ... and any callee saved register that dataflow says is live.  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    if (df_regs_ever_live_p (regno)
	&& (regno == R30_REGNUM
	    || !call_used_regs[regno]))
      cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    if (df_regs_ever_live_p (regno)
	&& !call_used_regs[regno])
      cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;

  if (frame_pointer_needed)
    {
      /* FP and LR are placed in the linkage record.  */
      cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
      cfun->machine->frame.wb_candidate1 = R29_REGNUM;
      cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
      cfun->machine->frame.wb_candidate2 = R30_REGNUM;
      cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
      offset += 2 * UNITS_PER_WORD;
    }

  /* Now assign stack slots for them.  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
      {
	cfun->machine->frame.reg_offset[regno] = offset;
	if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
	  cfun->machine->frame.wb_candidate1 = regno;
	else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER)
	  cfun->machine->frame.wb_candidate2 = regno;
	offset += UNITS_PER_WORD;
      }

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
      {
	cfun->machine->frame.reg_offset[regno] = offset;
	if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
	  cfun->machine->frame.wb_candidate1 = regno;
	else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER
		 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
	  cfun->machine->frame.wb_candidate2 = regno;
	offset += UNITS_PER_WORD;
      }

  cfun->machine->frame.padding0 =
    (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
  offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);

  cfun->machine->frame.saved_regs_size = offset;

  cfun->machine->frame.hard_fp_offset
    = AARCH64_ROUND_UP (cfun->machine->frame.saved_varargs_size
			+ get_frame_size ()
			+ cfun->machine->frame.saved_regs_size,
			STACK_BOUNDARY / BITS_PER_UNIT);

  cfun->machine->frame.frame_size
    = AARCH64_ROUND_UP (cfun->machine->frame.hard_fp_offset
			+ crtl->outgoing_args_size,
			STACK_BOUNDARY / BITS_PER_UNIT);

  cfun->machine->frame.laid_out = true;
}
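/* Worked example (editor's illustration): for a function that needs a
   frame pointer and also saves x19 and d8, the loops above produce
   reg_offset[R29] == 0, reg_offset[R30] == 8, reg_offset[x19] == 16
   and reg_offset[d8] == 24, with R29/R30 as the write-back candidates;
   the 32-byte total is already STACK_BOUNDARY (16-byte) aligned, so
   padding0 is 0.  */
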
static bool
aarch64_register_saved_on_entry (int regno)
{
  return cfun->machine->frame.reg_offset[regno] >= 0;
}

static unsigned
aarch64_next_callee_save (unsigned regno, unsigned limit)
{
  while (regno <= limit && !aarch64_register_saved_on_entry (regno))
    regno++;
  return regno;
}

static void
aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
			   HOST_WIDE_INT adjustment)
{
  rtx base_rtx = stack_pointer_rtx;
  rtx insn, reg, mem;

  reg = gen_rtx_REG (mode, regno);
  mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
			    plus_constant (Pmode, base_rtx, -adjustment));
  mem = gen_rtx_MEM (mode, mem);

  insn = emit_move_insn (mem, reg);
  RTX_FRAME_RELATED_P (insn) = 1;
}

static rtx
aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
			  HOST_WIDE_INT adjustment)
{
  switch (mode)
    {
    case DImode:
      return gen_storewb_pairdi_di (base, base, reg, reg2,
				    GEN_INT (-adjustment),
				    GEN_INT (UNITS_PER_WORD - adjustment));
    case DFmode:
      return gen_storewb_pairdf_di (base, base, reg, reg2,
				    GEN_INT (-adjustment),
				    GEN_INT (UNITS_PER_WORD - adjustment));
    default:
      gcc_unreachable ();
    }
}

static void
aarch64_pushwb_pair_reg (machine_mode mode, unsigned regno1,
			 unsigned regno2, HOST_WIDE_INT adjustment)
{
  rtx insn;
  rtx reg1 = gen_rtx_REG (mode, regno1);
  rtx reg2 = gen_rtx_REG (mode, regno2);

  insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
					      reg2, adjustment));
  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
  RTX_FRAME_RELATED_P (insn) = 1;
}

static rtx
aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
			 HOST_WIDE_INT adjustment)
{
  switch (mode)
    {
    case DImode:
      return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
				   GEN_INT (UNITS_PER_WORD));
    case DFmode:
      return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
				   GEN_INT (UNITS_PER_WORD));
    default:
      gcc_unreachable ();
    }
}

static rtx
aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
			rtx reg2)
{
  switch (mode)
    {
    case DImode:
      return gen_store_pairdi (mem1, reg1, mem2, reg2);

    case DFmode:
      return gen_store_pairdf (mem1, reg1, mem2, reg2);

    default:
      gcc_unreachable ();
    }
}

static rtx
aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
		       rtx mem2)
{
  switch (mode)
    {
    case DImode:
      return gen_load_pairdi (reg1, mem1, reg2, mem2);

    case DFmode:
      return gen_load_pairdf (reg1, mem1, reg2, mem2);

    default:
      gcc_unreachable ();
    }
}

static void
aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
			   unsigned start, unsigned limit, bool skip_wb)
{
  rtx insn;
  rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
					    ? gen_frame_mem : gen_rtx_MEM);
  unsigned regno;
  unsigned regno2;

  for (regno = aarch64_next_callee_save (start, limit);
       regno <= limit;
       regno = aarch64_next_callee_save (regno + 1, limit))
    {
      rtx reg, mem;
      HOST_WIDE_INT offset;

      if (skip_wb
	  && (regno == cfun->machine->frame.wb_candidate1
	      || regno == cfun->machine->frame.wb_candidate2))
	continue;

      reg = gen_rtx_REG (mode, regno);
      offset = start_offset + cfun->machine->frame.reg_offset[regno];
      mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
					      offset));

      regno2 = aarch64_next_callee_save (regno + 1, limit);

      if (regno2 <= limit
	  && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
	      == cfun->machine->frame.reg_offset[regno2]))

	{
	  rtx reg2 = gen_rtx_REG (mode, regno2);
	  rtx mem2;

	  offset = start_offset + cfun->machine->frame.reg_offset[regno2];
	  mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
						   offset));
	  insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
						    reg2));

	  /* The first part of a frame-related parallel insn is
	     always assumed to be relevant to the frame
	     calculations; subsequent parts are only
	     frame-related if explicitly marked.  */
	  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
	  regno = regno2;
	}
      else
	insn = emit_move_insn (mem, reg);

      RTX_FRAME_RELATED_P (insn) = 1;
    }
}

static void
aarch64_restore_callee_saves (machine_mode mode,
			      HOST_WIDE_INT start_offset, unsigned start,
			      unsigned limit, bool skip_wb, rtx *cfi_ops)
{
  rtx base_rtx = stack_pointer_rtx;
  rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
					    ? gen_frame_mem : gen_rtx_MEM);
  unsigned regno;
  unsigned regno2;
  HOST_WIDE_INT offset;

  for (regno = aarch64_next_callee_save (start, limit);
       regno <= limit;
       regno = aarch64_next_callee_save (regno + 1, limit))
    {
      rtx reg, mem;

      if (skip_wb
	  && (regno == cfun->machine->frame.wb_candidate1
	      || regno == cfun->machine->frame.wb_candidate2))
	continue;

      reg = gen_rtx_REG (mode, regno);
      offset = start_offset + cfun->machine->frame.reg_offset[regno];
      mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));

      regno2 = aarch64_next_callee_save (regno + 1, limit);

      if (regno2 <= limit
	  && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
	      == cfun->machine->frame.reg_offset[regno2]))
	{
	  rtx reg2 = gen_rtx_REG (mode, regno2);
	  rtx mem2;

	  offset = start_offset + cfun->machine->frame.reg_offset[regno2];
	  mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
	  emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));

	  *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
	  regno = regno2;
	}
      else
	emit_move_insn (reg, mem);

      *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
    }
}

/* AArch64 stack frames generated by this compiler look like:

	+-------------------------------+
	|                               |
	|  incoming stack arguments     |
	|                               |
	+-------------------------------+
	|                               | <-- incoming stack pointer (aligned)
	|  callee-allocated save area   |
	|  for register varargs         |
	|                               |
	+-------------------------------+
	|  local variables              | <-- frame_pointer_rtx
	|                               |
	+-------------------------------+
	|  padding0                     | \
	+-------------------------------+  |
	|  callee-saved registers       |  | frame.saved_regs_size
	+-------------------------------+  |
	|  LR'                          |  |
	+-------------------------------+  |
	|  FP'                          | / <- hard_frame_pointer_rtx (aligned)
	+-------------------------------+
	|  dynamic allocation           |
	+-------------------------------+
	|  padding                      |
	+-------------------------------+
	|  outgoing stack arguments     | <-- arg_pointer
	|                               |
	+-------------------------------+
	|                               | <-- stack_pointer_rtx (aligned)

   Dynamic stack allocations via alloca() decrease stack_pointer_rtx
   but leave frame_pointer_rtx and hard_frame_pointer_rtx
   unchanged.  */

/* Generate the prologue instructions for entry into a function.
   Establish the stack frame by decreasing the stack pointer with a
   properly calculated size and, if necessary, create a frame record
   filled with the values of LR and previous frame pointer.  The
   current FP is also set up if it is in use.  */

void
aarch64_expand_prologue (void)
{
  /* sub sp, sp, #<frame_size>
     stp {fp, lr}, [sp, #<frame_size> - 16]
     add fp, sp, #<frame_size> - hardfp_offset
     stp {cs_reg}, [fp, #-16] etc.

     sub sp, sp, <final_adjustment_if_any>
  */
  HOST_WIDE_INT frame_size, offset;
  HOST_WIDE_INT fp_offset;		/* Offset from hard FP to SP.  */
  HOST_WIDE_INT hard_fp_offset;
  rtx_insn *insn;

  aarch64_layout_frame ();

  offset = frame_size = cfun->machine->frame.frame_size;
  hard_fp_offset = cfun->machine->frame.hard_fp_offset;
  fp_offset = frame_size - hard_fp_offset;

  if (flag_stack_usage_info)
    current_function_static_stack_size = frame_size;

  /* Store pairs and load pairs have a range only -512 to 504.  */
  if (offset >= 512)
    {
      /* When the frame has a large size, an initial decrease is done on
	 the stack pointer to jump over the callee-allocated save area for
	 register varargs, the local variable area and/or the callee-saved
	 register area.  This will allow the pre-index write-back
	 store pair instructions to be used for setting up the stack frame
	 efficiently.  */
      offset = hard_fp_offset;
      if (offset >= 512)
	offset = cfun->machine->frame.saved_regs_size;

      frame_size -= (offset + crtl->outgoing_args_size);
      fp_offset = 0;

      if (frame_size >= 0x1000000)
	{
	  rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
	  emit_move_insn (op0, GEN_INT (-frame_size));
	  insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));

	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
			gen_rtx_SET (VOIDmode, stack_pointer_rtx,
				     plus_constant (Pmode, stack_pointer_rtx,
						    -frame_size)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      else if (frame_size > 0)
	{
	  int hi_ofs = frame_size & 0xfff000;
	  int lo_ofs = frame_size & 0x000fff;

	  if (hi_ofs)
	    {
	      insn = emit_insn (gen_add2_insn
				(stack_pointer_rtx, GEN_INT (-hi_ofs)));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	  if (lo_ofs)
	    {
	      insn = emit_insn (gen_add2_insn
				(stack_pointer_rtx, GEN_INT (-lo_ofs)));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}
    }
  else
    frame_size = -1;

  if (offset > 0)
    {
      bool skip_wb = false;

      if (frame_pointer_needed)
	{
	  skip_wb = true;

	  if (fp_offset)
	    {
	      insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					       GEN_INT (-offset)));
	      RTX_FRAME_RELATED_P (insn) = 1;

	      aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM,
					 R30_REGNUM, false);
	    }
	  else
	    aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset);

	  /* Set up frame pointer to point to the location of the
	     previous frame pointer on the stack.  */
	  insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
					   stack_pointer_rtx,
					   GEN_INT (fp_offset)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
	}
      else
	{
	  unsigned reg1 = cfun->machine->frame.wb_candidate1;
	  unsigned reg2 = cfun->machine->frame.wb_candidate2;

	  if (fp_offset
	      || reg1 == FIRST_PSEUDO_REGISTER
	      || (reg2 == FIRST_PSEUDO_REGISTER
		  && offset >= 256))
	    {
	      insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					       GEN_INT (-offset)));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	  else
	    {
	      machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;

	      skip_wb = true;

	      if (reg2 == FIRST_PSEUDO_REGISTER)
		aarch64_pushwb_single_reg (mode1, reg1, offset);
	      else
		aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset);
	    }
	}

      aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
				 skip_wb);
      aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
				 skip_wb);
    }

  /* when offset >= 512,
     sub sp, sp, #<outgoing_args_size> */
  if (frame_size > -1)
    {
      if (crtl->outgoing_args_size > 0)
	{
	  insn = emit_insn (gen_add2_insn
			    (stack_pointer_rtx,
			     GEN_INT (- crtl->outgoing_args_size)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
/* Return TRUE if we can use a simple_return insn.

   This function checks whether the callee saved stack is empty, which
   means no restore actions are needed.  The pro_and_epilogue will use
   this to check whether shrink-wrapping opt is feasible.  */

bool
aarch64_use_return_insn_p (void)
{
  if (!reload_completed)
    return false;

  if (crtl->profile)
    return false;

  aarch64_layout_frame ();

  return cfun->machine->frame.frame_size == 0;
}
/* Generate the epilogue instructions for returning from a function.  */
void
aarch64_expand_epilogue (bool for_sibcall)
{
  HOST_WIDE_INT frame_size, offset;
  HOST_WIDE_INT fp_offset;
  HOST_WIDE_INT hard_fp_offset;
  rtx_insn *insn;
  /* We need to add memory barrier to prevent read from deallocated stack.  */
  bool need_barrier_p = (get_frame_size () != 0
			 || cfun->machine->frame.saved_varargs_size);

  aarch64_layout_frame ();

  offset = frame_size = cfun->machine->frame.frame_size;
  hard_fp_offset = cfun->machine->frame.hard_fp_offset;
  fp_offset = frame_size - hard_fp_offset;

  /* Store pairs and load pairs have a range only -512 to 504.  */
  if (offset >= 512)
    {
      offset = hard_fp_offset;
      if (offset >= 512)
	offset = cfun->machine->frame.saved_regs_size;

      frame_size -= (offset + crtl->outgoing_args_size);
      fp_offset = 0;
      if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
	{
	  insn = emit_insn (gen_add2_insn
			    (stack_pointer_rtx,
			     GEN_INT (crtl->outgoing_args_size)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
  else
    frame_size = -1;

  /* If there were outgoing arguments or we've done dynamic stack
     allocation, then restore the stack pointer from the frame
     pointer.  This is at most one insn and more efficient than using
     GCC's internal mechanism.  */
  if (frame_pointer_needed
      && (crtl->outgoing_args_size || cfun->calls_alloca))
    {
      if (cfun->calls_alloca)
	emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));

      insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
				       hard_frame_pointer_rtx,
				       GEN_INT (0)));
      offset = offset - fp_offset;
    }

  if (offset > 0)
    {
      unsigned reg1 = cfun->machine->frame.wb_candidate1;
      unsigned reg2 = cfun->machine->frame.wb_candidate2;
      bool skip_wb = true;
      rtx cfi_ops = NULL;

      if (frame_pointer_needed)
	fp_offset = 0;
      else if (fp_offset
	       || reg1 == FIRST_PSEUDO_REGISTER
	       || (reg2 == FIRST_PSEUDO_REGISTER
		   && offset >= 256))
	skip_wb = false;

      aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
				    skip_wb, &cfi_ops);
      aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
				    skip_wb, &cfi_ops);

      if (need_barrier_p)
	emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));

      if (skip_wb)
	{
	  machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
	  rtx rreg1 = gen_rtx_REG (mode1, reg1);

	  cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops);
	  if (reg2 == FIRST_PSEUDO_REGISTER)
	    {
	      rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset);
	      mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
	      mem = gen_rtx_MEM (mode1, mem);
	      insn = emit_move_insn (rreg1, mem);
	    }
	  else
	    {
	      rtx rreg2 = gen_rtx_REG (mode1, reg2);

	      cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops);
	      insn = emit_insn (aarch64_gen_loadwb_pair
				(mode1, stack_pointer_rtx, rreg1,
				 rreg2, offset));
	    }
	}
      else
	insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					 GEN_INT (offset)));

      /* Reset the CFA to be SP + FRAME_SIZE.  */
      rtx new_cfa = stack_pointer_rtx;
      if (frame_size > 0)
	new_cfa = plus_constant (Pmode, new_cfa, frame_size);
      cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
      REG_NOTES (insn) = cfi_ops;
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (frame_size > 0)
    {
      if (need_barrier_p)
	emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));

      if (frame_size >= 0x1000000)
	{
	  rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
	  emit_move_insn (op0, GEN_INT (frame_size));
	  insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
	}
      else
	{
	  int hi_ofs = frame_size & 0xfff000;
	  int lo_ofs = frame_size & 0x000fff;

	  if (hi_ofs && lo_ofs)
	    {
	      insn = emit_insn (gen_add2_insn
				(stack_pointer_rtx, GEN_INT (hi_ofs)));
	      RTX_FRAME_RELATED_P (insn) = 1;
	      frame_size = lo_ofs;
	    }
	  insn = emit_insn (gen_add2_insn
			    (stack_pointer_rtx, GEN_INT (frame_size)));
	}

      /* Reset the CFA to be SP + 0.  */
      add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Stack adjustment for exception handler.  */
  if (crtl->calls_eh_return)
    {
      /* We need to unwind the stack by the offset computed by
	 EH_RETURN_STACKADJ_RTX.  We have already reset the CFA
	 to be SP; letting the CFA move during this adjustment
	 is just as correct as retaining the CFA from the body
	 of the function.  Therefore, do nothing special.  */
      emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
    }

  emit_use (gen_rtx_REG (DImode, LR_REGNUM));
  if (!for_sibcall)
    emit_jump_insn (ret_rtx);
}
/* Return the place to copy the exception unwinding return address to.
   This will probably be a stack slot, but could (in theory) be the
   return register.  */
rtx
aarch64_final_eh_return_addr (void)
{
  HOST_WIDE_INT fp_offset;

  aarch64_layout_frame ();

  fp_offset = cfun->machine->frame.frame_size
	      - cfun->machine->frame.hard_fp_offset;

  if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
    return gen_rtx_REG (DImode, LR_REGNUM);

  /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
     result in a store to save LR introduced by builtin_eh_return () being
     incorrectly deleted because the alias is not detected.
     So in the calculation of the address to copy the exception unwinding
     return address to, we note 2 cases.
     If FP is needed and the fp_offset is 0, it means that SP = FP and hence
     we return a SP-relative location since all the addresses are SP-relative
     in this case.  This prevents the store from being optimized away.
     If the fp_offset is not 0, then the addresses will be FP-relative and
     therefore we return a FP-relative location.  */

  if (frame_pointer_needed)
    {
      if (fp_offset)
	return gen_frame_mem (DImode,
			      plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
      else
	return gen_frame_mem (DImode,
			      plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
    }

  /* If FP is not needed, we calculate the location of LR, which would be
     at the top of the saved registers block.  */

  return gen_frame_mem (DImode,
			plus_constant (Pmode,
				       stack_pointer_rtx,
				       fp_offset
				       + cfun->machine->frame.saved_regs_size
				       - 2 * UNITS_PER_WORD));
}
/* Possibly output code to build up a constant in a register.  For
   the benefit of the costs infrastructure, returns the number of
   instructions which would be emitted.  GENERATE inhibits or
   enables code generation.  */

static int
aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
{
  int insns = 1;

  if (aarch64_bitmask_imm (val, DImode))
    {
      if (generate)
	emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
    }
  else
    {
      int i;
      int ncount = 0;
      int zcount = 0;
      HOST_WIDE_INT valp = val >> 16;
      HOST_WIDE_INT valm;
      HOST_WIDE_INT tval;

      for (i = 16; i < 64; i += 16)
	{
	  valm = (valp & 0xffff);

	  if (valm != 0)
	    ++ zcount;

	  if (valm != 0xffff)
	    ++ ncount;

	  valp >>= 16;
	}

      /* zcount contains the number of additional MOVK instructions
	 required if the constant is built up with an initial MOVZ instruction,
	 while ncount is the number of MOVK instructions required if starting
	 with a MOVN instruction.  Choose the sequence that yields the fewest
	 number of instructions, preferring MOVZ instructions when they are both
	 the same.  */
      if (ncount < zcount)
	{
	  if (generate)
	    emit_move_insn (gen_rtx_REG (Pmode, regnum),
			    GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
	  tval = 0xffff;
	}
      else
	{
	  if (generate)
	    emit_move_insn (gen_rtx_REG (Pmode, regnum),
			    GEN_INT (val & 0xffff));
	  tval = 0;
	}

      val >>= 16;

      for (i = 16; i < 64; i += 16)
	{
	  if ((val & 0xffff) != tval)
	    {
	      if (generate)
		emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
					   GEN_INT (i),
					   GEN_INT (val & 0xffff)));
	      insns++;
	    }
	  val >>= 16;
	}
    }
  return insns;
}
static void
aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
{
  HOST_WIDE_INT mdelta = delta;
  rtx this_rtx = gen_rtx_REG (Pmode, regnum);
  rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);

  if (mdelta < 0)
    mdelta = -mdelta;

  if (mdelta >= 4096 * 4096)
    {
      (void) aarch64_build_constant (scratchreg, delta, true);
      emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
    }
  else if (mdelta > 0)
    {
      if (mdelta >= 4096)
	{
	  emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
	  rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
	  if (delta < 0)
	    emit_insn (gen_rtx_SET (Pmode, this_rtx,
				    gen_rtx_MINUS (Pmode, this_rtx, shift)));
	  else
	    emit_insn (gen_rtx_SET (Pmode, this_rtx,
				    gen_rtx_PLUS (Pmode, this_rtx, shift)));
	}
      if (mdelta % 4096 != 0)
	{
	  scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
	  emit_insn (gen_rtx_SET (Pmode, this_rtx,
				  gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
	}
    }
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */
static void
aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT delta,
			 HOST_WIDE_INT vcall_offset,
			 tree function)
{
  /* The this pointer is always in x0.  Note that this differs from
     Arm where the this pointer may be bumped to r1 if r0 is required
     to return a pointer to an aggregate.  On AArch64 a result value
     pointer will be in x8.  */
  int this_regno = R0_REGNUM;
  rtx this_rtx, temp0, temp1, addr, funexp;
  rtx_insn *insn;

  reload_completed = 1;
  emit_note (NOTE_INSN_PROLOGUE_END);

  if (vcall_offset == 0)
    aarch64_add_constant (this_regno, IP1_REGNUM, delta);
  else
    {
      gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);

      this_rtx = gen_rtx_REG (Pmode, this_regno);
      temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
      temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);

      if (delta != 0)
	{
	  if (delta >= -256 && delta < 256)
	    addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
				       plus_constant (Pmode, this_rtx, delta));
	  else
	    aarch64_add_constant (this_regno, IP1_REGNUM, delta);
	}

      if (Pmode == ptr_mode)
	aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
      else
	aarch64_emit_move (temp0,
			   gen_rtx_ZERO_EXTEND (Pmode,
						gen_rtx_MEM (ptr_mode, addr)));

      if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
	addr = plus_constant (Pmode, temp0, vcall_offset);
      else
	{
	  (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
	  addr = gen_rtx_PLUS (Pmode, temp0, temp1);
	}

      if (Pmode == ptr_mode)
	aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode, addr));
      else
	aarch64_emit_move (temp1,
			   gen_rtx_SIGN_EXTEND (Pmode,
						gen_rtx_MEM (ptr_mode, addr)));

      emit_insn (gen_add2_insn (this_rtx, temp1));
    }

  /* Generate a tail call to the target function.  */
  if (!TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
  SIBLING_CALL_P (insn) = 1;

  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  /* Stop pretending to be a post-reload pass.  */
  reload_completed = 0;
}
static bool
aarch64_tls_referenced_p (rtx x)
{
  if (!TARGET_HAVE_TLS)
    return false;
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
	return true;
      /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
	 TLS offsets, not real symbol references.  */
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
	iter.skip_subrtxes ();
    }
  return false;
}
static int
aarch64_bitmasks_cmp (const void *i1, const void *i2)
{
  const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
  const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;

  if (*imm1 < *imm2)
    return -1;
  if (*imm1 > *imm2)
    return +1;
  return 0;
}
static void
aarch64_build_bitmask_table (void)
{
  unsigned HOST_WIDE_INT mask, imm;
  unsigned int log_e, e, s, r;
  unsigned int nimms = 0;

  for (log_e = 1; log_e <= 6; log_e++)
    {
      e = 1 << log_e;
      if (e == 64)
	mask = ~(HOST_WIDE_INT) 0;
      else
	mask = ((HOST_WIDE_INT) 1 << e) - 1;
      for (s = 1; s < e; s++)
	{
	  for (r = 0; r < e; r++)
	    {
	      /* set s consecutive bits to 1 (s < 64) */
	      imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
	      /* rotate right by r */
	      if (r != 0)
		imm = ((imm >> r) | (imm << (e - r))) & mask;
	      /* replicate the constant depending on SIMD size */
	      switch (log_e)
		{
		case 1: imm |= (imm <<  2);
		case 2: imm |= (imm <<  4);
		case 3: imm |= (imm <<  8);
		case 4: imm |= (imm << 16);
		case 5: imm |= (imm << 32);
		case 6:
		  break;
		default:
		  gcc_unreachable ();
		}
	      gcc_assert (nimms < AARCH64_NUM_BITMASKS);
	      aarch64_bitmasks[nimms++] = imm;
	    }
	}
    }

  gcc_assert (nimms == AARCH64_NUM_BITMASKS);
  qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
	 aarch64_bitmasks_cmp);
}
/* Return true if val can be encoded as a 12-bit unsigned immediate with
   a left shift of 0 or 12 bits.  */
bool
aarch64_uimm12_shift (HOST_WIDE_INT val)
{
  return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
	  || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
	  );
}
/* Return true if val is an immediate that can be loaded into a
   register by a MOVZ instruction.  */
static bool
aarch64_movw_imm (HOST_WIDE_INT val, machine_mode mode)
{
  if (GET_MODE_SIZE (mode) > 4)
    {
      if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
	  || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
	return 1;
    }
  else
    {
      /* Ignore sign extension.  */
      val &= (HOST_WIDE_INT) 0xffffffff;
    }
  return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
	  || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
}
/* Return true if val is a valid bitmask immediate.  */
bool
aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode mode)
{
  if (GET_MODE_SIZE (mode) < 8)
    {
      /* Replicate bit pattern.  */
      val &= (HOST_WIDE_INT) 0xffffffff;
      val |= val << 32;
    }
  return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
		  sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
}
/* Return true if val is an immediate that can be loaded into a
   register in a single instruction.  */
bool
aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
{
  if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
    return 1;
  return aarch64_bitmask_imm (val, mode);
}
static bool
aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  if (GET_CODE (x) == HIGH)
    return true;

  split_const (x, &base, &offset);
  if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
    {
      if (aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR)
	  != SYMBOL_FORCE_TO_MEM)
	return true;
      else
	/* Avoid generating a 64-bit relocation in ILP32; leave
	   to aarch64_expand_mov_immediate to handle it properly.  */
	return mode != ptr_mode;
    }

  return aarch64_tls_referenced_p (x);
}
/* Return true if register REGNO is a valid index register.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
aarch64_regno_ok_for_index_p (int regno, bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
	return true;

      if (!reg_renumber)
	return false;

      regno = reg_renumber[regno];
    }
  return GP_REGNUM_P (regno);
}
/* Return true if register REGNO is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
aarch64_regno_ok_for_base_p (int regno, bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
	return true;

      if (!reg_renumber)
	return false;

      regno = reg_renumber[regno];
    }

  /* The fake registers will be eliminated to either the stack or
     hard frame pointer, both of which are usually valid base registers.
     Reload deals with the cases where the eliminated form isn't valid.  */
  return (GP_REGNUM_P (regno)
	  || regno == SP_REGNUM
	  || regno == FRAME_POINTER_REGNUM
	  || regno == ARG_POINTER_REGNUM);
}
/* Return true if X is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

static bool
aarch64_base_register_rtx_p (rtx x, bool strict_p)
{
  if (!strict_p && GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);

  return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
}
/* Return true if address offset is a valid index.  If it is, fill in INFO
   appropriately.  STRICT_P is true if REG_OK_STRICT is in effect.  */

static bool
aarch64_classify_index (struct aarch64_address_info *info, rtx x,
			machine_mode mode, bool strict_p)
{
  enum aarch64_address_type type;
  rtx index;
  int shift;

  /* (reg:P) */
  if ((REG_P (x) || GET_CODE (x) == SUBREG)
      && GET_MODE (x) == Pmode)
    {
      type = ADDRESS_REG_REG;
      index = x;
      shift = 0;
    }
  /* (sign_extend:DI (reg:SI)) */
  else if ((GET_CODE (x) == SIGN_EXTEND
	    || GET_CODE (x) == ZERO_EXTEND)
	   && GET_MODE (x) == DImode
	   && GET_MODE (XEXP (x, 0)) == SImode)
    {
      type = (GET_CODE (x) == SIGN_EXTEND)
	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (x, 0);
      shift = 0;
    }
  /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
  else if (GET_CODE (x) == MULT
	   && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
	       || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
	   && GET_MODE (XEXP (x, 0)) == DImode
	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
	   && CONST_INT_P (XEXP (x, 1)))
    {
      type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = exact_log2 (INTVAL (XEXP (x, 1)));
    }
  /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
  else if (GET_CODE (x) == ASHIFT
	   && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
	       || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
	   && GET_MODE (XEXP (x, 0)) == DImode
	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
	   && CONST_INT_P (XEXP (x, 1)))
    {
      type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = INTVAL (XEXP (x, 1));
    }
  /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
  else if ((GET_CODE (x) == SIGN_EXTRACT
	    || GET_CODE (x) == ZERO_EXTRACT)
	   && GET_MODE (x) == DImode
	   && GET_CODE (XEXP (x, 0)) == MULT
	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
	   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
    {
      type = (GET_CODE (x) == SIGN_EXTRACT)
	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
      if (INTVAL (XEXP (x, 1)) != 32 + shift
	  || INTVAL (XEXP (x, 2)) != 0)
	shift = -1;
    }
  /* (and:DI (mult:DI (reg:DI) (const_int scale))
     (const_int 0xffffffff<<shift)) */
  else if (GET_CODE (x) == AND
	   && GET_MODE (x) == DImode
	   && GET_CODE (XEXP (x, 0)) == MULT
	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
	   && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	   && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
      if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
	shift = -1;
    }
  /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
  else if ((GET_CODE (x) == SIGN_EXTRACT
	    || GET_CODE (x) == ZERO_EXTRACT)
	   && GET_MODE (x) == DImode
	   && GET_CODE (XEXP (x, 0)) == ASHIFT
	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
	   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
    {
      type = (GET_CODE (x) == SIGN_EXTRACT)
	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = INTVAL (XEXP (XEXP (x, 0), 1));
      if (INTVAL (XEXP (x, 1)) != 32 + shift
	  || INTVAL (XEXP (x, 2)) != 0)
	shift = -1;
    }
  /* (and:DI (ashift:DI (reg:DI) (const_int shift))
     (const_int 0xffffffff<<shift)) */
  else if (GET_CODE (x) == AND
	   && GET_MODE (x) == DImode
	   && GET_CODE (XEXP (x, 0)) == ASHIFT
	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
	   && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	   && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = INTVAL (XEXP (XEXP (x, 0), 1));
      if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
	shift = -1;
    }
  /* (mult:P (reg:P) (const_int scale)) */
  else if (GET_CODE (x) == MULT
	   && GET_MODE (x) == Pmode
	   && GET_MODE (XEXP (x, 0)) == Pmode
	   && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_REG;
      index = XEXP (x, 0);
      shift = exact_log2 (INTVAL (XEXP (x, 1)));
    }
  /* (ashift:P (reg:P) (const_int shift)) */
  else if (GET_CODE (x) == ASHIFT
	   && GET_MODE (x) == Pmode
	   && GET_MODE (XEXP (x, 0)) == Pmode
	   && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_REG;
      index = XEXP (x, 0);
      shift = INTVAL (XEXP (x, 1));
    }
  else
    return false;

  if (GET_CODE (index) == SUBREG)
    index = SUBREG_REG (index);

  if ((shift == 0 ||
       (shift > 0 && shift <= 3
	&& (1 << shift) == GET_MODE_SIZE (mode)))
      && REG_P (index)
      && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
    {
      info->type = type;
      info->offset = index;
      info->shift = shift;
      return true;
    }

  return false;
}
bool
aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
{
  return (offset >= -64 * GET_MODE_SIZE (mode)
	  && offset < 64 * GET_MODE_SIZE (mode)
	  && offset % GET_MODE_SIZE (mode) == 0);
}

static inline bool
offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
			       HOST_WIDE_INT offset)
{
  return offset >= -256 && offset < 256;
}

static inline bool
offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
{
  return (offset >= 0
	  && offset < 4096 * GET_MODE_SIZE (mode)
	  && offset % GET_MODE_SIZE (mode) == 0);
}
/* Return true if X is a valid address for machine mode MODE.  If it is,
   fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
   effect.  OUTER_CODE is PARALLEL for a load/store pair.  */

static bool
aarch64_classify_address (struct aarch64_address_info *info,
			  rtx x, machine_mode mode,
			  RTX_CODE outer_code, bool strict_p)
{
  enum rtx_code code = GET_CODE (x);
  rtx op0, op1;

  /* On BE, we use load/store pair for all large int mode load/stores.  */
  bool load_store_pair_p = (outer_code == PARALLEL
			    || (BYTES_BIG_ENDIAN
				&& aarch64_vect_struct_mode_p (mode)));

  bool allow_reg_index_p =
    !load_store_pair_p
    && (GET_MODE_SIZE (mode) != 16 || aarch64_vector_mode_supported_p (mode))
    && !aarch64_vect_struct_mode_p (mode);

  /* On LE, for AdvSIMD, don't support anything other than POST_INC or
     REG addressing.  */
  if (aarch64_vect_struct_mode_p (mode) && !BYTES_BIG_ENDIAN
      && (code != POST_INC && code != REG))
    return false;

  switch (code)
    {
    case REG:
    case SUBREG:
      info->type = ADDRESS_REG_IMM;
      info->base = x;
      info->offset = const0_rtx;
      return aarch64_base_register_rtx_p (x, strict_p);

    case PLUS:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (! strict_p
	  && REG_P (op0)
	  && (op0 == virtual_stack_vars_rtx
	      || op0 == frame_pointer_rtx
	      || op0 == arg_pointer_rtx)
	  && CONST_INT_P (op1))
	{
	  info->type = ADDRESS_REG_IMM;
	  info->base = op0;
	  info->offset = op1;

	  return true;
	}

      if (GET_MODE_SIZE (mode) != 0
	  && CONST_INT_P (op1)
	  && aarch64_base_register_rtx_p (op0, strict_p))
	{
	  HOST_WIDE_INT offset = INTVAL (op1);

	  info->type = ADDRESS_REG_IMM;
	  info->base = op0;
	  info->offset = op1;

	  /* TImode and TFmode values are allowed in both pairs of X
	     registers and individual Q registers.  The available
	     address modes are:
	     X,X: 7-bit signed scaled offset
	     Q:   9-bit signed offset
	     We conservatively require an offset representable in either mode.
	   */
	  if (mode == TImode || mode == TFmode)
	    return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
		    && offset_9bit_signed_unscaled_p (mode, offset));

	  /* A 7bit offset check because OImode will emit a ldp/stp
	     instruction (only big endian will get here).
	     For ldp/stp instructions, the offset is scaled for the size of a
	     single element of the pair.  */
	  if (mode == OImode)
	    return aarch64_offset_7bit_signed_scaled_p (TImode, offset);

	  /* Three 9/12 bit offsets checks because CImode will emit three
	     ldr/str instructions (only big endian will get here).  */
	  if (mode == CImode)
	    return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
		    && (offset_9bit_signed_unscaled_p (V16QImode, offset + 32)
			|| offset_12bit_unsigned_scaled_p (V16QImode,
							   offset + 32)));

	  /* Two 7bit offsets checks because XImode will emit two ldp/stp
	     instructions (only big endian will get here).  */
	  if (mode == XImode)
	    return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
		    && aarch64_offset_7bit_signed_scaled_p (TImode,
							    offset + 32));

	  if (load_store_pair_p)
	    return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
		    && aarch64_offset_7bit_signed_scaled_p (mode, offset));
	  else
	    return (offset_9bit_signed_unscaled_p (mode, offset)
		    || offset_12bit_unsigned_scaled_p (mode, offset));
	}

      if (allow_reg_index_p)
	{
	  /* Look for base + (scaled/extended) index register.  */
	  if (aarch64_base_register_rtx_p (op0, strict_p)
	      && aarch64_classify_index (info, op1, mode, strict_p))
	    {
	      info->base = op0;
	      return true;
	    }
	  if (aarch64_base_register_rtx_p (op1, strict_p)
	      && aarch64_classify_index (info, op0, mode, strict_p))
	    {
	      info->base = op1;
	      return true;
	    }
	}

      return false;

    case POST_INC:
    case POST_DEC:
    case PRE_INC:
    case PRE_DEC:
      info->type = ADDRESS_REG_WB;
      info->base = XEXP (x, 0);
      info->offset = NULL_RTX;
      return aarch64_base_register_rtx_p (info->base, strict_p);

    case POST_MODIFY:
    case PRE_MODIFY:
      info->type = ADDRESS_REG_WB;
      info->base = XEXP (x, 0);
      if (GET_CODE (XEXP (x, 1)) == PLUS
	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
	  && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
	  && aarch64_base_register_rtx_p (info->base, strict_p))
	{
	  HOST_WIDE_INT offset;
	  info->offset = XEXP (XEXP (x, 1), 1);
	  offset = INTVAL (info->offset);

	  /* TImode and TFmode values are allowed in both pairs of X
	     registers and individual Q registers.  The available
	     address modes are:
	     X,X: 7-bit signed scaled offset
	     Q:   9-bit signed offset
	     We conservatively require an offset representable in either mode.
	   */
	  if (mode == TImode || mode == TFmode)
	    return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
		    && offset_9bit_signed_unscaled_p (mode, offset));

	  if (load_store_pair_p)
	    return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
		    && aarch64_offset_7bit_signed_scaled_p (mode, offset));
	  else
	    return offset_9bit_signed_unscaled_p (mode, offset);
	}
      return false;

    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      /* load literal: pc-relative constant pool entry.  Only supported
	 for SI mode or larger.  */
      info->type = ADDRESS_SYMBOLIC;

      if (!load_store_pair_p && GET_MODE_SIZE (mode) >= 4)
	{
	  rtx sym, addend;

	  split_const (x, &sym, &addend);
	  return (GET_CODE (sym) == LABEL_REF
		  || (GET_CODE (sym) == SYMBOL_REF
		      && CONSTANT_POOL_ADDRESS_P (sym)));
	}
      return false;

    case LO_SUM:
      info->type = ADDRESS_LO_SUM;
      info->base = XEXP (x, 0);
      info->offset = XEXP (x, 1);
      if (allow_reg_index_p
	  && aarch64_base_register_rtx_p (info->base, strict_p))
	{
	  rtx sym, offs;
	  split_const (info->offset, &sym, &offs);
	  if (GET_CODE (sym) == SYMBOL_REF
	      && (aarch64_classify_symbol (sym, offs, SYMBOL_CONTEXT_MEM)
		  == SYMBOL_SMALL_ABSOLUTE))
	    {
	      /* The symbol and offset must be aligned to the access size.  */
	      unsigned int align;
	      unsigned int ref_size;

	      if (CONSTANT_POOL_ADDRESS_P (sym))
		align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
	      else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
		{
		  tree exp = SYMBOL_REF_DECL (sym);
		  align = TYPE_ALIGN (TREE_TYPE (exp));
		  align = CONSTANT_ALIGNMENT (exp, align);
		}
	      else if (SYMBOL_REF_DECL (sym))
		align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
	      else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
		       && SYMBOL_REF_BLOCK (sym) != NULL)
		align = SYMBOL_REF_BLOCK (sym)->alignment;
	      else
		align = BITS_PER_UNIT;

	      ref_size = GET_MODE_SIZE (mode);
	      if (ref_size == 0)
		ref_size = GET_MODE_SIZE (DImode);

	      return ((INTVAL (offs) & (ref_size - 1)) == 0
		      && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
	    }
	}
      return false;

    default:
      return false;
    }
}
bool
aarch64_symbolic_address_p (rtx x)
{
  rtx offset;

  split_const (x, &x, &offset);
  return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
}

/* Classify the base of symbolic expression X, given that X appears in
   context CONTEXT.  */

enum aarch64_symbol_type
aarch64_classify_symbolic_expression (rtx x,
				      enum aarch64_symbol_context context)
{
  rtx offset;

  split_const (x, &x, &offset);
  return aarch64_classify_symbol (x, offset, context);
}
/* Return TRUE if X is a legitimate address for accessing memory in
   mode MODE.  */
static bool
aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
{
  struct aarch64_address_info addr;

  return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
}

/* Return TRUE if X is a legitimate address for accessing memory in
   mode MODE.  OUTER_CODE will be PARALLEL if this is a load/store
   pair operation.  */
bool
aarch64_legitimate_address_p (machine_mode mode, rtx x,
			      RTX_CODE outer_code, bool strict_p)
{
  struct aarch64_address_info addr;

  return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
}
/* Return TRUE if rtx X is immediate constant 0.0 */
bool
aarch64_float_const_zero_rtx_p (rtx x)
{
  REAL_VALUE_TYPE r;

  if (GET_MODE (x) == VOIDmode)
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  if (REAL_VALUE_MINUS_ZERO (r))
    return !HONOR_SIGNED_ZEROS (GET_MODE (x));
  return REAL_VALUES_EQUAL (r, dconst0);
}
/* Return the fixed registers used for condition codes.  */

static bool
aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = CC_REGNUM;
  *p2 = INVALID_REGNUM;
  return true;
}
/* Emit call insn with PAT and do aarch64-specific handling.  */

void
aarch64_emit_call_insn (rtx pat)
{
  rtx insn = emit_call_insn (pat);

  rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
  clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
  clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
}
machine_mode
aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
{
  /* All floating point compares return CCFP if it is an equality
     comparison, and CCFPE otherwise.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    {
      switch (code)
	{
	case EQ:
	case NE:
	case UNORDERED:
	case ORDERED:
	case UNLT:
	case UNLE:
	case UNGT:
	case UNGE:
	case UNEQ:
	case LTGT:
	  return CCFPmode;

	case LT:
	case LE:
	case GT:
	case GE:
	  return CCFPEmode;

	default:
	  gcc_unreachable ();
	}
    }

  if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
      && y == const0_rtx
      && (code == EQ || code == NE || code == LT || code == GE)
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
	  || GET_CODE (x) == NEG))
    return CC_NZmode;

  /* A compare with a shifted operand.  Because of canonicalization,
     the comparison will have to be swapped when we emit the assembly
     code.  */
  if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
      && (REG_P (y) || GET_CODE (y) == SUBREG)
      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
	  || GET_CODE (x) == LSHIFTRT
	  || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
    return CC_SWPmode;

  /* Similarly for a negated operand, but we can only do this for
     equalities.  */
  if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
      && (REG_P (y) || GET_CODE (y) == SUBREG)
      && (code == EQ || code == NE)
      && GET_CODE (x) == NEG)
    return CC_Zmode;

  /* A compare of a mode narrower than SI mode against zero can be done
     by extending the value in the comparison.  */
  if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
      && y == const0_rtx)
    /* Only use sign-extension if we really need it.  */
    return ((code == GT || code == GE || code == LE || code == LT)
	    ? CC_SESWPmode : CC_ZESWPmode);

  /* For everything else, return CCmode.  */
  return CCmode;
}
static int
aarch64_get_condition_code_1 (enum machine_mode, enum rtx_code);

int
aarch64_get_condition_code (rtx x)
{
  machine_mode mode = GET_MODE (XEXP (x, 0));
  enum rtx_code comp_code = GET_CODE (x);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
  return aarch64_get_condition_code_1 (mode, comp_code);
}

static int
aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code)
{
  int ne = -1, eq = -1;
  switch (mode)
    {
    case CCFPmode:
    case CCFPEmode:
      switch (comp_code)
	{
	case GE: return AARCH64_GE;
	case GT: return AARCH64_GT;
	case LE: return AARCH64_LS;
	case LT: return AARCH64_MI;
	case NE: return AARCH64_NE;
	case EQ: return AARCH64_EQ;
	case ORDERED: return AARCH64_VC;
	case UNORDERED: return AARCH64_VS;
	case UNLT: return AARCH64_LT;
	case UNLE: return AARCH64_LE;
	case UNGT: return AARCH64_HI;
	case UNGE: return AARCH64_PL;
	default: return -1;
	}
      break;

    case CC_DNEmode:
      ne = AARCH64_NE;
      eq = AARCH64_EQ;
      break;

    case CC_DEQmode:
      ne = AARCH64_EQ;
      eq = AARCH64_NE;
      break;

    case CC_DGEmode:
      ne = AARCH64_GE;
      eq = AARCH64_LT;
      break;

    case CC_DLTmode:
      ne = AARCH64_LT;
      eq = AARCH64_GE;
      break;

    case CC_DGTmode:
      ne = AARCH64_GT;
      eq = AARCH64_LE;
      break;

    case CC_DLEmode:
      ne = AARCH64_LE;
      eq = AARCH64_GT;
      break;

    case CC_DGEUmode:
      ne = AARCH64_CS;
      eq = AARCH64_CC;
      break;

    case CC_DLTUmode:
      ne = AARCH64_CC;
      eq = AARCH64_CS;
      break;

    case CC_DGTUmode:
      ne = AARCH64_HI;
      eq = AARCH64_LS;
      break;

    case CC_DLEUmode:
      ne = AARCH64_LS;
      eq = AARCH64_HI;
      break;

    case CCmode:
      switch (comp_code)
	{
	case NE: return AARCH64_NE;
	case EQ: return AARCH64_EQ;
	case GE: return AARCH64_GE;
	case GT: return AARCH64_GT;
	case LE: return AARCH64_LE;
	case LT: return AARCH64_LT;
	case GEU: return AARCH64_CS;
	case GTU: return AARCH64_HI;
	case LEU: return AARCH64_LS;
	case LTU: return AARCH64_CC;
	default: return -1;
	}
      break;

    case CC_SWPmode:
    case CC_ZESWPmode:
    case CC_SESWPmode:
      switch (comp_code)
	{
	case NE: return AARCH64_NE;
	case EQ: return AARCH64_EQ;
	case GE: return AARCH64_LE;
	case GT: return AARCH64_LT;
	case LE: return AARCH64_GE;
	case LT: return AARCH64_GT;
	case GEU: return AARCH64_LS;
	case GTU: return AARCH64_CC;
	case LEU: return AARCH64_CS;
	case LTU: return AARCH64_HI;
	default: return -1;
	}
      break;

    case CC_NZmode:
      switch (comp_code)
	{
	case NE: return AARCH64_NE;
	case EQ: return AARCH64_EQ;
	case GE: return AARCH64_PL;
	case LT: return AARCH64_MI;
	default: return -1;
	}
      break;

    case CC_Zmode:
      switch (comp_code)
	{
	case NE: return AARCH64_NE;
	case EQ: return AARCH64_EQ;
	default: return -1;
	}
      break;

    default:
      return -1;
      break;
    }

  if (comp_code == NE)
    return ne;

  if (comp_code == EQ)
    return eq;

  return -1;
}
static bool
aarch64_const_vec_all_same_in_range_p (rtx x,
				       HOST_WIDE_INT minval,
				       HOST_WIDE_INT maxval)
{
  HOST_WIDE_INT firstval;
  int count, i;

  if (GET_CODE (x) != CONST_VECTOR
      || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
    return false;

  firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
  if (firstval < minval || firstval > maxval)
    return false;

  count = CONST_VECTOR_NUNITS (x);
  for (i = 1; i < count; i++)
    if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
      return false;

  return true;
}

static bool
aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
{
  return aarch64_const_vec_all_same_in_range_p (x, val, val);
}

static unsigned
bit_count (unsigned HOST_WIDE_INT value)
{
  unsigned count = 0;

  while (value)
    {
      count++;
      value &= value - 1;
    }

  return count;
}
#define AARCH64_CC_V 1
#define AARCH64_CC_C (1 << 1)
#define AARCH64_CC_Z (1 << 2)
#define AARCH64_CC_N (1 << 3)

/* N Z C V flags for ccmp.  The first code is for AND op and the other
   is for IOR op.  Indexed by AARCH64_COND_CODE.  */
static const int aarch64_nzcv_codes[][2] =
{
  {AARCH64_CC_Z, 0}, /* EQ, Z == 1.  */
  {0, AARCH64_CC_Z}, /* NE, Z == 0.  */
  {AARCH64_CC_C, 0}, /* CS, C == 1.  */
  {0, AARCH64_CC_C}, /* CC, C == 0.  */
  {AARCH64_CC_N, 0}, /* MI, N == 1.  */
  {0, AARCH64_CC_N}, /* PL, N == 0.  */
  {AARCH64_CC_V, 0}, /* VS, V == 1.  */
  {0, AARCH64_CC_V}, /* VC, V == 0.  */
  {AARCH64_CC_C, 0}, /* HI, C == 1 && Z == 0.  */
  {0, AARCH64_CC_C}, /* LS, !(C == 1 && Z == 0).  */
  {0, AARCH64_CC_V}, /* GE, N == V.  */
  {AARCH64_CC_V, 0}, /* LT, N != V.  */
  {0, AARCH64_CC_Z}, /* GT, Z == 0 && N == V.  */
  {AARCH64_CC_Z, 0}, /* LE, !(Z == 0 && N == V).  */
  {0, 0}, /* AL, Any.  */
  {0, 0}, /* NV, Any.  */
};

int
aarch64_ccmp_mode_to_code (enum machine_mode mode)
{
  switch (mode)
    {
    case CC_DNEmode:
      return NE;

    case CC_DEQmode:
      return EQ;

    case CC_DLEmode:
      return LE;

    case CC_DGTmode:
      return GT;

    case CC_DLTmode:
      return LT;

    case CC_DGEmode:
      return GE;

    case CC_DLEUmode:
      return LEU;

    case CC_DGTUmode:
      return GTU;

    case CC_DLTUmode:
      return LTU;

    case CC_DGEUmode:
      return GEU;

    default:
      gcc_unreachable ();
    }
}
static void
aarch64_print_operand (FILE *f, rtx x, char code)
{
  switch (code)
    {
    /* An integer or symbol address without a preceding # sign.  */
    case 'c':
      switch (GET_CODE (x))
	{
	case CONST_INT:
	  fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
	  break;

	case SYMBOL_REF:
	  output_addr_const (f, x);
	  break;

	case CONST:
	  if (GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
	    {
	      output_addr_const (f, x);
	      break;
	    }
	  /* Fall through.  */

	default:
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	}
      break;

    case 'e':
      /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w.  */
      {
	int n;

	if (!CONST_INT_P (x)
	    || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
	  {
	    output_operand_lossage ("invalid operand for '%%%c'", code);
	    return;
	  }

	switch (n)
	  {
	  case 3:
	    fputc ('b', f);
	    break;
	  case 4:
	    fputc ('h', f);
	    break;
	  case 5:
	    fputc ('w', f);
	    break;
	  default:
	    output_operand_lossage ("invalid operand for '%%%c'", code);
	    return;
	  }
      }
      break;

    case 'p':
      {
	int n;

	/* Print N such that 2^N == X.  */
	if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
	  {
	    output_operand_lossage ("invalid operand for '%%%c'", code);
	    return;
	  }

	asm_fprintf (f, "%d", n);
      }
      break;

    case 'P':
      /* Print the number of non-zero bits in X (a const_int).  */
      if (!CONST_INT_P (x))
	{
	  output_operand_lossage ("invalid operand for '%%%c'", code);
	  return;
	}

      asm_fprintf (f, "%u", bit_count (INTVAL (x)));
      break;

    case 'H':
      /* Print the higher numbered register of a pair (TImode) of regs.  */
      if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
	{
	  output_operand_lossage ("invalid operand for '%%%c'", code);
	  return;
	}

      asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
      break;

    case 'm':
      {
	int cond_code;
	/* Print a condition (eq, ne, etc).  */

	/* CONST_TRUE_RTX means always -- that's the default.  */
	if (x == const_true_rtx)
	  return;

	if (!COMPARISON_P (x))
	  {
	    output_operand_lossage ("invalid operand for '%%%c'", code);
	    return;
	  }

	cond_code = aarch64_get_condition_code (x);
	gcc_assert (cond_code >= 0);
	fputs (aarch64_condition_codes[cond_code], f);
      }
      break;

    case 'M':
      {
	int cond_code;
	/* Print the inverse of a condition (eq <-> ne, etc).  */

	/* CONST_TRUE_RTX means never -- that's the default.  */
	if (x == const_true_rtx)
	  {
	    fputs ("nv", f);
	    return;
	  }

	if (!COMPARISON_P (x))
	  {
	    output_operand_lossage ("invalid operand for '%%%c'", code);
	    return;
	  }
	cond_code = aarch64_get_condition_code (x);
	gcc_assert (cond_code >= 0);
	fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
				       (cond_code)], f);
      }
      break;

    case 'b':
    case 'h':
    case 's':
    case 'd':
    case 'q':
      /* Print a scalar FP/SIMD register name.  */
      if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
	{
	  output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
	  return;
	}
      asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
      break;

    case 'S':
    case 'T':
    case 'U':
    case 'V':
      /* Print the first FP/SIMD register name in a list.  */
      if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
	{
	  output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
	  return;
	}
      asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
      break;

    case 'R':
      /* Print a scalar FP/SIMD register name + 1.  */
      if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
	{
	  output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
	  return;
	}
      asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
      break;

    case 'X':
      /* Print bottom 16 bits of integer constant in hex.  */
      if (!CONST_INT_P (x))
	{
	  output_operand_lossage ("invalid operand for '%%%c'", code);
	  return;
	}
      asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
      break;

    case 'w':
    case 'x':
      /* Print a general register name or the zero register (32-bit or
	 64-bit).  */
      if (x == const0_rtx
	  || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
	{
	  asm_fprintf (f, "%czr", code);
	  break;
	}

      if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
	{
	  asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
	  break;
	}

      if (REG_P (x) && REGNO (x) == SP_REGNUM)
	{
	  asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
	  break;
	}

      /* Fall through */

    case 0:
      /* Print a normal operand, if it's a general register, then we
	 assume DImode.  */
      if (x == NULL)
	{
	  output_operand_lossage ("missing operand");
	  return;
	}

      switch (GET_CODE (x))
	{
	case REG:
	  asm_fprintf (f, "%s", reg_names [REGNO (x)]);
	  break;

	case MEM:
	  aarch64_memory_reference_mode = GET_MODE (x);
	  output_address (XEXP (x, 0));
	  break;

	case LABEL_REF:
	case SYMBOL_REF:
	  output_addr_const (asm_out_file, x);
	  break;

	case CONST_INT:
	  asm_fprintf (f, "%wd", INTVAL (x));
	  break;

	case CONST_VECTOR:
	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
	    {
	      gcc_assert (
		  aarch64_const_vec_all_same_in_range_p (x,
							 HOST_WIDE_INT_MIN,
							 HOST_WIDE_INT_MAX));
	      asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
	    }
	  else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
	    {
	      fputc ('0', f);
	    }
	  else
	    gcc_unreachable ();
	  break;

	case CONST_DOUBLE:
	  /* CONST_DOUBLE can represent a double-width integer.
	     In this case, the mode of x is VOIDmode.  */
	  if (GET_MODE (x) == VOIDmode)
	    ; /* Do Nothing.  */
	  else if (aarch64_float_const_zero_rtx_p (x))
	    {
	      fputc ('0', f);
	      break;
	    }
	  else if (aarch64_float_const_representable_p (x))
	    {
#define buf_size 20
	      char float_buf[buf_size] = {'\0'};
	      REAL_VALUE_TYPE r;
	      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
	      real_to_decimal_for_mode (float_buf, &r,
					buf_size, buf_size,
					1, GET_MODE (x));
	      asm_fprintf (asm_out_file, "%s", float_buf);
	      break;
#undef buf_size
	    }
	  output_operand_lossage ("invalid constant");
	  return;
	default:
	  output_operand_lossage ("invalid operand");
	  return;
	}
      break;

    case 'A':
      if (GET_CODE (x) == HIGH)
	x = XEXP (x, 0);

      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
	{
	case SYMBOL_SMALL_GOT:
	  asm_fprintf (asm_out_file, ":got:");
	  break;

	case SYMBOL_SMALL_TLSGD:
	  asm_fprintf (asm_out_file, ":tlsgd:");
	  break;

	case SYMBOL_SMALL_TLSDESC:
	  asm_fprintf (asm_out_file, ":tlsdesc:");
	  break;

	case SYMBOL_SMALL_GOTTPREL:
	  asm_fprintf (asm_out_file, ":gottprel:");
	  break;

	case SYMBOL_SMALL_TPREL:
	  asm_fprintf (asm_out_file, ":tprel:");
	  break;

	case SYMBOL_TINY_GOT:
	  gcc_unreachable ();
	  break;

	default:
	  break;
	}
      output_addr_const (asm_out_file, x);
      break;

    case 'L':
      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
	{
	case SYMBOL_SMALL_GOT:
	  asm_fprintf (asm_out_file, ":lo12:");
	  break;

	case SYMBOL_SMALL_TLSGD:
	  asm_fprintf (asm_out_file, ":tlsgd_lo12:");
	  break;

	case SYMBOL_SMALL_TLSDESC:
	  asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
	  break;

	case SYMBOL_SMALL_GOTTPREL:
	  asm_fprintf (asm_out_file, ":gottprel_lo12:");
	  break;

	case SYMBOL_SMALL_TPREL:
	  asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
	  break;

	case SYMBOL_TINY_GOT:
	  asm_fprintf (asm_out_file, ":got:");
	  break;

	default:
	  break;
	}
      output_addr_const (asm_out_file, x);
      break;

    case 'G':
      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
	{
	case SYMBOL_SMALL_TPREL:
	  asm_fprintf (asm_out_file, ":tprel_hi12:");
	  break;
	default:
	  break;
	}
      output_addr_const (asm_out_file, x);
      break;

    case 'K':
      {
	int cond_code;

	if (!COMPARISON_P (x))
	  {
	    output_operand_lossage ("invalid operand for '%%%c'", code);
	    return;
	  }

	cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
	gcc_assert (cond_code >= 0);
	asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][0]);
      }
      break;

    case 'k':
      {
	int cond_code;

	if (!COMPARISON_P (x))
	  {
	    output_operand_lossage ("invalid operand for '%%%c'", code);
	    return;
	  }

	cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
	gcc_assert (cond_code >= 0);
	asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][1]);
      }
      break;

    default:
      output_operand_lossage ("invalid operand prefix '%%%c'", code);
      return;
    }
}
void
aarch64_print_operand_address (FILE *f, rtx x)
{
  struct aarch64_address_info addr;

  if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
				MEM, true))
    switch (addr.type)
      {
      case ADDRESS_REG_IMM:
	if (addr.offset == const0_rtx)
	  asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
	else
	  asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
		       INTVAL (addr.offset));
	return;

      case ADDRESS_REG_REG:
	if (addr.shift == 0)
	  asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
		       reg_names [REGNO (addr.offset)]);
	else
	  asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
		       reg_names [REGNO (addr.offset)], addr.shift);
	return;

      case ADDRESS_REG_UXTW:
	if (addr.shift == 0)
	  asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
		       REGNO (addr.offset) - R0_REGNUM);
	else
	  asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
		       REGNO (addr.offset) - R0_REGNUM, addr.shift);
	return;

      case ADDRESS_REG_SXTW:
	if (addr.shift == 0)
	  asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
		       REGNO (addr.offset) - R0_REGNUM);
	else
	  asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
		       REGNO (addr.offset) - R0_REGNUM, addr.shift);
	return;

      case ADDRESS_REG_WB:
	switch (GET_CODE (x))
	  {
	  case PRE_INC:
	    asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
			 GET_MODE_SIZE (aarch64_memory_reference_mode));
	    return;
	  case POST_INC:
	    asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
			 GET_MODE_SIZE (aarch64_memory_reference_mode));
	    return;
	  case PRE_DEC:
	    asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
			 GET_MODE_SIZE (aarch64_memory_reference_mode));
	    return;
	  case POST_DEC:
	    asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
			 GET_MODE_SIZE (aarch64_memory_reference_mode));
	    return;
	  case PRE_MODIFY:
	    asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
			 INTVAL (addr.offset));
	    return;
	  case POST_MODIFY:
	    asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
			 INTVAL (addr.offset));
	    return;
	  default:
	    break;
	  }
	break;

      case ADDRESS_LO_SUM:
	asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
	output_addr_const (f, addr.offset);
	asm_fprintf (f, "]");
	return;

      case ADDRESS_SYMBOLIC:
	break;
      }

  output_addr_const (f, x);
}
bool
aarch64_label_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == LABEL_REF)
    return true;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
     referencing instruction, but they are constant offsets, not
     symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return false;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}
4571 aarch64_regno_regclass (unsigned regno
)
4573 if (GP_REGNUM_P (regno
))
4574 return GENERAL_REGS
;
4576 if (regno
== SP_REGNUM
)
4579 if (regno
== FRAME_POINTER_REGNUM
4580 || regno
== ARG_POINTER_REGNUM
)
4581 return POINTER_REGS
;
4583 if (FP_REGNUM_P (regno
))
4584 return FP_LO_REGNUM_P (regno
) ? FP_LO_REGS
: FP_REGS
;
static rtx
aarch64_legitimize_address (rtx x, rtx /* orig_x  */, machine_mode mode)
{
  /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
     where mask is selected by alignment and size of the offset.
     We try to pick as large a range for the offset as possible to
     maximize the chance of a CSE.  However, for aligned addresses
     we limit the range to 4k so that structures with different sized
     elements are likely to use the same base.  */

  if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
    {
      HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
      HOST_WIDE_INT base_offset;

      /* Does it look like we'll need a load/store-pair operation?  */
      if (GET_MODE_SIZE (mode) > 16
	  || mode == TImode)
	base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
		       & ~((128 * GET_MODE_SIZE (mode)) - 1));
      /* For offsets that aren't a multiple of the access size, the limit is
	 -256...255.  */
      else if (offset & (GET_MODE_SIZE (mode) - 1))
	base_offset = (offset + 0x100) & ~0x1ff;
      else
	base_offset = offset & ~0xfff;

      if (base_offset == 0)
	return x;

      offset -= base_offset;
      rtx base_reg = gen_reg_rtx (Pmode);
      rtx val = force_operand (plus_constant (Pmode, XEXP (x, 0), base_offset),
			       NULL_RTX);
      emit_move_insn (base_reg, val);
      x = plus_constant (Pmode, base_reg, offset);
    }

  return x;
}
/* Try a machine-dependent way of reloading an illegitimate address
   operand.  If we find one, push the reload and return the new rtx.  */

rtx
aarch64_legitimize_reload_address (rtx *x_p,
				   machine_mode mode,
				   int opnum, int type,
				   int ind_levels ATTRIBUTE_UNUSED)
{
  rtx x = *x_p;

  /* Do not allow mem (plus (reg, const)) if vector struct mode.  */
  if (aarch64_vect_struct_mode_p (mode)
      && GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))
    {
      rtx orig_rtx = x;
      x = copy_rtx (x);
      push_reload (orig_rtx, NULL_RTX, x_p, NULL,
		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      return x;
    }

  /* We must recognize output that we have already generated ourselves.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && REG_P (XEXP (XEXP (x, 0), 0))
      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
      && CONST_INT_P (XEXP (x, 1)))
    {
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      return x;
    }

  /* We wish to handle large displacements off a base register by splitting
     the addend across an add and the mem insn.  This can cut the number of
     extra insns needed from 3 to 1.  It is only useful for load/store of a
     single register with 12 bit offset field.  */
  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1))
      && HARD_REGISTER_P (XEXP (x, 0))
      && mode != TImode
      && mode != TFmode
      && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
    {
      HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
      HOST_WIDE_INT low = val & 0xfff;
      HOST_WIDE_INT high = val - low;
      HOST_WIDE_INT offs;
      rtx cst;
      machine_mode xmode = GET_MODE (x);

      /* In ILP32, xmode can be either DImode or SImode.  */
      gcc_assert (xmode == DImode || xmode == SImode);

      /* Reload non-zero BLKmode offsets.  This is because we cannot ascertain
	 BLKmode alignment.  */
      if (GET_MODE_SIZE (mode) == 0)
	return NULL_RTX;

      offs = low % GET_MODE_SIZE (mode);

      /* Align misaligned offset by adjusting high part to compensate.  */
      if (offs != 0)
	{
	  if (aarch64_uimm12_shift (high + offs))
	    {
	      /* Align down.  */
	      low = low - offs;
	      high = high + offs;
	    }
	  else
	    {
	      /* Align up.  */
	      offs = GET_MODE_SIZE (mode) - offs;
	      low = low + offs;
	      high = high + (low & 0x1000) - offs;
	      low &= 0xfff;
	    }
	}

      /* Check for overflow.  */
      if (high + low != val)
	return NULL_RTX;

      cst = GEN_INT (high);
      if (!aarch64_uimm12_shift (high))
	cst = force_const_mem (xmode, cst);

      /* Reload high part into base reg, leaving the low part
	 in the mem instruction.
	 Note that replacing this gen_rtx_PLUS with plus_constant is
	 wrong in this case because we rely on the
	 (plus (plus reg c1) c2) structure being preserved so that
	 XEXP (*p, 0) in push_reload below uses the correct term.  */
      x = gen_rtx_PLUS (xmode,
			gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
			GEN_INT (low));

      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      return x;
    }

  return NULL_RTX;
}
, rtx x
,
4748 secondary_reload_info
*sri
)
4750 /* Without the TARGET_SIMD instructions we cannot move a Q register
4751 to a Q register directly. We need a scratch. */
4752 if (REG_P (x
) && (mode
== TFmode
|| mode
== TImode
) && mode
== GET_MODE (x
)
4753 && FP_REGNUM_P (REGNO (x
)) && !TARGET_SIMD
4754 && reg_class_subset_p (rclass
, FP_REGS
))
4757 sri
->icode
= CODE_FOR_aarch64_reload_movtf
;
4758 else if (mode
== TImode
)
4759 sri
->icode
= CODE_FOR_aarch64_reload_movti
;
4763 /* A TFmode or TImode memory access should be handled via an FP_REGS
4764 because AArch64 has richer addressing modes for LDR/STR instructions
4765 than LDP/STP instructions. */
4766 if (!TARGET_GENERAL_REGS_ONLY
&& rclass
== GENERAL_REGS
4767 && GET_MODE_SIZE (mode
) == 16 && MEM_P (x
))
4770 if (rclass
== FP_REGS
&& (mode
== TImode
|| mode
== TFmode
) && CONSTANT_P(x
))
4771 return GENERAL_REGS
;
static bool
aarch64_can_eliminate (const int from, const int to)
{
  /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
     HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM.  */

  if (frame_pointer_needed)
    {
      if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
	return true;
      if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
	return false;
      if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
	  && !cfun->calls_alloca)
	return true;
      if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
	return true;

      return false;
    }
  else
    {
      /* If we decided that we didn't need a leaf frame pointer but then used
	 LR in the function, then we'll want a frame pointer after all, so
	 prevent this elimination to ensure a frame pointer is used.  */
      if (to == STACK_POINTER_REGNUM
	  && flag_omit_leaf_frame_pointer
	  && df_regs_ever_live_p (LR_REGNUM))
	return false;
    }

  return true;
}
HOST_WIDE_INT
aarch64_initial_elimination_offset (unsigned from, unsigned to)
{
  aarch64_layout_frame ();

  if (to == HARD_FRAME_POINTER_REGNUM)
    {
      if (from == ARG_POINTER_REGNUM)
	return cfun->machine->frame.frame_size - crtl->outgoing_args_size;

      if (from == FRAME_POINTER_REGNUM)
	return (cfun->machine->frame.hard_fp_offset
		- cfun->machine->frame.saved_varargs_size);
    }

  if (to == STACK_POINTER_REGNUM)
    {
      if (from == FRAME_POINTER_REGNUM)
	return (cfun->machine->frame.frame_size
		- cfun->machine->frame.saved_varargs_size);
    }

  return cfun->machine->frame.frame_size;
}
/* Implement RETURN_ADDR_RTX.  We do not support moving back to a
   previous frame.  */

rtx
aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return const0_rtx;
  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
static void
aarch64_asm_trampoline_template (FILE *f)
{
  if (TARGET_ILP32)
    {
      asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
      asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
    }
  else
    {
      asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
      asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
    }
  asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
  assemble_aligned_integer (4, const0_rtx);
  assemble_aligned_integer (POINTER_BYTES, const0_rtx);
  assemble_aligned_integer (POINTER_BYTES, const0_rtx);
}
static void
aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;
  const int tramp_code_sz = 16;

  /* Don't need to copy the trailing D-words, we fill those in below.  */
  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
  mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  if (GET_MODE (fnaddr) != ptr_mode)
    fnaddr = convert_memory_address (ptr_mode, fnaddr);
  emit_move_insn (mem, fnaddr);

  mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
  emit_move_insn (mem, chain_value);

  /* XXX We should really define a "clear_cache" pattern and use
     gen_clear_cache().  */
  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
		     LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
		     plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
		     ptr_mode);
}
4894 aarch64_class_max_nregs (reg_class_t regclass
, machine_mode mode
)
4898 case CALLER_SAVE_REGS
:
4905 aarch64_vector_mode_p (mode
) ? (GET_MODE_SIZE (mode
) + 15) / 16 :
4906 (GET_MODE_SIZE (mode
) + 7) / 8;
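
/* Worked example (illustrative): for V4SImode, a 16-byte vector mode, the
   expression above yields (16 + 15) / 16 == 1 FP/SIMD register, while a
   16-byte non-vector mode such as TImode yields (16 + 7) / 8 == 2 general
   registers.  */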

static reg_class_t
aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
{
  if (regclass == POINTER_REGS)
    return GENERAL_REGS;

  if (regclass == STACK_REG)
    {
      if (REG_P (x)
          && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
        return regclass;

      return NO_REGS;
    }

  /* If it's an integer immediate that MOVI can't handle, then
     FP_REGS is not an option, so we return NO_REGS instead.  */
  if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
      && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
    return NO_REGS;

  /* Register elimination can result in a request for
     SP+constant->FP_REGS.  We cannot support such operations which
     use SP as source and an FP_REG as destination, so reject out
     right now.  */
  if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
    {
      rtx lhs = XEXP (x, 0);

      /* Look through a possible SUBREG introduced by ILP32.  */
      if (GET_CODE (lhs) == SUBREG)
        lhs = SUBREG_REG (lhs);

      gcc_assert (REG_P (lhs));
      gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
                                      POINTER_REGS));
      return NO_REGS;
    }

  return regclass;
}

void
aarch64_asm_output_labelref (FILE* f, const char *name)
{
  asm_fprintf (f, "%U%s", name);
}

static void
aarch64_elf_asm_constructor (rtx symbol, int priority)
{
  if (priority == DEFAULT_INIT_PRIORITY)
    default_ctor_section_asm_out_constructor (symbol, priority);
  else
    {
      section *s;
      char buf[23];
      snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
      s = get_section (buf, SECTION_WRITE, NULL);
      switch_to_section (s);
      assemble_align (POINTER_SIZE);
      assemble_aligned_integer (POINTER_BYTES, symbol);
    }
}

static void
aarch64_elf_asm_destructor (rtx symbol, int priority)
{
  if (priority == DEFAULT_INIT_PRIORITY)
    default_dtor_section_asm_out_destructor (symbol, priority);
  else
    {
      section *s;
      char buf[23];
      snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
      s = get_section (buf, SECTION_WRITE, NULL);
      switch_to_section (s);
      assemble_align (POINTER_SIZE);
      assemble_aligned_integer (POINTER_BYTES, symbol);
    }
}

const char *
aarch64_output_casesi (rtx *operands)
{
  char label[100];
  char buf[100];
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
  int index;
  static const char *const patterns[4][2] =
  {
    {
      "ldrb\t%w3, [%0,%w1,uxtw]",
      "add\t%3, %4, %w3, sxtb #2"
    },
    {
      "ldrh\t%w3, [%0,%w1,uxtw #1]",
      "add\t%3, %4, %w3, sxth #2"
    },
    {
      "ldr\t%w3, [%0,%w1,uxtw #2]",
      "add\t%3, %4, %w3, sxtw #2"
    },
    /* We assume that DImode is only generated when not optimizing and
       that we don't really need 64-bit address offsets.  That would
       imply an object file with 8GB of code in a single function!  */
    {
      "ldr\t%w3, [%0,%w1,uxtw #2]",
      "add\t%3, %4, %w3, sxtw #2"
    }
  };

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));

  gcc_assert (index >= 0 && index <= 3);

  /* Need to implement table size reduction, by changing the code below.  */
  output_asm_insn (patterns[index][0], operands);
  ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
  snprintf (buf, sizeof (buf),
            "adr\t%%4, %s", targetm.strip_name_encoding (label));
  output_asm_insn (buf, operands);
  output_asm_insn (patterns[index][1], operands);
  output_asm_insn ("br\t%3", operands);
  assemble_label (asm_out_file, label);
  return "";
}
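
/* For a byte-wide dispatch table (index 0 above), the emitted sequence is,
   schematically (an illustrative sketch, not verbatim compiler output; N
   stands for the CODE_LABEL_NUMBER of operand 2):

        ldrb    w3, [x0, w1, uxtw]
        adr     x4, .LrtxN
        add     x3, x4, w3, sxtb #2
        br      x3
   .LrtxN:  */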

/* Return size in bits of an arithmetic operand which is shifted/scaled and
   masked such that it is suitable for a UXTB, UXTH, or UXTW extend
   operator.  */

int
aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
{
  if (shift >= 0 && shift <= 3)
    {
      int size;
      for (size = 8; size <= 32; size *= 2)
        {
          HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
          if (mask == bits << shift)
            return size;
        }
    }
  return 0;
}
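
/* Worked example (illustrative): aarch64_uxt_size (1, 0x1fe) returns 8,
   since with size == 8 the contiguous bit mask is 0xff and 0xff << 1 ==
   0x1fe, i.e. the operand fits a UXTB-style extend.  A mask that is not a
   shifted byte/halfword/word mask, e.g. 0xf0, yields 0.  */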

static bool
aarch64_use_blocks_for_constant_p (machine_mode mode ATTRIBUTE_UNUSED,
                                   const_rtx x ATTRIBUTE_UNUSED)
{
  /* We can't use blocks for constants when we're using a per-function
     constant pool.  */
  return false;
}

static section *
aarch64_select_rtx_section (machine_mode mode ATTRIBUTE_UNUSED,
                            rtx x ATTRIBUTE_UNUSED,
                            unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  /* Force all constant pool entries into the current function section.  */
  return function_section (current_function_decl);
}

/* Helper function for rtx cost calculation.  Strip a shift expression
   from X.  Returns the inner operand if successful, or the original
   expression on failure.  */
static rtx
aarch64_strip_shift (rtx x)
{
  rtx op = x;

  /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
     we can convert both to ROR during final output.  */
  if ((GET_CODE (op) == ASHIFT
       || GET_CODE (op) == ASHIFTRT
       || GET_CODE (op) == LSHIFTRT
       || GET_CODE (op) == ROTATERT
       || GET_CODE (op) == ROTATE)
      && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);

  if (GET_CODE (op) == MULT
      && CONST_INT_P (XEXP (op, 1))
      && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
    return XEXP (op, 0);

  return x;
}
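
/* Illustrative examples: (ashift (reg) (const_int 3)) and
   (mult (reg) (const_int 8)) both strip to (reg), since a multiply by a
   power of two is the canonical form of a left shift here; a shift by a
   register amount is returned unchanged because the shift amount is not a
   constant.  */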

/* Helper function for rtx cost calculation.  Strip an extend
   expression from X.  Returns the inner operand if successful, or the
   original expression on failure.  We deal with a number of possible
   canonicalization variations here.  */
static rtx
aarch64_strip_extend (rtx x)
{
  rtx op = x;

  /* Zero and sign extraction of a widened value.  */
  if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
      && XEXP (op, 2) == const0_rtx
      && GET_CODE (XEXP (op, 0)) == MULT
      && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
                                         XEXP (op, 1)))
    return XEXP (XEXP (op, 0), 0);

  /* It can also be represented (for zero-extend) as an AND with an
     immediate.  */
  if (GET_CODE (op) == AND
      && GET_CODE (XEXP (op, 0)) == MULT
      && CONST_INT_P (XEXP (XEXP (op, 0), 1))
      && CONST_INT_P (XEXP (op, 1))
      && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
                           INTVAL (XEXP (op, 1))) != 0)
    return XEXP (XEXP (op, 0), 0);

  /* Now handle extended register, as this may also have an optional
     left shift by 1..4.  */
  if (GET_CODE (op) == ASHIFT
      && CONST_INT_P (XEXP (op, 1))
      && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
    op = XEXP (op, 0);

  if (GET_CODE (op) == ZERO_EXTEND
      || GET_CODE (op) == SIGN_EXTEND)
    op = XEXP (op, 0);

  if (op != x)
    return op;

  return x;
}

/* Return true iff CODE is a shift supported in combination
   with arithmetic instructions.  */

static bool
aarch64_shift_p (enum rtx_code code)
{
  return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
}

/* Helper function for rtx cost calculation.  Calculate the cost of
   a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
   Return the calculated cost of the expression, recursing manually in to
   operands where needed.  */

static int
aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
{
  rtx op0, op1;
  const struct cpu_cost_table *extra_cost
    = aarch64_tune_params->insn_extra_cost;
  int cost = 0;
  bool compound_p = (outer == PLUS || outer == MINUS);
  machine_mode mode = GET_MODE (x);

  gcc_checking_assert (code == MULT);

  op0 = XEXP (x, 0);
  op1 = XEXP (x, 1);

  if (VECTOR_MODE_P (mode))
    mode = GET_MODE_INNER (mode);

  /* Integer multiply/fma.  */
  if (GET_MODE_CLASS (mode) == MODE_INT)
    {
      /* The multiply will be canonicalized as a shift, cost it as such.  */
      if (aarch64_shift_p (GET_CODE (x))
          || (CONST_INT_P (op1)
              && exact_log2 (INTVAL (op1)) > 0))
        {
          bool is_extend = GET_CODE (op0) == ZERO_EXTEND
                           || GET_CODE (op0) == SIGN_EXTEND;
          if (speed)
            {
              if (compound_p)
                {
                  if (REG_P (op1))
                    /* ARITH + shift-by-register.  */
                    cost += extra_cost->alu.arith_shift_reg;
                  else if (is_extend)
                    /* ARITH + extended register.  We don't have a cost field
                       for ARITH+EXTEND+SHIFT, so use extend_arith here.  */
                    cost += extra_cost->alu.extend_arith;
                  else
                    /* ARITH + shift-by-immediate.  */
                    cost += extra_cost->alu.arith_shift;
                }
              else
                /* LSL (immediate).  */
                cost += extra_cost->alu.shift;
            }

          /* Strip extends as we will have costed them in the case above.  */
          if (is_extend)
            op0 = aarch64_strip_extend (op0);

          cost += rtx_cost (op0, GET_CODE (op0), 0, speed);

          return cost;
        }

      /* MNEG or [US]MNEGL.  Extract the NEG operand and indicate that it's a
         compound and let the below cases handle it.  After all, MNEG is a
         special-case alias of MSUB.  */
      if (GET_CODE (op0) == NEG)
        {
          op0 = XEXP (op0, 0);
          compound_p = true;
        }

      /* Integer multiplies or FMAs have zero/sign extending variants.  */
      if ((GET_CODE (op0) == ZERO_EXTEND
           && GET_CODE (op1) == ZERO_EXTEND)
          || (GET_CODE (op0) == SIGN_EXTEND
              && GET_CODE (op1) == SIGN_EXTEND))
        {
          cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
                  + rtx_cost (XEXP (op1, 0), MULT, 1, speed);

          if (speed)
            {
              if (compound_p)
                /* SMADDL/UMADDL/UMSUBL/SMSUBL.  */
                cost += extra_cost->mult[0].extend_add;
              else
                /* MUL/SMULL/UMULL.  */
                cost += extra_cost->mult[0].extend;
            }

          return cost;
        }

      /* This is either an integer multiply or a MADD.  In both cases
         we want to recurse and cost the operands.  */
      cost += rtx_cost (op0, MULT, 0, speed)
              + rtx_cost (op1, MULT, 1, speed);

      if (speed)
        {
          if (compound_p)
            /* MADD/MSUB.  */
            cost += extra_cost->mult[mode == DImode].add;
          else
            /* MUL.  */
            cost += extra_cost->mult[mode == DImode].simple;
        }

      return cost;
    }
  else
    {
      if (speed)
        {
          /* Floating-point FMA/FMUL can also support negations of the
             operands.  */
          if (GET_CODE (op0) == NEG)
            op0 = XEXP (op0, 0);
          if (GET_CODE (op1) == NEG)
            op1 = XEXP (op1, 0);

          if (compound_p)
            /* FMADD/FNMADD/FNMSUB/FMSUB.  */
            cost += extra_cost->fp[mode == DFmode].fma;
          else
            /* FMUL/FNMUL.  */
            cost += extra_cost->fp[mode == DFmode].mult;
        }

      cost += rtx_cost (op0, MULT, 0, speed)
              + rtx_cost (op1, MULT, 1, speed);
      return cost;
    }
}

static int
aarch64_address_cost (rtx x,
                      machine_mode mode,
                      addr_space_t as ATTRIBUTE_UNUSED,
                      bool speed)
{
  enum rtx_code c = GET_CODE (x);
  const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
  struct aarch64_address_info info;
  int cost = 0;
  info.shift = 0;

  if (!aarch64_classify_address (&info, x, mode, c, false))
    {
      if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
        {
          /* This is a CONST or SYMBOL ref which will be split
             in a different way depending on the code model in use.
             Cost it through the generic infrastructure.  */
          int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
          /* Divide through by the cost of one instruction to
             bring it to the same units as the address costs.  */
          cost_symbol_ref /= COSTS_N_INSNS (1);
          /* The cost is then the cost of preparing the address,
             followed by an immediate (possibly 0) offset.  */
          return cost_symbol_ref + addr_cost->imm_offset;
        }
      else
        {
          /* This is most likely a jump table from a case
             statement.  */
          return addr_cost->register_offset;
        }
    }

  switch (info.type)
    {
    case ADDRESS_LO_SUM:
    case ADDRESS_SYMBOLIC:
    case ADDRESS_REG_IMM:
      cost += addr_cost->imm_offset;
      break;

    case ADDRESS_REG_WB:
      if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
        cost += addr_cost->pre_modify;
      else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
        cost += addr_cost->post_modify;
      else
        gcc_unreachable ();
      break;

    case ADDRESS_REG_REG:
      cost += addr_cost->register_offset;
      break;

    case ADDRESS_REG_UXTW:
    case ADDRESS_REG_SXTW:
      cost += addr_cost->register_extend;
      break;

    default:
      gcc_unreachable ();
    }

  if (info.shift > 0)
    {
      /* For the sake of calculating the cost of the shifted register
         component, we can treat same sized modes in the same way.  */
      switch (GET_MODE_BITSIZE (mode))
        {
        case 16:
          cost += addr_cost->addr_scale_costs.hi;
          break;

        case 32:
          cost += addr_cost->addr_scale_costs.si;
          break;

        case 64:
          cost += addr_cost->addr_scale_costs.di;
          break;

        /* We can't tell, or this is a 128-bit vector.  */
        default:
          cost += addr_cost->addr_scale_costs.ti;
          break;
        }
    }

  return cost;
}
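
/* Worked example (illustrative): for a SYMBOL_REF that fails
   aarch64_classify_address, if rtx_cost reports COSTS_N_INSNS (2),
   dividing by COSTS_N_INSNS (1) converts that to 2 instruction-sized
   units, to which addr_cost->imm_offset is added for the final
   immediate-offset access.  */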

/* Return true if the RTX X in mode MODE is a zero or sign extract
   usable in an ADD or SUB (extended register) instruction.  */
static bool
aarch64_rtx_arith_op_extract_p (rtx x, machine_mode mode)
{
  /* Catch add with a sign extract.
     This is add_<optab><mode>_multp2.  */
  if (GET_CODE (x) == SIGN_EXTRACT
      || GET_CODE (x) == ZERO_EXTRACT)
    {
      rtx op0 = XEXP (x, 0);
      rtx op1 = XEXP (x, 1);
      rtx op2 = XEXP (x, 2);

      if (GET_CODE (op0) == MULT
          && CONST_INT_P (op1)
          && op2 == const0_rtx
          && CONST_INT_P (XEXP (op0, 1))
          && aarch64_is_extend_from_extract (mode,
                                             XEXP (op0, 1),
                                             op1))
        return true;
    }

  return false;
}

/* Return true iff U is one of the unspec codes representing the frint
   family of floating-point rounding instructions.  */
static bool
aarch64_frint_unspec_p (unsigned int u)
{
  switch (u)
    {
    case UNSPEC_FRINTZ:
    case UNSPEC_FRINTP:
    case UNSPEC_FRINTM:
    case UNSPEC_FRINTA:
    case UNSPEC_FRINTN:
    case UNSPEC_FRINTX:
    case UNSPEC_FRINTI:
      return true;

    default:
      return false;
    }
}

/* Return true iff X is an rtx that will match an extr instruction
   i.e. as described in the *extr<mode>5_insn family of patterns.
   OP0 and OP1 will be set to the operands of the shifts involved
   on success and will be NULL_RTX otherwise.  */

static bool
aarch64_extr_rtx_p (rtx x, rtx *res_op0, rtx *res_op1)
{
  rtx op0, op1;
  machine_mode mode = GET_MODE (x);

  *res_op0 = NULL_RTX;
  *res_op1 = NULL_RTX;

  if (GET_CODE (x) != IOR)
    return false;

  op0 = XEXP (x, 0);
  op1 = XEXP (x, 1);

  if ((GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
      || (GET_CODE (op1) == ASHIFT && GET_CODE (op0) == LSHIFTRT))
    {
      /* Canonicalise locally to ashift in op0, lshiftrt in op1.  */
      if (GET_CODE (op1) == ASHIFT)
        std::swap (op0, op1);

      if (!CONST_INT_P (XEXP (op0, 1)) || !CONST_INT_P (XEXP (op1, 1)))
        return false;

      unsigned HOST_WIDE_INT shft_amnt_0 = UINTVAL (XEXP (op0, 1));
      unsigned HOST_WIDE_INT shft_amnt_1 = UINTVAL (XEXP (op1, 1));

      if (shft_amnt_0 < GET_MODE_BITSIZE (mode)
          && shft_amnt_0 + shft_amnt_1 == GET_MODE_BITSIZE (mode))
        {
          *res_op0 = XEXP (op0, 0);
          *res_op1 = XEXP (op1, 0);
          return true;
        }
    }

  return false;
}
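
/* Illustrative example: in DImode,
     (ior (ashift (reg A) (const_int 48)) (lshiftrt (reg B) (const_int 16)))
   matches, since 48 + 16 == 64 == GET_MODE_BITSIZE (DImode); *res_op0 is
   set to A and *res_op1 to B, corresponding to an EXTR with shift amount
   16.  */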

/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
   storing it in *COST.  Result is true if the total cost of the operation
   has now been calculated.  */
static bool
aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
{
  rtx inner;
  rtx comparator;
  enum rtx_code cmpcode;

  if (COMPARISON_P (op0))
    {
      inner = XEXP (op0, 0);
      comparator = XEXP (op0, 1);
      cmpcode = GET_CODE (op0);
    }
  else
    {
      inner = op0;
      comparator = const0_rtx;
      cmpcode = NE;
    }

  if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
    {
      /* Conditional branch.  */
      if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
        return true;
      else
        {
          if (cmpcode == NE || cmpcode == EQ)
            {
              if (comparator == const0_rtx)
                {
                  /* TBZ/TBNZ/CBZ/CBNZ.  */
                  if (GET_CODE (inner) == ZERO_EXTRACT)
                    /* TBZ/TBNZ.  */
                    *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT,
                                       0, speed);
                  else
                    /* CBZ/CBNZ.  */
                    *cost += rtx_cost (inner, cmpcode, 0, speed);

                  return true;
                }
            }
          else if (cmpcode == LT || cmpcode == GE)
            {
              /* TBZ/TBNZ.  */
              if (comparator == const0_rtx)
                return true;
            }
        }
    }
  else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
    {
      /* It's a conditional operation based on the status flags,
         so it must be some flavor of CSEL.  */

      /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL.  */
      if (GET_CODE (op1) == NEG
          || GET_CODE (op1) == NOT
          || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
        op1 = XEXP (op1, 0);

      *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed);
      *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed);
      return true;
    }

  /* We don't know what this is, cost all operands.  */
  return false;
}

/* Calculate the cost of calculating X, storing it in *COST.  Result
   is true if the total cost of the operation has now been calculated.  */
static bool
aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
                   int param ATTRIBUTE_UNUSED, int *cost, bool speed)
{
  rtx op0, op1, op2;
  const struct cpu_cost_table *extra_cost
    = aarch64_tune_params->insn_extra_cost;
  machine_mode mode = GET_MODE (x);

  /* By default, assume that everything has equivalent cost to the
     cheapest instruction.  Any additional costs are applied as a delta
     above this default.  */
  *cost = COSTS_N_INSNS (1);

  /* TODO: The cost infrastructure currently does not handle
     vector operations.  Assume that all vector operations
     are equally expensive.  */
  if (VECTOR_MODE_P (mode))
    {
      if (speed)
        *cost += extra_cost->vect.alu;
      return true;
    }

  switch (code)
    {
    case SET:
      /* The cost depends entirely on the operands to SET.  */
      *cost = 0;
      op0 = SET_DEST (x);
      op1 = SET_SRC (x);

      switch (GET_CODE (op0))
        {
        case MEM:
          if (speed)
            {
              rtx address = XEXP (op0, 0);
              if (GET_MODE_CLASS (mode) == MODE_INT)
                *cost += extra_cost->ldst.store;
              else if (mode == SFmode)
                *cost += extra_cost->ldst.storef;
              else if (mode == DFmode)
                *cost += extra_cost->ldst.stored;

              *cost +=
                COSTS_N_INSNS (aarch64_address_cost (address, mode,
                                                     0, speed));
            }

          *cost += rtx_cost (op1, SET, 1, speed);
          return true;

        case SUBREG:
          if (! REG_P (SUBREG_REG (op0)))
            *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);

          /* Fall through.  */
        case REG:
          /* const0_rtx is in general free, but we will use an
             instruction to set a register to 0.  */
          if (REG_P (op1) || op1 == const0_rtx)
            {
              /* The cost is 1 per register copied.  */
              int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
                              / UNITS_PER_WORD;
              *cost = COSTS_N_INSNS (n_minus_1 + 1);
            }
          else
            /* Cost is just the cost of the RHS of the set.  */
            *cost += rtx_cost (op1, SET, 1, speed);
          return true;

        case ZERO_EXTRACT:
        case SIGN_EXTRACT:
          /* Bit-field insertion.  Strip any redundant widening of
             the RHS to meet the width of the target.  */
          if (GET_CODE (op1) == SUBREG)
            op1 = SUBREG_REG (op1);
          if ((GET_CODE (op1) == ZERO_EXTEND
               || GET_CODE (op1) == SIGN_EXTEND)
              && CONST_INT_P (XEXP (op0, 1))
              && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
                  >= INTVAL (XEXP (op0, 1))))
            op1 = XEXP (op1, 0);

          if (CONST_INT_P (op1))
            {
              /* MOV immediate is assumed to always be cheap.  */
              *cost = COSTS_N_INSNS (1);
            }
          else
            {
              /* BFM.  */
              if (speed)
                *cost += extra_cost->alu.bfi;
              *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
            }

          return true;

        default:
          /* We can't make sense of this, assume default cost.  */
          *cost = COSTS_N_INSNS (1);
          return false;
        }

    case CONST_INT:
      /* If an instruction can incorporate a constant within the
         instruction, the instruction's expression avoids calling
         rtx_cost() on the constant.  If rtx_cost() is called on a
         constant, then it is usually because the constant must be
         moved into a register by one or more instructions.

         The exception is constant 0, which can be expressed
         as XZR/WZR and is therefore free.  The exception to this is
         if we have (set (reg) (const0_rtx)) in which case we must cost
         the move.  However, we can catch that when we cost the SET, so
         we don't need to consider that here.  */
      if (x == const0_rtx)
        *cost = 0;
      else
        {
          /* To an approximation, building any other constant is
             proportionally expensive to the number of instructions
             required to build that constant.  This is true whether we
             are compiling for SPEED or otherwise.  */
          *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
                                 (NULL_RTX, x, false, mode));
        }
      return true;

    case CONST_DOUBLE:
      if (speed)
        {
          /* mov[df,sf]_aarch64.  */
          if (aarch64_float_const_representable_p (x))
            /* FMOV (scalar immediate).  */
            *cost += extra_cost->fp[mode == DFmode].fpconst;
          else if (!aarch64_float_const_zero_rtx_p (x))
            {
              /* This will be a load from memory.  */
              if (mode == DFmode)
                *cost += extra_cost->ldst.loadd;
              else
                *cost += extra_cost->ldst.loadf;
            }
          /* Otherwise this is +0.0.  We get this using MOVI d0, #0
             or MOV v0.s[0], wzr - neither of which are modeled by the
             cost tables.  Just use the default cost.  */
        }
      return true;

    case MEM:
      if (speed)
        {
          /* For loads we want the base cost of a load, plus an
             approximation for the additional cost of the addressing
             mode.  */
          rtx address = XEXP (x, 0);
          if (GET_MODE_CLASS (mode) == MODE_INT)
            *cost += extra_cost->ldst.load;
          else if (mode == SFmode)
            *cost += extra_cost->ldst.loadf;
          else if (mode == DFmode)
            *cost += extra_cost->ldst.loadd;

          *cost +=
            COSTS_N_INSNS (aarch64_address_cost (address, mode,
                                                 0, speed));
        }
      return true;

    case NEG:
      op0 = XEXP (x, 0);

      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
        {
          if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
              || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
            {
              /* CSETM.  */
              *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
              return true;
            }

          /* Cost this as SUB wzr, X.  */
          op0 = CONST0_RTX (GET_MODE (x));
          op1 = XEXP (x, 0);
          goto cost_minus;
        }

      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
        {
          /* Support (neg(fma...)) as a single instruction only if
             sign of zeros is unimportant.  This matches the decision
             making in aarch64.md.  */
          if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
            {
              /* FNMADD.  */
              *cost = rtx_cost (op0, NEG, 0, speed);
              return true;
            }

          /* FNEG.  */
          if (speed)
            *cost += extra_cost->fp[mode == DFmode].neg;
          return false;
        }

      return false;

    case CLRSB:
    case CLZ:
      if (speed)
        *cost += extra_cost->alu.clz;

      return false;

    case COMPARE:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (op1 == const0_rtx
          && GET_CODE (op0) == AND)
        {
          x = op0;
          goto cost_logic;
        }

      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        {
          /* TODO: A write to the CC flags possibly costs extra, this
             needs encoding in the cost tables.  */

          /* CC_ZESWPmode supports zero extend for free.  */
          if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
            op0 = XEXP (op0, 0);

          /* ANDS.  */
          if (GET_CODE (op0) == AND)
            {
              x = op0;
              goto cost_logic;
            }

          if (GET_CODE (op0) == PLUS)
            {
              /* ADDS (and CMN alias).  */
              x = op0;
              goto cost_plus;
            }

          if (GET_CODE (op0) == MINUS)
            {
              /* SUBS.  */
              x = op0;
              goto cost_minus;
            }

          if (GET_CODE (op1) == NEG)
            {
              /* CMN.  */
              if (speed)
                *cost += extra_cost->alu.arith;

              *cost += rtx_cost (op0, COMPARE, 0, speed);
              *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed);
              return true;
            }

          /* CMP.

             Compare can freely swap the order of operands, and
             canonicalization puts the more complex operation first.
             But the integer MINUS logic expects the shift/extend
             operation in op1.  */
          if (! (REG_P (op0)
                 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
            {
              op0 = XEXP (x, 1);
              op1 = XEXP (x, 0);
            }
          goto cost_minus;
        }

      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
        {
          /* FCMP.  */
          if (speed)
            *cost += extra_cost->fp[mode == DFmode].compare;

          if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
            {
              *cost += rtx_cost (op0, COMPARE, 0, speed);
              /* FCMP supports constant 0.0 for no extra cost.  */
              return true;
            }
          return false;
        }

      return false;

    case MINUS:
      {
        op0 = XEXP (x, 0);
        op1 = XEXP (x, 1);

cost_minus:
        *cost += rtx_cost (op0, MINUS, 0, speed);

        /* Detect valid immediates.  */
        if ((GET_MODE_CLASS (mode) == MODE_INT
             || (GET_MODE_CLASS (mode) == MODE_CC
                 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
            && CONST_INT_P (op1)
            && aarch64_uimm12_shift (INTVAL (op1)))
          {
            if (speed)
              /* SUB(S) (immediate).  */
              *cost += extra_cost->alu.arith;
            return true;
          }

        /* Look for SUB (extended register).  */
        if (aarch64_rtx_arith_op_extract_p (op1, mode))
          {
            if (speed)
              *cost += extra_cost->alu.extend_arith;

            *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
                               (enum rtx_code) GET_CODE (op1),
                               0, speed);
            return true;
          }

        rtx new_op1 = aarch64_strip_extend (op1);

        /* Cost this as an FMA-alike operation.  */
        if ((GET_CODE (new_op1) == MULT
             || aarch64_shift_p (GET_CODE (new_op1)))
            && code != COMPARE)
          {
            *cost += aarch64_rtx_mult_cost (new_op1, MULT,
                                            (enum rtx_code) code,
                                            speed);
            return true;
          }

        *cost += rtx_cost (new_op1, MINUS, 1, speed);

        if (speed)
          {
            if (GET_MODE_CLASS (mode) == MODE_INT)
              /* SUB(S).  */
              *cost += extra_cost->alu.arith;
            else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
              /* FSUB.  */
              *cost += extra_cost->fp[mode == DFmode].addsub;
          }
        return true;
      }

    case PLUS:
      {
        rtx new_op0;

        op0 = XEXP (x, 0);
        op1 = XEXP (x, 1);

cost_plus:
        if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
            || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
          {
            /* CSINC.  */
            *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
            *cost += rtx_cost (op1, PLUS, 1, speed);
            return true;
          }

        if (GET_MODE_CLASS (mode) == MODE_INT
            && CONST_INT_P (op1)
            && aarch64_uimm12_shift (INTVAL (op1)))
          {
            *cost += rtx_cost (op0, PLUS, 0, speed);

            if (speed)
              /* ADD (immediate).  */
              *cost += extra_cost->alu.arith;
            return true;
          }

        *cost += rtx_cost (op1, PLUS, 1, speed);

        /* Look for ADD (extended register).  */
        if (aarch64_rtx_arith_op_extract_p (op0, mode))
          {
            if (speed)
              *cost += extra_cost->alu.extend_arith;

            *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
                               (enum rtx_code) GET_CODE (op0),
                               0, speed);
            return true;
          }

        /* Strip any extend, leave shifts behind as we will
           cost them through mult_cost.  */
        new_op0 = aarch64_strip_extend (op0);

        if (GET_CODE (new_op0) == MULT
            || aarch64_shift_p (GET_CODE (new_op0)))
          {
            *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
                                            speed);
            return true;
          }

        *cost += rtx_cost (new_op0, PLUS, 0, speed);

        if (speed)
          {
            if (GET_MODE_CLASS (mode) == MODE_INT)
              /* ADD.  */
              *cost += extra_cost->alu.arith;
            else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
              /* FADD.  */
              *cost += extra_cost->fp[mode == DFmode].addsub;
          }
        return true;
      }

    case BSWAP:
      *cost = COSTS_N_INSNS (1);

      if (speed)
        *cost += extra_cost->alu.rev;

      return false;

    case IOR:
      if (aarch_rev16_p (x))
        {
          *cost = COSTS_N_INSNS (1);

          if (speed)
            *cost += extra_cost->alu.rev;

          return true;
        }

      if (aarch64_extr_rtx_p (x, &op0, &op1))
        {
          *cost += rtx_cost (op0, IOR, 0, speed)
                   + rtx_cost (op1, IOR, 1, speed);
          if (speed)
            *cost += extra_cost->alu.shift;

          return true;
        }
      /* Fall through.  */
    case XOR:
    case AND:
    cost_logic:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (code == AND
          && GET_CODE (op0) == MULT
          && CONST_INT_P (XEXP (op0, 1))
          && CONST_INT_P (op1)
          && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
                               INTVAL (op1)) != 0)
        {
          /* This is a UBFM/SBFM.  */
          *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
          if (speed)
            *cost += extra_cost->alu.bfx;
          return true;
        }

      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
        {
          /* We possibly get the immediate for free, this is not
             modelled.  */
          if (CONST_INT_P (op1)
              && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
            {
              *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);

              if (speed)
                *cost += extra_cost->alu.logical;

              return true;
            }
          else
            {
              rtx new_op0 = op0;

              /* Handle ORN, EON, or BIC.  */
              if (GET_CODE (op0) == NOT)
                op0 = XEXP (op0, 0);

              new_op0 = aarch64_strip_shift (op0);

              /* If we had a shift on op0 then this is a logical-shift-
                 by-register/immediate operation.  Otherwise, this is just
                 a logical operation.  */
              if (speed)
                {
                  if (new_op0 != op0)
                    {
                      /* Shift by immediate.  */
                      if (CONST_INT_P (XEXP (op0, 1)))
                        *cost += extra_cost->alu.log_shift;
                      else
                        *cost += extra_cost->alu.log_shift_reg;
                    }
                  else
                    *cost += extra_cost->alu.logical;
                }

              /* In both cases we want to cost both operands.  */
              *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
                       + rtx_cost (op1, (enum rtx_code) code, 1, speed);

              return true;
            }
        }
      return false;

    case NOT:
      x = XEXP (x, 0);
      op0 = aarch64_strip_shift (x);

      /* MVN-shifted-reg.  */
      if (op0 != x)
        {
          *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);

          if (speed)
            *cost += extra_cost->alu.log_shift;

          return true;
        }
      /* EON can have two forms: (xor (not a) b) but also (not (xor a b)).
         Handle the second form here taking care that 'a' in the above can
         be a shift.  */
      else if (GET_CODE (op0) == XOR)
        {
          rtx newop0 = XEXP (op0, 0);
          rtx newop1 = XEXP (op0, 1);
          rtx op0_stripped = aarch64_strip_shift (newop0);

          *cost += rtx_cost (newop1, (enum rtx_code) code, 1, speed)
                   + rtx_cost (op0_stripped, XOR, 0, speed);

          if (speed)
            {
              if (op0_stripped != newop0)
                *cost += extra_cost->alu.log_shift;
              else
                *cost += extra_cost->alu.logical;
            }

          return true;
        }

      /* MVN.  */
      if (speed)
        *cost += extra_cost->alu.logical;

      return false;

    case ZERO_EXTEND:

      op0 = XEXP (x, 0);
      /* If a value is written in SI mode, then zero extended to DI
         mode, the operation will in general be free as a write to
         a 'w' register implicitly zeroes the upper bits of an 'x'
         register.  However, if this is

           (set (reg) (zero_extend (reg)))

         we must cost the explicit register move.  */
      if (mode == DImode
          && GET_MODE (op0) == SImode
          && outer == SET)
        {
          int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);

          if (!op_cost && speed)
            /* MOV.  */
            *cost += extra_cost->alu.extend;
          else
            /* Free, the cost is that of the SI mode operation.  */
            *cost = op_cost;

          return true;
        }
      else if (MEM_P (XEXP (x, 0)))
        {
          /* All loads can zero extend to any size for free.  */
          *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
          return true;
        }

      /* UXTB/UXTH.  */
      if (speed)
        *cost += extra_cost->alu.extend;

      return false;

    case SIGN_EXTEND:
      if (MEM_P (XEXP (x, 0)))
        {
          /* LDRSH.  */
          if (speed)
            {
              rtx address = XEXP (XEXP (x, 0), 0);
              *cost += extra_cost->ldst.load_sign_extend;

              *cost +=
                COSTS_N_INSNS (aarch64_address_cost (address, mode,
                                                     0, speed));
            }
          return true;
        }

      if (speed)
        *cost += extra_cost->alu.extend;

      return false;

    case ASHIFT:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (CONST_INT_P (op1))
        {
          /* LSL (immediate), UBFM, UBFIZ and friends.  These are all
             aliases.  */
          if (speed)
            *cost += extra_cost->alu.shift;

          /* We can incorporate zero/sign extend for free.  */
          if (GET_CODE (op0) == ZERO_EXTEND
              || GET_CODE (op0) == SIGN_EXTEND)
            op0 = XEXP (op0, 0);

          *cost += rtx_cost (op0, ASHIFT, 0, speed);
          return true;
        }
      else
        {
          /* LSLV.  */
          if (speed)
            *cost += extra_cost->alu.shift_reg;

          return false;  /* All arguments need to be in registers.  */
        }

    case ROTATE:
    case ROTATERT:
    case LSHIFTRT:
    case ASHIFTRT:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (CONST_INT_P (op1))
        {
          /* ASR (immediate) and friends.  */
          if (speed)
            *cost += extra_cost->alu.shift;

          *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
          return true;
        }
      else
        {
          /* ASR (register) and friends.  */
          if (speed)
            *cost += extra_cost->alu.shift_reg;

          return false;  /* All arguments need to be in registers.  */
        }

    case SYMBOL_REF:

      if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
        {
          /* LDR.  */
          if (speed)
            *cost += extra_cost->ldst.load;
        }
      else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
               || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
        {
          /* ADRP, followed by ADD.  */
          *cost += COSTS_N_INSNS (1);
          if (speed)
            *cost += 2 * extra_cost->alu.arith;
        }
      else if (aarch64_cmodel == AARCH64_CMODEL_TINY
               || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
        {
          /* ADR.  */
          if (speed)
            *cost += extra_cost->alu.arith;
        }

      if (flag_pic)
        {
          /* One extra load instruction, after accessing the GOT.  */
          *cost += COSTS_N_INSNS (1);
          if (speed)
            *cost += extra_cost->ldst.load;
        }
      return true;

    case HIGH:
    case LO_SUM:
      /* ADRP/ADD (immediate).  */
      if (speed)
        *cost += extra_cost->alu.arith;
      return true;

    case ZERO_EXTRACT:
    case SIGN_EXTRACT:
      /* UBFX/SBFX.  */
      if (speed)
        *cost += extra_cost->alu.bfx;

      /* We can trust that the immediates used will be correct (there
         are no by-register forms), so we need only cost op0.  */
      *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed);
      return true;

    case MULT:
      *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
      /* aarch64_rtx_mult_cost always handles recursion to its
         operands.  */
      return true;

    case MOD:
    case UMOD:
      if (speed)
        {
          if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
            *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
                      + extra_cost->mult[GET_MODE (x) == DImode].idiv);
          else if (GET_MODE (x) == DFmode)
            *cost += (extra_cost->fp[1].mult
                      + extra_cost->fp[1].div);
          else if (GET_MODE (x) == SFmode)
            *cost += (extra_cost->fp[0].mult
                      + extra_cost->fp[0].div);
        }
      return false;  /* All arguments need to be in registers.  */

    case DIV:
    case UDIV:
    case SQRT:
      if (speed)
        {
          if (GET_MODE_CLASS (mode) == MODE_INT)
            /* There is no integer SQRT, so only DIV and UDIV can get
               here.  */
            *cost += extra_cost->mult[mode == DImode].idiv;
          else
            *cost += extra_cost->fp[mode == DFmode].div;
        }
      return false;  /* All arguments need to be in registers.  */

    case IF_THEN_ELSE:
      return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
                                         XEXP (x, 2), cost, speed);

    case EQ:
    case NE:
    case GT:
    case GTU:
    case LT:
    case LTU:
    case GE:
    case GEU:
    case LE:
    case LEU:

      return false; /* All arguments must be in registers.  */

    case FMA:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      op2 = XEXP (x, 2);

      if (speed)
        *cost += extra_cost->fp[mode == DFmode].fma;

      /* FMSUB, FNMADD, and FNMSUB are free.  */
      if (GET_CODE (op0) == NEG)
        op0 = XEXP (op0, 0);

      if (GET_CODE (op2) == NEG)
        op2 = XEXP (op2, 0);

      /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
         and the by-element operand as operand 0.  */
      if (GET_CODE (op1) == NEG)
        op1 = XEXP (op1, 0);

      /* Catch vector-by-element operations.  The by-element operand can
         either be (vec_duplicate (vec_select (x))) or just
         (vec_select (x)), depending on whether we are multiplying by
         a vector or a scalar.

         Canonicalization is not very good in these cases, FMA4 will put the
         by-element operand as operand 0, FNMA4 will have it as operand 1.  */
      if (GET_CODE (op0) == VEC_DUPLICATE)
        op0 = XEXP (op0, 0);
      else if (GET_CODE (op1) == VEC_DUPLICATE)
        op1 = XEXP (op1, 0);

      if (GET_CODE (op0) == VEC_SELECT)
        op0 = XEXP (op0, 0);
      else if (GET_CODE (op1) == VEC_SELECT)
        op1 = XEXP (op1, 0);

      /* If the remaining parameters are not registers,
         get the cost to put them into registers.  */
      *cost += rtx_cost (op0, FMA, 0, speed);
      *cost += rtx_cost (op1, FMA, 1, speed);
      *cost += rtx_cost (op2, FMA, 2, speed);
      return true;

    case FLOAT_EXTEND:
      if (speed)
        *cost += extra_cost->fp[mode == DFmode].widen;
      return false;

    case FLOAT_TRUNCATE:
      if (speed)
        *cost += extra_cost->fp[mode == DFmode].narrow;
      return false;

    case FIX:
    case UNSIGNED_FIX:
      x = XEXP (x, 0);
      /* Strip the rounding part.  They will all be implemented
         by the fcvt* family of instructions anyway.  */
      if (GET_CODE (x) == UNSPEC)
        {
          unsigned int uns_code = XINT (x, 1);

          if (uns_code == UNSPEC_FRINTA
              || uns_code == UNSPEC_FRINTM
              || uns_code == UNSPEC_FRINTN
              || uns_code == UNSPEC_FRINTP
              || uns_code == UNSPEC_FRINTZ)
            x = XVECEXP (x, 0, 0);
        }

      if (speed)
        *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;

      *cost += rtx_cost (x, (enum rtx_code) code, 0, speed);
      return true;

    case ABS:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          op0 = XEXP (x, 0);

          /* FABD, which is analogous to FADD.  */
          if (GET_CODE (op0) == MINUS)
            {
              *cost += rtx_cost (XEXP (op0, 0), MINUS, 0, speed)
                       + rtx_cost (XEXP (op0, 1), MINUS, 1, speed);
              if (speed)
                *cost += extra_cost->fp[mode == DFmode].addsub;

              return true;
            }
          /* Simple FABS is analogous to FNEG.  */
          if (speed)
            *cost += extra_cost->fp[mode == DFmode].neg;
        }
      else
        {
          /* Integer ABS will either be split to
             two arithmetic instructions, or will be an ABS
             (scalar), which we don't model.  */
          *cost = COSTS_N_INSNS (2);
          if (speed)
            *cost += 2 * extra_cost->alu.arith;
        }
      return false;

    case SMAX:
    case SMIN:
      if (speed)
        {
          /* FMAXNM/FMINNM/FMAX/FMIN.
             TODO: This may not be accurate for all implementations, but
             we do not model this in the cost tables.  */
          *cost += extra_cost->fp[mode == DFmode].addsub;
        }
      return false;

    case UNSPEC:
      /* The floating point round to integer frint* instructions.  */
      if (aarch64_frint_unspec_p (XINT (x, 1)))
        {
          if (speed)
            *cost += extra_cost->fp[mode == DFmode].roundint;

          return false;
        }

      if (XINT (x, 1) == UNSPEC_RBIT)
        {
          if (speed)
            *cost += extra_cost->alu.rev;

          return false;
        }
      break;

    case TRUNCATE:

      /* Decompose <su>muldi3_highpart.  */
      if (/* (truncate:DI  */
          mode == DImode
          /*   (lshiftrt:TI  */
          && GET_MODE (XEXP (x, 0)) == TImode
          && GET_CODE (XEXP (x, 0)) == LSHIFTRT
          /*      (mult:TI  */
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
          /*        (ANY_EXTEND:TI (reg:DI))
                    (ANY_EXTEND:TI (reg:DI)))  */
          && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
               && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
              || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
                  && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
          && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
          && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
          /*     (const_int 64)  */
          && CONST_INT_P (XEXP (XEXP (x, 0), 1))
          && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
        {
          /* UMULH/SMULH.  */
          if (speed)
            *cost += extra_cost->mult[mode == DImode].extend;
          *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
                             MULT, 0, speed);
          *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
                             MULT, 1, speed);
          return true;
        }

      /* Fall through.  */
    default:
      break;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
             "\nFailed to cost RTX. Assuming default cost.\n");

  return true;
}

/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
   calculated for X.  This cost is stored in *COST.  Returns true
   if the total cost of X was calculated.  */
static bool
aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
                           int param, int *cost, bool speed)
{
  bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      print_rtl_single (dump_file, x);
      fprintf (dump_file, "\n%s cost: %d (%s)\n",
               speed ? "Hot" : "Cold",
               *cost, result ? "final" : "partial");
    }

  return result;
}

static int
aarch64_register_move_cost (machine_mode mode,
                            reg_class_t from_i, reg_class_t to_i)
{
  enum reg_class from = (enum reg_class) from_i;
  enum reg_class to = (enum reg_class) to_i;
  const struct cpu_regmove_cost *regmove_cost
    = aarch64_tune_params->regmove_cost;

  /* Caller save and pointer regs are equivalent to GENERAL_REGS.  */
  if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
    to = GENERAL_REGS;

  if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
    from = GENERAL_REGS;

  /* Moving between GPR and stack cost is the same as GP2GP.  */
  if ((from == GENERAL_REGS && to == STACK_REG)
      || (to == GENERAL_REGS && from == STACK_REG))
    return regmove_cost->GP2GP;

  /* To/From the stack register, we move via the gprs.  */
  if (to == STACK_REG || from == STACK_REG)
    return aarch64_register_move_cost (mode, from, GENERAL_REGS)
           + aarch64_register_move_cost (mode, GENERAL_REGS, to);

  if (GET_MODE_SIZE (mode) == 16)
    {
      /* 128-bit operations on general registers require 2 instructions.  */
      if (from == GENERAL_REGS && to == GENERAL_REGS)
        return regmove_cost->GP2GP * 2;
      else if (from == GENERAL_REGS)
        return regmove_cost->GP2FP * 2;
      else if (to == GENERAL_REGS)
        return regmove_cost->FP2GP * 2;

      /* When AdvSIMD instructions are disabled it is not possible to move
         a 128-bit value directly between Q registers.  This is handled in
         secondary reload.  A general register is used as a scratch to move
         the upper DI value and the lower DI value is moved directly,
         hence the cost is the sum of three moves.  */
      if (! TARGET_SIMD)
        return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;

      return regmove_cost->FP2FP;
    }

  if (from == GENERAL_REGS && to == GENERAL_REGS)
    return regmove_cost->GP2GP;
  else if (from == GENERAL_REGS)
    return regmove_cost->GP2FP;
  else if (to == GENERAL_REGS)
    return regmove_cost->FP2GP;

  return regmove_cost->FP2FP;
}
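
/* Worked example (illustrative): a 16-byte move from GENERAL_REGS to
   FP_REGS is costed as regmove_cost->GP2FP * 2, since two 64-bit moves
   are needed, while an FP-to-FP move with AdvSIMD disabled is costed
   GP2FP + FP2GP + FP2FP, matching the three-move secondary-reload
   sequence described above.  */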

static int
aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
                          reg_class_t rclass ATTRIBUTE_UNUSED,
                          bool in ATTRIBUTE_UNUSED)
{
  return aarch64_tune_params->memmov_cost;
}

/* Return the number of instructions that can be issued per cycle.  */
static int
aarch64_sched_issue_rate (void)
{
  return aarch64_tune_params->issue_rate;
}

static int
aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
{
  int issue_rate = aarch64_sched_issue_rate ();

  return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
}
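
/* For example (illustrative): a core whose tuning reports an issue rate
   of 3 gets a multipass DFA lookahead of 3, while a single-issue core, or
   scheduling in the fusion pass (sched_fusion set), gets 0, disabling the
   lookahead.  */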

/* Vectorizer cost model target hooks.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                    tree vectype,
                                    int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
    case scalar_stmt:
      return aarch64_tune_params->vec_costs->scalar_stmt_cost;

    case scalar_load:
      return aarch64_tune_params->vec_costs->scalar_load_cost;

    case scalar_store:
      return aarch64_tune_params->vec_costs->scalar_store_cost;

    case vector_stmt:
      return aarch64_tune_params->vec_costs->vec_stmt_cost;

    case vector_load:
      return aarch64_tune_params->vec_costs->vec_align_load_cost;

    case vector_store:
      return aarch64_tune_params->vec_costs->vec_store_cost;

    case vec_to_scalar:
      return aarch64_tune_params->vec_costs->vec_to_scalar_cost;

    case scalar_to_vec:
      return aarch64_tune_params->vec_costs->scalar_to_vec_cost;

    case unaligned_load:
      return aarch64_tune_params->vec_costs->vec_unalign_load_cost;

    case unaligned_store:
      return aarch64_tune_params->vec_costs->vec_unalign_store_cost;

    case cond_branch_taken:
      return aarch64_tune_params->vec_costs->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;

    case vec_perm:
    case vec_promote_demote:
      return aarch64_tune_params->vec_costs->vec_stmt_cost;

    case vec_construct:
      elements = TYPE_VECTOR_SUBPARTS (vectype);
      return elements / 2 + 1;

    default:
      gcc_unreachable ();
    }
}

/* Implement targetm.vectorize.add_stmt_cost.  */
static unsigned
aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
                       struct _stmt_vec_info *stmt_info, int misalign,
                       enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost =
        aarch64_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
         vectorized are weighted more heavily.  The value here is
         a function (linear for now) of the loop nest level.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
        {
          loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
          struct loop *loop = LOOP_VINFO_LOOP (loop_info);
          unsigned nest_level = loop_depth (loop);

          count *= nest_level;
        }

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}

static void initialize_aarch64_code_model (void);

/* Parse the architecture extension string.  */

static void
aarch64_parse_extension (char *str)
{
  /* The extension string is parsed left to right.  */
  const struct aarch64_option_extension *opt = NULL;

  /* Flag to say whether we are adding or removing an extension.  */
  int adding_ext = -1;

  while (str != NULL && *str != 0)
    {
      char *ext;
      size_t len;

      str++;
      ext = strchr (str, '+');

      if (ext != NULL)
        len = ext - str;
      else
        len = strlen (str);

      if (len >= 2 && strncmp (str, "no", 2) == 0)
        {
          adding_ext = 0;
          len -= 2;
          str += 2;
        }
      else if (len > 0)
        adding_ext = 1;

      if (len == 0)
        {
          error ("missing feature modifier after %qs", adding_ext ? "+"
                                                                  : "+no");
          return;
        }

      /* Scan over the extensions table trying to find an exact match.  */
      for (opt = all_extensions; opt->name != NULL; opt++)
        {
          if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
            {
              /* Add or remove the extension.  */
              if (adding_ext)
                aarch64_isa_flags |= opt->flags_on;
              else
                aarch64_isa_flags &= ~(opt->flags_off);
              break;
            }
        }

      if (opt->name == NULL)
        {
          /* Extension not found in list.  */
          error ("unknown feature modifier %qs", str);
          return;
        }

      str = ext;
    }

  return;
}
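
/* Illustrative example: given "+crypto+nofp", the first iteration matches
   "crypto" with adding_ext == 1 and ORs its flags_on bits into
   aarch64_isa_flags; the second iteration sees the "no" prefix, sets
   adding_ext == 0, and clears the flags_off bits of "fp".  */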

/* Parse the ARCH string.  */

static void
aarch64_parse_arch (void)
{
  char *ext;
  const struct processor *arch;
  char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
  size_t len;

  strcpy (str, aarch64_arch_string);

  ext = strchr (str, '+');

  if (ext != NULL)
    len = ext - str;
  else
    len = strlen (str);

  if (len == 0)
    {
      error ("missing arch name in -march=%qs", str);
      return;
    }

  /* Loop through the list of supported ARCHs to find a match.  */
  for (arch = all_architectures; arch->name != NULL; arch++)
    {
      if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
        {
          selected_arch = arch;
          aarch64_isa_flags = selected_arch->flags;

          if (!selected_cpu)
            selected_cpu = &all_cores[selected_arch->core];

          if (ext != NULL)
            {
              /* ARCH string contains at least one extension.  */
              aarch64_parse_extension (ext);
            }

          if (strcmp (selected_arch->arch, selected_cpu->arch))
            {
              warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
                       selected_cpu->name, selected_arch->name);
            }

          return;
        }
    }

  /* ARCH name not found in list.  */
  error ("unknown value %qs for -march", str);
  return;
}

/* Parse the CPU string.  */

static void
aarch64_parse_cpu (void)
{
  char *ext;
  const struct processor *cpu;
  char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
  size_t len;

  strcpy (str, aarch64_cpu_string);

  ext = strchr (str, '+');

  if (ext != NULL)
    len = ext - str;
  else
    len = strlen (str);

  if (len == 0)
    {
      error ("missing cpu name in -mcpu=%qs", str);
      return;
    }

  /* Loop through the list of supported CPUs to find a match.  */
  for (cpu = all_cores; cpu->name != NULL; cpu++)
    {
      if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
        {
          selected_cpu = cpu;
          aarch64_isa_flags = selected_cpu->flags;

          if (ext != NULL)
            {
              /* CPU string contains at least one extension.  */
              aarch64_parse_extension (ext);
            }

          return;
        }
    }

  /* CPU name not found in list.  */
  error ("unknown value %qs for -mcpu", str);
  return;
}

/* Parse the TUNE string.  */

static void
aarch64_parse_tune (void)
{
  const struct processor *cpu;
  char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
  strcpy (str, aarch64_tune_string);

  /* Loop through the list of supported CPUs to find a match.  */
  for (cpu = all_cores; cpu->name != NULL; cpu++)
    {
      if (strcmp (cpu->name, str) == 0)
        {
          selected_tune = cpu;
          return;
        }
    }

  /* CPU name not found in list.  */
  error ("unknown value %qs for -mtune", str);
  return;
}

/* Implement TARGET_OPTION_OVERRIDE.  */

static void
aarch64_override_options (void)
{
  /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
     If either of -march or -mtune is given, they override their
     respective component of -mcpu.

     So, first parse AARCH64_CPU_STRING, then the others, be careful
     with -march as, if -mcpu is not present on the command line, march
     must set a sensible default CPU.  */
  if (aarch64_cpu_string)
    {
      aarch64_parse_cpu ();
    }

  if (aarch64_arch_string)
    {
      aarch64_parse_arch ();
    }

  if (aarch64_tune_string)
    {
      aarch64_parse_tune ();
    }

#ifndef HAVE_AS_MABI_OPTION
  /* The compiler may have been configured with 2.23.* binutils, which does
     not have support for ILP32.  */
  if (TARGET_ILP32)
    error ("Assembler does not support -mabi=ilp32");
#endif

  initialize_aarch64_code_model ();

  aarch64_build_bitmask_table ();

  /* This target defaults to strict volatile bitfields.  */
  if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
    flag_strict_volatile_bitfields = 1;

  /* If the user did not specify a processor, choose the default
     one for them.  This will be the CPU set during configuration using
     --with-cpu, otherwise it is "generic".  */
  if (!selected_cpu)
    {
      selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
      aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
    }

  gcc_assert (selected_cpu);

  if (!selected_tune)
    selected_tune = selected_cpu;

  aarch64_tune_flags = selected_tune->flags;
  aarch64_tune = selected_tune->core;
  aarch64_tune_params = selected_tune->tune;
  aarch64_architecture_version = selected_cpu->architecture_version;

  if (aarch64_fix_a53_err835769 == 2)
    {
#ifdef TARGET_FIX_ERR_A53_835769_DEFAULT
      aarch64_fix_a53_err835769 = 1;
#else
      aarch64_fix_a53_err835769 = 0;
#endif
    }

  /* If not optimizing for size, set the default
     alignment to what the target wants.  */
  if (!optimize_size)
    {
      if (align_loops <= 0)
        align_loops = aarch64_tune_params->loop_align;
      if (align_jumps <= 0)
        align_jumps = aarch64_tune_params->jump_align;
      if (align_functions <= 0)
        align_functions = aarch64_tune_params->function_align;
    }

  if (AARCH64_TUNE_FMA_STEERING)
    aarch64_register_fma_steering ();

  aarch64_override_options_after_change ();
}

/* Implement targetm.override_options_after_change.  */

static void
aarch64_override_options_after_change (void)
{
  if (flag_omit_frame_pointer)
    flag_omit_leaf_frame_pointer = false;
  else if (flag_omit_leaf_frame_pointer)
    flag_omit_frame_pointer = true;
}

static struct machine_function *
aarch64_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_cleared_alloc<machine_function> ();
  return machine;
}

void
aarch64_init_expanders (void)
{
  init_machine_status = aarch64_init_machine_status;
}

/* A checking mechanism for the implementation of the various code models.  */

static void
initialize_aarch64_code_model (void)
{
  if (flag_pic)
    {
      switch (aarch64_cmodel_var)
        {
        case AARCH64_CMODEL_TINY:
          aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
          break;
        case AARCH64_CMODEL_SMALL:
          aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
          break;
        case AARCH64_CMODEL_LARGE:
          sorry ("code model %qs with -f%s", "large",
                 flag_pic > 1 ? "PIC" : "pic");
        default:
          gcc_unreachable ();
        }
    }
  else
    aarch64_cmodel = aarch64_cmodel_var;
}

/* Return true if SYMBOL_REF X binds locally.  */

static bool
aarch64_symbol_binds_local_p (const_rtx x)
{
  return (SYMBOL_REF_DECL (x)
          ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
          : SYMBOL_REF_LOCAL_P (x));
}

/* Return true if SYMBOL_REF X is thread local */
static bool
aarch64_tls_symbol_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  if (GET_CODE (x) != SYMBOL_REF)
    return false;

  return SYMBOL_REF_TLS_MODEL (x) != 0;
}

/* Classify a TLS symbol into one of the TLS kinds.  */
enum aarch64_symbol_type
aarch64_classify_tls_symbol (rtx x)
{
  enum tls_model tls_kind = tls_symbolic_operand_type (x);

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
    case TLS_MODEL_LOCAL_DYNAMIC:
      return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;

    case TLS_MODEL_INITIAL_EXEC:
      return SYMBOL_SMALL_GOTTPREL;

    case TLS_MODEL_LOCAL_EXEC:
      return SYMBOL_SMALL_TPREL;

    case TLS_MODEL_EMULATED:
    case TLS_MODEL_NONE:
      return SYMBOL_FORCE_TO_MEM;

    default:
      gcc_unreachable ();
    }
}

/* Return the method that should be used to access SYMBOL_REF or
   LABEL_REF X in context CONTEXT.  */

enum aarch64_symbol_type
aarch64_classify_symbol (rtx x, rtx offset,
                         enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
{
  if (GET_CODE (x) == LABEL_REF)
    {
      switch (aarch64_cmodel)
        {
        case AARCH64_CMODEL_LARGE:
          return SYMBOL_FORCE_TO_MEM;

        case AARCH64_CMODEL_TINY_PIC:
        case AARCH64_CMODEL_TINY:
          return SYMBOL_TINY_ABSOLUTE;

        case AARCH64_CMODEL_SMALL_PIC:
        case AARCH64_CMODEL_SMALL:
          return SYMBOL_SMALL_ABSOLUTE;

        default:
          gcc_unreachable ();
        }
    }

  if (GET_CODE (x) == SYMBOL_REF)
    {
      if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
        return SYMBOL_FORCE_TO_MEM;

      if (aarch64_tls_symbol_p (x))
        return aarch64_classify_tls_symbol (x);

      switch (aarch64_cmodel)
        {
        case AARCH64_CMODEL_TINY:
          /* When we retrieve symbol + offset address, we have to make sure
             the offset does not cause overflow of the final address.  But
             we have no way of knowing the address of symbol at compile time
             so we can't accurately say if the distance between the PC and
             symbol + offset is outside the addressable range of +/-1M in the
             TINY code model.  So we rely on images not being greater than
             1M and cap the offset at 1M and anything beyond 1M will have to
             be loaded using an alternative mechanism.  */
          if (SYMBOL_REF_WEAK (x)
              || INTVAL (offset) < -1048575 || INTVAL (offset) > 1048575)
            return SYMBOL_FORCE_TO_MEM;
          return SYMBOL_TINY_ABSOLUTE;

        case AARCH64_CMODEL_SMALL:
          /* Same reasoning as the tiny code model, but the offset cap here
             is 4G.  */
          if (SYMBOL_REF_WEAK (x)
              || !IN_RANGE (INTVAL (offset), HOST_WIDE_INT_C (-4294967263),
                            HOST_WIDE_INT_C (4294967264)))
            return SYMBOL_FORCE_TO_MEM;
          return SYMBOL_SMALL_ABSOLUTE;

        case AARCH64_CMODEL_TINY_PIC:
          if (!aarch64_symbol_binds_local_p (x))
            return SYMBOL_TINY_GOT;
          return SYMBOL_TINY_ABSOLUTE;

        case AARCH64_CMODEL_SMALL_PIC:
          if (!aarch64_symbol_binds_local_p (x))
            return SYMBOL_SMALL_GOT;
          return SYMBOL_SMALL_ABSOLUTE;

        default:
          gcc_unreachable ();
        }
    }

  /* By default push everything into the constant pool.  */
  return SYMBOL_FORCE_TO_MEM;
}
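
/* Illustrative example: under -mcmodel=tiny, "sym + 1048576" exceeds the
   +/-1M offset cap above and is classified SYMBOL_FORCE_TO_MEM
   (materialised via the literal pool), while "sym + 4096" stays
   SYMBOL_TINY_ABSOLUTE and can be formed with a single ADR.  */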

bool
aarch64_constant_address_p (rtx x)
{
  return (CONSTANT_P (x) && memory_address_p (DImode, x));
}

bool
aarch64_legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
          && GET_CODE (XEXP (x, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return false;

  return true;
}

/* Return true if X holds either a quarter-precision or
   floating-point +0.0 constant.  */
static bool
aarch64_valid_floating_const (machine_mode mode, rtx x)
{
  if (!CONST_DOUBLE_P (x))
    return false;

  /* TODO: We could handle moving 0.0 to a TFmode register,
     but first we would like to refactor the movtf_aarch64
     to be more amicable to split moves properly and
     correctly gate on TARGET_SIMD.  For now - reject all
     constants which are not to SFmode or DFmode registers.  */
  if (!(mode == SFmode || mode == DFmode))
    return false;

  if (aarch64_float_const_zero_rtx_p (x))
    return true;
  return aarch64_float_const_representable_p (x);
}

static bool
aarch64_legitimate_constant_p (machine_mode mode, rtx x)
{
  /* Do not allow vector struct mode constants.  We could support
     0 and -1 easily, but they need support in aarch64-simd.md.  */
  if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
    return false;

  /* This could probably go away because
     we now decompose CONST_INTs according to expand_mov_immediate.  */
  if ((GET_CODE (x) == CONST_VECTOR
       && aarch64_simd_valid_immediate (x, mode, false, NULL))
      || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
    return !targetm.cannot_force_const_mem (mode, x);

  if (GET_CODE (x) == HIGH
      && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
    return true;

  return aarch64_constant_address_p (x);
}

static rtx
aarch64_load_tp (rtx target)
{
  if (!target
      || GET_MODE (target) != Pmode
      || !register_operand (target, Pmode))
    target = gen_reg_rtx (Pmode);

  /* Can return in any reg.  */
  emit_insn (gen_aarch64_load_tp_hard (target));
  return target;
}

/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
   Return the type to use as __builtin_va_list.

   AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:

   struct __va_list
   {
     void *__stack;
     void *__gr_top;
     void *__vr_top;
     int   __gr_offs;
     int   __vr_offs;
   };  */

static tree
aarch64_build_builtin_va_list (void)
{
  tree va_list_name;
  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;

  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
                             TYPE_DECL,
                             get_identifier ("__va_list"),
                             va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;

  /* Create the fields.  */
  f_stack = build_decl (BUILTINS_LOCATION,
                        FIELD_DECL, get_identifier ("__stack"),
                        ptr_type_node);
  f_grtop = build_decl (BUILTINS_LOCATION,
                        FIELD_DECL, get_identifier ("__gr_top"),
                        ptr_type_node);
  f_vrtop = build_decl (BUILTINS_LOCATION,
                        FIELD_DECL, get_identifier ("__vr_top"),
                        ptr_type_node);
  f_groff = build_decl (BUILTINS_LOCATION,
                        FIELD_DECL, get_identifier ("__gr_offs"),
                        integer_type_node);
  f_vroff = build_decl (BUILTINS_LOCATION,
                        FIELD_DECL, get_identifier ("__vr_offs"),
                        integer_type_node);

  DECL_ARTIFICIAL (f_stack) = 1;
  DECL_ARTIFICIAL (f_grtop) = 1;
  DECL_ARTIFICIAL (f_vrtop) = 1;
  DECL_ARTIFICIAL (f_groff) = 1;
  DECL_ARTIFICIAL (f_vroff) = 1;

  DECL_FIELD_CONTEXT (f_stack) = va_list_type;
  DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
  DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
  DECL_FIELD_CONTEXT (f_groff) = va_list_type;
  DECL_FIELD_CONTEXT (f_vroff) = va_list_type;

  TYPE_FIELDS (va_list_type) = f_stack;
  DECL_CHAIN (f_stack) = f_grtop;
  DECL_CHAIN (f_grtop) = f_vrtop;
  DECL_CHAIN (f_vrtop) = f_groff;
  DECL_CHAIN (f_groff) = f_vroff;

  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}

/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
{
  const CUMULATIVE_ARGS *cum;
  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
  tree stack, grtop, vrtop, groff, vroff;
  tree t;
  int gr_save_area_size;
  int vr_save_area_size;
  int vr_offset;

  cum = &crtl->args.info;
  gr_save_area_size
    = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
  vr_save_area_size
    = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;

  if (TARGET_GENERAL_REGS_ONLY)
    {
      if (cum->aapcs_nvrn > 0)
        sorry ("%qs and floating point or vector arguments",
               "-mgeneral-regs-only");
      vr_save_area_size = 0;
    }

  f_stack = TYPE_FIELDS (va_list_type_node);
  f_grtop = DECL_CHAIN (f_stack);
  f_vrtop = DECL_CHAIN (f_grtop);
  f_groff = DECL_CHAIN (f_vrtop);
  f_vroff = DECL_CHAIN (f_groff);

  stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
                  NULL_TREE);
  grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
                  NULL_TREE);
  vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
                  NULL_TREE);
  groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
                  NULL_TREE);
  vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
                  NULL_TREE);

  /* Emit code to initialize STACK, which points to the next varargs stack
     argument.  CUM->AAPCS_STACK_SIZE gives the number of stack words used
     by named arguments.  STACK is 8-byte aligned.  */
  t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
  if (cum->aapcs_stack_size > 0)
    t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
  t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize GRTOP, the top of the GR save area.
     virtual_incoming_args_rtx should have been 16 byte aligned.  */
  t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
  t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize VRTOP, the top of the VR save area.
     This address is gr_save_area_bytes below GRTOP, rounded
     down to the next 16-byte boundary.  */
  t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
  vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
                                STACK_BOUNDARY / BITS_PER_UNIT);

  if (vr_offset)
    t = fold_build_pointer_plus_hwi (t, -vr_offset);
  t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize GROFF, the offset from GRTOP of the
     next GPR argument.  */
  t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
              build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Likewise emit code to initialize VROFF, the offset from FTOP
     of the next VR argument.  */
  t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
              build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
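
/* Worked example (illustrative): for "int f (int a, ...)" on LP64, one
   core register is consumed by the named argument, so gr_save_area_size
   == (8 - 1) * 8 == 56 and __gr_offs is initialized to -56; with no named
   FP arguments, vr_save_area_size == 8 * 16 == 128 and __vr_offs is set
   to -128.  */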
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */

static tree
aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			      gimple_seq *post_p ATTRIBUTE_UNUSED)
{
  tree addr;
  bool indirect_p;
  bool is_ha;		/* is HFA or HVA.  */
  bool dw_align;	/* double-word align.  */
  machine_mode ag_mode = VOIDmode;
  int nregs;
  machine_mode mode;

  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
  tree stack, f_top, f_off, off, arg, roundup, on_stack;
  HOST_WIDE_INT size, rsize, adjust, align;
  tree t, u, cond1, cond2;

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);

  mode = TYPE_MODE (type);

  f_stack = TYPE_FIELDS (va_list_type_node);
  f_grtop = DECL_CHAIN (f_stack);
  f_vrtop = DECL_CHAIN (f_grtop);
  f_groff = DECL_CHAIN (f_vrtop);
  f_vroff = DECL_CHAIN (f_groff);

  stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
		  f_stack, NULL_TREE);
  size = int_size_in_bytes (type);
  align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;

  dw_align = false;
  adjust = 0;
  if (aarch64_vfp_is_call_or_return_candidate (mode,
					       type,
					       &ag_mode,
					       &nregs,
					       &is_ha))
    {
      /* TYPE passed in fp/simd registers.  */
      if (TARGET_GENERAL_REGS_ONLY)
	sorry ("%qs and floating point or vector arguments",
	       "-mgeneral-regs-only");

      f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
		      unshare_expr (valist), f_vrtop, NULL_TREE);
      f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
		      unshare_expr (valist), f_vroff, NULL_TREE);

      rsize = nregs * UNITS_PER_VREG;

      if (is_ha)
	{
	  if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
	    adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
	}
      else if (BLOCK_REG_PADDING (mode, type, 1) == downward
	       && size < UNITS_PER_VREG)
	{
	  adjust = UNITS_PER_VREG - size;
	}
    }
  else
    {
      /* TYPE passed in general registers.  */
      f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
		      unshare_expr (valist), f_grtop, NULL_TREE);
      f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
		      unshare_expr (valist), f_groff, NULL_TREE);
      rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
      nregs = rsize / UNITS_PER_WORD;

      if (align > 8)
	dw_align = true;

      if (BLOCK_REG_PADDING (mode, type, 1) == downward
	  && size < UNITS_PER_WORD)
	{
	  adjust = UNITS_PER_WORD - size;
	}
    }

  /* Get a local temporary for the field value.  */
  off = get_initialized_tmp_var (f_off, pre_p, NULL);

  /* Emit code to branch if off >= 0.  */
  t = build2 (GE_EXPR, boolean_type_node, off,
	      build_int_cst (TREE_TYPE (off), 0));
  cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);

  if (dw_align)
    {
      /* Emit: offs = (offs + 15) & -16.  */
      t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
		  build_int_cst (TREE_TYPE (off), 15));
      t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
		  build_int_cst (TREE_TYPE (off), -16));
      roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
    }
  else
    roundup = NULL;

  /* Update ap.__[g|v]r_offs  */
  t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
	      build_int_cst (TREE_TYPE (off), rsize));
  t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);

  /* String up.  */
  if (roundup)
    t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);

  /* [cond2] if (ap.__[g|v]r_offs > 0)  */
  u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
	      build_int_cst (TREE_TYPE (f_off), 0));
  cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);

  /* String up: make sure the assignment happens before the use.  */
  t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
  COND_EXPR_ELSE (cond1) = t;

  /* Prepare the trees handling the argument that is passed on the stack;
     the top level node will store in ON_STACK.  */
  arg = get_initialized_tmp_var (stack, pre_p, NULL);
  if (align > 8)
    {
      /* if (alignof(type) > 8) (arg = arg + 15) & -16;  */
      t = fold_convert (intDI_type_node, arg);
      t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), 15));
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -16));
      t = fold_convert (TREE_TYPE (arg), t);
      roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
    }
  else
    roundup = NULL;
  /* Advance ap.__stack  */
  t = fold_convert (intDI_type_node, arg);
  t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
	      build_int_cst (TREE_TYPE (t), size + 7));
  t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
	      build_int_cst (TREE_TYPE (t), -8));
  t = fold_convert (TREE_TYPE (arg), t);
  t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
  /* String up roundup and advance.  */
  if (roundup)
    t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
  /* String up with arg  */
  on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
  /* Big-endianness related address adjustment.  */
  if (BLOCK_REG_PADDING (mode, type, 1) == downward
      && size < UNITS_PER_WORD)
    {
      t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
		  size_int (UNITS_PER_WORD - size));
      on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
    }

  COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
  COND_EXPR_THEN (cond2) = unshare_expr (on_stack);

  /* Adjustment to OFFSET in the case of BIG_ENDIAN.  */
  t = off;
  if (adjust)
    t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
		build_int_cst (TREE_TYPE (off), adjust));

  t = fold_convert (sizetype, t);
  t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);

  if (is_ha)
    {
      /* type ha; // treat as "struct {ftype field[n];}"
         ... [computing offs]
         for (i = 0; i < nregs; ++i, offs += 16)
	   ha.field[i] = *((ftype *)(ap.__vr_top + offs));
	 return ha;  */
      int i;
      tree tmp_ha, field_t, field_ptr_t;

      /* Declare a local variable.  */
      tmp_ha = create_tmp_var_raw (type, "ha");
      gimple_add_tmp_var (tmp_ha);

      /* Establish the base type.  */
      switch (ag_mode)
	{
	case SFmode:
	  field_t = float_type_node;
	  field_ptr_t = float_ptr_type_node;
	  break;
	case DFmode:
	  field_t = double_type_node;
	  field_ptr_t = double_ptr_type_node;
	  break;
	case TFmode:
	  field_t = long_double_type_node;
	  field_ptr_t = long_double_ptr_type_node;
	  break;
/* The half precision and quad precision are not fully supported yet.  Enable
   the following code after the support is complete.  Need to find the correct
   type node for __fp16 *.  */
#if 0
	case HFmode:
	  field_t = float_type_node;
	  field_ptr_t = float_ptr_type_node;
	  break;
#endif
	case V2SImode:
	case V4SImode:
	  {
	    tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
	    field_t = build_vector_type_for_mode (innertype, ag_mode);
	    field_ptr_t = build_pointer_type (field_t);
	  }
	  break;
	default:
	  gcc_assert (0);
	}

      /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area  */
      tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
      addr = t;
      t = fold_convert (field_ptr_t, addr);
      t = build2 (MODIFY_EXPR, field_t,
		  build1 (INDIRECT_REF, field_t, tmp_ha),
		  build1 (INDIRECT_REF, field_t, t));

      /* ha.field[i] = *((field_ptr_t)vr_saved_area + i)  */
      for (i = 1; i < nregs; ++i)
	{
	  addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
	  u = fold_convert (field_ptr_t, addr);
	  u = build2 (MODIFY_EXPR, field_t,
		      build2 (MEM_REF, field_t, tmp_ha,
			      build_int_cst (field_ptr_t,
					     (i *
					      int_size_in_bytes (field_t)))),
		      build1 (INDIRECT_REF, field_t, u));
	  t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
	}

      u = fold_convert (TREE_TYPE (f_top), tmp_ha);
      t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
    }

  COND_EXPR_ELSE (cond2) = t;
  addr = fold_convert (build_pointer_type (type), cond1);
  addr = build_va_arg_indirect_ref (addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);

  return addr;
}
/* Implement TARGET_SETUP_INCOMING_VARARGS.  */

static void
aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
				tree type, int *pretend_size ATTRIBUTE_UNUSED,
				int no_rtl)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  CUMULATIVE_ARGS local_cum;
  int gr_saved, vr_saved;

  /* The caller has advanced CUM up to, but not beyond, the last named
     argument.  Advance a local copy of CUM past the last "real" named
     argument, to find out how many registers are left over.  */
  local_cum = *cum;
  aarch64_function_arg_advance (pack_cumulative_args (&local_cum),
				mode, type, true);

  /* Found out how many registers we need to save.  */
  gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
  vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;

  if (TARGET_GENERAL_REGS_ONLY)
    {
      if (local_cum.aapcs_nvrn > 0)
	sorry ("%qs and floating point or vector arguments",
	       "-mgeneral-regs-only");
      vr_saved = 0;
    }

  if (!no_rtl)
    {
      if (gr_saved > 0)
	{
	  rtx ptr, mem;

	  /* virtual_incoming_args_rtx should have been 16-byte aligned.  */
	  ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
			       - gr_saved * UNITS_PER_WORD);
	  mem = gen_frame_mem (BLKmode, ptr);
	  set_mem_alias_set (mem, get_varargs_alias_set ());

	  move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
			       mem, gr_saved);
	}
      if (vr_saved > 0)
	{
	  /* We can't use move_block_from_reg, because it will use
	     the wrong mode, storing D regs only.  */
	  machine_mode mode = TImode;
	  int off, i;

	  /* Set OFF to the offset from virtual_incoming_args_rtx of
	     the first vector register.  The VR save area lies below
	     the GR one, and is aligned to 16 bytes.  */
	  off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
				   STACK_BOUNDARY / BITS_PER_UNIT);
	  off -= vr_saved * UNITS_PER_VREG;

	  for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
	    {
	      rtx ptr, mem;

	      ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
	      mem = gen_frame_mem (mode, ptr);
	      set_mem_alias_set (mem, get_varargs_alias_set ());
	      aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
	      off += UNITS_PER_VREG;
	    }
	}
    }

  /* We don't save the size into *PRETEND_SIZE because we want to avoid
     any complication of having crtl->args.pretend_args_size changed.  */
  cfun->machine->frame.saved_varargs_size
    = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
			 STACK_BOUNDARY / BITS_PER_UNIT)
       + vr_saved * UNITS_PER_VREG);
}
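/* An illustrative layout (values assumed, not from the original sources):
   for a little-endian callee such as "int f (int n, ...)" with
   floating-point enabled, only X0 is named, so gr_saved == 7 and
   vr_saved == 8.  The save area below virtual_incoming_args_rtx is then

       [varargs stack args]    <- virtual_incoming_args_rtx
       x1 ... x7               <- 7 * 8 bytes, immediately below
       q0 ... q7               <- 8 * 16 bytes, 16-byte aligned, below that

   and saved_varargs_size == AARCH64_ROUND_UP (56, 16) + 128 == 192.  */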
static void
aarch64_conditional_register_usage (void)
{
  int i;
  if (!TARGET_FLOAT)
    {
      for (i = V0_REGNUM; i <= V31_REGNUM; i++)
	{
	  fixed_regs[i] = 1;
	  call_used_regs[i] = 1;
	}
    }
}
/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.  */

static int
aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
{
  machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode && mode != TFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode && mode != TFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
	 and 128-bit vector types.  */
      size = int_size_in_bytes (type);
      switch (size)
	{
	case 8:
	  mode = V2SImode;
	  break;
	case 16:
	  mode = V4SImode;
	  break;
	default:
	  return -1;
	}

      if (*modep == VOIDmode)
	*modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
	 equivalent for the purposes of being homogeneous aggregates
	 if they are the same size.  */
      if (*modep == mode)
	return 1;

      break;

    case ARRAY_TYPE:
      {
	int count;
	tree index = TYPE_DOMAIN (type);

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
	if (count == -1
	    || !index
	    || !TYPE_MAX_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
	    || !TYPE_MIN_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
	    || count < 0)
	  return -1;

	count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
		  - tree_to_uhwi (TYPE_MIN_VALUE (index)));

	/* There must be no padding.  */
	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case RECORD_TYPE:
      {
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count += sub_count;
	  }

	/* There must be no padding.  */
	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
	/* These aren't very interesting except in a degenerate case.  */
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count = count > sub_count ? count : sub_count;
	  }

	/* There must be no padding.  */
	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    default:
      break;
    }

  return -1;
}
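/* For example (illustrative, not taken from the original sources):

     struct hfa { float x, y, z; };

   walks to three SFmode leaves, so *modep becomes SFmode and the
   function returns 3 -- a homogeneous floating-point aggregate.
   Adding an int member, or mixing float and double fields, makes a
   leaf fail the *modep match and the result is -1.  */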
/* Return TRUE if the type, as described by TYPE and MODE, is a composite
   type as described in AAPCS64 \S 4.3.  This includes aggregate, union and
   array types.  The C99 floating-point complex types are also considered
   as composite types, according to AAPCS64 \S 7.1.1.  The complex integer
   types, which are GCC extensions and out of the scope of AAPCS64, are
   treated as composite types here as well.

   Note that MODE itself is not sufficient in determining whether a type
   is such a composite type or not.  This is because
   stor-layout.c:compute_record_mode may have already changed the MODE
   (BLKmode) of a RECORD_TYPE TYPE to some other mode.  For example, a
   structure with only one field may have its MODE set to the mode of the
   field.  Also an integer mode whose size matches the size of the
   RECORD_TYPE type may be used to substitute the original mode
   (i.e. BLKmode) in certain circumstances.  In other words, MODE cannot be
   solely relied on.  */

static bool
aarch64_composite_type_p (const_tree type,
			  machine_mode mode)
{
  if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
    return true;

  if (mode == BLKmode
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return true;

  return false;
}
/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
   type as described in AAPCS64 \S 4.1.2.

   See the comment above aarch64_composite_type_p for the notes on MODE.  */

static bool
aarch64_short_vector_p (const_tree type,
			machine_mode mode)
{
  HOST_WIDE_INT size = -1;

  if (type && TREE_CODE (type) == VECTOR_TYPE)
    size = int_size_in_bytes (type);
  else if (!aarch64_composite_type_p (type, mode)
	   && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	       || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
    size = GET_MODE_SIZE (mode);

  return (size == 8 || size == 16) ? true : false;
}
/* Return TRUE if an argument, whose type is described by TYPE and MODE,
   shall be passed or returned in simd/fp register(s) (providing these
   parameter passing registers are available).

   Upon successful return, *COUNT returns the number of needed registers,
   *BASE_MODE returns the mode of the individual register and when IS_HA
   is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
   floating-point aggregate or a homogeneous short-vector aggregate.  */

static bool
aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
					 const_tree type,
					 machine_mode *base_mode,
					 int *count,
					 bool *is_ha)
{
  machine_mode new_mode = VOIDmode;
  bool composite_p = aarch64_composite_type_p (type, mode);

  if (is_ha != NULL) *is_ha = false;

  if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
      || aarch64_short_vector_p (type, mode))
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      if (is_ha != NULL) *is_ha = true;
      *count = 2;
      new_mode = GET_MODE_INNER (mode);
    }
  else if (type && composite_p)
    {
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);

      if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
	{
	  if (is_ha != NULL) *is_ha = true;
	  *count = ag_count;
	}
      else
	return false;
    }
  else
    return false;

  *base_mode = new_mode;
  return true;
}
/* Implement TARGET_STRUCT_VALUE_RTX.  */

static rtx
aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
			  int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
}
/* Implements target hook vector_mode_supported_p.  */
static bool
aarch64_vector_mode_supported_p (machine_mode mode)
{
  if (TARGET_SIMD
      && (mode == V4SImode || mode == V8HImode
	  || mode == V16QImode || mode == V2DImode
	  || mode == V2SImode || mode == V4HImode
	  || mode == V8QImode || mode == V2SFmode
	  || mode == V4SFmode || mode == V2DFmode
	  || mode == V1DFmode))
    return true;

  return false;
}
/* Return appropriate SIMD container
   for MODE within a vector of WIDTH bits.  */
static machine_mode
aarch64_simd_container_mode (machine_mode mode, unsigned width)
{
  gcc_assert (width == 64 || width == 128);
  if (TARGET_SIMD)
    {
      if (width == 128)
	switch (mode)
	  {
	  case DFmode:
	    return V2DFmode;
	  case SFmode:
	    return V4SFmode;
	  case SImode:
	    return V4SImode;
	  case HImode:
	    return V8HImode;
	  case QImode:
	    return V16QImode;
	  case DImode:
	    return V2DImode;
	  default:
	    break;
	  }
      else
	switch (mode)
	  {
	  case SFmode:
	    return V2SFmode;
	  case SImode:
	    return V2SImode;
	  case HImode:
	    return V4HImode;
	  case QImode:
	    return V8QImode;
	  default:
	    break;
	  }
    }
  return word_mode;
}

/* Return 128-bit container as the preferred SIMD mode for MODE.  */
static machine_mode
aarch64_preferred_simd_mode (machine_mode mode)
{
  return aarch64_simd_container_mode (mode, 128);
}

/* Return the bitmask of possible vector sizes for the vectorizer
   to iterate over.  */
static unsigned int
aarch64_autovectorize_vector_sizes (void)
{
  return (16 | 8);
}
/* Implement TARGET_MANGLE_TYPE.  */

static const char *
aarch64_mangle_type (const_tree type)
{
  /* The AArch64 ABI documents say that "__va_list" has to be
     mangled as if it is in the "std" namespace.  */
  if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Mangle AArch64-specific internal types.  TYPE_NAME is non-NULL_TREE for
     builtin types.  */
  if (TYPE_NAME (type) != NULL)
    return aarch64_mangle_builtin_type (type);

  /* Use the default mangling.  */
  return NULL;
}
/* Return true if the rtx_insn contains a MEM RTX somewhere
   in it.  */

static bool
has_memory_op (rtx_insn *mem_insn)
{
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (mem_insn), ALL)
    if (MEM_P (*iter))
      return true;

  return false;
}

/* Find the first rtx_insn before insn that will generate an assembly
   instruction.  */

static rtx_insn *
aarch64_prev_real_insn (rtx_insn *insn)
{
  if (!insn)
    return NULL;

  do
    {
      insn = prev_real_insn (insn);
    }
  while (insn && recog_memoized (insn) < 0);

  return insn;
}
static bool
is_madd_op (enum attr_type t1)
{
  unsigned int i;
  /* A number of these may be AArch32 only.  */
  enum attr_type mlatypes[] = {
    TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
    TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
    TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS, TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
  };

  for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
    {
      if (t1 == mlatypes[i])
	return true;
    }

  return false;
}
/* Check if there is a register dependency between a load and the insn
   for which we hold recog_data.  */

static bool
dep_between_memop_and_curr (rtx memop)
{
  rtx load_reg;
  int opno;

  gcc_assert (GET_CODE (memop) == SET);

  if (!REG_P (SET_DEST (memop)))
    return false;

  load_reg = SET_DEST (memop);
  for (opno = 1; opno < recog_data.n_operands; opno++)
    {
      rtx operand = recog_data.operand[opno];
      if (REG_P (operand)
	  && reg_overlap_mentioned_p (load_reg, operand))
	return true;
    }

  return false;
}
/* When working around the Cortex-A53 erratum 835769,
   given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
   instruction and has a preceding memory instruction such that a NOP
   should be inserted between them.  */

bool
aarch64_madd_needs_nop (rtx_insn *insn)
{
  enum attr_type attr_type;
  rtx_insn *prev;
  rtx body;

  if (!aarch64_fix_a53_err835769)
    return false;

  if (recog_memoized (insn) < 0)
    return false;

  attr_type = get_attr_type (insn);
  if (!is_madd_op (attr_type))
    return false;

  prev = aarch64_prev_real_insn (insn);
  /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
     Restore recog state to INSN to avoid state corruption.  */
  extract_constrain_insn_cached (insn);

  if (!prev || !has_memory_op (prev))
    return false;

  body = single_set (prev);

  /* If the previous insn is a memory op and there is no dependency between
     it and the DImode madd, emit a NOP between them.  If body is NULL then we
     have a complex memory operation, probably a load/store pair.
     Be conservative for now and emit a NOP.  */
  if (GET_MODE (recog_data.operand[0]) == DImode
      && (!body || !dep_between_memop_and_curr (body)))
    return true;

  return false;
}

/* Implement FINAL_PRESCAN_INSN.  */

void
aarch64_final_prescan_insn (rtx_insn *insn)
{
  if (aarch64_madd_needs_nop (insn))
    fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
}
/* Return the equivalent letter for size.  */
static char
sizetochar (int size)
{
  switch (size)
    {
    case 64: return 'd';
    case 32: return 's';
    case 16: return 'h';
    case 8 : return 'b';
    default: gcc_unreachable ();
    }
}
/* Return true iff x is a uniform vector of floating-point
   constants, and the constant can be represented in
   quarter-precision form.  Note, as aarch64_float_const_representable
   rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0.  */
static bool
aarch64_vect_float_const_representable_p (rtx x)
{
  int i = 0;
  REAL_VALUE_TYPE r0, ri;
  rtx x0, xi;

  if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
    return false;

  x0 = CONST_VECTOR_ELT (x, 0);
  if (!CONST_DOUBLE_P (x0))
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);

  for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
    {
      xi = CONST_VECTOR_ELT (x, i);
      if (!CONST_DOUBLE_P (xi))
	return false;

      REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
      if (!REAL_VALUES_EQUAL (r0, ri))
	return false;
    }

  return aarch64_float_const_representable_p (x0);
}
/* Return true for valid and false for invalid.  */

static bool
aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
			      struct simd_immediate_info *info)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG)	\
  matches = 1;						\
  for (i = 0; i < idx; i += (STRIDE))			\
    if (!(TEST))					\
      matches = 0;					\
  if (matches)						\
    {							\
      immtype = (CLASS);				\
      elsize = (ELSIZE);				\
      eshift = (SHIFT);					\
      emvn = (NEG);					\
      break;						\
    }

  unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
  unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
  unsigned char bytes[16];
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;
  int eshift, emvn;

  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      if (! (aarch64_simd_imm_zero_p (op, mode)
	     || aarch64_vect_float_const_representable_p (op)))
	return false;

      if (info)
	{
	  info->value = CONST_VECTOR_ELT (op, 0);
	  info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
	  info->mvn = false;
	  info->shift = 0;
	}

      return true;
    }

  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      /* The vector is provided in gcc endian-neutral fashion.  For aarch64_be,
	 it must be laid out in the vector register in reverse order.  */
      rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
      unsigned HOST_WIDE_INT elpart;
      unsigned int part, parts;

      if (CONST_INT_P (el))
	{
	  elpart = INTVAL (el);
	  parts = 1;
	}
      else if (GET_CODE (el) == CONST_DOUBLE)
	{
	  elpart = CONST_DOUBLE_LOW (el);
	  parts = 2;
	}
      else
	gcc_unreachable ();

      for (part = 0; part < parts; part++)
	{
	  unsigned int byte;
	  for (byte = 0; byte < innersize; byte++)
	    {
	      bytes[idx++] = (elpart & 0xff) ^ invmask;
	      elpart >>= BITS_PER_UNIT;
	    }
	  if (GET_CODE (el) == CONST_DOUBLE)
	    elpart = CONST_DOUBLE_HIGH (el);
	}
    }

  /* Sanity check.  */
  gcc_assert (idx == GET_MODE_SIZE (mode));

  do
    {
      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);

      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);

      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);

      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
	     && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);

      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);

      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);

      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);

      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);

      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);

      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
	     && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);

      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);

      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);

      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);

      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);

      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);

      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);

      CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);

      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
	     && bytes[i] == bytes[(i + 8) % idx], 0, 0);
    }
  while (0);

  if (immtype == -1)
    return false;

  if (info)
    {
      info->element_width = elsize;
      info->mvn = emvn != 0;
      info->shift = eshift;

      unsigned HOST_WIDE_INT imm = 0;

      if (immtype >= 12 && immtype <= 15)
	info->msl = true;

      /* Un-invert bytes of recognized vector, if necessary.  */
      if (invmask != 0)
	for (i = 0; i < idx; i++)
	  bytes[i] ^= invmask;

      if (immtype == 17)
	{
	  /* FIXME: Broken on 32-bit H_W_I hosts.  */
	  gcc_assert (sizeof (HOST_WIDE_INT) == 8);

	  for (i = 0; i < 8; i++)
	    imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
	      << (i * BITS_PER_UNIT);

	  info->value = GEN_INT (imm);
	}
      else
	{
	  for (i = 0; i < elsize / BITS_PER_UNIT; i++)
	    imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

	  /* Construct 'abcdefgh' because the assembler cannot handle
	     generic constants.  */
	  if (info->mvn)
	    imm = ~imm;
	  imm = (imm >> info->shift) & 0xff;
	  info->value = GEN_INT (imm);
	}
    }

  return true;
}
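/* A worked instance (illustrative, not from the original sources): a
   V4SImode vector whose every element is 0x0000ab00 splats to the byte
   pattern { 0x00, 0xab, 0x00, 0x00, ... }, which matches the
   CHECK (4, 32, 1, ...) case above: immtype 1, elsize 32, eshift 8,
   emvn 0.  INFO then describes the instruction "movi Vd.4s, 0xab, lsl 8".  */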
/* Check if immediate shift constants are within range.  */
bool
aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
{
  int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
  if (left)
    return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
  else
    return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
}
/* Return true if X is a uniform vector where all elements
   are either the floating-point constant 0.0 or the
   integer constant 0.  */
bool
aarch64_simd_imm_zero_p (rtx x, machine_mode mode)
{
  return x == CONST0_RTX (mode);
}

bool
aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT imm = INTVAL (x);
  int i;

  for (i = 0; i < 8; i++)
    {
      unsigned int byte = imm & 0xff;
      if (byte != 0xff && byte != 0)
	return false;
      imm >>= 8;
    }

  return true;
}
bool
aarch64_mov_operand_p (rtx x,
		       enum aarch64_symbol_context context,
		       machine_mode mode)
{
  if (GET_CODE (x) == HIGH
      && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
    return true;

  if (CONST_INT_P (x))
    return true;

  if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
    return true;

  return aarch64_classify_symbolic_expression (x, context)
    == SYMBOL_TINY_ABSOLUTE;
}
/* Return a const_int vector of VAL.  */
rtx
aarch64_simd_gen_const_vector_dup (machine_mode mode, int val)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits);
  int i;

  for (i = 0; i < nunits; i++)
    RTVEC_ELT (v, i) = GEN_INT (val);

  return gen_rtx_CONST_VECTOR (mode, v);
}
/* Check OP is a legal scalar immediate for the MOVI instruction.  */

bool
aarch64_simd_scalar_immediate_valid_for_move (rtx op, machine_mode mode)
{
  machine_mode vmode;

  gcc_assert (!VECTOR_MODE_P (mode));
  vmode = aarch64_preferred_simd_mode (mode);
  rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
  return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
}
/* Construct and return a PARALLEL RTX vector with elements numbering the
   lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
   the vector - from the perspective of the architecture.  This does not
   line up with GCC's perspective on lane numbers, so we end up with
   different masks depending on our target endian-ness.  The diagram
   below may help.  We must draw the distinction when building masks
   which select one half of the vector.  An instruction selecting
   architectural low-lanes for a big-endian target, must be described using
   a mask selecting GCC high-lanes.

                 Big-Endian             Little-Endian

GCC             0   1   2   3           3   2   1   0
              | x | x | x | x |       | x | x | x | x |
Architecture    3   2   1   0           3   2   1   0

Low Mask:         { 2, 3 }              { 0, 1 }
High Mask:        { 0, 1 }              { 2, 3 }
*/

rtx
aarch64_simd_vect_par_cnst_half (machine_mode mode, bool high)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits / 2);
  int high_base = nunits / 2;
  int low_base = 0;
  int base;
  rtx t1;
  int i;

  if (BYTES_BIG_ENDIAN)
    base = high ? low_base : high_base;
  else
    base = high ? high_base : low_base;

  for (i = 0; i < nunits / 2; i++)
    RTVEC_ELT (v, i) = GEN_INT (base + i);

  t1 = gen_rtx_PARALLEL (mode, v);
  return t1;
}
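/* For instance, for V4SImode with HIGH == true a little-endian target
   gets (parallel [(const_int 2) (const_int 3)]), while a big-endian
   target gets (parallel [(const_int 0) (const_int 1)]), matching the
   High Mask row of the diagram above.  */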
/* Check OP for validity as a PARALLEL RTX vector with elements
   numbering the lanes of either the high (HIGH == TRUE) or low lanes,
   from the perspective of the architecture.  See the diagram above
   aarch64_simd_vect_par_cnst_half for more details.  */

bool
aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
				       bool high)
{
  rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high);
  HOST_WIDE_INT count_op = XVECLEN (op, 0);
  HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
  int i = 0;

  if (!VECTOR_MODE_P (mode))
    return false;

  if (count_op != count_ideal)
    return false;

  for (i = 0; i < count_ideal; i++)
    {
      rtx elt_op = XVECEXP (op, 0, i);
      rtx elt_ideal = XVECEXP (ideal, 0, i);

      if (!CONST_INT_P (elt_op)
	  || INTVAL (elt_ideal) != INTVAL (elt_op))
	return false;
    }
  return true;
}
/* Bounds-check lanes.  Ensure OPERAND lies between LOW (inclusive) and
   HIGH (exclusive).  */
void
aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
			  const_tree exp)
{
  HOST_WIDE_INT lane;

  gcc_assert (CONST_INT_P (operand));
  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    {
      if (exp)
	error ("%Klane %ld out of range %ld - %ld", exp, lane, low, high - 1);
      else
	error ("lane %ld out of range %ld - %ld", lane, low, high - 1);
    }
}
/* Return TRUE if OP is a valid vector addressing mode.  */
bool
aarch64_simd_mem_operand_p (rtx op)
{
  return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
			|| REG_P (XEXP (op, 0)));
}
/* Emit a register copy from operand to operand, taking care not to
   early-clobber source registers in the process.

   COUNT is the number of components into which the copy needs to be
   decomposed.  */
void
aarch64_simd_emit_reg_reg_move (rtx *operands, enum machine_mode mode,
				unsigned int count)
{
  unsigned int i;
  int rdest = REGNO (operands[0]);
  int rsrc = REGNO (operands[1]);

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || rdest < rsrc)
    for (i = 0; i < count; i++)
      emit_move_insn (gen_rtx_REG (mode, rdest + i),
		      gen_rtx_REG (mode, rsrc + i));
  else
    for (i = 0; i < count; i++)
      emit_move_insn (gen_rtx_REG (mode, rdest + count - i - 1),
		      gen_rtx_REG (mode, rsrc + count - i - 1));
}
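/* E.g. (registers illustrative only) copying a three-register value
   from v1-v3 to v2-v4 overlaps with rdest > rsrc, so the second loop
   copies v3->v4, then v2->v3, then v1->v2; copying in ascending order
   would clobber a source register before it had been read.  */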
/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
   one of VSTRUCT modes: OI, CI or XI.  */
int
aarch64_simd_attr_length_move (rtx_insn *insn)
{
  machine_mode mode;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
    {
      mode = GET_MODE (recog_data.operand[0]);
      switch (mode)
	{
	case OImode:
	  return 8;
	case CImode:
	  return 12;
	case XImode:
	  return 16;
	default:
	  gcc_unreachable ();
	}
    }
  return 4;
}

/* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is
   one of VSTRUCT modes: OI, CI, EI, or XI.  */
int
aarch64_simd_attr_length_rglist (enum machine_mode mode)
{
  return (GET_MODE_SIZE (mode) / UNITS_PER_VREG) * 4;
}
/* Implement target hook TARGET_VECTOR_ALIGNMENT.  The AAPCS64 sets the maximum
   alignment of a vector to 128 bits.  */
static HOST_WIDE_INT
aarch64_simd_vector_alignment (const_tree type)
{
  HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
  return MIN (align, 128);
}

/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE.  */
static bool
aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
{
  if (is_packed)
    return false;

  /* We guarantee alignment for vectors up to 128-bits.  */
  if (tree_int_cst_compare (TYPE_SIZE (type),
			    bitsize_int (BIGGEST_ALIGNMENT)) > 0)
    return false;

  /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned.  */
  return true;
}
/* If VALS is a vector constant that can be loaded into a register
   using DUP, generate instructions to do so and return an RTX to
   assign to the register.  Otherwise return NULL_RTX.  */
static rtx
aarch64_simd_dup_constant (rtx vals)
{
  machine_mode mode = GET_MODE (vals);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  bool all_same = true;
  rtx x;
  int i;

  if (GET_CODE (vals) != CONST_VECTOR)
    return NULL_RTX;

  for (i = 1; i < n_elts; ++i)
    {
      x = CONST_VECTOR_ELT (vals, i);
      if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
	all_same = false;
    }

  if (!all_same)
    return NULL_RTX;

  /* We can load this constant by using DUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */
  x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
  return gen_rtx_VEC_DUPLICATE (mode, x);
}
/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that can not be converted into a CONST_VECTOR.  */
static rtx
aarch64_simd_make_constant (rtx vals)
{
  machine_mode mode = GET_MODE (vals);
  rtx const_dup;
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);
  int n_const = 0;
  int i;

  if (GET_CODE (vals) == CONST_VECTOR)
    const_vec = vals;
  else if (GET_CODE (vals) == PARALLEL)
    {
      /* A CONST_VECTOR must contain only CONST_INTs and
	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
	 Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	    n_const++;
	}
      if (n_const == n_elts)
	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    }
  else
    gcc_unreachable ();

  if (const_vec != NULL_RTX
      && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
    /* Load using MOVI/MVNI.  */
    return const_vec;
  else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
    /* Loaded using DUP.  */
    return const_dup;
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  We can not take advantage of single-cycle
       LD1 because we need a PC-relative addressing mode.  */
    return const_vec;
  else
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We can not construct an initializer.  */
    return NULL_RTX;
}
void
aarch64_expand_vector_init (rtx target, rtx vals)
{
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0;
  rtx any_const = NULL_RTX;
  bool all_same = true;

  for (int i = 0; i < n_elts; ++i)
    {
      rtx x = XVECEXP (vals, 0, i);
      if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
	++n_var;
      else
	any_const = x;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (n_var == 0)
    {
      rtx constant = aarch64_simd_make_constant (vals);
      if (constant != NULL_RTX)
	{
	  emit_move_insn (target, constant);
	  return;
	}
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same)
    {
      rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
      return;
    }

  /* Half the fields (or less) are non-constant.  Load constant then overwrite
     varying fields.  Hope that this is more efficient than using the stack.  */
  if (n_var <= n_elts/2)
    {
      rtx copy = copy_rtx (vals);

      /* Load constant part of vector.  We really don't care what goes into the
	 parts we will overwrite, but we're more likely to be able to load the
	 constant efficiently if it has fewer, larger, repeating parts
	 (see aarch64_simd_valid_immediate).  */
      for (int i = 0; i < n_elts; i++)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	    continue;
	  rtx subst = any_const;
	  for (int bit = n_elts / 2; bit > 0; bit /= 2)
	    {
	      /* Look in the copied vector, as more elements are const.  */
	      rtx test = XVECEXP (copy, 0, i ^ bit);
	      if (CONST_INT_P (test) || CONST_DOUBLE_P (test))
		{
		  subst = test;
		  break;
		}
	    }
	  XVECEXP (copy, 0, i) = subst;
	}
      aarch64_expand_vector_init (target, copy);

      /* Insert variables.  */
      enum insn_code icode = optab_handler (vec_set_optab, mode);
      gcc_assert (icode != CODE_FOR_nothing);

      for (int i = 0; i < n_elts; i++)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	    continue;
	  x = copy_to_mode_reg (inner_mode, x);
	  emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
	}
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (int i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
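/* Illustrative only: initialising a V4SImode vector with { x, 1, 2, 3 },
   where x lives in a register, takes the n_var <= n_elts/2 path.  Lane 0
   of the copy borrows the constant from lane 0^2, so the constant vector
   { 2, 1, 2, 3 } is loaded first, and then a single vec_set inserts x
   into lane 0.  */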
/* Implement TARGET_SHIFT_TRUNCATION_MASK.  */
static unsigned HOST_WIDE_INT
aarch64_shift_truncation_mask (machine_mode mode)
{
  return
    (aarch64_vector_mode_supported_p (mode)
     || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
}
#ifndef TLS_SECTION_ASM_FLAG
#define TLS_SECTION_ASM_FLAG 'T'
#endif

void
aarch64_elf_asm_named_section (const char *name, unsigned int flags,
			       tree decl ATTRIBUTE_UNUSED)
{
  char flagchars[10], *f = flagchars;

  /* If we have already declared this section, we can use an
     abbreviated form to switch back to it -- unless this section is
     part of a COMDAT group, in which case GAS requires the full
     declaration every time.  */
  if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
      && (flags & SECTION_DECLARED))
    {
      fprintf (asm_out_file, "\t.section\t%s\n", name);
      return;
    }

  if (!(flags & SECTION_DEBUG))
    *f++ = 'a';
  if (flags & SECTION_WRITE)
    *f++ = 'w';
  if (flags & SECTION_CODE)
    *f++ = 'x';
  if (flags & SECTION_SMALL)
    *f++ = 's';
  if (flags & SECTION_MERGE)
    *f++ = 'M';
  if (flags & SECTION_STRINGS)
    *f++ = 'S';
  if (flags & SECTION_TLS)
    *f++ = TLS_SECTION_ASM_FLAG;
  if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
    *f++ = 'G';
  *f = '\0';

  fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);

  if (!(flags & SECTION_NOTYPE))
    {
      const char *type;
      const char *format;

      if (flags & SECTION_BSS)
	type = "nobits";
      else
	type = "progbits";

#ifdef TYPE_OPERAND_FMT
      format = "," TYPE_OPERAND_FMT;
#else
      format = ",@%s";
#endif

      fprintf (asm_out_file, format, type);

      if (flags & SECTION_ENTSIZE)
	fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
      if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
	{
	  if (TREE_CODE (decl) == IDENTIFIER_NODE)
	    fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
	  else
	    fprintf (asm_out_file, ",%s,comdat",
		     IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
	}
    }

  putc ('\n', asm_out_file);
}
/* Select a format to encode pointers in exception handling data.  */
int
aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
{
  int type;
  switch (aarch64_cmodel)
    {
    case AARCH64_CMODEL_TINY:
    case AARCH64_CMODEL_TINY_PIC:
    case AARCH64_CMODEL_SMALL:
    case AARCH64_CMODEL_SMALL_PIC:
      /* text+got+data < 4Gb.  4-byte signed relocs are sufficient
	 for everything.  */
      type = DW_EH_PE_sdata4;
      break;
    default:
      /* No assumptions here.  8-byte relocs required.  */
      type = DW_EH_PE_sdata8;
      break;
    }
  return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
}
/* Emit load exclusive.  */

static void
aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
			     rtx mem, rtx model_rtx)
{
  rtx (*gen) (rtx, rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_aarch64_load_exclusiveqi; break;
    case HImode: gen = gen_aarch64_load_exclusivehi; break;
    case SImode: gen = gen_aarch64_load_exclusivesi; break;
    case DImode: gen = gen_aarch64_load_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, model_rtx));
}

/* Emit store exclusive.  */

static void
aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
			      rtx rval, rtx mem, rtx model_rtx)
{
  rtx (*gen) (rtx, rtx, rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_aarch64_store_exclusiveqi; break;
    case HImode: gen = gen_aarch64_store_exclusivehi; break;
    case SImode: gen = gen_aarch64_store_exclusivesi; break;
    case DImode: gen = gen_aarch64_store_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (bval, rval, mem, model_rtx));
}
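/* These generators correspond to the load/store-exclusive patterns in
   atomics.md; the MODEL_RTX operand selects the plain (LDXR/STXR) or
   acquire/release (LDAXR/STLXR) instruction forms as the memory model
   requires.  */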
/* Mark the previous jump instruction as unlikely.  */

static void
aarch64_emit_unlikely_jump (rtx insn)
{
  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;

  insn = emit_jump_insn (insn);
  add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
}
/* Expand a compare and swap pattern.  */

void
aarch64_expand_compare_and_swap (rtx operands[])
{
  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  machine_mode mode, cmp_mode;
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);
  cmp_mode = mode;

  /* Normally the succ memory model must be stronger than fail, but in the
     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */

  if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
      && INTVAL (mod_s) == MEMMODEL_RELEASE)
    mod_s = GEN_INT (MEMMODEL_ACQ_REL);

  switch (mode)
    {
    case QImode:
    case HImode:
      /* For short modes, we're going to perform the comparison in SImode,
	 so do the zero-extension now.  */
      cmp_mode = SImode;
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* Fall through.  */

    case SImode:
    case DImode:
      /* Force the value into a register if needed.  */
      if (!aarch64_plus_operand (oldval, mode))
	oldval = force_reg (cmp_mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  switch (mode)
    {
    case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
    case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
    case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
    case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  x = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_EQ (SImode, x, const0_rtx);
  emit_insn (gen_rtx_SET (VOIDmode, bval, x));
}
/* Split a compare and swap pattern.  */

void
aarch64_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, scratch;
  machine_mode mode;
  bool is_weak;
  rtx_code_label *label1, *label2;
  rtx x, cond;

  rval = operands[0];
  mem = operands[1];
  oldval = operands[2];
  newval = operands[3];
  is_weak = (operands[4] != const0_rtx);
  scratch = operands[7];
  mode = GET_MODE (mem);

  label1 = NULL;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);

  cond = aarch64_gen_compare_reg (NE, rval, oldval);
  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
  aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));

  aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);

  if (!is_weak)
    {
      x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
      aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
    }
  else
    {
      cond = gen_rtx_REG (CCmode, CC_REGNUM);
      x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
      emit_insn (gen_rtx_SET (VOIDmode, cond, x));
    }

  emit_label (label2);
}
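/* For the strong variant the emitted sequence is essentially the
   following (registers illustrative; acquire/release forms depend on
   the memory model operand):

     .L1: ldxr  x0, [x1]          ; rval = *mem
          cmp   x0, x2            ; rval != oldval?
          bne   .L2                ; fail: leave the loop
          stxr  w3, x4, [x1]      ; try *mem = newval
          cbnz  w3, .L1           ; retry if the reservation was lost
     .L2:                                                              */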
/* Split an atomic operation.  */

void
aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
			 rtx value, rtx model_rtx, rtx cond)
{
  machine_mode mode = GET_MODE (mem);
  machine_mode wmode = (mode == DImode ? DImode : SImode);
  rtx_code_label *label;
  rtx x;

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
	{
	  value = GEN_INT (-INTVAL (value));
	  code = PLUS;
	}
      /* Fall through.  */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
      break;
    }

  aarch64_emit_store_exclusive (mode, cond, mem,
				gen_lowpart (mode, new_out), model_rtx);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
  aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
}
static void
aarch64_print_extension (void)
{
  const struct aarch64_option_extension *opt = NULL;

  for (opt = all_extensions; opt->name != NULL; opt++)
    if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
      asm_fprintf (asm_out_file, "+%s", opt->name);

  asm_fprintf (asm_out_file, "\n");
}

static void
aarch64_start_file (void)
{
  if (selected_arch)
    {
      asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
      aarch64_print_extension ();
    }
  else if (selected_cpu)
    {
      const char *truncated_name
	= aarch64_rewrite_selected_cpu (selected_cpu->name);
      asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
      aarch64_print_extension ();
    }
  default_file_start();
}
/* Target hook for c_mode_for_suffix.  */
static machine_mode
aarch64_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;

  return VOIDmode;
}
/* We can only represent floating point constants which will fit in
   "quarter-precision" values.  These values are characterised by
   a sign bit, a 4-bit mantissa and a 3-bit exponent.  And are given
   by:

   (-1)^s * (n/16) * 2^r

   Where:
     's' is the sign bit.
     'n' is an integer in the range 16 <= n <= 31.
     'r' is an integer in the range -3 <= r <= 4.  */
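/* A couple of worked instances of the formula: 1.0 is s = 0, n = 16,
   r = 0 ((16/16) * 2^0); 0.125 is n = 16, r = -3; 31.0 is n = 31,
   r = 4.  The representable magnitudes therefore run from
   (16/16) * 2^-3 = 0.125 up to (31/16) * 2^4 = 31.0.  */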
/* Return true iff X can be represented by a quarter-precision
   floating point immediate operand X.  Note, we cannot represent 0.0.  */
bool
aarch64_float_const_representable_p (rtx x)
{
  /* This represents our current view of how many bits
     make up the mantissa.  */
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
  int exponent;
  unsigned HOST_WIDE_INT mantissa, mask;
  REAL_VALUE_TYPE r, m;
  bool fail;

  if (!CONST_DOUBLE_P (x))
    return false;

  if (GET_MODE (x) == VOIDmode)
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* We cannot represent infinities, NaNs or +/-zero.  We won't
     know if we have +zero until we analyse the mantissa, but we
     can reject the other invalid values.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
      || REAL_VALUE_MINUS_ZERO (r))
    return false;

  /* Extract exponent.  */
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);

  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     m1 holds the low part of the mantissa, m2 the high part.
     WARNING: If we ever have a representation using more than 2 * H_W_I - 1
     bits for the mantissa, this can fail (low bits will be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);

  /* If the low part of the mantissa has bits set we cannot represent
     the value.  */
  if (w.elt (0) != 0)
    return false;
  /* We have rejected the lower HOST_WIDE_INT, so update our
     understanding of how many bits lie in the mantissa and
     look only at the high HOST_WIDE_INT.  */
  mantissa = w.elt (1);
  point_pos -= HOST_BITS_PER_WIDE_INT;

  /* We can only represent values with a mantissa of the form 1.xxxx.  */
  mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return false;

  /* Having filtered unrepresentable values, we may now remove all
     but the highest 5 bits.  */
  mantissa >>= point_pos - 5;

  /* We cannot represent the value 0.0, so reject it.  This is handled
     elsewhere.  */
  if (mantissa == 0)
    return false;

  /* Then, as bit 4 is always set, we can mask it off, leaving
     the mantissa in the range [0, 15].  */
  mantissa &= ~(1 << 4);
  gcc_assert (mantissa <= 15);

  /* GCC internally does not use IEEE754-like encoding (where normalized
     significands are in the range [1, 2).  GCC uses [0.5, 1) (see real.c).
     Our mantissa values are shifted 4 places to the left relative to
     normalized IEEE754 so we must modify the exponent returned by REAL_EXP
     by 5 places to correct for GCC's representation.  */
  exponent = 5 - exponent;

  return (exponent >= 0 && exponent <= 7);
}
char*
aarch64_output_simd_mov_immediate (rtx const_vector,
				   machine_mode mode,
				   unsigned width)
{
  bool is_valid;
  static char templ[40];
  const char *mnemonic;
  const char *shift_op;
  unsigned int lane_count = 0;
  char element_char;

  struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };

  /* This will return true to show const_vector is legal for use as either
     an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
     also update INFO to show how the immediate should be generated.  */
  is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
  gcc_assert (is_valid);

  element_char = sizetochar (info.element_width);
  lane_count = width / info.element_width;

  mode = GET_MODE_INNER (mode);
  if (mode == SFmode || mode == DFmode)
    {
      gcc_assert (info.shift == 0 && ! info.mvn);
      if (aarch64_float_const_zero_rtx_p (info.value))
	info.value = GEN_INT (0);
      else
	{
#define buf_size 20
	  REAL_VALUE_TYPE r;
	  REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
	  char float_buf[buf_size] = {'\0'};
	  real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
#undef buf_size

	  if (lane_count == 1)
	    snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
	  else
	    snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
		      lane_count, element_char, float_buf);
	  return templ;
	}
    }

  mnemonic = info.mvn ? "mvni" : "movi";
  shift_op = info.msl ? "msl" : "lsl";

  if (lane_count == 1)
    snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
	      mnemonic, UINTVAL (info.value));
  else if (info.shift)
    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
	      ", %s %d", mnemonic, lane_count, element_char,
	      UINTVAL (info.value), shift_op, info.shift);
  else
    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
	      mnemonic, lane_count, element_char, UINTVAL (info.value));
  return templ;
}

char*
aarch64_output_scalar_simd_mov_immediate (rtx immediate,
					  machine_mode mode)
{
  machine_mode vmode;

  gcc_assert (!VECTOR_MODE_P (mode));
  vmode = aarch64_simd_container_mode (mode, 64);
  rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
  return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
}
/* Split operands into moves from op[1] + op[2] into op[0].  */

void
aarch64_split_combinev16qi (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
  rtx destlo, desthi;

  gcc_assert (halfmode == V16QImode);

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
			       GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  */
  if (reg_overlap_mentioned_p (operands[2], destlo)
      && reg_overlap_mentioned_p (operands[1], desthi))
    {
      emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
      emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
      emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
    }
  else if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
	 is in the right place already.  */
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
    }
}
/* vec_perm support.  */

#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  machine_mode vmode;
  unsigned char nelt;
  bool one_vector_p;
  bool testing_p;
};

/* Generate a variable permutation.  */

static void
aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_SIMD);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
	{
	  /* Expand the argument to a V16QI mode by duplicating it.  */
	  rtx pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
	  emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
	}
      else
	{
	  emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
	}
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
	{
	  pair = gen_reg_rtx (V16QImode);
	  emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
	  emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
	}
      else
	{
	  pair = gen_reg_rtx (OImode);
	  emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
	  emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
	}
    }
}
void
aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  unsigned int nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx mask;

  /* The TBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = aarch64_simd_gen_const_vector_dup (vmode,
					    one_vector_p ? nelt - 1 : 2 * nelt - 1);
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  /* For big-endian, we also need to reverse the index within the vector
     (but not which vector).  */
  if (BYTES_BIG_ENDIAN)
    {
      /* If one_vector_p, mask is a vector of (nelt - 1)'s already.  */
      if (!one_vector_p)
	mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
      sel = expand_simple_binop (vmode, XOR, sel, mask,
				 NULL, 0, OPTAB_LIB_WIDEN);
    }
  aarch64_expand_vec_perm_1 (target, op0, op1, sel);
}
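/* E.g. for a single V16QImode input vector the mask is a splat of 15,
   so a selector element of 17 becomes 17 & 15 = 1, giving the modulo
   behaviour vec_perm requires; TBL itself would instead have written 0
   for any out-of-range index.  */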
/* Recognize patterns suitable for the TRN instructions.  */
static bool
aarch64_evpc_trn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
	return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }
  out = d->target;

  if (odd)
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_trn2v16qi; break;
	case V8QImode: gen = gen_aarch64_trn2v8qi; break;
	case V8HImode: gen = gen_aarch64_trn2v8hi; break;
	case V4HImode: gen = gen_aarch64_trn2v4hi; break;
	case V4SImode: gen = gen_aarch64_trn2v4si; break;
	case V2SImode: gen = gen_aarch64_trn2v2si; break;
	case V2DImode: gen = gen_aarch64_trn2v2di; break;
	case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
	case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
	case V2DFmode: gen = gen_aarch64_trn2v2df; break;
	default:
	  return false;
	}
    }
  else
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_trn1v16qi; break;
	case V8QImode: gen = gen_aarch64_trn1v8qi; break;
	case V8HImode: gen = gen_aarch64_trn1v8hi; break;
	case V4HImode: gen = gen_aarch64_trn1v4hi; break;
	case V4SImode: gen = gen_aarch64_trn1v4si; break;
	case V2SImode: gen = gen_aarch64_trn1v2si; break;
	case V2DImode: gen = gen_aarch64_trn1v2di; break;
	case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
	case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
	case V2DFmode: gen = gen_aarch64_trn1v2df; break;
	default:
	  return false;
	}
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
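/* As an illustration: for V4SImode inputs A = { a0, a1, a2, a3 } and
   B = { b0, b1, b2, b3 }, the permutation { 0, 4, 2, 6 } is matched
   with odd == 0 and emits TRN1, producing { a0, b0, a2, b2 }, while
   { 1, 5, 3, 7 } matches odd == 1 (TRN2) and yields { a1, b1, a3, b3 }.  */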

/* Recognize patterns suitable for the UZP instructions.  */
static bool
aarch64_evpc_uzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt = (i * 2 + odd) & mask;
      if (d->perm[i] != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }
  out = d->target;

  if (odd)
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
	case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
	case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
	case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
	case V4SImode: gen = gen_aarch64_uzp2v4si; break;
	case V2SImode: gen = gen_aarch64_uzp2v2si; break;
	case V2DImode: gen = gen_aarch64_uzp2v2di; break;
	case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
	case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
	case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
	default:
	  return false;
	}
    }
  else
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
	case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
	case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
	case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
	case V4SImode: gen = gen_aarch64_uzp1v4si; break;
	case V2SImode: gen = gen_aarch64_uzp1v2si; break;
	case V2DImode: gen = gen_aarch64_uzp1v2di; break;
	case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
	case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
	case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
	default:
	  return false;
	}
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
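
/* Illustrative example: on V4SImode with op0 = { a0, a1, a2, a3 } and
   op1 = { b0, b1, b2, b3 }, the selector { 0, 2, 4, 6 } matches with
   odd == 0 and becomes UZP1, giving { a0, a2, b0, b2 }; the selector
   { 1, 3, 5, 7 } becomes UZP2, giving { a1, a3, b1, b3 }.  */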

/* Recognize patterns suitable for the ZIP instructions.  */
static bool
aarch64_evpc_zip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  high = nelt / 2;
  if (d->perm[0] == high)
    /* Do Nothing.  */
    ;
  else if (d->perm[0] == 0)
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt = (i + high) & mask;
      if (d->perm[i * 2] != elt)
	return false;
      elt = (elt + nelt) & mask;
      if (d->perm[i * 2 + 1] != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      high = !high;
    }
  out = d->target;

  if (high)
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_zip2v16qi; break;
	case V8QImode: gen = gen_aarch64_zip2v8qi; break;
	case V8HImode: gen = gen_aarch64_zip2v8hi; break;
	case V4HImode: gen = gen_aarch64_zip2v4hi; break;
	case V4SImode: gen = gen_aarch64_zip2v4si; break;
	case V2SImode: gen = gen_aarch64_zip2v2si; break;
	case V2DImode: gen = gen_aarch64_zip2v2di; break;
	case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
	case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
	case V2DFmode: gen = gen_aarch64_zip2v2df; break;
	default:
	  return false;
	}
    }
  else
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_zip1v16qi; break;
	case V8QImode: gen = gen_aarch64_zip1v8qi; break;
	case V8HImode: gen = gen_aarch64_zip1v8hi; break;
	case V4HImode: gen = gen_aarch64_zip1v4hi; break;
	case V4SImode: gen = gen_aarch64_zip1v4si; break;
	case V2SImode: gen = gen_aarch64_zip1v2si; break;
	case V2DImode: gen = gen_aarch64_zip1v2di; break;
	case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
	case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
	case V2DFmode: gen = gen_aarch64_zip1v2df; break;
	default:
	  return false;
	}
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
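
/* Illustrative example: on V4SImode with op0 = { a0, a1, a2, a3 } and
   op1 = { b0, b1, b2, b3 }, the selector { 0, 4, 1, 5 } matches with
   high == 0 and becomes ZIP1, giving { a0, b0, a1, b1 }; the selector
   { 2, 6, 3, 7 } matches with high == nelt / 2 and becomes ZIP2,
   giving { a2, b2, a3, b3 }.  */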

/* Recognize patterns for the EXT insn.  */

static bool
aarch64_evpc_ext (struct expand_vec_perm_d *d)
{
  unsigned int i, nelt = d->nelt;
  rtx (*gen) (rtx, rtx, rtx, rtx);
  rtx offset;

  unsigned int location = d->perm[0]; /* Always < nelt.  */

  /* Check if the extracted indices are increasing by one.  */
  for (i = 1; i < nelt; i++)
    {
      unsigned int required = location + i;
      if (d->one_vector_p)
	{
	  /* We'll pass the same vector in twice, so allow indices to wrap.  */
	  required &= (nelt - 1);
	}
      if (d->perm[i] != required)
	return false;
    }

  switch (d->vmode)
    {
    case V16QImode: gen = gen_aarch64_extv16qi; break;
    case V8QImode: gen = gen_aarch64_extv8qi; break;
    case V4HImode: gen = gen_aarch64_extv4hi; break;
    case V8HImode: gen = gen_aarch64_extv8hi; break;
    case V2SImode: gen = gen_aarch64_extv2si; break;
    case V4SImode: gen = gen_aarch64_extv4si; break;
    case V2SFmode: gen = gen_aarch64_extv2sf; break;
    case V4SFmode: gen = gen_aarch64_extv4sf; break;
    case V2DImode: gen = gen_aarch64_extv2di; break;
    case V2DFmode: gen = gen_aarch64_extv2df; break;
    default:
      return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  /* The case where (location == 0) is a no-op for both big- and little-endian,
     and is removed by the mid-end at optimization levels -O1 and higher.  */

  if (BYTES_BIG_ENDIAN && (location != 0))
    {
      /* After setup, we want the high elements of the first vector (stored
	 at the LSB end of the register), and the low elements of the second
	 vector (stored at the MSB end of the register).  So swap.  */
      std::swap (d->op0, d->op1);
      /* location != 0 (above), so safe to assume (nelt - location) < nelt.  */
      location = nelt - location;
    }

  offset = GEN_INT (location);
  emit_insn (gen (d->target, d->op0, d->op1, offset));

  return true;
}
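
/* Illustrative example: on V4SImode the selector { 1, 2, 3, 4 } has
   location == 1 and strictly increasing indices, so it is emitted as an
   EXT of the two inputs at a one-element offset, producing
   { op0[1], op0[2], op0[3], op1[0] } on little-endian.  */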

/* Recognize patterns for the REV insns.  */

static bool
aarch64_evpc_rev (struct expand_vec_perm_d *d)
{
  unsigned int i, j, diff, nelt = d->nelt;
  rtx (*gen) (rtx, rtx);

  if (!d->one_vector_p)
    return false;

  diff = d->perm[0];
  switch (diff)
    {
    case 7:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_aarch64_rev64v16qi; break;
	case V8QImode: gen = gen_aarch64_rev64v8qi; break;
	default:
	  return false;
	}
      break;
    case 3:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_aarch64_rev32v16qi; break;
	case V8QImode: gen = gen_aarch64_rev32v8qi; break;
	case V8HImode: gen = gen_aarch64_rev64v8hi; break;
	case V4HImode: gen = gen_aarch64_rev64v4hi; break;
	default:
	  return false;
	}
      break;
    case 1:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_aarch64_rev16v16qi; break;
	case V8QImode: gen = gen_aarch64_rev16v8qi; break;
	case V8HImode: gen = gen_aarch64_rev32v8hi; break;
	case V4HImode: gen = gen_aarch64_rev32v4hi; break;
	case V4SImode: gen = gen_aarch64_rev64v4si; break;
	case V2SImode: gen = gen_aarch64_rev64v2si; break;
	case V4SFmode: gen = gen_aarch64_rev64v4sf; break;
	case V2SFmode: gen = gen_aarch64_rev64v2sf; break;
	default:
	  return false;
	}
      break;
    default:
      return false;
    }

  for (i = 0; i < nelt; i += diff + 1)
    for (j = 0; j <= diff; j += 1)
      {
	/* This is guaranteed to be true as the value of diff
	   is 7, 3, 1 and we should have enough elements in the
	   queue to generate this.  Getting a vector mask with a
	   value of diff other than these values implies that
	   something is wrong by the time we get here.  */
	gcc_assert (i + j < nelt);
	if (d->perm[i + j] != i + diff - j)
	  return false;
      }

  /* Success!  */
  if (d->testing_p)
    return true;

  emit_insn (gen (d->target, d->op0));
  return true;
}
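
/* Illustrative example: on V4SImode the selector { 1, 0, 3, 2 } has
   diff == 1 and passes the element check, so it is emitted as
   rev64 vd.4s, vn.4s, reversing the 32-bit elements within each 64-bit
   doubleword: { a1, a0, a3, a2 }.  */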

/* Recognize patterns suitable for the DUP (element broadcast) insn.  */
static bool
aarch64_evpc_dup (struct expand_vec_perm_d *d)
{
  rtx (*gen) (rtx, rtx, rtx);
  rtx out = d->target;
  rtx in0;
  machine_mode vmode = d->vmode;
  unsigned int i, elt, nelt = d->nelt;
  rtx lane;

  elt = d->perm[0];
  for (i = 1; i < nelt; i++)
    {
      if (elt != d->perm[i])
	return false;
    }

  /* The generic preparation in aarch64_expand_vec_perm_const_1
     swaps the operand order and the permute indices if it finds
     d->perm[0] to be in the second operand.  Thus, we can always
     use d->op0 and need not do any extra arithmetic to get the
     correct lane number.  */
  in0 = d->op0;
  lane = GEN_INT (elt); /* The pattern corrects for big-endian.  */

  switch (vmode)
    {
    case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
    case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
    case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
    case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
    case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
    case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
    case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
    case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
    case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
    case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
    default:
      return false;
    }

  emit_insn (gen (out, in0, lane));
  return true;
}
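
/* Illustrative example: on V4SImode the selector { 2, 2, 2, 2 } replicates
   lane 2 of the first input and is emitted as dup vd.4s, vn.s[2].  */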

/* Fall back to a general TBL-based permute.  */
static bool
aarch64_evpc_tbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    {
      int nunits = GET_MODE_NUNITS (vmode);

      /* If big-endian and two vectors we end up with a weird mixed-endian
	 mode on NEON.  Reverse the index within each word but not the word
	 itself.  */
      rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
					   : d->perm[i]);
    }
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
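
/* Illustrative example: on big-endian V16QImode, nunits - 1 == 15, so a
   selector element of 1 is rewritten above to 1 ^ 15 == 14; TBL indexes
   bytes from the opposite end of the register on that layout, and the
   XOR compensates without changing which input vector is read.  */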

static bool
aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;

      gcc_assert (nelt == (nelt & -nelt));
      for (i = 0; i < nelt; ++i)
	d->perm[i] ^= nelt; /* Keep the same index, but in the other vector.  */

      std::swap (d->op0, d->op1);
    }

  if (TARGET_SIMD)
    {
      if (aarch64_evpc_rev (d))
	return true;
      else if (aarch64_evpc_ext (d))
	return true;
      else if (aarch64_evpc_dup (d))
	return true;
      else if (aarch64_evpc_zip (d))
	return true;
      else if (aarch64_evpc_uzp (d))
	return true;
      else if (aarch64_evpc_trn (d))
	return true;
      return aarch64_evpc_tbl (d);
    }

  return false;
}
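
/* Illustrative example of the operand swap above: on V4SImode the selector
   { 5, 1, 7, 3 } begins in the second operand, so op0 and op1 are swapped
   and each index is XORed with nelt, giving { 1, 5, 3, 7 }, which the TRN
   matcher then recognizes as a TRN2 of the swapped operands.  */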

/* Expand a vec_perm_const pattern.  */

bool
aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
	break;

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      /* Fall Through.  */
    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return aarch64_expand_vec_perm_const_1 (&d);
}

/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK.  */

static bool
aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
				     const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Calculate whether all elements are in one vector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* If all elements are from the second vector, reindex as if from the
     first vector.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to a single vector.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = aarch64_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}

rtx
aarch64_reverse_mask (enum machine_mode mode)
{
  /* We have to reverse each vector because we don't have
     a permuted load that can reverse-load according to ABI rules.  */
  rtx mask;
  rtvec v = rtvec_alloc (16);
  int i, j;
  int nunits = GET_MODE_NUNITS (mode);
  int usize = GET_MODE_UNIT_SIZE (mode);

  gcc_assert (BYTES_BIG_ENDIAN);
  gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode));

  for (i = 0; i < nunits; i++)
    for (j = 0; j < usize; j++)
      RTVEC_ELT (v, i * usize + j) = GEN_INT ((i + 1) * usize - 1 - j);
  mask = gen_rtx_CONST_VECTOR (V16QImode, v);
  return force_reg (V16QImode, mask);
}
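
/* Illustrative example: for V4SImode the byte selector built above is
   { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }, i.e. the
   bytes of each 4-byte element are reversed while the elements stay
   in place.  */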

/* Implement MODES_TIEABLE_P.  */

bool
aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
    return true;

  /* We specifically want to allow elements of "structure" modes to
     be tieable to the structure.  This more general condition allows
     other rarer situations too.  */
  if (TARGET_SIMD
      && aarch64_vector_mode_p (mode1)
      && aarch64_vector_mode_p (mode2))
    return true;

  return false;
}

/* Return a new RTX holding the result of moving POINTER forward by
   AMOUNT bytes.  */

static rtx
aarch64_move_pointer (rtx pointer, int amount)
{
  rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);

  return adjust_automodify_address (pointer, GET_MODE (pointer),
				    next, amount);
}

/* Return a new RTX holding the result of moving POINTER forward by the
   size of the mode it points to.  */

static rtx
aarch64_progress_pointer (rtx pointer)
{
  HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));

  return aarch64_move_pointer (pointer, amount);
}

/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
   MODE bytes.  */

static void
aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
					      machine_mode mode)
{
  rtx reg = gen_reg_rtx (mode);

  /* "Cast" the pointers to the correct mode.  */
  *src = adjust_address (*src, mode, 0);
  *dst = adjust_address (*dst, mode, 0);
  /* Emit the memcpy.  */
  emit_move_insn (reg, *src);
  emit_move_insn (*dst, reg);
  /* Move the pointers forward.  */
  *src = aarch64_progress_pointer (*src);
  *dst = aarch64_progress_pointer (*dst);
}

/* Expand movmem, as if from a __builtin_memcpy.  Return true if
   we succeed, otherwise return false.  */

bool
aarch64_expand_movmem (rtx *operands)
{
  unsigned int n;
  rtx dst = operands[0];
  rtx src = operands[1];
  rtx base;
  bool speed_p = !optimize_function_for_size_p (cfun);

  /* When optimizing for size, give a better estimate of the length of a
     memcpy call, but use the default otherwise.  */
  unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;

  /* We can't do anything smart if the amount to copy is not constant.  */
  if (!CONST_INT_P (operands[2]))
    return false;

  n = UINTVAL (operands[2]);

  /* Try to keep the number of instructions low.  For cases below 16 bytes we
     need to make at most two moves.  For cases above 16 bytes it will be one
     move for each 16 byte chunk, then at most two additional moves.  */
  if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
    return false;

  base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  dst = adjust_automodify_address (dst, VOIDmode, base, 0);

  base = copy_to_mode_reg (Pmode, XEXP (src, 0));
  src = adjust_automodify_address (src, VOIDmode, base, 0);

  /* Simple cases.  Copy 0-3 bytes, as (if applicable) a 2-byte, then a
     1-byte chunk.  */
  if (n < 4)
    {
      if (n >= 2)
	{
	  aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
	  n -= 2;
	}

      if (n == 1)
	aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);

      return true;
    }

  /* Copy 4-8 bytes.  First a 4-byte chunk, then (if applicable) a second
     4-byte chunk, partially overlapping with the previously copied chunk.  */
  if (n < 8)
    {
      aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
      n -= 4;
      if (n > 0)
	{
	  int move = n - 4;

	  src = aarch64_move_pointer (src, move);
	  dst = aarch64_move_pointer (dst, move);
	  aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
	}
      return true;
    }

  /* Copy more than 8 bytes.  Copy chunks of 16 bytes until we run out of
     them, then (if applicable) an 8-byte chunk.  */
  while (n >= 8)
    {
      if (n / 16)
	{
	  aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
	  n -= 16;
	}
      else
	{
	  aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
	  n -= 8;
	}
    }

  /* Finish the final bytes of the copy.  We can always do this in one
     instruction.  We either copy the exact amount we need, or partially
     overlap with the previous chunk we copied and copy 8-bytes.  */
  if (n == 0)
    return true;
  else if (n == 1)
    aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
  else if (n == 2)
    aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
  else if (n == 4)
    aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
  else if (n == 3)
    {
      src = aarch64_move_pointer (src, -1);
      dst = aarch64_move_pointer (dst, -1);
      aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
    }
  else
    {
      int move = n - 8;

      src = aarch64_move_pointer (src, move);
      dst = aarch64_move_pointer (dst, move);
      aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
    }

  return true;
}
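
/* Worked example of the chunk selection above (illustrative): for a
   27-byte copy the loop emits one TImode (16-byte) move and one DImode
   (8-byte) move, leaving n == 3; the tail code then backs both pointers
   up by one byte and emits a single SImode move, so bytes 23-26 are
   copied with a one-byte overlap rather than with two extra
   instructions.  */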

/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
aarch64_asan_shadow_offset (void)
{
  return (HOST_WIDE_INT_1 << 36);
}
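
/* Illustrative note: AddressSanitizer maps an application address ADDR to
   the shadow address (ADDR >> 3) + (1 << 36); the constant returned here
   is that fixed shadow base, chosen to fit the default 39-bit virtual
   address space.  */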

/* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P.  */

static bool
aarch64_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
					unsigned int align,
					enum by_pieces_operation op,
					bool speed_p)
{
  /* STORE_BY_PIECES can be used when copying a constant string, but
     in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
     For now we always fail this and let the move_by_pieces code copy
     the string from read-only memory.  */
  if (op == STORE_BY_PIECES)
    return false;

  return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
}

/* Map a comparison code to the CC mode used by the conditional-compare
   (CCMP) expansion, or CCmode if the code is not supported.  */
static enum machine_mode
aarch64_code_to_ccmode (enum rtx_code code)
{
  switch (code)
    {
    case NE: return CC_DNEmode;
    case EQ: return CC_DEQmode;
    case LE: return CC_DLEmode;
    case LT: return CC_DLTmode;
    case GE: return CC_DGEmode;
    case GT: return CC_DGTmode;
    case LEU: return CC_DLEUmode;
    case LTU: return CC_DLTUmode;
    case GEU: return CC_DGEUmode;
    case GTU: return CC_DGTUmode;
    default: return CCmode;
    }
}

/* Implement TARGET_GEN_CCMP_FIRST.  */

static rtx
aarch64_gen_ccmp_first (rtx *prep_seq, rtx *gen_seq,
			int code, tree treeop0, tree treeop1)
{
  enum machine_mode op_mode, cmp_mode, cc_mode;
  rtx op0, op1, cmp, target;
  int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
  enum insn_code icode;
  struct expand_operand ops[4];

  cc_mode = aarch64_code_to_ccmode ((enum rtx_code) code);
  if (cc_mode == CCmode)
    return NULL_RTX;

  start_sequence ();
  expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);

  op_mode = GET_MODE (op0);
  if (op_mode == VOIDmode)
    op_mode = GET_MODE (op1);

  switch (op_mode)
    {
    case QImode:
    case HImode:
    case SImode:
      cmp_mode = SImode;
      icode = CODE_FOR_cmpsi;
      break;

    case DImode:
      cmp_mode = DImode;
      icode = CODE_FOR_cmpdi;
      break;

    default:
      end_sequence ();
      return NULL_RTX;
    }

  op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
  op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
  if (!op0 || !op1)
    {
      end_sequence ();
      return NULL_RTX;
    }
  *prep_seq = get_insns ();
  end_sequence ();

  cmp = gen_rtx_fmt_ee ((enum rtx_code) code, cmp_mode, op0, op1);
  target = gen_rtx_REG (CCmode, CC_REGNUM);

  create_output_operand (&ops[0], target, CCmode);
  create_fixed_operand (&ops[1], cmp);
  create_fixed_operand (&ops[2], op0);
  create_fixed_operand (&ops[3], op1);

  start_sequence ();
  if (!maybe_expand_insn (icode, 4, ops))
    {
      end_sequence ();
      return NULL_RTX;
    }
  *gen_seq = get_insns ();
  end_sequence ();

  return gen_rtx_REG (cc_mode, CC_REGNUM);
}
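
/* Illustrative example: for a condition such as (a > 0 && b > 0), the
   middle end uses aarch64_gen_ccmp_first/aarch64_gen_ccmp_next to build,
   roughly,
     cmp  w0, 0
     ccmp w1, 0, <nzcv>, gt
   so the combined condition is evaluated with a single conditional
   compare instead of two compares and two branches (<nzcv> stands for
   the flag immediate chosen by the ccmp pattern).  */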

/* Implement TARGET_GEN_CCMP_NEXT.  */

static rtx
aarch64_gen_ccmp_next (rtx *prep_seq, rtx *gen_seq, rtx prev, int cmp_code,
		       tree treeop0, tree treeop1, int bit_code)
{
  rtx op0, op1, cmp0, cmp1, target;
  enum machine_mode op_mode, cmp_mode, cc_mode;
  int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
  enum insn_code icode = CODE_FOR_ccmp_andsi;
  struct expand_operand ops[6];

  cc_mode = aarch64_code_to_ccmode ((enum rtx_code) cmp_code);
  if (cc_mode == CCmode)
    return NULL_RTX;

  push_to_sequence ((rtx_insn *) *prep_seq);
  expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);

  op_mode = GET_MODE (op0);
  if (op_mode == VOIDmode)
    op_mode = GET_MODE (op1);

  switch (op_mode)
    {
    case QImode:
    case HImode:
    case SImode:
      cmp_mode = SImode;
      icode = (enum rtx_code) bit_code == AND ? CODE_FOR_ccmp_andsi
					      : CODE_FOR_ccmp_iorsi;
      break;

    case DImode:
      cmp_mode = DImode;
      icode = (enum rtx_code) bit_code == AND ? CODE_FOR_ccmp_anddi
					      : CODE_FOR_ccmp_iordi;
      break;

    default:
      end_sequence ();
      return NULL_RTX;
    }

  op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
  op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
  if (!op0 || !op1)
    {
      end_sequence ();
      return NULL_RTX;
    }
  *prep_seq = get_insns ();
  end_sequence ();

  target = gen_rtx_REG (cc_mode, CC_REGNUM);
  cmp1 = gen_rtx_fmt_ee ((enum rtx_code) cmp_code, cmp_mode, op0, op1);
  cmp0 = gen_rtx_fmt_ee (NE, cmp_mode, prev, const0_rtx);

  create_fixed_operand (&ops[0], prev);
  create_fixed_operand (&ops[1], target);
  create_fixed_operand (&ops[2], op0);
  create_fixed_operand (&ops[3], op1);
  create_fixed_operand (&ops[4], cmp0);
  create_fixed_operand (&ops[5], cmp1);

  push_to_sequence ((rtx_insn *) *gen_seq);
  if (!maybe_expand_insn (icode, 6, ops))
    {
      end_sequence ();
      return NULL_RTX;
    }

  *gen_seq = get_insns ();
  end_sequence ();

  return target;
}

#undef TARGET_GEN_CCMP_FIRST
#define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first

#undef TARGET_GEN_CCMP_NEXT
#define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next

/* Implement TARGET_SCHED_MACRO_FUSION_P.  Return true if target supports
   instruction fusion of some sort.  */

static bool
aarch64_macro_fusion_p (void)
{
  return aarch64_tune_params->fuseable_ops != AARCH64_FUSE_NOTHING;
}

/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P.  Return true if PREV and CURR
   should be kept together during scheduling.  */

static bool
aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
{
  rtx set_dest;
  rtx prev_set = single_set (prev);
  rtx curr_set = single_set (curr);
  /* prev and curr are simple SET insns i.e. no flag setting or branching.  */
  bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);

  if (!aarch64_macro_fusion_p ())
    return false;

  if (simple_sets_p
      && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOV_MOVK))
    {
      /* We are trying to match:
	 prev (mov)  == (set (reg r0) (const_int imm16))
	 curr (movk) == (set (zero_extract (reg r0)
					   (const_int 16)
					   (const_int 16))
			     (const_int imm16_1))  */

      set_dest = SET_DEST (curr_set);

      if (GET_CODE (set_dest) == ZERO_EXTRACT
	  && CONST_INT_P (SET_SRC (curr_set))
	  && CONST_INT_P (SET_SRC (prev_set))
	  && CONST_INT_P (XEXP (set_dest, 2))
	  && INTVAL (XEXP (set_dest, 2)) == 16
	  && REG_P (XEXP (set_dest, 0))
	  && REG_P (SET_DEST (prev_set))
	  && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
	return true;
    }

  if (simple_sets_p
      && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_ADD))
    {
      /* We're trying to match:
	 prev (adrp) == (set (reg r1)
			     (high (symbol_ref ("SYM"))))
	 curr (add)  == (set (reg r0)
			     (lo_sum (reg r1)
				     (symbol_ref ("SYM"))))
	 Note that r0 need not necessarily be the same as r1, especially
	 during pre-regalloc scheduling.  */

      if (satisfies_constraint_Ush (SET_SRC (prev_set))
	  && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
	{
	  if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
	      && REG_P (XEXP (SET_SRC (curr_set), 0))
	      && REGNO (XEXP (SET_SRC (curr_set), 0))
		 == REGNO (SET_DEST (prev_set))
	      && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
			      XEXP (SET_SRC (curr_set), 1)))
	    return true;
	}
    }

  if (simple_sets_p
      && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOVK_MOVK))
    {
      /* We're trying to match:
	 prev (movk) == (set (zero_extract (reg r0)
					   (const_int 16)
					   (const_int 32))
			     (const_int imm16_1))
	 curr (movk) == (set (zero_extract (reg r0)
					   (const_int 16)
					   (const_int 48))
			     (const_int imm16_2))  */

      if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
	  && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
	  && REG_P (XEXP (SET_DEST (prev_set), 0))
	  && REG_P (XEXP (SET_DEST (curr_set), 0))
	  && REGNO (XEXP (SET_DEST (prev_set), 0))
	     == REGNO (XEXP (SET_DEST (curr_set), 0))
	  && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
	  && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
	  && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
	  && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
	  && CONST_INT_P (SET_SRC (prev_set))
	  && CONST_INT_P (SET_SRC (curr_set)))
	return true;
    }

  if (simple_sets_p
      && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_LDR))
    {
      /* We're trying to match:
	 prev (adrp) == (set (reg r0)
			     (high (symbol_ref ("SYM"))))
	 curr (ldr)  == (set (reg r1)
			     (mem (lo_sum (reg r0)
					  (symbol_ref ("SYM")))))
	 or
	 curr (ldr)  == (set (reg r1)
			     (zero_extend (mem
					   (lo_sum (reg r0)
						   (symbol_ref ("SYM"))))))  */
      if (satisfies_constraint_Ush (SET_SRC (prev_set))
	  && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
	{
	  rtx curr_src = SET_SRC (curr_set);

	  if (GET_CODE (curr_src) == ZERO_EXTEND)
	    curr_src = XEXP (curr_src, 0);

	  if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
	      && REG_P (XEXP (XEXP (curr_src, 0), 0))
	      && REGNO (XEXP (XEXP (curr_src, 0), 0))
		 == REGNO (SET_DEST (prev_set))
	      && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
			      XEXP (SET_SRC (prev_set), 0)))
	    return true;
	}
    }

  if ((aarch64_tune_params->fuseable_ops & AARCH64_FUSE_CMP_BRANCH)
      && any_condjump_p (curr))
    {
      enum attr_type prev_type = get_attr_type (prev);

      /* FIXME: this misses some instructions that ThunderX considers
	 simple arithmetic.  Simple shifts are missed here.  */
      if (prev_type == TYPE_ALUS_SREG
	  || prev_type == TYPE_ALUS_IMM
	  || prev_type == TYPE_LOGICS_REG
	  || prev_type == TYPE_LOGICS_IMM)
	return true;
    }

  return false;
}
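
/* Illustrative example: with AARCH64_FUSE_MOV_MOVK the scheduler keeps a
   pair such as
     mov  x0, #0x1234
     movk x0, #0x5678, lsl #16
   adjacent, so cores that fuse MOV/MOVK can retire them as one macro-op.  */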

/* If MEM is in the form of [base+offset], extract the two parts
   of address and set to BASE and OFFSET, otherwise return false
   after clearing BASE and OFFSET.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  if (REG_P (addr))
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  *base = NULL_RTX;
  *offset = NULL_RTX;

  return false;
}

/* Types for scheduling fusion.  */
enum sched_fusion_type
{
  SCHED_FUSION_NONE = 0,
  SCHED_FUSION_LD_SIGN_EXTEND,
  SCHED_FUSION_LD_ZERO_EXTEND,
  SCHED_FUSION_LD,
  SCHED_FUSION_ST,
  SCHED_FUSION_NUM
};

/* If INSN is a load or store of address in the form of [base+offset],
   extract the two parts and set to BASE and OFFSET.  Return scheduling
   fusion type this INSN is.  */

static enum sched_fusion_type
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
{
  rtx x, dest, src;
  enum sched_fusion_type fusion = SCHED_FUSION_LD;

  gcc_assert (INSN_P (insn));
  x = PATTERN (insn);
  if (GET_CODE (x) != SET)
    return SCHED_FUSION_NONE;

  src = SET_SRC (x);
  dest = SET_DEST (x);

  if (GET_MODE (dest) != SImode && GET_MODE (dest) != DImode
      && GET_MODE (dest) != SFmode && GET_MODE (dest) != DFmode)
    return SCHED_FUSION_NONE;

  if (GET_CODE (src) == SIGN_EXTEND)
    {
      fusion = SCHED_FUSION_LD_SIGN_EXTEND;
      src = XEXP (src, 0);
      if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
	return SCHED_FUSION_NONE;
    }
  else if (GET_CODE (src) == ZERO_EXTEND)
    {
      fusion = SCHED_FUSION_LD_ZERO_EXTEND;
      src = XEXP (src, 0);
      if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
	return SCHED_FUSION_NONE;
    }

  if (GET_CODE (src) == MEM && REG_P (dest))
    extract_base_offset_in_addr (src, base, offset);
  else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
    {
      fusion = SCHED_FUSION_ST;
      extract_base_offset_in_addr (dest, base, offset);
    }
  else
    return SCHED_FUSION_NONE;

  if (*base == NULL_RTX || *offset == NULL_RTX)
    fusion = SCHED_FUSION_NONE;

  return fusion;
}

/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.

   Currently we only support fusing ldr or str instructions, so FUSION_PRI
   and PRI are only calculated for these instructions.  For other
   instructions, FUSION_PRI and PRI are simply set to MAX_PRI - 1.  In the
   future, other kinds of instruction fusion can be added by returning
   different priorities.

   It's important that irrelevant instructions get the largest FUSION_PRI.  */

static void
aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
			       int *fusion_pri, int *pri)
{
  int tmp, off_val;
  rtx base, offset;
  enum sched_fusion_type fusion;

  gcc_assert (INSN_P (insn));

  tmp = max_pri - 1;
  fusion = fusion_load_store (insn, &base, &offset);
  if (fusion == SCHED_FUSION_NONE)
    {
      *pri = tmp;
      *fusion_pri = tmp;
      return;
    }

  /* Set FUSION_PRI according to fusion type and base register.  */
  *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);

  /* Calculate PRI.  */
  tmp /= 2;

  /* INSN with smaller offset goes first.  */
  off_val = (int)(INTVAL (offset));
  if (off_val >= 0)
    tmp -= (off_val & 0xfffff);
  else
    tmp += ((- off_val) & 0xfffff);

  *pri = tmp;
  return;
}

/* Given OPERANDS of consecutive load/store, check if we can merge
   them into ldp/stp.  LOAD is true if they are load instructions.
   MODE is the mode of memory operands.  */

bool
aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
				enum machine_mode mode)
{
  HOST_WIDE_INT offval_1, offval_2, msize;
  enum reg_class rclass_1, rclass_2;
  rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;

  if (load)
    {
      mem_1 = operands[1];
      mem_2 = operands[3];
      reg_1 = operands[0];
      reg_2 = operands[2];
      gcc_assert (REG_P (reg_1) && REG_P (reg_2));
      if (REGNO (reg_1) == REGNO (reg_2))
	return false;
    }
  else
    {
      mem_1 = operands[0];
      mem_2 = operands[2];
      reg_1 = operands[1];
      reg_2 = operands[3];
    }

  /* The mems cannot be volatile.  */
  if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
    return false;

  /* Check if the addresses are in the form of [base+offset].  */
  extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
  if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
    return false;
  extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
  if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
    return false;

  /* Check if the bases are same.  */
  if (!rtx_equal_p (base_1, base_2))
    return false;

  offval_1 = INTVAL (offset_1);
  offval_2 = INTVAL (offset_2);
  msize = GET_MODE_SIZE (mode);
  /* Check if the offsets are consecutive.  */
  if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
    return false;

  /* Check if the addresses are clobbered by load.  */
  if (load)
    {
      if (reg_mentioned_p (reg_1, mem_1))
	return false;

      /* In increasing order, the last load can clobber the address.  */
      if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
	return false;
    }

  if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
    rclass_1 = FP_REGS;
  else
    rclass_1 = GENERAL_REGS;

  if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
    rclass_2 = FP_REGS;
  else
    rclass_2 = GENERAL_REGS;

  /* Check if the registers are of same class.  */
  if (rclass_1 != rclass_2)
    return false;

  return true;
}
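
/* Illustrative example: the consecutive loads
     ldr w0, [x2]
     ldr w1, [x2, 4]
   pass all of the checks above (same base, adjacent SImode offsets,
   distinct destination registers of the same class) and may be merged
   into
     ldp w0, w1, [x2]  */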

/* Given OPERANDS of consecutive load/store, check if we can merge
   them into ldp/stp by adjusting the offset.  LOAD is true if they
   are load instructions.  MODE is the mode of memory operands.

   Given below consecutive stores:

     str  w1, [xb, 0x100]
     str  w1, [xb, 0x104]
     str  w1, [xb, 0x108]
     str  w1, [xb, 0x10c]

   Though the offsets are out of the range supported by stp, we can
   still pair them after adjusting the offset, like:

     add  scratch, xb, 0x100
     stp  w1, w1, [scratch]
     stp  w1, w1, [scratch, 0x8]

   The peephole patterns detecting this opportunity should guarantee
   the scratch register is available.  */

bool
aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
				       enum machine_mode mode)
{
  enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
  HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize;
  rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4;
  rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4;

  if (load)
    {
      reg_1 = operands[0];
      mem_1 = operands[1];
      reg_2 = operands[2];
      mem_2 = operands[3];
      reg_3 = operands[4];
      mem_3 = operands[5];
      reg_4 = operands[6];
      mem_4 = operands[7];
      gcc_assert (REG_P (reg_1) && REG_P (reg_2)
		  && REG_P (reg_3) && REG_P (reg_4));
      if (REGNO (reg_1) == REGNO (reg_2) || REGNO (reg_3) == REGNO (reg_4))
	return false;
    }
  else
    {
      mem_1 = operands[0];
      reg_1 = operands[1];
      mem_2 = operands[2];
      reg_2 = operands[3];
      mem_3 = operands[4];
      reg_3 = operands[5];
      mem_4 = operands[6];
      reg_4 = operands[7];
    }
  /* Skip if memory operand is by itself valid for ldp/stp.  */
  if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
    return false;

  /* The mems cannot be volatile.  */
  if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2)
      || MEM_VOLATILE_P (mem_3) || MEM_VOLATILE_P (mem_4))
    return false;

  /* Check if the addresses are in the form of [base+offset].  */
  extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
  if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
    return false;
  extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
  if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
    return false;
  extract_base_offset_in_addr (mem_3, &base_3, &offset_3);
  if (base_3 == NULL_RTX || offset_3 == NULL_RTX)
    return false;
  extract_base_offset_in_addr (mem_4, &base_4, &offset_4);
  if (base_4 == NULL_RTX || offset_4 == NULL_RTX)
    return false;

  /* Check if the bases are same.  */
  if (!rtx_equal_p (base_1, base_2)
      || !rtx_equal_p (base_2, base_3)
      || !rtx_equal_p (base_3, base_4))
    return false;

  offval_1 = INTVAL (offset_1);
  offval_2 = INTVAL (offset_2);
  offval_3 = INTVAL (offset_3);
  offval_4 = INTVAL (offset_4);
  msize = GET_MODE_SIZE (mode);
  /* Check if the offsets are consecutive.  */
  if ((offval_1 != (offval_2 + msize)
       || offval_1 != (offval_3 + msize * 2)
       || offval_1 != (offval_4 + msize * 3))
      && (offval_4 != (offval_3 + msize)
	  || offval_4 != (offval_2 + msize * 2)
	  || offval_4 != (offval_1 + msize * 3)))
    return false;

  /* Check if the addresses are clobbered by load.  */
  if (load)
    {
      if (reg_mentioned_p (reg_1, mem_1)
	  || reg_mentioned_p (reg_2, mem_2)
	  || reg_mentioned_p (reg_3, mem_3))
	return false;

      /* In increasing order, the last load can clobber the address.  */
      if (offval_1 > offval_2 && reg_mentioned_p (reg_4, mem_4))
	return false;
    }

  if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
    rclass_1 = FP_REGS;
  else
    rclass_1 = GENERAL_REGS;

  if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
    rclass_2 = FP_REGS;
  else
    rclass_2 = GENERAL_REGS;

  if (REG_P (reg_3) && FP_REGNUM_P (REGNO (reg_3)))
    rclass_3 = FP_REGS;
  else
    rclass_3 = GENERAL_REGS;

  if (REG_P (reg_4) && FP_REGNUM_P (REGNO (reg_4)))
    rclass_4 = FP_REGS;
  else
    rclass_4 = GENERAL_REGS;

  /* Check if the registers are of same class.  */
  if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 != rclass_4)
    return false;

  return true;
}

/* Given OPERANDS of consecutive load/store, this function pairs them
   into ldp/stp after adjusting the offset.  It depends on the fact
   that addresses of load/store instructions are in increasing order.
   MODE is the mode of memory operands.  CODE is the rtl operator
   which should be applied to all memory operands, it's SIGN_EXTEND,
   ZERO_EXTEND or UNKNOWN.  */

bool
aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
			     enum machine_mode mode, RTX_CODE code)
{
  rtx base, offset, t1, t2;
  rtx mem_1, mem_2, mem_3, mem_4;
  HOST_WIDE_INT off_val, abs_off, adj_off, new_off, stp_off_limit, msize;

  if (load)
    {
      mem_1 = operands[1];
      mem_2 = operands[3];
      mem_3 = operands[5];
      mem_4 = operands[7];
    }
  else
    {
      mem_1 = operands[0];
      mem_2 = operands[2];
      mem_3 = operands[4];
      mem_4 = operands[6];
      gcc_assert (code == UNKNOWN);
    }

  extract_base_offset_in_addr (mem_1, &base, &offset);
  gcc_assert (base != NULL_RTX && offset != NULL_RTX);

  /* Adjust offset thus it can fit in ldp/stp instruction.  */
  msize = GET_MODE_SIZE (mode);
  stp_off_limit = msize * 0x40;
  off_val = INTVAL (offset);
  abs_off = (off_val < 0) ? -off_val : off_val;
  new_off = abs_off % stp_off_limit;
  adj_off = abs_off - new_off;

  /* Further adjust to make sure all offsets are OK.  */
  if ((new_off + msize * 2) >= stp_off_limit)
    {
      adj_off += stp_off_limit;
      new_off -= stp_off_limit;
    }

  /* Make sure the adjustment can be done with ADD/SUB instructions.  */
  if (adj_off >= 0x1000)
    return false;

  if (off_val < 0)
    {
      adj_off = -adj_off;
      new_off = -new_off;
    }

  /* Create new memory references.  */
  mem_1 = change_address (mem_1, VOIDmode,
			  plus_constant (DImode, operands[8], new_off));

  /* Check if the adjusted address is OK for ldp/stp.  */
  if (!aarch64_mem_pair_operand (mem_1, mode))
    return false;

  msize = GET_MODE_SIZE (mode);
  mem_2 = change_address (mem_2, VOIDmode,
			  plus_constant (DImode,
					 operands[8],
					 new_off + msize));
  mem_3 = change_address (mem_3, VOIDmode,
			  plus_constant (DImode,
					 operands[8],
					 new_off + msize * 2));
  mem_4 = change_address (mem_4, VOIDmode,
			  plus_constant (DImode,
					 operands[8],
					 new_off + msize * 3));

  if (code == ZERO_EXTEND)
    {
      mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
      mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
      mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
      mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
    }
  else if (code == SIGN_EXTEND)
    {
      mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
      mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
      mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
      mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
    }

  if (load)
    {
      operands[1] = mem_1;
      operands[3] = mem_2;
      operands[5] = mem_3;
      operands[7] = mem_4;
    }
  else
    {
      operands[0] = mem_1;
      operands[2] = mem_2;
      operands[4] = mem_3;
      operands[6] = mem_4;
    }

  /* Emit adjusting instruction.  */
  emit_insn (gen_rtx_SET (VOIDmode, operands[8],
			  plus_constant (DImode, base, adj_off)));
  /* Emit ldp/stp instructions.  */
  t1 = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
  t2 = gen_rtx_SET (VOIDmode, operands[2], operands[3]);
  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
  t1 = gen_rtx_SET (VOIDmode, operands[4], operands[5]);
  t2 = gen_rtx_SET (VOIDmode, operands[6], operands[7]);
  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));

  return true;
}
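
/* Worked example of the offset arithmetic above (illustrative): for the
   SImode stores at xb+0x100 shown in the function comment, msize == 4
   gives stp_off_limit == 0x100, so new_off == 0 and adj_off == 0x100;
   the code therefore emits "add scratch, xb, 0x100" followed by stp
   instructions at [scratch] and [scratch, 8].  */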

#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST aarch64_address_cost

/* This hook determines whether unnamed bitfields affect the alignment
   of the containing structure.  The hook returns true if the structure
   should inherit the alignment requirements of an unnamed bitfield's
   type.  */
#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
  hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START aarch64_start_file

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk

#undef TARGET_ASM_SELECT_RTX_SECTION
#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list

#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE aarch64_can_eliminate

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage

/* Only the least significant bit is used for initialization guard
   variables.  */
#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix

#ifdef TARGET_BIG_ENDIAN_DEFAULT
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
#endif

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL aarch64_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN aarch64_fold_builtin

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG aarch64_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance

#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE aarch64_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS aarch64_init_builtins

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE aarch64_mangle_type

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost

#undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

/* This target hook should return true if accesses to volatile bitfields
   should use the narrowest mode possible.  It should return false if these
   accesses should use the bitfield container type.  */
#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE aarch64_override_options

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  aarch64_override_options_after_change

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB aarch64_return_in_msb

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  aarch64_sched_first_cycle_multipass_dfa_lookahead

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p

#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  aarch64_builtin_vectorization_cost

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  aarch64_builtin_vectorized_function

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  aarch64_autovectorize_vector_sizes

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
  aarch64_atomic_assign_expand_fenv

/* Section anchor support.  */

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -256

/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
   byte offset; we can do much more for larger data types, but have no way
   to determine the size of the access.  We assume accesses are aligned.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  aarch64_simd_vector_alignment_reachable

/* vec_perm support.  */

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  aarch64_vectorize_vec_perm_const_ok

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs

#undef TARGET_FLAGS_REGNUM
#define TARGET_FLAGS_REGNUM CC_REGNUM

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address

#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
  aarch64_use_by_pieces_infrastructure_p

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-aarch64.h"