/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2015 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "fold-const.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "dominance.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "targhooks.h"
#include "langhooks.h"
#include "diagnostic-core.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "gimple-expr.h"
#include "tree-vectorizer.h"
#include "aarch64-cost-tables.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "cortex-a57-fma-steering.h"
#include "target-def.h"
/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC
       A constant symbolic address, in pc-relative literal pool.  */

enum aarch64_address_type {
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};

struct aarch64_address_info {
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};
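
/* Illustrative examples (editorial addition, not from the upstream
   source): typical instruction forms each class corresponds to, taking
   x0 as the base register:
     ADDRESS_REG_IMM    ldr x1, [x0, #16]
     ADDRESS_REG_WB     ldr x1, [x0, #16]!   or   ldr x1, [x0], #16
     ADDRESS_REG_REG    ldr x1, [x0, x2, lsl #3]
     ADDRESS_REG_UXTW   ldr x1, [x0, w2, uxtw #3]
     ADDRESS_REG_SXTW   ldr x1, [x0, w2, sxtw #3]
     ADDRESS_LO_SUM     add x1, x0, :lo12:symbol
     ADDRESS_SYMBOLIC   ldr x1, <literal-pool label>  */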
struct simd_immediate_info
{
  rtx value;
  int shift;
  int element_width;
  bool mvn;
  bool msl;
};
/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif
static bool aarch64_composite_type_p (const_tree, machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
						     const_tree,
						     machine_mode *, int *,
						     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (machine_mode);
static unsigned bit_count (unsigned HOST_WIDE_INT);
static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
						 const unsigned char *sel);
static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
/* Major revision number of the ARM Architecture implemented by the target.  */
unsigned aarch64_architecture_version;

/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = cortexa53;

/* The current tuning set.  */
const struct tune_params *aarch64_tune_params;

/* Mask to specify which instructions we are allowed to generate.  */
unsigned long aarch64_isa_flags = 0;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;
/* Tuning parameters.  */

static const struct cpu_addrcost_table generic_addrcost_table =
{
    {
      0, /* hi  */
      0, /* si  */
      0, /* di  */
      0, /* ti  */
    },
  0, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  0, /* register_extend  */
  0  /* imm_offset  */
};

static const struct cpu_addrcost_table cortexa57_addrcost_table =
{
    {
      1, /* hi  */
      0, /* si  */
      0, /* di  */
      1, /* ti  */
    },
  0, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  0, /* register_extend  */
  0  /* imm_offset  */
};

static const struct cpu_addrcost_table xgene1_addrcost_table =
{
    {
      1, /* hi  */
      0, /* si  */
      0, /* di  */
      1, /* ti  */
    },
  1, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  1, /* register_extend  */
  0  /* imm_offset  */
};
static const struct cpu_regmove_cost generic_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2  /* FP2FP  */
};

static const struct cpu_regmove_cost cortexa57_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2  /* FP2FP  */
};

static const struct cpu_regmove_cost cortexa53_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2  /* FP2FP  */
};

static const struct cpu_regmove_cost thunderx_regmove_cost =
{
  2, /* GP2GP  */
  2, /* GP2FP  */
  6, /* FP2GP  */
  4  /* FP2FP  */
};

static const struct cpu_regmove_cost xgene1_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  8, /* GP2FP  */
  8, /* FP2GP  */
  2  /* FP2FP  */
};
/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost generic_vector_cost =
{
  1, /* scalar_stmt_cost  */
  1, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  1, /* vec_stmt_cost  */
  1, /* vec_to_scalar_cost  */
  1, /* scalar_to_vec_cost  */
  1, /* vec_align_load_cost  */
  1, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  3, /* cond_taken_branch_cost  */
  1  /* cond_not_taken_branch_cost  */
};

/* Costs for the Cortex-A57 vector insn classes.  */
static const struct cpu_vector_cost cortexa57_vector_cost =
{
  1, /* scalar_stmt_cost  */
  4, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  3, /* vec_stmt_cost  */
  8, /* vec_to_scalar_cost  */
  8, /* scalar_to_vec_cost  */
  5, /* vec_align_load_cost  */
  5, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  1, /* cond_taken_branch_cost  */
  1  /* cond_not_taken_branch_cost  */
};

/* Costs for the XGene-1 vector insn classes.  */
static const struct cpu_vector_cost xgene1_vector_cost =
{
  1,  /* scalar_stmt_cost  */
  5,  /* scalar_load_cost  */
  1,  /* scalar_store_cost  */
  2,  /* vec_stmt_cost  */
  4,  /* vec_to_scalar_cost  */
  4,  /* scalar_to_vec_cost  */
  10, /* vec_align_load_cost  */
  10, /* vec_unalign_load_cost  */
  2,  /* vec_unalign_store_cost  */
  2,  /* vec_store_cost  */
  2,  /* cond_taken_branch_cost  */
  1   /* cond_not_taken_branch_cost  */
};
#define AARCH64_FUSE_NOTHING	(0)
#define AARCH64_FUSE_MOV_MOVK	(1 << 0)
#define AARCH64_FUSE_ADRP_ADD	(1 << 1)
#define AARCH64_FUSE_MOVK_MOVK	(1 << 2)
#define AARCH64_FUSE_ADRP_LDR	(1 << 3)
#define AARCH64_FUSE_CMP_BRANCH	(1 << 4)
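
/* Illustrative examples (editorial addition, not from the upstream
   source): an AARCH64_FUSE_ADRP_ADD pair a fusion-aware scheduler will
   try to keep adjacent:

     adrp x0, symbol
     add  x0, x0, :lo12:symbol

   and a MOV_MOVK / MOVK_MOVK immediate-building chain:

     mov  x0, #0x5678
     movk x0, #0x1234, lsl #16  */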
/* Generic costs for branch instructions.  */
static const struct cpu_branch_cost generic_branch_cost =
{
  2,  /* Predictable.  */
  2   /* Unpredictable.  */
};
static const struct tune_params generic_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  4, /* memmov_cost  */
  2, /* issue_rate  */
  AARCH64_FUSE_NOTHING, /* fusible_ops  */
  8,  /* function_align.  */
  4,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2   /* min_div_recip_mul_df.  */
};
static const struct tune_params cortexa53_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  4, /* memmov_cost  */
  2, /* issue_rate  */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
  8,  /* function_align.  */
  4,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2   /* min_div_recip_mul_df.  */
};
static const struct tune_params cortexa57_tunings =
{
  &cortexa57_extra_costs,
  &cortexa57_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  4, /* memmov_cost  */
  3, /* issue_rate  */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
  16, /* function_align.  */
  4,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2   /* min_div_recip_mul_df.  */
};
static const struct tune_params thunderx_tunings =
{
  &thunderx_extra_costs,
  &generic_addrcost_table,
  &thunderx_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  6, /* memmov_cost  */
  2, /* issue_rate  */
  AARCH64_FUSE_CMP_BRANCH, /* fusible_ops  */
  8,  /* function_align.  */
  8,  /* jump_align.  */
  8,  /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2   /* min_div_recip_mul_df.  */
};
static const struct tune_params xgene1_tunings =
{
  &xgene1_extra_costs,
  &xgene1_addrcost_table,
  &xgene1_regmove_cost,
  &xgene1_vector_cost,
  &generic_branch_cost,
  6, /* memmov_cost  */
  4, /* issue_rate  */
  AARCH64_FUSE_NOTHING, /* fusible_ops  */
  16, /* function_align.  */
  8,  /* jump_align.  */
  16, /* loop_align.  */
  2,  /* int_reassoc_width.  */
  4,  /* fp_reassoc_width.  */
  1,  /* vec_reassoc_width.  */
  2,  /* min_div_recip_mul_sf.  */
  2   /* min_div_recip_mul_df.  */
};
/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor core;
  const char *arch;
  unsigned architecture_version;
  const unsigned long flags;
  const struct tune_params *const tune;
};
/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
  {NAME, SCHED, #ARCH, ARCH, FLAGS, &COSTS##_tunings},
#include "aarch64-cores.def"
#undef AARCH64_CORE
  {"generic", cortexa53, "8", 8, AARCH64_FL_FOR_ARCH8, &generic_tunings},
  {NULL, aarch64_none, NULL, 0, 0, NULL}
};
/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, ARCH, FLAGS, NULL},
#include "aarch64-arches.def"
#undef AARCH64_ARCH
  {NULL, aarch64_none, NULL, 0, 0, NULL}
};
/* Target specification.  These are populated as command-line arguments
   are processed, or NULL if not specified.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};
/* ISA extensions in AArch64.  */
static const struct aarch64_option_extension all_extensions[] =
{
#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF, FEATURE_STRING) \
  {NAME, FLAGS_ON, FLAGS_OFF},
#include "aarch64-option-extensions.def"
#undef AARCH64_OPT_EXTENSION
  {NULL, 0, 0}
};
/* Used to track the size of an address when generating a pre/post
   increment address.  */
static machine_mode aarch64_memory_reference_mode;
/* A table of valid AArch64 "bitmask immediate" values for
   logical instructions.  */

#define AARCH64_NUM_BITMASKS 5334
static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
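
/* Illustrative examples (editorial addition, not from the upstream
   source): "bitmask immediates" are the values encodable in the
   immediate field of logical instructions such as AND/ORR/EOR, i.e. a
   power-of-two-sized run of set bits, rotated and replicated across
   the register.  Values that belong in this table include
   0x00000000000000ff, 0x0000ffff0000ffff and 0x5555555555555555;
   a value such as 0x0000000000012345 does not qualify.  */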
typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
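
/* The enumerators above are laid out so that inverse conditions differ
   only in the least significant bit, so flipping bit 0 inverts the
   condition: for example AARCH64_EQ (0) ^ 1 == AARCH64_NE (1), and
   AARCH64_GE (10) ^ 1 == AARCH64_LT (11).  (Illustrative note,
   editorial addition.)  */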
/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
void
aarch64_err_no_fpadvsimd (machine_mode mode, const char *msg)
{
  const char *mc = FLOAT_MODE_P (mode) ? "floating-point" : "vector";
  if (TARGET_GENERAL_REGS_ONLY)
    error ("%qs is incompatible with %s %s", "-mgeneral-regs-only", mc, msg);
  else
    error ("%qs feature modifier is incompatible with %s %s", "+nofp",
	   mc, msg);
}
static unsigned int
aarch64_min_divisions_for_recip_mul (enum machine_mode mode)
{
  if (GET_MODE_UNIT_SIZE (mode) == 4)
    return aarch64_tune_params->min_div_recip_mul_sf;
  return aarch64_tune_params->min_div_recip_mul_df;
}
static int
aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED,
			     enum machine_mode mode)
{
  if (VECTOR_MODE_P (mode))
    return aarch64_tune_params->vec_reassoc_width;
  if (INTEGRAL_MODE_P (mode))
    return aarch64_tune_params->int_reassoc_width;
  if (FLOAT_MODE_P (mode))
    return aarch64_tune_params->fp_reassoc_width;
  return 1;
}
/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return AARCH64_DWARF_R0 + regno - R0_REGNUM;
  else if (regno == SP_REGNUM)
    return AARCH64_DWARF_SP;
  else if (FP_REGNUM_P (regno))
    return AARCH64_DWARF_V0 + regno - V0_REGNUM;

  /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
     equivalent DWARF register.  */
  return DWARF_FRAME_REGISTERS;
}
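
/* Illustrative examples (editorial addition, not from the upstream
   source): in the AArch64 DWARF numbering AARCH64_DWARF_R0 is 0,
   AARCH64_DWARF_SP is 31 and AARCH64_DWARF_V0 is 64, so e.g. x5 maps
   to DWARF register 5 and v5 maps to DWARF register 69.  */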
/* Return TRUE if MODE is any of the large INT modes.  */
static bool
aarch64_vect_struct_mode_p (machine_mode mode)
{
  return mode == OImode || mode == CImode || mode == XImode;
}
/* Return TRUE if MODE is any of the vector modes.  */
static bool
aarch64_vector_mode_p (machine_mode mode)
{
  return aarch64_vector_mode_supported_p (mode)
	 || aarch64_vect_struct_mode_p (mode);
}
/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
static bool
aarch64_array_mode_supported_p (machine_mode mode,
				unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_SIMD
      && AARCH64_VALID_SIMD_QREG_MODE (mode)
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}
/* Implement HARD_REGNO_NREGS.  */
int
aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
{
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
    default:
      return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    }
  gcc_unreachable ();
}
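
/* Worked example (editorial addition): with UNITS_PER_WORD == 8 and
   UNITS_PER_VREG == 16, a 16-byte TImode value needs two general
   registers but only one vector register, while the 32-, 48- and
   64-byte OImode/CImode/XImode "structure" modes need 2, 3 and 4
   vector registers respectively.  */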
/* Implement HARD_REGNO_MODE_OK.  */
int
aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return regno == CC_REGNUM;

  if (regno == SP_REGNUM)
    /* The purpose of comparing with ptr_mode is to support the
       global register variable associated with the stack pointer
       register via the syntax of asm ("wsp") in ILP32.  */
    return mode == Pmode || mode == ptr_mode;

  if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
    return mode == Pmode;

  if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
    return 1;

  if (FP_REGNUM_P (regno))
    {
      if (aarch64_vect_struct_mode_p (mode))
	return
	  (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
      else
	return 1;
    }

  return 0;
}
/* Implement HARD_REGNO_CALLER_SAVE_MODE.  */
machine_mode
aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
				     machine_mode mode)
{
  /* Handle modes that fit within single registers.  */
  if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
    {
      if (GET_MODE_SIZE (mode) >= 4)
	return mode;
      else
	return SImode;
    }
  /* Fall back to generic for multi-reg and very large modes.  */
  else
    return choose_hard_reg_mode (regno, nregs, false);
}
/* Return true if calls to DECL should be treated as
   long-calls (i.e. called via a register).  */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
  return false;
}

/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (i.e. called via a register).  */
bool
aarch64_is_long_call_p (rtx sym)
{
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}
/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

   (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
bool
aarch64_is_extend_from_extract (machine_mode mode, rtx mult_imm,
				rtx extract_imm)
{
  HOST_WIDE_INT mult_val, extract_val;

  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
    return false;

  mult_val = INTVAL (mult_imm);
  extract_val = INTVAL (extract_imm);

  if (extract_val > 8
      && extract_val < GET_MODE_BITSIZE (mode)
      && exact_log2 (extract_val & ~7) > 0
      && (extract_val & 7) <= 4
      && mult_val == (1 << (extract_val & 7)))
    return true;

  return false;
}
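
/* Illustrative note (editorial addition): combine can rewrite a scaled
   index such as (plus (mult (reg:DI x1) (const_int 8)) (reg:DI x0)) as
   a zero/sign-extract whose EXTRACT_IMM encodes both the extend width
   and the scale.  The test above only accepts shift amounts 0 to 4
   (scales of 1, 2, 4, 8 or 16) that agree with MULT_IMM.  */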
/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx_insn *
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (x, y));
}

/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
  machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
  return cc_reg;
}
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

rtx
aarch64_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}
/* Return the TLS model to use for ADDR.  */
static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;
  rtx sym, addend;

  if (GET_CODE (addr) == CONST)
    {
      split_const (addr, &sym, &addend);
      if (GET_CODE (sym) == SYMBOL_REF)
	tls_kind = SYMBOL_REF_TLS_MODEL (sym);
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}
/* We'll allow lo_sum's in addresses in our legitimate addresses
   so that combine would take care of combining addresses where
   necessary, but for generation purposes, we'll generate the address
   as:

   RTL                               Absolute
   tmp = hi (symbol_ref);            adrp  x1, foo
   dest = lo_sum (tmp, symbol_ref);  add dest, x1, :lo_12:foo

   PIC                               TLS
   adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
   ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo
                                     bl   __tls_get_addr

   Load TLS symbol, depending on TLS mechanism and TLS access model.

   Global Dynamic - Traditional TLS:
   adrp tmp, :tlsgd:imm
   add  dest, tmp, #:tlsgd_lo12:imm
   bl   __tls_get_addr

   Global Dynamic - TLS Descriptors:
   adrp dest, :tlsdesc:imm
   ldr  tmp, [dest, #:tlsdesc_lo12:imm]
   add  dest, dest, #:tlsdesc_lo12:imm
   blr  tmp

   Initial Exec:
   mrs  tp, tpidr_el0
   adrp tmp, :gottprel:imm
   ldr  dest, [tmp, #:gottprel_lo12:imm]
   add  dest, dest, tp

   Local Exec:
   mrs  tp, tpidr_el0
   add  t0, tp, #:tprel_hi12:imm, lsl #12
   add  t0, t0, #:tprel_lo12_nc:imm
*/
static void
aarch64_load_symref_appropriately (rtx dest, rtx imm,
				   enum aarch64_symbol_type type)
{
  switch (type)
    {
    case SYMBOL_SMALL_ABSOLUTE:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode.  */
	rtx tmp_reg = dest;
	machine_mode mode = GET_MODE (dest);

	gcc_assert (mode == Pmode || mode == ptr_mode);

	if (can_create_pseudo_p ())
	  tmp_reg = gen_reg_rtx (mode);

	emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
	emit_insn (gen_add_losym (dest, tmp_reg, imm));
	return;
      }

    case SYMBOL_TINY_ABSOLUTE:
      emit_insn (gen_rtx_SET (dest, imm));
      return;

    case SYMBOL_SMALL_GOT:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the got entry is always of SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in the memory), it has SImode; it may have
	   DImode if dest is dereferenced to access the memory.
	   This is why we have to handle three different ldr_got_small
	   patterns here (two patterns for ILP32).  */
	rtx tmp_reg = dest;
	machine_mode mode = GET_MODE (dest);

	if (can_create_pseudo_p ())
	  tmp_reg = gen_reg_rtx (mode);

	emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
	    else
	      emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
	  }
	else
	  {
	    gcc_assert (mode == Pmode);
	    emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
	  }
	return;
      }

    case SYMBOL_SMALL_TLSGD:
      {
	rtx_insn *insns;
	rtx result = gen_rtx_REG (Pmode, R0_REGNUM);

	start_sequence ();
	aarch64_emit_call_insn (gen_tlsgd_small (result, imm));
	insns = get_insns ();
	end_sequence ();

	RTL_CONST_CALL_P (insns) = 1;
	emit_libcall_block (insns, dest, result, imm);
	return;
      }

    case SYMBOL_SMALL_TLSDESC:
      {
	machine_mode mode = GET_MODE (dest);
	rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
	rtx tp;

	gcc_assert (mode == Pmode || mode == ptr_mode);

	/* In ILP32, the got entry is always of SImode size.  Unlike
	   small GOT, the dest is fixed at reg 0.  */
	if (TARGET_ILP32)
	  emit_insn (gen_tlsdesc_small_si (imm));
	else
	  emit_insn (gen_tlsdesc_small_di (imm));
	tp = aarch64_load_tp (NULL);

	if (mode != Pmode)
	  tp = gen_lowpart (mode, tp);

	emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, x0)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_SMALL_GOTTPREL:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the got entry is always of SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in the memory), it has SImode; it may have
	   DImode if dest is dereferenced to access the memory.
	   This is why we have to handle three different tlsie_small
	   patterns here (two patterns for ILP32).  */
	machine_mode mode = GET_MODE (dest);
	rtx tmp_reg = gen_reg_rtx (mode);
	rtx tp = aarch64_load_tp (NULL);

	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      emit_insn (gen_tlsie_small_di (tmp_reg, imm));
	    else
	      {
		emit_insn (gen_tlsie_small_si (tmp_reg, imm));
		tp = gen_lowpart (mode, tp);
	      }
	  }
	else
	  {
	    gcc_assert (mode == Pmode);
	    emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
	  }

	emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_SMALL_TPREL:
      {
	rtx tp = aarch64_load_tp (NULL);

	if (GET_MODE (dest) != Pmode)
	  tp = gen_lowpart (GET_MODE (dest), tp);

	emit_insn (gen_tlsle_small (dest, tp, imm));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_TINY_GOT:
      emit_insn (gen_ldr_got_tiny (dest, imm));
      return;

    default:
      gcc_unreachable ();
    }
}
/* Emit a move from SRC to DEST.  Assume that the move expanders can
   handle all moves if !can_create_pseudo_p ().  The distinction is
   important because, unlike emit_move_insn, the move expanders know
   how to force Pmode objects into the constant pool even when the
   constant pool address is not itself legitimate.  */
static rtx
aarch64_emit_move (rtx dest, rtx src)
{
  return (can_create_pseudo_p ()
	  ? emit_move_insn (dest, src)
	  : emit_move_insn_1 (dest, src));
}
/* Split a 128-bit move operation into two 64-bit move operations,
   taking care to handle partial overlap of register to register
   copies.  Special cases are needed when moving between GP regs and
   FP regs.  SRC can be a register, constant or memory; DST a register
   or memory.  If either operand is memory it must not have any side
   effects.  */
void
aarch64_split_128bit_move (rtx dst, rtx src)
{
  rtx dst_lo, dst_hi;
  rtx src_lo, src_hi;

  machine_mode mode = GET_MODE (dst);

  gcc_assert (mode == TImode || mode == TFmode);
  gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
  gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);

  if (REG_P (dst) && REG_P (src))
    {
      int src_regno = REGNO (src);
      int dst_regno = REGNO (dst);

      /* Handle FP <-> GP regs.  */
      if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
	{
	  src_lo = gen_lowpart (word_mode, src);
	  src_hi = gen_highpart (word_mode, src);

	  if (mode == TImode)
	    {
	      emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
	      emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
	    }
	  else
	    {
	      emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
	      emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
	    }
	  return;
	}
      else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
	{
	  dst_lo = gen_lowpart (word_mode, dst);
	  dst_hi = gen_highpart (word_mode, dst);

	  if (mode == TImode)
	    {
	      emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
	      emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
	    }
	  else
	    {
	      emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
	      emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
	    }
	  return;
	}
    }

  dst_lo = gen_lowpart (word_mode, dst);
  dst_hi = gen_highpart (word_mode, dst);
  src_lo = gen_lowpart (word_mode, src);
  src_hi = gen_highpart_mode (word_mode, mode, src);

  /* At most one pairing may overlap.  */
  if (reg_overlap_mentioned_p (dst_lo, src_hi))
    {
      aarch64_emit_move (dst_hi, src_hi);
      aarch64_emit_move (dst_lo, src_lo);
    }
  else
    {
      aarch64_emit_move (dst_lo, src_lo);
      aarch64_emit_move (dst_hi, src_hi);
    }
}
1046 aarch64_split_128bit_move_p (rtx dst
, rtx src
)
1048 return (! REG_P (src
)
1049 || ! (FP_REGNUM_P (REGNO (dst
)) && FP_REGNUM_P (REGNO (src
))));
/* Split a complex SIMD combine.  */
void
aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
{
  machine_mode src_mode = GET_MODE (src1);
  machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src1) && REG_P (src2))
    {
      rtx (*gen) (rtx, rtx, rtx);

      switch (src_mode)
	{
	case V8QImode:
	  gen = gen_aarch64_simd_combinev8qi;
	  break;
	case V4HImode:
	  gen = gen_aarch64_simd_combinev4hi;
	  break;
	case V2SImode:
	  gen = gen_aarch64_simd_combinev2si;
	  break;
	case V2SFmode:
	  gen = gen_aarch64_simd_combinev2sf;
	  break;
	case DImode:
	  gen = gen_aarch64_simd_combinedi;
	  break;
	case DFmode:
	  gen = gen_aarch64_simd_combinedf;
	  break;
	default:
	  gcc_unreachable ();
	}

      emit_insn (gen (dst, src1, src2));
      return;
    }
}
/* Split a complex SIMD move.  */
void
aarch64_split_simd_move (rtx dst, rtx src)
{
  machine_mode src_mode = GET_MODE (src);
  machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src))
    {
      rtx (*gen) (rtx, rtx);

      gcc_assert (VECTOR_MODE_P (src_mode));

      switch (src_mode)
	{
	case V16QImode:
	  gen = gen_aarch64_split_simd_movv16qi;
	  break;
	case V8HImode:
	  gen = gen_aarch64_split_simd_movv8hi;
	  break;
	case V4SImode:
	  gen = gen_aarch64_split_simd_movv4si;
	  break;
	case V2DImode:
	  gen = gen_aarch64_split_simd_movv2di;
	  break;
	case V4SFmode:
	  gen = gen_aarch64_split_simd_movv4sf;
	  break;
	case V2DFmode:
	  gen = gen_aarch64_split_simd_movv2df;
	  break;
	default:
	  gcc_unreachable ();
	}

      emit_insn (gen (dst, src));
      return;
    }
}
static rtx
aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
{
  if (can_create_pseudo_p ())
    return force_reg (mode, value);
  else
    {
      x = aarch64_emit_move (x, value);
      return x;
    }
}
static rtx
aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
{
  if (!aarch64_plus_immediate (GEN_INT (offset), mode))
    {
      rtx high;
      /* Load the full offset into a register.  This
	 might be improvable in the future.  */
      high = GEN_INT (offset);
      offset = 0;
      high = aarch64_force_temporary (mode, temp, high);
      reg = aarch64_force_temporary (mode, temp,
				     gen_rtx_PLUS (mode, high, reg));
    }
  return plus_constant (mode, reg, offset);
}
static int
aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
				machine_mode mode)
{
  unsigned HOST_WIDE_INT mask;
  int i;
  bool first;
  unsigned HOST_WIDE_INT val;
  bool subtargets;
  rtx subtarget;
  int one_match, zero_match, first_not_ffff_match;
  int num_insns = 0;

  if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
    {
      if (generate)
	emit_insn (gen_rtx_SET (dest, imm));
      num_insns++;
      return num_insns;
    }

  if (mode == SImode)
    {
      /* We know we can't do this in 1 insn, and we must be able to do it
	 in two; so don't mess around looking for sequences that don't buy
	 us anything.  */
      if (generate)
	{
	  emit_insn (gen_rtx_SET (dest, GEN_INT (INTVAL (imm) & 0xffff)));
	  emit_insn (gen_insv_immsi (dest, GEN_INT (16),
				     GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
	}
      num_insns += 2;
      return num_insns;
    }

  /* Remaining cases are all for DImode.  */

  val = INTVAL (imm);
  subtargets = optimize && can_create_pseudo_p ();

  one_match = 0;
  zero_match = 0;
  mask = 0xffff;
  first_not_ffff_match = -1;

  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) == mask)
	one_match++;
      else
	{
	  if (first_not_ffff_match < 0)
	    first_not_ffff_match = i;
	  if ((val & mask) == 0)
	    zero_match++;
	}
    }

  if (one_match == 2)
    {
      /* Set one of the quarters and then insert back into result.  */
      mask = 0xffffll << first_not_ffff_match;
      if (generate)
	{
	  emit_insn (gen_rtx_SET (dest, GEN_INT (val | mask)));
	  emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
				     GEN_INT ((val >> first_not_ffff_match)
					      & 0xffff)));
	}
      num_insns += 2;
      return num_insns;
    }

  if (zero_match == 2)
    goto simple_sequence;

  mask = 0x0ffff0000UL;
  for (i = 16; i < 64; i += 16, mask <<= 16)
    {
      HOST_WIDE_INT comp = mask & ~(mask - 1);

      if (aarch64_uimm12_shift (val - (val & mask)))
	{
	  if (generate)
	    {
	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
	      emit_insn (gen_rtx_SET (subtarget, GEN_INT (val & mask)));
	      emit_insn (gen_adddi3 (dest, subtarget,
				     GEN_INT (val - (val & mask))));
	    }
	  num_insns += 2;
	  return num_insns;
	}
      else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
	{
	  if (generate)
	    {
	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
	      emit_insn (gen_rtx_SET (subtarget,
				      GEN_INT ((val + comp) & mask)));
	      emit_insn (gen_adddi3 (dest, subtarget,
				     GEN_INT (val - ((val + comp) & mask))));
	    }
	  num_insns += 2;
	  return num_insns;
	}
      else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
	{
	  if (generate)
	    {
	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
	      emit_insn (gen_rtx_SET (subtarget,
				      GEN_INT ((val - comp) | ~mask)));
	      emit_insn (gen_adddi3 (dest, subtarget,
				     GEN_INT (val - ((val - comp) | ~mask))));
	    }
	  num_insns += 2;
	  return num_insns;
	}
      else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
	{
	  if (generate)
	    {
	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
	      emit_insn (gen_rtx_SET (subtarget, GEN_INT (val | ~mask)));
	      emit_insn (gen_adddi3 (dest, subtarget,
				     GEN_INT (val - (val | ~mask))));
	    }
	  num_insns += 2;
	  return num_insns;
	}
    }

  /* See if we can do it by arithmetically combining two
     immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      int j;
      mask = 0xffff;

      if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
	  || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
	{
	  if (generate)
	    {
	      subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
	      emit_insn (gen_rtx_SET (subtarget,
				      GEN_INT (aarch64_bitmasks[i])));
	      emit_insn (gen_adddi3 (dest, subtarget,
				     GEN_INT (val - aarch64_bitmasks[i])));
	    }
	  num_insns += 2;
	  return num_insns;
	}

      for (j = 0; j < 64; j += 16, mask <<= 16)
	{
	  if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
	    {
	      if (generate)
		{
		  emit_insn (gen_rtx_SET (dest,
					  GEN_INT (aarch64_bitmasks[i])));
		  emit_insn (gen_insv_immdi (dest, GEN_INT (j),
					     GEN_INT ((val >> j) & 0xffff)));
		}
	      num_insns += 2;
	      return num_insns;
	    }
	}
    }

  /* See if we can do it by logically combining two immediates.  */
  for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
    {
      if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
	{
	  int j;

	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
	    if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
	      {
		if (generate)
		  {
		    subtarget = subtargets ? gen_reg_rtx (mode) : dest;
		    emit_insn (gen_rtx_SET (subtarget,
					    GEN_INT (aarch64_bitmasks[i])));
		    emit_insn (gen_iordi3 (dest, subtarget,
					   GEN_INT (aarch64_bitmasks[j])));
		  }
		num_insns += 2;
		return num_insns;
	      }
	}
      else if ((val & aarch64_bitmasks[i]) == val)
	{
	  int j;

	  for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
	    if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
	      {
		if (generate)
		  {
		    subtarget = subtargets ? gen_reg_rtx (mode) : dest;
		    emit_insn (gen_rtx_SET (subtarget,
					    GEN_INT (aarch64_bitmasks[j])));
		    emit_insn (gen_anddi3 (dest, subtarget,
					   GEN_INT (aarch64_bitmasks[i])));
		  }
		num_insns += 2;
		return num_insns;
	      }
	}
    }

  if (one_match > zero_match)
    {
      /* Set either first three quarters or all but the third.  */
      mask = 0xffffll << (16 - first_not_ffff_match);
      if (generate)
	emit_insn (gen_rtx_SET (dest,
				GEN_INT (val | mask | 0xffffffff00000000ull)));
      num_insns++;

      /* Now insert other two quarters.  */
      for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
	   i < 64; i += 16, mask <<= 16)
	{
	  if ((val & mask) != mask)
	    {
	      if (generate)
		emit_insn (gen_insv_immdi (dest, GEN_INT (i),
					   GEN_INT ((val >> i) & 0xffff)));
	      num_insns++;
	    }
	}
      return num_insns;
    }

 simple_sequence:
  first = true;
  mask = 0xffff;
  for (i = 0; i < 64; i += 16, mask <<= 16)
    {
      if ((val & mask) != 0)
	{
	  if (first)
	    {
	      if (generate)
		emit_insn (gen_rtx_SET (dest, GEN_INT (val & mask)));
	      num_insns++;
	      first = false;
	    }
	  else
	    {
	      if (generate)
		emit_insn (gen_insv_immdi (dest, GEN_INT (i),
					   GEN_INT ((val >> i) & 0xffff)));
	      num_insns++;
	    }
	}
    }

  return num_insns;
}
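
/* Worked example (editorial addition): the SImode path above always
   emits a MOV/MOVK pair, e.g. for 0x12345678:

     mov	w0, #0x5678
     movk	w0, #0x1234, lsl #16

   while the DImode paths try progressively cheaper alternatives (a
   bitmask immediate plus an ADD, two bitmask immediates combined with
   ORR or AND, etc.) before falling back to the up-to-four-instruction
   MOV/MOVK sequence.  */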
void
aarch64_expand_mov_immediate (rtx dest, rtx imm)
{
  machine_mode mode = GET_MODE (dest);

  gcc_assert (mode == SImode || mode == DImode);

  /* Check on what type of symbol it is.  */
  if (GET_CODE (imm) == SYMBOL_REF
      || GET_CODE (imm) == LABEL_REF
      || GET_CODE (imm) == CONST)
    {
      rtx mem, base, offset;
      enum aarch64_symbol_type sty;

      /* If we have (const (plus symbol offset)), separate out the offset
	 before we start classifying the symbol.  */
      split_const (imm, &base, &offset);

      sty = aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR);
      switch (sty)
	{
	case SYMBOL_FORCE_TO_MEM:
	  if (offset != const0_rtx
	      && targetm.cannot_force_const_mem (mode, imm))
	    {
	      gcc_assert (can_create_pseudo_p ());
	      base = aarch64_force_temporary (mode, dest, base);
	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
	      aarch64_emit_move (dest, base);
	      return;
	    }
	  mem = force_const_mem (ptr_mode, imm);
	  gcc_assert (mem);
	  if (mode != ptr_mode)
	    mem = gen_rtx_ZERO_EXTEND (mode, mem);
	  emit_insn (gen_rtx_SET (dest, mem));
	  return;

	case SYMBOL_SMALL_TLSGD:
	case SYMBOL_SMALL_TLSDESC:
	case SYMBOL_SMALL_GOTTPREL:
	case SYMBOL_SMALL_GOT:
	case SYMBOL_TINY_GOT:
	  if (offset != const0_rtx)
	    {
	      gcc_assert (can_create_pseudo_p ());
	      base = aarch64_force_temporary (mode, dest, base);
	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
	      aarch64_emit_move (dest, base);
	      return;
	    }
	  /* FALLTHRU */

	case SYMBOL_SMALL_TPREL:
	case SYMBOL_SMALL_ABSOLUTE:
	case SYMBOL_TINY_ABSOLUTE:
	  aarch64_load_symref_appropriately (dest, imm, sty);
	  return;

	default:
	  gcc_unreachable ();
	}
    }

  if (!CONST_INT_P (imm))
    {
      if (GET_CODE (imm) == HIGH)
	emit_insn (gen_rtx_SET (dest, imm));
      else
	{
	  rtx mem = force_const_mem (mode, imm);
	  gcc_assert (mem);
	  emit_insn (gen_rtx_SET (dest, mem));
	}

      return;
    }

  aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));
}
static bool
aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
				 tree exp ATTRIBUTE_UNUSED)
{
  /* Currently, always true.  */
  return true;
}
/* Implement TARGET_PASS_BY_REFERENCE.  */
static bool
aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
			   machine_mode mode,
			   const_tree type,
			   bool named ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  machine_mode dummymode;
  int nregs;

  /* GET_MODE_SIZE (BLKmode) is useless since it is 0.  */
  size = (mode == BLKmode && type)
    ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);

  /* Aggregates are passed by reference based on their size.  */
  if (type && AGGREGATE_TYPE_P (type))
    size = int_size_in_bytes (type);

  /* Variable sized arguments are always passed by reference.  */
  if (size < 0)
    return true;

  /* Can this be a candidate to be passed in fp/simd register(s)?  */
  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
					       &dummymode, &nregs, NULL))
    return false;

  /* Arguments which are variable sized or larger than 2 registers are
     passed by reference unless they are a homogeneous floating point
     aggregate.  */
  return size > 2 * UNITS_PER_WORD;
}
/* Return TRUE if VALTYPE is padded to its least significant bits.  */
static bool
aarch64_return_in_msb (const_tree valtype)
{
  machine_mode dummy_mode;
  int dummy_int;

  /* Never happens in little-endian mode.  */
  if (!BYTES_BIG_ENDIAN)
    return false;

  /* Only composite types smaller than or equal to 16 bytes can
     be potentially returned in registers.  */
  if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
      || int_size_in_bytes (valtype) <= 0
      || int_size_in_bytes (valtype) > 16)
    return false;

  /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
     or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
     is always passed/returned in the least significant bits of fp/simd
     register(s).  */
  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
					       &dummy_mode, &dummy_int, NULL))
    return false;

  return true;
}
/* Implement TARGET_FUNCTION_VALUE.
   Define how to find the value returned by a function.  */
static rtx
aarch64_function_value (const_tree type, const_tree func,
			bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  int unsignedp;
  int count;
  machine_mode ag_mode;

  mode = TYPE_MODE (type);
  if (INTEGRAL_TYPE_P (type))
    mode = promote_function_mode (type, mode, &unsignedp, func, 1);

  if (aarch64_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);

      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
	}
    }

  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
					       &ag_mode, &count, NULL))
    {
      if (!aarch64_composite_type_p (type, mode))
	{
	  gcc_assert (count == 1 && mode == ag_mode);
	  return gen_rtx_REG (mode, V0_REGNUM);
	}
      else
	{
	  int i;
	  rtx par;

	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
	  for (i = 0; i < count; i++)
	    {
	      rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				       GEN_INT (i * GET_MODE_SIZE (ag_mode)));
	      XVECEXP (par, 0, i) = tmp;
	    }
	  return par;
	}
    }
  else
    return gen_rtx_REG (mode, R0_REGNUM);
}
/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
   Return true if REGNO is the number of a hard register in which the values
   of called function may come back.  */
static bool
aarch64_function_value_regno_p (const unsigned int regno)
{
  /* Maximum of 16 bytes can be returned in the general registers.  Examples
     of 16-byte return values are: 128-bit integers and 16-byte small
     structures (excluding homogeneous floating-point aggregates).  */
  if (regno == R0_REGNUM || regno == R1_REGNUM)
    return true;

  /* Up to four fp/simd registers can return a function value, e.g. a
     homogeneous floating-point aggregate having four members.  */
  if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
    return TARGET_FLOAT;

  return false;
}
/* Implement TARGET_RETURN_IN_MEMORY.

   If the type T of the result of a function is such that
     void func (T arg)
   would require that arg be passed as a value in a register (or set of
   registers) according to the parameter passing rules, then the result
   is returned in the same registers as would be used for such an
   argument.  */
static bool
aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  machine_mode ag_mode;
  int count;

  if (!AGGREGATE_TYPE_P (type)
      && TREE_CODE (type) != COMPLEX_TYPE
      && TREE_CODE (type) != VECTOR_TYPE)
    /* Simple scalar types always returned in registers.  */
    return false;

  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), type,
					       &ag_mode, &count, NULL))
    return false;

  /* Types larger than 2 registers returned in memory.  */
  size = int_size_in_bytes (type);
  return (size < 0 || size > 2 * UNITS_PER_WORD);
}
static bool
aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
			       const_tree type, int *nregs)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  return aarch64_vfp_is_call_or_return_candidate (mode, type,
						  &pcum->aapcs_vfp_rmode,
						  nregs, NULL);
}
/* Given MODE and TYPE of a function argument, return the alignment in
   bits.  The idea is to suppress any stronger alignment requested by
   the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
   This is a helper function for local use only.  */
static unsigned int
aarch64_function_arg_alignment (machine_mode mode, const_tree type)
{
  unsigned int alignment;

  if (type)
    {
      if (!integer_zerop (TYPE_SIZE (type)))
	{
	  if (TYPE_MODE (type) == mode)
	    alignment = TYPE_ALIGN (type);
	  else
	    alignment = GET_MODE_ALIGNMENT (mode);
	}
      else
	alignment = 0;
    }
  else
    alignment = GET_MODE_ALIGNMENT (mode);

  return alignment;
}
/* Layout a function argument according to the AAPCS64 rules.  The rule
   numbers refer to the rule numbers in the AAPCS64.  */

static void
aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
		    const_tree type,
		    bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int ncrn, nvrn, nregs;
  bool allocate_ncrn, allocate_nvrn;
  HOST_WIDE_INT size;

  /* We need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  /* Size in bytes, rounded to the nearest multiple of 8 bytes.  */
  size
    = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
			UNITS_PER_WORD);

  allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
  allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v, mode, type, &nregs);

  /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
     The following code thus handles passing by SIMD/FP registers first.  */

  nvrn = pcum->aapcs_nvrn;

  /* C1 - C5 for floating point, homogeneous floating point aggregates (HFA)
     and homogeneous short-vector aggregates (HVA).  */
  if (allocate_nvrn)
    {
      if (!TARGET_FLOAT)
	aarch64_err_no_fpadvsimd (mode, "argument");

      if (nvrn + nregs <= NUM_FP_ARG_REGS)
	{
	  pcum->aapcs_nextnvrn = nvrn + nregs;
	  if (!aarch64_composite_type_p (type, mode))
	    {
	      gcc_assert (nregs == 1);
	      pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
	    }
	  else
	    {
	      rtx par;
	      int i;
	      par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
	      for (i = 0; i < nregs; i++)
		{
		  rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
					 V0_REGNUM + nvrn + i);
		  tmp = gen_rtx_EXPR_LIST
		    (VOIDmode, tmp,
		     GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
		  XVECEXP (par, 0, i) = tmp;
		}
	      pcum->aapcs_reg = par;
	    }
	  return;
	}
      else
	{
	  /* C.3 NSRN is set to 8.  */
	  pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
	  goto on_stack;
	}
    }

  ncrn = pcum->aapcs_ncrn;
  nregs = size / UNITS_PER_WORD;

  /* C6 - C9, though the sign and zero extension semantics are
     handled elsewhere.  This is the case where the argument fits
     entirely in general registers.  */
  if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
    {
      unsigned int alignment = aarch64_function_arg_alignment (mode, type);

      gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);

      /* C.8 if the argument has an alignment of 16 then the NGRN is
	 rounded up to the next even number.  */
      if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
	{
	  ++ncrn;
	  gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
	}
      /* NREGS can be 0 when e.g. an empty structure is to be passed.
	 A reg is still generated for it, but the caller should be smart
	 enough not to use it.  */
      if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
	pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
      else
	{
	  rtx par;
	  int i;

	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
	  for (i = 0; i < nregs; i++)
	    {
	      rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				       GEN_INT (i * UNITS_PER_WORD));
	      XVECEXP (par, 0, i) = tmp;
	    }
	  pcum->aapcs_reg = par;
	}

      pcum->aapcs_nextncrn = ncrn + nregs;
      return;
    }

  /* C.11  */
  pcum->aapcs_nextncrn = NUM_ARG_REGS;

  /* The argument is passed on stack; record the needed number of words for
     this argument and align the total size if necessary.  */
on_stack:
  pcum->aapcs_stack_words = size / UNITS_PER_WORD;
  if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
    pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
					       16 / UNITS_PER_WORD);
  return;
}
/* Implement TARGET_FUNCTION_ARG.  */
static rtx
aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
		      const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);

  if (mode == VOIDmode)
    return NULL_RTX;

  aarch64_layout_arg (pcum_v, mode, type, named);
  return pcum->aapcs_reg;
}
void
aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
			      const_tree fntype ATTRIBUTE_UNUSED,
			      rtx libname ATTRIBUTE_UNUSED,
			      const_tree fndecl ATTRIBUTE_UNUSED,
			      unsigned n_named ATTRIBUTE_UNUSED)
{
  pcum->aapcs_ncrn = 0;
  pcum->aapcs_nvrn = 0;
  pcum->aapcs_nextncrn = 0;
  pcum->aapcs_nextnvrn = 0;
  pcum->pcs_variant = ARM_PCS_AAPCS64;
  pcum->aapcs_reg = NULL_RTX;
  pcum->aapcs_arg_processed = false;
  pcum->aapcs_stack_words = 0;
  pcum->aapcs_stack_size = 0;

  if (!TARGET_FLOAT
      && fndecl && TREE_PUBLIC (fndecl)
      && fntype && fntype != error_mark_node)
    {
      const_tree type = TREE_TYPE (fntype);
      machine_mode mode ATTRIBUTE_UNUSED; /* To pass pointer as argument.  */
      int nregs ATTRIBUTE_UNUSED; /* Likewise.  */
      if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), type,
						   &mode, &nregs, NULL))
	aarch64_err_no_fpadvsimd (TYPE_MODE (type), "return type");
    }
  return;
}
static void
aarch64_function_arg_advance (cumulative_args_t pcum_v,
			      machine_mode mode,
			      const_tree type,
			      bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  if (pcum->pcs_variant == ARM_PCS_AAPCS64)
    {
      aarch64_layout_arg (pcum_v, mode, type, named);
      gcc_assert ((pcum->aapcs_reg != NULL_RTX)
		  != (pcum->aapcs_stack_words != 0));
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
      pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
      pcum->aapcs_stack_size += pcum->aapcs_stack_words;
      pcum->aapcs_stack_words = 0;
      pcum->aapcs_reg = NULL_RTX;
    }
}
bool
aarch64_function_arg_regno_p (unsigned regno)
{
  return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
	  || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
}
/* Implement FUNCTION_ARG_BOUNDARY.  Every parameter gets at least
   PARM_BOUNDARY bits of alignment, but will be given anything up
   to STACK_BOUNDARY bits if the type requires it.  This makes sure
   that both before and after the layout of each argument, the Next
   Stacked Argument Address (NSAA) will have a minimum alignment of
   8 bytes.  */

static unsigned int
aarch64_function_arg_boundary (machine_mode mode, const_tree type)
{
  unsigned int alignment = aarch64_function_arg_alignment (mode, type);

  if (alignment < PARM_BOUNDARY)
    alignment = PARM_BOUNDARY;
  if (alignment > STACK_BOUNDARY)
    alignment = STACK_BOUNDARY;
  return alignment;
}
/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).

   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte of the stack slot has useful data.

   Small aggregate types are placed in the lowest memory address.

   The related parameter passing rules are B.4, C.3, C.5 and C.14.  */

bool
aarch64_pad_arg_upward (machine_mode mode, const_tree type)
{
  /* On little-endian targets, the least significant byte of every stack
     argument is passed at the lowest byte address of the stack slot.  */
  if (!BYTES_BIG_ENDIAN)
    return true;

  /* Otherwise, integral, floating-point and pointer types are padded downward:
     the least significant byte of a stack argument is passed at the highest
     byte address of the stack slot.  */
  if (type
      ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
	 || POINTER_TYPE_P (type))
      : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
    return false;

  /* Everything else padded upward, i.e. data in first byte of stack slot.  */
  return true;
}
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).

   It specifies padding for the last (may also be the only)
   element of a block move between registers and memory.  If
   assuming the block is in the memory, padding upward means that
   the last element is padded after its highest significant byte,
   while in downward padding, the last element is padded at its
   least significant byte side.

   Small aggregates and small complex types are always padded
   upwards.

   We don't need to worry about homogeneous floating-point or
   short-vector aggregates; their move is not affected by the
   padding direction determined here.  Regardless of endianness,
   each element of such an aggregate is put in the least
   significant bits of a fp/simd register.

   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */

bool
aarch64_pad_reg_upward (machine_mode mode, const_tree type,
			bool first ATTRIBUTE_UNUSED)
{
  /* Small composite types are always padded upward.  */
  if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
    {
      HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
			    : GET_MODE_SIZE (mode));
      if (size < 2 * UNITS_PER_WORD)
	return true;
    }

  /* Otherwise, use the default padding.  */
  return !BYTES_BIG_ENDIAN;
}
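
/* Illustrative note (editorial addition): the special case above keeps
   a small composite's register image consistent with its memory image
   on big-endian targets.  For example, a 5-byte structure moved
   between memory and an 8-byte register is padded upward, so a block
   move can copy it without shifting; without this case the default
   big-endian rule would pad it the other way.  */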
static machine_mode
aarch64_libgcc_cmp_return_mode (void)
{
  return SImode;
}
static bool
aarch64_frame_pointer_required (void)
{
  /* In aarch64_override_options_after_change
     flag_omit_leaf_frame_pointer turns off the frame pointer by
     default.  Turn it back on now if we've not got a leaf
     function.  */
  if (flag_omit_leaf_frame_pointer
      && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
    return true;

  return false;
}
/* Mark the registers that need to be saved by the callee and calculate
   the size of the callee-saved registers area and frame record (both FP
   and LR may be omitted).  */
static void
aarch64_layout_frame (void)
{
  HOST_WIDE_INT offset = 0;
  int regno;

  if (reload_completed && cfun->machine->frame.laid_out)
    return;

#define SLOT_NOT_REQUIRED (-2)
#define SLOT_REQUIRED     (-1)

  cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER;
  cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER;

  /* First mark all the registers that really need to be saved...  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;

  /* ... that includes the eh data registers (if needed)...  */
  if (crtl->calls_eh_return)
    for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
      cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
	= SLOT_REQUIRED;

  /* ... and any callee saved register that dataflow says is live.  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    if (df_regs_ever_live_p (regno)
	&& (regno == R30_REGNUM
	    || !call_used_regs[regno]))
      cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    if (df_regs_ever_live_p (regno)
	&& !call_used_regs[regno])
      cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;

  if (frame_pointer_needed)
    {
      /* FP and LR are placed in the linkage record.  */
      cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
      cfun->machine->frame.wb_candidate1 = R29_REGNUM;
      cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
      cfun->machine->frame.wb_candidate2 = R30_REGNUM;
      cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
      offset += 2 * UNITS_PER_WORD;
    }

  /* Now assign stack slots for them.  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
      {
	cfun->machine->frame.reg_offset[regno] = offset;
	if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
	  cfun->machine->frame.wb_candidate1 = regno;
	else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER)
	  cfun->machine->frame.wb_candidate2 = regno;
	offset += UNITS_PER_WORD;
      }

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
      {
	cfun->machine->frame.reg_offset[regno] = offset;
	if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
	  cfun->machine->frame.wb_candidate1 = regno;
	else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER
		 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
	  cfun->machine->frame.wb_candidate2 = regno;
	offset += UNITS_PER_WORD;
      }

  cfun->machine->frame.padding0 =
    (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
  offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);

  cfun->machine->frame.saved_regs_size = offset;

  cfun->machine->frame.hard_fp_offset
    = AARCH64_ROUND_UP (cfun->machine->frame.saved_varargs_size
			+ get_frame_size ()
			+ cfun->machine->frame.saved_regs_size,
			STACK_BOUNDARY / BITS_PER_UNIT);

  cfun->machine->frame.frame_size
    = AARCH64_ROUND_UP (cfun->machine->frame.hard_fp_offset
			+ crtl->outgoing_args_size,
			STACK_BOUNDARY / BITS_PER_UNIT);

  cfun->machine->frame.laid_out = true;
}
static bool
aarch64_register_saved_on_entry (int regno)
{
  return cfun->machine->frame.reg_offset[regno] >= 0;
}
static unsigned
aarch64_next_callee_save (unsigned regno, unsigned limit)
{
  while (regno <= limit && !aarch64_register_saved_on_entry (regno))
    regno++;
  return regno;
}
static void
aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
			   HOST_WIDE_INT adjustment)
{
  rtx base_rtx = stack_pointer_rtx;
  rtx insn, reg, mem;

  reg = gen_rtx_REG (mode, regno);
  mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
			    plus_constant (Pmode, base_rtx, -adjustment));
  mem = gen_rtx_MEM (mode, mem);

  insn = emit_move_insn (mem, reg);
  RTX_FRAME_RELATED_P (insn) = 1;
}
static rtx
aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
			  HOST_WIDE_INT adjustment)
{
  switch (mode)
    {
    case DImode:
      return gen_storewb_pairdi_di (base, base, reg, reg2,
				    GEN_INT (-adjustment),
				    GEN_INT (UNITS_PER_WORD - adjustment));
    case DFmode:
      return gen_storewb_pairdf_di (base, base, reg, reg2,
				    GEN_INT (-adjustment),
				    GEN_INT (UNITS_PER_WORD - adjustment));
    default:
      gcc_unreachable ();
    }
}
static void
aarch64_pushwb_pair_reg (machine_mode mode, unsigned regno1,
			 unsigned regno2, HOST_WIDE_INT adjustment)
{
  rtx insn;
  rtx reg1 = gen_rtx_REG (mode, regno1);
  rtx reg2 = gen_rtx_REG (mode, regno2);

  insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
					      reg2, adjustment));
  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
  RTX_FRAME_RELATED_P (insn) = 1;
}
static rtx
aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
			 HOST_WIDE_INT adjustment)
{
  switch (mode)
    {
    case DImode:
      return gen_loadwb_pairdi_di (base, base, reg, reg2,
				   GEN_INT (adjustment),
				   GEN_INT (UNITS_PER_WORD));
    case DFmode:
      return gen_loadwb_pairdf_di (base, base, reg, reg2,
				   GEN_INT (adjustment),
				   GEN_INT (UNITS_PER_WORD));
    default:
      gcc_unreachable ();
    }
}
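
/* Illustrative examples (editorial addition): the write-back pairs
   generated above correspond to instructions such as

     stp	x29, x30, [sp, #-32]!	// store pair, pre-indexed
     ldp	x29, x30, [sp], #32	// load pair, post-indexed

   which adjust the base register as part of the memory access.  */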
static rtx
aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
			rtx reg2)
{
  switch (mode)
    {
    case DImode:
      return gen_store_pairdi (mem1, reg1, mem2, reg2);

    case DFmode:
      return gen_store_pairdf (mem1, reg1, mem2, reg2);

    default:
      gcc_unreachable ();
    }
}
static rtx
aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
		       rtx mem2)
{
  switch (mode)
    {
    case DImode:
      return gen_load_pairdi (reg1, mem1, reg2, mem2);

    case DFmode:
      return gen_load_pairdf (reg1, mem1, reg2, mem2);

    default:
      gcc_unreachable ();
    }
}
static void
aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
			   unsigned start, unsigned limit, bool skip_wb)
{
  rtx insn;
  rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
					    ? gen_frame_mem : gen_rtx_MEM);
  unsigned regno;
  unsigned regno2;

  for (regno = aarch64_next_callee_save (start, limit);
       regno <= limit;
       regno = aarch64_next_callee_save (regno + 1, limit))
    {
      rtx reg, mem;
      HOST_WIDE_INT offset;

      if (skip_wb
	  && (regno == cfun->machine->frame.wb_candidate1
	      || regno == cfun->machine->frame.wb_candidate2))
	continue;

      reg = gen_rtx_REG (mode, regno);
      offset = start_offset + cfun->machine->frame.reg_offset[regno];
      mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
					      offset));

      regno2 = aarch64_next_callee_save (regno + 1, limit);

      if (regno2 <= limit
	  && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
	      == cfun->machine->frame.reg_offset[regno2]))
	{
	  rtx reg2 = gen_rtx_REG (mode, regno2);
	  rtx mem2;

	  offset = start_offset + cfun->machine->frame.reg_offset[regno2];
	  mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
						   offset));
	  insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
						    reg2));

	  /* The first part of a frame-related parallel insn is
	     always assumed to be relevant to the frame
	     calculations; subsequent parts, are only
	     frame-related if explicitly marked.  */
	  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
	  regno = regno2;
	}
      else
	insn = emit_move_insn (mem, reg);

      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
static void
aarch64_restore_callee_saves (machine_mode mode,
			      HOST_WIDE_INT start_offset, unsigned start,
			      unsigned limit, bool skip_wb, rtx *cfi_ops)
{
  rtx base_rtx = stack_pointer_rtx;
  rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
					    ? gen_frame_mem : gen_rtx_MEM);
  unsigned regno;
  unsigned regno2;
  HOST_WIDE_INT offset;

  for (regno = aarch64_next_callee_save (start, limit);
       regno <= limit;
       regno = aarch64_next_callee_save (regno + 1, limit))
    {
      rtx reg, mem;

      if (skip_wb
	  && (regno == cfun->machine->frame.wb_candidate1
	      || regno == cfun->machine->frame.wb_candidate2))
	continue;

      reg = gen_rtx_REG (mode, regno);
      offset = start_offset + cfun->machine->frame.reg_offset[regno];
      mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));

      regno2 = aarch64_next_callee_save (regno + 1, limit);

      if (regno2 <= limit
	  && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
	      == cfun->machine->frame.reg_offset[regno2]))
	{
	  rtx reg2 = gen_rtx_REG (mode, regno2);
	  rtx mem2;

	  offset = start_offset + cfun->machine->frame.reg_offset[regno2];
	  mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
	  emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));

	  *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
	  regno = regno2;
	}
      else
	emit_move_insn (reg, mem);

      *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
    }
}
/* AArch64 stack frames generated by this compiler look like:

	+-------------------------------+
	|                               |
	|  incoming stack arguments     |
	|                               |
	+-------------------------------+
	|                               | <-- incoming stack pointer (aligned)
	|  callee-allocated save area   |
	|  for register varargs         |
	|                               |
	+-------------------------------+
	|  local variables              | <-- frame_pointer_rtx
	|                               |
	+-------------------------------+
	|  padding0                     | \
	+-------------------------------+  |
	|  callee-saved registers       |  | frame.saved_regs_size
	+-------------------------------+  |
	|  LR'                          |  |
	+-------------------------------+  |
	|  FP'                          | / <- hard_frame_pointer_rtx (aligned)
	+-------------------------------+
	|  dynamic allocation           |
	+-------------------------------+
	|  padding                      |
	+-------------------------------+
	|  outgoing stack arguments     | <-- arg_pointer
	|                               |
	+-------------------------------+
	|                               | <-- stack_pointer_rtx (aligned)

   Dynamic stack allocations via alloca() decrease stack_pointer_rtx
   but leave frame_pointer_rtx and hard_frame_pointer_rtx
   unchanged.  */
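/* Illustrative example (not from the original source): for a function
   with 16 bytes of locals, callee-saves x19/x20 plus the FP'/LR'
   record, and no dynamic allocation or outgoing arguments,
   frame.frame_size would be 48: locals at [sp, #32], x19/x20 at
   [sp, #16] and [sp, #24], LR' at [sp, #8] and FP' at [sp, #0], with
   hard_frame_pointer_rtx left pointing at FP'.  */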
/* Generate the prologue instructions for entry into a function.
   Establish the stack frame by decreasing the stack pointer with a
   properly calculated size and, if necessary, create a frame record
   filled with the values of LR and previous frame pointer.  The
   current FP is also set up if it is in use.  */

void
aarch64_expand_prologue (void)
{
  /* sub sp, sp, #<frame_size>
     stp {fp, lr}, [sp, #<frame_size> - 16]
     add fp, sp, #<frame_size> - hardfp_offset
     stp {cs_reg}, [fp, #-16] etc.

     sub sp, sp, <final_adjustment_if_any>  */
  HOST_WIDE_INT frame_size, offset;
  HOST_WIDE_INT fp_offset;		/* Offset from hard FP to SP.  */
  HOST_WIDE_INT hard_fp_offset;
  rtx_insn *insn;

  aarch64_layout_frame ();

  offset = frame_size = cfun->machine->frame.frame_size;
  hard_fp_offset = cfun->machine->frame.hard_fp_offset;
  fp_offset = frame_size - hard_fp_offset;

  if (flag_stack_usage_info)
    current_function_static_stack_size = frame_size;

  /* Store pairs and load pairs have a range only -512 to 504.  */
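  /* Illustrative note (not from the original source): the 7-bit signed,
     scaled immediate of STP/LDP covers -64..63 units of the access
     size, so for 8-byte registers the byte offsets run from -512 to
     504 in steps of 8; "stp x29, x30, [sp, #504]" is the largest
     positive form.  Frames bigger than that take the staged
     adjustment below.  */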
  if (offset >= 512)
    {
      /* When the frame has a large size, an initial decrease is done on
         the stack pointer to jump over the callee-allocated save area for
         register varargs, the local variable area and/or the callee-saved
         register area.  This will allow the pre-index write-back
         store pair instructions to be used for setting up the stack frame
         efficiently.  */
      offset = hard_fp_offset;
      if (offset >= 512)
        offset = cfun->machine->frame.saved_regs_size;

      frame_size -= (offset + crtl->outgoing_args_size);
      fp_offset = 0;

      if (frame_size >= 0x1000000)
        {
          rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
          emit_move_insn (op0, GEN_INT (-frame_size));
          insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));

          add_reg_note (insn, REG_CFA_ADJUST_CFA,
                        gen_rtx_SET (stack_pointer_rtx,
                                     plus_constant (Pmode, stack_pointer_rtx,
                                                    -frame_size)));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
      else if (frame_size > 0)
        {
          int hi_ofs = frame_size & 0xfff000;
          int lo_ofs = frame_size & 0x000fff;

          if (hi_ofs)
            {
              insn = emit_insn (gen_add2_insn
                                (stack_pointer_rtx, GEN_INT (-hi_ofs)));
              RTX_FRAME_RELATED_P (insn) = 1;
            }
          if (lo_ofs)
            {
              insn = emit_insn (gen_add2_insn
                                (stack_pointer_rtx, GEN_INT (-lo_ofs)));
              RTX_FRAME_RELATED_P (insn) = 1;
            }
        }
    }
  else
    frame_size = -1;
  if (offset > 0)
    {
      bool skip_wb = false;

      if (frame_pointer_needed)
        {
          skip_wb = true;

          if (fp_offset)
            {
              insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
                                               GEN_INT (-offset)));
              RTX_FRAME_RELATED_P (insn) = 1;

              aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM,
                                         R30_REGNUM, false);
            }
          else
            aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset);

          /* Set up frame pointer to point to the location of the
             previous frame pointer on the stack.  */
          insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
                                           stack_pointer_rtx,
                                           GEN_INT (fp_offset)));
          RTX_FRAME_RELATED_P (insn) = 1;
          emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
        }
      else
        {
          unsigned reg1 = cfun->machine->frame.wb_candidate1;
          unsigned reg2 = cfun->machine->frame.wb_candidate2;

          if (fp_offset
              || reg1 == FIRST_PSEUDO_REGISTER
              || (reg2 == FIRST_PSEUDO_REGISTER
                  && offset >= 256))
            {
              insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
                                               GEN_INT (-offset)));
              RTX_FRAME_RELATED_P (insn) = 1;
            }
          else
            {
              machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;

              skip_wb = true;

              if (reg2 == FIRST_PSEUDO_REGISTER)
                aarch64_pushwb_single_reg (mode1, reg1, offset);
              else
                aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset);
            }
        }

      aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
                                 skip_wb);
      aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
                                 skip_wb);
    }

  /* when offset >= 512,
     sub sp, sp, #<outgoing_args_size> */
  if (frame_size > -1)
    {
      if (crtl->outgoing_args_size > 0)
        {
          insn = emit_insn (gen_add2_insn
                            (stack_pointer_rtx,
                             GEN_INT (- crtl->outgoing_args_size)));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }
}
/* Return TRUE if we can use a simple_return insn.

   This function checks whether the callee saved stack is empty, which
   means no restore actions are needed.  The pro_and_epilogue will use
   this to check whether shrink-wrapping opt is feasible.  */

bool
aarch64_use_return_insn_p (void)
{
  if (!reload_completed)
    return false;

  if (crtl->profile)
    return false;

  aarch64_layout_frame ();

  return cfun->machine->frame.frame_size == 0;
}
/* Generate the epilogue instructions for returning from a function.  */
void
aarch64_expand_epilogue (bool for_sibcall)
{
  HOST_WIDE_INT frame_size, offset;
  HOST_WIDE_INT fp_offset;
  HOST_WIDE_INT hard_fp_offset;
  rtx_insn *insn;
  /* We need to add memory barrier to prevent read from deallocated stack.  */
  bool need_barrier_p = (get_frame_size () != 0
                         || cfun->machine->frame.saved_varargs_size);

  aarch64_layout_frame ();

  offset = frame_size = cfun->machine->frame.frame_size;
  hard_fp_offset = cfun->machine->frame.hard_fp_offset;
  fp_offset = frame_size - hard_fp_offset;

  /* Store pairs and load pairs have a range only -512 to 504.  */
  if (offset >= 512)
    {
      offset = hard_fp_offset;
      if (offset >= 512)
        offset = cfun->machine->frame.saved_regs_size;

      frame_size -= (offset + crtl->outgoing_args_size);
      fp_offset = 0;
      if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
        {
          insn = emit_insn (gen_add2_insn
                            (stack_pointer_rtx,
                             GEN_INT (crtl->outgoing_args_size)));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }
  else
    frame_size = -1;

  /* If there were outgoing arguments or we've done dynamic stack
     allocation, then restore the stack pointer from the frame
     pointer.  This is at most one insn and more efficient than using
     GCC's internal mechanism.  */
  if (frame_pointer_needed
      && (crtl->outgoing_args_size || cfun->calls_alloca))
    {
      if (cfun->calls_alloca)
        emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));

      insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
                                       hard_frame_pointer_rtx,
                                       GEN_INT (0)));
      offset = offset - fp_offset;
    }

  if (offset > 0)
    {
      unsigned reg1 = cfun->machine->frame.wb_candidate1;
      unsigned reg2 = cfun->machine->frame.wb_candidate2;
      bool skip_wb = true;
      rtx cfi_ops = NULL;

      if (frame_pointer_needed)
        fp_offset = 0;
      else if (fp_offset
               || reg1 == FIRST_PSEUDO_REGISTER
               || (reg2 == FIRST_PSEUDO_REGISTER
                   && offset >= 256))
        skip_wb = false;

      aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
                                    skip_wb, &cfi_ops);
      aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
                                    skip_wb, &cfi_ops);

      if (need_barrier_p)
        emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));

      if (skip_wb)
        {
          machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
          rtx rreg1 = gen_rtx_REG (mode1, reg1);

          cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops);
          if (reg2 == FIRST_PSEUDO_REGISTER)
            {
              rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset);
              mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
              mem = gen_rtx_MEM (mode1, mem);
              insn = emit_move_insn (rreg1, mem);
            }
          else
            {
              rtx rreg2 = gen_rtx_REG (mode1, reg2);

              cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops);
              insn = emit_insn (aarch64_gen_loadwb_pair
                                (mode1, stack_pointer_rtx, rreg1,
                                 rreg2, offset));
            }
        }
      else
        insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
                                         GEN_INT (offset)));

      /* Reset the CFA to be SP + FRAME_SIZE.  */
      rtx new_cfa = stack_pointer_rtx;
      if (frame_size > 0)
        new_cfa = plus_constant (Pmode, new_cfa, frame_size);
      cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
      REG_NOTES (insn) = cfi_ops;
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (frame_size > 0)
    {
      if (need_barrier_p)
        emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));

      if (frame_size >= 0x1000000)
        {
          rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
          emit_move_insn (op0, GEN_INT (frame_size));
          insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
        }
      else
        {
          int hi_ofs = frame_size & 0xfff000;
          int lo_ofs = frame_size & 0x000fff;

          if (hi_ofs && lo_ofs)
            {
              insn = emit_insn (gen_add2_insn
                                (stack_pointer_rtx, GEN_INT (hi_ofs)));
              RTX_FRAME_RELATED_P (insn) = 1;
              frame_size = lo_ofs;
            }
          insn = emit_insn (gen_add2_insn
                            (stack_pointer_rtx, GEN_INT (frame_size)));
        }

      /* Reset the CFA to be SP + 0.  */
      add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Stack adjustment for exception handler.  */
  if (crtl->calls_eh_return)
    {
      /* We need to unwind the stack by the offset computed by
         EH_RETURN_STACKADJ_RTX.  We have already reset the CFA
         to be SP; letting the CFA move during this adjustment
         is just as correct as retaining the CFA from the body
         of the function.  Therefore, do nothing special.  */
      emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
    }

  emit_use (gen_rtx_REG (DImode, LR_REGNUM));

  if (!for_sibcall)
    emit_jump_insn (ret_rtx);
}
/* Return the place to copy the exception unwinding return address to.
   This will probably be a stack slot, but could (in theory) be the
   return register.  */
rtx
aarch64_final_eh_return_addr (void)
{
  HOST_WIDE_INT fp_offset;

  aarch64_layout_frame ();

  fp_offset = cfun->machine->frame.frame_size
              - cfun->machine->frame.hard_fp_offset;

  if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
    return gen_rtx_REG (DImode, LR_REGNUM);

  /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
     result in a store to save LR introduced by builtin_eh_return () being
     incorrectly deleted because the alias is not detected.
     So in the calculation of the address to copy the exception unwinding
     return address to, we note 2 cases.
     If FP is needed and the fp_offset is 0, it means that SP = FP and hence
     we return a SP-relative location since all the addresses are SP-relative
     in this case.  This prevents the store from being optimized away.
     If the fp_offset is not 0, then the addresses will be FP-relative and
     therefore we return a FP-relative location.  */

  if (frame_pointer_needed)
    {
      if (fp_offset)
        return gen_frame_mem (DImode,
                              plus_constant (Pmode, hard_frame_pointer_rtx,
                                             UNITS_PER_WORD));
      else
        return gen_frame_mem (DImode,
                              plus_constant (Pmode, stack_pointer_rtx,
                                             UNITS_PER_WORD));
    }

  /* If FP is not needed, we calculate the location of LR, which would be
     at the top of the saved registers block.  */

  return gen_frame_mem (DImode,
                        plus_constant (Pmode,
                                       stack_pointer_rtx,
                                       fp_offset
                                       + cfun->machine->frame.saved_regs_size
                                       - 2 * UNITS_PER_WORD));
}
/* Possibly output code to build up a constant in a register.  For
   the benefit of the costs infrastructure, returns the number of
   instructions which would be emitted.  GENERATE inhibits or
   enables code generation.  */

static int
aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
{
  int insns = 1;

  if (aarch64_bitmask_imm (val, DImode))
    {
      if (generate)
        emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
    }
  else
    {
      int i;
      int ncount = 0;
      int zcount = 0;
      HOST_WIDE_INT valp = val >> 16;
      HOST_WIDE_INT valm;
      HOST_WIDE_INT tval;

      for (i = 16; i < 64; i += 16)
        {
          valm = (valp & 0xffff);

          if (valm != 0)
            ++ zcount;

          if (valm != 0xffff)
            ++ ncount;

          valp >>= 16;
        }

      /* zcount contains the number of additional MOVK instructions
         required if the constant is built up with an initial MOVZ instruction,
         while ncount is the number of MOVK instructions required if starting
         with a MOVN instruction.  Choose the sequence that yields the fewest
         number of instructions, preferring MOVZ instructions when they are
         both the same.  */
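      /* Illustrative example (not from the original source): for
         val = 0x0000cafe0000beef, only one of the three upper 16-bit
         chunks is non-zero, so the MOVZ route needs just
         "movz x0, #0xbeef" plus "movk x0, #0xcafe, lsl #32"
         (2 instructions), while a MOVN start would need a MOVK for
         every chunk that is not all-ones; the MOVZ sequence wins.  */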
      if (ncount < zcount)
        {
          if (generate)
            emit_move_insn (gen_rtx_REG (Pmode, regnum),
                            GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
          tval = 0xffff;
          insns++;
        }
      else
        {
          if (generate)
            emit_move_insn (gen_rtx_REG (Pmode, regnum),
                            GEN_INT (val & 0xffff));
          tval = 0;
          insns++;
        }

      val >>= 16;

      for (i = 16; i < 64; i += 16)
        {
          if ((val & 0xffff) != tval)
            {
              if (generate)
                emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
                                           GEN_INT (i),
                                           GEN_INT (val & 0xffff)));
              insns++;
            }
          val >>= 16;
        }
    }

  return insns;
}
static void
aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
{
  HOST_WIDE_INT mdelta = delta;
  rtx this_rtx = gen_rtx_REG (Pmode, regnum);
  rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);

  if (mdelta < 0)
    mdelta = -mdelta;

  if (mdelta >= 4096 * 4096)
    {
      (void) aarch64_build_constant (scratchreg, delta, true);
      emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
    }
  else if (mdelta > 0)
    {
      if (mdelta >= 4096)
        {
          emit_insn (gen_rtx_SET (scratch_rtx, GEN_INT (mdelta / 4096)));
          rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
          if (delta < 0)
            emit_insn (gen_rtx_SET (this_rtx,
                                    gen_rtx_MINUS (Pmode, this_rtx, shift)));
          else
            emit_insn (gen_rtx_SET (this_rtx,
                                    gen_rtx_PLUS (Pmode, this_rtx, shift)));
        }
      if (mdelta % 4096 != 0)
        {
          scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
          emit_insn (gen_rtx_SET (this_rtx,
                                  gen_rtx_PLUS (Pmode, this_rtx,
                                                scratch_rtx)));
        }
    }
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */
static void
aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
                         HOST_WIDE_INT delta,
                         HOST_WIDE_INT vcall_offset,
                         tree function)
{
  /* The this pointer is always in x0.  Note that this differs from
     Arm where the this pointer may be bumped to r1 if r0 is required
     to return a pointer to an aggregate.  On AArch64 a result value
     pointer will be in x8.  */
  int this_regno = R0_REGNUM;
  rtx this_rtx, temp0, temp1, addr, funexp;
  rtx_insn *insn;

  reload_completed = 1;
  emit_note (NOTE_INSN_PROLOGUE_END);

  if (vcall_offset == 0)
    aarch64_add_constant (this_regno, IP1_REGNUM, delta);
  else
    {
      gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);

      this_rtx = gen_rtx_REG (Pmode, this_regno);
      temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
      temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);

      addr = this_rtx;
      if (delta != 0)
        {
          if (delta >= -256 && delta < 256)
            addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
                                       plus_constant (Pmode, this_rtx,
                                                      delta));
          else
            aarch64_add_constant (this_regno, IP1_REGNUM, delta);
        }

      if (Pmode == ptr_mode)
        aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
      else
        aarch64_emit_move (temp0,
                           gen_rtx_ZERO_EXTEND (Pmode,
                                                gen_rtx_MEM (ptr_mode,
                                                             addr)));

      if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
        addr = plus_constant (Pmode, temp0, vcall_offset);
      else
        {
          (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
          addr = gen_rtx_PLUS (Pmode, temp0, temp1);
        }

      if (Pmode == ptr_mode)
        aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode, addr));
      else
        aarch64_emit_move (temp1,
                           gen_rtx_SIGN_EXTEND (Pmode,
                                                gen_rtx_MEM (ptr_mode,
                                                             addr)));

      emit_insn (gen_add2_insn (this_rtx, temp1));
    }

  /* Generate a tail call to the target function.  */
  if (!TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
  SIBLING_CALL_P (insn) = 1;

  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  /* Stop pretending to be a post-reload pass.  */
  reload_completed = 0;
}
static bool
aarch64_tls_referenced_p (rtx x)
{
  if (!TARGET_HAVE_TLS)
    return false;
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
        return true;
      /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
         TLS offsets, not real symbol references.  */
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
        iter.skip_subrtxes ();
    }
  return false;
}
static int
aarch64_bitmasks_cmp (const void *i1, const void *i2)
{
  const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
  const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;

  if (*imm1 < *imm2)
    return -1;
  if (*imm1 > *imm2)
    return +1;
  return 0;
}
static void
aarch64_build_bitmask_table (void)
{
  unsigned HOST_WIDE_INT mask, imm;
  unsigned int log_e, e, s, r;
  unsigned int nimms = 0;

  for (log_e = 1; log_e <= 6; log_e++)
    {
      e = 1 << log_e;
      if (e == 64)
        mask = ~(HOST_WIDE_INT) 0;
      else
        mask = ((HOST_WIDE_INT) 1 << e) - 1;
      for (s = 1; s < e; s++)
        {
          for (r = 0; r < e; r++)
            {
              /* set s consecutive bits to 1 (s < 64) */
              imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
              /* rotate right by r */
              if (r != 0)
                imm = ((imm >> r) | (imm << (e - r))) & mask;
              /* replicate the constant depending on SIMD size */
              switch (log_e)
                {
                case 1: imm |= (imm <<  2);
                case 2: imm |= (imm <<  4);
                case 3: imm |= (imm <<  8);
                case 4: imm |= (imm << 16);
                case 5: imm |= (imm << 32);
                case 6:
                  break;
                default:
                  gcc_unreachable ();
                }
              gcc_assert (nimms < AARCH64_NUM_BITMASKS);
              aarch64_bitmasks[nimms++] = imm;
            }
        }
    }

  gcc_assert (nimms == AARCH64_NUM_BITMASKS);
  qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
         aarch64_bitmasks_cmp);
}
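/* Illustrative example (not from the original source): with e = 16,
   s = 8, r = 0 the inner loop forms 0x00ff and replicates it to
   0x00ff00ff00ff00ff, so "and x0, x1, #0x00ff00ff00ff00ff" is a valid
   bitmask immediate, whereas a value like 0x1234567812345678 never
   appears in the table and must be built up or loaded instead.  */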
/* Return true if val can be encoded as a 12-bit unsigned immediate with
   a left shift of 0 or 12 bits.  */
bool
aarch64_uimm12_shift (HOST_WIDE_INT val)
{
  return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
          || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
          );
}
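/* Illustrative examples (not from the original source):
   aarch64_uimm12_shift accepts 4095 (fits in the low 12 bits) and
   0x456000 (0x456 shifted left by 12), but rejects 0x1001, which
   straddles both fields and would need two ADD/SUB instructions.  */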
/* Return true if val is an immediate that can be loaded into a
   register by a MOVZ instruction.  */
static bool
aarch64_movw_imm (HOST_WIDE_INT val, machine_mode mode)
{
  if (GET_MODE_SIZE (mode) > 4)
    {
      if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
          || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
        return 1;
    }
  else
    {
      /* Ignore sign extension.  */
      val &= (HOST_WIDE_INT) 0xffffffff;
    }
  return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
          || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
}
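/* Illustrative example (not from the original source): 0x12340000
   satisfies the second test above (0x1234 << 16), so it loads with a
   single "movz w0, #0x1234, lsl #16"; its bitwise complement would be
   matched via the MOVN path in aarch64_move_imm below.  */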
/* Return true if val is a valid bitmask immediate.  */
bool
aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode mode)
{
  if (GET_MODE_SIZE (mode) < 8)
    {
      /* Replicate bit pattern.  */
      val &= (HOST_WIDE_INT) 0xffffffff;
      val |= val << 32;
    }
  return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
                  sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
}
/* Return true if val is an immediate that can be loaded into a
   register in a single instruction.  */
bool
aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
{
  if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
    return 1;
  return aarch64_bitmask_imm (val, mode);
}
static bool
aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  if (GET_CODE (x) == HIGH)
    return true;

  split_const (x, &base, &offset);
  if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
    {
      if (aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR)
          != SYMBOL_FORCE_TO_MEM)
        return true;
      else
        /* Avoid generating a 64-bit relocation in ILP32; leave
           to aarch64_expand_mov_immediate to handle it properly.  */
        return mode != ptr_mode;
    }

  return aarch64_tls_referenced_p (x);
}
/* Return true if register REGNO is a valid index register.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
aarch64_regno_ok_for_index_p (int regno, bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
        return true;

      if (!reg_renumber)
        return false;

      regno = reg_renumber[regno];
    }
  return GP_REGNUM_P (regno);
}
/* Return true if register REGNO is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
aarch64_regno_ok_for_base_p (int regno, bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
        return true;

      if (!reg_renumber)
        return false;

      regno = reg_renumber[regno];
    }

  /* The fake registers will be eliminated to either the stack or
     hard frame pointer, both of which are usually valid base registers.
     Reload deals with the cases where the eliminated form isn't valid.  */
  return (GP_REGNUM_P (regno)
          || regno == SP_REGNUM
          || regno == FRAME_POINTER_REGNUM
          || regno == ARG_POINTER_REGNUM);
}
/* Return true if X is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
aarch64_base_register_rtx_p (rtx x, bool strict_p)
{
  if (!strict_p && GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);

  return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
}
/* Return true if address offset is a valid index.  If it is, fill in INFO
   appropriately.  STRICT_P is true if REG_OK_STRICT is in effect.  */

static bool
aarch64_classify_index (struct aarch64_address_info *info, rtx x,
                        machine_mode mode, bool strict_p)
{
  enum aarch64_address_type type;
  rtx index;
  int shift;

  /* (reg:P) */
  if ((REG_P (x) || GET_CODE (x) == SUBREG)
      && GET_MODE (x) == Pmode)
    {
      type = ADDRESS_REG_REG;
      index = x;
      shift = 0;
    }
  /* (sign_extend:DI (reg:SI)) */
  else if ((GET_CODE (x) == SIGN_EXTEND
            || GET_CODE (x) == ZERO_EXTEND)
           && GET_MODE (x) == DImode
           && GET_MODE (XEXP (x, 0)) == SImode)
    {
      type = (GET_CODE (x) == SIGN_EXTEND)
        ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (x, 0);
      shift = 0;
    }
  /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
  else if (GET_CODE (x) == MULT
           && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
               || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
           && GET_MODE (XEXP (x, 0)) == DImode
           && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
           && CONST_INT_P (XEXP (x, 1)))
    {
      type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
        ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = exact_log2 (INTVAL (XEXP (x, 1)));
    }
  /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
  else if (GET_CODE (x) == ASHIFT
           && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
               || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
           && GET_MODE (XEXP (x, 0)) == DImode
           && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
           && CONST_INT_P (XEXP (x, 1)))
    {
      type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
        ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = INTVAL (XEXP (x, 1));
    }
  /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
  else if ((GET_CODE (x) == SIGN_EXTRACT
            || GET_CODE (x) == ZERO_EXTRACT)
           && GET_MODE (x) == DImode
           && GET_CODE (XEXP (x, 0)) == MULT
           && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
           && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
    {
      type = (GET_CODE (x) == SIGN_EXTRACT)
        ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
      if (INTVAL (XEXP (x, 1)) != 32 + shift
          || INTVAL (XEXP (x, 2)) != 0)
        shift = -1;
    }
  /* (and:DI (mult:DI (reg:DI) (const_int scale))
     (const_int 0xffffffff<<shift)) */
  else if (GET_CODE (x) == AND
           && GET_MODE (x) == DImode
           && GET_CODE (XEXP (x, 0)) == MULT
           && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
           && CONST_INT_P (XEXP (XEXP (x, 0), 1))
           && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
      if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
        shift = -1;
    }
  /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
  else if ((GET_CODE (x) == SIGN_EXTRACT
            || GET_CODE (x) == ZERO_EXTRACT)
           && GET_MODE (x) == DImode
           && GET_CODE (XEXP (x, 0)) == ASHIFT
           && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
           && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
    {
      type = (GET_CODE (x) == SIGN_EXTRACT)
        ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = INTVAL (XEXP (XEXP (x, 0), 1));
      if (INTVAL (XEXP (x, 1)) != 32 + shift
          || INTVAL (XEXP (x, 2)) != 0)
        shift = -1;
    }
  /* (and:DI (ashift:DI (reg:DI) (const_int shift))
     (const_int 0xffffffff<<shift)) */
  else if (GET_CODE (x) == AND
           && GET_MODE (x) == DImode
           && GET_CODE (XEXP (x, 0)) == ASHIFT
           && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
           && CONST_INT_P (XEXP (XEXP (x, 0), 1))
           && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = INTVAL (XEXP (XEXP (x, 0), 1));
      if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
        shift = -1;
    }
  /* (mult:P (reg:P) (const_int scale)) */
  else if (GET_CODE (x) == MULT
           && GET_MODE (x) == Pmode
           && GET_MODE (XEXP (x, 0)) == Pmode
           && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_REG;
      index = XEXP (x, 0);
      shift = exact_log2 (INTVAL (XEXP (x, 1)));
    }
  /* (ashift:P (reg:P) (const_int shift)) */
  else if (GET_CODE (x) == ASHIFT
           && GET_MODE (x) == Pmode
           && GET_MODE (XEXP (x, 0)) == Pmode
           && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_REG;
      index = XEXP (x, 0);
      shift = INTVAL (XEXP (x, 1));
    }
  else
    return false;

  if (GET_CODE (index) == SUBREG)
    index = SUBREG_REG (index);

  if ((shift == 0
       || (shift > 0 && shift <= 3
           && (1 << shift) == GET_MODE_SIZE (mode)))
      && REG_P (index)
      && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
    {
      info->type = type;
      info->offset = index;
      info->shift = shift;
      return true;
    }

  return false;
}
bool
aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
{
  return (offset >= -64 * GET_MODE_SIZE (mode)
          && offset < 64 * GET_MODE_SIZE (mode)
          && offset % GET_MODE_SIZE (mode) == 0);
}

static inline bool
offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
                               HOST_WIDE_INT offset)
{
  return offset >= -256 && offset < 256;
}

static inline bool
offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
{
  return (offset >= 0
          && offset < 4096 * GET_MODE_SIZE (mode)
          && offset % GET_MODE_SIZE (mode) == 0);
}
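/* Illustrative note (not from the original source): for DImode these
   predicates accept, respectively, multiples of 8 in [-512, 504]
   (the LDP/STP immediate), any offset in [-256, 255] (the LDUR/STUR
   form), and multiples of 8 in [0, 32760] (the common LDR/STR
   unsigned-immediate form).  */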
/* Return true if X is a valid address for machine mode MODE.  If it is,
   fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
   effect.  OUTER_CODE is PARALLEL for a load/store pair.  */

static bool
aarch64_classify_address (struct aarch64_address_info *info,
                          rtx x, machine_mode mode,
                          RTX_CODE outer_code, bool strict_p)
{
  enum rtx_code code = GET_CODE (x);
  rtx op0, op1;

  /* On BE, we use load/store pair for all large int mode load/stores.  */
  bool load_store_pair_p = (outer_code == PARALLEL
                            || (BYTES_BIG_ENDIAN
                                && aarch64_vect_struct_mode_p (mode)));

  bool allow_reg_index_p =
    !load_store_pair_p
    && (GET_MODE_SIZE (mode) != 16 || aarch64_vector_mode_supported_p (mode))
    && !aarch64_vect_struct_mode_p (mode);

  /* On LE, for AdvSIMD, don't support anything other than POST_INC or
     REG addressing.  */
  if (aarch64_vect_struct_mode_p (mode) && !BYTES_BIG_ENDIAN
      && (code != POST_INC && code != REG))
    return false;

  switch (code)
    {
    case REG:
    case SUBREG:
      info->type = ADDRESS_REG_IMM;
      info->base = x;
      info->offset = const0_rtx;
      return aarch64_base_register_rtx_p (x, strict_p);

    case PLUS:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (! strict_p
          && REG_P (op0)
          && (op0 == virtual_stack_vars_rtx
              || op0 == frame_pointer_rtx
              || op0 == arg_pointer_rtx)
          && CONST_INT_P (op1))
        {
          info->type = ADDRESS_REG_IMM;
          info->base = op0;
          info->offset = op1;

          return true;
        }

      if (GET_MODE_SIZE (mode) != 0
          && CONST_INT_P (op1)
          && aarch64_base_register_rtx_p (op0, strict_p))
        {
          HOST_WIDE_INT offset = INTVAL (op1);

          info->type = ADDRESS_REG_IMM;
          info->base = op0;
          info->offset = op1;

          /* TImode and TFmode values are allowed in both pairs of X
             registers and individual Q registers.  The available
             address modes are:
             X,X: 7-bit signed scaled offset
             Q:   9-bit signed offset
             We conservatively require an offset representable in either
             mode.  */
          if (mode == TImode || mode == TFmode)
            return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
                    && offset_9bit_signed_unscaled_p (mode, offset));

          /* A 7bit offset check because OImode will emit a ldp/stp
             instruction (only big endian will get here).
             For ldp/stp instructions, the offset is scaled for the size of a
             single element of the pair.  */
          if (mode == OImode)
            return aarch64_offset_7bit_signed_scaled_p (TImode, offset);

          /* Three 9/12 bit offsets checks because CImode will emit three
             ldr/str instructions (only big endian will get here).  */
          if (mode == CImode)
            return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
                    && (offset_9bit_signed_unscaled_p (V16QImode, offset + 32)
                        || offset_12bit_unsigned_scaled_p (V16QImode,
                                                           offset + 32)));

          /* Two 7bit offsets checks because XImode will emit two ldp/stp
             instructions (only big endian will get here).  */
          if (mode == XImode)
            return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
                    && aarch64_offset_7bit_signed_scaled_p (TImode,
                                                            offset + 32));

          if (load_store_pair_p)
            return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
                    && aarch64_offset_7bit_signed_scaled_p (mode, offset));
          else
            return (offset_9bit_signed_unscaled_p (mode, offset)
                    || offset_12bit_unsigned_scaled_p (mode, offset));
        }

      if (allow_reg_index_p)
        {
          /* Look for base + (scaled/extended) index register.  */
          if (aarch64_base_register_rtx_p (op0, strict_p)
              && aarch64_classify_index (info, op1, mode, strict_p))
            {
              info->base = op0;
              return true;
            }
          if (aarch64_base_register_rtx_p (op1, strict_p)
              && aarch64_classify_index (info, op0, mode, strict_p))
            {
              info->base = op1;
              return true;
            }
        }

      return false;

    case POST_INC:
    case POST_DEC:
    case PRE_INC:
    case PRE_DEC:
      info->type = ADDRESS_REG_WB;
      info->base = XEXP (x, 0);
      info->offset = NULL_RTX;
      return aarch64_base_register_rtx_p (info->base, strict_p);

    case POST_MODIFY:
    case PRE_MODIFY:
      info->type = ADDRESS_REG_WB;
      info->base = XEXP (x, 0);
      if (GET_CODE (XEXP (x, 1)) == PLUS
          && CONST_INT_P (XEXP (XEXP (x, 1), 1))
          && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
          && aarch64_base_register_rtx_p (info->base, strict_p))
        {
          HOST_WIDE_INT offset;
          info->offset = XEXP (XEXP (x, 1), 1);
          offset = INTVAL (info->offset);

          /* TImode and TFmode values are allowed in both pairs of X
             registers and individual Q registers.  The available
             address modes are:
             X,X: 7-bit signed scaled offset
             Q:   9-bit signed offset
             We conservatively require an offset representable in either
             mode.  */
          if (mode == TImode || mode == TFmode)
            return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
                    && offset_9bit_signed_unscaled_p (mode, offset));

          if (load_store_pair_p)
            return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
                    && aarch64_offset_7bit_signed_scaled_p (mode, offset));
          else
            return offset_9bit_signed_unscaled_p (mode, offset);
        }
      return false;

    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      /* load literal: pc-relative constant pool entry.  Only supported
         for SI mode or larger.  */
      info->type = ADDRESS_SYMBOLIC;

      if (!load_store_pair_p && GET_MODE_SIZE (mode) >= 4)
        {
          rtx sym, addend;

          split_const (x, &sym, &addend);
          return (GET_CODE (sym) == LABEL_REF
                  || (GET_CODE (sym) == SYMBOL_REF
                      && CONSTANT_POOL_ADDRESS_P (sym)));
        }
      return false;

    case LO_SUM:
      info->type = ADDRESS_LO_SUM;
      info->base = XEXP (x, 0);
      info->offset = XEXP (x, 1);
      if (allow_reg_index_p
          && aarch64_base_register_rtx_p (info->base, strict_p))
        {
          rtx sym, offs;
          split_const (info->offset, &sym, &offs);
          if (GET_CODE (sym) == SYMBOL_REF
              && (aarch64_classify_symbol (sym, offs, SYMBOL_CONTEXT_MEM)
                  == SYMBOL_SMALL_ABSOLUTE))
            {
              /* The symbol and offset must be aligned to the access size.  */
              unsigned int align;
              unsigned int ref_size;

              if (CONSTANT_POOL_ADDRESS_P (sym))
                align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
              else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
                {
                  tree exp = SYMBOL_REF_DECL (sym);
                  align = TYPE_ALIGN (TREE_TYPE (exp));
                  align = CONSTANT_ALIGNMENT (exp, align);
                }
              else if (SYMBOL_REF_DECL (sym))
                align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
              else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
                       && SYMBOL_REF_BLOCK (sym) != NULL)
                align = SYMBOL_REF_BLOCK (sym)->alignment;
              else
                align = BITS_PER_UNIT;

              ref_size = GET_MODE_SIZE (mode);
              if (ref_size == 0)
                ref_size = GET_MODE_SIZE (DImode);

              return ((INTVAL (offs) & (ref_size - 1)) == 0
                      && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
            }
        }
      return false;

    default:
      return false;
    }
}
bool
aarch64_symbolic_address_p (rtx x)
{
  rtx offset;

  split_const (x, &x, &offset);
  return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
}
/* Classify the base of symbolic expression X, given that X appears in
   context CONTEXT.  */

enum aarch64_symbol_type
aarch64_classify_symbolic_expression (rtx x,
                                      enum aarch64_symbol_context context)
{
  rtx offset;

  split_const (x, &x, &offset);
  return aarch64_classify_symbol (x, offset, context);
}
/* Return TRUE if X is a legitimate address for accessing memory in
   mode MODE.  */
static bool
aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
{
  struct aarch64_address_info addr;

  return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
}

/* Return TRUE if X is a legitimate address for accessing memory in
   mode MODE.  OUTER_CODE will be PARALLEL if this is a load/store
   pair operation.  */
bool
aarch64_legitimate_address_p (machine_mode mode, rtx x,
                              RTX_CODE outer_code, bool strict_p)
{
  struct aarch64_address_info addr;

  return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
}
/* Return TRUE if rtx X is immediate constant 0.0 */
bool
aarch64_float_const_zero_rtx_p (rtx x)
{
  REAL_VALUE_TYPE r;

  if (GET_MODE (x) == VOIDmode)
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  if (REAL_VALUE_MINUS_ZERO (r))
    return !HONOR_SIGNED_ZEROS (GET_MODE (x));
  return REAL_VALUES_EQUAL (r, dconst0);
}
/* Return the fixed registers used for condition codes.  */

static bool
aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = CC_REGNUM;
  *p2 = INVALID_REGNUM;
  return true;
}
/* Emit call insn with PAT and do aarch64-specific handling.  */

void
aarch64_emit_call_insn (rtx pat)
{
  rtx insn = emit_call_insn (pat);

  rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
  clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
  clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
}
machine_mode
aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
{
  /* All floating point compares return CCFP if it is an equality
     comparison, and CCFPE otherwise.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    {
      switch (code)
        {
        case EQ:
        case NE:
        case UNORDERED:
        case ORDERED:
        case UNLT:
        case UNLE:
        case UNGT:
        case UNGE:
        case UNEQ:
        case LTGT:
          return CCFPmode;

        case LT:
        case LE:
        case GT:
        case GE:
          return CCFPEmode;

        default:
          gcc_unreachable ();
        }
    }

  if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
      && y == const0_rtx
      && (code == EQ || code == NE || code == LT || code == GE)
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
          || GET_CODE (x) == NEG))
    return CC_NZmode;

  /* A compare with a shifted operand.  Because of canonicalization,
     the comparison will have to be swapped when we emit the assembly
     code.  */
  if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
      && (REG_P (y) || GET_CODE (y) == SUBREG)
      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
          || GET_CODE (x) == LSHIFTRT
          || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
    return CC_SWPmode;

  /* Similarly for a negated operand, but we can only do this for
     equalities.  */
  if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
      && (REG_P (y) || GET_CODE (y) == SUBREG)
      && (code == EQ || code == NE)
      && GET_CODE (x) == NEG)
    return CC_Zmode;

  /* A compare of a mode narrower than SI mode against zero can be done
     by extending the value in the comparison.  */
  if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
      && y == const0_rtx)
    /* Only use sign-extension if we really need it.  */
    return ((code == GT || code == GE || code == LE || code == LT)
            ? CC_SESWPmode : CC_ZESWPmode);

  /* For everything else, return CCmode.  */
  return CCmode;
}
static int
aarch64_get_condition_code_1 (enum machine_mode, enum rtx_code);

int
aarch64_get_condition_code (rtx x)
{
  machine_mode mode = GET_MODE (XEXP (x, 0));
  enum rtx_code comp_code = GET_CODE (x);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
  return aarch64_get_condition_code_1 (mode, comp_code);
}

static int
aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code)
{
  int ne = -1, eq = -1;
  switch (mode)
    {
    case CCFPmode:
    case CCFPEmode:
      switch (comp_code)
        {
        case GE: return AARCH64_GE;
        case GT: return AARCH64_GT;
        case LE: return AARCH64_LS;
        case LT: return AARCH64_MI;
        case NE: return AARCH64_NE;
        case EQ: return AARCH64_EQ;
        case ORDERED: return AARCH64_VC;
        case UNORDERED: return AARCH64_VS;
        case UNLT: return AARCH64_LT;
        case UNLE: return AARCH64_LE;
        case UNGT: return AARCH64_HI;
        case UNGE: return AARCH64_PL;
        default: return -1;
        }
      break;

    case CCmode:
      switch (comp_code)
        {
        case NE: return AARCH64_NE;
        case EQ: return AARCH64_EQ;
        case GE: return AARCH64_GE;
        case GT: return AARCH64_GT;
        case LE: return AARCH64_LE;
        case LT: return AARCH64_LT;
        case GEU: return AARCH64_CS;
        case GTU: return AARCH64_HI;
        case LEU: return AARCH64_LS;
        case LTU: return AARCH64_CC;
        default: return -1;
        }
      break;

    case CC_SWPmode:
    case CC_ZESWPmode:
    case CC_SESWPmode:
      switch (comp_code)
        {
        case NE: return AARCH64_NE;
        case EQ: return AARCH64_EQ;
        case GE: return AARCH64_LE;
        case GT: return AARCH64_LT;
        case LE: return AARCH64_GE;
        case LT: return AARCH64_GT;
        case GEU: return AARCH64_LS;
        case GTU: return AARCH64_CC;
        case LEU: return AARCH64_CS;
        case LTU: return AARCH64_HI;
        default: return -1;
        }
      break;

    case CC_NZmode:
      switch (comp_code)
        {
        case NE: return AARCH64_NE;
        case EQ: return AARCH64_EQ;
        case GE: return AARCH64_PL;
        case LT: return AARCH64_MI;
        default: return -1;
        }
      break;

    case CC_Zmode:
      switch (comp_code)
        {
        case NE: return AARCH64_NE;
        case EQ: return AARCH64_EQ;
        default: return -1;
        }
      break;

    default:
      return -1;
    }

  if (comp_code == NE)
    return ne;

  if (comp_code == EQ)
    return eq;

  return -1;
}
bool
aarch64_const_vec_all_same_in_range_p (rtx x,
                                       HOST_WIDE_INT minval,
                                       HOST_WIDE_INT maxval)
{
  HOST_WIDE_INT firstval;
  int count, i;

  if (GET_CODE (x) != CONST_VECTOR
      || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
    return false;

  firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
  if (firstval < minval || firstval > maxval)
    return false;

  count = CONST_VECTOR_NUNITS (x);
  for (i = 1; i < count; i++)
    if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
      return false;

  return true;
}

bool
aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
{
  return aarch64_const_vec_all_same_in_range_p (x, val, val);
}
static unsigned
bit_count (unsigned HOST_WIDE_INT value)
{
  unsigned count = 0;

  while (value)
    {
      count++;
      value &= value - 1;
    }

  return count;
}
#define AARCH64_CC_V 1
#define AARCH64_CC_C (1 << 1)
#define AARCH64_CC_Z (1 << 2)
#define AARCH64_CC_N (1 << 3)

/* N Z C V flags for ccmp.  The first code is for AND op and the other
   is for IOR op.  Indexed by AARCH64_COND_CODE.  */
static const int aarch64_nzcv_codes[][2] =
{
  {AARCH64_CC_Z, 0}, /* EQ, Z == 1.  */
  {0, AARCH64_CC_Z}, /* NE, Z == 0.  */
  {AARCH64_CC_C, 0}, /* CS, C == 1.  */
  {0, AARCH64_CC_C}, /* CC, C == 0.  */
  {AARCH64_CC_N, 0}, /* MI, N == 1.  */
  {0, AARCH64_CC_N}, /* PL, N == 0.  */
  {AARCH64_CC_V, 0}, /* VS, V == 1.  */
  {0, AARCH64_CC_V}, /* VC, V == 0.  */
  {AARCH64_CC_C, 0}, /* HI, C == 1 && Z == 0.  */
  {0, AARCH64_CC_C}, /* LS, !(C == 1 && Z == 0).  */
  {0, AARCH64_CC_V}, /* GE, N == V.  */
  {AARCH64_CC_V, 0}, /* LT, N != V.  */
  {0, AARCH64_CC_Z}, /* GT, Z == 0 && N == V.  */
  {AARCH64_CC_Z, 0}, /* LE, !(Z == 0 && N == V).  */
  {0, 0}, /* AL, Any.  */
  {0, 0}  /* NV, Any.  */
};
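/* Illustrative note (not from the original source): these immediates
   feed the conditional-compare instruction, whose general form is

       ccmp  <Rn>, <Rm|#imm>, #<nzcv>, <cond>

   If <cond> holds, CCMP performs the compare and sets NZCV from it;
   otherwise it simply writes #<nzcv> into the flags, chosen so that
   the remainder of the chained condition evaluates the right way for
   an AND or IOR combination.  */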
int
aarch64_ccmp_mode_to_code (enum machine_mode mode)
static void
aarch64_print_operand (FILE *f, rtx x, char code)
{
  switch (code)
    {
    /* An integer or symbol address without a preceding # sign.  */
    case 'c':
      switch (GET_CODE (x))
        {
        case CONST_INT:
          fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
          break;

        case SYMBOL_REF:
          output_addr_const (f, x);
          break;

        case CONST:
          if (GET_CODE (XEXP (x, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
            {
              output_addr_const (f, x);
              break;
            }
          /* Fall through.  */

        default:
          output_operand_lossage ("Unsupported operand for code '%c'", code);
        }
      break;

    case 'e':
      /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w.  */
      {
        int n;

        if (!CONST_INT_P (x)
            || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
          {
            output_operand_lossage ("invalid operand for '%%%c'", code);
            return;
          }

        switch (n)
          {
          case 3:
            fputc ('b', f);
            break;
          case 4:
            fputc ('h', f);
            break;
          case 5:
            fputc ('w', f);
            break;
          default:
            output_operand_lossage ("invalid operand for '%%%c'", code);
            return;
          }
      }
      break;

    case 'p':
      {
        int n;

        /* Print N such that 2^N == X.  */
        if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
          {
            output_operand_lossage ("invalid operand for '%%%c'", code);
            return;
          }

        asm_fprintf (f, "%d", n);
      }
      break;

    case 'P':
      /* Print the number of non-zero bits in X (a const_int).  */
      if (!CONST_INT_P (x))
        {
          output_operand_lossage ("invalid operand for '%%%c'", code);
          return;
        }

      asm_fprintf (f, "%u", bit_count (INTVAL (x)));
      break;

    case 'H':
      /* Print the higher numbered register of a pair (TImode) of regs.  */
      if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
        {
          output_operand_lossage ("invalid operand for '%%%c'", code);
          return;
        }

      asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
      break;

    case 'm':
      {
        int cond_code;
        /* Print a condition (eq, ne, etc).  */

        /* CONST_TRUE_RTX means always -- that's the default.  */
        if (x == const_true_rtx)
          return;

        if (!COMPARISON_P (x))
          {
            output_operand_lossage ("invalid operand for '%%%c'", code);
            return;
          }

        cond_code = aarch64_get_condition_code (x);
        gcc_assert (cond_code >= 0);
        fputs (aarch64_condition_codes[cond_code], f);
      }
      break;

    case 'M':
      {
        int cond_code;
        /* Print the inverse of a condition (eq <-> ne, etc).  */

        /* CONST_TRUE_RTX means never -- that's the default.  */
        if (x == const_true_rtx)
          {
            fputs ("nv", f);
            return;
          }

        if (!COMPARISON_P (x))
          {
            output_operand_lossage ("invalid operand for '%%%c'", code);
            return;
          }
        cond_code = aarch64_get_condition_code (x);
        gcc_assert (cond_code >= 0);
        fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
                                       (cond_code)], f);
      }
      break;

    case 'b':
    case 'h':
    case 's':
    case 'd':
    case 'q':
      /* Print a scalar FP/SIMD register name.  */
      if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
        {
          output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
          return;
        }
      asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
      break;

    case 'S':
    case 'T':
    case 'U':
    case 'V':
      /* Print the first FP/SIMD register name in a list.  */
      if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
        {
          output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
          return;
        }
      asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
      break;

    case 'R':
      /* Print a scalar FP/SIMD register name + 1.  */
      if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
        {
          output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
          return;
        }
      asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
      break;

    case 'X':
      /* Print bottom 16 bits of integer constant in hex.  */
      if (!CONST_INT_P (x))
        {
          output_operand_lossage ("invalid operand for '%%%c'", code);
          return;
        }
      asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
      break;

    case 'w':
    case 'x':
      /* Print a general register name or the zero register (32-bit or
         64-bit).  */
      if (x == const0_rtx
          || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
        {
          asm_fprintf (f, "%czr", code);
          break;
        }

      if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
        {
          asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
          break;
        }

      if (REG_P (x) && REGNO (x) == SP_REGNUM)
        {
          asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
          break;
        }

      /* Fall through */

    case 0:
      /* Print a normal operand, if it's a general register, then we
         assume DImode.  */
      if (x == NULL)
        {
          output_operand_lossage ("missing operand");
          return;
        }

      switch (GET_CODE (x))
        {
        case REG:
          asm_fprintf (f, "%s", reg_names [REGNO (x)]);
          break;

        case MEM:
          aarch64_memory_reference_mode = GET_MODE (x);
          output_address (XEXP (x, 0));
          break;

        case LABEL_REF:
        case SYMBOL_REF:
          output_addr_const (asm_out_file, x);
          break;

        case CONST_INT:
          asm_fprintf (f, "%wd", INTVAL (x));
          break;

        case CONST_VECTOR:
          if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
            {
              gcc_assert (
                  aarch64_const_vec_all_same_in_range_p (x,
                                                         HOST_WIDE_INT_MIN,
                                                         HOST_WIDE_INT_MAX));
              asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
            }
          else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
            {
              fputc ('0', f);
            }
          else
            gcc_unreachable ();
          break;

        case CONST_DOUBLE:
          /* CONST_DOUBLE can represent a double-width integer.
             In this case, the mode of x is VOIDmode.  */
          if (GET_MODE (x) == VOIDmode)
            ; /* Do Nothing.  */
          else if (aarch64_float_const_zero_rtx_p (x))
            {
              fputc ('0', f);
              break;
            }
          else if (aarch64_float_const_representable_p (x))
            {
#define buf_size 20
              char float_buf[buf_size] = {'\0'};
              REAL_VALUE_TYPE r;
              REAL_VALUE_FROM_CONST_DOUBLE (r, x);
              real_to_decimal_for_mode (float_buf, &r,
                                        buf_size, buf_size,
                                        1, GET_MODE (x));
              asm_fprintf (asm_out_file, "%s", float_buf);
              break;
#undef buf_size
            }
          output_operand_lossage ("invalid constant");
          return;
        default:
          output_operand_lossage ("invalid operand");
          return;
        }
      break;

    case 'A':
      if (GET_CODE (x) == HIGH)
        x = XEXP (x, 0);

      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
        {
        case SYMBOL_SMALL_GOT:
          asm_fprintf (asm_out_file, ":got:");
          break;

        case SYMBOL_SMALL_TLSGD:
          asm_fprintf (asm_out_file, ":tlsgd:");
          break;

        case SYMBOL_SMALL_TLSDESC:
          asm_fprintf (asm_out_file, ":tlsdesc:");
          break;

        case SYMBOL_SMALL_GOTTPREL:
          asm_fprintf (asm_out_file, ":gottprel:");
          break;

        case SYMBOL_SMALL_TPREL:
          asm_fprintf (asm_out_file, ":tprel:");
          break;

        case SYMBOL_TINY_GOT:
          gcc_unreachable ();
          break;

        default:
          break;
        }
      output_addr_const (asm_out_file, x);
      break;

    case 'L':
      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
        {
        case SYMBOL_SMALL_GOT:
          asm_fprintf (asm_out_file, ":lo12:");
          break;

        case SYMBOL_SMALL_TLSGD:
          asm_fprintf (asm_out_file, ":tlsgd_lo12:");
          break;

        case SYMBOL_SMALL_TLSDESC:
          asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
          break;

        case SYMBOL_SMALL_GOTTPREL:
          asm_fprintf (asm_out_file, ":gottprel_lo12:");
          break;

        case SYMBOL_SMALL_TPREL:
          asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
          break;

        case SYMBOL_TINY_GOT:
          asm_fprintf (asm_out_file, ":got:");
          break;

        default:
          break;
        }
      output_addr_const (asm_out_file, x);
      break;

    case 'G':
      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
        {
        case SYMBOL_SMALL_TPREL:
          asm_fprintf (asm_out_file, ":tprel_hi12:");
          break;
        default:
          break;
        }
      output_addr_const (asm_out_file, x);
      break;

    case 'k':
      {
        int cond_code;
        /* Print the NZCV immediate for an AND-combined ccmp.  */

        if (!COMPARISON_P (x))
          {
            output_operand_lossage ("invalid operand for '%%%c'", code);
            return;
          }

        cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
        gcc_assert (cond_code >= 0);
        asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][0]);
      }
      break;

    case 'K':
      {
        int cond_code;
        /* Print the NZCV immediate for an IOR-combined ccmp.  */

        if (!COMPARISON_P (x))
          {
            output_operand_lossage ("invalid operand for '%%%c'", code);
            return;
          }

        cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
        gcc_assert (cond_code >= 0);
        asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][1]);
      }
      break;

    default:
      output_operand_lossage ("invalid operand prefix '%%%c'", code);
      return;
    }
}
void
aarch64_print_operand_address (FILE *f, rtx x)
{
  struct aarch64_address_info addr;

  if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
                                MEM, true))
    switch (addr.type)
      {
      case ADDRESS_REG_IMM:
        if (addr.offset == const0_rtx)
          asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
        else
          asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
                       INTVAL (addr.offset));
        return;

      case ADDRESS_REG_REG:
        if (addr.shift == 0)
          asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
                       reg_names [REGNO (addr.offset)]);
        else
          asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
                       reg_names [REGNO (addr.offset)], addr.shift);
        return;

      case ADDRESS_REG_UXTW:
        if (addr.shift == 0)
          asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
                       REGNO (addr.offset) - R0_REGNUM);
        else
          asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
                       REGNO (addr.offset) - R0_REGNUM, addr.shift);
        return;

      case ADDRESS_REG_SXTW:
        if (addr.shift == 0)
          asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
                       REGNO (addr.offset) - R0_REGNUM);
        else
          asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
                       REGNO (addr.offset) - R0_REGNUM, addr.shift);
        return;

      case ADDRESS_REG_WB:
        switch (GET_CODE (x))
          {
          case PRE_INC:
            asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
                         GET_MODE_SIZE (aarch64_memory_reference_mode));
            return;
          case POST_INC:
            asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
                         GET_MODE_SIZE (aarch64_memory_reference_mode));
            return;
          case PRE_DEC:
            asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
                         GET_MODE_SIZE (aarch64_memory_reference_mode));
            return;
          case POST_DEC:
            asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
                         GET_MODE_SIZE (aarch64_memory_reference_mode));
            return;
          case PRE_MODIFY:
            asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
                         INTVAL (addr.offset));
            return;
          case POST_MODIFY:
            asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
                         INTVAL (addr.offset));
            return;
          default:
            break;
          }
        break;

      case ADDRESS_LO_SUM:
        asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
        output_addr_const (f, addr.offset);
        asm_fprintf (f, "]");
        return;

      case ADDRESS_SYMBOLIC:
        break;
      }

  output_addr_const (f, x);
}
bool
aarch64_label_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == LABEL_REF)
    return true;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
     referencing instruction, but they are constant offsets, not
     symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return false;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
        {
          int j;

          for (j = XVECLEN (x, i) - 1; j >= 0; j--)
            if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
              return true;
        }
      else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
        return true;
    }

  return false;
}
/* Implement REGNO_REG_CLASS.  */

enum reg_class
aarch64_regno_regclass (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return GENERAL_REGS;

  if (regno == SP_REGNUM)
    return STACK_REG;

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return POINTER_REGS;

  if (FP_REGNUM_P (regno))
    return FP_LO_REGNUM_P (regno) ?  FP_LO_REGS : FP_REGS;

  return NO_REGS;
}
static rtx
aarch64_legitimize_address (rtx x, rtx /* orig_x  */, machine_mode mode)
{
  /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
     where mask is selected by alignment and size of the offset.
     We try to pick as large a range for the offset as possible to
     maximize the chance of a CSE.  However, for aligned addresses
     we limit the range to 4k so that structures with different sized
     elements are likely to use the same base.  */
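  /* Illustrative example (not from the original source): for an SImode
     access to x + 0x2064, the aligned case below picks base_offset =
     0x2000, so the address becomes (tmp = x + 0x2000) + 0x64 and other
     accesses near x + 0x2000 can CSE the same tmp.  */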
  if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
    {
      HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
      HOST_WIDE_INT base_offset;

      /* Does it look like we'll need a load/store-pair operation?  */
      if (GET_MODE_SIZE (mode) > 16)
        base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
                       & ~((128 * GET_MODE_SIZE (mode)) - 1));
      /* For offsets that aren't a multiple of the access size, the limit
         is -256...255.  */
      else if (offset & (GET_MODE_SIZE (mode) - 1))
        base_offset = (offset + 0x100) & ~0x1ff;
      else
        base_offset = offset & ~0xfff;

      if (base_offset == 0)
        return x;

      offset -= base_offset;
      rtx base_reg = gen_reg_rtx (Pmode);
      rtx val = force_operand (plus_constant (Pmode, XEXP (x, 0), base_offset),
                               NULL_RTX);
      emit_move_insn (base_reg, val);
      x = plus_constant (Pmode, base_reg, offset);
    }

  return x;
}
/* Try a machine-dependent way of reloading an illegitimate address
   operand.  If we find one, push the reload and return the new rtx.  */

rtx
aarch64_legitimize_reload_address (rtx *x_p,
                                   machine_mode mode,
                                   int opnum, int type,
                                   int ind_levels ATTRIBUTE_UNUSED)
{
  rtx x = *x_p;

  /* Do not allow mem (plus (reg, const)) if vector struct mode.  */
  if (aarch64_vect_struct_mode_p (mode)
      && GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))
    {
      rtx orig_rtx = x;
      x = copy_rtx (x);
      push_reload (orig_rtx, NULL_RTX, x_p, NULL,
                   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
                   opnum, (enum reload_type) type);
      return x;
    }

  /* We must recognize output that we have already generated ourselves.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && REG_P (XEXP (XEXP (x, 0), 0))
      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
      && CONST_INT_P (XEXP (x, 1)))
    {
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
                   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
                   opnum, (enum reload_type) type);
      return x;
    }

  /* We wish to handle large displacements off a base register by splitting
     the addend across an add and the mem insn.  This can cut the number of
     extra insns needed from 3 to 1.  It is only useful for load/store of a
     single register with 12 bit offset field.  */
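  /* Illustrative example (not from the original source; register names
     are hypothetical): for an access at x0 + 0x13456, the addend splits
     into high = 0x13000 and low = 0x456, giving "add x2, x0, #0x13000"
     for the reload plus "ldr w1, [x2, #0x456]" in the insn itself.  */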
  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1))
      && HARD_REGISTER_P (XEXP (x, 0))
      && mode != TImode
      && mode != TFmode
      && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
    {
      HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
      HOST_WIDE_INT low = val & 0xfff;
      HOST_WIDE_INT high = val - low;
      HOST_WIDE_INT offs;
      rtx cst;
      machine_mode xmode = GET_MODE (x);

      /* In ILP32, xmode can be either DImode or SImode.  */
      gcc_assert (xmode == DImode || xmode == SImode);

      /* Reload non-zero BLKmode offsets.  This is because we cannot ascertain
         BLKmode alignment.  */
      if (GET_MODE_SIZE (mode) == 0)
        return NULL_RTX;

      offs = low % GET_MODE_SIZE (mode);

      /* Align misaligned offset by adjusting high part to compensate.  */
      if (offs != 0)
        {
          if (aarch64_uimm12_shift (high + offs))
            {
              /* Align down.  */
              low = low - offs;
              high = high + offs;
            }
          else
            {
              /* Align up.  */
              offs = GET_MODE_SIZE (mode) - offs;
              low = low + offs;
              high = high + (low & 0x1000) - offs;
              low &= 0xfff;
            }
        }

      /* Check for overflow.  */
      if (high + low != val)
        return NULL_RTX;

      cst = GEN_INT (high);
      if (!aarch64_uimm12_shift (high))
        cst = force_const_mem (xmode, cst);

      /* Reload high part into base reg, leaving the low part
         in the mem instruction.
         Note that replacing this gen_rtx_PLUS with plus_constant is
         wrong in this case because we rely on the
         (plus (plus reg c1) c2) structure being preserved so that
         XEXP (*p, 0) in push_reload below uses the correct term.  */
      x = gen_rtx_PLUS (xmode,
                        gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
                        GEN_INT (low));

      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
                   BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
                   opnum, (enum reload_type) type);
      return x;
    }

  return NULL_RTX;
}
static reg_class_t
aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
                          reg_class_t rclass,
                          machine_mode mode,
                          secondary_reload_info *sri)
{
  /* Without the TARGET_SIMD instructions we cannot move a Q register
     to a Q register directly.  We need a scratch.  */
  if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
      && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
      && reg_class_subset_p (rclass, FP_REGS))
    {
      if (mode == TFmode)
        sri->icode = CODE_FOR_aarch64_reload_movtf;
      else if (mode == TImode)
        sri->icode = CODE_FOR_aarch64_reload_movti;
      return NO_REGS;
    }

  /* A TFmode or TImode memory access should be handled via an FP_REGS
     because AArch64 has richer addressing modes for LDR/STR instructions
     than LDP/STP instructions.  */
  if (TARGET_FLOAT && rclass == GENERAL_REGS
      && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
    return FP_REGS;

  if (rclass == FP_REGS && (mode == TImode || mode == TFmode)
      && CONSTANT_P (x))
    return GENERAL_REGS;

  return NO_REGS;
}
static bool
aarch64_can_eliminate (const int from, const int to)
{
  /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
     HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM.  */

  if (frame_pointer_needed)
    {
      if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
        return true;
      if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
        return false;
      if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
          && !cfun->calls_alloca)
        return true;
      if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
        return true;

      return false;
    }
  else
    {
      /* If we decided that we didn't need a leaf frame pointer but then used
         LR in the function, then we'll want a frame pointer after all, so
         prevent this elimination to ensure a frame pointer is used.  */
      if (to == STACK_POINTER_REGNUM
          && flag_omit_leaf_frame_pointer
          && df_regs_ever_live_p (LR_REGNUM))
        return false;
    }

  return true;
}
HOST_WIDE_INT
aarch64_initial_elimination_offset (unsigned from, unsigned to)
{
  aarch64_layout_frame ();

  if (to == HARD_FRAME_POINTER_REGNUM)
    {
      if (from == ARG_POINTER_REGNUM)
        return cfun->machine->frame.frame_size - crtl->outgoing_args_size;

      if (from == FRAME_POINTER_REGNUM)
        return (cfun->machine->frame.hard_fp_offset
                - cfun->machine->frame.saved_varargs_size);
    }

  if (to == STACK_POINTER_REGNUM)
    {
      if (from == FRAME_POINTER_REGNUM)
        return (cfun->machine->frame.frame_size
                - cfun->machine->frame.saved_varargs_size);
    }

  return cfun->machine->frame.frame_size;
}
/* Implement RETURN_ADDR_RTX.  We do not support moving back to a
   previous frame.  */

rtx
aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return const0_rtx;
  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
static void
aarch64_asm_trampoline_template (FILE *f)
{
  if (TARGET_ILP32)
    {
      asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
      asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
    }
  else
    {
      asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
      asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
    }
  asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
  assemble_aligned_integer (4, const0_rtx);
  assemble_aligned_integer (POINTER_BYTES, const0_rtx);
  assemble_aligned_integer (POINTER_BYTES, const0_rtx);
}
static void
aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;
  const int tramp_code_sz = 16;

  /* Don't need to copy the trailing D-words, we fill those in below.  */
  emit_block_move (m_tramp, assemble_trampoline_template (),
                   GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
  mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  if (GET_MODE (fnaddr) != ptr_mode)
    fnaddr = convert_memory_address (ptr_mode, fnaddr);
  emit_move_insn (mem, fnaddr);

  mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
  emit_move_insn (mem, chain_value);

  /* XXX We should really define a "clear_cache" pattern and use
     gen_clear_cache().  */
  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
                     LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
                     plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
                     ptr_mode);
}
static unsigned char
aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
{
  switch (regclass)
    {
    case CALLER_SAVE_REGS:
    case POINTER_REGS:
    case GENERAL_REGS:
    case ALL_REGS:
    case FP_REGS:
    case FP_LO_REGS:
      return
	aarch64_vector_mode_p (mode)
	  ? (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG
	  : (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    case STACK_REG:
      return 1;

    case NO_REGS:
      return 0;

    default:
      break;
    }
  gcc_unreachable ();
}
static reg_class_t
aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
{
  if (regclass == POINTER_REGS)
    return GENERAL_REGS;

  if (regclass == STACK_REG)
    {
      if (REG_P (x)
	  && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
	return regclass;

      return NO_REGS;
    }

  /* If it's an integer immediate that MOVI can't handle, then
     FP_REGS is not an option, so we return NO_REGS instead.  */
  if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
      && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
    return NO_REGS;

  /* Register elimination can result in a request for
     SP+constant->FP_REGS.  We cannot support such operations which
     use SP as source and an FP_REG as destination, so reject out
     right now.  */
  if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
    {
      rtx lhs = XEXP (x, 0);

      /* Look through a possible SUBREG introduced by ILP32.  */
      if (GET_CODE (lhs) == SUBREG)
	lhs = SUBREG_REG (lhs);

      gcc_assert (REG_P (lhs));
      gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
				      POINTER_REGS));
      return NO_REGS;
    }

  return regclass;
}
void
aarch64_asm_output_labelref (FILE* f, const char *name)
{
  asm_fprintf (f, "%U%s", name);
}
static void
aarch64_elf_asm_constructor (rtx symbol, int priority)
{
  if (priority == DEFAULT_INIT_PRIORITY)
    default_ctor_section_asm_out_constructor (symbol, priority);
  else
    {
      section *s;
      char buf[23];
      snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
      s = get_section (buf, SECTION_WRITE, NULL);
      switch_to_section (s);
      assemble_align (POINTER_SIZE);
      assemble_aligned_integer (POINTER_BYTES, symbol);
    }
}
static void
aarch64_elf_asm_destructor (rtx symbol, int priority)
{
  if (priority == DEFAULT_INIT_PRIORITY)
    default_dtor_section_asm_out_destructor (symbol, priority);
  else
    {
      section *s;
      char buf[23];
      snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
      s = get_section (buf, SECTION_WRITE, NULL);
      switch_to_section (s);
      assemble_align (POINTER_SIZE);
      assemble_aligned_integer (POINTER_BYTES, symbol);
    }
}
const char *
aarch64_output_casesi (rtx *operands)
{
  char label[100];
  char buf[100];
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
  int index;
  static const char *const patterns[4][2] =
  {
    {
      "ldrb\t%w3, [%0,%w1,uxtw]",
      "add\t%3, %4, %w3, sxtb #2"
    },
    {
      "ldrh\t%w3, [%0,%w1,uxtw #1]",
      "add\t%3, %4, %w3, sxth #2"
    },
    {
      "ldr\t%w3, [%0,%w1,uxtw #2]",
      "add\t%3, %4, %w3, sxtw #2"
    },
    /* We assume that DImode is only generated when not optimizing and
       that we don't really need 64-bit address offsets.  That would
       imply an object file with 8GB of code in a single function!  */
    {
      "ldr\t%w3, [%0,%w1,uxtw #2]",
      "add\t%3, %4, %w3, sxtw #2"
    }
  };

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));

  gcc_assert (index >= 0 && index <= 3);

  /* Need to implement table size reduction, by changing the code below.  */
  output_asm_insn (patterns[index][0], operands);
  ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
  snprintf (buf, sizeof (buf),
	    "adr\t%%4, %s", targetm.strip_name_encoding (label));
  output_asm_insn (buf, operands);
  output_asm_insn (patterns[index][1], operands);
  output_asm_insn ("br\t%3", operands);
  assemble_label (asm_out_file, label);
  return "";
}
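/* Illustrative output for a table with 2-byte entries (index == 1),
   using the operand numbering from the patterns above:

	ldrh	w3, [x0, w1, uxtw #1]
	adr	x4, .Lrtx<N>
	add	x3, x4, w3, sxth #2
	br	x3

   where .Lrtx<N> is the internal label emitted by assemble_label just
   above, so every table entry is a scaled offset from that anchor.  */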
/* Return size in bits of an arithmetic operand which is shifted/scaled and
   masked such that it is suitable for a UXTB, UXTH, or UXTW extend
   operator.  */

int
aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
{
  if (shift >= 0 && shift <= 3)
    {
      int size;
      for (size = 8; size <= 32; size *= 2)
	{
	  HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
	  if (mask == bits << shift)
	    return size;
	}
    }
  return 0;
}
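/* Worked example: shift == 1, mask == 0x1fe.  At size == 8 the loop
   computes bits == 0xff, and (0xff << 1) == 0x1fe == mask, so the
   function returns 8: the operand fits a UXTB-style extend scaled by
   the shift.  A mask that is not a contiguous shifted run of 8, 16 or
   32 bits returns 0.  */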
static bool
aarch64_use_blocks_for_constant_p (machine_mode mode ATTRIBUTE_UNUSED,
				   const_rtx x ATTRIBUTE_UNUSED)
{
  /* We can't use blocks for constants when we're using a per-function
     constant pool.  */
  return false;
}

static section *
aarch64_select_rtx_section (machine_mode mode ATTRIBUTE_UNUSED,
			    rtx x ATTRIBUTE_UNUSED,
			    unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  /* Force all constant pool entries into the current function section.  */
  return function_section (current_function_decl);
}
/* Helper function for rtx cost calculation.  Strip a shift expression
   from X.  Returns the inner operand if successful, or the original
   expression on failure.  */
static rtx
aarch64_strip_shift (rtx x)
{
  rtx op = x;

  /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
     we can convert both to ROR during final output.  */
  if ((GET_CODE (op) == ASHIFT
       || GET_CODE (op) == ASHIFTRT
       || GET_CODE (op) == LSHIFTRT
       || GET_CODE (op) == ROTATERT
       || GET_CODE (op) == ROTATE)
      && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);

  if (GET_CODE (op) == MULT
      && CONST_INT_P (XEXP (op, 1))
      && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
    return XEXP (op, 0);

  return x;
}
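/* Example: (ashift (reg:DI x1) (const_int 3)) strips to (reg:DI x1);
   so does (mult (reg:DI x1) (const_int 8)), since a multiply by a
   power of two is really a shift.  A multiply by a non-power-of-two
   constant is returned unchanged.  */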
/* Helper function for rtx cost calculation.  Strip an extend
   expression from X.  Returns the inner operand if successful, or the
   original expression on failure.  We deal with a number of possible
   canonicalization variations here.  */
static rtx
aarch64_strip_extend (rtx x)
{
  rtx op = x;

  /* Zero and sign extraction of a widened value.  */
  if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
      && XEXP (op, 2) == const0_rtx
      && GET_CODE (XEXP (op, 0)) == MULT
      && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
					 XEXP (op, 1)))
    return XEXP (XEXP (op, 0), 0);

  /* It can also be represented (for zero-extend) as an AND with an
     immediate.  */
  if (GET_CODE (op) == AND
      && GET_CODE (XEXP (op, 0)) == MULT
      && CONST_INT_P (XEXP (XEXP (op, 0), 1))
      && CONST_INT_P (XEXP (op, 1))
      && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
			   INTVAL (XEXP (op, 1))) != 0)
    return XEXP (XEXP (op, 0), 0);

  /* Now handle extended register, as this may also have an optional
     left shift by 1..4.  */
  if (GET_CODE (op) == ASHIFT
      && CONST_INT_P (XEXP (op, 1))
      && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
    op = XEXP (op, 0);

  if (GET_CODE (op) == ZERO_EXTEND
      || GET_CODE (op) == SIGN_EXTEND)
    op = XEXP (op, 0);

  if (op != x)
    return op;

  return x;
}
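/* Example: (ashift (zero_extend:DI (reg:SI w0)) (const_int 2)) is a
   canonical form of an extended-register operand such as
   "w0, uxtw #2"; the ASHIFT case peels the shift and the extend case
   then peels the ZERO_EXTEND, so the result is (reg:SI w0).  */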
/* Return true iff CODE is a shift supported in combination
   with arithmetic instructions.  */
static bool
aarch64_shift_p (enum rtx_code code)
{
  return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
}
/* Helper function for rtx cost calculation.  Calculate the cost of
   a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
   Return the calculated cost of the expression, recursing manually in to
   operands where needed.  */
static int
aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
{
  rtx op0, op1;
  const struct cpu_cost_table *extra_cost
    = aarch64_tune_params->insn_extra_cost;
  int cost = 0;
  bool compound_p = (outer == PLUS || outer == MINUS);
  machine_mode mode = GET_MODE (x);

  gcc_checking_assert (code == MULT);

  op0 = XEXP (x, 0);
  op1 = XEXP (x, 1);

  if (VECTOR_MODE_P (mode))
    mode = GET_MODE_INNER (mode);

  /* Integer multiply/fma.  */
  if (GET_MODE_CLASS (mode) == MODE_INT)
    {
      /* The multiply will be canonicalized as a shift, cost it as such.  */
      if (aarch64_shift_p (GET_CODE (x))
	  || (CONST_INT_P (op1)
	      && exact_log2 (INTVAL (op1)) > 0))
	{
	  bool is_extend = GET_CODE (op0) == ZERO_EXTEND
			   || GET_CODE (op0) == SIGN_EXTEND;
	  if (speed)
	    {
	      if (compound_p)
		{
		  if (REG_P (op1))
		    /* ARITH + shift-by-register.  */
		    cost += extra_cost->alu.arith_shift_reg;
		  else if (is_extend)
		    /* ARITH + extended register.  We don't have a cost field
		       for ARITH+EXTEND+SHIFT, so use extend_arith here.  */
		    cost += extra_cost->alu.extend_arith;
		  else
		    /* ARITH + shift-by-immediate.  */
		    cost += extra_cost->alu.arith_shift;
		}
	      else
		/* LSL (immediate).  */
		cost += extra_cost->alu.shift;
	    }

	  /* Strip extends as we will have costed them in the case above.  */
	  if (is_extend)
	    op0 = aarch64_strip_extend (op0);

	  cost += rtx_cost (op0, GET_CODE (op0), 0, speed);

	  return cost;
	}

      /* MNEG or [US]MNEGL.  Extract the NEG operand and indicate that it's a
	 compound and let the below cases handle it.  After all, MNEG is a
	 special-case alias of MSUB.  */
      if (GET_CODE (op0) == NEG)
	{
	  op0 = XEXP (op0, 0);
	  compound_p = true;
	}

      /* Integer multiplies or FMAs have zero/sign extending variants.  */
      if ((GET_CODE (op0) == ZERO_EXTEND
	   && GET_CODE (op1) == ZERO_EXTEND)
	  || (GET_CODE (op0) == SIGN_EXTEND
	      && GET_CODE (op1) == SIGN_EXTEND))
	{
	  cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
		  + rtx_cost (XEXP (op1, 0), MULT, 1, speed);

	  if (speed)
	    {
	      if (compound_p)
		/* SMADDL/UMADDL/UMSUBL/SMSUBL.  */
		cost += extra_cost->mult[0].extend_add;
	      else
		/* MUL/SMULL/UMULL.  */
		cost += extra_cost->mult[0].extend;
	    }

	  return cost;
	}

      /* This is either an integer multiply or a MADD.  In both cases
	 we want to recurse and cost the operands.  */
      cost += rtx_cost (op0, MULT, 0, speed)
	      + rtx_cost (op1, MULT, 1, speed);

      if (speed)
	{
	  if (compound_p)
	    /* MADD/MSUB.  */
	    cost += extra_cost->mult[mode == DImode].add;
	  else
	    /* MUL.  */
	    cost += extra_cost->mult[mode == DImode].simple;
	}

      return cost;
    }
  else
    {
      if (speed)
	{
	  /* Floating-point FMA/FMUL can also support negations of the
	     operands.  */
	  if (GET_CODE (op0) == NEG)
	    op0 = XEXP (op0, 0);
	  if (GET_CODE (op1) == NEG)
	    op1 = XEXP (op1, 0);

	  if (compound_p)
	    /* FMADD/FNMADD/FNMSUB/FMSUB.  */
	    cost += extra_cost->fp[mode == DFmode].fma;
	  else
	    /* FMUL/FNMUL.  */
	    cost += extra_cost->fp[mode == DFmode].mult;
	}

      cost += rtx_cost (op0, MULT, 0, speed)
	      + rtx_cost (op1, MULT, 1, speed);
      return cost;
    }
}
static int
aarch64_address_cost (rtx x,
		      machine_mode mode,
		      addr_space_t as ATTRIBUTE_UNUSED,
		      bool speed)
{
  enum rtx_code c = GET_CODE (x);
  const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
  struct aarch64_address_info info;
  int cost = 0;
  info.shift = 0;

  if (!aarch64_classify_address (&info, x, mode, c, false))
    {
      if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
	{
	  /* This is a CONST or SYMBOL ref which will be split
	     in a different way depending on the code model in use.
	     Cost it through the generic infrastructure.  */
	  int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
	  /* Divide through by the cost of one instruction to
	     bring it to the same units as the address costs.  */
	  cost_symbol_ref /= COSTS_N_INSNS (1);
	  /* The cost is then the cost of preparing the address,
	     followed by an immediate (possibly 0) offset.  */
	  return cost_symbol_ref + addr_cost->imm_offset;
	}
      else
	{
	  /* This is most likely a jump table from a case
	     statement.  */
	  return addr_cost->register_offset;
	}
    }

  switch (info.type)
    {
    case ADDRESS_LO_SUM:
    case ADDRESS_SYMBOLIC:
    case ADDRESS_REG_IMM:
      cost += addr_cost->imm_offset;
      break;

    case ADDRESS_REG_WB:
      if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
	cost += addr_cost->pre_modify;
      else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
	cost += addr_cost->post_modify;
      else
	gcc_unreachable ();
      break;

    case ADDRESS_REG_REG:
      cost += addr_cost->register_offset;
      break;

    case ADDRESS_REG_UXTW:
    case ADDRESS_REG_SXTW:
      cost += addr_cost->register_extend;
      break;

    default:
      gcc_unreachable ();
    }

  if (info.shift > 0)
    {
      /* For the sake of calculating the cost of the shifted register
	 component, we can treat same sized modes in the same way.  */
      switch (GET_MODE_BITSIZE (mode))
	{
	case 16:
	  cost += addr_cost->addr_scale_costs.hi;
	  break;

	case 32:
	  cost += addr_cost->addr_scale_costs.si;
	  break;

	case 64:
	  cost += addr_cost->addr_scale_costs.di;
	  break;

	/* We can't tell, or this is a 128-bit vector.  */
	default:
	  cost += addr_cost->addr_scale_costs.ti;
	  break;
	}
    }

  return cost;
}
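/* For example, an address such as [x0, w1, sxtw #2] used for an SImode
   access is classified as ADDRESS_REG_SXTW with a non-zero shift, so
   its cost is register_extend plus addr_scale_costs.si; the same
   address without the scale would pay only register_extend.  */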
/* Return the cost of a branch.  If SPEED_P is true then the compiler is
   optimizing for speed.  If PREDICTABLE_P is true then the branch is
   predicted to be taken.  */
static int
aarch64_branch_cost (bool speed_p, bool predictable_p)
{
  /* When optimizing for speed, use the cost of unpredictable branches.  */
  const struct cpu_branch_cost *branch_costs =
    aarch64_tune_params->branch_costs;

  if (!speed_p || predictable_p)
    return branch_costs->predictable;
  else
    return branch_costs->unpredictable;
}
/* Return true if the RTX X in mode MODE is a zero or sign extract
   usable in an ADD or SUB (extended register) instruction.  */
static bool
aarch64_rtx_arith_op_extract_p (rtx x, machine_mode mode)
{
  /* Catch add with a sign extract.
     This is add_<optab><mode>_multp2.  */
  if (GET_CODE (x) == SIGN_EXTRACT
      || GET_CODE (x) == ZERO_EXTRACT)
    {
      rtx op0 = XEXP (x, 0);
      rtx op1 = XEXP (x, 1);
      rtx op2 = XEXP (x, 2);

      if (GET_CODE (op0) == MULT
	  && CONST_INT_P (op1)
	  && op2 == const0_rtx
	  && CONST_INT_P (XEXP (op0, 1))
	  && aarch64_is_extend_from_extract (mode,
					     XEXP (op0, 1),
					     op1))
	return true;
    }

  return false;
}

static bool
aarch64_frint_unspec_p (unsigned int u)
{
  switch (u)
    {
    case UNSPEC_FRINTZ:
    case UNSPEC_FRINTP:
    case UNSPEC_FRINTM:
    case UNSPEC_FRINTA:
    case UNSPEC_FRINTN:
    case UNSPEC_FRINTX:
    case UNSPEC_FRINTI:
      return true;

    default:
      return false;
    }
}
/* Return true iff X is an rtx that will match an extr instruction
   i.e. as described in the *extr<mode>5_insn family of patterns.
   OP0 and OP1 will be set to the operands of the shifts involved
   on success and will be NULL_RTX otherwise.  */

static bool
aarch64_extr_rtx_p (rtx x, rtx *res_op0, rtx *res_op1)
{
  rtx op0, op1;
  machine_mode mode = GET_MODE (x);

  *res_op0 = NULL_RTX;
  *res_op1 = NULL_RTX;

  if (GET_CODE (x) != IOR)
    return false;

  op0 = XEXP (x, 0);
  op1 = XEXP (x, 1);

  if ((GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
      || (GET_CODE (op1) == ASHIFT && GET_CODE (op0) == LSHIFTRT))
    {
      /* Canonicalise locally to ashift in op0, lshiftrt in op1.  */
      if (GET_CODE (op1) == ASHIFT)
	std::swap (op0, op1);

      if (!CONST_INT_P (XEXP (op0, 1)) || !CONST_INT_P (XEXP (op1, 1)))
	return false;

      unsigned HOST_WIDE_INT shft_amnt_0 = UINTVAL (XEXP (op0, 1));
      unsigned HOST_WIDE_INT shft_amnt_1 = UINTVAL (XEXP (op1, 1));

      if (shft_amnt_0 < GET_MODE_BITSIZE (mode)
	  && shft_amnt_0 + shft_amnt_1 == GET_MODE_BITSIZE (mode))
	{
	  *res_op0 = XEXP (op0, 0);
	  *res_op1 = XEXP (op1, 0);
	  return true;
	}
    }

  return false;
}
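/* Example, in DImode: (ior (ashift (reg x0) (const_int 48))
   (lshiftrt (reg x1) (const_int 16))) has 48 + 16 == 64, the mode
   bitsize, so it matches EXTR; *res_op0 is set to x0 and *res_op1 to
   x1.  If the shift amounts do not sum to the bitsize the rtx is not
   an EXTR and the function returns false.  */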
/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
   storing it in *COST.  Result is true if the total cost of the operation
   has now been calculated.  */
static bool
aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
{
  rtx inner;
  rtx comparator;
  enum rtx_code cmpcode;

  if (COMPARISON_P (op0))
    {
      inner = XEXP (op0, 0);
      comparator = XEXP (op0, 1);
      cmpcode = GET_CODE (op0);
    }
  else
    {
      inner = op0;
      comparator = const0_rtx;
      cmpcode = NE;
    }

  if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
    {
      /* Conditional branch.  */
      if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
	return true;
      else
	{
	  if (cmpcode == NE || cmpcode == EQ)
	    {
	      if (comparator == const0_rtx)
		{
		  /* TBZ/TBNZ/CBZ/CBNZ.  */
		  if (GET_CODE (inner) == ZERO_EXTRACT)
		    /* TBZ/TBNZ.  */
		    *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT,
				       0, speed);
		  else
		    /* CBZ/CBNZ.  */
		    *cost += rtx_cost (inner, cmpcode, 0, speed);

		  return true;
		}
	    }
	  else if (cmpcode == LT || cmpcode == GE)
	    {
	      /* TBZ/TBNZ.  */
	      if (comparator == const0_rtx)
		return true;
	    }
	}
    }
  else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
    {
      /* It's a conditional operation based on the status flags,
	 so it must be some flavor of CSEL.  */

      /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL.  */
      if (GET_CODE (op1) == NEG
	  || GET_CODE (op1) == NOT
	  || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
	op1 = XEXP (op1, 0);

      *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed);
      *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed);
      return true;
    }

  /* We don't know what this is, cost all operands.  */
  return false;
}
/* Calculate the cost of calculating X, storing it in *COST.  Result
   is true if the total cost of the operation has now been calculated.  */
static bool
aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
		   int param ATTRIBUTE_UNUSED, int *cost, bool speed)
{
  rtx op0, op1, op2;
  const struct cpu_cost_table *extra_cost
    = aarch64_tune_params->insn_extra_cost;
  machine_mode mode = GET_MODE (x);

  /* By default, assume that everything has equivalent cost to the
     cheapest instruction.  Any additional costs are applied as a delta
     above this default.  */
  *cost = COSTS_N_INSNS (1);

  switch (code)
    {
    case SET:
      /* The cost depends entirely on the operands to SET.  */
      *cost = 0;
      op0 = SET_DEST (x);
      op1 = SET_SRC (x);
      switch (GET_CODE (op0))
	{
	case MEM:
	  if (speed)
	    {
	      rtx address = XEXP (op0, 0);
	      if (VECTOR_MODE_P (mode))
		*cost += extra_cost->ldst.storev;
	      else if (GET_MODE_CLASS (mode) == MODE_INT)
		*cost += extra_cost->ldst.store;
	      else if (mode == SFmode)
		*cost += extra_cost->ldst.storef;
	      else if (mode == DFmode)
		*cost += extra_cost->ldst.stored;

	      *cost +=
		COSTS_N_INSNS (aarch64_address_cost (address, mode,
						     0, speed));
	    }

	  *cost += rtx_cost (op1, SET, 1, speed);
	  return true;

	case SUBREG:
	  if (! REG_P (SUBREG_REG (op0)))
	    *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);

	  /* Fall through.  */
	case REG:
	  /* The cost is one per vector-register copied.  */
	  if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1))
	    {
	      int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
			      / GET_MODE_SIZE (V4SImode);
	      *cost = COSTS_N_INSNS (n_minus_1 + 1);
	    }
	  /* const0_rtx is in general free, but we will use an
	     instruction to set a register to 0.  */
	  else if (REG_P (op1) || op1 == const0_rtx)
	    {
	      /* The cost is 1 per register copied.  */
	      int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
			      / UNITS_PER_WORD;
	      *cost = COSTS_N_INSNS (n_minus_1 + 1);
	    }
	  else
	    /* Cost is just the cost of the RHS of the set.  */
	    *cost += rtx_cost (op1, SET, 1, speed);
	  return true;

	case ZERO_EXTRACT:
	case SIGN_EXTRACT:
	  /* Bit-field insertion.  Strip any redundant widening of
	     the RHS to meet the width of the target.  */
	  if (GET_CODE (op1) == SUBREG)
	    op1 = SUBREG_REG (op1);
	  if ((GET_CODE (op1) == ZERO_EXTEND
	       || GET_CODE (op1) == SIGN_EXTEND)
	      && CONST_INT_P (XEXP (op0, 1))
	      && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
		  >= INTVAL (XEXP (op0, 1))))
	    op1 = XEXP (op1, 0);

	  if (CONST_INT_P (op1))
	    {
	      /* MOV immediate is assumed to always be cheap.  */
	      *cost = COSTS_N_INSNS (1);
	    }
	  else
	    {
	      /* BFM.  */
	      if (speed)
		*cost += extra_cost->alu.bfi;
	      *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
	    }

	  return true;

	default:
	  /* We can't make sense of this, assume default cost.  */
	  *cost = COSTS_N_INSNS (1);
	  return false;
	}
      return false;
    case CONST_INT:
      /* If an instruction can incorporate a constant within the
	 instruction, the instruction's expression avoids calling
	 rtx_cost() on the constant.  If rtx_cost() is called on a
	 constant, then it is usually because the constant must be
	 moved into a register by one or more instructions.

	 The exception is constant 0, which can be expressed
	 as XZR/WZR and is therefore free.  The exception to this is
	 if we have (set (reg) (const0_rtx)) in which case we must cost
	 the move.  However, we can catch that when we cost the SET, so
	 we don't need to consider that here.  */
      if (x == const0_rtx)
	*cost = 0;
      else
	{
	  /* To an approximation, building any other constant is
	     proportionally expensive to the number of instructions
	     required to build that constant.  This is true whether we
	     are compiling for SPEED or otherwise.  */
	  *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
				 (NULL_RTX, x, false, mode));
	}
      return true;
    case CONST_DOUBLE:
      if (speed)
	{
	  /* mov[df,sf]_aarch64.  */
	  if (aarch64_float_const_representable_p (x))
	    /* FMOV (scalar immediate).  */
	    *cost += extra_cost->fp[mode == DFmode].fpconst;
	  else if (!aarch64_float_const_zero_rtx_p (x))
	    {
	      /* This will be a load from memory.  */
	      if (mode == DFmode)
		*cost += extra_cost->ldst.loadd;
	      else
		*cost += extra_cost->ldst.loadf;
	    }
	  else
	    {
	      /* Otherwise this is +0.0.  We get this using MOVI d0, #0
		 or MOV v0.s[0], wzr - neither of which are modeled by the
		 cost tables.  Just use the default cost.  */
	    }
	}
      return true;
    case MEM:
      if (speed)
	{
	  /* For loads we want the base cost of a load, plus an
	     approximation for the additional cost of the addressing
	     mode.  */
	  rtx address = XEXP (x, 0);
	  if (VECTOR_MODE_P (mode))
	    *cost += extra_cost->ldst.loadv;
	  else if (GET_MODE_CLASS (mode) == MODE_INT)
	    *cost += extra_cost->ldst.load;
	  else if (mode == SFmode)
	    *cost += extra_cost->ldst.loadf;
	  else if (mode == DFmode)
	    *cost += extra_cost->ldst.loadd;

	  *cost +=
	    COSTS_N_INSNS (aarch64_address_cost (address, mode,
						 0, speed));
	}

      return true;
    case NEG:
      op0 = XEXP (x, 0);

      if (VECTOR_MODE_P (mode))
	{
	  if (speed)
	    /* FNEG.  */
	    *cost += extra_cost->vect.alu;
	  return false;
	}

      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	{
	  if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
	      || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
	    {
	      /* CSETM.  */
	      *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
	      return true;
	    }

	  /* Cost this as SUB wzr, X.  */
	  op0 = CONST0_RTX (GET_MODE (x));
	  op1 = XEXP (x, 0);
	  goto cost_minus;
	}

      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
	{
	  /* Support (neg(fma...)) as a single instruction only if
	     sign of zeros is unimportant.  This matches the decision
	     making in aarch64.md.  */
	  if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
	    {
	      /* FNMADD.  */
	      *cost = rtx_cost (op0, NEG, 0, speed);
	      return true;
	    }
	  if (speed)
	    /* FNEG.  */
	    *cost += extra_cost->fp[mode == DFmode].neg;
	  return false;
	}

      return false;
    case CLRSB:
    case CLZ:
      if (speed)
	{
	  if (VECTOR_MODE_P (mode))
	    *cost += extra_cost->vect.alu;
	  else
	    *cost += extra_cost->alu.clz;
	}

      return false;
    case COMPARE:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (op1 == const0_rtx
	  && GET_CODE (op0) == AND)
	{
	  x = op0;
	  goto cost_logic;
	}

      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	{
	  /* TODO: A write to the CC flags possibly costs extra, this
	     needs encoding in the cost tables.  */

	  /* CC_ZESWPmode supports zero extend for free.  */
	  if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
	    op0 = XEXP (op0, 0);

	  /* ANDS.  */
	  if (GET_CODE (op0) == AND)
	    {
	      x = op0;
	      goto cost_logic;
	    }

	  if (GET_CODE (op0) == PLUS)
	    {
	      /* ADDS (and CMN alias).  */
	      x = op0;
	      goto cost_plus;
	    }

	  if (GET_CODE (op0) == MINUS)
	    {
	      /* SUBS.  */
	      x = op0;
	      goto cost_minus;
	    }

	  if (GET_CODE (op1) == NEG)
	    {
	      /* CMN.  */
	      if (speed)
		*cost += extra_cost->alu.arith;

	      *cost += rtx_cost (op0, COMPARE, 0, speed);
	      *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed);
	      return true;
	    }

	  /* CMP.

	     Compare can freely swap the order of operands, and
	     canonicalization puts the more complex operation first.
	     But the integer MINUS logic expects the shift/extend
	     operation in op1.  */
	  if (! (REG_P (op0)
		 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
	    {
	      op0 = XEXP (x, 1);
	      op1 = XEXP (x, 0);
	    }
	  goto cost_minus;
	}

      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
	{
	  /* FCMP.  */
	  if (speed)
	    *cost += extra_cost->fp[mode == DFmode].compare;

	  if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
	    {
	      *cost += rtx_cost (op0, COMPARE, 0, speed);
	      /* FCMP supports constant 0.0 for no extra cost.  */
	      return true;
	    }
	  return false;
	}

      if (VECTOR_MODE_P (mode))
	{
	  /* Vector compare.  */
	  if (speed)
	    *cost += extra_cost->vect.alu;

	  if (aarch64_float_const_zero_rtx_p (op1))
	    {
	      /* Vector cm (eq|ge|gt|lt|le) supports constant 0.0 for no extra
		 cost.  */
	      return true;
	    }
	  return false;
	}
      return false;
    case MINUS:
      {
	op0 = XEXP (x, 0);
	op1 = XEXP (x, 1);

cost_minus:
	*cost += rtx_cost (op0, MINUS, 0, speed);

	/* Detect valid immediates.  */
	if ((GET_MODE_CLASS (mode) == MODE_INT
	     || (GET_MODE_CLASS (mode) == MODE_CC
		 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
	    && CONST_INT_P (op1)
	    && aarch64_uimm12_shift (INTVAL (op1)))
	  {
	    if (speed)
	      /* SUB(S) (immediate).  */
	      *cost += extra_cost->alu.arith;
	    return true;
	  }

	/* Look for SUB (extended register).  */
	if (aarch64_rtx_arith_op_extract_p (op1, mode))
	  {
	    if (speed)
	      *cost += extra_cost->alu.extend_arith;

	    *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
			       (enum rtx_code) GET_CODE (op1),
			       0, speed);
	    return true;
	  }

	rtx new_op1 = aarch64_strip_extend (op1);

	/* Cost this as an FMA-alike operation.  */
	if ((GET_CODE (new_op1) == MULT
	     || aarch64_shift_p (GET_CODE (new_op1)))
	    && code != COMPARE)
	  {
	    *cost += aarch64_rtx_mult_cost (new_op1, MULT,
					    (enum rtx_code) code,
					    speed);
	    return true;
	  }

	*cost += rtx_cost (new_op1, MINUS, 1, speed);

	if (speed)
	  {
	    if (VECTOR_MODE_P (mode))
	      /* Vector SUB.  */
	      *cost += extra_cost->vect.alu;
	    else if (GET_MODE_CLASS (mode) == MODE_INT)
	      /* SUB(S).  */
	      *cost += extra_cost->alu.arith;
	    else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	      /* FSUB.  */
	      *cost += extra_cost->fp[mode == DFmode].addsub;
	  }
	return true;
      }
    case PLUS:
      {
	rtx new_op0;

	op0 = XEXP (x, 0);
	op1 = XEXP (x, 1);

cost_plus:
	if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
	    || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
	  {
	    /* CSINC.  */
	    *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
	    *cost += rtx_cost (op1, PLUS, 1, speed);
	    return true;
	  }

	if (GET_MODE_CLASS (mode) == MODE_INT
	    && CONST_INT_P (op1)
	    && aarch64_uimm12_shift (INTVAL (op1)))
	  {
	    *cost += rtx_cost (op0, PLUS, 0, speed);

	    if (speed)
	      /* ADD (immediate).  */
	      *cost += extra_cost->alu.arith;
	    return true;
	  }

	*cost += rtx_cost (op1, PLUS, 1, speed);

	/* Look for ADD (extended register).  */
	if (aarch64_rtx_arith_op_extract_p (op0, mode))
	  {
	    if (speed)
	      *cost += extra_cost->alu.extend_arith;

	    *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
			       (enum rtx_code) GET_CODE (op0),
			       0, speed);
	    return true;
	  }

	/* Strip any extend, leave shifts behind as we will
	   cost them through mult_cost.  */
	new_op0 = aarch64_strip_extend (op0);

	if (GET_CODE (new_op0) == MULT
	    || aarch64_shift_p (GET_CODE (new_op0)))
	  {
	    *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
					    speed);
	    return true;
	  }

	*cost += rtx_cost (new_op0, PLUS, 0, speed);

	if (speed)
	  {
	    if (VECTOR_MODE_P (mode))
	      /* Vector ADD.  */
	      *cost += extra_cost->vect.alu;
	    else if (GET_MODE_CLASS (mode) == MODE_INT)
	      /* ADD.  */
	      *cost += extra_cost->alu.arith;
	    else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	      /* FADD.  */
	      *cost += extra_cost->fp[mode == DFmode].addsub;
	  }
	return true;
      }
    case BSWAP:
      *cost = COSTS_N_INSNS (1);

      if (speed)
	{
	  if (VECTOR_MODE_P (mode))
	    *cost += extra_cost->vect.alu;
	  else
	    *cost += extra_cost->alu.rev;
	}
      return false;

    case IOR:
      if (aarch_rev16_p (x))
	{
	  *cost = COSTS_N_INSNS (1);

	  if (speed)
	    {
	      if (VECTOR_MODE_P (mode))
		*cost += extra_cost->vect.alu;
	      else
		*cost += extra_cost->alu.rev;
	    }
	  return true;
	}

      if (aarch64_extr_rtx_p (x, &op0, &op1))
	{
	  *cost += rtx_cost (op0, IOR, 0, speed)
		   + rtx_cost (op1, IOR, 1, speed);
	  if (speed)
	    *cost += extra_cost->alu.shift;

	  return true;
	}
      /* Fall through.  */
    case XOR:
    case AND:
    cost_logic:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      if (VECTOR_MODE_P (mode))
	{
	  if (speed)
	    *cost += extra_cost->vect.alu;
	  return true;
	}

      if (code == AND
	  && GET_CODE (op0) == MULT
	  && CONST_INT_P (XEXP (op0, 1))
	  && CONST_INT_P (op1)
	  && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
			       INTVAL (op1)) != 0)
	{
	  /* This is a UBFM/SBFM.  */
	  *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
	  if (speed)
	    *cost += extra_cost->alu.bfx;
	  return true;
	}

      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	{
	  /* We possibly get the immediate for free, this is not
	     modelled.  */
	  if (CONST_INT_P (op1)
	      && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
	    {
	      *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);

	      if (speed)
		*cost += extra_cost->alu.logical;

	      return true;
	    }
	  else
	    {
	      rtx new_op0 = op0;

	      /* Handle ORN, EON, or BIC.  */
	      if (GET_CODE (op0) == NOT)
		op0 = XEXP (op0, 0);

	      new_op0 = aarch64_strip_shift (op0);

	      /* If we had a shift on op0 then this is a logical-shift-
		 by-register/immediate operation.  Otherwise, this is just
		 a logical operation.  */
	      if (speed)
		{
		  if (new_op0 != op0)
		    {
		      /* Shift by immediate.  */
		      if (CONST_INT_P (XEXP (op0, 1)))
			*cost += extra_cost->alu.log_shift;
		      else
			*cost += extra_cost->alu.log_shift_reg;
		    }
		  else
		    *cost += extra_cost->alu.logical;
		}

	      /* In both cases we want to cost both operands.  */
	      *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
		       + rtx_cost (op1, (enum rtx_code) code, 1, speed);

	      return true;
	    }
	}
      return false;
    case NOT:
      x = XEXP (x, 0);
      op0 = aarch64_strip_shift (x);

      if (VECTOR_MODE_P (mode))
	{
	  /* Vector NOT.  */
	  *cost += extra_cost->vect.alu;
	  return false;
	}

      /* MVN-shifted-reg.  */
      if (op0 != x)
	{
	  *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);

	  if (speed)
	    *cost += extra_cost->alu.log_shift;

	  return true;
	}
      /* EON can have two forms: (xor (not a) b) but also (not (xor a b)).
	 Handle the second form here taking care that 'a' in the above can
	 be a shift.  */
      else if (GET_CODE (op0) == XOR)
	{
	  rtx newop0 = XEXP (op0, 0);
	  rtx newop1 = XEXP (op0, 1);
	  rtx op0_stripped = aarch64_strip_shift (newop0);

	  *cost += rtx_cost (newop1, (enum rtx_code) code, 1, speed)
		   + rtx_cost (op0_stripped, XOR, 0, speed);

	  if (speed)
	    {
	      if (op0_stripped != newop0)
		*cost += extra_cost->alu.log_shift;
	      else
		*cost += extra_cost->alu.logical;
	    }

	  return true;
	}
      /* MVN.  */
      if (speed)
	*cost += extra_cost->alu.logical;

      return false;
    case ZERO_EXTEND:

      op0 = XEXP (x, 0);
      /* If a value is written in SI mode, then zero extended to DI
	 mode, the operation will in general be free as a write to
	 a 'w' register implicitly zeroes the upper bits of an 'x'
	 register.  However, if this is

	   (set (reg) (zero_extend (reg)))

	 we must cost the explicit register move.  */
      if (mode == DImode
	  && GET_MODE (op0) == SImode
	  && outer == SET)
	{
	  int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);

	  if (!op_cost && speed)
	    /* MOV.  */
	    *cost += extra_cost->alu.extend;
	  else
	    /* Free, the cost is that of the SI mode operation.  */
	    *cost = op_cost;

	  return true;
	}
      else if (MEM_P (XEXP (x, 0)))
	{
	  /* All loads can zero extend to any size for free.  */
	  *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
	  return true;
	}

      if (speed)
	{
	  if (VECTOR_MODE_P (mode))
	    /* UMOV.  */
	    *cost += extra_cost->vect.alu;
	  else
	    /* UXTB/UXTH.  */
	    *cost += extra_cost->alu.extend;
	}
      return false;

    case SIGN_EXTEND:
      if (MEM_P (XEXP (x, 0)))
	{
	  /* LDRSH.  */
	  if (speed)
	    {
	      rtx address = XEXP (XEXP (x, 0), 0);
	      *cost += extra_cost->ldst.load_sign_extend;

	      *cost +=
		COSTS_N_INSNS (aarch64_address_cost (address, mode,
						     0, speed));
	    }
	  return true;
	}

      if (speed)
	{
	  if (VECTOR_MODE_P (mode))
	    *cost += extra_cost->vect.alu;
	  else
	    *cost += extra_cost->alu.extend;
	}
      return false;
    case ASHIFT:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (CONST_INT_P (op1))
	{
	  if (speed)
	    {
	      if (VECTOR_MODE_P (mode))
		/* Vector shift (immediate).  */
		*cost += extra_cost->vect.alu;
	      else
		/* LSL (immediate), UBFM, UBFIZ and friends.  These are all
		   aliases.  */
		*cost += extra_cost->alu.shift;
	    }

	  /* We can incorporate zero/sign extend for free.  */
	  if (GET_CODE (op0) == ZERO_EXTEND
	      || GET_CODE (op0) == SIGN_EXTEND)
	    op0 = XEXP (op0, 0);

	  *cost += rtx_cost (op0, ASHIFT, 0, speed);
	  return true;
	}
      else
	{
	  if (speed)
	    {
	      if (VECTOR_MODE_P (mode))
		/* Vector shift (register).  */
		*cost += extra_cost->vect.alu;
	      else
		/* LSLV.  */
		*cost += extra_cost->alu.shift_reg;
	    }
	  return false;  /* All arguments need to be in registers.  */
	}

    case ROTATE:
    case ROTATERT:
    case LSHIFTRT:
    case ASHIFTRT:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (CONST_INT_P (op1))
	{
	  /* ASR (immediate) and friends.  */
	  if (speed)
	    {
	      if (VECTOR_MODE_P (mode))
		*cost += extra_cost->vect.alu;
	      else
		*cost += extra_cost->alu.shift;
	    }

	  *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
	  return true;
	}
      else
	{
	  /* ASR (register) and friends.  */
	  if (speed)
	    {
	      if (VECTOR_MODE_P (mode))
		*cost += extra_cost->vect.alu;
	      else
		*cost += extra_cost->alu.shift_reg;
	    }
	  return false;  /* All arguments need to be in registers.  */
	}
    case SYMBOL_REF:
      if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
	{
	  /* LDR.  */
	  if (speed)
	    *cost += extra_cost->ldst.load;
	}
      else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
	       || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
	{
	  /* ADRP, followed by ADD.  */
	  *cost += COSTS_N_INSNS (1);
	  if (speed)
	    *cost += 2 * extra_cost->alu.arith;
	}
      else if (aarch64_cmodel == AARCH64_CMODEL_TINY
	       || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
	{
	  /* ADR.  */
	  if (speed)
	    *cost += extra_cost->alu.arith;
	}

      if (flag_pic)
	{
	  /* One extra load instruction, after accessing the GOT.  */
	  *cost += COSTS_N_INSNS (1);
	  if (speed)
	    *cost += extra_cost->ldst.load;
	}
      return true;

    case HIGH:
    case LO_SUM:
      /* ADRP/ADD (immediate).  */
      if (speed)
	*cost += extra_cost->alu.arith;
      return true;

    case ZERO_EXTRACT:
    case SIGN_EXTRACT:
      /* UBFX/SBFX.  */
      if (speed)
	{
	  if (VECTOR_MODE_P (mode))
	    *cost += extra_cost->vect.alu;
	  else
	    *cost += extra_cost->alu.bfx;
	}

      /* We can trust that the immediates used will be correct (there
	 are no by-register forms), so we need only cost op0.  */
      *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed);
      return true;
    case MULT:
      *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
      /* aarch64_rtx_mult_cost always handles recursion to its
	 operands.  */
      return true;

    case MOD:
    case UMOD:
      if (speed)
	{
	  if (VECTOR_MODE_P (mode))
	    *cost += extra_cost->vect.alu;
	  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
	    *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
		      + extra_cost->mult[GET_MODE (x) == DImode].idiv);
	  else if (GET_MODE (x) == DFmode)
	    *cost += (extra_cost->fp[1].mult
		      + extra_cost->fp[1].div);
	  else if (GET_MODE (x) == SFmode)
	    *cost += (extra_cost->fp[0].mult
		      + extra_cost->fp[0].div);
	}
      return false;  /* All arguments need to be in registers.  */

    case DIV:
    case UDIV:
    case SQRT:
      if (speed)
	{
	  if (VECTOR_MODE_P (mode))
	    *cost += extra_cost->vect.alu;
	  else if (GET_MODE_CLASS (mode) == MODE_INT)
	    /* There is no integer SQRT, so only DIV and UDIV can get
	       here.  */
	    *cost += extra_cost->mult[mode == DImode].idiv;
	  else
	    *cost += extra_cost->fp[mode == DFmode].div;
	}
      return false;  /* All arguments need to be in registers.  */

    case IF_THEN_ELSE:
      return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
					 XEXP (x, 2), cost, speed);
    case EQ:
    case NE:
    case GT:
    case GTU:
    case LT:
    case LTU:
    case GE:
    case GEU:
    case LE:
    case LEU:
      return false; /* All arguments must be in registers.  */

    case FMA:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      op2 = XEXP (x, 2);

      if (speed)
	{
	  if (VECTOR_MODE_P (mode))
	    *cost += extra_cost->vect.alu;
	  else
	    *cost += extra_cost->fp[mode == DFmode].fma;
	}

      /* FMSUB, FNMADD, and FNMSUB are free.  */
      if (GET_CODE (op0) == NEG)
	op0 = XEXP (op0, 0);

      if (GET_CODE (op2) == NEG)
	op2 = XEXP (op2, 0);

      /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
	 and the by-element operand as operand 0.  */
      if (GET_CODE (op1) == NEG)
	op1 = XEXP (op1, 0);

      /* Catch vector-by-element operations.  The by-element operand can
	 either be (vec_duplicate (vec_select (x))) or just
	 (vec_select (x)), depending on whether we are multiplying by
	 a vector or a scalar.

	 Canonicalization is not very good in these cases, FMA4 will put the
	 by-element operand as operand 0, FNMA4 will have it as operand 1.  */
      if (GET_CODE (op0) == VEC_DUPLICATE)
	op0 = XEXP (op0, 0);
      else if (GET_CODE (op1) == VEC_DUPLICATE)
	op1 = XEXP (op1, 0);

      if (GET_CODE (op0) == VEC_SELECT)
	op0 = XEXP (op0, 0);
      else if (GET_CODE (op1) == VEC_SELECT)
	op1 = XEXP (op1, 0);

      /* If the remaining parameters are not registers,
	 get the cost to put them into registers.  */
      *cost += rtx_cost (op0, FMA, 0, speed);
      *cost += rtx_cost (op1, FMA, 1, speed);
      *cost += rtx_cost (op2, FMA, 2, speed);
      return true;
    case FLOAT:
    case UNSIGNED_FLOAT:
      if (speed)
	*cost += extra_cost->fp[mode == DFmode].fromint;
      return false;

    case FLOAT_EXTEND:
      if (speed)
	{
	  if (VECTOR_MODE_P (mode))
	    /* Vector truncate.  */
	    *cost += extra_cost->vect.alu;
	  else
	    *cost += extra_cost->fp[mode == DFmode].widen;
	}
      return false;

    case FLOAT_TRUNCATE:
      if (speed)
	{
	  if (VECTOR_MODE_P (mode))
	    /* Vector conversion.  */
	    *cost += extra_cost->vect.alu;
	  else
	    *cost += extra_cost->fp[mode == DFmode].narrow;
	}
      return false;
    case FIX:
    case UNSIGNED_FIX:
      x = XEXP (x, 0);
      /* Strip the rounding part.  They will all be implemented
	 by the fcvt* family of instructions anyway.  */
      if (GET_CODE (x) == UNSPEC)
	{
	  unsigned int uns_code = XINT (x, 1);

	  if (uns_code == UNSPEC_FRINTA
	      || uns_code == UNSPEC_FRINTM
	      || uns_code == UNSPEC_FRINTN
	      || uns_code == UNSPEC_FRINTP
	      || uns_code == UNSPEC_FRINTZ)
	    x = XVECEXP (x, 0, 0);
	}

      if (speed)
	{
	  if (VECTOR_MODE_P (mode))
	    *cost += extra_cost->vect.alu;
	  else
	    *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
	}
      *cost += rtx_cost (x, (enum rtx_code) code, 0, speed);
      return true;
    case ABS:
      if (VECTOR_MODE_P (mode))
	{
	  /* ABS (vector).  */
	  if (speed)
	    *cost += extra_cost->vect.alu;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  op0 = XEXP (x, 0);

	  /* FABD, which is analogous to FADD.  */
	  if (GET_CODE (op0) == MINUS)
	    {
	      *cost += rtx_cost (XEXP (op0, 0), MINUS, 0, speed);
	      *cost += rtx_cost (XEXP (op0, 1), MINUS, 1, speed);
	      if (speed)
		*cost += extra_cost->fp[mode == DFmode].addsub;

	      return true;
	    }
	  /* Simple FABS is analogous to FNEG.  */
	  if (speed)
	    *cost += extra_cost->fp[mode == DFmode].neg;
	}
      else
	{
	  /* Integer ABS will either be split to
	     two arithmetic instructions, or will be an ABS
	     (scalar), which we don't model.  */
	  *cost = COSTS_N_INSNS (2);
	  if (speed)
	    *cost += 2 * extra_cost->alu.arith;
	}
      return false;
    case SMAX:
    case SMIN:
      if (speed)
	{
	  if (VECTOR_MODE_P (mode))
	    *cost += extra_cost->vect.alu;
	  else
	    /* FMAXNM/FMINNM/FMAX/FMIN.
	       TODO: This may not be accurate for all implementations, but
	       we do not model this in the cost tables.  */
	    *cost += extra_cost->fp[mode == DFmode].addsub;
	}
      return false;

    case UNSPEC:
      /* The floating point round to integer frint* instructions.  */
      if (aarch64_frint_unspec_p (XINT (x, 1)))
	{
	  if (speed)
	    *cost += extra_cost->fp[mode == DFmode].roundint;

	  return false;
	}

      if (XINT (x, 1) == UNSPEC_RBIT)
	{
	  if (speed)
	    *cost += extra_cost->alu.rev;

	  return false;
	}
      break;
    case TRUNCATE:

      /* Decompose <su>muldi3_highpart.  */
      if (/* (truncate:DI  */
	  mode == DImode
	  /*   (lshiftrt:TI  */
	  && GET_MODE (XEXP (x, 0)) == TImode
	  && GET_CODE (XEXP (x, 0)) == LSHIFTRT
	  /*      (mult:TI  */
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	  /*        (ANY_EXTEND:TI (reg:DI))
		    (ANY_EXTEND:TI (reg:DI)))  */
	  && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
	       && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
	      || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
		  && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
	  && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
	  && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
	  /*     (const_int 64)  */
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
	{
	  /* UMULH/SMULH.  */
	  if (speed)
	    *cost += extra_cost->mult[mode == DImode].extend;
	  *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
			     MULT, 0, speed);
	  *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
			     MULT, 1, speed);
	  return true;
	}

      /* Fall through.  */
    default:
      break;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
	     "\nFailed to cost RTX.  Assuming default cost.\n");

  return true;
}
/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
   calculated for X.  This cost is stored in *COST.  Returns true
   if the total cost of X was calculated.  */
static bool
aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
			   int param, int *cost, bool speed)
{
  bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      print_rtl_single (dump_file, x);
      fprintf (dump_file, "\n%s cost: %d (%s)\n",
	       speed ? "Hot" : "Cold",
	       *cost, result ? "final" : "partial");
    }

  return result;
}
static int
aarch64_register_move_cost (machine_mode mode,
			    reg_class_t from_i, reg_class_t to_i)
{
  enum reg_class from = (enum reg_class) from_i;
  enum reg_class to = (enum reg_class) to_i;
  const struct cpu_regmove_cost *regmove_cost
    = aarch64_tune_params->regmove_cost;

  /* Caller save and pointer regs are equivalent to GENERAL_REGS.  */
  if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
    to = GENERAL_REGS;

  if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
    from = GENERAL_REGS;

  /* Moving between GPR and stack cost is the same as GP2GP.  */
  if ((from == GENERAL_REGS && to == STACK_REG)
      || (to == GENERAL_REGS && from == STACK_REG))
    return regmove_cost->GP2GP;

  /* To/From the stack register, we move via the gprs.  */
  if (to == STACK_REG || from == STACK_REG)
    return aarch64_register_move_cost (mode, from, GENERAL_REGS)
	   + aarch64_register_move_cost (mode, GENERAL_REGS, to);

  if (GET_MODE_SIZE (mode) == 16)
    {
      /* 128-bit operations on general registers require 2 instructions.  */
      if (from == GENERAL_REGS && to == GENERAL_REGS)
	return regmove_cost->GP2GP * 2;
      else if (from == GENERAL_REGS)
	return regmove_cost->GP2FP * 2;
      else if (to == GENERAL_REGS)
	return regmove_cost->FP2GP * 2;

      /* When AdvSIMD instructions are disabled it is not possible to move
	 a 128-bit value directly between Q registers.  This is handled in
	 secondary reload.  A general register is used as a scratch to move
	 the upper DI value and the lower DI value is moved directly,
	 hence the cost is the sum of three moves.  */
      if (! TARGET_SIMD)
	return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;

      return regmove_cost->FP2FP;
    }

  if (from == GENERAL_REGS && to == GENERAL_REGS)
    return regmove_cost->GP2GP;
  else if (from == GENERAL_REGS)
    return regmove_cost->GP2FP;
  else if (to == GENERAL_REGS)
    return regmove_cost->FP2GP;

  return regmove_cost->FP2FP;
}
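/* Illustrative reading of the table: a TImode (16-byte) move from
   GENERAL_REGS to FP_REGS costs GP2FP * 2, one FMOV per 64-bit half,
   whereas a DImode move costs a single GP2FP.  With !TARGET_SIMD a
   16-byte FP-to-FP copy bounces one half through a general register,
   hence the GP2FP + FP2GP + FP2FP sum above.  */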
static int
aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
			  reg_class_t rclass ATTRIBUTE_UNUSED,
			  bool in ATTRIBUTE_UNUSED)
{
  return aarch64_tune_params->memmov_cost;
}
/* Return the number of instructions that can be issued per cycle.  */
static int
aarch64_sched_issue_rate (void)
{
  return aarch64_tune_params->issue_rate;
}

static int
aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
{
  int issue_rate = aarch64_sched_issue_rate ();

  return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
}
/* Vectorizer cost model target hooks.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				    tree vectype,
				    int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
    case scalar_stmt:
      return aarch64_tune_params->vec_costs->scalar_stmt_cost;

    case scalar_load:
      return aarch64_tune_params->vec_costs->scalar_load_cost;

    case scalar_store:
      return aarch64_tune_params->vec_costs->scalar_store_cost;

    case vector_stmt:
      return aarch64_tune_params->vec_costs->vec_stmt_cost;

    case vector_load:
      return aarch64_tune_params->vec_costs->vec_align_load_cost;

    case vector_store:
      return aarch64_tune_params->vec_costs->vec_store_cost;

    case vec_to_scalar:
      return aarch64_tune_params->vec_costs->vec_to_scalar_cost;

    case scalar_to_vec:
      return aarch64_tune_params->vec_costs->scalar_to_vec_cost;

    case unaligned_load:
      return aarch64_tune_params->vec_costs->vec_unalign_load_cost;

    case unaligned_store:
      return aarch64_tune_params->vec_costs->vec_unalign_store_cost;

    case cond_branch_taken:
      return aarch64_tune_params->vec_costs->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;

    case vec_perm:
    case vec_promote_demote:
      return aarch64_tune_params->vec_costs->vec_stmt_cost;

    case vec_construct:
      elements = TYPE_VECTOR_SUBPARTS (vectype);
      return elements / 2 + 1;

    default:
      gcc_unreachable ();
    }
}
/* Implement targetm.vectorize.add_stmt_cost.  */
static unsigned
aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
		       struct _stmt_vec_info *stmt_info, int misalign,
		       enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost =
	    aarch64_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
	 vectorized are weighted more heavily.  The value here is
	 a function (linear for now) of the loop nest level.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
	{
	  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
	  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
	  unsigned nest_level = loop_depth (loop);

	  count *= nest_level;
	}

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}
static void initialize_aarch64_code_model (void);

/* Parse the architecture extension string.  */
static void
aarch64_parse_extension (char *str)
{
  /* The extension string is parsed left to right.  */
  const struct aarch64_option_extension *opt = NULL;

  /* Flag to say whether we are adding or removing an extension.  */
  int adding_ext = -1;

  while (str != NULL && *str != 0)
    {
      char *ext;
      size_t len;

      str++;
      ext = strchr (str, '+');

      if (ext != NULL)
	len = ext - str;
      else
	len = strlen (str);

      if (len >= 2 && strncmp (str, "no", 2) == 0)
	{
	  adding_ext = 0;
	  len -= 2;
	  str += 2;
	}
      else if (len > 0)
	adding_ext = 1;

      if (len == 0)
	{
	  error ("missing feature modifier after %qs", adding_ext ? "+"
								  : "+no");
	  return;
	}

      /* Scan over the extensions table trying to find an exact match.  */
      for (opt = all_extensions; opt->name != NULL; opt++)
	{
	  if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
	    {
	      /* Add or remove the extension.  */
	      if (adding_ext)
		aarch64_isa_flags |= opt->flags_on;
	      else
		aarch64_isa_flags &= ~(opt->flags_off);
	      break;
	    }
	}

      if (opt->name == NULL)
	{
	  /* Extension not found in list.  */
	  error ("unknown feature modifier %qs", str);
	  return;
	}

      str = ext;
    }

  return;
}
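/* For example (made-up command line), "-mcpu=cortex-a57+crypto+nofp"
   hands this function "+crypto+nofp": "crypto" ORs its flags_on bits
   into aarch64_isa_flags, then the "no" prefix on "fp" clears that
   entry's flags_off bits instead.  */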
/* Parse the ARCH string.  */
static void
aarch64_parse_arch (void)
{
  char *ext;
  const struct processor *arch;
  char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
  size_t len;

  strcpy (str, aarch64_arch_string);

  ext = strchr (str, '+');

  if (ext != NULL)
    len = ext - str;
  else
    len = strlen (str);

  if (len == 0)
    {
      error ("missing arch name in -march=%qs", str);
      return;
    }

  /* Loop through the list of supported ARCHs to find a match.  */
  for (arch = all_architectures; arch->name != NULL; arch++)
    {
      if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
	{
	  selected_arch = arch;
	  aarch64_isa_flags = selected_arch->flags;

	  if (!selected_cpu)
	    selected_cpu = &all_cores[selected_arch->core];

	  if (ext != NULL)
	    {
	      /* ARCH string contains at least one extension.  */
	      aarch64_parse_extension (ext);
	    }

	  if (strcmp (selected_arch->arch, selected_cpu->arch))
	    {
	      warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
		       selected_cpu->name, selected_arch->name);
	    }

	  return;
	}
    }

  /* ARCH name not found in list.  */
  error ("unknown value %qs for -march", str);
}
/* Parse the CPU string.  */
static void
aarch64_parse_cpu (void)
{
  char *ext;
  const struct processor *cpu;
  char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
  size_t len;

  strcpy (str, aarch64_cpu_string);

  ext = strchr (str, '+');

  if (ext != NULL)
    len = ext - str;
  else
    len = strlen (str);

  if (len == 0)
    {
      error ("missing cpu name in -mcpu=%qs", str);
      return;
    }

  /* Loop through the list of supported CPUs to find a match.  */
  for (cpu = all_cores; cpu->name != NULL; cpu++)
    {
      if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
	{
	  selected_cpu = cpu;
	  aarch64_isa_flags = selected_cpu->flags;

	  if (ext != NULL)
	    {
	      /* CPU string contains at least one extension.  */
	      aarch64_parse_extension (ext);
	    }

	  return;
	}
    }

  /* CPU name not found in list.  */
  error ("unknown value %qs for -mcpu", str);
}
/* Parse the TUNE string.  */
static void
aarch64_parse_tune (void)
{
  const struct processor *cpu;
  char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
  strcpy (str, aarch64_tune_string);

  /* Loop through the list of supported CPUs to find a match.  */
  for (cpu = all_cores; cpu->name != NULL; cpu++)
    {
      if (strcmp (cpu->name, str) == 0)
	{
	  selected_tune = cpu;
	  return;
	}
    }

  /* CPU name not found in list.  */
  error ("unknown value %qs for -mtune", str);
}
/* Implement TARGET_OPTION_OVERRIDE.  */
static void
aarch64_override_options (void)
{
  /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
     If either of -march or -mtune is given, they override their
     respective component of -mcpu.

     So, first parse AARCH64_CPU_STRING, then the others, be careful
     with -march as, if -mcpu is not present on the command line, march
     must set a sensible default CPU.  */
  if (aarch64_cpu_string)
    aarch64_parse_cpu ();

  if (aarch64_arch_string)
    aarch64_parse_arch ();

  if (aarch64_tune_string)
    aarch64_parse_tune ();

#ifndef HAVE_AS_MABI_OPTION
  /* The compiler may have been configured with 2.23.* binutils, which does
     not have support for ILP32.  */
  if (TARGET_ILP32)
    error ("Assembler does not support -mabi=ilp32");
#endif

  initialize_aarch64_code_model ();

  aarch64_build_bitmask_table ();

  /* This target defaults to strict volatile bitfields.  */
  if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
    flag_strict_volatile_bitfields = 1;

  /* If the user did not specify a processor, choose the default
     one for them.  This will be the CPU set during configuration using
     --with-cpu, otherwise it is "generic".  */
  if (!selected_cpu)
    {
      selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
      aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
    }

  gcc_assert (selected_cpu);

  if (!selected_tune)
    selected_tune = selected_cpu;

  aarch64_tune_flags = selected_tune->flags;
  aarch64_tune = selected_tune->core;
  aarch64_tune_params = selected_tune->tune;
  aarch64_architecture_version = selected_cpu->architecture_version;

  if (aarch64_fix_a53_err835769 == 2)
    {
#ifdef TARGET_FIX_ERR_A53_835769_DEFAULT
      aarch64_fix_a53_err835769 = 1;
#else
      aarch64_fix_a53_err835769 = 0;
#endif
    }

  aarch64_register_fma_steering ();

  aarch64_override_options_after_change ();
}
/* Implement targetm.override_options_after_change.  */
static void
aarch64_override_options_after_change (void)
{
  if (flag_omit_frame_pointer)
    flag_omit_leaf_frame_pointer = false;
  else if (flag_omit_leaf_frame_pointer)
    flag_omit_frame_pointer = true;

  /* If not optimizing for size, set the default
     alignment to what the target wants.  */
  if (!optimize_size)
    {
      if (align_loops <= 0)
	align_loops = aarch64_tune_params->loop_align;
      if (align_jumps <= 0)
	align_jumps = aarch64_tune_params->jump_align;
      if (align_functions <= 0)
	align_functions = aarch64_tune_params->function_align;
    }
}
static struct machine_function *
aarch64_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_cleared_alloc<machine_function> ();
  return machine;
}

void
aarch64_init_expanders (void)
{
  init_machine_status = aarch64_init_machine_status;
}
/* A checking mechanism for the implementation of the various code models.  */
static void
initialize_aarch64_code_model (void)
{
  if (flag_pic)
    {
      switch (aarch64_cmodel_var)
	{
	case AARCH64_CMODEL_TINY:
	  aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
	  break;
	case AARCH64_CMODEL_SMALL:
	  aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
	  break;
	case AARCH64_CMODEL_LARGE:
	  sorry ("code model %qs with -f%s", "large",
		 flag_pic > 1 ? "PIC" : "pic");
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    aarch64_cmodel = aarch64_cmodel_var;
}
/* Return true if SYMBOL_REF X binds locally.  */

static bool
aarch64_symbol_binds_local_p (const_rtx x)
{
  return (SYMBOL_REF_DECL (x)
	  ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
	  : SYMBOL_REF_LOCAL_P (x));
}

/* Return true if SYMBOL_REF X is thread local.  */
static bool
aarch64_tls_symbol_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  if (GET_CODE (x) != SYMBOL_REF)
    return false;

  return SYMBOL_REF_TLS_MODEL (x) != 0;
}
/* Classify a TLS symbol into one of the TLS kinds.  */
enum aarch64_symbol_type
aarch64_classify_tls_symbol (rtx x)
{
  enum tls_model tls_kind = tls_symbolic_operand_type (x);

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
    case TLS_MODEL_LOCAL_DYNAMIC:
      return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;

    case TLS_MODEL_INITIAL_EXEC:
      return SYMBOL_SMALL_GOTTPREL;

    case TLS_MODEL_LOCAL_EXEC:
      return SYMBOL_SMALL_TPREL;

    case TLS_MODEL_EMULATED:
    case TLS_MODEL_NONE:
      return SYMBOL_FORCE_TO_MEM;

    default:
      gcc_unreachable ();
    }
}
/* Return the method that should be used to access SYMBOL_REF or
   LABEL_REF X in context CONTEXT.  */

enum aarch64_symbol_type
aarch64_classify_symbol (rtx x, rtx offset,
			 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
{
  if (GET_CODE (x) == LABEL_REF)
    {
      switch (aarch64_cmodel)
	{
	case AARCH64_CMODEL_LARGE:
	  return SYMBOL_FORCE_TO_MEM;

	case AARCH64_CMODEL_TINY_PIC:
	case AARCH64_CMODEL_TINY:
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL_PIC:
	case AARCH64_CMODEL_SMALL:
	  return SYMBOL_SMALL_ABSOLUTE;

	default:
	  gcc_unreachable ();
	}
    }

  if (GET_CODE (x) == SYMBOL_REF)
    {
      if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
	return SYMBOL_FORCE_TO_MEM;

      if (aarch64_tls_symbol_p (x))
	return aarch64_classify_tls_symbol (x);

      switch (aarch64_cmodel)
	{
	case AARCH64_CMODEL_TINY:
	  /* When we retrieve symbol + offset address, we have to make sure
	     the offset does not cause overflow of the final address.  But
	     we have no way of knowing the address of symbol at compile time
	     so we can't accurately say if the distance between the PC and
	     symbol + offset is outside the addressable range of +/-1M in the
	     TINY code model.  So we rely on images not being greater than
	     1M and cap the offset at 1M and anything beyond 1M will have to
	     be loaded using an alternative mechanism.  */
	  if (SYMBOL_REF_WEAK (x)
	      || INTVAL (offset) < -1048575 || INTVAL (offset) > 1048575)
	    return SYMBOL_FORCE_TO_MEM;
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL:
	  /* Same reasoning as the tiny code model, but the offset cap here is
	     4G.  */
	  if (SYMBOL_REF_WEAK (x)
	      || !IN_RANGE (INTVAL (offset), HOST_WIDE_INT_C (-4294967263),
			    HOST_WIDE_INT_C (4294967264)))
	    return SYMBOL_FORCE_TO_MEM;
	  return SYMBOL_SMALL_ABSOLUTE;

	case AARCH64_CMODEL_TINY_PIC:
	  if (!aarch64_symbol_binds_local_p (x))
	    return SYMBOL_TINY_GOT;
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL_PIC:
	  if (!aarch64_symbol_binds_local_p (x))
	    return SYMBOL_SMALL_GOT;
	  return SYMBOL_SMALL_ABSOLUTE;

	default:
	  gcc_unreachable ();
	}
    }

  /* By default push everything into the constant pool.  */
  return SYMBOL_FORCE_TO_MEM;
}
bool
aarch64_constant_address_p (rtx x)
{
  return (CONSTANT_P (x) && memory_address_p (DImode, x));
}

bool
aarch64_legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return false;

  return true;
}
/* Return true if X holds either a quarter-precision or
   floating-point +0.0 constant.  */
static bool
aarch64_valid_floating_const (machine_mode mode, rtx x)
{
  if (!CONST_DOUBLE_P (x))
    return false;

  if (aarch64_float_const_zero_rtx_p (x))
    return true;

  /* We only handle moving 0.0 to a TFmode register.  */
  if (!(mode == SFmode || mode == DFmode))
    return false;

  return aarch64_float_const_representable_p (x);
}
static bool
aarch64_legitimate_constant_p (machine_mode mode, rtx x)
{
  /* Do not allow vector struct mode constants.  We could support
     0 and -1 easily, but they need support in aarch64-simd.md.  */
  if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
    return false;

  /* This could probably go away because
     we now decompose CONST_INTs according to expand_mov_immediate.  */
  if ((GET_CODE (x) == CONST_VECTOR
       && aarch64_simd_valid_immediate (x, mode, false, NULL))
      || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
    return !targetm.cannot_force_const_mem (mode, x);

  if (GET_CODE (x) == HIGH
      && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
    return true;

  return aarch64_constant_address_p (x);
}
static rtx
aarch64_load_tp (rtx target)
{
  if (!target
      || GET_MODE (target) != Pmode
      || !register_operand (target, Pmode))
    target = gen_reg_rtx (Pmode);

  /* Can return in any reg.  */
  emit_insn (gen_aarch64_load_tp_hard (target));
  return target;
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
   Return the type to use as __builtin_va_list.

   AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:

   struct __va_list
   {
     void *__stack;
     void *__gr_top;
     void *__vr_top;
     int   __gr_offs;
     int   __vr_offs;
   };  */

static tree
aarch64_build_builtin_va_list (void)
{
  tree va_list_name;
  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;

  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;

  /* Create the fields.  */
  f_stack = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__stack"),
			ptr_type_node);
  f_grtop = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__gr_top"),
			ptr_type_node);
  f_vrtop = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__vr_top"),
			ptr_type_node);
  f_groff = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__gr_offs"),
			integer_type_node);
  f_vroff = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__vr_offs"),
			integer_type_node);

  DECL_ARTIFICIAL (f_stack) = 1;
  DECL_ARTIFICIAL (f_grtop) = 1;
  DECL_ARTIFICIAL (f_vrtop) = 1;
  DECL_ARTIFICIAL (f_groff) = 1;
  DECL_ARTIFICIAL (f_vroff) = 1;

  DECL_FIELD_CONTEXT (f_stack) = va_list_type;
  DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
  DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
  DECL_FIELD_CONTEXT (f_groff) = va_list_type;
  DECL_FIELD_CONTEXT (f_vroff) = va_list_type;

  TYPE_FIELDS (va_list_type) = f_stack;
  DECL_CHAIN (f_stack) = f_grtop;
  DECL_CHAIN (f_grtop) = f_vrtop;
  DECL_CHAIN (f_vrtop) = f_groff;
  DECL_CHAIN (f_groff) = f_vroff;

  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
{
  const CUMULATIVE_ARGS *cum;
  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
  tree stack, grtop, vrtop, groff, vroff;
  tree t;
  int gr_save_area_size;
  int vr_save_area_size;
  int vr_offset;

  cum = &crtl->args.info;
  gr_save_area_size
    = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
  vr_save_area_size
    = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;

  if (!TARGET_FLOAT)
    {
      gcc_assert (cum->aapcs_nvrn == 0);
      vr_save_area_size = 0;
    }

  f_stack = TYPE_FIELDS (va_list_type_node);
  f_grtop = DECL_CHAIN (f_stack);
  f_vrtop = DECL_CHAIN (f_grtop);
  f_groff = DECL_CHAIN (f_vrtop);
  f_vroff = DECL_CHAIN (f_groff);

  stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
		  NULL_TREE);
  grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
		  NULL_TREE);
  vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
		  NULL_TREE);
  groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
		  NULL_TREE);
  vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
		  NULL_TREE);

  /* Emit code to initialize STACK, which points to the next varargs stack
     argument.  CUM->AAPCS_STACK_SIZE gives the number of stack words used
     by named arguments.  STACK is 8-byte aligned.  */
  t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
  if (cum->aapcs_stack_size > 0)
    t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
  t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize GRTOP, the top of the GR save area.
     virtual_incoming_args_rtx should have been 16 byte aligned.  */
  t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
  t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize VRTOP, the top of the VR save area.
     This address is gr_save_area_bytes below GRTOP, rounded
     down to the next 16-byte boundary.  */
  t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
  vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
				STACK_BOUNDARY / BITS_PER_UNIT);
  if (vr_offset)
    t = fold_build_pointer_plus_hwi (t, -vr_offset);
  t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize GROFF, the offset from GRTOP of the
     next GPR argument.  */
  t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
	      build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Likewise emit code to initialize VROFF, the offset from FTOP
     of the next VR argument.  */
  t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
	      build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
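
/* As a concrete illustration (an assumed call site, not compiler
   output): for "void f (int a, ...)" with one named GP argument, no
   named FP arguments and no named stack words, the code emitted above
   computes

     ap.__stack   = incoming_args;
     ap.__gr_top  = incoming_args;
     ap.__vr_top  = incoming_args - 64;   // 56 GR bytes rounded up to 16
     ap.__gr_offs = -(8 - 1) * 8  = -56;  // seven x-registers remain
     ap.__vr_offs = -(8 - 0) * 16 = -128; // eight q-registers remain  */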
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */

static tree
aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			      gimple_seq *post_p ATTRIBUTE_UNUSED)
{
  tree addr;
  bool indirect_p;
  bool is_ha;		/* is HFA or HVA.  */
  bool dw_align;	/* double-word align.  */
  machine_mode ag_mode = VOIDmode;
  int nregs;
  machine_mode mode;

  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
  tree stack, f_top, f_off, off, arg, roundup, on_stack;
  HOST_WIDE_INT size, rsize, adjust, align;
  tree t, u, cond1, cond2;

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);

  mode = TYPE_MODE (type);

  f_stack = TYPE_FIELDS (va_list_type_node);
  f_grtop = DECL_CHAIN (f_stack);
  f_vrtop = DECL_CHAIN (f_grtop);
  f_groff = DECL_CHAIN (f_vrtop);
  f_vroff = DECL_CHAIN (f_groff);

  stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
		  f_stack, NULL_TREE);
  size = int_size_in_bytes (type);
  align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;

  dw_align = false;
  adjust = 0;
  if (aarch64_vfp_is_call_or_return_candidate (mode,
					       type,
					       &ag_mode,
					       &nregs,
					       &is_ha))
    {
      /* TYPE passed in fp/simd registers.  */
      if (!TARGET_FLOAT)
	aarch64_err_no_fpadvsimd (mode, "varargs");

      f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
		      unshare_expr (valist), f_vrtop, NULL_TREE);
      f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
		      unshare_expr (valist), f_vroff, NULL_TREE);

      rsize = nregs * UNITS_PER_VREG;

      if (is_ha)
	{
	  if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
	    adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
	}
      else if (BLOCK_REG_PADDING (mode, type, 1) == downward
	       && size < UNITS_PER_VREG)
	{
	  adjust = UNITS_PER_VREG - size;
	}
    }
  else
    {
      /* TYPE passed in general registers.  */
      f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
		      unshare_expr (valist), f_grtop, NULL_TREE);
      f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
		      unshare_expr (valist), f_groff, NULL_TREE);
      rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
      nregs = rsize / UNITS_PER_WORD;

      if (align > 8)
	dw_align = true;

      if (BLOCK_REG_PADDING (mode, type, 1) == downward
	  && size < UNITS_PER_WORD)
	{
	  adjust = UNITS_PER_WORD - size;
	}
    }

  /* Get a local temporary for the field value.  */
  off = get_initialized_tmp_var (f_off, pre_p, NULL);

  /* Emit code to branch if off >= 0.  */
  t = build2 (GE_EXPR, boolean_type_node, off,
	      build_int_cst (TREE_TYPE (off), 0));
  cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);

  if (dw_align)
    {
      /* Emit: offs = (offs + 15) & -16.  */
      t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
		  build_int_cst (TREE_TYPE (off), 15));
      t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
		  build_int_cst (TREE_TYPE (off), -16));
      roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
    }
  else
    roundup = NULL;

  /* Update ap.__[g|v]r_offs  */
  t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
	      build_int_cst (TREE_TYPE (off), rsize));
  t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);

  /* String up.  */
  if (roundup)
    t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);

  /* [cond2] if (ap.__[g|v]r_offs > 0)  */
  u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
	      build_int_cst (TREE_TYPE (f_off), 0));
  cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);

  /* String up: make sure the assignment happens before the use.  */
  t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
  COND_EXPR_ELSE (cond1) = t;

  /* Prepare the trees handling the argument that is passed on the stack;
     the top level node will store in ON_STACK.  */
  arg = get_initialized_tmp_var (stack, pre_p, NULL);
  if (align > 8)
    {
      /* if (alignof(type) > 8) (arg = arg + 15) & -16;  */
      t = fold_convert (intDI_type_node, arg);
      t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), 15));
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -16));
      t = fold_convert (TREE_TYPE (arg), t);
      roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
    }
  else
    roundup = NULL;
  /* Advance ap.__stack  */
  t = fold_convert (intDI_type_node, arg);
  t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
	      build_int_cst (TREE_TYPE (t), size + 7));
  t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
	      build_int_cst (TREE_TYPE (t), -8));
  t = fold_convert (TREE_TYPE (arg), t);
  t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
  /* String up roundup and advance.  */
  if (roundup)
    t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
  /* String up with arg  */
  on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
  /* Big-endianness related address adjustment.  */
  if (BLOCK_REG_PADDING (mode, type, 1) == downward
      && size < UNITS_PER_WORD)
    {
      t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
		  size_int (UNITS_PER_WORD - size));
      on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
    }

  COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
  COND_EXPR_THEN (cond2) = unshare_expr (on_stack);

  /* Adjustment to OFFSET in the case of BIG_ENDIAN.  */
  t = off;
  if (adjust)
    t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
		build_int_cst (TREE_TYPE (off), adjust));

  t = fold_convert (sizetype, t);
  t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);

  if (is_ha)
    {
      /* type ha; // treat as "struct {ftype field[n];}"
	 ... [computing offs]
	 for (i = 0; i < nregs; ++i, offs += 16)
	   ha.field[i] = *((ftype *)(ap.__vr_top + offs));
	 return ha;  */
      int i;
      tree tmp_ha, field_t, field_ptr_t;

      /* Declare a local variable.  */
      tmp_ha = create_tmp_var_raw (type, "ha");
      gimple_add_tmp_var (tmp_ha);

      /* Establish the base type.  */
      switch (ag_mode)
	{
	case SFmode:
	  field_t = float_type_node;
	  field_ptr_t = float_ptr_type_node;
	  break;
	case DFmode:
	  field_t = double_type_node;
	  field_ptr_t = double_ptr_type_node;
	  break;
	case TFmode:
	  field_t = long_double_type_node;
	  field_ptr_t = long_double_ptr_type_node;
	  break;
/* The half precision and quad precision are not fully supported yet.  Enable
   the following code after the support is complete.  Need to find the correct
   type node for __fp16 *.  */
#if 0
	case HFmode:
	  field_t = float_type_node;
	  field_ptr_t = float_ptr_type_node;
	  break;
#endif
	case V2SImode:
	case V4SImode:
	  {
	    tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
	    field_t = build_vector_type_for_mode (innertype, ag_mode);
	    field_ptr_t = build_pointer_type (field_t);
	  }
	  break;
	default:
	  gcc_assert (0);
	}

      /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area  */
      tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
      addr = t;
      t = fold_convert (field_ptr_t, addr);
      t = build2 (MODIFY_EXPR, field_t,
		  build1 (INDIRECT_REF, field_t, tmp_ha),
		  build1 (INDIRECT_REF, field_t, t));

      /* ha.field[i] = *((field_ptr_t)vr_saved_area + i)  */
      for (i = 1; i < nregs; ++i)
	{
	  addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
	  u = fold_convert (field_ptr_t, addr);
	  u = build2 (MODIFY_EXPR, field_t,
		      build2 (MEM_REF, field_t, tmp_ha,
			      build_int_cst (field_ptr_t,
					     (i *
					      int_size_in_bytes (field_t)))),
		      build1 (INDIRECT_REF, field_t, u));
	  t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
	}

      u = fold_convert (TREE_TYPE (f_top), tmp_ha);
      t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
    }

  COND_EXPR_ELSE (cond2) = t;
  addr = fold_convert (build_pointer_type (type), cond1);
  addr = build_va_arg_indirect_ref (addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);

  return addr;
}
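
/* Shape of the tree built above, for reference (illustrative pseudo-C,
   an exposition aid rather than actual gimple):

     off = ap.__gr_offs;              // or __vr_offs for FP/SIMD args
     if (off >= 0)
       addr = on_stack;               // register area already exhausted
     else
       {
	 ap.__gr_offs = off + rsize;  // claim the register slot(s)
	 if (ap.__gr_offs > 0)
	   addr = on_stack;           // arg straddled; it went on the stack
	 else
	   addr = ap.__gr_top + off;  // arg is in the register save area
       }  */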
/* Implement TARGET_SETUP_INCOMING_VARARGS.  */

static void
aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
				tree type, int *pretend_size ATTRIBUTE_UNUSED,
				int no_rtl)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  CUMULATIVE_ARGS local_cum;
  int gr_saved, vr_saved;

  /* The caller has advanced CUM up to, but not beyond, the last named
     argument.  Advance a local copy of CUM past the last "real" named
     argument, to find out how many registers are left over.  */
  local_cum = *cum;
  aarch64_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, true);

  /* Find out how many registers we need to save.  */
  gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
  vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;

  if (!TARGET_FLOAT)
    {
      gcc_assert (local_cum.aapcs_nvrn == 0);
      vr_saved = 0;
    }

  if (!no_rtl)
    {
      if (gr_saved > 0)
	{
	  rtx ptr, mem;

	  /* virtual_incoming_args_rtx should have been 16-byte aligned.  */
	  ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
			       - gr_saved * UNITS_PER_WORD);
	  mem = gen_frame_mem (BLKmode, ptr);
	  set_mem_alias_set (mem, get_varargs_alias_set ());

	  move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
			       mem, gr_saved);
	}
      if (vr_saved > 0)
	{
	  /* We can't use move_block_from_reg, because it will use
	     the wrong mode, storing D regs only.  */
	  machine_mode mode = TImode;
	  int off, i;

	  /* Set OFF to the offset from virtual_incoming_args_rtx of
	     the first vector register.  The VR save area lies below
	     the GR one, and is aligned to 16 bytes.  */
	  off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
				   STACK_BOUNDARY / BITS_PER_UNIT);
	  off -= vr_saved * UNITS_PER_VREG;

	  for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
	    {
	      rtx ptr, mem;

	      ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
	      mem = gen_frame_mem (mode, ptr);
	      set_mem_alias_set (mem, get_varargs_alias_set ());
	      aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
	      off += UNITS_PER_VREG;
	    }
	}
    }

  /* We don't save the size into *PRETEND_SIZE because we want to avoid
     any complication of having crtl->args.pretend_args_size changed.  */
  cfun->machine->frame.saved_varargs_size
    = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
			 STACK_BOUNDARY / BITS_PER_UNIT)
       + vr_saved * UNITS_PER_VREG);
}
static void
aarch64_conditional_register_usage (void)
{
  int i;
  if (!TARGET_FLOAT)
    {
      for (i = V0_REGNUM; i <= V31_REGNUM; i++)
	{
	  fixed_regs[i] = 1;
	  call_used_regs[i] = 1;
	}
    }
}
/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.  */

static int
aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
{
  machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode && mode != TFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode && mode != TFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
	 and 128-bit vector types.  */
      size = int_size_in_bytes (type);
      switch (size)
	{
	case 8:
	  mode = V2SImode;
	  break;
	case 16:
	  mode = V4SImode;
	  break;
	default:
	  return -1;
	}

      if (*modep == VOIDmode)
	*modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
	 equivalent for the purposes of being homogeneous aggregates
	 if they are the same size.  */
      if (*modep == mode)
	return 1;

      break;

    case ARRAY_TYPE:
      {
	int count;
	tree index = TYPE_DOMAIN (type);

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
	if (count == -1
	    || !index
	    || !TYPE_MAX_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
	    || !TYPE_MIN_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
	    || count < 0)
	  return -1;

	count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
		  - tree_to_uhwi (TYPE_MIN_VALUE (index)));

	/* There must be no padding.  */
	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case RECORD_TYPE:
      {
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count += sub_count;
	  }

	/* There must be no padding.  */
	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
	/* These aren't very interesting except in a degenerate case.  */
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count = count > sub_count ? count : sub_count;
	  }

	/* There must be no padding.  */
	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    default:
      break;
    }

  return -1;
}
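
/* Worked example (for exposition): given the C type

     struct hfa { float x, y, z; };

   the walk above sets *MODEP to SFmode and returns 3 -- a homogeneous
   floating-point aggregate of three single-precision members -- while
   "struct { float f; double d; }" mixes base types and yields -1.  */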
/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
   type as described in AAPCS64 \S 4.1.2.

   See the comment above aarch64_composite_type_p for the notes on MODE.  */

static bool
aarch64_short_vector_p (const_tree type,
			machine_mode mode)
{
  HOST_WIDE_INT size = -1;

  if (type && TREE_CODE (type) == VECTOR_TYPE)
    size = int_size_in_bytes (type);
  else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    size = GET_MODE_SIZE (mode);

  return (size == 8 || size == 16);
}
/* Return TRUE if the type, as described by TYPE and MODE, is a composite
   type as described in AAPCS64 \S 4.3.  This includes aggregate, union and
   array types.  The C99 floating-point complex types are also considered
   as composite types, according to AAPCS64 \S 7.1.1.  The complex integer
   types, which are GCC extensions and out of the scope of AAPCS64, are
   treated as composite types here as well.

   Note that MODE itself is not sufficient in determining whether a type
   is such a composite type or not.  This is because
   stor-layout.c:compute_record_mode may have already changed the MODE
   (BLKmode) of a RECORD_TYPE TYPE to some other mode.  For example, a
   structure with only one field may have its MODE set to the mode of the
   field.  Also an integer mode whose size matches the size of the
   RECORD_TYPE type may be used to substitute the original mode
   (i.e. BLKmode) in certain circumstances.  In other words, MODE cannot be
   solely relied on.  */

static bool
aarch64_composite_type_p (const_tree type,
			  machine_mode mode)
{
  if (aarch64_short_vector_p (type, mode))
    return false;

  if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
    return true;

  if (mode == BLKmode
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return true;

  return false;
}
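
/* Examples (illustrative): "struct { int i; }" and "_Complex float"
   both count as composite here, while an "int32x4_t" intrinsic vector
   does not, because the short-vector test above excludes it first.  */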
/* Return TRUE if an argument, whose type is described by TYPE and MODE,
   shall be passed or returned in simd/fp register(s) (providing these
   parameter passing registers are available).

   Upon successful return, *COUNT returns the number of needed registers,
   *BASE_MODE returns the mode of the individual register and when IS_HA
   is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
   floating-point aggregate or a homogeneous short-vector aggregate.  */

static bool
aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
					 const_tree type,
					 machine_mode *base_mode,
					 int *count,
					 bool *is_ha)
{
  machine_mode new_mode = VOIDmode;
  bool composite_p = aarch64_composite_type_p (type, mode);

  if (is_ha != NULL) *is_ha = false;

  if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
      || aarch64_short_vector_p (type, mode))
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      if (is_ha != NULL) *is_ha = true;
      *count = 2;
      new_mode = GET_MODE_INNER (mode);
    }
  else if (type && composite_p)
    {
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);

      if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
	{
	  if (is_ha != NULL) *is_ha = true;
	  *count = ag_count;
	}
      else
	return false;
    }
  else
    return false;

  *base_mode = new_mode;
  return true;
}
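
/* Worked examples (illustrative): a plain "float" argument gives
   *COUNT == 1, *BASE_MODE == SFmode, *IS_HA == false; "_Complex double"
   gives *COUNT == 2, *BASE_MODE == DFmode, *IS_HA == true; and a
   two-member structure of 128-bit vectors is a homogeneous short-vector
   aggregate with *COUNT == 2 and *BASE_MODE == V4SImode (the 128-bit
   representative mode chosen by aapcs_vfp_sub_candidate).  */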
/* Implement TARGET_STRUCT_VALUE_RTX.  */

static rtx
aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
			  int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
}
/* Implements target hook vector_mode_supported_p.  */
static bool
aarch64_vector_mode_supported_p (machine_mode mode)
{
  if (TARGET_SIMD
      && (mode == V4SImode  || mode == V8HImode
	  || mode == V16QImode || mode == V2DImode
	  || mode == V2SImode  || mode == V4HImode
	  || mode == V8QImode || mode == V2SFmode
	  || mode == V4SFmode || mode == V2DFmode
	  || mode == V1DFmode))
    return true;

  return false;
}
/* Return appropriate SIMD container
   for MODE within a vector of WIDTH bits.  */
static machine_mode
aarch64_simd_container_mode (machine_mode mode, unsigned width)
{
  gcc_assert (width == 64 || width == 128);
  if (TARGET_SIMD)
    {
      if (width == 128)
	switch (mode)
	  {
	  case DFmode: return V2DFmode;
	  case SFmode: return V4SFmode;
	  case SImode: return V4SImode;
	  case HImode: return V8HImode;
	  case QImode: return V16QImode;
	  case DImode: return V2DImode;
	  default: break;
	  }
      else
	switch (mode)
	  {
	  case SFmode: return V2SFmode;
	  case SImode: return V2SImode;
	  case HImode: return V4HImode;
	  case QImode: return V8QImode;
	  default: break;
	  }
    }
  return word_mode;
}

/* Return 128-bit container as the preferred SIMD mode for MODE.  */
static machine_mode
aarch64_preferred_simd_mode (machine_mode mode)
{
  return aarch64_simd_container_mode (mode, 128);
}
/* Return the bitmask of possible vector sizes for the vectorizer
   to iterate over.  */
static unsigned int
aarch64_autovectorize_vector_sizes (void)
{
  return (16 | 8);
}

/* Implement TARGET_MANGLE_TYPE.  */

static const char *
aarch64_mangle_type (const_tree type)
{
  /* The AArch64 ABI documents say that "__va_list" has to be
     mangled as if it is in the "std" namespace.  */
  if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Mangle AArch64-specific internal types.  TYPE_NAME is non-NULL_TREE for
     builtin types.  */
  if (TYPE_NAME (type) != NULL)
    return aarch64_mangle_builtin_type (type);

  /* Use the default mangling.  */
  return NULL;
}
/* Return true if the rtx_insn contains a MEM RTX somewhere
   in it.  */

static bool
has_memory_op (rtx_insn *mem_insn)
{
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (mem_insn), ALL)
    if (MEM_P (*iter))
      return true;

  return false;
}

/* Find the first rtx_insn before insn that will generate an assembly
   instruction.  */

static rtx_insn *
aarch64_prev_real_insn (rtx_insn *insn)
{
  if (!insn)
    return NULL;

  do
    {
      insn = prev_real_insn (insn);
    }
  while (insn && recog_memoized (insn) < 0);

  return insn;
}
static bool
is_madd_op (enum attr_type t1)
{
  unsigned int i;
  /* A number of these may be AArch32 only.  */
  enum attr_type mlatypes[] = {
    TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
    TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
    TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS, TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
  };

  for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
    {
      if (t1 == mlatypes[i])
	return true;
    }

  return false;
}
/* Check if there is a register dependency between a load and the insn
   for which we hold recog_data.  */

static bool
dep_between_memop_and_curr (rtx memop)
{
  rtx load_reg;
  int opno;

  gcc_assert (GET_CODE (memop) == SET);

  if (!REG_P (SET_DEST (memop)))
    return false;

  load_reg = SET_DEST (memop);
  for (opno = 1; opno < recog_data.n_operands; opno++)
    {
      rtx operand = recog_data.operand[opno];
      if (REG_P (operand)
	  && reg_overlap_mentioned_p (load_reg, operand))
	return true;
    }
  return false;
}
/* When working around the Cortex-A53 erratum 835769,
   given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
   instruction and has a preceding memory instruction such that a NOP
   should be inserted between them.  */

bool
aarch64_madd_needs_nop (rtx_insn *insn)
{
  enum attr_type attr_type;
  rtx_insn *prev;
  rtx body;

  if (!aarch64_fix_a53_err835769)
    return false;

  if (recog_memoized (insn) < 0)
    return false;

  attr_type = get_attr_type (insn);
  if (!is_madd_op (attr_type))
    return false;

  prev = aarch64_prev_real_insn (insn);
  /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
     Restore recog state to INSN to avoid state corruption.  */
  extract_constrain_insn_cached (insn);

  if (!prev || !has_memory_op (prev))
    return false;

  body = single_set (prev);

  /* If the previous insn is a memory op and there is no dependency between
     it and the DImode madd, emit a NOP between them.  If body is NULL then we
     have a complex memory operation, probably a load/store pair.
     Be conservative for now and emit a NOP.  */
  if (GET_MODE (recog_data.operand[0]) == DImode
      && (!body || !dep_between_memop_and_curr (body)))
    return true;

  return false;
}

/* Implement FINAL_PRESCAN_INSN.  */

void
aarch64_final_prescan_insn (rtx_insn *insn)
{
  if (aarch64_madd_needs_nop (insn))
    fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
}
/* Return the equivalent letter for size.  */
static char
sizetochar (int size)
{
  switch (size)
    {
    case 64: return 'd';
    case 32: return 's';
    case 16: return 'h';
    case 8 : return 'b';
    default: gcc_unreachable ();
    }
}
/* Return true iff x is a uniform vector of floating-point
   constants, and the constant can be represented in
   quarter-precision form.  Note, as aarch64_float_const_representable
   rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0.  */
static bool
aarch64_vect_float_const_representable_p (rtx x)
{
  int i;
  REAL_VALUE_TYPE r0, ri;
  rtx x0, xi;

  if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
    return false;

  x0 = CONST_VECTOR_ELT (x, 0);
  if (!CONST_DOUBLE_P (x0))
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);

  for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
    {
      xi = CONST_VECTOR_ELT (x, i);
      if (!CONST_DOUBLE_P (xi))
	return false;

      REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
      if (!REAL_VALUES_EQUAL (r0, ri))
	return false;
    }

  return aarch64_float_const_representable_p (x0);
}
/* Return true for valid and false for invalid.  */
bool
aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
			      struct simd_immediate_info *info)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG)	\
  matches = 1;						\
  for (i = 0; i < idx; i += (STRIDE))			\
    if (!(TEST))					\
      matches = 0;					\
  if (matches)						\
    {							\
      immtype = (CLASS);				\
      elsize = (ELSIZE);				\
      eshift = (SHIFT);					\
      emvn = (NEG);					\
      break;						\
    }

  unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
  unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
  unsigned char bytes[16];
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;
  int eshift, emvn;

  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      if (! (aarch64_simd_imm_zero_p (op, mode)
	     || aarch64_vect_float_const_representable_p (op)))
	return false;

      if (info)
	{
	  info->value = CONST_VECTOR_ELT (op, 0);
	  info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
	  info->mvn = false;
	  info->shift = 0;
	}

      return true;
    }

  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      /* The vector is provided in gcc endian-neutral fashion.  For aarch64_be,
	 it must be laid out in the vector register in reverse order.  */
      rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
      unsigned HOST_WIDE_INT elpart;
      unsigned int part, parts;

      if (CONST_INT_P (el))
	{
	  elpart = INTVAL (el);
	  parts = 1;
	}
      else if (GET_CODE (el) == CONST_DOUBLE)
	{
	  elpart = CONST_DOUBLE_LOW (el);
	  parts = 2;
	}
      else
	gcc_unreachable ();

      for (part = 0; part < parts; part++)
	{
	  unsigned int byte;
	  for (byte = 0; byte < innersize; byte++)
	    {
	      bytes[idx++] = (elpart & 0xff) ^ invmask;
	      elpart >>= BITS_PER_UNIT;
	    }
	  if (GET_CODE (el) == CONST_DOUBLE)
	    elpart = CONST_DOUBLE_HIGH (el);
	}
    }

  /* Sanity check.  */
  gcc_assert (idx == GET_MODE_SIZE (mode));

  do
    {
      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);

      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);

      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);

      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
	     && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);

      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);

      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);

      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);

      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);

      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);

      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
	     && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);

      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);

      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);

      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);

      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);

      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);

      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);

      CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);

      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
	     && bytes[i] == bytes[(i + 8) % idx], 0, 0);
    }
  while (0);

  if (immtype == -1)
    return false;

  if (info)
    {
      info->element_width = elsize;
      info->mvn = emvn != 0;
      info->shift = eshift;

      unsigned HOST_WIDE_INT imm = 0;

      if (immtype >= 12 && immtype <= 15)
	info->msl = true;

      /* Un-invert bytes of recognized vector, if necessary.  */
      if (invmask != 0)
	for (i = 0; i < idx; i++)
	  bytes[i] ^= invmask;

      if (immtype == 17)
	{
	  /* FIXME: Broken on 32-bit H_W_I hosts.  */
	  gcc_assert (sizeof (HOST_WIDE_INT) == 8);

	  for (i = 0; i < 8; i++)
	    imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
	      << (i * BITS_PER_UNIT);

	  info->value = GEN_INT (imm);
	}
      else
	{
	  for (i = 0; i < elsize / BITS_PER_UNIT; i++)
	    imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

	  /* Construct 'abcdefgh' because the assembler cannot handle
	     generic constants.  */
	  if (info->mvn)
	    imm = ~imm;
	  imm = (imm >> info->shift) & 0xff;
	  info->value = GEN_INT (imm);
	}
    }

  return true;
#undef CHECK
}
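
/* Worked example (for exposition): the V4SI splat of 0x300 has the
   little-endian byte image {00 03 00 00} repeated four times.  It fails
   CHECK class 0 (low byte repeated, others zero) because byte 1 is
   nonzero, but matches class 1 (byte 1 repeated, others zero), giving
   ELSIZE = 32 and SHIFT = 8; INFO then describes the single instruction
   "movi vd.4s, 0x3, lsl 8".  */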
/* Check if immediate shift constants are within range.  */
bool
aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
{
  int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
  if (left)
    return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
  else
    return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
}
/* Return true if X is a uniform vector where all elements
   are either the floating-point constant 0.0 or the
   integer constant 0.  */
bool
aarch64_simd_imm_zero_p (rtx x, machine_mode mode)
{
  return x == CONST0_RTX (mode);
}

bool
aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT imm = INTVAL (x);
  int i;

  for (i = 0; i < 8; i++)
    {
      unsigned int byte = imm & 0xff;
      if (byte != 0xff && byte != 0)
	return false;
      imm >>= 8;
    }

  return true;
}
bool
aarch64_mov_operand_p (rtx x,
		       enum aarch64_symbol_context context,
		       machine_mode mode)
{
  if (GET_CODE (x) == HIGH
      && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
    return true;

  if (CONST_INT_P (x))
    return true;

  if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
    return true;

  return aarch64_classify_symbolic_expression (x, context)
    == SYMBOL_TINY_ABSOLUTE;
}
/* Return a const_int vector of VAL.  */
rtx
aarch64_simd_gen_const_vector_dup (machine_mode mode, int val)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits);
  int i;

  for (i = 0; i < nunits; i++)
    RTVEC_ELT (v, i) = GEN_INT (val);

  return gen_rtx_CONST_VECTOR (mode, v);
}
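
/* Example use (illustrative):
   aarch64_simd_gen_const_vector_dup (V4SImode, -1) builds
   (const_vector:V4SI [-1 -1 -1 -1]), the kind of all-lanes mask used by
   the TBL index-clamping code in aarch64_expand_vec_perm below.  */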
/* Check OP is a legal scalar immediate for the MOVI instruction.  */
bool
aarch64_simd_scalar_immediate_valid_for_move (rtx op, machine_mode mode)
{
  machine_mode vmode;

  gcc_assert (!VECTOR_MODE_P (mode));
  vmode = aarch64_preferred_simd_mode (mode);
  rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
  return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
}
/* Construct and return a PARALLEL RTX vector with elements numbering the
   lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
   the vector - from the perspective of the architecture.  This does not
   line up with GCC's perspective on lane numbers, so we end up with
   different masks depending on our target endian-ness.  The diagram
   below may help.  We must draw the distinction when building masks
   which select one half of the vector.  An instruction selecting
   architectural low-lanes for a big-endian target, must be described using
   a mask selecting GCC high-lanes.

                 Big-Endian             Little-Endian

GCC             0   1   2   3           3   2   1   0
              | x | x | x | x |       | x | x | x | x |
Architecture    3   2   1   0           3   2   1   0

Low Mask:         { 2, 3 }                { 0, 1 }
High Mask:        { 0, 1 }                { 2, 3 }
*/

rtx
aarch64_simd_vect_par_cnst_half (machine_mode mode, bool high)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits / 2);
  int high_base = nunits / 2;
  int low_base = 0;
  int base;
  rtx t1;
  int i;

  if (BYTES_BIG_ENDIAN)
    base = high ? low_base : high_base;
  else
    base = high ? high_base : low_base;

  for (i = 0; i < nunits / 2; i++)
    RTVEC_ELT (v, i) = GEN_INT (base + i);

  t1 = gen_rtx_PARALLEL (mode, v);
  return t1;
}
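
/* Example (illustrative): on a little-endian target,
   aarch64_simd_vect_par_cnst_half (V4SImode, true) yields
   (parallel [2 3]); the same call on big-endian yields (parallel [0 1]),
   since architectural high lanes are GCC low lanes there.  */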
/* Check OP for validity as a PARALLEL RTX vector with elements
   numbering the lanes of either the high (HIGH == TRUE) or low lanes,
   from the perspective of the architecture.  See the diagram above
   aarch64_simd_vect_par_cnst_half for more details.  */

bool
aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
				       bool high)
{
  rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high);
  HOST_WIDE_INT count_op = XVECLEN (op, 0);
  HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
  int i = 0;

  if (!VECTOR_MODE_P (mode))
    return false;

  if (count_op != count_ideal)
    return false;

  for (i = 0; i < count_ideal; i++)
    {
      rtx elt_op = XVECEXP (op, 0, i);
      rtx elt_ideal = XVECEXP (ideal, 0, i);

      if (!CONST_INT_P (elt_op)
	  || INTVAL (elt_ideal) != INTVAL (elt_op))
	return false;
    }
  return true;
}
/* Bounds-check lanes.  Ensure OPERAND lies between LOW (inclusive) and
   HIGH (exclusive).  */
void
aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
			  const_tree exp)
{
  HOST_WIDE_INT lane;
  gcc_assert (CONST_INT_P (operand));
  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    {
      if (exp)
	error ("%Klane %wd out of range %wd - %wd", exp, lane, low, high - 1);
      else
	error ("lane %wd out of range %wd - %wd", lane, low, high - 1);
    }
}
/* Return TRUE if OP is a valid vector addressing mode.  */
bool
aarch64_simd_mem_operand_p (rtx op)
{
  return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
			|| REG_P (XEXP (op, 0)));
}
/* Emit a register copy from operand to operand, taking care not to
   early-clobber source registers in the process.

   COUNT is the number of components into which the copy needs to be
   decomposed.  */
void
aarch64_simd_emit_reg_reg_move (rtx *operands, enum machine_mode mode,
				unsigned int count)
{
  unsigned int i;
  int rdest = REGNO (operands[0]);
  int rsrc = REGNO (operands[1]);

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || rdest < rsrc)
    for (i = 0; i < count; i++)
      emit_move_insn (gen_rtx_REG (mode, rdest + i),
		      gen_rtx_REG (mode, rsrc + i));
  else
    for (i = 0; i < count; i++)
      emit_move_insn (gen_rtx_REG (mode, rdest + count - i - 1),
		      gen_rtx_REG (mode, rsrc + count - i - 1));
}
/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
   one of VSTRUCT modes: OI, CI or XI.  */
int
aarch64_simd_attr_length_move (rtx_insn *insn)
{
  machine_mode mode;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
    {
      mode = GET_MODE (recog_data.operand[0]);
      switch (mode)
	{
	case OImode:
	  return 8;
	case CImode:
	  return 12;
	case XImode:
	  return 16;
	default:
	  gcc_unreachable ();
	}
    }
  return 4;
}

/* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is
   one of VSTRUCT modes: OI, CI, EI, or XI.  */
int
aarch64_simd_attr_length_rglist (enum machine_mode mode)
{
  return (GET_MODE_SIZE (mode) / UNITS_PER_VREG) * 4;
}
/* Implement target hook TARGET_VECTOR_ALIGNMENT.  The AAPCS64 sets the maximum
   alignment of a vector to 128 bits.  */
static HOST_WIDE_INT
aarch64_simd_vector_alignment (const_tree type)
{
  HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
  return MIN (align, 128);
}

/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE.  */
static bool
aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
{
  if (is_packed)
    return false;

  /* We guarantee alignment for vectors up to 128-bits.  */
  if (tree_int_cst_compare (TYPE_SIZE (type),
			    bitsize_int (BIGGEST_ALIGNMENT)) > 0)
    return false;

  /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned.  */
  return true;
}
/* If VALS is a vector constant that can be loaded into a register
   using DUP, generate instructions to do so and return an RTX to
   assign to the register.  Otherwise return NULL_RTX.  */
static rtx
aarch64_simd_dup_constant (rtx vals)
{
  machine_mode mode = GET_MODE (vals);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  bool all_same = true;
  rtx x;
  int i;

  if (GET_CODE (vals) != CONST_VECTOR)
    return NULL_RTX;

  for (i = 1; i < n_elts; ++i)
    {
      x = CONST_VECTOR_ELT (vals, i);
      if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
	all_same = false;
    }

  if (!all_same)
    return NULL_RTX;

  /* We can load this constant by using DUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */
  x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
  return gen_rtx_VEC_DUPLICATE (mode, x);
}
/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that can not be converted into a CONST_VECTOR.  */
static rtx
aarch64_simd_make_constant (rtx vals)
{
  machine_mode mode = GET_MODE (vals);
  rtx const_dup;
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);
  int n_const = 0;
  int i;

  if (GET_CODE (vals) == CONST_VECTOR)
    const_vec = vals;
  else if (GET_CODE (vals) == PARALLEL)
    {
      /* A CONST_VECTOR must contain only CONST_INTs and
	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
	 Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	    n_const++;
	}
      if (n_const == n_elts)
	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    }
  else
    gcc_unreachable ();

  if (const_vec != NULL_RTX
      && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
    /* Load using MOVI/MVNI.  */
    return const_vec;
  else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
    /* Loaded using DUP.  */
    return const_dup;
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  We can not take advantage of single-cycle
       LD1 because we need a PC-relative addressing mode.  */
    return const_vec;
  else
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We can not construct an initializer.  */
    return NULL_RTX;
}
void
aarch64_expand_vector_init (rtx target, rtx vals)
{
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0;
  rtx any_const = NULL_RTX;
  bool all_same = true;

  for (int i = 0; i < n_elts; ++i)
    {
      rtx x = XVECEXP (vals, 0, i);
      if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
	++n_var;
      else
	any_const = x;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (n_var == 0)
    {
      rtx constant = aarch64_simd_make_constant (vals);
      if (constant != NULL_RTX)
	{
	  emit_move_insn (target, constant);
	  return;
	}
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same)
    {
      rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
      return;
    }

  /* Half the fields (or less) are non-constant.  Load constant then overwrite
     varying fields.  Hope that this is more efficient than using the stack.  */
  if (n_var <= n_elts/2)
    {
      rtx copy = copy_rtx (vals);

      /* Load constant part of vector.  We really don't care what goes into the
	 parts we will overwrite, but we're more likely to be able to load the
	 constant efficiently if it has fewer, larger, repeating parts
	 (see aarch64_simd_valid_immediate).  */
      for (int i = 0; i < n_elts; i++)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	    continue;
	  rtx subst = any_const;
	  for (int bit = n_elts / 2; bit > 0; bit /= 2)
	    {
	      /* Look in the copied vector, as more elements are const.  */
	      rtx test = XVECEXP (copy, 0, i ^ bit);
	      if (CONST_INT_P (test) || CONST_DOUBLE_P (test))
		{
		  subst = test;
		  break;
		}
	    }
	  XVECEXP (copy, 0, i) = subst;
	}
      aarch64_expand_vector_init (target, copy);

      /* Insert variables.  */
      enum insn_code icode = optab_handler (vec_set_optab, mode);
      gcc_assert (icode != CODE_FOR_nothing);

      for (int i = 0; i < n_elts; i++)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	    continue;
	  x = copy_to_mode_reg (inner_mode, x);
	  emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
	}
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (int i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
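
/* Example (illustrative): initializing {x, 1, 2, 3} in V4SImode takes
   the n_var <= n_elts/2 path above: lane 0 borrows the constant from
   lane 0 ^ 2, so the constant copy {2, 1, 2, 3} is loaded first, then
   a single vec_set inserts X into lane 0.  */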
static unsigned HOST_WIDE_INT
aarch64_shift_truncation_mask (machine_mode mode)
{
  return
    (aarch64_vector_mode_supported_p (mode)
     || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
}
#ifndef TLS_SECTION_ASM_FLAG
#define TLS_SECTION_ASM_FLAG 'T'
#endif

void
aarch64_elf_asm_named_section (const char *name, unsigned int flags,
			       tree decl ATTRIBUTE_UNUSED)
{
  char flagchars[10], *f = flagchars;

  /* If we have already declared this section, we can use an
     abbreviated form to switch back to it -- unless this section is
     part of a COMDAT groups, in which case GAS requires the full
     declaration every time.  */
  if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
      && (flags & SECTION_DECLARED))
    {
      fprintf (asm_out_file, "\t.section\t%s\n", name);
      return;
    }

  if (!(flags & SECTION_DEBUG))
    *f++ = 'a';
  if (flags & SECTION_WRITE)
    *f++ = 'w';
  if (flags & SECTION_CODE)
    *f++ = 'x';
  if (flags & SECTION_SMALL)
    *f++ = 's';
  if (flags & SECTION_MERGE)
    *f++ = 'M';
  if (flags & SECTION_STRINGS)
    *f++ = 'S';
  if (flags & SECTION_TLS)
    *f++ = TLS_SECTION_ASM_FLAG;
  if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
    *f++ = 'G';
  *f = '\0';

  fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);

  if (!(flags & SECTION_NOTYPE))
    {
      const char *type;
      const char *format;

      if (flags & SECTION_BSS)
	type = "nobits";
      else
	type = "progbits";

#ifdef TYPE_OPERAND_FMT
      format = "," TYPE_OPERAND_FMT;
#else
      format = ",@%s";
#endif

      fprintf (asm_out_file, format, type);

      if (flags & SECTION_ENTSIZE)
	fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
      if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
	{
	  if (TREE_CODE (decl) == IDENTIFIER_NODE)
	    fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
	  else
	    fprintf (asm_out_file, ",%s,comdat",
		     IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
	}
    }

  putc ('\n', asm_out_file);
}
/* Select a format to encode pointers in exception handling data.  */
int
aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
{
  int type;
  switch (aarch64_cmodel)
    {
    case AARCH64_CMODEL_TINY:
    case AARCH64_CMODEL_TINY_PIC:
    case AARCH64_CMODEL_SMALL:
    case AARCH64_CMODEL_SMALL_PIC:
      /* text+got+data < 4Gb.  4-byte signed relocs are sufficient
	 for everything.  */
      type = DW_EH_PE_sdata4;
      break;
    default:
      /* No assumptions here.  8-byte relocs required.  */
      type = DW_EH_PE_sdata8;
      break;
    }
  return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
}
/* Emit load exclusive.  */

static void
aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
			     rtx mem, rtx model_rtx)
{
  rtx (*gen) (rtx, rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_aarch64_load_exclusiveqi; break;
    case HImode: gen = gen_aarch64_load_exclusivehi; break;
    case SImode: gen = gen_aarch64_load_exclusivesi; break;
    case DImode: gen = gen_aarch64_load_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, model_rtx));
}

/* Emit store exclusive.  */

static void
aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
			      rtx rval, rtx mem, rtx model_rtx)
{
  rtx (*gen) (rtx, rtx, rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_aarch64_store_exclusiveqi; break;
    case HImode: gen = gen_aarch64_store_exclusivehi; break;
    case SImode: gen = gen_aarch64_store_exclusivesi; break;
    case DImode: gen = gen_aarch64_store_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (bval, rval, mem, model_rtx));
}
/* Mark the previous jump instruction as unlikely.  */

static void
aarch64_emit_unlikely_jump (rtx insn)
{
  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;

  insn = emit_jump_insn (insn);
  add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
}
/* Expand a compare and swap pattern.  */

void
aarch64_expand_compare_and_swap (rtx operands[])
{
  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  machine_mode mode, cmp_mode;
  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);
  cmp_mode = mode;

  /* Normally the succ memory model must be stronger than fail, but in the
     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */

  if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
      && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
    mod_s = GEN_INT (MEMMODEL_ACQ_REL);

  switch (mode)
    {
    case QImode:
    case HImode:
      /* For short modes, we're going to perform the comparison in SImode,
	 so do the zero-extension now.  */
      cmp_mode = SImode;
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* Fall through.  */

    case SImode:
    case DImode:
      /* Force the value into a register if needed.  */
      if (!aarch64_plus_operand (oldval, mode))
	oldval = force_reg (cmp_mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  switch (mode)
    {
    case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
    case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
    case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
    case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  x = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_EQ (SImode, x, const0_rtx);
  emit_insn (gen_rtx_SET (bval, x));
}
/* Emit a barrier, that is appropriate for memory model MODEL, at the end of a
   sequence implementing an atomic operation.  */

static void
aarch64_emit_post_barrier (enum memmodel model)
{
  const enum memmodel base_model = memmodel_base (model);

  if (is_mm_sync (model)
      && (base_model == MEMMODEL_ACQUIRE
	  || base_model == MEMMODEL_ACQ_REL
	  || base_model == MEMMODEL_SEQ_CST))
    {
      emit_insn (gen_mem_thread_fence (GEN_INT (MEMMODEL_SEQ_CST)));
    }
}
/* Split a compare and swap pattern.  */

void
aarch64_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, scratch;
  machine_mode mode;
  bool is_weak;
  rtx_code_label *label1, *label2;
  rtx x, cond;
  rtx model_rtx;
  enum memmodel model;

  rval = operands[0];
  mem = operands[1];
  oldval = operands[2];
  newval = operands[3];
  is_weak = (operands[4] != const0_rtx);
  model_rtx = operands[5];
  scratch = operands[7];
  mode = GET_MODE (mem);
  model = memmodel_from_int (INTVAL (model_rtx));

  label1 = NULL;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  /* The initial load can be relaxed for a __sync operation since a final
     barrier will be emitted to stop code hoisting.  */
  if (is_mm_sync (model))
    aarch64_emit_load_exclusive (mode, rval, mem,
				 GEN_INT (MEMMODEL_RELAXED));
  else
    aarch64_emit_load_exclusive (mode, rval, mem, model_rtx);

  cond = aarch64_gen_compare_reg (NE, rval, oldval);
  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
  aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));

  aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx);

  if (!is_weak)
    {
      x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
      aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
    }
  else
    {
      cond = gen_rtx_REG (CCmode, CC_REGNUM);
      x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
      emit_insn (gen_rtx_SET (cond, x));
    }

  emit_label (label2);

  /* Emit any final barrier needed for a __sync operation.  */
  if (is_mm_sync (model))
    aarch64_emit_post_barrier (model);
}
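
/* Resulting instruction shape (a hedged sketch for a strong SImode
   compare-and-swap with acquire-release ordering; registers and labels
   are illustrative, not compiler output):

   .L1:	ldaxr	w0, [x2]	// load-acquire exclusive
	cmp	w0, w3		// compare with expected value
	bne	.L2		// mismatch: exit without storing
	stlxr	w4, w5, [x2]	// store-release exclusive
	cbnz	w4, .L1		// retry if the reservation was lost
   .L2:  */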
/* Split an atomic operation.  */

void
aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
			 rtx value, rtx model_rtx, rtx cond)
{
  machine_mode mode = GET_MODE (mem);
  machine_mode wmode = (mode == DImode ? DImode : SImode);
  const enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
  const bool is_sync = is_mm_sync (model);
  rtx_code_label *label;
  rtx x;

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  /* The initial load can be relaxed for a __sync operation since a final
     barrier will be emitted to stop code hoisting.  */
  if (is_sync)
    aarch64_emit_load_exclusive (mode, old_out, mem,
				 GEN_INT (MEMMODEL_RELAXED));
  else
    aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
	{
	  value = GEN_INT (-INTVAL (value));
	  code = PLUS;
	}
      /* Fall through.  */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      break;
    }

  aarch64_emit_store_exclusive (mode, cond, mem,
				gen_lowpart (mode, new_out), model_rtx);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
  aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));

  /* Emit any final barrier needed for a __sync operation.  */
  if (is_sync)
    aarch64_emit_post_barrier (model);
}
static void
aarch64_print_extension (void)
{
  const struct aarch64_option_extension *opt = NULL;

  for (opt = all_extensions; opt->name != NULL; opt++)
    if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
      asm_fprintf (asm_out_file, "+%s", opt->name);

  asm_fprintf (asm_out_file, "\n");
}

static void
aarch64_start_file (void)
{
  if (selected_arch)
    {
      asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
      aarch64_print_extension ();
    }
  else if (selected_cpu)
    {
      const char *truncated_name
	= aarch64_rewrite_selected_cpu (selected_cpu->name);
      asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
      aarch64_print_extension ();
    }
  default_file_start();
}
/* Target hook for c_mode_for_suffix.  */
static machine_mode
aarch64_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;

  return VOIDmode;
}

/* We can only represent floating point constants which will fit in
   "quarter-precision" values.  These values are characterised by
   a sign bit, a 4-bit mantissa and a 3-bit exponent.  And are given
   by:

   (-1)^s * (n/16) * 2^r

   Where:
     's' is the sign bit.
     'n' is an integer in the range 16 <= n <= 31.
     'r' is an integer in the range -3 <= r <= 4.  */
/* Return true iff X can be represented by a quarter-precision
   floating point immediate operand X.  Note, we cannot represent 0.0.  */
bool
aarch64_float_const_representable_p (rtx x)
{
  /* This represents our current view of how many bits
     make up the mantissa.  */
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
  int exponent;
  unsigned HOST_WIDE_INT mantissa, mask;
  REAL_VALUE_TYPE r, m;
  bool fail;

  if (!CONST_DOUBLE_P (x))
    return false;

  if (GET_MODE (x) == VOIDmode)
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* We cannot represent infinities, NaNs or +/-zero.  We won't
     know if we have +zero until we analyse the mantissa, but we
     can reject the other invalid values.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
      || REAL_VALUE_MINUS_ZERO (r))
    return false;

  /* Extract exponent.  */
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);

  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     m1 holds the low part of the mantissa, m2 the high part.
     WARNING: If we ever have a representation using more than 2 * H_W_I - 1
     bits for the mantissa, this can fail (low bits will be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);

  /* If the low part of the mantissa has bits set we cannot represent
     the value.  */
  if (w.elt (0) != 0)
    return false;
  /* We have rejected the lower HOST_WIDE_INT, so update our
     understanding of how many bits lie in the mantissa and
     look only at the high HOST_WIDE_INT.  */
  mantissa = w.elt (1);
  point_pos -= HOST_BITS_PER_WIDE_INT;

  /* We can only represent values with a mantissa of the form 1.xxxx.  */
  mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return false;

  /* Having filtered unrepresentable values, we may now remove all
     but the highest 5 bits.  */
  mantissa >>= point_pos - 5;

  /* We cannot represent the value 0.0, so reject it.  This is handled
     elsewhere.  */
  if (mantissa == 0)
    return false;

  /* Then, as bit 4 is always set, we can mask it off, leaving
     the mantissa in the range [0, 15].  */
  mantissa &= ~(1 << 4);
  gcc_assert (mantissa <= 15);

  /* GCC internally does not use IEEE754-like encoding (where normalized
     significands are in the range [1, 2).  GCC uses [0.5, 1) (see real.c).
     Our mantissa values are shifted 4 places to the left relative to
     normalized IEEE754 so we must modify the exponent returned by REAL_EXP
     by 5 places to correct for GCC's representation.  */
  exponent = 5 - exponent;

  return (exponent >= 0 && exponent <= 7);
}
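
/* Worked example (for exposition): 0.25 = (-1)^0 * (16/16) * 2^-2 is
   representable (n = 16, r = -2), so "fmov d0, 0.25" is a single
   instruction; 0.1 has no exact (n/16) * 2^r form with 16 <= n <= 31
   and -3 <= r <= 4, so it must be loaded from the constant pool
   instead.  */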
char*
aarch64_output_simd_mov_immediate (rtx const_vector,
				   machine_mode mode,
				   unsigned width)
{
  bool is_valid;
  static char templ[40];
  const char *mnemonic;
  const char *shift_op;
  unsigned int lane_count = 0;
  char element_char;

  struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };

  /* This will return true to show const_vector is legal for use as either
     a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
     also update INFO to show how the immediate should be generated.  */
  is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
  gcc_assert (is_valid);

  element_char = sizetochar (info.element_width);
  lane_count = width / info.element_width;

  mode = GET_MODE_INNER (mode);
  if (mode == SFmode || mode == DFmode)
    {
      gcc_assert (info.shift == 0 && ! info.mvn);
      if (aarch64_float_const_zero_rtx_p (info.value))
	info.value = GEN_INT (0);
      else
	{
#define buf_size 20
	  REAL_VALUE_TYPE r;
	  REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
	  char float_buf[buf_size] = {'\0'};
	  real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
#undef buf_size

	  if (lane_count == 1)
	    snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
	  else
	    snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
		      lane_count, element_char, float_buf);
	  return templ;
	}
    }

  mnemonic = info.mvn ? "mvni" : "movi";
  shift_op = info.msl ? "msl" : "lsl";

  if (lane_count == 1)
    snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
	      mnemonic, UINTVAL (info.value));
  else if (info.shift)
    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
	      ", %s %d", mnemonic, lane_count, element_char,
	      UINTVAL (info.value), shift_op, info.shift);
  else
    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
	      mnemonic, lane_count, element_char, UINTVAL (info.value));
  return templ;
}
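
/* Example outputs (illustrative): a V4SI splat of 3 prints as
   "movi v0.4s, 0x3"; a splat of 0x300 prints as
   "movi v0.4s, 0x3, lsl 8"; and a V2DF splat of 1.0 takes the FMOV
   path above with the value rendered in decimal by
   real_to_decimal_for_mode.  */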
char*
aarch64_output_scalar_simd_mov_immediate (rtx immediate,
					  machine_mode mode)
{
  machine_mode vmode;

  gcc_assert (!VECTOR_MODE_P (mode));
  vmode = aarch64_simd_container_mode (mode, 64);
  rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
  return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
}

/* Split operands into moves from op[1] + op[2] into op[0].  */

void
aarch64_split_combinev16qi (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
  rtx destlo, desthi;

  gcc_assert (halfmode == V16QImode);

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
                               GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  */
  if (reg_overlap_mentioned_p (operands[2], destlo)
      && reg_overlap_mentioned_p (operands[1], desthi))
    {
      emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
      emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
      emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
    }
  else if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
         is in the right place already.  */
      if (src1 != dest)
        emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
        emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
        emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
        emit_move_insn (destlo, operands[1]);
    }
}
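
/* The three XORs in the reversed-parts case above are the classic
   register-swap identity (a ^= b; b ^= a; a ^= b) applied to V16QI
   values: they exchange the two source registers without needing a
   scratch register.  */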

/* vec_perm support.  */

#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  machine_mode vmode;
  unsigned char nelt;
  bool one_vector_p;
  bool testing_p;
};

/* Generate a variable permutation.  */

static void
aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  bool one_vector_p = rtx_equal_p (op0, op1);

  gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
  gcc_checking_assert (GET_MODE (op0) == vmode);
  gcc_checking_assert (GET_MODE (op1) == vmode);
  gcc_checking_assert (GET_MODE (sel) == vmode);
  gcc_checking_assert (TARGET_SIMD);

  if (one_vector_p)
    {
      if (vmode == V8QImode)
        {
          /* Expand the argument to a V16QI mode by duplicating it.  */
          rtx pair = gen_reg_rtx (V16QImode);
          emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
          emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
        }
      else
        emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
    }
  else
    {
      rtx pair;

      if (vmode == V8QImode)
        {
          pair = gen_reg_rtx (V16QImode);
          emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
          emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
        }
      else
        {
          pair = gen_reg_rtx (OImode);
          emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
          emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
        }
    }
}

void
aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  unsigned int nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx mask;

  /* The TBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = aarch64_simd_gen_const_vector_dup (vmode,
                                            one_vector_p ? nelt - 1
                                                         : 2 * nelt - 1);
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  /* For big-endian, we also need to reverse the index within the vector
     (but not which vector).  */
  if (BYTES_BIG_ENDIAN)
    {
      /* If one_vector_p, mask is a vector of (nelt - 1)'s already.  */
      if (!one_vector_p)
        mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
      sel = expand_simple_binop (vmode, XOR, sel, mask,
                                 NULL, 0, OPTAB_LIB_WIDEN);
    }
  aarch64_expand_vec_perm_1 (target, op0, op1, sel);
}
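
/* Worked example of the masking above: for a two-operand V8QI permute,
   nelt is 8, so indices are reduced with AND 15.  A selector value of 25
   becomes 9, i.e. element 1 of op1, matching the modulo semantics that
   __builtin_shuffle expects while TBL itself would have produced zero for
   the out-of-range index.  */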

/* Recognize patterns suitable for the TRN instructions.  */
static bool
aarch64_evpc_trn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
        return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
        return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0, in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }
  out = d->target;

  if (odd)
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_trn2v16qi; break;
        case V8QImode: gen = gen_aarch64_trn2v8qi; break;
        case V8HImode: gen = gen_aarch64_trn2v8hi; break;
        case V4HImode: gen = gen_aarch64_trn2v4hi; break;
        case V4SImode: gen = gen_aarch64_trn2v4si; break;
        case V2SImode: gen = gen_aarch64_trn2v2si; break;
        case V2DImode: gen = gen_aarch64_trn2v2di; break;
        case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
        case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
        case V2DFmode: gen = gen_aarch64_trn2v2df; break;
        default:
          return false;
        }
    }
  else
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_trn1v16qi; break;
        case V8QImode: gen = gen_aarch64_trn1v8qi; break;
        case V8HImode: gen = gen_aarch64_trn1v8hi; break;
        case V4HImode: gen = gen_aarch64_trn1v4hi; break;
        case V4SImode: gen = gen_aarch64_trn1v4si; break;
        case V2SImode: gen = gen_aarch64_trn1v2si; break;
        case V2DImode: gen = gen_aarch64_trn1v2di; break;
        case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
        case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
        case V2DFmode: gen = gen_aarch64_trn1v2df; break;
        default:
          return false;
        }
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
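
/* Concrete illustration (little-endian indices, two V4SI inputs):
   TRN1 matches the selector { 0, 4, 2, 6 } and TRN2 matches
   { 1, 5, 3, 7 }, i.e. the even (or odd) lanes of the two inputs
   interleaved pairwise.  */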

/* Recognize patterns suitable for the UZP instructions.  */
static bool
aarch64_evpc_uzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt = (i * 2 + odd) & mask;
      if (d->perm[i] != elt)
        return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0, in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }
  out = d->target;

  if (odd)
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
        case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
        case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
        case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
        case V4SImode: gen = gen_aarch64_uzp2v4si; break;
        case V2SImode: gen = gen_aarch64_uzp2v2si; break;
        case V2DImode: gen = gen_aarch64_uzp2v2di; break;
        case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
        case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
        case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
        default:
          return false;
        }
    }
  else
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
        case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
        case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
        case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
        case V4SImode: gen = gen_aarch64_uzp1v4si; break;
        case V2SImode: gen = gen_aarch64_uzp1v2si; break;
        case V2DImode: gen = gen_aarch64_uzp1v2di; break;
        case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
        case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
        case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
        default:
          return false;
        }
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
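
/* Illustration for two V4SI inputs: UZP1 matches the selector
   { 0, 2, 4, 6 } (all even lanes) and UZP2 matches { 1, 3, 5, 7 }
   (all odd lanes), the de-interleaving counterpart of ZIP.  */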

/* Recognize patterns suitable for the ZIP instructions.  */
static bool
aarch64_evpc_zip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  high = nelt / 2;
  if (d->perm[0] == high)
    /* Do Nothing.  */
    ;
  else if (d->perm[0] == 0)
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt = (i + high) & mask;
      if (d->perm[i * 2] != elt)
        return false;
      elt = (elt + nelt) & mask;
      if (d->perm[i * 2 + 1] != elt)
        return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0, in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      high = !high;
    }
  out = d->target;

  if (high)
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_zip2v16qi; break;
        case V8QImode: gen = gen_aarch64_zip2v8qi; break;
        case V8HImode: gen = gen_aarch64_zip2v8hi; break;
        case V4HImode: gen = gen_aarch64_zip2v4hi; break;
        case V4SImode: gen = gen_aarch64_zip2v4si; break;
        case V2SImode: gen = gen_aarch64_zip2v2si; break;
        case V2DImode: gen = gen_aarch64_zip2v2di; break;
        case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
        case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
        case V2DFmode: gen = gen_aarch64_zip2v2df; break;
        default:
          return false;
        }
    }
  else
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_zip1v16qi; break;
        case V8QImode: gen = gen_aarch64_zip1v8qi; break;
        case V8HImode: gen = gen_aarch64_zip1v8hi; break;
        case V4HImode: gen = gen_aarch64_zip1v4hi; break;
        case V4SImode: gen = gen_aarch64_zip1v4si; break;
        case V2SImode: gen = gen_aarch64_zip1v2si; break;
        case V2DImode: gen = gen_aarch64_zip1v2di; break;
        case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
        case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
        case V2DFmode: gen = gen_aarch64_zip1v2df; break;
        default:
          return false;
        }
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
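
/* Illustration for two V4SI inputs: ZIP1 matches the selector
   { 0, 4, 1, 5 } (the two low halves interleaved) and ZIP2 matches
   { 2, 6, 3, 7 } (the two high halves interleaved).  */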

/* Recognize patterns for the EXT insn.  */

static bool
aarch64_evpc_ext (struct expand_vec_perm_d *d)
{
  unsigned int i, nelt = d->nelt;
  rtx (*gen) (rtx, rtx, rtx, rtx);
  rtx offset;

  unsigned int location = d->perm[0]; /* Always < nelt.  */

  /* Check if the extracted indices are increasing by one.  */
  for (i = 1; i < nelt; i++)
    {
      unsigned int required = location + i;
      if (d->one_vector_p)
        {
          /* We'll pass the same vector in twice, so allow indices to wrap.  */
          required &= (nelt - 1);
        }
      if (d->perm[i] != required)
        return false;
    }

  switch (d->vmode)
    {
    case V16QImode: gen = gen_aarch64_extv16qi; break;
    case V8QImode: gen = gen_aarch64_extv8qi; break;
    case V4HImode: gen = gen_aarch64_extv4hi; break;
    case V8HImode: gen = gen_aarch64_extv8hi; break;
    case V2SImode: gen = gen_aarch64_extv2si; break;
    case V4SImode: gen = gen_aarch64_extv4si; break;
    case V2SFmode: gen = gen_aarch64_extv2sf; break;
    case V4SFmode: gen = gen_aarch64_extv4sf; break;
    case V2DImode: gen = gen_aarch64_extv2di; break;
    case V2DFmode: gen = gen_aarch64_extv2df; break;
    default:
      return false;
    }

  /* Success! */
  if (d->testing_p)
    return true;

  /* The case where (location == 0) is a no-op for both big- and little-endian,
     and is removed by the mid-end at optimization levels -O1 and higher.  */

  if (BYTES_BIG_ENDIAN && (location != 0))
    {
      /* After setup, we want the high elements of the first vector (stored
         at the LSB end of the register), and the low elements of the second
         vector (stored at the MSB end of the register).  So swap.  */
      std::swap (d->op0, d->op1);
      /* location != 0 (above), so safe to assume (nelt - location) < nelt.  */
      location = nelt - location;
    }

  offset = GEN_INT (location);
  emit_insn (gen (d->target, d->op0, d->op1, offset));
  return true;
}
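
/* Illustration: for two V4SI inputs, EXT with location 1 matches the
   selector { 1, 2, 3, 4 }, a window of consecutive elements starting in
   op0 and continuing into op1.  With a single input the indices may
   wrap, so { 3, 0, 1, 2 } (a rotation) is matched as well.  */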

/* Recognize patterns for the REV insns.  */

static bool
aarch64_evpc_rev (struct expand_vec_perm_d *d)
{
  unsigned int i, j, diff, nelt = d->nelt;
  rtx (*gen) (rtx, rtx);

  if (!d->one_vector_p)
    return false;

  diff = d->perm[0];
  switch (diff)
    {
    case 7:
      switch (d->vmode)
        {
        case V16QImode: gen = gen_aarch64_rev64v16qi; break;
        case V8QImode: gen = gen_aarch64_rev64v8qi; break;
        default:
          return false;
        }
      break;
    case 3:
      switch (d->vmode)
        {
        case V16QImode: gen = gen_aarch64_rev32v16qi; break;
        case V8QImode: gen = gen_aarch64_rev32v8qi; break;
        case V8HImode: gen = gen_aarch64_rev64v8hi; break;
        case V4HImode: gen = gen_aarch64_rev64v4hi; break;
        default:
          return false;
        }
      break;
    case 1:
      switch (d->vmode)
        {
        case V16QImode: gen = gen_aarch64_rev16v16qi; break;
        case V8QImode: gen = gen_aarch64_rev16v8qi; break;
        case V8HImode: gen = gen_aarch64_rev32v8hi; break;
        case V4HImode: gen = gen_aarch64_rev32v4hi; break;
        case V4SImode: gen = gen_aarch64_rev64v4si; break;
        case V2SImode: gen = gen_aarch64_rev64v2si; break;
        case V4SFmode: gen = gen_aarch64_rev64v4sf; break;
        case V2SFmode: gen = gen_aarch64_rev64v2sf; break;
        default:
          return false;
        }
      break;
    default:
      return false;
    }

  for (i = 0; i < nelt; i += diff + 1)
    for (j = 0; j <= diff; j += 1)
      {
        /* This is guaranteed to be true as the value of diff
           is 7, 3, 1 and we should have enough elements in the
           queue to generate this.  Getting a vector mask with a
           value of diff other than these values implies that
           something is wrong by the time we get here.  */
        gcc_assert (i + j < nelt);
        if (d->perm[i + j] != i + diff - j)
          return false;
      }

  /* Success! */
  if (d->testing_p)
    return true;

  emit_insn (gen (d->target, d->op0));
  return true;
}
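
/* Illustration: for a single V4SI input, diff == 1 and the selector
   { 1, 0, 3, 2 } give REV64, reversing the two 32-bit elements inside
   each 64-bit chunk.  For V8QI, diff == 7 and the selector
   { 7, 6, 5, 4, 3, 2, 1, 0 } give a full REV64 byte reversal.  */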

static bool
aarch64_evpc_dup (struct expand_vec_perm_d *d)
{
  rtx (*gen) (rtx, rtx, rtx);
  rtx out = d->target;
  rtx in0;
  machine_mode vmode = d->vmode;
  unsigned int i, elt, nelt = d->nelt;
  rtx lane;

  elt = d->perm[0];
  for (i = 1; i < nelt; i++)
    {
      if (elt != d->perm[i])
        return false;
    }

  /* The generic preparation in aarch64_expand_vec_perm_const_1
     swaps the operand order and the permute indices if it finds
     d->perm[0] to be in the second operand.  Thus, we can always
     use d->op0 and need not do any extra arithmetic to get the
     correct lane number.  */
  in0 = d->op0;
  lane = GEN_INT (elt); /* The pattern corrects for big-endian.  */

  switch (vmode)
    {
    case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
    case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
    case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
    case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
    case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
    case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
    case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
    case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
    case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
    case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
    default:
      return false;
    }

  emit_insn (gen (out, in0, lane));
  return true;
}
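
/* Illustration: a constant selector such as { 2, 2, 2, 2 } on V4SI is
   matched here and becomes a single lane broadcast, e.g.
   "dup v0.4s, v1.s[2]".  */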

static bool
aarch64_evpc_tbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    {
      int nunits = GET_MODE_NUNITS (vmode);

      /* If big-endian and two vectors we end up with a weird mixed-endian
         mode on NEON.  Reverse the index within each word but not the word
         itself.  */
      rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
                                           : d->perm[i]);
    }
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}

static bool
aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;

      gcc_assert (nelt == (nelt & -nelt));
      for (i = 0; i < nelt; ++i)
        d->perm[i] ^= nelt; /* Keep the same index, but in the other vector.  */

      std::swap (d->op0, d->op1);
    }

  if (TARGET_SIMD)
    {
      if (aarch64_evpc_rev (d))
        return true;
      else if (aarch64_evpc_ext (d))
        return true;
      else if (aarch64_evpc_dup (d))
        return true;
      else if (aarch64_evpc_zip (d))
        return true;
      else if (aarch64_evpc_uzp (d))
        return true;
      else if (aarch64_evpc_trn (d))
        return true;
      return aarch64_evpc_tbl (d);
    }

  return false;
}

/* Expand a vec_perm_const pattern.  */

bool
aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
        break;

      /* The elements of PERM do not suggest that only the first operand
         is used, but both operands are identical.  Allow easier matching
         of the permutation by folding the permutation into the single
         input vector.  */
      /* Fall Through.  */
    case 2:
      for (i = 0; i < nelt; ++i)
        d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return aarch64_expand_vec_perm_const_1 (&d);
}

static bool
aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
                                     const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Calculate whether all elements are in one vector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* If all elements are from the second vector, reindex as if from the
     first vector.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to a single vector.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = aarch64_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}

rtx
aarch64_reverse_mask (enum machine_mode mode)
{
  /* We have to reverse each vector because we don't have
     a permuted load that can reverse-load according to ABI rules.  */
  rtx mask;
  rtvec v = rtvec_alloc (16);
  int i, j;
  int nunits = GET_MODE_NUNITS (mode);
  int usize = GET_MODE_UNIT_SIZE (mode);

  gcc_assert (BYTES_BIG_ENDIAN);
  gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode));

  for (i = 0; i < nunits; i++)
    for (j = 0; j < usize; j++)
      RTVEC_ELT (v, i * usize + j) = GEN_INT ((i + 1) * usize - 1 - j);
  mask = gen_rtx_CONST_VECTOR (V16QImode, v);
  return force_reg (V16QImode, mask);
}
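
/* Worked example: for V4SI (usize == 4) the loop builds the byte selector
   { 3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12 }, which byte-reverses each
   32-bit element while keeping the element order itself unchanged.  */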

/* Implement MODES_TIEABLE_P.  */

bool
aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
    return true;

  /* We specifically want to allow elements of "structure" modes to
     be tieable to the structure.  This more general condition allows
     other rarer situations too.  */
  if (TARGET_SIMD
      && aarch64_vector_mode_p (mode1)
      && aarch64_vector_mode_p (mode2))
    return true;

  return false;
}

/* Return a new RTX holding the result of moving POINTER forward by
   AMOUNT bytes.  */

static rtx
aarch64_move_pointer (rtx pointer, int amount)
{
  rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);

  return adjust_automodify_address (pointer, GET_MODE (pointer),
                                    next, amount);
}

/* Return a new RTX holding the result of moving POINTER forward by the
   size of the mode it points to.  */

static rtx
aarch64_progress_pointer (rtx pointer)
{
  HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));

  return aarch64_move_pointer (pointer, amount);
}

/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
   the size of MODE.  */

static void
aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
                                              machine_mode mode)
{
  rtx reg = gen_reg_rtx (mode);

  /* "Cast" the pointers to the correct mode.  */
  *src = adjust_address (*src, mode, 0);
  *dst = adjust_address (*dst, mode, 0);
  /* Emit the memcpy.  */
  emit_move_insn (reg, *src);
  emit_move_insn (*dst, reg);
  /* Move the pointers forward.  */
  *src = aarch64_progress_pointer (*src);
  *dst = aarch64_progress_pointer (*dst);
}

/* Expand movmem, as if from a __builtin_memcpy.  Return true if
   we succeed, otherwise return false.  */

bool
aarch64_expand_movmem (rtx *operands)
{
  unsigned int n;
  rtx dst = operands[0];
  rtx src = operands[1];
  rtx base;
  bool speed_p = !optimize_function_for_size_p (cfun);

  /* When optimizing for size, give a better estimate of the length of a
     memcpy call, but use the default otherwise.  */
  unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;

  /* We can't do anything smart if the amount to copy is not constant.  */
  if (!CONST_INT_P (operands[2]))
    return false;

  n = UINTVAL (operands[2]);

  /* Try to keep the number of instructions low.  For cases below 16 bytes we
     need to make at most two moves.  For cases above 16 bytes it will be one
     move for each 16 byte chunk, then at most two additional moves.  */
  if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
    return false;

  base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  dst = adjust_automodify_address (dst, VOIDmode, base, 0);

  base = copy_to_mode_reg (Pmode, XEXP (src, 0));
  src = adjust_automodify_address (src, VOIDmode, base, 0);

  /* Simple cases.  Copy 0-3 bytes, as (if applicable) a 2-byte, then a
     1-byte chunk.  */
  if (n < 4)
    {
      if (n >= 2)
        {
          aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
          n -= 2;
        }

      if (n == 1)
        aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);

      return true;
    }

  /* Copy 4-8 bytes.  First a 4-byte chunk, then (if applicable) a second
     4-byte chunk, partially overlapping with the previously copied chunk.  */
  if (n < 8)
    {
      aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
      n -= 4;
      if (n > 0)
        {
          int move = n - 4;

          src = aarch64_move_pointer (src, move);
          dst = aarch64_move_pointer (dst, move);
          aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
        }
      return true;
    }

  /* Copy more than 8 bytes.  Copy chunks of 16 bytes until we run out of
     them, then (if applicable) an 8-byte chunk.  */
  while (n >= 8)
    {
      if (n / 16)
        {
          aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
          n -= 16;
        }
      else
        {
          aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
          n -= 8;
        }
    }

  /* Finish the final bytes of the copy.  We can always do this in one
     instruction.  We either copy the exact amount we need, or partially
     overlap with the previous chunk we copied and copy 8-bytes.  */
  if (n == 0)
    return true;
  else if (n == 1)
    aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
  else if (n == 2)
    aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
  else if (n == 4)
    aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
  else
    {
      if (n == 3)
        {
          src = aarch64_move_pointer (src, -1);
          dst = aarch64_move_pointer (dst, -1);
          aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
        }
      else
        {
          int move = n - 8;

          src = aarch64_move_pointer (src, move);
          dst = aarch64_move_pointer (dst, move);
          aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
        }
    }

  return true;
}
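
/* Worked example: for n == 7 the code above emits a 4-byte copy of bytes
   0-3 followed by a second 4-byte copy of bytes 3-6; the two accesses
   overlap by one byte, which is cheaper than emitting separate 4-, 2- and
   1-byte copies.  */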

/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
aarch64_asan_shadow_offset (void)
{
  return (HOST_WIDE_INT_1 << 36);
}
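
/* With the usual ASan mapping (shadow scale of 3), this offset places the
   shadow byte for an address ADDR at (ADDR >> 3) + (1 << 36).  */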

static bool
aarch64_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
                                        unsigned int align,
                                        enum by_pieces_operation op,
                                        bool speed_p)
{
  /* STORE_BY_PIECES can be used when copying a constant string, but
     in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
     For now we always fail this and let the move_by_pieces code copy
     the string from read-only memory.  */
  if (op == STORE_BY_PIECES)
    return false;

  return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
}

static enum machine_mode
aarch64_code_to_ccmode (enum rtx_code code)
{
  switch (code)
    {
    case NE:
      return CC_DNEmode;

    case EQ:
      return CC_DEQmode;

    case LE:
      return CC_DLEmode;

    case LT:
      return CC_DLTmode;

    case GE:
      return CC_DGEmode;

    case GT:
      return CC_DGTmode;

    case LEU:
      return CC_DLEUmode;

    case LTU:
      return CC_DLTUmode;

    case GEU:
      return CC_DGEUmode;

    case GTU:
      return CC_DGTUmode;

    default:
      return CCmode;
    }
}

static rtx
aarch64_gen_ccmp_first (rtx *prep_seq, rtx *gen_seq,
                        int code, tree treeop0, tree treeop1)
{
  enum machine_mode op_mode, cmp_mode, cc_mode;
  rtx op0, op1, cmp, target;
  int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
  enum insn_code icode;
  struct expand_operand ops[4];

  cc_mode = aarch64_code_to_ccmode ((enum rtx_code) code);
  if (cc_mode == CCmode)
    return NULL_RTX;

  start_sequence ();
  expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);

  op_mode = GET_MODE (op0);
  if (op_mode == VOIDmode)
    op_mode = GET_MODE (op1);

  switch (op_mode)
    {
    case QImode:
    case HImode:
    case SImode:
      cmp_mode = SImode;
      icode = CODE_FOR_cmpsi;
      break;

    case DImode:
      cmp_mode = DImode;
      icode = CODE_FOR_cmpdi;
      break;

    default:
      end_sequence ();
      return NULL_RTX;
    }

  op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
  op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
  if (!op0 || !op1)
    {
      end_sequence ();
      return NULL_RTX;
    }
  *prep_seq = get_insns ();
  end_sequence ();

  cmp = gen_rtx_fmt_ee ((enum rtx_code) code, cmp_mode, op0, op1);
  target = gen_rtx_REG (CCmode, CC_REGNUM);

  create_output_operand (&ops[0], target, CCmode);
  create_fixed_operand (&ops[1], cmp);
  create_fixed_operand (&ops[2], op0);
  create_fixed_operand (&ops[3], op1);

  start_sequence ();
  if (!maybe_expand_insn (icode, 4, ops))
    {
      end_sequence ();
      return NULL_RTX;
    }
  *gen_seq = get_insns ();
  end_sequence ();

  return gen_rtx_REG (cc_mode, CC_REGNUM);
}

static rtx
aarch64_gen_ccmp_next (rtx *prep_seq, rtx *gen_seq, rtx prev, int cmp_code,
                       tree treeop0, tree treeop1, int bit_code)
{
  rtx op0, op1, cmp0, cmp1, target;
  enum machine_mode op_mode, cmp_mode, cc_mode;
  int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
  enum insn_code icode = CODE_FOR_ccmp_andsi;
  struct expand_operand ops[6];

  cc_mode = aarch64_code_to_ccmode ((enum rtx_code) cmp_code);
  if (cc_mode == CCmode)
    return NULL_RTX;

  push_to_sequence ((rtx_insn*) *prep_seq);
  expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);

  op_mode = GET_MODE (op0);
  if (op_mode == VOIDmode)
    op_mode = GET_MODE (op1);

  switch (op_mode)
    {
    case QImode:
    case HImode:
    case SImode:
      cmp_mode = SImode;
      icode = (enum rtx_code) bit_code == AND ? CODE_FOR_ccmp_andsi
                                              : CODE_FOR_ccmp_iorsi;
      break;

    case DImode:
      cmp_mode = DImode;
      icode = (enum rtx_code) bit_code == AND ? CODE_FOR_ccmp_anddi
                                              : CODE_FOR_ccmp_iordi;
      break;

    default:
      end_sequence ();
      return NULL_RTX;
    }

  op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
  op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
  if (!op0 || !op1)
    {
      end_sequence ();
      return NULL_RTX;
    }
  *prep_seq = get_insns ();
  end_sequence ();

  target = gen_rtx_REG (cc_mode, CC_REGNUM);
  cmp1 = gen_rtx_fmt_ee ((enum rtx_code) cmp_code, cmp_mode, op0, op1);
  cmp0 = gen_rtx_fmt_ee (NE, cmp_mode, prev, const0_rtx);

  create_fixed_operand (&ops[0], prev);
  create_fixed_operand (&ops[1], target);
  create_fixed_operand (&ops[2], op0);
  create_fixed_operand (&ops[3], op1);
  create_fixed_operand (&ops[4], cmp0);
  create_fixed_operand (&ops[5], cmp1);

  push_to_sequence ((rtx_insn*) *gen_seq);
  if (!maybe_expand_insn (icode, 6, ops))
    {
      end_sequence ();
      return NULL_RTX;
    }

  *gen_seq = get_insns ();
  end_sequence ();

  return target;
}
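
/* Taken together, these two hooks let the middle end lower a condition
   chain such as "a == b && c < d" to one CMP followed by a conditional
   CCMP and a single branch, instead of two compare-and-branch sequences:
   aarch64_gen_ccmp_first expands the first compare and
   aarch64_gen_ccmp_next each subsequent one, predicated on PREV.  */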

#undef TARGET_GEN_CCMP_FIRST
#define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first

#undef TARGET_GEN_CCMP_NEXT
#define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next

/* Implement TARGET_SCHED_MACRO_FUSION_P.  Return true if target supports
   instruction fusion of some sort.  */

static bool
aarch64_macro_fusion_p (void)
{
  return aarch64_tune_params->fusible_ops != AARCH64_FUSE_NOTHING;
}

/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P.  Return true if PREV and CURR
   should be kept together during scheduling.  */

static bool
aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
{
  rtx set_dest;
  rtx prev_set = single_set (prev);
  rtx curr_set = single_set (curr);
  /* prev and curr are simple SET insns i.e. no flag setting or branching.  */
  bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);

  if (!aarch64_macro_fusion_p ())
    return false;

  if (simple_sets_p
      && (aarch64_tune_params->fusible_ops & AARCH64_FUSE_MOV_MOVK))
    {
      /* We are trying to match:
         prev (mov)  == (set (reg r0) (const_int imm16))
         curr (movk) == (set (zero_extract (reg r0)
                                           (const_int 16)
                                           (const_int 16))
                             (const_int imm16_1))  */

      set_dest = SET_DEST (curr_set);

      if (GET_CODE (set_dest) == ZERO_EXTRACT
          && CONST_INT_P (SET_SRC (curr_set))
          && CONST_INT_P (SET_SRC (prev_set))
          && CONST_INT_P (XEXP (set_dest, 2))
          && INTVAL (XEXP (set_dest, 2)) == 16
          && REG_P (XEXP (set_dest, 0))
          && REG_P (SET_DEST (prev_set))
          && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
        return true;
    }

  if (simple_sets_p
      && (aarch64_tune_params->fusible_ops & AARCH64_FUSE_ADRP_ADD))
    {
      /* We're trying to match:
         prev (adrp) == (set (reg r1)
                             (high (symbol_ref ("SYM"))))
         curr (add)  == (set (reg r0)
                             (lo_sum (reg r1)
                                     (symbol_ref ("SYM"))))
         Note that r0 need not necessarily be the same as r1, especially
         during pre-regalloc scheduling.  */

      if (satisfies_constraint_Ush (SET_SRC (prev_set))
          && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
        {
          if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
              && REG_P (XEXP (SET_SRC (curr_set), 0))
              && REGNO (XEXP (SET_SRC (curr_set), 0))
                 == REGNO (SET_DEST (prev_set))
              && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
                              XEXP (SET_SRC (curr_set), 1)))
            return true;
        }
    }

  if (simple_sets_p
      && (aarch64_tune_params->fusible_ops & AARCH64_FUSE_MOVK_MOVK))
    {
      /* We're trying to match:
         prev (movk) == (set (zero_extract (reg r0)
                                           (const_int 16)
                                           (const_int 32))
                             (const_int imm16_1))
         curr (movk) == (set (zero_extract (reg r0)
                                           (const_int 16)
                                           (const_int 48))
                             (const_int imm16_2))  */

      if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
          && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
          && REG_P (XEXP (SET_DEST (prev_set), 0))
          && REG_P (XEXP (SET_DEST (curr_set), 0))
          && REGNO (XEXP (SET_DEST (prev_set), 0))
             == REGNO (XEXP (SET_DEST (curr_set), 0))
          && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
          && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
          && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
          && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
          && CONST_INT_P (SET_SRC (prev_set))
          && CONST_INT_P (SET_SRC (curr_set)))
        return true;
    }

  if (simple_sets_p
      && (aarch64_tune_params->fusible_ops & AARCH64_FUSE_ADRP_LDR))
    {
      /* We're trying to match:
         prev (adrp) == (set (reg r0)
                             (high (symbol_ref ("SYM"))))
         curr (ldr)  == (set (reg r1)
                             (mem (lo_sum (reg r0)
                                          (symbol_ref ("SYM")))))
         or
         curr (ldr)  == (set (reg r1)
                             (zero_extend
                              (mem (lo_sum (reg r0)
                                           (symbol_ref ("SYM"))))))  */

      if (satisfies_constraint_Ush (SET_SRC (prev_set))
          && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
        {
          rtx curr_src = SET_SRC (curr_set);

          if (GET_CODE (curr_src) == ZERO_EXTEND)
            curr_src = XEXP (curr_src, 0);

          if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
              && REG_P (XEXP (XEXP (curr_src, 0), 0))
              && REGNO (XEXP (XEXP (curr_src, 0), 0))
                 == REGNO (SET_DEST (prev_set))
              && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
                              XEXP (SET_SRC (prev_set), 0)))
            return true;
        }
    }

  if ((aarch64_tune_params->fusible_ops & AARCH64_FUSE_CMP_BRANCH)
      && any_condjump_p (curr))
    {
      enum attr_type prev_type = get_attr_type (prev);

      /* FIXME: this misses some instructions that ThunderX considers
         simple arithmetic; simple shifts are missed here as well.  */
      if (prev_type == TYPE_ALUS_SREG
          || prev_type == TYPE_ALUS_IMM
          || prev_type == TYPE_LOGICS_REG
          || prev_type == TYPE_LOGICS_IMM)
        return true;
    }

  return false;
}
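
/* For example, with AARCH64_FUSE_MOV_MOVK the scheduler keeps pairs like

       mov  x0, #0x1234
       movk x0, #0x5678, lsl #16

   adjacent, and with AARCH64_FUSE_ADRP_ADD pairs like

       adrp x0, sym
       add  x0, x0, :lo12:sym

   so that cores which fuse these sequences can issue them as one op.  */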

/* If MEM is in the form of [base+offset], extract the two parts
   of address and set to BASE and OFFSET, otherwise return false
   after clearing BASE and OFFSET.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  if (REG_P (addr))
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  *base = NULL_RTX;
  *offset = NULL_RTX;

  return false;
}

/* Types for scheduling fusion.  */
enum sched_fusion_type
{
  SCHED_FUSION_NONE = 0,
  SCHED_FUSION_LD_SIGN_EXTEND,
  SCHED_FUSION_LD_ZERO_EXTEND,
  SCHED_FUSION_LD,
  SCHED_FUSION_ST,
  SCHED_FUSION_NUM
};

/* If INSN is a load or store of address in the form of [base+offset],
   extract the two parts and set to BASE and OFFSET.  Return scheduling
   fusion type this INSN is.  */

static enum sched_fusion_type
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
{
  rtx x, dest, src;
  enum sched_fusion_type fusion = SCHED_FUSION_LD;

  gcc_assert (INSN_P (insn));
  x = PATTERN (insn);
  if (GET_CODE (x) != SET)
    return SCHED_FUSION_NONE;

  src = SET_SRC (x);
  dest = SET_DEST (x);

  if (GET_MODE (dest) != SImode && GET_MODE (dest) != DImode
      && GET_MODE (dest) != SFmode && GET_MODE (dest) != DFmode)
    return SCHED_FUSION_NONE;

  if (GET_CODE (src) == SIGN_EXTEND)
    {
      fusion = SCHED_FUSION_LD_SIGN_EXTEND;
      src = XEXP (src, 0);
      if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
        return SCHED_FUSION_NONE;
    }
  else if (GET_CODE (src) == ZERO_EXTEND)
    {
      fusion = SCHED_FUSION_LD_ZERO_EXTEND;
      src = XEXP (src, 0);
      if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
        return SCHED_FUSION_NONE;
    }

  if (GET_CODE (src) == MEM && REG_P (dest))
    extract_base_offset_in_addr (src, base, offset);
  else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
    {
      fusion = SCHED_FUSION_ST;
      extract_base_offset_in_addr (dest, base, offset);
    }
  else
    return SCHED_FUSION_NONE;

  if (*base == NULL_RTX || *offset == NULL_RTX)
    fusion = SCHED_FUSION_NONE;

  return fusion;
}

/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.

   Currently we only support fusing ldr and str instructions, so FUSION_PRI
   and PRI are only calculated for these instructions.  For other
   instructions, FUSION_PRI and PRI are simply set to MAX_PRI - 1.  In the
   future, other kinds of instruction fusion can be added by returning
   different priorities.

   It's important that irrelevant instructions get the largest FUSION_PRI.  */

static void
aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
                               int *fusion_pri, int *pri)
{
  int tmp, off_val;
  rtx base, offset;
  enum sched_fusion_type fusion;

  gcc_assert (INSN_P (insn));

  tmp = max_pri - 1;
  fusion = fusion_load_store (insn, &base, &offset);
  if (fusion == SCHED_FUSION_NONE)
    {
      *pri = tmp;
      *fusion_pri = tmp;
      return;
    }

  /* Set FUSION_PRI according to fusion type and base register.  */
  *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);

  /* Calculate PRI.  */
  tmp /= 2;

  /* INSN with smaller offset goes first.  */
  off_val = (int)(INTVAL (offset));
  if (off_val >= 0)
    tmp -= (off_val & 0xfffff);
  else
    tmp += ((- off_val) & 0xfffff);

  *pri = tmp;
  return;
}

/* Given OPERANDS of consecutive load/store, check if we can merge
   them into ldp/stp.  LOAD is true if they are load instructions.
   MODE is the mode of memory operands.  */

bool
aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
                                enum machine_mode mode)
{
  HOST_WIDE_INT offval_1, offval_2, msize;
  enum reg_class rclass_1, rclass_2;
  rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;

  if (load)
    {
      mem_1 = operands[1];
      mem_2 = operands[3];
      reg_1 = operands[0];
      reg_2 = operands[2];
      gcc_assert (REG_P (reg_1) && REG_P (reg_2));
      if (REGNO (reg_1) == REGNO (reg_2))
        return false;
    }
  else
    {
      mem_1 = operands[0];
      mem_2 = operands[2];
      reg_1 = operands[1];
      reg_2 = operands[3];
    }

  /* The mems cannot be volatile.  */
  if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
    return false;

  /* Check if the addresses are in the form of [base+offset].  */
  extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
  if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
    return false;
  extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
  if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
    return false;

  /* Check if the bases are same.  */
  if (!rtx_equal_p (base_1, base_2))
    return false;

  offval_1 = INTVAL (offset_1);
  offval_2 = INTVAL (offset_2);
  msize = GET_MODE_SIZE (mode);
  /* Check if the offsets are consecutive.  */
  if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
    return false;

  /* Check if the addresses are clobbered by load.  */
  if (load)
    {
      if (reg_mentioned_p (reg_1, mem_1))
        return false;

      /* In increasing order, the last load can clobber the address.  */
      if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
        return false;
    }

  if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
    rclass_1 = FP_REGS;
  else
    rclass_1 = GENERAL_REGS;

  if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
    rclass_2 = FP_REGS;
  else
    rclass_2 = GENERAL_REGS;

  /* Check if the registers are of same class.  */
  if (rclass_1 != rclass_2)
    return false;

  return true;
}

/* Given OPERANDS of consecutive load/store, check if we can merge
   them into ldp/stp by adjusting the offset.  LOAD is true if they
   are load instructions.  MODE is the mode of memory operands.

   Given the below consecutive stores:

     str  w1, [xb, 0x100]
     str  w1, [xb, 0x104]
     str  w1, [xb, 0x108]
     str  w1, [xb, 0x10c]

   Though the offsets are out of the range supported by stp, we can
   still pair them after adjusting the offset, like:

     add  scratch, xb, 0x100
     stp  w1, w1, [scratch]
     stp  w1, w1, [scratch, 0x8]

   The peephole patterns detecting this opportunity should guarantee
   the scratch register is available.  */

bool
aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
                                       enum machine_mode mode)
{
  enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
  HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize;
  rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4;
  rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4;

  if (load)
    {
      reg_1 = operands[0];
      mem_1 = operands[1];
      reg_2 = operands[2];
      mem_2 = operands[3];
      reg_3 = operands[4];
      mem_3 = operands[5];
      reg_4 = operands[6];
      mem_4 = operands[7];
      gcc_assert (REG_P (reg_1) && REG_P (reg_2)
                  && REG_P (reg_3) && REG_P (reg_4));
      if (REGNO (reg_1) == REGNO (reg_2) || REGNO (reg_3) == REGNO (reg_4))
        return false;
    }
  else
    {
      mem_1 = operands[0];
      reg_1 = operands[1];
      mem_2 = operands[2];
      reg_2 = operands[3];
      mem_3 = operands[4];
      reg_3 = operands[5];
      mem_4 = operands[6];
      reg_4 = operands[7];
    }
  /* Skip if memory operand is by itself valid for ldp/stp.  */
  if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
    return false;

  /* The mems cannot be volatile.  */
  if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2)
      || MEM_VOLATILE_P (mem_3) || MEM_VOLATILE_P (mem_4))
    return false;

  /* Check if the addresses are in the form of [base+offset].  */
  extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
  if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
    return false;
  extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
  if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
    return false;
  extract_base_offset_in_addr (mem_3, &base_3, &offset_3);
  if (base_3 == NULL_RTX || offset_3 == NULL_RTX)
    return false;
  extract_base_offset_in_addr (mem_4, &base_4, &offset_4);
  if (base_4 == NULL_RTX || offset_4 == NULL_RTX)
    return false;

  /* Check if the bases are same.  */
  if (!rtx_equal_p (base_1, base_2)
      || !rtx_equal_p (base_2, base_3)
      || !rtx_equal_p (base_3, base_4))
    return false;

  offval_1 = INTVAL (offset_1);
  offval_2 = INTVAL (offset_2);
  offval_3 = INTVAL (offset_3);
  offval_4 = INTVAL (offset_4);
  msize = GET_MODE_SIZE (mode);
  /* Check if the offsets are consecutive.  */
  if ((offval_1 != (offval_2 + msize)
       || offval_1 != (offval_3 + msize * 2)
       || offval_1 != (offval_4 + msize * 3))
      && (offval_4 != (offval_3 + msize)
          || offval_4 != (offval_2 + msize * 2)
          || offval_4 != (offval_1 + msize * 3)))
    return false;

  /* Check if the addresses are clobbered by load.  */
  if (load)
    {
      if (reg_mentioned_p (reg_1, mem_1)
          || reg_mentioned_p (reg_2, mem_2)
          || reg_mentioned_p (reg_3, mem_3))
        return false;

      /* In increasing order, the last load can clobber the address.  */
      if (offval_1 > offval_2 && reg_mentioned_p (reg_4, mem_4))
        return false;
    }

  if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
    rclass_1 = FP_REGS;
  else
    rclass_1 = GENERAL_REGS;

  if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
    rclass_2 = FP_REGS;
  else
    rclass_2 = GENERAL_REGS;

  if (REG_P (reg_3) && FP_REGNUM_P (REGNO (reg_3)))
    rclass_3 = FP_REGS;
  else
    rclass_3 = GENERAL_REGS;

  if (REG_P (reg_4) && FP_REGNUM_P (REGNO (reg_4)))
    rclass_4 = FP_REGS;
  else
    rclass_4 = GENERAL_REGS;

  /* Check if the registers are of same class.  */
  if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 != rclass_4)
    return false;

  return true;
}

/* Given OPERANDS of consecutive load/store, this function pairs them
   into ldp/stp after adjusting the offset.  It depends on the fact
   that addresses of load/store instructions are in increasing order.
   MODE is the mode of memory operands.  CODE is the rtl operator
   which should be applied to all memory operands, it's SIGN_EXTEND,
   ZERO_EXTEND or UNKNOWN.  */

bool
aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
                             enum machine_mode mode, RTX_CODE code)
{
  rtx base, offset, t1, t2;
  rtx mem_1, mem_2, mem_3, mem_4;
  HOST_WIDE_INT off_val, abs_off, adj_off, new_off, stp_off_limit, msize;

  if (load)
    {
      mem_1 = operands[1];
      mem_2 = operands[3];
      mem_3 = operands[5];
      mem_4 = operands[7];
    }
  else
    {
      mem_1 = operands[0];
      mem_2 = operands[2];
      mem_3 = operands[4];
      mem_4 = operands[6];
      gcc_assert (code == UNKNOWN);
    }

  extract_base_offset_in_addr (mem_1, &base, &offset);
  gcc_assert (base != NULL_RTX && offset != NULL_RTX);

  /* Adjust the offset so it can fit in an ldp/stp instruction.  */
  msize = GET_MODE_SIZE (mode);
  stp_off_limit = msize * 0x40;
  off_val = INTVAL (offset);
  abs_off = (off_val < 0) ? -off_val : off_val;
  new_off = abs_off % stp_off_limit;
  adj_off = abs_off - new_off;

  /* Further adjust to make sure all offsets are OK.  */
  if ((new_off + msize * 2) >= stp_off_limit)
    {
      adj_off += stp_off_limit;
      new_off -= stp_off_limit;
    }

  /* Make sure the adjustment can be done with ADD/SUB instructions.  */
  if (adj_off >= 0x1000)
    return false;

  if (off_val < 0)
    {
      adj_off = -adj_off;
      new_off = -new_off;
    }

  /* Create new memory references.  */
  mem_1 = change_address (mem_1, VOIDmode,
                          plus_constant (DImode, operands[8], new_off));

  /* Check if the adjusted address is OK for ldp/stp.  */
  if (!aarch64_mem_pair_operand (mem_1, mode))
    return false;

  msize = GET_MODE_SIZE (mode);
  mem_2 = change_address (mem_2, VOIDmode,
                          plus_constant (DImode,
                                         operands[8],
                                         new_off + msize));
  mem_3 = change_address (mem_3, VOIDmode,
                          plus_constant (DImode,
                                         operands[8],
                                         new_off + msize * 2));
  mem_4 = change_address (mem_4, VOIDmode,
                          plus_constant (DImode,
                                         operands[8],
                                         new_off + msize * 3));

  if (code == ZERO_EXTEND)
    {
      mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
      mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
      mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
      mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
    }
  else if (code == SIGN_EXTEND)
    {
      mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
      mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
      mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
      mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
    }

  if (load)
    {
      operands[1] = mem_1;
      operands[3] = mem_2;
      operands[5] = mem_3;
      operands[7] = mem_4;
    }
  else
    {
      operands[0] = mem_1;
      operands[2] = mem_2;
      operands[4] = mem_3;
      operands[6] = mem_4;
    }

  /* Emit adjusting instruction.  */
  emit_insn (gen_rtx_SET (operands[8], plus_constant (DImode, base, adj_off)));
  /* Emit ldp/stp instructions.  */
  t1 = gen_rtx_SET (operands[0], operands[1]);
  t2 = gen_rtx_SET (operands[2], operands[3]);
  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
  t1 = gen_rtx_SET (operands[4], operands[5]);
  t2 = gen_rtx_SET (operands[6], operands[7]);
  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));

  return true;
}
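
/* Worked example for SImode (msize == 4): an incoming offset of 0x100
   gives stp_off_limit == 0x100, so new_off becomes 0 and adj_off 0x100;
   the scratch register is set to base + 0x100 and the four accesses
   become two stp/ldp instructions at [scratch] and [scratch, 0x8],
   exactly as in the comment before
   aarch64_operands_adjust_ok_for_ldpstp.  */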

#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST aarch64_address_cost

/* This hook determines whether unnamed bitfields affect the alignment
   of the containing structure.  The hook returns true if the structure
   should inherit the alignment requirements of an unnamed bitfield's
   type.  */
#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
11585 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
11587 #undef TARGET_ASM_ALIGNED_HI_OP
11588 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
11590 #undef TARGET_ASM_ALIGNED_SI_OP
11591 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
11593 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
11594 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
11595 hook_bool_const_tree_hwi_hwi_const_tree_true
11597 #undef TARGET_ASM_FILE_START
11598 #define TARGET_ASM_FILE_START aarch64_start_file
11600 #undef TARGET_ASM_OUTPUT_MI_THUNK
11601 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
11603 #undef TARGET_ASM_SELECT_RTX_SECTION
11604 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
11606 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
11607 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
11609 #undef TARGET_BUILD_BUILTIN_VA_LIST
11610 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
11612 #undef TARGET_CALLEE_COPIES
11613 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
11615 #undef TARGET_CAN_ELIMINATE
11616 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
11618 #undef TARGET_CANNOT_FORCE_CONST_MEM
11619 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
11621 #undef TARGET_CONDITIONAL_REGISTER_USAGE
11622 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage

/* Only the least significant bit is used for initialization guard
   variables.  */
#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix

#ifdef TARGET_BIG_ENDIAN_DEFAULT
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
#endif

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL aarch64_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN aarch64_fold_builtin

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG aarch64_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance

#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE aarch64_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS aarch64_init_builtins

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE aarch64_mangle_type

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost

#undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

/* This target hook should return true if accesses to volatile bitfields
   should use the narrowest mode possible.  It should return false if these
   accesses should use the bitfield container type.  */
#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE aarch64_override_options

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  aarch64_override_options_after_change

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB aarch64_return_in_msb

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  aarch64_sched_first_cycle_multipass_dfa_lookahead

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p

#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  aarch64_builtin_vectorization_cost

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  aarch64_builtin_vectorized_function

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  aarch64_autovectorize_vector_sizes

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
  aarch64_atomic_assign_expand_fenv

/* Section anchor support.  */

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -256

/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
   byte offset; we can do much more for larger data types, but have no way
   to determine the size of the access.  We assume accesses are aligned.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  aarch64_simd_vector_alignment_reachable

/* vec_perm support.  */

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  aarch64_vectorize_vec_perm_const_ok

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs

#undef TARGET_FLAGS_REGNUM
#define TARGET_FLAGS_REGNUM CC_REGNUM

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address

#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
  aarch64_use_by_pieces_infrastructure_p

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-aarch64.h"