1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2015 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
25 #include "insn-codes.h"
27 #include "insn-attr.h"
31 #include "double-int.h"
38 #include "fold-const.h"
39 #include "stringpool.h"
40 #include "stor-layout.h"
44 #include "dominance.h"
50 #include "cfgcleanup.h"
52 #include "basic-block.h"
54 #include "hard-reg-set.h"
60 #include "target-def.h"
61 #include "targhooks.h"
67 #include "langhooks.h"
68 #include "diagnostic-core.h"
69 #include "hash-table.h"
70 #include "tree-ssa-alias.h"
71 #include "internal-fn.h"
72 #include "gimple-fold.h"
74 #include "gimple-expr.h"
81 #include "tree-vectorizer.h"
82 #include "aarch64-cost-tables.h"
86 #include "tm-constrs.h"
88 /* Defined for convenience. */
89 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
91 /* Classifies an address.
94 A simple base register plus immediate offset.
97 A base register indexed by immediate offset with writeback.
100 A base register indexed by (optionally scaled) register.
103 A base register indexed by (optionally scaled) zero-extended register.
106 A base register indexed by (optionally scaled) sign-extended register.
109 A LO_SUM rtx with a base register and "LO12" symbol relocation.
112 A constant symbolic address, in pc-relative literal pool. */
114 enum aarch64_address_type
{
124 struct aarch64_address_info
{
125 enum aarch64_address_type type
;
129 enum aarch64_symbol_type symbol_type
;
132 struct simd_immediate_info
141 /* The current code model. */
142 enum aarch64_code_model aarch64_cmodel
;
145 #undef TARGET_HAVE_TLS
146 #define TARGET_HAVE_TLS 1
149 static bool aarch64_lra_p (void);
150 static bool aarch64_composite_type_p (const_tree
, machine_mode
);
151 static bool aarch64_vfp_is_call_or_return_candidate (machine_mode
,
153 machine_mode
*, int *,
155 static void aarch64_elf_asm_constructor (rtx
, int) ATTRIBUTE_UNUSED
;
156 static void aarch64_elf_asm_destructor (rtx
, int) ATTRIBUTE_UNUSED
;
157 static void aarch64_override_options_after_change (void);
158 static bool aarch64_vector_mode_supported_p (machine_mode
);
159 static unsigned bit_count (unsigned HOST_WIDE_INT
);
160 static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode
,
161 const unsigned char *sel
);
162 static int aarch64_address_cost (rtx
, machine_mode
, addr_space_t
, bool);
164 /* Major revision number of the ARM Architecture implemented by the target. */
165 unsigned aarch64_architecture_version
;
167 /* The processor for which instructions should be scheduled. */
168 enum aarch64_processor aarch64_tune
= cortexa53
;
170 /* The current tuning set. */
171 const struct tune_params
*aarch64_tune_params
;
173 /* Mask to specify which instructions we are allowed to generate. */
174 unsigned long aarch64_isa_flags
= 0;
176 /* Mask to specify which instruction scheduling options should be used. */
177 unsigned long aarch64_tune_flags
= 0;
179 /* Tuning parameters. */
181 #if HAVE_DESIGNATED_INITIALIZERS
182 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
184 #define NAMED_PARAM(NAME, VAL) (VAL)
187 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
191 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
194 static const struct cpu_addrcost_table generic_addrcost_table
=
196 #if HAVE_DESIGNATED_INITIALIZERS
205 NAMED_PARAM (pre_modify
, 0),
206 NAMED_PARAM (post_modify
, 0),
207 NAMED_PARAM (register_offset
, 0),
208 NAMED_PARAM (register_extend
, 0),
209 NAMED_PARAM (imm_offset
, 0)
212 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
215 static const struct cpu_addrcost_table cortexa57_addrcost_table
=
217 #if HAVE_DESIGNATED_INITIALIZERS
226 NAMED_PARAM (pre_modify
, 0),
227 NAMED_PARAM (post_modify
, 0),
228 NAMED_PARAM (register_offset
, 0),
229 NAMED_PARAM (register_extend
, 0),
230 NAMED_PARAM (imm_offset
, 0),
233 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
236 static const struct cpu_regmove_cost generic_regmove_cost
=
238 NAMED_PARAM (GP2GP
, 1),
239 /* Avoid the use of slow int<->fp moves for spilling by setting
240 their cost higher than memmov_cost. */
241 NAMED_PARAM (GP2FP
, 5),
242 NAMED_PARAM (FP2GP
, 5),
243 NAMED_PARAM (FP2FP
, 2)
246 static const struct cpu_regmove_cost cortexa57_regmove_cost
=
248 NAMED_PARAM (GP2GP
, 1),
249 /* Avoid the use of slow int<->fp moves for spilling by setting
250 their cost higher than memmov_cost. */
251 NAMED_PARAM (GP2FP
, 5),
252 NAMED_PARAM (FP2GP
, 5),
253 NAMED_PARAM (FP2FP
, 2)
256 static const struct cpu_regmove_cost cortexa53_regmove_cost
=
258 NAMED_PARAM (GP2GP
, 1),
259 /* Avoid the use of slow int<->fp moves for spilling by setting
260 their cost higher than memmov_cost. */
261 NAMED_PARAM (GP2FP
, 5),
262 NAMED_PARAM (FP2GP
, 5),
263 NAMED_PARAM (FP2FP
, 2)
266 static const struct cpu_regmove_cost thunderx_regmove_cost
=
268 NAMED_PARAM (GP2GP
, 2),
269 NAMED_PARAM (GP2FP
, 2),
270 NAMED_PARAM (FP2GP
, 6),
271 NAMED_PARAM (FP2FP
, 4)
274 /* Generic costs for vector insn classes. */
275 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
278 static const struct cpu_vector_cost generic_vector_cost
=
280 NAMED_PARAM (scalar_stmt_cost
, 1),
281 NAMED_PARAM (scalar_load_cost
, 1),
282 NAMED_PARAM (scalar_store_cost
, 1),
283 NAMED_PARAM (vec_stmt_cost
, 1),
284 NAMED_PARAM (vec_to_scalar_cost
, 1),
285 NAMED_PARAM (scalar_to_vec_cost
, 1),
286 NAMED_PARAM (vec_align_load_cost
, 1),
287 NAMED_PARAM (vec_unalign_load_cost
, 1),
288 NAMED_PARAM (vec_unalign_store_cost
, 1),
289 NAMED_PARAM (vec_store_cost
, 1),
290 NAMED_PARAM (cond_taken_branch_cost
, 3),
291 NAMED_PARAM (cond_not_taken_branch_cost
, 1)
294 /* Generic costs for vector insn classes. */
295 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
298 static const struct cpu_vector_cost cortexa57_vector_cost
=
300 NAMED_PARAM (scalar_stmt_cost
, 1),
301 NAMED_PARAM (scalar_load_cost
, 4),
302 NAMED_PARAM (scalar_store_cost
, 1),
303 NAMED_PARAM (vec_stmt_cost
, 3),
304 NAMED_PARAM (vec_to_scalar_cost
, 8),
305 NAMED_PARAM (scalar_to_vec_cost
, 8),
306 NAMED_PARAM (vec_align_load_cost
, 5),
307 NAMED_PARAM (vec_unalign_load_cost
, 5),
308 NAMED_PARAM (vec_unalign_store_cost
, 1),
309 NAMED_PARAM (vec_store_cost
, 1),
310 NAMED_PARAM (cond_taken_branch_cost
, 1),
311 NAMED_PARAM (cond_not_taken_branch_cost
, 1)
314 #define AARCH64_FUSE_NOTHING (0)
315 #define AARCH64_FUSE_MOV_MOVK (1 << 0)
316 #define AARCH64_FUSE_ADRP_ADD (1 << 1)
317 #define AARCH64_FUSE_MOVK_MOVK (1 << 2)
318 #define AARCH64_FUSE_ADRP_LDR (1 << 3)
319 #define AARCH64_FUSE_CMP_BRANCH (1 << 4)
321 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
324 static const struct tune_params generic_tunings
=
326 &cortexa57_extra_costs
,
327 &generic_addrcost_table
,
328 &generic_regmove_cost
,
329 &generic_vector_cost
,
330 NAMED_PARAM (memmov_cost
, 4),
331 NAMED_PARAM (issue_rate
, 2),
332 NAMED_PARAM (fuseable_ops
, AARCH64_FUSE_NOTHING
),
333 8, /* function_align. */
336 2, /* int_reassoc_width. */
337 4, /* fp_reassoc_width. */
338 1 /* vec_reassoc_width. */
341 static const struct tune_params cortexa53_tunings
=
343 &cortexa53_extra_costs
,
344 &generic_addrcost_table
,
345 &cortexa53_regmove_cost
,
346 &generic_vector_cost
,
347 NAMED_PARAM (memmov_cost
, 4),
348 NAMED_PARAM (issue_rate
, 2),
349 NAMED_PARAM (fuseable_ops
, (AARCH64_FUSE_MOV_MOVK
| AARCH64_FUSE_ADRP_ADD
350 | AARCH64_FUSE_MOVK_MOVK
| AARCH64_FUSE_ADRP_LDR
)),
351 8, /* function_align. */
354 2, /* int_reassoc_width. */
355 4, /* fp_reassoc_width. */
356 1 /* vec_reassoc_width. */
359 static const struct tune_params cortexa57_tunings
=
361 &cortexa57_extra_costs
,
362 &cortexa57_addrcost_table
,
363 &cortexa57_regmove_cost
,
364 &cortexa57_vector_cost
,
365 NAMED_PARAM (memmov_cost
, 4),
366 NAMED_PARAM (issue_rate
, 3),
367 NAMED_PARAM (fuseable_ops
, (AARCH64_FUSE_MOV_MOVK
| AARCH64_FUSE_ADRP_ADD
| AARCH64_FUSE_MOVK_MOVK
)),
368 16, /* function_align. */
371 2, /* int_reassoc_width. */
372 4, /* fp_reassoc_width. */
373 1 /* vec_reassoc_width. */
376 static const struct tune_params thunderx_tunings
=
378 &thunderx_extra_costs
,
379 &generic_addrcost_table
,
380 &thunderx_regmove_cost
,
381 &generic_vector_cost
,
382 NAMED_PARAM (memmov_cost
, 6),
383 NAMED_PARAM (issue_rate
, 2),
384 NAMED_PARAM (fuseable_ops
, AARCH64_FUSE_CMP_BRANCH
),
385 8, /* function_align. */
388 2, /* int_reassoc_width. */
389 4, /* fp_reassoc_width. */
390 1 /* vec_reassoc_width. */
393 /* A processor implementing AArch64. */
396 const char *const name
;
397 enum aarch64_processor core
;
399 unsigned architecture_version
;
400 const unsigned long flags
;
401 const struct tune_params
*const tune
;
404 /* Processor cores implementing AArch64. */
405 static const struct processor all_cores
[] =
407 #define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS) \
408 {NAME, SCHED, #ARCH, ARCH, FLAGS, &COSTS##_tunings},
409 #include "aarch64-cores.def"
411 {"generic", cortexa53
, "8", 8, AARCH64_FL_FOR_ARCH8
, &generic_tunings
},
412 {NULL
, aarch64_none
, NULL
, 0, 0, NULL
}
415 /* Architectures implementing AArch64. */
416 static const struct processor all_architectures
[] =
418 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
419 {NAME, CORE, #ARCH, ARCH, FLAGS, NULL},
420 #include "aarch64-arches.def"
422 {NULL
, aarch64_none
, NULL
, 0, 0, NULL
}
425 /* Target specification. These are populated as commandline arguments
426 are processed, or NULL if not specified. */
427 static const struct processor
*selected_arch
;
428 static const struct processor
*selected_cpu
;
429 static const struct processor
*selected_tune
;
431 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
433 /* An ISA extension in the co-processor and main instruction set space. */
434 struct aarch64_option_extension
436 const char *const name
;
437 const unsigned long flags_on
;
438 const unsigned long flags_off
;
441 /* ISA extensions in AArch64. */
442 static const struct aarch64_option_extension all_extensions
[] =
444 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
445 {NAME, FLAGS_ON, FLAGS_OFF},
446 #include "aarch64-option-extensions.def"
447 #undef AARCH64_OPT_EXTENSION
451 /* Used to track the size of an address when generating a pre/post
452 increment address. */
453 static machine_mode aarch64_memory_reference_mode
;
455 /* Used to force GTY into this file. */
456 static GTY(()) int gty_dummy
;
458 /* A table of valid AArch64 "bitmask immediate" values for
459 logical instructions. */
461 #define AARCH64_NUM_BITMASKS 5334
462 static unsigned HOST_WIDE_INT aarch64_bitmasks
[AARCH64_NUM_BITMASKS
];
464 typedef enum aarch64_cond_code
466 AARCH64_EQ
= 0, AARCH64_NE
, AARCH64_CS
, AARCH64_CC
, AARCH64_MI
, AARCH64_PL
,
467 AARCH64_VS
, AARCH64_VC
, AARCH64_HI
, AARCH64_LS
, AARCH64_GE
, AARCH64_LT
,
468 AARCH64_GT
, AARCH64_LE
, AARCH64_AL
, AARCH64_NV
472 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
474 /* The condition codes of the processor, and the inverse function. */
475 static const char * const aarch64_condition_codes
[] =
477 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
478 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
482 aarch64_min_divisions_for_recip_mul (enum machine_mode mode ATTRIBUTE_UNUSED
)
488 aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED
,
489 enum machine_mode mode
)
491 if (VECTOR_MODE_P (mode
))
492 return aarch64_tune_params
->vec_reassoc_width
;
493 if (INTEGRAL_MODE_P (mode
))
494 return aarch64_tune_params
->int_reassoc_width
;
495 if (FLOAT_MODE_P (mode
))
496 return aarch64_tune_params
->fp_reassoc_width
;
500 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
502 aarch64_dbx_register_number (unsigned regno
)
504 if (GP_REGNUM_P (regno
))
505 return AARCH64_DWARF_R0
+ regno
- R0_REGNUM
;
506 else if (regno
== SP_REGNUM
)
507 return AARCH64_DWARF_SP
;
508 else if (FP_REGNUM_P (regno
))
509 return AARCH64_DWARF_V0
+ regno
- V0_REGNUM
;
511 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
512 equivalent DWARF register. */
513 return DWARF_FRAME_REGISTERS
;
516 /* Return TRUE if MODE is any of the large INT modes. */
518 aarch64_vect_struct_mode_p (machine_mode mode
)
520 return mode
== OImode
|| mode
== CImode
|| mode
== XImode
;
523 /* Return TRUE if MODE is any of the vector modes. */
525 aarch64_vector_mode_p (machine_mode mode
)
527 return aarch64_vector_mode_supported_p (mode
)
528 || aarch64_vect_struct_mode_p (mode
);
531 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
533 aarch64_array_mode_supported_p (machine_mode mode
,
534 unsigned HOST_WIDE_INT nelems
)
537 && AARCH64_VALID_SIMD_QREG_MODE (mode
)
538 && (nelems
>= 2 && nelems
<= 4))
544 /* Implement HARD_REGNO_NREGS. */
547 aarch64_hard_regno_nregs (unsigned regno
, machine_mode mode
)
549 switch (aarch64_regno_regclass (regno
))
553 return (GET_MODE_SIZE (mode
) + UNITS_PER_VREG
- 1) / UNITS_PER_VREG
;
555 return (GET_MODE_SIZE (mode
) + UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
560 /* Implement HARD_REGNO_MODE_OK. */
563 aarch64_hard_regno_mode_ok (unsigned regno
, machine_mode mode
)
565 if (GET_MODE_CLASS (mode
) == MODE_CC
)
566 return regno
== CC_REGNUM
;
568 if (regno
== SP_REGNUM
)
569 /* The purpose of comparing with ptr_mode is to support the
570 global register variable associated with the stack pointer
571 register via the syntax of asm ("wsp") in ILP32. */
572 return mode
== Pmode
|| mode
== ptr_mode
;
574 if (regno
== FRAME_POINTER_REGNUM
|| regno
== ARG_POINTER_REGNUM
)
575 return mode
== Pmode
;
577 if (GP_REGNUM_P (regno
) && ! aarch64_vect_struct_mode_p (mode
))
580 if (FP_REGNUM_P (regno
))
582 if (aarch64_vect_struct_mode_p (mode
))
584 (regno
+ aarch64_hard_regno_nregs (regno
, mode
) - 1) <= V31_REGNUM
;
592 /* Implement HARD_REGNO_CALLER_SAVE_MODE. */
594 aarch64_hard_regno_caller_save_mode (unsigned regno
, unsigned nregs
,
597 /* Handle modes that fit within single registers. */
598 if (nregs
== 1 && GET_MODE_SIZE (mode
) <= 16)
600 if (GET_MODE_SIZE (mode
) >= 4)
605 /* Fall back to generic for multi-reg and very large modes. */
607 return choose_hard_reg_mode (regno
, nregs
, false);
610 /* Return true if calls to DECL should be treated as
611 long-calls (ie called via a register). */
613 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED
)
618 /* Return true if calls to symbol-ref SYM should be treated as
619 long-calls (ie called via a register). */
621 aarch64_is_long_call_p (rtx sym
)
623 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym
));
626 /* Return true if the offsets to a zero/sign-extract operation
627 represent an expression that matches an extend operation. The
628 operands represent the paramters from
630 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
632 aarch64_is_extend_from_extract (machine_mode mode
, rtx mult_imm
,
635 HOST_WIDE_INT mult_val
, extract_val
;
637 if (! CONST_INT_P (mult_imm
) || ! CONST_INT_P (extract_imm
))
640 mult_val
= INTVAL (mult_imm
);
641 extract_val
= INTVAL (extract_imm
);
644 && extract_val
< GET_MODE_BITSIZE (mode
)
645 && exact_log2 (extract_val
& ~7) > 0
646 && (extract_val
& 7) <= 4
647 && mult_val
== (1 << (extract_val
& 7)))
653 /* Emit an insn that's a simple single-set. Both the operands must be
654 known to be valid. */
656 emit_set_insn (rtx x
, rtx y
)
658 return emit_insn (gen_rtx_SET (VOIDmode
, x
, y
));
661 /* X and Y are two things to compare using CODE. Emit the compare insn and
662 return the rtx for register 0 in the proper mode. */
664 aarch64_gen_compare_reg (RTX_CODE code
, rtx x
, rtx y
)
666 machine_mode mode
= SELECT_CC_MODE (code
, x
, y
);
667 rtx cc_reg
= gen_rtx_REG (mode
, CC_REGNUM
);
669 emit_set_insn (cc_reg
, gen_rtx_COMPARE (mode
, x
, y
));
673 /* Build the SYMBOL_REF for __tls_get_addr. */
675 static GTY(()) rtx tls_get_addr_libfunc
;
678 aarch64_tls_get_addr (void)
680 if (!tls_get_addr_libfunc
)
681 tls_get_addr_libfunc
= init_one_libfunc ("__tls_get_addr");
682 return tls_get_addr_libfunc
;
685 /* Return the TLS model to use for ADDR. */
687 static enum tls_model
688 tls_symbolic_operand_type (rtx addr
)
690 enum tls_model tls_kind
= TLS_MODEL_NONE
;
693 if (GET_CODE (addr
) == CONST
)
695 split_const (addr
, &sym
, &addend
);
696 if (GET_CODE (sym
) == SYMBOL_REF
)
697 tls_kind
= SYMBOL_REF_TLS_MODEL (sym
);
699 else if (GET_CODE (addr
) == SYMBOL_REF
)
700 tls_kind
= SYMBOL_REF_TLS_MODEL (addr
);
705 /* We'll allow lo_sum's in addresses in our legitimate addresses
706 so that combine would take care of combining addresses where
707 necessary, but for generation purposes, we'll generate the address
710 tmp = hi (symbol_ref); adrp x1, foo
711 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
715 adrp x1, :got:foo adrp tmp, :tlsgd:foo
716 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
720 Load TLS symbol, depending on TLS mechanism and TLS access model.
722 Global Dynamic - Traditional TLS:
724 add dest, tmp, #:tlsgd_lo12:imm
727 Global Dynamic - TLS Descriptors:
728 adrp dest, :tlsdesc:imm
729 ldr tmp, [dest, #:tlsdesc_lo12:imm]
730 add dest, dest, #:tlsdesc_lo12:imm
737 adrp tmp, :gottprel:imm
738 ldr dest, [tmp, #:gottprel_lo12:imm]
743 add t0, tp, #:tprel_hi12:imm
744 add t0, #:tprel_lo12_nc:imm
748 aarch64_load_symref_appropriately (rtx dest
, rtx imm
,
749 enum aarch64_symbol_type type
)
753 case SYMBOL_SMALL_ABSOLUTE
:
755 /* In ILP32, the mode of dest can be either SImode or DImode. */
757 machine_mode mode
= GET_MODE (dest
);
759 gcc_assert (mode
== Pmode
|| mode
== ptr_mode
);
761 if (can_create_pseudo_p ())
762 tmp_reg
= gen_reg_rtx (mode
);
764 emit_move_insn (tmp_reg
, gen_rtx_HIGH (mode
, imm
));
765 emit_insn (gen_add_losym (dest
, tmp_reg
, imm
));
769 case SYMBOL_TINY_ABSOLUTE
:
770 emit_insn (gen_rtx_SET (Pmode
, dest
, imm
));
773 case SYMBOL_SMALL_GOT
:
775 /* In ILP32, the mode of dest can be either SImode or DImode,
776 while the got entry is always of SImode size. The mode of
777 dest depends on how dest is used: if dest is assigned to a
778 pointer (e.g. in the memory), it has SImode; it may have
779 DImode if dest is dereferenced to access the memeory.
780 This is why we have to handle three different ldr_got_small
781 patterns here (two patterns for ILP32). */
783 machine_mode mode
= GET_MODE (dest
);
785 if (can_create_pseudo_p ())
786 tmp_reg
= gen_reg_rtx (mode
);
788 emit_move_insn (tmp_reg
, gen_rtx_HIGH (mode
, imm
));
789 if (mode
== ptr_mode
)
792 emit_insn (gen_ldr_got_small_di (dest
, tmp_reg
, imm
));
794 emit_insn (gen_ldr_got_small_si (dest
, tmp_reg
, imm
));
798 gcc_assert (mode
== Pmode
);
799 emit_insn (gen_ldr_got_small_sidi (dest
, tmp_reg
, imm
));
805 case SYMBOL_SMALL_TLSGD
:
808 rtx result
= gen_rtx_REG (Pmode
, R0_REGNUM
);
811 aarch64_emit_call_insn (gen_tlsgd_small (result
, imm
));
812 insns
= get_insns ();
815 RTL_CONST_CALL_P (insns
) = 1;
816 emit_libcall_block (insns
, dest
, result
, imm
);
820 case SYMBOL_SMALL_TLSDESC
:
822 machine_mode mode
= GET_MODE (dest
);
823 rtx x0
= gen_rtx_REG (mode
, R0_REGNUM
);
826 gcc_assert (mode
== Pmode
|| mode
== ptr_mode
);
828 /* In ILP32, the got entry is always of SImode size. Unlike
829 small GOT, the dest is fixed at reg 0. */
831 emit_insn (gen_tlsdesc_small_si (imm
));
833 emit_insn (gen_tlsdesc_small_di (imm
));
834 tp
= aarch64_load_tp (NULL
);
837 tp
= gen_lowpart (mode
, tp
);
839 emit_insn (gen_rtx_SET (mode
, dest
, gen_rtx_PLUS (mode
, tp
, x0
)));
840 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
844 case SYMBOL_SMALL_GOTTPREL
:
846 /* In ILP32, the mode of dest can be either SImode or DImode,
847 while the got entry is always of SImode size. The mode of
848 dest depends on how dest is used: if dest is assigned to a
849 pointer (e.g. in the memory), it has SImode; it may have
850 DImode if dest is dereferenced to access the memeory.
851 This is why we have to handle three different tlsie_small
852 patterns here (two patterns for ILP32). */
853 machine_mode mode
= GET_MODE (dest
);
854 rtx tmp_reg
= gen_reg_rtx (mode
);
855 rtx tp
= aarch64_load_tp (NULL
);
857 if (mode
== ptr_mode
)
860 emit_insn (gen_tlsie_small_di (tmp_reg
, imm
));
863 emit_insn (gen_tlsie_small_si (tmp_reg
, imm
));
864 tp
= gen_lowpart (mode
, tp
);
869 gcc_assert (mode
== Pmode
);
870 emit_insn (gen_tlsie_small_sidi (tmp_reg
, imm
));
873 emit_insn (gen_rtx_SET (mode
, dest
, gen_rtx_PLUS (mode
, tp
, tmp_reg
)));
874 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
878 case SYMBOL_SMALL_TPREL
:
880 rtx tp
= aarch64_load_tp (NULL
);
881 emit_insn (gen_tlsle_small (dest
, tp
, imm
));
882 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
886 case SYMBOL_TINY_GOT
:
887 emit_insn (gen_ldr_got_tiny (dest
, imm
));
895 /* Emit a move from SRC to DEST. Assume that the move expanders can
896 handle all moves if !can_create_pseudo_p (). The distinction is
897 important because, unlike emit_move_insn, the move expanders know
898 how to force Pmode objects into the constant pool even when the
899 constant pool address is not itself legitimate. */
901 aarch64_emit_move (rtx dest
, rtx src
)
903 return (can_create_pseudo_p ()
904 ? emit_move_insn (dest
, src
)
905 : emit_move_insn_1 (dest
, src
));
908 /* Split a 128-bit move operation into two 64-bit move operations,
909 taking care to handle partial overlap of register to register
910 copies. Special cases are needed when moving between GP regs and
911 FP regs. SRC can be a register, constant or memory; DST a register
912 or memory. If either operand is memory it must not have any side
915 aarch64_split_128bit_move (rtx dst
, rtx src
)
920 machine_mode mode
= GET_MODE (dst
);
922 gcc_assert (mode
== TImode
|| mode
== TFmode
);
923 gcc_assert (!(side_effects_p (src
) || side_effects_p (dst
)));
924 gcc_assert (mode
== GET_MODE (src
) || GET_MODE (src
) == VOIDmode
);
926 if (REG_P (dst
) && REG_P (src
))
928 int src_regno
= REGNO (src
);
929 int dst_regno
= REGNO (dst
);
931 /* Handle FP <-> GP regs. */
932 if (FP_REGNUM_P (dst_regno
) && GP_REGNUM_P (src_regno
))
934 src_lo
= gen_lowpart (word_mode
, src
);
935 src_hi
= gen_highpart (word_mode
, src
);
939 emit_insn (gen_aarch64_movtilow_di (dst
, src_lo
));
940 emit_insn (gen_aarch64_movtihigh_di (dst
, src_hi
));
944 emit_insn (gen_aarch64_movtflow_di (dst
, src_lo
));
945 emit_insn (gen_aarch64_movtfhigh_di (dst
, src_hi
));
949 else if (GP_REGNUM_P (dst_regno
) && FP_REGNUM_P (src_regno
))
951 dst_lo
= gen_lowpart (word_mode
, dst
);
952 dst_hi
= gen_highpart (word_mode
, dst
);
956 emit_insn (gen_aarch64_movdi_tilow (dst_lo
, src
));
957 emit_insn (gen_aarch64_movdi_tihigh (dst_hi
, src
));
961 emit_insn (gen_aarch64_movdi_tflow (dst_lo
, src
));
962 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi
, src
));
968 dst_lo
= gen_lowpart (word_mode
, dst
);
969 dst_hi
= gen_highpart (word_mode
, dst
);
970 src_lo
= gen_lowpart (word_mode
, src
);
971 src_hi
= gen_highpart_mode (word_mode
, mode
, src
);
973 /* At most one pairing may overlap. */
974 if (reg_overlap_mentioned_p (dst_lo
, src_hi
))
976 aarch64_emit_move (dst_hi
, src_hi
);
977 aarch64_emit_move (dst_lo
, src_lo
);
981 aarch64_emit_move (dst_lo
, src_lo
);
982 aarch64_emit_move (dst_hi
, src_hi
);
987 aarch64_split_128bit_move_p (rtx dst
, rtx src
)
989 return (! REG_P (src
)
990 || ! (FP_REGNUM_P (REGNO (dst
)) && FP_REGNUM_P (REGNO (src
))));
993 /* Split a complex SIMD combine. */
996 aarch64_split_simd_combine (rtx dst
, rtx src1
, rtx src2
)
998 machine_mode src_mode
= GET_MODE (src1
);
999 machine_mode dst_mode
= GET_MODE (dst
);
1001 gcc_assert (VECTOR_MODE_P (dst_mode
));
1003 if (REG_P (dst
) && REG_P (src1
) && REG_P (src2
))
1005 rtx (*gen
) (rtx
, rtx
, rtx
);
1010 gen
= gen_aarch64_simd_combinev8qi
;
1013 gen
= gen_aarch64_simd_combinev4hi
;
1016 gen
= gen_aarch64_simd_combinev2si
;
1019 gen
= gen_aarch64_simd_combinev2sf
;
1022 gen
= gen_aarch64_simd_combinedi
;
1025 gen
= gen_aarch64_simd_combinedf
;
1031 emit_insn (gen (dst
, src1
, src2
));
1036 /* Split a complex SIMD move. */
1039 aarch64_split_simd_move (rtx dst
, rtx src
)
1041 machine_mode src_mode
= GET_MODE (src
);
1042 machine_mode dst_mode
= GET_MODE (dst
);
1044 gcc_assert (VECTOR_MODE_P (dst_mode
));
1046 if (REG_P (dst
) && REG_P (src
))
1048 rtx (*gen
) (rtx
, rtx
);
1050 gcc_assert (VECTOR_MODE_P (src_mode
));
1055 gen
= gen_aarch64_split_simd_movv16qi
;
1058 gen
= gen_aarch64_split_simd_movv8hi
;
1061 gen
= gen_aarch64_split_simd_movv4si
;
1064 gen
= gen_aarch64_split_simd_movv2di
;
1067 gen
= gen_aarch64_split_simd_movv4sf
;
1070 gen
= gen_aarch64_split_simd_movv2df
;
1076 emit_insn (gen (dst
, src
));
1082 aarch64_force_temporary (machine_mode mode
, rtx x
, rtx value
)
1084 if (can_create_pseudo_p ())
1085 return force_reg (mode
, value
);
1088 x
= aarch64_emit_move (x
, value
);
1095 aarch64_add_offset (machine_mode mode
, rtx temp
, rtx reg
, HOST_WIDE_INT offset
)
1097 if (!aarch64_plus_immediate (GEN_INT (offset
), mode
))
1100 /* Load the full offset into a register. This
1101 might be improvable in the future. */
1102 high
= GEN_INT (offset
);
1104 high
= aarch64_force_temporary (mode
, temp
, high
);
1105 reg
= aarch64_force_temporary (mode
, temp
,
1106 gen_rtx_PLUS (mode
, high
, reg
));
1108 return plus_constant (mode
, reg
, offset
);
1112 aarch64_internal_mov_immediate (rtx dest
, rtx imm
, bool generate
,
1115 unsigned HOST_WIDE_INT mask
;
1118 unsigned HOST_WIDE_INT val
;
1121 int one_match
, zero_match
, first_not_ffff_match
;
1124 if (CONST_INT_P (imm
) && aarch64_move_imm (INTVAL (imm
), mode
))
1127 emit_insn (gen_rtx_SET (VOIDmode
, dest
, imm
));
1134 /* We know we can't do this in 1 insn, and we must be able to do it
1135 in two; so don't mess around looking for sequences that don't buy
1139 emit_insn (gen_rtx_SET (VOIDmode
, dest
,
1140 GEN_INT (INTVAL (imm
) & 0xffff)));
1141 emit_insn (gen_insv_immsi (dest
, GEN_INT (16),
1142 GEN_INT ((INTVAL (imm
) >> 16) & 0xffff)));
1148 /* Remaining cases are all for DImode. */
1151 subtargets
= optimize
&& can_create_pseudo_p ();
1156 first_not_ffff_match
= -1;
1158 for (i
= 0; i
< 64; i
+= 16, mask
<<= 16)
1160 if ((val
& mask
) == mask
)
1164 if (first_not_ffff_match
< 0)
1165 first_not_ffff_match
= i
;
1166 if ((val
& mask
) == 0)
1173 /* Set one of the quarters and then insert back into result. */
1174 mask
= 0xffffll
<< first_not_ffff_match
;
1177 emit_insn (gen_rtx_SET (VOIDmode
, dest
, GEN_INT (val
| mask
)));
1178 emit_insn (gen_insv_immdi (dest
, GEN_INT (first_not_ffff_match
),
1179 GEN_INT ((val
>> first_not_ffff_match
)
1186 if (zero_match
== 2)
1187 goto simple_sequence
;
1189 mask
= 0x0ffff0000UL
;
1190 for (i
= 16; i
< 64; i
+= 16, mask
<<= 16)
1192 HOST_WIDE_INT comp
= mask
& ~(mask
- 1);
1194 if (aarch64_uimm12_shift (val
- (val
& mask
)))
1198 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1199 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1200 GEN_INT (val
& mask
)));
1201 emit_insn (gen_adddi3 (dest
, subtarget
,
1202 GEN_INT (val
- (val
& mask
))));
1207 else if (aarch64_uimm12_shift (-(val
- ((val
+ comp
) & mask
))))
1211 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1212 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1213 GEN_INT ((val
+ comp
) & mask
)));
1214 emit_insn (gen_adddi3 (dest
, subtarget
,
1215 GEN_INT (val
- ((val
+ comp
) & mask
))));
1220 else if (aarch64_uimm12_shift (val
- ((val
- comp
) | ~mask
)))
1224 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1225 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1226 GEN_INT ((val
- comp
) | ~mask
)));
1227 emit_insn (gen_adddi3 (dest
, subtarget
,
1228 GEN_INT (val
- ((val
- comp
) | ~mask
))));
1233 else if (aarch64_uimm12_shift (-(val
- (val
| ~mask
))))
1237 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1238 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1239 GEN_INT (val
| ~mask
)));
1240 emit_insn (gen_adddi3 (dest
, subtarget
,
1241 GEN_INT (val
- (val
| ~mask
))));
1248 /* See if we can do it by arithmetically combining two
1250 for (i
= 0; i
< AARCH64_NUM_BITMASKS
; i
++)
1255 if (aarch64_uimm12_shift (val
- aarch64_bitmasks
[i
])
1256 || aarch64_uimm12_shift (-val
+ aarch64_bitmasks
[i
]))
1260 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1261 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1262 GEN_INT (aarch64_bitmasks
[i
])));
1263 emit_insn (gen_adddi3 (dest
, subtarget
,
1264 GEN_INT (val
- aarch64_bitmasks
[i
])));
1270 for (j
= 0; j
< 64; j
+= 16, mask
<<= 16)
1272 if ((aarch64_bitmasks
[i
] & ~mask
) == (val
& ~mask
))
1276 emit_insn (gen_rtx_SET (VOIDmode
, dest
,
1277 GEN_INT (aarch64_bitmasks
[i
])));
1278 emit_insn (gen_insv_immdi (dest
, GEN_INT (j
),
1279 GEN_INT ((val
>> j
) & 0xffff)));
1287 /* See if we can do it by logically combining two immediates. */
1288 for (i
= 0; i
< AARCH64_NUM_BITMASKS
; i
++)
1290 if ((aarch64_bitmasks
[i
] & val
) == aarch64_bitmasks
[i
])
1294 for (j
= i
+ 1; j
< AARCH64_NUM_BITMASKS
; j
++)
1295 if (val
== (aarch64_bitmasks
[i
] | aarch64_bitmasks
[j
]))
1299 subtarget
= subtargets
? gen_reg_rtx (mode
) : dest
;
1300 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1301 GEN_INT (aarch64_bitmasks
[i
])));
1302 emit_insn (gen_iordi3 (dest
, subtarget
,
1303 GEN_INT (aarch64_bitmasks
[j
])));
1309 else if ((val
& aarch64_bitmasks
[i
]) == val
)
1313 for (j
= i
+ 1; j
< AARCH64_NUM_BITMASKS
; j
++)
1314 if (val
== (aarch64_bitmasks
[j
] & aarch64_bitmasks
[i
]))
1318 subtarget
= subtargets
? gen_reg_rtx (mode
) : dest
;
1319 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1320 GEN_INT (aarch64_bitmasks
[j
])));
1321 emit_insn (gen_anddi3 (dest
, subtarget
,
1322 GEN_INT (aarch64_bitmasks
[i
])));
1330 if (one_match
> zero_match
)
1332 /* Set either first three quarters or all but the third. */
1333 mask
= 0xffffll
<< (16 - first_not_ffff_match
);
1335 emit_insn (gen_rtx_SET (VOIDmode
, dest
,
1336 GEN_INT (val
| mask
| 0xffffffff00000000ull
)));
1339 /* Now insert other two quarters. */
1340 for (i
= first_not_ffff_match
+ 16, mask
<<= (first_not_ffff_match
<< 1);
1341 i
< 64; i
+= 16, mask
<<= 16)
1343 if ((val
& mask
) != mask
)
1346 emit_insn (gen_insv_immdi (dest
, GEN_INT (i
),
1347 GEN_INT ((val
>> i
) & 0xffff)));
1357 for (i
= 0; i
< 64; i
+= 16, mask
<<= 16)
1359 if ((val
& mask
) != 0)
1364 emit_insn (gen_rtx_SET (VOIDmode
, dest
,
1365 GEN_INT (val
& mask
)));
1372 emit_insn (gen_insv_immdi (dest
, GEN_INT (i
),
1373 GEN_INT ((val
>> i
) & 0xffff)));
1384 aarch64_expand_mov_immediate (rtx dest
, rtx imm
)
1386 machine_mode mode
= GET_MODE (dest
);
1388 gcc_assert (mode
== SImode
|| mode
== DImode
);
1390 /* Check on what type of symbol it is. */
1391 if (GET_CODE (imm
) == SYMBOL_REF
1392 || GET_CODE (imm
) == LABEL_REF
1393 || GET_CODE (imm
) == CONST
)
1395 rtx mem
, base
, offset
;
1396 enum aarch64_symbol_type sty
;
1398 /* If we have (const (plus symbol offset)), separate out the offset
1399 before we start classifying the symbol. */
1400 split_const (imm
, &base
, &offset
);
1402 sty
= aarch64_classify_symbol (base
, offset
, SYMBOL_CONTEXT_ADR
);
1405 case SYMBOL_FORCE_TO_MEM
:
1406 if (offset
!= const0_rtx
1407 && targetm
.cannot_force_const_mem (mode
, imm
))
1409 gcc_assert (can_create_pseudo_p ());
1410 base
= aarch64_force_temporary (mode
, dest
, base
);
1411 base
= aarch64_add_offset (mode
, NULL
, base
, INTVAL (offset
));
1412 aarch64_emit_move (dest
, base
);
1415 mem
= force_const_mem (ptr_mode
, imm
);
1417 if (mode
!= ptr_mode
)
1418 mem
= gen_rtx_ZERO_EXTEND (mode
, mem
);
1419 emit_insn (gen_rtx_SET (VOIDmode
, dest
, mem
));
1422 case SYMBOL_SMALL_TLSGD
:
1423 case SYMBOL_SMALL_TLSDESC
:
1424 case SYMBOL_SMALL_GOTTPREL
:
1425 case SYMBOL_SMALL_GOT
:
1426 case SYMBOL_TINY_GOT
:
1427 if (offset
!= const0_rtx
)
1429 gcc_assert(can_create_pseudo_p ());
1430 base
= aarch64_force_temporary (mode
, dest
, base
);
1431 base
= aarch64_add_offset (mode
, NULL
, base
, INTVAL (offset
));
1432 aarch64_emit_move (dest
, base
);
1437 case SYMBOL_SMALL_TPREL
:
1438 case SYMBOL_SMALL_ABSOLUTE
:
1439 case SYMBOL_TINY_ABSOLUTE
:
1440 aarch64_load_symref_appropriately (dest
, imm
, sty
);
1448 if (!CONST_INT_P (imm
))
1450 if (GET_CODE (imm
) == HIGH
)
1451 emit_insn (gen_rtx_SET (VOIDmode
, dest
, imm
));
1454 rtx mem
= force_const_mem (mode
, imm
);
1456 emit_insn (gen_rtx_SET (VOIDmode
, dest
, mem
));
1462 aarch64_internal_mov_immediate (dest
, imm
, true, GET_MODE (dest
));
1466 aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED
,
1467 tree exp ATTRIBUTE_UNUSED
)
1469 /* Currently, always true. */
1473 /* Implement TARGET_PASS_BY_REFERENCE. */
1476 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED
,
1479 bool named ATTRIBUTE_UNUSED
)
1482 machine_mode dummymode
;
1485 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1486 size
= (mode
== BLKmode
&& type
)
1487 ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1489 /* Aggregates are passed by reference based on their size. */
1490 if (type
&& AGGREGATE_TYPE_P (type
))
1492 size
= int_size_in_bytes (type
);
1495 /* Variable sized arguments are always returned by reference. */
1499 /* Can this be a candidate to be passed in fp/simd register(s)? */
1500 if (aarch64_vfp_is_call_or_return_candidate (mode
, type
,
1505 /* Arguments which are variable sized or larger than 2 registers are
1506 passed by reference unless they are a homogenous floating point
1508 return size
> 2 * UNITS_PER_WORD
;
1511 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1513 aarch64_return_in_msb (const_tree valtype
)
1515 machine_mode dummy_mode
;
1518 /* Never happens in little-endian mode. */
1519 if (!BYTES_BIG_ENDIAN
)
1522 /* Only composite types smaller than or equal to 16 bytes can
1523 be potentially returned in registers. */
1524 if (!aarch64_composite_type_p (valtype
, TYPE_MODE (valtype
))
1525 || int_size_in_bytes (valtype
) <= 0
1526 || int_size_in_bytes (valtype
) > 16)
1529 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1530 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1531 is always passed/returned in the least significant bits of fp/simd
1533 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype
), valtype
,
1534 &dummy_mode
, &dummy_int
, NULL
))
1540 /* Implement TARGET_FUNCTION_VALUE.
1541 Define how to find the value returned by a function. */
1544 aarch64_function_value (const_tree type
, const_tree func
,
1545 bool outgoing ATTRIBUTE_UNUSED
)
1550 machine_mode ag_mode
;
1552 mode
= TYPE_MODE (type
);
1553 if (INTEGRAL_TYPE_P (type
))
1554 mode
= promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
1556 if (aarch64_return_in_msb (type
))
1558 HOST_WIDE_INT size
= int_size_in_bytes (type
);
1560 if (size
% UNITS_PER_WORD
!= 0)
1562 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
1563 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
1567 if (aarch64_vfp_is_call_or_return_candidate (mode
, type
,
1568 &ag_mode
, &count
, NULL
))
1570 if (!aarch64_composite_type_p (type
, mode
))
1572 gcc_assert (count
== 1 && mode
== ag_mode
);
1573 return gen_rtx_REG (mode
, V0_REGNUM
);
1580 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
1581 for (i
= 0; i
< count
; i
++)
1583 rtx tmp
= gen_rtx_REG (ag_mode
, V0_REGNUM
+ i
);
1584 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
1585 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
1586 XVECEXP (par
, 0, i
) = tmp
;
1592 return gen_rtx_REG (mode
, R0_REGNUM
);
1595 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1596 Return true if REGNO is the number of a hard register in which the values
1597 of called function may come back. */
1600 aarch64_function_value_regno_p (const unsigned int regno
)
1602 /* Maximum of 16 bytes can be returned in the general registers. Examples
1603 of 16-byte return values are: 128-bit integers and 16-byte small
1604 structures (excluding homogeneous floating-point aggregates). */
1605 if (regno
== R0_REGNUM
|| regno
== R1_REGNUM
)
1608 /* Up to four fp/simd registers can return a function value, e.g. a
1609 homogeneous floating-point aggregate having four members. */
1610 if (regno
>= V0_REGNUM
&& regno
< V0_REGNUM
+ HA_MAX_NUM_FLDS
)
1611 return !TARGET_GENERAL_REGS_ONLY
;
1616 /* Implement TARGET_RETURN_IN_MEMORY.
1618 If the type T of the result of a function is such that
1620 would require that arg be passed as a value in a register (or set of
1621 registers) according to the parameter passing rules, then the result
1622 is returned in the same registers as would be used for such an
1626 aarch64_return_in_memory (const_tree type
, const_tree fndecl ATTRIBUTE_UNUSED
)
1629 machine_mode ag_mode
;
1632 if (!AGGREGATE_TYPE_P (type
)
1633 && TREE_CODE (type
) != COMPLEX_TYPE
1634 && TREE_CODE (type
) != VECTOR_TYPE
)
1635 /* Simple scalar types always returned in registers. */
1638 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type
),
1645 /* Types larger than 2 registers returned in memory. */
1646 size
= int_size_in_bytes (type
);
1647 return (size
< 0 || size
> 2 * UNITS_PER_WORD
);
1651 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v
, machine_mode mode
,
1652 const_tree type
, int *nregs
)
1654 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
1655 return aarch64_vfp_is_call_or_return_candidate (mode
,
1657 &pcum
->aapcs_vfp_rmode
,
1662 /* Given MODE and TYPE of a function argument, return the alignment in
1663 bits. The idea is to suppress any stronger alignment requested by
1664 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1665 This is a helper function for local use only. */
1668 aarch64_function_arg_alignment (machine_mode mode
, const_tree type
)
1670 unsigned int alignment
;
1674 if (!integer_zerop (TYPE_SIZE (type
)))
1676 if (TYPE_MODE (type
) == mode
)
1677 alignment
= TYPE_ALIGN (type
);
1679 alignment
= GET_MODE_ALIGNMENT (mode
);
1685 alignment
= GET_MODE_ALIGNMENT (mode
);
1690 /* Layout a function argument according to the AAPCS64 rules. The rule
1691 numbers refer to the rule numbers in the AAPCS64. */
1694 aarch64_layout_arg (cumulative_args_t pcum_v
, machine_mode mode
,
1696 bool named ATTRIBUTE_UNUSED
)
1698 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
1699 int ncrn
, nvrn
, nregs
;
1700 bool allocate_ncrn
, allocate_nvrn
;
1703 /* We need to do this once per argument. */
1704 if (pcum
->aapcs_arg_processed
)
1707 pcum
->aapcs_arg_processed
= true;
1709 /* Size in bytes, rounded to the nearest multiple of 8 bytes. */
1711 = AARCH64_ROUND_UP (type
? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
),
1714 allocate_ncrn
= (type
) ? !(FLOAT_TYPE_P (type
)) : !FLOAT_MODE_P (mode
);
1715 allocate_nvrn
= aarch64_vfp_is_call_candidate (pcum_v
,
1720 /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1721 The following code thus handles passing by SIMD/FP registers first. */
1723 nvrn
= pcum
->aapcs_nvrn
;
1725 /* C1 - C5 for floating point, homogenous floating point aggregates (HFA)
1726 and homogenous short-vector aggregates (HVA). */
1729 if (nvrn
+ nregs
<= NUM_FP_ARG_REGS
)
1731 pcum
->aapcs_nextnvrn
= nvrn
+ nregs
;
1732 if (!aarch64_composite_type_p (type
, mode
))
1734 gcc_assert (nregs
== 1);
1735 pcum
->aapcs_reg
= gen_rtx_REG (mode
, V0_REGNUM
+ nvrn
);
1741 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nregs
));
1742 for (i
= 0; i
< nregs
; i
++)
1744 rtx tmp
= gen_rtx_REG (pcum
->aapcs_vfp_rmode
,
1745 V0_REGNUM
+ nvrn
+ i
);
1746 tmp
= gen_rtx_EXPR_LIST
1748 GEN_INT (i
* GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
)));
1749 XVECEXP (par
, 0, i
) = tmp
;
1751 pcum
->aapcs_reg
= par
;
1757 /* C.3 NSRN is set to 8. */
1758 pcum
->aapcs_nextnvrn
= NUM_FP_ARG_REGS
;
1763 ncrn
= pcum
->aapcs_ncrn
;
1764 nregs
= size
/ UNITS_PER_WORD
;
1766 /* C6 - C9. though the sign and zero extension semantics are
1767 handled elsewhere. This is the case where the argument fits
1768 entirely general registers. */
1769 if (allocate_ncrn
&& (ncrn
+ nregs
<= NUM_ARG_REGS
))
1771 unsigned int alignment
= aarch64_function_arg_alignment (mode
, type
);
1773 gcc_assert (nregs
== 0 || nregs
== 1 || nregs
== 2);
1775 /* C.8 if the argument has an alignment of 16 then the NGRN is
1776 rounded up to the next even number. */
1777 if (nregs
== 2 && alignment
== 16 * BITS_PER_UNIT
&& ncrn
% 2)
1780 gcc_assert (ncrn
+ nregs
<= NUM_ARG_REGS
);
1782 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1783 A reg is still generated for it, but the caller should be smart
1784 enough not to use it. */
1785 if (nregs
== 0 || nregs
== 1 || GET_MODE_CLASS (mode
) == MODE_INT
)
1787 pcum
->aapcs_reg
= gen_rtx_REG (mode
, R0_REGNUM
+ ncrn
);
1794 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nregs
));
1795 for (i
= 0; i
< nregs
; i
++)
1797 rtx tmp
= gen_rtx_REG (word_mode
, R0_REGNUM
+ ncrn
+ i
);
1798 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
1799 GEN_INT (i
* UNITS_PER_WORD
));
1800 XVECEXP (par
, 0, i
) = tmp
;
1802 pcum
->aapcs_reg
= par
;
1805 pcum
->aapcs_nextncrn
= ncrn
+ nregs
;
1810 pcum
->aapcs_nextncrn
= NUM_ARG_REGS
;
1812 /* The argument is passed on stack; record the needed number of words for
1813 this argument and align the total size if necessary. */
1815 pcum
->aapcs_stack_words
= size
/ UNITS_PER_WORD
;
1816 if (aarch64_function_arg_alignment (mode
, type
) == 16 * BITS_PER_UNIT
)
1817 pcum
->aapcs_stack_size
= AARCH64_ROUND_UP (pcum
->aapcs_stack_size
,
1818 16 / UNITS_PER_WORD
);
1822 /* Implement TARGET_FUNCTION_ARG. */
1825 aarch64_function_arg (cumulative_args_t pcum_v
, machine_mode mode
,
1826 const_tree type
, bool named
)
1828 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
1829 gcc_assert (pcum
->pcs_variant
== ARM_PCS_AAPCS64
);
1831 if (mode
== VOIDmode
)
1834 aarch64_layout_arg (pcum_v
, mode
, type
, named
);
1835 return pcum
->aapcs_reg
;
1839 aarch64_init_cumulative_args (CUMULATIVE_ARGS
*pcum
,
1840 const_tree fntype ATTRIBUTE_UNUSED
,
1841 rtx libname ATTRIBUTE_UNUSED
,
1842 const_tree fndecl ATTRIBUTE_UNUSED
,
1843 unsigned n_named ATTRIBUTE_UNUSED
)
1845 pcum
->aapcs_ncrn
= 0;
1846 pcum
->aapcs_nvrn
= 0;
1847 pcum
->aapcs_nextncrn
= 0;
1848 pcum
->aapcs_nextnvrn
= 0;
1849 pcum
->pcs_variant
= ARM_PCS_AAPCS64
;
1850 pcum
->aapcs_reg
= NULL_RTX
;
1851 pcum
->aapcs_arg_processed
= false;
1852 pcum
->aapcs_stack_words
= 0;
1853 pcum
->aapcs_stack_size
= 0;
1859 aarch64_function_arg_advance (cumulative_args_t pcum_v
,
1864 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
1865 if (pcum
->pcs_variant
== ARM_PCS_AAPCS64
)
1867 aarch64_layout_arg (pcum_v
, mode
, type
, named
);
1868 gcc_assert ((pcum
->aapcs_reg
!= NULL_RTX
)
1869 != (pcum
->aapcs_stack_words
!= 0));
1870 pcum
->aapcs_arg_processed
= false;
1871 pcum
->aapcs_ncrn
= pcum
->aapcs_nextncrn
;
1872 pcum
->aapcs_nvrn
= pcum
->aapcs_nextnvrn
;
1873 pcum
->aapcs_stack_size
+= pcum
->aapcs_stack_words
;
1874 pcum
->aapcs_stack_words
= 0;
1875 pcum
->aapcs_reg
= NULL_RTX
;
1880 aarch64_function_arg_regno_p (unsigned regno
)
1882 return ((GP_REGNUM_P (regno
) && regno
< R0_REGNUM
+ NUM_ARG_REGS
)
1883 || (FP_REGNUM_P (regno
) && regno
< V0_REGNUM
+ NUM_FP_ARG_REGS
));
1886 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1887 PARM_BOUNDARY bits of alignment, but will be given anything up
1888 to STACK_BOUNDARY bits if the type requires it. This makes sure
1889 that both before and after the layout of each argument, the Next
1890 Stacked Argument Address (NSAA) will have a minimum alignment of
1894 aarch64_function_arg_boundary (machine_mode mode
, const_tree type
)
1896 unsigned int alignment
= aarch64_function_arg_alignment (mode
, type
);
1898 if (alignment
< PARM_BOUNDARY
)
1899 alignment
= PARM_BOUNDARY
;
1900 if (alignment
> STACK_BOUNDARY
)
1901 alignment
= STACK_BOUNDARY
;
1905 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1907 Return true if an argument passed on the stack should be padded upwards,
1908 i.e. if the least-significant byte of the stack slot has useful data.
1910 Small aggregate types are placed in the lowest memory address.
1912 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1915 aarch64_pad_arg_upward (machine_mode mode
, const_tree type
)
1917 /* On little-endian targets, the least significant byte of every stack
1918 argument is passed at the lowest byte address of the stack slot. */
1919 if (!BYTES_BIG_ENDIAN
)
1922 /* Otherwise, integral, floating-point and pointer types are padded downward:
1923 the least significant byte of a stack argument is passed at the highest
1924 byte address of the stack slot. */
1926 ? (INTEGRAL_TYPE_P (type
) || SCALAR_FLOAT_TYPE_P (type
)
1927 || POINTER_TYPE_P (type
))
1928 : (SCALAR_INT_MODE_P (mode
) || SCALAR_FLOAT_MODE_P (mode
)))
1931 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1935 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1937 It specifies padding for the last (may also be the only)
1938 element of a block move between registers and memory. If
1939 assuming the block is in the memory, padding upward means that
1940 the last element is padded after its highest significant byte,
1941 while in downward padding, the last element is padded at the
1942 its least significant byte side.
1944 Small aggregates and small complex types are always padded
1947 We don't need to worry about homogeneous floating-point or
1948 short-vector aggregates; their move is not affected by the
1949 padding direction determined here. Regardless of endianness,
1950 each element of such an aggregate is put in the least
1951 significant bits of a fp/simd register.
1953 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1954 register has useful data, and return the opposite if the most
1955 significant byte does. */
1958 aarch64_pad_reg_upward (machine_mode mode
, const_tree type
,
1959 bool first ATTRIBUTE_UNUSED
)
1962 /* Small composite types are always padded upward. */
1963 if (BYTES_BIG_ENDIAN
&& aarch64_composite_type_p (type
, mode
))
1965 HOST_WIDE_INT size
= (type
? int_size_in_bytes (type
)
1966 : GET_MODE_SIZE (mode
));
1967 if (size
< 2 * UNITS_PER_WORD
)
1971 /* Otherwise, use the default padding. */
1972 return !BYTES_BIG_ENDIAN
;
1976 aarch64_libgcc_cmp_return_mode (void)
1982 aarch64_frame_pointer_required (void)
1984 /* In aarch64_override_options_after_change
1985 flag_omit_leaf_frame_pointer turns off the frame pointer by
1986 default. Turn it back on now if we've not got a leaf
1988 if (flag_omit_leaf_frame_pointer
1989 && (!crtl
->is_leaf
|| df_regs_ever_live_p (LR_REGNUM
)))
1995 /* Mark the registers that need to be saved by the callee and calculate
1996 the size of the callee-saved registers area and frame record (both FP
1997 and LR may be omitted). */
1999 aarch64_layout_frame (void)
2001 HOST_WIDE_INT offset
= 0;
2004 if (reload_completed
&& cfun
->machine
->frame
.laid_out
)
2007 #define SLOT_NOT_REQUIRED (-2)
2008 #define SLOT_REQUIRED (-1)
2010 cfun
->machine
->frame
.wb_candidate1
= FIRST_PSEUDO_REGISTER
;
2011 cfun
->machine
->frame
.wb_candidate2
= FIRST_PSEUDO_REGISTER
;
2013 /* First mark all the registers that really need to be saved... */
2014 for (regno
= R0_REGNUM
; regno
<= R30_REGNUM
; regno
++)
2015 cfun
->machine
->frame
.reg_offset
[regno
] = SLOT_NOT_REQUIRED
;
2017 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
2018 cfun
->machine
->frame
.reg_offset
[regno
] = SLOT_NOT_REQUIRED
;
2020 /* ... that includes the eh data registers (if needed)... */
2021 if (crtl
->calls_eh_return
)
2022 for (regno
= 0; EH_RETURN_DATA_REGNO (regno
) != INVALID_REGNUM
; regno
++)
2023 cfun
->machine
->frame
.reg_offset
[EH_RETURN_DATA_REGNO (regno
)]
2026 /* ... and any callee saved register that dataflow says is live. */
2027 for (regno
= R0_REGNUM
; regno
<= R30_REGNUM
; regno
++)
2028 if (df_regs_ever_live_p (regno
)
2029 && (regno
== R30_REGNUM
2030 || !call_used_regs
[regno
]))
2031 cfun
->machine
->frame
.reg_offset
[regno
] = SLOT_REQUIRED
;
2033 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
2034 if (df_regs_ever_live_p (regno
)
2035 && !call_used_regs
[regno
])
2036 cfun
->machine
->frame
.reg_offset
[regno
] = SLOT_REQUIRED
;
2038 if (frame_pointer_needed
)
2040 /* FP and LR are placed in the linkage record. */
2041 cfun
->machine
->frame
.reg_offset
[R29_REGNUM
] = 0;
2042 cfun
->machine
->frame
.wb_candidate1
= R29_REGNUM
;
2043 cfun
->machine
->frame
.reg_offset
[R30_REGNUM
] = UNITS_PER_WORD
;
2044 cfun
->machine
->frame
.wb_candidate2
= R30_REGNUM
;
2045 cfun
->machine
->frame
.hardfp_offset
= 2 * UNITS_PER_WORD
;
2046 offset
+= 2 * UNITS_PER_WORD
;
2049 /* Now assign stack slots for them. */
2050 for (regno
= R0_REGNUM
; regno
<= R30_REGNUM
; regno
++)
2051 if (cfun
->machine
->frame
.reg_offset
[regno
] == SLOT_REQUIRED
)
2053 cfun
->machine
->frame
.reg_offset
[regno
] = offset
;
2054 if (cfun
->machine
->frame
.wb_candidate1
== FIRST_PSEUDO_REGISTER
)
2055 cfun
->machine
->frame
.wb_candidate1
= regno
;
2056 else if (cfun
->machine
->frame
.wb_candidate2
== FIRST_PSEUDO_REGISTER
)
2057 cfun
->machine
->frame
.wb_candidate2
= regno
;
2058 offset
+= UNITS_PER_WORD
;
2061 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
2062 if (cfun
->machine
->frame
.reg_offset
[regno
] == SLOT_REQUIRED
)
2064 cfun
->machine
->frame
.reg_offset
[regno
] = offset
;
2065 if (cfun
->machine
->frame
.wb_candidate1
== FIRST_PSEUDO_REGISTER
)
2066 cfun
->machine
->frame
.wb_candidate1
= regno
;
2067 else if (cfun
->machine
->frame
.wb_candidate2
== FIRST_PSEUDO_REGISTER
2068 && cfun
->machine
->frame
.wb_candidate1
>= V0_REGNUM
)
2069 cfun
->machine
->frame
.wb_candidate2
= regno
;
2070 offset
+= UNITS_PER_WORD
;
2073 cfun
->machine
->frame
.padding0
=
2074 (AARCH64_ROUND_UP (offset
, STACK_BOUNDARY
/ BITS_PER_UNIT
) - offset
);
2075 offset
= AARCH64_ROUND_UP (offset
, STACK_BOUNDARY
/ BITS_PER_UNIT
);
2077 cfun
->machine
->frame
.saved_regs_size
= offset
;
2079 cfun
->machine
->frame
.hard_fp_offset
2080 = AARCH64_ROUND_UP (cfun
->machine
->frame
.saved_varargs_size
2082 + cfun
->machine
->frame
.saved_regs_size
,
2083 STACK_BOUNDARY
/ BITS_PER_UNIT
);
2085 cfun
->machine
->frame
.frame_size
2086 = AARCH64_ROUND_UP (cfun
->machine
->frame
.hard_fp_offset
2087 + crtl
->outgoing_args_size
,
2088 STACK_BOUNDARY
/ BITS_PER_UNIT
);
2090 cfun
->machine
->frame
.laid_out
= true;
2094 aarch64_register_saved_on_entry (int regno
)
2096 return cfun
->machine
->frame
.reg_offset
[regno
] >= 0;
2100 aarch64_next_callee_save (unsigned regno
, unsigned limit
)
2102 while (regno
<= limit
&& !aarch64_register_saved_on_entry (regno
))
2108 aarch64_pushwb_single_reg (machine_mode mode
, unsigned regno
,
2109 HOST_WIDE_INT adjustment
)
2111 rtx base_rtx
= stack_pointer_rtx
;
2114 reg
= gen_rtx_REG (mode
, regno
);
2115 mem
= gen_rtx_PRE_MODIFY (Pmode
, base_rtx
,
2116 plus_constant (Pmode
, base_rtx
, -adjustment
));
2117 mem
= gen_rtx_MEM (mode
, mem
);
2119 insn
= emit_move_insn (mem
, reg
);
2120 RTX_FRAME_RELATED_P (insn
) = 1;
2124 aarch64_gen_storewb_pair (machine_mode mode
, rtx base
, rtx reg
, rtx reg2
,
2125 HOST_WIDE_INT adjustment
)
2130 return gen_storewb_pairdi_di (base
, base
, reg
, reg2
,
2131 GEN_INT (-adjustment
),
2132 GEN_INT (UNITS_PER_WORD
- adjustment
));
2134 return gen_storewb_pairdf_di (base
, base
, reg
, reg2
,
2135 GEN_INT (-adjustment
),
2136 GEN_INT (UNITS_PER_WORD
- adjustment
));
2143 aarch64_pushwb_pair_reg (machine_mode mode
, unsigned regno1
,
2144 unsigned regno2
, HOST_WIDE_INT adjustment
)
2147 rtx reg1
= gen_rtx_REG (mode
, regno1
);
2148 rtx reg2
= gen_rtx_REG (mode
, regno2
);
2150 insn
= emit_insn (aarch64_gen_storewb_pair (mode
, stack_pointer_rtx
, reg1
,
2152 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 2)) = 1;
2153 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 1)) = 1;
2154 RTX_FRAME_RELATED_P (insn
) = 1;
2158 aarch64_gen_loadwb_pair (machine_mode mode
, rtx base
, rtx reg
, rtx reg2
,
2159 HOST_WIDE_INT adjustment
)
2164 return gen_loadwb_pairdi_di (base
, base
, reg
, reg2
, GEN_INT (adjustment
),
2165 GEN_INT (UNITS_PER_WORD
));
2167 return gen_loadwb_pairdf_di (base
, base
, reg
, reg2
, GEN_INT (adjustment
),
2168 GEN_INT (UNITS_PER_WORD
));
2175 aarch64_gen_store_pair (machine_mode mode
, rtx mem1
, rtx reg1
, rtx mem2
,
2181 return gen_store_pairdi (mem1
, reg1
, mem2
, reg2
);
2184 return gen_store_pairdf (mem1
, reg1
, mem2
, reg2
);
2192 aarch64_gen_load_pair (machine_mode mode
, rtx reg1
, rtx mem1
, rtx reg2
,
2198 return gen_load_pairdi (reg1
, mem1
, reg2
, mem2
);
2201 return gen_load_pairdf (reg1
, mem1
, reg2
, mem2
);
2210 aarch64_save_callee_saves (machine_mode mode
, HOST_WIDE_INT start_offset
,
2211 unsigned start
, unsigned limit
, bool skip_wb
)
2214 rtx (*gen_mem_ref
) (machine_mode
, rtx
) = (frame_pointer_needed
2215 ? gen_frame_mem
: gen_rtx_MEM
);
2219 for (regno
= aarch64_next_callee_save (start
, limit
);
2221 regno
= aarch64_next_callee_save (regno
+ 1, limit
))
2224 HOST_WIDE_INT offset
;
2227 && (regno
== cfun
->machine
->frame
.wb_candidate1
2228 || regno
== cfun
->machine
->frame
.wb_candidate2
))
2231 reg
= gen_rtx_REG (mode
, regno
);
2232 offset
= start_offset
+ cfun
->machine
->frame
.reg_offset
[regno
];
2233 mem
= gen_mem_ref (mode
, plus_constant (Pmode
, stack_pointer_rtx
,
2236 regno2
= aarch64_next_callee_save (regno
+ 1, limit
);
2239 && ((cfun
->machine
->frame
.reg_offset
[regno
] + UNITS_PER_WORD
)
2240 == cfun
->machine
->frame
.reg_offset
[regno2
]))
2243 rtx reg2
= gen_rtx_REG (mode
, regno2
);
2246 offset
= start_offset
+ cfun
->machine
->frame
.reg_offset
[regno2
];
2247 mem2
= gen_mem_ref (mode
, plus_constant (Pmode
, stack_pointer_rtx
,
2249 insn
= emit_insn (aarch64_gen_store_pair (mode
, mem
, reg
, mem2
,
2252 /* The first part of a frame-related parallel insn is
2253 always assumed to be relevant to the frame
2254 calculations; subsequent parts, are only
2255 frame-related if explicitly marked. */
2256 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn
), 0, 1)) = 1;
2260 insn
= emit_move_insn (mem
, reg
);
2262 RTX_FRAME_RELATED_P (insn
) = 1;
2267 aarch64_restore_callee_saves (machine_mode mode
,
2268 HOST_WIDE_INT start_offset
, unsigned start
,
2269 unsigned limit
, bool skip_wb
, rtx
*cfi_ops
)
2271 rtx base_rtx
= stack_pointer_rtx
;
2272 rtx (*gen_mem_ref
) (machine_mode
, rtx
) = (frame_pointer_needed
2273 ? gen_frame_mem
: gen_rtx_MEM
);
2276 HOST_WIDE_INT offset
;
2278 for (regno
= aarch64_next_callee_save (start
, limit
);
2280 regno
= aarch64_next_callee_save (regno
+ 1, limit
))
2285 && (regno
== cfun
->machine
->frame
.wb_candidate1
2286 || regno
== cfun
->machine
->frame
.wb_candidate2
))
2289 reg
= gen_rtx_REG (mode
, regno
);
2290 offset
= start_offset
+ cfun
->machine
->frame
.reg_offset
[regno
];
2291 mem
= gen_mem_ref (mode
, plus_constant (Pmode
, base_rtx
, offset
));
2293 regno2
= aarch64_next_callee_save (regno
+ 1, limit
);
2296 && ((cfun
->machine
->frame
.reg_offset
[regno
] + UNITS_PER_WORD
)
2297 == cfun
->machine
->frame
.reg_offset
[regno2
]))
2299 rtx reg2
= gen_rtx_REG (mode
, regno2
);
2302 offset
= start_offset
+ cfun
->machine
->frame
.reg_offset
[regno2
];
2303 mem2
= gen_mem_ref (mode
, plus_constant (Pmode
, base_rtx
, offset
));
2304 emit_insn (aarch64_gen_load_pair (mode
, reg
, mem
, reg2
, mem2
));
2306 *cfi_ops
= alloc_reg_note (REG_CFA_RESTORE
, reg2
, *cfi_ops
);
2310 emit_move_insn (reg
, mem
);
2311 *cfi_ops
= alloc_reg_note (REG_CFA_RESTORE
, reg
, *cfi_ops
);
/* AArch64 stack frames generated by this compiler look like:

	+-------------------------------+
	|                               |
	|  incoming stack arguments     |
	|                               |
	+-------------------------------+
	|                               | <-- incoming stack pointer (aligned)
	|  callee-allocated save area   |
	|  for register varargs         |
	|                               |
	+-------------------------------+
	|  local variables              | <-- frame_pointer_rtx
	|                               |
	+-------------------------------+
	|  padding0                     | \
	+-------------------------------+  |
	|  callee-saved registers       |  | frame.saved_regs_size
	+-------------------------------+  |
	|  LR'                          |  |
	+-------------------------------+  |
	|  FP'                          | / <- hard_frame_pointer_rtx (aligned)
	+-------------------------------+
	|  dynamic allocation           |
	+-------------------------------+
	|  padding                      |
	+-------------------------------+
	|  outgoing stack arguments     | <-- arg_pointer
	|                               |
	+-------------------------------+
	|                               | <-- stack_pointer_rtx (aligned)

   Dynamic stack allocations via alloca() decrease stack_pointer_rtx
   but leave frame_pointer_rtx and hard_frame_pointer_rtx
   unchanged.  */
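/* Illustrative sketch (not verbatim compiler output): for a small function
   that needs a frame pointer, keeps 32 bytes of locals and calls another
   function, a prologue/epilogue derived from the layout above might look
   roughly like:

	stp	x29, x30, [sp, -48]!	// FP'/LR' at the frame base
	add	x29, sp, 0		// x29 = hard_frame_pointer_rtx
	...				// locals live above the frame record
	ldp	x29, x30, [sp], 48	// restore FP'/LR', free the frame
	ret

   The exact sequence depends on the frame size, the callee-saved registers
   in use and the write-back candidates chosen by aarch64_layout_frame.  */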
/* Generate the prologue instructions for entry into a function.
   Establish the stack frame by decreasing the stack pointer with a
   properly calculated size and, if necessary, create a frame record
   filled with the values of LR and previous frame pointer.  The
   current FP is also set up if it is in use.  */

void
aarch64_expand_prologue (void)
{
  /* sub	sp, sp, #<frame_size>
     stp	{fp, lr}, [sp, #<frame_size> - 16]
     add	fp, sp, #<frame_size> - hardfp_offset
     stp	{cs_reg}, [fp, #-16] etc.

     sub	sp, sp, <final_adjustment_if_any>  */
2367 HOST_WIDE_INT frame_size
, offset
;
2368 HOST_WIDE_INT fp_offset
; /* Offset from hard FP to SP. */
2369 HOST_WIDE_INT hard_fp_offset
;
2372 aarch64_layout_frame ();
2374 offset
= frame_size
= cfun
->machine
->frame
.frame_size
;
2375 hard_fp_offset
= cfun
->machine
->frame
.hard_fp_offset
;
2376 fp_offset
= frame_size
- hard_fp_offset
;
2378 if (flag_stack_usage_info
)
2379 current_function_static_stack_size
= frame_size
;
  /* Store pairs and load pairs have a range of only -512 to 504.  */

  /* When the frame has a large size, an initial decrease is done on
     the stack pointer to jump over the callee-allocated save area for
     register varargs, the local variable area and/or the callee-saved
     register area.  This will allow the pre-index write-back
     store pair instructions to be used for setting up the stack frame
     efficiently.  */
2390 offset
= hard_fp_offset
;
2392 offset
= cfun
->machine
->frame
.saved_regs_size
;
2394 frame_size
-= (offset
+ crtl
->outgoing_args_size
);
2397 if (frame_size
>= 0x1000000)
2399 rtx op0
= gen_rtx_REG (Pmode
, IP0_REGNUM
);
2400 emit_move_insn (op0
, GEN_INT (-frame_size
));
2401 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
, op0
));
2403 add_reg_note (insn
, REG_CFA_ADJUST_CFA
,
2404 gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
2405 plus_constant (Pmode
, stack_pointer_rtx
,
2407 RTX_FRAME_RELATED_P (insn
) = 1;
2409 else if (frame_size
> 0)
2411 int hi_ofs
= frame_size
& 0xfff000;
2412 int lo_ofs
= frame_size
& 0x000fff;
2416 insn
= emit_insn (gen_add2_insn
2417 (stack_pointer_rtx
, GEN_INT (-hi_ofs
)));
2418 RTX_FRAME_RELATED_P (insn
) = 1;
2422 insn
= emit_insn (gen_add2_insn
2423 (stack_pointer_rtx
, GEN_INT (-lo_ofs
)));
2424 RTX_FRAME_RELATED_P (insn
) = 1;
2433 bool skip_wb
= false;
2435 if (frame_pointer_needed
)
2441 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
,
2442 GEN_INT (-offset
)));
2443 RTX_FRAME_RELATED_P (insn
) = 1;
2445 aarch64_save_callee_saves (DImode
, fp_offset
, R29_REGNUM
,
2449 aarch64_pushwb_pair_reg (DImode
, R29_REGNUM
, R30_REGNUM
, offset
);
2451 /* Set up frame pointer to point to the location of the
2452 previous frame pointer on the stack. */
2453 insn
= emit_insn (gen_add3_insn (hard_frame_pointer_rtx
,
2455 GEN_INT (fp_offset
)));
2456 RTX_FRAME_RELATED_P (insn
) = 1;
2457 emit_insn (gen_stack_tie (stack_pointer_rtx
, hard_frame_pointer_rtx
));
2461 unsigned reg1
= cfun
->machine
->frame
.wb_candidate1
;
2462 unsigned reg2
= cfun
->machine
->frame
.wb_candidate2
;
2465 || reg1
== FIRST_PSEUDO_REGISTER
2466 || (reg2
== FIRST_PSEUDO_REGISTER
2469 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
,
2470 GEN_INT (-offset
)));
2471 RTX_FRAME_RELATED_P (insn
) = 1;
2475 machine_mode mode1
= (reg1
<= R30_REGNUM
) ? DImode
: DFmode
;
2479 if (reg2
== FIRST_PSEUDO_REGISTER
)
2480 aarch64_pushwb_single_reg (mode1
, reg1
, offset
);
2482 aarch64_pushwb_pair_reg (mode1
, reg1
, reg2
, offset
);
2486 aarch64_save_callee_saves (DImode
, fp_offset
, R0_REGNUM
, R30_REGNUM
,
2488 aarch64_save_callee_saves (DFmode
, fp_offset
, V0_REGNUM
, V31_REGNUM
,
2492 /* when offset >= 512,
2493 sub sp, sp, #<outgoing_args_size> */
2494 if (frame_size
> -1)
2496 if (crtl
->outgoing_args_size
> 0)
2498 insn
= emit_insn (gen_add2_insn
2500 GEN_INT (- crtl
->outgoing_args_size
)));
2501 RTX_FRAME_RELATED_P (insn
) = 1;
/* Return TRUE if we can use a simple_return insn.

   This function checks whether the callee-saved stack is empty, which
   means no restore actions are needed.  The pro_and_epilogue pass will
   use this to check whether the shrink-wrapping optimization is
   feasible.  */
2513 aarch64_use_return_insn_p (void)
2515 if (!reload_completed
)
2521 aarch64_layout_frame ();
2523 return cfun
->machine
->frame
.frame_size
== 0;
2526 /* Generate the epilogue instructions for returning from a function. */
2528 aarch64_expand_epilogue (bool for_sibcall
)
2530 HOST_WIDE_INT frame_size
, offset
;
2531 HOST_WIDE_INT fp_offset
;
2532 HOST_WIDE_INT hard_fp_offset
;
  /* We need to add a memory barrier to prevent reads from the deallocated stack.  */
2535 bool need_barrier_p
= (get_frame_size () != 0
2536 || cfun
->machine
->frame
.saved_varargs_size
);
2538 aarch64_layout_frame ();
2540 offset
= frame_size
= cfun
->machine
->frame
.frame_size
;
2541 hard_fp_offset
= cfun
->machine
->frame
.hard_fp_offset
;
2542 fp_offset
= frame_size
- hard_fp_offset
;
  /* Store pairs and load pairs have a range of only -512 to 504.  */
2547 offset
= hard_fp_offset
;
2549 offset
= cfun
->machine
->frame
.saved_regs_size
;
2551 frame_size
-= (offset
+ crtl
->outgoing_args_size
);
2553 if (!frame_pointer_needed
&& crtl
->outgoing_args_size
> 0)
2555 insn
= emit_insn (gen_add2_insn
2557 GEN_INT (crtl
->outgoing_args_size
)));
2558 RTX_FRAME_RELATED_P (insn
) = 1;
2564 /* If there were outgoing arguments or we've done dynamic stack
2565 allocation, then restore the stack pointer from the frame
2566 pointer. This is at most one insn and more efficient than using
2567 GCC's internal mechanism. */
2568 if (frame_pointer_needed
2569 && (crtl
->outgoing_args_size
|| cfun
->calls_alloca
))
2571 if (cfun
->calls_alloca
)
2572 emit_insn (gen_stack_tie (stack_pointer_rtx
, stack_pointer_rtx
));
2574 insn
= emit_insn (gen_add3_insn (stack_pointer_rtx
,
2575 hard_frame_pointer_rtx
,
2577 offset
= offset
- fp_offset
;
2582 unsigned reg1
= cfun
->machine
->frame
.wb_candidate1
;
2583 unsigned reg2
= cfun
->machine
->frame
.wb_candidate2
;
2584 bool skip_wb
= true;
2587 if (frame_pointer_needed
)
2590 || reg1
== FIRST_PSEUDO_REGISTER
2591 || (reg2
== FIRST_PSEUDO_REGISTER
2595 aarch64_restore_callee_saves (DImode
, fp_offset
, R0_REGNUM
, R30_REGNUM
,
2597 aarch64_restore_callee_saves (DFmode
, fp_offset
, V0_REGNUM
, V31_REGNUM
,
2601 emit_insn (gen_stack_tie (stack_pointer_rtx
, stack_pointer_rtx
));
2605 machine_mode mode1
= (reg1
<= R30_REGNUM
) ? DImode
: DFmode
;
2606 rtx rreg1
= gen_rtx_REG (mode1
, reg1
);
2608 cfi_ops
= alloc_reg_note (REG_CFA_RESTORE
, rreg1
, cfi_ops
);
2609 if (reg2
== FIRST_PSEUDO_REGISTER
)
2611 rtx mem
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
2612 mem
= gen_rtx_POST_MODIFY (Pmode
, stack_pointer_rtx
, mem
);
2613 mem
= gen_rtx_MEM (mode1
, mem
);
2614 insn
= emit_move_insn (rreg1
, mem
);
2618 rtx rreg2
= gen_rtx_REG (mode1
, reg2
);
2620 cfi_ops
= alloc_reg_note (REG_CFA_RESTORE
, rreg2
, cfi_ops
);
2621 insn
= emit_insn (aarch64_gen_loadwb_pair
2622 (mode1
, stack_pointer_rtx
, rreg1
,
2628 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
,
2632 /* Reset the CFA to be SP + FRAME_SIZE. */
2633 rtx new_cfa
= stack_pointer_rtx
;
2635 new_cfa
= plus_constant (Pmode
, new_cfa
, frame_size
);
2636 cfi_ops
= alloc_reg_note (REG_CFA_DEF_CFA
, new_cfa
, cfi_ops
);
2637 REG_NOTES (insn
) = cfi_ops
;
2638 RTX_FRAME_RELATED_P (insn
) = 1;
2644 emit_insn (gen_stack_tie (stack_pointer_rtx
, stack_pointer_rtx
));
2646 if (frame_size
>= 0x1000000)
2648 rtx op0
= gen_rtx_REG (Pmode
, IP0_REGNUM
);
2649 emit_move_insn (op0
, GEN_INT (frame_size
));
2650 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
, op0
));
2654 int hi_ofs
= frame_size
& 0xfff000;
2655 int lo_ofs
= frame_size
& 0x000fff;
2657 if (hi_ofs
&& lo_ofs
)
2659 insn
= emit_insn (gen_add2_insn
2660 (stack_pointer_rtx
, GEN_INT (hi_ofs
)));
2661 RTX_FRAME_RELATED_P (insn
) = 1;
2662 frame_size
= lo_ofs
;
2664 insn
= emit_insn (gen_add2_insn
2665 (stack_pointer_rtx
, GEN_INT (frame_size
)));
2668 /* Reset the CFA to be SP + 0. */
2669 add_reg_note (insn
, REG_CFA_DEF_CFA
, stack_pointer_rtx
);
2670 RTX_FRAME_RELATED_P (insn
) = 1;
2673 /* Stack adjustment for exception handler. */
2674 if (crtl
->calls_eh_return
)
2676 /* We need to unwind the stack by the offset computed by
2677 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
2678 to be SP; letting the CFA move during this adjustment
2679 is just as correct as retaining the CFA from the body
2680 of the function. Therefore, do nothing special. */
2681 emit_insn (gen_add2_insn (stack_pointer_rtx
, EH_RETURN_STACKADJ_RTX
));
2684 emit_use (gen_rtx_REG (DImode
, LR_REGNUM
));
2686 emit_jump_insn (ret_rtx
);
/* Return the place to copy the exception unwinding return address to.
   This will probably be a stack slot, but could (in theory) be the
   return register.  */
2693 aarch64_final_eh_return_addr (void)
2695 HOST_WIDE_INT fp_offset
;
2697 aarch64_layout_frame ();
2699 fp_offset
= cfun
->machine
->frame
.frame_size
2700 - cfun
->machine
->frame
.hard_fp_offset
;
2702 if (cfun
->machine
->frame
.reg_offset
[LR_REGNUM
] < 0)
2703 return gen_rtx_REG (DImode
, LR_REGNUM
);
  /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
     result in a store to save LR introduced by builtin_eh_return () being
     incorrectly deleted because the alias is not detected.
     So in the calculation of the address to copy the exception unwinding
     return address to, we note two cases.
     If FP is needed and the fp_offset is 0, it means that SP = FP and hence
     we return an SP-relative location, since all the addresses are SP-relative
     in this case.  This prevents the store from being optimized away.
     If the fp_offset is not 0, then the addresses will be FP-relative and
     therefore we return an FP-relative location.  */
2716 if (frame_pointer_needed
)
2719 return gen_frame_mem (DImode
,
2720 plus_constant (Pmode
, hard_frame_pointer_rtx
, UNITS_PER_WORD
));
2722 return gen_frame_mem (DImode
,
2723 plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
));
2726 /* If FP is not needed, we calculate the location of LR, which would be
2727 at the top of the saved registers block. */
2729 return gen_frame_mem (DImode
,
2730 plus_constant (Pmode
,
2733 + cfun
->machine
->frame
.saved_regs_size
2734 - 2 * UNITS_PER_WORD
));
2737 /* Possibly output code to build up a constant in a register. For
2738 the benefit of the costs infrastructure, returns the number of
2739 instructions which would be emitted. GENERATE inhibits or
2740 enables code generation. */
2743 aarch64_build_constant (int regnum
, HOST_WIDE_INT val
, bool generate
)
2747 if (aarch64_bitmask_imm (val
, DImode
))
2750 emit_move_insn (gen_rtx_REG (Pmode
, regnum
), GEN_INT (val
));
2758 HOST_WIDE_INT valp
= val
>> 16;
2762 for (i
= 16; i
< 64; i
+= 16)
2764 valm
= (valp
& 0xffff);
  /* zcount contains the number of additional MOVK instructions
     required if the constant is built up with an initial MOVZ instruction,
     while ncount is the number of MOVK instructions required if starting
     with a MOVN instruction.  Choose the sequence that yields the fewest
     number of instructions, preferring MOVZ instructions when they are
     both the same.  */
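  /* Worked example (illustrative): for val = 0xffffffffffff1234 a MOVZ-based
     sequence needs three extra MOVKs (three of the four 16-bit chunks are
     0xffff), while "movn <reg>, #0xedcb" writes ~0xedcb, i.e. the whole
     value, in one instruction; ncount < zcount, so the MOVN form is used.  */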
2781 if (ncount
< zcount
)
2784 emit_move_insn (gen_rtx_REG (Pmode
, regnum
),
2785 GEN_INT (val
| ~(HOST_WIDE_INT
) 0xffff));
2792 emit_move_insn (gen_rtx_REG (Pmode
, regnum
),
2793 GEN_INT (val
& 0xffff));
2800 for (i
= 16; i
< 64; i
+= 16)
2802 if ((val
& 0xffff) != tval
)
2805 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode
, regnum
),
2807 GEN_INT (val
& 0xffff)));
2817 aarch64_add_constant (int regnum
, int scratchreg
, HOST_WIDE_INT delta
)
2819 HOST_WIDE_INT mdelta
= delta
;
2820 rtx this_rtx
= gen_rtx_REG (Pmode
, regnum
);
2821 rtx scratch_rtx
= gen_rtx_REG (Pmode
, scratchreg
);
2826 if (mdelta
>= 4096 * 4096)
2828 (void) aarch64_build_constant (scratchreg
, delta
, true);
2829 emit_insn (gen_add3_insn (this_rtx
, this_rtx
, scratch_rtx
));
2831 else if (mdelta
> 0)
2835 emit_insn (gen_rtx_SET (Pmode
, scratch_rtx
, GEN_INT (mdelta
/ 4096)));
2836 rtx shift
= gen_rtx_ASHIFT (Pmode
, scratch_rtx
, GEN_INT (12));
2838 emit_insn (gen_rtx_SET (Pmode
, this_rtx
,
2839 gen_rtx_MINUS (Pmode
, this_rtx
, shift
)));
2841 emit_insn (gen_rtx_SET (Pmode
, this_rtx
,
2842 gen_rtx_PLUS (Pmode
, this_rtx
, shift
)));
2844 if (mdelta
% 4096 != 0)
2846 scratch_rtx
= GEN_INT ((delta
< 0 ? -1 : 1) * (mdelta
% 4096));
2847 emit_insn (gen_rtx_SET (Pmode
, this_rtx
,
2848 gen_rtx_PLUS (Pmode
, this_rtx
, scratch_rtx
)));
2853 /* Output code to add DELTA to the first argument, and then jump
2854 to FUNCTION. Used for C++ multiple inheritance. */
2856 aarch64_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
2857 HOST_WIDE_INT delta
,
2858 HOST_WIDE_INT vcall_offset
,
  /* The this pointer is always in x0.  Note that this differs from
     Arm where the this pointer may be bumped to r1 if r0 is required
     to return a pointer to an aggregate.  On AArch64 a result value
     pointer will be in x8.  */
2865 int this_regno
= R0_REGNUM
;
2866 rtx this_rtx
, temp0
, temp1
, addr
, funexp
;
2869 reload_completed
= 1;
2870 emit_note (NOTE_INSN_PROLOGUE_END
);
2872 if (vcall_offset
== 0)
2873 aarch64_add_constant (this_regno
, IP1_REGNUM
, delta
);
2876 gcc_assert ((vcall_offset
& (POINTER_BYTES
- 1)) == 0);
2878 this_rtx
= gen_rtx_REG (Pmode
, this_regno
);
2879 temp0
= gen_rtx_REG (Pmode
, IP0_REGNUM
);
2880 temp1
= gen_rtx_REG (Pmode
, IP1_REGNUM
);
2885 if (delta
>= -256 && delta
< 256)
2886 addr
= gen_rtx_PRE_MODIFY (Pmode
, this_rtx
,
2887 plus_constant (Pmode
, this_rtx
, delta
));
2889 aarch64_add_constant (this_regno
, IP1_REGNUM
, delta
);
2892 if (Pmode
== ptr_mode
)
2893 aarch64_emit_move (temp0
, gen_rtx_MEM (ptr_mode
, addr
));
2895 aarch64_emit_move (temp0
,
2896 gen_rtx_ZERO_EXTEND (Pmode
,
2897 gen_rtx_MEM (ptr_mode
, addr
)));
2899 if (vcall_offset
>= -256 && vcall_offset
< 4096 * POINTER_BYTES
)
2900 addr
= plus_constant (Pmode
, temp0
, vcall_offset
);
2903 (void) aarch64_build_constant (IP1_REGNUM
, vcall_offset
, true);
2904 addr
= gen_rtx_PLUS (Pmode
, temp0
, temp1
);
2907 if (Pmode
== ptr_mode
)
2908 aarch64_emit_move (temp1
, gen_rtx_MEM (ptr_mode
,addr
));
2910 aarch64_emit_move (temp1
,
2911 gen_rtx_SIGN_EXTEND (Pmode
,
2912 gen_rtx_MEM (ptr_mode
, addr
)));
2914 emit_insn (gen_add2_insn (this_rtx
, temp1
));
2917 /* Generate a tail call to the target function. */
2918 if (!TREE_USED (function
))
2920 assemble_external (function
);
2921 TREE_USED (function
) = 1;
2923 funexp
= XEXP (DECL_RTL (function
), 0);
2924 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
2925 insn
= emit_call_insn (gen_sibcall (funexp
, const0_rtx
, NULL_RTX
));
2926 SIBLING_CALL_P (insn
) = 1;
2928 insn
= get_insns ();
2929 shorten_branches (insn
);
2930 final_start_function (insn
, file
, 1);
2931 final (insn
, file
, 1);
2932 final_end_function ();
2934 /* Stop pretending to be a post-reload pass. */
2935 reload_completed
= 0;
2939 aarch64_tls_referenced_p (rtx x
)
2941 if (!TARGET_HAVE_TLS
)
2943 subrtx_iterator::array_type array
;
2944 FOR_EACH_SUBRTX (iter
, array
, x
, ALL
)
2946 const_rtx x
= *iter
;
2947 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (x
) != 0)
2949 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2950 TLS offsets, not real symbol references. */
2951 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
2952 iter
.skip_subrtxes ();
2959 aarch64_bitmasks_cmp (const void *i1
, const void *i2
)
2961 const unsigned HOST_WIDE_INT
*imm1
= (const unsigned HOST_WIDE_INT
*) i1
;
2962 const unsigned HOST_WIDE_INT
*imm2
= (const unsigned HOST_WIDE_INT
*) i2
;
2973 aarch64_build_bitmask_table (void)
2975 unsigned HOST_WIDE_INT mask
, imm
;
2976 unsigned int log_e
, e
, s
, r
;
2977 unsigned int nimms
= 0;
2979 for (log_e
= 1; log_e
<= 6; log_e
++)
2983 mask
= ~(HOST_WIDE_INT
) 0;
2985 mask
= ((HOST_WIDE_INT
) 1 << e
) - 1;
2986 for (s
= 1; s
< e
; s
++)
2988 for (r
= 0; r
< e
; r
++)
2990 /* set s consecutive bits to 1 (s < 64) */
2991 imm
= ((unsigned HOST_WIDE_INT
)1 << s
) - 1;
2992 /* rotate right by r */
2994 imm
= ((imm
>> r
) | (imm
<< (e
- r
))) & mask
;
2995 /* replicate the constant depending on SIMD size */
2997 case 1: imm
|= (imm
<< 2);
2998 case 2: imm
|= (imm
<< 4);
2999 case 3: imm
|= (imm
<< 8);
3000 case 4: imm
|= (imm
<< 16);
3001 case 5: imm
|= (imm
<< 32);
3007 gcc_assert (nimms
< AARCH64_NUM_BITMASKS
);
3008 aarch64_bitmasks
[nimms
++] = imm
;
3013 gcc_assert (nimms
== AARCH64_NUM_BITMASKS
);
3014 qsort (aarch64_bitmasks
, nimms
, sizeof (aarch64_bitmasks
[0]),
3015 aarch64_bitmasks_cmp
);
3019 /* Return true if val can be encoded as a 12-bit unsigned immediate with
3020 a left shift of 0 or 12 bits. */
3022 aarch64_uimm12_shift (HOST_WIDE_INT val
)
3024 return ((val
& (((HOST_WIDE_INT
) 0xfff) << 0)) == val
3025 || (val
& (((HOST_WIDE_INT
) 0xfff) << 12)) == val
3030 /* Return true if val is an immediate that can be loaded into a
3031 register by a MOVZ instruction. */
3033 aarch64_movw_imm (HOST_WIDE_INT val
, machine_mode mode
)
3035 if (GET_MODE_SIZE (mode
) > 4)
3037 if ((val
& (((HOST_WIDE_INT
) 0xffff) << 32)) == val
3038 || (val
& (((HOST_WIDE_INT
) 0xffff) << 48)) == val
)
3043 /* Ignore sign extension. */
3044 val
&= (HOST_WIDE_INT
) 0xffffffff;
3046 return ((val
& (((HOST_WIDE_INT
) 0xffff) << 0)) == val
3047 || (val
& (((HOST_WIDE_INT
) 0xffff) << 16)) == val
);
3051 /* Return true if val is a valid bitmask immediate. */
3053 aarch64_bitmask_imm (HOST_WIDE_INT val
, machine_mode mode
)
3055 if (GET_MODE_SIZE (mode
) < 8)
3057 /* Replicate bit pattern. */
3058 val
&= (HOST_WIDE_INT
) 0xffffffff;
3061 return bsearch (&val
, aarch64_bitmasks
, AARCH64_NUM_BITMASKS
,
3062 sizeof (aarch64_bitmasks
[0]), aarch64_bitmasks_cmp
) != NULL
;
3066 /* Return true if val is an immediate that can be loaded into a
3067 register in a single instruction. */
3069 aarch64_move_imm (HOST_WIDE_INT val
, machine_mode mode
)
3071 if (aarch64_movw_imm (val
, mode
) || aarch64_movw_imm (~val
, mode
))
3073 return aarch64_bitmask_imm (val
, mode
);
3077 aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
3081 if (GET_CODE (x
) == HIGH
)
3084 split_const (x
, &base
, &offset
);
3085 if (GET_CODE (base
) == SYMBOL_REF
|| GET_CODE (base
) == LABEL_REF
)
3087 if (aarch64_classify_symbol (base
, offset
, SYMBOL_CONTEXT_ADR
)
3088 != SYMBOL_FORCE_TO_MEM
)
3091 /* Avoid generating a 64-bit relocation in ILP32; leave
3092 to aarch64_expand_mov_immediate to handle it properly. */
3093 return mode
!= ptr_mode
;
3096 return aarch64_tls_referenced_p (x
);
3099 /* Return true if register REGNO is a valid index register.
3100 STRICT_P is true if REG_OK_STRICT is in effect. */
3103 aarch64_regno_ok_for_index_p (int regno
, bool strict_p
)
3105 if (!HARD_REGISTER_NUM_P (regno
))
3113 regno
= reg_renumber
[regno
];
3115 return GP_REGNUM_P (regno
);
3118 /* Return true if register REGNO is a valid base register for mode MODE.
3119 STRICT_P is true if REG_OK_STRICT is in effect. */
3122 aarch64_regno_ok_for_base_p (int regno
, bool strict_p
)
3124 if (!HARD_REGISTER_NUM_P (regno
))
3132 regno
= reg_renumber
[regno
];
3135 /* The fake registers will be eliminated to either the stack or
3136 hard frame pointer, both of which are usually valid base registers.
3137 Reload deals with the cases where the eliminated form isn't valid. */
3138 return (GP_REGNUM_P (regno
)
3139 || regno
== SP_REGNUM
3140 || regno
== FRAME_POINTER_REGNUM
3141 || regno
== ARG_POINTER_REGNUM
);
3144 /* Return true if X is a valid base register for mode MODE.
3145 STRICT_P is true if REG_OK_STRICT is in effect. */
3148 aarch64_base_register_rtx_p (rtx x
, bool strict_p
)
3150 if (!strict_p
&& GET_CODE (x
) == SUBREG
)
3153 return (REG_P (x
) && aarch64_regno_ok_for_base_p (REGNO (x
), strict_p
));
3156 /* Return true if address offset is a valid index. If it is, fill in INFO
3157 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
3160 aarch64_classify_index (struct aarch64_address_info
*info
, rtx x
,
3161 machine_mode mode
, bool strict_p
)
3163 enum aarch64_address_type type
;
3168 if ((REG_P (x
) || GET_CODE (x
) == SUBREG
)
3169 && GET_MODE (x
) == Pmode
)
3171 type
= ADDRESS_REG_REG
;
3175 /* (sign_extend:DI (reg:SI)) */
3176 else if ((GET_CODE (x
) == SIGN_EXTEND
3177 || GET_CODE (x
) == ZERO_EXTEND
)
3178 && GET_MODE (x
) == DImode
3179 && GET_MODE (XEXP (x
, 0)) == SImode
)
3181 type
= (GET_CODE (x
) == SIGN_EXTEND
)
3182 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
3183 index
= XEXP (x
, 0);
3186 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3187 else if (GET_CODE (x
) == MULT
3188 && (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
3189 || GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
3190 && GET_MODE (XEXP (x
, 0)) == DImode
3191 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
3192 && CONST_INT_P (XEXP (x
, 1)))
3194 type
= (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
3195 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
3196 index
= XEXP (XEXP (x
, 0), 0);
3197 shift
= exact_log2 (INTVAL (XEXP (x
, 1)));
3199 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3200 else if (GET_CODE (x
) == ASHIFT
3201 && (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
3202 || GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
3203 && GET_MODE (XEXP (x
, 0)) == DImode
3204 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
3205 && CONST_INT_P (XEXP (x
, 1)))
3207 type
= (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
3208 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
3209 index
= XEXP (XEXP (x
, 0), 0);
3210 shift
= INTVAL (XEXP (x
, 1));
3212 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3213 else if ((GET_CODE (x
) == SIGN_EXTRACT
3214 || GET_CODE (x
) == ZERO_EXTRACT
)
3215 && GET_MODE (x
) == DImode
3216 && GET_CODE (XEXP (x
, 0)) == MULT
3217 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
3218 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
3220 type
= (GET_CODE (x
) == SIGN_EXTRACT
)
3221 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
3222 index
= XEXP (XEXP (x
, 0), 0);
3223 shift
= exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)));
3224 if (INTVAL (XEXP (x
, 1)) != 32 + shift
3225 || INTVAL (XEXP (x
, 2)) != 0)
3228 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3229 (const_int 0xffffffff<<shift)) */
3230 else if (GET_CODE (x
) == AND
3231 && GET_MODE (x
) == DImode
3232 && GET_CODE (XEXP (x
, 0)) == MULT
3233 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
3234 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
3235 && CONST_INT_P (XEXP (x
, 1)))
3237 type
= ADDRESS_REG_UXTW
;
3238 index
= XEXP (XEXP (x
, 0), 0);
3239 shift
= exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)));
3240 if (INTVAL (XEXP (x
, 1)) != (HOST_WIDE_INT
)0xffffffff << shift
)
3243 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3244 else if ((GET_CODE (x
) == SIGN_EXTRACT
3245 || GET_CODE (x
) == ZERO_EXTRACT
)
3246 && GET_MODE (x
) == DImode
3247 && GET_CODE (XEXP (x
, 0)) == ASHIFT
3248 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
3249 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
3251 type
= (GET_CODE (x
) == SIGN_EXTRACT
)
3252 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
3253 index
= XEXP (XEXP (x
, 0), 0);
3254 shift
= INTVAL (XEXP (XEXP (x
, 0), 1));
3255 if (INTVAL (XEXP (x
, 1)) != 32 + shift
3256 || INTVAL (XEXP (x
, 2)) != 0)
3259 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3260 (const_int 0xffffffff<<shift)) */
3261 else if (GET_CODE (x
) == AND
3262 && GET_MODE (x
) == DImode
3263 && GET_CODE (XEXP (x
, 0)) == ASHIFT
3264 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
3265 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
3266 && CONST_INT_P (XEXP (x
, 1)))
3268 type
= ADDRESS_REG_UXTW
;
3269 index
= XEXP (XEXP (x
, 0), 0);
3270 shift
= INTVAL (XEXP (XEXP (x
, 0), 1));
3271 if (INTVAL (XEXP (x
, 1)) != (HOST_WIDE_INT
)0xffffffff << shift
)
3274 /* (mult:P (reg:P) (const_int scale)) */
3275 else if (GET_CODE (x
) == MULT
3276 && GET_MODE (x
) == Pmode
3277 && GET_MODE (XEXP (x
, 0)) == Pmode
3278 && CONST_INT_P (XEXP (x
, 1)))
3280 type
= ADDRESS_REG_REG
;
3281 index
= XEXP (x
, 0);
3282 shift
= exact_log2 (INTVAL (XEXP (x
, 1)));
3284 /* (ashift:P (reg:P) (const_int shift)) */
3285 else if (GET_CODE (x
) == ASHIFT
3286 && GET_MODE (x
) == Pmode
3287 && GET_MODE (XEXP (x
, 0)) == Pmode
3288 && CONST_INT_P (XEXP (x
, 1)))
3290 type
= ADDRESS_REG_REG
;
3291 index
= XEXP (x
, 0);
3292 shift
= INTVAL (XEXP (x
, 1));
3297 if (GET_CODE (index
) == SUBREG
)
3298 index
= SUBREG_REG (index
);
3301 (shift
> 0 && shift
<= 3
3302 && (1 << shift
) == GET_MODE_SIZE (mode
)))
3304 && aarch64_regno_ok_for_index_p (REGNO (index
), strict_p
))
3307 info
->offset
= index
;
3308 info
->shift
= shift
;
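/* Illustrative example (assumed RTL): for an SImode access, an index of
   (mult:DI (sign_extend:DI (reg:SI w1)) (const_int 4)) is classified as
   ADDRESS_REG_SXTW with shift = 2 and is later printed as the addressing
   mode [xN, w1, sxtw 2]; a plain (reg:DI x1) index yields ADDRESS_REG_REG
   with shift = 0, i.e. [xN, x1].  */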
3316 aarch64_offset_7bit_signed_scaled_p (machine_mode mode
, HOST_WIDE_INT offset
)
3318 return (offset
>= -64 * GET_MODE_SIZE (mode
)
3319 && offset
< 64 * GET_MODE_SIZE (mode
)
3320 && offset
% GET_MODE_SIZE (mode
) == 0);
3324 offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED
,
3325 HOST_WIDE_INT offset
)
3327 return offset
>= -256 && offset
< 256;
3331 offset_12bit_unsigned_scaled_p (machine_mode mode
, HOST_WIDE_INT offset
)
3334 && offset
< 4096 * GET_MODE_SIZE (mode
)
3335 && offset
% GET_MODE_SIZE (mode
) == 0);
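/* Worked example (illustrative): for DImode (8-byte) accesses the helpers
   above accept
     aarch64_offset_7bit_signed_scaled_p:  -512 ... 504, multiples of 8
     offset_9bit_signed_unscaled_p:        -256 ... 255, any byte offset
     offset_12bit_unsigned_scaled_p:          0 ... 32760, multiples of 8
   matching the LDP/STP, LDUR/STUR and unsigned-offset LDR/STR immediate
   ranges respectively.  */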
3338 /* Return true if X is a valid address for machine mode MODE. If it is,
3339 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3340 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3343 aarch64_classify_address (struct aarch64_address_info
*info
,
3344 rtx x
, machine_mode mode
,
3345 RTX_CODE outer_code
, bool strict_p
)
3347 enum rtx_code code
= GET_CODE (x
);
3349 bool allow_reg_index_p
=
3350 outer_code
!= PARALLEL
&& (GET_MODE_SIZE (mode
) != 16
3351 || aarch64_vector_mode_supported_p (mode
));
  /* Don't support anything other than POST_INC or REG addressing for
     AdvSIMD.  */
3354 if (aarch64_vect_struct_mode_p (mode
)
3355 && (code
!= POST_INC
&& code
!= REG
))
3362 info
->type
= ADDRESS_REG_IMM
;
3364 info
->offset
= const0_rtx
;
3365 return aarch64_base_register_rtx_p (x
, strict_p
);
3373 && (op0
== virtual_stack_vars_rtx
3374 || op0
== frame_pointer_rtx
3375 || op0
== arg_pointer_rtx
)
3376 && CONST_INT_P (op1
))
3378 info
->type
= ADDRESS_REG_IMM
;
3385 if (GET_MODE_SIZE (mode
) != 0
3386 && CONST_INT_P (op1
)
3387 && aarch64_base_register_rtx_p (op0
, strict_p
))
3389 HOST_WIDE_INT offset
= INTVAL (op1
);
3391 info
->type
= ADDRESS_REG_IMM
;
	  /* TImode and TFmode values are allowed in both pairs of X
	     registers and individual Q registers.  The available
	     address modes are:
	     X,X: 7-bit signed scaled offset
	     Q:   9-bit signed offset
	     We conservatively require an offset representable in either mode.
	   */
3402 if (mode
== TImode
|| mode
== TFmode
)
3403 return (aarch64_offset_7bit_signed_scaled_p (mode
, offset
)
3404 && offset_9bit_signed_unscaled_p (mode
, offset
));
3406 if (outer_code
== PARALLEL
)
3407 return ((GET_MODE_SIZE (mode
) == 4 || GET_MODE_SIZE (mode
) == 8)
3408 && aarch64_offset_7bit_signed_scaled_p (mode
, offset
));
3410 return (offset_9bit_signed_unscaled_p (mode
, offset
)
3411 || offset_12bit_unsigned_scaled_p (mode
, offset
));
3414 if (allow_reg_index_p
)
3416 /* Look for base + (scaled/extended) index register. */
3417 if (aarch64_base_register_rtx_p (op0
, strict_p
)
3418 && aarch64_classify_index (info
, op1
, mode
, strict_p
))
3423 if (aarch64_base_register_rtx_p (op1
, strict_p
)
3424 && aarch64_classify_index (info
, op0
, mode
, strict_p
))
3437 info
->type
= ADDRESS_REG_WB
;
3438 info
->base
= XEXP (x
, 0);
3439 info
->offset
= NULL_RTX
;
3440 return aarch64_base_register_rtx_p (info
->base
, strict_p
);
3444 info
->type
= ADDRESS_REG_WB
;
3445 info
->base
= XEXP (x
, 0);
3446 if (GET_CODE (XEXP (x
, 1)) == PLUS
3447 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
3448 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), info
->base
)
3449 && aarch64_base_register_rtx_p (info
->base
, strict_p
))
3451 HOST_WIDE_INT offset
;
3452 info
->offset
= XEXP (XEXP (x
, 1), 1);
3453 offset
= INTVAL (info
->offset
);
	  /* TImode and TFmode values are allowed in both pairs of X
	     registers and individual Q registers.  The available
	     address modes are:
	     X,X: 7-bit signed scaled offset
	     Q:   9-bit signed offset
	     We conservatively require an offset representable in either mode.
	   */
3462 if (mode
== TImode
|| mode
== TFmode
)
3463 return (aarch64_offset_7bit_signed_scaled_p (mode
, offset
)
3464 && offset_9bit_signed_unscaled_p (mode
, offset
));
3466 if (outer_code
== PARALLEL
)
3467 return ((GET_MODE_SIZE (mode
) == 4 || GET_MODE_SIZE (mode
) == 8)
3468 && aarch64_offset_7bit_signed_scaled_p (mode
, offset
));
3470 return offset_9bit_signed_unscaled_p (mode
, offset
);
3477 /* load literal: pc-relative constant pool entry. Only supported
3478 for SI mode or larger. */
3479 info
->type
= ADDRESS_SYMBOLIC
;
3480 if (outer_code
!= PARALLEL
&& GET_MODE_SIZE (mode
) >= 4)
3484 split_const (x
, &sym
, &addend
);
3485 return (GET_CODE (sym
) == LABEL_REF
3486 || (GET_CODE (sym
) == SYMBOL_REF
3487 && CONSTANT_POOL_ADDRESS_P (sym
)));
3492 info
->type
= ADDRESS_LO_SUM
;
3493 info
->base
= XEXP (x
, 0);
3494 info
->offset
= XEXP (x
, 1);
3495 if (allow_reg_index_p
3496 && aarch64_base_register_rtx_p (info
->base
, strict_p
))
3499 split_const (info
->offset
, &sym
, &offs
);
3500 if (GET_CODE (sym
) == SYMBOL_REF
3501 && (aarch64_classify_symbol (sym
, offs
, SYMBOL_CONTEXT_MEM
)
3502 == SYMBOL_SMALL_ABSOLUTE
))
3504 /* The symbol and offset must be aligned to the access size. */
3506 unsigned int ref_size
;
3508 if (CONSTANT_POOL_ADDRESS_P (sym
))
3509 align
= GET_MODE_ALIGNMENT (get_pool_mode (sym
));
3510 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym
))
3512 tree exp
= SYMBOL_REF_DECL (sym
);
3513 align
= TYPE_ALIGN (TREE_TYPE (exp
));
3514 align
= CONSTANT_ALIGNMENT (exp
, align
);
3516 else if (SYMBOL_REF_DECL (sym
))
3517 align
= DECL_ALIGN (SYMBOL_REF_DECL (sym
));
3518 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym
)
3519 && SYMBOL_REF_BLOCK (sym
) != NULL
)
3520 align
= SYMBOL_REF_BLOCK (sym
)->alignment
;
3522 align
= BITS_PER_UNIT
;
3524 ref_size
= GET_MODE_SIZE (mode
);
3526 ref_size
= GET_MODE_SIZE (DImode
);
3528 return ((INTVAL (offs
) & (ref_size
- 1)) == 0
3529 && ((align
/ BITS_PER_UNIT
) & (ref_size
- 1)) == 0);
3540 aarch64_symbolic_address_p (rtx x
)
3544 split_const (x
, &x
, &offset
);
3545 return GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
;
3548 /* Classify the base of symbolic expression X, given that X appears in
3551 enum aarch64_symbol_type
3552 aarch64_classify_symbolic_expression (rtx x
,
3553 enum aarch64_symbol_context context
)
3557 split_const (x
, &x
, &offset
);
3558 return aarch64_classify_symbol (x
, offset
, context
);
3562 /* Return TRUE if X is a legitimate address for accessing memory in
3565 aarch64_legitimate_address_hook_p (machine_mode mode
, rtx x
, bool strict_p
)
3567 struct aarch64_address_info addr
;
3569 return aarch64_classify_address (&addr
, x
, mode
, MEM
, strict_p
);
3572 /* Return TRUE if X is a legitimate address for accessing memory in
3573 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3576 aarch64_legitimate_address_p (machine_mode mode
, rtx x
,
3577 RTX_CODE outer_code
, bool strict_p
)
3579 struct aarch64_address_info addr
;
3581 return aarch64_classify_address (&addr
, x
, mode
, outer_code
, strict_p
);
3584 /* Return TRUE if rtx X is immediate constant 0.0 */
3586 aarch64_float_const_zero_rtx_p (rtx x
)
3590 if (GET_MODE (x
) == VOIDmode
)
3593 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
3594 if (REAL_VALUE_MINUS_ZERO (r
))
3595 return !HONOR_SIGNED_ZEROS (GET_MODE (x
));
3596 return REAL_VALUES_EQUAL (r
, dconst0
);
3599 /* Return the fixed registers used for condition codes. */
3602 aarch64_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
3605 *p2
= INVALID_REGNUM
;
3609 /* Emit call insn with PAT and do aarch64-specific handling. */
3612 aarch64_emit_call_insn (rtx pat
)
3614 rtx insn
= emit_call_insn (pat
);
3616 rtx
*fusage
= &CALL_INSN_FUNCTION_USAGE (insn
);
3617 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP0_REGNUM
));
3618 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP1_REGNUM
));
3622 aarch64_select_cc_mode (RTX_CODE code
, rtx x
, rtx y
)
3624 /* All floating point compares return CCFP if it is an equality
3625 comparison, and CCFPE otherwise. */
3626 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
3653 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
3655 && (code
== EQ
|| code
== NE
|| code
== LT
|| code
== GE
)
3656 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
|| GET_CODE (x
) == AND
3657 || GET_CODE (x
) == NEG
))
  /* A compare with a shifted operand.  Because of canonicalization,
     the comparison will have to be swapped when we emit the assembly
     code.  */
3663 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
3664 && (REG_P (y
) || GET_CODE (y
) == SUBREG
)
3665 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
3666 || GET_CODE (x
) == LSHIFTRT
3667 || GET_CODE (x
) == ZERO_EXTEND
|| GET_CODE (x
) == SIGN_EXTEND
))
  /* Similarly for a negated operand, but we can only do this for
     equalities.  */
3672 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
3673 && (REG_P (y
) || GET_CODE (y
) == SUBREG
)
3674 && (code
== EQ
|| code
== NE
)
3675 && GET_CODE (x
) == NEG
)
3678 /* A compare of a mode narrower than SI mode against zero can be done
3679 by extending the value in the comparison. */
3680 if ((GET_MODE (x
) == QImode
|| GET_MODE (x
) == HImode
)
3682 /* Only use sign-extension if we really need it. */
3683 return ((code
== GT
|| code
== GE
|| code
== LE
|| code
== LT
)
3684 ? CC_SESWPmode
: CC_ZESWPmode
);
3686 /* For everything else, return CCmode. */
3691 aarch64_get_condition_code_1 (enum machine_mode
, enum rtx_code
);
3694 aarch64_get_condition_code (rtx x
)
3696 machine_mode mode
= GET_MODE (XEXP (x
, 0));
3697 enum rtx_code comp_code
= GET_CODE (x
);
3699 if (GET_MODE_CLASS (mode
) != MODE_CC
)
3700 mode
= SELECT_CC_MODE (comp_code
, XEXP (x
, 0), XEXP (x
, 1));
3701 return aarch64_get_condition_code_1 (mode
, comp_code
);
3705 aarch64_get_condition_code_1 (enum machine_mode mode
, enum rtx_code comp_code
)
3707 int ne
= -1, eq
= -1;
3714 case GE
: return AARCH64_GE
;
3715 case GT
: return AARCH64_GT
;
3716 case LE
: return AARCH64_LS
;
3717 case LT
: return AARCH64_MI
;
3718 case NE
: return AARCH64_NE
;
3719 case EQ
: return AARCH64_EQ
;
3720 case ORDERED
: return AARCH64_VC
;
3721 case UNORDERED
: return AARCH64_VS
;
3722 case UNLT
: return AARCH64_LT
;
3723 case UNLE
: return AARCH64_LE
;
3724 case UNGT
: return AARCH64_HI
;
3725 case UNGE
: return AARCH64_PL
;
3783 case NE
: return AARCH64_NE
;
3784 case EQ
: return AARCH64_EQ
;
3785 case GE
: return AARCH64_GE
;
3786 case GT
: return AARCH64_GT
;
3787 case LE
: return AARCH64_LE
;
3788 case LT
: return AARCH64_LT
;
3789 case GEU
: return AARCH64_CS
;
3790 case GTU
: return AARCH64_HI
;
3791 case LEU
: return AARCH64_LS
;
3792 case LTU
: return AARCH64_CC
;
3802 case NE
: return AARCH64_NE
;
3803 case EQ
: return AARCH64_EQ
;
3804 case GE
: return AARCH64_LE
;
3805 case GT
: return AARCH64_LT
;
3806 case LE
: return AARCH64_GE
;
3807 case LT
: return AARCH64_GT
;
3808 case GEU
: return AARCH64_LS
;
3809 case GTU
: return AARCH64_CC
;
3810 case LEU
: return AARCH64_CS
;
3811 case LTU
: return AARCH64_HI
;
3819 case NE
: return AARCH64_NE
;
3820 case EQ
: return AARCH64_EQ
;
3821 case GE
: return AARCH64_PL
;
3822 case LT
: return AARCH64_MI
;
3830 case NE
: return AARCH64_NE
;
3831 case EQ
: return AARCH64_EQ
;
3841 if (comp_code
== NE
)
3844 if (comp_code
== EQ
)
3851 aarch64_const_vec_all_same_in_range_p (rtx x
,
3852 HOST_WIDE_INT minval
,
3853 HOST_WIDE_INT maxval
)
3855 HOST_WIDE_INT firstval
;
3858 if (GET_CODE (x
) != CONST_VECTOR
3859 || GET_MODE_CLASS (GET_MODE (x
)) != MODE_VECTOR_INT
)
3862 firstval
= INTVAL (CONST_VECTOR_ELT (x
, 0));
3863 if (firstval
< minval
|| firstval
> maxval
)
3866 count
= CONST_VECTOR_NUNITS (x
);
3867 for (i
= 1; i
< count
; i
++)
3868 if (INTVAL (CONST_VECTOR_ELT (x
, i
)) != firstval
)
3875 aarch64_const_vec_all_same_int_p (rtx x
, HOST_WIDE_INT val
)
3877 return aarch64_const_vec_all_same_in_range_p (x
, val
, val
);
3881 bit_count (unsigned HOST_WIDE_INT value
)
3895 #define AARCH64_CC_V 1
3896 #define AARCH64_CC_C (1 << 1)
3897 #define AARCH64_CC_Z (1 << 2)
3898 #define AARCH64_CC_N (1 << 3)
/* N Z C V flags for ccmp.  The first code is for the AND operation and the
   second one is for the IOR operation.  Indexed by AARCH64_COND_CODE.  */
3902 static const int aarch64_nzcv_codes
[][2] =
3904 {AARCH64_CC_Z
, 0}, /* EQ, Z == 1. */
3905 {0, AARCH64_CC_Z
}, /* NE, Z == 0. */
3906 {AARCH64_CC_C
, 0}, /* CS, C == 1. */
3907 {0, AARCH64_CC_C
}, /* CC, C == 0. */
3908 {AARCH64_CC_N
, 0}, /* MI, N == 1. */
3909 {0, AARCH64_CC_N
}, /* PL, N == 0. */
3910 {AARCH64_CC_V
, 0}, /* VS, V == 1. */
3911 {0, AARCH64_CC_V
}, /* VC, V == 0. */
3912 {AARCH64_CC_C
, 0}, /* HI, C ==1 && Z == 0. */
3913 {0, AARCH64_CC_C
}, /* LS, !(C == 1 && Z == 0). */
3914 {0, AARCH64_CC_V
}, /* GE, N == V. */
3915 {AARCH64_CC_V
, 0}, /* LT, N != V. */
3916 {0, AARCH64_CC_Z
}, /* GT, Z == 0 && N == V. */
3917 {AARCH64_CC_Z
, 0}, /* LE, !(Z == 0 && N == V). */
3918 {0, 0}, /* AL, Any. */
3919 {0, 0}, /* NV, Any. */
3923 aarch64_ccmp_mode_to_code (enum machine_mode mode
)
3964 aarch64_print_operand (FILE *f
, rtx x
, char code
)
3968 /* An integer or symbol address without a preceding # sign. */
3970 switch (GET_CODE (x
))
3973 fprintf (f
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
3977 output_addr_const (f
, x
);
3981 if (GET_CODE (XEXP (x
, 0)) == PLUS
3982 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
3984 output_addr_const (f
, x
);
3990 output_operand_lossage ("Unsupported operand for code '%c'", code
);
3995 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3999 if (!CONST_INT_P (x
)
4000 || (n
= exact_log2 (INTVAL (x
) & ~7)) <= 0)
4002 output_operand_lossage ("invalid operand for '%%%c'", code
);
4018 output_operand_lossage ("invalid operand for '%%%c'", code
);
4028 /* Print N such that 2^N == X. */
4029 if (!CONST_INT_P (x
) || (n
= exact_log2 (INTVAL (x
))) < 0)
4031 output_operand_lossage ("invalid operand for '%%%c'", code
);
4035 asm_fprintf (f
, "%d", n
);
4040 /* Print the number of non-zero bits in X (a const_int). */
4041 if (!CONST_INT_P (x
))
4043 output_operand_lossage ("invalid operand for '%%%c'", code
);
4047 asm_fprintf (f
, "%u", bit_count (INTVAL (x
)));
4051 /* Print the higher numbered register of a pair (TImode) of regs. */
4052 if (!REG_P (x
) || !GP_REGNUM_P (REGNO (x
) + 1))
4054 output_operand_lossage ("invalid operand for '%%%c'", code
);
4058 asm_fprintf (f
, "%s", reg_names
[REGNO (x
) + 1]);
4064 /* Print a condition (eq, ne, etc). */
4066 /* CONST_TRUE_RTX means always -- that's the default. */
4067 if (x
== const_true_rtx
)
4070 if (!COMPARISON_P (x
))
4072 output_operand_lossage ("invalid operand for '%%%c'", code
);
4076 cond_code
= aarch64_get_condition_code (x
);
4077 gcc_assert (cond_code
>= 0);
4078 fputs (aarch64_condition_codes
[cond_code
], f
);
4085 /* Print the inverse of a condition (eq <-> ne, etc). */
4087 /* CONST_TRUE_RTX means never -- that's the default. */
4088 if (x
== const_true_rtx
)
4094 if (!COMPARISON_P (x
))
4096 output_operand_lossage ("invalid operand for '%%%c'", code
);
4099 cond_code
= aarch64_get_condition_code (x
);
4100 gcc_assert (cond_code
>= 0);
4101 fputs (aarch64_condition_codes
[AARCH64_INVERSE_CONDITION_CODE
4111 /* Print a scalar FP/SIMD register name. */
4112 if (!REG_P (x
) || !FP_REGNUM_P (REGNO (x
)))
4114 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code
);
4117 asm_fprintf (f
, "%c%d", code
, REGNO (x
) - V0_REGNUM
);
4124 /* Print the first FP/SIMD register name in a list. */
4125 if (!REG_P (x
) || !FP_REGNUM_P (REGNO (x
)))
4127 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code
);
4130 asm_fprintf (f
, "v%d", REGNO (x
) - V0_REGNUM
+ (code
- 'S'));
4134 /* Print bottom 16 bits of integer constant in hex. */
4135 if (!CONST_INT_P (x
))
4137 output_operand_lossage ("invalid operand for '%%%c'", code
);
4140 asm_fprintf (f
, "0x%wx", UINTVAL (x
) & 0xffff);
      /* Print a general register name or the zero register (32-bit or
	 64-bit).  */
4148 || (CONST_DOUBLE_P (x
) && aarch64_float_const_zero_rtx_p (x
)))
4150 asm_fprintf (f
, "%czr", code
);
4154 if (REG_P (x
) && GP_REGNUM_P (REGNO (x
)))
4156 asm_fprintf (f
, "%c%d", code
, REGNO (x
) - R0_REGNUM
);
4160 if (REG_P (x
) && REGNO (x
) == SP_REGNUM
)
4162 asm_fprintf (f
, "%ssp", code
== 'w' ? "w" : "");
4169 /* Print a normal operand, if it's a general register, then we
4173 output_operand_lossage ("missing operand");
4177 switch (GET_CODE (x
))
4180 asm_fprintf (f
, "%s", reg_names
[REGNO (x
)]);
4184 aarch64_memory_reference_mode
= GET_MODE (x
);
4185 output_address (XEXP (x
, 0));
4190 output_addr_const (asm_out_file
, x
);
4194 asm_fprintf (f
, "%wd", INTVAL (x
));
4198 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_VECTOR_INT
)
4201 aarch64_const_vec_all_same_in_range_p (x
,
4203 HOST_WIDE_INT_MAX
));
4204 asm_fprintf (f
, "%wd", INTVAL (CONST_VECTOR_ELT (x
, 0)));
4206 else if (aarch64_simd_imm_zero_p (x
, GET_MODE (x
)))
4215 /* CONST_DOUBLE can represent a double-width integer.
4216 In this case, the mode of x is VOIDmode. */
4217 if (GET_MODE (x
) == VOIDmode
)
4219 else if (aarch64_float_const_zero_rtx_p (x
))
4224 else if (aarch64_float_const_representable_p (x
))
4227 char float_buf
[buf_size
] = {'\0'};
4229 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
4230 real_to_decimal_for_mode (float_buf
, &r
,
4233 asm_fprintf (asm_out_file
, "%s", float_buf
);
4237 output_operand_lossage ("invalid constant");
4240 output_operand_lossage ("invalid operand");
4246 if (GET_CODE (x
) == HIGH
)
4249 switch (aarch64_classify_symbolic_expression (x
, SYMBOL_CONTEXT_ADR
))
4251 case SYMBOL_SMALL_GOT
:
4252 asm_fprintf (asm_out_file
, ":got:");
4255 case SYMBOL_SMALL_TLSGD
:
4256 asm_fprintf (asm_out_file
, ":tlsgd:");
4259 case SYMBOL_SMALL_TLSDESC
:
4260 asm_fprintf (asm_out_file
, ":tlsdesc:");
4263 case SYMBOL_SMALL_GOTTPREL
:
4264 asm_fprintf (asm_out_file
, ":gottprel:");
4267 case SYMBOL_SMALL_TPREL
:
4268 asm_fprintf (asm_out_file
, ":tprel:");
4271 case SYMBOL_TINY_GOT
:
4278 output_addr_const (asm_out_file
, x
);
4282 switch (aarch64_classify_symbolic_expression (x
, SYMBOL_CONTEXT_ADR
))
4284 case SYMBOL_SMALL_GOT
:
4285 asm_fprintf (asm_out_file
, ":lo12:");
4288 case SYMBOL_SMALL_TLSGD
:
4289 asm_fprintf (asm_out_file
, ":tlsgd_lo12:");
4292 case SYMBOL_SMALL_TLSDESC
:
4293 asm_fprintf (asm_out_file
, ":tlsdesc_lo12:");
4296 case SYMBOL_SMALL_GOTTPREL
:
4297 asm_fprintf (asm_out_file
, ":gottprel_lo12:");
4300 case SYMBOL_SMALL_TPREL
:
4301 asm_fprintf (asm_out_file
, ":tprel_lo12_nc:");
4304 case SYMBOL_TINY_GOT
:
4305 asm_fprintf (asm_out_file
, ":got:");
4311 output_addr_const (asm_out_file
, x
);
4316 switch (aarch64_classify_symbolic_expression (x
, SYMBOL_CONTEXT_ADR
))
4318 case SYMBOL_SMALL_TPREL
:
4319 asm_fprintf (asm_out_file
, ":tprel_hi12:");
4324 output_addr_const (asm_out_file
, x
);
4332 if (!COMPARISON_P (x
))
4334 output_operand_lossage ("invalid operand for '%%%c'", code
);
4338 cond_code
= aarch64_get_condition_code_1 (CCmode
, GET_CODE (x
));
4339 gcc_assert (cond_code
>= 0);
4340 asm_fprintf (f
, "%d", aarch64_nzcv_codes
[cond_code
][0]);
4349 if (!COMPARISON_P (x
))
4351 output_operand_lossage ("invalid operand for '%%%c'", code
);
4355 cond_code
= aarch64_get_condition_code_1 (CCmode
, GET_CODE (x
));
4356 gcc_assert (cond_code
>= 0);
4357 asm_fprintf (f
, "%d", aarch64_nzcv_codes
[cond_code
][1]);
4362 output_operand_lossage ("invalid operand prefix '%%%c'", code
);
4368 aarch64_print_operand_address (FILE *f
, rtx x
)
4370 struct aarch64_address_info addr
;
4372 if (aarch64_classify_address (&addr
, x
, aarch64_memory_reference_mode
,
4376 case ADDRESS_REG_IMM
:
4377 if (addr
.offset
== const0_rtx
)
4378 asm_fprintf (f
, "[%s]", reg_names
[REGNO (addr
.base
)]);
4380 asm_fprintf (f
, "[%s, %wd]", reg_names
[REGNO (addr
.base
)],
4381 INTVAL (addr
.offset
));
4384 case ADDRESS_REG_REG
:
4385 if (addr
.shift
== 0)
4386 asm_fprintf (f
, "[%s, %s]", reg_names
[REGNO (addr
.base
)],
4387 reg_names
[REGNO (addr
.offset
)]);
4389 asm_fprintf (f
, "[%s, %s, lsl %u]", reg_names
[REGNO (addr
.base
)],
4390 reg_names
[REGNO (addr
.offset
)], addr
.shift
);
4393 case ADDRESS_REG_UXTW
:
4394 if (addr
.shift
== 0)
4395 asm_fprintf (f
, "[%s, w%d, uxtw]", reg_names
[REGNO (addr
.base
)],
4396 REGNO (addr
.offset
) - R0_REGNUM
);
4398 asm_fprintf (f
, "[%s, w%d, uxtw %u]", reg_names
[REGNO (addr
.base
)],
4399 REGNO (addr
.offset
) - R0_REGNUM
, addr
.shift
);
4402 case ADDRESS_REG_SXTW
:
4403 if (addr
.shift
== 0)
4404 asm_fprintf (f
, "[%s, w%d, sxtw]", reg_names
[REGNO (addr
.base
)],
4405 REGNO (addr
.offset
) - R0_REGNUM
);
4407 asm_fprintf (f
, "[%s, w%d, sxtw %u]", reg_names
[REGNO (addr
.base
)],
4408 REGNO (addr
.offset
) - R0_REGNUM
, addr
.shift
);
4411 case ADDRESS_REG_WB
:
4412 switch (GET_CODE (x
))
4415 asm_fprintf (f
, "[%s, %d]!", reg_names
[REGNO (addr
.base
)],
4416 GET_MODE_SIZE (aarch64_memory_reference_mode
));
4419 asm_fprintf (f
, "[%s], %d", reg_names
[REGNO (addr
.base
)],
4420 GET_MODE_SIZE (aarch64_memory_reference_mode
));
4423 asm_fprintf (f
, "[%s, -%d]!", reg_names
[REGNO (addr
.base
)],
4424 GET_MODE_SIZE (aarch64_memory_reference_mode
));
4427 asm_fprintf (f
, "[%s], -%d", reg_names
[REGNO (addr
.base
)],
4428 GET_MODE_SIZE (aarch64_memory_reference_mode
));
4431 asm_fprintf (f
, "[%s, %wd]!", reg_names
[REGNO (addr
.base
)],
4432 INTVAL (addr
.offset
));
4435 asm_fprintf (f
, "[%s], %wd", reg_names
[REGNO (addr
.base
)],
4436 INTVAL (addr
.offset
));
4443 case ADDRESS_LO_SUM
:
4444 asm_fprintf (f
, "[%s, #:lo12:", reg_names
[REGNO (addr
.base
)]);
4445 output_addr_const (f
, addr
.offset
);
4446 asm_fprintf (f
, "]");
4449 case ADDRESS_SYMBOLIC
:
4453 output_addr_const (f
, x
);
4457 aarch64_label_mentioned_p (rtx x
)
4462 if (GET_CODE (x
) == LABEL_REF
)
  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
     referencing instruction, but they are constant offsets, not
     symbols.  */
4468 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
4471 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
4472 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
4478 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
4479 if (aarch64_label_mentioned_p (XVECEXP (x
, i
, j
)))
4482 else if (fmt
[i
] == 'e' && aarch64_label_mentioned_p (XEXP (x
, i
)))
4489 /* Implement REGNO_REG_CLASS. */
4492 aarch64_regno_regclass (unsigned regno
)
4494 if (GP_REGNUM_P (regno
))
4495 return GENERAL_REGS
;
4497 if (regno
== SP_REGNUM
)
4500 if (regno
== FRAME_POINTER_REGNUM
4501 || regno
== ARG_POINTER_REGNUM
)
4502 return POINTER_REGS
;
4504 if (FP_REGNUM_P (regno
))
4505 return FP_LO_REGNUM_P (regno
) ? FP_LO_REGS
: FP_REGS
;
4511 aarch64_legitimize_address (rtx x
, rtx
/* orig_x */, machine_mode mode
)
4513 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
4514 where mask is selected by alignment and size of the offset.
4515 We try to pick as large a range for the offset as possible to
4516 maximize the chance of a CSE. However, for aligned addresses
4517 we limit the range to 4k so that structures with different sized
4518 elements are likely to use the same base. */
4520 if (GET_CODE (x
) == PLUS
&& CONST_INT_P (XEXP (x
, 1)))
4522 HOST_WIDE_INT offset
= INTVAL (XEXP (x
, 1));
4523 HOST_WIDE_INT base_offset
;
4525 /* Does it look like we'll need a load/store-pair operation? */
4526 if (GET_MODE_SIZE (mode
) > 16
4528 base_offset
= ((offset
+ 64 * GET_MODE_SIZE (mode
))
4529 & ~((128 * GET_MODE_SIZE (mode
)) - 1));
      /* For offsets that aren't a multiple of the access size, the limit is
	 -256...255.  */
4532 else if (offset
& (GET_MODE_SIZE (mode
) - 1))
4533 base_offset
= (offset
+ 0x100) & ~0x1ff;
4535 base_offset
= offset
& ~0xfff;
4537 if (base_offset
== 0)
4540 offset
-= base_offset
;
4541 rtx base_reg
= gen_reg_rtx (Pmode
);
4542 rtx val
= force_operand (plus_constant (Pmode
, XEXP (x
, 0), base_offset
),
4544 emit_move_insn (base_reg
, val
);
4545 x
= plus_constant (Pmode
, base_reg
, offset
);
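      /* Worked example (illustrative): for a DImode access at X + 0x21008
	 the offset is a multiple of the access size, so base_offset becomes
	 0x21000; base_reg is loaded with X + 0x21000 and the remaining
	 reference is [base_reg, #8], a base that accesses at nearby offsets
	 can share via CSE.  */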
4551 /* Try a machine-dependent way of reloading an illegitimate address
4552 operand. If we find one, push the reload and return the new rtx. */
4555 aarch64_legitimize_reload_address (rtx
*x_p
,
4557 int opnum
, int type
,
4558 int ind_levels ATTRIBUTE_UNUSED
)
4562 /* Do not allow mem (plus (reg, const)) if vector struct mode. */
4563 if (aarch64_vect_struct_mode_p (mode
)
4564 && GET_CODE (x
) == PLUS
4565 && REG_P (XEXP (x
, 0))
4566 && CONST_INT_P (XEXP (x
, 1)))
4570 push_reload (orig_rtx
, NULL_RTX
, x_p
, NULL
,
4571 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
4572 opnum
, (enum reload_type
) type
);
4576 /* We must recognize output that we have already generated ourselves. */
4577 if (GET_CODE (x
) == PLUS
4578 && GET_CODE (XEXP (x
, 0)) == PLUS
4579 && REG_P (XEXP (XEXP (x
, 0), 0))
4580 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
4581 && CONST_INT_P (XEXP (x
, 1)))
4583 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
4584 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
4585 opnum
, (enum reload_type
) type
);
4589 /* We wish to handle large displacements off a base register by splitting
4590 the addend across an add and the mem insn. This can cut the number of
4591 extra insns needed from 3 to 1. It is only useful for load/store of a
4592 single register with 12 bit offset field. */
4593 if (GET_CODE (x
) == PLUS
4594 && REG_P (XEXP (x
, 0))
4595 && CONST_INT_P (XEXP (x
, 1))
4596 && HARD_REGISTER_P (XEXP (x
, 0))
4599 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x
, 0)), true))
4601 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
4602 HOST_WIDE_INT low
= val
& 0xfff;
4603 HOST_WIDE_INT high
= val
- low
;
4606 machine_mode xmode
= GET_MODE (x
);
4608 /* In ILP32, xmode can be either DImode or SImode. */
4609 gcc_assert (xmode
== DImode
|| xmode
== SImode
);
4611 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4612 BLKmode alignment. */
4613 if (GET_MODE_SIZE (mode
) == 0)
4616 offs
= low
% GET_MODE_SIZE (mode
);
4618 /* Align misaligned offset by adjusting high part to compensate. */
4621 if (aarch64_uimm12_shift (high
+ offs
))
4630 offs
= GET_MODE_SIZE (mode
) - offs
;
4632 high
= high
+ (low
& 0x1000) - offs
;
4637 /* Check for overflow. */
4638 if (high
+ low
!= val
)
4641 cst
= GEN_INT (high
);
4642 if (!aarch64_uimm12_shift (high
))
4643 cst
= force_const_mem (xmode
, cst
);
4645 /* Reload high part into base reg, leaving the low part
4646 in the mem instruction.
4647 Note that replacing this gen_rtx_PLUS with plus_constant is
4648 wrong in this case because we rely on the
4649 (plus (plus reg c1) c2) structure being preserved so that
4650 XEXP (*p, 0) in push_reload below uses the correct term. */
4651 x
= gen_rtx_PLUS (xmode
,
4652 gen_rtx_PLUS (xmode
, XEXP (x
, 0), cst
),
4655 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
4656 BASE_REG_CLASS
, xmode
, VOIDmode
, 0, 0,
4657 opnum
, (enum reload_type
) type
);
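      /* Worked example (illustrative): for a DImode access at reg + 0x1ff8
	 the code above splits the constant into high = 0x1000 and
	 low = 0xff8; high is a valid 12-bit shifted immediate, so the base
	 register is reloaded with reg + 0x1000 and the memory reference
	 keeps the in-range offset #0xff8.  */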
4666 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED
, rtx x
,
4669 secondary_reload_info
*sri
)
4671 /* Without the TARGET_SIMD instructions we cannot move a Q register
4672 to a Q register directly. We need a scratch. */
4673 if (REG_P (x
) && (mode
== TFmode
|| mode
== TImode
) && mode
== GET_MODE (x
)
4674 && FP_REGNUM_P (REGNO (x
)) && !TARGET_SIMD
4675 && reg_class_subset_p (rclass
, FP_REGS
))
4678 sri
->icode
= CODE_FOR_aarch64_reload_movtf
;
4679 else if (mode
== TImode
)
4680 sri
->icode
= CODE_FOR_aarch64_reload_movti
;
  /* A TFmode or TImode memory access should be handled via FP_REGS
     because AArch64 has richer addressing modes for LDR/STR instructions
     than for LDP/STP instructions.  */
4687 if (!TARGET_GENERAL_REGS_ONLY
&& rclass
== GENERAL_REGS
4688 && GET_MODE_SIZE (mode
) == 16 && MEM_P (x
))
4691 if (rclass
== FP_REGS
&& (mode
== TImode
|| mode
== TFmode
) && CONSTANT_P(x
))
4692 return GENERAL_REGS
;
4698 aarch64_can_eliminate (const int from
, const int to
)
4700 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4701 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4703 if (frame_pointer_needed
)
4705 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4707 if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
4709 if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
4710 && !cfun
->calls_alloca
)
4712 if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4719 /* If we decided that we didn't need a leaf frame pointer but then used
4720 LR in the function, then we'll want a frame pointer after all, so
4721 prevent this elimination to ensure a frame pointer is used. */
4722 if (to
== STACK_POINTER_REGNUM
4723 && flag_omit_leaf_frame_pointer
4724 && df_regs_ever_live_p (LR_REGNUM
))
4732 aarch64_initial_elimination_offset (unsigned from
, unsigned to
)
4734 aarch64_layout_frame ();
4736 if (to
== HARD_FRAME_POINTER_REGNUM
)
4738 if (from
== ARG_POINTER_REGNUM
)
4739 return cfun
->machine
->frame
.frame_size
- crtl
->outgoing_args_size
;
4741 if (from
== FRAME_POINTER_REGNUM
)
4742 return (cfun
->machine
->frame
.hard_fp_offset
4743 - cfun
->machine
->frame
.saved_varargs_size
);
4746 if (to
== STACK_POINTER_REGNUM
)
4748 if (from
== FRAME_POINTER_REGNUM
)
4749 return (cfun
->machine
->frame
.frame_size
4750 - cfun
->machine
->frame
.saved_varargs_size
);
4753 return cfun
->machine
->frame
.frame_size
;
/* Implement RETURN_ADDR_RTX.  We do not support moving back to a
   previous frame.  */
4760 aarch64_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
4764 return get_hard_reg_initial_val (Pmode
, LR_REGNUM
);
4769 aarch64_asm_trampoline_template (FILE *f
)
4773 asm_fprintf (f
, "\tldr\tw%d, .+16\n", IP1_REGNUM
- R0_REGNUM
);
4774 asm_fprintf (f
, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM
- R0_REGNUM
);
4778 asm_fprintf (f
, "\tldr\t%s, .+16\n", reg_names
[IP1_REGNUM
]);
4779 asm_fprintf (f
, "\tldr\t%s, .+20\n", reg_names
[STATIC_CHAIN_REGNUM
]);
4781 asm_fprintf (f
, "\tbr\t%s\n", reg_names
[IP1_REGNUM
]);
4782 assemble_aligned_integer (4, const0_rtx
);
4783 assemble_aligned_integer (POINTER_BYTES
, const0_rtx
);
4784 assemble_aligned_integer (POINTER_BYTES
, const0_rtx
);
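/* Layout note (a sketch of what the code below assumes): the trampoline is
   tramp_code_sz (16) bytes of code followed by two pointer-sized slots.
   aarch64_trampoline_init copies the code from the template above, stores
   the target function address in the first slot and the static chain value
   in the second, then calls __clear_cache over the block so the freshly
   written instructions are visible to instruction fetch.  */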
4788 aarch64_trampoline_init (rtx m_tramp
, tree fndecl
, rtx chain_value
)
4790 rtx fnaddr
, mem
, a_tramp
;
4791 const int tramp_code_sz
= 16;
  /* Don't need to copy the trailing D-words; we fill those in below.  */
4794 emit_block_move (m_tramp
, assemble_trampoline_template (),
4795 GEN_INT (tramp_code_sz
), BLOCK_OP_NORMAL
);
4796 mem
= adjust_address (m_tramp
, ptr_mode
, tramp_code_sz
);
4797 fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
4798 if (GET_MODE (fnaddr
) != ptr_mode
)
4799 fnaddr
= convert_memory_address (ptr_mode
, fnaddr
);
4800 emit_move_insn (mem
, fnaddr
);
4802 mem
= adjust_address (m_tramp
, ptr_mode
, tramp_code_sz
+ POINTER_BYTES
);
4803 emit_move_insn (mem
, chain_value
);
4805 /* XXX We should really define a "clear_cache" pattern and use
4806 gen_clear_cache(). */
4807 a_tramp
= XEXP (m_tramp
, 0);
4808 emit_library_call (gen_rtx_SYMBOL_REF (Pmode
, "__clear_cache"),
4809 LCT_NORMAL
, VOIDmode
, 2, a_tramp
, ptr_mode
,
4810 plus_constant (ptr_mode
, a_tramp
, TRAMPOLINE_SIZE
),
4814 static unsigned char
4815 aarch64_class_max_nregs (reg_class_t regclass
, machine_mode mode
)
4819 case CALLER_SAVE_REGS
:
4826 aarch64_vector_mode_p (mode
) ? (GET_MODE_SIZE (mode
) + 15) / 16 :
4827 (GET_MODE_SIZE (mode
) + 7) / 8;
4841 aarch64_preferred_reload_class (rtx x
, reg_class_t regclass
)
4843 if (regclass
== POINTER_REGS
)
4844 return GENERAL_REGS
;
4846 if (regclass
== STACK_REG
)
4849 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x
)), POINTER_REGS
))
4855 /* If it's an integer immediate that MOVI can't handle, then
4856 FP_REGS is not an option, so we return NO_REGS instead. */
4857 if (CONST_INT_P (x
) && reg_class_subset_p (regclass
, FP_REGS
)
4858 && !aarch64_simd_imm_scalar_p (x
, GET_MODE (x
)))
  /* Register elimination can result in a request for
     SP+constant->FP_REGS.  We cannot support such operations which
     use SP as source and an FP_REG as destination, so reject out
     of hand in this case.  */
4865 if (! reg_class_subset_p (regclass
, GENERAL_REGS
) && GET_CODE (x
) == PLUS
)
4867 rtx lhs
= XEXP (x
, 0);
4869 /* Look through a possible SUBREG introduced by ILP32. */
4870 if (GET_CODE (lhs
) == SUBREG
)
4871 lhs
= SUBREG_REG (lhs
);
4873 gcc_assert (REG_P (lhs
));
4874 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs
)),
void
aarch64_asm_output_labelref (FILE* f, const char *name)
{
  asm_fprintf (f, "%U%s", name);
}
static void
aarch64_elf_asm_constructor (rtx symbol, int priority)
{
  if (priority == DEFAULT_INIT_PRIORITY)
    default_ctor_section_asm_out_constructor (symbol, priority);
  else
    {
      section *s;
      char buf[18];
      snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
      s = get_section (buf, SECTION_WRITE, NULL);
      switch_to_section (s);
      assemble_align (POINTER_SIZE);
      assemble_aligned_integer (POINTER_BYTES, symbol);
    }
}
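/* Worked example (illustrative, not part of the original sources): a
   constructor registered with __attribute__((constructor(101))) has
   priority 101, so the "%.5u" format above places its address in a
   section named ".init_array.00101", while default-priority constructors
   go through the generic .init_array path instead.  */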
static void
aarch64_elf_asm_destructor (rtx symbol, int priority)
{
  if (priority == DEFAULT_INIT_PRIORITY)
    default_dtor_section_asm_out_destructor (symbol, priority);
  else
    {
      section *s;
      char buf[18];
      snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
      s = get_section (buf, SECTION_WRITE, NULL);
      switch_to_section (s);
      assemble_align (POINTER_SIZE);
      assemble_aligned_integer (POINTER_BYTES, symbol);
    }
}
const char *
aarch64_output_casesi (rtx *operands)
{
  char label[100];
  char buf[100];
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
  int index;
  static const char *const patterns[4][2] =
  {
    {
      "ldrb\t%w3, [%0,%w1,uxtw]",
      "add\t%3, %4, %w3, sxtb #2"
    },
    {
      "ldrh\t%w3, [%0,%w1,uxtw #1]",
      "add\t%3, %4, %w3, sxth #2"
    },
    {
      "ldr\t%w3, [%0,%w1,uxtw #2]",
      "add\t%3, %4, %w3, sxtw #2"
    },
    /* We assume that DImode is only generated when not optimizing and
       that we don't really need 64-bit address offsets.  That would
       imply an object file with 8GB of code in a single function!  */
    {
      "ldr\t%w3, [%0,%w1,uxtw #2]",
      "add\t%3, %4, %w3, sxtw #2"
    }
  };

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));

  gcc_assert (index >= 0 && index <= 3);

  /* Need to implement table size reduction, by changing the code below.  */
  output_asm_insn (patterns[index][0], operands);
  ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
  snprintf (buf, sizeof (buf),
            "adr\t%%4, %s", targetm.strip_name_encoding (label));
  output_asm_insn (buf, operands);
  output_asm_insn (patterns[index][1], operands);
  output_asm_insn ("br\t%3", operands);
  assemble_label (asm_out_file, label);
  return "";
}
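/* Illustrative output (register names chosen purely for the example): for
   a dispatch table with HImode entries the code above emits a sequence of
   the shape

	ldrh	w3, [x0,w1,uxtw #1]	// scaled load of the table entry
	adr	x4, .Lrtx<N>		// base for the difference table
	add	x3, x4, w3, sxth #2	// form the branch target
	br	x3
   .Lrtx<N>:

   where the actual register numbers come from the casesi operands.  */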
/* Return size in bits of an arithmetic operand which is shifted/scaled and
   masked such that it is suitable for a UXTB, UXTH, or UXTW extend
   operator.  */

int
aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
{
  if (shift >= 0 && shift <= 3)
    {
      int size;
      for (size = 8; size <= 32; size *= 2)
        {
          HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
          if (mask == bits << shift)
            return size;
        }
    }
  return 0;
}
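/* Worked example (illustrative): with shift == 1 and mask == 0x1fe the
   loop finds size == 8, since (0xff << 1) == 0x1fe, so the operand fits a
   UXTB combined with a left shift of 1.  A mask that is not a contiguous
   byte/half/word field at the given shift yields 0.  */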
static bool
aarch64_use_blocks_for_constant_p (machine_mode mode ATTRIBUTE_UNUSED,
                                   const_rtx x ATTRIBUTE_UNUSED)
{
  /* We can't use blocks for constants when we're using a per-function
     constant pool.  */
  return false;
}

static section *
aarch64_select_rtx_section (machine_mode mode ATTRIBUTE_UNUSED,
                            rtx x ATTRIBUTE_UNUSED,
                            unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  /* Force all constant pool entries into the current function section.  */
  return function_section (current_function_decl);
}
/* Helper function for rtx cost calculation.  Strip a shift expression
   from X.  Returns the inner operand if successful, or the original
   expression on failure.  */
static rtx
aarch64_strip_shift (rtx x)
{
  rtx op = x;

  /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
     we can convert both to ROR during final output.  */
  if ((GET_CODE (op) == ASHIFT
       || GET_CODE (op) == ASHIFTRT
       || GET_CODE (op) == LSHIFTRT
       || GET_CODE (op) == ROTATERT
       || GET_CODE (op) == ROTATE)
      && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);

  if (GET_CODE (op) == MULT
      && CONST_INT_P (XEXP (op, 1))
      && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
    return XEXP (op, 0);

  return x;
}
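/* Example (illustrative): given (plus (ashift (reg x1) (const_int 3))
   (reg x2)), stripping the shift from the first operand yields (reg x1),
   letting the caller cost the whole expression as an ADD with a shifted
   register.  A MULT by a power of two is handled the same way, since it
   will be emitted as a shift.  */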
/* Helper function for rtx cost calculation.  Strip an extend
   expression from X.  Returns the inner operand if successful, or the
   original expression on failure.  We deal with a number of possible
   canonicalization variations here.  */
static rtx
aarch64_strip_extend (rtx x)
{
  rtx op = x;

  /* Zero and sign extraction of a widened value.  */
  if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
      && XEXP (op, 2) == const0_rtx
      && GET_CODE (XEXP (op, 0)) == MULT
      && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
                                         XEXP (op, 1)))
    return XEXP (XEXP (op, 0), 0);

  /* It can also be represented (for zero-extend) as an AND with an
     immediate.  */
  if (GET_CODE (op) == AND
      && GET_CODE (XEXP (op, 0)) == MULT
      && CONST_INT_P (XEXP (XEXP (op, 0), 1))
      && CONST_INT_P (XEXP (op, 1))
      && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
                           INTVAL (XEXP (op, 1))) != 0)
    return XEXP (XEXP (op, 0), 0);

  /* Now handle extended register, as this may also have an optional
     left shift by 1..4.  */
  if (GET_CODE (op) == ASHIFT
      && CONST_INT_P (XEXP (op, 1))
      && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
    op = XEXP (op, 0);

  if (GET_CODE (op) == ZERO_EXTEND
      || GET_CODE (op) == SIGN_EXTEND)
    op = XEXP (op, 0);

  if (op != x)
    return op;

  return x;
}
/* Helper function for rtx cost calculation.  Calculate the cost of
   a MULT, which may be part of a multiply-accumulate rtx.  Return
   the calculated cost of the expression, recursing manually in to
   operands where needed.  */

static int
aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
{
  rtx op0, op1;
  const struct cpu_cost_table *extra_cost
    = aarch64_tune_params->insn_extra_cost;
  int cost = 0;
  bool maybe_fma = (outer == PLUS || outer == MINUS);
  machine_mode mode = GET_MODE (x);

  gcc_checking_assert (code == MULT);

  op0 = XEXP (x, 0);
  op1 = XEXP (x, 1);

  if (VECTOR_MODE_P (mode))
    mode = GET_MODE_INNER (mode);

  /* Integer multiply/fma.  */
  if (GET_MODE_CLASS (mode) == MODE_INT)
    {
      /* The multiply will be canonicalized as a shift, cost it as such.  */
      if (CONST_INT_P (op1)
          && exact_log2 (INTVAL (op1)) > 0)
        {
          if (speed)
            {
              if (maybe_fma)
                /* ADD (shifted register).  */
                cost += extra_cost->alu.arith_shift;
              else
                /* LSL (immediate).  */
                cost += extra_cost->alu.shift;
            }

          cost += rtx_cost (op0, GET_CODE (op0), 0, speed);

          return cost;
        }

      /* Integer multiplies or FMAs have zero/sign extending variants.  */
      if ((GET_CODE (op0) == ZERO_EXTEND
           && GET_CODE (op1) == ZERO_EXTEND)
          || (GET_CODE (op0) == SIGN_EXTEND
              && GET_CODE (op1) == SIGN_EXTEND))
        {
          cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
                  + rtx_cost (XEXP (op1, 0), MULT, 1, speed);

          if (speed)
            {
              if (maybe_fma)
                /* MADD/SMADDL/UMADDL.  */
                cost += extra_cost->mult[0].extend_add;
              else
                /* MUL/SMULL/UMULL.  */
                cost += extra_cost->mult[0].extend;
            }

          return cost;
        }

      /* This is either an integer multiply or an FMA.  In both cases
         we want to recurse and cost the operands.  */
      cost += rtx_cost (op0, MULT, 0, speed)
              + rtx_cost (op1, MULT, 1, speed);

      if (speed)
        {
          if (maybe_fma)
            /* MADD.  */
            cost += extra_cost->mult[mode == DImode].add;
          else
            /* MUL.  */
            cost += extra_cost->mult[mode == DImode].simple;
        }

      return cost;
    }
  else
    {
      if (speed)
        {
          /* Floating-point FMA/FMUL can also support negations of the
             operands.  */
          if (GET_CODE (op0) == NEG)
            op0 = XEXP (op0, 0);
          if (GET_CODE (op1) == NEG)
            op1 = XEXP (op1, 0);

          if (maybe_fma)
            /* FMADD/FNMADD/FNMSUB/FMSUB.  */
            cost += extra_cost->fp[mode == DFmode].fma;
          else
            /* FMUL/FNMUL.  */
            cost += extra_cost->fp[mode == DFmode].mult;
        }

      cost += rtx_cost (op0, MULT, 0, speed)
              + rtx_cost (op1, MULT, 1, speed);
      return cost;
    }
}
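/* Illustrative costing (not from the original sources): for
   (plus (mult (reg x1) (const_int 4)) (reg x2)) the MULT is reached with
   OUTER == PLUS, so maybe_fma is true and the multiply by a power of two
   is costed as an ADD (shifted register), i.e. extra_cost->alu.arith_shift
   plus the cost of the multiplicand, rather than as a real multiply.  */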
static int
aarch64_address_cost (rtx x,
                      machine_mode mode,
                      addr_space_t as ATTRIBUTE_UNUSED,
                      bool speed)
{
  enum rtx_code c = GET_CODE (x);
  const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
  struct aarch64_address_info info;
  int cost = 0;
  info.shift = 0;

  if (!aarch64_classify_address (&info, x, mode, c, false))
    {
      if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
        {
          /* This is a CONST or SYMBOL ref which will be split
             in a different way depending on the code model in use.
             Cost it through the generic infrastructure.  */
          int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
          /* Divide through by the cost of one instruction to
             bring it to the same units as the address costs.  */
          cost_symbol_ref /= COSTS_N_INSNS (1);
          /* The cost is then the cost of preparing the address,
             followed by an immediate (possibly 0) offset.  */
          return cost_symbol_ref + addr_cost->imm_offset;
        }
      else
        {
          /* This is most likely a jump table from a case
             statement.  */
          return addr_cost->register_offset;
        }
    }

  switch (info.type)
    {
    case ADDRESS_LO_SUM:
    case ADDRESS_SYMBOLIC:
    case ADDRESS_REG_IMM:
      cost += addr_cost->imm_offset;
      break;

    case ADDRESS_REG_WB:
      if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
        cost += addr_cost->pre_modify;
      else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
        cost += addr_cost->post_modify;
      else
        gcc_unreachable ();
      break;

    case ADDRESS_REG_REG:
      cost += addr_cost->register_offset;
      break;

    case ADDRESS_REG_UXTW:
    case ADDRESS_REG_SXTW:
      cost += addr_cost->register_extend;
      break;

    default:
      gcc_unreachable ();
    }

  if (info.shift > 0)
    {
      /* For the sake of calculating the cost of the shifted register
         component, we can treat same sized modes in the same way.  */
      switch (GET_MODE_BITSIZE (mode))
        {
        case 16:
          cost += addr_cost->addr_scale_costs.hi;
          break;

        case 32:
          cost += addr_cost->addr_scale_costs.si;
          break;

        case 64:
          cost += addr_cost->addr_scale_costs.di;
          break;

        /* We can't tell, or this is a 128-bit vector.  */
        default:
          cost += addr_cost->addr_scale_costs.ti;
          break;
        }
    }

  return cost;
}
/* Return true if the RTX X in mode MODE is a zero or sign extract
   usable in an ADD or SUB (extended register) instruction.  */
static bool
aarch64_rtx_arith_op_extract_p (rtx x, machine_mode mode)
{
  /* Catch add with a sign extract.
     This is add_<optab><mode>_multp2.  */
  if (GET_CODE (x) == SIGN_EXTRACT
      || GET_CODE (x) == ZERO_EXTRACT)
    {
      rtx op0 = XEXP (x, 0);
      rtx op1 = XEXP (x, 1);
      rtx op2 = XEXP (x, 2);

      if (GET_CODE (op0) == MULT
          && CONST_INT_P (op1)
          && op2 == const0_rtx
          && CONST_INT_P (XEXP (op0, 1))
          && aarch64_is_extend_from_extract (mode,
                                             XEXP (op0, 1),
                                             op1))
        return true;
    }

  return false;
}

/* Return true if the UNSPEC number U is one of the floating-point
   round-to-integral (frint*) operations.  */
static bool
aarch64_frint_unspec_p (unsigned int u)
{
  switch (u)
    {
    case UNSPEC_FRINTZ:
    case UNSPEC_FRINTP:
    case UNSPEC_FRINTM:
    case UNSPEC_FRINTA:
    case UNSPEC_FRINTN:
    case UNSPEC_FRINTX:
    case UNSPEC_FRINTI:
      return true;

    default:
      return false;
    }
}
/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
   storing it in *COST.  Result is true if the total cost of the operation
   has now been calculated.  */
static bool
aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
{
  rtx inner;
  rtx comparator;
  enum rtx_code cmpcode;

  if (COMPARISON_P (op0))
    {
      inner = XEXP (op0, 0);
      comparator = XEXP (op0, 1);
      cmpcode = GET_CODE (op0);
    }
  else
    {
      inner = op0;
      comparator = const0_rtx;
      cmpcode = NE;
    }

  if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
    {
      /* Conditional branch.  */
      if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
        return true;
      else
        {
          if (cmpcode == NE || cmpcode == EQ)
            {
              if (comparator == const0_rtx)
                {
                  /* TBZ/TBNZ/CBZ/CBNZ.  */
                  if (GET_CODE (inner) == ZERO_EXTRACT)
                    /* TBZ/TBNZ.  */
                    *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT,
                                       0, speed);
                  else
                    /* CBZ/CBNZ.  */
                    *cost += rtx_cost (inner, cmpcode, 0, speed);

                  return true;
                }
            }
          else if (cmpcode == LT || cmpcode == GE)
            {
              /* TBZ/TBNZ.  */
              if (comparator == const0_rtx)
                return true;
            }
        }
    }
  else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
    {
      /* It's a conditional operation based on the status flags,
         so it must be some flavor of CSEL.  */

      /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL.  */
      if (GET_CODE (op1) == NEG
          || GET_CODE (op1) == NOT
          || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
        op1 = XEXP (op1, 0);

      *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed);
      *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed);
      return true;
    }

  /* We don't know what this is, cost all operands.  */
  return false;
}
/* Calculate the cost of calculating X, storing it in *COST.  Result
   is true if the total cost of the operation has now been calculated.  */
static bool
aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
                   int param ATTRIBUTE_UNUSED, int *cost, bool speed)
{
  rtx op0, op1, op2;
  const struct cpu_cost_table *extra_cost
    = aarch64_tune_params->insn_extra_cost;
  machine_mode mode = GET_MODE (x);

  /* By default, assume that everything has equivalent cost to the
     cheapest instruction.  Any additional costs are applied as a delta
     above this default.  */
  *cost = COSTS_N_INSNS (1);

  /* TODO: The cost infrastructure currently does not handle
     vector operations.  Assume that all vector operations
     are equally expensive.  */
  if (VECTOR_MODE_P (mode))
    {
      if (speed)
        *cost += extra_cost->vect.alu;
      return true;
    }

  switch (code)
    {
    case SET:
      /* The cost depends entirely on the operands to SET.  */
      *cost = 0;
      op0 = SET_DEST (x);
      op1 = SET_SRC (x);

      switch (GET_CODE (op0))
        {
        case MEM:
          if (speed)
            {
              rtx address = XEXP (op0, 0);
              if (GET_MODE_CLASS (mode) == MODE_INT)
                *cost += extra_cost->ldst.store;
              else if (mode == SFmode)
                *cost += extra_cost->ldst.storef;
              else if (mode == DFmode)
                *cost += extra_cost->ldst.stored;

              *cost +=
                COSTS_N_INSNS (aarch64_address_cost (address, mode,
                                                     0, speed));
            }

          *cost += rtx_cost (op1, SET, 1, speed);
          return true;

        case SUBREG:
          if (! REG_P (SUBREG_REG (op0)))
            *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);

          /* Fall through.  */
        case REG:
          /* const0_rtx is in general free, but we will use an
             instruction to set a register to 0.  */
          if (REG_P (op1) || op1 == const0_rtx)
            {
              /* The cost is 1 per register copied.  */
              int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
                              / UNITS_PER_WORD;
              *cost = COSTS_N_INSNS (n_minus_1 + 1);
            }
          else
            /* Cost is just the cost of the RHS of the set.  */
            *cost += rtx_cost (op1, SET, 1, speed);
          return true;

        case ZERO_EXTRACT:
        case SIGN_EXTRACT:
          /* Bit-field insertion.  Strip any redundant widening of
             the RHS to meet the width of the target.  */
          if (GET_CODE (op1) == SUBREG)
            op1 = SUBREG_REG (op1);
          if ((GET_CODE (op1) == ZERO_EXTEND
               || GET_CODE (op1) == SIGN_EXTEND)
              && CONST_INT_P (XEXP (op0, 1))
              && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
                  >= INTVAL (XEXP (op0, 1))))
            op1 = XEXP (op1, 0);

          if (CONST_INT_P (op1))
            {
              /* MOV immediate is assumed to always be cheap.  */
              *cost = COSTS_N_INSNS (1);
            }
          else
            {
              /* BFM.  */
              if (speed)
                *cost += extra_cost->alu.bfi;
              *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
            }

          return true;

        default:
          /* We can't make sense of this, assume default cost.  */
          *cost = COSTS_N_INSNS (1);
          return false;
        }
      return false;
    case CONST_INT:
      /* If an instruction can incorporate a constant within the
         instruction, the instruction's expression avoids calling
         rtx_cost() on the constant.  If rtx_cost() is called on a
         constant, then it is usually because the constant must be
         moved into a register by one or more instructions.

         The exception is constant 0, which can be expressed
         as XZR/WZR and is therefore free.  The exception to this is
         if we have (set (reg) (const0_rtx)) in which case we must cost
         the move.  However, we can catch that when we cost the SET, so
         we don't need to consider that here.  */
      if (x == const0_rtx)
        *cost = 0;
      else
        {
          /* To an approximation, building any other constant is
             proportionally expensive to the number of instructions
             required to build that constant.  This is true whether we
             are compiling for SPEED or otherwise.  */
          *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
                                 (NULL_RTX, x, false, mode));
        }
      return true;

    case CONST_DOUBLE:
      if (speed)
        {
          /* mov[df,sf]_aarch64.  */
          if (aarch64_float_const_representable_p (x))
            /* FMOV (scalar immediate).  */
            *cost += extra_cost->fp[mode == DFmode].fpconst;
          else if (!aarch64_float_const_zero_rtx_p (x))
            {
              /* This will be a load from memory.  */
              if (mode == DFmode)
                *cost += extra_cost->ldst.loadd;
              else
                *cost += extra_cost->ldst.loadf;
            }
          else
            /* Otherwise this is +0.0.  We get this using MOVI d0, #0
               or MOV v0.s[0], wzr - neither of which are modeled by the
               cost tables.  Just use the default cost.  */
            ;
        }
      return true;

    case MEM:
      if (speed)
        {
          /* For loads we want the base cost of a load, plus an
             approximation for the additional cost of the addressing
             mode.  */
          rtx address = XEXP (x, 0);
          if (GET_MODE_CLASS (mode) == MODE_INT)
            *cost += extra_cost->ldst.load;
          else if (mode == SFmode)
            *cost += extra_cost->ldst.loadf;
          else if (mode == DFmode)
            *cost += extra_cost->ldst.loadd;

          *cost +=
            COSTS_N_INSNS (aarch64_address_cost (address, mode,
                                                 0, speed));
        }

      return true;
    case NEG:
      op0 = XEXP (x, 0);

      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
        {
          if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
              || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
            {
              /* CSETM.  */
              *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
              return true;
            }

          /* Cost this as SUB wzr, X.  */
          op0 = CONST0_RTX (GET_MODE (x));
          op1 = XEXP (x, 0);
          goto cost_minus;
        }

      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
        {
          /* Support (neg(fma...)) as a single instruction only if
             sign of zeros is unimportant.  This matches the decision
             making in aarch64.md.  */
          if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
            {
              /* FNMADD.  */
              *cost = rtx_cost (op0, NEG, 0, speed);
              return true;
            }
          /* FNEG.  */
          if (speed)
            *cost += extra_cost->fp[mode == DFmode].neg;
          return false;
        }

      return false;

    case CLZ:
      if (speed)
        *cost += extra_cost->alu.clz;

      return false;

    case COMPARE:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (op1 == const0_rtx
          && GET_CODE (op0) == AND)
        {
          x = op0;
          goto cost_logic;
        }

      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        {
          /* TODO: A write to the CC flags possibly costs extra, this
             needs encoding in the cost tables.  */

          /* CC_ZESWPmode supports zero extend for free.  */
          if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
            op0 = XEXP (op0, 0);

          /* ANDS.  */
          if (GET_CODE (op0) == AND)
            {
              x = op0;
              goto cost_logic;
            }

          if (GET_CODE (op0) == PLUS)
            {
              /* ADDS (and CMN alias).  */
              x = op0;
              goto cost_plus;
            }

          if (GET_CODE (op0) == MINUS)
            {
              /* SUBS.  */
              x = op0;
              goto cost_minus;
            }

          if (GET_CODE (op1) == NEG)
            {
              /* CMN.  */
              if (speed)
                *cost += extra_cost->alu.arith;

              *cost += rtx_cost (op0, COMPARE, 0, speed);
              *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed);
              return true;
            }

          /* CMP.

             Compare can freely swap the order of operands, and
             canonicalization puts the more complex operation first.
             But the integer MINUS logic expects the shift/extend
             operation in op1.  */
          if (! (REG_P (op0)
                 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
            {
              op0 = XEXP (x, 0);
              op1 = XEXP (x, 1);
              goto cost_minus;
            }

          return false;
        }

      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
        {
          /* FCMP.  */
          if (speed)
            *cost += extra_cost->fp[mode == DFmode].compare;

          if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
            {
              /* FCMP supports constant 0.0 for no extra cost.  */
              return true;
            }
          return false;
        }

      return false;
    case MINUS:
      {
        op0 = XEXP (x, 0);
        op1 = XEXP (x, 1);

cost_minus:
        /* Detect valid immediates.  */
        if ((GET_MODE_CLASS (mode) == MODE_INT
             || (GET_MODE_CLASS (mode) == MODE_CC
                 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
            && CONST_INT_P (op1)
            && aarch64_uimm12_shift (INTVAL (op1)))
          {
            *cost += rtx_cost (op0, MINUS, 0, speed);

            if (speed)
              /* SUB(S) (immediate).  */
              *cost += extra_cost->alu.arith;
            return true;
          }

        /* Look for SUB (extended register).  */
        if (aarch64_rtx_arith_op_extract_p (op1, mode))
          {
            if (speed)
              *cost += extra_cost->alu.arith_shift;

            *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
                               (enum rtx_code) GET_CODE (op1),
                               0, speed);
            return true;
          }

        rtx new_op1 = aarch64_strip_extend (op1);

        /* Cost this as an FMA-alike operation.  */
        if ((GET_CODE (new_op1) == MULT
             || GET_CODE (new_op1) == ASHIFT)
            && code != COMPARE)
          {
            *cost += aarch64_rtx_mult_cost (new_op1, MULT,
                                            (enum rtx_code) code,
                                            speed);
            *cost += rtx_cost (op0, MINUS, 0, speed);
            return true;
          }

        *cost += rtx_cost (new_op1, MINUS, 1, speed);

        if (speed)
          {
            if (GET_MODE_CLASS (mode) == MODE_INT)
              /* SUB(S).  */
              *cost += extra_cost->alu.arith;
            else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
              /* FSUB.  */
              *cost += extra_cost->fp[mode == DFmode].addsub;
          }
        return true;
      }

    case PLUS:
      {
        rtx new_op0;

        op0 = XEXP (x, 0);
        op1 = XEXP (x, 1);

cost_plus:
        if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
            || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
          {
            /* CSINC.  */
            *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
            *cost += rtx_cost (op1, PLUS, 1, speed);
            return true;
          }

        if (GET_MODE_CLASS (mode) == MODE_INT
            && CONST_INT_P (op1)
            && aarch64_uimm12_shift (INTVAL (op1)))
          {
            *cost += rtx_cost (op0, PLUS, 0, speed);

            if (speed)
              /* ADD (immediate).  */
              *cost += extra_cost->alu.arith;
            return true;
          }

        /* Look for ADD (extended register).  */
        if (aarch64_rtx_arith_op_extract_p (op0, mode))
          {
            if (speed)
              *cost += extra_cost->alu.arith_shift;

            *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
                               (enum rtx_code) GET_CODE (op0),
                               0, speed);
            return true;
          }

        /* Strip any extend, leave shifts behind as we will
           cost them through mult_cost.  */
        new_op0 = aarch64_strip_extend (op0);

        if (GET_CODE (new_op0) == MULT
            || GET_CODE (new_op0) == ASHIFT)
          {
            *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
                                            speed);
            *cost += rtx_cost (op1, PLUS, 1, speed);
            return true;
          }

        *cost += (rtx_cost (new_op0, PLUS, 0, speed)
                  + rtx_cost (op1, PLUS, 1, speed));

        if (speed)
          {
            if (GET_MODE_CLASS (mode) == MODE_INT)
              /* ADD.  */
              *cost += extra_cost->alu.arith;
            else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
              /* FADD.  */
              *cost += extra_cost->fp[mode == DFmode].addsub;
          }
        return true;
      }
    case BSWAP:
      *cost = COSTS_N_INSNS (1);

      if (speed)
        *cost += extra_cost->alu.rev;

      return false;

    case IOR:
      if (aarch_rev16_p (x))
        {
          *cost = COSTS_N_INSNS (1);

          if (speed)
            *cost += extra_cost->alu.rev;

          return true;
        }
      /* Fall through.  */
    case XOR:
    case AND:
    cost_logic:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (code == AND
          && GET_CODE (op0) == MULT
          && CONST_INT_P (XEXP (op0, 1))
          && CONST_INT_P (op1)
          && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
                               INTVAL (op1)) != 0)
        {
          /* This is a UBFM/SBFM.  */
          *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
          if (speed)
            *cost += extra_cost->alu.bfx;
          return true;
        }

      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
        {
          /* We possibly get the immediate for free, this is not
             modelled.  */
          if (CONST_INT_P (op1)
              && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
            {
              *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);

              if (speed)
                *cost += extra_cost->alu.logical;

              return true;
            }
          else
            {
              rtx new_op0 = op0;

              /* Handle ORN, EON, or BIC.  */
              if (GET_CODE (op0) == NOT)
                op0 = XEXP (op0, 0);

              new_op0 = aarch64_strip_shift (op0);

              /* If we had a shift on op0 then this is a logical-shift-
                 by-register/immediate operation.  Otherwise, this is just
                 a logical operation.  */
              if (speed)
                {
                  if (new_op0 != op0)
                    {
                      /* Shift by immediate.  */
                      if (CONST_INT_P (XEXP (op0, 1)))
                        *cost += extra_cost->alu.log_shift;
                      else
                        *cost += extra_cost->alu.log_shift_reg;
                    }
                  else
                    *cost += extra_cost->alu.logical;
                }

              /* In both cases we want to cost both operands.  */
              *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
                       + rtx_cost (op1, (enum rtx_code) code, 1, speed);

              return true;
            }
        }
      return false;

    case NOT:
      /* MVN.  */
      if (speed)
        *cost += extra_cost->alu.logical;

      /* The logical instruction could have the shifted register form,
         but the cost is the same if the shift is processed as a separate
         instruction, so we don't bother with it here.  */
      return false;
    case ZERO_EXTEND:
      op0 = XEXP (x, 0);
      /* If a value is written in SI mode, then zero extended to DI
         mode, the operation will in general be free as a write to
         a 'w' register implicitly zeroes the upper bits of an 'x'
         register.  However, if this is

           (set (reg) (zero_extend (reg)))

         we must cost the explicit register move.  */
      if (mode == DImode
          && GET_MODE (op0) == SImode
          && outer == SET)
        {
          int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);

          if (!op_cost && speed)
            /* MOV.  */
            *cost += extra_cost->alu.extend;
          else
            /* Free, the cost is that of the SI mode operation.  */
            *cost = op_cost;

          return true;
        }
      else if (MEM_P (XEXP (x, 0)))
        {
          /* All loads can zero extend to any size for free.  */
          *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
          return true;
        }

      /* UXTB/UXTH.  */
      if (speed)
        *cost += extra_cost->alu.extend;

      return false;

    case SIGN_EXTEND:
      if (MEM_P (XEXP (x, 0)))
        {
          /* LDRSH.  */
          if (speed)
            {
              rtx address = XEXP (XEXP (x, 0), 0);
              *cost += extra_cost->ldst.load_sign_extend;

              *cost +=
                COSTS_N_INSNS (aarch64_address_cost (address, mode,
                                                     0, speed));
            }
          return true;
        }

      if (speed)
        *cost += extra_cost->alu.extend;
      return false;

    case ASHIFT:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (CONST_INT_P (op1))
        {
          /* LSL (immediate), UBFM, UBFIZ and friends.  These are all
             aliases.  */
          if (speed)
            *cost += extra_cost->alu.shift;

          /* We can incorporate zero/sign extend for free.  */
          if (GET_CODE (op0) == ZERO_EXTEND
              || GET_CODE (op0) == SIGN_EXTEND)
            op0 = XEXP (op0, 0);

          *cost += rtx_cost (op0, ASHIFT, 0, speed);
          return true;
        }
      else
        {
          /* LSL (register).  */
          if (speed)
            *cost += extra_cost->alu.shift_reg;

          return false;  /* All arguments need to be in registers.  */
        }

    case ROTATE:
    case ROTATERT:
    case LSHIFTRT:
    case ASHIFTRT:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (CONST_INT_P (op1))
        {
          /* ASR (immediate) and friends.  */
          if (speed)
            *cost += extra_cost->alu.shift;

          *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
          return true;
        }
      else
        {
          /* ASR (register) and friends.  */
          if (speed)
            *cost += extra_cost->alu.shift_reg;

          return false;  /* All arguments need to be in registers.  */
        }
    case SYMBOL_REF:
      if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
        {
          /* LDR.  */
          if (speed)
            *cost += extra_cost->ldst.load;
        }
      else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
               || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
        {
          /* ADRP, followed by ADD.  */
          *cost += COSTS_N_INSNS (1);
          if (speed)
            *cost += 2 * extra_cost->alu.arith;
        }
      else if (aarch64_cmodel == AARCH64_CMODEL_TINY
               || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
        {
          /* ADR.  */
          if (speed)
            *cost += extra_cost->alu.arith;
        }

      if (flag_pic)
        {
          /* One extra load instruction, after accessing the GOT.  */
          *cost += COSTS_N_INSNS (1);
          if (speed)
            *cost += extra_cost->ldst.load;
        }
      return true;

    case HIGH:
    case LO_SUM:
      /* ADRP/ADD (immediate).  */
      if (speed)
        *cost += extra_cost->alu.arith;
      return true;

    case ZERO_EXTRACT:
    case SIGN_EXTRACT:
      /* UBFX/SBFX.  */
      if (speed)
        *cost += extra_cost->alu.bfx;

      /* We can trust that the immediates used will be correct (there
         are no by-register forms), so we need only cost op0.  */
      *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed);
      return true;

    case MULT:
      *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
      /* aarch64_rtx_mult_cost always handles recursion to its
         operands.  */
      return true;
    case MOD:
    case UMOD:
      if (speed)
        {
          if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
            *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
                      + extra_cost->mult[GET_MODE (x) == DImode].idiv);
          else if (GET_MODE (x) == DFmode)
            *cost += (extra_cost->fp[1].mult
                      + extra_cost->fp[1].div);
          else if (GET_MODE (x) == SFmode)
            *cost += (extra_cost->fp[0].mult
                      + extra_cost->fp[0].div);
        }
      return false;  /* All arguments need to be in registers.  */

    case DIV:
    case UDIV:
    case SQRT:
      if (speed)
        {
          if (GET_MODE_CLASS (mode) == MODE_INT)
            /* There is no integer SQRT, so only DIV and UDIV can get
               here.  */
            *cost += extra_cost->mult[mode == DImode].idiv;
          else
            *cost += extra_cost->fp[mode == DFmode].div;
        }
      return false;  /* All arguments need to be in registers.  */

    case IF_THEN_ELSE:
      return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
                                         XEXP (x, 2), cost, speed);

    case EQ:
    case NE:
    case GT:
    case GTU:
    case LT:
    case LTU:
    case GE:
    case GEU:
    case LE:
    case LEU:
      return false; /* All arguments must be in registers.  */

    case FMA:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      op2 = XEXP (x, 2);

      if (speed)
        *cost += extra_cost->fp[mode == DFmode].fma;

      /* FMSUB, FNMADD, and FNMSUB are free.  */
      if (GET_CODE (op0) == NEG)
        op0 = XEXP (op0, 0);

      if (GET_CODE (op2) == NEG)
        op2 = XEXP (op2, 0);

      /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
         and the by-element operand as operand 0.  */
      if (GET_CODE (op1) == NEG)
        op1 = XEXP (op1, 0);

      /* Catch vector-by-element operations.  The by-element operand can
         either be (vec_duplicate (vec_select (x))) or just
         (vec_select (x)), depending on whether we are multiplying by
         a vector or a scalar.

         Canonicalization is not very good in these cases, FMA4 will put the
         by-element operand as operand 0, FNMA4 will have it as operand 1.  */
      if (GET_CODE (op0) == VEC_DUPLICATE)
        op0 = XEXP (op0, 0);
      else if (GET_CODE (op1) == VEC_DUPLICATE)
        op1 = XEXP (op1, 0);

      if (GET_CODE (op0) == VEC_SELECT)
        op0 = XEXP (op0, 0);
      else if (GET_CODE (op1) == VEC_SELECT)
        op1 = XEXP (op1, 0);

      /* If the remaining parameters are not registers,
         get the cost to put them into registers.  */
      *cost += rtx_cost (op0, FMA, 0, speed);
      *cost += rtx_cost (op1, FMA, 1, speed);
      *cost += rtx_cost (op2, FMA, 2, speed);
      return true;
    case FLOAT_EXTEND:
      if (speed)
        *cost += extra_cost->fp[mode == DFmode].widen;
      return false;

    case FLOAT_TRUNCATE:
      if (speed)
        *cost += extra_cost->fp[mode == DFmode].narrow;
      return false;

    case FIX:
    case UNSIGNED_FIX:
      x = XEXP (x, 0);
      /* Strip the rounding part.  They will all be implemented
         by the fcvt* family of instructions anyway.  */
      if (GET_CODE (x) == UNSPEC)
        {
          unsigned int uns_code = XINT (x, 1);

          if (uns_code == UNSPEC_FRINTA
              || uns_code == UNSPEC_FRINTM
              || uns_code == UNSPEC_FRINTN
              || uns_code == UNSPEC_FRINTP
              || uns_code == UNSPEC_FRINTZ)
            x = XVECEXP (x, 0, 0);
        }

      if (speed)
        *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;

      *cost += rtx_cost (x, (enum rtx_code) code, 0, speed);
      return true;

    case ABS:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          /* FABS and FNEG are analogous.  */
          if (speed)
            *cost += extra_cost->fp[mode == DFmode].neg;
        }
      else
        {
          /* Integer ABS will either be split to
             two arithmetic instructions, or will be an ABS
             (scalar), which we don't model.  */
          *cost = COSTS_N_INSNS (2);
          if (speed)
            *cost += 2 * extra_cost->alu.arith;
        }
      return false;

    case SMAX:
    case SMIN:
      if (speed)
        {
          /* FMAXNM/FMINNM/FMAX/FMIN.
             TODO: This may not be accurate for all implementations, but
             we do not model this in the cost tables.  */
          *cost += extra_cost->fp[mode == DFmode].addsub;
        }
      return false;

    case UNSPEC:
      /* The floating point round to integer frint* instructions.  */
      if (aarch64_frint_unspec_p (XINT (x, 1)))
        {
          if (speed)
            *cost += extra_cost->fp[mode == DFmode].roundint;

          return false;
        }

      if (XINT (x, 1) == UNSPEC_RBIT)
        {
          /* RBIT.  */
          if (speed)
            *cost += extra_cost->alu.rev;

          return false;
        }
      break;

    case TRUNCATE:
      /* Decompose <su>muldi3_highpart.  */
      if (/* (truncate:DI  */
          mode == DImode
          /*   (lshiftrt:TI  */
          && GET_MODE (XEXP (x, 0)) == TImode
          && GET_CODE (XEXP (x, 0)) == LSHIFTRT
          /*      (mult:TI  */
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
          /*        (ANY_EXTEND:TI (reg:DI))
                    (ANY_EXTEND:TI (reg:DI)))  */
          && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
               && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
              || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
                  && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
          && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
          && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
          /*     (const_int 64)  */
          && CONST_INT_P (XEXP (XEXP (x, 0), 1))
          && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
        {
          /* UMULH/SMULH.  */
          if (speed)
            *cost += extra_cost->mult[mode == DImode].extend;
          *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
                             MULT, 0, speed);
          *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
                             MULT, 1, speed);
          return true;
        }

      /* Fall through.  */
    default:
      break;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
             "\nFailed to cost RTX.  Assuming default cost.\n");

  return true;
}
/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
   calculated for X.  This cost is stored in *COST.  Returns true
   if the total cost of X was calculated.  */
static bool
aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
                           int param, int *cost, bool speed)
{
  bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      print_rtl_single (dump_file, x);
      fprintf (dump_file, "\n%s cost: %d (%s)\n",
               speed ? "Hot" : "Cold",
               *cost, result ? "final" : "partial");
    }

  return result;
}
static int
aarch64_register_move_cost (machine_mode mode,
                            reg_class_t from_i, reg_class_t to_i)
{
  enum reg_class from = (enum reg_class) from_i;
  enum reg_class to = (enum reg_class) to_i;
  const struct cpu_regmove_cost *regmove_cost
    = aarch64_tune_params->regmove_cost;

  /* Caller save and pointer regs are equivalent to GENERAL_REGS.  */
  if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
    to = GENERAL_REGS;

  if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
    from = GENERAL_REGS;

  /* Moving between GPR and stack cost is the same as GP2GP.  */
  if ((from == GENERAL_REGS && to == STACK_REG)
      || (to == GENERAL_REGS && from == STACK_REG))
    return regmove_cost->GP2GP;

  /* To/From the stack register, we move via the gprs.  */
  if (to == STACK_REG || from == STACK_REG)
    return aarch64_register_move_cost (mode, from, GENERAL_REGS)
            + aarch64_register_move_cost (mode, GENERAL_REGS, to);

  if (GET_MODE_SIZE (mode) == 16)
    {
      /* 128-bit operations on general registers require 2 instructions.  */
      if (from == GENERAL_REGS && to == GENERAL_REGS)
        return regmove_cost->GP2GP * 2;
      else if (from == GENERAL_REGS)
        return regmove_cost->GP2FP * 2;
      else if (to == GENERAL_REGS)
        return regmove_cost->FP2GP * 2;

      /* When AdvSIMD instructions are disabled it is not possible to move
         a 128-bit value directly between Q registers.  This is handled in
         secondary reload.  A general register is used as a scratch to move
         the upper DI value and the lower DI value is moved directly,
         hence the cost is the sum of three moves.  */
      if (! TARGET_SIMD)
        return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;

      return regmove_cost->FP2FP;
    }

  if (from == GENERAL_REGS && to == GENERAL_REGS)
    return regmove_cost->GP2GP;
  else if (from == GENERAL_REGS)
    return regmove_cost->GP2FP;
  else if (to == GENERAL_REGS)
    return regmove_cost->FP2GP;

  return regmove_cost->FP2FP;
}
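/* Worked example (illustrative): moving a 16-byte (128-bit) value from
   FP_REGS to GENERAL_REGS returns FP2GP * 2, reflecting that the value is
   transferred as two 64-bit halves; the opposite direction costs
   GP2FP * 2, and an FP-to-FP move without AdvSIMD enabled is priced as
   the sum of three moves, as the comment above explains.  */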
static int
aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
                          reg_class_t rclass ATTRIBUTE_UNUSED,
                          bool in ATTRIBUTE_UNUSED)
{
  return aarch64_tune_params->memmov_cost;
}

/* Return the number of instructions that can be issued per cycle.  */
static int
aarch64_sched_issue_rate (void)
{
  return aarch64_tune_params->issue_rate;
}
/* Vectorizer cost model target hooks.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                    tree vectype,
                                    int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
    case scalar_stmt:
      return aarch64_tune_params->vec_costs->scalar_stmt_cost;

    case scalar_load:
      return aarch64_tune_params->vec_costs->scalar_load_cost;

    case scalar_store:
      return aarch64_tune_params->vec_costs->scalar_store_cost;

    case vector_stmt:
      return aarch64_tune_params->vec_costs->vec_stmt_cost;

    case vector_load:
      return aarch64_tune_params->vec_costs->vec_align_load_cost;

    case vector_store:
      return aarch64_tune_params->vec_costs->vec_store_cost;

    case vec_to_scalar:
      return aarch64_tune_params->vec_costs->vec_to_scalar_cost;

    case scalar_to_vec:
      return aarch64_tune_params->vec_costs->scalar_to_vec_cost;

    case unaligned_load:
      return aarch64_tune_params->vec_costs->vec_unalign_load_cost;

    case unaligned_store:
      return aarch64_tune_params->vec_costs->vec_unalign_store_cost;

    case cond_branch_taken:
      return aarch64_tune_params->vec_costs->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;

    case vec_perm:
    case vec_promote_demote:
      return aarch64_tune_params->vec_costs->vec_stmt_cost;

    case vec_construct:
      elements = TYPE_VECTOR_SUBPARTS (vectype);
      return elements / 2 + 1;

    default:
      gcc_unreachable ();
    }
}
/* Implement targetm.vectorize.add_stmt_cost.  */
static unsigned
aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
                       struct _stmt_vec_info *stmt_info, int misalign,
                       enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost =
            aarch64_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
         vectorized are weighted more heavily.  The value here is
         a function (linear for now) of the loop nest level.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
        {
          loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
          struct loop *loop = LOOP_VINFO_LOOP (loop_info);
          unsigned nest_level = loop_depth (loop);

          count *= nest_level;
        }

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}
static void initialize_aarch64_code_model (void);

/* Parse the architecture extension string.  */

static void
aarch64_parse_extension (char *str)
{
  /* The extension string is parsed left to right.  */
  const struct aarch64_option_extension *opt = NULL;

  /* Flag to say whether we are adding or removing an extension.  */
  int adding_ext = -1;

  while (str != NULL && *str != 0)
    {
      const char *ext;
      size_t len;

      str++;
      ext = strchr (str, '+');

      if (ext != NULL)
        len = ext - str;
      else
        len = strlen (str);

      if (len >= 2 && strncmp (str, "no", 2) == 0)
        {
          adding_ext = 0;
          len -= 2;
          str += 2;
        }
      else if (len > 0)
        adding_ext = 1;

      if (len == 0)
        {
          error ("missing feature modifier after %qs", adding_ext ? "+"
                                                                  : "+no");
          return;
        }

      /* Scan over the extensions table trying to find an exact match.  */
      for (opt = all_extensions; opt->name != NULL; opt++)
        {
          if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
            {
              /* Add or remove the extension.  */
              if (adding_ext)
                aarch64_isa_flags |= opt->flags_on;
              else
                aarch64_isa_flags &= ~(opt->flags_off);
              break;
            }
        }

      if (opt->name == NULL)
        {
          /* Extension not found in list.  */
          error ("unknown feature modifier %qs", str);
          return;
        }

      str = ext;
    }

  return;
}
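/* Example (illustrative): with -march=armv8-a+crc+nocrypto the substring
   "+crc+nocrypto" is handed to this function.  "crc" is found in the
   extensions table and its flags_on bits are ORed into aarch64_isa_flags,
   while the "no" prefix on "crypto" causes that entry's flags_off bits to
   be cleared instead.  */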
/* Parse the ARCH string.  */

static void
aarch64_parse_arch (void)
{
  char *ext;
  const struct processor *arch;
  char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
  size_t len;

  strcpy (str, aarch64_arch_string);

  ext = strchr (str, '+');

  if (ext != NULL)
    len = ext - str;
  else
    len = strlen (str);

  if (len == 0)
    {
      error ("missing arch name in -march=%qs", str);
      return;
    }

  /* Loop through the list of supported ARCHs to find a match.  */
  for (arch = all_architectures; arch->name != NULL; arch++)
    {
      if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
        {
          selected_arch = arch;
          aarch64_isa_flags = selected_arch->flags;

          if (!selected_cpu)
            selected_cpu = &all_cores[selected_arch->core];

          if (ext != NULL)
            {
              /* ARCH string contains at least one extension.  */
              aarch64_parse_extension (ext);
            }

          if (strcmp (selected_arch->arch, selected_cpu->arch))
            {
              warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
                       selected_cpu->name, selected_arch->name);
            }

          return;
        }
    }

  /* ARCH name not found in list.  */
  error ("unknown value %qs for -march", str);
  return;
}
/* Parse the CPU string.  */

static void
aarch64_parse_cpu (void)
{
  char *ext;
  const struct processor *cpu;
  char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
  size_t len;

  strcpy (str, aarch64_cpu_string);

  ext = strchr (str, '+');

  if (ext != NULL)
    len = ext - str;
  else
    len = strlen (str);

  if (len == 0)
    {
      error ("missing cpu name in -mcpu=%qs", str);
      return;
    }

  /* Loop through the list of supported CPUs to find a match.  */
  for (cpu = all_cores; cpu->name != NULL; cpu++)
    {
      if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
        {
          selected_cpu = cpu;
          aarch64_isa_flags = selected_cpu->flags;

          if (ext != NULL)
            {
              /* CPU string contains at least one extension.  */
              aarch64_parse_extension (ext);
            }

          return;
        }
    }

  /* CPU name not found in list.  */
  error ("unknown value %qs for -mcpu", str);
  return;
}
/* Parse the TUNE string.  */

static void
aarch64_parse_tune (void)
{
  const struct processor *cpu;
  char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
  strcpy (str, aarch64_tune_string);

  /* Loop through the list of supported CPUs to find a match.  */
  for (cpu = all_cores; cpu->name != NULL; cpu++)
    {
      if (strcmp (cpu->name, str) == 0)
        {
          selected_tune = cpu;
          return;
        }
    }

  /* CPU name not found in list.  */
  error ("unknown value %qs for -mtune", str);
  return;
}
/* Implement TARGET_OPTION_OVERRIDE.  */

static void
aarch64_override_options (void)
{
  /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
     If either of -march or -mtune is given, they override their
     respective component of -mcpu.

     So, first parse AARCH64_CPU_STRING, then the others, be careful
     with -march as, if -mcpu is not present on the command line, march
     must set a sensible default CPU.  */
  if (aarch64_cpu_string)
    {
      aarch64_parse_cpu ();
    }

  if (aarch64_arch_string)
    {
      aarch64_parse_arch ();
    }

  if (aarch64_tune_string)
    {
      aarch64_parse_tune ();
    }

#ifndef HAVE_AS_MABI_OPTION
  /* The compiler may have been configured with 2.23.* binutils, which does
     not have support for ILP32.  */
  if (TARGET_ILP32)
    error ("Assembler does not support -mabi=ilp32");
#endif

  initialize_aarch64_code_model ();

  aarch64_build_bitmask_table ();

  /* This target defaults to strict volatile bitfields.  */
  if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
    flag_strict_volatile_bitfields = 1;

  /* If the user did not specify a processor, choose the default
     one for them.  This will be the CPU set during configuration using
     --with-cpu, otherwise it is "generic".  */
  if (!selected_cpu)
    {
      selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
      aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
    }

  gcc_assert (selected_cpu);

  if (!selected_tune)
    selected_tune = selected_cpu;

  aarch64_tune_flags = selected_tune->flags;
  aarch64_tune = selected_tune->core;
  aarch64_tune_params = selected_tune->tune;
  aarch64_architecture_version = selected_cpu->architecture_version;

  if (aarch64_fix_a53_err835769 == 2)
    {
#ifdef TARGET_FIX_ERR_A53_835769_DEFAULT
      aarch64_fix_a53_err835769 = 1;
#else
      aarch64_fix_a53_err835769 = 0;
#endif
    }

  /* If not optimizing for size, set the default
     alignment to what the target wants.  */
  if (!optimize_size)
    {
      if (align_loops <= 0)
        align_loops = aarch64_tune_params->loop_align;
      if (align_jumps <= 0)
        align_jumps = aarch64_tune_params->jump_align;
      if (align_functions <= 0)
        align_functions = aarch64_tune_params->function_align;
    }

  aarch64_override_options_after_change ();
}
/* Implement targetm.override_options_after_change.  */

static void
aarch64_override_options_after_change (void)
{
  if (flag_omit_frame_pointer)
    flag_omit_leaf_frame_pointer = false;
  else if (flag_omit_leaf_frame_pointer)
    flag_omit_frame_pointer = true;
}
static struct machine_function *
aarch64_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_cleared_alloc<machine_function> ();
  return machine;
}

void
aarch64_init_expanders (void)
{
  init_machine_status = aarch64_init_machine_status;
}
/* A checking mechanism for the implementation of the various code models.  */
static void
initialize_aarch64_code_model (void)
{
  if (flag_pic)
    {
      switch (aarch64_cmodel_var)
        {
        case AARCH64_CMODEL_TINY:
          aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
          break;
        case AARCH64_CMODEL_SMALL:
          aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
          break;
        case AARCH64_CMODEL_LARGE:
          sorry ("code model %qs with -f%s", "large",
                 flag_pic > 1 ? "PIC" : "pic");
        default:
          gcc_unreachable ();
        }
    }
  else
    aarch64_cmodel = aarch64_cmodel_var;
}
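/* Example (illustrative): compiling with -mcmodel=small -fpic selects
   AARCH64_CMODEL_SMALL_PIC here, while -mcmodel=large combined with any
   PIC flag is rejected through sorry() above, since a PIC variant of the
   large code model is not implemented.  */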
/* Return true if SYMBOL_REF X binds locally.  */

static bool
aarch64_symbol_binds_local_p (const_rtx x)
{
  return (SYMBOL_REF_DECL (x)
          ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
          : SYMBOL_REF_LOCAL_P (x));
}

/* Return true if SYMBOL_REF X is thread local.  */
static bool
aarch64_tls_symbol_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  if (GET_CODE (x) != SYMBOL_REF)
    return false;

  return SYMBOL_REF_TLS_MODEL (x) != 0;
}
/* Classify a TLS symbol into one of the TLS kinds.  */
enum aarch64_symbol_type
aarch64_classify_tls_symbol (rtx x)
{
  enum tls_model tls_kind = tls_symbolic_operand_type (x);

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
    case TLS_MODEL_LOCAL_DYNAMIC:
      return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;

    case TLS_MODEL_INITIAL_EXEC:
      return SYMBOL_SMALL_GOTTPREL;

    case TLS_MODEL_LOCAL_EXEC:
      return SYMBOL_SMALL_TPREL;

    case TLS_MODEL_EMULATED:
    case TLS_MODEL_NONE:
      return SYMBOL_FORCE_TO_MEM;

    default:
      gcc_unreachable ();
    }
}
/* Return the method that should be used to access SYMBOL_REF or
   LABEL_REF X in context CONTEXT.  */

enum aarch64_symbol_type
aarch64_classify_symbol (rtx x, rtx offset,
                         enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
{
  if (GET_CODE (x) == LABEL_REF)
    {
      switch (aarch64_cmodel)
        {
        case AARCH64_CMODEL_LARGE:
          return SYMBOL_FORCE_TO_MEM;

        case AARCH64_CMODEL_TINY_PIC:
        case AARCH64_CMODEL_TINY:
          return SYMBOL_TINY_ABSOLUTE;

        case AARCH64_CMODEL_SMALL_PIC:
        case AARCH64_CMODEL_SMALL:
          return SYMBOL_SMALL_ABSOLUTE;

        default:
          gcc_unreachable ();
        }
    }

  if (GET_CODE (x) == SYMBOL_REF)
    {
      if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
        return SYMBOL_FORCE_TO_MEM;

      if (aarch64_tls_symbol_p (x))
        return aarch64_classify_tls_symbol (x);

      switch (aarch64_cmodel)
        {
        case AARCH64_CMODEL_TINY:
          /* When we retrieve a symbol + offset address, we have to make sure
             the offset does not cause overflow of the final address.  But
             we have no way of knowing the address of the symbol at compile
             time, so we can't accurately say if the distance between the PC
             and symbol + offset is outside the addressable range of +/-1M in
             the TINY code model.  So we rely on images not being greater
             than 1M and cap the offset at 1M; anything beyond 1M will have
             to be loaded using an alternative mechanism.  */
          if (SYMBOL_REF_WEAK (x)
              || INTVAL (offset) < -1048575 || INTVAL (offset) > 1048575)
            return SYMBOL_FORCE_TO_MEM;
          return SYMBOL_TINY_ABSOLUTE;

        case AARCH64_CMODEL_SMALL:
          /* Same reasoning as the tiny code model, but the offset cap here is
             4G.  */
          if (SYMBOL_REF_WEAK (x)
              || INTVAL (offset) < (HOST_WIDE_INT) -4294967263
              || INTVAL (offset) > (HOST_WIDE_INT) 4294967264)
            return SYMBOL_FORCE_TO_MEM;
          return SYMBOL_SMALL_ABSOLUTE;

        case AARCH64_CMODEL_TINY_PIC:
          if (!aarch64_symbol_binds_local_p (x))
            return SYMBOL_TINY_GOT;
          return SYMBOL_TINY_ABSOLUTE;

        case AARCH64_CMODEL_SMALL_PIC:
          if (!aarch64_symbol_binds_local_p (x))
            return SYMBOL_SMALL_GOT;
          return SYMBOL_SMALL_ABSOLUTE;

        default:
          gcc_unreachable ();
        }
    }

  /* By default push everything into the constant pool.  */
  return SYMBOL_FORCE_TO_MEM;
}
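/* Example (illustrative): under -mcmodel=tiny a reference such as
   "array + 2000000" exceeds the +/-1M offset cap above, so it is
   classified SYMBOL_FORCE_TO_MEM and the address is materialised from the
   constant pool rather than with a single PC-relative ADR.  */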
bool
aarch64_constant_address_p (rtx x)
{
  return (CONSTANT_P (x) && memory_address_p (DImode, x));
}

bool
aarch64_legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
          && GET_CODE (XEXP (x, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return false;

  return true;
}

/* Return true if X holds either a quarter-precision or
   floating-point +0.0 constant.  */
static bool
aarch64_valid_floating_const (machine_mode mode, rtx x)
{
  if (!CONST_DOUBLE_P (x))
    return false;

  /* TODO: We could handle moving 0.0 to a TFmode register,
     but first we would like to refactor the movtf_aarch64
     to be more amicable to split moves properly and
     correctly gate on TARGET_SIMD.  For now - reject all
     constants which are not to SFmode or DFmode registers.  */
  if (!(mode == SFmode || mode == DFmode))
    return false;

  if (aarch64_float_const_zero_rtx_p (x))
    return true;
  return aarch64_float_const_representable_p (x);
}
static bool
aarch64_legitimate_constant_p (machine_mode mode, rtx x)
{
  /* Do not allow vector struct mode constants.  We could support
     0 and -1 easily, but they need support in aarch64-simd.md.  */
  if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
    return false;

  /* This could probably go away because
     we now decompose CONST_INTs according to expand_mov_immediate.  */
  if ((GET_CODE (x) == CONST_VECTOR
       && aarch64_simd_valid_immediate (x, mode, false, NULL))
      || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
    return !targetm.cannot_force_const_mem (mode, x);

  if (GET_CODE (x) == HIGH
      && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
    return true;

  return aarch64_constant_address_p (x);
}
rtx
aarch64_load_tp (rtx target)
{
  if (!target
      || GET_MODE (target) != Pmode
      || !register_operand (target, Pmode))
    target = gen_reg_rtx (Pmode);

  /* Can return in any reg.  */
  emit_insn (gen_aarch64_load_tp_hard (target));
  return target;
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
   Return the type to use as __builtin_va_list.

   AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:

     struct __va_list
     {
       void *__stack;
       void *__gr_top;
       void *__vr_top;
       int   __gr_offs;
       int   __vr_offs;
     };  */

static tree
aarch64_build_builtin_va_list (void)
{
  tree va_list_name;
  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;

  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
                             TYPE_DECL,
                             get_identifier ("__va_list"),
                             va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;

  /* Create the fields.  */
  f_stack = build_decl (BUILTINS_LOCATION,
                        FIELD_DECL, get_identifier ("__stack"),
                        ptr_type_node);
  f_grtop = build_decl (BUILTINS_LOCATION,
                        FIELD_DECL, get_identifier ("__gr_top"),
                        ptr_type_node);
  f_vrtop = build_decl (BUILTINS_LOCATION,
                        FIELD_DECL, get_identifier ("__vr_top"),
                        ptr_type_node);
  f_groff = build_decl (BUILTINS_LOCATION,
                        FIELD_DECL, get_identifier ("__gr_offs"),
                        integer_type_node);
  f_vroff = build_decl (BUILTINS_LOCATION,
                        FIELD_DECL, get_identifier ("__vr_offs"),
                        integer_type_node);

  DECL_ARTIFICIAL (f_stack) = 1;
  DECL_ARTIFICIAL (f_grtop) = 1;
  DECL_ARTIFICIAL (f_vrtop) = 1;
  DECL_ARTIFICIAL (f_groff) = 1;
  DECL_ARTIFICIAL (f_vroff) = 1;

  DECL_FIELD_CONTEXT (f_stack) = va_list_type;
  DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
  DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
  DECL_FIELD_CONTEXT (f_groff) = va_list_type;
  DECL_FIELD_CONTEXT (f_vroff) = va_list_type;

  TYPE_FIELDS (va_list_type) = f_stack;
  DECL_CHAIN (f_stack) = f_grtop;
  DECL_CHAIN (f_grtop) = f_vrtop;
  DECL_CHAIN (f_vrtop) = f_groff;
  DECL_CHAIN (f_groff) = f_vroff;

  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
{
  const CUMULATIVE_ARGS *cum;
  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
  tree stack, grtop, vrtop, groff, vroff;
  tree t;
  int gr_save_area_size;
  int vr_save_area_size;
  int vr_offset;

  cum = &crtl->args.info;
  gr_save_area_size
    = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
  vr_save_area_size
    = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;

  if (TARGET_GENERAL_REGS_ONLY)
    {
      if (cum->aapcs_nvrn > 0)
        sorry ("%qs and floating point or vector arguments",
               "-mgeneral-regs-only");
      vr_save_area_size = 0;
    }

  f_stack = TYPE_FIELDS (va_list_type_node);
  f_grtop = DECL_CHAIN (f_stack);
  f_vrtop = DECL_CHAIN (f_grtop);
  f_groff = DECL_CHAIN (f_vrtop);
  f_vroff = DECL_CHAIN (f_groff);

  stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
                  NULL_TREE);
  grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
                  NULL_TREE);
  vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
                  NULL_TREE);
  groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
                  NULL_TREE);
  vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
                  NULL_TREE);

  /* Emit code to initialize STACK, which points to the next varargs stack
     argument.  CUM->AAPCS_STACK_SIZE gives the number of stack words used
     by named arguments.  STACK is 8-byte aligned.  */
  t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
  if (cum->aapcs_stack_size > 0)
    t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
  t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize GRTOP, the top of the GR save area.
     virtual_incoming_args_rtx should have been 16 byte aligned.  */
  t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
  t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize VRTOP, the top of the VR save area.
     This address is gr_save_area_bytes below GRTOP, rounded
     down to the next 16-byte boundary.  */
  t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
  vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
                                STACK_BOUNDARY / BITS_PER_UNIT);

  if (vr_offset)
    t = fold_build_pointer_plus_hwi (t, -vr_offset);
  t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize GROFF, the offset from GRTOP of the
     next GPR argument.  */
  t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
              build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Likewise emit code to initialize VROFF, the offset from FTOP
     of the next VR argument.  */
  t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
              build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */

static tree
aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
                              gimple_seq *post_p ATTRIBUTE_UNUSED)
{
  tree addr;
  bool indirect_p;
  bool is_ha;           /* is HFA or HVA.  */
  bool dw_align;        /* double-word align.  */
  machine_mode ag_mode = VOIDmode;
  int nregs;
  machine_mode mode;

  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
  tree stack, f_top, f_off, off, arg, roundup, on_stack;
  HOST_WIDE_INT size, rsize, adjust, align;
  tree t, u, cond1, cond2;

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);

  mode = TYPE_MODE (type);

  f_stack = TYPE_FIELDS (va_list_type_node);
  f_grtop = DECL_CHAIN (f_stack);
  f_vrtop = DECL_CHAIN (f_grtop);
  f_groff = DECL_CHAIN (f_vrtop);
  f_vroff = DECL_CHAIN (f_groff);

  stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
                  f_stack, NULL_TREE);
  size = int_size_in_bytes (type);
  align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
,
7259 /* TYPE passed in fp/simd registers. */
7260 if (TARGET_GENERAL_REGS_ONLY
)
7261 sorry ("%qs and floating point or vector arguments",
7262 "-mgeneral-regs-only");
7264 f_top
= build3 (COMPONENT_REF
, TREE_TYPE (f_vrtop
),
7265 unshare_expr (valist
), f_vrtop
, NULL_TREE
);
7266 f_off
= build3 (COMPONENT_REF
, TREE_TYPE (f_vroff
),
7267 unshare_expr (valist
), f_vroff
, NULL_TREE
);
7269 rsize
= nregs
* UNITS_PER_VREG
;
7273 if (BYTES_BIG_ENDIAN
&& GET_MODE_SIZE (ag_mode
) < UNITS_PER_VREG
)
7274 adjust
= UNITS_PER_VREG
- GET_MODE_SIZE (ag_mode
);
7276 else if (BLOCK_REG_PADDING (mode
, type
, 1) == downward
7277 && size
< UNITS_PER_VREG
)
7279 adjust
= UNITS_PER_VREG
- size
;
7284 /* TYPE passed in general registers. */
7285 f_top
= build3 (COMPONENT_REF
, TREE_TYPE (f_grtop
),
7286 unshare_expr (valist
), f_grtop
, NULL_TREE
);
7287 f_off
= build3 (COMPONENT_REF
, TREE_TYPE (f_groff
),
7288 unshare_expr (valist
), f_groff
, NULL_TREE
);
7289 rsize
= (size
+ UNITS_PER_WORD
- 1) & -UNITS_PER_WORD
;
7290 nregs
= rsize
/ UNITS_PER_WORD
;
7295 if (BLOCK_REG_PADDING (mode
, type
, 1) == downward
7296 && size
< UNITS_PER_WORD
)
7298 adjust
= UNITS_PER_WORD
- size
;
7302 /* Get a local temporary for the field value. */
7303 off
= get_initialized_tmp_var (f_off
, pre_p
, NULL
);
7305 /* Emit code to branch if off >= 0. */
7306 t
= build2 (GE_EXPR
, boolean_type_node
, off
,
7307 build_int_cst (TREE_TYPE (off
), 0));
7308 cond1
= build3 (COND_EXPR
, ptr_type_node
, t
, NULL_TREE
, NULL_TREE
);
7312 /* Emit: offs = (offs + 15) & -16. */
7313 t
= build2 (PLUS_EXPR
, TREE_TYPE (off
), off
,
7314 build_int_cst (TREE_TYPE (off
), 15));
7315 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (off
), t
,
7316 build_int_cst (TREE_TYPE (off
), -16));
7317 roundup
= build2 (MODIFY_EXPR
, TREE_TYPE (off
), off
, t
);
7322 /* Update ap.__[g|v]r_offs */
7323 t
= build2 (PLUS_EXPR
, TREE_TYPE (off
), off
,
7324 build_int_cst (TREE_TYPE (off
), rsize
));
7325 t
= build2 (MODIFY_EXPR
, TREE_TYPE (f_off
), unshare_expr (f_off
), t
);
7329 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), roundup
, t
);
7331 /* [cond2] if (ap.__[g|v]r_offs > 0) */
7332 u
= build2 (GT_EXPR
, boolean_type_node
, unshare_expr (f_off
),
7333 build_int_cst (TREE_TYPE (f_off
), 0));
7334 cond2
= build3 (COND_EXPR
, ptr_type_node
, u
, NULL_TREE
, NULL_TREE
);
7336 /* String up: make sure the assignment happens before the use. */
7337 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (cond2
), t
, cond2
);
7338 COND_EXPR_ELSE (cond1
) = t
;
7340 /* Prepare the trees handling the argument that is passed on the stack;
7341 the top level node will store in ON_STACK. */
7342 arg
= get_initialized_tmp_var (stack
, pre_p
, NULL
);
7345 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
7346 t
= fold_convert (intDI_type_node
, arg
);
7347 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
7348 build_int_cst (TREE_TYPE (t
), 15));
7349 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
7350 build_int_cst (TREE_TYPE (t
), -16));
7351 t
= fold_convert (TREE_TYPE (arg
), t
);
7352 roundup
= build2 (MODIFY_EXPR
, TREE_TYPE (arg
), arg
, t
);
  /* Advance ap.__stack  */
  t = fold_convert (intDI_type_node, arg);
  t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
              build_int_cst (TREE_TYPE (t), size + 7));
  t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
              build_int_cst (TREE_TYPE (t), -8));
  t = fold_convert (TREE_TYPE (arg), t);
  t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
  /* String up roundup and advance.  */
  t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
  /* String up with arg  */
  on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
  /* Big-endianness related address adjustment.  */
  if (BLOCK_REG_PADDING (mode, type, 1) == downward
      && size < UNITS_PER_WORD)
    {
      t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
                  size_int (UNITS_PER_WORD - size));
      on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
    }

  COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
  COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
  /* Adjustment to OFFSET in the case of BIG_ENDIAN.  */
  t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
              build_int_cst (TREE_TYPE (off), adjust));

  t = fold_convert (sizetype, t);
  t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
      /* type ha; // treat as "struct {ftype field[n];}"
         ... [computing offs]
         for (i = 0; i < nregs; ++i, offs += 16)
           ha.field[i] = *((ftype *)(ap.__vr_top + offs));  */

      tree tmp_ha, field_t, field_ptr_t;

      /* Declare a local variable.  */
      tmp_ha = create_tmp_var_raw (type, "ha");
      gimple_add_tmp_var (tmp_ha);
      /* Establish the base type.  */
      switch (ag_mode)
        {
        case SFmode:
          field_t = float_type_node;
          field_ptr_t = float_ptr_type_node;
          break;
        case DFmode:
          field_t = double_type_node;
          field_ptr_t = double_ptr_type_node;
          break;
        case TFmode:
          field_t = long_double_type_node;
          field_ptr_t = long_double_ptr_type_node;
          break;
/* The half precision and quad precision are not fully supported yet.  Enable
   the following code after the support is complete.  Need to find the correct
   type node for __fp16 *.  */
#if 0
        case HFmode:
          field_t = float_type_node;
          field_ptr_t = float_ptr_type_node;
          break;
#endif
        case V2SImode:
        case V4SImode:
          {
            tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
            field_t = build_vector_type_for_mode (innertype, ag_mode);
            field_ptr_t = build_pointer_type (field_t);
          }
          break;
        default:
          gcc_unreachable ();
        }
      /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area  */
      tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
      t = fold_convert (field_ptr_t, addr);
      t = build2 (MODIFY_EXPR, field_t,
                  build1 (INDIRECT_REF, field_t, tmp_ha),
                  build1 (INDIRECT_REF, field_t, t));

      /* ha.field[i] = *((field_ptr_t)vr_saved_area + i)  */
      for (i = 1; i < nregs; ++i)
        {
          addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
          u = fold_convert (field_ptr_t, addr);
          u = build2 (MODIFY_EXPR, field_t,
                      build2 (MEM_REF, field_t, tmp_ha,
                              build_int_cst (field_ptr_t,
                                             (i *
                                              int_size_in_bytes (field_t)))),
                      build1 (INDIRECT_REF, field_t, u));
          t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
        }

      u = fold_convert (TREE_TYPE (f_top), tmp_ha);
      t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);

  COND_EXPR_ELSE (cond2) = t;
  addr = fold_convert (build_pointer_type (type), cond1);
  addr = build_va_arg_indirect_ref (addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);

  return addr;
}
/* Implement TARGET_SETUP_INCOMING_VARARGS.  */

static void
aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
                                tree type, int *pretend_size ATTRIBUTE_UNUSED,
                                int no_rtl)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  CUMULATIVE_ARGS local_cum;
  int gr_saved, vr_saved;

  /* The caller has advanced CUM up to, but not beyond, the last named
     argument.  Advance a local copy of CUM past the last "real" named
     argument, to find out how many registers are left over.  */
  local_cum = *cum;
  aarch64_function_arg_advance (pack_cumulative_args (&local_cum),
                                mode, type, true);

  /* Find out how many registers we need to save.  */
  gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
  vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;

  if (TARGET_GENERAL_REGS_ONLY)
    {
      if (local_cum.aapcs_nvrn > 0)
        sorry ("%qs and floating point or vector arguments",
               "-mgeneral-regs-only");
      vr_saved = 0;
    }
7511 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
7512 ptr
= plus_constant (Pmode
, virtual_incoming_args_rtx
,
7513 - gr_saved
* UNITS_PER_WORD
);
7514 mem
= gen_frame_mem (BLKmode
, ptr
);
7515 set_mem_alias_set (mem
, get_varargs_alias_set ());
7517 move_block_from_reg (local_cum
.aapcs_ncrn
+ R0_REGNUM
,
7522 /* We can't use move_block_from_reg, because it will use
7523 the wrong mode, storing D regs only. */
7524 machine_mode mode
= TImode
;
7527 /* Set OFF to the offset from virtual_incoming_args_rtx of
7528 the first vector register. The VR save area lies below
7529 the GR one, and is aligned to 16 bytes. */
7530 off
= -AARCH64_ROUND_UP (gr_saved
* UNITS_PER_WORD
,
7531 STACK_BOUNDARY
/ BITS_PER_UNIT
);
7532 off
-= vr_saved
* UNITS_PER_VREG
;
7534 for (i
= local_cum
.aapcs_nvrn
; i
< NUM_FP_ARG_REGS
; ++i
)
7538 ptr
= plus_constant (Pmode
, virtual_incoming_args_rtx
, off
);
7539 mem
= gen_frame_mem (mode
, ptr
);
7540 set_mem_alias_set (mem
, get_varargs_alias_set ());
7541 aarch64_emit_move (mem
, gen_rtx_REG (mode
, V0_REGNUM
+ i
));
7542 off
+= UNITS_PER_VREG
;
  /* We don't save the size into *PRETEND_SIZE because we want to avoid
     any complication of having crtl->args.pretend_args_size changed.  */
  cfun->machine->frame.saved_varargs_size
    = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
                         STACK_BOUNDARY / BITS_PER_UNIT)
       + vr_saved * UNITS_PER_VREG);
7556 aarch64_conditional_register_usage (void)
7561 for (i
= V0_REGNUM
; i
<= V31_REGNUM
; i
++)
7564 call_used_regs
[i
] = 1;
7569 /* Walk down the type tree of TYPE counting consecutive base elements.
7570 If *MODEP is VOIDmode, then set it to the first valid floating point
7571 type. If a non-floating point type is found, or if a floating point
7572 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
7573 otherwise return the count in the sub-tree. */
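   For example, for "struct { double x, y; }" this returns 2 and sets *MODEP
   to DFmode, while "struct { double x; float y; }" returns -1 because the
   element types do not share a mode.  */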
7575 aapcs_vfp_sub_candidate (const_tree type
, machine_mode
*modep
)
7580 switch (TREE_CODE (type
))
7583 mode
= TYPE_MODE (type
);
7584 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= TFmode
)
7587 if (*modep
== VOIDmode
)
7596 mode
= TYPE_MODE (TREE_TYPE (type
));
7597 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= TFmode
)
7600 if (*modep
== VOIDmode
)
7609 /* Use V2SImode and V4SImode as representatives of all 64-bit
7610 and 128-bit vector types. */
7611 size
= int_size_in_bytes (type
);
7624 if (*modep
== VOIDmode
)
7627 /* Vector modes are considered to be opaque: two vectors are
7628 equivalent for the purposes of being homogeneous aggregates
7629 if they are the same size. */
7638 tree index
= TYPE_DOMAIN (type
);
        /* Can't handle incomplete types nor sizes that are not
           fixed.  */
        if (!COMPLETE_TYPE_P (type)
            || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
          return -1;
7646 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
7649 || !TYPE_MAX_VALUE (index
)
7650 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
7651 || !TYPE_MIN_VALUE (index
)
7652 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
7656 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
7657 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
7659 /* There must be no padding. */
7660 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
        /* Can't handle incomplete types nor sizes that are not
           fixed.  */
        if (!COMPLETE_TYPE_P (type)
            || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
          return -1;
7678 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
7680 if (TREE_CODE (field
) != FIELD_DECL
)
7683 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
7689 /* There must be no padding. */
7690 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
7697 case QUAL_UNION_TYPE
:
7699 /* These aren't very interesting except in a degenerate case. */
        /* Can't handle incomplete types nor sizes that are not
           fixed.  */
        if (!COMPLETE_TYPE_P (type)
            || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
          return -1;
7710 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
7712 if (TREE_CODE (field
) != FIELD_DECL
)
7715 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
7718 count
= count
> sub_count
? count
: sub_count
;
7721 /* There must be no padding. */
7722 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
/* Return true if we use LRA instead of reload pass.  */

static bool
aarch64_lra_p (void)
{
  return aarch64_lra_flag;
}
7742 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
7743 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
7744 array types. The C99 floating-point complex types are also considered
7745 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
7746 types, which are GCC extensions and out of the scope of AAPCS64, are
7747 treated as composite types here as well.
7749 Note that MODE itself is not sufficient in determining whether a type
7750 is such a composite type or not. This is because
7751 stor-layout.c:compute_record_mode may have already changed the MODE
7752 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
7753 structure with only one field may have its MODE set to the mode of the
7754 field. Also an integer mode whose size matches the size of the
7755 RECORD_TYPE type may be used to substitute the original mode
7756 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
7757 solely relied on. */
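/* For example, "struct { float x; }" may have its MODE changed from BLKmode
   to SFmode by compute_record_mode, yet it is still a composite type under
   AAPCS64 \S 4.3, so classification here must look at TYPE and not rely on
   MODE alone.  */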
7760 aarch64_composite_type_p (const_tree type
,
7763 if (type
&& (AGGREGATE_TYPE_P (type
) || TREE_CODE (type
) == COMPLEX_TYPE
))
7767 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
7768 || GET_MODE_CLASS (mode
) == MODE_COMPLEX_INT
)
/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
   type as described in AAPCS64 \S 4.1.2.

   See the comment above aarch64_composite_type_p for the notes on MODE.  */
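/* In practice these are the 64-bit and 128-bit AdvSIMD vector types, e.g.
   int32x2_t or float32x4_t; anything smaller or larger is not a short
   vector for parameter-passing purposes.  */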
static bool
aarch64_short_vector_p (const_tree type,
                        machine_mode mode)
{
  HOST_WIDE_INT size = -1;

  if (type && TREE_CODE (type) == VECTOR_TYPE)
    size = int_size_in_bytes (type);
  else if (!aarch64_composite_type_p (type, mode)
           && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
               || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
    size = GET_MODE_SIZE (mode);

  return (size == 8 || size == 16) ? true : false;
}
/* Return TRUE if an argument, whose type is described by TYPE and MODE,
   shall be passed or returned in simd/fp register(s) (providing these
   parameter passing registers are available).

   Upon successful return, *COUNT returns the number of needed registers,
   *BASE_MODE returns the mode of the individual register and when IS_HA
   is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
   floating-point aggregate or a homogeneous short-vector aggregate.  */
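/* For example, "struct { float x, y, z; }" is a homogeneous floating-point
   aggregate: *COUNT is set to 3, *BASE_MODE to SFmode and *IS_HA to true,
   so (when enough V registers are free) it is passed in three S registers.  */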
7805 aarch64_vfp_is_call_or_return_candidate (machine_mode mode
,
7807 machine_mode
*base_mode
,
7811 machine_mode new_mode
= VOIDmode
;
7812 bool composite_p
= aarch64_composite_type_p (type
, mode
);
7814 if (is_ha
!= NULL
) *is_ha
= false;
7816 if ((!composite_p
&& GET_MODE_CLASS (mode
) == MODE_FLOAT
)
7817 || aarch64_short_vector_p (type
, mode
))
7822 else if (GET_MODE_CLASS (mode
) == MODE_COMPLEX_FLOAT
)
7824 if (is_ha
!= NULL
) *is_ha
= true;
7826 new_mode
= GET_MODE_INNER (mode
);
7828 else if (type
&& composite_p
)
7830 int ag_count
= aapcs_vfp_sub_candidate (type
, &new_mode
);
7832 if (ag_count
> 0 && ag_count
<= HA_MAX_NUM_FLDS
)
7834 if (is_ha
!= NULL
) *is_ha
= true;
7843 *base_mode
= new_mode
;
/* Implement TARGET_STRUCT_VALUE_RTX.  */

static rtx
aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
                          int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
}
/* Implements target hook vector_mode_supported_p.  */
static bool
aarch64_vector_mode_supported_p (machine_mode mode)
{
  if (TARGET_SIMD
      && (mode == V4SImode || mode == V8HImode
          || mode == V16QImode || mode == V2DImode
          || mode == V2SImode || mode == V4HImode
          || mode == V8QImode || mode == V2SFmode
          || mode == V4SFmode || mode == V2DFmode
          || mode == V1DFmode))
    return true;

  return false;
}
7872 /* Return appropriate SIMD container
7873 for MODE within a vector of WIDTH bits. */
7875 aarch64_simd_container_mode (machine_mode mode
, unsigned width
)
7877 gcc_assert (width
== 64 || width
== 128);
/* Return 128-bit container as the preferred SIMD mode for MODE.  */
static machine_mode
aarch64_preferred_simd_mode (machine_mode mode)
{
  return aarch64_simd_container_mode (mode, 128);
}
7923 /* Return the bitmask of possible vector sizes for the vectorizer
7926 aarch64_autovectorize_vector_sizes (void)
/* Implement TARGET_MANGLE_TYPE.  */

static const char *
aarch64_mangle_type (const_tree type)
{
  /* The AArch64 ABI documents say that "__va_list" has to be
     mangled as if it is in the "std" namespace.  */
  if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Mangle AArch64-specific internal types.  TYPE_NAME is non-NULL_TREE for
     builtin types.  */
  if (TYPE_NAME (type) != NULL)
    return aarch64_mangle_builtin_type (type);

  /* Use the default mangling.  */
  return NULL;
}
/* Return true if the rtx_insn contains a MEM RTX somewhere
   in it.  */

static bool
has_memory_op (rtx_insn *mem_insn)
{
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (mem_insn), ALL)
    if (MEM_P (*iter))
      return true;

  return false;
}
7965 /* Find the first rtx_insn before insn that will generate an assembly
7969 aarch64_prev_real_insn (rtx_insn
*insn
)
7976 insn
= prev_real_insn (insn
);
7978 while (insn
&& recog_memoized (insn
) < 0);
7984 is_madd_op (enum attr_type t1
)
7987 /* A number of these may be AArch32 only. */
7988 enum attr_type mlatypes
[] = {
7989 TYPE_MLA
, TYPE_MLAS
, TYPE_SMLAD
, TYPE_SMLADX
, TYPE_SMLAL
, TYPE_SMLALD
,
7990 TYPE_SMLALS
, TYPE_SMLALXY
, TYPE_SMLAWX
, TYPE_SMLAWY
, TYPE_SMLAXY
,
7991 TYPE_SMMLA
, TYPE_UMLAL
, TYPE_UMLALS
,TYPE_SMLSD
, TYPE_SMLSDX
, TYPE_SMLSLD
7994 for (i
= 0; i
< sizeof (mlatypes
) / sizeof (enum attr_type
); i
++)
7996 if (t1
== mlatypes
[i
])
8003 /* Check if there is a register dependency between a load and the insn
8004 for which we hold recog_data. */
8007 dep_between_memop_and_curr (rtx memop
)
8012 gcc_assert (GET_CODE (memop
) == SET
);
8014 if (!REG_P (SET_DEST (memop
)))
8017 load_reg
= SET_DEST (memop
);
8018 for (opno
= 1; opno
< recog_data
.n_operands
; opno
++)
8020 rtx operand
= recog_data
.operand
[opno
];
8022 && reg_overlap_mentioned_p (load_reg
, operand
))
8030 /* When working around the Cortex-A53 erratum 835769,
8031 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
8032 instruction and has a preceding memory instruction such that a NOP
8033 should be inserted between them. */
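/* A typical sequence is a 64-bit load directly followed by a
   multiply-accumulate such as madd; aarch64_final_prescan_insn below uses
   this predicate to emit the separating nop.  */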
8036 aarch64_madd_needs_nop (rtx_insn
* insn
)
8038 enum attr_type attr_type
;
8042 if (!aarch64_fix_a53_err835769
)
8045 if (recog_memoized (insn
) < 0)
8048 attr_type
= get_attr_type (insn
);
8049 if (!is_madd_op (attr_type
))
8052 prev
= aarch64_prev_real_insn (insn
);
8053 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
8054 Restore recog state to INSN to avoid state corruption. */
8055 extract_constrain_insn_cached (insn
);
8057 if (!prev
|| !has_memory_op (prev
))
8060 body
= single_set (prev
);
8062 /* If the previous insn is a memory op and there is no dependency between
8063 it and the DImode madd, emit a NOP between them. If body is NULL then we
8064 have a complex memory operation, probably a load/store pair.
8065 Be conservative for now and emit a NOP. */
8066 if (GET_MODE (recog_data
.operand
[0]) == DImode
8067 && (!body
|| !dep_between_memop_and_curr (body
)))
/* Implement FINAL_PRESCAN_INSN.  */

void
aarch64_final_prescan_insn (rtx_insn *insn)
{
  if (aarch64_madd_needs_nop (insn))
    fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
}
/* Return the equivalent letter for size.  */
static char
sizetochar (int size)
{
  switch (size)
    {
    case 64: return 'd';
    case 32: return 's';
    case 16: return 'h';
    case 8 : return 'b';
    default: gcc_unreachable ();
    }
}
8099 /* Return true iff x is a uniform vector of floating-point
8100 constants, and the constant can be represented in
8101 quarter-precision form. Note, as aarch64_float_const_representable
8102 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
8104 aarch64_vect_float_const_representable_p (rtx x
)
8107 REAL_VALUE_TYPE r0
, ri
;
8110 if (GET_MODE_CLASS (GET_MODE (x
)) != MODE_VECTOR_FLOAT
)
8113 x0
= CONST_VECTOR_ELT (x
, 0);
8114 if (!CONST_DOUBLE_P (x0
))
8117 REAL_VALUE_FROM_CONST_DOUBLE (r0
, x0
);
8119 for (i
= 1; i
< CONST_VECTOR_NUNITS (x
); i
++)
8121 xi
= CONST_VECTOR_ELT (x
, i
);
8122 if (!CONST_DOUBLE_P (xi
))
8125 REAL_VALUE_FROM_CONST_DOUBLE (ri
, xi
);
8126 if (!REAL_VALUES_EQUAL (r0
, ri
))
8130 return aarch64_float_const_representable_p (x0
);
8133 /* Return true for valid and false for invalid. */
8135 aarch64_simd_valid_immediate (rtx op
, machine_mode mode
, bool inverse
,
8136 struct simd_immediate_info
*info
)
8138 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
8140 for (i = 0; i < idx; i += (STRIDE)) \
8145 immtype = (CLASS); \
8146 elsize = (ELSIZE); \
8152 unsigned int i
, elsize
= 0, idx
= 0, n_elts
= CONST_VECTOR_NUNITS (op
);
8153 unsigned int innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
8154 unsigned char bytes
[16];
8155 int immtype
= -1, matches
;
8156 unsigned int invmask
= inverse
? 0xff : 0;
8159 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
8161 if (! (aarch64_simd_imm_zero_p (op
, mode
)
8162 || aarch64_vect_float_const_representable_p (op
)))
8167 info
->value
= CONST_VECTOR_ELT (op
, 0);
8168 info
->element_width
= GET_MODE_BITSIZE (GET_MODE (info
->value
));
8176 /* Splat vector constant out into a byte vector. */
8177 for (i
= 0; i
< n_elts
; i
++)
8179 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
8180 it must be laid out in the vector register in reverse order. */
8181 rtx el
= CONST_VECTOR_ELT (op
, BYTES_BIG_ENDIAN
? (n_elts
- 1 - i
) : i
);
8182 unsigned HOST_WIDE_INT elpart
;
8183 unsigned int part
, parts
;
8185 if (CONST_INT_P (el
))
8187 elpart
= INTVAL (el
);
8190 else if (GET_CODE (el
) == CONST_DOUBLE
)
8192 elpart
= CONST_DOUBLE_LOW (el
);
8198 for (part
= 0; part
< parts
; part
++)
8201 for (byte
= 0; byte
< innersize
; byte
++)
8203 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
8204 elpart
>>= BITS_PER_UNIT
;
8206 if (GET_CODE (el
) == CONST_DOUBLE
)
8207 elpart
= CONST_DOUBLE_HIGH (el
);
8212 gcc_assert (idx
== GET_MODE_SIZE (mode
));
8216 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
8217 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 0, 0);
8219 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
8220 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 8, 0);
8222 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
8223 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0, 16, 0);
8225 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
8226 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3], 24, 0);
8228 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0, 0, 0);
8230 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1], 8, 0);
8232 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
8233 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 0, 1);
8235 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
8236 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 8, 1);
8238 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
8239 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff, 16, 1);
8241 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
8242 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3], 24, 1);
8244 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff, 0, 1);
8246 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1], 8, 1);
8248 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
8249 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 8, 0);
8251 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
8252 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 8, 1);
8254 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
8255 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0, 16, 0);
8257 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
8258 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff, 16, 1);
8260 CHECK (1, 8, 16, bytes
[i
] == bytes
[0], 0, 0);
8262 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
8263 && bytes
[i
] == bytes
[(i
+ 8) % idx
], 0, 0);
8272 info
->element_width
= elsize
;
8273 info
->mvn
= emvn
!= 0;
8274 info
->shift
= eshift
;
8276 unsigned HOST_WIDE_INT imm
= 0;
8278 if (immtype
>= 12 && immtype
<= 15)
8281 /* Un-invert bytes of recognized vector, if necessary. */
8283 for (i
= 0; i
< idx
; i
++)
8284 bytes
[i
] ^= invmask
;
8288 /* FIXME: Broken on 32-bit H_W_I hosts. */
8289 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
8291 for (i
= 0; i
< 8; i
++)
8292 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
8293 << (i
* BITS_PER_UNIT
);
8296 info
->value
= GEN_INT (imm
);
8300 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
8301 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
8303 /* Construct 'abcdefgh' because the assembler cannot handle
8304 generic constants. */
8307 imm
= (imm
>> info
->shift
) & 0xff;
8308 info
->value
= GEN_INT (imm
);
/* Check if immediate shift constants are within range.  */
bool
aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
{
  int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
  if (left)
    return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
  else
    return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
}
/* Return true if X is a uniform vector where all elements
   are either the floating-point constant 0.0 or the
   integer constant 0.  */
bool
aarch64_simd_imm_zero_p (rtx x, machine_mode mode)
{
  return x == CONST0_RTX (mode);
}
8337 aarch64_simd_imm_scalar_p (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
)
8339 HOST_WIDE_INT imm
= INTVAL (x
);
8342 for (i
= 0; i
< 8; i
++)
8344 unsigned int byte
= imm
& 0xff;
8345 if (byte
!= 0xff && byte
!= 0)
8354 aarch64_mov_operand_p (rtx x
,
8355 enum aarch64_symbol_context context
,
8358 if (GET_CODE (x
) == HIGH
8359 && aarch64_valid_symref (XEXP (x
, 0), GET_MODE (XEXP (x
, 0))))
8362 if (CONST_INT_P (x
))
8365 if (GET_CODE (x
) == SYMBOL_REF
&& mode
== DImode
&& CONSTANT_ADDRESS_P (x
))
8368 return aarch64_classify_symbolic_expression (x
, context
)
8369 == SYMBOL_TINY_ABSOLUTE
;
8372 /* Return a const_int vector of VAL. */
8374 aarch64_simd_gen_const_vector_dup (machine_mode mode
, int val
)
8376 int nunits
= GET_MODE_NUNITS (mode
);
8377 rtvec v
= rtvec_alloc (nunits
);
8380 for (i
=0; i
< nunits
; i
++)
8381 RTVEC_ELT (v
, i
) = GEN_INT (val
);
8383 return gen_rtx_CONST_VECTOR (mode
, v
);
8386 /* Check OP is a legal scalar immediate for the MOVI instruction. */
8389 aarch64_simd_scalar_immediate_valid_for_move (rtx op
, machine_mode mode
)
8393 gcc_assert (!VECTOR_MODE_P (mode
));
8394 vmode
= aarch64_preferred_simd_mode (mode
);
8395 rtx op_v
= aarch64_simd_gen_const_vector_dup (vmode
, INTVAL (op
));
8396 return aarch64_simd_valid_immediate (op_v
, vmode
, false, NULL
);
8399 /* Construct and return a PARALLEL RTX vector with elements numbering the
8400 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
8401 the vector - from the perspective of the architecture. This does not
8402 line up with GCC's perspective on lane numbers, so we end up with
8403 different masks depending on our target endian-ness. The diagram
8404 below may help. We must draw the distinction when building masks
8405 which select one half of the vector. An instruction selecting
8406 architectural low-lanes for a big-endian target, must be described using
8407 a mask selecting GCC high-lanes.
8409 Big-Endian Little-Endian
8412 | x | x | x | x | | x | x | x | x |
8413 Architecture 3 2 1 0 3 2 1 0
8415 Low Mask: { 2, 3 } { 0, 1 }
8416 High Mask: { 0, 1 } { 2, 3 }
8420 aarch64_simd_vect_par_cnst_half (machine_mode mode
, bool high
)
8422 int nunits
= GET_MODE_NUNITS (mode
);
8423 rtvec v
= rtvec_alloc (nunits
/ 2);
8424 int high_base
= nunits
/ 2;
8430 if (BYTES_BIG_ENDIAN
)
8431 base
= high
? low_base
: high_base
;
8433 base
= high
? high_base
: low_base
;
8435 for (i
= 0; i
< nunits
/ 2; i
++)
8436 RTVEC_ELT (v
, i
) = GEN_INT (base
+ i
);
8438 t1
= gen_rtx_PARALLEL (mode
, v
);
8442 /* Check OP for validity as a PARALLEL RTX vector with elements
8443 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
8444 from the perspective of the architecture. See the diagram above
8445 aarch64_simd_vect_par_cnst_half for more details. */
8448 aarch64_simd_check_vect_par_cnst_half (rtx op
, machine_mode mode
,
8451 rtx ideal
= aarch64_simd_vect_par_cnst_half (mode
, high
);
8452 HOST_WIDE_INT count_op
= XVECLEN (op
, 0);
8453 HOST_WIDE_INT count_ideal
= XVECLEN (ideal
, 0);
8456 if (!VECTOR_MODE_P (mode
))
8459 if (count_op
!= count_ideal
)
8462 for (i
= 0; i
< count_ideal
; i
++)
8464 rtx elt_op
= XVECEXP (op
, 0, i
);
8465 rtx elt_ideal
= XVECEXP (ideal
, 0, i
);
8467 if (!CONST_INT_P (elt_op
)
8468 || INTVAL (elt_ideal
) != INTVAL (elt_op
))
8474 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
8475 HIGH (exclusive). */
8477 aarch64_simd_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
8481 gcc_assert (CONST_INT_P (operand
));
8482 lane
= INTVAL (operand
);
8484 if (lane
< low
|| lane
>= high
)
8487 error ("%Klane %ld out of range %ld - %ld", exp
, lane
, low
, high
- 1);
8489 error ("lane %ld out of range %ld - %ld", lane
, low
, high
- 1);
/* Emit code to place an AdvSIMD pair result in memory locations (with equal
   registers).  */
8496 aarch64_simd_emit_pair_result_insn (machine_mode mode
,
8497 rtx (*intfn
) (rtx
, rtx
, rtx
), rtx destaddr
,
8500 rtx mem
= gen_rtx_MEM (mode
, destaddr
);
8501 rtx tmp1
= gen_reg_rtx (mode
);
8502 rtx tmp2
= gen_reg_rtx (mode
);
8504 emit_insn (intfn (tmp1
, op1
, tmp2
));
8506 emit_move_insn (mem
, tmp1
);
8507 mem
= adjust_address (mem
, mode
, GET_MODE_SIZE (mode
));
8508 emit_move_insn (mem
, tmp2
);
8511 /* Return TRUE if OP is a valid vector addressing mode. */
8513 aarch64_simd_mem_operand_p (rtx op
)
8515 return MEM_P (op
) && (GET_CODE (XEXP (op
, 0)) == POST_INC
8516 || REG_P (XEXP (op
, 0)));
8519 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
8520 not to early-clobber SRC registers in the process.
8522 We assume that the operands described by SRC and DEST represent a
8523 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
8524 number of components into which the copy has been decomposed. */
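/* For example, copying the pair {V0, V1} into {V1, V2} would clobber V1
   before it is read if the components were copied in order, so in that case
   the components are emitted highest-numbered register first.  */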
8526 aarch64_simd_disambiguate_copy (rtx
*operands
, rtx
*dest
,
8527 rtx
*src
, unsigned int count
)
8531 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
8532 || REGNO (operands
[0]) < REGNO (operands
[1]))
8534 for (i
= 0; i
< count
; i
++)
8536 operands
[2 * i
] = dest
[i
];
8537 operands
[2 * i
+ 1] = src
[i
];
8542 for (i
= 0; i
< count
; i
++)
8544 operands
[2 * i
] = dest
[count
- i
- 1];
8545 operands
[2 * i
+ 1] = src
[count
- i
- 1];
8550 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
8551 one of VSTRUCT modes: OI, CI or XI. */
8553 aarch64_simd_attr_length_move (rtx_insn
*insn
)
8557 extract_insn_cached (insn
);
8559 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
8561 mode
= GET_MODE (recog_data
.operand
[0]);
8577 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
8578 alignment of a vector to 128 bits. */
8579 static HOST_WIDE_INT
8580 aarch64_simd_vector_alignment (const_tree type
)
8582 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
8583 return MIN (align
, 128);
8586 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
8588 aarch64_simd_vector_alignment_reachable (const_tree type
, bool is_packed
)
8593 /* We guarantee alignment for vectors up to 128-bits. */
8594 if (tree_int_cst_compare (TYPE_SIZE (type
),
8595 bitsize_int (BIGGEST_ALIGNMENT
)) > 0)
8598 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
8602 /* If VALS is a vector constant that can be loaded into a register
8603 using DUP, generate instructions to do so and return an RTX to
8604 assign to the register. Otherwise return NULL_RTX. */
8606 aarch64_simd_dup_constant (rtx vals
)
8608 machine_mode mode
= GET_MODE (vals
);
8609 machine_mode inner_mode
= GET_MODE_INNER (mode
);
8610 int n_elts
= GET_MODE_NUNITS (mode
);
8611 bool all_same
= true;
8615 if (GET_CODE (vals
) != CONST_VECTOR
)
8618 for (i
= 1; i
< n_elts
; ++i
)
8620 x
= CONST_VECTOR_ELT (vals
, i
);
8621 if (!rtx_equal_p (x
, CONST_VECTOR_ELT (vals
, 0)))
8628 /* We can load this constant by using DUP and a constant in a
8629 single ARM register. This will be cheaper than a vector
8631 x
= copy_to_mode_reg (inner_mode
, CONST_VECTOR_ELT (vals
, 0));
8632 return gen_rtx_VEC_DUPLICATE (mode
, x
);
/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that cannot be converted into a CONST_VECTOR.  */
8641 aarch64_simd_make_constant (rtx vals
)
8643 machine_mode mode
= GET_MODE (vals
);
8645 rtx const_vec
= NULL_RTX
;
8646 int n_elts
= GET_MODE_NUNITS (mode
);
8650 if (GET_CODE (vals
) == CONST_VECTOR
)
8652 else if (GET_CODE (vals
) == PARALLEL
)
8654 /* A CONST_VECTOR must contain only CONST_INTs and
8655 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8656 Only store valid constants in a CONST_VECTOR. */
8657 for (i
= 0; i
< n_elts
; ++i
)
8659 rtx x
= XVECEXP (vals
, 0, i
);
8660 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
8663 if (n_const
== n_elts
)
8664 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
8669 if (const_vec
!= NULL_RTX
8670 && aarch64_simd_valid_immediate (const_vec
, mode
, false, NULL
))
8671 /* Load using MOVI/MVNI. */
8673 else if ((const_dup
= aarch64_simd_dup_constant (vals
)) != NULL_RTX
)
8674 /* Loaded using DUP. */
8676 else if (const_vec
!= NULL_RTX
)
    /* Load from constant pool.  We cannot take advantage of single-cycle
       LD1 because we need a PC-relative addressing mode.  */
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We cannot construct an initializer.  */
8687 aarch64_expand_vector_init (rtx target
, rtx vals
)
8689 machine_mode mode
= GET_MODE (target
);
8690 machine_mode inner_mode
= GET_MODE_INNER (mode
);
8691 int n_elts
= GET_MODE_NUNITS (mode
);
8692 int n_var
= 0, one_var
= -1;
8693 bool all_same
= true;
8697 x
= XVECEXP (vals
, 0, 0);
8698 if (!CONST_INT_P (x
) && !CONST_DOUBLE_P (x
))
8699 n_var
= 1, one_var
= 0;
8701 for (i
= 1; i
< n_elts
; ++i
)
8703 x
= XVECEXP (vals
, 0, i
);
8704 if (!CONST_INT_P (x
) && !CONST_DOUBLE_P (x
))
8705 ++n_var
, one_var
= i
;
8707 if (!rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
8713 rtx constant
= aarch64_simd_make_constant (vals
);
8714 if (constant
!= NULL_RTX
)
8716 emit_move_insn (target
, constant
);
8721 /* Splat a single non-constant element if we can. */
8724 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
8725 aarch64_emit_move (target
, gen_rtx_VEC_DUPLICATE (mode
, x
));
8729 /* One field is non-constant. Load constant then overwrite varying
8730 field. This is more efficient than using the stack. */
8733 rtx copy
= copy_rtx (vals
);
8734 rtx index
= GEN_INT (one_var
);
8735 enum insn_code icode
;
8737 /* Load constant part of vector, substitute neighboring value for
8739 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, one_var
^ 1);
8740 aarch64_expand_vector_init (target
, copy
);
8742 /* Insert variable. */
8743 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
8744 icode
= optab_handler (vec_set_optab
, mode
);
8745 gcc_assert (icode
!= CODE_FOR_nothing
);
8746 emit_insn (GEN_FCN (icode
) (target
, x
, index
));
8750 /* Construct the vector in memory one field at a time
8751 and load the whole vector. */
8752 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
8753 for (i
= 0; i
< n_elts
; i
++)
8754 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
8755 i
* GET_MODE_SIZE (inner_mode
)),
8756 XVECEXP (vals
, 0, i
));
8757 emit_move_insn (target
, mem
);
8761 static unsigned HOST_WIDE_INT
8762 aarch64_shift_truncation_mask (machine_mode mode
)
8765 (aarch64_vector_mode_supported_p (mode
)
8766 || aarch64_vect_struct_mode_p (mode
)) ? 0 : (GET_MODE_BITSIZE (mode
) - 1);
8769 #ifndef TLS_SECTION_ASM_FLAG
8770 #define TLS_SECTION_ASM_FLAG 'T'
8774 aarch64_elf_asm_named_section (const char *name
, unsigned int flags
,
8775 tree decl ATTRIBUTE_UNUSED
)
8777 char flagchars
[10], *f
= flagchars
;
  /* If we have already declared this section, we can use an
     abbreviated form to switch back to it -- unless this section is
     part of a COMDAT group, in which case GAS requires the full
     declaration every time.  */
8783 if (!(HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
8784 && (flags
& SECTION_DECLARED
))
8786 fprintf (asm_out_file
, "\t.section\t%s\n", name
);
8790 if (!(flags
& SECTION_DEBUG
))
8792 if (flags
& SECTION_WRITE
)
8794 if (flags
& SECTION_CODE
)
8796 if (flags
& SECTION_SMALL
)
8798 if (flags
& SECTION_MERGE
)
8800 if (flags
& SECTION_STRINGS
)
8802 if (flags
& SECTION_TLS
)
8803 *f
++ = TLS_SECTION_ASM_FLAG
;
8804 if (HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
8808 fprintf (asm_out_file
, "\t.section\t%s,\"%s\"", name
, flagchars
);
8810 if (!(flags
& SECTION_NOTYPE
))
8815 if (flags
& SECTION_BSS
)
8820 #ifdef TYPE_OPERAND_FMT
8821 format
= "," TYPE_OPERAND_FMT
;
8826 fprintf (asm_out_file
, format
, type
);
8828 if (flags
& SECTION_ENTSIZE
)
8829 fprintf (asm_out_file
, ",%d", flags
& SECTION_ENTSIZE
);
8830 if (HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
8832 if (TREE_CODE (decl
) == IDENTIFIER_NODE
)
8833 fprintf (asm_out_file
, ",%s,comdat", IDENTIFIER_POINTER (decl
));
8835 fprintf (asm_out_file
, ",%s,comdat",
8836 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl
)));
8840 putc ('\n', asm_out_file
);
8843 /* Select a format to encode pointers in exception handling data. */
8845 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED
, int global
)
8848 switch (aarch64_cmodel
)
8850 case AARCH64_CMODEL_TINY
:
8851 case AARCH64_CMODEL_TINY_PIC
:
8852 case AARCH64_CMODEL_SMALL
:
8853 case AARCH64_CMODEL_SMALL_PIC
:
8854 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
8856 type
= DW_EH_PE_sdata4
;
8859 /* No assumptions here. 8-byte relocs required. */
8860 type
= DW_EH_PE_sdata8
;
8863 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
8866 /* Emit load exclusive. */
8869 aarch64_emit_load_exclusive (machine_mode mode
, rtx rval
,
8870 rtx mem
, rtx model_rtx
)
8872 rtx (*gen
) (rtx
, rtx
, rtx
);
8876 case QImode
: gen
= gen_aarch64_load_exclusiveqi
; break;
8877 case HImode
: gen
= gen_aarch64_load_exclusivehi
; break;
8878 case SImode
: gen
= gen_aarch64_load_exclusivesi
; break;
8879 case DImode
: gen
= gen_aarch64_load_exclusivedi
; break;
8884 emit_insn (gen (rval
, mem
, model_rtx
));
8887 /* Emit store exclusive. */
8890 aarch64_emit_store_exclusive (machine_mode mode
, rtx bval
,
8891 rtx rval
, rtx mem
, rtx model_rtx
)
8893 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
8897 case QImode
: gen
= gen_aarch64_store_exclusiveqi
; break;
8898 case HImode
: gen
= gen_aarch64_store_exclusivehi
; break;
8899 case SImode
: gen
= gen_aarch64_store_exclusivesi
; break;
8900 case DImode
: gen
= gen_aarch64_store_exclusivedi
; break;
8905 emit_insn (gen (bval
, rval
, mem
, model_rtx
));
8908 /* Mark the previous jump instruction as unlikely. */
8911 aarch64_emit_unlikely_jump (rtx insn
)
8913 int very_unlikely
= REG_BR_PROB_BASE
/ 100 - 1;
8915 insn
= emit_jump_insn (insn
);
8916 add_int_reg_note (insn
, REG_BR_PROB
, very_unlikely
);
8919 /* Expand a compare and swap pattern. */
8922 aarch64_expand_compare_and_swap (rtx operands
[])
8924 rtx bval
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
8925 machine_mode mode
, cmp_mode
;
8926 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
8931 oldval
= operands
[3];
8932 newval
= operands
[4];
8933 is_weak
= operands
[5];
8934 mod_s
= operands
[6];
8935 mod_f
= operands
[7];
8936 mode
= GET_MODE (mem
);
8939 /* Normally the succ memory model must be stronger than fail, but in the
8940 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
8941 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
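/* For example, a compare-exchange with a RELEASE success order and an
   ACQUIRE failure order is emitted with ACQ_REL ordering on the success
   path so that the failure path's ACQUIRE requirement is still honoured.  */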
8943 if (INTVAL (mod_f
) == MEMMODEL_ACQUIRE
8944 && INTVAL (mod_s
) == MEMMODEL_RELEASE
)
8945 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
8951 /* For short modes, we're going to perform the comparison in SImode,
8952 so do the zero-extension now. */
8954 rval
= gen_reg_rtx (SImode
);
8955 oldval
= convert_modes (SImode
, mode
, oldval
, true);
8960 /* Force the value into a register if needed. */
8961 if (!aarch64_plus_operand (oldval
, mode
))
8962 oldval
= force_reg (cmp_mode
, oldval
);
8971 case QImode
: gen
= gen_atomic_compare_and_swapqi_1
; break;
8972 case HImode
: gen
= gen_atomic_compare_and_swaphi_1
; break;
8973 case SImode
: gen
= gen_atomic_compare_and_swapsi_1
; break;
8974 case DImode
: gen
= gen_atomic_compare_and_swapdi_1
; break;
8979 emit_insn (gen (rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
8981 if (mode
== QImode
|| mode
== HImode
)
8982 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
8984 x
= gen_rtx_REG (CCmode
, CC_REGNUM
);
8985 x
= gen_rtx_EQ (SImode
, x
, const0_rtx
);
8986 emit_insn (gen_rtx_SET (VOIDmode
, bval
, x
));
8989 /* Split a compare and swap pattern. */
8992 aarch64_split_compare_and_swap (rtx operands
[])
8994 rtx rval
, mem
, oldval
, newval
, scratch
;
8997 rtx_code_label
*label1
, *label2
;
9002 oldval
= operands
[2];
9003 newval
= operands
[3];
9004 is_weak
= (operands
[4] != const0_rtx
);
9005 scratch
= operands
[7];
9006 mode
= GET_MODE (mem
);
9011 label1
= gen_label_rtx ();
9012 emit_label (label1
);
9014 label2
= gen_label_rtx ();
9016 aarch64_emit_load_exclusive (mode
, rval
, mem
, operands
[5]);
9018 cond
= aarch64_gen_compare_reg (NE
, rval
, oldval
);
9019 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
9020 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
9021 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
9022 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
9024 aarch64_emit_store_exclusive (mode
, scratch
, mem
, newval
, operands
[5]);
9028 x
= gen_rtx_NE (VOIDmode
, scratch
, const0_rtx
);
9029 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
9030 gen_rtx_LABEL_REF (Pmode
, label1
), pc_rtx
);
9031 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
9035 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
9036 x
= gen_rtx_COMPARE (CCmode
, scratch
, const0_rtx
);
9037 emit_insn (gen_rtx_SET (VOIDmode
, cond
, x
));
9040 emit_label (label2
);
9043 /* Split an atomic operation. */
9046 aarch64_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
9047 rtx value
, rtx model_rtx
, rtx cond
)
9049 machine_mode mode
= GET_MODE (mem
);
9050 machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
9051 rtx_code_label
*label
;
9054 label
= gen_label_rtx ();
9058 new_out
= gen_lowpart (wmode
, new_out
);
9060 old_out
= gen_lowpart (wmode
, old_out
);
9063 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
9065 aarch64_emit_load_exclusive (mode
, old_out
, mem
, model_rtx
);
9074 x
= gen_rtx_AND (wmode
, old_out
, value
);
9075 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
9076 x
= gen_rtx_NOT (wmode
, new_out
);
9077 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
9081 if (CONST_INT_P (value
))
9083 value
= GEN_INT (-INTVAL (value
));
9089 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
9090 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
9094 aarch64_emit_store_exclusive (mode
, cond
, mem
,
9095 gen_lowpart (mode
, new_out
), model_rtx
);
9097 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
9098 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
9099 gen_rtx_LABEL_REF (Pmode
, label
), pc_rtx
);
9100 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
9104 aarch64_print_extension (void)
9106 const struct aarch64_option_extension
*opt
= NULL
;
9108 for (opt
= all_extensions
; opt
->name
!= NULL
; opt
++)
9109 if ((aarch64_isa_flags
& opt
->flags_on
) == opt
->flags_on
)
9110 asm_fprintf (asm_out_file
, "+%s", opt
->name
);
9112 asm_fprintf (asm_out_file
, "\n");
9116 aarch64_start_file (void)
9120 asm_fprintf (asm_out_file
, "\t.arch %s", selected_arch
->name
);
9121 aarch64_print_extension ();
9123 else if (selected_cpu
)
9125 const char *truncated_name
9126 = aarch64_rewrite_selected_cpu (selected_cpu
->name
);
9127 asm_fprintf (asm_out_file
, "\t.cpu %s", truncated_name
);
9128 aarch64_print_extension ();
9130 default_file_start();
9133 /* Target hook for c_mode_for_suffix. */
9135 aarch64_c_mode_for_suffix (char suffix
)
9143 /* We can only represent floating point constants which will fit in
9144 "quarter-precision" values. These values are characterised by
9145 a sign bit, a 4-bit mantissa and a 3-bit exponent. And are given
9148 (-1)^s * (n/16) * 2^r
9151 's' is the sign bit.
9152 'n' is an integer in the range 16 <= n <= 31.
9153 'r' is an integer in the range -3 <= r <= 4. */
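/* For example:
     1.0   = (-1)^0 * (16/16) * 2^0    (s = 0, n = 16, r = 0)
     -0.25 = (-1)^1 * (16/16) * 2^-2   (s = 1, n = 16, r = -2)
     31.0  = (-1)^0 * (31/16) * 2^4    (s = 0, n = 31, r = 4)
   so the representable magnitudes run from 0.125 (n = 16, r = -3) up to
   31.0 (n = 31, r = 4).  */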
/* Return true iff X can be represented as a quarter-precision
   floating point immediate operand.  Note, we cannot represent 0.0.  */
9158 aarch64_float_const_representable_p (rtx x
)
9160 /* This represents our current view of how many bits
9161 make up the mantissa. */
9162 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
9164 unsigned HOST_WIDE_INT mantissa
, mask
;
9165 REAL_VALUE_TYPE r
, m
;
9168 if (!CONST_DOUBLE_P (x
))
9171 if (GET_MODE (x
) == VOIDmode
)
9174 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
9176 /* We cannot represent infinities, NaNs or +/-zero. We won't
9177 know if we have +zero until we analyse the mantissa, but we
9178 can reject the other invalid values. */
9179 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
)
9180 || REAL_VALUE_MINUS_ZERO (r
))
9183 /* Extract exponent. */
9184 r
= real_value_abs (&r
);
9185 exponent
= REAL_EXP (&r
);
9187 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
9188 highest (sign) bit, with a fixed binary point at bit point_pos.
9189 m1 holds the low part of the mantissa, m2 the high part.
9190 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
9191 bits for the mantissa, this can fail (low bits will be lost). */
9192 real_ldexp (&m
, &r
, point_pos
- exponent
);
9193 wide_int w
= real_to_integer (&m
, &fail
, HOST_BITS_PER_WIDE_INT
* 2);
9195 /* If the low part of the mantissa has bits set we cannot represent
9199 /* We have rejected the lower HOST_WIDE_INT, so update our
9200 understanding of how many bits lie in the mantissa and
9201 look only at the high HOST_WIDE_INT. */
9202 mantissa
= w
.elt (1);
9203 point_pos
-= HOST_BITS_PER_WIDE_INT
;
9205 /* We can only represent values with a mantissa of the form 1.xxxx. */
9206 mask
= ((unsigned HOST_WIDE_INT
)1 << (point_pos
- 5)) - 1;
9207 if ((mantissa
& mask
) != 0)
9210 /* Having filtered unrepresentable values, we may now remove all
9211 but the highest 5 bits. */
9212 mantissa
>>= point_pos
- 5;
9214 /* We cannot represent the value 0.0, so reject it. This is handled
9219 /* Then, as bit 4 is always set, we can mask it off, leaving
9220 the mantissa in the range [0, 15]. */
9221 mantissa
&= ~(1 << 4);
9222 gcc_assert (mantissa
<= 15);
9224 /* GCC internally does not use IEEE754-like encoding (where normalized
9225 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
9226 Our mantissa values are shifted 4 places to the left relative to
9227 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
9228 by 5 places to correct for GCC's representation. */
9229 exponent
= 5 - exponent
;
9231 return (exponent
>= 0 && exponent
<= 7);
9235 aarch64_output_simd_mov_immediate (rtx const_vector
,
9240 static char templ
[40];
9241 const char *mnemonic
;
9242 const char *shift_op
;
9243 unsigned int lane_count
= 0;
9246 struct simd_immediate_info info
= { NULL_RTX
, 0, 0, false, false };
  /* This will return true to show const_vector is legal for use as either
     an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
     also update INFO to show how the immediate should be generated.  */
9251 is_valid
= aarch64_simd_valid_immediate (const_vector
, mode
, false, &info
);
9252 gcc_assert (is_valid
);
9254 element_char
= sizetochar (info
.element_width
);
9255 lane_count
= width
/ info
.element_width
;
9257 mode
= GET_MODE_INNER (mode
);
9258 if (mode
== SFmode
|| mode
== DFmode
)
9260 gcc_assert (info
.shift
== 0 && ! info
.mvn
);
9261 if (aarch64_float_const_zero_rtx_p (info
.value
))
9262 info
.value
= GEN_INT (0);
9267 REAL_VALUE_FROM_CONST_DOUBLE (r
, info
.value
);
9268 char float_buf
[buf_size
] = {'\0'};
9269 real_to_decimal_for_mode (float_buf
, &r
, buf_size
, buf_size
, 1, mode
);
9272 if (lane_count
== 1)
9273 snprintf (templ
, sizeof (templ
), "fmov\t%%d0, %s", float_buf
);
9275 snprintf (templ
, sizeof (templ
), "fmov\t%%0.%d%c, %s",
9276 lane_count
, element_char
, float_buf
);
9281 mnemonic
= info
.mvn
? "mvni" : "movi";
9282 shift_op
= info
.msl
? "msl" : "lsl";
9284 if (lane_count
== 1)
9285 snprintf (templ
, sizeof (templ
), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX
,
9286 mnemonic
, UINTVAL (info
.value
));
9287 else if (info
.shift
)
9288 snprintf (templ
, sizeof (templ
), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
9289 ", %s %d", mnemonic
, lane_count
, element_char
,
9290 UINTVAL (info
.value
), shift_op
, info
.shift
);
9292 snprintf (templ
, sizeof (templ
), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
,
9293 mnemonic
, lane_count
, element_char
, UINTVAL (info
.value
));
9298 aarch64_output_scalar_simd_mov_immediate (rtx immediate
,
9303 gcc_assert (!VECTOR_MODE_P (mode
));
9304 vmode
= aarch64_simd_container_mode (mode
, 64);
9305 rtx v_op
= aarch64_simd_gen_const_vector_dup (vmode
, INTVAL (immediate
));
9306 return aarch64_output_simd_mov_immediate (v_op
, vmode
, 64);
9309 /* Split operands into moves from op[1] + op[2] into op[0]. */
9312 aarch64_split_combinev16qi (rtx operands
[3])
9314 unsigned int dest
= REGNO (operands
[0]);
9315 unsigned int src1
= REGNO (operands
[1]);
9316 unsigned int src2
= REGNO (operands
[2]);
9317 machine_mode halfmode
= GET_MODE (operands
[1]);
9318 unsigned int halfregs
= HARD_REGNO_NREGS (src1
, halfmode
);
9321 gcc_assert (halfmode
== V16QImode
);
9323 if (src1
== dest
&& src2
== dest
+ halfregs
)
9325 /* No-op move. Can't split to nothing; emit something. */
9326 emit_note (NOTE_INSN_DELETED
);
9330 /* Preserve register attributes for variable tracking. */
9331 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
9332 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
9333 GET_MODE_SIZE (halfmode
));
9335 /* Special case of reversed high/low parts. */
9336 if (reg_overlap_mentioned_p (operands
[2], destlo
)
9337 && reg_overlap_mentioned_p (operands
[1], desthi
))
9339 emit_insn (gen_xorv16qi3 (operands
[1], operands
[1], operands
[2]));
9340 emit_insn (gen_xorv16qi3 (operands
[2], operands
[1], operands
[2]));
9341 emit_insn (gen_xorv16qi3 (operands
[1], operands
[1], operands
[2]));
9343 else if (!reg_overlap_mentioned_p (operands
[2], destlo
))
9345 /* Try to avoid unnecessary moves if part of the result
9346 is in the right place already. */
9348 emit_move_insn (destlo
, operands
[1]);
9349 if (src2
!= dest
+ halfregs
)
9350 emit_move_insn (desthi
, operands
[2]);
9354 if (src2
!= dest
+ halfregs
)
9355 emit_move_insn (desthi
, operands
[2]);
9357 emit_move_insn (destlo
, operands
[1]);
9361 /* vec_perm support. */
9363 #define MAX_VECT_LEN 16
9365 struct expand_vec_perm_d
9367 rtx target
, op0
, op1
;
9368 unsigned char perm
[MAX_VECT_LEN
];
9375 /* Generate a variable permutation. */
9378 aarch64_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
9380 machine_mode vmode
= GET_MODE (target
);
9381 bool one_vector_p
= rtx_equal_p (op0
, op1
);
9383 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
9384 gcc_checking_assert (GET_MODE (op0
) == vmode
);
9385 gcc_checking_assert (GET_MODE (op1
) == vmode
);
9386 gcc_checking_assert (GET_MODE (sel
) == vmode
);
9387 gcc_checking_assert (TARGET_SIMD
);
9391 if (vmode
== V8QImode
)
9393 /* Expand the argument to a V16QI mode by duplicating it. */
9394 rtx pair
= gen_reg_rtx (V16QImode
);
9395 emit_insn (gen_aarch64_combinev8qi (pair
, op0
, op0
));
9396 emit_insn (gen_aarch64_tbl1v8qi (target
, pair
, sel
));
9400 emit_insn (gen_aarch64_tbl1v16qi (target
, op0
, sel
));
9407 if (vmode
== V8QImode
)
9409 pair
= gen_reg_rtx (V16QImode
);
9410 emit_insn (gen_aarch64_combinev8qi (pair
, op0
, op1
));
9411 emit_insn (gen_aarch64_tbl1v8qi (target
, pair
, sel
));
9415 pair
= gen_reg_rtx (OImode
);
9416 emit_insn (gen_aarch64_combinev16qi (pair
, op0
, op1
));
9417 emit_insn (gen_aarch64_tbl2v16qi (target
, pair
, sel
));
9423 aarch64_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
9425 machine_mode vmode
= GET_MODE (target
);
9426 unsigned int nelt
= GET_MODE_NUNITS (vmode
);
9427 bool one_vector_p
= rtx_equal_p (op0
, op1
);
9430 /* The TBL instruction does not use a modulo index, so we must take care
9431 of that ourselves. */
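  /* E.g. for a two-vector V16QI permute the mask below is 31, so a selector
     byte of 33 is reduced to 1 and selects lane 1 of the first input, which
     matches the modulo semantics that vec_perm requires.  */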
9432 mask
= aarch64_simd_gen_const_vector_dup (vmode
,
9433 one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
9434 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
9436 /* For big-endian, we also need to reverse the index within the vector
9437 (but not which vector). */
9438 if (BYTES_BIG_ENDIAN
)
9440 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
9442 mask
= aarch64_simd_gen_const_vector_dup (vmode
, nelt
- 1);
9443 sel
= expand_simple_binop (vmode
, XOR
, sel
, mask
,
9444 NULL
, 0, OPTAB_LIB_WIDEN
);
9446 aarch64_expand_vec_perm_1 (target
, op0
, op1
, sel
);
9449 /* Recognize patterns suitable for the TRN instructions. */
9451 aarch64_evpc_trn (struct expand_vec_perm_d
*d
)
9453 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
9454 rtx out
, in0
, in1
, x
;
9455 rtx (*gen
) (rtx
, rtx
, rtx
);
9456 machine_mode vmode
= d
->vmode
;
9458 if (GET_MODE_UNIT_SIZE (vmode
) > 8)
9461 /* Note that these are little-endian tests.
9462 We correct for big-endian later. */
9463 if (d
->perm
[0] == 0)
9465 else if (d
->perm
[0] == 1)
9469 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
9471 for (i
= 0; i
< nelt
; i
+= 2)
9473 if (d
->perm
[i
] != i
+ odd
)
9475 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
9485 if (BYTES_BIG_ENDIAN
)
9487 x
= in0
, in0
= in1
, in1
= x
;
9496 case V16QImode
: gen
= gen_aarch64_trn2v16qi
; break;
9497 case V8QImode
: gen
= gen_aarch64_trn2v8qi
; break;
9498 case V8HImode
: gen
= gen_aarch64_trn2v8hi
; break;
9499 case V4HImode
: gen
= gen_aarch64_trn2v4hi
; break;
9500 case V4SImode
: gen
= gen_aarch64_trn2v4si
; break;
9501 case V2SImode
: gen
= gen_aarch64_trn2v2si
; break;
9502 case V2DImode
: gen
= gen_aarch64_trn2v2di
; break;
9503 case V4SFmode
: gen
= gen_aarch64_trn2v4sf
; break;
9504 case V2SFmode
: gen
= gen_aarch64_trn2v2sf
; break;
9505 case V2DFmode
: gen
= gen_aarch64_trn2v2df
; break;
9514 case V16QImode
: gen
= gen_aarch64_trn1v16qi
; break;
9515 case V8QImode
: gen
= gen_aarch64_trn1v8qi
; break;
9516 case V8HImode
: gen
= gen_aarch64_trn1v8hi
; break;
9517 case V4HImode
: gen
= gen_aarch64_trn1v4hi
; break;
9518 case V4SImode
: gen
= gen_aarch64_trn1v4si
; break;
9519 case V2SImode
: gen
= gen_aarch64_trn1v2si
; break;
9520 case V2DImode
: gen
= gen_aarch64_trn1v2di
; break;
9521 case V4SFmode
: gen
= gen_aarch64_trn1v4sf
; break;
9522 case V2SFmode
: gen
= gen_aarch64_trn1v2sf
; break;
9523 case V2DFmode
: gen
= gen_aarch64_trn1v2df
; break;
9529 emit_insn (gen (out
, in0
, in1
));
9533 /* Recognize patterns suitable for the UZP instructions. */
9535 aarch64_evpc_uzp (struct expand_vec_perm_d
*d
)
9537 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
9538 rtx out
, in0
, in1
, x
;
9539 rtx (*gen
) (rtx
, rtx
, rtx
);
9540 machine_mode vmode
= d
->vmode
;
9542 if (GET_MODE_UNIT_SIZE (vmode
) > 8)
9545 /* Note that these are little-endian tests.
9546 We correct for big-endian later. */
9547 if (d
->perm
[0] == 0)
9549 else if (d
->perm
[0] == 1)
9553 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
9555 for (i
= 0; i
< nelt
; i
++)
9557 unsigned elt
= (i
* 2 + odd
) & mask
;
9558 if (d
->perm
[i
] != elt
)
9568 if (BYTES_BIG_ENDIAN
)
9570 x
= in0
, in0
= in1
, in1
= x
;
9579 case V16QImode
: gen
= gen_aarch64_uzp2v16qi
; break;
9580 case V8QImode
: gen
= gen_aarch64_uzp2v8qi
; break;
9581 case V8HImode
: gen
= gen_aarch64_uzp2v8hi
; break;
9582 case V4HImode
: gen
= gen_aarch64_uzp2v4hi
; break;
9583 case V4SImode
: gen
= gen_aarch64_uzp2v4si
; break;
9584 case V2SImode
: gen
= gen_aarch64_uzp2v2si
; break;
9585 case V2DImode
: gen
= gen_aarch64_uzp2v2di
; break;
9586 case V4SFmode
: gen
= gen_aarch64_uzp2v4sf
; break;
9587 case V2SFmode
: gen
= gen_aarch64_uzp2v2sf
; break;
9588 case V2DFmode
: gen
= gen_aarch64_uzp2v2df
; break;
9597 case V16QImode
: gen
= gen_aarch64_uzp1v16qi
; break;
9598 case V8QImode
: gen
= gen_aarch64_uzp1v8qi
; break;
9599 case V8HImode
: gen
= gen_aarch64_uzp1v8hi
; break;
9600 case V4HImode
: gen
= gen_aarch64_uzp1v4hi
; break;
9601 case V4SImode
: gen
= gen_aarch64_uzp1v4si
; break;
9602 case V2SImode
: gen
= gen_aarch64_uzp1v2si
; break;
9603 case V2DImode
: gen
= gen_aarch64_uzp1v2di
; break;
9604 case V4SFmode
: gen
= gen_aarch64_uzp1v4sf
; break;
9605 case V2SFmode
: gen
= gen_aarch64_uzp1v2sf
; break;
9606 case V2DFmode
: gen
= gen_aarch64_uzp1v2df
; break;
9612 emit_insn (gen (out
, in0
, in1
));
9616 /* Recognize patterns suitable for the ZIP instructions. */
9618 aarch64_evpc_zip (struct expand_vec_perm_d
*d
)
9620 unsigned int i
, high
, mask
, nelt
= d
->nelt
;
9621 rtx out
, in0
, in1
, x
;
9622 rtx (*gen
) (rtx
, rtx
, rtx
);
9623 machine_mode vmode
= d
->vmode
;
9625 if (GET_MODE_UNIT_SIZE (vmode
) > 8)
9628 /* Note that these are little-endian tests.
9629 We correct for big-endian later. */
9631 if (d
->perm
[0] == high
)
9634 else if (d
->perm
[0] == 0)
9638 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
9640 for (i
= 0; i
< nelt
/ 2; i
++)
9642 unsigned elt
= (i
+ high
) & mask
;
9643 if (d
->perm
[i
* 2] != elt
)
9645 elt
= (elt
+ nelt
) & mask
;
9646 if (d
->perm
[i
* 2 + 1] != elt
)
9656 if (BYTES_BIG_ENDIAN
)
9658 x
= in0
, in0
= in1
, in1
= x
;
9667 case V16QImode
: gen
= gen_aarch64_zip2v16qi
; break;
9668 case V8QImode
: gen
= gen_aarch64_zip2v8qi
; break;
9669 case V8HImode
: gen
= gen_aarch64_zip2v8hi
; break;
9670 case V4HImode
: gen
= gen_aarch64_zip2v4hi
; break;
9671 case V4SImode
: gen
= gen_aarch64_zip2v4si
; break;
9672 case V2SImode
: gen
= gen_aarch64_zip2v2si
; break;
9673 case V2DImode
: gen
= gen_aarch64_zip2v2di
; break;
9674 case V4SFmode
: gen
= gen_aarch64_zip2v4sf
; break;
9675 case V2SFmode
: gen
= gen_aarch64_zip2v2sf
; break;
9676 case V2DFmode
: gen
= gen_aarch64_zip2v2df
; break;
9685 case V16QImode
: gen
= gen_aarch64_zip1v16qi
; break;
9686 case V8QImode
: gen
= gen_aarch64_zip1v8qi
; break;
9687 case V8HImode
: gen
= gen_aarch64_zip1v8hi
; break;
9688 case V4HImode
: gen
= gen_aarch64_zip1v4hi
; break;
9689 case V4SImode
: gen
= gen_aarch64_zip1v4si
; break;
9690 case V2SImode
: gen
= gen_aarch64_zip1v2si
; break;
9691 case V2DImode
: gen
= gen_aarch64_zip1v2di
; break;
9692 case V4SFmode
: gen
= gen_aarch64_zip1v4sf
; break;
9693 case V2SFmode
: gen
= gen_aarch64_zip1v2sf
; break;
9694 case V2DFmode
: gen
= gen_aarch64_zip1v2df
; break;
9700 emit_insn (gen (out
, in0
, in1
));
/* Recognize patterns for the EXT insn.  */

static bool
aarch64_evpc_ext (struct expand_vec_perm_d *d)
{
  unsigned int i, nelt = d->nelt;
  rtx (*gen) (rtx, rtx, rtx, rtx);
  rtx offset;

  unsigned int location = d->perm[0]; /* Always < nelt.  */

  /* Check if the extracted indices are increasing by one.  */
  for (i = 1; i < nelt; i++)
    {
      unsigned int required = location + i;
      if (d->one_vector_p)
        {
          /* We'll pass the same vector in twice, so allow indices to wrap.  */
          required &= (nelt - 1);
        }
      if (d->perm[i] != required)
        return false;
    }

  switch (d->vmode)
    {
    case V16QImode: gen = gen_aarch64_extv16qi; break;
    case V8QImode: gen = gen_aarch64_extv8qi; break;
    case V4HImode: gen = gen_aarch64_extv4hi; break;
    case V8HImode: gen = gen_aarch64_extv8hi; break;
    case V2SImode: gen = gen_aarch64_extv2si; break;
    case V4SImode: gen = gen_aarch64_extv4si; break;
    case V2SFmode: gen = gen_aarch64_extv2sf; break;
    case V4SFmode: gen = gen_aarch64_extv4sf; break;
    case V2DImode: gen = gen_aarch64_extv2di; break;
    case V2DFmode: gen = gen_aarch64_extv2df; break;
    default:
      return false;
    }

  /* The case where (location == 0) is a no-op for both big- and little-endian,
     and is removed by the mid-end at optimization levels -O1 and higher.  */

  if (BYTES_BIG_ENDIAN && (location != 0))
    {
      /* After setup, we want the high elements of the first vector (stored
         at the LSB end of the register), and the low elements of the second
         vector (stored at the MSB end of the register).  So swap.  */
      rtx x = d->op0;
      d->op0 = d->op1;
      d->op1 = x;
      /* location != 0 (above), so safe to assume (nelt - location) < nelt.  */
      location = nelt - location;
    }

  offset = GEN_INT (location);
  emit_insn (gen (d->target, d->op0, d->op1, offset));
  return true;
}
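
/* Illustrative sketch (not compiled, not part of the port): a standalone
   model of the "indices increase by one" test above.  For V4SI with two
   distinct inputs it accepts { 1, 2, 3, 4 }, i.e. an extraction starting at
   element 1; with a single input the indices are allowed to wrap modulo
   nelt, so { 3, 0, 1, 2 } is also accepted.  */
#if 0
static int
ext_pattern_p (const unsigned char *sel, unsigned int nelt, int one_vector_p)
{
  unsigned int i, location = sel[0];

  for (i = 1; i < nelt; i++)
    {
      unsigned int required = location + i;
      if (one_vector_p)
        required &= nelt - 1;   /* Same vector passed twice: wrap around.  */
      if (sel[i] != required)
        return 0;
    }
  return 1;
}
#endif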
/* Recognize patterns for the REV insns.  */

static bool
aarch64_evpc_rev (struct expand_vec_perm_d *d)
{
  unsigned int i, j, diff, nelt = d->nelt;
  rtx (*gen) (rtx, rtx);

  if (!d->one_vector_p)
    return false;

  diff = d->perm[0];
  switch (diff)
    {
    case 7:
      switch (d->vmode)
        {
        case V16QImode: gen = gen_aarch64_rev64v16qi; break;
        case V8QImode: gen = gen_aarch64_rev64v8qi;  break;
        default:
          return false;
        }
      break;
    case 3:
      switch (d->vmode)
        {
        case V16QImode: gen = gen_aarch64_rev32v16qi; break;
        case V8QImode: gen = gen_aarch64_rev32v8qi;  break;
        case V8HImode: gen = gen_aarch64_rev64v8hi;  break;
        case V4HImode: gen = gen_aarch64_rev64v4hi;  break;
        default:
          return false;
        }
      break;
    case 1:
      switch (d->vmode)
        {
        case V16QImode: gen = gen_aarch64_rev16v16qi; break;
        case V8QImode: gen = gen_aarch64_rev16v8qi;  break;
        case V8HImode: gen = gen_aarch64_rev32v8hi;  break;
        case V4HImode: gen = gen_aarch64_rev32v4hi;  break;
        case V4SImode: gen = gen_aarch64_rev64v4si;  break;
        case V2SImode: gen = gen_aarch64_rev64v2si;  break;
        case V4SFmode: gen = gen_aarch64_rev64v4sf;  break;
        case V2SFmode: gen = gen_aarch64_rev64v2sf;  break;
        default:
          return false;
        }
      break;
    default:
      return false;
    }

  for (i = 0; i < nelt; i += diff + 1)
    for (j = 0; j <= diff; j += 1)
      {
        /* This is guaranteed to be true as the value of diff
           is 7, 3, 1 and we should have enough elements in the
           queue to generate this.  Getting a vector mask with a
           value of diff other than these values implies that
           something is wrong by the time we get here.  */
        gcc_assert (i + j < nelt);
        if (d->perm[i + j] != i + diff - j)
          return false;
      }

  emit_insn (gen (d->target, d->op0));
  return true;
}
static bool
aarch64_evpc_dup (struct expand_vec_perm_d *d)
{
  rtx (*gen) (rtx, rtx, rtx);
  rtx out = d->target;
  rtx in0, lane;
  machine_mode vmode = d->vmode;
  unsigned int i, elt, nelt = d->nelt;

  elt = d->perm[0];
  for (i = 1; i < nelt; i++)
    {
      if (elt != d->perm[i])
        return false;
    }

  /* The generic preparation in aarch64_expand_vec_perm_const_1
     swaps the operand order and the permute indices if it finds
     d->perm[0] to be in the second operand.  Thus, we can always
     use d->op0 and need not do any extra arithmetic to get the
     correct lane number.  */
  in0 = d->op0;
  lane = GEN_INT (elt); /* The pattern corrects for big-endian.  */

  switch (vmode)
    {
    case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
    case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
    case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
    case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
    case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
    case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
    case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
    case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
    case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
    case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
    default:
      return false;
    }

  emit_insn (gen (out, in0, lane));
  return true;
}
static bool
aarch64_evpc_tbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    {
      int nunits = GET_MODE_NUNITS (vmode);

      /* If big-endian and two vectors we end up with a weird mixed-endian
         mode on NEON.  Reverse the index within each word but not the word
         itself.  */
      rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
                                           : d->perm[i]);
    }
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
static bool
aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;
      rtx x;

      gcc_assert (nelt == (nelt & -nelt));
      for (i = 0; i < nelt; ++i)
        d->perm[i] ^= nelt; /* Keep the same index, but in the other vector.  */

      x = d->op0;
      d->op0 = d->op1;
      d->op1 = x;
    }

  if (TARGET_SIMD)
    {
      if (aarch64_evpc_rev (d))
        return true;
      else if (aarch64_evpc_ext (d))
        return true;
      else if (aarch64_evpc_dup (d))
        return true;
      else if (aarch64_evpc_zip (d))
        return true;
      else if (aarch64_evpc_uzp (d))
        return true;
      else if (aarch64_evpc_trn (d))
        return true;
      return aarch64_evpc_tbl (d);
    }

  return false;
}
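
/* Illustrative sketch (not compiled, not part of the port): the index
   rewrite performed at the top of aarch64_expand_vec_perm_const_1.  For a
   power-of-two NELT, swapping the two input operands turns index E into
   E ^ NELT, e.g. { 5, 1, 6, 2 } becomes { 1, 5, 2, 6 } when nelt == 4.
   The caller also exchanges op0 and op1; only the index half is modelled
   here.  */
#if 0
static void
swap_perm_operands (unsigned char *perm, unsigned int nelt)
{
  unsigned int i;

  for (i = 0; i < nelt; ++i)
    perm[i] ^= nelt;    /* Same lane, but taken from the other input.  */
}
#endif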
/* Expand a vec_perm_const pattern.  */

static bool
aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
        break;

      /* The elements of PERM do not suggest that only the first operand
         is used, but both operands are identical.  Allow easier matching
         of the permutation by folding the permutation into the single
         input vector.  */
      /* Fall through.  */
    case 2:
      for (i = 0; i < nelt; ++i)
        d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return aarch64_expand_vec_perm_const_1 (&d);
}
static bool
aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
                                     const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Calculate whether all elements are in one vector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* If all elements are from the second vector, reindex as if from the
     first vector.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to a single vector.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = aarch64_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
/* Implement target hook CANNOT_CHANGE_MODE_CLASS.  */

bool
aarch64_cannot_change_mode_class (machine_mode from,
                                  machine_mode to,
                                  enum reg_class rclass)
{
  /* Full-reg subregs are allowed on general regs or any class if they are
     the same size.  */
  if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
      || !reg_classes_intersect_p (FP_REGS, rclass))
    return false;

  /* Limited combinations of subregs are safe on FPREGs.  Particularly,
     1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
     2. Scalar to Scalar for integer modes or same size float modes.
     3. Vector to Vector modes.
     4. On little-endian only, Vector-Structure to Vector modes.  */
  if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
    {
      if (aarch64_vector_mode_supported_p (from)
          && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
        return false;

      if (GET_MODE_NUNITS (from) == 1
          && GET_MODE_NUNITS (to) == 1
          && (GET_MODE_CLASS (from) == MODE_INT
              || from == to))
        return false;

      if (aarch64_vector_mode_supported_p (from)
          && aarch64_vector_mode_supported_p (to))
        return false;

      /* Within a vector structure straddling multiple vector registers
         we are in a mixed-endian representation.  As such, we can't
         easily change modes for BYTES_BIG_ENDIAN.  Otherwise, we can
         switch between vectors and vector structures cheaply.  */
      if (!BYTES_BIG_ENDIAN)
        if ((aarch64_vector_mode_supported_p (from)
             && aarch64_vect_struct_mode_p (to))
            || (aarch64_vector_mode_supported_p (to)
                && aarch64_vect_struct_mode_p (from)))
          return false;
    }

  return true;
}
/* Implement MODES_TIEABLE_P.  */

bool
aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
    return true;

  /* We specifically want to allow elements of "structure" modes to
     be tieable to the structure.  This more general condition allows
     other rarer situations too.  */
  if (TARGET_SIMD
      && aarch64_vector_mode_p (mode1)
      && aarch64_vector_mode_p (mode2))
    return true;

  return false;
}
/* Return a new RTX holding the result of moving POINTER forward by
   AMOUNT bytes.  */

static rtx
aarch64_move_pointer (rtx pointer, int amount)
{
  rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);

  return adjust_automodify_address (pointer, GET_MODE (pointer),
                                    next, amount);
}

/* Return a new RTX holding the result of moving POINTER forward by the
   size of the mode it points to.  */

static rtx
aarch64_progress_pointer (rtx pointer)
{
  HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));

  return aarch64_move_pointer (pointer, amount);
}
/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
   MODE bytes.  */

static void
aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
                                              machine_mode mode)
{
  rtx reg = gen_reg_rtx (mode);

  /* "Cast" the pointers to the correct mode.  */
  *src = adjust_address (*src, mode, 0);
  *dst = adjust_address (*dst, mode, 0);
  /* Emit the memcpy.  */
  emit_move_insn (reg, *src);
  emit_move_insn (*dst, reg);
  /* Move the pointers forward.  */
  *src = aarch64_progress_pointer (*src);
  *dst = aarch64_progress_pointer (*dst);
}
/* Expand movmem, as if from a __builtin_memcpy.  Return true if
   we succeed, otherwise return false.  */

bool
aarch64_expand_movmem (rtx *operands)
{
  unsigned int n;
  rtx dst = operands[0];
  rtx src = operands[1];
  rtx base;
  bool speed_p = !optimize_function_for_size_p (cfun);

  /* When optimizing for size, give a better estimate of the length of a
     memcpy call, but use the default otherwise.  */
  unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;

  /* We can't do anything smart if the amount to copy is not constant.  */
  if (!CONST_INT_P (operands[2]))
    return false;

  n = UINTVAL (operands[2]);

  /* Try to keep the number of instructions low.  For cases below 16 bytes we
     need to make at most two moves.  For cases above 16 bytes it will be one
     move for each 16 byte chunk, then at most two additional moves.  */
  if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
    return false;

  base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  dst = adjust_automodify_address (dst, VOIDmode, base, 0);

  base = copy_to_mode_reg (Pmode, XEXP (src, 0));
  src = adjust_automodify_address (src, VOIDmode, base, 0);

  /* Simple cases.  Copy 0-3 bytes, as (if applicable) a 2-byte, then a
     1-byte chunk.  */
  if (n < 4)
    {
      if (n >= 2)
        {
          aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
          n -= 2;
        }
      if (n == 1)
        aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
      return true;
    }

  /* Copy 4-8 bytes.  First a 4-byte chunk, then (if applicable) a second
     4-byte chunk, partially overlapping with the previously copied chunk.  */
  if (n < 8)
    {
      aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
      n -= 4;
      if (n > 0)
        {
          int move = n - 4;

          src = aarch64_move_pointer (src, move);
          dst = aarch64_move_pointer (dst, move);
          aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
        }
      return true;
    }

  /* Copy more than 8 bytes.  Copy chunks of 16 bytes until we run out of
     them, then (if applicable) an 8-byte chunk.  */
  while (n >= 8)
    {
      if (n / 16)
        {
          aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
          n -= 16;
        }
      else
        {
          aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
          n -= 8;
        }
    }

  /* Finish the final bytes of the copy.  We can always do this in one
     instruction.  We either copy the exact amount we need, or partially
     overlap with the previous chunk we copied and copy 8-bytes.  */
  if (n == 0)
    return true;
  else if (n == 1)
    aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
  else if (n == 2)
    aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
  else if (n == 4)
    aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
  else if (n == 3)
    {
      src = aarch64_move_pointer (src, -1);
      dst = aarch64_move_pointer (dst, -1);
      aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
    }
  else
    {
      int move = n - 8;

      src = aarch64_move_pointer (src, move);
      dst = aarch64_move_pointer (dst, move);
      aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
    }

  return true;
}
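
/* Illustrative sketch (not compiled, not part of the port) of the
   overlapping-tail strategy used by aarch64_expand_movmem, written as plain
   C for a hypothetical 22-byte copy: one 16-byte chunk, then an 8-byte chunk
   that re-copies bytes 14 and 15.  Re-copying a few bytes is harmless for
   memcpy semantics and avoids splitting the tail into 4-, 2- and 1-byte
   moves.  */
#if 0
#include <string.h>

static void
copy_22_bytes (char *dst, const char *src)
{
  memcpy (dst, src, 16);                    /* TImode chunk: bytes 0..15.  */
  memcpy (dst + 22 - 8, src + 22 - 8, 8);   /* DImode chunk: bytes 14..21,
                                               overlapping the first by 2.  */
}
#endif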
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
aarch64_asan_shadow_offset (void)
{
  return (HOST_WIDE_INT_1 << 36);
}
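
/* Illustrative sketch (not compiled, not part of the port) of how the
   constant above is used: AddressSanitizer maps every 8 bytes of application
   memory to one shadow byte at (addr >> 3) + offset, so on AArch64 the
   shadow byte for ADDR lives at (ADDR >> 3) + (1 << 36).  */
#if 0
static unsigned long long
asan_shadow_addr (unsigned long long addr)
{
  return (addr >> 3) + (1ULL << 36);
}
#endif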
static bool
aarch64_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
                                        unsigned int align,
                                        enum by_pieces_operation op,
                                        bool speed_p)
{
  /* STORE_BY_PIECES can be used when copying a constant string, but
     in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
     For now we always fail this and let the move_by_pieces code copy
     the string from read-only memory.  */
  if (op == STORE_BY_PIECES)
    return false;

  return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
}
/* Implement TARGET_SCHED_MACRO_FUSION_P.  Return true if target supports
   instruction fusion of some sort.  */

static bool
aarch64_macro_fusion_p (void)
{
  return aarch64_tune_params->fuseable_ops != AARCH64_FUSE_NOTHING;
}
/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P.  Return true if PREV and CURR
   should be kept together during scheduling.  */

static bool
aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
{
  rtx set_dest;
  rtx prev_set = single_set (prev);
  rtx curr_set = single_set (curr);
  /* prev and curr are simple SET insns, i.e. no flag setting or branching.  */
  bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);

  if (!aarch64_macro_fusion_p ())
    return false;

  if (simple_sets_p
      && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOV_MOVK))
    {
      /* We are trying to match:
         prev (mov)  == (set (reg r0) (const_int imm16))
         curr (movk) == (set (zero_extract (reg r0)
                                           (const_int 16)
                                           (const_int 16))
                             (const_int imm16_1))  */

      set_dest = SET_DEST (curr_set);

      if (GET_CODE (set_dest) == ZERO_EXTRACT
          && CONST_INT_P (SET_SRC (curr_set))
          && CONST_INT_P (SET_SRC (prev_set))
          && CONST_INT_P (XEXP (set_dest, 2))
          && INTVAL (XEXP (set_dest, 2)) == 16
          && REG_P (XEXP (set_dest, 0))
          && REG_P (SET_DEST (prev_set))
          && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
        return true;
    }

  if (simple_sets_p
      && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_ADD))
    {
      /* We're trying to match:
         prev (adrp) == (set (reg r1)
                             (high (symbol_ref ("SYM"))))
         curr (add)  == (set (reg r0)
                             (lo_sum (reg r1)
                                     (symbol_ref ("SYM"))))
         Note that r0 need not necessarily be the same as r1, especially
         during pre-regalloc scheduling.  */

      if (satisfies_constraint_Ush (SET_SRC (prev_set))
          && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
        {
          if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
              && REG_P (XEXP (SET_SRC (curr_set), 0))
              && REGNO (XEXP (SET_SRC (curr_set), 0))
                 == REGNO (SET_DEST (prev_set))
              && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
                              XEXP (SET_SRC (curr_set), 1)))
            return true;
        }
    }

  if (simple_sets_p
      && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOVK_MOVK))
    {
      /* We're trying to match:
         prev (movk) == (set (zero_extract (reg r0)
                                           (const_int 16)
                                           (const_int 32))
                             (const_int imm16_1))
         curr (movk) == (set (zero_extract (reg r0)
                                           (const_int 16)
                                           (const_int 48))
                             (const_int imm16_2))  */

      if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
          && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
          && REG_P (XEXP (SET_DEST (prev_set), 0))
          && REG_P (XEXP (SET_DEST (curr_set), 0))
          && REGNO (XEXP (SET_DEST (prev_set), 0))
             == REGNO (XEXP (SET_DEST (curr_set), 0))
          && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
          && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
          && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
          && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
          && CONST_INT_P (SET_SRC (prev_set))
          && CONST_INT_P (SET_SRC (curr_set)))
        return true;
    }

  if (simple_sets_p
      && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_LDR))
    {
      /* We're trying to match:
         prev (adrp) == (set (reg r0)
                             (high (symbol_ref ("SYM"))))
         curr (ldr)  == (set (reg r1)
                             (mem (lo_sum (reg r0)
                                          (symbol_ref ("SYM")))))
         or
         curr (ldr)  == (set (reg r1)
                             (zero_extend
                               (mem (lo_sum (reg r0)
                                            (symbol_ref ("SYM"))))))  */
      if (satisfies_constraint_Ush (SET_SRC (prev_set))
          && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
        {
          rtx curr_src = SET_SRC (curr_set);

          if (GET_CODE (curr_src) == ZERO_EXTEND)
            curr_src = XEXP (curr_src, 0);

          if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
              && REG_P (XEXP (XEXP (curr_src, 0), 0))
              && REGNO (XEXP (XEXP (curr_src, 0), 0))
                 == REGNO (SET_DEST (prev_set))
              && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
                              XEXP (SET_SRC (prev_set), 0)))
            return true;
        }
    }

  if ((aarch64_tune_params->fuseable_ops & AARCH64_FUSE_CMP_BRANCH)
      && any_condjump_p (curr))
    {
      enum attr_type prev_type = get_attr_type (prev);

      /* FIXME: this misses some instructions that ThunderX considers
         simple arithmetic.  Simple shifts are missed here.  */
      if (prev_type == TYPE_ALUS_SREG
          || prev_type == TYPE_ALUS_IMM
          || prev_type == TYPE_LOGICS_REG
          || prev_type == TYPE_LOGICS_IMM)
        return true;
    }

  return false;
}
/* If MEM is in the form of [base+offset], extract the two parts
   of the address and set them to BASE and OFFSET; otherwise set
   BASE and OFFSET to NULL_RTX.  */

static void
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  if (REG_P (addr))
    {
      *base = addr;
      *offset = const0_rtx;
      return;
    }

  if (GET_CODE (addr) == PLUS
      && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return;
    }

  *base = NULL_RTX;
  *offset = NULL_RTX;
}
/* Types for scheduling fusion.  */
enum sched_fusion_type
{
  SCHED_FUSION_NONE = 0,
  SCHED_FUSION_LD_SIGN_EXTEND,
  SCHED_FUSION_LD_ZERO_EXTEND,
  SCHED_FUSION_LD,
  SCHED_FUSION_ST
};
/* If INSN is a load or store of an address in the form of [base+offset],
   extract the two parts and set them to BASE and OFFSET.  Return the
   scheduling fusion type of INSN.  */

static enum sched_fusion_type
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
{
  rtx x, dest, src;
  enum sched_fusion_type fusion = SCHED_FUSION_LD;

  gcc_assert (INSN_P (insn));
  x = PATTERN (insn);
  if (GET_CODE (x) != SET)
    return SCHED_FUSION_NONE;

  src = SET_SRC (x);
  dest = SET_DEST (x);

  if (GET_MODE (src) != SImode && GET_MODE (src) != DImode
      && GET_MODE (src) != SFmode && GET_MODE (src) != DFmode)
    return SCHED_FUSION_NONE;

  if (GET_CODE (src) == SIGN_EXTEND)
    {
      fusion = SCHED_FUSION_LD_SIGN_EXTEND;
      src = XEXP (src, 0);
      if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
        return SCHED_FUSION_NONE;
    }
  else if (GET_CODE (src) == ZERO_EXTEND)
    {
      fusion = SCHED_FUSION_LD_ZERO_EXTEND;
      src = XEXP (src, 0);
      if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
        return SCHED_FUSION_NONE;
    }

  if (GET_CODE (src) == MEM && REG_P (dest))
    extract_base_offset_in_addr (src, base, offset);
  else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
    {
      fusion = SCHED_FUSION_ST;
      extract_base_offset_in_addr (dest, base, offset);
    }
  else
    return SCHED_FUSION_NONE;

  if (*base == NULL_RTX || *offset == NULL_RTX)
    fusion = SCHED_FUSION_NONE;

  return fusion;
}
/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.

   Currently we only support fusing ldr and str instructions, so FUSION_PRI
   and PRI are only calculated for these instructions.  For other
   instructions, FUSION_PRI and PRI are simply set to MAX_PRI - 1.  In the
   future, fusion of other instruction types can be added by returning
   different priorities.

   It's important that irrelevant instructions get the largest FUSION_PRI.  */

static void
aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
                               int *fusion_pri, int *pri)
{
  int tmp, off_val;
  rtx base, offset;
  enum sched_fusion_type fusion;

  gcc_assert (INSN_P (insn));

  tmp = max_pri - 1;
  fusion = fusion_load_store (insn, &base, &offset);
  if (fusion == SCHED_FUSION_NONE)
    {
      *pri = tmp;
      *fusion_pri = tmp;
      return;
    }

  /* Set FUSION_PRI according to fusion type and base register.  */
  *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);

  /* Calculate PRI.  */
  tmp /= 2;

  /* INSN with smaller offset goes first.  */
  off_val = (int)(INTVAL (offset));
  if (off_val >= 0)
    tmp -= (off_val & 0xfffff);
  else
    tmp += ((- off_val) & 0xfffff);

  *pri = tmp;
}
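
/* Illustrative sketch (not compiled, not part of the port) of the ordering
   the hook above establishes.  Accesses with the same fusion type and base
   register receive the same FUSION_PRI, and within that group a smaller
   non-negative offset yields a larger PRI, so e.g. ldr w0, [x1, #4] sorts
   ahead of ldr w2, [x1, #8] and the two become an ldp candidate.  The
   FIRST_PSEUDO_REG parameter stands in for the real FIRST_PSEUDO_REGISTER
   constant; the division step is simplified.  */
#if 0
static void
model_fusion_priority (int fusion_type, unsigned int base_regno,
                       long long off_val, int max_pri, int first_pseudo_reg,
                       int *fusion_pri, int *pri)
{
  int tmp = max_pri - 1;

  /* Group key: fusion type and base register only.  */
  *fusion_pri = tmp - fusion_type * first_pseudo_reg - base_regno;

  /* Within the group, smaller offsets get larger priority.  */
  if (off_val >= 0)
    tmp -= (off_val & 0xfffff);
  else
    tmp += ((-off_val) & 0xfffff);
  *pri = tmp;
}
#endif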
/* Given OPERANDS of consecutive load/store, check if we can merge
   them into ldp/stp.  LOAD is true if they are load instructions.
   MODE is the mode of memory operands.  */

bool
aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
                                enum machine_mode mode)
{
  HOST_WIDE_INT offval_1, offval_2, msize;
  enum reg_class rclass_1, rclass_2;
  rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;

  if (load)
    {
      mem_1 = operands[1];
      mem_2 = operands[3];
      reg_1 = operands[0];
      reg_2 = operands[2];
      gcc_assert (REG_P (reg_1) && REG_P (reg_2));
      if (REGNO (reg_1) == REGNO (reg_2))
        return false;
    }
  else
    {
      mem_1 = operands[0];
      mem_2 = operands[2];
      reg_1 = operands[1];
      reg_2 = operands[3];
    }

  /* The mems cannot be volatile.  */
  if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
    return false;

  /* Check if the addresses are in the form of [base+offset].  */
  extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
  if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
    return false;
  extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
  if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
    return false;

  /* Check if the bases are the same.  */
  if (!rtx_equal_p (base_1, base_2))
    return false;

  offval_1 = INTVAL (offset_1);
  offval_2 = INTVAL (offset_2);
  msize = GET_MODE_SIZE (mode);
  /* Check if the offsets are consecutive.  */
  if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
    return false;

  /* Check if the addresses are clobbered by a load.  */
  if (load)
    {
      if (reg_mentioned_p (reg_1, mem_1))
        return false;

      /* In increasing order, the last load can clobber the address.  */
      if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
        return false;
    }

  if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
    rclass_1 = FP_REGS;
  else
    rclass_1 = GENERAL_REGS;

  if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
    rclass_2 = FP_REGS;
  else
    rclass_2 = GENERAL_REGS;

  /* Check if the registers are of the same class.  */
  if (rclass_1 != rclass_2)
    return false;

  return true;
}
/* Given OPERANDS of consecutive load/store, check if we can merge
   them into ldp/stp by adjusting the offset.  LOAD is true if they
   are load instructions.  MODE is the mode of memory operands.

   Given the consecutive stores below:

     str  w1, [xb, 0x100]
     str  w1, [xb, 0x104]
     str  w1, [xb, 0x108]
     str  w1, [xb, 0x10c]

   Though the offsets are out of the range supported by stp, we can
   still pair them after adjusting the offset, like:

     add  scratch, xb, 0x100
     stp  w1, w1, [scratch]
     stp  w1, w1, [scratch, 0x8]

   The peephole patterns detecting this opportunity should guarantee
   the scratch register is available.  */

bool
aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
                                       enum machine_mode mode)
{
  enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
  HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize;
  rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4;
  rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4;

  if (load)
    {
      reg_1 = operands[0];
      mem_1 = operands[1];
      reg_2 = operands[2];
      mem_2 = operands[3];
      reg_3 = operands[4];
      mem_3 = operands[5];
      reg_4 = operands[6];
      mem_4 = operands[7];
      gcc_assert (REG_P (reg_1) && REG_P (reg_2)
                  && REG_P (reg_3) && REG_P (reg_4));
      if (REGNO (reg_1) == REGNO (reg_2) || REGNO (reg_3) == REGNO (reg_4))
        return false;
    }
  else
    {
      mem_1 = operands[0];
      reg_1 = operands[1];
      mem_2 = operands[2];
      reg_2 = operands[3];
      mem_3 = operands[4];
      reg_3 = operands[5];
      mem_4 = operands[6];
      reg_4 = operands[7];
    }
  /* Skip if the memory operand is by itself valid for ldp/stp.  */
  if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
    return false;

  /* The mems cannot be volatile.  */
  if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2)
      || MEM_VOLATILE_P (mem_3) || MEM_VOLATILE_P (mem_4))
    return false;

  /* Check if the addresses are in the form of [base+offset].  */
  extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
  if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
    return false;
  extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
  if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
    return false;
  extract_base_offset_in_addr (mem_3, &base_3, &offset_3);
  if (base_3 == NULL_RTX || offset_3 == NULL_RTX)
    return false;
  extract_base_offset_in_addr (mem_4, &base_4, &offset_4);
  if (base_4 == NULL_RTX || offset_4 == NULL_RTX)
    return false;

  /* Check if the bases are the same.  */
  if (!rtx_equal_p (base_1, base_2)
      || !rtx_equal_p (base_2, base_3)
      || !rtx_equal_p (base_3, base_4))
    return false;

  offval_1 = INTVAL (offset_1);
  offval_2 = INTVAL (offset_2);
  offval_3 = INTVAL (offset_3);
  offval_4 = INTVAL (offset_4);
  msize = GET_MODE_SIZE (mode);
  /* Check if the offsets are consecutive.  */
  if ((offval_1 != (offval_2 + msize)
       || offval_1 != (offval_3 + msize * 2)
       || offval_1 != (offval_4 + msize * 3))
      && (offval_4 != (offval_3 + msize)
          || offval_4 != (offval_2 + msize * 2)
          || offval_4 != (offval_1 + msize * 3)))
    return false;

  /* Check if the addresses are clobbered by a load.  */
  if (load)
    {
      if (reg_mentioned_p (reg_1, mem_1)
          || reg_mentioned_p (reg_2, mem_2)
          || reg_mentioned_p (reg_3, mem_3))
        return false;

      /* In increasing order, the last load can clobber the address.  */
      if (offval_1 > offval_2 && reg_mentioned_p (reg_4, mem_4))
        return false;
    }

  if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
    rclass_1 = FP_REGS;
  else
    rclass_1 = GENERAL_REGS;

  if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
    rclass_2 = FP_REGS;
  else
    rclass_2 = GENERAL_REGS;

  if (REG_P (reg_3) && FP_REGNUM_P (REGNO (reg_3)))
    rclass_3 = FP_REGS;
  else
    rclass_3 = GENERAL_REGS;

  if (REG_P (reg_4) && FP_REGNUM_P (REGNO (reg_4)))
    rclass_4 = FP_REGS;
  else
    rclass_4 = GENERAL_REGS;

  /* Check if the registers are of the same class.  */
  if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 != rclass_4)
    return false;

  return true;
}
/* Given OPERANDS of consecutive load/store, this function pairs them
   into ldp/stp after adjusting the offset.  It depends on the fact
   that the addresses of the load/store instructions are in increasing
   order.  MODE is the mode of memory operands.  CODE is the rtl operator
   which should be applied to all memory operands; it is SIGN_EXTEND,
   ZERO_EXTEND or UNKNOWN.  */

bool
aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
                             enum machine_mode mode, RTX_CODE code)
{
  rtx base, offset, t1, t2;
  rtx mem_1, mem_2, mem_3, mem_4;
  HOST_WIDE_INT off_val, abs_off, adj_off, new_off, stp_off_limit, msize;

  if (load)
    {
      mem_1 = operands[1];
      mem_2 = operands[3];
      mem_3 = operands[5];
      mem_4 = operands[7];
    }
  else
    {
      mem_1 = operands[0];
      mem_2 = operands[2];
      mem_3 = operands[4];
      mem_4 = operands[6];
      gcc_assert (code == UNKNOWN);
    }

  extract_base_offset_in_addr (mem_1, &base, &offset);
  gcc_assert (base != NULL_RTX && offset != NULL_RTX);

  /* Adjust the offset so that it can fit in an ldp/stp instruction.  */
  msize = GET_MODE_SIZE (mode);
  stp_off_limit = msize * 0x40;
  off_val = INTVAL (offset);
  abs_off = (off_val < 0) ? -off_val : off_val;
  new_off = abs_off % stp_off_limit;
  adj_off = abs_off - new_off;

  /* Further adjust to make sure all offsets are OK.  */
  if ((new_off + msize * 2) >= stp_off_limit)
    {
      adj_off += stp_off_limit;
      new_off -= stp_off_limit;
    }

  /* Make sure the adjustment can be done with ADD/SUB instructions.  */
  if (adj_off >= 0x1000)
    return false;

  if (off_val < 0)
    {
      adj_off = -adj_off;
      new_off = -new_off;
    }

  /* Create new memory references.  */
  mem_1 = change_address (mem_1, VOIDmode,
                          plus_constant (DImode, operands[8], new_off));

  /* Check if the adjusted address is OK for ldp/stp.  */
  if (!aarch64_mem_pair_operand (mem_1, mode))
    return false;

  msize = GET_MODE_SIZE (mode);
  mem_2 = change_address (mem_2, VOIDmode,
                          plus_constant (DImode,
                                         operands[8],
                                         new_off + msize));
  mem_3 = change_address (mem_3, VOIDmode,
                          plus_constant (DImode,
                                         operands[8],
                                         new_off + msize * 2));
  mem_4 = change_address (mem_4, VOIDmode,
                          plus_constant (DImode,
                                         operands[8],
                                         new_off + msize * 3));

  if (code == ZERO_EXTEND)
    {
      mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
      mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
      mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
      mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
    }
  else if (code == SIGN_EXTEND)
    {
      mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
      mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
      mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
      mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
    }

  if (load)
    {
      operands[1] = mem_1;
      operands[3] = mem_2;
      operands[5] = mem_3;
      operands[7] = mem_4;
    }
  else
    {
      operands[0] = mem_1;
      operands[2] = mem_2;
      operands[4] = mem_3;
      operands[6] = mem_4;
    }

  /* Emit the adjusting instruction.  */
  emit_insn (gen_rtx_SET (VOIDmode, operands[8],
                          plus_constant (DImode, base, adj_off)));
  /* Emit the ldp/stp instructions.  */
  t1 = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
  t2 = gen_rtx_SET (VOIDmode, operands[2], operands[3]);
  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
  t1 = gen_rtx_SET (VOIDmode, operands[4], operands[5]);
  t2 = gen_rtx_SET (VOIDmode, operands[6], operands[7]);
  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));

  return true;
}
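
/* Illustrative sketch (not compiled, not part of the port) of the offset
   split performed above, for the SImode store example in the comment before
   aarch64_operands_adjust_ok_for_ldpstp: msize = 4, so stp_off_limit is
   4 * 0x40 = 0x100.  For an original offset of 0x100, new_off becomes
   0x100 % 0x100 = 0 and adj_off becomes 0x100, so the scratch register is
   set to xb + 0x100 and the two stp instructions use offsets 0 and 8.  The
   0x1000 ADD/SUB range check is omitted here.  */
#if 0
static void
split_ldpstp_offset (long long off_val, long long msize,
                     long long *adj_off, long long *new_off)
{
  long long stp_off_limit = msize * 0x40;
  long long abs_off = off_val < 0 ? -off_val : off_val;

  *new_off = abs_off % stp_off_limit;
  *adj_off = abs_off - *new_off;

  /* Keep all four adjusted offsets representable.  */
  if (*new_off + msize * 2 >= stp_off_limit)
    {
      *adj_off += stp_off_limit;
      *new_off -= stp_off_limit;
    }
  if (off_val < 0)
    {
      *adj_off = -*adj_off;
      *new_off = -*new_off;
    }
}
#endif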
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST aarch64_address_cost

/* This hook determines whether unnamed bitfields affect the alignment
   of the containing structure.  The hook returns true if the structure
   should inherit the alignment requirements of an unnamed bitfield's
   type.  */
#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
  hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START aarch64_start_file

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk

#undef TARGET_ASM_SELECT_RTX_SECTION
#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list

#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE aarch64_can_eliminate

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage

/* Only the least significant bit is used for initialization guard
   variables.  */
#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix

#ifdef TARGET_BIG_ENDIAN_DEFAULT
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
#endif

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL aarch64_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN aarch64_fold_builtin

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG aarch64_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance

#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE aarch64_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS aarch64_init_builtins

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode

#undef TARGET_LRA_P
#define TARGET_LRA_P aarch64_lra_p

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE aarch64_mangle_type

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost

#undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

/* This target hook should return true if accesses to volatile bitfields
   should use the narrowest mode possible.  It should return false if these
   accesses should use the bitfield container type.  */
#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE aarch64_override_options

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  aarch64_override_options_after_change

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB aarch64_return_in_msb

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p

#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  aarch64_builtin_vectorization_cost

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  aarch64_builtin_vectorized_function

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  aarch64_autovectorize_vector_sizes

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
  aarch64_atomic_assign_expand_fenv

/* Section anchor support.  */

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -256

/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
   byte offset; we can do much more for larger data types, but have no way
   to determine the size of the access.  We assume accesses are aligned.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  aarch64_simd_vector_alignment_reachable

/* vec_perm support.  */

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  aarch64_vectorize_vec_perm_const_ok

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs

#undef TARGET_FLAGS_REGNUM
#define TARGET_FLAGS_REGNUM CC_REGNUM

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address

#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
  aarch64_use_by_pieces_infrastructure_p

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-aarch64.h"