/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2015 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "double-int.h"
#include "fold-const.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "dominance.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "hard-reg-set.h"
#include "statistics.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "target-def.h"
#include "targhooks.h"
#include "langhooks.h"
#include "diagnostic-core.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "gimple-expr.h"
#include "tree-vectorizer.h"
#include "aarch64-cost-tables.h"
#include "tm-constrs.h"
#include "sched-int.h"

/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
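/* For illustration: with the LP64 ABI, POINTER_SIZE is 64 and BITS_PER_UNIT
   is 8, so POINTER_BYTES evaluates to 8; under ILP32, POINTER_SIZE is 32 and
   POINTER_BYTES is 4.  */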
/* Classifies an address.

     A simple base register plus immediate offset.

     A base register indexed by immediate offset with writeback.

     A base register indexed by (optionally scaled) register.

     A base register indexed by (optionally scaled) zero-extended register.

     A base register indexed by (optionally scaled) sign-extended register.

     A LO_SUM rtx with a base register and "LO12" symbol relocation.

     A constant symbolic address, in pc-relative literal pool.  */

enum aarch64_address_type {

struct aarch64_address_info {
  enum aarch64_address_type type;
  enum aarch64_symbol_type symbol_type;

struct simd_immediate_info

/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1

static bool aarch64_composite_type_p (const_tree, machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
                                                     machine_mode *, int *,
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (machine_mode);
static unsigned bit_count (unsigned HOST_WIDE_INT);
static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
                                                 const unsigned char *sel);
static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);

/* Major revision number of the ARM Architecture implemented by the target.  */
unsigned aarch64_architecture_version;

/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = cortexa53;

/* The current tuning set.  */
const struct tune_params *aarch64_tune_params;

/* Mask to specify which instructions we are allowed to generate.  */
unsigned long aarch64_isa_flags = 0;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;

/* Tuning parameters.  */

#if HAVE_DESIGNATED_INITIALIZERS
#define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
#define NAMED_PARAM(NAME, VAL) (VAL)

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
static const struct cpu_addrcost_table generic_addrcost_table =
#if HAVE_DESIGNATED_INITIALIZERS
  NAMED_PARAM (pre_modify, 0),
  NAMED_PARAM (post_modify, 0),
  NAMED_PARAM (register_offset, 0),
  NAMED_PARAM (register_extend, 0),
  NAMED_PARAM (imm_offset, 0)

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
static const struct cpu_addrcost_table cortexa57_addrcost_table =
#if HAVE_DESIGNATED_INITIALIZERS
  NAMED_PARAM (pre_modify, 0),
  NAMED_PARAM (post_modify, 0),
  NAMED_PARAM (register_offset, 0),
  NAMED_PARAM (register_extend, 0),
  NAMED_PARAM (imm_offset, 0),

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
static const struct cpu_addrcost_table xgene1_addrcost_table =
#if HAVE_DESIGNATED_INITIALIZERS
  NAMED_PARAM (pre_modify, 1),
  NAMED_PARAM (post_modify, 0),
  NAMED_PARAM (register_offset, 0),
  NAMED_PARAM (register_extend, 1),
  NAMED_PARAM (imm_offset, 0),

#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
static const struct cpu_regmove_cost generic_regmove_cost =
  NAMED_PARAM (GP2GP, 1),
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  NAMED_PARAM (GP2FP, 5),
  NAMED_PARAM (FP2GP, 5),
  NAMED_PARAM (FP2FP, 2)

static const struct cpu_regmove_cost cortexa57_regmove_cost =
  NAMED_PARAM (GP2GP, 1),
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  NAMED_PARAM (GP2FP, 5),
  NAMED_PARAM (FP2GP, 5),
  NAMED_PARAM (FP2FP, 2)

static const struct cpu_regmove_cost cortexa53_regmove_cost =
  NAMED_PARAM (GP2GP, 1),
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  NAMED_PARAM (GP2FP, 5),
  NAMED_PARAM (FP2GP, 5),
  NAMED_PARAM (FP2FP, 2)

static const struct cpu_regmove_cost thunderx_regmove_cost =
  NAMED_PARAM (GP2GP, 2),
  NAMED_PARAM (GP2FP, 2),
  NAMED_PARAM (FP2GP, 6),
  NAMED_PARAM (FP2FP, 4)

static const struct cpu_regmove_cost xgene1_regmove_cost =
  NAMED_PARAM (GP2GP, 1),
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  NAMED_PARAM (GP2FP, 8),
  NAMED_PARAM (FP2GP, 8),
  NAMED_PARAM (FP2FP, 2)

/* Generic costs for vector insn classes.  */
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
static const struct cpu_vector_cost generic_vector_cost =
  NAMED_PARAM (scalar_stmt_cost, 1),
  NAMED_PARAM (scalar_load_cost, 1),
  NAMED_PARAM (scalar_store_cost, 1),
  NAMED_PARAM (vec_stmt_cost, 1),
  NAMED_PARAM (vec_to_scalar_cost, 1),
  NAMED_PARAM (scalar_to_vec_cost, 1),
  NAMED_PARAM (vec_align_load_cost, 1),
  NAMED_PARAM (vec_unalign_load_cost, 1),
  NAMED_PARAM (vec_unalign_store_cost, 1),
  NAMED_PARAM (vec_store_cost, 1),
  NAMED_PARAM (cond_taken_branch_cost, 3),
  NAMED_PARAM (cond_not_taken_branch_cost, 1)

/* Costs for the Cortex-A57 vector insn classes.  */
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
static const struct cpu_vector_cost cortexa57_vector_cost =
  NAMED_PARAM (scalar_stmt_cost, 1),
  NAMED_PARAM (scalar_load_cost, 4),
  NAMED_PARAM (scalar_store_cost, 1),
  NAMED_PARAM (vec_stmt_cost, 3),
  NAMED_PARAM (vec_to_scalar_cost, 8),
  NAMED_PARAM (scalar_to_vec_cost, 8),
  NAMED_PARAM (vec_align_load_cost, 5),
  NAMED_PARAM (vec_unalign_load_cost, 5),
  NAMED_PARAM (vec_unalign_store_cost, 1),
  NAMED_PARAM (vec_store_cost, 1),
  NAMED_PARAM (cond_taken_branch_cost, 1),
  NAMED_PARAM (cond_not_taken_branch_cost, 1)

/* Costs for the XGene-1 vector insn classes.  */
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
static const struct cpu_vector_cost xgene1_vector_cost =
  NAMED_PARAM (scalar_stmt_cost, 1),
  NAMED_PARAM (scalar_load_cost, 5),
  NAMED_PARAM (scalar_store_cost, 1),
  NAMED_PARAM (vec_stmt_cost, 2),
  NAMED_PARAM (vec_to_scalar_cost, 4),
  NAMED_PARAM (scalar_to_vec_cost, 4),
  NAMED_PARAM (vec_align_load_cost, 10),
  NAMED_PARAM (vec_unalign_load_cost, 10),
  NAMED_PARAM (vec_unalign_store_cost, 2),
  NAMED_PARAM (vec_store_cost, 2),
  NAMED_PARAM (cond_taken_branch_cost, 2),
  NAMED_PARAM (cond_not_taken_branch_cost, 1)

#define AARCH64_FUSE_NOTHING    (0)
#define AARCH64_FUSE_MOV_MOVK   (1 << 0)
#define AARCH64_FUSE_ADRP_ADD   (1 << 1)
#define AARCH64_FUSE_MOVK_MOVK  (1 << 2)
#define AARCH64_FUSE_ADRP_LDR   (1 << 3)
#define AARCH64_FUSE_CMP_BRANCH (1 << 4)
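/* Illustrative note (not from the original sources): AARCH64_FUSE_ADRP_ADD,
   for example, says that an address-forming pair such as

        adrp    x0, sym
        add     x0, x0, :lo12:sym

   should be kept adjacent so that cores which macro-fuse the two
   instructions can do so; the tuning structures below OR together the
   fusion kinds each core supports.  */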
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
static const struct tune_params generic_tunings =
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  NAMED_PARAM (memmov_cost, 4),
  NAMED_PARAM (issue_rate, 2),
  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_NOTHING),
  8,    /* function_align.  */
  2,    /* int_reassoc_width.  */
  4,    /* fp_reassoc_width.  */
  1     /* vec_reassoc_width.  */

static const struct tune_params cortexa53_tunings =
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  NAMED_PARAM (memmov_cost, 4),
  NAMED_PARAM (issue_rate, 2),
  NAMED_PARAM (fuseable_ops, (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
                              | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR)),
  8,    /* function_align.  */
  2,    /* int_reassoc_width.  */
  4,    /* fp_reassoc_width.  */
  1     /* vec_reassoc_width.  */

static const struct tune_params cortexa57_tunings =
  &cortexa57_extra_costs,
  &cortexa57_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  NAMED_PARAM (memmov_cost, 4),
  NAMED_PARAM (issue_rate, 3),
  NAMED_PARAM (fuseable_ops, (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
                              | AARCH64_FUSE_MOVK_MOVK)),
  16,   /* function_align.  */
  2,    /* int_reassoc_width.  */
  4,    /* fp_reassoc_width.  */
  1     /* vec_reassoc_width.  */

static const struct tune_params thunderx_tunings =
  &thunderx_extra_costs,
  &generic_addrcost_table,
  &thunderx_regmove_cost,
  &generic_vector_cost,
  NAMED_PARAM (memmov_cost, 6),
  NAMED_PARAM (issue_rate, 2),
  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_CMP_BRANCH),
  8,    /* function_align.  */
  2,    /* int_reassoc_width.  */
  4,    /* fp_reassoc_width.  */
  1     /* vec_reassoc_width.  */

static const struct tune_params xgene1_tunings =
  &xgene1_addrcost_table,
  &xgene1_regmove_cost,
  NAMED_PARAM (memmov_cost, 6),
  NAMED_PARAM (issue_rate, 4),
  NAMED_PARAM (fuseable_ops, AARCH64_FUSE_NOTHING),
  16,   /* function_align.  */
  16,   /* loop_align.  */
  2,    /* int_reassoc_width.  */
  4,    /* fp_reassoc_width.  */
  1     /* vec_reassoc_width.  */

/* A processor implementing AArch64.  */
  const char *const name;
  enum aarch64_processor core;
  unsigned architecture_version;
  const unsigned long flags;
  const struct tune_params *const tune;

/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS) \
  {NAME, SCHED, #ARCH, ARCH, FLAGS, &COSTS##_tunings},
#include "aarch64-cores.def"
  {"generic", cortexa53, "8", 8, AARCH64_FL_FOR_ARCH8, &generic_tunings},
  {NULL, aarch64_none, NULL, 0, 0, NULL}

/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, ARCH, FLAGS, NULL},
#include "aarch64-arches.def"
  {NULL, aarch64_none, NULL, 0, 0, NULL}

/* Target specification.  These are populated as command-line arguments
   are processed, or NULL if not specified.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)

/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;

/* ISA extensions in AArch64.  */
static const struct aarch64_option_extension all_extensions[] =
#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
  {NAME, FLAGS_ON, FLAGS_OFF},
#include "aarch64-option-extensions.def"
#undef AARCH64_OPT_EXTENSION

/* Used to track the size of an address when generating a pre/post
   increment address.  */
static machine_mode aarch64_memory_reference_mode;

/* Used to force GTY into this file.  */
static GTY(()) int gty_dummy;

/* A table of valid AArch64 "bitmask immediate" values for
   logical instructions.  */

#define AARCH64_NUM_BITMASKS  5334
static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
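/* Worked example (illustrative): a bitmask immediate replicates a rotated,
   contiguous run of set bits across the register, so values such as
   0x00ff00ff00ff00ff (eight ones in every 16-bit element) or
   0x0003fffc00000000 (one rotated 16-bit run in a 64-bit element) are
   encodable, while an arbitrary constant like 0x0000000012345678 is not;
   5334 is the number of distinct 64-bit encodings.  */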
typedef enum aarch64_cond_code
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
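/* Worked example: the enumeration above lists each condition code next to
   its inverse, so XOR-ing the low bit maps a code onto its inverse, e.g.
   AARCH64_EQ (0) ^ 1 == AARCH64_NE (1) and AARCH64_GE (10) ^ 1 ==
   AARCH64_LT (11).  */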
/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"

aarch64_min_divisions_for_recip_mul (enum machine_mode mode ATTRIBUTE_UNUSED)

aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED,
                             enum machine_mode mode)
  if (VECTOR_MODE_P (mode))
    return aarch64_tune_params->vec_reassoc_width;
  if (INTEGRAL_MODE_P (mode))
    return aarch64_tune_params->int_reassoc_width;
  if (FLOAT_MODE_P (mode))
    return aarch64_tune_params->fp_reassoc_width;

/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
aarch64_dbx_register_number (unsigned regno)
  if (GP_REGNUM_P (regno))
    return AARCH64_DWARF_R0 + regno - R0_REGNUM;
  else if (regno == SP_REGNUM)
    return AARCH64_DWARF_SP;
  else if (FP_REGNUM_P (regno))
    return AARCH64_DWARF_V0 + regno - V0_REGNUM;

  /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
     equivalent DWARF register.  */
  return DWARF_FRAME_REGISTERS;
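/* Worked example (values per the AArch64 DWARF register numbering): x0-x30
   map to DWARF registers 0-30, sp maps to 31, and v0-v31 map to 64-95, so
   for instance v3 yields AARCH64_DWARF_V0 + 3 == 67.  */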
/* Return TRUE if MODE is any of the large INT modes.  */
aarch64_vect_struct_mode_p (machine_mode mode)
  return mode == OImode || mode == CImode || mode == XImode;

/* Return TRUE if MODE is any of the vector modes.  */
aarch64_vector_mode_p (machine_mode mode)
  return aarch64_vector_mode_supported_p (mode)
         || aarch64_vect_struct_mode_p (mode);

/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
aarch64_array_mode_supported_p (machine_mode mode,
                                unsigned HOST_WIDE_INT nelems)
      && AARCH64_VALID_SIMD_QREG_MODE (mode)
      && (nelems >= 2 && nelems <= 4))

/* Implement HARD_REGNO_NREGS.  */
aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
  switch (aarch64_regno_regclass (regno))
      return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
      return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
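/* Worked example (assuming the usual AArch64 values UNITS_PER_WORD == 8 and
   UNITS_PER_VREG == 16): a 16-byte TImode value needs (16 + 8 - 1) / 8 == 2
   general registers, while a 32-byte OImode vector-structure value needs
   (32 + 16 - 1) / 16 == 2 FP/SIMD registers.  */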
/* Implement HARD_REGNO_MODE_OK.  */
aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return regno == CC_REGNUM;

  if (regno == SP_REGNUM)
    /* The purpose of comparing with ptr_mode is to support the
       global register variable associated with the stack pointer
       register via the syntax of asm ("wsp") in ILP32.  */
    return mode == Pmode || mode == ptr_mode;

  if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
    return mode == Pmode;

  if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))

  if (FP_REGNUM_P (regno))
      if (aarch64_vect_struct_mode_p (mode))
               (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;

/* Implement HARD_REGNO_CALLER_SAVE_MODE.  */
aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
  /* Handle modes that fit within single registers.  */
  if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
      if (GET_MODE_SIZE (mode) >= 4)
  /* Fall back to generic for multi-reg and very large modes.  */
    return choose_hard_reg_mode (regno, nregs, false);

/* Return true if calls to DECL should be treated as
   long-calls (i.e. called via a register).  */
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)

/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (i.e. called via a register).  */
aarch64_is_long_call_p (rtx sym)
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));

/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

   (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
aarch64_is_extend_from_extract (machine_mode mode, rtx mult_imm,
  HOST_WIDE_INT mult_val, extract_val;

  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))

  mult_val = INTVAL (mult_imm);
  extract_val = INTVAL (extract_imm);

      && extract_val < GET_MODE_BITSIZE (mode)
      && exact_log2 (extract_val & ~7) > 0
      && (extract_val & 7) <= 4
      && mult_val == (1 << (extract_val & 7)))
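/* Worked example for the check above (illustrative): in DImode, an
   EXTRACT_IMM of 34 gives extract_val & ~7 == 32 (a power of two) and
   extract_val & 7 == 2, so MULT_IMM must be 1 << 2 == 4; the pattern then
   matches an extend of a 32-bit value combined with a left shift by 2,
   i.e. the extended-register operand form.  */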
/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
emit_set_insn (rtx x, rtx y)
  return emit_insn (gen_rtx_SET (VOIDmode, x, y));

/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
  machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));

/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

aarch64_tls_get_addr (void)
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;

/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
  enum tls_model tls_kind = TLS_MODEL_NONE;

  if (GET_CODE (addr) == CONST)
      split_const (addr, &sym, &addend);
      if (GET_CODE (sym) == SYMBOL_REF)
        tls_kind = SYMBOL_REF_TLS_MODEL (sym);
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

/* We'll allow lo_sum's in addresses in our legitimate addresses
   so that combine would take care of combining addresses where
   necessary, but for generation purposes, we'll generate the address
   as:

        tmp = hi (symbol_ref);            adrp  x1, foo
        dest = lo_sum (tmp, symbol_ref);  add dest, x1, :lo_12:foo

        adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
        ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo

   Load TLS symbol, depending on TLS mechanism and TLS access model.

   Global Dynamic - Traditional TLS:
        add  dest, tmp, #:tlsgd_lo12:imm

   Global Dynamic - TLS Descriptors:
        adrp dest, :tlsdesc:imm
        ldr  tmp, [dest, #:tlsdesc_lo12:imm]
        add  dest, dest, #:tlsdesc_lo12:imm

        adrp tmp, :gottprel:imm
        ldr  dest, [tmp, #:gottprel_lo12:imm]

        add  t0, tp, #:tprel_hi12:imm
        add  t0, #:tprel_lo12_nc:imm  */

aarch64_load_symref_appropriately (rtx dest, rtx imm,
                                   enum aarch64_symbol_type type)
    case SYMBOL_SMALL_ABSOLUTE:
        /* In ILP32, the mode of dest can be either SImode or DImode.  */
        machine_mode mode = GET_MODE (dest);

        gcc_assert (mode == Pmode || mode == ptr_mode);

        if (can_create_pseudo_p ())
          tmp_reg = gen_reg_rtx (mode);

        emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
        emit_insn (gen_add_losym (dest, tmp_reg, imm));

    case SYMBOL_TINY_ABSOLUTE:
      emit_insn (gen_rtx_SET (Pmode, dest, imm));

    case SYMBOL_SMALL_GOT:
        /* In ILP32, the mode of dest can be either SImode or DImode,
           while the got entry is always of SImode size.  The mode of
           dest depends on how dest is used: if dest is assigned to a
           pointer (e.g. in the memory), it has SImode; it may have
           DImode if dest is dereferenced to access the memory.
           This is why we have to handle three different ldr_got_small
           patterns here (two patterns for ILP32).  */
        machine_mode mode = GET_MODE (dest);

        if (can_create_pseudo_p ())
          tmp_reg = gen_reg_rtx (mode);

        emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
        if (mode == ptr_mode)
              emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
              emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
            gcc_assert (mode == Pmode);
            emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));

    case SYMBOL_SMALL_TLSGD:
        rtx result = gen_rtx_REG (Pmode, R0_REGNUM);

        aarch64_emit_call_insn (gen_tlsgd_small (result, imm));
        insns = get_insns ();

        RTL_CONST_CALL_P (insns) = 1;
        emit_libcall_block (insns, dest, result, imm);

    case SYMBOL_SMALL_TLSDESC:
        machine_mode mode = GET_MODE (dest);
        rtx x0 = gen_rtx_REG (mode, R0_REGNUM);

        gcc_assert (mode == Pmode || mode == ptr_mode);

        /* In ILP32, the got entry is always of SImode size.  Unlike
           small GOT, the dest is fixed at reg 0.  */
          emit_insn (gen_tlsdesc_small_si (imm));
          emit_insn (gen_tlsdesc_small_di (imm));
        tp = aarch64_load_tp (NULL);

          tp = gen_lowpart (mode, tp);

        emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
        set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);

    case SYMBOL_SMALL_GOTTPREL:
        /* In ILP32, the mode of dest can be either SImode or DImode,
           while the got entry is always of SImode size.  The mode of
           dest depends on how dest is used: if dest is assigned to a
           pointer (e.g. in the memory), it has SImode; it may have
           DImode if dest is dereferenced to access the memory.
           This is why we have to handle three different tlsie_small
           patterns here (two patterns for ILP32).  */
        machine_mode mode = GET_MODE (dest);
        rtx tmp_reg = gen_reg_rtx (mode);
        rtx tp = aarch64_load_tp (NULL);

        if (mode == ptr_mode)
              emit_insn (gen_tlsie_small_di (tmp_reg, imm));
              emit_insn (gen_tlsie_small_si (tmp_reg, imm));
              tp = gen_lowpart (mode, tp);
            gcc_assert (mode == Pmode);
            emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));

        emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
        set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);

    case SYMBOL_SMALL_TPREL:
        rtx tp = aarch64_load_tp (NULL);
        emit_insn (gen_tlsle_small (dest, tp, imm));
        set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);

    case SYMBOL_TINY_GOT:
      emit_insn (gen_ldr_got_tiny (dest, imm));
/* Emit a move from SRC to DEST.  Assume that the move expanders can
   handle all moves if !can_create_pseudo_p ().  The distinction is
   important because, unlike emit_move_insn, the move expanders know
   how to force Pmode objects into the constant pool even when the
   constant pool address is not itself legitimate.  */
aarch64_emit_move (rtx dest, rtx src)
  return (can_create_pseudo_p ()
          ? emit_move_insn (dest, src)
          : emit_move_insn_1 (dest, src));

/* Split a 128-bit move operation into two 64-bit move operations,
   taking care to handle partial overlap of register to register
   copies.  Special cases are needed when moving between GP regs and
   FP regs.  SRC can be a register, constant or memory; DST a register
   or memory.  If either operand is memory it must not have any side
   effects.  */
aarch64_split_128bit_move (rtx dst, rtx src)
  machine_mode mode = GET_MODE (dst);

  gcc_assert (mode == TImode || mode == TFmode);
  gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
  gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);

  if (REG_P (dst) && REG_P (src))
      int src_regno = REGNO (src);
      int dst_regno = REGNO (dst);

      /* Handle FP <-> GP regs.  */
      if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
          src_lo = gen_lowpart (word_mode, src);
          src_hi = gen_highpart (word_mode, src);

              emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
              emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
              emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
              emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
      else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
          dst_lo = gen_lowpart (word_mode, dst);
          dst_hi = gen_highpart (word_mode, dst);

              emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
              emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
              emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
              emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));

  dst_lo = gen_lowpart (word_mode, dst);
  dst_hi = gen_highpart (word_mode, dst);
  src_lo = gen_lowpart (word_mode, src);
  src_hi = gen_highpart_mode (word_mode, mode, src);

  /* At most one pairing may overlap.  */
  if (reg_overlap_mentioned_p (dst_lo, src_hi))
      aarch64_emit_move (dst_hi, src_hi);
      aarch64_emit_move (dst_lo, src_lo);
      aarch64_emit_move (dst_lo, src_lo);
      aarch64_emit_move (dst_hi, src_hi);

aarch64_split_128bit_move_p (rtx dst, rtx src)
  return (! REG_P (src)
          || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));

/* Split a complex SIMD combine.  */

aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
  machine_mode src_mode = GET_MODE (src1);
  machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src1) && REG_P (src2))
      rtx (*gen) (rtx, rtx, rtx);

          gen = gen_aarch64_simd_combinev8qi;
          gen = gen_aarch64_simd_combinev4hi;
          gen = gen_aarch64_simd_combinev2si;
          gen = gen_aarch64_simd_combinev2sf;
          gen = gen_aarch64_simd_combinedi;
          gen = gen_aarch64_simd_combinedf;

      emit_insn (gen (dst, src1, src2));

/* Split a complex SIMD move.  */

aarch64_split_simd_move (rtx dst, rtx src)
  machine_mode src_mode = GET_MODE (src);
  machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src))
      rtx (*gen) (rtx, rtx);

      gcc_assert (VECTOR_MODE_P (src_mode));

          gen = gen_aarch64_split_simd_movv16qi;
          gen = gen_aarch64_split_simd_movv8hi;
          gen = gen_aarch64_split_simd_movv4si;
          gen = gen_aarch64_split_simd_movv2di;
          gen = gen_aarch64_split_simd_movv4sf;
          gen = gen_aarch64_split_simd_movv2df;

      emit_insn (gen (dst, src));

aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
  if (can_create_pseudo_p ())
    return force_reg (mode, value);

      x = aarch64_emit_move (x, value);

aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
  if (!aarch64_plus_immediate (GEN_INT (offset), mode))
      /* Load the full offset into a register.  This
         might be improvable in the future.  */
      high = GEN_INT (offset);

      high = aarch64_force_temporary (mode, temp, high);
      reg = aarch64_force_temporary (mode, temp,
                                     gen_rtx_PLUS (mode, high, reg));

  return plus_constant (mode, reg, offset);

aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
  unsigned HOST_WIDE_INT mask;
  unsigned HOST_WIDE_INT val;
  int one_match, zero_match, first_not_ffff_match;

  if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
        emit_insn (gen_rtx_SET (VOIDmode, dest, imm));

      /* We know we can't do this in 1 insn, and we must be able to do it
         in two; so don't mess around looking for sequences that don't buy
         us anything.  */
          emit_insn (gen_rtx_SET (VOIDmode, dest,
                                  GEN_INT (INTVAL (imm) & 0xffff)));
          emit_insn (gen_insv_immsi (dest, GEN_INT (16),
                                     GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
1229 subtargets
= optimize
&& can_create_pseudo_p ();
1234 first_not_ffff_match
= -1;
1236 for (i
= 0; i
< 64; i
+= 16, mask
<<= 16)
1238 if ((val
& mask
) == mask
)
1242 if (first_not_ffff_match
< 0)
1243 first_not_ffff_match
= i
;
1244 if ((val
& mask
) == 0)
1251 /* Set one of the quarters and then insert back into result. */
1252 mask
= 0xffffll
<< first_not_ffff_match
;
1255 emit_insn (gen_rtx_SET (VOIDmode
, dest
, GEN_INT (val
| mask
)));
1256 emit_insn (gen_insv_immdi (dest
, GEN_INT (first_not_ffff_match
),
1257 GEN_INT ((val
>> first_not_ffff_match
)
1264 if (zero_match
== 2)
1265 goto simple_sequence
;
1267 mask
= 0x0ffff0000UL
;
1268 for (i
= 16; i
< 64; i
+= 16, mask
<<= 16)
1270 HOST_WIDE_INT comp
= mask
& ~(mask
- 1);
1272 if (aarch64_uimm12_shift (val
- (val
& mask
)))
1276 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1277 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1278 GEN_INT (val
& mask
)));
1279 emit_insn (gen_adddi3 (dest
, subtarget
,
1280 GEN_INT (val
- (val
& mask
))));
1285 else if (aarch64_uimm12_shift (-(val
- ((val
+ comp
) & mask
))))
1289 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1290 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1291 GEN_INT ((val
+ comp
) & mask
)));
1292 emit_insn (gen_adddi3 (dest
, subtarget
,
1293 GEN_INT (val
- ((val
+ comp
) & mask
))));
1298 else if (aarch64_uimm12_shift (val
- ((val
- comp
) | ~mask
)))
1302 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1303 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1304 GEN_INT ((val
- comp
) | ~mask
)));
1305 emit_insn (gen_adddi3 (dest
, subtarget
,
1306 GEN_INT (val
- ((val
- comp
) | ~mask
))));
1311 else if (aarch64_uimm12_shift (-(val
- (val
| ~mask
))))
1315 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1316 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1317 GEN_INT (val
| ~mask
)));
1318 emit_insn (gen_adddi3 (dest
, subtarget
,
1319 GEN_INT (val
- (val
| ~mask
))));
1326 /* See if we can do it by arithmetically combining two
1328 for (i
= 0; i
< AARCH64_NUM_BITMASKS
; i
++)
1333 if (aarch64_uimm12_shift (val
- aarch64_bitmasks
[i
])
1334 || aarch64_uimm12_shift (-val
+ aarch64_bitmasks
[i
]))
1338 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1339 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1340 GEN_INT (aarch64_bitmasks
[i
])));
1341 emit_insn (gen_adddi3 (dest
, subtarget
,
1342 GEN_INT (val
- aarch64_bitmasks
[i
])));
1348 for (j
= 0; j
< 64; j
+= 16, mask
<<= 16)
1350 if ((aarch64_bitmasks
[i
] & ~mask
) == (val
& ~mask
))
1354 emit_insn (gen_rtx_SET (VOIDmode
, dest
,
1355 GEN_INT (aarch64_bitmasks
[i
])));
1356 emit_insn (gen_insv_immdi (dest
, GEN_INT (j
),
1357 GEN_INT ((val
>> j
) & 0xffff)));
1365 /* See if we can do it by logically combining two immediates. */
1366 for (i
= 0; i
< AARCH64_NUM_BITMASKS
; i
++)
1368 if ((aarch64_bitmasks
[i
] & val
) == aarch64_bitmasks
[i
])
1372 for (j
= i
+ 1; j
< AARCH64_NUM_BITMASKS
; j
++)
1373 if (val
== (aarch64_bitmasks
[i
] | aarch64_bitmasks
[j
]))
1377 subtarget
= subtargets
? gen_reg_rtx (mode
) : dest
;
1378 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1379 GEN_INT (aarch64_bitmasks
[i
])));
1380 emit_insn (gen_iordi3 (dest
, subtarget
,
1381 GEN_INT (aarch64_bitmasks
[j
])));
1387 else if ((val
& aarch64_bitmasks
[i
]) == val
)
1391 for (j
= i
+ 1; j
< AARCH64_NUM_BITMASKS
; j
++)
1392 if (val
== (aarch64_bitmasks
[j
] & aarch64_bitmasks
[i
]))
1396 subtarget
= subtargets
? gen_reg_rtx (mode
) : dest
;
1397 emit_insn (gen_rtx_SET (VOIDmode
, subtarget
,
1398 GEN_INT (aarch64_bitmasks
[j
])));
1399 emit_insn (gen_anddi3 (dest
, subtarget
,
1400 GEN_INT (aarch64_bitmasks
[i
])));
1408 if (one_match
> zero_match
)
1410 /* Set either first three quarters or all but the third. */
1411 mask
= 0xffffll
<< (16 - first_not_ffff_match
);
1413 emit_insn (gen_rtx_SET (VOIDmode
, dest
,
1414 GEN_INT (val
| mask
| 0xffffffff00000000ull
)));
1417 /* Now insert other two quarters. */
1418 for (i
= first_not_ffff_match
+ 16, mask
<<= (first_not_ffff_match
<< 1);
1419 i
< 64; i
+= 16, mask
<<= 16)
1421 if ((val
& mask
) != mask
)
1424 emit_insn (gen_insv_immdi (dest
, GEN_INT (i
),
1425 GEN_INT ((val
>> i
) & 0xffff)));
1435 for (i
= 0; i
< 64; i
+= 16, mask
<<= 16)
1437 if ((val
& mask
) != 0)
1442 emit_insn (gen_rtx_SET (VOIDmode
, dest
,
1443 GEN_INT (val
& mask
)));
1450 emit_insn (gen_insv_immdi (dest
, GEN_INT (i
),
1451 GEN_INT ((val
>> i
) & 0xffff)));
aarch64_expand_mov_immediate (rtx dest, rtx imm)
  machine_mode mode = GET_MODE (dest);

  gcc_assert (mode == SImode || mode == DImode);

  /* Check on what type of symbol it is.  */
  if (GET_CODE (imm) == SYMBOL_REF
      || GET_CODE (imm) == LABEL_REF
      || GET_CODE (imm) == CONST)
      rtx mem, base, offset;
      enum aarch64_symbol_type sty;

      /* If we have (const (plus symbol offset)), separate out the offset
         before we start classifying the symbol.  */
      split_const (imm, &base, &offset);

      sty = aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR);
        case SYMBOL_FORCE_TO_MEM:
          if (offset != const0_rtx
              && targetm.cannot_force_const_mem (mode, imm))
              gcc_assert (can_create_pseudo_p ());
              base = aarch64_force_temporary (mode, dest, base);
              base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
              aarch64_emit_move (dest, base);

          mem = force_const_mem (ptr_mode, imm);

          if (mode != ptr_mode)
            mem = gen_rtx_ZERO_EXTEND (mode, mem);
          emit_insn (gen_rtx_SET (VOIDmode, dest, mem));

        case SYMBOL_SMALL_TLSGD:
        case SYMBOL_SMALL_TLSDESC:
        case SYMBOL_SMALL_GOTTPREL:
        case SYMBOL_SMALL_GOT:
        case SYMBOL_TINY_GOT:
          if (offset != const0_rtx)
              gcc_assert (can_create_pseudo_p ());
              base = aarch64_force_temporary (mode, dest, base);
              base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
              aarch64_emit_move (dest, base);

        case SYMBOL_SMALL_TPREL:
        case SYMBOL_SMALL_ABSOLUTE:
        case SYMBOL_TINY_ABSOLUTE:
          aarch64_load_symref_appropriately (dest, imm, sty);

  if (!CONST_INT_P (imm))
      if (GET_CODE (imm) == HIGH)
        emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
          rtx mem = force_const_mem (mode, imm);
          emit_insn (gen_rtx_SET (VOIDmode, dest, mem));

  aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));

aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
                                 tree exp ATTRIBUTE_UNUSED)
  /* Currently, always true.  */

/* Implement TARGET_PASS_BY_REFERENCE.  */

aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
                           bool named ATTRIBUTE_UNUSED)
  machine_mode dummymode;

  /* GET_MODE_SIZE (BLKmode) is useless since it is 0.  */
  size = (mode == BLKmode && type)
         ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);

  /* Aggregates are passed by reference based on their size.  */
  if (type && AGGREGATE_TYPE_P (type))
      size = int_size_in_bytes (type);

  /* Variable sized arguments are always returned by reference.  */

  /* Can this be a candidate to be passed in fp/simd register(s)?  */
  if (aarch64_vfp_is_call_or_return_candidate (mode, type,

  /* Arguments which are variable sized or larger than 2 registers are
     passed by reference unless they are a homogeneous floating-point
     aggregate.  */
  return size > 2 * UNITS_PER_WORD;

/* Return TRUE if VALTYPE is padded to its least significant bits.  */
aarch64_return_in_msb (const_tree valtype)
  machine_mode dummy_mode;

  /* Never happens in little-endian mode.  */
  if (!BYTES_BIG_ENDIAN)

  /* Only composite types smaller than or equal to 16 bytes can
     be potentially returned in registers.  */
  if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
      || int_size_in_bytes (valtype) <= 0
      || int_size_in_bytes (valtype) > 16)

  /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
     or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
     is always passed/returned in the least significant bits of fp/simd
     registers.  */
  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
                                               &dummy_mode, &dummy_int, NULL))

/* Implement TARGET_FUNCTION_VALUE.
   Define how to find the value returned by a function.  */

aarch64_function_value (const_tree type, const_tree func,
                        bool outgoing ATTRIBUTE_UNUSED)
  machine_mode ag_mode;

  mode = TYPE_MODE (type);
  if (INTEGRAL_TYPE_P (type))
    mode = promote_function_mode (type, mode, &unsignedp, func, 1);

  if (aarch64_return_in_msb (type))
      HOST_WIDE_INT size = int_size_in_bytes (type);

      if (size % UNITS_PER_WORD != 0)
          size += UNITS_PER_WORD - size % UNITS_PER_WORD;
          mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);

  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
                                               &ag_mode, &count, NULL))
      if (!aarch64_composite_type_p (type, mode))
          gcc_assert (count == 1 && mode == ag_mode);
          return gen_rtx_REG (mode, V0_REGNUM);

          par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
          for (i = 0; i < count; i++)
              rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
              tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
                                       GEN_INT (i * GET_MODE_SIZE (ag_mode)));
              XVECEXP (par, 0, i) = tmp;

    return gen_rtx_REG (mode, R0_REGNUM);

/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
   Return true if REGNO is the number of a hard register in which the values
   of called function may come back.  */

aarch64_function_value_regno_p (const unsigned int regno)
  /* Maximum of 16 bytes can be returned in the general registers.  Examples
     of 16-byte return values are: 128-bit integers and 16-byte small
     structures (excluding homogeneous floating-point aggregates).  */
  if (regno == R0_REGNUM || regno == R1_REGNUM)

  /* Up to four fp/simd registers can return a function value, e.g. a
     homogeneous floating-point aggregate having four members.  */
  if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
    return !TARGET_GENERAL_REGS_ONLY;

/* Implement TARGET_RETURN_IN_MEMORY.

   If the type T of the result of a function is such that
   would require that arg be passed as a value in a register (or set of
   registers) according to the parameter passing rules, then the result
   is returned in the same registers as would be used for such an
   argument.  */

aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
  machine_mode ag_mode;

  if (!AGGREGATE_TYPE_P (type)
      && TREE_CODE (type) != COMPLEX_TYPE
      && TREE_CODE (type) != VECTOR_TYPE)
    /* Simple scalar types always returned in registers.  */

  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),

  /* Types larger than 2 registers returned in memory.  */
  size = int_size_in_bytes (type);
  return (size < 0 || size > 2 * UNITS_PER_WORD);
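/* Worked example (illustrative): with UNITS_PER_WORD == 8, a plain 24-byte
   structure is returned in memory through a buffer supplied by the caller,
   while a 16-byte structure or a 128-bit integer still comes back in
   registers.  */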
aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
                               const_tree type, int *nregs)
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  return aarch64_vfp_is_call_or_return_candidate (mode,
                                                  &pcum->aapcs_vfp_rmode,

/* Given MODE and TYPE of a function argument, return the alignment in
   bits.  The idea is to suppress any stronger alignment requested by
   the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
   This is a helper function for local use only.  */

aarch64_function_arg_alignment (machine_mode mode, const_tree type)
  unsigned int alignment;

      if (!integer_zerop (TYPE_SIZE (type)))
          if (TYPE_MODE (type) == mode)
            alignment = TYPE_ALIGN (type);
            alignment = GET_MODE_ALIGNMENT (mode);
    alignment = GET_MODE_ALIGNMENT (mode);

/* Layout a function argument according to the AAPCS64 rules.  The rule
   numbers refer to the rule numbers in the AAPCS64.  */

aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
                    bool named ATTRIBUTE_UNUSED)
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int ncrn, nvrn, nregs;
  bool allocate_ncrn, allocate_nvrn;

  /* We need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)

  pcum->aapcs_arg_processed = true;

  /* Size in bytes, rounded to the nearest multiple of 8 bytes.  */
    = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),

  allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
  allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,

  /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
     The following code thus handles passing by SIMD/FP registers first.  */

  nvrn = pcum->aapcs_nvrn;

  /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
     and homogeneous short-vector aggregates (HVA).  */
      if (nvrn + nregs <= NUM_FP_ARG_REGS)
          pcum->aapcs_nextnvrn = nvrn + nregs;
          if (!aarch64_composite_type_p (type, mode))
              gcc_assert (nregs == 1);
              pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);

              par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
              for (i = 0; i < nregs; i++)
                  rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
                                         V0_REGNUM + nvrn + i);
                  tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
                    GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
                  XVECEXP (par, 0, i) = tmp;
              pcum->aapcs_reg = par;

          /* C.3 NSRN is set to 8.  */
          pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;

  ncrn = pcum->aapcs_ncrn;
  nregs = size / UNITS_PER_WORD;

  /* C6 - C9, though the sign and zero extension semantics are
     handled elsewhere.  This is the case where the argument fits
     entirely in general registers.  */
  if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
      unsigned int alignment = aarch64_function_arg_alignment (mode, type);

      gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);

      /* C.8 if the argument has an alignment of 16 then the NGRN is
         rounded up to the next even number.  */
      if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)

      gcc_assert (ncrn + nregs <= NUM_ARG_REGS);

      /* NREGS can be 0 when e.g. an empty structure is to be passed.
         A reg is still generated for it, but the caller should be smart
         enough not to use it.  */
      if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
        pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);

          par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
          for (i = 0; i < nregs; i++)
              rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
              tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
                                       GEN_INT (i * UNITS_PER_WORD));
              XVECEXP (par, 0, i) = tmp;
          pcum->aapcs_reg = par;

      pcum->aapcs_nextncrn = ncrn + nregs;

  pcum->aapcs_nextncrn = NUM_ARG_REGS;

  /* The argument is passed on stack; record the needed number of words for
     this argument and align the total size if necessary.  */
  pcum->aapcs_stack_words = size / UNITS_PER_WORD;
  if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
    pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
                                               16 / UNITS_PER_WORD);
/* Implement TARGET_FUNCTION_ARG.  */

aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
                      const_tree type, bool named)
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);

  if (mode == VOIDmode)

  aarch64_layout_arg (pcum_v, mode, type, named);
  return pcum->aapcs_reg;

aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
                              const_tree fntype ATTRIBUTE_UNUSED,
                              rtx libname ATTRIBUTE_UNUSED,
                              const_tree fndecl ATTRIBUTE_UNUSED,
                              unsigned n_named ATTRIBUTE_UNUSED)
  pcum->aapcs_ncrn = 0;
  pcum->aapcs_nvrn = 0;
  pcum->aapcs_nextncrn = 0;
  pcum->aapcs_nextnvrn = 0;
  pcum->pcs_variant = ARM_PCS_AAPCS64;
  pcum->aapcs_reg = NULL_RTX;
  pcum->aapcs_arg_processed = false;
  pcum->aapcs_stack_words = 0;
  pcum->aapcs_stack_size = 0;

aarch64_function_arg_advance (cumulative_args_t pcum_v,
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  if (pcum->pcs_variant == ARM_PCS_AAPCS64)
      aarch64_layout_arg (pcum_v, mode, type, named);
      gcc_assert ((pcum->aapcs_reg != NULL_RTX)
                  != (pcum->aapcs_stack_words != 0));
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
      pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
      pcum->aapcs_stack_size += pcum->aapcs_stack_words;
      pcum->aapcs_stack_words = 0;
      pcum->aapcs_reg = NULL_RTX;

aarch64_function_arg_regno_p (unsigned regno)
  return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
          || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));

/* Implement FUNCTION_ARG_BOUNDARY.  Every parameter gets at least
   PARM_BOUNDARY bits of alignment, but will be given anything up
   to STACK_BOUNDARY bits if the type requires it.  This makes sure
   that both before and after the layout of each argument, the Next
   Stacked Argument Address (NSAA) will have a minimum alignment of
   8 bytes.  */

aarch64_function_arg_boundary (machine_mode mode, const_tree type)
  unsigned int alignment = aarch64_function_arg_alignment (mode, type);

  if (alignment < PARM_BOUNDARY)
    alignment = PARM_BOUNDARY;
  if (alignment > STACK_BOUNDARY)
    alignment = STACK_BOUNDARY;

/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).

   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte of the stack slot has useful data.

   Small aggregate types are placed in the lowest memory address.

   The related parameter passing rules are B.4, C.3, C.5 and C.14.  */

aarch64_pad_arg_upward (machine_mode mode, const_tree type)
  /* On little-endian targets, the least significant byte of every stack
     argument is passed at the lowest byte address of the stack slot.  */
  if (!BYTES_BIG_ENDIAN)

  /* Otherwise, integral, floating-point and pointer types are padded downward:
     the least significant byte of a stack argument is passed at the highest
     byte address of the stack slot.  */
      ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
         || POINTER_TYPE_P (type))
      : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))

  /* Everything else padded upward, i.e. data in first byte of stack slot.  */

/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).

   It specifies padding for the last (may also be the only)
   element of a block move between registers and memory.  If
   assuming the block is in the memory, padding upward means that
   the last element is padded after its highest significant byte,
   while in downward padding, the last element is padded at its
   least significant byte side.

   Small aggregates and small complex types are always padded
   upwards.

   We don't need to worry about homogeneous floating-point or
   short-vector aggregates; their move is not affected by the
   padding direction determined here.  Regardless of endianness,
   each element of such an aggregate is put in the least
   significant bits of a fp/simd register.

   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */

aarch64_pad_reg_upward (machine_mode mode, const_tree type,
                        bool first ATTRIBUTE_UNUSED)

  /* Small composite types are always padded upward.  */
  if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
      HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
                            : GET_MODE_SIZE (mode));
      if (size < 2 * UNITS_PER_WORD)

  /* Otherwise, use the default padding.  */
  return !BYTES_BIG_ENDIAN;

aarch64_libgcc_cmp_return_mode (void)

aarch64_frame_pointer_required (void)
  /* In aarch64_override_options_after_change
     flag_omit_leaf_frame_pointer turns off the frame pointer by
     default.  Turn it back on now if we've not got a leaf
     function.  */
  if (flag_omit_leaf_frame_pointer
      && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
/* Mark the registers that need to be saved by the callee and calculate
   the size of the callee-saved registers area and frame record (both FP
   and LR may be omitted).  */
aarch64_layout_frame (void)
  HOST_WIDE_INT offset = 0;

  if (reload_completed && cfun->machine->frame.laid_out)

#define SLOT_NOT_REQUIRED (-2)
#define SLOT_REQUIRED     (-1)

  cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER;
  cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER;

  /* First mark all the registers that really need to be saved...  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;

  /* ... that includes the eh data registers (if needed)...  */
  if (crtl->calls_eh_return)
    for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
      cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]

  /* ... and any callee saved register that dataflow says is live.  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    if (df_regs_ever_live_p (regno)
        && (regno == R30_REGNUM
            || !call_used_regs[regno]))
      cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    if (df_regs_ever_live_p (regno)
        && !call_used_regs[regno])
      cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;

  if (frame_pointer_needed)
      /* FP and LR are placed in the linkage record.  */
      cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
      cfun->machine->frame.wb_candidate1 = R29_REGNUM;
      cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
      cfun->machine->frame.wb_candidate2 = R30_REGNUM;
      cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
      offset += 2 * UNITS_PER_WORD;

  /* Now assign stack slots for them.  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
        cfun->machine->frame.reg_offset[regno] = offset;
        if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
          cfun->machine->frame.wb_candidate1 = regno;
        else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER)
          cfun->machine->frame.wb_candidate2 = regno;
        offset += UNITS_PER_WORD;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
        cfun->machine->frame.reg_offset[regno] = offset;
        if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
          cfun->machine->frame.wb_candidate1 = regno;
        else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER
                 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
          cfun->machine->frame.wb_candidate2 = regno;
        offset += UNITS_PER_WORD;

  cfun->machine->frame.padding0 =
    (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
  offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);

  cfun->machine->frame.saved_regs_size = offset;

  cfun->machine->frame.hard_fp_offset
    = AARCH64_ROUND_UP (cfun->machine->frame.saved_varargs_size
                        + cfun->machine->frame.saved_regs_size,
                        STACK_BOUNDARY / BITS_PER_UNIT);

  cfun->machine->frame.frame_size
    = AARCH64_ROUND_UP (cfun->machine->frame.hard_fp_offset
                        + crtl->outgoing_args_size,
                        STACK_BOUNDARY / BITS_PER_UNIT);

  cfun->machine->frame.laid_out = true;
aarch64_register_saved_on_entry (int regno)
  return cfun->machine->frame.reg_offset[regno] >= 0;

aarch64_next_callee_save (unsigned regno, unsigned limit)
  while (regno <= limit && !aarch64_register_saved_on_entry (regno))

aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
                           HOST_WIDE_INT adjustment)
  rtx base_rtx = stack_pointer_rtx;

  reg = gen_rtx_REG (mode, regno);
  mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
                            plus_constant (Pmode, base_rtx, -adjustment));
  mem = gen_rtx_MEM (mode, mem);

  insn = emit_move_insn (mem, reg);
  RTX_FRAME_RELATED_P (insn) = 1;

aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
                          HOST_WIDE_INT adjustment)
      return gen_storewb_pairdi_di (base, base, reg, reg2,
                                    GEN_INT (-adjustment),
                                    GEN_INT (UNITS_PER_WORD - adjustment));
      return gen_storewb_pairdf_di (base, base, reg, reg2,
                                    GEN_INT (-adjustment),
                                    GEN_INT (UNITS_PER_WORD - adjustment));
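/* Illustrative note (not from the original sources): the write-back store
   pair built above corresponds to a pre-indexed instruction such as

        stp     x29, x30, [sp, #-16]!

   which stores two registers and adjusts the base register in a single
   instruction, as used when establishing the frame record.  */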
aarch64_pushwb_pair_reg (machine_mode mode, unsigned regno1,
                         unsigned regno2, HOST_WIDE_INT adjustment)
  rtx reg1 = gen_rtx_REG (mode, regno1);
  rtx reg2 = gen_rtx_REG (mode, regno2);

  insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
  RTX_FRAME_RELATED_P (insn) = 1;

aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
                         HOST_WIDE_INT adjustment)
      return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
                                   GEN_INT (UNITS_PER_WORD));
      return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
                                   GEN_INT (UNITS_PER_WORD));

aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
      return gen_store_pairdi (mem1, reg1, mem2, reg2);
      return gen_store_pairdf (mem1, reg1, mem2, reg2);

aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
      return gen_load_pairdi (reg1, mem1, reg2, mem2);
      return gen_load_pairdf (reg1, mem1, reg2, mem2);

aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
                           unsigned start, unsigned limit, bool skip_wb)
  rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
                                            ? gen_frame_mem : gen_rtx_MEM);

  for (regno = aarch64_next_callee_save (start, limit);
       regno = aarch64_next_callee_save (regno + 1, limit))
      HOST_WIDE_INT offset;

          && (regno == cfun->machine->frame.wb_candidate1
              || regno == cfun->machine->frame.wb_candidate2))

      reg = gen_rtx_REG (mode, regno);
      offset = start_offset + cfun->machine->frame.reg_offset[regno];
      mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,

      regno2 = aarch64_next_callee_save (regno + 1, limit);

          && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
              == cfun->machine->frame.reg_offset[regno2]))
          rtx reg2 = gen_rtx_REG (mode, regno2);

          offset = start_offset + cfun->machine->frame.reg_offset[regno2];
          mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,

          insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,

          /* The first part of a frame-related parallel insn is
             always assumed to be relevant to the frame
             calculations; subsequent parts are only
             frame-related if explicitly marked.  */
          RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;

        insn = emit_move_insn (mem, reg);

      RTX_FRAME_RELATED_P (insn) = 1;

aarch64_restore_callee_saves (machine_mode mode,
                              HOST_WIDE_INT start_offset, unsigned start,
                              unsigned limit, bool skip_wb, rtx *cfi_ops)
  rtx base_rtx = stack_pointer_rtx;
  rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
                                            ? gen_frame_mem : gen_rtx_MEM);
  HOST_WIDE_INT offset;

  for (regno = aarch64_next_callee_save (start, limit);
       regno = aarch64_next_callee_save (regno + 1, limit))
          && (regno == cfun->machine->frame.wb_candidate1
              || regno == cfun->machine->frame.wb_candidate2))

      reg = gen_rtx_REG (mode, regno);
      offset = start_offset + cfun->machine->frame.reg_offset[regno];
      mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));

      regno2 = aarch64_next_callee_save (regno + 1, limit);

          && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
              == cfun->machine->frame.reg_offset[regno2]))
          rtx reg2 = gen_rtx_REG (mode, regno2);

          offset = start_offset + cfun->machine->frame.reg_offset[regno2];
          mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
          emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));

          *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);

        emit_move_insn (reg, mem);
      *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
/* AArch64 stack frames generated by this compiler look like:

        +-------------------------------+
        |  incoming stack arguments     |
        +-------------------------------+
        |                               | <-- incoming stack pointer (aligned)
        |  callee-allocated save area   |
        |  for register varargs         |
        +-------------------------------+
        |  local variables              | <-- frame_pointer_rtx
        +-------------------------------+
        |  padding0                     | \
        +-------------------------------+  |
        |  callee-saved registers       |  | frame.saved_regs_size
        +-------------------------------+  |
        |  LR'                          |  |
        +-------------------------------+  |
        |  FP'                          | / <- hard_frame_pointer_rtx (aligned)
        +-------------------------------+
        |  dynamic allocation           |
        +-------------------------------+
        |  padding                      |
        +-------------------------------+
        |  outgoing stack arguments     | <-- arg_pointer
        +-------------------------------+
        |                               | <-- stack_pointer_rtx (aligned)

   Dynamic stack allocations via alloca() decrease stack_pointer_rtx
   but leave frame_pointer_rtx and hard_frame_pointer_rtx unchanged.  */
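/* Illustrative sketch (not part of the backend): the prologue and epilogue
   below work from a handful of byte counts produced by aarch64_layout_frame.
   The struct and helper here are hypothetical stand-ins using plain integer
   types; the real values live in cfun->machine->frame as HOST_WIDE_INTs.  */

struct sketch_frame
{
  long long frame_size;      /* Total size of the frame, in bytes.  */
  long long hard_fp_offset;  /* Offset of FP'/LR' from the top of the frame.  */
};

static long long
sketch_fp_offset (const struct sketch_frame *f)
{
  /* Distance from the hard frame pointer down to the final stack pointer,
     i.e. the fp_offset used by "add fp, sp, #fp_offset" in the prologue.  */
  return f->frame_size - f->hard_fp_offset;
}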
2429 /* Generate the prologue instructions for entry into a function.
2430 Establish the stack frame by decreasing the stack pointer with a
2431 properly calculated size and, if necessary, create a frame record
2432 filled with the values of LR and previous frame pointer. The
2433 current FP is also set up if it is in use. */
2436 aarch64_expand_prologue (void)
2438 /* sub sp, sp, #<frame_size>
2439 stp {fp, lr}, [sp, #<frame_size> - 16]
2440 add fp, sp, #<frame_size> - hardfp_offset
2441 stp {cs_reg}, [fp, #-16] etc.
2443 sub sp, sp, <final_adjustment_if_any>
2445 HOST_WIDE_INT frame_size
, offset
;
2446 HOST_WIDE_INT fp_offset
; /* Offset from hard FP to SP. */
2447 HOST_WIDE_INT hard_fp_offset
;
2450 aarch64_layout_frame ();
2452 offset
= frame_size
= cfun
->machine
->frame
.frame_size
;
2453 hard_fp_offset
= cfun
->machine
->frame
.hard_fp_offset
;
2454 fp_offset
= frame_size
- hard_fp_offset
;
2456 if (flag_stack_usage_info
)
2457 current_function_static_stack_size
= frame_size
;
  /* Store pairs and load pairs have a range of only -512 to 504.  */
2462 /* When the frame has a large size, an initial decrease is done on
2463 the stack pointer to jump over the callee-allocated save area for
2464 register varargs, the local variable area and/or the callee-saved
2465 register area. This will allow the pre-index write-back
2466 store pair instructions to be used for setting up the stack frame
2468 offset
= hard_fp_offset
;
2470 offset
= cfun
->machine
->frame
.saved_regs_size
;
2472 frame_size
-= (offset
+ crtl
->outgoing_args_size
);
2475 if (frame_size
>= 0x1000000)
2477 rtx op0
= gen_rtx_REG (Pmode
, IP0_REGNUM
);
2478 emit_move_insn (op0
, GEN_INT (-frame_size
));
2479 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
, op0
));
2481 add_reg_note (insn
, REG_CFA_ADJUST_CFA
,
2482 gen_rtx_SET (VOIDmode
, stack_pointer_rtx
,
2483 plus_constant (Pmode
, stack_pointer_rtx
,
2485 RTX_FRAME_RELATED_P (insn
) = 1;
2487 else if (frame_size
> 0)
2489 int hi_ofs
= frame_size
& 0xfff000;
2490 int lo_ofs
= frame_size
& 0x000fff;
2494 insn
= emit_insn (gen_add2_insn
2495 (stack_pointer_rtx
, GEN_INT (-hi_ofs
)));
2496 RTX_FRAME_RELATED_P (insn
) = 1;
2500 insn
= emit_insn (gen_add2_insn
2501 (stack_pointer_rtx
, GEN_INT (-lo_ofs
)));
2502 RTX_FRAME_RELATED_P (insn
) = 1;
2511 bool skip_wb
= false;
2513 if (frame_pointer_needed
)
2519 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
,
2520 GEN_INT (-offset
)));
2521 RTX_FRAME_RELATED_P (insn
) = 1;
2523 aarch64_save_callee_saves (DImode
, fp_offset
, R29_REGNUM
,
2527 aarch64_pushwb_pair_reg (DImode
, R29_REGNUM
, R30_REGNUM
, offset
);
2529 /* Set up frame pointer to point to the location of the
2530 previous frame pointer on the stack. */
2531 insn
= emit_insn (gen_add3_insn (hard_frame_pointer_rtx
,
2533 GEN_INT (fp_offset
)));
2534 RTX_FRAME_RELATED_P (insn
) = 1;
2535 emit_insn (gen_stack_tie (stack_pointer_rtx
, hard_frame_pointer_rtx
));
2539 unsigned reg1
= cfun
->machine
->frame
.wb_candidate1
;
2540 unsigned reg2
= cfun
->machine
->frame
.wb_candidate2
;
2543 || reg1
== FIRST_PSEUDO_REGISTER
2544 || (reg2
== FIRST_PSEUDO_REGISTER
2547 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
,
2548 GEN_INT (-offset
)));
2549 RTX_FRAME_RELATED_P (insn
) = 1;
2553 machine_mode mode1
= (reg1
<= R30_REGNUM
) ? DImode
: DFmode
;
2557 if (reg2
== FIRST_PSEUDO_REGISTER
)
2558 aarch64_pushwb_single_reg (mode1
, reg1
, offset
);
2560 aarch64_pushwb_pair_reg (mode1
, reg1
, reg2
, offset
);
2564 aarch64_save_callee_saves (DImode
, fp_offset
, R0_REGNUM
, R30_REGNUM
,
2566 aarch64_save_callee_saves (DFmode
, fp_offset
, V0_REGNUM
, V31_REGNUM
,
2570 /* when offset >= 512,
2571 sub sp, sp, #<outgoing_args_size> */
2572 if (frame_size
> -1)
2574 if (crtl
->outgoing_args_size
> 0)
2576 insn
= emit_insn (gen_add2_insn
2578 GEN_INT (- crtl
->outgoing_args_size
)));
2579 RTX_FRAME_RELATED_P (insn
) = 1;
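/* Illustrative sketch (not part of the backend): when the initial stack
   adjustment does not fit one add/sub immediate, the prologue above splits
   it into a 12-bit chunk shifted left by 12 plus a plain 12-bit chunk
   (hi_ofs/lo_ofs).  The helper below only demonstrates that split; its name
   and the use of plain long long are hypothetical.  */

static void
sketch_split_sp_adjustment (long long frame_size,
                            long long *hi_ofs, long long *lo_ofs)
{
  /* AArch64 add/sub immediates are 12 bits wide, optionally shifted left
     by 12, so any adjustment below 0x1000000 needs at most two adds.  */
  *hi_ofs = frame_size & 0xfff000;
  *lo_ofs = frame_size & 0x000fff;
}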
/* Return TRUE if we can use a simple_return insn.

   This function checks whether the callee-saved stack is empty, which
   means no restore actions are needed.  The pro_and_epilogue pass will
   use this to check whether the shrink-wrapping optimization is feasible.  */
2591 aarch64_use_return_insn_p (void)
2593 if (!reload_completed
)
2599 aarch64_layout_frame ();
2601 return cfun
->machine
->frame
.frame_size
== 0;
2604 /* Generate the epilogue instructions for returning from a function. */
2606 aarch64_expand_epilogue (bool for_sibcall
)
2608 HOST_WIDE_INT frame_size
, offset
;
2609 HOST_WIDE_INT fp_offset
;
2610 HOST_WIDE_INT hard_fp_offset
;
  /* We need to add a memory barrier to prevent reads from the deallocated
     stack.  */
2613 bool need_barrier_p
= (get_frame_size () != 0
2614 || cfun
->machine
->frame
.saved_varargs_size
);
2616 aarch64_layout_frame ();
2618 offset
= frame_size
= cfun
->machine
->frame
.frame_size
;
2619 hard_fp_offset
= cfun
->machine
->frame
.hard_fp_offset
;
2620 fp_offset
= frame_size
- hard_fp_offset
;
2622 /* Store pairs and load pairs have a range only -512 to 504. */
2625 offset
= hard_fp_offset
;
2627 offset
= cfun
->machine
->frame
.saved_regs_size
;
2629 frame_size
-= (offset
+ crtl
->outgoing_args_size
);
2631 if (!frame_pointer_needed
&& crtl
->outgoing_args_size
> 0)
2633 insn
= emit_insn (gen_add2_insn
2635 GEN_INT (crtl
->outgoing_args_size
)));
2636 RTX_FRAME_RELATED_P (insn
) = 1;
2642 /* If there were outgoing arguments or we've done dynamic stack
2643 allocation, then restore the stack pointer from the frame
2644 pointer. This is at most one insn and more efficient than using
2645 GCC's internal mechanism. */
2646 if (frame_pointer_needed
2647 && (crtl
->outgoing_args_size
|| cfun
->calls_alloca
))
2649 if (cfun
->calls_alloca
)
2650 emit_insn (gen_stack_tie (stack_pointer_rtx
, stack_pointer_rtx
));
2652 insn
= emit_insn (gen_add3_insn (stack_pointer_rtx
,
2653 hard_frame_pointer_rtx
,
2655 offset
= offset
- fp_offset
;
2660 unsigned reg1
= cfun
->machine
->frame
.wb_candidate1
;
2661 unsigned reg2
= cfun
->machine
->frame
.wb_candidate2
;
2662 bool skip_wb
= true;
2665 if (frame_pointer_needed
)
2668 || reg1
== FIRST_PSEUDO_REGISTER
2669 || (reg2
== FIRST_PSEUDO_REGISTER
2673 aarch64_restore_callee_saves (DImode
, fp_offset
, R0_REGNUM
, R30_REGNUM
,
2675 aarch64_restore_callee_saves (DFmode
, fp_offset
, V0_REGNUM
, V31_REGNUM
,
2679 emit_insn (gen_stack_tie (stack_pointer_rtx
, stack_pointer_rtx
));
2683 machine_mode mode1
= (reg1
<= R30_REGNUM
) ? DImode
: DFmode
;
2684 rtx rreg1
= gen_rtx_REG (mode1
, reg1
);
2686 cfi_ops
= alloc_reg_note (REG_CFA_RESTORE
, rreg1
, cfi_ops
);
2687 if (reg2
== FIRST_PSEUDO_REGISTER
)
2689 rtx mem
= plus_constant (Pmode
, stack_pointer_rtx
, offset
);
2690 mem
= gen_rtx_POST_MODIFY (Pmode
, stack_pointer_rtx
, mem
);
2691 mem
= gen_rtx_MEM (mode1
, mem
);
2692 insn
= emit_move_insn (rreg1
, mem
);
2696 rtx rreg2
= gen_rtx_REG (mode1
, reg2
);
2698 cfi_ops
= alloc_reg_note (REG_CFA_RESTORE
, rreg2
, cfi_ops
);
2699 insn
= emit_insn (aarch64_gen_loadwb_pair
2700 (mode1
, stack_pointer_rtx
, rreg1
,
2706 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
,
2710 /* Reset the CFA to be SP + FRAME_SIZE. */
2711 rtx new_cfa
= stack_pointer_rtx
;
2713 new_cfa
= plus_constant (Pmode
, new_cfa
, frame_size
);
2714 cfi_ops
= alloc_reg_note (REG_CFA_DEF_CFA
, new_cfa
, cfi_ops
);
2715 REG_NOTES (insn
) = cfi_ops
;
2716 RTX_FRAME_RELATED_P (insn
) = 1;
2722 emit_insn (gen_stack_tie (stack_pointer_rtx
, stack_pointer_rtx
));
2724 if (frame_size
>= 0x1000000)
2726 rtx op0
= gen_rtx_REG (Pmode
, IP0_REGNUM
);
2727 emit_move_insn (op0
, GEN_INT (frame_size
));
2728 insn
= emit_insn (gen_add2_insn (stack_pointer_rtx
, op0
));
2732 int hi_ofs
= frame_size
& 0xfff000;
2733 int lo_ofs
= frame_size
& 0x000fff;
2735 if (hi_ofs
&& lo_ofs
)
2737 insn
= emit_insn (gen_add2_insn
2738 (stack_pointer_rtx
, GEN_INT (hi_ofs
)));
2739 RTX_FRAME_RELATED_P (insn
) = 1;
2740 frame_size
= lo_ofs
;
2742 insn
= emit_insn (gen_add2_insn
2743 (stack_pointer_rtx
, GEN_INT (frame_size
)));
2746 /* Reset the CFA to be SP + 0. */
2747 add_reg_note (insn
, REG_CFA_DEF_CFA
, stack_pointer_rtx
);
2748 RTX_FRAME_RELATED_P (insn
) = 1;
2751 /* Stack adjustment for exception handler. */
2752 if (crtl
->calls_eh_return
)
2754 /* We need to unwind the stack by the offset computed by
2755 EH_RETURN_STACKADJ_RTX. We have already reset the CFA
2756 to be SP; letting the CFA move during this adjustment
2757 is just as correct as retaining the CFA from the body
2758 of the function. Therefore, do nothing special. */
2759 emit_insn (gen_add2_insn (stack_pointer_rtx
, EH_RETURN_STACKADJ_RTX
));
2762 emit_use (gen_rtx_REG (DImode
, LR_REGNUM
));
2764 emit_jump_insn (ret_rtx
);
/* Return the place to copy the exception unwinding return address to.
   This will probably be a stack slot, but could (in theory) be the
   return register.  */
2771 aarch64_final_eh_return_addr (void)
2773 HOST_WIDE_INT fp_offset
;
2775 aarch64_layout_frame ();
2777 fp_offset
= cfun
->machine
->frame
.frame_size
2778 - cfun
->machine
->frame
.hard_fp_offset
;
2780 if (cfun
->machine
->frame
.reg_offset
[LR_REGNUM
] < 0)
2781 return gen_rtx_REG (DImode
, LR_REGNUM
);
2783 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2784 result in a store to save LR introduced by builtin_eh_return () being
2785 incorrectly deleted because the alias is not detected.
2786 So in the calculation of the address to copy the exception unwinding
2787 return address to, we note 2 cases.
2788 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2789 we return a SP-relative location since all the addresses are SP-relative
2790 in this case. This prevents the store from being optimized away.
2791 If the fp_offset is not 0, then the addresses will be FP-relative and
2792 therefore we return a FP-relative location. */
2794 if (frame_pointer_needed
)
2797 return gen_frame_mem (DImode
,
2798 plus_constant (Pmode
, hard_frame_pointer_rtx
, UNITS_PER_WORD
));
2800 return gen_frame_mem (DImode
,
2801 plus_constant (Pmode
, stack_pointer_rtx
, UNITS_PER_WORD
));
2804 /* If FP is not needed, we calculate the location of LR, which would be
2805 at the top of the saved registers block. */
2807 return gen_frame_mem (DImode
,
2808 plus_constant (Pmode
,
2811 + cfun
->machine
->frame
.saved_regs_size
2812 - 2 * UNITS_PER_WORD
));
2815 /* Possibly output code to build up a constant in a register. For
2816 the benefit of the costs infrastructure, returns the number of
2817 instructions which would be emitted. GENERATE inhibits or
2818 enables code generation. */
2821 aarch64_build_constant (int regnum
, HOST_WIDE_INT val
, bool generate
)
2825 if (aarch64_bitmask_imm (val
, DImode
))
2828 emit_move_insn (gen_rtx_REG (Pmode
, regnum
), GEN_INT (val
));
2836 HOST_WIDE_INT valp
= val
>> 16;
2840 for (i
= 16; i
< 64; i
+= 16)
2842 valm
= (valp
& 0xffff);
      /* zcount contains the number of additional MOVK instructions
         required if the constant is built up with an initial MOVZ
         instruction, while ncount is the number of MOVK instructions
         required if starting with a MOVN instruction.  Choose the
         sequence that yields the fewest instructions, preferring MOVZ
         instructions when they are both the same.  */
2859 if (ncount
< zcount
)
2862 emit_move_insn (gen_rtx_REG (Pmode
, regnum
),
2863 GEN_INT (val
| ~(HOST_WIDE_INT
) 0xffff));
2870 emit_move_insn (gen_rtx_REG (Pmode
, regnum
),
2871 GEN_INT (val
& 0xffff));
2878 for (i
= 16; i
< 64; i
+= 16)
2880 if ((val
& 0xffff) != tval
)
2883 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode
, regnum
),
2885 GEN_INT (val
& 0xffff)));
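/* Illustrative sketch (not part of the backend): the zcount/ncount choice
   above counts, per upper 16-bit chunk of the constant, how many MOVK
   instructions would follow an initial MOVZ (chunk != 0) versus an initial
   MOVN (chunk != 0xffff).  The helper is hypothetical and uses unsigned
   long long in place of HOST_WIDE_INT.  */

static int
sketch_prefer_movn (unsigned long long val)
{
  int zcount = 0, ncount = 0;
  int i;

  /* The low 16 bits are always set by the initial MOVZ or MOVN, so only
     the three upper chunks can cost an extra MOVK.  */
  for (i = 16; i < 64; i += 16)
    {
      unsigned long long chunk = (val >> i) & 0xffff;
      if (chunk != 0)
        zcount++;               /* Extra MOVK after an initial MOVZ.  */
      if (chunk != 0xffff)
        ncount++;               /* Extra MOVK after an initial MOVN.  */
    }

  /* Prefer MOVN only when it is strictly cheaper; ties go to MOVZ.  */
  return ncount < zcount;
}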
2895 aarch64_add_constant (int regnum
, int scratchreg
, HOST_WIDE_INT delta
)
2897 HOST_WIDE_INT mdelta
= delta
;
2898 rtx this_rtx
= gen_rtx_REG (Pmode
, regnum
);
2899 rtx scratch_rtx
= gen_rtx_REG (Pmode
, scratchreg
);
2904 if (mdelta
>= 4096 * 4096)
2906 (void) aarch64_build_constant (scratchreg
, delta
, true);
2907 emit_insn (gen_add3_insn (this_rtx
, this_rtx
, scratch_rtx
));
2909 else if (mdelta
> 0)
2913 emit_insn (gen_rtx_SET (Pmode
, scratch_rtx
, GEN_INT (mdelta
/ 4096)));
2914 rtx shift
= gen_rtx_ASHIFT (Pmode
, scratch_rtx
, GEN_INT (12));
2916 emit_insn (gen_rtx_SET (Pmode
, this_rtx
,
2917 gen_rtx_MINUS (Pmode
, this_rtx
, shift
)));
2919 emit_insn (gen_rtx_SET (Pmode
, this_rtx
,
2920 gen_rtx_PLUS (Pmode
, this_rtx
, shift
)));
2922 if (mdelta
% 4096 != 0)
2924 scratch_rtx
= GEN_INT ((delta
< 0 ? -1 : 1) * (mdelta
% 4096));
2925 emit_insn (gen_rtx_SET (Pmode
, this_rtx
,
2926 gen_rtx_PLUS (Pmode
, this_rtx
, scratch_rtx
)));
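/* Illustrative sketch (not part of the backend): aarch64_add_constant above
   splits a displacement into a multiple of 4096, added through a 12-bit
   immediate shifted left by 12, and a sub-4096 remainder added directly.
   The helper only shows that decomposition; its name and types are
   hypothetical.  */

static void
sketch_split_delta (long long delta, long long *chunks_of_4096,
                    long long *remainder)
{
  long long mdelta = delta < 0 ? -delta : delta;

  /* "add/sub this, this, #chunks, lsl #12" covers the bulk of the
     displacement; a second add/sub covers the remainder.  */
  *chunks_of_4096 = mdelta / 4096;
  *remainder = (delta < 0 ? -1 : 1) * (mdelta % 4096);
}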
2931 /* Output code to add DELTA to the first argument, and then jump
2932 to FUNCTION. Used for C++ multiple inheritance. */
2934 aarch64_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
2935 HOST_WIDE_INT delta
,
2936 HOST_WIDE_INT vcall_offset
,
  /* The this pointer is always in x0.  Note that this differs from
     Arm, where the this pointer may be bumped to r1 if r0 is required
     to return a pointer to an aggregate.  On AArch64 a result value
     pointer will be in x8.  */
2943 int this_regno
= R0_REGNUM
;
2944 rtx this_rtx
, temp0
, temp1
, addr
, funexp
;
2947 reload_completed
= 1;
2948 emit_note (NOTE_INSN_PROLOGUE_END
);
2950 if (vcall_offset
== 0)
2951 aarch64_add_constant (this_regno
, IP1_REGNUM
, delta
);
2954 gcc_assert ((vcall_offset
& (POINTER_BYTES
- 1)) == 0);
2956 this_rtx
= gen_rtx_REG (Pmode
, this_regno
);
2957 temp0
= gen_rtx_REG (Pmode
, IP0_REGNUM
);
2958 temp1
= gen_rtx_REG (Pmode
, IP1_REGNUM
);
2963 if (delta
>= -256 && delta
< 256)
2964 addr
= gen_rtx_PRE_MODIFY (Pmode
, this_rtx
,
2965 plus_constant (Pmode
, this_rtx
, delta
));
2967 aarch64_add_constant (this_regno
, IP1_REGNUM
, delta
);
2970 if (Pmode
== ptr_mode
)
2971 aarch64_emit_move (temp0
, gen_rtx_MEM (ptr_mode
, addr
));
2973 aarch64_emit_move (temp0
,
2974 gen_rtx_ZERO_EXTEND (Pmode
,
2975 gen_rtx_MEM (ptr_mode
, addr
)));
2977 if (vcall_offset
>= -256 && vcall_offset
< 4096 * POINTER_BYTES
)
2978 addr
= plus_constant (Pmode
, temp0
, vcall_offset
);
2981 (void) aarch64_build_constant (IP1_REGNUM
, vcall_offset
, true);
2982 addr
= gen_rtx_PLUS (Pmode
, temp0
, temp1
);
2985 if (Pmode
== ptr_mode
)
2986 aarch64_emit_move (temp1
, gen_rtx_MEM (ptr_mode
,addr
));
2988 aarch64_emit_move (temp1
,
2989 gen_rtx_SIGN_EXTEND (Pmode
,
2990 gen_rtx_MEM (ptr_mode
, addr
)));
2992 emit_insn (gen_add2_insn (this_rtx
, temp1
));
2995 /* Generate a tail call to the target function. */
2996 if (!TREE_USED (function
))
2998 assemble_external (function
);
2999 TREE_USED (function
) = 1;
3001 funexp
= XEXP (DECL_RTL (function
), 0);
3002 funexp
= gen_rtx_MEM (FUNCTION_MODE
, funexp
);
3003 insn
= emit_call_insn (gen_sibcall (funexp
, const0_rtx
, NULL_RTX
));
3004 SIBLING_CALL_P (insn
) = 1;
3006 insn
= get_insns ();
3007 shorten_branches (insn
);
3008 final_start_function (insn
, file
, 1);
3009 final (insn
, file
, 1);
3010 final_end_function ();
3012 /* Stop pretending to be a post-reload pass. */
3013 reload_completed
= 0;
3017 aarch64_tls_referenced_p (rtx x
)
3019 if (!TARGET_HAVE_TLS
)
3021 subrtx_iterator::array_type array
;
3022 FOR_EACH_SUBRTX (iter
, array
, x
, ALL
)
3024 const_rtx x
= *iter
;
3025 if (GET_CODE (x
) == SYMBOL_REF
&& SYMBOL_REF_TLS_MODEL (x
) != 0)
3027 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
3028 TLS offsets, not real symbol references. */
3029 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
3030 iter
.skip_subrtxes ();
3037 aarch64_bitmasks_cmp (const void *i1
, const void *i2
)
3039 const unsigned HOST_WIDE_INT
*imm1
= (const unsigned HOST_WIDE_INT
*) i1
;
3040 const unsigned HOST_WIDE_INT
*imm2
= (const unsigned HOST_WIDE_INT
*) i2
;
3051 aarch64_build_bitmask_table (void)
3053 unsigned HOST_WIDE_INT mask
, imm
;
3054 unsigned int log_e
, e
, s
, r
;
3055 unsigned int nimms
= 0;
3057 for (log_e
= 1; log_e
<= 6; log_e
++)
3061 mask
= ~(HOST_WIDE_INT
) 0;
3063 mask
= ((HOST_WIDE_INT
) 1 << e
) - 1;
3064 for (s
= 1; s
< e
; s
++)
3066 for (r
= 0; r
< e
; r
++)
3068 /* set s consecutive bits to 1 (s < 64) */
3069 imm
= ((unsigned HOST_WIDE_INT
)1 << s
) - 1;
3070 /* rotate right by r */
3072 imm
= ((imm
>> r
) | (imm
<< (e
- r
))) & mask
;
3073 /* replicate the constant depending on SIMD size */
3075 case 1: imm
|= (imm
<< 2);
3076 case 2: imm
|= (imm
<< 4);
3077 case 3: imm
|= (imm
<< 8);
3078 case 4: imm
|= (imm
<< 16);
3079 case 5: imm
|= (imm
<< 32);
3085 gcc_assert (nimms
< AARCH64_NUM_BITMASKS
);
3086 aarch64_bitmasks
[nimms
++] = imm
;
3091 gcc_assert (nimms
== AARCH64_NUM_BITMASKS
);
3092 qsort (aarch64_bitmasks
, nimms
, sizeof (aarch64_bitmasks
[0]),
3093 aarch64_bitmasks_cmp
);
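/* Illustrative sketch (not part of the backend): one entry of the table
   built above takes S consecutive set bits inside an element of E bits,
   rotates the element right by R, and replicates it across 64 bits.  The
   helper is hypothetical and uses unsigned long long for the pattern.  */

static unsigned long long
sketch_bitmask_imm (unsigned int e, unsigned int s, unsigned int r)
{
  /* Mask selecting one element; e == 64 covers the whole register.  */
  unsigned long long mask = e == 64 ? ~0ULL : (1ULL << e) - 1;
  /* S consecutive set bits, with 1 <= S < E.  */
  unsigned long long imm = (1ULL << s) - 1;
  unsigned int width;

  /* Rotate right by R within the element.  */
  if (r != 0)
    imm = ((imm >> r) | (imm << (e - r))) & mask;

  /* Replicate the element until it fills 64 bits.  */
  for (width = e; width < 64; width *= 2)
    imm |= imm << width;

  return imm;
}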
/* Return true if val can be encoded as a 12-bit unsigned immediate with
   a left shift of 0 or 12 bits.  */
bool
aarch64_uimm12_shift (HOST_WIDE_INT val)
{
  return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
          || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val);
}
/* Return true if val is an immediate that can be loaded into a
   register by a MOVZ instruction.  */
static bool
aarch64_movw_imm (HOST_WIDE_INT val, machine_mode mode)
{
  if (GET_MODE_SIZE (mode) > 4)
    {
      if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
          || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
        return 1;
    }
  else
    {
      /* Ignore sign extension.  */
      val &= (HOST_WIDE_INT) 0xffffffff;
    }
  return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
          || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
}
/* Return true if val is a valid bitmask immediate.  */
bool
aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode mode)
{
  if (GET_MODE_SIZE (mode) < 8)
    {
      /* Replicate bit pattern.  */
      val &= (HOST_WIDE_INT) 0xffffffff;
      val |= val << 32;
    }
  return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
                  sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
}
/* Return true if val is an immediate that can be loaded into a
   register in a single instruction.  */
bool
aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
{
  if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
    return 1;
  return aarch64_bitmask_imm (val, mode);
}
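/* Illustrative sketch (not part of the backend): aarch64_move_imm accepts a
   constant if it is a MOVZ immediate, the bitwise NOT of one (a MOVN
   immediate), or a logical-immediate bit pattern.  The hypothetical helper
   below reproduces just the MOVZ/MOVN half of that test for 64-bit values.  */

static int
sketch_single_mov_imm (unsigned long long val)
{
  int i;

  for (i = 0; i < 64; i += 16)
    {
      unsigned long long field = 0xffffULL << i;

      /* MOVZ: every bit outside one 16-bit field is zero.  */
      if ((val & field) == val)
        return 1;
      /* MOVN: every bit outside one 16-bit field is one.  */
      if ((~val & field) == ~val)
        return 1;
    }
  return 0;
}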
3155 aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED
, rtx x
)
3159 if (GET_CODE (x
) == HIGH
)
3162 split_const (x
, &base
, &offset
);
3163 if (GET_CODE (base
) == SYMBOL_REF
|| GET_CODE (base
) == LABEL_REF
)
3165 if (aarch64_classify_symbol (base
, offset
, SYMBOL_CONTEXT_ADR
)
3166 != SYMBOL_FORCE_TO_MEM
)
3169 /* Avoid generating a 64-bit relocation in ILP32; leave
3170 to aarch64_expand_mov_immediate to handle it properly. */
3171 return mode
!= ptr_mode
;
3174 return aarch64_tls_referenced_p (x
);
3177 /* Return true if register REGNO is a valid index register.
3178 STRICT_P is true if REG_OK_STRICT is in effect. */
3181 aarch64_regno_ok_for_index_p (int regno
, bool strict_p
)
3183 if (!HARD_REGISTER_NUM_P (regno
))
3191 regno
= reg_renumber
[regno
];
3193 return GP_REGNUM_P (regno
);
3196 /* Return true if register REGNO is a valid base register for mode MODE.
3197 STRICT_P is true if REG_OK_STRICT is in effect. */
3200 aarch64_regno_ok_for_base_p (int regno
, bool strict_p
)
3202 if (!HARD_REGISTER_NUM_P (regno
))
3210 regno
= reg_renumber
[regno
];
3213 /* The fake registers will be eliminated to either the stack or
3214 hard frame pointer, both of which are usually valid base registers.
3215 Reload deals with the cases where the eliminated form isn't valid. */
3216 return (GP_REGNUM_P (regno
)
3217 || regno
== SP_REGNUM
3218 || regno
== FRAME_POINTER_REGNUM
3219 || regno
== ARG_POINTER_REGNUM
);
3222 /* Return true if X is a valid base register for mode MODE.
3223 STRICT_P is true if REG_OK_STRICT is in effect. */
3226 aarch64_base_register_rtx_p (rtx x
, bool strict_p
)
3228 if (!strict_p
&& GET_CODE (x
) == SUBREG
)
3231 return (REG_P (x
) && aarch64_regno_ok_for_base_p (REGNO (x
), strict_p
));
3234 /* Return true if address offset is a valid index. If it is, fill in INFO
3235 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
3238 aarch64_classify_index (struct aarch64_address_info
*info
, rtx x
,
3239 machine_mode mode
, bool strict_p
)
3241 enum aarch64_address_type type
;
3246 if ((REG_P (x
) || GET_CODE (x
) == SUBREG
)
3247 && GET_MODE (x
) == Pmode
)
3249 type
= ADDRESS_REG_REG
;
3253 /* (sign_extend:DI (reg:SI)) */
3254 else if ((GET_CODE (x
) == SIGN_EXTEND
3255 || GET_CODE (x
) == ZERO_EXTEND
)
3256 && GET_MODE (x
) == DImode
3257 && GET_MODE (XEXP (x
, 0)) == SImode
)
3259 type
= (GET_CODE (x
) == SIGN_EXTEND
)
3260 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
3261 index
= XEXP (x
, 0);
3264 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3265 else if (GET_CODE (x
) == MULT
3266 && (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
3267 || GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
3268 && GET_MODE (XEXP (x
, 0)) == DImode
3269 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
3270 && CONST_INT_P (XEXP (x
, 1)))
3272 type
= (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
3273 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
3274 index
= XEXP (XEXP (x
, 0), 0);
3275 shift
= exact_log2 (INTVAL (XEXP (x
, 1)));
3277 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3278 else if (GET_CODE (x
) == ASHIFT
3279 && (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
3280 || GET_CODE (XEXP (x
, 0)) == ZERO_EXTEND
)
3281 && GET_MODE (XEXP (x
, 0)) == DImode
3282 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == SImode
3283 && CONST_INT_P (XEXP (x
, 1)))
3285 type
= (GET_CODE (XEXP (x
, 0)) == SIGN_EXTEND
)
3286 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
3287 index
= XEXP (XEXP (x
, 0), 0);
3288 shift
= INTVAL (XEXP (x
, 1));
3290 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3291 else if ((GET_CODE (x
) == SIGN_EXTRACT
3292 || GET_CODE (x
) == ZERO_EXTRACT
)
3293 && GET_MODE (x
) == DImode
3294 && GET_CODE (XEXP (x
, 0)) == MULT
3295 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
3296 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
3298 type
= (GET_CODE (x
) == SIGN_EXTRACT
)
3299 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
3300 index
= XEXP (XEXP (x
, 0), 0);
3301 shift
= exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)));
3302 if (INTVAL (XEXP (x
, 1)) != 32 + shift
3303 || INTVAL (XEXP (x
, 2)) != 0)
3306 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3307 (const_int 0xffffffff<<shift)) */
3308 else if (GET_CODE (x
) == AND
3309 && GET_MODE (x
) == DImode
3310 && GET_CODE (XEXP (x
, 0)) == MULT
3311 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
3312 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
3313 && CONST_INT_P (XEXP (x
, 1)))
3315 type
= ADDRESS_REG_UXTW
;
3316 index
= XEXP (XEXP (x
, 0), 0);
3317 shift
= exact_log2 (INTVAL (XEXP (XEXP (x
, 0), 1)));
3318 if (INTVAL (XEXP (x
, 1)) != (HOST_WIDE_INT
)0xffffffff << shift
)
3321 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3322 else if ((GET_CODE (x
) == SIGN_EXTRACT
3323 || GET_CODE (x
) == ZERO_EXTRACT
)
3324 && GET_MODE (x
) == DImode
3325 && GET_CODE (XEXP (x
, 0)) == ASHIFT
3326 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
3327 && CONST_INT_P (XEXP (XEXP (x
, 0), 1)))
3329 type
= (GET_CODE (x
) == SIGN_EXTRACT
)
3330 ? ADDRESS_REG_SXTW
: ADDRESS_REG_UXTW
;
3331 index
= XEXP (XEXP (x
, 0), 0);
3332 shift
= INTVAL (XEXP (XEXP (x
, 0), 1));
3333 if (INTVAL (XEXP (x
, 1)) != 32 + shift
3334 || INTVAL (XEXP (x
, 2)) != 0)
3337 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3338 (const_int 0xffffffff<<shift)) */
3339 else if (GET_CODE (x
) == AND
3340 && GET_MODE (x
) == DImode
3341 && GET_CODE (XEXP (x
, 0)) == ASHIFT
3342 && GET_MODE (XEXP (XEXP (x
, 0), 0)) == DImode
3343 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
3344 && CONST_INT_P (XEXP (x
, 1)))
3346 type
= ADDRESS_REG_UXTW
;
3347 index
= XEXP (XEXP (x
, 0), 0);
3348 shift
= INTVAL (XEXP (XEXP (x
, 0), 1));
3349 if (INTVAL (XEXP (x
, 1)) != (HOST_WIDE_INT
)0xffffffff << shift
)
3352 /* (mult:P (reg:P) (const_int scale)) */
3353 else if (GET_CODE (x
) == MULT
3354 && GET_MODE (x
) == Pmode
3355 && GET_MODE (XEXP (x
, 0)) == Pmode
3356 && CONST_INT_P (XEXP (x
, 1)))
3358 type
= ADDRESS_REG_REG
;
3359 index
= XEXP (x
, 0);
3360 shift
= exact_log2 (INTVAL (XEXP (x
, 1)));
3362 /* (ashift:P (reg:P) (const_int shift)) */
3363 else if (GET_CODE (x
) == ASHIFT
3364 && GET_MODE (x
) == Pmode
3365 && GET_MODE (XEXP (x
, 0)) == Pmode
3366 && CONST_INT_P (XEXP (x
, 1)))
3368 type
= ADDRESS_REG_REG
;
3369 index
= XEXP (x
, 0);
3370 shift
= INTVAL (XEXP (x
, 1));
3375 if (GET_CODE (index
) == SUBREG
)
3376 index
= SUBREG_REG (index
);
3379 (shift
> 0 && shift
<= 3
3380 && (1 << shift
) == GET_MODE_SIZE (mode
)))
3382 && aarch64_regno_ok_for_index_p (REGNO (index
), strict_p
))
3385 info
->offset
= index
;
3386 info
->shift
= shift
;
bool
aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
{
  return (offset >= -64 * GET_MODE_SIZE (mode)
          && offset < 64 * GET_MODE_SIZE (mode)
          && offset % GET_MODE_SIZE (mode) == 0);
}

static inline bool
offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
                               HOST_WIDE_INT offset)
{
  return offset >= -256 && offset < 256;
}

static inline bool
offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
{
  return (offset >= 0
          && offset < 4096 * GET_MODE_SIZE (mode)
          && offset % GET_MODE_SIZE (mode) == 0);
}
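/* Illustrative sketch (not part of the backend): the three predicates above
   correspond to the offset fields of LDP/STP (7-bit signed, scaled by the
   access size), unscaled LDUR/STUR (9-bit signed) and scaled LDR/STR
   (12-bit unsigned).  The hypothetical helper below evaluates all three for
   a given access size in bytes.  */

static void
sketch_offset_ranges (long long offset, long long size,
                      int *ldp_ok, int *ldur_ok, int *ldr_ok)
{
  *ldp_ok = (offset >= -64 * size && offset < 64 * size
             && offset % size == 0);
  *ldur_ok = (offset >= -256 && offset < 256);
  *ldr_ok = (offset >= 0 && offset < 4096 * size && offset % size == 0);
}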
3416 /* Return true if X is a valid address for machine mode MODE. If it is,
3417 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3418 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3421 aarch64_classify_address (struct aarch64_address_info
*info
,
3422 rtx x
, machine_mode mode
,
3423 RTX_CODE outer_code
, bool strict_p
)
3425 enum rtx_code code
= GET_CODE (x
);
3428 /* On BE, we use load/store pair for all large int mode load/stores. */
3429 bool load_store_pair_p
= (outer_code
== PARALLEL
3430 || (BYTES_BIG_ENDIAN
3431 && aarch64_vect_struct_mode_p (mode
)));
3433 bool allow_reg_index_p
=
3435 && (GET_MODE_SIZE (mode
) != 16 || aarch64_vector_mode_supported_p (mode
))
3436 && !aarch64_vect_struct_mode_p (mode
);
  /* On LE, for AdvSIMD, don't support anything other than POST_INC or
     REG addressing.  */
3440 if (aarch64_vect_struct_mode_p (mode
) && !BYTES_BIG_ENDIAN
3441 && (code
!= POST_INC
&& code
!= REG
))
3448 info
->type
= ADDRESS_REG_IMM
;
3450 info
->offset
= const0_rtx
;
3451 return aarch64_base_register_rtx_p (x
, strict_p
);
3459 && (op0
== virtual_stack_vars_rtx
3460 || op0
== frame_pointer_rtx
3461 || op0
== arg_pointer_rtx
)
3462 && CONST_INT_P (op1
))
3464 info
->type
= ADDRESS_REG_IMM
;
3471 if (GET_MODE_SIZE (mode
) != 0
3472 && CONST_INT_P (op1
)
3473 && aarch64_base_register_rtx_p (op0
, strict_p
))
3475 HOST_WIDE_INT offset
= INTVAL (op1
);
3477 info
->type
= ADDRESS_REG_IMM
;
3481 /* TImode and TFmode values are allowed in both pairs of X
3482 registers and individual Q registers. The available
3484 X,X: 7-bit signed scaled offset
3485 Q: 9-bit signed offset
3486 We conservatively require an offset representable in either mode.
3488 if (mode
== TImode
|| mode
== TFmode
)
3489 return (aarch64_offset_7bit_signed_scaled_p (mode
, offset
)
3490 && offset_9bit_signed_unscaled_p (mode
, offset
));
          /* A 7-bit offset check because OImode will emit an ldp/stp
             instruction (only big endian will get here).
3494 For ldp/stp instructions, the offset is scaled for the size of a
3495 single element of the pair. */
3497 return aarch64_offset_7bit_signed_scaled_p (TImode
, offset
);
3499 /* Three 9/12 bit offsets checks because CImode will emit three
3500 ldr/str instructions (only big endian will get here). */
3502 return (aarch64_offset_7bit_signed_scaled_p (TImode
, offset
)
3503 && (offset_9bit_signed_unscaled_p (V16QImode
, offset
+ 32)
3504 || offset_12bit_unsigned_scaled_p (V16QImode
,
3507 /* Two 7bit offsets checks because XImode will emit two ldp/stp
3508 instructions (only big endian will get here). */
3510 return (aarch64_offset_7bit_signed_scaled_p (TImode
, offset
)
3511 && aarch64_offset_7bit_signed_scaled_p (TImode
,
3514 if (load_store_pair_p
)
3515 return ((GET_MODE_SIZE (mode
) == 4 || GET_MODE_SIZE (mode
) == 8)
3516 && aarch64_offset_7bit_signed_scaled_p (mode
, offset
));
3518 return (offset_9bit_signed_unscaled_p (mode
, offset
)
3519 || offset_12bit_unsigned_scaled_p (mode
, offset
));
3522 if (allow_reg_index_p
)
3524 /* Look for base + (scaled/extended) index register. */
3525 if (aarch64_base_register_rtx_p (op0
, strict_p
)
3526 && aarch64_classify_index (info
, op1
, mode
, strict_p
))
3531 if (aarch64_base_register_rtx_p (op1
, strict_p
)
3532 && aarch64_classify_index (info
, op0
, mode
, strict_p
))
3545 info
->type
= ADDRESS_REG_WB
;
3546 info
->base
= XEXP (x
, 0);
3547 info
->offset
= NULL_RTX
;
3548 return aarch64_base_register_rtx_p (info
->base
, strict_p
);
3552 info
->type
= ADDRESS_REG_WB
;
3553 info
->base
= XEXP (x
, 0);
3554 if (GET_CODE (XEXP (x
, 1)) == PLUS
3555 && CONST_INT_P (XEXP (XEXP (x
, 1), 1))
3556 && rtx_equal_p (XEXP (XEXP (x
, 1), 0), info
->base
)
3557 && aarch64_base_register_rtx_p (info
->base
, strict_p
))
3559 HOST_WIDE_INT offset
;
3560 info
->offset
= XEXP (XEXP (x
, 1), 1);
3561 offset
= INTVAL (info
->offset
);
3563 /* TImode and TFmode values are allowed in both pairs of X
3564 registers and individual Q registers. The available
3566 X,X: 7-bit signed scaled offset
3567 Q: 9-bit signed offset
3568 We conservatively require an offset representable in either mode.
3570 if (mode
== TImode
|| mode
== TFmode
)
3571 return (aarch64_offset_7bit_signed_scaled_p (mode
, offset
)
3572 && offset_9bit_signed_unscaled_p (mode
, offset
));
3574 if (load_store_pair_p
)
3575 return ((GET_MODE_SIZE (mode
) == 4 || GET_MODE_SIZE (mode
) == 8)
3576 && aarch64_offset_7bit_signed_scaled_p (mode
, offset
));
3578 return offset_9bit_signed_unscaled_p (mode
, offset
);
3585 /* load literal: pc-relative constant pool entry. Only supported
3586 for SI mode or larger. */
3587 info
->type
= ADDRESS_SYMBOLIC
;
3589 if (!load_store_pair_p
&& GET_MODE_SIZE (mode
) >= 4)
3593 split_const (x
, &sym
, &addend
);
3594 return (GET_CODE (sym
) == LABEL_REF
3595 || (GET_CODE (sym
) == SYMBOL_REF
3596 && CONSTANT_POOL_ADDRESS_P (sym
)));
3601 info
->type
= ADDRESS_LO_SUM
;
3602 info
->base
= XEXP (x
, 0);
3603 info
->offset
= XEXP (x
, 1);
3604 if (allow_reg_index_p
3605 && aarch64_base_register_rtx_p (info
->base
, strict_p
))
3608 split_const (info
->offset
, &sym
, &offs
);
3609 if (GET_CODE (sym
) == SYMBOL_REF
3610 && (aarch64_classify_symbol (sym
, offs
, SYMBOL_CONTEXT_MEM
)
3611 == SYMBOL_SMALL_ABSOLUTE
))
3613 /* The symbol and offset must be aligned to the access size. */
3615 unsigned int ref_size
;
3617 if (CONSTANT_POOL_ADDRESS_P (sym
))
3618 align
= GET_MODE_ALIGNMENT (get_pool_mode (sym
));
3619 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym
))
3621 tree exp
= SYMBOL_REF_DECL (sym
);
3622 align
= TYPE_ALIGN (TREE_TYPE (exp
));
3623 align
= CONSTANT_ALIGNMENT (exp
, align
);
3625 else if (SYMBOL_REF_DECL (sym
))
3626 align
= DECL_ALIGN (SYMBOL_REF_DECL (sym
));
3627 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym
)
3628 && SYMBOL_REF_BLOCK (sym
) != NULL
)
3629 align
= SYMBOL_REF_BLOCK (sym
)->alignment
;
3631 align
= BITS_PER_UNIT
;
3633 ref_size
= GET_MODE_SIZE (mode
);
3635 ref_size
= GET_MODE_SIZE (DImode
);
3637 return ((INTVAL (offs
) & (ref_size
- 1)) == 0
3638 && ((align
/ BITS_PER_UNIT
) & (ref_size
- 1)) == 0);
3649 aarch64_symbolic_address_p (rtx x
)
3653 split_const (x
, &x
, &offset
);
3654 return GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == LABEL_REF
;
3657 /* Classify the base of symbolic expression X, given that X appears in
3660 enum aarch64_symbol_type
3661 aarch64_classify_symbolic_expression (rtx x
,
3662 enum aarch64_symbol_context context
)
3666 split_const (x
, &x
, &offset
);
3667 return aarch64_classify_symbol (x
, offset
, context
);
3671 /* Return TRUE if X is a legitimate address for accessing memory in
3674 aarch64_legitimate_address_hook_p (machine_mode mode
, rtx x
, bool strict_p
)
3676 struct aarch64_address_info addr
;
3678 return aarch64_classify_address (&addr
, x
, mode
, MEM
, strict_p
);
3681 /* Return TRUE if X is a legitimate address for accessing memory in
3682 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3685 aarch64_legitimate_address_p (machine_mode mode
, rtx x
,
3686 RTX_CODE outer_code
, bool strict_p
)
3688 struct aarch64_address_info addr
;
3690 return aarch64_classify_address (&addr
, x
, mode
, outer_code
, strict_p
);
3693 /* Return TRUE if rtx X is immediate constant 0.0 */
3695 aarch64_float_const_zero_rtx_p (rtx x
)
3699 if (GET_MODE (x
) == VOIDmode
)
3702 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
3703 if (REAL_VALUE_MINUS_ZERO (r
))
3704 return !HONOR_SIGNED_ZEROS (GET_MODE (x
));
3705 return REAL_VALUES_EQUAL (r
, dconst0
);
3708 /* Return the fixed registers used for condition codes. */
3711 aarch64_fixed_condition_code_regs (unsigned int *p1
, unsigned int *p2
)
3714 *p2
= INVALID_REGNUM
;
3718 /* Emit call insn with PAT and do aarch64-specific handling. */
3721 aarch64_emit_call_insn (rtx pat
)
3723 rtx insn
= emit_call_insn (pat
);
3725 rtx
*fusage
= &CALL_INSN_FUNCTION_USAGE (insn
);
3726 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP0_REGNUM
));
3727 clobber_reg (fusage
, gen_rtx_REG (word_mode
, IP1_REGNUM
));
3731 aarch64_select_cc_mode (RTX_CODE code
, rtx x
, rtx y
)
3733 /* All floating point compares return CCFP if it is an equality
3734 comparison, and CCFPE otherwise. */
3735 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
3762 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
3764 && (code
== EQ
|| code
== NE
|| code
== LT
|| code
== GE
)
3765 && (GET_CODE (x
) == PLUS
|| GET_CODE (x
) == MINUS
|| GET_CODE (x
) == AND
3766 || GET_CODE (x
) == NEG
))
3769 /* A compare with a shifted operand. Because of canonicalization,
3770 the comparison will have to be swapped when we emit the assembly
3772 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
3773 && (REG_P (y
) || GET_CODE (y
) == SUBREG
)
3774 && (GET_CODE (x
) == ASHIFT
|| GET_CODE (x
) == ASHIFTRT
3775 || GET_CODE (x
) == LSHIFTRT
3776 || GET_CODE (x
) == ZERO_EXTEND
|| GET_CODE (x
) == SIGN_EXTEND
))
3779 /* Similarly for a negated operand, but we can only do this for
3781 if ((GET_MODE (x
) == SImode
|| GET_MODE (x
) == DImode
)
3782 && (REG_P (y
) || GET_CODE (y
) == SUBREG
)
3783 && (code
== EQ
|| code
== NE
)
3784 && GET_CODE (x
) == NEG
)
3787 /* A compare of a mode narrower than SI mode against zero can be done
3788 by extending the value in the comparison. */
3789 if ((GET_MODE (x
) == QImode
|| GET_MODE (x
) == HImode
)
3791 /* Only use sign-extension if we really need it. */
3792 return ((code
== GT
|| code
== GE
|| code
== LE
|| code
== LT
)
3793 ? CC_SESWPmode
: CC_ZESWPmode
);
3795 /* For everything else, return CCmode. */
3800 aarch64_get_condition_code_1 (enum machine_mode
, enum rtx_code
);
3803 aarch64_get_condition_code (rtx x
)
3805 machine_mode mode
= GET_MODE (XEXP (x
, 0));
3806 enum rtx_code comp_code
= GET_CODE (x
);
3808 if (GET_MODE_CLASS (mode
) != MODE_CC
)
3809 mode
= SELECT_CC_MODE (comp_code
, XEXP (x
, 0), XEXP (x
, 1));
3810 return aarch64_get_condition_code_1 (mode
, comp_code
);
3814 aarch64_get_condition_code_1 (enum machine_mode mode
, enum rtx_code comp_code
)
3816 int ne
= -1, eq
= -1;
3823 case GE
: return AARCH64_GE
;
3824 case GT
: return AARCH64_GT
;
3825 case LE
: return AARCH64_LS
;
3826 case LT
: return AARCH64_MI
;
3827 case NE
: return AARCH64_NE
;
3828 case EQ
: return AARCH64_EQ
;
3829 case ORDERED
: return AARCH64_VC
;
3830 case UNORDERED
: return AARCH64_VS
;
3831 case UNLT
: return AARCH64_LT
;
3832 case UNLE
: return AARCH64_LE
;
3833 case UNGT
: return AARCH64_HI
;
3834 case UNGE
: return AARCH64_PL
;
3892 case NE
: return AARCH64_NE
;
3893 case EQ
: return AARCH64_EQ
;
3894 case GE
: return AARCH64_GE
;
3895 case GT
: return AARCH64_GT
;
3896 case LE
: return AARCH64_LE
;
3897 case LT
: return AARCH64_LT
;
3898 case GEU
: return AARCH64_CS
;
3899 case GTU
: return AARCH64_HI
;
3900 case LEU
: return AARCH64_LS
;
3901 case LTU
: return AARCH64_CC
;
3911 case NE
: return AARCH64_NE
;
3912 case EQ
: return AARCH64_EQ
;
3913 case GE
: return AARCH64_LE
;
3914 case GT
: return AARCH64_LT
;
3915 case LE
: return AARCH64_GE
;
3916 case LT
: return AARCH64_GT
;
3917 case GEU
: return AARCH64_LS
;
3918 case GTU
: return AARCH64_CC
;
3919 case LEU
: return AARCH64_CS
;
3920 case LTU
: return AARCH64_HI
;
3928 case NE
: return AARCH64_NE
;
3929 case EQ
: return AARCH64_EQ
;
3930 case GE
: return AARCH64_PL
;
3931 case LT
: return AARCH64_MI
;
3939 case NE
: return AARCH64_NE
;
3940 case EQ
: return AARCH64_EQ
;
3950 if (comp_code
== NE
)
3953 if (comp_code
== EQ
)
3960 aarch64_const_vec_all_same_in_range_p (rtx x
,
3961 HOST_WIDE_INT minval
,
3962 HOST_WIDE_INT maxval
)
3964 HOST_WIDE_INT firstval
;
3967 if (GET_CODE (x
) != CONST_VECTOR
3968 || GET_MODE_CLASS (GET_MODE (x
)) != MODE_VECTOR_INT
)
3971 firstval
= INTVAL (CONST_VECTOR_ELT (x
, 0));
3972 if (firstval
< minval
|| firstval
> maxval
)
3975 count
= CONST_VECTOR_NUNITS (x
);
3976 for (i
= 1; i
< count
; i
++)
3977 if (INTVAL (CONST_VECTOR_ELT (x
, i
)) != firstval
)
3984 aarch64_const_vec_all_same_int_p (rtx x
, HOST_WIDE_INT val
)
3986 return aarch64_const_vec_all_same_in_range_p (x
, val
, val
);
3990 bit_count (unsigned HOST_WIDE_INT value
)
4004 #define AARCH64_CC_V 1
4005 #define AARCH64_CC_C (1 << 1)
4006 #define AARCH64_CC_Z (1 << 2)
4007 #define AARCH64_CC_N (1 << 3)
4009 /* N Z C V flags for ccmp. The first code is for AND op and the other
4010 is for IOR op. Indexed by AARCH64_COND_CODE. */
4011 static const int aarch64_nzcv_codes
[][2] =
4013 {AARCH64_CC_Z
, 0}, /* EQ, Z == 1. */
4014 {0, AARCH64_CC_Z
}, /* NE, Z == 0. */
4015 {AARCH64_CC_C
, 0}, /* CS, C == 1. */
4016 {0, AARCH64_CC_C
}, /* CC, C == 0. */
4017 {AARCH64_CC_N
, 0}, /* MI, N == 1. */
4018 {0, AARCH64_CC_N
}, /* PL, N == 0. */
4019 {AARCH64_CC_V
, 0}, /* VS, V == 1. */
4020 {0, AARCH64_CC_V
}, /* VC, V == 0. */
4021 {AARCH64_CC_C
, 0}, /* HI, C ==1 && Z == 0. */
4022 {0, AARCH64_CC_C
}, /* LS, !(C == 1 && Z == 0). */
4023 {0, AARCH64_CC_V
}, /* GE, N == V. */
4024 {AARCH64_CC_V
, 0}, /* LT, N != V. */
4025 {0, AARCH64_CC_Z
}, /* GT, Z == 0 && N == V. */
4026 {AARCH64_CC_Z
, 0}, /* LE, !(Z == 0 && N == V). */
4027 {0, 0}, /* AL, Any. */
4028 {0, 0}, /* NV, Any. */
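/* Illustrative sketch (not part of the backend): the table above gives, per
   condition code, the NZCV flag bits a conditional compare uses for an
   AND-combined test (first column) and an IOR-combined test (second
   column).  The hypothetical helper below only shows how the four flag-bit
   macros pack into a 4-bit NZCV immediate.  */

static int
sketch_pack_nzcv (int n, int z, int c, int v)
{
  return ((n ? AARCH64_CC_N : 0)
          | (z ? AARCH64_CC_Z : 0)
          | (c ? AARCH64_CC_C : 0)
          | (v ? AARCH64_CC_V : 0));
}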
4032 aarch64_ccmp_mode_to_code (enum machine_mode mode
)
4073 aarch64_print_operand (FILE *f
, rtx x
, char code
)
4077 /* An integer or symbol address without a preceding # sign. */
4079 switch (GET_CODE (x
))
4082 fprintf (f
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
));
4086 output_addr_const (f
, x
);
4090 if (GET_CODE (XEXP (x
, 0)) == PLUS
4091 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
4093 output_addr_const (f
, x
);
4099 output_operand_lossage ("Unsupported operand for code '%c'", code
);
4104 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
4108 if (!CONST_INT_P (x
)
4109 || (n
= exact_log2 (INTVAL (x
) & ~7)) <= 0)
4111 output_operand_lossage ("invalid operand for '%%%c'", code
);
4127 output_operand_lossage ("invalid operand for '%%%c'", code
);
4137 /* Print N such that 2^N == X. */
4138 if (!CONST_INT_P (x
) || (n
= exact_log2 (INTVAL (x
))) < 0)
4140 output_operand_lossage ("invalid operand for '%%%c'", code
);
4144 asm_fprintf (f
, "%d", n
);
4149 /* Print the number of non-zero bits in X (a const_int). */
4150 if (!CONST_INT_P (x
))
4152 output_operand_lossage ("invalid operand for '%%%c'", code
);
4156 asm_fprintf (f
, "%u", bit_count (INTVAL (x
)));
4160 /* Print the higher numbered register of a pair (TImode) of regs. */
4161 if (!REG_P (x
) || !GP_REGNUM_P (REGNO (x
) + 1))
4163 output_operand_lossage ("invalid operand for '%%%c'", code
);
4167 asm_fprintf (f
, "%s", reg_names
[REGNO (x
) + 1]);
4173 /* Print a condition (eq, ne, etc). */
4175 /* CONST_TRUE_RTX means always -- that's the default. */
4176 if (x
== const_true_rtx
)
4179 if (!COMPARISON_P (x
))
4181 output_operand_lossage ("invalid operand for '%%%c'", code
);
4185 cond_code
= aarch64_get_condition_code (x
);
4186 gcc_assert (cond_code
>= 0);
4187 fputs (aarch64_condition_codes
[cond_code
], f
);
4194 /* Print the inverse of a condition (eq <-> ne, etc). */
4196 /* CONST_TRUE_RTX means never -- that's the default. */
4197 if (x
== const_true_rtx
)
4203 if (!COMPARISON_P (x
))
4205 output_operand_lossage ("invalid operand for '%%%c'", code
);
4208 cond_code
= aarch64_get_condition_code (x
);
4209 gcc_assert (cond_code
>= 0);
4210 fputs (aarch64_condition_codes
[AARCH64_INVERSE_CONDITION_CODE
4220 /* Print a scalar FP/SIMD register name. */
4221 if (!REG_P (x
) || !FP_REGNUM_P (REGNO (x
)))
4223 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code
);
4226 asm_fprintf (f
, "%c%d", code
, REGNO (x
) - V0_REGNUM
);
4233 /* Print the first FP/SIMD register name in a list. */
4234 if (!REG_P (x
) || !FP_REGNUM_P (REGNO (x
)))
4236 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code
);
4239 asm_fprintf (f
, "v%d", REGNO (x
) - V0_REGNUM
+ (code
- 'S'));
4243 /* Print a scalar FP/SIMD register name + 1. */
4244 if (!REG_P (x
) || !FP_REGNUM_P (REGNO (x
)))
4246 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code
);
4249 asm_fprintf (f
, "q%d", REGNO (x
) - V0_REGNUM
+ 1);
4253 /* Print bottom 16 bits of integer constant in hex. */
4254 if (!CONST_INT_P (x
))
4256 output_operand_lossage ("invalid operand for '%%%c'", code
);
4259 asm_fprintf (f
, "0x%wx", UINTVAL (x
) & 0xffff);
      /* Print a general register name or the zero register (32-bit or
         64-bit).  */
4267 || (CONST_DOUBLE_P (x
) && aarch64_float_const_zero_rtx_p (x
)))
4269 asm_fprintf (f
, "%czr", code
);
4273 if (REG_P (x
) && GP_REGNUM_P (REGNO (x
)))
4275 asm_fprintf (f
, "%c%d", code
, REGNO (x
) - R0_REGNUM
);
4279 if (REG_P (x
) && REGNO (x
) == SP_REGNUM
)
4281 asm_fprintf (f
, "%ssp", code
== 'w' ? "w" : "");
4288 /* Print a normal operand, if it's a general register, then we
4292 output_operand_lossage ("missing operand");
4296 switch (GET_CODE (x
))
4299 asm_fprintf (f
, "%s", reg_names
[REGNO (x
)]);
4303 aarch64_memory_reference_mode
= GET_MODE (x
);
4304 output_address (XEXP (x
, 0));
4309 output_addr_const (asm_out_file
, x
);
4313 asm_fprintf (f
, "%wd", INTVAL (x
));
4317 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_VECTOR_INT
)
4320 aarch64_const_vec_all_same_in_range_p (x
,
4322 HOST_WIDE_INT_MAX
));
4323 asm_fprintf (f
, "%wd", INTVAL (CONST_VECTOR_ELT (x
, 0)));
4325 else if (aarch64_simd_imm_zero_p (x
, GET_MODE (x
)))
4334 /* CONST_DOUBLE can represent a double-width integer.
4335 In this case, the mode of x is VOIDmode. */
4336 if (GET_MODE (x
) == VOIDmode
)
4338 else if (aarch64_float_const_zero_rtx_p (x
))
4343 else if (aarch64_float_const_representable_p (x
))
4346 char float_buf
[buf_size
] = {'\0'};
4348 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
4349 real_to_decimal_for_mode (float_buf
, &r
,
4352 asm_fprintf (asm_out_file
, "%s", float_buf
);
4356 output_operand_lossage ("invalid constant");
4359 output_operand_lossage ("invalid operand");
4365 if (GET_CODE (x
) == HIGH
)
4368 switch (aarch64_classify_symbolic_expression (x
, SYMBOL_CONTEXT_ADR
))
4370 case SYMBOL_SMALL_GOT
:
4371 asm_fprintf (asm_out_file
, ":got:");
4374 case SYMBOL_SMALL_TLSGD
:
4375 asm_fprintf (asm_out_file
, ":tlsgd:");
4378 case SYMBOL_SMALL_TLSDESC
:
4379 asm_fprintf (asm_out_file
, ":tlsdesc:");
4382 case SYMBOL_SMALL_GOTTPREL
:
4383 asm_fprintf (asm_out_file
, ":gottprel:");
4386 case SYMBOL_SMALL_TPREL
:
4387 asm_fprintf (asm_out_file
, ":tprel:");
4390 case SYMBOL_TINY_GOT
:
4397 output_addr_const (asm_out_file
, x
);
4401 switch (aarch64_classify_symbolic_expression (x
, SYMBOL_CONTEXT_ADR
))
4403 case SYMBOL_SMALL_GOT
:
4404 asm_fprintf (asm_out_file
, ":lo12:");
4407 case SYMBOL_SMALL_TLSGD
:
4408 asm_fprintf (asm_out_file
, ":tlsgd_lo12:");
4411 case SYMBOL_SMALL_TLSDESC
:
4412 asm_fprintf (asm_out_file
, ":tlsdesc_lo12:");
4415 case SYMBOL_SMALL_GOTTPREL
:
4416 asm_fprintf (asm_out_file
, ":gottprel_lo12:");
4419 case SYMBOL_SMALL_TPREL
:
4420 asm_fprintf (asm_out_file
, ":tprel_lo12_nc:");
4423 case SYMBOL_TINY_GOT
:
4424 asm_fprintf (asm_out_file
, ":got:");
4430 output_addr_const (asm_out_file
, x
);
4435 switch (aarch64_classify_symbolic_expression (x
, SYMBOL_CONTEXT_ADR
))
4437 case SYMBOL_SMALL_TPREL
:
4438 asm_fprintf (asm_out_file
, ":tprel_hi12:");
4443 output_addr_const (asm_out_file
, x
);
4451 if (!COMPARISON_P (x
))
4453 output_operand_lossage ("invalid operand for '%%%c'", code
);
4457 cond_code
= aarch64_get_condition_code_1 (CCmode
, GET_CODE (x
));
4458 gcc_assert (cond_code
>= 0);
4459 asm_fprintf (f
, "%d", aarch64_nzcv_codes
[cond_code
][0]);
4468 if (!COMPARISON_P (x
))
4470 output_operand_lossage ("invalid operand for '%%%c'", code
);
4474 cond_code
= aarch64_get_condition_code_1 (CCmode
, GET_CODE (x
));
4475 gcc_assert (cond_code
>= 0);
4476 asm_fprintf (f
, "%d", aarch64_nzcv_codes
[cond_code
][1]);
4481 output_operand_lossage ("invalid operand prefix '%%%c'", code
);
4487 aarch64_print_operand_address (FILE *f
, rtx x
)
4489 struct aarch64_address_info addr
;
4491 if (aarch64_classify_address (&addr
, x
, aarch64_memory_reference_mode
,
4495 case ADDRESS_REG_IMM
:
4496 if (addr
.offset
== const0_rtx
)
4497 asm_fprintf (f
, "[%s]", reg_names
[REGNO (addr
.base
)]);
4499 asm_fprintf (f
, "[%s, %wd]", reg_names
[REGNO (addr
.base
)],
4500 INTVAL (addr
.offset
));
4503 case ADDRESS_REG_REG
:
4504 if (addr
.shift
== 0)
4505 asm_fprintf (f
, "[%s, %s]", reg_names
[REGNO (addr
.base
)],
4506 reg_names
[REGNO (addr
.offset
)]);
4508 asm_fprintf (f
, "[%s, %s, lsl %u]", reg_names
[REGNO (addr
.base
)],
4509 reg_names
[REGNO (addr
.offset
)], addr
.shift
);
4512 case ADDRESS_REG_UXTW
:
4513 if (addr
.shift
== 0)
4514 asm_fprintf (f
, "[%s, w%d, uxtw]", reg_names
[REGNO (addr
.base
)],
4515 REGNO (addr
.offset
) - R0_REGNUM
);
4517 asm_fprintf (f
, "[%s, w%d, uxtw %u]", reg_names
[REGNO (addr
.base
)],
4518 REGNO (addr
.offset
) - R0_REGNUM
, addr
.shift
);
4521 case ADDRESS_REG_SXTW
:
4522 if (addr
.shift
== 0)
4523 asm_fprintf (f
, "[%s, w%d, sxtw]", reg_names
[REGNO (addr
.base
)],
4524 REGNO (addr
.offset
) - R0_REGNUM
);
4526 asm_fprintf (f
, "[%s, w%d, sxtw %u]", reg_names
[REGNO (addr
.base
)],
4527 REGNO (addr
.offset
) - R0_REGNUM
, addr
.shift
);
4530 case ADDRESS_REG_WB
:
4531 switch (GET_CODE (x
))
4534 asm_fprintf (f
, "[%s, %d]!", reg_names
[REGNO (addr
.base
)],
4535 GET_MODE_SIZE (aarch64_memory_reference_mode
));
4538 asm_fprintf (f
, "[%s], %d", reg_names
[REGNO (addr
.base
)],
4539 GET_MODE_SIZE (aarch64_memory_reference_mode
));
4542 asm_fprintf (f
, "[%s, -%d]!", reg_names
[REGNO (addr
.base
)],
4543 GET_MODE_SIZE (aarch64_memory_reference_mode
));
4546 asm_fprintf (f
, "[%s], -%d", reg_names
[REGNO (addr
.base
)],
4547 GET_MODE_SIZE (aarch64_memory_reference_mode
));
4550 asm_fprintf (f
, "[%s, %wd]!", reg_names
[REGNO (addr
.base
)],
4551 INTVAL (addr
.offset
));
4554 asm_fprintf (f
, "[%s], %wd", reg_names
[REGNO (addr
.base
)],
4555 INTVAL (addr
.offset
));
4562 case ADDRESS_LO_SUM
:
4563 asm_fprintf (f
, "[%s, #:lo12:", reg_names
[REGNO (addr
.base
)]);
4564 output_addr_const (f
, addr
.offset
);
4565 asm_fprintf (f
, "]");
4568 case ADDRESS_SYMBOLIC
:
4572 output_addr_const (f
, x
);
4576 aarch64_label_mentioned_p (rtx x
)
4581 if (GET_CODE (x
) == LABEL_REF
)
4584 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4585 referencing instruction, but they are constant offsets, not
4587 if (GET_CODE (x
) == UNSPEC
&& XINT (x
, 1) == UNSPEC_TLS
)
4590 fmt
= GET_RTX_FORMAT (GET_CODE (x
));
4591 for (i
= GET_RTX_LENGTH (GET_CODE (x
)) - 1; i
>= 0; i
--)
4597 for (j
= XVECLEN (x
, i
) - 1; j
>= 0; j
--)
4598 if (aarch64_label_mentioned_p (XVECEXP (x
, i
, j
)))
4601 else if (fmt
[i
] == 'e' && aarch64_label_mentioned_p (XEXP (x
, i
)))
4608 /* Implement REGNO_REG_CLASS. */
4611 aarch64_regno_regclass (unsigned regno
)
4613 if (GP_REGNUM_P (regno
))
4614 return GENERAL_REGS
;
4616 if (regno
== SP_REGNUM
)
4619 if (regno
== FRAME_POINTER_REGNUM
4620 || regno
== ARG_POINTER_REGNUM
)
4621 return POINTER_REGS
;
4623 if (FP_REGNUM_P (regno
))
4624 return FP_LO_REGNUM_P (regno
) ? FP_LO_REGS
: FP_REGS
;
4630 aarch64_legitimize_address (rtx x
, rtx
/* orig_x */, machine_mode mode
)
4632 /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
4633 where mask is selected by alignment and size of the offset.
4634 We try to pick as large a range for the offset as possible to
4635 maximize the chance of a CSE. However, for aligned addresses
4636 we limit the range to 4k so that structures with different sized
4637 elements are likely to use the same base. */
4639 if (GET_CODE (x
) == PLUS
&& CONST_INT_P (XEXP (x
, 1)))
4641 HOST_WIDE_INT offset
= INTVAL (XEXP (x
, 1));
4642 HOST_WIDE_INT base_offset
;
4644 /* Does it look like we'll need a load/store-pair operation? */
4645 if (GET_MODE_SIZE (mode
) > 16
4647 base_offset
= ((offset
+ 64 * GET_MODE_SIZE (mode
))
4648 & ~((128 * GET_MODE_SIZE (mode
)) - 1));
      /* For offsets that aren't a multiple of the access size, the limit is
         -256...255.  */
4651 else if (offset
& (GET_MODE_SIZE (mode
) - 1))
4652 base_offset
= (offset
+ 0x100) & ~0x1ff;
4654 base_offset
= offset
& ~0xfff;
4656 if (base_offset
== 0)
4659 offset
-= base_offset
;
4660 rtx base_reg
= gen_reg_rtx (Pmode
);
4661 rtx val
= force_operand (plus_constant (Pmode
, XEXP (x
, 0), base_offset
),
4663 emit_move_insn (base_reg
, val
);
4664 x
= plus_constant (Pmode
, base_reg
, offset
);
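/* Illustrative sketch (not part of the backend): the splitting above picks a
   base offset aligned to the widest range the eventual memory access can
   use, so the residual offset fits the instruction and the base register is
   likely to be shared by neighbouring accesses.  The helper is a
   hypothetical stand-in; SIZE is the access size in bytes (a power of two)
   and NEEDS_PAIR marks accesses that will use a load/store pair.  */

static long long
sketch_pick_base_offset (long long offset, long long size, int needs_pair)
{
  if (needs_pair)
    /* Load/store pair: centre the +/-64-element signed scaled range.  */
    return (offset + 64 * size) & ~(128 * size - 1);
  else if (offset & (size - 1))
    /* Misaligned offset: only the 9-bit signed unscaled form applies.  */
    return (offset + 0x100) & ~0x1ff;
  else
    /* Aligned offset: the 12-bit unsigned scaled form, so mask to 4k.  */
    return offset & ~0xfffLL;
}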
4670 /* Try a machine-dependent way of reloading an illegitimate address
4671 operand. If we find one, push the reload and return the new rtx. */
4674 aarch64_legitimize_reload_address (rtx
*x_p
,
4676 int opnum
, int type
,
4677 int ind_levels ATTRIBUTE_UNUSED
)
4681 /* Do not allow mem (plus (reg, const)) if vector struct mode. */
4682 if (aarch64_vect_struct_mode_p (mode
)
4683 && GET_CODE (x
) == PLUS
4684 && REG_P (XEXP (x
, 0))
4685 && CONST_INT_P (XEXP (x
, 1)))
4689 push_reload (orig_rtx
, NULL_RTX
, x_p
, NULL
,
4690 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
4691 opnum
, (enum reload_type
) type
);
4695 /* We must recognize output that we have already generated ourselves. */
4696 if (GET_CODE (x
) == PLUS
4697 && GET_CODE (XEXP (x
, 0)) == PLUS
4698 && REG_P (XEXP (XEXP (x
, 0), 0))
4699 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
4700 && CONST_INT_P (XEXP (x
, 1)))
4702 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
4703 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
4704 opnum
, (enum reload_type
) type
);
4708 /* We wish to handle large displacements off a base register by splitting
4709 the addend across an add and the mem insn. This can cut the number of
4710 extra insns needed from 3 to 1. It is only useful for load/store of a
4711 single register with 12 bit offset field. */
4712 if (GET_CODE (x
) == PLUS
4713 && REG_P (XEXP (x
, 0))
4714 && CONST_INT_P (XEXP (x
, 1))
4715 && HARD_REGISTER_P (XEXP (x
, 0))
4718 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x
, 0)), true))
4720 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
4721 HOST_WIDE_INT low
= val
& 0xfff;
4722 HOST_WIDE_INT high
= val
- low
;
4725 machine_mode xmode
= GET_MODE (x
);
4727 /* In ILP32, xmode can be either DImode or SImode. */
4728 gcc_assert (xmode
== DImode
|| xmode
== SImode
);
4730 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4731 BLKmode alignment. */
4732 if (GET_MODE_SIZE (mode
) == 0)
4735 offs
= low
% GET_MODE_SIZE (mode
);
4737 /* Align misaligned offset by adjusting high part to compensate. */
4740 if (aarch64_uimm12_shift (high
+ offs
))
4749 offs
= GET_MODE_SIZE (mode
) - offs
;
4751 high
= high
+ (low
& 0x1000) - offs
;
4756 /* Check for overflow. */
4757 if (high
+ low
!= val
)
4760 cst
= GEN_INT (high
);
4761 if (!aarch64_uimm12_shift (high
))
4762 cst
= force_const_mem (xmode
, cst
);
4764 /* Reload high part into base reg, leaving the low part
4765 in the mem instruction.
4766 Note that replacing this gen_rtx_PLUS with plus_constant is
4767 wrong in this case because we rely on the
4768 (plus (plus reg c1) c2) structure being preserved so that
4769 XEXP (*p, 0) in push_reload below uses the correct term. */
4770 x
= gen_rtx_PLUS (xmode
,
4771 gen_rtx_PLUS (xmode
, XEXP (x
, 0), cst
),
4774 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
4775 BASE_REG_CLASS
, xmode
, VOIDmode
, 0, 0,
4776 opnum
, (enum reload_type
) type
);
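/* Illustrative sketch (not part of the backend): the reload path above
   separates a large displacement into a low part that stays in the 12-bit
   offset field of the memory operand and a high part that is reloaded into
   the base register first (the real code additionally nudges the high part
   when the low part would leave the access misaligned).  Hypothetical
   helper, plain integer types.  */

static void
sketch_split_reload_offset (long long val, long long *high, long long *low)
{
  *low = val & 0xfff;           /* Kept in the load/store offset field.  */
  *high = val - *low;           /* Added to the base register beforehand.  */
}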
4785 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED
, rtx x
,
4788 secondary_reload_info
*sri
)
4790 /* Without the TARGET_SIMD instructions we cannot move a Q register
4791 to a Q register directly. We need a scratch. */
4792 if (REG_P (x
) && (mode
== TFmode
|| mode
== TImode
) && mode
== GET_MODE (x
)
4793 && FP_REGNUM_P (REGNO (x
)) && !TARGET_SIMD
4794 && reg_class_subset_p (rclass
, FP_REGS
))
4797 sri
->icode
= CODE_FOR_aarch64_reload_movtf
;
4798 else if (mode
== TImode
)
4799 sri
->icode
= CODE_FOR_aarch64_reload_movti
;
4803 /* A TFmode or TImode memory access should be handled via an FP_REGS
4804 because AArch64 has richer addressing modes for LDR/STR instructions
4805 than LDP/STP instructions. */
4806 if (!TARGET_GENERAL_REGS_ONLY
&& rclass
== GENERAL_REGS
4807 && GET_MODE_SIZE (mode
) == 16 && MEM_P (x
))
4810 if (rclass
== FP_REGS
&& (mode
== TImode
|| mode
== TFmode
) && CONSTANT_P(x
))
4811 return GENERAL_REGS
;
4817 aarch64_can_eliminate (const int from
, const int to
)
4819 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4820 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4822 if (frame_pointer_needed
)
4824 if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4826 if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
4828 if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
4829 && !cfun
->calls_alloca
)
4831 if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4838 /* If we decided that we didn't need a leaf frame pointer but then used
4839 LR in the function, then we'll want a frame pointer after all, so
4840 prevent this elimination to ensure a frame pointer is used. */
4841 if (to
== STACK_POINTER_REGNUM
4842 && flag_omit_leaf_frame_pointer
4843 && df_regs_ever_live_p (LR_REGNUM
))
HOST_WIDE_INT
aarch64_initial_elimination_offset (unsigned from, unsigned to)
{
  aarch64_layout_frame ();

  if (to == HARD_FRAME_POINTER_REGNUM)
    {
      if (from == ARG_POINTER_REGNUM)
        return cfun->machine->frame.frame_size - crtl->outgoing_args_size;

      if (from == FRAME_POINTER_REGNUM)
        return (cfun->machine->frame.hard_fp_offset
                - cfun->machine->frame.saved_varargs_size);
    }

  if (to == STACK_POINTER_REGNUM)
    {
      if (from == FRAME_POINTER_REGNUM)
        return (cfun->machine->frame.frame_size
                - cfun->machine->frame.saved_varargs_size);
    }

  return cfun->machine->frame.frame_size;
}
/* Implement RETURN_ADDR_RTX.  We do not support moving back to a
   previous frame.  */

rtx
aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return const0_rtx;
  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
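
/* Implement TARGET_ASM_TRAMPOLINE_TEMPLATE.  Output the assembler code
   template for a trampoline to file F.  */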
static void
aarch64_asm_trampoline_template (FILE *f)
{
  if (TARGET_ILP32)
    {
      asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
      asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
    }
  else
    {
      asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
      asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
    }
  asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
  assemble_aligned_integer (4, const0_rtx);
  assemble_aligned_integer (POINTER_BYTES, const0_rtx);
  assemble_aligned_integer (POINTER_BYTES, const0_rtx);
}
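
/* Implement TARGET_TRAMPOLINE_INIT.  Initialize trampoline M_TRAMP with
   the address of function FNDECL and the static chain CHAIN_VALUE, then
   flush the instruction cache over the trampoline.  */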
static void
aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;
  const int tramp_code_sz = 16;

  /* Don't need to copy the trailing D-words, we fill those in below.  */
  emit_block_move (m_tramp, assemble_trampoline_template (),
                   GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
  mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  if (GET_MODE (fnaddr) != ptr_mode)
    fnaddr = convert_memory_address (ptr_mode, fnaddr);
  emit_move_insn (mem, fnaddr);

  mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
  emit_move_insn (mem, chain_value);

  /* XXX We should really define a "clear_cache" pattern and use
     gen_clear_cache().  */
  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
                     LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
                     plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
                     ptr_mode);
}
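
/* Implement TARGET_CLASS_MAX_NREGS.  Return the maximum number of
   registers of class REGCLASS needed to hold a value of mode MODE.  */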
static unsigned char
aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
{
  switch (regclass)
    {
    case CALLER_SAVE_REGS:
      return
        aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
                                       (GET_MODE_SIZE (mode) + 7) / 8;
    }
}
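
/* Implement TARGET_PREFERRED_RELOAD_CLASS.  Return the register class to
   use when reloading X into a register of class REGCLASS.  */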
static reg_class_t
aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
{
  if (regclass == POINTER_REGS)
    return GENERAL_REGS;

  if (regclass == STACK_REG)
    {
      if (REG_P (x)
          && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
        return regclass;

      return NO_REGS;
    }

  /* If it's an integer immediate that MOVI can't handle, then
     FP_REGS is not an option, so we return NO_REGS instead.  */
  if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
      && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
    return NO_REGS;

  /* Register elimination can result in a request for
     SP+constant->FP_REGS.  We cannot support such operations which
     use SP as source and an FP_REG as destination, so reject out
     right now.  */
  if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
    {
      rtx lhs = XEXP (x, 0);

      /* Look through a possible SUBREG introduced by ILP32.  */
      if (GET_CODE (lhs) == SUBREG)
        lhs = SUBREG_REG (lhs);

      gcc_assert (REG_P (lhs));
      gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
                                      POINTER_REGS));
      return NO_REGS;
    }

  return regclass;
}
void
aarch64_asm_output_labelref (FILE* f, const char *name)
{
  asm_fprintf (f, "%U%s", name);
}
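
/* Emit constructor SYMBOL into a named .init_array section when a
   non-default PRIORITY is requested.  */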
static void
aarch64_elf_asm_constructor (rtx symbol, int priority)
{
  if (priority == DEFAULT_INIT_PRIORITY)
    default_ctor_section_asm_out_constructor (symbol, priority);
  else
    {
      snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
      s = get_section (buf, SECTION_WRITE, NULL);
      switch_to_section (s);
      assemble_align (POINTER_SIZE);
      assemble_aligned_integer (POINTER_BYTES, symbol);
    }
}
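
/* Emit destructor SYMBOL into a named .fini_array section when a
   non-default PRIORITY is requested.  */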
static void
aarch64_elf_asm_destructor (rtx symbol, int priority)
{
  if (priority == DEFAULT_INIT_PRIORITY)
    default_dtor_section_asm_out_destructor (symbol, priority);
  else
    {
      snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
      s = get_section (buf, SECTION_WRITE, NULL);
      switch_to_section (s);
      assemble_align (POINTER_SIZE);
      assemble_aligned_integer (POINTER_BYTES, symbol);
    }
}
const char *
aarch64_output_casesi (rtx *operands)
{
  int index;
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
  static const char *const patterns[4][2] =
  {
    {
      "ldrb\t%w3, [%0,%w1,uxtw]",
      "add\t%3, %4, %w3, sxtb #2"
    },
    {
      "ldrh\t%w3, [%0,%w1,uxtw #1]",
      "add\t%3, %4, %w3, sxth #2"
    },
    {
      "ldr\t%w3, [%0,%w1,uxtw #2]",
      "add\t%3, %4, %w3, sxtw #2"
    },
    /* We assume that DImode is only generated when not optimizing and
       that we don't really need 64-bit address offsets.  That would
       imply an object file with 8GB of code in a single function!  */
    {
      "ldr\t%w3, [%0,%w1,uxtw #2]",
      "add\t%3, %4, %w3, sxtw #2"
    }
  };

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));

  gcc_assert (index >= 0 && index <= 3);

  /* Need to implement table size reduction, by changing the code below.  */
  output_asm_insn (patterns[index][0], operands);
  ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
  snprintf (buf, sizeof (buf),
            "adr\t%%4, %s", targetm.strip_name_encoding (label));
  output_asm_insn (buf, operands);
  output_asm_insn (patterns[index][1], operands);
  output_asm_insn ("br\t%3", operands);
  assemble_label (asm_out_file, label);
  return "";
}
/* Return size in bits of an arithmetic operand which is shifted/scaled and
   masked such that it is suitable for a UXTB, UXTH, or UXTW extend
   operator.  */
int
aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
{
  if (shift >= 0 && shift <= 3)
    {
      int size;
      for (size = 8; size <= 32; size *= 2)
        {
          HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
          if (mask == bits << shift)
            return size;
        }
    }
  return 0;
}
static bool
aarch64_use_blocks_for_constant_p (machine_mode mode ATTRIBUTE_UNUSED,
                                   const_rtx x ATTRIBUTE_UNUSED)
{
  /* We can't use blocks for constants when we're using a per-function
     constant pool.  */
  return false;
}

static section *
aarch64_select_rtx_section (machine_mode mode ATTRIBUTE_UNUSED,
                            rtx x ATTRIBUTE_UNUSED,
                            unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  /* Force all constant pool entries into the current function section.  */
  return function_section (current_function_decl);
}
/* Helper function for rtx cost calculation.  Strip a shift expression
   from X.  Returns the inner operand if successful, or the original
   expression on failure.  */
static rtx
aarch64_strip_shift (rtx x)
{
  rtx op = x;

  /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
     we can convert both to ROR during final output.  */
  if ((GET_CODE (op) == ASHIFT
       || GET_CODE (op) == ASHIFTRT
       || GET_CODE (op) == LSHIFTRT
       || GET_CODE (op) == ROTATERT
       || GET_CODE (op) == ROTATE)
      && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);

  if (GET_CODE (op) == MULT
      && CONST_INT_P (XEXP (op, 1))
      && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
    return XEXP (op, 0);

  return x;
}
/* Helper function for rtx cost calculation.  Strip an extend
   expression from X.  Returns the inner operand if successful, or the
   original expression on failure.  We deal with a number of possible
   canonicalization variations here.  */
static rtx
aarch64_strip_extend (rtx x)
{
  rtx op = x;

  /* Zero and sign extraction of a widened value.  */
  if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
      && XEXP (op, 2) == const0_rtx
      && GET_CODE (XEXP (op, 0)) == MULT
      && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
                                         XEXP (op, 1)))
    return XEXP (XEXP (op, 0), 0);

  /* It can also be represented (for zero-extend) as an AND with an
     immediate.  */
  if (GET_CODE (op) == AND
      && GET_CODE (XEXP (op, 0)) == MULT
      && CONST_INT_P (XEXP (XEXP (op, 0), 1))
      && CONST_INT_P (XEXP (op, 1))
      && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
                           INTVAL (XEXP (op, 1))) != 0)
    return XEXP (XEXP (op, 0), 0);

  /* Now handle extended register, as this may also have an optional
     left shift by 1..4.  */
  if (GET_CODE (op) == ASHIFT
      && CONST_INT_P (XEXP (op, 1))
      && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
    op = XEXP (op, 0);

  if (GET_CODE (op) == ZERO_EXTEND
      || GET_CODE (op) == SIGN_EXTEND)
    op = XEXP (op, 0);

  if (op != x)
    return op;

  return x;
}
/* Helper function for rtx cost calculation.  Calculate the cost of
   a MULT, which may be part of a multiply-accumulate rtx.  Return
   the calculated cost of the expression, recursing manually in to
   operands where needed.  */

static int
aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
{
  rtx op0, op1;
  const struct cpu_cost_table *extra_cost
    = aarch64_tune_params->insn_extra_cost;
  int cost = 0;
  bool maybe_fma = (outer == PLUS || outer == MINUS);
  machine_mode mode = GET_MODE (x);

  gcc_checking_assert (code == MULT);

  op0 = XEXP (x, 0);
  op1 = XEXP (x, 1);

  if (VECTOR_MODE_P (mode))
    mode = GET_MODE_INNER (mode);

  /* Integer multiply/fma.  */
  if (GET_MODE_CLASS (mode) == MODE_INT)
    {
      /* The multiply will be canonicalized as a shift, cost it as such.  */
      if (CONST_INT_P (op1)
          && exact_log2 (INTVAL (op1)) > 0)
        {
          if (speed)
            {
              if (maybe_fma)
                /* ADD (shifted register).  */
                cost += extra_cost->alu.arith_shift;
              else
                /* LSL (immediate).  */
                cost += extra_cost->alu.shift;
            }

          cost += rtx_cost (op0, GET_CODE (op0), 0, speed);

          return cost;
        }

      /* Integer multiplies or FMAs have zero/sign extending variants.  */
      if ((GET_CODE (op0) == ZERO_EXTEND
           && GET_CODE (op1) == ZERO_EXTEND)
          || (GET_CODE (op0) == SIGN_EXTEND
              && GET_CODE (op1) == SIGN_EXTEND))
        {
          cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
                  + rtx_cost (XEXP (op1, 0), MULT, 1, speed);

          if (speed)
            {
              if (maybe_fma)
                /* MADD/SMADDL/UMADDL.  */
                cost += extra_cost->mult[0].extend_add;
              else
                /* MUL/SMULL/UMULL.  */
                cost += extra_cost->mult[0].extend;
            }

          return cost;
        }

      /* This is either an integer multiply or an FMA.  In both cases
         we want to recurse and cost the operands.  */
      cost += rtx_cost (op0, MULT, 0, speed)
              + rtx_cost (op1, MULT, 1, speed);

      if (speed)
        {
          if (maybe_fma)
            cost += extra_cost->mult[mode == DImode].add;
          else
            cost += extra_cost->mult[mode == DImode].simple;
        }

      return cost;
    }
  else
    {
      if (speed)
        {
          /* Floating-point FMA/FMUL can also support negations of the
             operands.  */
          if (GET_CODE (op0) == NEG)
            op0 = XEXP (op0, 0);
          if (GET_CODE (op1) == NEG)
            op1 = XEXP (op1, 0);

          if (maybe_fma)
            /* FMADD/FNMADD/FNMSUB/FMSUB.  */
            cost += extra_cost->fp[mode == DFmode].fma;
          else
            cost += extra_cost->fp[mode == DFmode].mult;
        }

      cost += rtx_cost (op0, MULT, 0, speed)
              + rtx_cost (op1, MULT, 1, speed);
      return cost;
    }
}
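
/* Implement TARGET_ADDRESS_COST.  Return the cost of the address X of
   mode MODE, in the same units as the other address cost tables.  */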
static int
aarch64_address_cost (rtx x,
                      machine_mode mode,
                      addr_space_t as ATTRIBUTE_UNUSED,
                      bool speed)
{
  enum rtx_code c = GET_CODE (x);
  const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
  struct aarch64_address_info info;
  int cost = 0;

  if (!aarch64_classify_address (&info, x, mode, c, false))
    {
      if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
        {
          /* This is a CONST or SYMBOL ref which will be split
             in a different way depending on the code model in use.
             Cost it through the generic infrastructure.  */
          int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
          /* Divide through by the cost of one instruction to
             bring it to the same units as the address costs.  */
          cost_symbol_ref /= COSTS_N_INSNS (1);
          /* The cost is then the cost of preparing the address,
             followed by an immediate (possibly 0) offset.  */
          return cost_symbol_ref + addr_cost->imm_offset;
        }
      else
        {
          /* This is most likely a jump table from a case
             statement.  */
          return addr_cost->register_offset;
        }
    }

  switch (info.type)
    {
    case ADDRESS_LO_SUM:
    case ADDRESS_SYMBOLIC:
    case ADDRESS_REG_IMM:
      cost += addr_cost->imm_offset;
      break;

    case ADDRESS_REG_WB:
      if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
        cost += addr_cost->pre_modify;
      else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
        cost += addr_cost->post_modify;
      break;

    case ADDRESS_REG_REG:
      cost += addr_cost->register_offset;
      break;

    case ADDRESS_REG_UXTW:
    case ADDRESS_REG_SXTW:
      cost += addr_cost->register_extend;
      break;
    }

  /* For the sake of calculating the cost of the shifted register
     component, we can treat same sized modes in the same way.  */
  switch (GET_MODE_BITSIZE (mode))
    {
    case 16:
      cost += addr_cost->addr_scale_costs.hi;
      break;

    case 32:
      cost += addr_cost->addr_scale_costs.si;
      break;

    case 64:
      cost += addr_cost->addr_scale_costs.di;
      break;

    /* We can't tell, or this is a 128-bit vector.  */
    default:
      cost += addr_cost->addr_scale_costs.ti;
      break;
    }

  return cost;
}
/* Return true if the RTX X in mode MODE is a zero or sign extract
   usable in an ADD or SUB (extended register) instruction.  */
static bool
aarch64_rtx_arith_op_extract_p (rtx x, machine_mode mode)
{
  /* Catch add with a sign extract.
     This is add_<optab><mode>_multp2.  */
  if (GET_CODE (x) == SIGN_EXTRACT
      || GET_CODE (x) == ZERO_EXTRACT)
    {
      rtx op0 = XEXP (x, 0);
      rtx op1 = XEXP (x, 1);
      rtx op2 = XEXP (x, 2);

      if (GET_CODE (op0) == MULT
          && CONST_INT_P (op1)
          && op2 == const0_rtx
          && CONST_INT_P (XEXP (op0, 1))
          && aarch64_is_extend_from_extract (mode,
                                             XEXP (op0, 1),
                                             op1))
        return true;
    }

  return false;
}

static bool
aarch64_frint_unspec_p (unsigned int u)
/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
   storing it in *COST.  Result is true if the total cost of the operation
   has now been calculated.  */
static bool
aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
{
  rtx inner;
  rtx comparator;
  enum rtx_code cmpcode;

  if (COMPARISON_P (op0))
    {
      inner = XEXP (op0, 0);
      comparator = XEXP (op0, 1);
      cmpcode = GET_CODE (op0);
    }
  else
    {
      inner = op0;
      comparator = const0_rtx;
      cmpcode = NE;
    }

  if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
    {
      /* Conditional branch.  */
      if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
        return true;
      else
        {
          if (cmpcode == NE || cmpcode == EQ)
            {
              if (comparator == const0_rtx)
                {
                  /* TBZ/TBNZ/CBZ/CBNZ.  */
                  if (GET_CODE (inner) == ZERO_EXTRACT)
                    /* TBZ/TBNZ.  */
                    *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT,
                                       0, speed);
                  else
                    /* CBZ/CBNZ.  */
                    *cost += rtx_cost (inner, cmpcode, 0, speed);

                  return true;
                }
            }
          else if (cmpcode == LT || cmpcode == GE)
            {
              /* TBZ/TBNZ.  */
              if (comparator == const0_rtx)
                return true;
            }
        }
    }
  else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
    {
      /* It's a conditional operation based on the status flags,
         so it must be some flavor of CSEL.  */

      /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL.  */
      if (GET_CODE (op1) == NEG
          || GET_CODE (op1) == NOT
          || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
        op1 = XEXP (op1, 0);

      *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed);
      *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed);
      return true;
    }

  /* We don't know what this is, cost all operands.  */
  return false;
}
/* Calculate the cost of calculating X, storing it in *COST.  Result
   is true if the total cost of the operation has now been calculated.  */
static bool
aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
                   int param ATTRIBUTE_UNUSED, int *cost, bool speed)
{
  rtx op0, op1, op2;
  const struct cpu_cost_table *extra_cost
    = aarch64_tune_params->insn_extra_cost;
  machine_mode mode = GET_MODE (x);

  /* By default, assume that everything has equivalent cost to the
     cheapest instruction.  Any additional costs are applied as a delta
     above this default.  */
  *cost = COSTS_N_INSNS (1);

  /* TODO: The cost infrastructure currently does not handle
     vector operations.  Assume that all vector operations
     are equally expensive.  */
  if (VECTOR_MODE_P (mode))
    {
      if (speed)
        *cost += extra_cost->vect.alu;
      return true;
    }

  switch (code)
    {
    case SET:
      /* The cost depends entirely on the operands to SET.  */
      *cost = 0;
      op0 = SET_DEST (x);
      op1 = SET_SRC (x);

      switch (GET_CODE (op0))
        {
        case MEM:
          if (speed)
            {
              rtx address = XEXP (op0, 0);
              if (GET_MODE_CLASS (mode) == MODE_INT)
                *cost += extra_cost->ldst.store;
              else if (mode == SFmode)
                *cost += extra_cost->ldst.storef;
              else if (mode == DFmode)
                *cost += extra_cost->ldst.stored;

              *cost +=
                COSTS_N_INSNS (aarch64_address_cost (address, mode,
                                                     0, speed));
            }

          *cost += rtx_cost (op1, SET, 1, speed);
          return true;

        case SUBREG:
          if (! REG_P (SUBREG_REG (op0)))
            *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);

          /* Fall through.  */
        case REG:
          /* const0_rtx is in general free, but we will use an
             instruction to set a register to 0.  */
          if (REG_P (op1) || op1 == const0_rtx)
            {
              /* The cost is 1 per register copied.  */
              int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
                              / UNITS_PER_WORD;
              *cost = COSTS_N_INSNS (n_minus_1 + 1);
            }
          else
            /* Cost is just the cost of the RHS of the set.  */
            *cost += rtx_cost (op1, SET, 1, speed);
          return true;

        case ZERO_EXTRACT:
        case SIGN_EXTRACT:
          /* Bit-field insertion.  Strip any redundant widening of
             the RHS to meet the width of the target.  */
          if (GET_CODE (op1) == SUBREG)
            op1 = SUBREG_REG (op1);
          if ((GET_CODE (op1) == ZERO_EXTEND
               || GET_CODE (op1) == SIGN_EXTEND)
              && CONST_INT_P (XEXP (op0, 1))
              && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
                  >= INTVAL (XEXP (op0, 1))))
            op1 = XEXP (op1, 0);

          if (CONST_INT_P (op1))
            {
              /* MOV immediate is assumed to always be cheap.  */
              *cost = COSTS_N_INSNS (1);
            }
          else
            {
              if (speed)
                *cost += extra_cost->alu.bfi;
              *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
            }

          return true;

        default:
          /* We can't make sense of this, assume default cost.  */
          *cost = COSTS_N_INSNS (1);
          return false;
        }

    case CONST_INT:
      /* If an instruction can incorporate a constant within the
         instruction, the instruction's expression avoids calling
         rtx_cost() on the constant.  If rtx_cost() is called on a
         constant, then it is usually because the constant must be
         moved into a register by one or more instructions.

         The exception is constant 0, which can be expressed
         as XZR/WZR and is therefore free.  The exception to this is
         if we have (set (reg) (const0_rtx)) in which case we must cost
         the move.  However, we can catch that when we cost the SET, so
         we don't need to consider that here.  */
      if (x == const0_rtx)
        *cost = 0;
      else
        {
          /* To an approximation, building any other constant is
             proportionally expensive to the number of instructions
             required to build that constant.  This is true whether we
             are compiling for SPEED or otherwise.  */
          *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
                                 (NULL_RTX, x, false, mode));
        }
      return true;

    case CONST_DOUBLE:
      if (speed)
        {
          /* mov[df,sf]_aarch64.  */
          if (aarch64_float_const_representable_p (x))
            /* FMOV (scalar immediate).  */
            *cost += extra_cost->fp[mode == DFmode].fpconst;
          else if (!aarch64_float_const_zero_rtx_p (x))
            {
              /* This will be a load from memory.  */
              if (mode == DFmode)
                *cost += extra_cost->ldst.loadd;
              else
                *cost += extra_cost->ldst.loadf;
            }
          /* Otherwise this is +0.0.  We get this using MOVI d0, #0
             or MOV v0.s[0], wzr - neither of which are modeled by the
             cost tables.  Just use the default cost.  */
        }

      return true;

    case MEM:
      if (speed)
        {
          /* For loads we want the base cost of a load, plus an
             approximation for the additional cost of the addressing
             mode.  */
          rtx address = XEXP (x, 0);
          if (GET_MODE_CLASS (mode) == MODE_INT)
            *cost += extra_cost->ldst.load;
          else if (mode == SFmode)
            *cost += extra_cost->ldst.loadf;
          else if (mode == DFmode)
            *cost += extra_cost->ldst.loadd;

          *cost +=
            COSTS_N_INSNS (aarch64_address_cost (address, mode,
                                                 0, speed));
        }

      return true;
    case NEG:
      op0 = XEXP (x, 0);

      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
        {
          if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
              || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
            {
              /* CSETM.  */
              *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
              return true;
            }

          /* Cost this as SUB wzr, X.  */
          op0 = CONST0_RTX (GET_MODE (x));
          op1 = XEXP (x, 0);
          goto cost_minus;
        }

      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
        {
          /* Support (neg(fma...)) as a single instruction only if
             sign of zeros is unimportant.  This matches the decision
             making in aarch64.md.  */
          if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
            {
              /* FNMADD.  */
              *cost = rtx_cost (op0, NEG, 0, speed);
              return true;
            }
          if (speed)
            /* FNEG.  */
            *cost += extra_cost->fp[mode == DFmode].neg;
          return false;
        }

      return false;

    case CLZ:
      if (speed)
        *cost += extra_cost->alu.clz;

      return false;

    case COMPARE:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (op1 == const0_rtx
          && GET_CODE (op0) == AND)
        {
          x = op0;
          goto cost_logic;
        }

      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
        {
          /* TODO: A write to the CC flags possibly costs extra, this
             needs encoding in the cost tables.  */

          /* CC_ZESWPmode supports zero extend for free.  */
          if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
            op0 = XEXP (op0, 0);

          /* ANDS.  */
          if (GET_CODE (op0) == AND)
            {
              x = op0;
              goto cost_logic;
            }

          if (GET_CODE (op0) == PLUS)
            {
              /* ADDS (and CMN alias).  */
              x = op0;
              goto cost_plus;
            }

          if (GET_CODE (op0) == MINUS)
            {
              /* SUBS.  */
              x = op0;
              goto cost_minus;
            }

          if (GET_CODE (op1) == NEG)
            {
              /* CMN.  */
              if (speed)
                *cost += extra_cost->alu.arith;

              *cost += rtx_cost (op0, COMPARE, 0, speed);
              *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed);
              return true;
            }

          /* CMP.

             Compare can freely swap the order of operands, and
             canonicalization puts the more complex operation first.
             But the integer MINUS logic expects the shift/extend
             operation in op1.  */
          if (! (REG_P (op0)
                 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
            {
              op0 = XEXP (x, 1);
              op1 = XEXP (x, 0);
            }
          goto cost_minus;
        }

      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
        {
          /* FCMP.  */
          if (speed)
            *cost += extra_cost->fp[mode == DFmode].compare;

          if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
            {
              /* FCMP supports constant 0.0 for no extra cost.  */
              return true;
            }
          return false;
        }

      return false;

    case MINUS:
      {
        op0 = XEXP (x, 0);
        op1 = XEXP (x, 1);

    cost_minus:
        /* Detect valid immediates.  */
        if ((GET_MODE_CLASS (mode) == MODE_INT
             || (GET_MODE_CLASS (mode) == MODE_CC
                 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
            && CONST_INT_P (op1)
            && aarch64_uimm12_shift (INTVAL (op1)))
          {
            *cost += rtx_cost (op0, MINUS, 0, speed);

            if (speed)
              /* SUB(S) (immediate).  */
              *cost += extra_cost->alu.arith;

            return true;
          }

        /* Look for SUB (extended register).  */
        if (aarch64_rtx_arith_op_extract_p (op1, mode))
          {
            if (speed)
              *cost += extra_cost->alu.arith_shift;

            *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
                               (enum rtx_code) GET_CODE (op1),
                               0, speed);
            return true;
          }

        rtx new_op1 = aarch64_strip_extend (op1);

        /* Cost this as an FMA-alike operation.  */
        if (GET_CODE (new_op1) == MULT
            || GET_CODE (new_op1) == ASHIFT)
          {
            *cost += aarch64_rtx_mult_cost (new_op1, MULT,
                                            (enum rtx_code) code,
                                            speed);
            *cost += rtx_cost (op0, MINUS, 0, speed);
            return true;
          }

        *cost += rtx_cost (new_op1, MINUS, 1, speed);

        if (speed)
          {
            if (GET_MODE_CLASS (mode) == MODE_INT)
              *cost += extra_cost->alu.arith;
            else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
              *cost += extra_cost->fp[mode == DFmode].addsub;
          }

        return true;
      }
    case PLUS:
      {
        rtx new_op0;

        op0 = XEXP (x, 0);
        op1 = XEXP (x, 1);

    cost_plus:
        if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
            || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
          {
            /* CSINC.  */
            *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
            *cost += rtx_cost (op1, PLUS, 1, speed);
            return true;
          }

        if (GET_MODE_CLASS (mode) == MODE_INT
            && CONST_INT_P (op1)
            && aarch64_uimm12_shift (INTVAL (op1)))
          {
            *cost += rtx_cost (op0, PLUS, 0, speed);

            if (speed)
              /* ADD (immediate).  */
              *cost += extra_cost->alu.arith;
            return true;
          }

        /* Look for ADD (extended register).  */
        if (aarch64_rtx_arith_op_extract_p (op0, mode))
          {
            if (speed)
              *cost += extra_cost->alu.arith_shift;

            *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
                               (enum rtx_code) GET_CODE (op0),
                               0, speed);
            return true;
          }

        /* Strip any extend, leave shifts behind as we will
           cost them through mult_cost.  */
        new_op0 = aarch64_strip_extend (op0);

        if (GET_CODE (new_op0) == MULT
            || GET_CODE (new_op0) == ASHIFT)
          {
            *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
                                            speed);
            *cost += rtx_cost (op1, PLUS, 1, speed);
            return true;
          }

        *cost += (rtx_cost (new_op0, PLUS, 0, speed)
                  + rtx_cost (op1, PLUS, 1, speed));

        if (speed)
          {
            if (GET_MODE_CLASS (mode) == MODE_INT)
              *cost += extra_cost->alu.arith;
            else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
              *cost += extra_cost->fp[mode == DFmode].addsub;
          }
        return true;
      }

    case BSWAP:
      *cost = COSTS_N_INSNS (1);

      if (speed)
        *cost += extra_cost->alu.rev;

      return false;

    case IOR:
      if (aarch_rev16_p (x))
        {
          *cost = COSTS_N_INSNS (1);

          if (speed)
            *cost += extra_cost->alu.rev;

          return true;
        }
      /* Fall through.  */
    case XOR:
    case AND:
    cost_logic:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (code == AND
          && GET_CODE (op0) == MULT
          && CONST_INT_P (XEXP (op0, 1))
          && CONST_INT_P (op1)
          && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
                               INTVAL (op1)) != 0)
        {
          /* This is a UBFM/SBFM.  */
          *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
          if (speed)
            *cost += extra_cost->alu.bfx;
          return true;
        }

      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
        {
          /* We possibly get the immediate for free, this is not
             modelled.  */
          if (CONST_INT_P (op1)
              && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
            {
              *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);

              if (speed)
                *cost += extra_cost->alu.logical;

              return true;
            }
          else
            {
              /* Handle ORN, EON, or BIC.  */
              if (GET_CODE (op0) == NOT)
                op0 = XEXP (op0, 0);

              new_op0 = aarch64_strip_shift (op0);

              /* If we had a shift on op0 then this is a logical-shift-
                 by-register/immediate operation.  Otherwise, this is just
                 a logical operation.  */
              if (speed)
                {
                  if (new_op0 != op0)
                    {
                      /* Shift by immediate.  */
                      if (CONST_INT_P (XEXP (op0, 1)))
                        *cost += extra_cost->alu.log_shift;
                      else
                        *cost += extra_cost->alu.log_shift_reg;
                    }
                  else
                    *cost += extra_cost->alu.logical;
                }

              /* In both cases we want to cost both operands.  */
              *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
                       + rtx_cost (op1, (enum rtx_code) code, 1, speed);

              return true;
            }
        }
      return false;

    case NOT:
      /* MVN.  */
      if (speed)
        *cost += extra_cost->alu.logical;

      /* The logical instruction could have the shifted register form,
         but the cost is the same if the shift is processed as a separate
         instruction, so we don't bother with it here.  */
      return false;

    case ZERO_EXTEND:
      op0 = XEXP (x, 0);

      /* If a value is written in SI mode, then zero extended to DI
         mode, the operation will in general be free as a write to
         a 'w' register implicitly zeroes the upper bits of an 'x'
         register.  However, if this is

           (set (reg) (zero_extend (reg)))

         we must cost the explicit register move.  */
      if (mode == DImode
          && GET_MODE (op0) == SImode
          && outer == SET)
        {
          int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);

          if (!op_cost && speed)
            *cost += extra_cost->alu.extend;
          else
            /* Free, the cost is that of the SI mode operation.  */
            *cost = op_cost;

          return true;
        }
      else if (MEM_P (XEXP (x, 0)))
        {
          /* All loads can zero extend to any size for free.  */
          *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
          return true;
        }

      if (speed)
        *cost += extra_cost->alu.extend;

      return false;

    case SIGN_EXTEND:
      if (MEM_P (XEXP (x, 0)))
        {
          if (speed)
            {
              rtx address = XEXP (XEXP (x, 0), 0);
              *cost += extra_cost->ldst.load_sign_extend;

              *cost +=
                COSTS_N_INSNS (aarch64_address_cost (address, mode,
                                                     0, speed));
            }
          return true;
        }

      if (speed)
        *cost += extra_cost->alu.extend;

      return false;
    case ASHIFT:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (CONST_INT_P (op1))
        {
          /* LSL (immediate), UBFM, UBFIZ and friends.  These are all
             aliases.  */
          if (speed)
            *cost += extra_cost->alu.shift;

          /* We can incorporate zero/sign extend for free.  */
          if (GET_CODE (op0) == ZERO_EXTEND
              || GET_CODE (op0) == SIGN_EXTEND)
            op0 = XEXP (op0, 0);

          *cost += rtx_cost (op0, ASHIFT, 0, speed);
          return true;
        }
      else
        {
          /* LSL (register).  */
          if (speed)
            *cost += extra_cost->alu.shift_reg;

          return false;  /* All arguments need to be in registers.  */
        }

    case ROTATE:
    case ROTATERT:
    case LSHIFTRT:
    case ASHIFTRT:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (CONST_INT_P (op1))
        {
          /* ASR (immediate) and friends.  */
          if (speed)
            *cost += extra_cost->alu.shift;

          *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
          return true;
        }
      else
        {
          /* ASR (register) and friends.  */
          if (speed)
            *cost += extra_cost->alu.shift_reg;

          return false;  /* All arguments need to be in registers.  */
        }

    case SYMBOL_REF:
      if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
        {
          /* LDR.  */
          if (speed)
            *cost += extra_cost->ldst.load;
        }
      else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
               || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
        {
          /* ADRP, followed by ADD.  */
          *cost += COSTS_N_INSNS (1);
          if (speed)
            *cost += 2 * extra_cost->alu.arith;
        }
      else if (aarch64_cmodel == AARCH64_CMODEL_TINY
               || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
        {
          /* ADR.  */
          if (speed)
            *cost += extra_cost->alu.arith;
        }

      if (flag_pic)
        {
          /* One extra load instruction, after accessing the GOT.  */
          *cost += COSTS_N_INSNS (1);
          if (speed)
            *cost += extra_cost->ldst.load;
        }
      return true;

    case HIGH:
    case LO_SUM:
      /* ADRP/ADD (immediate).  */
      if (speed)
        *cost += extra_cost->alu.arith;
      return true;

    case ZERO_EXTRACT:
    case SIGN_EXTRACT:
      /* UBFX/SBFX.  */
      if (speed)
        *cost += extra_cost->alu.bfx;

      /* We can trust that the immediates used will be correct (there
         are no by-register forms), so we need only cost op0.  */
      *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed);
      return true;

    case MULT:
      *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
      /* aarch64_rtx_mult_cost always handles recursion to its
         operands.  */
      return true;
    case MOD:
    case UMOD:
      if (speed)
        {
          if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
            *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
                      + extra_cost->mult[GET_MODE (x) == DImode].idiv);
          else if (GET_MODE (x) == DFmode)
            *cost += (extra_cost->fp[1].mult
                      + extra_cost->fp[1].div);
          else if (GET_MODE (x) == SFmode)
            *cost += (extra_cost->fp[0].mult
                      + extra_cost->fp[0].div);
        }
      return false;  /* All arguments need to be in registers.  */

    case DIV:
    case UDIV:
    case SQRT:
      if (speed)
        {
          if (GET_MODE_CLASS (mode) == MODE_INT)
            /* There is no integer SQRT, so only DIV and UDIV can get
               here.  */
            *cost += extra_cost->mult[mode == DImode].idiv;
          else
            *cost += extra_cost->fp[mode == DFmode].div;
        }
      return false;  /* All arguments need to be in registers.  */

    case IF_THEN_ELSE:
      return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
                                         XEXP (x, 2), cost, speed);

      return false; /* All arguments must be in registers.  */

    case FMA:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      op2 = XEXP (x, 2);

      if (speed)
        *cost += extra_cost->fp[mode == DFmode].fma;

      /* FMSUB, FNMADD, and FNMSUB are free.  */
      if (GET_CODE (op0) == NEG)
        op0 = XEXP (op0, 0);

      if (GET_CODE (op2) == NEG)
        op2 = XEXP (op2, 0);

      /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
         and the by-element operand as operand 0.  */
      if (GET_CODE (op1) == NEG)
        op1 = XEXP (op1, 0);

      /* Catch vector-by-element operations.  The by-element operand can
         either be (vec_duplicate (vec_select (x))) or just
         (vec_select (x)), depending on whether we are multiplying by
         a vector or a scalar.

         Canonicalization is not very good in these cases, FMA4 will put the
         by-element operand as operand 0, FNMA4 will have it as operand 1.  */
      if (GET_CODE (op0) == VEC_DUPLICATE)
        op0 = XEXP (op0, 0);
      else if (GET_CODE (op1) == VEC_DUPLICATE)
        op1 = XEXP (op1, 0);

      if (GET_CODE (op0) == VEC_SELECT)
        op0 = XEXP (op0, 0);
      else if (GET_CODE (op1) == VEC_SELECT)
        op1 = XEXP (op1, 0);

      /* If the remaining parameters are not registers,
         get the cost to put them into registers.  */
      *cost += rtx_cost (op0, FMA, 0, speed);
      *cost += rtx_cost (op1, FMA, 1, speed);
      *cost += rtx_cost (op2, FMA, 2, speed);
      return true;

    case FLOAT_EXTEND:
      if (speed)
        *cost += extra_cost->fp[mode == DFmode].widen;
      return false;

    case FLOAT_TRUNCATE:
      if (speed)
        *cost += extra_cost->fp[mode == DFmode].narrow;
      return false;

    case FIX:
    case UNSIGNED_FIX:
      x = XEXP (x, 0);
      /* Strip the rounding part.  They will all be implemented
         by the fcvt* family of instructions anyway.  */
      if (GET_CODE (x) == UNSPEC)
        {
          unsigned int uns_code = XINT (x, 1);

          if (uns_code == UNSPEC_FRINTA
              || uns_code == UNSPEC_FRINTM
              || uns_code == UNSPEC_FRINTN
              || uns_code == UNSPEC_FRINTP
              || uns_code == UNSPEC_FRINTZ)
            x = XVECEXP (x, 0, 0);
        }

      if (speed)
        *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;

      *cost += rtx_cost (x, (enum rtx_code) code, 0, speed);
      return true;
    case ABS:
      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
        {
          /* FABS and FNEG are analogous.  */
          if (speed)
            *cost += extra_cost->fp[mode == DFmode].neg;
        }
      else
        {
          /* Integer ABS will either be split to
             two arithmetic instructions, or will be an ABS
             (scalar), which we don't model.  */
          *cost = COSTS_N_INSNS (2);
          if (speed)
            *cost += 2 * extra_cost->alu.arith;
        }
      return false;

    case SMAX:
    case SMIN:
      if (speed)
        {
          /* FMAXNM/FMINNM/FMAX/FMIN.
             TODO: This may not be accurate for all implementations, but
             we do not model this in the cost tables.  */
          *cost += extra_cost->fp[mode == DFmode].addsub;
        }
      return false;

    case UNSPEC:
      /* The floating point round to integer frint* instructions.  */
      if (aarch64_frint_unspec_p (XINT (x, 1)))
        {
          if (speed)
            *cost += extra_cost->fp[mode == DFmode].roundint;

          return false;
        }

      if (XINT (x, 1) == UNSPEC_RBIT)
        {
          if (speed)
            *cost += extra_cost->alu.rev;

          return false;
        }
      break;

    case TRUNCATE:
      /* Decompose <su>muldi3_highpart.  */
      if (/* (truncate:DI  */
          mode == DImode
          /*   (lshiftrt:TI  */
          && GET_MODE (XEXP (x, 0)) == TImode
          && GET_CODE (XEXP (x, 0)) == LSHIFTRT
          /*      (mult:TI  */
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
          /*        (ANY_EXTEND:TI (reg:DI))
                    (ANY_EXTEND:TI (reg:DI)))  */
          && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
               && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
              || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
                  && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
          && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
          && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
          /*     (const_int 64)  */
          && CONST_INT_P (XEXP (XEXP (x, 0), 1))
          && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
        {
          /* UMULH/SMULH.  */
          if (speed)
            *cost += extra_cost->mult[mode == DImode].extend;
          *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
                             MULT, 0, speed);
          *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
                             MULT, 1, speed);
          return true;
        }

      /* Fall through.  */
    default:
      break;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
             "\nFailed to cost RTX.  Assuming default cost.\n");

  return true;
}
/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
   calculated for X.  This cost is stored in *COST.  Returns true
   if the total cost of X was calculated.  */
static bool
aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
                           int param, int *cost, bool speed)
{
  bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      print_rtl_single (dump_file, x);
      fprintf (dump_file, "\n%s cost: %d (%s)\n",
               speed ? "Hot" : "Cold",
               *cost, result ? "final" : "partial");
    }

  return result;
}
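
/* Implement TARGET_REGISTER_MOVE_COST.  Return the cost of moving a value
   of mode MODE from a register of class FROM_I to one of class TO_I.  */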
static int
aarch64_register_move_cost (machine_mode mode,
                            reg_class_t from_i, reg_class_t to_i)
{
  enum reg_class from = (enum reg_class) from_i;
  enum reg_class to = (enum reg_class) to_i;
  const struct cpu_regmove_cost *regmove_cost
    = aarch64_tune_params->regmove_cost;

  /* Caller save and pointer regs are equivalent to GENERAL_REGS.  */
  if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
    to = GENERAL_REGS;

  if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
    from = GENERAL_REGS;

  /* Moving between GPR and stack cost is the same as GP2GP.  */
  if ((from == GENERAL_REGS && to == STACK_REG)
      || (to == GENERAL_REGS && from == STACK_REG))
    return regmove_cost->GP2GP;

  /* To/From the stack register, we move via the gprs.  */
  if (to == STACK_REG || from == STACK_REG)
    return aarch64_register_move_cost (mode, from, GENERAL_REGS)
            + aarch64_register_move_cost (mode, GENERAL_REGS, to);

  if (GET_MODE_SIZE (mode) == 16)
    {
      /* 128-bit operations on general registers require 2 instructions.  */
      if (from == GENERAL_REGS && to == GENERAL_REGS)
        return regmove_cost->GP2GP * 2;
      else if (from == GENERAL_REGS)
        return regmove_cost->GP2FP * 2;
      else if (to == GENERAL_REGS)
        return regmove_cost->FP2GP * 2;

      /* When AdvSIMD instructions are disabled it is not possible to move
         a 128-bit value directly between Q registers.  This is handled in
         secondary reload.  A general register is used as a scratch to move
         the upper DI value and the lower DI value is moved directly,
         hence the cost is the sum of three moves.  */
      if (! TARGET_SIMD)
        return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;

      return regmove_cost->FP2FP;
    }

  if (from == GENERAL_REGS && to == GENERAL_REGS)
    return regmove_cost->GP2GP;
  else if (from == GENERAL_REGS)
    return regmove_cost->GP2FP;
  else if (to == GENERAL_REGS)
    return regmove_cost->FP2GP;

  return regmove_cost->FP2FP;
}
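
/* Implement TARGET_MEMORY_MOVE_COST.  */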
static int
aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
                          reg_class_t rclass ATTRIBUTE_UNUSED,
                          bool in ATTRIBUTE_UNUSED)
{
  return aarch64_tune_params->memmov_cost;
}

/* Return the number of instructions that can be issued per cycle.  */
static int
aarch64_sched_issue_rate (void)
{
  return aarch64_tune_params->issue_rate;
}

static int
aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
{
  int issue_rate = aarch64_sched_issue_rate ();

  return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
}
/* Vectorizer cost model target hooks.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                    tree vectype,
                                    int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
    case scalar_stmt:
      return aarch64_tune_params->vec_costs->scalar_stmt_cost;

    case scalar_load:
      return aarch64_tune_params->vec_costs->scalar_load_cost;

    case scalar_store:
      return aarch64_tune_params->vec_costs->scalar_store_cost;

    case vector_stmt:
      return aarch64_tune_params->vec_costs->vec_stmt_cost;

    case vector_load:
      return aarch64_tune_params->vec_costs->vec_align_load_cost;

    case vector_store:
      return aarch64_tune_params->vec_costs->vec_store_cost;

    case vec_to_scalar:
      return aarch64_tune_params->vec_costs->vec_to_scalar_cost;

    case scalar_to_vec:
      return aarch64_tune_params->vec_costs->scalar_to_vec_cost;

    case unaligned_load:
      return aarch64_tune_params->vec_costs->vec_unalign_load_cost;

    case unaligned_store:
      return aarch64_tune_params->vec_costs->vec_unalign_store_cost;

    case cond_branch_taken:
      return aarch64_tune_params->vec_costs->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;

    case vec_promote_demote:
      return aarch64_tune_params->vec_costs->vec_stmt_cost;

    case vec_construct:
      elements = TYPE_VECTOR_SUBPARTS (vectype);
      return elements / 2 + 1;

    default:
      gcc_unreachable ();
    }
}
/* Implement targetm.vectorize.add_stmt_cost.  */
static unsigned
aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
                       struct _stmt_vec_info *stmt_info, int misalign,
                       enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost =
            aarch64_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
         vectorized are weighted more heavily.  The value here is
         a function (linear for now) of the loop nest level.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
        {
          loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
          struct loop *loop = LOOP_VINFO_LOOP (loop_info);
          unsigned nest_level = loop_depth (loop);

          count *= nest_level;
        }

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}
static void initialize_aarch64_code_model (void);

/* Parse the architecture extension string.  */

static void
aarch64_parse_extension (char *str)
{
  /* The extension string is parsed left to right.  */
  const struct aarch64_option_extension *opt = NULL;

  /* Flag to say whether we are adding or removing an extension.  */
  int adding_ext = -1;

  while (str != NULL && *str != 0)
    {
      char *ext;
      size_t len;

      str++;
      ext = strchr (str, '+');

      if (ext != NULL)
        len = ext - str;
      else
        len = strlen (str);

      if (len >= 2 && strncmp (str, "no", 2) == 0)
        {
          adding_ext = 0;
          len -= 2;
          str += 2;
        }
      else if (len > 0)
        adding_ext = 1;

      if (len == 0)
        {
          error ("missing feature modifier after %qs", adding_ext ? "+"
                                                                  : "+no");
          return;
        }

      /* Scan over the extensions table trying to find an exact match.  */
      for (opt = all_extensions; opt->name != NULL; opt++)
        {
          if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
            {
              /* Add or remove the extension.  */
              if (adding_ext)
                aarch64_isa_flags |= opt->flags_on;
              else
                aarch64_isa_flags &= ~(opt->flags_off);
              break;
            }
        }

      if (opt->name == NULL)
        {
          /* Extension not found in list.  */
          error ("unknown feature modifier %qs", str);
          return;
        }

      str = ext;
    }

  return;
}
/* Parse the ARCH string.  */

static void
aarch64_parse_arch (void)
{
  char *ext;
  const struct processor *arch;
  char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
  size_t len;

  strcpy (str, aarch64_arch_string);

  ext = strchr (str, '+');

  if (ext != NULL)
    len = ext - str;
  else
    len = strlen (str);

  if (len == 0)
    {
      error ("missing arch name in -march=%qs", str);
      return;
    }

  /* Loop through the list of supported ARCHs to find a match.  */
  for (arch = all_architectures; arch->name != NULL; arch++)
    {
      if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
        {
          selected_arch = arch;
          aarch64_isa_flags = selected_arch->flags;

          if (!selected_cpu)
            selected_cpu = &all_cores[selected_arch->core];

          if (ext != NULL)
            {
              /* ARCH string contains at least one extension.  */
              aarch64_parse_extension (ext);
            }

          if (strcmp (selected_arch->arch, selected_cpu->arch))
            {
              warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
                       selected_cpu->name, selected_arch->name);
            }

          return;
        }
    }

  /* ARCH name not found in list.  */
  error ("unknown value %qs for -march", str);
}
/* Parse the CPU string.  */

static void
aarch64_parse_cpu (void)
{
  char *ext;
  const struct processor *cpu;
  char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
  size_t len;

  strcpy (str, aarch64_cpu_string);

  ext = strchr (str, '+');

  if (ext != NULL)
    len = ext - str;
  else
    len = strlen (str);

  if (len == 0)
    {
      error ("missing cpu name in -mcpu=%qs", str);
      return;
    }

  /* Loop through the list of supported CPUs to find a match.  */
  for (cpu = all_cores; cpu->name != NULL; cpu++)
    {
      if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
        {
          selected_cpu = cpu;
          aarch64_isa_flags = selected_cpu->flags;

          if (ext != NULL)
            {
              /* CPU string contains at least one extension.  */
              aarch64_parse_extension (ext);
            }

          return;
        }
    }

  /* CPU name not found in list.  */
  error ("unknown value %qs for -mcpu", str);
}
/* Parse the TUNE string.  */

static void
aarch64_parse_tune (void)
{
  const struct processor *cpu;
  char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
  strcpy (str, aarch64_tune_string);

  /* Loop through the list of supported CPUs to find a match.  */
  for (cpu = all_cores; cpu->name != NULL; cpu++)
    {
      if (strcmp (cpu->name, str) == 0)
        {
          selected_tune = cpu;
          return;
        }
    }

  /* CPU name not found in list.  */
  error ("unknown value %qs for -mtune", str);
}
/* Implement TARGET_OPTION_OVERRIDE.  */

static void
aarch64_override_options (void)
{
  /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
     If either of -march or -mtune is given, they override their
     respective component of -mcpu.

     So, first parse AARCH64_CPU_STRING, then the others, be careful
     with -march as, if -mcpu is not present on the command line, march
     must set a sensible default CPU.  */
  if (aarch64_cpu_string)
    {
      aarch64_parse_cpu ();
    }

  if (aarch64_arch_string)
    {
      aarch64_parse_arch ();
    }

  if (aarch64_tune_string)
    {
      aarch64_parse_tune ();
    }

#ifndef HAVE_AS_MABI_OPTION
  /* The compiler may have been configured with 2.23.* binutils, which does
     not have support for ILP32.  */
  if (TARGET_ILP32)
    error ("Assembler does not support -mabi=ilp32");
#endif

  initialize_aarch64_code_model ();

  aarch64_build_bitmask_table ();

  /* This target defaults to strict volatile bitfields.  */
  if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
    flag_strict_volatile_bitfields = 1;

  /* If the user did not specify a processor, choose the default
     one for them.  This will be the CPU set during configuration using
     --with-cpu, otherwise it is "generic".  */
  if (!selected_cpu)
    {
      selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
      aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
    }

  gcc_assert (selected_cpu);

  if (!selected_tune)
    selected_tune = selected_cpu;

  aarch64_tune_flags = selected_tune->flags;
  aarch64_tune = selected_tune->core;
  aarch64_tune_params = selected_tune->tune;
  aarch64_architecture_version = selected_cpu->architecture_version;

  if (aarch64_fix_a53_err835769 == 2)
    {
#ifdef TARGET_FIX_ERR_A53_835769_DEFAULT
      aarch64_fix_a53_err835769 = 1;
#else
      aarch64_fix_a53_err835769 = 0;
#endif
    }

  /* If not optimizing for size, set the default
     alignment to what the target wants.  */
  if (!optimize_size)
    {
      if (align_loops <= 0)
        align_loops = aarch64_tune_params->loop_align;
      if (align_jumps <= 0)
        align_jumps = aarch64_tune_params->jump_align;
      if (align_functions <= 0)
        align_functions = aarch64_tune_params->function_align;
    }

  aarch64_override_options_after_change ();
}
/* Implement targetm.override_options_after_change.  */

static void
aarch64_override_options_after_change (void)
{
  if (flag_omit_frame_pointer)
    flag_omit_leaf_frame_pointer = false;
  else if (flag_omit_leaf_frame_pointer)
    flag_omit_frame_pointer = true;
}

static struct machine_function *
aarch64_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_cleared_alloc<machine_function> ();
  return machine;
}

void
aarch64_init_expanders (void)
{
  init_machine_status = aarch64_init_machine_status;
}
/* A checking mechanism for the implementation of the various code models.  */
static void
initialize_aarch64_code_model (void)
{
  if (flag_pic)
    {
      switch (aarch64_cmodel_var)
        {
        case AARCH64_CMODEL_TINY:
          aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
          break;
        case AARCH64_CMODEL_SMALL:
          aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
          break;
        case AARCH64_CMODEL_LARGE:
          sorry ("code model %qs with -f%s", "large",
                 flag_pic > 1 ? "PIC" : "pic");
        default:
          gcc_unreachable ();
        }
    }
  else
    aarch64_cmodel = aarch64_cmodel_var;
}
/* Return true if SYMBOL_REF X binds locally.  */

static bool
aarch64_symbol_binds_local_p (const_rtx x)
{
  return (SYMBOL_REF_DECL (x)
          ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
          : SYMBOL_REF_LOCAL_P (x));
}

/* Return true if SYMBOL_REF X is thread local.  */
static bool
aarch64_tls_symbol_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  if (GET_CODE (x) != SYMBOL_REF)
    return false;

  return SYMBOL_REF_TLS_MODEL (x) != 0;
}
/* Classify a TLS symbol into one of the TLS kinds.  */
enum aarch64_symbol_type
aarch64_classify_tls_symbol (rtx x)
{
  enum tls_model tls_kind = tls_symbolic_operand_type (x);

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
    case TLS_MODEL_LOCAL_DYNAMIC:
      return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;

    case TLS_MODEL_INITIAL_EXEC:
      return SYMBOL_SMALL_GOTTPREL;

    case TLS_MODEL_LOCAL_EXEC:
      return SYMBOL_SMALL_TPREL;

    case TLS_MODEL_EMULATED:
    case TLS_MODEL_NONE:
      return SYMBOL_FORCE_TO_MEM;

    default:
      gcc_unreachable ();
    }
}
/* Return the method that should be used to access SYMBOL_REF or
   LABEL_REF X in context CONTEXT.  */

enum aarch64_symbol_type
aarch64_classify_symbol (rtx x, rtx offset,
                         enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
{
  if (GET_CODE (x) == LABEL_REF)
    {
      switch (aarch64_cmodel)
        {
        case AARCH64_CMODEL_LARGE:
          return SYMBOL_FORCE_TO_MEM;

        case AARCH64_CMODEL_TINY_PIC:
        case AARCH64_CMODEL_TINY:
          return SYMBOL_TINY_ABSOLUTE;

        case AARCH64_CMODEL_SMALL_PIC:
        case AARCH64_CMODEL_SMALL:
          return SYMBOL_SMALL_ABSOLUTE;

        default:
          gcc_unreachable ();
        }
    }

  if (GET_CODE (x) == SYMBOL_REF)
    {
      if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
        return SYMBOL_FORCE_TO_MEM;

      if (aarch64_tls_symbol_p (x))
        return aarch64_classify_tls_symbol (x);

      switch (aarch64_cmodel)
        {
        case AARCH64_CMODEL_TINY:
          /* When we retrieve symbol + offset address, we have to make sure
             the offset does not cause overflow of the final address.  But
             we have no way of knowing the address of symbol at compile time
             so we can't accurately say if the distance between the PC and
             symbol + offset is outside the addressable range of +/-1M in the
             TINY code model.  So we rely on images not being greater than
             1M and cap the offset at 1M and anything beyond 1M will have to
             be loaded using an alternative mechanism.  */
          if (SYMBOL_REF_WEAK (x)
              || INTVAL (offset) < -1048575 || INTVAL (offset) > 1048575)
            return SYMBOL_FORCE_TO_MEM;
          return SYMBOL_TINY_ABSOLUTE;

        case AARCH64_CMODEL_SMALL:
          /* Same reasoning as the tiny code model, but the offset cap here is
             4G.  */
          if (SYMBOL_REF_WEAK (x)
              || INTVAL (offset) < (HOST_WIDE_INT) -4294967263
              || INTVAL (offset) > (HOST_WIDE_INT) 4294967264)
            return SYMBOL_FORCE_TO_MEM;
          return SYMBOL_SMALL_ABSOLUTE;

        case AARCH64_CMODEL_TINY_PIC:
          if (!aarch64_symbol_binds_local_p (x))
            return SYMBOL_TINY_GOT;
          return SYMBOL_TINY_ABSOLUTE;

        case AARCH64_CMODEL_SMALL_PIC:
          if (!aarch64_symbol_binds_local_p (x))
            return SYMBOL_SMALL_GOT;
          return SYMBOL_SMALL_ABSOLUTE;

        default:
          gcc_unreachable ();
        }
    }

  /* By default push everything into the constant pool.  */
  return SYMBOL_FORCE_TO_MEM;
}
bool
aarch64_constant_address_p (rtx x)
{
  return (CONSTANT_P (x) && memory_address_p (DImode, x));
}

bool
aarch64_legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
          && GET_CODE (XEXP (x, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return false;

  return true;
}

/* Return true if X holds either a quarter-precision or
     floating-point +0.0 constant.  */
static bool
aarch64_valid_floating_const (machine_mode mode, rtx x)
{
  if (!CONST_DOUBLE_P (x))
    return false;

  /* TODO: We could handle moving 0.0 to a TFmode register,
     but first we would like to refactor the movtf_aarch64
     to be more amicable to split moves properly and
     correctly gate on TARGET_SIMD.  For now - reject all
     constants which are not to SFmode or DFmode registers.  */
  if (!(mode == SFmode || mode == DFmode))
    return false;

  if (aarch64_float_const_zero_rtx_p (x))
    return true;
  return aarch64_float_const_representable_p (x);
}
static bool
aarch64_legitimate_constant_p (machine_mode mode, rtx x)
{
  /* Do not allow vector struct mode constants.  We could support
     0 and -1 easily, but they need support in aarch64-simd.md.  */
  if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
    return false;

  /* This could probably go away because
     we now decompose CONST_INTs according to expand_mov_immediate.  */
  if ((GET_CODE (x) == CONST_VECTOR
       && aarch64_simd_valid_immediate (x, mode, false, NULL))
      || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
    return !targetm.cannot_force_const_mem (mode, x);

  if (GET_CODE (x) == HIGH
      && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
    return true;

  return aarch64_constant_address_p (x);
}
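
/* Return an rtx holding the thread pointer, loading it into TARGET (or a
   fresh register if TARGET is not a suitable Pmode register).  */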
static rtx
aarch64_load_tp (rtx target)
{
  if (!target
      || GET_MODE (target) != Pmode
      || !register_operand (target, Pmode))
    target = gen_reg_rtx (Pmode);

  /* Can return in any reg.  */
  emit_insn (gen_aarch64_load_tp_hard (target));
  return target;
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
   Return the type to use as __builtin_va_list.

   AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:

   struct __va_list
   {
     void *__stack;
     void *__gr_top;
     void *__vr_top;
     int   __gr_offs;
     int   __vr_offs;
   };  */

static tree
aarch64_build_builtin_va_list (void)
{
  tree va_list_name;
  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;

  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
                             TYPE_DECL,
                             get_identifier ("__va_list"),
                             va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;

  /* Create the fields.  */
  f_stack = build_decl (BUILTINS_LOCATION,
                        FIELD_DECL, get_identifier ("__stack"),
                        ptr_type_node);
  f_grtop = build_decl (BUILTINS_LOCATION,
                        FIELD_DECL, get_identifier ("__gr_top"),
                        ptr_type_node);
  f_vrtop = build_decl (BUILTINS_LOCATION,
                        FIELD_DECL, get_identifier ("__vr_top"),
                        ptr_type_node);
  f_groff = build_decl (BUILTINS_LOCATION,
                        FIELD_DECL, get_identifier ("__gr_offs"),
                        integer_type_node);
  f_vroff = build_decl (BUILTINS_LOCATION,
                        FIELD_DECL, get_identifier ("__vr_offs"),
                        integer_type_node);

  DECL_ARTIFICIAL (f_stack) = 1;
  DECL_ARTIFICIAL (f_grtop) = 1;
  DECL_ARTIFICIAL (f_vrtop) = 1;
  DECL_ARTIFICIAL (f_groff) = 1;
  DECL_ARTIFICIAL (f_vroff) = 1;

  DECL_FIELD_CONTEXT (f_stack) = va_list_type;
  DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
  DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
  DECL_FIELD_CONTEXT (f_groff) = va_list_type;
  DECL_FIELD_CONTEXT (f_vroff) = va_list_type;

  TYPE_FIELDS (va_list_type) = f_stack;
  DECL_CHAIN (f_stack) = f_grtop;
  DECL_CHAIN (f_grtop) = f_vrtop;
  DECL_CHAIN (f_vrtop) = f_groff;
  DECL_CHAIN (f_groff) = f_vroff;

  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
{
  const CUMULATIVE_ARGS *cum;
  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
  tree stack, grtop, vrtop, groff, vroff;
  tree t;
  int gr_save_area_size;
  int vr_save_area_size;
  int vr_offset;

  cum = &crtl->args.info;
  gr_save_area_size
    = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
  vr_save_area_size
    = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;

  if (TARGET_GENERAL_REGS_ONLY)
    {
      if (cum->aapcs_nvrn > 0)
        sorry ("%qs and floating point or vector arguments",
               "-mgeneral-regs-only");
      vr_save_area_size = 0;
    }

  f_stack = TYPE_FIELDS (va_list_type_node);
  f_grtop = DECL_CHAIN (f_stack);
  f_vrtop = DECL_CHAIN (f_grtop);
  f_groff = DECL_CHAIN (f_vrtop);
  f_vroff = DECL_CHAIN (f_groff);

  stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
                  NULL_TREE);
  grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
                  NULL_TREE);
  vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
                  NULL_TREE);
  groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
                  NULL_TREE);
  vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
                  NULL_TREE);

  /* Emit code to initialize STACK, which points to the next varargs stack
     argument.  CUM->AAPCS_STACK_SIZE gives the number of stack words used
     by named arguments.  STACK is 8-byte aligned.  */
  t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
  if (cum->aapcs_stack_size > 0)
    t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
  t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize GRTOP, the top of the GR save area.
     virtual_incoming_args_rtx should have been 16 byte aligned.  */
  t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
  t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize VRTOP, the top of the VR save area.
     This address is gr_save_area_bytes below GRTOP, rounded
     down to the next 16-byte boundary.  */
  t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
  vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
                                STACK_BOUNDARY / BITS_PER_UNIT);

  if (vr_offset)
    t = fold_build_pointer_plus_hwi (t, -vr_offset);
  t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize GROFF, the offset from GRTOP of the
     next GPR argument.  */
  t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
              build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Likewise emit code to initialize VROFF, the offset from FTOP
     of the next VR argument.  */
  t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
              build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
7345 aarch64_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
7346 gimple_seq
*post_p ATTRIBUTE_UNUSED
)
7350 bool is_ha
; /* is HFA or HVA. */
7351 bool dw_align
; /* double-word align. */
7352 machine_mode ag_mode
= VOIDmode
;
7356 tree f_stack
, f_grtop
, f_vrtop
, f_groff
, f_vroff
;
7357 tree stack
, f_top
, f_off
, off
, arg
, roundup
, on_stack
;
7358 HOST_WIDE_INT size
, rsize
, adjust
, align
;
7359 tree t
, u
, cond1
, cond2
;
7361 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
7363 type
= build_pointer_type (type
);
7365 mode
= TYPE_MODE (type
);
7367 f_stack
= TYPE_FIELDS (va_list_type_node
);
7368 f_grtop
= DECL_CHAIN (f_stack
);
7369 f_vrtop
= DECL_CHAIN (f_grtop
);
7370 f_groff
= DECL_CHAIN (f_vrtop
);
7371 f_vroff
= DECL_CHAIN (f_groff
);
7373 stack
= build3 (COMPONENT_REF
, TREE_TYPE (f_stack
), unshare_expr (valist
),
7374 f_stack
, NULL_TREE
);
7375 size
= int_size_in_bytes (type
);
7376 align
= aarch64_function_arg_alignment (mode
, type
) / BITS_PER_UNIT
;
  dw_align = false;
  adjust = 0;
  if (aarch64_vfp_is_call_or_return_candidate (mode,
                                               type,
                                               &ag_mode,
                                               &nregs,
                                               &is_ha))
    {
      /* TYPE passed in fp/simd registers.  */
      if (TARGET_GENERAL_REGS_ONLY)
        sorry ("%qs and floating point or vector arguments",
               "-mgeneral-regs-only");

      f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
                      unshare_expr (valist), f_vrtop, NULL_TREE);
      f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
                      unshare_expr (valist), f_vroff, NULL_TREE);

      rsize = nregs * UNITS_PER_VREG;

      if (is_ha)
        {
          if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
            adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
        }
      else if (BLOCK_REG_PADDING (mode, type, 1) == downward
               && size < UNITS_PER_VREG)
        {
          adjust = UNITS_PER_VREG - size;
        }
    }
  else
    {
      /* TYPE passed in general registers.  */
      f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
                      unshare_expr (valist), f_grtop, NULL_TREE);
      f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
                      unshare_expr (valist), f_groff, NULL_TREE);
      rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
      nregs = rsize / UNITS_PER_WORD;

      if (align > 8)
        dw_align = true;

      if (BLOCK_REG_PADDING (mode, type, 1) == downward
          && size < UNITS_PER_WORD)
        {
          adjust = UNITS_PER_WORD - size;
        }
    }
  /* Get a local temporary for the field value.  */
  off = get_initialized_tmp_var (f_off, pre_p, NULL);

  /* Emit code to branch if off >= 0.  */
  t = build2 (GE_EXPR, boolean_type_node, off,
              build_int_cst (TREE_TYPE (off), 0));
  cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
  if (dw_align)
    {
      /* Emit: offs = (offs + 15) & -16.  */
      t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
                  build_int_cst (TREE_TYPE (off), 15));
      t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
                  build_int_cst (TREE_TYPE (off), -16));
      roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
    }
  else
    roundup = NULL;
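/* Illustrative sketch (not GCC code) of the round-up idiom built in tree
   form above: adding 15 and masking with -16 rounds a non-negative offset
   up to the next 16-byte boundary.  The helper name is made up.  */
#if 0
static int
example_round_up_16 (int offs)
{
  return (offs + 15) & -16;     /* e.g. 1 -> 16, 16 -> 16, 17 -> 32  */
}
#endif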
  /* Update ap.__[g|v]r_offs  */
  t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
              build_int_cst (TREE_TYPE (off), rsize));
  t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);

  /* String up.  */
  if (roundup)
    t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
  /* [cond2] if (ap.__[g|v]r_offs > 0)  */
  u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
              build_int_cst (TREE_TYPE (f_off), 0));
  cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);

  /* String up: make sure the assignment happens before the use.  */
  t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
  COND_EXPR_ELSE (cond1) = t;
  /* Prepare the trees handling the argument that is passed on the stack;
     the top level node will store in ON_STACK.  */
  arg = get_initialized_tmp_var (stack, pre_p, NULL);
  if (align > 8)
    {
      /* if (alignof(type) > 8) (arg = arg + 15) & -16;  */
      t = fold_convert (intDI_type_node, arg);
      t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
                  build_int_cst (TREE_TYPE (t), 15));
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
                  build_int_cst (TREE_TYPE (t), -16));
      t = fold_convert (TREE_TYPE (arg), t);
      roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
    }
  else
    roundup = NULL;
  /* Advance ap.__stack  */
  t = fold_convert (intDI_type_node, arg);
  t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
              build_int_cst (TREE_TYPE (t), size + 7));
  t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
              build_int_cst (TREE_TYPE (t), -8));
  t = fold_convert (TREE_TYPE (arg), t);
  t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
  /* String up roundup and advance.  */
  if (roundup)
    t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
  /* String up with arg  */
  on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
  /* Big-endianness related address adjustment.  */
  if (BLOCK_REG_PADDING (mode, type, 1) == downward
      && size < UNITS_PER_WORD)
    {
      t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
                  size_int (UNITS_PER_WORD - size));
      on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
    }

  COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
  COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
  /* Adjustment to OFFSET in the case of BIG_ENDIAN.  */
  t = off;
  if (adjust)
    t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
                build_int_cst (TREE_TYPE (off), adjust));

  t = fold_convert (sizetype, t);
  t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
  if (is_ha)
    {
      /* type ha; // treat as "struct {ftype field[n];}"
         ... [computing offs]
         for (i = 0; i < nregs; ++i, offs += 16)
           ha.field[i] = *((ftype *)(ap.__vr_top + offs));  */
      int i;
      tree tmp_ha, field_t, field_ptr_t;

      /* Declare a local variable.  */
      tmp_ha = create_tmp_var_raw (type, "ha");
      gimple_add_tmp_var (tmp_ha);

      /* Establish the base type.  */
      switch (ag_mode)
        {
        case SFmode:
          field_t = float_type_node;
          field_ptr_t = float_ptr_type_node;
          break;
        case DFmode:
          field_t = double_type_node;
          field_ptr_t = double_ptr_type_node;
          break;
        case TFmode:
          field_t = long_double_type_node;
          field_ptr_t = long_double_ptr_type_node;
          break;
        /* The half precision and quad precision are not fully supported yet.
           Enable the following code after the support is complete.  Need to
           find the correct type node for __fp16 *.  */
        case HFmode:
          field_t = float_type_node;
          field_ptr_t = float_ptr_type_node;
          break;
        case V2SImode:
        case V4SImode:
          {
            tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
            field_t = build_vector_type_for_mode (innertype, ag_mode);
            field_ptr_t = build_pointer_type (field_t);
          }
          break;
        default:
          gcc_assert (0);
        }
      /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area  */
      tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
      addr = t;
      t = fold_convert (field_ptr_t, addr);
      t = build2 (MODIFY_EXPR, field_t,
                  build1 (INDIRECT_REF, field_t, tmp_ha),
                  build1 (INDIRECT_REF, field_t, t));

      /* ha.field[i] = *((field_ptr_t)vr_saved_area + i)  */
      for (i = 1; i < nregs; ++i)
        {
          addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
          u = fold_convert (field_ptr_t, addr);
          u = build2 (MODIFY_EXPR, field_t,
                      build2 (MEM_REF, field_t, tmp_ha,
                              build_int_cst (field_ptr_t,
                                             (i *
                                              int_size_in_bytes (field_t)))),
                      build1 (INDIRECT_REF, field_t, u));
          t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
        }

      u = fold_convert (TREE_TYPE (f_top), tmp_ha);
      t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
    }
  COND_EXPR_ELSE (cond2) = t;
  addr = fold_convert (build_pointer_type (type), cond1);
  addr = build_va_arg_indirect_ref (addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);

  return addr;
}
/* Implement TARGET_SETUP_INCOMING_VARARGS.  */

static void
aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
                                tree type, int *pretend_size ATTRIBUTE_UNUSED,
                                int no_rtl)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  CUMULATIVE_ARGS local_cum;
  int gr_saved, vr_saved;

  /* The caller has advanced CUM up to, but not beyond, the last named
     argument.  Advance a local copy of CUM past the last "real" named
     argument, to find out how many registers are left over.  */
  local_cum = *cum;
  aarch64_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, true);
  /* Find out how many registers we need to save.  */
  gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
  vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;

  if (TARGET_GENERAL_REGS_ONLY)
    {
      if (local_cum.aapcs_nvrn > 0)
        sorry ("%qs and floating point or vector arguments",
               "-mgeneral-regs-only");
      vr_saved = 0;
    }
  if (!no_rtl)
    {
      if (gr_saved > 0)
        {
          rtx ptr, mem;

          /* virtual_incoming_args_rtx should have been 16-byte aligned.  */
          ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
                               - gr_saved * UNITS_PER_WORD);
          mem = gen_frame_mem (BLKmode, ptr);
          set_mem_alias_set (mem, get_varargs_alias_set ());

          move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
                               mem, gr_saved);
        }
      if (vr_saved > 0)
        {
          /* We can't use move_block_from_reg, because it will use
             the wrong mode, storing D regs only.  */
          machine_mode mode = TImode;
          int off, i;
          /* Set OFF to the offset from virtual_incoming_args_rtx of
             the first vector register.  The VR save area lies below
             the GR one, and is aligned to 16 bytes.  */
          off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
                                   STACK_BOUNDARY / BITS_PER_UNIT);
          off -= vr_saved * UNITS_PER_VREG;

          for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
            {
              rtx ptr, mem;

              ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
              mem = gen_frame_mem (mode, ptr);
              set_mem_alias_set (mem, get_varargs_alias_set ());
              aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
              off += UNITS_PER_VREG;
            }
        }
    }
  /* We don't save the size into *PRETEND_SIZE because we want to avoid
     any complication of having crtl->args.pretend_args_size changed.  */
  cfun->machine->frame.saved_varargs_size
    = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
                         STACK_BOUNDARY / BITS_PER_UNIT)
       + vr_saved * UNITS_PER_VREG);
}
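/* Worked example (illustrative only): with 3 unnamed GP registers and 2
   unnamed FP/SIMD registers left over, the varargs save area above is
   ROUND_UP (3 * 8, 16) + 2 * 16 = 32 + 32 = 64 bytes.  The helper below
   mirrors that computation with plain integers, assuming 8-byte words,
   16-byte vector registers and a 16-byte stack boundary; the function
   name is made up.  */
#if 0
static unsigned
example_saved_varargs_size (unsigned gr_saved, unsigned vr_saved)
{
  unsigned gr_bytes = ((gr_saved * 8) + 15) & -16u;   /* AARCH64_ROUND_UP  */
  return gr_bytes + vr_saved * 16;                    /* UNITS_PER_VREG == 16  */
}
#endif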
static void
aarch64_conditional_register_usage (void)
{
  int i;
  if (!TARGET_FLOAT)
    {
      for (i = V0_REGNUM; i <= V31_REGNUM; i++)
        {
          fixed_regs[i] = 1;
          call_used_regs[i] = 1;
        }
    }
}
/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.  */

static int
aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
{
  machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
7710 mode
= TYPE_MODE (type
);
7711 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= TFmode
)
7714 if (*modep
== VOIDmode
)
7723 mode
= TYPE_MODE (TREE_TYPE (type
));
7724 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= TFmode
)
7727 if (*modep
== VOIDmode
)
7736 /* Use V2SImode and V4SImode as representatives of all 64-bit
7737 and 128-bit vector types. */
7738 size
= int_size_in_bytes (type
);
7751 if (*modep
== VOIDmode
)
7754 /* Vector modes are considered to be opaque: two vectors are
7755 equivalent for the purposes of being homogeneous aggregates
7756 if they are the same size. */
7765 tree index
= TYPE_DOMAIN (type
);
7767 /* Can't handle incomplete types nor sizes that are not
7769 if (!COMPLETE_TYPE_P (type
)
7770 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
7773 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
7776 || !TYPE_MAX_VALUE (index
)
7777 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
7778 || !TYPE_MIN_VALUE (index
)
7779 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
7783 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
7784 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
7786 /* There must be no padding. */
7787 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
7799 /* Can't handle incomplete types nor sizes that are not
7801 if (!COMPLETE_TYPE_P (type
)
7802 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
7805 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
7807 if (TREE_CODE (field
) != FIELD_DECL
)
7810 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
7816 /* There must be no padding. */
7817 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
7824 case QUAL_UNION_TYPE
:
7826 /* These aren't very interesting except in a degenerate case. */
7831 /* Can't handle incomplete types nor sizes that are not
7833 if (!COMPLETE_TYPE_P (type
)
7834 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
7837 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
7839 if (TREE_CODE (field
) != FIELD_DECL
)
7842 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
7845 count
= count
> sub_count
? count
: sub_count
;
7848 /* There must be no padding. */
7849 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
/* Return TRUE if the type, as described by TYPE and MODE, is a composite
   type as described in AAPCS64 \S 4.3.  This includes aggregate, union and
   array types.  The C99 floating-point complex types are also considered
   as composite types, according to AAPCS64 \S 7.1.1.  The complex integer
   types, which are GCC extensions and out of the scope of AAPCS64, are
   treated as composite types here as well.

   Note that MODE itself is not sufficient in determining whether a type
   is such a composite type or not.  This is because
   stor-layout.c:compute_record_mode may have already changed the MODE
   (BLKmode) of a RECORD_TYPE TYPE to some other mode.  For example, a
   structure with only one field may have its MODE set to the mode of the
   field.  Also an integer mode whose size matches the size of the
   RECORD_TYPE type may be used to substitute the original mode
   (i.e. BLKmode) in certain circumstances.  In other words, MODE cannot be
   solely relied on.  */
static bool
aarch64_composite_type_p (const_tree type,
                          machine_mode mode)
{
  if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
    return true;

  if (mode == BLKmode
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return true;

  return false;
}
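/* Illustrative C-level examples (not GCC code) of types the predicate above
   classifies as composite: aggregates, unions, arrays and the complex
   types.  A lone scalar is not composite.  */
#if 0
struct s2f   { float x, y; };           /* composite (aggregate)  */
union  u2    { int i; double d; };      /* composite (union)  */
typedef int arr4[4];                    /* composite (array)  */
typedef _Complex double cplx;           /* composite per AAPCS64 7.1.1  */
typedef double scalar;                  /* not composite  */
#endif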
/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
   type as described in AAPCS64 \S 4.1.2.

   See the comment above aarch64_composite_type_p for the notes on MODE.  */

static bool
aarch64_short_vector_p (const_tree type,
                        machine_mode mode)
{
  HOST_WIDE_INT size = -1;

  if (type && TREE_CODE (type) == VECTOR_TYPE)
    size = int_size_in_bytes (type);
  else if (!aarch64_composite_type_p (type, mode)
           && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
               || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
    size = GET_MODE_SIZE (mode);

  return (size == 8 || size == 16) ? true : false;
}
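/* Illustrative examples (not GCC code): a "short vector" is a vector type
   whose total size is exactly 8 or 16 bytes, such as the GNU vector types
   below; a 32-byte vector would not qualify.  */
#if 0
typedef int   v2si __attribute__ ((vector_size (8)));   /* 8 bytes: short vector  */
typedef float v4sf __attribute__ ((vector_size (16)));  /* 16 bytes: short vector  */
typedef int   v8si __attribute__ ((vector_size (32)));  /* 32 bytes: not a short vector  */
#endif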
/* Return TRUE if an argument, whose type is described by TYPE and MODE,
   shall be passed or returned in simd/fp register(s) (providing these
   parameter passing registers are available).

   Upon successful return, *COUNT returns the number of needed registers,
   *BASE_MODE returns the mode of the individual register and when IS_HA
   is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
   floating-point aggregate or a homogeneous short-vector aggregate.  */
static bool
aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
                                         const_tree type,
                                         machine_mode *base_mode,
                                         int *count,
                                         bool *is_ha)
{
  machine_mode new_mode = VOIDmode;
  bool composite_p = aarch64_composite_type_p (type, mode);

  if (is_ha != NULL) *is_ha = false;

  if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
      || aarch64_short_vector_p (type, mode))
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      if (is_ha != NULL) *is_ha = true;
      *count = 2;
      new_mode = GET_MODE_INNER (mode);
    }
  else if (type && composite_p)
    {
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);

      if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
        {
          if (is_ha != NULL) *is_ha = true;
          *count = ag_count;
        }
      else
        return false;
    }
  else
    return false;

  *base_mode = new_mode;
  return true;
}
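/* Illustrative example (not GCC code): a Homogeneous Floating-point
   Aggregate has one to four members of a single floating-point base type,
   so the first struct below is a call/return candidate for the FP/SIMD
   registers while the second is not, because its members have different
   base types.  */
#if 0
struct hfa4d  { double a, b, c, d; };   /* HFA: four DFmode members  */
struct not_ha { float a; double b; };   /* not an HFA  */
#endif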
7967 /* Implement TARGET_STRUCT_VALUE_RTX. */
7970 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED
,
7971 int incoming ATTRIBUTE_UNUSED
)
7973 return gen_rtx_REG (Pmode
, AARCH64_STRUCT_VALUE_REGNUM
);
7976 /* Implements target hook vector_mode_supported_p. */
7978 aarch64_vector_mode_supported_p (machine_mode mode
)
7981 && (mode
== V4SImode
|| mode
== V8HImode
7982 || mode
== V16QImode
|| mode
== V2DImode
7983 || mode
== V2SImode
|| mode
== V4HImode
7984 || mode
== V8QImode
|| mode
== V2SFmode
7985 || mode
== V4SFmode
|| mode
== V2DFmode
7986 || mode
== V1DFmode
))
7992 /* Return appropriate SIMD container
7993 for MODE within a vector of WIDTH bits. */
7995 aarch64_simd_container_mode (machine_mode mode
, unsigned width
)
7997 gcc_assert (width
== 64 || width
== 128);
8036 /* Return 128-bit container as the preferred SIMD mode for MODE. */
8038 aarch64_preferred_simd_mode (machine_mode mode
)
8040 return aarch64_simd_container_mode (mode
, 128);
8043 /* Return the bitmask of possible vector sizes for the vectorizer
8046 aarch64_autovectorize_vector_sizes (void)
8051 /* Implement TARGET_MANGLE_TYPE. */
8054 aarch64_mangle_type (const_tree type
)
  /* The AArch64 ABI documents say that "__va_list" has to be
     mangled as if it is in the "std" namespace.  */
  if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";
8061 /* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for
8063 if (TYPE_NAME (type
) != NULL
)
8064 return aarch64_mangle_builtin_type (type
);
8066 /* Use the default mangling. */
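/* Illustrative example (not GCC code): because __va_list is mangled as if
   it were std::__va_list, a C++ declaration such as the one below is
   expected to mangle to _Z1fSt9__va_list rather than using a
   target-internal scope.  */
#if 0
void f (__builtin_va_list ap);  /* mangled name: _Z1fSt9__va_list  */
#endif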
8071 /* Return true if the rtx_insn contains a MEM RTX somewhere
8075 has_memory_op (rtx_insn
*mem_insn
)
8077 subrtx_iterator::array_type array
;
8078 FOR_EACH_SUBRTX (iter
, array
, PATTERN (mem_insn
), ALL
)
8085 /* Find the first rtx_insn before insn that will generate an assembly
8089 aarch64_prev_real_insn (rtx_insn
*insn
)
8096 insn
= prev_real_insn (insn
);
8098 while (insn
&& recog_memoized (insn
) < 0);
8104 is_madd_op (enum attr_type t1
)
8107 /* A number of these may be AArch32 only. */
8108 enum attr_type mlatypes
[] = {
8109 TYPE_MLA
, TYPE_MLAS
, TYPE_SMLAD
, TYPE_SMLADX
, TYPE_SMLAL
, TYPE_SMLALD
,
8110 TYPE_SMLALS
, TYPE_SMLALXY
, TYPE_SMLAWX
, TYPE_SMLAWY
, TYPE_SMLAXY
,
8111 TYPE_SMMLA
, TYPE_UMLAL
, TYPE_UMLALS
,TYPE_SMLSD
, TYPE_SMLSDX
, TYPE_SMLSLD
8114 for (i
= 0; i
< sizeof (mlatypes
) / sizeof (enum attr_type
); i
++)
8116 if (t1
== mlatypes
[i
])
8123 /* Check if there is a register dependency between a load and the insn
8124 for which we hold recog_data. */
8127 dep_between_memop_and_curr (rtx memop
)
8132 gcc_assert (GET_CODE (memop
) == SET
);
8134 if (!REG_P (SET_DEST (memop
)))
8137 load_reg
= SET_DEST (memop
);
8138 for (opno
= 1; opno
< recog_data
.n_operands
; opno
++)
8140 rtx operand
= recog_data
.operand
[opno
];
8142 && reg_overlap_mentioned_p (load_reg
, operand
))
8150 /* When working around the Cortex-A53 erratum 835769,
8151 given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
8152 instruction and has a preceding memory instruction such that a NOP
8153 should be inserted between them. */
8156 aarch64_madd_needs_nop (rtx_insn
* insn
)
8158 enum attr_type attr_type
;
8162 if (!aarch64_fix_a53_err835769
)
8165 if (recog_memoized (insn
) < 0)
8168 attr_type
= get_attr_type (insn
);
8169 if (!is_madd_op (attr_type
))
8172 prev
= aarch64_prev_real_insn (insn
);
8173 /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
8174 Restore recog state to INSN to avoid state corruption. */
8175 extract_constrain_insn_cached (insn
);
8177 if (!prev
|| !has_memory_op (prev
))
8180 body
= single_set (prev
);
8182 /* If the previous insn is a memory op and there is no dependency between
8183 it and the DImode madd, emit a NOP between them. If body is NULL then we
8184 have a complex memory operation, probably a load/store pair.
8185 Be conservative for now and emit a NOP. */
8186 if (GET_MODE (recog_data
.operand
[0]) == DImode
8187 && (!body
|| !dep_between_memop_and_curr (body
)))
8195 /* Implement FINAL_PRESCAN_INSN. */
8198 aarch64_final_prescan_insn (rtx_insn
*insn
)
8200 if (aarch64_madd_needs_nop (insn
))
8201 fprintf (asm_out_file
, "\tnop // between mem op and mult-accumulate\n");
8205 /* Return the equivalent letter for size. */
8207 sizetochar (int size
)
8211 case 64: return 'd';
8212 case 32: return 's';
8213 case 16: return 'h';
8214 case 8 : return 'b';
8215 default: gcc_unreachable ();
8219 /* Return true iff x is a uniform vector of floating-point
8220 constants, and the constant can be represented in
8221 quarter-precision form. Note, as aarch64_float_const_representable
8222 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
8224 aarch64_vect_float_const_representable_p (rtx x
)
8227 REAL_VALUE_TYPE r0
, ri
;
8230 if (GET_MODE_CLASS (GET_MODE (x
)) != MODE_VECTOR_FLOAT
)
8233 x0
= CONST_VECTOR_ELT (x
, 0);
8234 if (!CONST_DOUBLE_P (x0
))
8237 REAL_VALUE_FROM_CONST_DOUBLE (r0
, x0
);
8239 for (i
= 1; i
< CONST_VECTOR_NUNITS (x
); i
++)
8241 xi
= CONST_VECTOR_ELT (x
, i
);
8242 if (!CONST_DOUBLE_P (xi
))
8245 REAL_VALUE_FROM_CONST_DOUBLE (ri
, xi
);
8246 if (!REAL_VALUES_EQUAL (r0
, ri
))
8250 return aarch64_float_const_representable_p (x0
);
8253 /* Return true for valid and false for invalid. */
8255 aarch64_simd_valid_immediate (rtx op
, machine_mode mode
, bool inverse
,
8256 struct simd_immediate_info
*info
)
8258 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
8260 for (i = 0; i < idx; i += (STRIDE)) \
8265 immtype = (CLASS); \
8266 elsize = (ELSIZE); \
8272 unsigned int i
, elsize
= 0, idx
= 0, n_elts
= CONST_VECTOR_NUNITS (op
);
8273 unsigned int innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
8274 unsigned char bytes
[16];
8275 int immtype
= -1, matches
;
8276 unsigned int invmask
= inverse
? 0xff : 0;
8279 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
8281 if (! (aarch64_simd_imm_zero_p (op
, mode
)
8282 || aarch64_vect_float_const_representable_p (op
)))
8287 info
->value
= CONST_VECTOR_ELT (op
, 0);
8288 info
->element_width
= GET_MODE_BITSIZE (GET_MODE (info
->value
));
8296 /* Splat vector constant out into a byte vector. */
8297 for (i
= 0; i
< n_elts
; i
++)
8299 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
8300 it must be laid out in the vector register in reverse order. */
8301 rtx el
= CONST_VECTOR_ELT (op
, BYTES_BIG_ENDIAN
? (n_elts
- 1 - i
) : i
);
8302 unsigned HOST_WIDE_INT elpart
;
8303 unsigned int part
, parts
;
8305 if (CONST_INT_P (el
))
8307 elpart
= INTVAL (el
);
8310 else if (GET_CODE (el
) == CONST_DOUBLE
)
8312 elpart
= CONST_DOUBLE_LOW (el
);
8318 for (part
= 0; part
< parts
; part
++)
8321 for (byte
= 0; byte
< innersize
; byte
++)
8323 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
8324 elpart
>>= BITS_PER_UNIT
;
8326 if (GET_CODE (el
) == CONST_DOUBLE
)
8327 elpart
= CONST_DOUBLE_HIGH (el
);
8332 gcc_assert (idx
== GET_MODE_SIZE (mode
));
8336 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
8337 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 0, 0);
8339 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
8340 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 8, 0);
8342 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
8343 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0, 16, 0);
8345 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
8346 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3], 24, 0);
8348 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0, 0, 0);
8350 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1], 8, 0);
8352 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
8353 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 0, 1);
8355 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
8356 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 8, 1);
8358 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
8359 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff, 16, 1);
8361 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
8362 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3], 24, 1);
8364 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff, 0, 1);
8366 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1], 8, 1);
8368 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
8369 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 8, 0);
8371 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
8372 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 8, 1);
8374 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
8375 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0, 16, 0);
8377 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
8378 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff, 16, 1);
8380 CHECK (1, 8, 16, bytes
[i
] == bytes
[0], 0, 0);
8382 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
8383 && bytes
[i
] == bytes
[(i
+ 8) % idx
], 0, 0);
8392 info
->element_width
= elsize
;
8393 info
->mvn
= emvn
!= 0;
8394 info
->shift
= eshift
;
8396 unsigned HOST_WIDE_INT imm
= 0;
8398 if (immtype
>= 12 && immtype
<= 15)
8401 /* Un-invert bytes of recognized vector, if necessary. */
8403 for (i
= 0; i
< idx
; i
++)
8404 bytes
[i
] ^= invmask
;
8408 /* FIXME: Broken on 32-bit H_W_I hosts. */
8409 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
8411 for (i
= 0; i
< 8; i
++)
8412 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
8413 << (i
* BITS_PER_UNIT
);
8416 info
->value
= GEN_INT (imm
);
8420 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
8421 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
8423 /* Construct 'abcdefgh' because the assembler cannot handle
8424 generic constants. */
8427 imm
= (imm
>> info
->shift
) & 0xff;
8428 info
->value
= GEN_INT (imm
);
/* Check if immediate shift constants are within range.  */
8438 aarch64_simd_shift_imm_p (rtx x
, machine_mode mode
, bool left
)
8440 int bit_width
= GET_MODE_UNIT_SIZE (mode
) * BITS_PER_UNIT
;
8442 return aarch64_const_vec_all_same_in_range_p (x
, 0, bit_width
- 1);
8444 return aarch64_const_vec_all_same_in_range_p (x
, 1, bit_width
);
8447 /* Return true if X is a uniform vector where all elements
8448 are either the floating-point constant 0.0 or the
8449 integer constant 0. */
8451 aarch64_simd_imm_zero_p (rtx x
, machine_mode mode
)
8453 return x
== CONST0_RTX (mode
);
8457 aarch64_simd_imm_scalar_p (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
)
8459 HOST_WIDE_INT imm
= INTVAL (x
);
8462 for (i
= 0; i
< 8; i
++)
8464 unsigned int byte
= imm
& 0xff;
8465 if (byte
!= 0xff && byte
!= 0)
8474 aarch64_mov_operand_p (rtx x
,
8475 enum aarch64_symbol_context context
,
8478 if (GET_CODE (x
) == HIGH
8479 && aarch64_valid_symref (XEXP (x
, 0), GET_MODE (XEXP (x
, 0))))
8482 if (CONST_INT_P (x
))
8485 if (GET_CODE (x
) == SYMBOL_REF
&& mode
== DImode
&& CONSTANT_ADDRESS_P (x
))
8488 return aarch64_classify_symbolic_expression (x
, context
)
8489 == SYMBOL_TINY_ABSOLUTE
;
8492 /* Return a const_int vector of VAL. */
8494 aarch64_simd_gen_const_vector_dup (machine_mode mode
, int val
)
8496 int nunits
= GET_MODE_NUNITS (mode
);
8497 rtvec v
= rtvec_alloc (nunits
);
8500 for (i
=0; i
< nunits
; i
++)
8501 RTVEC_ELT (v
, i
) = GEN_INT (val
);
8503 return gen_rtx_CONST_VECTOR (mode
, v
);
8506 /* Check OP is a legal scalar immediate for the MOVI instruction. */
8509 aarch64_simd_scalar_immediate_valid_for_move (rtx op
, machine_mode mode
)
8513 gcc_assert (!VECTOR_MODE_P (mode
));
8514 vmode
= aarch64_preferred_simd_mode (mode
);
8515 rtx op_v
= aarch64_simd_gen_const_vector_dup (vmode
, INTVAL (op
));
8516 return aarch64_simd_valid_immediate (op_v
, vmode
, false, NULL
);
/* Construct and return a PARALLEL RTX vector with elements numbering the
   lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
   the vector - from the perspective of the architecture.  This does not
   line up with GCC's perspective on lane numbers, so we end up with
   different masks depending on our target endian-ness.  The diagram
   below may help.  We must draw the distinction when building masks
   which select one half of the vector.  An instruction selecting
   architectural low-lanes for a big-endian target, must be described using
   a mask selecting GCC high-lanes.

                 Big-Endian              Little-Endian

              | x | x | x | x |       | x | x | x | x |
   Architecture 3   2   1   0           3   2   1   0

   Low Mask:      { 2, 3 }                { 0, 1 }
   High Mask:     { 0, 1 }                { 2, 3 }  */
8540 aarch64_simd_vect_par_cnst_half (machine_mode mode
, bool high
)
8542 int nunits
= GET_MODE_NUNITS (mode
);
8543 rtvec v
= rtvec_alloc (nunits
/ 2);
8544 int high_base
= nunits
/ 2;
8550 if (BYTES_BIG_ENDIAN
)
8551 base
= high
? low_base
: high_base
;
8553 base
= high
? high_base
: low_base
;
8555 for (i
= 0; i
< nunits
/ 2; i
++)
8556 RTVEC_ELT (v
, i
) = GEN_INT (base
+ i
);
8558 t1
= gen_rtx_PARALLEL (mode
, v
);
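/* Worked sketch (illustrative only, not GCC code) of the mask selection
   implemented above, using plain integers: for a 4-element vector the low
   half is { 0, 1 } on little-endian but { 2, 3 } on big-endian, and vice
   versa for the high half.  The helper name is made up.  */
#if 0
static void
example_par_cnst_half (int nunits, int big_endian, int high, int *out)
{
  int low_base = 0, high_base = nunits / 2;
  int base = big_endian ? (high ? low_base : high_base)
                        : (high ? high_base : low_base);
  for (int i = 0; i < nunits / 2; i++)
    out[i] = base + i;      /* e.g. nunits = 4, LE, high = 0 -> { 0, 1 }  */
}
#endif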
8562 /* Check OP for validity as a PARALLEL RTX vector with elements
8563 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
8564 from the perspective of the architecture. See the diagram above
8565 aarch64_simd_vect_par_cnst_half for more details. */
8568 aarch64_simd_check_vect_par_cnst_half (rtx op
, machine_mode mode
,
8571 rtx ideal
= aarch64_simd_vect_par_cnst_half (mode
, high
);
8572 HOST_WIDE_INT count_op
= XVECLEN (op
, 0);
8573 HOST_WIDE_INT count_ideal
= XVECLEN (ideal
, 0);
8576 if (!VECTOR_MODE_P (mode
))
8579 if (count_op
!= count_ideal
)
8582 for (i
= 0; i
< count_ideal
; i
++)
8584 rtx elt_op
= XVECEXP (op
, 0, i
);
8585 rtx elt_ideal
= XVECEXP (ideal
, 0, i
);
8587 if (!CONST_INT_P (elt_op
)
8588 || INTVAL (elt_ideal
) != INTVAL (elt_op
))
8594 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
8595 HIGH (exclusive). */
8597 aarch64_simd_lane_bounds (rtx operand
, HOST_WIDE_INT low
, HOST_WIDE_INT high
,
8601 gcc_assert (CONST_INT_P (operand
));
8602 lane
= INTVAL (operand
);
8604 if (lane
< low
|| lane
>= high
)
8607 error ("%Klane %ld out of range %ld - %ld", exp
, lane
, low
, high
- 1);
8609 error ("lane %ld out of range %ld - %ld", lane
, low
, high
- 1);
/* Emit code to place an AdvSIMD pair result in memory locations (with equal
   registers).  */
8616 aarch64_simd_emit_pair_result_insn (machine_mode mode
,
8617 rtx (*intfn
) (rtx
, rtx
, rtx
), rtx destaddr
,
8620 rtx mem
= gen_rtx_MEM (mode
, destaddr
);
8621 rtx tmp1
= gen_reg_rtx (mode
);
8622 rtx tmp2
= gen_reg_rtx (mode
);
8624 emit_insn (intfn (tmp1
, op1
, tmp2
));
8626 emit_move_insn (mem
, tmp1
);
8627 mem
= adjust_address (mem
, mode
, GET_MODE_SIZE (mode
));
8628 emit_move_insn (mem
, tmp2
);
8631 /* Return TRUE if OP is a valid vector addressing mode. */
8633 aarch64_simd_mem_operand_p (rtx op
)
8635 return MEM_P (op
) && (GET_CODE (XEXP (op
, 0)) == POST_INC
8636 || REG_P (XEXP (op
, 0)));
8639 /* Emit a register copy from operand to operand, taking care not to
8640 early-clobber source registers in the process.
8642 COUNT is the number of components into which the copy needs to be
8645 aarch64_simd_emit_reg_reg_move (rtx
*operands
, enum machine_mode mode
,
8649 int rdest
= REGNO (operands
[0]);
8650 int rsrc
= REGNO (operands
[1]);
8652 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
8654 for (i
= 0; i
< count
; i
++)
8655 emit_move_insn (gen_rtx_REG (mode
, rdest
+ i
),
8656 gen_rtx_REG (mode
, rsrc
+ i
));
8658 for (i
= 0; i
< count
; i
++)
8659 emit_move_insn (gen_rtx_REG (mode
, rdest
+ count
- i
- 1),
8660 gen_rtx_REG (mode
, rsrc
+ count
- i
- 1));
8663 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
8664 one of VSTRUCT modes: OI, CI or XI. */
8666 aarch64_simd_attr_length_move (rtx_insn
*insn
)
8670 extract_insn_cached (insn
);
8672 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
8674 mode
= GET_MODE (recog_data
.operand
[0]);
8690 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
8691 alignment of a vector to 128 bits. */
8692 static HOST_WIDE_INT
8693 aarch64_simd_vector_alignment (const_tree type
)
8695 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
8696 return MIN (align
, 128);
8699 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
8701 aarch64_simd_vector_alignment_reachable (const_tree type
, bool is_packed
)
8706 /* We guarantee alignment for vectors up to 128-bits. */
8707 if (tree_int_cst_compare (TYPE_SIZE (type
),
8708 bitsize_int (BIGGEST_ALIGNMENT
)) > 0)
8711 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
8715 /* If VALS is a vector constant that can be loaded into a register
8716 using DUP, generate instructions to do so and return an RTX to
8717 assign to the register. Otherwise return NULL_RTX. */
8719 aarch64_simd_dup_constant (rtx vals
)
8721 machine_mode mode
= GET_MODE (vals
);
8722 machine_mode inner_mode
= GET_MODE_INNER (mode
);
8723 int n_elts
= GET_MODE_NUNITS (mode
);
8724 bool all_same
= true;
8728 if (GET_CODE (vals
) != CONST_VECTOR
)
8731 for (i
= 1; i
< n_elts
; ++i
)
8733 x
= CONST_VECTOR_ELT (vals
, i
);
8734 if (!rtx_equal_p (x
, CONST_VECTOR_ELT (vals
, 0)))
8741 /* We can load this constant by using DUP and a constant in a
8742 single ARM register. This will be cheaper than a vector
8744 x
= copy_to_mode_reg (inner_mode
, CONST_VECTOR_ELT (vals
, 0));
8745 return gen_rtx_VEC_DUPLICATE (mode
, x
);
8749 /* Generate code to load VALS, which is a PARALLEL containing only
8750 constants (for vec_init) or CONST_VECTOR, efficiently into a
8751 register. Returns an RTX to copy into the register, or NULL_RTX
8752 for a PARALLEL that can not be converted into a CONST_VECTOR. */
8754 aarch64_simd_make_constant (rtx vals
)
8756 machine_mode mode
= GET_MODE (vals
);
8758 rtx const_vec
= NULL_RTX
;
8759 int n_elts
= GET_MODE_NUNITS (mode
);
8763 if (GET_CODE (vals
) == CONST_VECTOR
)
8765 else if (GET_CODE (vals
) == PARALLEL
)
8767 /* A CONST_VECTOR must contain only CONST_INTs and
8768 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8769 Only store valid constants in a CONST_VECTOR. */
8770 for (i
= 0; i
< n_elts
; ++i
)
8772 rtx x
= XVECEXP (vals
, 0, i
);
8773 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
8776 if (n_const
== n_elts
)
8777 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
8782 if (const_vec
!= NULL_RTX
8783 && aarch64_simd_valid_immediate (const_vec
, mode
, false, NULL
))
8784 /* Load using MOVI/MVNI. */
8786 else if ((const_dup
= aarch64_simd_dup_constant (vals
)) != NULL_RTX
)
8787 /* Loaded using DUP. */
8789 else if (const_vec
!= NULL_RTX
)
8790 /* Load from constant pool. We can not take advantage of single-cycle
8791 LD1 because we need a PC-relative addressing mode. */
8794 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8795 We can not construct an initializer. */
8800 aarch64_expand_vector_init (rtx target
, rtx vals
)
8802 machine_mode mode
= GET_MODE (target
);
8803 machine_mode inner_mode
= GET_MODE_INNER (mode
);
8804 int n_elts
= GET_MODE_NUNITS (mode
);
8805 int n_var
= 0, one_var
= -1;
8806 bool all_same
= true;
8810 x
= XVECEXP (vals
, 0, 0);
8811 if (!CONST_INT_P (x
) && !CONST_DOUBLE_P (x
))
8812 n_var
= 1, one_var
= 0;
8814 for (i
= 1; i
< n_elts
; ++i
)
8816 x
= XVECEXP (vals
, 0, i
);
8817 if (!CONST_INT_P (x
) && !CONST_DOUBLE_P (x
))
8818 ++n_var
, one_var
= i
;
8820 if (!rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
8826 rtx constant
= aarch64_simd_make_constant (vals
);
8827 if (constant
!= NULL_RTX
)
8829 emit_move_insn (target
, constant
);
8834 /* Splat a single non-constant element if we can. */
8837 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
8838 aarch64_emit_move (target
, gen_rtx_VEC_DUPLICATE (mode
, x
));
8842 /* One field is non-constant. Load constant then overwrite varying
8843 field. This is more efficient than using the stack. */
8846 rtx copy
= copy_rtx (vals
);
8847 rtx index
= GEN_INT (one_var
);
8848 enum insn_code icode
;
8850 /* Load constant part of vector, substitute neighboring value for
8852 XVECEXP (copy
, 0, one_var
) = XVECEXP (vals
, 0, one_var
^ 1);
8853 aarch64_expand_vector_init (target
, copy
);
8855 /* Insert variable. */
8856 x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, one_var
));
8857 icode
= optab_handler (vec_set_optab
, mode
);
8858 gcc_assert (icode
!= CODE_FOR_nothing
);
8859 emit_insn (GEN_FCN (icode
) (target
, x
, index
));
8863 /* Construct the vector in memory one field at a time
8864 and load the whole vector. */
8865 mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
8866 for (i
= 0; i
< n_elts
; i
++)
8867 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
8868 i
* GET_MODE_SIZE (inner_mode
)),
8869 XVECEXP (vals
, 0, i
));
8870 emit_move_insn (target
, mem
);
8874 static unsigned HOST_WIDE_INT
8875 aarch64_shift_truncation_mask (machine_mode mode
)
8878 (aarch64_vector_mode_supported_p (mode
)
8879 || aarch64_vect_struct_mode_p (mode
)) ? 0 : (GET_MODE_BITSIZE (mode
) - 1);
8882 #ifndef TLS_SECTION_ASM_FLAG
8883 #define TLS_SECTION_ASM_FLAG 'T'
8887 aarch64_elf_asm_named_section (const char *name
, unsigned int flags
,
8888 tree decl ATTRIBUTE_UNUSED
)
8890 char flagchars
[10], *f
= flagchars
;
8892 /* If we have already declared this section, we can use an
8893 abbreviated form to switch back to it -- unless this section is
8894 part of a COMDAT groups, in which case GAS requires the full
8895 declaration every time. */
8896 if (!(HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
8897 && (flags
& SECTION_DECLARED
))
8899 fprintf (asm_out_file
, "\t.section\t%s\n", name
);
8903 if (!(flags
& SECTION_DEBUG
))
8905 if (flags
& SECTION_WRITE
)
8907 if (flags
& SECTION_CODE
)
8909 if (flags
& SECTION_SMALL
)
8911 if (flags
& SECTION_MERGE
)
8913 if (flags
& SECTION_STRINGS
)
8915 if (flags
& SECTION_TLS
)
8916 *f
++ = TLS_SECTION_ASM_FLAG
;
8917 if (HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
8921 fprintf (asm_out_file
, "\t.section\t%s,\"%s\"", name
, flagchars
);
8923 if (!(flags
& SECTION_NOTYPE
))
8928 if (flags
& SECTION_BSS
)
8933 #ifdef TYPE_OPERAND_FMT
8934 format
= "," TYPE_OPERAND_FMT
;
8939 fprintf (asm_out_file
, format
, type
);
8941 if (flags
& SECTION_ENTSIZE
)
8942 fprintf (asm_out_file
, ",%d", flags
& SECTION_ENTSIZE
);
8943 if (HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
8945 if (TREE_CODE (decl
) == IDENTIFIER_NODE
)
8946 fprintf (asm_out_file
, ",%s,comdat", IDENTIFIER_POINTER (decl
));
8948 fprintf (asm_out_file
, ",%s,comdat",
8949 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl
)));
8953 putc ('\n', asm_out_file
);
8956 /* Select a format to encode pointers in exception handling data. */
8958 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED
, int global
)
8961 switch (aarch64_cmodel
)
8963 case AARCH64_CMODEL_TINY
:
8964 case AARCH64_CMODEL_TINY_PIC
:
8965 case AARCH64_CMODEL_SMALL
:
8966 case AARCH64_CMODEL_SMALL_PIC
:
8967 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
8969 type
= DW_EH_PE_sdata4
;
8972 /* No assumptions here. 8-byte relocs required. */
8973 type
= DW_EH_PE_sdata8
;
8976 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
8979 /* Emit load exclusive. */
8982 aarch64_emit_load_exclusive (machine_mode mode
, rtx rval
,
8983 rtx mem
, rtx model_rtx
)
8985 rtx (*gen
) (rtx
, rtx
, rtx
);
8989 case QImode
: gen
= gen_aarch64_load_exclusiveqi
; break;
8990 case HImode
: gen
= gen_aarch64_load_exclusivehi
; break;
8991 case SImode
: gen
= gen_aarch64_load_exclusivesi
; break;
8992 case DImode
: gen
= gen_aarch64_load_exclusivedi
; break;
8997 emit_insn (gen (rval
, mem
, model_rtx
));
9000 /* Emit store exclusive. */
9003 aarch64_emit_store_exclusive (machine_mode mode
, rtx bval
,
9004 rtx rval
, rtx mem
, rtx model_rtx
)
9006 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
9010 case QImode
: gen
= gen_aarch64_store_exclusiveqi
; break;
9011 case HImode
: gen
= gen_aarch64_store_exclusivehi
; break;
9012 case SImode
: gen
= gen_aarch64_store_exclusivesi
; break;
9013 case DImode
: gen
= gen_aarch64_store_exclusivedi
; break;
9018 emit_insn (gen (bval
, rval
, mem
, model_rtx
));
9021 /* Mark the previous jump instruction as unlikely. */
9024 aarch64_emit_unlikely_jump (rtx insn
)
9026 int very_unlikely
= REG_BR_PROB_BASE
/ 100 - 1;
9028 insn
= emit_jump_insn (insn
);
9029 add_int_reg_note (insn
, REG_BR_PROB
, very_unlikely
);
9032 /* Expand a compare and swap pattern. */
9035 aarch64_expand_compare_and_swap (rtx operands
[])
9037 rtx bval
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
9038 machine_mode mode
, cmp_mode
;
9039 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
9044 oldval
= operands
[3];
9045 newval
= operands
[4];
9046 is_weak
= operands
[5];
9047 mod_s
= operands
[6];
9048 mod_f
= operands
[7];
9049 mode
= GET_MODE (mem
);
9052 /* Normally the succ memory model must be stronger than fail, but in the
9053 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
9054 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
9056 if (INTVAL (mod_f
) == MEMMODEL_ACQUIRE
9057 && INTVAL (mod_s
) == MEMMODEL_RELEASE
)
9058 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
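/* Illustrative user-level example (not GCC code) of the situation handled
   just above: a compare-exchange whose failure order (acquire) is not
   weaker than its success order (release); the expander promotes the
   success order to acquire-release so the acquire semantics are kept.
   The wrapper name is made up; __atomic_compare_exchange_n is the GCC
   builtin.  */
#if 0
static _Bool
example_cas_release_acquire (long *p, long *expected, long desired)
{
  return __atomic_compare_exchange_n (p, expected, desired, 0 /* strong */,
                                      __ATOMIC_RELEASE,   /* success  */
                                      __ATOMIC_ACQUIRE);  /* failure  */
}
#endif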
9064 /* For short modes, we're going to perform the comparison in SImode,
9065 so do the zero-extension now. */
9067 rval
= gen_reg_rtx (SImode
);
9068 oldval
= convert_modes (SImode
, mode
, oldval
, true);
9073 /* Force the value into a register if needed. */
9074 if (!aarch64_plus_operand (oldval
, mode
))
9075 oldval
= force_reg (cmp_mode
, oldval
);
9084 case QImode
: gen
= gen_atomic_compare_and_swapqi_1
; break;
9085 case HImode
: gen
= gen_atomic_compare_and_swaphi_1
; break;
9086 case SImode
: gen
= gen_atomic_compare_and_swapsi_1
; break;
9087 case DImode
: gen
= gen_atomic_compare_and_swapdi_1
; break;
9092 emit_insn (gen (rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
9094 if (mode
== QImode
|| mode
== HImode
)
9095 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
9097 x
= gen_rtx_REG (CCmode
, CC_REGNUM
);
9098 x
= gen_rtx_EQ (SImode
, x
, const0_rtx
);
9099 emit_insn (gen_rtx_SET (VOIDmode
, bval
, x
));
9102 /* Split a compare and swap pattern. */
9105 aarch64_split_compare_and_swap (rtx operands
[])
9107 rtx rval
, mem
, oldval
, newval
, scratch
;
9110 rtx_code_label
*label1
, *label2
;
9115 oldval
= operands
[2];
9116 newval
= operands
[3];
9117 is_weak
= (operands
[4] != const0_rtx
);
9118 scratch
= operands
[7];
9119 mode
= GET_MODE (mem
);
9124 label1
= gen_label_rtx ();
9125 emit_label (label1
);
9127 label2
= gen_label_rtx ();
9129 aarch64_emit_load_exclusive (mode
, rval
, mem
, operands
[5]);
9131 cond
= aarch64_gen_compare_reg (NE
, rval
, oldval
);
9132 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
9133 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
9134 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
9135 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
9137 aarch64_emit_store_exclusive (mode
, scratch
, mem
, newval
, operands
[5]);
9141 x
= gen_rtx_NE (VOIDmode
, scratch
, const0_rtx
);
9142 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
9143 gen_rtx_LABEL_REF (Pmode
, label1
), pc_rtx
);
9144 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
9148 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
9149 x
= gen_rtx_COMPARE (CCmode
, scratch
, const0_rtx
);
9150 emit_insn (gen_rtx_SET (VOIDmode
, cond
, x
));
9153 emit_label (label2
);
9156 /* Split an atomic operation. */
9159 aarch64_split_atomic_op (enum rtx_code code
, rtx old_out
, rtx new_out
, rtx mem
,
9160 rtx value
, rtx model_rtx
, rtx cond
)
9162 machine_mode mode
= GET_MODE (mem
);
9163 machine_mode wmode
= (mode
== DImode
? DImode
: SImode
);
9164 rtx_code_label
*label
;
9167 label
= gen_label_rtx ();
9171 new_out
= gen_lowpart (wmode
, new_out
);
9173 old_out
= gen_lowpart (wmode
, old_out
);
9176 value
= simplify_gen_subreg (wmode
, value
, mode
, 0);
9178 aarch64_emit_load_exclusive (mode
, old_out
, mem
, model_rtx
);
9187 x
= gen_rtx_AND (wmode
, old_out
, value
);
9188 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
9189 x
= gen_rtx_NOT (wmode
, new_out
);
9190 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
9194 if (CONST_INT_P (value
))
9196 value
= GEN_INT (-INTVAL (value
));
9202 x
= gen_rtx_fmt_ee (code
, wmode
, old_out
, value
);
9203 emit_insn (gen_rtx_SET (VOIDmode
, new_out
, x
));
9207 aarch64_emit_store_exclusive (mode
, cond
, mem
,
9208 gen_lowpart (mode
, new_out
), model_rtx
);
9210 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
9211 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
9212 gen_rtx_LABEL_REF (Pmode
, label
), pc_rtx
);
9213 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode
, pc_rtx
, x
));
9217 aarch64_print_extension (void)
9219 const struct aarch64_option_extension
*opt
= NULL
;
9221 for (opt
= all_extensions
; opt
->name
!= NULL
; opt
++)
9222 if ((aarch64_isa_flags
& opt
->flags_on
) == opt
->flags_on
)
9223 asm_fprintf (asm_out_file
, "+%s", opt
->name
);
9225 asm_fprintf (asm_out_file
, "\n");
9229 aarch64_start_file (void)
9233 asm_fprintf (asm_out_file
, "\t.arch %s", selected_arch
->name
);
9234 aarch64_print_extension ();
9236 else if (selected_cpu
)
9238 const char *truncated_name
9239 = aarch64_rewrite_selected_cpu (selected_cpu
->name
);
9240 asm_fprintf (asm_out_file
, "\t.cpu %s", truncated_name
);
9241 aarch64_print_extension ();
9243 default_file_start();
9246 /* Target hook for c_mode_for_suffix. */
9248 aarch64_c_mode_for_suffix (char suffix
)
/* We can only represent floating point constants which will fit in
   "quarter-precision" values.  These values are characterised by
   a sign bit, a 4-bit mantissa and a 3-bit exponent.  And are given
   by:

   (-1)^s * (n/16) * 2^r

   Where:
     's' is the sign bit.
     'n' is an integer in the range 16 <= n <= 31.
     'r' is an integer in the range -3 <= r <= 4.  */
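/* Worked examples (illustrative only) of the encoding above:
   1.0 = (16/16) * 2^0, 0.125 = (16/16) * 2^-3 (smallest magnitude) and
   31.0 = (31/16) * 2^4 (largest magnitude), while 0.0 and values such as
   0.1 have no exact (n, r) pair.  The brute-force checker below mirrors
   the definition with plain doubles; it is a sketch, not the routine that
   follows, and the function name is made up.  */
#if 0
#include <math.h>

static int
example_quarter_precision_ok (double x)
{
  x = fabs (x);
  for (int n = 16; n <= 31; n++)
    for (int r = -3; r <= 4; r++)
      if (x == ldexp ((double) n / 16.0, r))
        return 1;
  return 0;   /* e.g. rejects 0.0 and 0.1, accepts 1.5  */
}
#endif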
9268 /* Return true iff X can be represented by a quarter-precision
9269 floating point immediate operand X. Note, we cannot represent 0.0. */
9271 aarch64_float_const_representable_p (rtx x
)
9273 /* This represents our current view of how many bits
9274 make up the mantissa. */
9275 int point_pos
= 2 * HOST_BITS_PER_WIDE_INT
- 1;
9277 unsigned HOST_WIDE_INT mantissa
, mask
;
9278 REAL_VALUE_TYPE r
, m
;
9281 if (!CONST_DOUBLE_P (x
))
9284 if (GET_MODE (x
) == VOIDmode
)
9287 REAL_VALUE_FROM_CONST_DOUBLE (r
, x
);
9289 /* We cannot represent infinities, NaNs or +/-zero. We won't
9290 know if we have +zero until we analyse the mantissa, but we
9291 can reject the other invalid values. */
9292 if (REAL_VALUE_ISINF (r
) || REAL_VALUE_ISNAN (r
)
9293 || REAL_VALUE_MINUS_ZERO (r
))
9296 /* Extract exponent. */
9297 r
= real_value_abs (&r
);
9298 exponent
= REAL_EXP (&r
);
9300 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
9301 highest (sign) bit, with a fixed binary point at bit point_pos.
9302 m1 holds the low part of the mantissa, m2 the high part.
9303 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
9304 bits for the mantissa, this can fail (low bits will be lost). */
9305 real_ldexp (&m
, &r
, point_pos
- exponent
);
9306 wide_int w
= real_to_integer (&m
, &fail
, HOST_BITS_PER_WIDE_INT
* 2);
9308 /* If the low part of the mantissa has bits set we cannot represent
9312 /* We have rejected the lower HOST_WIDE_INT, so update our
9313 understanding of how many bits lie in the mantissa and
9314 look only at the high HOST_WIDE_INT. */
9315 mantissa
= w
.elt (1);
9316 point_pos
-= HOST_BITS_PER_WIDE_INT
;
9318 /* We can only represent values with a mantissa of the form 1.xxxx. */
9319 mask
= ((unsigned HOST_WIDE_INT
)1 << (point_pos
- 5)) - 1;
9320 if ((mantissa
& mask
) != 0)
9323 /* Having filtered unrepresentable values, we may now remove all
9324 but the highest 5 bits. */
9325 mantissa
>>= point_pos
- 5;
9327 /* We cannot represent the value 0.0, so reject it. This is handled
9332 /* Then, as bit 4 is always set, we can mask it off, leaving
9333 the mantissa in the range [0, 15]. */
9334 mantissa
&= ~(1 << 4);
9335 gcc_assert (mantissa
<= 15);
9337 /* GCC internally does not use IEEE754-like encoding (where normalized
9338 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
9339 Our mantissa values are shifted 4 places to the left relative to
9340 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
9341 by 5 places to correct for GCC's representation. */
9342 exponent
= 5 - exponent
;
9344 return (exponent
>= 0 && exponent
<= 7);
9348 aarch64_output_simd_mov_immediate (rtx const_vector
,
9353 static char templ
[40];
9354 const char *mnemonic
;
9355 const char *shift_op
;
9356 unsigned int lane_count
= 0;
9359 struct simd_immediate_info info
= { NULL_RTX
, 0, 0, false, false };
  /* This will return true to show const_vector is legal for use as either
     an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
     also update INFO to show how the immediate should be generated.  */
9364 is_valid
= aarch64_simd_valid_immediate (const_vector
, mode
, false, &info
);
9365 gcc_assert (is_valid
);
9367 element_char
= sizetochar (info
.element_width
);
9368 lane_count
= width
/ info
.element_width
;
9370 mode
= GET_MODE_INNER (mode
);
9371 if (mode
== SFmode
|| mode
== DFmode
)
9373 gcc_assert (info
.shift
== 0 && ! info
.mvn
);
9374 if (aarch64_float_const_zero_rtx_p (info
.value
))
9375 info
.value
= GEN_INT (0);
9380 REAL_VALUE_FROM_CONST_DOUBLE (r
, info
.value
);
9381 char float_buf
[buf_size
] = {'\0'};
9382 real_to_decimal_for_mode (float_buf
, &r
, buf_size
, buf_size
, 1, mode
);
9385 if (lane_count
== 1)
9386 snprintf (templ
, sizeof (templ
), "fmov\t%%d0, %s", float_buf
);
9388 snprintf (templ
, sizeof (templ
), "fmov\t%%0.%d%c, %s",
9389 lane_count
, element_char
, float_buf
);
9394 mnemonic
= info
.mvn
? "mvni" : "movi";
9395 shift_op
= info
.msl
? "msl" : "lsl";
9397 if (lane_count
== 1)
9398 snprintf (templ
, sizeof (templ
), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX
,
9399 mnemonic
, UINTVAL (info
.value
));
9400 else if (info
.shift
)
9401 snprintf (templ
, sizeof (templ
), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
9402 ", %s %d", mnemonic
, lane_count
, element_char
,
9403 UINTVAL (info
.value
), shift_op
, info
.shift
);
9405 snprintf (templ
, sizeof (templ
), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
,
9406 mnemonic
, lane_count
, element_char
, UINTVAL (info
.value
));
9411 aarch64_output_scalar_simd_mov_immediate (rtx immediate
,
9416 gcc_assert (!VECTOR_MODE_P (mode
));
9417 vmode
= aarch64_simd_container_mode (mode
, 64);
9418 rtx v_op
= aarch64_simd_gen_const_vector_dup (vmode
, INTVAL (immediate
));
9419 return aarch64_output_simd_mov_immediate (v_op
, vmode
, 64);
9422 /* Split operands into moves from op[1] + op[2] into op[0]. */
9425 aarch64_split_combinev16qi (rtx operands
[3])
9427 unsigned int dest
= REGNO (operands
[0]);
9428 unsigned int src1
= REGNO (operands
[1]);
9429 unsigned int src2
= REGNO (operands
[2]);
9430 machine_mode halfmode
= GET_MODE (operands
[1]);
9431 unsigned int halfregs
= HARD_REGNO_NREGS (src1
, halfmode
);
9434 gcc_assert (halfmode
== V16QImode
);
9436 if (src1
== dest
&& src2
== dest
+ halfregs
)
9438 /* No-op move. Can't split to nothing; emit something. */
9439 emit_note (NOTE_INSN_DELETED
);
9443 /* Preserve register attributes for variable tracking. */
9444 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
9445 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
9446 GET_MODE_SIZE (halfmode
));
9448 /* Special case of reversed high/low parts. */
9449 if (reg_overlap_mentioned_p (operands
[2], destlo
)
9450 && reg_overlap_mentioned_p (operands
[1], desthi
))
9452 emit_insn (gen_xorv16qi3 (operands
[1], operands
[1], operands
[2]));
9453 emit_insn (gen_xorv16qi3 (operands
[2], operands
[1], operands
[2]));
9454 emit_insn (gen_xorv16qi3 (operands
[1], operands
[1], operands
[2]));
9456 else if (!reg_overlap_mentioned_p (operands
[2], destlo
))
9458 /* Try to avoid unnecessary moves if part of the result
9459 is in the right place already. */
9461 emit_move_insn (destlo
, operands
[1]);
9462 if (src2
!= dest
+ halfregs
)
9463 emit_move_insn (desthi
, operands
[2]);
9467 if (src2
!= dest
+ halfregs
)
9468 emit_move_insn (desthi
, operands
[2]);
9470 emit_move_insn (destlo
, operands
[1]);
9474 /* vec_perm support. */
9476 #define MAX_VECT_LEN 16
9478 struct expand_vec_perm_d
9480 rtx target
, op0
, op1
;
9481 unsigned char perm
[MAX_VECT_LEN
];
9488 /* Generate a variable permutation. */
9491 aarch64_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
9493 machine_mode vmode
= GET_MODE (target
);
9494 bool one_vector_p
= rtx_equal_p (op0
, op1
);
9496 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
9497 gcc_checking_assert (GET_MODE (op0
) == vmode
);
9498 gcc_checking_assert (GET_MODE (op1
) == vmode
);
9499 gcc_checking_assert (GET_MODE (sel
) == vmode
);
9500 gcc_checking_assert (TARGET_SIMD
);
9504 if (vmode
== V8QImode
)
9506 /* Expand the argument to a V16QI mode by duplicating it. */
9507 rtx pair
= gen_reg_rtx (V16QImode
);
9508 emit_insn (gen_aarch64_combinev8qi (pair
, op0
, op0
));
9509 emit_insn (gen_aarch64_tbl1v8qi (target
, pair
, sel
));
9513 emit_insn (gen_aarch64_tbl1v16qi (target
, op0
, sel
));
9520 if (vmode
== V8QImode
)
9522 pair
= gen_reg_rtx (V16QImode
);
9523 emit_insn (gen_aarch64_combinev8qi (pair
, op0
, op1
));
9524 emit_insn (gen_aarch64_tbl1v8qi (target
, pair
, sel
));
9528 pair
= gen_reg_rtx (OImode
);
9529 emit_insn (gen_aarch64_combinev16qi (pair
, op0
, op1
));
9530 emit_insn (gen_aarch64_tbl2v16qi (target
, pair
, sel
));
9536 aarch64_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
9538 machine_mode vmode
= GET_MODE (target
);
9539 unsigned int nelt
= GET_MODE_NUNITS (vmode
);
9540 bool one_vector_p
= rtx_equal_p (op0
, op1
);
9543 /* The TBL instruction does not use a modulo index, so we must take care
9544 of that ourselves. */
9545 mask
= aarch64_simd_gen_const_vector_dup (vmode
,
9546 one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
9547 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
9549 /* For big-endian, we also need to reverse the index within the vector
9550 (but not which vector). */
9551 if (BYTES_BIG_ENDIAN
)
9553 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
9555 mask
= aarch64_simd_gen_const_vector_dup (vmode
, nelt
- 1);
9556 sel
= expand_simple_binop (vmode
, XOR
, sel
, mask
,
9557 NULL
, 0, OPTAB_LIB_WIDEN
);
9559 aarch64_expand_vec_perm_1 (target
, op0
, op1
, sel
);
9562 /* Recognize patterns suitable for the TRN instructions. */
9564 aarch64_evpc_trn (struct expand_vec_perm_d
*d
)
9566 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
9567 rtx out
, in0
, in1
, x
;
9568 rtx (*gen
) (rtx
, rtx
, rtx
);
9569 machine_mode vmode
= d
->vmode
;
9571 if (GET_MODE_UNIT_SIZE (vmode
) > 8)
9574 /* Note that these are little-endian tests.
9575 We correct for big-endian later. */
9576 if (d
->perm
[0] == 0)
9578 else if (d
->perm
[0] == 1)
9582 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
9584 for (i
= 0; i
< nelt
; i
+= 2)
9586 if (d
->perm
[i
] != i
+ odd
)
9588 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
9598 if (BYTES_BIG_ENDIAN
)
9600 x
= in0
, in0
= in1
, in1
= x
;
9609 case V16QImode
: gen
= gen_aarch64_trn2v16qi
; break;
9610 case V8QImode
: gen
= gen_aarch64_trn2v8qi
; break;
9611 case V8HImode
: gen
= gen_aarch64_trn2v8hi
; break;
9612 case V4HImode
: gen
= gen_aarch64_trn2v4hi
; break;
9613 case V4SImode
: gen
= gen_aarch64_trn2v4si
; break;
9614 case V2SImode
: gen
= gen_aarch64_trn2v2si
; break;
9615 case V2DImode
: gen
= gen_aarch64_trn2v2di
; break;
9616 case V4SFmode
: gen
= gen_aarch64_trn2v4sf
; break;
9617 case V2SFmode
: gen
= gen_aarch64_trn2v2sf
; break;
9618 case V2DFmode
: gen
= gen_aarch64_trn2v2df
; break;
9627 case V16QImode
: gen
= gen_aarch64_trn1v16qi
; break;
9628 case V8QImode
: gen
= gen_aarch64_trn1v8qi
; break;
9629 case V8HImode
: gen
= gen_aarch64_trn1v8hi
; break;
9630 case V4HImode
: gen
= gen_aarch64_trn1v4hi
; break;
9631 case V4SImode
: gen
= gen_aarch64_trn1v4si
; break;
9632 case V2SImode
: gen
= gen_aarch64_trn1v2si
; break;
9633 case V2DImode
: gen
= gen_aarch64_trn1v2di
; break;
9634 case V4SFmode
: gen
= gen_aarch64_trn1v4sf
; break;
9635 case V2SFmode
: gen
= gen_aarch64_trn1v2sf
; break;
9636 case V2DFmode
: gen
= gen_aarch64_trn1v2df
; break;
9642 emit_insn (gen (out
, in0
, in1
));
/* Recognize patterns suitable for the UZP instructions.  */
static bool
aarch64_evpc_uzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt = (i * 2 + odd) & mask;
      if (d->perm[i] != elt)
        return false;
    }

  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }
  out = d->target;

  if (odd)
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
        case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
        case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
        case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
        case V4SImode: gen = gen_aarch64_uzp2v4si; break;
        case V2SImode: gen = gen_aarch64_uzp2v2si; break;
        case V2DImode: gen = gen_aarch64_uzp2v2di; break;
        case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
        case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
        case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
        default:
          return false;
        }
    }
  else
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
        case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
        case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
        case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
        case V4SImode: gen = gen_aarch64_uzp1v4si; break;
        case V2SImode: gen = gen_aarch64_uzp1v2si; break;
        case V2DImode: gen = gen_aarch64_uzp1v2di; break;
        case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
        case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
        case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
        default:
          return false;
        }
    }

  emit_insn (gen (out, in0, in1));
  return true;
}

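/* For instance, on V4SImode the little-endian selector {0, 2, 4, 6} is
   matched as UZP1 and {1, 3, 5, 7} as UZP2, giving e.g.
       uzp1    v0.4s, v1.4s, v2.4s
   (register numbers chosen arbitrarily for the example).  */
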
/* Recognize patterns suitable for the ZIP instructions.  */
static bool
aarch64_evpc_zip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  high = nelt / 2;
  if (d->perm[0] == high)
    /* Do Nothing.  */
    ;
  else if (d->perm[0] == 0)
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt = (i + high) & mask;
      if (d->perm[i * 2] != elt)
        return false;
      elt = (elt + nelt) & mask;
      if (d->perm[i * 2 + 1] != elt)
        return false;
    }

  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      high = !high;
    }
  out = d->target;

  if (high)
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_zip2v16qi; break;
        case V8QImode: gen = gen_aarch64_zip2v8qi; break;
        case V8HImode: gen = gen_aarch64_zip2v8hi; break;
        case V4HImode: gen = gen_aarch64_zip2v4hi; break;
        case V4SImode: gen = gen_aarch64_zip2v4si; break;
        case V2SImode: gen = gen_aarch64_zip2v2si; break;
        case V2DImode: gen = gen_aarch64_zip2v2di; break;
        case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
        case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
        case V2DFmode: gen = gen_aarch64_zip2v2df; break;
        default:
          return false;
        }
    }
  else
    {
      switch (vmode)
        {
        case V16QImode: gen = gen_aarch64_zip1v16qi; break;
        case V8QImode: gen = gen_aarch64_zip1v8qi; break;
        case V8HImode: gen = gen_aarch64_zip1v8hi; break;
        case V4HImode: gen = gen_aarch64_zip1v4hi; break;
        case V4SImode: gen = gen_aarch64_zip1v4si; break;
        case V2SImode: gen = gen_aarch64_zip1v2si; break;
        case V2DImode: gen = gen_aarch64_zip1v2di; break;
        case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
        case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
        case V2DFmode: gen = gen_aarch64_zip1v2df; break;
        default:
          return false;
        }
    }

  emit_insn (gen (out, in0, in1));
  return true;
}

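/* For instance, on V4SImode the little-endian selector {0, 4, 1, 5} is
   matched as ZIP1 and {2, 6, 3, 7} as ZIP2, giving e.g.
       zip1    v0.4s, v1.4s, v2.4s
   (register numbers chosen arbitrarily for the example).  */
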
/* Recognize patterns for the EXT insn.  */

static bool
aarch64_evpc_ext (struct expand_vec_perm_d *d)
{
  unsigned int i, nelt = d->nelt;
  rtx (*gen) (rtx, rtx, rtx, rtx);
  rtx offset;

  unsigned int location = d->perm[0]; /* Always < nelt.  */

  /* Check if the extracted indices are increasing by one.  */
  for (i = 1; i < nelt; i++)
    {
      unsigned int required = location + i;
      if (d->one_vector_p)
        {
          /* We'll pass the same vector in twice, so allow indices to wrap.  */
          required &= (nelt - 1);
        }
      if (d->perm[i] != required)
        return false;
    }

  switch (d->vmode)
    {
    case V16QImode: gen = gen_aarch64_extv16qi; break;
    case V8QImode: gen = gen_aarch64_extv8qi; break;
    case V4HImode: gen = gen_aarch64_extv4hi; break;
    case V8HImode: gen = gen_aarch64_extv8hi; break;
    case V2SImode: gen = gen_aarch64_extv2si; break;
    case V4SImode: gen = gen_aarch64_extv4si; break;
    case V2SFmode: gen = gen_aarch64_extv2sf; break;
    case V4SFmode: gen = gen_aarch64_extv4sf; break;
    case V2DImode: gen = gen_aarch64_extv2di; break;
    case V2DFmode: gen = gen_aarch64_extv2df; break;
    default:
      return false;
    }

  if (d->testing_p)
    return true;

  /* The case where (location == 0) is a no-op for both big- and little-endian,
     and is removed by the mid-end at optimization levels -O1 and higher.  */

  if (BYTES_BIG_ENDIAN && (location != 0))
    {
      /* After setup, we want the high elements of the first vector (stored
         at the LSB end of the register), and the low elements of the second
         vector (stored at the MSB end of the register).  So swap.  */
      rtx temp = d->op0;
      d->op0 = d->op1;
      d->op1 = temp;
      /* location != 0 (above), so safe to assume (nelt - location) < nelt.  */
      location = nelt - location;
    }

  offset = GEN_INT (location);
  emit_insn (gen (d->target, d->op0, d->op1, offset));
  return true;
}

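/* As an example, on V4SImode the selector {1, 2, 3, 4} has location == 1
   and strictly increasing indices, so it is emitted as an EXT of one lane,
   roughly
       ext     v0.16b, v1.16b, v2.16b, #4
   since the byte immediate is the lane offset scaled by the element size
   (register numbers are arbitrary for the example).  */
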
/* Recognize patterns for the REV insns.  */

static bool
aarch64_evpc_rev (struct expand_vec_perm_d *d)
{
  unsigned int i, j, diff, nelt = d->nelt;
  rtx (*gen) (rtx, rtx);

  if (!d->one_vector_p)
    return false;

  diff = d->perm[0];
  switch (diff)
    {
    case 7:
      switch (d->vmode)
        {
        case V16QImode: gen = gen_aarch64_rev64v16qi; break;
        case V8QImode: gen = gen_aarch64_rev64v8qi;  break;
        default:
          return false;
        }
      break;
    case 3:
      switch (d->vmode)
        {
        case V16QImode: gen = gen_aarch64_rev32v16qi; break;
        case V8QImode: gen = gen_aarch64_rev32v8qi;  break;
        case V8HImode: gen = gen_aarch64_rev64v8hi;  break;
        case V4HImode: gen = gen_aarch64_rev64v4hi;  break;
        default:
          return false;
        }
      break;
    case 1:
      switch (d->vmode)
        {
        case V16QImode: gen = gen_aarch64_rev16v16qi; break;
        case V8QImode: gen = gen_aarch64_rev16v8qi;  break;
        case V8HImode: gen = gen_aarch64_rev32v8hi;  break;
        case V4HImode: gen = gen_aarch64_rev32v4hi;  break;
        case V4SImode: gen = gen_aarch64_rev64v4si;  break;
        case V2SImode: gen = gen_aarch64_rev64v2si;  break;
        case V4SFmode: gen = gen_aarch64_rev64v4sf;  break;
        case V2SFmode: gen = gen_aarch64_rev64v2sf;  break;
        default:
          return false;
        }
      break;
    default:
      return false;
    }

  for (i = 0; i < nelt; i += diff + 1)
    for (j = 0; j <= diff; j += 1)
      {
        /* This is guaranteed to be true as the value of diff
           is 7, 3, 1 and we should have enough elements in the
           queue to generate this.  Getting a vector mask with a
           value of diff other than these values implies that
           something is wrong by the time we get here.  */
        gcc_assert (i + j < nelt);
        if (d->perm[i + j] != i + diff - j)
          return false;
      }

  if (d->testing_p)
    return true;

  emit_insn (gen (d->target, d->op0));
  return true;
}

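/* For instance, on V16QImode a selector starting with diff == 3, i.e.
   {3, 2, 1, 0, 7, 6, 5, 4, ...}, reverses the bytes within each 32-bit
   word and is emitted as
       rev32   v0.16b, v1.16b
   (register numbers are arbitrary for the example).  */
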
/* Recognize patterns for the DUP instructions.  */

static bool
aarch64_evpc_dup (struct expand_vec_perm_d *d)
{
  rtx (*gen) (rtx, rtx, rtx);
  rtx out = d->target;
  rtx in0;
  machine_mode vmode = d->vmode;
  unsigned int i, elt, nelt = d->nelt;
  rtx lane;

  elt = d->perm[0];
  for (i = 1; i < nelt; i++)
    {
      if (elt != d->perm[i])
        return false;
    }

  /* The generic preparation in aarch64_expand_vec_perm_const_1
     swaps the operand order and the permute indices if it finds
     d->perm[0] to be in the second operand.  Thus, we can always
     use d->op0 and need not do any extra arithmetic to get the
     correct lane number.  */
  in0 = d->op0;
  lane = GEN_INT (elt); /* The pattern corrects for big-endian.  */

  switch (vmode)
    {
    case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
    case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
    case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
    case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
    case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
    case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
    case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
    case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
    case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
    case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
    default:
      return false;
    }

  emit_insn (gen (out, in0, lane));
  return true;
}

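/* For example, a selector whose entries are all equal, such as {2, 2, 2, 2}
   on V4SImode, broadcasts lane 2 of the first operand:
       dup     v0.4s, v1.s[2]
   (register numbers are arbitrary for the example).  */
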
static bool
aarch64_evpc_tbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    {
      int nunits = GET_MODE_NUNITS (vmode);

      /* If big-endian and two vectors we end up with a weird mixed-endian
         mode on NEON.  Reverse the index within each word but not the word
         itself.  */
      rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
                                           : d->perm[i]);
    }
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}

static bool
aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;
      rtx x;

      gcc_assert (nelt == (nelt & -nelt));
      for (i = 0; i < nelt; ++i)
        d->perm[i] ^= nelt; /* Keep the same index, but in the other vector.  */

      x = d->op0;
      d->op0 = d->op1;
      d->op1 = x;
    }

  if (TARGET_SIMD)
    {
      if (aarch64_evpc_rev (d))
        return true;
      else if (aarch64_evpc_ext (d))
        return true;
      else if (aarch64_evpc_dup (d))
        return true;
      else if (aarch64_evpc_zip (d))
        return true;
      else if (aarch64_evpc_uzp (d))
        return true;
      else if (aarch64_evpc_trn (d))
        return true;
      return aarch64_evpc_tbl (d);
    }

  return false;
}

/* Expand a vec_perm_const pattern.  */

bool
aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
        break;

      /* The elements of PERM do not suggest that only the first operand
         is used, but both operands are identical.  Allow easier matching
         of the permutation by folding the permutation into the single
         operand.  */
      /* Fall Through.  */
    case 2:
      for (i = 0; i < nelt; ++i)
        d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return aarch64_expand_vec_perm_const_1 (&d);
}

static bool
aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
                                     const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Calculate whether all elements are in one vector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* If all elements are from the second vector, reindex as if from the
     first vector.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to a single vector.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = aarch64_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}

/* Implement target hook CANNOT_CHANGE_MODE_CLASS.  */

bool
aarch64_cannot_change_mode_class (machine_mode from,
                                  machine_mode to,
                                  enum reg_class rclass)
{
  /* Full-reg subregs are allowed on general regs or any class if they are
     the same size.  */
  if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
      || !reg_classes_intersect_p (FP_REGS, rclass))
    return false;

  /* Limited combinations of subregs are safe on FPREGs.  Particularly,
     1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
     2. Scalar to Scalar for integer modes or same size float modes.
     3. Vector to Vector modes.
     4. On little-endian only, Vector-Structure to Vector modes.  */
  if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
    {
      if (aarch64_vector_mode_supported_p (from)
          && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
        return false;

      if (GET_MODE_NUNITS (from) == 1
          && GET_MODE_NUNITS (to) == 1
          && (GET_MODE_CLASS (from) == MODE_INT
              || from == to))
        return false;

      if (aarch64_vector_mode_supported_p (from)
          && aarch64_vector_mode_supported_p (to))
        return false;

      /* Within a vector structure straddling multiple vector registers
         we are in a mixed-endian representation.  As such, we can't
         easily change modes for BYTES_BIG_ENDIAN.  Otherwise, we can
         switch between vectors and vector structures cheaply.  */
      if (!BYTES_BIG_ENDIAN)
        if ((aarch64_vector_mode_supported_p (from)
             && aarch64_vect_struct_mode_p (to))
            || (aarch64_vector_mode_supported_p (to)
                && aarch64_vect_struct_mode_p (from)))
          return false;
    }

  return true;
}

/* Implement MODES_TIEABLE_P.  */

bool
aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
    return true;

  /* We specifically want to allow elements of "structure" modes to
     be tieable to the structure.  This more general condition allows
     other rarer situations too.  */
  if (TARGET_SIMD
      && aarch64_vector_mode_p (mode1)
      && aarch64_vector_mode_p (mode2))
    return true;

  return false;
}

/* Return a new RTX holding the result of moving POINTER forward by
   AMOUNT bytes.  */

static rtx
aarch64_move_pointer (rtx pointer, int amount)
{
  rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);

  return adjust_automodify_address (pointer, GET_MODE (pointer),
                                    next, amount);
}

/* Return a new RTX holding the result of moving POINTER forward by the
   size of the mode it points to.  */

static rtx
aarch64_progress_pointer (rtx pointer)
{
  HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));

  return aarch64_move_pointer (pointer, amount);
}

/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
   MODE bytes.  */

static void
aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
                                              machine_mode mode)
{
  rtx reg = gen_reg_rtx (mode);

  /* "Cast" the pointers to the correct mode.  */
  *src = adjust_address (*src, mode, 0);
  *dst = adjust_address (*dst, mode, 0);
  /* Emit the memcpy.  */
  emit_move_insn (reg, *src);
  emit_move_insn (*dst, reg);
  /* Move the pointers forward.  */
  *src = aarch64_progress_pointer (*src);
  *dst = aarch64_progress_pointer (*dst);
}

/* Expand movmem, as if from a __builtin_memcpy.  Return true if
   we succeed, otherwise return false.  */

bool
aarch64_expand_movmem (rtx *operands)
{
  unsigned int n;
  rtx dst = operands[0];
  rtx src = operands[1];
  rtx base;
  bool speed_p = !optimize_function_for_size_p (cfun);

  /* When optimizing for size, give a better estimate of the length of a
     memcpy call, but use the default otherwise.  */
  unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;

  /* We can't do anything smart if the amount to copy is not constant.  */
  if (!CONST_INT_P (operands[2]))
    return false;

  n = UINTVAL (operands[2]);

  /* Try to keep the number of instructions low.  For cases below 16 bytes we
     need to make at most two moves.  For cases above 16 bytes it will be one
     move for each 16 byte chunk, then at most two additional moves.  */
  if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
    return false;

  base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  dst = adjust_automodify_address (dst, VOIDmode, base, 0);

  base = copy_to_mode_reg (Pmode, XEXP (src, 0));
  src = adjust_automodify_address (src, VOIDmode, base, 0);

  /* Simple cases.  Copy 0-3 bytes, as (if applicable) a 2-byte, then a
     1-byte chunk.  */
  if (n < 4)
    {
      if (n >= 2)
        {
          aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
          n -= 2;
        }

      if (n == 1)
        aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);

      return true;
    }

  /* Copy 4-8 bytes.  First a 4-byte chunk, then (if applicable) a second
     4-byte chunk, partially overlapping with the previously copied chunk.  */
  if (n < 8)
    {
      aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
      n -= 4;
      if (n > 0)
        {
          int move = n - 4;

          src = aarch64_move_pointer (src, move);
          dst = aarch64_move_pointer (dst, move);
          aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
        }
      return true;
    }

  /* Copy more than 8 bytes.  Copy chunks of 16 bytes until we run out of
     them, then (if applicable) an 8-byte chunk.  */
  while (n >= 8)
    {
      if (n / 16)
        {
          aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
          n -= 16;
        }
      else
        {
          aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
          n -= 8;
        }
    }

  /* Finish the final bytes of the copy.  We can always do this in one
     instruction.  We either copy the exact amount we need, or partially
     overlap with the previous chunk we copied and copy 8 bytes.  */
  if (n == 0)
    return true;
  else if (n == 1)
    aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
  else if (n == 2)
    aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
  else if (n == 4)
    aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
  else
    {
      if (n == 3)
        {
          src = aarch64_move_pointer (src, -1);
          dst = aarch64_move_pointer (dst, -1);
          aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
        }
      else
        {
          int move = n - 8;

          src = aarch64_move_pointer (src, move);
          dst = aarch64_move_pointer (dst, move);
          aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
        }
    }

  return true;
}

/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
aarch64_asan_shadow_offset (void)
{
  return (HOST_WIDE_INT_1 << 36);
}

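/* With the default shadow scale of 3, libsanitizer then computes shadow
   addresses as (address >> 3) + (1 << 36); this value has to match the
   AArch64 configuration used by libsanitizer.  */
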
static bool
aarch64_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
                                        unsigned int align,
                                        enum by_pieces_operation op,
                                        bool speed_p)
{
  /* STORE_BY_PIECES can be used when copying a constant string, but
     in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
     For now we always fail this and let the move_by_pieces code copy
     the string from read-only memory.  */
  if (op == STORE_BY_PIECES)
    return false;

  return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
}

static enum machine_mode
aarch64_code_to_ccmode (enum rtx_code code)
{
  switch (code)
    {
    case NE:
      return CC_DNEmode;

    case EQ:
      return CC_DEQmode;

    case LE:
      return CC_DLEmode;

    case LT:
      return CC_DLTmode;

    case GE:
      return CC_DGEmode;

    case GT:
      return CC_DGTmode;

    case LEU:
      return CC_DLEUmode;

    case LTU:
      return CC_DLTUmode;

    case GEU:
      return CC_DGEUmode;

    case GTU:
      return CC_DGTUmode;

    default:
      return CCmode;
    }
}

static rtx
aarch64_gen_ccmp_first (rtx *prep_seq, rtx *gen_seq,
                        int code, tree treeop0, tree treeop1)
{
  enum machine_mode op_mode, cmp_mode, cc_mode;
  rtx op0, op1, cmp, target;
  int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
  enum insn_code icode;
  struct expand_operand ops[4];

  cc_mode = aarch64_code_to_ccmode ((enum rtx_code) code);
  if (cc_mode == CCmode)
    return NULL_RTX;

  start_sequence ();
  expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);

  op_mode = GET_MODE (op0);
  if (op_mode == VOIDmode)
    op_mode = GET_MODE (op1);

  switch (op_mode)
    {
    case QImode:
    case HImode:
    case SImode:
      cmp_mode = SImode;
      icode = CODE_FOR_cmpsi;
      break;

    case DImode:
      cmp_mode = DImode;
      icode = CODE_FOR_cmpdi;
      break;

    default:
      end_sequence ();
      return NULL_RTX;
    }

  op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
  op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
  if (!op0 || !op1)
    {
      end_sequence ();
      return NULL_RTX;
    }
  *prep_seq = get_insns ();
  end_sequence ();

  cmp = gen_rtx_fmt_ee ((enum rtx_code) code, cmp_mode, op0, op1);
  target = gen_rtx_REG (CCmode, CC_REGNUM);

  create_output_operand (&ops[0], target, CCmode);
  create_fixed_operand (&ops[1], cmp);
  create_fixed_operand (&ops[2], op0);
  create_fixed_operand (&ops[3], op1);

  start_sequence ();
  if (!maybe_expand_insn (icode, 4, ops))
    {
      end_sequence ();
      return NULL_RTX;
    }
  *gen_seq = get_insns ();
  end_sequence ();

  return gen_rtx_REG (cc_mode, CC_REGNUM);
}

static rtx
aarch64_gen_ccmp_next (rtx *prep_seq, rtx *gen_seq, rtx prev, int cmp_code,
                       tree treeop0, tree treeop1, int bit_code)
{
  rtx op0, op1, cmp0, cmp1, target;
  enum machine_mode op_mode, cmp_mode, cc_mode;
  int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
  enum insn_code icode = CODE_FOR_ccmp_andsi;
  struct expand_operand ops[6];

  cc_mode = aarch64_code_to_ccmode ((enum rtx_code) cmp_code);
  if (cc_mode == CCmode)
    return NULL_RTX;

  push_to_sequence ((rtx_insn *) *prep_seq);
  expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);

  op_mode = GET_MODE (op0);
  if (op_mode == VOIDmode)
    op_mode = GET_MODE (op1);

  switch (op_mode)
    {
    case QImode:
    case HImode:
    case SImode:
      cmp_mode = SImode;
      icode = (enum rtx_code) bit_code == AND ? CODE_FOR_ccmp_andsi
                                              : CODE_FOR_ccmp_iorsi;
      break;

    case DImode:
      cmp_mode = DImode;
      icode = (enum rtx_code) bit_code == AND ? CODE_FOR_ccmp_anddi
                                              : CODE_FOR_ccmp_iordi;
      break;

    default:
      end_sequence ();
      return NULL_RTX;
    }

  op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
  op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
  if (!op0 || !op1)
    {
      end_sequence ();
      return NULL_RTX;
    }
  *prep_seq = get_insns ();
  end_sequence ();

  target = gen_rtx_REG (cc_mode, CC_REGNUM);
  cmp1 = gen_rtx_fmt_ee ((enum rtx_code) cmp_code, cmp_mode, op0, op1);
  cmp0 = gen_rtx_fmt_ee (NE, cmp_mode, prev, const0_rtx);

  create_fixed_operand (&ops[0], prev);
  create_fixed_operand (&ops[1], target);
  create_fixed_operand (&ops[2], op0);
  create_fixed_operand (&ops[3], op1);
  create_fixed_operand (&ops[4], cmp0);
  create_fixed_operand (&ops[5], cmp1);

  push_to_sequence ((rtx_insn *) *gen_seq);
  if (!maybe_expand_insn (icode, 6, ops))
    {
      end_sequence ();
      return NULL_RTX;
    }

  *gen_seq = get_insns ();
  end_sequence ();

  return target;
}

#undef TARGET_GEN_CCMP_FIRST
#define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first

#undef TARGET_GEN_CCMP_NEXT
#define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next

/* Implement TARGET_SCHED_MACRO_FUSION_P.  Return true if target supports
   instruction fusion of some sort.  */

static bool
aarch64_macro_fusion_p (void)
{
  return aarch64_tune_params->fuseable_ops != AARCH64_FUSE_NOTHING;
}

/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P.  Return true if PREV and CURR
   should be kept together during scheduling.  */

static bool
aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
{
  rtx set_dest;
  rtx prev_set = single_set (prev);
  rtx curr_set = single_set (curr);
  /* prev and curr are simple SET insns i.e. no flag setting or branching.  */
  bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);

  if (!aarch64_macro_fusion_p ())
    return false;

  if (simple_sets_p
      && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOV_MOVK))
    {
      /* We are trying to match:
         prev (mov)  == (set (reg r0) (const_int imm16))
         curr (movk) == (set (zero_extract (reg r0)
                                           (const_int 16)
                                           (const_int 16))
                             (const_int imm16_1))  */

      set_dest = SET_DEST (curr_set);

      if (GET_CODE (set_dest) == ZERO_EXTRACT
          && CONST_INT_P (SET_SRC (curr_set))
          && CONST_INT_P (SET_SRC (prev_set))
          && CONST_INT_P (XEXP (set_dest, 2))
          && INTVAL (XEXP (set_dest, 2)) == 16
          && REG_P (XEXP (set_dest, 0))
          && REG_P (SET_DEST (prev_set))
          && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
        return true;
    }

  if (simple_sets_p
      && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_ADD))
    {
      /* We're trying to match:
         prev (adrp) == (set (reg r1)
                             (high (symbol_ref ("SYM"))))
         curr (add)  == (set (reg r0)
                             (lo_sum (reg r1)
                                     (symbol_ref ("SYM"))))
         Note that r0 need not necessarily be the same as r1, especially
         during pre-regalloc scheduling.  */

      if (satisfies_constraint_Ush (SET_SRC (prev_set))
          && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
        {
          if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
              && REG_P (XEXP (SET_SRC (curr_set), 0))
              && REGNO (XEXP (SET_SRC (curr_set), 0))
                 == REGNO (SET_DEST (prev_set))
              && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
                              XEXP (SET_SRC (curr_set), 1)))
            return true;
        }
    }

  if (simple_sets_p
      && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOVK_MOVK))
    {
      /* We're trying to match:
         prev (movk) == (set (zero_extract (reg r0)
                                           (const_int 16)
                                           (const_int 32))
                             (const_int imm16_1))
         curr (movk) == (set (zero_extract (reg r0)
                                           (const_int 16)
                                           (const_int 48))
                             (const_int imm16_2))  */

      if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
          && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
          && REG_P (XEXP (SET_DEST (prev_set), 0))
          && REG_P (XEXP (SET_DEST (curr_set), 0))
          && REGNO (XEXP (SET_DEST (prev_set), 0))
             == REGNO (XEXP (SET_DEST (curr_set), 0))
          && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
          && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
          && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
          && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
          && CONST_INT_P (SET_SRC (prev_set))
          && CONST_INT_P (SET_SRC (curr_set)))
        return true;
    }

  if (simple_sets_p
      && (aarch64_tune_params->fuseable_ops & AARCH64_FUSE_ADRP_LDR))
    {
      /* We're trying to match:
         prev (adrp) == (set (reg r0)
                             (high (symbol_ref ("SYM"))))
         curr (ldr)  == (set (reg r1)
                             (mem (lo_sum (reg r0)
                                          (symbol_ref ("SYM")))))
                 or
         curr (ldr)  == (set (reg r1)
                             (zero_extend (mem
                                           (lo_sum (reg r0)
                                                   (symbol_ref ("SYM"))))))  */
      if (satisfies_constraint_Ush (SET_SRC (prev_set))
          && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
        {
          rtx curr_src = SET_SRC (curr_set);

          if (GET_CODE (curr_src) == ZERO_EXTEND)
            curr_src = XEXP (curr_src, 0);

          if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
              && REG_P (XEXP (XEXP (curr_src, 0), 0))
              && REGNO (XEXP (XEXP (curr_src, 0), 0))
                 == REGNO (SET_DEST (prev_set))
              && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
                              XEXP (SET_SRC (prev_set), 0)))
            return true;
        }
    }

  if ((aarch64_tune_params->fuseable_ops & AARCH64_FUSE_CMP_BRANCH)
      && any_condjump_p (curr))
    {
      enum attr_type prev_type = get_attr_type (prev);

      /* FIXME: this misses some cases that ThunderX considers simple
         arithmetic instructions.  Simple shifts are missed here.  */
      if (prev_type == TYPE_ALUS_SREG
          || prev_type == TYPE_ALUS_IMM
          || prev_type == TYPE_LOGICS_REG
          || prev_type == TYPE_LOGICS_IMM)
        return true;
    }

  return false;
}

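/* For illustration, the pairs matched above correspond to sequences such as
       mov     x0, #0x4567
       movk    x0, #0x123, lsl #16
   and
       adrp    x1, sym
       add     x0, x1, :lo12:sym
   (the register numbers and symbol name are arbitrary examples).  */
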
/* If MEM is in the form of [base+offset], extract the two parts
   of address and set to BASE and OFFSET, otherwise return false
   after clearing BASE and OFFSET.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  if (REG_P (addr))
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  *base = NULL_RTX;
  *offset = NULL_RTX;

  return false;
}

/* Types for scheduling fusion.  */
enum sched_fusion_type
{
  SCHED_FUSION_NONE = 0,
  SCHED_FUSION_LD_SIGN_EXTEND,
  SCHED_FUSION_LD_ZERO_EXTEND,
  SCHED_FUSION_LD,
  SCHED_FUSION_ST,
  SCHED_FUSION_NUM
};

/* If INSN is a load or store of address in the form of [base+offset],
   extract the two parts and set to BASE and OFFSET.  Return the
   scheduling fusion type of this INSN.  */

static enum sched_fusion_type
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
{
  rtx x, dest, src;
  enum sched_fusion_type fusion = SCHED_FUSION_LD;

  gcc_assert (INSN_P (insn));
  x = PATTERN (insn);
  if (GET_CODE (x) != SET)
    return SCHED_FUSION_NONE;

  src = SET_SRC (x);
  dest = SET_DEST (x);

  if (GET_MODE (dest) != SImode && GET_MODE (dest) != DImode
      && GET_MODE (dest) != SFmode && GET_MODE (dest) != DFmode)
    return SCHED_FUSION_NONE;

  if (GET_CODE (src) == SIGN_EXTEND)
    {
      fusion = SCHED_FUSION_LD_SIGN_EXTEND;
      src = XEXP (src, 0);
      if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
        return SCHED_FUSION_NONE;
    }
  else if (GET_CODE (src) == ZERO_EXTEND)
    {
      fusion = SCHED_FUSION_LD_ZERO_EXTEND;
      src = XEXP (src, 0);
      if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
        return SCHED_FUSION_NONE;
    }

  if (GET_CODE (src) == MEM && REG_P (dest))
    extract_base_offset_in_addr (src, base, offset);
  else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
    {
      fusion = SCHED_FUSION_ST;
      extract_base_offset_in_addr (dest, base, offset);
    }
  else
    return SCHED_FUSION_NONE;

  if (*base == NULL_RTX || *offset == NULL_RTX)
    fusion = SCHED_FUSION_NONE;

  return fusion;
}

/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.

   Currently we only support fusing ldr or str instructions, so FUSION_PRI
   and PRI are only calculated for these instructions.  For other
   instructions, FUSION_PRI and PRI are simply set to MAX_PRI - 1.  In the
   future, other types of instruction fusion can be added by returning
   different priorities.

   It's important that irrelevant instructions get the largest FUSION_PRI.  */

static void
aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
                               int *fusion_pri, int *pri)
{
  int tmp, off_val;
  rtx base, offset;
  enum sched_fusion_type fusion;

  gcc_assert (INSN_P (insn));

  tmp = max_pri - 1;
  fusion = fusion_load_store (insn, &base, &offset);
  if (fusion == SCHED_FUSION_NONE)
    {
      *pri = tmp;
      *fusion_pri = tmp;
      return;
    }

  /* Set FUSION_PRI according to fusion type and base register.  */
  *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);

  /* Calculate PRI.  */
  tmp /= 2;

  /* INSN with smaller offset goes first.  */
  off_val = (int)(INTVAL (offset));
  if (off_val >= 0)
    tmp -= (off_val & 0xfffff);
  else
    tmp += ((- off_val) & 0xfffff);

  *pri = tmp;
  return;
}

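/* For example, two SImode loads from the same base register,
       ldr     w1, [x2, #4]
       ldr     w3, [x2, #8]
   receive neighbouring FUSION_PRI values and offset-ordered PRI values, so
   the scheduler tends to place them back to back where the ldp/stp peepholes
   can pick them up (registers chosen arbitrarily for the example).  */
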
/* Given OPERANDS of consecutive load/store, check if we can merge
   them into ldp/stp.  LOAD is true if they are load instructions.
   MODE is the mode of memory operands.  */

bool
aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
                                enum machine_mode mode)
{
  HOST_WIDE_INT offval_1, offval_2, msize;
  enum reg_class rclass_1, rclass_2;
  rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;

  if (load)
    {
      mem_1 = operands[1];
      mem_2 = operands[3];
      reg_1 = operands[0];
      reg_2 = operands[2];
      gcc_assert (REG_P (reg_1) && REG_P (reg_2));
      if (REGNO (reg_1) == REGNO (reg_2))
        return false;
    }
  else
    {
      mem_1 = operands[0];
      mem_2 = operands[2];
      reg_1 = operands[1];
      reg_2 = operands[3];
    }

  /* The mems cannot be volatile.  */
  if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
    return false;

  /* Check if the addresses are in the form of [base+offset].  */
  extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
  if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
    return false;
  extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
  if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
    return false;

  /* Check if the bases are the same.  */
  if (!rtx_equal_p (base_1, base_2))
    return false;

  offval_1 = INTVAL (offset_1);
  offval_2 = INTVAL (offset_2);
  msize = GET_MODE_SIZE (mode);
  /* Check if the offsets are consecutive.  */
  if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
    return false;

  /* Check if the addresses are clobbered by load.  */
  if (load)
    {
      if (reg_mentioned_p (reg_1, mem_1))
        return false;

      /* In increasing order, the last load can clobber the address.  */
      if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
        return false;
    }

  if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
    rclass_1 = FP_REGS;
  else
    rclass_1 = GENERAL_REGS;

  if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
    rclass_2 = FP_REGS;
  else
    rclass_2 = GENERAL_REGS;

  /* Check if the registers are of the same class.  */
  if (rclass_1 != rclass_2)
    return false;

  return true;
}

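/* For example, with MODE == SImode the consecutive loads
       ldr     w0, [x2]
       ldr     w1, [x2, #4]
   satisfy the checks above and may be merged into
       ldp     w0, w1, [x2]
   (register numbers are arbitrary for the example).  */
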
/* Given OPERANDS of consecutive load/store, check if we can merge
   them into ldp/stp by adjusting the offset.  LOAD is true if they
   are load instructions.  MODE is the mode of memory operands.

   Given the below consecutive stores:

     str  w1, [xb, 0x100]
     str  w1, [xb, 0x104]
     str  w1, [xb, 0x108]
     str  w1, [xb, 0x10c]

   Though the offsets are out of the range supported by stp, we can
   still pair them after adjusting the offset, like:

     add  scratch, xb, 0x100
     stp  w1, w1, [scratch]
     stp  w1, w1, [scratch, 0x8]

   The peephole patterns detecting this opportunity should guarantee
   the scratch register is available.  */

bool
aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
                                       enum machine_mode mode)
{
  enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
  HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize;
  rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4;
  rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4;

  if (load)
    {
      reg_1 = operands[0];
      mem_1 = operands[1];
      reg_2 = operands[2];
      mem_2 = operands[3];
      reg_3 = operands[4];
      mem_3 = operands[5];
      reg_4 = operands[6];
      mem_4 = operands[7];
      gcc_assert (REG_P (reg_1) && REG_P (reg_2)
                  && REG_P (reg_3) && REG_P (reg_4));
      if (REGNO (reg_1) == REGNO (reg_2) || REGNO (reg_3) == REGNO (reg_4))
        return false;
    }
  else
    {
      mem_1 = operands[0];
      reg_1 = operands[1];
      mem_2 = operands[2];
      reg_2 = operands[3];
      mem_3 = operands[4];
      reg_3 = operands[5];
      mem_4 = operands[6];
      reg_4 = operands[7];
    }
  /* Skip if memory operand is by itself valid for ldp/stp.  */
  if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
    return false;

  /* The mems cannot be volatile.  */
  if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2)
      || MEM_VOLATILE_P (mem_3) || MEM_VOLATILE_P (mem_4))
    return false;

  /* Check if the addresses are in the form of [base+offset].  */
  extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
  if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
    return false;
  extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
  if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
    return false;
  extract_base_offset_in_addr (mem_3, &base_3, &offset_3);
  if (base_3 == NULL_RTX || offset_3 == NULL_RTX)
    return false;
  extract_base_offset_in_addr (mem_4, &base_4, &offset_4);
  if (base_4 == NULL_RTX || offset_4 == NULL_RTX)
    return false;

  /* Check if the bases are the same.  */
  if (!rtx_equal_p (base_1, base_2)
      || !rtx_equal_p (base_2, base_3)
      || !rtx_equal_p (base_3, base_4))
    return false;

  offval_1 = INTVAL (offset_1);
  offval_2 = INTVAL (offset_2);
  offval_3 = INTVAL (offset_3);
  offval_4 = INTVAL (offset_4);
  msize = GET_MODE_SIZE (mode);
  /* Check if the offsets are consecutive.  */
  if ((offval_1 != (offval_2 + msize)
       || offval_1 != (offval_3 + msize * 2)
       || offval_1 != (offval_4 + msize * 3))
      && (offval_4 != (offval_3 + msize)
          || offval_4 != (offval_2 + msize * 2)
          || offval_4 != (offval_1 + msize * 3)))
    return false;

  /* Check if the addresses are clobbered by load.  */
  if (load)
    {
      if (reg_mentioned_p (reg_1, mem_1)
          || reg_mentioned_p (reg_2, mem_2)
          || reg_mentioned_p (reg_3, mem_3))
        return false;

      /* In increasing order, the last load can clobber the address.  */
      if (offval_1 > offval_2 && reg_mentioned_p (reg_4, mem_4))
        return false;
    }

  if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
    rclass_1 = FP_REGS;
  else
    rclass_1 = GENERAL_REGS;

  if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
    rclass_2 = FP_REGS;
  else
    rclass_2 = GENERAL_REGS;

  if (REG_P (reg_3) && FP_REGNUM_P (REGNO (reg_3)))
    rclass_3 = FP_REGS;
  else
    rclass_3 = GENERAL_REGS;

  if (REG_P (reg_4) && FP_REGNUM_P (REGNO (reg_4)))
    rclass_4 = FP_REGS;
  else
    rclass_4 = GENERAL_REGS;

  /* Check if the registers are of the same class.  */
  if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 != rclass_4)
    return false;

  return true;
}

/* Given OPERANDS of consecutive load/store, this function pairs them
   into ldp/stp after adjusting the offset.  It depends on the fact
   that addresses of load/store instructions are in increasing order.
   MODE is the mode of memory operands.  CODE is the rtl operator
   which should be applied to all memory operands, it's SIGN_EXTEND,
   ZERO_EXTEND or UNKNOWN.  */

bool
aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
                             enum machine_mode mode, RTX_CODE code)
{
  rtx base, offset, t1, t2;
  rtx mem_1, mem_2, mem_3, mem_4;
  HOST_WIDE_INT off_val, abs_off, adj_off, new_off, stp_off_limit, msize;

  if (load)
    {
      mem_1 = operands[1];
      mem_2 = operands[3];
      mem_3 = operands[5];
      mem_4 = operands[7];
    }
  else
    {
      mem_1 = operands[0];
      mem_2 = operands[2];
      mem_3 = operands[4];
      mem_4 = operands[6];
      gcc_assert (code == UNKNOWN);
    }

  extract_base_offset_in_addr (mem_1, &base, &offset);
  gcc_assert (base != NULL_RTX && offset != NULL_RTX);

  /* Adjust offset thus it can fit in ldp/stp instruction.  */
  msize = GET_MODE_SIZE (mode);
  stp_off_limit = msize * 0x40;
  off_val = INTVAL (offset);
  abs_off = (off_val < 0) ? -off_val : off_val;
  new_off = abs_off % stp_off_limit;
  adj_off = abs_off - new_off;

  /* Further adjust to make sure all offsets are OK.  */
  if ((new_off + msize * 2) >= stp_off_limit)
    {
      adj_off += stp_off_limit;
      new_off -= stp_off_limit;
    }

  /* Make sure the adjustment can be done with ADD/SUB instructions.  */
  if (adj_off >= 0x1000)
    return false;

  if (off_val < 0)
    {
      adj_off = -adj_off;
      new_off = -new_off;
    }

  /* Create new memory references.  */
  mem_1 = change_address (mem_1, VOIDmode,
                          plus_constant (DImode, operands[8], new_off));

  /* Check if the adjusted address is OK for ldp/stp.  */
  if (!aarch64_mem_pair_operand (mem_1, mode))
    return false;

  msize = GET_MODE_SIZE (mode);
  mem_2 = change_address (mem_2, VOIDmode,
                          plus_constant (DImode,
                                         operands[8],
                                         new_off + msize));
  mem_3 = change_address (mem_3, VOIDmode,
                          plus_constant (DImode,
                                         operands[8],
                                         new_off + msize * 2));
  mem_4 = change_address (mem_4, VOIDmode,
                          plus_constant (DImode,
                                         operands[8],
                                         new_off + msize * 3));

  if (code == ZERO_EXTEND)
    {
      mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
      mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
      mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
      mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
    }
  else if (code == SIGN_EXTEND)
    {
      mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
      mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
      mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
      mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
    }

  if (load)
    {
      operands[1] = mem_1;
      operands[3] = mem_2;
      operands[5] = mem_3;
      operands[7] = mem_4;
    }
  else
    {
      operands[0] = mem_1;
      operands[2] = mem_2;
      operands[4] = mem_3;
      operands[6] = mem_4;
    }

  /* Emit adjusting instruction.  */
  emit_insn (gen_rtx_SET (VOIDmode, operands[8],
                          plus_constant (DImode, base, adj_off)));
  /* Emit ldp/stp instructions.  */
  t1 = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
  t2 = gen_rtx_SET (VOIDmode, operands[2], operands[3]);
  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
  t1 = gen_rtx_SET (VOIDmode, operands[4], operands[5]);
  t2 = gen_rtx_SET (VOIDmode, operands[6], operands[7]);
  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));

  return true;
}

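/* Working through the SImode store example in the comment above
   aarch64_operands_adjust_ok_for_ldpstp: msize = 4 so stp_off_limit = 0x100,
   off_val = 0x100, which gives new_off = 0 and adj_off = 0x100, emitted as
       add     scratch, xb, #0x100
       stp     w1, w1, [scratch]
       stp     w1, w1, [scratch, #8]
   matching the sequence shown in that comment.  */
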
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST aarch64_address_cost

/* This hook determines whether unnamed bitfields affect the alignment
   of the containing structure.  The hook returns true if the structure
   should inherit the alignment requirements of an unnamed bitfield's
   type.  */
#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
  hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START aarch64_start_file

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk

#undef TARGET_ASM_SELECT_RTX_SECTION
#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list

#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE aarch64_can_eliminate

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage

/* Only the least significant bit is used for initialization guard
   variables.  */
#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix

#ifdef TARGET_BIG_ENDIAN_DEFAULT
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
#endif

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL aarch64_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN aarch64_fold_builtin

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG aarch64_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance

#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE aarch64_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS aarch64_init_builtins

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE aarch64_mangle_type

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost

#undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

/* This target hook should return true if accesses to volatile bitfields
   should use the narrowest mode possible.  It should return false if these
   accesses should use the bitfield container type.  */
#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE aarch64_override_options

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  aarch64_override_options_after_change

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB aarch64_return_in_msb

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  aarch64_sched_first_cycle_multipass_dfa_lookahead

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p

#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  aarch64_builtin_vectorization_cost

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  aarch64_builtin_vectorized_function

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  aarch64_autovectorize_vector_sizes

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
  aarch64_atomic_assign_expand_fenv

/* Section anchor support.  */

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -256

/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
   byte offset; we can do much more for larger data types, but have no way
   to determine the size of the access.  We assume accesses are aligned.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  aarch64_simd_vector_alignment_reachable

/* vec_perm support.  */

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  aarch64_vectorize_vec_perm_const_ok

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs

#undef TARGET_FLAGS_REGNUM
#define TARGET_FLAGS_REGNUM CC_REGNUM

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address

#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
  aarch64_use_by_pieces_infrastructure_p

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-aarch64.h"