/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2015 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
23 #include "coretypes.h"
29 #include "insn-codes.h"
30 #include "insn-attr.h"
32 #include "fold-const.h"
33 #include "stringpool.h"
34 #include "stor-layout.h"
42 #include "cfgcleanup.h"
45 #include "insn-config.h"
55 #include "targhooks.h"
58 #include "langhooks.h"
59 #include "diagnostic-core.h"
60 #include "internal-fn.h"
61 #include "gimple-fold.h"
67 #include "tree-vectorizer.h"
68 #include "aarch64-cost-tables.h"
72 #include "tm-constrs.h"
73 #include "sched-int.h"
74 #include "cortex-a57-fma-steering.h"
76 /* This file should be included last. */
77 #include "target-def.h"
/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
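/* For example, with 64-bit pointers (POINTER_SIZE == 64 and
   BITS_PER_UNIT == 8) POINTER_BYTES is 8; under ILP32 it would be 4.  */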
/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC
       A constant symbolic address, in pc-relative literal pool.  */

enum aarch64_address_type {
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};

struct aarch64_address_info {
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};

struct simd_immediate_info
{
  rtx value;
  int shift;
  int element_width;
  bool mvn;
  bool msl;
};

/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1

static bool aarch64_composite_type_p (const_tree, machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
						     const_tree,
						     machine_mode *, int *,
						     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (machine_mode);
static unsigned bit_count (unsigned HOST_WIDE_INT);
static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
						 const unsigned char *sel);
static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
/* Major revision number of the ARM Architecture implemented by the target.  */
unsigned aarch64_architecture_version;

/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = cortexa53;

/* Mask to specify which instructions we are allowed to generate.  */
unsigned long aarch64_isa_flags = 0;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;
/* Support for command line parsing of boolean flags in the tuning
   structures.  */
struct aarch64_flag_desc
{
  const char* name;
  unsigned int flag;
};

#define AARCH64_FUSION_PAIR(name, internal_name, y) \
  { name, AARCH64_FUSE_##internal_name },
static const struct aarch64_flag_desc aarch64_fusible_pairs[] =
{
  { "none", AARCH64_FUSE_NOTHING },
#include "aarch64-fusion-pairs.def"
  { "all", AARCH64_FUSE_ALL },
  { NULL, AARCH64_FUSE_NOTHING }
};
#undef AARCH64_FUSION_PAIR

#define AARCH64_EXTRA_TUNING_OPTION(name, internal_name, y) \
  { name, AARCH64_EXTRA_TUNE_##internal_name },
static const struct aarch64_flag_desc aarch64_tuning_flags[] =
{
  { "none", AARCH64_EXTRA_TUNE_NONE },
#include "aarch64-tuning-flags.def"
  { "all", AARCH64_EXTRA_TUNE_ALL },
  { NULL, AARCH64_EXTRA_TUNE_NONE }
};
#undef AARCH64_EXTRA_TUNING_OPTION
/* Tuning parameters.  */

static const struct cpu_addrcost_table generic_addrcost_table =
{
  0, /* register_offset  */
  0, /* register_extend  */
};

static const struct cpu_addrcost_table cortexa57_addrcost_table =
{
  0, /* register_offset  */
  0, /* register_extend  */
};

static const struct cpu_addrcost_table xgene1_addrcost_table =
{
  0, /* register_offset  */
  1, /* register_extend  */
};

static const struct cpu_regmove_cost generic_regmove_cost =
{
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
};

static const struct cpu_regmove_cost cortexa57_regmove_cost =
{
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
};

static const struct cpu_regmove_cost cortexa53_regmove_cost =
{
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
};

static const struct cpu_regmove_cost thunderx_regmove_cost =
{
};

static const struct cpu_regmove_cost xgene1_regmove_cost =
{
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
};
/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost generic_vector_cost =
{
  1, /* scalar_stmt_cost  */
  1, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  1, /* vec_stmt_cost  */
  1, /* vec_to_scalar_cost  */
  1, /* scalar_to_vec_cost  */
  1, /* vec_align_load_cost  */
  1, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  3, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost cortexa57_vector_cost =
{
  1, /* scalar_stmt_cost  */
  4, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  3, /* vec_stmt_cost  */
  8, /* vec_to_scalar_cost  */
  8, /* scalar_to_vec_cost  */
  5, /* vec_align_load_cost  */
  5, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  1, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost xgene1_vector_cost =
{
  1, /* scalar_stmt_cost  */
  5, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  2, /* vec_stmt_cost  */
  4, /* vec_to_scalar_cost  */
  4, /* scalar_to_vec_cost  */
  10, /* vec_align_load_cost  */
  10, /* vec_unalign_load_cost  */
  2, /* vec_unalign_store_cost  */
  2, /* vec_store_cost  */
  2, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Generic costs for branch instructions.  */
static const struct cpu_branch_cost generic_branch_cost =
{
  2, /* Predictable.  */
  2  /* Unpredictable.  */
};
static const struct tune_params generic_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  AARCH64_FUSE_NOTHING, /* fusible_ops  */
  8, /* function_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params cortexa53_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
  8, /* function_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params cortexa57_tunings =
{
  &cortexa57_extra_costs,
  &cortexa57_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
  16, /* function_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS) /* tune_flags.  */
};

static const struct tune_params cortexa72_tunings =
{
  &cortexa57_extra_costs,
  &cortexa57_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
  16, /* function_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params thunderx_tunings =
{
  &thunderx_extra_costs,
  &generic_addrcost_table,
  &thunderx_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  AARCH64_FUSE_CMP_BRANCH, /* fusible_ops  */
  8, /* function_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params xgene1_tunings =
{
  &xgene1_extra_costs,
  &xgene1_addrcost_table,
  &xgene1_regmove_cost,
  &xgene1_vector_cost,
  &generic_branch_cost,
  AARCH64_FUSE_NOTHING, /* fusible_ops  */
  16, /* function_align.  */
  16, /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};
/* Support for fine-grained override of the tuning structures.  */
struct aarch64_tuning_override_function
{
  const char* name;
  void (*parse_override)(const char*, struct tune_params*);
};

static void aarch64_parse_fuse_string (const char*, struct tune_params*);
static void aarch64_parse_tune_string (const char*, struct tune_params*);

static const struct aarch64_tuning_override_function
aarch64_tuning_override_functions[] =
{
  { "fuse", aarch64_parse_fuse_string },
  { "tune", aarch64_parse_tune_string },
  { NULL, NULL }
};
/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor core;
  const char *arch;
  unsigned architecture_version;
  const unsigned long flags;
  const struct tune_params *const tune;
};

/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
  {NAME, SCHED, #ARCH, ARCH, FLAGS, &COSTS##_tunings},
#include "aarch64-cores.def"
  {"generic", cortexa53, "8", 8, AARCH64_FL_FOR_ARCH8, &generic_tunings},
  {NULL, aarch64_none, NULL, 0, 0, NULL}
};

/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
  {NAME, CORE, #ARCH, ARCH, FLAGS, NULL},
#include "aarch64-arches.def"
  {NULL, aarch64_none, NULL, 0, 0, NULL}
};
/* Target specification.  These are populated as commandline arguments
   are processed, or NULL if not specified.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

/* The current tuning set.  */
struct tune_params aarch64_tune_params = generic_tunings;

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};

/* ISA extensions in AArch64.  */
static const struct aarch64_option_extension all_extensions[] =
{
#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF, FEATURE_STRING) \
  {NAME, FLAGS_ON, FLAGS_OFF},
#include "aarch64-option-extensions.def"
#undef AARCH64_OPT_EXTENSION
  {NULL, 0, 0}
};
/* Used to track the size of an address when generating a pre/post
   increment address.  */
static machine_mode aarch64_memory_reference_mode;

/* A table of valid AArch64 "bitmask immediate" values for
   logical instructions.  */

#define AARCH64_NUM_BITMASKS  5334
static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
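/* These are the values that the AArch64 logical instructions (AND, ORR, EOR)
   can encode directly: a rotated run of contiguous set bits, replicated in
   2-, 4-, 8-, 16-, 32- or 64-bit elements across the register.  For example
   0x00ff00ff00ff00ff is encodable, while 0 and ~0 are not.  */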
typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
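/* The enum above pairs each condition with its inverse in adjacent values,
   so flipping the low bit inverts a condition: for example AARCH64_EQ (0)
   maps to AARCH64_NE (1), and AARCH64_GE (10) maps to AARCH64_LT (11).  */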
/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
void
aarch64_err_no_fpadvsimd (machine_mode mode, const char *msg)
{
  const char *mc = FLOAT_MODE_P (mode) ? "floating-point" : "vector";
  if (TARGET_GENERAL_REGS_ONLY)
    error ("%qs is incompatible with %s %s", "-mgeneral-regs-only", mc, msg);
  else
    error ("%qs feature modifier is incompatible with %s %s", "+nofp", mc, msg);
}
static unsigned int
aarch64_min_divisions_for_recip_mul (enum machine_mode mode)
{
  if (GET_MODE_UNIT_SIZE (mode) == 4)
    return aarch64_tune_params.min_div_recip_mul_sf;
  return aarch64_tune_params.min_div_recip_mul_df;
}
static int
aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED,
			     enum machine_mode mode)
{
  if (VECTOR_MODE_P (mode))
    return aarch64_tune_params.vec_reassoc_width;
  if (INTEGRAL_MODE_P (mode))
    return aarch64_tune_params.int_reassoc_width;
  if (FLOAT_MODE_P (mode))
    return aarch64_tune_params.fp_reassoc_width;
  return 1;
}
/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return AARCH64_DWARF_R0 + regno - R0_REGNUM;
  else if (regno == SP_REGNUM)
    return AARCH64_DWARF_SP;
  else if (FP_REGNUM_P (regno))
    return AARCH64_DWARF_V0 + regno - V0_REGNUM;

  /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
     equivalent DWARF register.  */
  return DWARF_FRAME_REGISTERS;
}
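/* For instance, x0-x30 map to DWARF registers 0-30, sp to 31, and v0-v31 to
   64-95, following the AArch64 DWARF register numbering.  */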
/* Return TRUE if MODE is any of the large INT modes.  */
static bool
aarch64_vect_struct_mode_p (machine_mode mode)
{
  return mode == OImode || mode == CImode || mode == XImode;
}

/* Return TRUE if MODE is any of the vector modes.  */
static bool
aarch64_vector_mode_p (machine_mode mode)
{
  return aarch64_vector_mode_supported_p (mode)
	 || aarch64_vect_struct_mode_p (mode);
}
/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
static bool
aarch64_array_mode_supported_p (machine_mode mode,
				unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_SIMD
      && AARCH64_VALID_SIMD_QREG_MODE (mode)
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}
/* Implement HARD_REGNO_NREGS.  */
int
aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
{
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
    default:
      return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    }
  gcc_unreachable ();
}
/* Implement HARD_REGNO_MODE_OK.  */
int
aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return regno == CC_REGNUM;

  if (regno == SP_REGNUM)
    /* The purpose of comparing with ptr_mode is to support the
       global register variable associated with the stack pointer
       register via the syntax of asm ("wsp") in ILP32.  */
    return mode == Pmode || mode == ptr_mode;

  if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
    return mode == Pmode;

  if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
    return 1;

  if (FP_REGNUM_P (regno))
    {
      if (aarch64_vect_struct_mode_p (mode))
	return
	  (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
      else
	return 1;
    }

  return 0;
}
/* Implement HARD_REGNO_CALLER_SAVE_MODE.  */
machine_mode
aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
				     machine_mode mode)
{
  /* Handle modes that fit within single registers.  */
  if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
    {
      if (GET_MODE_SIZE (mode) >= 4)
	return mode;
      else
	return SImode;
    }
  /* Fall back to generic for multi-reg and very large modes.  */
  else
    return choose_hard_reg_mode (regno, nregs, false);
}
/* Return true if calls to DECL should be treated as
   long-calls (i.e. called via a register).  */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
  return false;
}

/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (i.e. called via a register).  */
bool
aarch64_is_long_call_p (rtx sym)
{
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}
/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

   (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
bool
aarch64_is_extend_from_extract (machine_mode mode, rtx mult_imm,
				rtx extract_imm)
{
  HOST_WIDE_INT mult_val, extract_val;

  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
    return false;

  mult_val = INTVAL (mult_imm);
  extract_val = INTVAL (extract_imm);

  if (extract_val > 8
      && extract_val < GET_MODE_BITSIZE (mode)
      && exact_log2 (extract_val & ~7) > 0
      && (extract_val & 7) <= 4
      && mult_val == (1 << (extract_val & 7)))
    return true;

  return false;
}
/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (x, y));
}
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
  machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
  return cc_reg;
}
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

rtx
aarch64_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}

/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;
  rtx sym, addend;

  if (GET_CODE (addr) == CONST)
    {
      split_const (addr, &sym, &addend);
      if (GET_CODE (sym) == SYMBOL_REF)
	tls_kind = SYMBOL_REF_TLS_MODEL (sym);
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}
/* We'll allow lo_sum's in addresses in our legitimate addresses
   so that combine would take care of combining addresses where
   necessary, but for generation purposes, we'll generate the address
   as:

   RTL                               Absolute
   tmp = hi (symbol_ref);            adrp  x1, foo
   dest = lo_sum (tmp, symbol_ref);  add   dest, x1, :lo_12:foo

   PIC                               TLS
   adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
   ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo

   Load TLS symbol, depending on TLS mechanism and TLS access model.

   Global Dynamic - Traditional TLS:
   add  dest, tmp, #:tlsgd_lo12:imm

   Global Dynamic - TLS Descriptors:
   adrp dest, :tlsdesc:imm
   ldr  tmp, [dest, #:tlsdesc_lo12:imm]
   add  dest, dest, #:tlsdesc_lo12:imm

   Initial Exec:
   adrp tmp, :gottprel:imm
   ldr  dest, [tmp, #:gottprel_lo12:imm]

   Local Exec:
   add  t0, tp, #:tprel_hi12:imm, lsl #12
   add  t0, t0, #:tprel_lo12_nc:imm  */
static void
aarch64_load_symref_appropriately (rtx dest, rtx imm,
				   enum aarch64_symbol_type type)
{
  switch (type)
    {
    case SYMBOL_SMALL_ABSOLUTE:
870 /* In ILP32, the mode of dest can be either SImode or DImode. */
872 machine_mode mode
= GET_MODE (dest
);
874 gcc_assert (mode
== Pmode
|| mode
== ptr_mode
);
876 if (can_create_pseudo_p ())
877 tmp_reg
= gen_reg_rtx (mode
);
879 emit_move_insn (tmp_reg
, gen_rtx_HIGH (mode
, imm
));
880 emit_insn (gen_add_losym (dest
, tmp_reg
, imm
));
884 case SYMBOL_TINY_ABSOLUTE
:
885 emit_insn (gen_rtx_SET (dest
, imm
));
888 case SYMBOL_SMALL_GOT_28K
:
890 machine_mode mode
= GET_MODE (dest
);
891 rtx gp_rtx
= pic_offset_table_rtx
;
	/* NOTE: pic_offset_table_rtx can be NULL_RTX, because we can reach
	   here before rtl expand.  Tree IVOPT will generate rtl patterns to
	   decide rtx costs, in which case pic_offset_table_rtx is not
	   initialized.  In that case there is no need to generate the first
	   adrp instruction, as the final cost for global variable access is
	   still just one instruction.  */
	/* -fpic for -mcmodel=small allows a 32K GOT table size (but since we
	   use the page base as the GOT base, the first page may be wasted;
	   in the worst scenario there is only 28K of space for the GOT).

	   The generated instruction sequence for accessing a global variable
	   is:

		ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym]

	   Only one instruction is needed, but we must initialize
	   pic_offset_table_rtx properly.  We generate an initializing insn
	   for every global access and let CSE remove all redundant copies.

	   The final instruction sequence will look like the following for
	   multiple global variable accesses:

		adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_

		ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym1]
		ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym2]
		ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym3]  */
924 rtx s
= gen_rtx_SYMBOL_REF (Pmode
, "_GLOBAL_OFFSET_TABLE_");
925 crtl
->uses_pic_offset_table
= 1;
926 emit_move_insn (gp_rtx
, gen_rtx_HIGH (Pmode
, s
));
928 if (mode
!= GET_MODE (gp_rtx
))
929 gp_rtx
= simplify_gen_subreg (mode
, gp_rtx
, GET_MODE (gp_rtx
), 0);
932 if (mode
== ptr_mode
)
935 emit_insn (gen_ldr_got_small_28k_di (dest
, gp_rtx
, imm
));
937 emit_insn (gen_ldr_got_small_28k_si (dest
, gp_rtx
, imm
));
941 gcc_assert (mode
== Pmode
);
942 emit_insn (gen_ldr_got_small_28k_sidi (dest
, gp_rtx
, imm
));
948 case SYMBOL_SMALL_GOT_4G
:
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the got entry is always of SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in memory), it has SImode; it may have
	   DImode if dest is dereferenced to access the memory.
	   This is why we have to handle three different ldr_got_small
	   patterns here (two patterns for ILP32).  */
958 machine_mode mode
= GET_MODE (dest
);
960 if (can_create_pseudo_p ())
961 tmp_reg
= gen_reg_rtx (mode
);
963 emit_move_insn (tmp_reg
, gen_rtx_HIGH (mode
, imm
));
964 if (mode
== ptr_mode
)
967 emit_insn (gen_ldr_got_small_di (dest
, tmp_reg
, imm
));
969 emit_insn (gen_ldr_got_small_si (dest
, tmp_reg
, imm
));
973 gcc_assert (mode
== Pmode
);
974 emit_insn (gen_ldr_got_small_sidi (dest
, tmp_reg
, imm
));
980 case SYMBOL_SMALL_TLSGD
:
983 rtx result
= gen_rtx_REG (Pmode
, R0_REGNUM
);
986 aarch64_emit_call_insn (gen_tlsgd_small (result
, imm
));
987 insns
= get_insns ();
990 RTL_CONST_CALL_P (insns
) = 1;
991 emit_libcall_block (insns
, dest
, result
, imm
);
995 case SYMBOL_SMALL_TLSDESC
:
997 machine_mode mode
= GET_MODE (dest
);
998 rtx x0
= gen_rtx_REG (mode
, R0_REGNUM
);
1001 gcc_assert (mode
== Pmode
|| mode
== ptr_mode
);
1003 /* In ILP32, the got entry is always of SImode size. Unlike
1004 small GOT, the dest is fixed at reg 0. */
1006 emit_insn (gen_tlsdesc_small_si (imm
));
1008 emit_insn (gen_tlsdesc_small_di (imm
));
1009 tp
= aarch64_load_tp (NULL
);
1012 tp
= gen_lowpart (mode
, tp
);
1014 emit_insn (gen_rtx_SET (dest
, gen_rtx_PLUS (mode
, tp
, x0
)));
1015 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
1019 case SYMBOL_SMALL_GOTTPREL
:
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the got entry is always of SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in memory), it has SImode; it may have
	   DImode if dest is dereferenced to access the memory.
	   This is why we have to handle three different tlsie_small
	   patterns here (two patterns for ILP32).  */
1028 machine_mode mode
= GET_MODE (dest
);
1029 rtx tmp_reg
= gen_reg_rtx (mode
);
1030 rtx tp
= aarch64_load_tp (NULL
);
1032 if (mode
== ptr_mode
)
1035 emit_insn (gen_tlsie_small_di (tmp_reg
, imm
));
1038 emit_insn (gen_tlsie_small_si (tmp_reg
, imm
));
1039 tp
= gen_lowpart (mode
, tp
);
1044 gcc_assert (mode
== Pmode
);
1045 emit_insn (gen_tlsie_small_sidi (tmp_reg
, imm
));
1048 emit_insn (gen_rtx_SET (dest
, gen_rtx_PLUS (mode
, tp
, tmp_reg
)));
1049 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
1055 rtx tp
= aarch64_load_tp (NULL
);
1057 if (GET_MODE (dest
) != Pmode
)
1058 tp
= gen_lowpart (GET_MODE (dest
), tp
);
1060 emit_insn (gen_tlsle (dest
, tp
, imm
));
1061 set_unique_reg_note (get_last_insn (), REG_EQUIV
, imm
);
1065 case SYMBOL_TINY_GOT
:
1066 emit_insn (gen_ldr_got_tiny (dest
, imm
));
/* Emit a move from SRC to DEST.  Assume that the move expanders can
   handle all moves if !can_create_pseudo_p ().  The distinction is
   important because, unlike emit_move_insn, the move expanders know
   how to force Pmode objects into the constant pool even when the
   constant pool address is not itself legitimate.  */
static rtx
aarch64_emit_move (rtx dest, rtx src)
{
  return (can_create_pseudo_p ()
	  ? emit_move_insn (dest, src)
	  : emit_move_insn_1 (dest, src));
}
/* Split a 128-bit move operation into two 64-bit move operations,
   taking care to handle partial overlap of register to register
   copies.  Special cases are needed when moving between GP regs and
   FP regs.  SRC can be a register, constant or memory; DST a register
   or memory.  If either operand is memory it must not have any side
   effects.  */
void
aarch64_split_128bit_move (rtx dst, rtx src)
{
  rtx dst_lo, dst_hi;
  rtx src_lo, src_hi;

  machine_mode mode = GET_MODE (dst);

  gcc_assert (mode == TImode || mode == TFmode);
  gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
  gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
1105 if (REG_P (dst
) && REG_P (src
))
1107 int src_regno
= REGNO (src
);
1108 int dst_regno
= REGNO (dst
);
1110 /* Handle FP <-> GP regs. */
1111 if (FP_REGNUM_P (dst_regno
) && GP_REGNUM_P (src_regno
))
1113 src_lo
= gen_lowpart (word_mode
, src
);
1114 src_hi
= gen_highpart (word_mode
, src
);
1118 emit_insn (gen_aarch64_movtilow_di (dst
, src_lo
));
1119 emit_insn (gen_aarch64_movtihigh_di (dst
, src_hi
));
1123 emit_insn (gen_aarch64_movtflow_di (dst
, src_lo
));
1124 emit_insn (gen_aarch64_movtfhigh_di (dst
, src_hi
));
1128 else if (GP_REGNUM_P (dst_regno
) && FP_REGNUM_P (src_regno
))
1130 dst_lo
= gen_lowpart (word_mode
, dst
);
1131 dst_hi
= gen_highpart (word_mode
, dst
);
1135 emit_insn (gen_aarch64_movdi_tilow (dst_lo
, src
));
1136 emit_insn (gen_aarch64_movdi_tihigh (dst_hi
, src
));
1140 emit_insn (gen_aarch64_movdi_tflow (dst_lo
, src
));
1141 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi
, src
));
1147 dst_lo
= gen_lowpart (word_mode
, dst
);
1148 dst_hi
= gen_highpart (word_mode
, dst
);
1149 src_lo
= gen_lowpart (word_mode
, src
);
1150 src_hi
= gen_highpart_mode (word_mode
, mode
, src
);
1152 /* At most one pairing may overlap. */
1153 if (reg_overlap_mentioned_p (dst_lo
, src_hi
))
1155 aarch64_emit_move (dst_hi
, src_hi
);
1156 aarch64_emit_move (dst_lo
, src_lo
);
1160 aarch64_emit_move (dst_lo
, src_lo
);
1161 aarch64_emit_move (dst_hi
, src_hi
);
1166 aarch64_split_128bit_move_p (rtx dst
, rtx src
)
1168 return (! REG_P (src
)
1169 || ! (FP_REGNUM_P (REGNO (dst
)) && FP_REGNUM_P (REGNO (src
))));
1172 /* Split a complex SIMD combine. */
1175 aarch64_split_simd_combine (rtx dst
, rtx src1
, rtx src2
)
1177 machine_mode src_mode
= GET_MODE (src1
);
1178 machine_mode dst_mode
= GET_MODE (dst
);
1180 gcc_assert (VECTOR_MODE_P (dst_mode
));
1182 if (REG_P (dst
) && REG_P (src1
) && REG_P (src2
))
1184 rtx (*gen
) (rtx
, rtx
, rtx
);
1189 gen
= gen_aarch64_simd_combinev8qi
;
1192 gen
= gen_aarch64_simd_combinev4hi
;
1195 gen
= gen_aarch64_simd_combinev2si
;
1198 gen
= gen_aarch64_simd_combinev2sf
;
1201 gen
= gen_aarch64_simd_combinedi
;
1204 gen
= gen_aarch64_simd_combinedf
;
1210 emit_insn (gen (dst
, src1
, src2
));
1215 /* Split a complex SIMD move. */
1218 aarch64_split_simd_move (rtx dst
, rtx src
)
1220 machine_mode src_mode
= GET_MODE (src
);
1221 machine_mode dst_mode
= GET_MODE (dst
);
1223 gcc_assert (VECTOR_MODE_P (dst_mode
));
1225 if (REG_P (dst
) && REG_P (src
))
1227 rtx (*gen
) (rtx
, rtx
);
1229 gcc_assert (VECTOR_MODE_P (src_mode
));
1234 gen
= gen_aarch64_split_simd_movv16qi
;
1237 gen
= gen_aarch64_split_simd_movv8hi
;
1240 gen
= gen_aarch64_split_simd_movv4si
;
1243 gen
= gen_aarch64_split_simd_movv2di
;
1246 gen
= gen_aarch64_split_simd_movv4sf
;
1249 gen
= gen_aarch64_split_simd_movv2df
;
1255 emit_insn (gen (dst
, src
));
1261 aarch64_force_temporary (machine_mode mode
, rtx x
, rtx value
)
1263 if (can_create_pseudo_p ())
1264 return force_reg (mode
, value
);
1267 x
= aarch64_emit_move (x
, value
);
1274 aarch64_add_offset (machine_mode mode
, rtx temp
, rtx reg
, HOST_WIDE_INT offset
)
1276 if (!aarch64_plus_immediate (GEN_INT (offset
), mode
))
1279 /* Load the full offset into a register. This
1280 might be improvable in the future. */
1281 high
= GEN_INT (offset
);
1283 high
= aarch64_force_temporary (mode
, temp
, high
);
1284 reg
= aarch64_force_temporary (mode
, temp
,
1285 gen_rtx_PLUS (mode
, high
, reg
));
1287 return plus_constant (mode
, reg
, offset
);
1291 aarch64_internal_mov_immediate (rtx dest
, rtx imm
, bool generate
,
1294 unsigned HOST_WIDE_INT mask
;
1297 unsigned HOST_WIDE_INT val
;
1300 int one_match
, zero_match
, first_not_ffff_match
;
1303 if (CONST_INT_P (imm
) && aarch64_move_imm (INTVAL (imm
), mode
))
1306 emit_insn (gen_rtx_SET (dest
, imm
));
1313 /* We know we can't do this in 1 insn, and we must be able to do it
1314 in two; so don't mess around looking for sequences that don't buy
1318 emit_insn (gen_rtx_SET (dest
, GEN_INT (INTVAL (imm
) & 0xffff)));
1319 emit_insn (gen_insv_immsi (dest
, GEN_INT (16),
1320 GEN_INT ((INTVAL (imm
) >> 16) & 0xffff)));
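	  /* Illustrative example: moving 0x12345678 into a 32-bit register
	     this way becomes "mov w0, 0x5678" followed by
	     "movk w0, 0x1234, lsl 16".  */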
1326 /* Remaining cases are all for DImode. */
1329 subtargets
= optimize
&& can_create_pseudo_p ();
1334 first_not_ffff_match
= -1;
1336 for (i
= 0; i
< 64; i
+= 16, mask
<<= 16)
1338 if ((val
& mask
) == mask
)
1342 if (first_not_ffff_match
< 0)
1343 first_not_ffff_match
= i
;
1344 if ((val
& mask
) == 0)
1351 /* Set one of the quarters and then insert back into result. */
1352 mask
= 0xffffll
<< first_not_ffff_match
;
1355 emit_insn (gen_rtx_SET (dest
, GEN_INT (val
| mask
)));
1356 emit_insn (gen_insv_immdi (dest
, GEN_INT (first_not_ffff_match
),
1357 GEN_INT ((val
>> first_not_ffff_match
)
1364 if (zero_match
== 2)
1365 goto simple_sequence
;
1367 mask
= 0x0ffff0000UL
;
1368 for (i
= 16; i
< 64; i
+= 16, mask
<<= 16)
1370 HOST_WIDE_INT comp
= mask
& ~(mask
- 1);
1372 if (aarch64_uimm12_shift (val
- (val
& mask
)))
1376 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1377 emit_insn (gen_rtx_SET (subtarget
, GEN_INT (val
& mask
)));
1378 emit_insn (gen_adddi3 (dest
, subtarget
,
1379 GEN_INT (val
- (val
& mask
))));
1384 else if (aarch64_uimm12_shift (-(val
- ((val
+ comp
) & mask
))))
1388 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1389 emit_insn (gen_rtx_SET (subtarget
,
1390 GEN_INT ((val
+ comp
) & mask
)));
1391 emit_insn (gen_adddi3 (dest
, subtarget
,
1392 GEN_INT (val
- ((val
+ comp
) & mask
))));
1397 else if (aarch64_uimm12_shift (val
- ((val
- comp
) | ~mask
)))
1401 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1402 emit_insn (gen_rtx_SET (subtarget
,
1403 GEN_INT ((val
- comp
) | ~mask
)));
1404 emit_insn (gen_adddi3 (dest
, subtarget
,
1405 GEN_INT (val
- ((val
- comp
) | ~mask
))));
1410 else if (aarch64_uimm12_shift (-(val
- (val
| ~mask
))))
1414 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1415 emit_insn (gen_rtx_SET (subtarget
, GEN_INT (val
| ~mask
)));
1416 emit_insn (gen_adddi3 (dest
, subtarget
,
1417 GEN_INT (val
- (val
| ~mask
))));
1424 /* See if we can do it by arithmetically combining two
1426 for (i
= 0; i
< AARCH64_NUM_BITMASKS
; i
++)
1431 if (aarch64_uimm12_shift (val
- aarch64_bitmasks
[i
])
1432 || aarch64_uimm12_shift (-val
+ aarch64_bitmasks
[i
]))
1436 subtarget
= subtargets
? gen_reg_rtx (DImode
) : dest
;
1437 emit_insn (gen_rtx_SET (subtarget
,
1438 GEN_INT (aarch64_bitmasks
[i
])));
1439 emit_insn (gen_adddi3 (dest
, subtarget
,
1440 GEN_INT (val
- aarch64_bitmasks
[i
])));
1446 for (j
= 0; j
< 64; j
+= 16, mask
<<= 16)
1448 if ((aarch64_bitmasks
[i
] & ~mask
) == (val
& ~mask
))
1452 emit_insn (gen_rtx_SET (dest
,
1453 GEN_INT (aarch64_bitmasks
[i
])));
1454 emit_insn (gen_insv_immdi (dest
, GEN_INT (j
),
1455 GEN_INT ((val
>> j
) & 0xffff)));
1463 /* See if we can do it by logically combining two immediates. */
1464 for (i
= 0; i
< AARCH64_NUM_BITMASKS
; i
++)
1466 if ((aarch64_bitmasks
[i
] & val
) == aarch64_bitmasks
[i
])
1470 for (j
= i
+ 1; j
< AARCH64_NUM_BITMASKS
; j
++)
1471 if (val
== (aarch64_bitmasks
[i
] | aarch64_bitmasks
[j
]))
1475 subtarget
= subtargets
? gen_reg_rtx (mode
) : dest
;
1476 emit_insn (gen_rtx_SET (subtarget
,
1477 GEN_INT (aarch64_bitmasks
[i
])));
1478 emit_insn (gen_iordi3 (dest
, subtarget
,
1479 GEN_INT (aarch64_bitmasks
[j
])));
1485 else if ((val
& aarch64_bitmasks
[i
]) == val
)
1489 for (j
= i
+ 1; j
< AARCH64_NUM_BITMASKS
; j
++)
1490 if (val
== (aarch64_bitmasks
[j
] & aarch64_bitmasks
[i
]))
1494 subtarget
= subtargets
? gen_reg_rtx (mode
) : dest
;
1495 emit_insn (gen_rtx_SET (subtarget
,
1496 GEN_INT (aarch64_bitmasks
[j
])));
1497 emit_insn (gen_anddi3 (dest
, subtarget
,
1498 GEN_INT (aarch64_bitmasks
[i
])));
1506 if (one_match
> zero_match
)
1508 /* Set either first three quarters or all but the third. */
1509 mask
= 0xffffll
<< (16 - first_not_ffff_match
);
1511 emit_insn (gen_rtx_SET (dest
,
1512 GEN_INT (val
| mask
| 0xffffffff00000000ull
)));
1515 /* Now insert other two quarters. */
1516 for (i
= first_not_ffff_match
+ 16, mask
<<= (first_not_ffff_match
<< 1);
1517 i
< 64; i
+= 16, mask
<<= 16)
1519 if ((val
& mask
) != mask
)
1522 emit_insn (gen_insv_immdi (dest
, GEN_INT (i
),
1523 GEN_INT ((val
>> i
) & 0xffff)));
1533 for (i
= 0; i
< 64; i
+= 16, mask
<<= 16)
1535 if ((val
& mask
) != 0)
1540 emit_insn (gen_rtx_SET (dest
, GEN_INT (val
& mask
)));
1547 emit_insn (gen_insv_immdi (dest
, GEN_INT (i
),
1548 GEN_INT ((val
>> i
) & 0xffff)));
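  /* This fallback builds the constant one 16-bit chunk at a time, skipping
     all-zero chunks; for example (illustrative) 0x1234000000005678 needs only
     a mov for bits 0-15 and a movk for bits 48-63.  */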
1559 aarch64_expand_mov_immediate (rtx dest
, rtx imm
)
1561 machine_mode mode
= GET_MODE (dest
);
1563 gcc_assert (mode
== SImode
|| mode
== DImode
);
1565 /* Check on what type of symbol it is. */
1566 if (GET_CODE (imm
) == SYMBOL_REF
1567 || GET_CODE (imm
) == LABEL_REF
1568 || GET_CODE (imm
) == CONST
)
1570 rtx mem
, base
, offset
;
1571 enum aarch64_symbol_type sty
;
1573 /* If we have (const (plus symbol offset)), separate out the offset
1574 before we start classifying the symbol. */
1575 split_const (imm
, &base
, &offset
);
1577 sty
= aarch64_classify_symbol (base
, offset
, SYMBOL_CONTEXT_ADR
);
1580 case SYMBOL_FORCE_TO_MEM
:
1581 if (offset
!= const0_rtx
1582 && targetm
.cannot_force_const_mem (mode
, imm
))
1584 gcc_assert (can_create_pseudo_p ());
1585 base
= aarch64_force_temporary (mode
, dest
, base
);
1586 base
= aarch64_add_offset (mode
, NULL
, base
, INTVAL (offset
));
1587 aarch64_emit_move (dest
, base
);
1590 mem
= force_const_mem (ptr_mode
, imm
);
1592 if (mode
!= ptr_mode
)
1593 mem
= gen_rtx_ZERO_EXTEND (mode
, mem
);
1594 emit_insn (gen_rtx_SET (dest
, mem
));
1597 case SYMBOL_SMALL_TLSGD
:
1598 case SYMBOL_SMALL_TLSDESC
:
1599 case SYMBOL_SMALL_GOTTPREL
:
1600 case SYMBOL_SMALL_GOT_28K
:
1601 case SYMBOL_SMALL_GOT_4G
:
1602 case SYMBOL_TINY_GOT
:
1603 if (offset
!= const0_rtx
)
1605 gcc_assert(can_create_pseudo_p ());
1606 base
= aarch64_force_temporary (mode
, dest
, base
);
1607 base
= aarch64_add_offset (mode
, NULL
, base
, INTVAL (offset
));
1608 aarch64_emit_move (dest
, base
);
1613 case SYMBOL_SMALL_ABSOLUTE
:
1614 case SYMBOL_TINY_ABSOLUTE
:
1616 aarch64_load_symref_appropriately (dest
, imm
, sty
);
1624 if (!CONST_INT_P (imm
))
1626 if (GET_CODE (imm
) == HIGH
)
1627 emit_insn (gen_rtx_SET (dest
, imm
));
1630 rtx mem
= force_const_mem (mode
, imm
);
1632 emit_insn (gen_rtx_SET (dest
, mem
));
1638 aarch64_internal_mov_immediate (dest
, imm
, true, GET_MODE (dest
));
1642 aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED
,
1643 tree exp ATTRIBUTE_UNUSED
)
1645 /* Currently, always true. */
1649 /* Implement TARGET_PASS_BY_REFERENCE. */
1652 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED
,
1655 bool named ATTRIBUTE_UNUSED
)
1658 machine_mode dummymode
;
1661 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1662 size
= (mode
== BLKmode
&& type
)
1663 ? int_size_in_bytes (type
) : (int) GET_MODE_SIZE (mode
);
1665 /* Aggregates are passed by reference based on their size. */
1666 if (type
&& AGGREGATE_TYPE_P (type
))
1668 size
= int_size_in_bytes (type
);
  /* Variable sized arguments are always passed by reference.  */
1675 /* Can this be a candidate to be passed in fp/simd register(s)? */
1676 if (aarch64_vfp_is_call_or_return_candidate (mode
, type
,
  /* Arguments which are variable sized or larger than 2 registers are
     passed by reference unless they are a homogeneous floating-point
     aggregate.  */
  return size > 2 * UNITS_PER_WORD;
1687 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1689 aarch64_return_in_msb (const_tree valtype
)
1691 machine_mode dummy_mode
;
1694 /* Never happens in little-endian mode. */
1695 if (!BYTES_BIG_ENDIAN
)
1698 /* Only composite types smaller than or equal to 16 bytes can
1699 be potentially returned in registers. */
1700 if (!aarch64_composite_type_p (valtype
, TYPE_MODE (valtype
))
1701 || int_size_in_bytes (valtype
) <= 0
1702 || int_size_in_bytes (valtype
) > 16)
1705 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1706 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1707 is always passed/returned in the least significant bits of fp/simd
1709 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype
), valtype
,
1710 &dummy_mode
, &dummy_int
, NULL
))
1716 /* Implement TARGET_FUNCTION_VALUE.
1717 Define how to find the value returned by a function. */
1720 aarch64_function_value (const_tree type
, const_tree func
,
1721 bool outgoing ATTRIBUTE_UNUSED
)
1726 machine_mode ag_mode
;
1728 mode
= TYPE_MODE (type
);
1729 if (INTEGRAL_TYPE_P (type
))
1730 mode
= promote_function_mode (type
, mode
, &unsignedp
, func
, 1);
1732 if (aarch64_return_in_msb (type
))
1734 HOST_WIDE_INT size
= int_size_in_bytes (type
);
1736 if (size
% UNITS_PER_WORD
!= 0)
1738 size
+= UNITS_PER_WORD
- size
% UNITS_PER_WORD
;
1739 mode
= mode_for_size (size
* BITS_PER_UNIT
, MODE_INT
, 0);
1743 if (aarch64_vfp_is_call_or_return_candidate (mode
, type
,
1744 &ag_mode
, &count
, NULL
))
1746 if (!aarch64_composite_type_p (type
, mode
))
1748 gcc_assert (count
== 1 && mode
== ag_mode
);
1749 return gen_rtx_REG (mode
, V0_REGNUM
);
1756 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (count
));
1757 for (i
= 0; i
< count
; i
++)
1759 rtx tmp
= gen_rtx_REG (ag_mode
, V0_REGNUM
+ i
);
1760 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
1761 GEN_INT (i
* GET_MODE_SIZE (ag_mode
)));
1762 XVECEXP (par
, 0, i
) = tmp
;
1768 return gen_rtx_REG (mode
, R0_REGNUM
);
1771 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1772 Return true if REGNO is the number of a hard register in which the values
1773 of called function may come back. */
1776 aarch64_function_value_regno_p (const unsigned int regno
)
1778 /* Maximum of 16 bytes can be returned in the general registers. Examples
1779 of 16-byte return values are: 128-bit integers and 16-byte small
1780 structures (excluding homogeneous floating-point aggregates). */
1781 if (regno
== R0_REGNUM
|| regno
== R1_REGNUM
)
1784 /* Up to four fp/simd registers can return a function value, e.g. a
1785 homogeneous floating-point aggregate having four members. */
1786 if (regno
>= V0_REGNUM
&& regno
< V0_REGNUM
+ HA_MAX_NUM_FLDS
)
1787 return TARGET_FLOAT
;
1792 /* Implement TARGET_RETURN_IN_MEMORY.
1794 If the type T of the result of a function is such that
1796 would require that arg be passed as a value in a register (or set of
1797 registers) according to the parameter passing rules, then the result
1798 is returned in the same registers as would be used for such an
1802 aarch64_return_in_memory (const_tree type
, const_tree fndecl ATTRIBUTE_UNUSED
)
1805 machine_mode ag_mode
;
1808 if (!AGGREGATE_TYPE_P (type
)
1809 && TREE_CODE (type
) != COMPLEX_TYPE
1810 && TREE_CODE (type
) != VECTOR_TYPE
)
1811 /* Simple scalar types always returned in registers. */
1814 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type
),
1821 /* Types larger than 2 registers returned in memory. */
1822 size
= int_size_in_bytes (type
);
1823 return (size
< 0 || size
> 2 * UNITS_PER_WORD
);
1827 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v
, machine_mode mode
,
1828 const_tree type
, int *nregs
)
1830 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
1831 return aarch64_vfp_is_call_or_return_candidate (mode
,
1833 &pcum
->aapcs_vfp_rmode
,
1838 /* Given MODE and TYPE of a function argument, return the alignment in
1839 bits. The idea is to suppress any stronger alignment requested by
1840 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1841 This is a helper function for local use only. */
1844 aarch64_function_arg_alignment (machine_mode mode
, const_tree type
)
1846 unsigned int alignment
;
1850 if (!integer_zerop (TYPE_SIZE (type
)))
1852 if (TYPE_MODE (type
) == mode
)
1853 alignment
= TYPE_ALIGN (type
);
1855 alignment
= GET_MODE_ALIGNMENT (mode
);
1861 alignment
= GET_MODE_ALIGNMENT (mode
);
1866 /* Layout a function argument according to the AAPCS64 rules. The rule
1867 numbers refer to the rule numbers in the AAPCS64. */
1870 aarch64_layout_arg (cumulative_args_t pcum_v
, machine_mode mode
,
1872 bool named ATTRIBUTE_UNUSED
)
1874 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
1875 int ncrn
, nvrn
, nregs
;
1876 bool allocate_ncrn
, allocate_nvrn
;
1879 /* We need to do this once per argument. */
1880 if (pcum
->aapcs_arg_processed
)
1883 pcum
->aapcs_arg_processed
= true;
1885 /* Size in bytes, rounded to the nearest multiple of 8 bytes. */
1887 = AARCH64_ROUND_UP (type
? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
),
1890 allocate_ncrn
= (type
) ? !(FLOAT_TYPE_P (type
)) : !FLOAT_MODE_P (mode
);
1891 allocate_nvrn
= aarch64_vfp_is_call_candidate (pcum_v
,
1896 /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1897 The following code thus handles passing by SIMD/FP registers first. */
1899 nvrn
= pcum
->aapcs_nvrn
;
  /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
     and homogeneous short-vector aggregates (HVA).  */
1906 aarch64_err_no_fpadvsimd (mode
, "argument");
1908 if (nvrn
+ nregs
<= NUM_FP_ARG_REGS
)
1910 pcum
->aapcs_nextnvrn
= nvrn
+ nregs
;
1911 if (!aarch64_composite_type_p (type
, mode
))
1913 gcc_assert (nregs
== 1);
1914 pcum
->aapcs_reg
= gen_rtx_REG (mode
, V0_REGNUM
+ nvrn
);
1920 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nregs
));
1921 for (i
= 0; i
< nregs
; i
++)
1923 rtx tmp
= gen_rtx_REG (pcum
->aapcs_vfp_rmode
,
1924 V0_REGNUM
+ nvrn
+ i
);
1925 tmp
= gen_rtx_EXPR_LIST
1927 GEN_INT (i
* GET_MODE_SIZE (pcum
->aapcs_vfp_rmode
)));
1928 XVECEXP (par
, 0, i
) = tmp
;
1930 pcum
->aapcs_reg
= par
;
1936 /* C.3 NSRN is set to 8. */
1937 pcum
->aapcs_nextnvrn
= NUM_FP_ARG_REGS
;
1942 ncrn
= pcum
->aapcs_ncrn
;
1943 nregs
= size
/ UNITS_PER_WORD
;
  /* C6 - C9, though the sign and zero extension semantics are
     handled elsewhere.  This is the case where the argument fits
     entirely in general registers.  */
1948 if (allocate_ncrn
&& (ncrn
+ nregs
<= NUM_ARG_REGS
))
1950 unsigned int alignment
= aarch64_function_arg_alignment (mode
, type
);
1952 gcc_assert (nregs
== 0 || nregs
== 1 || nregs
== 2);
1954 /* C.8 if the argument has an alignment of 16 then the NGRN is
1955 rounded up to the next even number. */
1956 if (nregs
== 2 && alignment
== 16 * BITS_PER_UNIT
&& ncrn
% 2)
1959 gcc_assert (ncrn
+ nregs
<= NUM_ARG_REGS
);
1961 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1962 A reg is still generated for it, but the caller should be smart
1963 enough not to use it. */
1964 if (nregs
== 0 || nregs
== 1 || GET_MODE_CLASS (mode
) == MODE_INT
)
1966 pcum
->aapcs_reg
= gen_rtx_REG (mode
, R0_REGNUM
+ ncrn
);
1973 par
= gen_rtx_PARALLEL (mode
, rtvec_alloc (nregs
));
1974 for (i
= 0; i
< nregs
; i
++)
1976 rtx tmp
= gen_rtx_REG (word_mode
, R0_REGNUM
+ ncrn
+ i
);
1977 tmp
= gen_rtx_EXPR_LIST (VOIDmode
, tmp
,
1978 GEN_INT (i
* UNITS_PER_WORD
));
1979 XVECEXP (par
, 0, i
) = tmp
;
1981 pcum
->aapcs_reg
= par
;
1984 pcum
->aapcs_nextncrn
= ncrn
+ nregs
;
1989 pcum
->aapcs_nextncrn
= NUM_ARG_REGS
;
1991 /* The argument is passed on stack; record the needed number of words for
1992 this argument and align the total size if necessary. */
1994 pcum
->aapcs_stack_words
= size
/ UNITS_PER_WORD
;
1995 if (aarch64_function_arg_alignment (mode
, type
) == 16 * BITS_PER_UNIT
)
1996 pcum
->aapcs_stack_size
= AARCH64_ROUND_UP (pcum
->aapcs_stack_size
,
1997 16 / UNITS_PER_WORD
);
2001 /* Implement TARGET_FUNCTION_ARG. */
2004 aarch64_function_arg (cumulative_args_t pcum_v
, machine_mode mode
,
2005 const_tree type
, bool named
)
2007 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
2008 gcc_assert (pcum
->pcs_variant
== ARM_PCS_AAPCS64
);
2010 if (mode
== VOIDmode
)
2013 aarch64_layout_arg (pcum_v
, mode
, type
, named
);
2014 return pcum
->aapcs_reg
;
2018 aarch64_init_cumulative_args (CUMULATIVE_ARGS
*pcum
,
2019 const_tree fntype ATTRIBUTE_UNUSED
,
2020 rtx libname ATTRIBUTE_UNUSED
,
2021 const_tree fndecl ATTRIBUTE_UNUSED
,
2022 unsigned n_named ATTRIBUTE_UNUSED
)
2024 pcum
->aapcs_ncrn
= 0;
2025 pcum
->aapcs_nvrn
= 0;
2026 pcum
->aapcs_nextncrn
= 0;
2027 pcum
->aapcs_nextnvrn
= 0;
2028 pcum
->pcs_variant
= ARM_PCS_AAPCS64
;
2029 pcum
->aapcs_reg
= NULL_RTX
;
2030 pcum
->aapcs_arg_processed
= false;
2031 pcum
->aapcs_stack_words
= 0;
2032 pcum
->aapcs_stack_size
= 0;
2035 && fndecl
&& TREE_PUBLIC (fndecl
)
2036 && fntype
&& fntype
!= error_mark_node
)
2038 const_tree type
= TREE_TYPE (fntype
);
2039 machine_mode mode ATTRIBUTE_UNUSED
; /* To pass pointer as argument. */
2040 int nregs ATTRIBUTE_UNUSED
; /* Likewise. */
2041 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type
), type
,
2042 &mode
, &nregs
, NULL
))
2043 aarch64_err_no_fpadvsimd (TYPE_MODE (type
), "return type");
2049 aarch64_function_arg_advance (cumulative_args_t pcum_v
,
2054 CUMULATIVE_ARGS
*pcum
= get_cumulative_args (pcum_v
);
2055 if (pcum
->pcs_variant
== ARM_PCS_AAPCS64
)
2057 aarch64_layout_arg (pcum_v
, mode
, type
, named
);
2058 gcc_assert ((pcum
->aapcs_reg
!= NULL_RTX
)
2059 != (pcum
->aapcs_stack_words
!= 0));
2060 pcum
->aapcs_arg_processed
= false;
2061 pcum
->aapcs_ncrn
= pcum
->aapcs_nextncrn
;
2062 pcum
->aapcs_nvrn
= pcum
->aapcs_nextnvrn
;
2063 pcum
->aapcs_stack_size
+= pcum
->aapcs_stack_words
;
2064 pcum
->aapcs_stack_words
= 0;
2065 pcum
->aapcs_reg
= NULL_RTX
;
2070 aarch64_function_arg_regno_p (unsigned regno
)
2072 return ((GP_REGNUM_P (regno
) && regno
< R0_REGNUM
+ NUM_ARG_REGS
)
2073 || (FP_REGNUM_P (regno
) && regno
< V0_REGNUM
+ NUM_FP_ARG_REGS
));
2076 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
2077 PARM_BOUNDARY bits of alignment, but will be given anything up
2078 to STACK_BOUNDARY bits if the type requires it. This makes sure
2079 that both before and after the layout of each argument, the Next
2080 Stacked Argument Address (NSAA) will have a minimum alignment of
2084 aarch64_function_arg_boundary (machine_mode mode
, const_tree type
)
2086 unsigned int alignment
= aarch64_function_arg_alignment (mode
, type
);
2088 if (alignment
< PARM_BOUNDARY
)
2089 alignment
= PARM_BOUNDARY
;
2090 if (alignment
> STACK_BOUNDARY
)
2091 alignment
= STACK_BOUNDARY
;
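  /* So, for example, a plain char argument still gets PARM_BOUNDARY (64-bit)
     alignment on the stack, while an over-aligned type is capped at
     STACK_BOUNDARY (128 bits).  */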
2095 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
2097 Return true if an argument passed on the stack should be padded upwards,
2098 i.e. if the least-significant byte of the stack slot has useful data.
2100 Small aggregate types are placed in the lowest memory address.
2102 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
2105 aarch64_pad_arg_upward (machine_mode mode
, const_tree type
)
2107 /* On little-endian targets, the least significant byte of every stack
2108 argument is passed at the lowest byte address of the stack slot. */
2109 if (!BYTES_BIG_ENDIAN
)
2112 /* Otherwise, integral, floating-point and pointer types are padded downward:
2113 the least significant byte of a stack argument is passed at the highest
2114 byte address of the stack slot. */
2116 ? (INTEGRAL_TYPE_P (type
) || SCALAR_FLOAT_TYPE_P (type
)
2117 || POINTER_TYPE_P (type
))
2118 : (SCALAR_INT_MODE_P (mode
) || SCALAR_FLOAT_MODE_P (mode
)))
2121 /* Everything else padded upward, i.e. data in first byte of stack slot. */
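  /* For instance, on big-endian a 4-byte int passed on the stack occupies the
     high-addressed half of its 8-byte slot (padded downward), whereas a small
     aggregate starts at the slot's lowest address (padded upward).  */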
2125 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
   It specifies padding for the last (which may also be the only)
   element of a block move between registers and memory.  Assuming the
   block is in memory, padding upward means that the last element is
   padded after its most significant byte, while with downward padding
   the last element is padded on its least significant byte side.
2134 Small aggregates and small complex types are always padded
2137 We don't need to worry about homogeneous floating-point or
2138 short-vector aggregates; their move is not affected by the
2139 padding direction determined here. Regardless of endianness,
2140 each element of such an aggregate is put in the least
2141 significant bits of a fp/simd register.
2143 Return !BYTES_BIG_ENDIAN if the least significant byte of the
2144 register has useful data, and return the opposite if the most
2145 significant byte does. */
2148 aarch64_pad_reg_upward (machine_mode mode
, const_tree type
,
2149 bool first ATTRIBUTE_UNUSED
)
2152 /* Small composite types are always padded upward. */
2153 if (BYTES_BIG_ENDIAN
&& aarch64_composite_type_p (type
, mode
))
2155 HOST_WIDE_INT size
= (type
? int_size_in_bytes (type
)
2156 : GET_MODE_SIZE (mode
));
2157 if (size
< 2 * UNITS_PER_WORD
)
2161 /* Otherwise, use the default padding. */
2162 return !BYTES_BIG_ENDIAN
;
2166 aarch64_libgcc_cmp_return_mode (void)
2172 aarch64_frame_pointer_required (void)
2174 /* In aarch64_override_options_after_change
2175 flag_omit_leaf_frame_pointer turns off the frame pointer by
2176 default. Turn it back on now if we've not got a leaf
2178 if (flag_omit_leaf_frame_pointer
2179 && (!crtl
->is_leaf
|| df_regs_ever_live_p (LR_REGNUM
)))
2185 /* Mark the registers that need to be saved by the callee and calculate
2186 the size of the callee-saved registers area and frame record (both FP
2187 and LR may be omitted). */
2189 aarch64_layout_frame (void)
2191 HOST_WIDE_INT offset
= 0;
2194 if (reload_completed
&& cfun
->machine
->frame
.laid_out
)
2197 #define SLOT_NOT_REQUIRED (-2)
2198 #define SLOT_REQUIRED (-1)
2200 cfun
->machine
->frame
.wb_candidate1
= FIRST_PSEUDO_REGISTER
;
2201 cfun
->machine
->frame
.wb_candidate2
= FIRST_PSEUDO_REGISTER
;
2203 /* First mark all the registers that really need to be saved... */
2204 for (regno
= R0_REGNUM
; regno
<= R30_REGNUM
; regno
++)
2205 cfun
->machine
->frame
.reg_offset
[regno
] = SLOT_NOT_REQUIRED
;
2207 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
2208 cfun
->machine
->frame
.reg_offset
[regno
] = SLOT_NOT_REQUIRED
;
2210 /* ... that includes the eh data registers (if needed)... */
2211 if (crtl
->calls_eh_return
)
2212 for (regno
= 0; EH_RETURN_DATA_REGNO (regno
) != INVALID_REGNUM
; regno
++)
2213 cfun
->machine
->frame
.reg_offset
[EH_RETURN_DATA_REGNO (regno
)]
2216 /* ... and any callee saved register that dataflow says is live. */
2217 for (regno
= R0_REGNUM
; regno
<= R30_REGNUM
; regno
++)
2218 if (df_regs_ever_live_p (regno
)
2219 && (regno
== R30_REGNUM
2220 || !call_used_regs
[regno
]))
2221 cfun
->machine
->frame
.reg_offset
[regno
] = SLOT_REQUIRED
;
2223 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
2224 if (df_regs_ever_live_p (regno
)
2225 && !call_used_regs
[regno
])
2226 cfun
->machine
->frame
.reg_offset
[regno
] = SLOT_REQUIRED
;
2228 if (frame_pointer_needed
)
2230 /* FP and LR are placed in the linkage record. */
2231 cfun
->machine
->frame
.reg_offset
[R29_REGNUM
] = 0;
2232 cfun
->machine
->frame
.wb_candidate1
= R29_REGNUM
;
2233 cfun
->machine
->frame
.reg_offset
[R30_REGNUM
] = UNITS_PER_WORD
;
2234 cfun
->machine
->frame
.wb_candidate2
= R30_REGNUM
;
2235 cfun
->machine
->frame
.hardfp_offset
= 2 * UNITS_PER_WORD
;
2236 offset
+= 2 * UNITS_PER_WORD
;
2239 /* Now assign stack slots for them. */
2240 for (regno
= R0_REGNUM
; regno
<= R30_REGNUM
; regno
++)
2241 if (cfun
->machine
->frame
.reg_offset
[regno
] == SLOT_REQUIRED
)
2243 cfun
->machine
->frame
.reg_offset
[regno
] = offset
;
2244 if (cfun
->machine
->frame
.wb_candidate1
== FIRST_PSEUDO_REGISTER
)
2245 cfun
->machine
->frame
.wb_candidate1
= regno
;
2246 else if (cfun
->machine
->frame
.wb_candidate2
== FIRST_PSEUDO_REGISTER
)
2247 cfun
->machine
->frame
.wb_candidate2
= regno
;
2248 offset
+= UNITS_PER_WORD
;
2251 for (regno
= V0_REGNUM
; regno
<= V31_REGNUM
; regno
++)
2252 if (cfun
->machine
->frame
.reg_offset
[regno
] == SLOT_REQUIRED
)
2254 cfun
->machine
->frame
.reg_offset
[regno
] = offset
;
2255 if (cfun
->machine
->frame
.wb_candidate1
== FIRST_PSEUDO_REGISTER
)
2256 cfun
->machine
->frame
.wb_candidate1
= regno
;
2257 else if (cfun
->machine
->frame
.wb_candidate2
== FIRST_PSEUDO_REGISTER
2258 && cfun
->machine
->frame
.wb_candidate1
>= V0_REGNUM
)
2259 cfun
->machine
->frame
.wb_candidate2
= regno
;
2260 offset
+= UNITS_PER_WORD
;
2263 cfun
->machine
->frame
.padding0
=
2264 (AARCH64_ROUND_UP (offset
, STACK_BOUNDARY
/ BITS_PER_UNIT
) - offset
);
2265 offset
= AARCH64_ROUND_UP (offset
, STACK_BOUNDARY
/ BITS_PER_UNIT
);
2267 cfun
->machine
->frame
.saved_regs_size
= offset
;
2269 cfun
->machine
->frame
.hard_fp_offset
2270 = AARCH64_ROUND_UP (cfun
->machine
->frame
.saved_varargs_size
2272 + cfun
->machine
->frame
.saved_regs_size
,
2273 STACK_BOUNDARY
/ BITS_PER_UNIT
);
2275 cfun
->machine
->frame
.frame_size
2276 = AARCH64_ROUND_UP (cfun
->machine
->frame
.hard_fp_offset
2277 + crtl
->outgoing_args_size
,
2278 STACK_BOUNDARY
/ BITS_PER_UNIT
);
2280 cfun
->machine
->frame
.laid_out
= true;
2284 aarch64_register_saved_on_entry (int regno
)
2286 return cfun
->machine
->frame
.reg_offset
[regno
] >= 0;
2290 aarch64_next_callee_save (unsigned regno
, unsigned limit
)
2292 while (regno
<= limit
&& !aarch64_register_saved_on_entry (regno
))
2298 aarch64_pushwb_single_reg (machine_mode mode
, unsigned regno
,
2299 HOST_WIDE_INT adjustment
)
2301 rtx base_rtx
= stack_pointer_rtx
;
2304 reg
= gen_rtx_REG (mode
, regno
);
2305 mem
= gen_rtx_PRE_MODIFY (Pmode
, base_rtx
,
2306 plus_constant (Pmode
, base_rtx
, -adjustment
));
2307 mem
= gen_rtx_MEM (mode
, mem
);
2309 insn
= emit_move_insn (mem
, reg
);
2310 RTX_FRAME_RELATED_P (insn
) = 1;
2314 aarch64_gen_storewb_pair (machine_mode mode
, rtx base
, rtx reg
, rtx reg2
,
2315 HOST_WIDE_INT adjustment
)
2320 return gen_storewb_pairdi_di (base
, base
, reg
, reg2
,
2321 GEN_INT (-adjustment
),
2322 GEN_INT (UNITS_PER_WORD
- adjustment
));
2324 return gen_storewb_pairdf_di (base
, base
, reg
, reg2
,
2325 GEN_INT (-adjustment
),
2326 GEN_INT (UNITS_PER_WORD
- adjustment
));
/* Push registers REGNO1 and REGNO2 with pre-index write-back, adjusting
   the stack pointer by ADJUSTMENT.  */

static void
aarch64_pushwb_pair_reg (machine_mode mode, unsigned regno1,
                         unsigned regno2, HOST_WIDE_INT adjustment)
{
  rtx_insn *insn;
  rtx reg1 = gen_rtx_REG (mode, regno1);
  rtx reg2 = gen_rtx_REG (mode, regno2);

  insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
                                              reg2, adjustment));
  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
  RTX_FRAME_RELATED_P (insn) = 1;
}
/* Generate a load-pair with write-back of REG and REG2 from BASE,
   adjusting the base by ADJUSTMENT.  */

static rtx
aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
                         HOST_WIDE_INT adjustment)
{
  switch (mode)
    {
    case DImode:
      return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
                                   GEN_INT (UNITS_PER_WORD));
    case DFmode:
      return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
                                   GEN_INT (UNITS_PER_WORD));
    default:
      gcc_unreachable ();
    }
}
/* Generate a store pair / load pair of REG1 and REG2 to/from MEM1 and MEM2.  */

static rtx
aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
                        rtx reg2)
{
  switch (mode)
    {
    case DImode:
      return gen_store_pairdi (mem1, reg1, mem2, reg2);

    case DFmode:
      return gen_store_pairdf (mem1, reg1, mem2, reg2);

    default:
      gcc_unreachable ();
    }
}

static rtx
aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
                       rtx mem2)
{
  switch (mode)
    {
    case DImode:
      return gen_load_pairdi (reg1, mem1, reg2, mem2);

    case DFmode:
      return gen_load_pairdf (reg1, mem1, reg2, mem2);

    default:
      gcc_unreachable ();
    }
}
/* Emit code to save the callee-saved registers in mode MODE from register
   START up to and including LIMIT, at stack offset START_OFFSET.  Skip the
   write-back candidates if SKIP_WB is true.  */

static void
aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
                           unsigned start, unsigned limit, bool skip_wb)
{
  rtx_insn *insn;
  rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
                                            ? gen_frame_mem : gen_rtx_MEM);
  unsigned regno;
  unsigned regno2;

  for (regno = aarch64_next_callee_save (start, limit);
       regno <= limit;
       regno = aarch64_next_callee_save (regno + 1, limit))
    {
      rtx reg, mem;
      HOST_WIDE_INT offset;

      if (skip_wb
          && (regno == cfun->machine->frame.wb_candidate1
              || regno == cfun->machine->frame.wb_candidate2))
        continue;

      reg = gen_rtx_REG (mode, regno);
      offset = start_offset + cfun->machine->frame.reg_offset[regno];
      mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
                                              offset));

      regno2 = aarch64_next_callee_save (regno + 1, limit);

      if (regno2 <= limit
          && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
              == cfun->machine->frame.reg_offset[regno2]))
        {
          rtx reg2 = gen_rtx_REG (mode, regno2);
          rtx mem2;

          offset = start_offset + cfun->machine->frame.reg_offset[regno2];
          mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
                                                   offset));
          insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
                                                    reg2));

          /* The first part of a frame-related parallel insn is
             always assumed to be relevant to the frame
             calculations; subsequent parts are only
             frame-related if explicitly marked.  */
          RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
          regno = regno2;
        }
      else
        insn = emit_move_insn (mem, reg);

      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
/* Emit code to restore the callee-saved registers in mode MODE from
   register START up to and including LIMIT, at stack offset START_OFFSET.
   Skip the write-back candidates if SKIP_WB is true, and record the
   registers restored in *CFI_OPS.  */

static void
aarch64_restore_callee_saves (machine_mode mode,
                              HOST_WIDE_INT start_offset, unsigned start,
                              unsigned limit, bool skip_wb, rtx *cfi_ops)
{
  rtx base_rtx = stack_pointer_rtx;
  rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
                                            ? gen_frame_mem : gen_rtx_MEM);
  unsigned regno;
  unsigned regno2;
  HOST_WIDE_INT offset;

  for (regno = aarch64_next_callee_save (start, limit);
       regno <= limit;
       regno = aarch64_next_callee_save (regno + 1, limit))
    {
      rtx reg, mem;

      if (skip_wb
          && (regno == cfun->machine->frame.wb_candidate1
              || regno == cfun->machine->frame.wb_candidate2))
        continue;

      reg = gen_rtx_REG (mode, regno);
      offset = start_offset + cfun->machine->frame.reg_offset[regno];
      mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));

      regno2 = aarch64_next_callee_save (regno + 1, limit);

      if (regno2 <= limit
          && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
              == cfun->machine->frame.reg_offset[regno2]))
        {
          rtx reg2 = gen_rtx_REG (mode, regno2);
          rtx mem2;

          offset = start_offset + cfun->machine->frame.reg_offset[regno2];
          mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
          emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));

          *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
          regno = regno2;
        }
      else
        emit_move_insn (reg, mem);

      *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
    }
}
/* AArch64 stack frames generated by this compiler look like:

	+-------------------------------+
	|  incoming stack arguments     |
	+-------------------------------+
	|                               | <-- incoming stack pointer (aligned)
	|  callee-allocated save area   |
	|  for register varargs         |
	+-------------------------------+
	|  local variables              | <-- frame_pointer_rtx
	+-------------------------------+
	|  padding0                     | \
	+-------------------------------+  |
	|  callee-saved registers       |  | frame.saved_regs_size
	+-------------------------------+  |
	|  LR'                          |  |
	+-------------------------------+  |
	|  FP'                          | / <- hard_frame_pointer_rtx (aligned)
	+-------------------------------+
	|  dynamic allocation           |
	+-------------------------------+
	|  outgoing stack arguments     | <-- arg_pointer
	+-------------------------------+
	|                               | <-- stack_pointer_rtx (aligned)

   Dynamic stack allocations via alloca() decrease stack_pointer_rtx
   but leave frame_pointer_rtx and hard_frame_pointer_rtx unchanged.  */

/* Generate the prologue instructions for entry into a function.
   Establish the stack frame by decreasing the stack pointer with a
   properly calculated size and, if necessary, create a frame record
   filled with the values of LR and previous frame pointer.  The
   current FP is also set up if it is in use.  */
void
aarch64_expand_prologue (void)
{
  /* sub sp, sp, #<frame_size>
     stp {fp, lr}, [sp, #<frame_size> - 16]
     add fp, sp, #<frame_size> - hardfp_offset
     stp {cs_reg}, [fp, #-16] etc.

     sub sp, sp, <final_adjustment_if_any>
  */
  HOST_WIDE_INT frame_size, offset;
  HOST_WIDE_INT fp_offset;		/* Offset from hard FP to SP.  */
  HOST_WIDE_INT hard_fp_offset;
  rtx_insn *insn;

  aarch64_layout_frame ();

  offset = frame_size = cfun->machine->frame.frame_size;
  hard_fp_offset = cfun->machine->frame.hard_fp_offset;
  fp_offset = frame_size - hard_fp_offset;

  if (flag_stack_usage_info)
    current_function_static_stack_size = frame_size;

  /* Store pairs and load pairs have a range only -512 to 504.  */
  if (offset >= 512)
    {
      /* When the frame has a large size, an initial decrease is done on
	 the stack pointer to jump over the callee-allocated save area for
	 register varargs, the local variable area and/or the callee-saved
	 register area.  This will allow the pre-index write-back
	 store pair instructions to be used for setting up the stack frame
	 efficiently.  */
      offset = hard_fp_offset;
      if (offset >= 512)
	offset = cfun->machine->frame.saved_regs_size;

      frame_size -= (offset + crtl->outgoing_args_size);
      fp_offset = 0;

      if (frame_size >= 0x1000000)
	{
	  rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
	  emit_move_insn (op0, GEN_INT (-frame_size));
	  insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));

	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
			gen_rtx_SET (stack_pointer_rtx,
				     plus_constant (Pmode, stack_pointer_rtx,
						    -frame_size)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      else if (frame_size > 0)
	{
	  int hi_ofs = frame_size & 0xfff000;
	  int lo_ofs = frame_size & 0x000fff;

	  if (hi_ofs)
	    {
	      insn = emit_insn (gen_add2_insn
				(stack_pointer_rtx, GEN_INT (-hi_ofs)));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	  if (lo_ofs)
	    {
	      insn = emit_insn (gen_add2_insn
				(stack_pointer_rtx, GEN_INT (-lo_ofs)));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}
    }
  else
    frame_size = -1;

  if (offset > 0)
    {
      bool skip_wb = false;

      if (frame_pointer_needed)
	{
	  skip_wb = true;

	  if (fp_offset)
	    {
	      insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					       GEN_INT (-offset)));
	      RTX_FRAME_RELATED_P (insn) = 1;

	      aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM,
					 R30_REGNUM, false);
	    }
	  else
	    aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset);

	  /* Set up frame pointer to point to the location of the
	     previous frame pointer on the stack.  */
	  insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
					   stack_pointer_rtx,
					   GEN_INT (fp_offset)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
	}
      else
	{
	  unsigned reg1 = cfun->machine->frame.wb_candidate1;
	  unsigned reg2 = cfun->machine->frame.wb_candidate2;

	  if (fp_offset
	      || reg1 == FIRST_PSEUDO_REGISTER
	      || (reg2 == FIRST_PSEUDO_REGISTER
		  && offset >= 256))
	    {
	      insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					       GEN_INT (-offset)));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	  else
	    {
	      machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;

	      skip_wb = true;

	      if (reg2 == FIRST_PSEUDO_REGISTER)
		aarch64_pushwb_single_reg (mode1, reg1, offset);
	      else
		aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset);
	    }
	}

      aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
				 skip_wb);
      aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
				 skip_wb);
    }

  /* when offset >= 512,
     sub sp, sp, #<outgoing_args_size> */
  if (frame_size > -1)
    {
      if (crtl->outgoing_args_size > 0)
	{
	  insn = emit_insn (gen_add2_insn
			    (stack_pointer_rtx,
			     GEN_INT (- crtl->outgoing_args_size)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
/* Return TRUE if we can use a simple_return insn.

   This function checks whether the callee saved stack is empty, which
   means no restore actions are need.  The pro_and_epilogue will use
   this to check whether shrink-wrapping opt is feasible.  */

bool
aarch64_use_return_insn_p (void)
{
  if (!reload_completed)
    return false;

  if (crtl->profile)
    return false;

  aarch64_layout_frame ();

  return cfun->machine->frame.frame_size == 0;
}
/* Generate the epilogue instructions for returning from a function.  */
void
aarch64_expand_epilogue (bool for_sibcall)
{
  HOST_WIDE_INT frame_size, offset;
  HOST_WIDE_INT fp_offset;
  HOST_WIDE_INT hard_fp_offset;
  rtx_insn *insn;
  /* We need to add memory barrier to prevent read from deallocated stack.  */
  bool need_barrier_p = (get_frame_size () != 0
			 || cfun->machine->frame.saved_varargs_size);

  aarch64_layout_frame ();

  offset = frame_size = cfun->machine->frame.frame_size;
  hard_fp_offset = cfun->machine->frame.hard_fp_offset;
  fp_offset = frame_size - hard_fp_offset;

  /* Store pairs and load pairs have a range only -512 to 504.  */
  if (offset >= 512)
    {
      offset = hard_fp_offset;
      if (offset >= 512)
	offset = cfun->machine->frame.saved_regs_size;

      frame_size -= (offset + crtl->outgoing_args_size);
      fp_offset = 0;
      if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
	{
	  insn = emit_insn (gen_add2_insn
			    (stack_pointer_rtx,
			     GEN_INT (crtl->outgoing_args_size)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
  else
    frame_size = -1;

  /* If there were outgoing arguments or we've done dynamic stack
     allocation, then restore the stack pointer from the frame
     pointer.  This is at most one insn and more efficient than using
     GCC's internal mechanism.  */
  if (frame_pointer_needed
      && (crtl->outgoing_args_size || cfun->calls_alloca))
    {
      if (cfun->calls_alloca)
	emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));

      insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
				       hard_frame_pointer_rtx,
				       GEN_INT (0)));
      offset = offset - fp_offset;
    }

  if (offset > 0)
    {
      unsigned reg1 = cfun->machine->frame.wb_candidate1;
      unsigned reg2 = cfun->machine->frame.wb_candidate2;
      bool skip_wb = true;
      rtx cfi_ops = NULL;

      if (frame_pointer_needed)
	fp_offset = 0;
      else if (fp_offset
	       || reg1 == FIRST_PSEUDO_REGISTER
	       || (reg2 == FIRST_PSEUDO_REGISTER
		   && offset >= 256))
	skip_wb = false;

      aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
				    skip_wb, &cfi_ops);
      aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
				    skip_wb, &cfi_ops);

      if (need_barrier_p)
	emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));

      if (skip_wb)
	{
	  machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
	  rtx rreg1 = gen_rtx_REG (mode1, reg1);

	  cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops);
	  if (reg2 == FIRST_PSEUDO_REGISTER)
	    {
	      rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset);
	      mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
	      mem = gen_rtx_MEM (mode1, mem);
	      insn = emit_move_insn (rreg1, mem);
	    }
	  else
	    {
	      rtx rreg2 = gen_rtx_REG (mode1, reg2);

	      cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops);
	      insn = emit_insn (aarch64_gen_loadwb_pair
				(mode1, stack_pointer_rtx, rreg1,
				 rreg2, offset));
	    }
	}
      else
	insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
					 GEN_INT (offset)));

      /* Reset the CFA to be SP + FRAME_SIZE.  */
      rtx new_cfa = stack_pointer_rtx;
      if (frame_size > 0)
	new_cfa = plus_constant (Pmode, new_cfa, frame_size);
      cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
      REG_NOTES (insn) = cfi_ops;
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (frame_size > 0)
    {
      if (need_barrier_p)
	emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));

      if (frame_size >= 0x1000000)
	{
	  rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
	  emit_move_insn (op0, GEN_INT (frame_size));
	  insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
	}
      else
	{
	  int hi_ofs = frame_size & 0xfff000;
	  int lo_ofs = frame_size & 0x000fff;

	  if (hi_ofs && lo_ofs)
	    {
	      insn = emit_insn (gen_add2_insn
				(stack_pointer_rtx, GEN_INT (hi_ofs)));
	      RTX_FRAME_RELATED_P (insn) = 1;
	      frame_size = lo_ofs;
	    }
	  insn = emit_insn (gen_add2_insn
			    (stack_pointer_rtx, GEN_INT (frame_size)));
	}

      /* Reset the CFA to be SP + 0.  */
      add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Stack adjustment for exception handler.  */
  if (crtl->calls_eh_return)
    {
      /* We need to unwind the stack by the offset computed by
	 EH_RETURN_STACKADJ_RTX.  We have already reset the CFA
	 to be SP; letting the CFA move during this adjustment
	 is just as correct as retaining the CFA from the body
	 of the function.  Therefore, do nothing special.  */
      emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
    }

  emit_use (gen_rtx_REG (DImode, LR_REGNUM));
  if (!for_sibcall)
    emit_jump_insn (ret_rtx);
}
/* Return the place to copy the exception unwinding return address to.
   This will probably be a stack slot, but could (in theory) be the
   return register.  */
rtx
aarch64_final_eh_return_addr (void)
{
  HOST_WIDE_INT fp_offset;

  aarch64_layout_frame ();

  fp_offset = cfun->machine->frame.frame_size
	      - cfun->machine->frame.hard_fp_offset;

  if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
    return gen_rtx_REG (DImode, LR_REGNUM);

  /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
     result in a store to save LR introduced by builtin_eh_return () being
     incorrectly deleted because the alias is not detected.
     So in the calculation of the address to copy the exception unwinding
     return address to, we note 2 cases.
     If FP is needed and the fp_offset is 0, it means that SP = FP and hence
     we return a SP-relative location since all the addresses are SP-relative
     in this case.  This prevents the store from being optimized away.
     If the fp_offset is not 0, then the addresses will be FP-relative and
     therefore we return a FP-relative location.  */

  if (frame_pointer_needed)
    {
      if (fp_offset)
	return gen_frame_mem (DImode,
			      plus_constant (Pmode,
					     hard_frame_pointer_rtx,
					     UNITS_PER_WORD));
      else
	return gen_frame_mem (DImode,
			      plus_constant (Pmode,
					     stack_pointer_rtx,
					     UNITS_PER_WORD));
    }

  /* If FP is not needed, we calculate the location of LR, which would be
     at the top of the saved registers block.  */
  return gen_frame_mem (DImode,
			plus_constant (Pmode,
				       stack_pointer_rtx,
				       fp_offset
				       + cfun->machine->frame.saved_regs_size
				       - 2 * UNITS_PER_WORD));
}
/* Possibly output code to build up a constant in a register.  For
   the benefit of the costs infrastructure, returns the number of
   instructions which would be emitted.  GENERATE inhibits or
   enables code generation.  */

static int
aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
{
  int insns = 1;

  if (aarch64_bitmask_imm (val, DImode))
    {
      if (generate)
	emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
    }
  else
    {
      int i;
      int ncount = 0;
      int zcount = 0;
      HOST_WIDE_INT valp = val >> 16;
      HOST_WIDE_INT valm;
      HOST_WIDE_INT tval;

      for (i = 16; i < 64; i += 16)
	{
	  valm = (valp & 0xffff);

	  if (valm != 0)
	    ++ zcount;

	  if (valm != 0xffff)
	    ++ ncount;

	  valp >>= 16;
	}

      /* zcount contains the number of additional MOVK instructions
	 required if the constant is built up with an initial MOVZ instruction,
	 while ncount is the number of MOVK instructions required if starting
	 with a MOVN instruction.  Choose the sequence that yields the fewest
	 number of instructions, preferring MOVZ instructions when they are both
	 the same.  */
      if (ncount < zcount)
	{
	  if (generate)
	    emit_move_insn (gen_rtx_REG (Pmode, regnum),
			    GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
	  tval = 0xffff;
	}
      else
	{
	  if (generate)
	    emit_move_insn (gen_rtx_REG (Pmode, regnum),
			    GEN_INT (val & 0xffff));
	  tval = 0;
	}

      val >>= 16;

      for (i = 16; i < 64; i += 16)
	{
	  if ((val & 0xffff) != tval)
	    {
	      if (generate)
		emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
					   GEN_INT (i),
					   GEN_INT (val & 0xffff)));
	      insns++;
	    }
	  val >>= 16;
	}
    }
  return insns;
}
/* Add DELTA to register REGNUM, using SCRATCHREG as a scratch register
   when the constant cannot be handled directly.  */

static void
aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
{
  HOST_WIDE_INT mdelta = delta;
  rtx this_rtx = gen_rtx_REG (Pmode, regnum);
  rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);

  if (mdelta < 0)
    mdelta = -mdelta;

  if (mdelta >= 4096 * 4096)
    {
      (void) aarch64_build_constant (scratchreg, delta, true);
      emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
    }
  else if (mdelta > 0)
    {
      if (mdelta >= 4096)
	{
	  emit_insn (gen_rtx_SET (scratch_rtx, GEN_INT (mdelta / 4096)));
	  rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
	  if (delta < 0)
	    emit_insn (gen_rtx_SET (this_rtx,
				    gen_rtx_MINUS (Pmode, this_rtx, shift)));
	  else
	    emit_insn (gen_rtx_SET (this_rtx,
				    gen_rtx_PLUS (Pmode, this_rtx, shift)));
	}
      if (mdelta % 4096 != 0)
	{
	  scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
	  emit_insn (gen_rtx_SET (this_rtx,
				  gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
	}
    }
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */
static void
aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
			 HOST_WIDE_INT delta,
			 HOST_WIDE_INT vcall_offset,
			 tree function)
{
  /* The this pointer is always in x0.  Note that this differs from
     Arm where the this pointer may be bumped to r1 if r0 is required
     to return a pointer to an aggregate.  On AArch64 a result value
     pointer will be in x8.  */
  int this_regno = R0_REGNUM;
  rtx this_rtx, temp0, temp1, addr, funexp;
  rtx_insn *insn;

  reload_completed = 1;
  emit_note (NOTE_INSN_PROLOGUE_END);

  if (vcall_offset == 0)
    aarch64_add_constant (this_regno, IP1_REGNUM, delta);
  else
    {
      gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);

      this_rtx = gen_rtx_REG (Pmode, this_regno);
      temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
      temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);

      addr = this_rtx;
      if (delta != 0)
	{
	  if (delta >= -256 && delta < 256)
	    addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
				       plus_constant (Pmode, this_rtx, delta));
	  else
	    aarch64_add_constant (this_regno, IP1_REGNUM, delta);
	}

      if (Pmode == ptr_mode)
	aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
      else
	aarch64_emit_move (temp0,
			   gen_rtx_ZERO_EXTEND (Pmode,
						gen_rtx_MEM (ptr_mode, addr)));

      if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
	addr = plus_constant (Pmode, temp0, vcall_offset);
      else
	{
	  (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
	  addr = gen_rtx_PLUS (Pmode, temp0, temp1);
	}

      if (Pmode == ptr_mode)
	aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode, addr));
      else
	aarch64_emit_move (temp1,
			   gen_rtx_SIGN_EXTEND (Pmode,
						gen_rtx_MEM (ptr_mode, addr)));

      emit_insn (gen_add2_insn (this_rtx, temp1));
    }

  /* Generate a tail call to the target function.  */
  if (!TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
  SIBLING_CALL_P (insn) = 1;

  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  /* Stop pretending to be a post-reload pass.  */
  reload_completed = 0;
}
/* Return true if X references a TLS symbol.  */

static bool
aarch64_tls_referenced_p (rtx x)
{
  if (!TARGET_HAVE_TLS)
    return false;
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
	return true;
      /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
	 TLS offsets, not real symbol references.  */
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
	iter.skip_subrtxes ();
    }
  return false;
}
/* Comparison function for qsort/bsearch over the bitmask-immediate table.  */

static int
aarch64_bitmasks_cmp (const void *i1, const void *i2)
{
  const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
  const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;

  if (*imm1 < *imm2)
    return -1;
  if (*imm1 > *imm2)
    return +1;
  return 0;
}

/* Build the table of all valid AArch64 bitmask immediates.  */

static void
aarch64_build_bitmask_table (void)
{
  unsigned HOST_WIDE_INT mask, imm;
  unsigned int log_e, e, s, r;
  unsigned int nimms = 0;

  for (log_e = 1; log_e <= 6; log_e++)
    {
      e = 1 << log_e;
      if (e == 64)
	mask = ~(HOST_WIDE_INT) 0;
      else
	mask = ((HOST_WIDE_INT) 1 << e) - 1;
      for (s = 1; s < e; s++)
	{
	  for (r = 0; r < e; r++)
	    {
	      /* set s consecutive bits to 1 (s < 64) */
	      imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
	      /* rotate right by r */
	      if (r != 0)
		imm = ((imm >> r) | (imm << (e - r))) & mask;
	      /* replicate the constant depending on SIMD size */
	      switch (log_e)
		{
		case 1: imm |= (imm <<  2);	/* Fall through.  */
		case 2: imm |= (imm <<  4);	/* Fall through.  */
		case 3: imm |= (imm <<  8);	/* Fall through.  */
		case 4: imm |= (imm << 16);	/* Fall through.  */
		case 5: imm |= (imm << 32);
		case 6: break;
		default: gcc_unreachable ();
		}
	      gcc_assert (nimms < AARCH64_NUM_BITMASKS);
	      aarch64_bitmasks[nimms++] = imm;
	    }
	}
    }

  gcc_assert (nimms == AARCH64_NUM_BITMASKS);
  qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
	 aarch64_bitmasks_cmp);
}
/* Return true if val can be encoded as a 12-bit unsigned immediate with
   a left shift of 0 or 12 bits.  */
bool
aarch64_uimm12_shift (HOST_WIDE_INT val)
{
  return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
	  || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val);
}

/* Return true if val is an immediate that can be loaded into a
   register by a MOVZ instruction.  */
static bool
aarch64_movw_imm (HOST_WIDE_INT val, machine_mode mode)
{
  if (GET_MODE_SIZE (mode) > 4)
    {
      if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
	  || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
	return true;
    }
  else
    {
      /* Ignore sign extension.  */
      val &= (HOST_WIDE_INT) 0xffffffff;
    }
  return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
	  || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
}

/* Return true if val is a valid bitmask immediate.  */
bool
aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode mode)
{
  if (GET_MODE_SIZE (mode) < 8)
    {
      /* Replicate bit pattern.  */
      val &= (HOST_WIDE_INT) 0xffffffff;
      val |= val << 32;
    }
  return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
		  sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
}

/* Return true if val is an immediate that can be loaded into a
   register in a single instruction.  */
bool
aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
{
  if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
    return true;
  return aarch64_bitmask_imm (val, mode);
}
static bool
aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  if (GET_CODE (x) == HIGH)
    return true;

  split_const (x, &base, &offset);
  if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
    {
      if (aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR)
	  != SYMBOL_FORCE_TO_MEM)
	return true;
      else
	/* Avoid generating a 64-bit relocation in ILP32; leave
	   to aarch64_expand_mov_immediate to handle it properly.  */
	return mode != ptr_mode;
    }

  return aarch64_tls_referenced_p (x);
}
/* Return true if register REGNO is a valid index register.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
aarch64_regno_ok_for_index_p (int regno, bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
	return true;

      if (!reg_renumber)
	return false;

      regno = reg_renumber[regno];
    }
  return GP_REGNUM_P (regno);
}

/* Return true if register REGNO is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
aarch64_regno_ok_for_base_p (int regno, bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
	return true;

      if (!reg_renumber)
	return false;

      regno = reg_renumber[regno];
    }

  /* The fake registers will be eliminated to either the stack or
     hard frame pointer, both of which are usually valid base registers.
     Reload deals with the cases where the eliminated form isn't valid.  */
  return (GP_REGNUM_P (regno)
	  || regno == SP_REGNUM
	  || regno == FRAME_POINTER_REGNUM
	  || regno == ARG_POINTER_REGNUM);
}

/* Return true if X is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

static bool
aarch64_base_register_rtx_p (rtx x, bool strict_p)
{
  if (!strict_p && GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);

  return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
}
/* Return true if address offset is a valid index.  If it is, fill in INFO
   appropriately.  STRICT_P is true if REG_OK_STRICT is in effect.  */

static bool
aarch64_classify_index (struct aarch64_address_info *info, rtx x,
			machine_mode mode, bool strict_p)
{
  enum aarch64_address_type type;
  rtx index;
  int shift;

  /* (reg:P) */
  if ((REG_P (x) || GET_CODE (x) == SUBREG)
      && GET_MODE (x) == Pmode)
    {
      type = ADDRESS_REG_REG;
      index = x;
      shift = 0;
    }
  /* (sign_extend:DI (reg:SI)) */
  else if ((GET_CODE (x) == SIGN_EXTEND
	    || GET_CODE (x) == ZERO_EXTEND)
	   && GET_MODE (x) == DImode
	   && GET_MODE (XEXP (x, 0)) == SImode)
    {
      type = (GET_CODE (x) == SIGN_EXTEND)
	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (x, 0);
      shift = 0;
    }
  /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
  else if (GET_CODE (x) == MULT
	   && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
	       || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
	   && GET_MODE (XEXP (x, 0)) == DImode
	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
	   && CONST_INT_P (XEXP (x, 1)))
    {
      type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = exact_log2 (INTVAL (XEXP (x, 1)));
    }
  /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
  else if (GET_CODE (x) == ASHIFT
	   && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
	       || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
	   && GET_MODE (XEXP (x, 0)) == DImode
	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
	   && CONST_INT_P (XEXP (x, 1)))
    {
      type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = INTVAL (XEXP (x, 1));
    }
  /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
  else if ((GET_CODE (x) == SIGN_EXTRACT
	    || GET_CODE (x) == ZERO_EXTRACT)
	   && GET_MODE (x) == DImode
	   && GET_CODE (XEXP (x, 0)) == MULT
	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
	   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
    {
      type = (GET_CODE (x) == SIGN_EXTRACT)
	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
      if (INTVAL (XEXP (x, 1)) != 32 + shift
	  || INTVAL (XEXP (x, 2)) != 0)
	shift = -1;
    }
  /* (and:DI (mult:DI (reg:DI) (const_int scale))
     (const_int 0xffffffff<<shift)) */
  else if (GET_CODE (x) == AND
	   && GET_MODE (x) == DImode
	   && GET_CODE (XEXP (x, 0)) == MULT
	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
	   && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	   && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
      if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
	shift = -1;
    }
  /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
  else if ((GET_CODE (x) == SIGN_EXTRACT
	    || GET_CODE (x) == ZERO_EXTRACT)
	   && GET_MODE (x) == DImode
	   && GET_CODE (XEXP (x, 0)) == ASHIFT
	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
	   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
    {
      type = (GET_CODE (x) == SIGN_EXTRACT)
	? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = INTVAL (XEXP (XEXP (x, 0), 1));
      if (INTVAL (XEXP (x, 1)) != 32 + shift
	  || INTVAL (XEXP (x, 2)) != 0)
	shift = -1;
    }
  /* (and:DI (ashift:DI (reg:DI) (const_int shift))
     (const_int 0xffffffff<<shift)) */
  else if (GET_CODE (x) == AND
	   && GET_MODE (x) == DImode
	   && GET_CODE (XEXP (x, 0)) == ASHIFT
	   && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
	   && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	   && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = INTVAL (XEXP (XEXP (x, 0), 1));
      if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
	shift = -1;
    }
  /* (mult:P (reg:P) (const_int scale)) */
  else if (GET_CODE (x) == MULT
	   && GET_MODE (x) == Pmode
	   && GET_MODE (XEXP (x, 0)) == Pmode
	   && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_REG;
      index = XEXP (x, 0);
      shift = exact_log2 (INTVAL (XEXP (x, 1)));
    }
  /* (ashift:P (reg:P) (const_int shift)) */
  else if (GET_CODE (x) == ASHIFT
	   && GET_MODE (x) == Pmode
	   && GET_MODE (XEXP (x, 0)) == Pmode
	   && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_REG;
      index = XEXP (x, 0);
      shift = INTVAL (XEXP (x, 1));
    }
  else
    return false;

  if (GET_CODE (index) == SUBREG)
    index = SUBREG_REG (index);

  if ((shift == 0
       || (shift > 0 && shift <= 3
	   && (1 << shift) == GET_MODE_SIZE (mode)))
      && REG_P (index)
      && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
    {
      info->type = type;
      info->offset = index;
      info->shift = shift;
      return true;
    }

  return false;
}
/* Offset-range predicates for the various AArch64 addressing forms.  */

bool
aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
{
  return (offset >= -64 * GET_MODE_SIZE (mode)
	  && offset < 64 * GET_MODE_SIZE (mode)
	  && offset % GET_MODE_SIZE (mode) == 0);
}

static inline bool
offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
			       HOST_WIDE_INT offset)
{
  return offset >= -256 && offset < 256;
}

static inline bool
offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
{
  return (offset >= 0
	  && offset < 4096 * GET_MODE_SIZE (mode)
	  && offset % GET_MODE_SIZE (mode) == 0);
}
/* Return true if X is a valid address for machine mode MODE.  If it is,
   fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
   effect.  OUTER_CODE is PARALLEL for a load/store pair.  */

static bool
aarch64_classify_address (struct aarch64_address_info *info,
			  rtx x, machine_mode mode,
			  RTX_CODE outer_code, bool strict_p)
{
  enum rtx_code code = GET_CODE (x);
  rtx op0, op1;

  /* On BE, we use load/store pair for all large int mode load/stores.  */
  bool load_store_pair_p = (outer_code == PARALLEL
			    || (BYTES_BIG_ENDIAN
				&& aarch64_vect_struct_mode_p (mode)));

  bool allow_reg_index_p =
    !load_store_pair_p
    && (GET_MODE_SIZE (mode) != 16 || aarch64_vector_mode_supported_p (mode))
    && !aarch64_vect_struct_mode_p (mode);

  /* On LE, for AdvSIMD, don't support anything other than POST_INC or
     REG addressing.  */
  if (aarch64_vect_struct_mode_p (mode) && !BYTES_BIG_ENDIAN
      && (code != POST_INC && code != REG))
    return false;

  switch (code)
    {
    case REG:
    case SUBREG:
      info->type = ADDRESS_REG_IMM;
      info->base = x;
      info->offset = const0_rtx;
      return aarch64_base_register_rtx_p (x, strict_p);

    case PLUS:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (! strict_p
	  && REG_P (op0)
	  && (op0 == virtual_stack_vars_rtx
	      || op0 == frame_pointer_rtx
	      || op0 == arg_pointer_rtx)
	  && CONST_INT_P (op1))
	{
	  info->type = ADDRESS_REG_IMM;
	  info->base = op0;
	  info->offset = op1;

	  return true;
	}

      if (GET_MODE_SIZE (mode) != 0
	  && CONST_INT_P (op1)
	  && aarch64_base_register_rtx_p (op0, strict_p))
	{
	  HOST_WIDE_INT offset = INTVAL (op1);

	  info->type = ADDRESS_REG_IMM;
	  info->base = op0;
	  info->offset = op1;

	  /* TImode and TFmode values are allowed in both pairs of X
	     registers and individual Q registers.  The available
	     address modes are:
	     X,X: 7-bit signed scaled offset
	     Q:   9-bit signed offset
	     We conservatively require an offset representable in either mode.
	   */
	  if (mode == TImode || mode == TFmode)
	    return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
		    && offset_9bit_signed_unscaled_p (mode, offset));

	  /* A 7bit offset check because OImode will emit a ldp/stp
	     instruction (only big endian will get here).
	     For ldp/stp instructions, the offset is scaled for the size of a
	     single element of the pair.  */
	  if (mode == OImode)
	    return aarch64_offset_7bit_signed_scaled_p (TImode, offset);

	  /* Three 9/12 bit offsets checks because CImode will emit three
	     ldr/str instructions (only big endian will get here).  */
	  if (mode == CImode)
	    return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
		    && (offset_9bit_signed_unscaled_p (V16QImode, offset + 32)
			|| offset_12bit_unsigned_scaled_p (V16QImode,
							   offset + 32)));

	  /* Two 7bit offsets checks because XImode will emit two ldp/stp
	     instructions (only big endian will get here).  */
	  if (mode == XImode)
	    return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
		    && aarch64_offset_7bit_signed_scaled_p (TImode,
							    offset + 32));

	  if (load_store_pair_p)
	    return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
		    && aarch64_offset_7bit_signed_scaled_p (mode, offset));
	  else
	    return (offset_9bit_signed_unscaled_p (mode, offset)
		    || offset_12bit_unsigned_scaled_p (mode, offset));
	}

      if (allow_reg_index_p)
	{
	  /* Look for base + (scaled/extended) index register.  */
	  if (aarch64_base_register_rtx_p (op0, strict_p)
	      && aarch64_classify_index (info, op1, mode, strict_p))
	    {
	      info->base = op0;
	      return true;
	    }
	  if (aarch64_base_register_rtx_p (op1, strict_p)
	      && aarch64_classify_index (info, op0, mode, strict_p))
	    {
	      info->base = op1;
	      return true;
	    }
	}

      return false;

    case POST_INC:
    case POST_DEC:
    case PRE_INC:
    case PRE_DEC:
      info->type = ADDRESS_REG_WB;
      info->base = XEXP (x, 0);
      info->offset = NULL_RTX;
      return aarch64_base_register_rtx_p (info->base, strict_p);

    case POST_MODIFY:
    case PRE_MODIFY:
      info->type = ADDRESS_REG_WB;
      info->base = XEXP (x, 0);
      if (GET_CODE (XEXP (x, 1)) == PLUS
	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
	  && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
	  && aarch64_base_register_rtx_p (info->base, strict_p))
	{
	  HOST_WIDE_INT offset;
	  info->offset = XEXP (XEXP (x, 1), 1);
	  offset = INTVAL (info->offset);

	  /* TImode and TFmode values are allowed in both pairs of X
	     registers and individual Q registers.  The available
	     address modes are:
	     X,X: 7-bit signed scaled offset
	     Q:   9-bit signed offset
	     We conservatively require an offset representable in either mode.
	   */
	  if (mode == TImode || mode == TFmode)
	    return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
		    && offset_9bit_signed_unscaled_p (mode, offset));

	  if (load_store_pair_p)
	    return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
		    && aarch64_offset_7bit_signed_scaled_p (mode, offset));
	  else
	    return offset_9bit_signed_unscaled_p (mode, offset);
	}
      return false;

    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      /* load literal: pc-relative constant pool entry.  Only supported
         for SI mode or larger.  */
      info->type = ADDRESS_SYMBOLIC;

      if (!load_store_pair_p && GET_MODE_SIZE (mode) >= 4)
	{
	  rtx sym, addend;

	  split_const (x, &sym, &addend);
	  return (GET_CODE (sym) == LABEL_REF
		  || (GET_CODE (sym) == SYMBOL_REF
		      && CONSTANT_POOL_ADDRESS_P (sym)));
	}
      return false;

    case LO_SUM:
      info->type = ADDRESS_LO_SUM;
      info->base = XEXP (x, 0);
      info->offset = XEXP (x, 1);
      if (allow_reg_index_p
	  && aarch64_base_register_rtx_p (info->base, strict_p))
	{
	  rtx sym, offs;
	  split_const (info->offset, &sym, &offs);
	  if (GET_CODE (sym) == SYMBOL_REF
	      && (aarch64_classify_symbol (sym, offs, SYMBOL_CONTEXT_MEM)
		  == SYMBOL_SMALL_ABSOLUTE))
	    {
	      /* The symbol and offset must be aligned to the access size.  */
	      unsigned int align;
	      unsigned int ref_size;

	      if (CONSTANT_POOL_ADDRESS_P (sym))
		align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
	      else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
		{
		  tree exp = SYMBOL_REF_DECL (sym);
		  align = TYPE_ALIGN (TREE_TYPE (exp));
		  align = CONSTANT_ALIGNMENT (exp, align);
		}
	      else if (SYMBOL_REF_DECL (sym))
		align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
	      else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
		       && SYMBOL_REF_BLOCK (sym) != NULL)
		align = SYMBOL_REF_BLOCK (sym)->alignment;
	      else
		align = BITS_PER_UNIT;

	      ref_size = GET_MODE_SIZE (mode);
	      if (ref_size == 0)
		ref_size = GET_MODE_SIZE (DImode);

	      return ((INTVAL (offs) & (ref_size - 1)) == 0
		      && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
	    }
	}
      return false;

    default:
      return false;
    }
}
/* Return true if X is a symbolic address (possibly plus a constant).  */

bool
aarch64_symbolic_address_p (rtx x)
{
  rtx offset;

  split_const (x, &x, &offset);
  return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
}

/* Classify the base of symbolic expression X, given that X appears in
   context CONTEXT.  */

enum aarch64_symbol_type
aarch64_classify_symbolic_expression (rtx x,
				      enum aarch64_symbol_context context)
{
  rtx offset;

  split_const (x, &x, &offset);
  return aarch64_classify_symbol (x, offset, context);
}

/* Return TRUE if X is a legitimate address for accessing memory in
   mode MODE.  */
static bool
aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
{
  struct aarch64_address_info addr;

  return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
}

/* Return TRUE if X is a legitimate address for accessing memory in
   mode MODE.  OUTER_CODE will be PARALLEL if this is a load/store
   pair operation.  */
bool
aarch64_legitimate_address_p (machine_mode mode, rtx x,
			      RTX_CODE outer_code, bool strict_p)
{
  struct aarch64_address_info addr;

  return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
}
/* Return TRUE if rtx X is immediate constant 0.0.  */
bool
aarch64_float_const_zero_rtx_p (rtx x)
{
  REAL_VALUE_TYPE r;

  if (GET_MODE (x) == VOIDmode)
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
  if (REAL_VALUE_MINUS_ZERO (r))
    return !HONOR_SIGNED_ZEROS (GET_MODE (x));
  return REAL_VALUES_EQUAL (r, dconst0);
}

/* Return the fixed registers used for condition codes.  */

static bool
aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = CC_REGNUM;
  *p2 = INVALID_REGNUM;
  return true;
}
/* Emit call insn with PAT and do aarch64-specific handling.  */

void
aarch64_emit_call_insn (rtx pat)
{
  rtx insn = emit_call_insn (pat);

  rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
  clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
  clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
}
machine_mode
aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
{
  /* All floating point compares return CCFP if it is an equality
     comparison, and CCFPE otherwise.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    switch (code)
      {
      case EQ: case NE: case UNORDERED: case ORDERED:
      case UNLT: case UNLE: case UNGT: case UNGE: case UNEQ: case LTGT:
	return CCFPmode;
      case LT: case LE: case GT: case GE:
	return CCFPEmode;
      default:
	gcc_unreachable ();
      }

  /* A comparison against zero of a PLUS, MINUS, AND or NEG can use the
     flags set as a side effect of the operation itself.  */
  if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
      && y == const0_rtx
      && (code == EQ || code == NE || code == LT || code == GE)
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
	  || GET_CODE (x) == NEG))
    return CC_NZmode;

  /* A compare with a shifted operand.  Because of canonicalization,
     the comparison will have to be swapped when we emit the assembly
     code.  */
  if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
      && (REG_P (y) || GET_CODE (y) == SUBREG)
      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
	  || GET_CODE (x) == LSHIFTRT
	  || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
    return CC_SWPmode;

  /* Similarly for a negated operand, but we can only do this for
     equalities.  */
  if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
      && (REG_P (y) || GET_CODE (y) == SUBREG)
      && (code == EQ || code == NE)
      && GET_CODE (x) == NEG)
    return CC_Zmode;

  /* A compare of a mode narrower than SI mode against zero can be done
     by extending the value in the comparison.  */
  if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
      && y == const0_rtx)
    /* Only use sign-extension if we really need it.  */
    return ((code == GT || code == GE || code == LE || code == LT)
	    ? CC_SESWPmode : CC_ZESWPmode);

  /* For everything else, return CCmode.  */
  return CCmode;
}

static int
aarch64_get_condition_code_1 (enum machine_mode, enum rtx_code);

int
aarch64_get_condition_code (rtx x)
{
  machine_mode mode = GET_MODE (XEXP (x, 0));
  enum rtx_code comp_code = GET_CODE (x);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
  return aarch64_get_condition_code_1 (mode, comp_code);
}

static int
aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code)
{
  int ne = -1, eq = -1;

  switch (mode)
    {
    case CCFPmode:
    case CCFPEmode:
      switch (comp_code)
	{
	case GE: return AARCH64_GE;
	case GT: return AARCH64_GT;
	case LE: return AARCH64_LS;
	case LT: return AARCH64_MI;
	case NE: return AARCH64_NE;
	case EQ: return AARCH64_EQ;
	case ORDERED: return AARCH64_VC;
	case UNORDERED: return AARCH64_VS;
	case UNLT: return AARCH64_LT;
	case UNLE: return AARCH64_LE;
	case UNGT: return AARCH64_HI;
	case UNGE: return AARCH64_PL;
	default: return -1;
	}
      break;

    case CCmode:
      switch (comp_code)
	{
	case NE: return AARCH64_NE;
	case EQ: return AARCH64_EQ;
	case GE: return AARCH64_GE;
	case GT: return AARCH64_GT;
	case LE: return AARCH64_LE;
	case LT: return AARCH64_LT;
	case GEU: return AARCH64_CS;
	case GTU: return AARCH64_HI;
	case LEU: return AARCH64_LS;
	case LTU: return AARCH64_CC;
	default: return -1;
	}
      break;

    case CC_SWPmode:
    case CC_ZESWPmode:
    case CC_SESWPmode:
      switch (comp_code)
	{
	case NE: return AARCH64_NE;
	case EQ: return AARCH64_EQ;
	case GE: return AARCH64_LE;
	case GT: return AARCH64_LT;
	case LE: return AARCH64_GE;
	case LT: return AARCH64_GT;
	case GEU: return AARCH64_LS;
	case GTU: return AARCH64_CC;
	case LEU: return AARCH64_CS;
	case LTU: return AARCH64_HI;
	default: return -1;
	}
      break;

    case CC_NZmode:
      switch (comp_code)
	{
	case NE: return AARCH64_NE;
	case EQ: return AARCH64_EQ;
	case GE: return AARCH64_PL;
	case LT: return AARCH64_MI;
	default: return -1;
	}
      break;

    case CC_Zmode:
      switch (comp_code)
	{
	case NE: return AARCH64_NE;
	case EQ: return AARCH64_EQ;
	default: return -1;
	}
      break;

    default:
      return -1;
    }

  if (comp_code == NE)
    return ne;

  if (comp_code == EQ)
    return eq;

  return -1;
}
/* Return true if X is a CONST_VECTOR of integers whose elements are all
   the same value and lie in the range [MINVAL, MAXVAL].  */

static bool
aarch64_const_vec_all_same_in_range_p (rtx x,
				       HOST_WIDE_INT minval,
				       HOST_WIDE_INT maxval)
{
  HOST_WIDE_INT firstval;
  int count, i;

  if (GET_CODE (x) != CONST_VECTOR
      || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
    return false;

  firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
  if (firstval < minval || firstval > maxval)
    return false;

  count = CONST_VECTOR_NUNITS (x);
  for (i = 1; i < count; i++)
    if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
      return false;

  return true;
}

static bool
aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
{
  return aarch64_const_vec_all_same_in_range_p (x, val, val);
}

/* Return the number of set bits in VALUE.  */

static unsigned
bit_count (unsigned HOST_WIDE_INT value)
{
  unsigned count = 0;

  while (value)
    {
      count++;
      value &= value - 1;
    }

  return count;
}
#define AARCH64_CC_V 1
#define AARCH64_CC_C (1 << 1)
#define AARCH64_CC_Z (1 << 2)
#define AARCH64_CC_N (1 << 3)

/* N Z C V flags for ccmp.  The first code is for AND op and the other
   is for IOR op.  Indexed by AARCH64_COND_CODE.  */
static const int aarch64_nzcv_codes[][2] =
{
  {AARCH64_CC_Z, 0},	/* EQ, Z == 1.  */
  {0, AARCH64_CC_Z},	/* NE, Z == 0.  */
  {AARCH64_CC_C, 0},	/* CS, C == 1.  */
  {0, AARCH64_CC_C},	/* CC, C == 0.  */
  {AARCH64_CC_N, 0},	/* MI, N == 1.  */
  {0, AARCH64_CC_N},	/* PL, N == 0.  */
  {AARCH64_CC_V, 0},	/* VS, V == 1.  */
  {0, AARCH64_CC_V},	/* VC, V == 0.  */
  {AARCH64_CC_C, 0},	/* HI, C == 1 && Z == 0.  */
  {0, AARCH64_CC_C},	/* LS, !(C == 1 && Z == 0).  */
  {0, AARCH64_CC_V},	/* GE, N == V.  */
  {AARCH64_CC_V, 0},	/* LT, N != V.  */
  {0, AARCH64_CC_Z},	/* GT, Z == 0 && N == V.  */
  {AARCH64_CC_Z, 0},	/* LE, !(Z == 0 && N == V).  */
  {0, 0},		/* AL, Any.  */
  {0, 0},		/* NV, Any.  */
};

static int
aarch64_ccmp_mode_to_code (enum machine_mode mode)
static void
aarch64_print_operand (FILE *f, rtx x, char code)
{
  switch (code)
    {
    /* An integer or symbol address without a preceding # sign.  */
    case 'c':
      switch (GET_CODE (x))
	{
	case CONST_INT:
	  fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
	  break;

	case SYMBOL_REF:
	  output_addr_const (f, x);
	  break;

	case CONST:
	  if (GET_CODE (XEXP (x, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
	    {
	      output_addr_const (f, x);
	      break;
	    }
	  /* Fall through.  */

	default:
	  output_operand_lossage ("Unsupported operand for code '%c'", code);
	}
      break;

    case 'e':
      /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w.  */
      {
	int n;

	if (!CONST_INT_P (x)
	    || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
	  {
	    output_operand_lossage ("invalid operand for '%%%c'", code);
	    return;
	  }

	switch (n)
	  {
	  case 3: fputc ('b', f); break;
	  case 4: fputc ('h', f); break;
	  case 5: fputc ('w', f); break;
	  default:
	    output_operand_lossage ("invalid operand for '%%%c'", code);
	    return;
	  }
      }
      break;

    case 'p':
      {
	int n;

	/* Print N such that 2^N == X.  */
	if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
	  {
	    output_operand_lossage ("invalid operand for '%%%c'", code);
	    return;
	  }

	asm_fprintf (f, "%d", n);
      }
      break;

    case 'P':
      /* Print the number of non-zero bits in X (a const_int).  */
      if (!CONST_INT_P (x))
	{
	  output_operand_lossage ("invalid operand for '%%%c'", code);
	  return;
	}

      asm_fprintf (f, "%u", bit_count (INTVAL (x)));
      break;

    case 'H':
      /* Print the higher numbered register of a pair (TImode) of regs.  */
      if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
	{
	  output_operand_lossage ("invalid operand for '%%%c'", code);
	  return;
	}

      asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
      break;

    case 'm':
      {
	int cond_code;
	/* Print a condition (eq, ne, etc).  */

	/* CONST_TRUE_RTX means always -- that's the default.  */
	if (x == const_true_rtx)
	  return;

	if (!COMPARISON_P (x))
	  {
	    output_operand_lossage ("invalid operand for '%%%c'", code);
	    return;
	  }

	cond_code = aarch64_get_condition_code (x);
	gcc_assert (cond_code >= 0);
	fputs (aarch64_condition_codes[cond_code], f);
      }
      break;

    case 'M':
      {
	int cond_code;
	/* Print the inverse of a condition (eq <-> ne, etc).  */

	/* CONST_TRUE_RTX means never -- that's the default.  */
	if (x == const_true_rtx)
	  {
	    fputs ("nv", f);
	    return;
	  }

	if (!COMPARISON_P (x))
	  {
	    output_operand_lossage ("invalid operand for '%%%c'", code);
	    return;
	  }
	cond_code = aarch64_get_condition_code (x);
	gcc_assert (cond_code >= 0);
	fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
				       (cond_code)], f);
      }
      break;

    case 'b':
    case 'h':
    case 's':
    case 'd':
    case 'q':
      /* Print a scalar FP/SIMD register name.  */
      if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
	{
	  output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
	  return;
	}
      asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
      break;

    case 'S':
    case 'T':
    case 'U':
    case 'V':
      /* Print the first FP/SIMD register name in a list.  */
      if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
	{
	  output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
	  return;
	}
      asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
      break;

    case 'R':
      /* Print a scalar FP/SIMD register name + 1.  */
      if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
	{
	  output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
	  return;
	}
      asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
      break;

    case 'X':
      /* Print bottom 16 bits of integer constant in hex.  */
      if (!CONST_INT_P (x))
	{
	  output_operand_lossage ("invalid operand for '%%%c'", code);
	  return;
	}
      asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
      break;

    case 'w':
    case 'x':
      /* Print a general register name or the zero register (32-bit or
	 64-bit).  */
      if (x == const0_rtx
	  || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
	{
	  asm_fprintf (f, "%czr", code);
	  break;
	}

      if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
	{
	  asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
	  break;
	}

      if (REG_P (x) && REGNO (x) == SP_REGNUM)
	{
	  asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
	  break;
	}

      /* Fall through.  */

    case 0:
      /* Print a normal operand, if it's a general register, then we
	 assume DImode.  */
      if (x == NULL)
	{
	  output_operand_lossage ("missing operand");
	  return;
	}

      switch (GET_CODE (x))
	{
	case REG:
	  asm_fprintf (f, "%s", reg_names [REGNO (x)]);
	  break;

	case MEM:
	  aarch64_memory_reference_mode = GET_MODE (x);
	  output_address (XEXP (x, 0));
	  break;

	case LABEL_REF:
	case SYMBOL_REF:
	  output_addr_const (asm_out_file, x);
	  break;

	case CONST_INT:
	  asm_fprintf (f, "%wd", INTVAL (x));
	  break;

	case CONST_VECTOR:
	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
	    {
	      gcc_assert (
		  aarch64_const_vec_all_same_in_range_p (x,
							 HOST_WIDE_INT_MIN,
							 HOST_WIDE_INT_MAX));
	      asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
	    }
	  else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
	    fputc ('0', f);
	  else
	    gcc_unreachable ();
	  break;

	case CONST_DOUBLE:
	  /* CONST_DOUBLE can represent a double-width integer.
	     In this case, the mode of x is VOIDmode.  */
	  if (GET_MODE (x) == VOIDmode)
	    ; /* Do Nothing.  */
	  else if (aarch64_float_const_zero_rtx_p (x))
	    {
	      fputc ('0', f);
	      break;
	    }
	  else if (aarch64_float_const_representable_p (x))
	    {
#define buf_size 20
	      char float_buf[buf_size] = {'\0'};
	      REAL_VALUE_TYPE r;
	      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
	      real_to_decimal_for_mode (float_buf, &r,
					buf_size, buf_size,
					1, GET_MODE (x));
	      asm_fprintf (asm_out_file, "%s", float_buf);
	      break;
#undef buf_size
	    }
	  output_operand_lossage ("invalid constant");
	  return;
	default:
	  output_operand_lossage ("invalid operand");
	  return;
	}
      break;

    case 'A':
      if (GET_CODE (x) == HIGH)
	x = XEXP (x, 0);

      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
	{
	case SYMBOL_SMALL_GOT_4G:
	  asm_fprintf (asm_out_file, ":got:");
	  break;

	case SYMBOL_SMALL_TLSGD:
	  asm_fprintf (asm_out_file, ":tlsgd:");
	  break;

	case SYMBOL_SMALL_TLSDESC:
	  asm_fprintf (asm_out_file, ":tlsdesc:");
	  break;

	case SYMBOL_SMALL_GOTTPREL:
	  asm_fprintf (asm_out_file, ":gottprel:");
	  break;

	case SYMBOL_TLSLE:
	  asm_fprintf (asm_out_file, ":tprel:");
	  break;

	case SYMBOL_TINY_GOT:
	  gcc_unreachable ();
	  break;

	default:
	  break;
	}
      output_addr_const (asm_out_file, x);
      break;

    case 'L':
      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
	{
	case SYMBOL_SMALL_GOT_4G:
	  asm_fprintf (asm_out_file, ":lo12:");
	  break;

	case SYMBOL_SMALL_TLSGD:
	  asm_fprintf (asm_out_file, ":tlsgd_lo12:");
	  break;

	case SYMBOL_SMALL_TLSDESC:
	  asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
	  break;

	case SYMBOL_SMALL_GOTTPREL:
	  asm_fprintf (asm_out_file, ":gottprel_lo12:");
	  break;

	case SYMBOL_TLSLE:
	  asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
	  break;

	case SYMBOL_TINY_GOT:
	  asm_fprintf (asm_out_file, ":got:");
	  break;

	default:
	  break;
	}
      output_addr_const (asm_out_file, x);
      break;

    case 'G':
      switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
	{
	case SYMBOL_TLSLE:
	  asm_fprintf (asm_out_file, ":tprel_hi12:");
	  break;
	default:
	  break;
	}
      output_addr_const (asm_out_file, x);
      break;

    case 'K':
      {
	int cond_code;
	/* Print the NZCV immediate for a conditional compare (AND form).  */
	if (!COMPARISON_P (x))
	  {
	    output_operand_lossage ("invalid operand for '%%%c'", code);
	    return;
	  }

	cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
	gcc_assert (cond_code >= 0);
	asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][0]);
      }
      break;

    case 'k':
      {
	int cond_code;
	/* Print the NZCV immediate for a conditional compare (IOR form).  */
	if (!COMPARISON_P (x))
	  {
	    output_operand_lossage ("invalid operand for '%%%c'", code);
	    return;
	  }

	cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
	gcc_assert (cond_code >= 0);
	asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][1]);
      }
      break;

    default:
      output_operand_lossage ("invalid operand prefix '%%%c'", code);
      return;
    }
}
void
aarch64_print_operand_address (FILE *f, rtx x)
{
  struct aarch64_address_info addr;

  if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
				MEM, true))
    switch (addr.type)
      {
      case ADDRESS_REG_IMM:
	if (addr.offset == const0_rtx)
	  asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
	else
	  asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
		       INTVAL (addr.offset));
	return;

      case ADDRESS_REG_REG:
	if (addr.shift == 0)
	  asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
		       reg_names [REGNO (addr.offset)]);
	else
	  asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
		       reg_names [REGNO (addr.offset)], addr.shift);
	return;

      case ADDRESS_REG_UXTW:
	if (addr.shift == 0)
	  asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
		       REGNO (addr.offset) - R0_REGNUM);
	else
	  asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
		       REGNO (addr.offset) - R0_REGNUM, addr.shift);
	return;

      case ADDRESS_REG_SXTW:
	if (addr.shift == 0)
	  asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
		       REGNO (addr.offset) - R0_REGNUM);
	else
	  asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
		       REGNO (addr.offset) - R0_REGNUM, addr.shift);
	return;

      case ADDRESS_REG_WB:
	switch (GET_CODE (x))
	  {
	  case PRE_INC:
	    asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
			 GET_MODE_SIZE (aarch64_memory_reference_mode));
	    return;
	  case POST_INC:
	    asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
			 GET_MODE_SIZE (aarch64_memory_reference_mode));
	    return;
	  case PRE_DEC:
	    asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
			 GET_MODE_SIZE (aarch64_memory_reference_mode));
	    return;
	  case POST_DEC:
	    asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
			 GET_MODE_SIZE (aarch64_memory_reference_mode));
	    return;
	  case PRE_MODIFY:
	    asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
			 INTVAL (addr.offset));
	    return;
	  case POST_MODIFY:
	    asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
			 INTVAL (addr.offset));
	    return;
	  default:
	    break;
	  }
	break;

      case ADDRESS_LO_SUM:
	asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
	output_addr_const (f, addr.offset);
	asm_fprintf (f, "]");
	return;

      case ADDRESS_SYMBOLIC:
	break;
      }

  output_addr_const (f, x);
}
/* Return true if X mentions a LABEL_REF anywhere.  */

bool
aarch64_label_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == LABEL_REF)
    return true;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
     referencing instruction, but they are constant offsets, not
     symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return false;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
	      return true;
	}
      else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
	return true;
    }

  return false;
}
/* Implement REGNO_REG_CLASS.  */

enum reg_class
aarch64_regno_regclass (unsigned regno)
{
  if (GP_REGNUM_P (regno))
    return GENERAL_REGS;

  if (regno == SP_REGNUM)
    return STACK_REG;

  if (regno == FRAME_POINTER_REGNUM
      || regno == ARG_POINTER_REGNUM)
    return POINTER_REGS;

  if (FP_REGNUM_P (regno))
    return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;

  return NO_REGS;
}
static rtx
aarch64_legitimize_address (rtx x, rtx /* orig_x  */, machine_mode mode)
{
  /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
     where mask is selected by alignment and size of the offset.
     We try to pick as large a range for the offset as possible to
     maximize the chance of a CSE.  However, for aligned addresses
     we limit the range to 4k so that structures with different sized
     elements are likely to use the same base.  */

  if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
    {
      HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
      HOST_WIDE_INT base_offset;

      /* Does it look like we'll need a load/store-pair operation?  */
      if (GET_MODE_SIZE (mode) > 16
	  || mode == TImode)
	base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
		       & ~((128 * GET_MODE_SIZE (mode)) - 1));
      /* For offsets that aren't a multiple of the access size, the limit is
	 -256...255.  */
      else if (offset & (GET_MODE_SIZE (mode) - 1))
	base_offset = (offset + 0x100) & ~0x1ff;
      else
	base_offset = offset & ~0xfff;

      if (base_offset == 0)
	return x;

      offset -= base_offset;
      rtx base_reg = gen_reg_rtx (Pmode);
      rtx val = force_operand (plus_constant (Pmode, XEXP (x, 0), base_offset),
			       NULL_RTX);
      emit_move_insn (base_reg, val);
      x = plus_constant (Pmode, base_reg, offset);
    }

  return x;
}
/* Try a machine-dependent way of reloading an illegitimate address
   operand.  If we find one, push the reload and return the new rtx.  */

rtx
aarch64_legitimize_reload_address (rtx *x_p,
                                   machine_mode mode,
                                   int opnum, int type,
                                   int ind_levels ATTRIBUTE_UNUSED)
{
  rtx x = *x_p;

  /* Do not allow mem (plus (reg, const)) if vector struct mode.  */
  if (aarch64_vect_struct_mode_p (mode)
      && GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))
    {
      rtx orig_rtx = x;
      x = copy_rtx (x);
      push_reload (orig_rtx, NULL_RTX, x_p, NULL,
                   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
                   opnum, (enum reload_type) type);
      return x;
    }

  /* We must recognize output that we have already generated ourselves.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && REG_P (XEXP (XEXP (x, 0), 0))
      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
      && CONST_INT_P (XEXP (x, 1)))
    {
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
                   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
                   opnum, (enum reload_type) type);
      return x;
    }

  /* We wish to handle large displacements off a base register by splitting
     the addend across an add and the mem insn.  This can cut the number of
     extra insns needed from 3 to 1.  It is only useful for load/store of a
     single register with 12 bit offset field.  */
  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1))
      && HARD_REGISTER_P (XEXP (x, 0))
      && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
    {
      HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
      HOST_WIDE_INT low = val & 0xfff;
      HOST_WIDE_INT high = val - low;
      HOST_WIDE_INT offs;
      rtx cst;
      machine_mode xmode = GET_MODE (x);

      /* In ILP32, xmode can be either DImode or SImode.  */
      gcc_assert (xmode == DImode || xmode == SImode);

      /* Reload non-zero BLKmode offsets.  This is because we cannot ascertain
         BLKmode alignment.  */
      if (GET_MODE_SIZE (mode) == 0)
        return NULL_RTX;

      offs = low % GET_MODE_SIZE (mode);

      /* Align misaligned offset by adjusting high part to compensate.  */
      if (offs != 0)
        {
          if (aarch64_uimm12_shift (high + offs))
            {
              /* Align down.  */
              low = low - offs;
              high = high + offs;
            }
          else
            {
              /* Align up.  */
              offs = GET_MODE_SIZE (mode) - offs;
              low = low + offs;
              high = high + (low & 0x1000) - offs;
              low &= 0xfff;
            }
        }

      /* Check for overflow.  */
      if (high + low != val)
        return NULL_RTX;

      cst = GEN_INT (high);
      if (!aarch64_uimm12_shift (high))
        cst = force_const_mem (xmode, cst);

      /* Reload high part into base reg, leaving the low part
         in the mem instruction.
         Note that replacing this gen_rtx_PLUS with plus_constant is
         wrong in this case because we rely on the
         (plus (plus reg c1) c2) structure being preserved so that
         XEXP (*p, 0) in push_reload below uses the correct term.  */
      x = gen_rtx_PLUS (xmode,
                        gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
                        GEN_INT (low));

      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
                   BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
                   opnum, (enum reload_type) type);
      return x;
    }

  return NULL_RTX;
}
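
/* Worked example of the high/low split above (illustrative values only):
   for val = 0x13458 with a 4-byte access, low = 0x458 and high = 0x13000.
   The high part is encodable as an ADD immediate (0x13 << 12) and is
   reloaded into the base register; the 12-bit low part stays in the
   load/store's immediate offset field.  */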
static reg_class_t
aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
                          reg_class_t rclass,
                          machine_mode mode,
                          secondary_reload_info *sri)
{
  /* Without the TARGET_SIMD instructions we cannot move a Q register
     to a Q register directly.  We need a scratch.  */
  if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
      && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
      && reg_class_subset_p (rclass, FP_REGS))
    {
      if (mode == TFmode)
        sri->icode = CODE_FOR_aarch64_reload_movtf;
      else if (mode == TImode)
        sri->icode = CODE_FOR_aarch64_reload_movti;
      return NO_REGS;
    }

  /* A TFmode or TImode memory access should be handled via an FP_REGS
     because AArch64 has richer addressing modes for LDR/STR instructions
     than LDP/STP instructions.  */
  if (TARGET_FLOAT && rclass == GENERAL_REGS
      && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
    return FP_REGS;

  if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P (x))
    return GENERAL_REGS;

  return NO_REGS;
}
bool
aarch64_can_eliminate (const int from, const int to)
{
  /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
     HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM.  */

  if (frame_pointer_needed)
    {
      if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
        return true;
      if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
        return false;
      if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
          && !cfun->calls_alloca)
        return true;
      if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
        return true;

      return false;
    }

  /* If we decided that we didn't need a leaf frame pointer but then used
     LR in the function, then we'll want a frame pointer after all, so
     prevent this elimination to ensure a frame pointer is used.  */
  if (to == STACK_POINTER_REGNUM
      && flag_omit_leaf_frame_pointer
      && df_regs_ever_live_p (LR_REGNUM))
    return false;

  return true;
}
HOST_WIDE_INT
aarch64_initial_elimination_offset (unsigned from, unsigned to)
{
  aarch64_layout_frame ();

  if (to == HARD_FRAME_POINTER_REGNUM)
    {
      if (from == ARG_POINTER_REGNUM)
        return cfun->machine->frame.frame_size - crtl->outgoing_args_size;

      if (from == FRAME_POINTER_REGNUM)
        return (cfun->machine->frame.hard_fp_offset
                - cfun->machine->frame.saved_varargs_size);
    }

  if (to == STACK_POINTER_REGNUM)
    {
      if (from == FRAME_POINTER_REGNUM)
        return (cfun->machine->frame.frame_size
                - cfun->machine->frame.saved_varargs_size);
    }

  return cfun->machine->frame.frame_size;
}
/* Implement RETURN_ADDR_RTX.  We do not support moving back to a
   previous frame.  */

rtx
aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return const0_rtx;
  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
static void
aarch64_asm_trampoline_template (FILE *f)
{
  if (TARGET_ILP32)
    {
      asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
      asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
    }
  else
    {
      asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
      asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
    }
  asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
  assemble_aligned_integer (4, const0_rtx);
  assemble_aligned_integer (POINTER_BYTES, const0_rtx);
  assemble_aligned_integer (POINTER_BYTES, const0_rtx);
}
static void
aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;
  const int tramp_code_sz = 16;

  /* Don't need to copy the trailing D-words, we fill those in below.  */
  emit_block_move (m_tramp, assemble_trampoline_template (),
                   GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
  mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  if (GET_MODE (fnaddr) != ptr_mode)
    fnaddr = convert_memory_address (ptr_mode, fnaddr);
  emit_move_insn (mem, fnaddr);

  mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
  emit_move_insn (mem, chain_value);

  /* XXX We should really define a "clear_cache" pattern and use
     gen_clear_cache().  */
  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
                     LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
                     plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
                     ptr_mode);
}
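
/* Sketch of the resulting trampoline layout (illustrative, assuming the
   LP64 ABI where POINTER_BYTES is 8):

     bytes  0..15   code emitted by aarch64_asm_trampoline_template
     bytes 16..23   address of the nested function (fnaddr)
     bytes 24..31   static chain value

   __clear_cache is then invoked over the whole block so that the copied
   code is visible to the instruction fetch stream.  */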
static unsigned char
aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
{
  switch (regclass)
    {
    case CALLER_SAVE_REGS:
    case POINTER_REGS:
    case GENERAL_REGS:
    case ALL_REGS:
    case FP_REGS:
    case FP_LO_REGS:
      return
        aarch64_vector_mode_p (mode)
          ? (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG
          : (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    default:
      break;
    }
  gcc_unreachable ();
}
static reg_class_t
aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
{
  if (regclass == POINTER_REGS)
    return GENERAL_REGS;

  if (regclass == STACK_REG)
    {
      if (REG_P (x)
          && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
        return regclass;

      return NO_REGS;
    }

  /* If it's an integer immediate that MOVI can't handle, then
     FP_REGS is not an option, so we return NO_REGS instead.  */
  if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
      && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
    return NO_REGS;

  /* Register elimination can result in a request for
     SP+constant->FP_REGS.  We cannot support such operations which
     use SP as source and an FP_REG as destination, so reject such
     requests right away.  */
  if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
    {
      rtx lhs = XEXP (x, 0);

      /* Look through a possible SUBREG introduced by ILP32.  */
      if (GET_CODE (lhs) == SUBREG)
        lhs = SUBREG_REG (lhs);

      gcc_assert (REG_P (lhs));
      gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
                                      POINTER_REGS));
      return NO_REGS;
    }

  return regclass;
}
void
aarch64_asm_output_labelref (FILE* f, const char *name)
{
  asm_fprintf (f, "%U%s", name);
}

static void
aarch64_elf_asm_constructor (rtx symbol, int priority)
{
  if (priority == DEFAULT_INIT_PRIORITY)
    default_ctor_section_asm_out_constructor (symbol, priority);
  else
    {
      snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
      s = get_section (buf, SECTION_WRITE, NULL);
      switch_to_section (s);
      assemble_align (POINTER_SIZE);
      assemble_aligned_integer (POINTER_BYTES, symbol);
    }
}

static void
aarch64_elf_asm_destructor (rtx symbol, int priority)
{
  if (priority == DEFAULT_INIT_PRIORITY)
    default_dtor_section_asm_out_destructor (symbol, priority);
  else
    {
      snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
      s = get_section (buf, SECTION_WRITE, NULL);
      switch_to_section (s);
      assemble_align (POINTER_SIZE);
      assemble_aligned_integer (POINTER_BYTES, symbol);
    }
}
const char *
aarch64_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
  int index;
  static const char *const patterns[4][2] =
  {
    {
      "ldrb\t%w3, [%0,%w1,uxtw]",
      "add\t%3, %4, %w3, sxtb #2"
    },
    {
      "ldrh\t%w3, [%0,%w1,uxtw #1]",
      "add\t%3, %4, %w3, sxth #2"
    },
    {
      "ldr\t%w3, [%0,%w1,uxtw #2]",
      "add\t%3, %4, %w3, sxtw #2"
    },
    /* We assume that DImode is only generated when not optimizing and
       that we don't really need 64-bit address offsets.  That would
       imply an object file with 8GB of code in a single function!  */
    {
      "ldr\t%w3, [%0,%w1,uxtw #2]",
      "add\t%3, %4, %w3, sxtw #2"
    }
  };

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));

  gcc_assert (index >= 0 && index <= 3);

  /* Need to implement table size reduction, by changing the code below.  */
  output_asm_insn (patterns[index][0], operands);
  ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
  snprintf (buf, sizeof (buf),
            "adr\t%%4, %s", targetm.strip_name_encoding (label));
  output_asm_insn (buf, operands);
  output_asm_insn (patterns[index][1], operands);
  output_asm_insn ("br\t%3", operands);
  assemble_label (asm_out_file, label);
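
/* For illustration (not generated from the sources): with a HImode
   dispatch table, operand 0 holding the table base and operand 1 the
   bounds-checked case index, the sequence printed above is roughly

     ldrh    w3, [x0, w1, uxtw #1]
     adr     x4, .Lrtx<N>
     add     x3, x4, w3, sxth #2
     br      x3

   i.e. a scaled table load, the table anchor, and an indirect branch,
   followed by the internal label emitted by assemble_label.  */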
/* Return size in bits of an arithmetic operand which is shifted/scaled and
   masked such that it is suitable for a UXTB, UXTH, or UXTW extend
   operator.  */

int
aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
{
  if (shift >= 0 && shift <= 3)
    {
      int size;
      for (size = 8; size <= 32; size *= 2)
        {
          HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
          if (mask == bits << shift)
            return size;
        }
    }
  return 0;
}
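
/* Example (for illustration only): shift = 2 and mask = 0x3fc matches
   0xff << 2, so aarch64_uxt_size returns 8 and the operand can be
   expressed as a UXTB with a left shift of 2 in an extended-register
   ADD or SUB.  */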
static bool
aarch64_use_blocks_for_constant_p (machine_mode mode ATTRIBUTE_UNUSED,
                                   const_rtx x ATTRIBUTE_UNUSED)
{
  /* We can't use blocks for constants when we're using a per-function
     constant pool.  */
  return false;
}

static section *
aarch64_select_rtx_section (machine_mode mode ATTRIBUTE_UNUSED,
                            rtx x ATTRIBUTE_UNUSED,
                            unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
{
  /* Force all constant pool entries into the current function section.  */
  return function_section (current_function_decl);
}
/* Helper function for rtx cost calculation.  Strip a shift expression
   from X.  Returns the inner operand if successful, or the original
   expression on failure.  */
static rtx
aarch64_strip_shift (rtx x)
{
  rtx op = x;

  /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
     we can convert both to ROR during final output.  */
  if ((GET_CODE (op) == ASHIFT
       || GET_CODE (op) == ASHIFTRT
       || GET_CODE (op) == LSHIFTRT
       || GET_CODE (op) == ROTATERT
       || GET_CODE (op) == ROTATE)
      && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);

  if (GET_CODE (op) == MULT
      && CONST_INT_P (XEXP (op, 1))
      && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
    return XEXP (op, 0);

  return x;
}
/* Helper function for rtx cost calculation.  Strip an extend
   expression from X.  Returns the inner operand if successful, or the
   original expression on failure.  We deal with a number of possible
   canonicalization variations here.  */
static rtx
aarch64_strip_extend (rtx x)
{
  rtx op = x;

  /* Zero and sign extraction of a widened value.  */
  if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
      && XEXP (op, 2) == const0_rtx
      && GET_CODE (XEXP (op, 0)) == MULT
      && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
                                         XEXP (op, 1)))
    return XEXP (XEXP (op, 0), 0);

  /* It can also be represented (for zero-extend) as an AND with an
     immediate.  */
  if (GET_CODE (op) == AND
      && GET_CODE (XEXP (op, 0)) == MULT
      && CONST_INT_P (XEXP (XEXP (op, 0), 1))
      && CONST_INT_P (XEXP (op, 1))
      && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
                           INTVAL (XEXP (op, 1))) != 0)
    return XEXP (XEXP (op, 0), 0);

  /* Now handle extended register, as this may also have an optional
     left shift by 1..4.  */
  if (GET_CODE (op) == ASHIFT
      && CONST_INT_P (XEXP (op, 1))
      && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
    op = XEXP (op, 0);

  if (GET_CODE (op) == ZERO_EXTEND
      || GET_CODE (op) == SIGN_EXTEND)
    op = XEXP (op, 0);

  if (op != x)
    return op;

  return x;
}

/* Return true iff CODE is a shift supported in combination
   with arithmetic instructions.  */
static bool
aarch64_shift_p (enum rtx_code code)
{
  return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
}
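
/* For example (illustrative RTL, not taken from a compiler dump): the
   extended-register operand of "add x0, x1, w2, uxtw #2" can reach the
   cost functions as

     (ashift:DI (zero_extend:DI (reg:SI w2)) (const_int 2))

   and aarch64_strip_extend reduces it to (reg:SI w2), so only the inner
   register is costed again by the caller.  */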
/* Helper function for rtx cost calculation.  Calculate the cost of
   a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
   Return the calculated cost of the expression, recursing manually in to
   operands where needed.  */

static int
aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
{
  rtx op0, op1;
  const struct cpu_cost_table *extra_cost
    = aarch64_tune_params.insn_extra_cost;
  int cost = 0;
  bool compound_p = (outer == PLUS || outer == MINUS);
  machine_mode mode = GET_MODE (x);

  gcc_checking_assert (code == MULT);

  op0 = XEXP (x, 0);
  op1 = XEXP (x, 1);

  if (VECTOR_MODE_P (mode))
    mode = GET_MODE_INNER (mode);

  /* Integer multiply/fma.  */
  if (GET_MODE_CLASS (mode) == MODE_INT)
    {
      /* The multiply will be canonicalized as a shift, cost it as such.  */
      if (aarch64_shift_p (GET_CODE (x))
          || (CONST_INT_P (op1)
              && exact_log2 (INTVAL (op1)) > 0))
        {
          bool is_extend = GET_CODE (op0) == ZERO_EXTEND
                           || GET_CODE (op0) == SIGN_EXTEND;
          if (speed)
            {
              if (compound_p)
                {
                  if (REG_P (op1))
                    /* ARITH + shift-by-register.  */
                    cost += extra_cost->alu.arith_shift_reg;
                  else if (is_extend)
                    /* ARITH + extended register.  We don't have a cost field
                       for ARITH+EXTEND+SHIFT, so use extend_arith here.  */
                    cost += extra_cost->alu.extend_arith;
                  else
                    /* ARITH + shift-by-immediate.  */
                    cost += extra_cost->alu.arith_shift;
                }
              else
                /* LSL (immediate).  */
                cost += extra_cost->alu.shift;
            }

          /* Strip extends as we will have costed them in the case above.  */
          if (is_extend)
            op0 = aarch64_strip_extend (op0);

          cost += rtx_cost (op0, GET_CODE (op0), 0, speed);

          return cost;
        }

      /* MNEG or [US]MNEGL.  Extract the NEG operand and indicate that it's a
         compound and let the below cases handle it.  After all, MNEG is a
         special-case alias of MSUB.  */
      if (GET_CODE (op0) == NEG)
        {
          op0 = XEXP (op0, 0);
          compound_p = true;
        }

      /* Integer multiplies or FMAs have zero/sign extending variants.  */
      if ((GET_CODE (op0) == ZERO_EXTEND
           && GET_CODE (op1) == ZERO_EXTEND)
          || (GET_CODE (op0) == SIGN_EXTEND
              && GET_CODE (op1) == SIGN_EXTEND))
        {
          cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
                  + rtx_cost (XEXP (op1, 0), MULT, 1, speed);

          if (speed)
            {
              if (compound_p)
                /* SMADDL/UMADDL/UMSUBL/SMSUBL.  */
                cost += extra_cost->mult[0].extend_add;
              else
                /* MUL/SMULL/UMULL.  */
                cost += extra_cost->mult[0].extend;
            }

          return cost;
        }

      /* This is either an integer multiply or a MADD.  In both cases
         we want to recurse and cost the operands.  */
      cost += rtx_cost (op0, MULT, 0, speed)
              + rtx_cost (op1, MULT, 1, speed);

      if (speed)
        {
          if (compound_p)
            /* MADD/MSUB.  */
            cost += extra_cost->mult[mode == DImode].add;
          else
            /* MUL.  */
            cost += extra_cost->mult[mode == DImode].simple;
        }

      return cost;
    }
  else
    {
      if (speed)
        {
          /* Floating-point FMA/FMUL can also support negations of the
             operands.  */
          if (GET_CODE (op0) == NEG)
            op0 = XEXP (op0, 0);
          if (GET_CODE (op1) == NEG)
            op1 = XEXP (op1, 0);

          if (compound_p)
            /* FMADD/FNMADD/FNMSUB/FMSUB.  */
            cost += extra_cost->fp[mode == DFmode].fma;
          else
            /* FMUL/FNMUL.  */
            cost += extra_cost->fp[mode == DFmode].mult;
        }

      cost += rtx_cost (op0, MULT, 0, speed)
              + rtx_cost (op1, MULT, 1, speed);
      return cost;
    }
}
static int
aarch64_address_cost (rtx x,
                      machine_mode mode,
                      addr_space_t as ATTRIBUTE_UNUSED,
                      bool speed)
{
  enum rtx_code c = GET_CODE (x);
  const struct cpu_addrcost_table *addr_cost = aarch64_tune_params.addr_cost;
  struct aarch64_address_info info;
  int cost = 0;

  if (!aarch64_classify_address (&info, x, mode, c, false))
    {
      if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
        {
          /* This is a CONST or SYMBOL ref which will be split
             in a different way depending on the code model in use.
             Cost it through the generic infrastructure.  */
          int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
          /* Divide through by the cost of one instruction to
             bring it to the same units as the address costs.  */
          cost_symbol_ref /= COSTS_N_INSNS (1);
          /* The cost is then the cost of preparing the address,
             followed by an immediate (possibly 0) offset.  */
          return cost_symbol_ref + addr_cost->imm_offset;
        }

      /* This is most likely a jump table from a case
         statement.  */
      return addr_cost->register_offset;
    }

  switch (info.type)
    {
    case ADDRESS_LO_SUM:
    case ADDRESS_SYMBOLIC:
    case ADDRESS_REG_IMM:
      cost += addr_cost->imm_offset;
      break;

    case ADDRESS_REG_WB:
      if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
        cost += addr_cost->pre_modify;
      else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
        cost += addr_cost->post_modify;
      else
        gcc_unreachable ();
      break;

    case ADDRESS_REG_REG:
      cost += addr_cost->register_offset;
      break;

    case ADDRESS_REG_UXTW:
    case ADDRESS_REG_SXTW:
      cost += addr_cost->register_extend;
      break;

    default:
      gcc_unreachable ();
    }

  if (info.shift > 0)
    {
      /* For the sake of calculating the cost of the shifted register
         component, we can treat same sized modes in the same way.  */
      switch (GET_MODE_BITSIZE (mode))
        {
        case 16:
          cost += addr_cost->addr_scale_costs.hi;
          break;

        case 32:
          cost += addr_cost->addr_scale_costs.si;
          break;

        case 64:
          cost += addr_cost->addr_scale_costs.di;
          break;

        /* We can't tell, or this is a 128-bit vector.  */
        default:
          cost += addr_cost->addr_scale_costs.ti;
          break;
        }
    }

  return cost;
}
/* Return the cost of a branch.  If SPEED_P is true then the compiler is
   optimizing for speed.  If PREDICTABLE_P is true then the branch is
   predicted to be taken.  */
int
aarch64_branch_cost (bool speed_p, bool predictable_p)
{
  /* When optimizing for speed, use the cost of unpredictable branches.  */
  const struct cpu_branch_cost *branch_costs =
    aarch64_tune_params.branch_costs;

  if (!speed_p || predictable_p)
    return branch_costs->predictable;
  else
    return branch_costs->unpredictable;
}
/* Return true if the RTX X in mode MODE is a zero or sign extract
   usable in an ADD or SUB (extended register) instruction.  */
static bool
aarch64_rtx_arith_op_extract_p (rtx x, machine_mode mode)
{
  /* Catch add with a sign extract.
     This is add_<optab><mode>_multp2.  */
  if (GET_CODE (x) == SIGN_EXTRACT
      || GET_CODE (x) == ZERO_EXTRACT)
    {
      rtx op0 = XEXP (x, 0);
      rtx op1 = XEXP (x, 1);
      rtx op2 = XEXP (x, 2);

      if (GET_CODE (op0) == MULT
          && CONST_INT_P (op1)
          && op2 == const0_rtx
          && CONST_INT_P (XEXP (op0, 1))
          && aarch64_is_extend_from_extract (mode,
                                             XEXP (op0, 1),
                                             op1))
        return true;
    }

  return false;
}

/* Return true iff U is one of the UNSPEC numbers used by the frint
   family of rounding instructions.  */
static bool
aarch64_frint_unspec_p (unsigned int u)
{
  switch (u)
    {
    case UNSPEC_FRINTZ:
    case UNSPEC_FRINTP:
    case UNSPEC_FRINTM:
    case UNSPEC_FRINTA:
    case UNSPEC_FRINTN:
    case UNSPEC_FRINTX:
    case UNSPEC_FRINTI:
      return true;
    default:
      return false;
    }
}
/* Return true iff X is an rtx that will match an extr instruction
   i.e. as described in the *extr<mode>5_insn family of patterns.
   OP0 and OP1 will be set to the operands of the shifts involved
   on success and will be NULL_RTX otherwise.  */

static bool
aarch64_extr_rtx_p (rtx x, rtx *res_op0, rtx *res_op1)
{
  rtx op0, op1;
  machine_mode mode = GET_MODE (x);

  *res_op0 = NULL_RTX;
  *res_op1 = NULL_RTX;

  if (GET_CODE (x) != IOR)
    return false;

  op0 = XEXP (x, 0);
  op1 = XEXP (x, 1);

  if ((GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
      || (GET_CODE (op1) == ASHIFT && GET_CODE (op0) == LSHIFTRT))
    {
      /* Canonicalise locally to ashift in op0, lshiftrt in op1.  */
      if (GET_CODE (op1) == ASHIFT)
        std::swap (op0, op1);

      if (!CONST_INT_P (XEXP (op0, 1)) || !CONST_INT_P (XEXP (op1, 1)))
        return false;

      unsigned HOST_WIDE_INT shft_amnt_0 = UINTVAL (XEXP (op0, 1));
      unsigned HOST_WIDE_INT shft_amnt_1 = UINTVAL (XEXP (op1, 1));

      if (shft_amnt_0 < GET_MODE_BITSIZE (mode)
          && shft_amnt_0 + shft_amnt_1 == GET_MODE_BITSIZE (mode))
        {
          *res_op0 = XEXP (op0, 0);
          *res_op1 = XEXP (op1, 0);
          return true;
        }
    }

  return false;
}
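
/* Illustration (RTL invented for the example): in DImode,

     (ior:DI (ashift:DI (reg:DI x1) (const_int 48))
             (lshiftrt:DI (reg:DI x2) (const_int 16)))

   has shift amounts summing to 64, so it matches and can be emitted as a
   single EXTR instruction; *res_op0 and *res_op1 receive x1 and x2.  */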
/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
   storing it in *COST.  Result is true if the total cost of the operation
   has now been calculated.  */
static bool
aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
{
  rtx inner;
  rtx comparator;
  enum rtx_code cmpcode;

  if (COMPARISON_P (op0))
    {
      inner = XEXP (op0, 0);
      comparator = XEXP (op0, 1);
      cmpcode = GET_CODE (op0);
    }
  else
    {
      inner = op0;
      comparator = const0_rtx;
      cmpcode = NE;
    }

  if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
    {
      /* Conditional branch.  */
      if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
        return true;
      else
        {
          if (cmpcode == NE || cmpcode == EQ)
            {
              if (comparator == const0_rtx)
                {
                  /* TBZ/TBNZ/CBZ/CBNZ.  */
                  if (GET_CODE (inner) == ZERO_EXTRACT)
                    /* TBZ/TBNZ.  */
                    *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT,
                                       0, speed);
                  else
                    /* CBZ/CBNZ.  */
                    *cost += rtx_cost (inner, cmpcode, 0, speed);

                  return true;
                }
            }
          else if (cmpcode == LT || cmpcode == GE)
            {
              /* TBZ/TBNZ.  */
              if (comparator == const0_rtx)
                return true;
            }
        }
    }
  else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
    {
      /* It's a conditional operation based on the status flags,
         so it must be some flavor of CSEL.  */

      /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL.  */
      if (GET_CODE (op1) == NEG
          || GET_CODE (op1) == NOT
          || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
        op1 = XEXP (op1, 0);

      *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed);
      *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed);
      return true;
    }

  /* We don't know what this is, cost all operands.  */
  return false;
}
5737 /* Calculate the cost of calculating X, storing it in *COST. Result
5738 is true if the total cost of the operation has now been calculated. */
5740 aarch64_rtx_costs (rtx x
, int code
, int outer ATTRIBUTE_UNUSED
,
5741 int param ATTRIBUTE_UNUSED
, int *cost
, bool speed
)
5744 const struct cpu_cost_table
*extra_cost
5745 = aarch64_tune_params
.insn_extra_cost
;
5746 machine_mode mode
= GET_MODE (x
);
5748 /* By default, assume that everything has equivalent cost to the
5749 cheapest instruction. Any additional costs are applied as a delta
5750 above this default. */
5751 *cost
= COSTS_N_INSNS (1);
5756 /* The cost depends entirely on the operands to SET. */
5761 switch (GET_CODE (op0
))
5766 rtx address
= XEXP (op0
, 0);
5767 if (VECTOR_MODE_P (mode
))
5768 *cost
+= extra_cost
->ldst
.storev
;
5769 else if (GET_MODE_CLASS (mode
) == MODE_INT
)
5770 *cost
+= extra_cost
->ldst
.store
;
5771 else if (mode
== SFmode
)
5772 *cost
+= extra_cost
->ldst
.storef
;
5773 else if (mode
== DFmode
)
5774 *cost
+= extra_cost
->ldst
.stored
;
5777 COSTS_N_INSNS (aarch64_address_cost (address
, mode
,
5781 *cost
+= rtx_cost (op1
, SET
, 1, speed
);
5785 if (! REG_P (SUBREG_REG (op0
)))
5786 *cost
+= rtx_cost (SUBREG_REG (op0
), SET
, 0, speed
);
5790 /* The cost is one per vector-register copied. */
5791 if (VECTOR_MODE_P (GET_MODE (op0
)) && REG_P (op1
))
5793 int n_minus_1
= (GET_MODE_SIZE (GET_MODE (op0
)) - 1)
5794 / GET_MODE_SIZE (V4SImode
);
5795 *cost
= COSTS_N_INSNS (n_minus_1
+ 1);
5797 /* const0_rtx is in general free, but we will use an
5798 instruction to set a register to 0. */
5799 else if (REG_P (op1
) || op1
== const0_rtx
)
5801 /* The cost is 1 per register copied. */
5802 int n_minus_1
= (GET_MODE_SIZE (GET_MODE (op0
)) - 1)
5804 *cost
= COSTS_N_INSNS (n_minus_1
+ 1);
5807 /* Cost is just the cost of the RHS of the set. */
5808 *cost
+= rtx_cost (op1
, SET
, 1, speed
);
5813 /* Bit-field insertion. Strip any redundant widening of
5814 the RHS to meet the width of the target. */
5815 if (GET_CODE (op1
) == SUBREG
)
5816 op1
= SUBREG_REG (op1
);
5817 if ((GET_CODE (op1
) == ZERO_EXTEND
5818 || GET_CODE (op1
) == SIGN_EXTEND
)
5819 && CONST_INT_P (XEXP (op0
, 1))
5820 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1
, 0)))
5821 >= INTVAL (XEXP (op0
, 1))))
5822 op1
= XEXP (op1
, 0);
5824 if (CONST_INT_P (op1
))
5826 /* MOV immediate is assumed to always be cheap. */
5827 *cost
= COSTS_N_INSNS (1);
5833 *cost
+= extra_cost
->alu
.bfi
;
5834 *cost
+= rtx_cost (op1
, (enum rtx_code
) code
, 1, speed
);
5840 /* We can't make sense of this, assume default cost. */
5841 *cost
= COSTS_N_INSNS (1);
5847 /* If an instruction can incorporate a constant within the
5848 instruction, the instruction's expression avoids calling
5849 rtx_cost() on the constant. If rtx_cost() is called on a
5850 constant, then it is usually because the constant must be
5851 moved into a register by one or more instructions.
5853 The exception is constant 0, which can be expressed
5854 as XZR/WZR and is therefore free. The exception to this is
5855 if we have (set (reg) (const0_rtx)) in which case we must cost
5856 the move. However, we can catch that when we cost the SET, so
5857 we don't need to consider that here. */
5858 if (x
== const0_rtx
)
5862 /* To an approximation, building any other constant is
5863 proportionally expensive to the number of instructions
5864 required to build that constant. This is true whether we
5865 are compiling for SPEED or otherwise. */
5866 *cost
= COSTS_N_INSNS (aarch64_internal_mov_immediate
5867 (NULL_RTX
, x
, false, mode
));
5874 /* mov[df,sf]_aarch64. */
5875 if (aarch64_float_const_representable_p (x
))
5876 /* FMOV (scalar immediate). */
5877 *cost
+= extra_cost
->fp
[mode
== DFmode
].fpconst
;
5878 else if (!aarch64_float_const_zero_rtx_p (x
))
5880 /* This will be a load from memory. */
5882 *cost
+= extra_cost
->ldst
.loadd
;
5884 *cost
+= extra_cost
->ldst
.loadf
;
5887 /* Otherwise this is +0.0. We get this using MOVI d0, #0
5888 or MOV v0.s[0], wzr - neither of which are modeled by the
5889 cost tables. Just use the default cost. */
5899 /* For loads we want the base cost of a load, plus an
5900 approximation for the additional cost of the addressing
5902 rtx address
= XEXP (x
, 0);
5903 if (VECTOR_MODE_P (mode
))
5904 *cost
+= extra_cost
->ldst
.loadv
;
5905 else if (GET_MODE_CLASS (mode
) == MODE_INT
)
5906 *cost
+= extra_cost
->ldst
.load
;
5907 else if (mode
== SFmode
)
5908 *cost
+= extra_cost
->ldst
.loadf
;
5909 else if (mode
== DFmode
)
5910 *cost
+= extra_cost
->ldst
.loadd
;
5913 COSTS_N_INSNS (aarch64_address_cost (address
, mode
,
5922 if (VECTOR_MODE_P (mode
))
5927 *cost
+= extra_cost
->vect
.alu
;
5932 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
5934 if (GET_RTX_CLASS (GET_CODE (op0
)) == RTX_COMPARE
5935 || GET_RTX_CLASS (GET_CODE (op0
)) == RTX_COMM_COMPARE
)
5938 *cost
+= rtx_cost (XEXP (op0
, 0), NEG
, 0, speed
);
5942 /* Cost this as SUB wzr, X. */
5943 op0
= CONST0_RTX (GET_MODE (x
));
5948 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_FLOAT
)
5950 /* Support (neg(fma...)) as a single instruction only if
5951 sign of zeros is unimportant. This matches the decision
5952 making in aarch64.md. */
5953 if (GET_CODE (op0
) == FMA
&& !HONOR_SIGNED_ZEROS (GET_MODE (op0
)))
5956 *cost
= rtx_cost (op0
, NEG
, 0, speed
);
5961 *cost
+= extra_cost
->fp
[mode
== DFmode
].neg
;
5971 if (VECTOR_MODE_P (mode
))
5972 *cost
+= extra_cost
->vect
.alu
;
5974 *cost
+= extra_cost
->alu
.clz
;
5983 if (op1
== const0_rtx
5984 && GET_CODE (op0
) == AND
)
5990 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
)
5992 /* TODO: A write to the CC flags possibly costs extra, this
5993 needs encoding in the cost tables. */
5995 /* CC_ZESWPmode supports zero extend for free. */
5996 if (GET_MODE (x
) == CC_ZESWPmode
&& GET_CODE (op0
) == ZERO_EXTEND
)
5997 op0
= XEXP (op0
, 0);
6000 if (GET_CODE (op0
) == AND
)
6006 if (GET_CODE (op0
) == PLUS
)
6008 /* ADDS (and CMN alias). */
6013 if (GET_CODE (op0
) == MINUS
)
6020 if (GET_CODE (op1
) == NEG
)
6024 *cost
+= extra_cost
->alu
.arith
;
6026 *cost
+= rtx_cost (op0
, COMPARE
, 0, speed
);
6027 *cost
+= rtx_cost (XEXP (op1
, 0), NEG
, 1, speed
);
6033 Compare can freely swap the order of operands, and
6034 canonicalization puts the more complex operation first.
6035 But the integer MINUS logic expects the shift/extend
6036 operation in op1. */
6038 || (GET_CODE (op0
) == SUBREG
&& REG_P (SUBREG_REG (op0
)))))
6046 if (GET_MODE_CLASS (GET_MODE (op0
)) == MODE_FLOAT
)
6050 *cost
+= extra_cost
->fp
[mode
== DFmode
].compare
;
6052 if (CONST_DOUBLE_P (op1
) && aarch64_float_const_zero_rtx_p (op1
))
6054 *cost
+= rtx_cost (op0
, COMPARE
, 0, speed
);
6055 /* FCMP supports constant 0.0 for no extra cost. */
6061 if (VECTOR_MODE_P (mode
))
6063 /* Vector compare. */
6065 *cost
+= extra_cost
->vect
.alu
;
6067 if (aarch64_float_const_zero_rtx_p (op1
))
6069 /* Vector cm (eq|ge|gt|lt|le) supports constant 0.0 for no extra
6083 *cost
+= rtx_cost (op0
, MINUS
, 0, speed
);
6085 /* Detect valid immediates. */
6086 if ((GET_MODE_CLASS (mode
) == MODE_INT
6087 || (GET_MODE_CLASS (mode
) == MODE_CC
6088 && GET_MODE_CLASS (GET_MODE (op0
)) == MODE_INT
))
6089 && CONST_INT_P (op1
)
6090 && aarch64_uimm12_shift (INTVAL (op1
)))
6093 /* SUB(S) (immediate). */
6094 *cost
+= extra_cost
->alu
.arith
;
6098 /* Look for SUB (extended register). */
6099 if (aarch64_rtx_arith_op_extract_p (op1
, mode
))
6102 *cost
+= extra_cost
->alu
.extend_arith
;
6104 *cost
+= rtx_cost (XEXP (XEXP (op1
, 0), 0),
6105 (enum rtx_code
) GET_CODE (op1
),
6110 rtx new_op1
= aarch64_strip_extend (op1
);
6112 /* Cost this as an FMA-alike operation. */
6113 if ((GET_CODE (new_op1
) == MULT
6114 || aarch64_shift_p (GET_CODE (new_op1
)))
6117 *cost
+= aarch64_rtx_mult_cost (new_op1
, MULT
,
6118 (enum rtx_code
) code
,
6123 *cost
+= rtx_cost (new_op1
, MINUS
, 1, speed
);
6127 if (VECTOR_MODE_P (mode
))
6130 *cost
+= extra_cost
->vect
.alu
;
6132 else if (GET_MODE_CLASS (mode
) == MODE_INT
)
6135 *cost
+= extra_cost
->alu
.arith
;
6137 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
6140 *cost
+= extra_cost
->fp
[mode
== DFmode
].addsub
;
6154 if (GET_RTX_CLASS (GET_CODE (op0
)) == RTX_COMPARE
6155 || GET_RTX_CLASS (GET_CODE (op0
)) == RTX_COMM_COMPARE
)
6158 *cost
+= rtx_cost (XEXP (op0
, 0), PLUS
, 0, speed
);
6159 *cost
+= rtx_cost (op1
, PLUS
, 1, speed
);
6163 if (GET_MODE_CLASS (mode
) == MODE_INT
6164 && CONST_INT_P (op1
)
6165 && aarch64_uimm12_shift (INTVAL (op1
)))
6167 *cost
+= rtx_cost (op0
, PLUS
, 0, speed
);
6170 /* ADD (immediate). */
6171 *cost
+= extra_cost
->alu
.arith
;
6175 *cost
+= rtx_cost (op1
, PLUS
, 1, speed
);
6177 /* Look for ADD (extended register). */
6178 if (aarch64_rtx_arith_op_extract_p (op0
, mode
))
6181 *cost
+= extra_cost
->alu
.extend_arith
;
6183 *cost
+= rtx_cost (XEXP (XEXP (op0
, 0), 0),
6184 (enum rtx_code
) GET_CODE (op0
),
6189 /* Strip any extend, leave shifts behind as we will
6190 cost them through mult_cost. */
6191 new_op0
= aarch64_strip_extend (op0
);
6193 if (GET_CODE (new_op0
) == MULT
6194 || aarch64_shift_p (GET_CODE (new_op0
)))
6196 *cost
+= aarch64_rtx_mult_cost (new_op0
, MULT
, PLUS
,
6201 *cost
+= rtx_cost (new_op0
, PLUS
, 0, speed
);
6205 if (VECTOR_MODE_P (mode
))
6208 *cost
+= extra_cost
->vect
.alu
;
6210 else if (GET_MODE_CLASS (mode
) == MODE_INT
)
6213 *cost
+= extra_cost
->alu
.arith
;
6215 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
6218 *cost
+= extra_cost
->fp
[mode
== DFmode
].addsub
;
6225 *cost
= COSTS_N_INSNS (1);
6229 if (VECTOR_MODE_P (mode
))
6230 *cost
+= extra_cost
->vect
.alu
;
6232 *cost
+= extra_cost
->alu
.rev
;
6237 if (aarch_rev16_p (x
))
6239 *cost
= COSTS_N_INSNS (1);
6243 if (VECTOR_MODE_P (mode
))
6244 *cost
+= extra_cost
->vect
.alu
;
6246 *cost
+= extra_cost
->alu
.rev
;
6251 if (aarch64_extr_rtx_p (x
, &op0
, &op1
))
6253 *cost
+= rtx_cost (op0
, IOR
, 0, speed
)
6254 + rtx_cost (op1
, IOR
, 1, speed
);
6256 *cost
+= extra_cost
->alu
.shift
;
6267 if (VECTOR_MODE_P (mode
))
6270 *cost
+= extra_cost
->vect
.alu
;
6275 && GET_CODE (op0
) == MULT
6276 && CONST_INT_P (XEXP (op0
, 1))
6277 && CONST_INT_P (op1
)
6278 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0
, 1))),
6281 /* This is a UBFM/SBFM. */
6282 *cost
+= rtx_cost (XEXP (op0
, 0), ZERO_EXTRACT
, 0, speed
);
6284 *cost
+= extra_cost
->alu
.bfx
;
6288 if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
6290 /* We possibly get the immediate for free, this is not
6292 if (CONST_INT_P (op1
)
6293 && aarch64_bitmask_imm (INTVAL (op1
), GET_MODE (x
)))
6295 *cost
+= rtx_cost (op0
, (enum rtx_code
) code
, 0, speed
);
6298 *cost
+= extra_cost
->alu
.logical
;
6306 /* Handle ORN, EON, or BIC. */
6307 if (GET_CODE (op0
) == NOT
)
6308 op0
= XEXP (op0
, 0);
6310 new_op0
= aarch64_strip_shift (op0
);
6312 /* If we had a shift on op0 then this is a logical-shift-
6313 by-register/immediate operation. Otherwise, this is just
6314 a logical operation. */
6319 /* Shift by immediate. */
6320 if (CONST_INT_P (XEXP (op0
, 1)))
6321 *cost
+= extra_cost
->alu
.log_shift
;
6323 *cost
+= extra_cost
->alu
.log_shift_reg
;
6326 *cost
+= extra_cost
->alu
.logical
;
6329 /* In both cases we want to cost both operands. */
6330 *cost
+= rtx_cost (new_op0
, (enum rtx_code
) code
, 0, speed
)
6331 + rtx_cost (op1
, (enum rtx_code
) code
, 1, speed
);
6340 op0
= aarch64_strip_shift (x
);
6342 if (VECTOR_MODE_P (mode
))
6345 *cost
+= extra_cost
->vect
.alu
;
6349 /* MVN-shifted-reg. */
6352 *cost
+= rtx_cost (op0
, (enum rtx_code
) code
, 0, speed
);
6355 *cost
+= extra_cost
->alu
.log_shift
;
6359 /* EON can have two forms: (xor (not a) b) but also (not (xor a b)).
6360 Handle the second form here taking care that 'a' in the above can
6362 else if (GET_CODE (op0
) == XOR
)
6364 rtx newop0
= XEXP (op0
, 0);
6365 rtx newop1
= XEXP (op0
, 1);
6366 rtx op0_stripped
= aarch64_strip_shift (newop0
);
6368 *cost
+= rtx_cost (newop1
, (enum rtx_code
) code
, 1, speed
)
6369 + rtx_cost (op0_stripped
, XOR
, 0, speed
);
6373 if (op0_stripped
!= newop0
)
6374 *cost
+= extra_cost
->alu
.log_shift
;
6376 *cost
+= extra_cost
->alu
.logical
;
6383 *cost
+= extra_cost
->alu
.logical
;
6390 /* If a value is written in SI mode, then zero extended to DI
6391 mode, the operation will in general be free as a write to
6392 a 'w' register implicitly zeroes the upper bits of an 'x'
6393 register. However, if this is
6395 (set (reg) (zero_extend (reg)))
6397 we must cost the explicit register move. */
6399 && GET_MODE (op0
) == SImode
6402 int op_cost
= rtx_cost (XEXP (x
, 0), ZERO_EXTEND
, 0, speed
);
6404 if (!op_cost
&& speed
)
6406 *cost
+= extra_cost
->alu
.extend
;
6408 /* Free, the cost is that of the SI mode operation. */
6413 else if (MEM_P (XEXP (x
, 0)))
6415 /* All loads can zero extend to any size for free. */
6416 *cost
= rtx_cost (XEXP (x
, 0), ZERO_EXTEND
, param
, speed
);
6422 if (VECTOR_MODE_P (mode
))
6425 *cost
+= extra_cost
->vect
.alu
;
6430 *cost
+= extra_cost
->alu
.extend
;
6436 if (MEM_P (XEXP (x
, 0)))
6441 rtx address
= XEXP (XEXP (x
, 0), 0);
6442 *cost
+= extra_cost
->ldst
.load_sign_extend
;
6445 COSTS_N_INSNS (aarch64_address_cost (address
, mode
,
6453 if (VECTOR_MODE_P (mode
))
6454 *cost
+= extra_cost
->vect
.alu
;
6456 *cost
+= extra_cost
->alu
.extend
;
6464 if (CONST_INT_P (op1
))
6468 if (VECTOR_MODE_P (mode
))
6470 /* Vector shift (immediate). */
6471 *cost
+= extra_cost
->vect
.alu
;
6475 /* LSL (immediate), UBMF, UBFIZ and friends. These are all
6477 *cost
+= extra_cost
->alu
.shift
;
6481 /* We can incorporate zero/sign extend for free. */
6482 if (GET_CODE (op0
) == ZERO_EXTEND
6483 || GET_CODE (op0
) == SIGN_EXTEND
)
6484 op0
= XEXP (op0
, 0);
6486 *cost
+= rtx_cost (op0
, ASHIFT
, 0, speed
);
6493 if (VECTOR_MODE_P (mode
))
6495 /* Vector shift (register). */
6496 *cost
+= extra_cost
->vect
.alu
;
6501 *cost
+= extra_cost
->alu
.shift_reg
;
6504 return false; /* All arguments need to be in registers. */
6514 if (CONST_INT_P (op1
))
6516 /* ASR (immediate) and friends. */
6519 if (VECTOR_MODE_P (mode
))
6520 *cost
+= extra_cost
->vect
.alu
;
6522 *cost
+= extra_cost
->alu
.shift
;
6525 *cost
+= rtx_cost (op0
, (enum rtx_code
) code
, 0, speed
);
6531 /* ASR (register) and friends. */
6534 if (VECTOR_MODE_P (mode
))
6535 *cost
+= extra_cost
->vect
.alu
;
6537 *cost
+= extra_cost
->alu
.shift_reg
;
6539 return false; /* All arguments need to be in registers. */
6544 if (aarch64_cmodel
== AARCH64_CMODEL_LARGE
6545 || aarch64_cmodel
== AARCH64_CMODEL_SMALL_SPIC
)
6549 *cost
+= extra_cost
->ldst
.load
;
6551 else if (aarch64_cmodel
== AARCH64_CMODEL_SMALL
6552 || aarch64_cmodel
== AARCH64_CMODEL_SMALL_PIC
)
6554 /* ADRP, followed by ADD. */
6555 *cost
+= COSTS_N_INSNS (1);
6557 *cost
+= 2 * extra_cost
->alu
.arith
;
6559 else if (aarch64_cmodel
== AARCH64_CMODEL_TINY
6560 || aarch64_cmodel
== AARCH64_CMODEL_TINY_PIC
)
6564 *cost
+= extra_cost
->alu
.arith
;
6569 /* One extra load instruction, after accessing the GOT. */
6570 *cost
+= COSTS_N_INSNS (1);
6572 *cost
+= extra_cost
->ldst
.load
;
6578 /* ADRP/ADD (immediate). */
6580 *cost
+= extra_cost
->alu
.arith
;
6588 if (VECTOR_MODE_P (mode
))
6589 *cost
+= extra_cost
->vect
.alu
;
6591 *cost
+= extra_cost
->alu
.bfx
;
6594 /* We can trust that the immediates used will be correct (there
6595 are no by-register forms), so we need only cost op0. */
6596 *cost
+= rtx_cost (XEXP (x
, 0), (enum rtx_code
) code
, 0, speed
);
6600 *cost
+= aarch64_rtx_mult_cost (x
, MULT
, 0, speed
);
6601 /* aarch64_rtx_mult_cost always handles recursion to its
6609 if (VECTOR_MODE_P (mode
))
6610 *cost
+= extra_cost
->vect
.alu
;
6611 else if (GET_MODE_CLASS (GET_MODE (x
)) == MODE_INT
)
6612 *cost
+= (extra_cost
->mult
[GET_MODE (x
) == DImode
].add
6613 + extra_cost
->mult
[GET_MODE (x
) == DImode
].idiv
);
6614 else if (GET_MODE (x
) == DFmode
)
6615 *cost
+= (extra_cost
->fp
[1].mult
6616 + extra_cost
->fp
[1].div
);
6617 else if (GET_MODE (x
) == SFmode
)
6618 *cost
+= (extra_cost
->fp
[0].mult
6619 + extra_cost
->fp
[0].div
);
6621 return false; /* All arguments need to be in registers. */
6628 if (VECTOR_MODE_P (mode
))
6629 *cost
+= extra_cost
->vect
.alu
;
6630 else if (GET_MODE_CLASS (mode
) == MODE_INT
)
6631 /* There is no integer SQRT, so only DIV and UDIV can get
6633 *cost
+= extra_cost
->mult
[mode
== DImode
].idiv
;
6635 *cost
+= extra_cost
->fp
[mode
== DFmode
].div
;
6637 return false; /* All arguments need to be in registers. */
6640 return aarch64_if_then_else_costs (XEXP (x
, 0), XEXP (x
, 1),
6641 XEXP (x
, 2), cost
, speed
);
6654 return false; /* All arguments must be in registers. */
6663 if (VECTOR_MODE_P (mode
))
6664 *cost
+= extra_cost
->vect
.alu
;
6666 *cost
+= extra_cost
->fp
[mode
== DFmode
].fma
;
6669 /* FMSUB, FNMADD, and FNMSUB are free. */
6670 if (GET_CODE (op0
) == NEG
)
6671 op0
= XEXP (op0
, 0);
6673 if (GET_CODE (op2
) == NEG
)
6674 op2
= XEXP (op2
, 0);
6676 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
6677 and the by-element operand as operand 0. */
6678 if (GET_CODE (op1
) == NEG
)
6679 op1
= XEXP (op1
, 0);
6681 /* Catch vector-by-element operations. The by-element operand can
6682 either be (vec_duplicate (vec_select (x))) or just
6683 (vec_select (x)), depending on whether we are multiplying by
6684 a vector or a scalar.
6686 Canonicalization is not very good in these cases, FMA4 will put the
6687 by-element operand as operand 0, FNMA4 will have it as operand 1. */
6688 if (GET_CODE (op0
) == VEC_DUPLICATE
)
6689 op0
= XEXP (op0
, 0);
6690 else if (GET_CODE (op1
) == VEC_DUPLICATE
)
6691 op1
= XEXP (op1
, 0);
6693 if (GET_CODE (op0
) == VEC_SELECT
)
6694 op0
= XEXP (op0
, 0);
6695 else if (GET_CODE (op1
) == VEC_SELECT
)
6696 op1
= XEXP (op1
, 0);
6698 /* If the remaining parameters are not registers,
6699 get the cost to put them into registers. */
6700 *cost
+= rtx_cost (op0
, FMA
, 0, speed
);
6701 *cost
+= rtx_cost (op1
, FMA
, 1, speed
);
6702 *cost
+= rtx_cost (op2
, FMA
, 2, speed
);
6706 case UNSIGNED_FLOAT
:
6708 *cost
+= extra_cost
->fp
[mode
== DFmode
].fromint
;
6714 if (VECTOR_MODE_P (mode
))
6716 /*Vector truncate. */
6717 *cost
+= extra_cost
->vect
.alu
;
6720 *cost
+= extra_cost
->fp
[mode
== DFmode
].widen
;
6724 case FLOAT_TRUNCATE
:
6727 if (VECTOR_MODE_P (mode
))
6729 /*Vector conversion. */
6730 *cost
+= extra_cost
->vect
.alu
;
6733 *cost
+= extra_cost
->fp
[mode
== DFmode
].narrow
;
6740 /* Strip the rounding part. They will all be implemented
6741 by the fcvt* family of instructions anyway. */
6742 if (GET_CODE (x
) == UNSPEC
)
6744 unsigned int uns_code
= XINT (x
, 1);
6746 if (uns_code
== UNSPEC_FRINTA
6747 || uns_code
== UNSPEC_FRINTM
6748 || uns_code
== UNSPEC_FRINTN
6749 || uns_code
== UNSPEC_FRINTP
6750 || uns_code
== UNSPEC_FRINTZ
)
6751 x
= XVECEXP (x
, 0, 0);
6756 if (VECTOR_MODE_P (mode
))
6757 *cost
+= extra_cost
->vect
.alu
;
6759 *cost
+= extra_cost
->fp
[GET_MODE (x
) == DFmode
].toint
;
6761 *cost
+= rtx_cost (x
, (enum rtx_code
) code
, 0, speed
);
6765 if (VECTOR_MODE_P (mode
))
6769 *cost
+= extra_cost
->vect
.alu
;
6771 else if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
6775 /* FABD, which is analogous to FADD. */
6776 if (GET_CODE (op0
) == MINUS
)
6778 *cost
+= rtx_cost (XEXP (op0
, 0), MINUS
, 0, speed
);
6779 + rtx_cost (XEXP (op0
, 1), MINUS
, 1, speed
);
6781 *cost
+= extra_cost
->fp
[mode
== DFmode
].addsub
;
6785 /* Simple FABS is analogous to FNEG. */
6787 *cost
+= extra_cost
->fp
[mode
== DFmode
].neg
;
6791 /* Integer ABS will either be split to
6792 two arithmetic instructions, or will be an ABS
6793 (scalar), which we don't model. */
6794 *cost
= COSTS_N_INSNS (2);
6796 *cost
+= 2 * extra_cost
->alu
.arith
;
6804 if (VECTOR_MODE_P (mode
))
6805 *cost
+= extra_cost
->vect
.alu
;
6808 /* FMAXNM/FMINNM/FMAX/FMIN.
6809 TODO: This may not be accurate for all implementations, but
6810 we do not model this in the cost tables. */
6811 *cost
+= extra_cost
->fp
[mode
== DFmode
].addsub
;
6817 /* The floating point round to integer frint* instructions. */
6818 if (aarch64_frint_unspec_p (XINT (x
, 1)))
6821 *cost
+= extra_cost
->fp
[mode
== DFmode
].roundint
;
6826 if (XINT (x
, 1) == UNSPEC_RBIT
)
6829 *cost
+= extra_cost
->alu
.rev
;
6837 /* Decompose <su>muldi3_highpart. */
6838 if (/* (truncate:DI */
6841 && GET_MODE (XEXP (x
, 0)) == TImode
6842 && GET_CODE (XEXP (x
, 0)) == LSHIFTRT
6844 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == MULT
6845 /* (ANY_EXTEND:TI (reg:DI))
6846 (ANY_EXTEND:TI (reg:DI))) */
6847 && ((GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == ZERO_EXTEND
6848 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == ZERO_EXTEND
)
6849 || (GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 0)) == SIGN_EXTEND
6850 && GET_CODE (XEXP (XEXP (XEXP (x
, 0), 0), 1)) == SIGN_EXTEND
))
6851 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x
, 0), 0), 0), 0)) == DImode
6852 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x
, 0), 0), 1), 0)) == DImode
6853 /* (const_int 64) */
6854 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
6855 && UINTVAL (XEXP (XEXP (x
, 0), 1)) == 64)
6859 *cost
+= extra_cost
->mult
[mode
== DImode
].extend
;
6860 *cost
+= rtx_cost (XEXP (XEXP (XEXP (XEXP (x
, 0), 0), 0), 0),
6862 *cost
+= rtx_cost (XEXP (XEXP (XEXP (XEXP (x
, 0), 0), 1), 0),
6872 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6874 "\nFailed to cost RTX. Assuming default cost.\n");
/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
   calculated for X.  This cost is stored in *COST.  Returns true
   if the total cost of X was calculated.  */
static bool
aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
                           int param, int *cost, bool speed)
{
  bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      print_rtl_single (dump_file, x);
      fprintf (dump_file, "\n%s cost: %d (%s)\n",
               speed ? "Hot" : "Cold",
               *cost, result ? "final" : "partial");
    }

  return result;
}
static int
aarch64_register_move_cost (machine_mode mode,
                            reg_class_t from_i, reg_class_t to_i)
{
  enum reg_class from = (enum reg_class) from_i;
  enum reg_class to = (enum reg_class) to_i;
  const struct cpu_regmove_cost *regmove_cost
    = aarch64_tune_params.regmove_cost;

  /* Caller save and pointer regs are equivalent to GENERAL_REGS.  */
  if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
    to = GENERAL_REGS;

  if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
    from = GENERAL_REGS;

  /* Moving between GPR and stack cost is the same as GP2GP.  */
  if ((from == GENERAL_REGS && to == STACK_REG)
      || (to == GENERAL_REGS && from == STACK_REG))
    return regmove_cost->GP2GP;

  /* To/From the stack register, we move via the gprs.  */
  if (to == STACK_REG || from == STACK_REG)
    return aarch64_register_move_cost (mode, from, GENERAL_REGS)
           + aarch64_register_move_cost (mode, GENERAL_REGS, to);

  if (GET_MODE_SIZE (mode) == 16)
    {
      /* 128-bit operations on general registers require 2 instructions.  */
      if (from == GENERAL_REGS && to == GENERAL_REGS)
        return regmove_cost->GP2GP * 2;
      else if (from == GENERAL_REGS)
        return regmove_cost->GP2FP * 2;
      else if (to == GENERAL_REGS)
        return regmove_cost->FP2GP * 2;

      /* When AdvSIMD instructions are disabled it is not possible to move
         a 128-bit value directly between Q registers.  This is handled in
         secondary reload.  A general register is used as a scratch to move
         the upper DI value and the lower DI value is moved directly,
         hence the cost is the sum of three moves.  */
      if (!TARGET_SIMD)
        return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;

      return regmove_cost->FP2FP;
    }

  if (from == GENERAL_REGS && to == GENERAL_REGS)
    return regmove_cost->GP2GP;
  else if (from == GENERAL_REGS)
    return regmove_cost->GP2FP;
  else if (to == GENERAL_REGS)
    return regmove_cost->FP2GP;

  return regmove_cost->FP2FP;
}
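
/* Worked example (cost numbers are hypothetical, not from any tuning
   table): with GP2FP = 5, FP2GP = 6 and FP2FP = 2, a 128-bit move between
   FP registers without TARGET_SIMD costs 5 + 6 + 2 = 13, reflecting the
   GPR scratch used for the upper doubleword, whereas with SIMD enabled
   the same move costs just FP2FP = 2.  */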
static int
aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
                          reg_class_t rclass ATTRIBUTE_UNUSED,
                          bool in ATTRIBUTE_UNUSED)
{
  return aarch64_tune_params.memmov_cost;
}

/* Return the number of instructions that can be issued per cycle.  */
static int
aarch64_sched_issue_rate (void)
{
  return aarch64_tune_params.issue_rate;
}

static int
aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
{
  int issue_rate = aarch64_sched_issue_rate ();

  return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
}
/* Vectorizer cost model target hooks.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                    tree vectype,
                                    int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
    case scalar_stmt:
      return aarch64_tune_params.vec_costs->scalar_stmt_cost;

    case scalar_load:
      return aarch64_tune_params.vec_costs->scalar_load_cost;

    case scalar_store:
      return aarch64_tune_params.vec_costs->scalar_store_cost;

    case vector_stmt:
      return aarch64_tune_params.vec_costs->vec_stmt_cost;

    case vector_load:
      return aarch64_tune_params.vec_costs->vec_align_load_cost;

    case vector_store:
      return aarch64_tune_params.vec_costs->vec_store_cost;

    case vec_to_scalar:
      return aarch64_tune_params.vec_costs->vec_to_scalar_cost;

    case scalar_to_vec:
      return aarch64_tune_params.vec_costs->scalar_to_vec_cost;

    case unaligned_load:
      return aarch64_tune_params.vec_costs->vec_unalign_load_cost;

    case unaligned_store:
      return aarch64_tune_params.vec_costs->vec_unalign_store_cost;

    case cond_branch_taken:
      return aarch64_tune_params.vec_costs->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return aarch64_tune_params.vec_costs->cond_not_taken_branch_cost;

    case vec_promote_demote:
      return aarch64_tune_params.vec_costs->vec_stmt_cost;

    case vec_construct:
      elements = TYPE_VECTOR_SUBPARTS (vectype);
      return elements / 2 + 1;

    default:
      gcc_unreachable ();
    }
}
/* Implement targetm.vectorize.add_stmt_cost.  */
static unsigned
aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
                       struct _stmt_vec_info *stmt_info, int misalign,
                       enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost =
        aarch64_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
         vectorized are weighted more heavily.  The value here is
         a function (linear for now) of the loop nest level.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
        {
          loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
          struct loop *loop = LOOP_VINFO_LOOP (loop_info);
          unsigned nest_level = loop_depth (loop);

          count *= nest_level;
        }

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}
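
/* Example of the weighting above (illustrative only): a vector statement
   with stmt_cost 1 that sits in an inner loop two levels deep relative to
   the loop being vectorized has its count multiplied by nest_level = 2,
   so it contributes 2 to the vect_body bucket instead of 1.  */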
static void initialize_aarch64_code_model (void);

/* Parse the architecture extension string.  */

static void
aarch64_parse_extension (char *str)
{
  /* The extension string is parsed left to right.  */
  const struct aarch64_option_extension *opt = NULL;

  /* Flag to say whether we are adding or removing an extension.  */
  int adding_ext = -1;

  while (str != NULL && *str != 0)
    {
      char *ext;
      size_t len;

      str++;
      ext = strchr (str, '+');

      if (ext != NULL)
        len = ext - str;
      else
        len = strlen (str);

      if (len >= 2 && strncmp (str, "no", 2) == 0)
        {
          adding_ext = 0;
          len -= 2;
          str += 2;
        }
      else if (len > 0)
        adding_ext = 1;

      if (len == 0)
        {
          error ("missing feature modifier after %qs", adding_ext ? "+"
                                                                  : "+no");
          return;
        }

      /* Scan over the extensions table trying to find an exact match.  */
      for (opt = all_extensions; opt->name != NULL; opt++)
        {
          if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
            {
              /* Add or remove the extension.  */
              if (adding_ext)
                aarch64_isa_flags |= opt->flags_on;
              else
                aarch64_isa_flags &= ~(opt->flags_off);
              break;
            }
        }

      if (opt->name == NULL)
        {
          /* Extension not found in list.  */
          error ("unknown feature modifier %qs", str);
          return;
        }

      str = ext;
    }
}
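
/* Example (hypothetical command line): for -march=armv8-a+crc+nofp the
   extension string "+crc+nofp" is walked left to right; "crc" turns on
   the corresponding bits in aarch64_isa_flags via flags_on, while the
   "no" prefix on "fp" clears bits via flags_off.  */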
7143 /* Parse the ARCH string. */
7146 aarch64_parse_arch (void)
7149 const struct processor
*arch
;
7150 char *str
= (char *) alloca (strlen (aarch64_arch_string
) + 1);
7153 strcpy (str
, aarch64_arch_string
);
7155 ext
= strchr (str
, '+');
7164 error ("missing arch name in -march=%qs", str
);
7168 /* Loop through the list of supported ARCHs to find a match. */
7169 for (arch
= all_architectures
; arch
->name
!= NULL
; arch
++)
7171 if (strlen (arch
->name
) == len
&& strncmp (arch
->name
, str
, len
) == 0)
7173 selected_arch
= arch
;
7174 aarch64_isa_flags
= selected_arch
->flags
;
7177 selected_cpu
= &all_cores
[selected_arch
->core
];
7181 /* ARCH string contains at least one extension. */
7182 aarch64_parse_extension (ext
);
7185 if (strcmp (selected_arch
->arch
, selected_cpu
->arch
))
7187 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
7188 selected_cpu
->name
, selected_arch
->name
);
7195 /* ARCH name not found in list. */
7196 error ("unknown value %qs for -march", str
);
7200 /* Parse the CPU string. */
7203 aarch64_parse_cpu (void)
7206 const struct processor
*cpu
;
7207 char *str
= (char *) alloca (strlen (aarch64_cpu_string
) + 1);
7210 strcpy (str
, aarch64_cpu_string
);
7212 ext
= strchr (str
, '+');
7221 error ("missing cpu name in -mcpu=%qs", str
);
7225 /* Loop through the list of supported CPUs to find a match. */
7226 for (cpu
= all_cores
; cpu
->name
!= NULL
; cpu
++)
7228 if (strlen (cpu
->name
) == len
&& strncmp (cpu
->name
, str
, len
) == 0)
7231 aarch64_isa_flags
= selected_cpu
->flags
;
7235 /* CPU string contains at least one extension. */
7236 aarch64_parse_extension (ext
);
7243 /* CPU name not found in list. */
7244 error ("unknown value %qs for -mcpu", str
);
7248 /* Parse the TUNE string. */
7251 aarch64_parse_tune (void)
7253 const struct processor
*cpu
;
7254 char *str
= (char *) alloca (strlen (aarch64_tune_string
) + 1);
7255 strcpy (str
, aarch64_tune_string
);
7257 /* Loop through the list of supported CPUs to find a match. */
7258 for (cpu
= all_cores
; cpu
->name
!= NULL
; cpu
++)
7260 if (strcmp (cpu
->name
, str
) == 0)
7262 selected_tune
= cpu
;
7267 /* CPU name not found in list. */
7268 error ("unknown value %qs for -mtune", str
);
7272 /* Parse TOKEN, which has length LENGTH to see if it is an option
7273 described in FLAG. If it is, return the index bit for that fusion type.
7274 If not, error (printing OPTION_NAME) and return zero. */
7277 aarch64_parse_one_option_token (const char *token
,
7279 const struct aarch64_flag_desc
*flag
,
7280 const char *option_name
)
7282 for (; flag
->name
!= NULL
; flag
++)
7284 if (length
== strlen (flag
->name
)
7285 && !strncmp (flag
->name
, token
, length
))
7289 error ("unknown flag passed in -moverride=%s (%s)", option_name
, token
);
7293 /* Parse OPTION which is a comma-separated list of flags to enable.
7294 FLAGS gives the list of flags we understand, INITIAL_STATE gives any
7295 default state we inherit from the CPU tuning structures. OPTION_NAME
7296 gives the top-level option we are parsing in the -moverride string,
7297 for use in error messages. */
7300 aarch64_parse_boolean_options (const char *option
,
7301 const struct aarch64_flag_desc
*flags
,
7302 unsigned int initial_state
,
7303 const char *option_name
)
7305 const char separator
= '.';
7306 const char* specs
= option
;
7307 const char* ntoken
= option
;
7308 unsigned int found_flags
= initial_state
;
7310 while ((ntoken
= strchr (specs
, separator
)))
7312 size_t token_length
= ntoken
- specs
;
7313 unsigned token_ops
= aarch64_parse_one_option_token (specs
,
7317 /* If we find "none" (or, for simplicity's sake, an error) anywhere
7318 in the token stream, reset the supported operations. So:
7320 adrp+add.cmp+branch.none.adrp+add
7322 would have the result of turning on only adrp+add fusion. */
7326 found_flags
|= token_ops
;
7330 /* We ended with a comma, print something. */
7333 error ("%s string ill-formed\n", option_name
);
7337 /* We still have one more token to parse. */
7338 size_t token_length
= strlen (specs
);
7339 unsigned token_ops
= aarch64_parse_one_option_token (specs
,
7346 found_flags
|= token_ops
;
7350 /* Support for overriding instruction fusion. */
7353 aarch64_parse_fuse_string (const char *fuse_string
,
7354 struct tune_params
*tune
)
7356 tune
->fusible_ops
= aarch64_parse_boolean_options (fuse_string
,
7357 aarch64_fusible_pairs
,
7362 /* Support for overriding other tuning flags. */
7365 aarch64_parse_tune_string (const char *tune_string
,
7366 struct tune_params
*tune
)
7368 tune
->extra_tuning_flags
7369 = aarch64_parse_boolean_options (tune_string
,
7370 aarch64_tuning_flags
,
7371 tune
->extra_tuning_flags
,
7375 /* Parse TOKEN, which has length LENGTH to see if it is a tuning option
7376 we understand. If it is, extract the option string and handoff to
7377 the appropriate function. */
7380 aarch64_parse_one_override_token (const char* token
,
7382 struct tune_params
*tune
)
7384 const struct aarch64_tuning_override_function
*fn
7385 = aarch64_tuning_override_functions
;
7387 const char *option_part
= strchr (token
, '=');
7390 error ("tuning string missing in option (%s)", token
);
7394 /* Get the length of the option name. */
7395 length
= option_part
- token
;
7396 /* Skip the '=' to get to the option string. */
7399 for (; fn
->name
!= NULL
; fn
++)
7401 if (!strncmp (fn
->name
, token
, length
))
7403 fn
->parse_override (option_part
, tune
);
7408 error ("unknown tuning option (%s)",token
);
7412 /* Parse STRING looking for options in the format:
7413 string :: option:string
7414 option :: name=substring
7416 substring :: defined by option. */
7419 aarch64_parse_override_string (const char* input_string
,
7420 struct tune_params
* tune
)
7422 const char separator
= ':';
7423 size_t string_length
= strlen (input_string
) + 1;
7424 char *string_root
= (char *) xmalloc (sizeof (*string_root
) * string_length
);
7425 char *string
= string_root
;
7426 strncpy (string
, input_string
, string_length
);
7427 string
[string_length
- 1] = '\0';
7429 char* ntoken
= string
;
7431 while ((ntoken
= strchr (string
, separator
)))
7433 size_t token_length
= ntoken
- string
;
7434 /* Make this substring look like a string. */
7436 aarch64_parse_one_override_token (string
, token_length
, tune
);
7440 /* One last option to parse. */
7441 aarch64_parse_one_override_token (string
, strlen (string
), tune
);
/* Implement TARGET_OPTION_OVERRIDE.  */

static void
aarch64_override_options (void)
{
  /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
     If either of -march or -mtune is given, they override their
     respective component of -mcpu.

     So, first parse AARCH64_CPU_STRING, then the others, be careful
     with -march as, if -mcpu is not present on the command line, march
     must set a sensible default CPU.  */
  if (aarch64_cpu_string)
    aarch64_parse_cpu ();

  if (aarch64_arch_string)
    aarch64_parse_arch ();

  if (aarch64_tune_string)
    aarch64_parse_tune ();

#ifndef HAVE_AS_MABI_OPTION
  /* The compiler may have been configured with 2.23.* binutils, which does
     not have support for ILP32.  */
  if (TARGET_ILP32)
    error ("Assembler does not support -mabi=ilp32");
#endif

  initialize_aarch64_code_model ();

  aarch64_build_bitmask_table ();

  /* This target defaults to strict volatile bitfields.  */
  if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
    flag_strict_volatile_bitfields = 1;

  /* If the user did not specify a processor, choose the default
     one for them.  This will be the CPU set during configuration using
     --with-cpu, otherwise it is "generic".  */
  if (!selected_cpu)
    {
      selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
      aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
    }

  gcc_assert (selected_cpu);

  if (!selected_tune)
    selected_tune = selected_cpu;

  aarch64_tune_flags = selected_tune->flags;
  aarch64_tune = selected_tune->core;
  /* Make a copy of the tuning parameters attached to the core, which
     we may later overwrite.  */
  aarch64_tune_params = *(selected_tune->tune);
  aarch64_architecture_version = selected_cpu->architecture_version;

  if (aarch64_override_tune_string)
    aarch64_parse_override_string (aarch64_override_tune_string,
                                   &aarch64_tune_params);

  if (aarch64_fix_a53_err835769 == 2)
    {
#ifdef TARGET_FIX_ERR_A53_835769_DEFAULT
      aarch64_fix_a53_err835769 = 1;
#else
      aarch64_fix_a53_err835769 = 0;
#endif
    }

  aarch64_register_fma_steering ();

  aarch64_override_options_after_change ();
}
/* Implement targetm.override_options_after_change.  */

static void
aarch64_override_options_after_change (void)
{
  if (flag_omit_frame_pointer)
    flag_omit_leaf_frame_pointer = false;
  else if (flag_omit_leaf_frame_pointer)
    flag_omit_frame_pointer = true;

  /* If not optimizing for size, set the default
     alignment to what the target wants.  */
  if (!optimize_size)
    {
      if (align_loops <= 0)
        align_loops = aarch64_tune_params.loop_align;
      if (align_jumps <= 0)
        align_jumps = aarch64_tune_params.jump_align;
      if (align_functions <= 0)
        align_functions = aarch64_tune_params.function_align;
    }
}
static struct machine_function *
aarch64_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_cleared_alloc<machine_function> ();
  return machine;
}

void
aarch64_init_expanders (void)
{
  init_machine_status = aarch64_init_machine_status;
}
/* A checking mechanism for the implementation of the various code models.  */

static void
initialize_aarch64_code_model (void)
{
  if (flag_pic)
    {
      switch (aarch64_cmodel_var)
	{
	case AARCH64_CMODEL_TINY:
	  aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
	  break;
	case AARCH64_CMODEL_SMALL:
#ifdef HAVE_AS_SMALL_PIC_RELOCS
	  aarch64_cmodel = (flag_pic == 2
			    ? AARCH64_CMODEL_SMALL_PIC
			    : AARCH64_CMODEL_SMALL_SPIC);
#else
	  aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
#endif
	  break;
	case AARCH64_CMODEL_LARGE:
	  sorry ("code model %qs with -f%s", "large",
		 flag_pic > 1 ? "PIC" : "pic");
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    aarch64_cmodel = aarch64_cmodel_var;
}
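/* Worked example (added for illustration; the mapping is implied by
   initialize_aarch64_code_model above):

     -mcmodel=tiny   -fpic/-fPIC  ->  AARCH64_CMODEL_TINY_PIC
     -mcmodel=small  -fpic        ->  AARCH64_CMODEL_SMALL_SPIC when the
                                      assembler supports the small PIC
                                      relocations, otherwise
                                      AARCH64_CMODEL_SMALL_PIC
     -mcmodel=small  -fPIC        ->  AARCH64_CMODEL_SMALL_PIC
     -mcmodel=large  -fpic/-fPIC  ->  rejected with sorry ()
     any model without -fpic      ->  used unchanged.  */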
/* Return true if SYMBOL_REF X binds locally.  */

static bool
aarch64_symbol_binds_local_p (const_rtx x)
{
  return (SYMBOL_REF_DECL (x)
	  ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
	  : SYMBOL_REF_LOCAL_P (x));
}

/* Return true if SYMBOL_REF X is thread local.  */
static bool
aarch64_tls_symbol_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  if (GET_CODE (x) != SYMBOL_REF)
    return false;

  return SYMBOL_REF_TLS_MODEL (x) != 0;
}
/* Classify a TLS symbol into one of the TLS kinds.  */
enum aarch64_symbol_type
aarch64_classify_tls_symbol (rtx x)
{
  enum tls_model tls_kind = tls_symbolic_operand_type (x);

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
    case TLS_MODEL_LOCAL_DYNAMIC:
      return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;

    case TLS_MODEL_INITIAL_EXEC:
      return SYMBOL_SMALL_GOTTPREL;

    case TLS_MODEL_LOCAL_EXEC:
      return SYMBOL_TLSLE;

    case TLS_MODEL_EMULATED:
    case TLS_MODEL_NONE:
      return SYMBOL_FORCE_TO_MEM;

    default:
      gcc_unreachable ();
    }
}
/* Return the method that should be used to access SYMBOL_REF or
   LABEL_REF X in context CONTEXT.  */

enum aarch64_symbol_type
aarch64_classify_symbol (rtx x, rtx offset,
			 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
{
  if (GET_CODE (x) == LABEL_REF)
    {
      switch (aarch64_cmodel)
	{
	case AARCH64_CMODEL_LARGE:
	  return SYMBOL_FORCE_TO_MEM;

	case AARCH64_CMODEL_TINY_PIC:
	case AARCH64_CMODEL_TINY:
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL_SPIC:
	case AARCH64_CMODEL_SMALL_PIC:
	case AARCH64_CMODEL_SMALL:
	  return SYMBOL_SMALL_ABSOLUTE;

	default:
	  gcc_unreachable ();
	}
    }

  if (GET_CODE (x) == SYMBOL_REF)
    {
      if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
	return SYMBOL_FORCE_TO_MEM;

      if (aarch64_tls_symbol_p (x))
	return aarch64_classify_tls_symbol (x);

      switch (aarch64_cmodel)
	{
	case AARCH64_CMODEL_TINY:
	  /* When we retrieve a symbol + offset address, we have to make sure
	     the offset does not cause overflow of the final address.  But
	     we have no way of knowing the address of the symbol at compile
	     time, so we can't accurately say if the distance between the PC
	     and symbol + offset is outside the addressable range of +/-1M in
	     the TINY code model.  So we rely on images not being greater than
	     1M and cap the offset at 1M; anything beyond 1M will have to
	     be loaded using an alternative mechanism.  */
	  if (SYMBOL_REF_WEAK (x)
	      || INTVAL (offset) < -1048575 || INTVAL (offset) > 1048575)
	    return SYMBOL_FORCE_TO_MEM;
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL:
	  /* Same reasoning as the tiny code model, but the offset cap here
	     is 4G.  */
	  if (SYMBOL_REF_WEAK (x)
	      || !IN_RANGE (INTVAL (offset), HOST_WIDE_INT_C (-4294967263),
			    HOST_WIDE_INT_C (4294967264)))
	    return SYMBOL_FORCE_TO_MEM;
	  return SYMBOL_SMALL_ABSOLUTE;

	case AARCH64_CMODEL_TINY_PIC:
	  if (!aarch64_symbol_binds_local_p (x))
	    return SYMBOL_TINY_GOT;
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL_SPIC:
	case AARCH64_CMODEL_SMALL_PIC:
	  if (!aarch64_symbol_binds_local_p (x))
	    return (aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC
		    ? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G);
	  return SYMBOL_SMALL_ABSOLUTE;

	default:
	  gcc_unreachable ();
	}
    }

  /* By default push everything into the constant pool.  */
  return SYMBOL_FORCE_TO_MEM;
}
bool
aarch64_constant_address_p (rtx x)
{
  return (CONSTANT_P (x) && memory_address_p (DImode, x));
}

bool
aarch64_legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return false;

  return true;
}
/* Return true if X holds either a valid quarter-precision constant or
   the floating-point constant +0.0.  */
static bool
aarch64_valid_floating_const (machine_mode mode, rtx x)
{
  if (!CONST_DOUBLE_P (x))
    return false;

  if (aarch64_float_const_zero_rtx_p (x))
    return true;

  /* We only handle moving 0.0 to a TFmode register.  */
  if (!(mode == SFmode || mode == DFmode))
    return false;

  return aarch64_float_const_representable_p (x);
}
static bool
aarch64_legitimate_constant_p (machine_mode mode, rtx x)
{
  /* Do not allow vector struct mode constants.  We could support
     0 and -1 easily, but they need support in aarch64-simd.md.  */
  if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
    return false;

  /* This could probably go away because
     we now decompose CONST_INTs according to expand_mov_immediate.  */
  if ((GET_CODE (x) == CONST_VECTOR
       && aarch64_simd_valid_immediate (x, mode, false, NULL))
      || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
    return !targetm.cannot_force_const_mem (mode, x);

  if (GET_CODE (x) == HIGH
      && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
    return true;

  return aarch64_constant_address_p (x);
}
rtx
aarch64_load_tp (rtx target)
{
  if (!target
      || GET_MODE (target) != Pmode
      || !register_operand (target, Pmode))
    target = gen_reg_rtx (Pmode);

  /* Can return in any reg.  */
  emit_insn (gen_aarch64_load_tp_hard (target));
  return target;
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
   Return the type to use as __builtin_va_list.

   AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type
   defined as:

       struct __va_list
       {
	 void *__stack;
	 void *__gr_top;
	 void *__vr_top;
	 int   __gr_offs;
	 int   __vr_offs;
       };  */

static tree
aarch64_build_builtin_va_list (void)
{
  tree va_list_name;
  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;

  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;

  /* Create the fields.  */
  f_stack = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__stack"),
			ptr_type_node);
  f_grtop = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__gr_top"),
			ptr_type_node);
  f_vrtop = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__vr_top"),
			ptr_type_node);
  f_groff = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__gr_offs"),
			integer_type_node);
  f_vroff = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__vr_offs"),
			integer_type_node);

  DECL_ARTIFICIAL (f_stack) = 1;
  DECL_ARTIFICIAL (f_grtop) = 1;
  DECL_ARTIFICIAL (f_vrtop) = 1;
  DECL_ARTIFICIAL (f_groff) = 1;
  DECL_ARTIFICIAL (f_vroff) = 1;

  DECL_FIELD_CONTEXT (f_stack) = va_list_type;
  DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
  DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
  DECL_FIELD_CONTEXT (f_groff) = va_list_type;
  DECL_FIELD_CONTEXT (f_vroff) = va_list_type;

  TYPE_FIELDS (va_list_type) = f_stack;
  DECL_CHAIN (f_stack) = f_grtop;
  DECL_CHAIN (f_grtop) = f_vrtop;
  DECL_CHAIN (f_vrtop) = f_groff;
  DECL_CHAIN (f_groff) = f_vroff;

  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
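/* Illustrative sketch (added commentary with an assumed user-level example,
   not compiler code): with the record built above, a varargs function on
   AArch64 behaves from the source level roughly like

       #include <stdarg.h>

       int
       sum (int n, ...)
       {
	 va_list ap;			   // struct __va_list underneath
	 va_start (ap, n);		   // initialises __stack, __gr_top,
					   // __vr_top, __gr_offs, __vr_offs
	 int total = 0;
	 for (int i = 0; i < n; i++)
	   total += va_arg (ap, int);	   // consumes __gr_offs, then falls
					   // back to __stack
	 va_end (ap);
	 return total;
       }

   The initialisation and the consumption are what
   aarch64_expand_builtin_va_start and aarch64_gimplify_va_arg_expr below
   expand into trees for.  */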
7872 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
7874 aarch64_expand_builtin_va_start (tree valist
, rtx nextarg ATTRIBUTE_UNUSED
)
7876 const CUMULATIVE_ARGS
*cum
;
7877 tree f_stack
, f_grtop
, f_vrtop
, f_groff
, f_vroff
;
7878 tree stack
, grtop
, vrtop
, groff
, vroff
;
7880 int gr_save_area_size
;
7881 int vr_save_area_size
;
7884 cum
= &crtl
->args
.info
;
7886 = (NUM_ARG_REGS
- cum
->aapcs_ncrn
) * UNITS_PER_WORD
;
7888 = (NUM_FP_ARG_REGS
- cum
->aapcs_nvrn
) * UNITS_PER_VREG
;
7892 gcc_assert (cum
->aapcs_nvrn
== 0);
7893 vr_save_area_size
= 0;
7896 f_stack
= TYPE_FIELDS (va_list_type_node
);
7897 f_grtop
= DECL_CHAIN (f_stack
);
7898 f_vrtop
= DECL_CHAIN (f_grtop
);
7899 f_groff
= DECL_CHAIN (f_vrtop
);
7900 f_vroff
= DECL_CHAIN (f_groff
);
7902 stack
= build3 (COMPONENT_REF
, TREE_TYPE (f_stack
), valist
, f_stack
,
7904 grtop
= build3 (COMPONENT_REF
, TREE_TYPE (f_grtop
), valist
, f_grtop
,
7906 vrtop
= build3 (COMPONENT_REF
, TREE_TYPE (f_vrtop
), valist
, f_vrtop
,
7908 groff
= build3 (COMPONENT_REF
, TREE_TYPE (f_groff
), valist
, f_groff
,
7910 vroff
= build3 (COMPONENT_REF
, TREE_TYPE (f_vroff
), valist
, f_vroff
,
7913 /* Emit code to initialize STACK, which points to the next varargs stack
7914 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
7915 by named arguments. STACK is 8-byte aligned. */
7916 t
= make_tree (TREE_TYPE (stack
), virtual_incoming_args_rtx
);
7917 if (cum
->aapcs_stack_size
> 0)
7918 t
= fold_build_pointer_plus_hwi (t
, cum
->aapcs_stack_size
* UNITS_PER_WORD
);
7919 t
= build2 (MODIFY_EXPR
, TREE_TYPE (stack
), stack
, t
);
7920 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7922 /* Emit code to initialize GRTOP, the top of the GR save area.
7923 virtual_incoming_args_rtx should have been 16 byte aligned. */
7924 t
= make_tree (TREE_TYPE (grtop
), virtual_incoming_args_rtx
);
7925 t
= build2 (MODIFY_EXPR
, TREE_TYPE (grtop
), grtop
, t
);
7926 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7928 /* Emit code to initialize VRTOP, the top of the VR save area.
7929 This address is gr_save_area_bytes below GRTOP, rounded
7930 down to the next 16-byte boundary. */
7931 t
= make_tree (TREE_TYPE (vrtop
), virtual_incoming_args_rtx
);
7932 vr_offset
= AARCH64_ROUND_UP (gr_save_area_size
,
7933 STACK_BOUNDARY
/ BITS_PER_UNIT
);
7936 t
= fold_build_pointer_plus_hwi (t
, -vr_offset
);
7937 t
= build2 (MODIFY_EXPR
, TREE_TYPE (vrtop
), vrtop
, t
);
7938 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7940 /* Emit code to initialize GROFF, the offset from GRTOP of the
7941 next GPR argument. */
7942 t
= build2 (MODIFY_EXPR
, TREE_TYPE (groff
), groff
,
7943 build_int_cst (TREE_TYPE (groff
), -gr_save_area_size
));
7944 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7946 /* Likewise emit code to initialize VROFF, the offset from FTOP
7947 of the next VR argument. */
7948 t
= build2 (MODIFY_EXPR
, TREE_TYPE (vroff
), vroff
,
7949 build_int_cst (TREE_TYPE (vroff
), -vr_save_area_size
));
7950 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
7953 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
7956 aarch64_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
*pre_p
,
7957 gimple_seq
*post_p ATTRIBUTE_UNUSED
)
7961 bool is_ha
; /* is HFA or HVA. */
7962 bool dw_align
; /* double-word align. */
7963 machine_mode ag_mode
= VOIDmode
;
7967 tree f_stack
, f_grtop
, f_vrtop
, f_groff
, f_vroff
;
7968 tree stack
, f_top
, f_off
, off
, arg
, roundup
, on_stack
;
7969 HOST_WIDE_INT size
, rsize
, adjust
, align
;
7970 tree t
, u
, cond1
, cond2
;
7972 indirect_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
, false);
7974 type
= build_pointer_type (type
);
7976 mode
= TYPE_MODE (type
);
7978 f_stack
= TYPE_FIELDS (va_list_type_node
);
7979 f_grtop
= DECL_CHAIN (f_stack
);
7980 f_vrtop
= DECL_CHAIN (f_grtop
);
7981 f_groff
= DECL_CHAIN (f_vrtop
);
7982 f_vroff
= DECL_CHAIN (f_groff
);
7984 stack
= build3 (COMPONENT_REF
, TREE_TYPE (f_stack
), unshare_expr (valist
),
7985 f_stack
, NULL_TREE
);
7986 size
= int_size_in_bytes (type
);
7987 align
= aarch64_function_arg_alignment (mode
, type
) / BITS_PER_UNIT
;
7991 if (aarch64_vfp_is_call_or_return_candidate (mode
,
7997 /* TYPE passed in fp/simd registers. */
7999 aarch64_err_no_fpadvsimd (mode
, "varargs");
8001 f_top
= build3 (COMPONENT_REF
, TREE_TYPE (f_vrtop
),
8002 unshare_expr (valist
), f_vrtop
, NULL_TREE
);
8003 f_off
= build3 (COMPONENT_REF
, TREE_TYPE (f_vroff
),
8004 unshare_expr (valist
), f_vroff
, NULL_TREE
);
8006 rsize
= nregs
* UNITS_PER_VREG
;
8010 if (BYTES_BIG_ENDIAN
&& GET_MODE_SIZE (ag_mode
) < UNITS_PER_VREG
)
8011 adjust
= UNITS_PER_VREG
- GET_MODE_SIZE (ag_mode
);
8013 else if (BLOCK_REG_PADDING (mode
, type
, 1) == downward
8014 && size
< UNITS_PER_VREG
)
8016 adjust
= UNITS_PER_VREG
- size
;
8021 /* TYPE passed in general registers. */
8022 f_top
= build3 (COMPONENT_REF
, TREE_TYPE (f_grtop
),
8023 unshare_expr (valist
), f_grtop
, NULL_TREE
);
8024 f_off
= build3 (COMPONENT_REF
, TREE_TYPE (f_groff
),
8025 unshare_expr (valist
), f_groff
, NULL_TREE
);
8026 rsize
= (size
+ UNITS_PER_WORD
- 1) & -UNITS_PER_WORD
;
8027 nregs
= rsize
/ UNITS_PER_WORD
;
8032 if (BLOCK_REG_PADDING (mode
, type
, 1) == downward
8033 && size
< UNITS_PER_WORD
)
8035 adjust
= UNITS_PER_WORD
- size
;
8039 /* Get a local temporary for the field value. */
8040 off
= get_initialized_tmp_var (f_off
, pre_p
, NULL
);
8042 /* Emit code to branch if off >= 0. */
8043 t
= build2 (GE_EXPR
, boolean_type_node
, off
,
8044 build_int_cst (TREE_TYPE (off
), 0));
8045 cond1
= build3 (COND_EXPR
, ptr_type_node
, t
, NULL_TREE
, NULL_TREE
);
8049 /* Emit: offs = (offs + 15) & -16. */
8050 t
= build2 (PLUS_EXPR
, TREE_TYPE (off
), off
,
8051 build_int_cst (TREE_TYPE (off
), 15));
8052 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (off
), t
,
8053 build_int_cst (TREE_TYPE (off
), -16));
8054 roundup
= build2 (MODIFY_EXPR
, TREE_TYPE (off
), off
, t
);
8059 /* Update ap.__[g|v]r_offs */
8060 t
= build2 (PLUS_EXPR
, TREE_TYPE (off
), off
,
8061 build_int_cst (TREE_TYPE (off
), rsize
));
8062 t
= build2 (MODIFY_EXPR
, TREE_TYPE (f_off
), unshare_expr (f_off
), t
);
8066 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), roundup
, t
);
8068 /* [cond2] if (ap.__[g|v]r_offs > 0) */
8069 u
= build2 (GT_EXPR
, boolean_type_node
, unshare_expr (f_off
),
8070 build_int_cst (TREE_TYPE (f_off
), 0));
8071 cond2
= build3 (COND_EXPR
, ptr_type_node
, u
, NULL_TREE
, NULL_TREE
);
8073 /* String up: make sure the assignment happens before the use. */
8074 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (cond2
), t
, cond2
);
8075 COND_EXPR_ELSE (cond1
) = t
;
8077 /* Prepare the trees handling the argument that is passed on the stack;
8078 the top level node will store in ON_STACK. */
8079 arg
= get_initialized_tmp_var (stack
, pre_p
, NULL
);
8082 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
8083 t
= fold_convert (intDI_type_node
, arg
);
8084 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
8085 build_int_cst (TREE_TYPE (t
), 15));
8086 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
8087 build_int_cst (TREE_TYPE (t
), -16));
8088 t
= fold_convert (TREE_TYPE (arg
), t
);
8089 roundup
= build2 (MODIFY_EXPR
, TREE_TYPE (arg
), arg
, t
);
8093 /* Advance ap.__stack */
8094 t
= fold_convert (intDI_type_node
, arg
);
8095 t
= build2 (PLUS_EXPR
, TREE_TYPE (t
), t
,
8096 build_int_cst (TREE_TYPE (t
), size
+ 7));
8097 t
= build2 (BIT_AND_EXPR
, TREE_TYPE (t
), t
,
8098 build_int_cst (TREE_TYPE (t
), -8));
8099 t
= fold_convert (TREE_TYPE (arg
), t
);
8100 t
= build2 (MODIFY_EXPR
, TREE_TYPE (stack
), unshare_expr (stack
), t
);
8101 /* String up roundup and advance. */
8103 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), roundup
, t
);
8104 /* String up with arg */
8105 on_stack
= build2 (COMPOUND_EXPR
, TREE_TYPE (arg
), t
, arg
);
8106 /* Big-endianness related address adjustment. */
8107 if (BLOCK_REG_PADDING (mode
, type
, 1) == downward
8108 && size
< UNITS_PER_WORD
)
8110 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (arg
), arg
,
8111 size_int (UNITS_PER_WORD
- size
));
8112 on_stack
= build2 (COMPOUND_EXPR
, TREE_TYPE (arg
), on_stack
, t
);
8115 COND_EXPR_THEN (cond1
) = unshare_expr (on_stack
);
8116 COND_EXPR_THEN (cond2
) = unshare_expr (on_stack
);
8118 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
8121 t
= build2 (PREINCREMENT_EXPR
, TREE_TYPE (off
), off
,
8122 build_int_cst (TREE_TYPE (off
), adjust
));
8124 t
= fold_convert (sizetype
, t
);
8125 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (f_top
), f_top
, t
);
8129 /* type ha; // treat as "struct {ftype field[n];}"
8130 ... [computing offs]
8131 for (i = 0; i <nregs; ++i, offs += 16)
8132 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
8135 tree tmp_ha
, field_t
, field_ptr_t
;
8137 /* Declare a local variable. */
8138 tmp_ha
= create_tmp_var_raw (type
, "ha");
8139 gimple_add_tmp_var (tmp_ha
);
8141 /* Establish the base type. */
8145 field_t
= float_type_node
;
8146 field_ptr_t
= float_ptr_type_node
;
8149 field_t
= double_type_node
;
8150 field_ptr_t
= double_ptr_type_node
;
8153 field_t
= long_double_type_node
;
8154 field_ptr_t
= long_double_ptr_type_node
;
8156 /* The half precision and quad precision are not fully supported yet. Enable
8157 the following code after the support is complete. Need to find the correct
8158 type node for __fp16 *. */
8161 field_t
= float_type_node
;
8162 field_ptr_t
= float_ptr_type_node
;
8168 tree innertype
= make_signed_type (GET_MODE_PRECISION (SImode
));
8169 field_t
= build_vector_type_for_mode (innertype
, ag_mode
);
8170 field_ptr_t
= build_pointer_type (field_t
);
8177 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area */
8178 tmp_ha
= build1 (ADDR_EXPR
, field_ptr_t
, tmp_ha
);
8180 t
= fold_convert (field_ptr_t
, addr
);
8181 t
= build2 (MODIFY_EXPR
, field_t
,
8182 build1 (INDIRECT_REF
, field_t
, tmp_ha
),
8183 build1 (INDIRECT_REF
, field_t
, t
));
8185 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
8186 for (i
= 1; i
< nregs
; ++i
)
8188 addr
= fold_build_pointer_plus_hwi (addr
, UNITS_PER_VREG
);
8189 u
= fold_convert (field_ptr_t
, addr
);
8190 u
= build2 (MODIFY_EXPR
, field_t
,
8191 build2 (MEM_REF
, field_t
, tmp_ha
,
8192 build_int_cst (field_ptr_t
,
8194 int_size_in_bytes (field_t
)))),
8195 build1 (INDIRECT_REF
, field_t
, u
));
8196 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (t
), t
, u
);
8199 u
= fold_convert (TREE_TYPE (f_top
), tmp_ha
);
8200 t
= build2 (COMPOUND_EXPR
, TREE_TYPE (f_top
), t
, u
);
8203 COND_EXPR_ELSE (cond2
) = t
;
8204 addr
= fold_convert (build_pointer_type (type
), cond1
);
8205 addr
= build_va_arg_indirect_ref (addr
);
8208 addr
= build_va_arg_indirect_ref (addr
);
8213 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
8216 aarch64_setup_incoming_varargs (cumulative_args_t cum_v
, machine_mode mode
,
8217 tree type
, int *pretend_size ATTRIBUTE_UNUSED
,
8220 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
8221 CUMULATIVE_ARGS local_cum
;
8222 int gr_saved
, vr_saved
;
8224 /* The caller has advanced CUM up to, but not beyond, the last named
8225 argument. Advance a local copy of CUM past the last "real" named
8226 argument, to find out how many registers are left over. */
8228 aarch64_function_arg_advance (pack_cumulative_args(&local_cum
), mode
, type
, true);
8230 /* Found out how many registers we need to save. */
8231 gr_saved
= NUM_ARG_REGS
- local_cum
.aapcs_ncrn
;
8232 vr_saved
= NUM_FP_ARG_REGS
- local_cum
.aapcs_nvrn
;
8236 gcc_assert (local_cum
.aapcs_nvrn
== 0);
8246 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
8247 ptr
= plus_constant (Pmode
, virtual_incoming_args_rtx
,
8248 - gr_saved
* UNITS_PER_WORD
);
8249 mem
= gen_frame_mem (BLKmode
, ptr
);
8250 set_mem_alias_set (mem
, get_varargs_alias_set ());
8252 move_block_from_reg (local_cum
.aapcs_ncrn
+ R0_REGNUM
,
8257 /* We can't use move_block_from_reg, because it will use
8258 the wrong mode, storing D regs only. */
8259 machine_mode mode
= TImode
;
8262 /* Set OFF to the offset from virtual_incoming_args_rtx of
8263 the first vector register. The VR save area lies below
8264 the GR one, and is aligned to 16 bytes. */
8265 off
= -AARCH64_ROUND_UP (gr_saved
* UNITS_PER_WORD
,
8266 STACK_BOUNDARY
/ BITS_PER_UNIT
);
8267 off
-= vr_saved
* UNITS_PER_VREG
;
8269 for (i
= local_cum
.aapcs_nvrn
; i
< NUM_FP_ARG_REGS
; ++i
)
8273 ptr
= plus_constant (Pmode
, virtual_incoming_args_rtx
, off
);
8274 mem
= gen_frame_mem (mode
, ptr
);
8275 set_mem_alias_set (mem
, get_varargs_alias_set ());
8276 aarch64_emit_move (mem
, gen_rtx_REG (mode
, V0_REGNUM
+ i
));
8277 off
+= UNITS_PER_VREG
;
8282 /* We don't save the size into *PRETEND_SIZE because we want to avoid
8283 any complication of having crtl->args.pretend_args_size changed. */
8284 cfun
->machine
->frame
.saved_varargs_size
8285 = (AARCH64_ROUND_UP (gr_saved
* UNITS_PER_WORD
,
8286 STACK_BOUNDARY
/ BITS_PER_UNIT
)
8287 + vr_saved
* UNITS_PER_VREG
);
8291 aarch64_conditional_register_usage (void)
8296 for (i
= V0_REGNUM
; i
<= V31_REGNUM
; i
++)
8299 call_used_regs
[i
] = 1;
8304 /* Walk down the type tree of TYPE counting consecutive base elements.
8305 If *MODEP is VOIDmode, then set it to the first valid floating point
8306 type. If a non-floating point type is found, or if a floating point
8307 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
8308 otherwise return the count in the sub-tree. */
8310 aapcs_vfp_sub_candidate (const_tree type
, machine_mode
*modep
)
8315 switch (TREE_CODE (type
))
8318 mode
= TYPE_MODE (type
);
8319 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= TFmode
)
8322 if (*modep
== VOIDmode
)
8331 mode
= TYPE_MODE (TREE_TYPE (type
));
8332 if (mode
!= DFmode
&& mode
!= SFmode
&& mode
!= TFmode
)
8335 if (*modep
== VOIDmode
)
8344 /* Use V2SImode and V4SImode as representatives of all 64-bit
8345 and 128-bit vector types. */
8346 size
= int_size_in_bytes (type
);
8359 if (*modep
== VOIDmode
)
8362 /* Vector modes are considered to be opaque: two vectors are
8363 equivalent for the purposes of being homogeneous aggregates
8364 if they are the same size. */
8373 tree index
= TYPE_DOMAIN (type
);
8375 /* Can't handle incomplete types nor sizes that are not
8377 if (!COMPLETE_TYPE_P (type
)
8378 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
8381 count
= aapcs_vfp_sub_candidate (TREE_TYPE (type
), modep
);
8384 || !TYPE_MAX_VALUE (index
)
8385 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index
))
8386 || !TYPE_MIN_VALUE (index
)
8387 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index
))
8391 count
*= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index
))
8392 - tree_to_uhwi (TYPE_MIN_VALUE (index
)));
8394 /* There must be no padding. */
8395 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
8407 /* Can't handle incomplete types nor sizes that are not
8409 if (!COMPLETE_TYPE_P (type
)
8410 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
8413 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
8415 if (TREE_CODE (field
) != FIELD_DECL
)
8418 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
8424 /* There must be no padding. */
8425 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
8432 case QUAL_UNION_TYPE
:
8434 /* These aren't very interesting except in a degenerate case. */
8439 /* Can't handle incomplete types nor sizes that are not
8441 if (!COMPLETE_TYPE_P (type
)
8442 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
)
8445 for (field
= TYPE_FIELDS (type
); field
; field
= TREE_CHAIN (field
))
8447 if (TREE_CODE (field
) != FIELD_DECL
)
8450 sub_count
= aapcs_vfp_sub_candidate (TREE_TYPE (field
), modep
);
8453 count
= count
> sub_count
? count
: sub_count
;
8456 /* There must be no padding. */
8457 if (wi::ne_p (TYPE_SIZE (type
), count
* GET_MODE_BITSIZE (*modep
)))
/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
   type as described in AAPCS64 \S 4.1.2.

   See the comment above aarch64_composite_type_p for the notes on MODE.  */

static bool
aarch64_short_vector_p (const_tree type,
			machine_mode mode)
{
  HOST_WIDE_INT size = -1;

  if (type && TREE_CODE (type) == VECTOR_TYPE)
    size = int_size_in_bytes (type);
  else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    size = GET_MODE_SIZE (mode);

  return (size == 8 || size == 16);
}
/* Return TRUE if the type, as described by TYPE and MODE, is a composite
   type as described in AAPCS64 \S 4.3.  This includes aggregate, union and
   array types.  The C99 floating-point complex types are also considered
   as composite types, according to AAPCS64 \S 7.1.1.  The complex integer
   types, which are GCC extensions and out of the scope of AAPCS64, are
   treated as composite types here as well.

   Note that MODE itself is not sufficient in determining whether a type
   is such a composite type or not.  This is because
   stor-layout.c:compute_record_mode may have already changed the MODE
   (BLKmode) of a RECORD_TYPE TYPE to some other mode.  For example, a
   structure with only one field may have its MODE set to the mode of the
   field.  Also an integer mode whose size matches the size of the
   RECORD_TYPE type may be used to substitute the original mode
   (i.e. BLKmode) in certain circumstances.  In other words, MODE cannot be
   solely relied on.  */

static bool
aarch64_composite_type_p (const_tree type,
			  machine_mode mode)
{
  if (aarch64_short_vector_p (type, mode))
    return false;

  if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
    return true;

  if (mode == BLKmode
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return true;

  return false;
}
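/* Worked example (added for illustration, not in the original sources): for

       struct wrapper { float f; };

   stor-layout.c may give the RECORD_TYPE SFmode rather than BLKmode, so the
   mode alone would suggest a scalar float; the AGGREGATE_TYPE_P check above
   still classifies it as a composite type, which is what the AAPCS64 rules
   require.  */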
/* Return TRUE if an argument, whose type is described by TYPE and MODE,
   shall be passed or returned in simd/fp register(s) (providing these
   parameter passing registers are available).

   Upon successful return, *COUNT returns the number of needed registers,
   *BASE_MODE returns the mode of the individual register and when IS_HA
   is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
   floating-point aggregate or a homogeneous short-vector aggregate.  */

static bool
aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
					 const_tree type,
					 machine_mode *base_mode,
					 int *count,
					 bool *is_ha)
{
  machine_mode new_mode = VOIDmode;
  bool composite_p = aarch64_composite_type_p (type, mode);

  if (is_ha != NULL) *is_ha = false;

  if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
      || aarch64_short_vector_p (type, mode))
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      if (is_ha != NULL) *is_ha = true;
      *count = 2;
      new_mode = GET_MODE_INNER (mode);
    }
  else if (type && composite_p)
    {
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);

      if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
	{
	  if (is_ha != NULL) *is_ha = true;
	  *count = ag_count;
	}
      else
	return false;
    }
  else
    return false;

  *base_mode = new_mode;
  return true;
}
/* Implement TARGET_STRUCT_VALUE_RTX.  */

static rtx
aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
			  int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
}
8586 /* Implements target hook vector_mode_supported_p. */
8588 aarch64_vector_mode_supported_p (machine_mode mode
)
8591 && (mode
== V4SImode
|| mode
== V8HImode
8592 || mode
== V16QImode
|| mode
== V2DImode
8593 || mode
== V2SImode
|| mode
== V4HImode
8594 || mode
== V8QImode
|| mode
== V2SFmode
8595 || mode
== V4SFmode
|| mode
== V2DFmode
8596 || mode
== V1DFmode
))
8602 /* Return appropriate SIMD container
8603 for MODE within a vector of WIDTH bits. */
8605 aarch64_simd_container_mode (machine_mode mode
, unsigned width
)
8607 gcc_assert (width
== 64 || width
== 128);
8646 /* Return 128-bit container as the preferred SIMD mode for MODE. */
8648 aarch64_preferred_simd_mode (machine_mode mode
)
8650 return aarch64_simd_container_mode (mode
, 128);
8653 /* Return the bitmask of possible vector sizes for the vectorizer
8656 aarch64_autovectorize_vector_sizes (void)
/* Implement TARGET_MANGLE_TYPE.  */

static const char *
aarch64_mangle_type (const_tree type)
{
  /* The AArch64 ABI documents say that "__va_list" has to be
     mangled as if it is in the "std" namespace.  */
  if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Mangle AArch64-specific internal types.  TYPE_NAME is non-NULL_TREE for
     builtin types.  */
  if (TYPE_NAME (type) != NULL)
    return aarch64_mangle_builtin_type (type);

  /* Use the default mangling.  */
  return NULL;
}
/* Return true if the rtx_insn contains a MEM RTX somewhere
   in its expression.  */

static bool
has_memory_op (rtx_insn *mem_insn)
{
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (mem_insn), ALL)
    if (MEM_P (*iter))
      return true;

  return false;
}

/* Find the first rtx_insn before insn that will generate an assembly
   instruction.  */

static rtx_insn *
aarch64_prev_real_insn (rtx_insn *insn)
{
  if (!insn)
    return NULL;

  do
    {
      insn = prev_real_insn (insn);
    }
  while (insn && recog_memoized (insn) < 0);

  return insn;
}
static bool
is_madd_op (enum attr_type t1)
{
  unsigned int i;
  /* A number of these may be AArch32 only.  */
  enum attr_type mlatypes[] = {
    TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
    TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
    TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS, TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
  };

  for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
    {
      if (t1 == mlatypes[i])
	return true;
    }

  return false;
}
/* Check if there is a register dependency between a load and the insn
   for which we hold recog_data.  */

static bool
dep_between_memop_and_curr (rtx memop)
{
  rtx load_reg;
  int opno;

  gcc_assert (GET_CODE (memop) == SET);

  if (!REG_P (SET_DEST (memop)))
    return false;

  load_reg = SET_DEST (memop);
  for (opno = 1; opno < recog_data.n_operands; opno++)
    {
      rtx operand = recog_data.operand[opno];
      if (REG_P (operand)
	  && reg_overlap_mentioned_p (load_reg, operand))
	return true;
    }

  return false;
}
/* When working around the Cortex-A53 erratum 835769,
   given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
   instruction and has a preceding memory instruction such that a NOP
   should be inserted between them.  */

bool
aarch64_madd_needs_nop (rtx_insn* insn)
{
  enum attr_type attr_type;
  rtx_insn *prev;
  rtx body;

  if (!aarch64_fix_a53_err835769)
    return false;

  if (recog_memoized (insn) < 0)
    return false;

  attr_type = get_attr_type (insn);
  if (!is_madd_op (attr_type))
    return false;

  prev = aarch64_prev_real_insn (insn);
  /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
     Restore recog state to INSN to avoid state corruption.  */
  extract_constrain_insn_cached (insn);

  if (!prev || !has_memory_op (prev))
    return false;

  body = single_set (prev);

  /* If the previous insn is a memory op and there is no dependency between
     it and the DImode madd, emit a NOP between them.  If body is NULL then we
     have a complex memory operation, probably a load/store pair.
     Be conservative for now and emit a NOP.  */
  if (GET_MODE (recog_data.operand[0]) == DImode
      && (!body || !dep_between_memop_and_curr (body)))
    return true;

  return false;
}
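/* Illustrative sketch (not from the original sources): with
   -mfix-cortex-a53-835769 in effect, the FINAL_PRESCAN_INSN hook below makes
   final emit, for example,

       ldr	x1, [x2]
       nop	// between mem op and mult-accumulate
       madd	x3, x4, x5, x6

   rather than letting the 64-bit multiply-accumulate directly follow the
   memory access.  The registers here are made up for the example.  */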
/* Implement FINAL_PRESCAN_INSN.  */

void
aarch64_final_prescan_insn (rtx_insn *insn)
{
  if (aarch64_madd_needs_nop (insn))
    fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
}


/* Return the equivalent letter for size.  */
static char
sizetochar (int size)
{
  switch (size)
    {
    case 64: return 'd';
    case 32: return 's';
    case 16: return 'h';
    case 8 : return 'b';
    default: gcc_unreachable ();
    }
}
8829 /* Return true iff x is a uniform vector of floating-point
8830 constants, and the constant can be represented in
8831 quarter-precision form. Note, as aarch64_float_const_representable
8832 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
8834 aarch64_vect_float_const_representable_p (rtx x
)
8837 REAL_VALUE_TYPE r0
, ri
;
8840 if (GET_MODE_CLASS (GET_MODE (x
)) != MODE_VECTOR_FLOAT
)
8843 x0
= CONST_VECTOR_ELT (x
, 0);
8844 if (!CONST_DOUBLE_P (x0
))
8847 REAL_VALUE_FROM_CONST_DOUBLE (r0
, x0
);
8849 for (i
= 1; i
< CONST_VECTOR_NUNITS (x
); i
++)
8851 xi
= CONST_VECTOR_ELT (x
, i
);
8852 if (!CONST_DOUBLE_P (xi
))
8855 REAL_VALUE_FROM_CONST_DOUBLE (ri
, xi
);
8856 if (!REAL_VALUES_EQUAL (r0
, ri
))
8860 return aarch64_float_const_representable_p (x0
);
8863 /* Return true for valid and false for invalid. */
8865 aarch64_simd_valid_immediate (rtx op
, machine_mode mode
, bool inverse
,
8866 struct simd_immediate_info
*info
)
8868 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
8870 for (i = 0; i < idx; i += (STRIDE)) \
8875 immtype = (CLASS); \
8876 elsize = (ELSIZE); \
8882 unsigned int i
, elsize
= 0, idx
= 0, n_elts
= CONST_VECTOR_NUNITS (op
);
8883 unsigned int innersize
= GET_MODE_SIZE (GET_MODE_INNER (mode
));
8884 unsigned char bytes
[16];
8885 int immtype
= -1, matches
;
8886 unsigned int invmask
= inverse
? 0xff : 0;
8889 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
)
8891 if (! (aarch64_simd_imm_zero_p (op
, mode
)
8892 || aarch64_vect_float_const_representable_p (op
)))
8897 info
->value
= CONST_VECTOR_ELT (op
, 0);
8898 info
->element_width
= GET_MODE_BITSIZE (GET_MODE (info
->value
));
8906 /* Splat vector constant out into a byte vector. */
8907 for (i
= 0; i
< n_elts
; i
++)
8909 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
8910 it must be laid out in the vector register in reverse order. */
8911 rtx el
= CONST_VECTOR_ELT (op
, BYTES_BIG_ENDIAN
? (n_elts
- 1 - i
) : i
);
8912 unsigned HOST_WIDE_INT elpart
;
8913 unsigned int part
, parts
;
8915 if (CONST_INT_P (el
))
8917 elpart
= INTVAL (el
);
8920 else if (GET_CODE (el
) == CONST_DOUBLE
)
8922 elpart
= CONST_DOUBLE_LOW (el
);
8928 for (part
= 0; part
< parts
; part
++)
8931 for (byte
= 0; byte
< innersize
; byte
++)
8933 bytes
[idx
++] = (elpart
& 0xff) ^ invmask
;
8934 elpart
>>= BITS_PER_UNIT
;
8936 if (GET_CODE (el
) == CONST_DOUBLE
)
8937 elpart
= CONST_DOUBLE_HIGH (el
);
8942 gcc_assert (idx
== GET_MODE_SIZE (mode
));
8946 CHECK (4, 32, 0, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0
8947 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 0, 0);
8949 CHECK (4, 32, 1, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
8950 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 8, 0);
8952 CHECK (4, 32, 2, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
8953 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0, 16, 0);
8955 CHECK (4, 32, 3, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
8956 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == bytes
[3], 24, 0);
8958 CHECK (2, 16, 4, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0, 0, 0);
8960 CHECK (2, 16, 5, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1], 8, 0);
8962 CHECK (4, 32, 6, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff
8963 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 0, 1);
8965 CHECK (4, 32, 7, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
8966 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 8, 1);
8968 CHECK (4, 32, 8, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
8969 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff, 16, 1);
8971 CHECK (4, 32, 9, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
8972 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == bytes
[3], 24, 1);
8974 CHECK (2, 16, 10, bytes
[i
] == bytes
[0] && bytes
[i
+ 1] == 0xff, 0, 1);
8976 CHECK (2, 16, 11, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1], 8, 1);
8978 CHECK (4, 32, 12, bytes
[i
] == 0xff && bytes
[i
+ 1] == bytes
[1]
8979 && bytes
[i
+ 2] == 0 && bytes
[i
+ 3] == 0, 8, 0);
8981 CHECK (4, 32, 13, bytes
[i
] == 0 && bytes
[i
+ 1] == bytes
[1]
8982 && bytes
[i
+ 2] == 0xff && bytes
[i
+ 3] == 0xff, 8, 1);
8984 CHECK (4, 32, 14, bytes
[i
] == 0xff && bytes
[i
+ 1] == 0xff
8985 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0, 16, 0);
8987 CHECK (4, 32, 15, bytes
[i
] == 0 && bytes
[i
+ 1] == 0
8988 && bytes
[i
+ 2] == bytes
[2] && bytes
[i
+ 3] == 0xff, 16, 1);
8990 CHECK (1, 8, 16, bytes
[i
] == bytes
[0], 0, 0);
8992 CHECK (1, 64, 17, (bytes
[i
] == 0 || bytes
[i
] == 0xff)
8993 && bytes
[i
] == bytes
[(i
+ 8) % idx
], 0, 0);
9002 info
->element_width
= elsize
;
9003 info
->mvn
= emvn
!= 0;
9004 info
->shift
= eshift
;
9006 unsigned HOST_WIDE_INT imm
= 0;
9008 if (immtype
>= 12 && immtype
<= 15)
9011 /* Un-invert bytes of recognized vector, if necessary. */
9013 for (i
= 0; i
< idx
; i
++)
9014 bytes
[i
] ^= invmask
;
9018 /* FIXME: Broken on 32-bit H_W_I hosts. */
9019 gcc_assert (sizeof (HOST_WIDE_INT
) == 8);
9021 for (i
= 0; i
< 8; i
++)
9022 imm
|= (unsigned HOST_WIDE_INT
) (bytes
[i
] ? 0xff : 0)
9023 << (i
* BITS_PER_UNIT
);
9026 info
->value
= GEN_INT (imm
);
9030 for (i
= 0; i
< elsize
/ BITS_PER_UNIT
; i
++)
9031 imm
|= (unsigned HOST_WIDE_INT
) bytes
[i
] << (i
* BITS_PER_UNIT
);
9033 /* Construct 'abcdefgh' because the assembler cannot handle
9034 generic constants. */
9037 imm
= (imm
>> info
->shift
) & 0xff;
9038 info
->value
= GEN_INT (imm
);
9046 /* Check of immediate shift constants are within range. */
9048 aarch64_simd_shift_imm_p (rtx x
, machine_mode mode
, bool left
)
9050 int bit_width
= GET_MODE_UNIT_SIZE (mode
) * BITS_PER_UNIT
;
9052 return aarch64_const_vec_all_same_in_range_p (x
, 0, bit_width
- 1);
9054 return aarch64_const_vec_all_same_in_range_p (x
, 1, bit_width
);
9057 /* Return true if X is a uniform vector where all elements
9058 are either the floating-point constant 0.0 or the
9059 integer constant 0. */
9061 aarch64_simd_imm_zero_p (rtx x
, machine_mode mode
)
9063 return x
== CONST0_RTX (mode
);
9067 aarch64_simd_imm_scalar_p (rtx x
, machine_mode mode ATTRIBUTE_UNUSED
)
9069 HOST_WIDE_INT imm
= INTVAL (x
);
9072 for (i
= 0; i
< 8; i
++)
9074 unsigned int byte
= imm
& 0xff;
9075 if (byte
!= 0xff && byte
!= 0)
9084 aarch64_mov_operand_p (rtx x
,
9085 enum aarch64_symbol_context context
,
9088 if (GET_CODE (x
) == HIGH
9089 && aarch64_valid_symref (XEXP (x
, 0), GET_MODE (XEXP (x
, 0))))
9092 if (CONST_INT_P (x
))
9095 if (GET_CODE (x
) == SYMBOL_REF
&& mode
== DImode
&& CONSTANT_ADDRESS_P (x
))
9098 return aarch64_classify_symbolic_expression (x
, context
)
9099 == SYMBOL_TINY_ABSOLUTE
;
9102 /* Return a const_int vector of VAL. */
9104 aarch64_simd_gen_const_vector_dup (machine_mode mode
, int val
)
9106 int nunits
= GET_MODE_NUNITS (mode
);
9107 rtvec v
= rtvec_alloc (nunits
);
9110 for (i
=0; i
< nunits
; i
++)
9111 RTVEC_ELT (v
, i
) = GEN_INT (val
);
9113 return gen_rtx_CONST_VECTOR (mode
, v
);
9116 /* Check OP is a legal scalar immediate for the MOVI instruction. */
9119 aarch64_simd_scalar_immediate_valid_for_move (rtx op
, machine_mode mode
)
9123 gcc_assert (!VECTOR_MODE_P (mode
));
9124 vmode
= aarch64_preferred_simd_mode (mode
);
9125 rtx op_v
= aarch64_simd_gen_const_vector_dup (vmode
, INTVAL (op
));
9126 return aarch64_simd_valid_immediate (op_v
, vmode
, false, NULL
);
/* Construct and return a PARALLEL RTX vector with elements numbering the
   lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
   the vector - from the perspective of the architecture.  This does not
   line up with GCC's perspective on lane numbers, so we end up with
   different masks depending on our target endian-ness.  The diagram
   below may help.  We must draw the distinction when building masks
   which select one half of the vector.  An instruction selecting
   architectural low-lanes for a big-endian target, must be described using
   a mask selecting GCC high-lanes.

                 Big-Endian             Little-Endian

GCC             0   1   2   3          3   2   1   0
              | x | x | x | x |      | x | x | x | x |
Architecture    3   2   1   0          3   2   1   0

Low Mask:         { 2, 3 }                { 0, 1 }
High Mask:        { 0, 1 }                { 2, 3 }
*/

rtx
aarch64_simd_vect_par_cnst_half (machine_mode mode, bool high)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits / 2);
  int high_base = nunits / 2;
  int low_base = 0;
  int base;
  rtx t1;
  int i;

  if (BYTES_BIG_ENDIAN)
    base = high ? low_base : high_base;
  else
    base = high ? high_base : low_base;

  for (i = 0; i < nunits / 2; i++)
    RTVEC_ELT (v, i) = GEN_INT (base + i);

  t1 = gen_rtx_PARALLEL (mode, v);
  return t1;
}
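/* Worked example (added for illustration): for V4SImode,
   aarch64_simd_vect_par_cnst_half (V4SImode, false) yields

       (parallel [(const_int 0) (const_int 1)])   on little-endian
       (parallel [(const_int 2) (const_int 3)])   on big-endian

   i.e. the GCC lane numbers that correspond to architectural lanes 0 and 1,
   exactly as in the diagram above.  */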
/* Check OP for validity as a PARALLEL RTX vector with elements
   numbering the lanes of either the high (HIGH == TRUE) or low lanes,
   from the perspective of the architecture.  See the diagram above
   aarch64_simd_vect_par_cnst_half for more details.  */

bool
aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
				       bool high)
{
  rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high);
  HOST_WIDE_INT count_op = XVECLEN (op, 0);
  HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
  int i = 0;

  if (!VECTOR_MODE_P (mode))
    return false;

  if (count_op != count_ideal)
    return false;

  for (i = 0; i < count_ideal; i++)
    {
      rtx elt_op = XVECEXP (op, 0, i);
      rtx elt_ideal = XVECEXP (ideal, 0, i);

      if (!CONST_INT_P (elt_op)
	  || INTVAL (elt_ideal) != INTVAL (elt_op))
	return false;
    }
  return true;
}
/* Bounds-check lanes.  Ensure OPERAND lies between LOW (inclusive) and
   HIGH (exclusive).  */
void
aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
			  const_tree exp)
{
  HOST_WIDE_INT lane;

  gcc_assert (CONST_INT_P (operand));
  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    {
      if (exp)
	error ("%Klane %wd out of range %wd - %wd", exp, lane, low, high - 1);
      else
	error ("lane %wd out of range %wd - %wd", lane, low, high - 1);
    }
}

/* Return TRUE if OP is a valid vector addressing mode.  */
bool
aarch64_simd_mem_operand_p (rtx op)
{
  return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
			|| REG_P (XEXP (op, 0)));
}
9231 /* Emit a register copy from operand to operand, taking care not to
9232 early-clobber source registers in the process.
9234 COUNT is the number of components into which the copy needs to be
9237 aarch64_simd_emit_reg_reg_move (rtx
*operands
, enum machine_mode mode
,
9241 int rdest
= REGNO (operands
[0]);
9242 int rsrc
= REGNO (operands
[1]);
9244 if (!reg_overlap_mentioned_p (operands
[0], operands
[1])
9246 for (i
= 0; i
< count
; i
++)
9247 emit_move_insn (gen_rtx_REG (mode
, rdest
+ i
),
9248 gen_rtx_REG (mode
, rsrc
+ i
));
9250 for (i
= 0; i
< count
; i
++)
9251 emit_move_insn (gen_rtx_REG (mode
, rdest
+ count
- i
- 1),
9252 gen_rtx_REG (mode
, rsrc
+ count
- i
- 1));
9255 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
9256 one of VSTRUCT modes: OI, CI or XI. */
9258 aarch64_simd_attr_length_move (rtx_insn
*insn
)
9262 extract_insn_cached (insn
);
9264 if (REG_P (recog_data
.operand
[0]) && REG_P (recog_data
.operand
[1]))
9266 mode
= GET_MODE (recog_data
.operand
[0]);
9282 /* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is
9283 one of VSTRUCT modes: OI, CI, EI, or XI. */
9285 aarch64_simd_attr_length_rglist (enum machine_mode mode
)
9287 return (GET_MODE_SIZE (mode
) / UNITS_PER_VREG
) * 4;
9290 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
9291 alignment of a vector to 128 bits. */
9292 static HOST_WIDE_INT
9293 aarch64_simd_vector_alignment (const_tree type
)
9295 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
9296 return MIN (align
, 128);
9299 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
9301 aarch64_simd_vector_alignment_reachable (const_tree type
, bool is_packed
)
9306 /* We guarantee alignment for vectors up to 128-bits. */
9307 if (tree_int_cst_compare (TYPE_SIZE (type
),
9308 bitsize_int (BIGGEST_ALIGNMENT
)) > 0)
9311 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
9315 /* If VALS is a vector constant that can be loaded into a register
9316 using DUP, generate instructions to do so and return an RTX to
9317 assign to the register. Otherwise return NULL_RTX. */
9319 aarch64_simd_dup_constant (rtx vals
)
9321 machine_mode mode
= GET_MODE (vals
);
9322 machine_mode inner_mode
= GET_MODE_INNER (mode
);
9323 int n_elts
= GET_MODE_NUNITS (mode
);
9324 bool all_same
= true;
9328 if (GET_CODE (vals
) != CONST_VECTOR
)
9331 for (i
= 1; i
< n_elts
; ++i
)
9333 x
= CONST_VECTOR_ELT (vals
, i
);
9334 if (!rtx_equal_p (x
, CONST_VECTOR_ELT (vals
, 0)))
9341 /* We can load this constant by using DUP and a constant in a
9342 single ARM register. This will be cheaper than a vector
9344 x
= copy_to_mode_reg (inner_mode
, CONST_VECTOR_ELT (vals
, 0));
9345 return gen_rtx_VEC_DUPLICATE (mode
, x
);
9349 /* Generate code to load VALS, which is a PARALLEL containing only
9350 constants (for vec_init) or CONST_VECTOR, efficiently into a
9351 register. Returns an RTX to copy into the register, or NULL_RTX
9352 for a PARALLEL that can not be converted into a CONST_VECTOR. */
9354 aarch64_simd_make_constant (rtx vals
)
9356 machine_mode mode
= GET_MODE (vals
);
9358 rtx const_vec
= NULL_RTX
;
9359 int n_elts
= GET_MODE_NUNITS (mode
);
9363 if (GET_CODE (vals
) == CONST_VECTOR
)
9365 else if (GET_CODE (vals
) == PARALLEL
)
9367 /* A CONST_VECTOR must contain only CONST_INTs and
9368 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
9369 Only store valid constants in a CONST_VECTOR. */
9370 for (i
= 0; i
< n_elts
; ++i
)
9372 rtx x
= XVECEXP (vals
, 0, i
);
9373 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
9376 if (n_const
== n_elts
)
9377 const_vec
= gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0));
9382 if (const_vec
!= NULL_RTX
9383 && aarch64_simd_valid_immediate (const_vec
, mode
, false, NULL
))
9384 /* Load using MOVI/MVNI. */
9386 else if ((const_dup
= aarch64_simd_dup_constant (vals
)) != NULL_RTX
)
9387 /* Loaded using DUP. */
9389 else if (const_vec
!= NULL_RTX
)
9390 /* Load from constant pool. We can not take advantage of single-cycle
9391 LD1 because we need a PC-relative addressing mode. */
9394 /* A PARALLEL containing something not valid inside CONST_VECTOR.
9395 We can not construct an initializer. */
9400 aarch64_expand_vector_init (rtx target
, rtx vals
)
9402 machine_mode mode
= GET_MODE (target
);
9403 machine_mode inner_mode
= GET_MODE_INNER (mode
);
9404 int n_elts
= GET_MODE_NUNITS (mode
);
9406 rtx any_const
= NULL_RTX
;
9407 bool all_same
= true;
9409 for (int i
= 0; i
< n_elts
; ++i
)
9411 rtx x
= XVECEXP (vals
, 0, i
);
9412 if (!CONST_INT_P (x
) && !CONST_DOUBLE_P (x
))
9417 if (i
> 0 && !rtx_equal_p (x
, XVECEXP (vals
, 0, 0)))
9423 rtx constant
= aarch64_simd_make_constant (vals
);
9424 if (constant
!= NULL_RTX
)
9426 emit_move_insn (target
, constant
);
9431 /* Splat a single non-constant element if we can. */
9434 rtx x
= copy_to_mode_reg (inner_mode
, XVECEXP (vals
, 0, 0));
9435 aarch64_emit_move (target
, gen_rtx_VEC_DUPLICATE (mode
, x
));
9439 /* Half the fields (or less) are non-constant. Load constant then overwrite
9440 varying fields. Hope that this is more efficient than using the stack. */
9441 if (n_var
<= n_elts
/2)
9443 rtx copy
= copy_rtx (vals
);
9445 /* Load constant part of vector. We really don't care what goes into the
9446 parts we will overwrite, but we're more likely to be able to load the
9447 constant efficiently if it has fewer, larger, repeating parts
9448 (see aarch64_simd_valid_immediate). */
9449 for (int i
= 0; i
< n_elts
; i
++)
9451 rtx x
= XVECEXP (vals
, 0, i
);
9452 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
9454 rtx subst
= any_const
;
9455 for (int bit
= n_elts
/ 2; bit
> 0; bit
/= 2)
9457 /* Look in the copied vector, as more elements are const. */
9458 rtx test
= XVECEXP (copy
, 0, i
^ bit
);
9459 if (CONST_INT_P (test
) || CONST_DOUBLE_P (test
))
9465 XVECEXP (copy
, 0, i
) = subst
;
9467 aarch64_expand_vector_init (target
, copy
);
9469 /* Insert variables. */
9470 enum insn_code icode
= optab_handler (vec_set_optab
, mode
);
9471 gcc_assert (icode
!= CODE_FOR_nothing
);
9473 for (int i
= 0; i
< n_elts
; i
++)
9475 rtx x
= XVECEXP (vals
, 0, i
);
9476 if (CONST_INT_P (x
) || CONST_DOUBLE_P (x
))
9478 x
= copy_to_mode_reg (inner_mode
, x
);
9479 emit_insn (GEN_FCN (icode
) (target
, x
, GEN_INT (i
)));
9484 /* Construct the vector in memory one field at a time
9485 and load the whole vector. */
9486 rtx mem
= assign_stack_temp (mode
, GET_MODE_SIZE (mode
));
9487 for (int i
= 0; i
< n_elts
; i
++)
9488 emit_move_insn (adjust_address_nv (mem
, inner_mode
,
9489 i
* GET_MODE_SIZE (inner_mode
)),
9490 XVECEXP (vals
, 0, i
));
9491 emit_move_insn (target
, mem
);
9495 static unsigned HOST_WIDE_INT
9496 aarch64_shift_truncation_mask (machine_mode mode
)
9499 (aarch64_vector_mode_supported_p (mode
)
9500 || aarch64_vect_struct_mode_p (mode
)) ? 0 : (GET_MODE_BITSIZE (mode
) - 1);
9503 #ifndef TLS_SECTION_ASM_FLAG
9504 #define TLS_SECTION_ASM_FLAG 'T'
9508 aarch64_elf_asm_named_section (const char *name
, unsigned int flags
,
9509 tree decl ATTRIBUTE_UNUSED
)
9511 char flagchars
[10], *f
= flagchars
;
9513 /* If we have already declared this section, we can use an
9514 abbreviated form to switch back to it -- unless this section is
9515 part of a COMDAT groups, in which case GAS requires the full
9516 declaration every time. */
9517 if (!(HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
9518 && (flags
& SECTION_DECLARED
))
9520 fprintf (asm_out_file
, "\t.section\t%s\n", name
);
9524 if (!(flags
& SECTION_DEBUG
))
9526 if (flags
& SECTION_WRITE
)
9528 if (flags
& SECTION_CODE
)
9530 if (flags
& SECTION_SMALL
)
9532 if (flags
& SECTION_MERGE
)
9534 if (flags
& SECTION_STRINGS
)
9536 if (flags
& SECTION_TLS
)
9537 *f
++ = TLS_SECTION_ASM_FLAG
;
9538 if (HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
9542 fprintf (asm_out_file
, "\t.section\t%s,\"%s\"", name
, flagchars
);
9544 if (!(flags
& SECTION_NOTYPE
))
9549 if (flags
& SECTION_BSS
)
9554 #ifdef TYPE_OPERAND_FMT
9555 format
= "," TYPE_OPERAND_FMT
;
9560 fprintf (asm_out_file
, format
, type
);
9562 if (flags
& SECTION_ENTSIZE
)
9563 fprintf (asm_out_file
, ",%d", flags
& SECTION_ENTSIZE
);
9564 if (HAVE_COMDAT_GROUP
&& (flags
& SECTION_LINKONCE
))
9566 if (TREE_CODE (decl
) == IDENTIFIER_NODE
)
9567 fprintf (asm_out_file
, ",%s,comdat", IDENTIFIER_POINTER (decl
));
9569 fprintf (asm_out_file
, ",%s,comdat",
9570 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl
)));
9574 putc ('\n', asm_out_file
);
9577 /* Select a format to encode pointers in exception handling data. */
9579 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED
, int global
)
9582 switch (aarch64_cmodel
)
9584 case AARCH64_CMODEL_TINY
:
9585 case AARCH64_CMODEL_TINY_PIC
:
9586 case AARCH64_CMODEL_SMALL
:
9587 case AARCH64_CMODEL_SMALL_PIC
:
9588 case AARCH64_CMODEL_SMALL_SPIC
:
9589 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
9591 type
= DW_EH_PE_sdata4
;
9594 /* No assumptions here. 8-byte relocs required. */
9595 type
= DW_EH_PE_sdata8
;
9598 return (global
? DW_EH_PE_indirect
: 0) | DW_EH_PE_pcrel
| type
;
9601 /* Emit load exclusive. */
9604 aarch64_emit_load_exclusive (machine_mode mode
, rtx rval
,
9605 rtx mem
, rtx model_rtx
)
9607 rtx (*gen
) (rtx
, rtx
, rtx
);
9611 case QImode
: gen
= gen_aarch64_load_exclusiveqi
; break;
9612 case HImode
: gen
= gen_aarch64_load_exclusivehi
; break;
9613 case SImode
: gen
= gen_aarch64_load_exclusivesi
; break;
9614 case DImode
: gen
= gen_aarch64_load_exclusivedi
; break;
9619 emit_insn (gen (rval
, mem
, model_rtx
));
9622 /* Emit store exclusive. */
9625 aarch64_emit_store_exclusive (machine_mode mode
, rtx bval
,
9626 rtx rval
, rtx mem
, rtx model_rtx
)
9628 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
9632 case QImode
: gen
= gen_aarch64_store_exclusiveqi
; break;
9633 case HImode
: gen
= gen_aarch64_store_exclusivehi
; break;
9634 case SImode
: gen
= gen_aarch64_store_exclusivesi
; break;
9635 case DImode
: gen
= gen_aarch64_store_exclusivedi
; break;
9640 emit_insn (gen (bval
, rval
, mem
, model_rtx
));
9643 /* Mark the previous jump instruction as unlikely. */
9646 aarch64_emit_unlikely_jump (rtx insn
)
9648 int very_unlikely
= REG_BR_PROB_BASE
/ 100 - 1;
9650 insn
= emit_jump_insn (insn
);
9651 add_int_reg_note (insn
, REG_BR_PROB
, very_unlikely
);
9654 /* Expand a compare and swap pattern. */
9657 aarch64_expand_compare_and_swap (rtx operands
[])
9659 rtx bval
, rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
, x
;
9660 machine_mode mode
, cmp_mode
;
9661 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
, rtx
, rtx
, rtx
);
9666 oldval
= operands
[3];
9667 newval
= operands
[4];
9668 is_weak
= operands
[5];
9669 mod_s
= operands
[6];
9670 mod_f
= operands
[7];
9671 mode
= GET_MODE (mem
);
9674 /* Normally the succ memory model must be stronger than fail, but in the
9675 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
9676 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
9678 if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f
)))
9679 && is_mm_release (memmodel_from_int (INTVAL (mod_s
))))
9680 mod_s
= GEN_INT (MEMMODEL_ACQ_REL
);
9686 /* For short modes, we're going to perform the comparison in SImode,
9687 so do the zero-extension now. */
9689 rval
= gen_reg_rtx (SImode
);
9690 oldval
= convert_modes (SImode
, mode
, oldval
, true);
9695 /* Force the value into a register if needed. */
9696 if (!aarch64_plus_operand (oldval
, mode
))
9697 oldval
= force_reg (cmp_mode
, oldval
);
9706 case QImode
: gen
= gen_atomic_compare_and_swapqi_1
; break;
9707 case HImode
: gen
= gen_atomic_compare_and_swaphi_1
; break;
9708 case SImode
: gen
= gen_atomic_compare_and_swapsi_1
; break;
9709 case DImode
: gen
= gen_atomic_compare_and_swapdi_1
; break;
9714 emit_insn (gen (rval
, mem
, oldval
, newval
, is_weak
, mod_s
, mod_f
));
9716 if (mode
== QImode
|| mode
== HImode
)
9717 emit_move_insn (operands
[1], gen_lowpart (mode
, rval
));
9719 x
= gen_rtx_REG (CCmode
, CC_REGNUM
);
9720 x
= gen_rtx_EQ (SImode
, x
, const0_rtx
);
9721 emit_insn (gen_rtx_SET (bval
, x
));
9724 /* Emit a barrier, that is appropriate for memory model MODEL, at the end of a
9725 sequence implementing an atomic operation. */
9728 aarch64_emit_post_barrier (enum memmodel model
)
9730 const enum memmodel base_model
= memmodel_base (model
);
9732 if (is_mm_sync (model
)
9733 && (base_model
== MEMMODEL_ACQUIRE
9734 || base_model
== MEMMODEL_ACQ_REL
9735 || base_model
== MEMMODEL_SEQ_CST
))
9737 emit_insn (gen_mem_thread_fence (GEN_INT (MEMMODEL_SEQ_CST
)));
9741 /* Split a compare and swap pattern. */
9744 aarch64_split_compare_and_swap (rtx operands
[])
9746 rtx rval
, mem
, oldval
, newval
, scratch
;
9749 rtx_code_label
*label1
, *label2
;
9751 enum memmodel model
;
9756 oldval
= operands
[2];
9757 newval
= operands
[3];
9758 is_weak
= (operands
[4] != const0_rtx
);
9759 model_rtx
= operands
[5];
9760 scratch
= operands
[7];
9761 mode
= GET_MODE (mem
);
9762 model
= memmodel_from_int (INTVAL (model_rtx
));
9767 label1
= gen_label_rtx ();
9768 emit_label (label1
);
9770 label2
= gen_label_rtx ();
9772 /* The initial load can be relaxed for a __sync operation since a final
9773 barrier will be emitted to stop code hoisting. */
9774 if (is_mm_sync (model
))
9775 aarch64_emit_load_exclusive (mode
, rval
, mem
,
9776 GEN_INT (MEMMODEL_RELAXED
));
9778 aarch64_emit_load_exclusive (mode
, rval
, mem
, model_rtx
);
9780 cond
= aarch64_gen_compare_reg (NE
, rval
, oldval
);
9781 x
= gen_rtx_NE (VOIDmode
, cond
, const0_rtx
);
9782 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
9783 gen_rtx_LABEL_REF (Pmode
, label2
), pc_rtx
);
9784 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
9786 aarch64_emit_store_exclusive (mode
, scratch
, mem
, newval
, model_rtx
);
9790 x
= gen_rtx_NE (VOIDmode
, scratch
, const0_rtx
);
9791 x
= gen_rtx_IF_THEN_ELSE (VOIDmode
, x
,
9792 gen_rtx_LABEL_REF (Pmode
, label1
), pc_rtx
);
9793 aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx
, x
));
9797 cond
= gen_rtx_REG (CCmode
, CC_REGNUM
);
9798 x
= gen_rtx_COMPARE (CCmode
, scratch
, const0_rtx
);
9799 emit_insn (gen_rtx_SET (cond
, x
));
9802 emit_label (label2
);
9804 /* Emit any final barrier needed for a __sync operation. */
9805 if (is_mm_sync (model
))
9806 aarch64_emit_post_barrier (model
);
/* Split an atomic operation.  */

void
aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
			 rtx value, rtx model_rtx, rtx cond)
{
  machine_mode mode = GET_MODE (mem);
  machine_mode wmode = (mode == DImode ? DImode : SImode);
  const enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
  const bool is_sync = is_mm_sync (model);
  rtx_code_label *label;
  rtx x;

  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  /* The initial load can be relaxed for a __sync operation since a final
     barrier will be emitted to stop code hoisting.  */
  if (is_sync)
    aarch64_emit_load_exclusive (mode, old_out, mem,
				 GEN_INT (MEMMODEL_RELAXED));
  else
    aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
	{
	  value = GEN_INT (-INTVAL (value));
	  code = PLUS;
	}
      /* Fall through.  */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      break;
    }

  aarch64_emit_store_exclusive (mode, cond, mem,
				gen_lowpart (mode, new_out), model_rtx);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
  aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));

  /* Emit any final barrier needed for a __sync operation.  */
  if (is_sync)
    aarch64_emit_post_barrier (model);
}
static void
aarch64_print_extension (void)
{
  const struct aarch64_option_extension *opt = NULL;

  for (opt = all_extensions; opt->name != NULL; opt++)
    if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
      asm_fprintf (asm_out_file, "+%s", opt->name);

  asm_fprintf (asm_out_file, "\n");
}

static void
aarch64_start_file (void)
{
  if (selected_arch)
    {
      asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
      aarch64_print_extension ();
    }
  else if (selected_cpu)
    {
      const char *truncated_name
	= aarch64_rewrite_selected_cpu (selected_cpu->name);
      asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
      aarch64_print_extension ();
    }
  default_file_start ();
}
/* Target hook for c_mode_for_suffix.  */

static machine_mode
aarch64_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;

  return VOIDmode;
}
/* We can only represent floating point constants which will fit in
   "quarter-precision" values.  These values are characterised by
   a sign bit, a 4-bit mantissa and a 3-bit exponent.  And are given
   by:

   (-1)^s * (n/16) * 2^r

   Where:
     's' is the sign bit.
     'n' is an integer in the range 16 <= n <= 31.
     'r' is an integer in the range -3 <= r <= 4.  */

/* Return true iff X can be represented by a quarter-precision
   floating point immediate operand X.  Note, we cannot represent 0.0.  */

bool
aarch64_float_const_representable_p (rtx x)
{
  /* This represents our current view of how many bits
     make up the mantissa.  */
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
  int exponent;
  unsigned HOST_WIDE_INT mantissa, mask;
  REAL_VALUE_TYPE r, m;
  bool fail;

  if (!CONST_DOUBLE_P (x))
    return false;

  if (GET_MODE (x) == VOIDmode)
    return false;

  REAL_VALUE_FROM_CONST_DOUBLE (r, x);

  /* We cannot represent infinities, NaNs or +/-zero.  We won't
     know if we have +zero until we analyse the mantissa, but we
     can reject the other invalid values.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
      || REAL_VALUE_MINUS_ZERO (r))
    return false;

  /* Extract exponent.  */
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);

  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     m1 holds the low part of the mantissa, m2 the high part.
     WARNING: If we ever have a representation using more than 2 * H_W_I - 1
     bits for the mantissa, this can fail (low bits will be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);

  /* If the low part of the mantissa has bits set we cannot represent
     the value.  */
  if (w.elt (0) != 0)
    return false;

  /* We have rejected the lower HOST_WIDE_INT, so update our
     understanding of how many bits lie in the mantissa and
     look only at the high HOST_WIDE_INT.  */
  mantissa = w.elt (1);
  point_pos -= HOST_BITS_PER_WIDE_INT;

  /* We can only represent values with a mantissa of the form 1.xxxx.  */
  mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return false;

  /* Having filtered unrepresentable values, we may now remove all
     but the highest 5 bits.  */
  mantissa >>= point_pos - 5;

  /* We cannot represent the value 0.0, so reject it.  This is handled
     by aarch64_float_const_zero_rtx_p.  */
  if (mantissa == 0)
    return false;

  /* Then, as bit 4 is always set, we can mask it off, leaving
     the mantissa in the range [0, 15].  */
  mantissa &= ~(1 << 4);
  gcc_assert (mantissa <= 15);

  /* GCC internally does not use IEEE754-like encoding (where normalized
     significands are in the range [1, 2).  GCC uses [0.5, 1) (see real.c).
     Our mantissa values are shifted 4 places to the left relative to
     normalized IEEE754 so we must modify the exponent returned by REAL_EXP
     by 5 places to correct for GCC's representation.  */
  exponent = 5 - exponent;

  return (exponent >= 0 && exponent <= 7);
}
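
/* Worked examples for the predicate above (editorial, for reference only):
   with n in [16, 31] and r in [-3, 4] the representable magnitudes span
   16/16 * 2^-3 = 0.125 up to 31/16 * 2^4 = 31.0.  So 0.25 (16/16 * 2^-2)
   and 1.5 (24/16 * 2^0) are accepted, while 0.1, 32.0 and (by the explicit
   check above) 0.0 are rejected.  */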
char*
aarch64_output_simd_mov_immediate (rtx const_vector,
				   machine_mode mode,
				   unsigned width)
{
  bool is_valid;
  static char templ[40];
  const char *mnemonic;
  const char *shift_op;
  unsigned int lane_count = 0;
  char element_char;

  struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };

  /* This will return true to show const_vector is legal for use as either
     a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
     also update INFO to show how the immediate should be generated.  */
  is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
  gcc_assert (is_valid);

  element_char = sizetochar (info.element_width);
  lane_count = width / info.element_width;

  mode = GET_MODE_INNER (mode);
  if (mode == SFmode || mode == DFmode)
    {
      gcc_assert (info.shift == 0 && ! info.mvn);
      if (aarch64_float_const_zero_rtx_p (info.value))
	info.value = GEN_INT (0);
      else
	{
#define buf_size 20
	  REAL_VALUE_TYPE r;
	  REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
	  char float_buf[buf_size] = {'\0'};
	  real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
#undef buf_size

	  if (lane_count == 1)
	    snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
	  else
	    snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
		      lane_count, element_char, float_buf);
	  return templ;
	}
    }

  mnemonic = info.mvn ? "mvni" : "movi";
  shift_op = info.msl ? "msl" : "lsl";

  if (lane_count == 1)
    snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
	      mnemonic, UINTVAL (info.value));
  else if (info.shift)
    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
	      ", %s %d", mnemonic, lane_count, element_char,
	      UINTVAL (info.value), shift_op, info.shift);
  else
    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
	      mnemonic, lane_count, element_char, UINTVAL (info.value));
  return templ;
}
char*
aarch64_output_scalar_simd_mov_immediate (rtx immediate,
					  machine_mode mode)
{
  machine_mode vmode;

  gcc_assert (!VECTOR_MODE_P (mode));
  vmode = aarch64_simd_container_mode (mode, 64);
  rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
  return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
}
10087 /* Split operands into moves from op[1] + op[2] into op[0]. */
10090 aarch64_split_combinev16qi (rtx operands
[3])
10092 unsigned int dest
= REGNO (operands
[0]);
10093 unsigned int src1
= REGNO (operands
[1]);
10094 unsigned int src2
= REGNO (operands
[2]);
10095 machine_mode halfmode
= GET_MODE (operands
[1]);
10096 unsigned int halfregs
= HARD_REGNO_NREGS (src1
, halfmode
);
10097 rtx destlo
, desthi
;
10099 gcc_assert (halfmode
== V16QImode
);
10101 if (src1
== dest
&& src2
== dest
+ halfregs
)
10103 /* No-op move. Can't split to nothing; emit something. */
10104 emit_note (NOTE_INSN_DELETED
);
10108 /* Preserve register attributes for variable tracking. */
10109 destlo
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
, 0);
10110 desthi
= gen_rtx_REG_offset (operands
[0], halfmode
, dest
+ halfregs
,
10111 GET_MODE_SIZE (halfmode
));
10113 /* Special case of reversed high/low parts. */
10114 if (reg_overlap_mentioned_p (operands
[2], destlo
)
10115 && reg_overlap_mentioned_p (operands
[1], desthi
))
10117 emit_insn (gen_xorv16qi3 (operands
[1], operands
[1], operands
[2]));
10118 emit_insn (gen_xorv16qi3 (operands
[2], operands
[1], operands
[2]));
10119 emit_insn (gen_xorv16qi3 (operands
[1], operands
[1], operands
[2]));
10121 else if (!reg_overlap_mentioned_p (operands
[2], destlo
))
10123 /* Try to avoid unnecessary moves if part of the result
10124 is in the right place already. */
10126 emit_move_insn (destlo
, operands
[1]);
10127 if (src2
!= dest
+ halfregs
)
10128 emit_move_insn (desthi
, operands
[2]);
10132 if (src2
!= dest
+ halfregs
)
10133 emit_move_insn (desthi
, operands
[2]);
10135 emit_move_insn (destlo
, operands
[1]);
10139 /* vec_perm support. */
10141 #define MAX_VECT_LEN 16
10143 struct expand_vec_perm_d
10145 rtx target
, op0
, op1
;
10146 unsigned char perm
[MAX_VECT_LEN
];
10147 machine_mode vmode
;
10148 unsigned char nelt
;
10153 /* Generate a variable permutation. */
10156 aarch64_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
10158 machine_mode vmode
= GET_MODE (target
);
10159 bool one_vector_p
= rtx_equal_p (op0
, op1
);
10161 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
10162 gcc_checking_assert (GET_MODE (op0
) == vmode
);
10163 gcc_checking_assert (GET_MODE (op1
) == vmode
);
10164 gcc_checking_assert (GET_MODE (sel
) == vmode
);
10165 gcc_checking_assert (TARGET_SIMD
);
10169 if (vmode
== V8QImode
)
10171 /* Expand the argument to a V16QI mode by duplicating it. */
10172 rtx pair
= gen_reg_rtx (V16QImode
);
10173 emit_insn (gen_aarch64_combinev8qi (pair
, op0
, op0
));
10174 emit_insn (gen_aarch64_tbl1v8qi (target
, pair
, sel
));
10178 emit_insn (gen_aarch64_tbl1v16qi (target
, op0
, sel
));
10185 if (vmode
== V8QImode
)
10187 pair
= gen_reg_rtx (V16QImode
);
10188 emit_insn (gen_aarch64_combinev8qi (pair
, op0
, op1
));
10189 emit_insn (gen_aarch64_tbl1v8qi (target
, pair
, sel
));
10193 pair
= gen_reg_rtx (OImode
);
10194 emit_insn (gen_aarch64_combinev16qi (pair
, op0
, op1
));
10195 emit_insn (gen_aarch64_tbl2v16qi (target
, pair
, sel
));
10201 aarch64_expand_vec_perm (rtx target
, rtx op0
, rtx op1
, rtx sel
)
10203 machine_mode vmode
= GET_MODE (target
);
10204 unsigned int nelt
= GET_MODE_NUNITS (vmode
);
10205 bool one_vector_p
= rtx_equal_p (op0
, op1
);
10208 /* The TBL instruction does not use a modulo index, so we must take care
10209 of that ourselves. */
10210 mask
= aarch64_simd_gen_const_vector_dup (vmode
,
10211 one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
10212 sel
= expand_simple_binop (vmode
, AND
, sel
, mask
, NULL
, 0, OPTAB_LIB_WIDEN
);
10214 /* For big-endian, we also need to reverse the index within the vector
10215 (but not which vector). */
10216 if (BYTES_BIG_ENDIAN
)
10218 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
10220 mask
= aarch64_simd_gen_const_vector_dup (vmode
, nelt
- 1);
10221 sel
= expand_simple_binop (vmode
, XOR
, sel
, mask
,
10222 NULL
, 0, OPTAB_LIB_WIDEN
);
10224 aarch64_expand_vec_perm_1 (target
, op0
, op1
, sel
);
10227 /* Recognize patterns suitable for the TRN instructions. */
10229 aarch64_evpc_trn (struct expand_vec_perm_d
*d
)
10231 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
10232 rtx out
, in0
, in1
, x
;
10233 rtx (*gen
) (rtx
, rtx
, rtx
);
10234 machine_mode vmode
= d
->vmode
;
10236 if (GET_MODE_UNIT_SIZE (vmode
) > 8)
10239 /* Note that these are little-endian tests.
10240 We correct for big-endian later. */
10241 if (d
->perm
[0] == 0)
10243 else if (d
->perm
[0] == 1)
10247 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
10249 for (i
= 0; i
< nelt
; i
+= 2)
10251 if (d
->perm
[i
] != i
+ odd
)
10253 if (d
->perm
[i
+ 1] != ((i
+ nelt
+ odd
) & mask
))
10263 if (BYTES_BIG_ENDIAN
)
10265 x
= in0
, in0
= in1
, in1
= x
;
10274 case V16QImode
: gen
= gen_aarch64_trn2v16qi
; break;
10275 case V8QImode
: gen
= gen_aarch64_trn2v8qi
; break;
10276 case V8HImode
: gen
= gen_aarch64_trn2v8hi
; break;
10277 case V4HImode
: gen
= gen_aarch64_trn2v4hi
; break;
10278 case V4SImode
: gen
= gen_aarch64_trn2v4si
; break;
10279 case V2SImode
: gen
= gen_aarch64_trn2v2si
; break;
10280 case V2DImode
: gen
= gen_aarch64_trn2v2di
; break;
10281 case V4SFmode
: gen
= gen_aarch64_trn2v4sf
; break;
10282 case V2SFmode
: gen
= gen_aarch64_trn2v2sf
; break;
10283 case V2DFmode
: gen
= gen_aarch64_trn2v2df
; break;
10292 case V16QImode
: gen
= gen_aarch64_trn1v16qi
; break;
10293 case V8QImode
: gen
= gen_aarch64_trn1v8qi
; break;
10294 case V8HImode
: gen
= gen_aarch64_trn1v8hi
; break;
10295 case V4HImode
: gen
= gen_aarch64_trn1v4hi
; break;
10296 case V4SImode
: gen
= gen_aarch64_trn1v4si
; break;
10297 case V2SImode
: gen
= gen_aarch64_trn1v2si
; break;
10298 case V2DImode
: gen
= gen_aarch64_trn1v2di
; break;
10299 case V4SFmode
: gen
= gen_aarch64_trn1v4sf
; break;
10300 case V2SFmode
: gen
= gen_aarch64_trn1v2sf
; break;
10301 case V2DFmode
: gen
= gen_aarch64_trn1v2df
; break;
10307 emit_insn (gen (out
, in0
, in1
));
10311 /* Recognize patterns suitable for the UZP instructions. */
10313 aarch64_evpc_uzp (struct expand_vec_perm_d
*d
)
10315 unsigned int i
, odd
, mask
, nelt
= d
->nelt
;
10316 rtx out
, in0
, in1
, x
;
10317 rtx (*gen
) (rtx
, rtx
, rtx
);
10318 machine_mode vmode
= d
->vmode
;
10320 if (GET_MODE_UNIT_SIZE (vmode
) > 8)
10323 /* Note that these are little-endian tests.
10324 We correct for big-endian later. */
10325 if (d
->perm
[0] == 0)
10327 else if (d
->perm
[0] == 1)
10331 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
10333 for (i
= 0; i
< nelt
; i
++)
10335 unsigned elt
= (i
* 2 + odd
) & mask
;
10336 if (d
->perm
[i
] != elt
)
10346 if (BYTES_BIG_ENDIAN
)
10348 x
= in0
, in0
= in1
, in1
= x
;
10357 case V16QImode
: gen
= gen_aarch64_uzp2v16qi
; break;
10358 case V8QImode
: gen
= gen_aarch64_uzp2v8qi
; break;
10359 case V8HImode
: gen
= gen_aarch64_uzp2v8hi
; break;
10360 case V4HImode
: gen
= gen_aarch64_uzp2v4hi
; break;
10361 case V4SImode
: gen
= gen_aarch64_uzp2v4si
; break;
10362 case V2SImode
: gen
= gen_aarch64_uzp2v2si
; break;
10363 case V2DImode
: gen
= gen_aarch64_uzp2v2di
; break;
10364 case V4SFmode
: gen
= gen_aarch64_uzp2v4sf
; break;
10365 case V2SFmode
: gen
= gen_aarch64_uzp2v2sf
; break;
10366 case V2DFmode
: gen
= gen_aarch64_uzp2v2df
; break;
10375 case V16QImode
: gen
= gen_aarch64_uzp1v16qi
; break;
10376 case V8QImode
: gen
= gen_aarch64_uzp1v8qi
; break;
10377 case V8HImode
: gen
= gen_aarch64_uzp1v8hi
; break;
10378 case V4HImode
: gen
= gen_aarch64_uzp1v4hi
; break;
10379 case V4SImode
: gen
= gen_aarch64_uzp1v4si
; break;
10380 case V2SImode
: gen
= gen_aarch64_uzp1v2si
; break;
10381 case V2DImode
: gen
= gen_aarch64_uzp1v2di
; break;
10382 case V4SFmode
: gen
= gen_aarch64_uzp1v4sf
; break;
10383 case V2SFmode
: gen
= gen_aarch64_uzp1v2sf
; break;
10384 case V2DFmode
: gen
= gen_aarch64_uzp1v2df
; break;
10390 emit_insn (gen (out
, in0
, in1
));
10394 /* Recognize patterns suitable for the ZIP instructions. */
10396 aarch64_evpc_zip (struct expand_vec_perm_d
*d
)
10398 unsigned int i
, high
, mask
, nelt
= d
->nelt
;
10399 rtx out
, in0
, in1
, x
;
10400 rtx (*gen
) (rtx
, rtx
, rtx
);
10401 machine_mode vmode
= d
->vmode
;
10403 if (GET_MODE_UNIT_SIZE (vmode
) > 8)
10406 /* Note that these are little-endian tests.
10407 We correct for big-endian later. */
10409 if (d
->perm
[0] == high
)
10412 else if (d
->perm
[0] == 0)
10416 mask
= (d
->one_vector_p
? nelt
- 1 : 2 * nelt
- 1);
10418 for (i
= 0; i
< nelt
/ 2; i
++)
10420 unsigned elt
= (i
+ high
) & mask
;
10421 if (d
->perm
[i
* 2] != elt
)
10423 elt
= (elt
+ nelt
) & mask
;
10424 if (d
->perm
[i
* 2 + 1] != elt
)
10434 if (BYTES_BIG_ENDIAN
)
10436 x
= in0
, in0
= in1
, in1
= x
;
10445 case V16QImode
: gen
= gen_aarch64_zip2v16qi
; break;
10446 case V8QImode
: gen
= gen_aarch64_zip2v8qi
; break;
10447 case V8HImode
: gen
= gen_aarch64_zip2v8hi
; break;
10448 case V4HImode
: gen
= gen_aarch64_zip2v4hi
; break;
10449 case V4SImode
: gen
= gen_aarch64_zip2v4si
; break;
10450 case V2SImode
: gen
= gen_aarch64_zip2v2si
; break;
10451 case V2DImode
: gen
= gen_aarch64_zip2v2di
; break;
10452 case V4SFmode
: gen
= gen_aarch64_zip2v4sf
; break;
10453 case V2SFmode
: gen
= gen_aarch64_zip2v2sf
; break;
10454 case V2DFmode
: gen
= gen_aarch64_zip2v2df
; break;
10463 case V16QImode
: gen
= gen_aarch64_zip1v16qi
; break;
10464 case V8QImode
: gen
= gen_aarch64_zip1v8qi
; break;
10465 case V8HImode
: gen
= gen_aarch64_zip1v8hi
; break;
10466 case V4HImode
: gen
= gen_aarch64_zip1v4hi
; break;
10467 case V4SImode
: gen
= gen_aarch64_zip1v4si
; break;
10468 case V2SImode
: gen
= gen_aarch64_zip1v2si
; break;
10469 case V2DImode
: gen
= gen_aarch64_zip1v2di
; break;
10470 case V4SFmode
: gen
= gen_aarch64_zip1v4sf
; break;
10471 case V2SFmode
: gen
= gen_aarch64_zip1v2sf
; break;
10472 case V2DFmode
: gen
= gen_aarch64_zip1v2df
; break;
10478 emit_insn (gen (out
, in0
, in1
));
10482 /* Recognize patterns for the EXT insn. */
10485 aarch64_evpc_ext (struct expand_vec_perm_d
*d
)
10487 unsigned int i
, nelt
= d
->nelt
;
10488 rtx (*gen
) (rtx
, rtx
, rtx
, rtx
);
10491 unsigned int location
= d
->perm
[0]; /* Always < nelt. */
10493 /* Check if the extracted indices are increasing by one. */
10494 for (i
= 1; i
< nelt
; i
++)
10496 unsigned int required
= location
+ i
;
10497 if (d
->one_vector_p
)
10499 /* We'll pass the same vector in twice, so allow indices to wrap. */
10500 required
&= (nelt
- 1);
10502 if (d
->perm
[i
] != required
)
10508 case V16QImode
: gen
= gen_aarch64_extv16qi
; break;
10509 case V8QImode
: gen
= gen_aarch64_extv8qi
; break;
10510 case V4HImode
: gen
= gen_aarch64_extv4hi
; break;
10511 case V8HImode
: gen
= gen_aarch64_extv8hi
; break;
10512 case V2SImode
: gen
= gen_aarch64_extv2si
; break;
10513 case V4SImode
: gen
= gen_aarch64_extv4si
; break;
10514 case V2SFmode
: gen
= gen_aarch64_extv2sf
; break;
10515 case V4SFmode
: gen
= gen_aarch64_extv4sf
; break;
10516 case V2DImode
: gen
= gen_aarch64_extv2di
; break;
10517 case V2DFmode
: gen
= gen_aarch64_extv2df
; break;
10526 /* The case where (location == 0) is a no-op for both big- and little-endian,
10527 and is removed by the mid-end at optimization levels -O1 and higher. */
10529 if (BYTES_BIG_ENDIAN
&& (location
!= 0))
10531 /* After setup, we want the high elements of the first vector (stored
10532 at the LSB end of the register), and the low elements of the second
10533 vector (stored at the MSB end of the register). So swap. */
10534 std::swap (d
->op0
, d
->op1
);
10535 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
10536 location
= nelt
- location
;
10539 offset
= GEN_INT (location
);
10540 emit_insn (gen (d
->target
, d
->op0
, d
->op1
, offset
));
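
/* Editorial example for the EXT recogniser above: on V4SImode the
   permutation {1, 2, 3, 4} takes the top three lanes of op0 followed by the
   first lane of op1, i.e. a single EXT with lane offset 1; the insn pattern
   is expected to scale that lane offset into the byte immediate.  */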
10544 /* Recognize patterns for the REV insns. */
10547 aarch64_evpc_rev (struct expand_vec_perm_d
*d
)
10549 unsigned int i
, j
, diff
, nelt
= d
->nelt
;
10550 rtx (*gen
) (rtx
, rtx
);
10552 if (!d
->one_vector_p
)
10561 case V16QImode
: gen
= gen_aarch64_rev64v16qi
; break;
10562 case V8QImode
: gen
= gen_aarch64_rev64v8qi
; break;
10570 case V16QImode
: gen
= gen_aarch64_rev32v16qi
; break;
10571 case V8QImode
: gen
= gen_aarch64_rev32v8qi
; break;
10572 case V8HImode
: gen
= gen_aarch64_rev64v8hi
; break;
10573 case V4HImode
: gen
= gen_aarch64_rev64v4hi
; break;
10581 case V16QImode
: gen
= gen_aarch64_rev16v16qi
; break;
10582 case V8QImode
: gen
= gen_aarch64_rev16v8qi
; break;
10583 case V8HImode
: gen
= gen_aarch64_rev32v8hi
; break;
10584 case V4HImode
: gen
= gen_aarch64_rev32v4hi
; break;
10585 case V4SImode
: gen
= gen_aarch64_rev64v4si
; break;
10586 case V2SImode
: gen
= gen_aarch64_rev64v2si
; break;
10587 case V4SFmode
: gen
= gen_aarch64_rev64v4sf
; break;
10588 case V2SFmode
: gen
= gen_aarch64_rev64v2sf
; break;
10597 for (i
= 0; i
< nelt
; i
+= diff
+ 1)
10598 for (j
= 0; j
<= diff
; j
+= 1)
10600 /* This is guaranteed to be true as the value of diff
10601 is 7, 3, 1 and we should have enough elements in the
10602 queue to generate this. Getting a vector mask with a
10603 value of diff other than these values implies that
10604 something is wrong by the time we get here. */
10605 gcc_assert (i
+ j
< nelt
);
10606 if (d
->perm
[i
+ j
] != i
+ diff
- j
)
10614 emit_insn (gen (d
->target
, d
->op0
));
10619 aarch64_evpc_dup (struct expand_vec_perm_d
*d
)
10621 rtx (*gen
) (rtx
, rtx
, rtx
);
10622 rtx out
= d
->target
;
10624 machine_mode vmode
= d
->vmode
;
10625 unsigned int i
, elt
, nelt
= d
->nelt
;
10629 for (i
= 1; i
< nelt
; i
++)
10631 if (elt
!= d
->perm
[i
])
10635 /* The generic preparation in aarch64_expand_vec_perm_const_1
10636 swaps the operand order and the permute indices if it finds
10637 d->perm[0] to be in the second operand. Thus, we can always
10638 use d->op0 and need not do any extra arithmetic to get the
10639 correct lane number. */
10641 lane
= GEN_INT (elt
); /* The pattern corrects for big-endian. */
10645 case V16QImode
: gen
= gen_aarch64_dup_lanev16qi
; break;
10646 case V8QImode
: gen
= gen_aarch64_dup_lanev8qi
; break;
10647 case V8HImode
: gen
= gen_aarch64_dup_lanev8hi
; break;
10648 case V4HImode
: gen
= gen_aarch64_dup_lanev4hi
; break;
10649 case V4SImode
: gen
= gen_aarch64_dup_lanev4si
; break;
10650 case V2SImode
: gen
= gen_aarch64_dup_lanev2si
; break;
10651 case V2DImode
: gen
= gen_aarch64_dup_lanev2di
; break;
10652 case V4SFmode
: gen
= gen_aarch64_dup_lanev4sf
; break;
10653 case V2SFmode
: gen
= gen_aarch64_dup_lanev2sf
; break;
10654 case V2DFmode
: gen
= gen_aarch64_dup_lanev2df
; break;
10659 emit_insn (gen (out
, in0
, lane
));
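
/* Editorial example for the DUP recogniser above: a permutation whose
   indices are all equal, e.g. {2, 2, 2, 2} on V4SImode, is emitted as a
   single lane duplicate (dup v0.4s, v1.s[2]) instead of a table lookup.  */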
10664 aarch64_evpc_tbl (struct expand_vec_perm_d
*d
)
10666 rtx rperm
[MAX_VECT_LEN
], sel
;
10667 machine_mode vmode
= d
->vmode
;
10668 unsigned int i
, nelt
= d
->nelt
;
10673 /* Generic code will try constant permutation twice. Once with the
10674 original mode and again with the elements lowered to QImode.
10675 So wait and don't do the selector expansion ourselves. */
10676 if (vmode
!= V8QImode
&& vmode
!= V16QImode
)
10679 for (i
= 0; i
< nelt
; ++i
)
10681 int nunits
= GET_MODE_NUNITS (vmode
);
10683 /* If big-endian and two vectors we end up with a weird mixed-endian
10684 mode on NEON. Reverse the index within each word but not the word
10686 rperm
[i
] = GEN_INT (BYTES_BIG_ENDIAN
? d
->perm
[i
] ^ (nunits
- 1)
10689 sel
= gen_rtx_CONST_VECTOR (vmode
, gen_rtvec_v (nelt
, rperm
));
10690 sel
= force_reg (vmode
, sel
);
10692 aarch64_expand_vec_perm_1 (d
->target
, d
->op0
, d
->op1
, sel
);
10697 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d
*d
)
10699 /* The pattern matching functions above are written to look for a small
10700 number to begin the sequence (0, 1, N/2). If we begin with an index
10701 from the second operand, we can swap the operands. */
10702 if (d
->perm
[0] >= d
->nelt
)
10704 unsigned i
, nelt
= d
->nelt
;
10706 gcc_assert (nelt
== (nelt
& -nelt
));
10707 for (i
= 0; i
< nelt
; ++i
)
10708 d
->perm
[i
] ^= nelt
; /* Keep the same index, but in the other vector. */
10710 std::swap (d
->op0
, d
->op1
);
10715 if (aarch64_evpc_rev (d
))
10717 else if (aarch64_evpc_ext (d
))
10719 else if (aarch64_evpc_dup (d
))
10721 else if (aarch64_evpc_zip (d
))
10723 else if (aarch64_evpc_uzp (d
))
10725 else if (aarch64_evpc_trn (d
))
10727 return aarch64_evpc_tbl (d
);
10732 /* Expand a vec_perm_const pattern. */
10735 aarch64_expand_vec_perm_const (rtx target
, rtx op0
, rtx op1
, rtx sel
)
10737 struct expand_vec_perm_d d
;
10738 int i
, nelt
, which
;
10744 d
.vmode
= GET_MODE (target
);
10745 gcc_assert (VECTOR_MODE_P (d
.vmode
));
10746 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
10747 d
.testing_p
= false;
10749 for (i
= which
= 0; i
< nelt
; ++i
)
10751 rtx e
= XVECEXP (sel
, 0, i
);
10752 int ei
= INTVAL (e
) & (2 * nelt
- 1);
10753 which
|= (ei
< nelt
? 1 : 2);
10760 gcc_unreachable ();
10763 d
.one_vector_p
= false;
10764 if (!rtx_equal_p (op0
, op1
))
10767 /* The elements of PERM do not suggest that only the first operand
10768 is used, but both operands are identical. Allow easier matching
10769 of the permutation by folding the permutation into the single
10771 /* Fall Through. */
10773 for (i
= 0; i
< nelt
; ++i
)
10774 d
.perm
[i
] &= nelt
- 1;
10776 d
.one_vector_p
= true;
10781 d
.one_vector_p
= true;
10785 return aarch64_expand_vec_perm_const_1 (&d
);
10789 aarch64_vectorize_vec_perm_const_ok (machine_mode vmode
,
10790 const unsigned char *sel
)
10792 struct expand_vec_perm_d d
;
10793 unsigned int i
, nelt
, which
;
10797 d
.nelt
= nelt
= GET_MODE_NUNITS (d
.vmode
);
10798 d
.testing_p
= true;
10799 memcpy (d
.perm
, sel
, nelt
);
10801 /* Calculate whether all elements are in one vector. */
10802 for (i
= which
= 0; i
< nelt
; ++i
)
10804 unsigned char e
= d
.perm
[i
];
10805 gcc_assert (e
< 2 * nelt
);
10806 which
|= (e
< nelt
? 1 : 2);
10809 /* If all elements are from the second vector, reindex as if from the
10812 for (i
= 0; i
< nelt
; ++i
)
10815 /* Check whether the mask can be applied to a single vector. */
10816 d
.one_vector_p
= (which
!= 3);
10818 d
.target
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 1);
10819 d
.op1
= d
.op0
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 2);
10820 if (!d
.one_vector_p
)
10821 d
.op1
= gen_raw_REG (d
.vmode
, LAST_VIRTUAL_REGISTER
+ 3);
10824 ret
= aarch64_expand_vec_perm_const_1 (&d
);
rtx
aarch64_reverse_mask (enum machine_mode mode)
{
  /* We have to reverse each vector because we don't have
     a permuted load that can reverse-load according to ABI rules.  */
  rtx mask;
  rtvec v = rtvec_alloc (16);
  int i, j;
  int nunits = GET_MODE_NUNITS (mode);
  int usize = GET_MODE_UNIT_SIZE (mode);

  gcc_assert (BYTES_BIG_ENDIAN);
  gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode));

  for (i = 0; i < nunits; i++)
    for (j = 0; j < usize; j++)
      RTVEC_ELT (v, i * usize + j) = GEN_INT ((i + 1) * usize - 1 - j);
  mask = gen_rtx_CONST_VECTOR (V16QImode, v);
  return force_reg (V16QImode, mask);
}
/* Implement MODES_TIEABLE_P.  */

bool
aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
    return true;

  /* We specifically want to allow elements of "structure" modes to
     be tieable to the structure.  This more general condition allows
     other rarer situations too.  */
  if (TARGET_SIMD
      && aarch64_vector_mode_p (mode1)
      && aarch64_vector_mode_p (mode2))
    return true;

  return false;
}
/* Return a new RTX holding the result of moving POINTER forward by
   AMOUNT bytes.  */

static rtx
aarch64_move_pointer (rtx pointer, int amount)
{
  rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);

  return adjust_automodify_address (pointer, GET_MODE (pointer),
				    next, amount);
}

/* Return a new RTX holding the result of moving POINTER forward by the
   size of the mode it points to.  */

static rtx
aarch64_progress_pointer (rtx pointer)
{
  HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));

  return aarch64_move_pointer (pointer, amount);
}
/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
   MODE bytes.  */

static void
aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
					      machine_mode mode)
{
  rtx reg = gen_reg_rtx (mode);

  /* "Cast" the pointers to the correct mode.  */
  *src = adjust_address (*src, mode, 0);
  *dst = adjust_address (*dst, mode, 0);
  /* Emit the memcpy.  */
  emit_move_insn (reg, *src);
  emit_move_insn (*dst, reg);
  /* Move the pointers forward.  */
  *src = aarch64_progress_pointer (*src);
  *dst = aarch64_progress_pointer (*dst);
}
/* Expand movmem, as if from a __builtin_memcpy.  Return true if
   we succeed, otherwise return false.  */

bool
aarch64_expand_movmem (rtx *operands)
{
  unsigned int n;
  rtx dst = operands[0];
  rtx src = operands[1];
  rtx base;
  bool speed_p = !optimize_function_for_size_p (cfun);

  /* When optimizing for size, give a better estimate of the length of a
     memcpy call, but use the default otherwise.  */
  unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;

  /* We can't do anything smart if the amount to copy is not constant.  */
  if (!CONST_INT_P (operands[2]))
    return false;

  n = UINTVAL (operands[2]);

  /* Try to keep the number of instructions low.  For cases below 16 bytes we
     need to make at most two moves.  For cases above 16 bytes it will be one
     move for each 16 byte chunk, then at most two additional moves.  */
  if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
    return false;

  base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  dst = adjust_automodify_address (dst, VOIDmode, base, 0);

  base = copy_to_mode_reg (Pmode, XEXP (src, 0));
  src = adjust_automodify_address (src, VOIDmode, base, 0);

  /* Simple cases.  Copy 0-3 bytes, as (if applicable) a 2-byte, then a
     1-byte chunk.  */
  if (n < 4)
    {
      if (n >= 2)
	{
	  aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
	  n -= 2;
	}
      if (n == 1)
	aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);

      return true;
    }

  /* Copy 4-8 bytes.  First a 4-byte chunk, then (if applicable) a second
     4-byte chunk, partially overlapping with the previously copied chunk.  */
  if (n < 8)
    {
      aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
      n -= 4;
      if (n > 0)
	{
	  int move = n - 4;

	  src = aarch64_move_pointer (src, move);
	  dst = aarch64_move_pointer (dst, move);
	  aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
	}
      return true;
    }

  /* Copy more than 8 bytes.  Copy chunks of 16 bytes until we run out of
     them, then (if applicable) an 8-byte chunk.  */
  while (n >= 8)
    {
      if (n / 16)
	{
	  aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
	  n -= 16;
	}
      else
	{
	  aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
	  n -= 8;
	}
    }

  /* Finish the final bytes of the copy.  We can always do this in one
     instruction.  We either copy the exact amount we need, or partially
     overlap with the previous chunk we copied and copy 8-bytes.  */
  if (n == 0)
    return true;
  else if (n == 1)
    aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
  else if (n == 2)
    aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
  else if (n == 4)
    aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
  else if (n == 3)
    {
      src = aarch64_move_pointer (src, -1);
      dst = aarch64_move_pointer (dst, -1);
      aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
    }
  else
    {
      int move = n - 8;

      src = aarch64_move_pointer (src, move);
      dst = aarch64_move_pointer (dst, move);
      aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
    }

  return true;
}
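
/* Editorial worked example of the strategy above (based on the reconstructed
   control flow, so indicative only): for a constant 27-byte copy we emit one
   16-byte TImode block, one 8-byte DImode block, and then handle the last 3
   bytes by stepping both pointers back one byte and copying an overlapping
   4-byte SImode block.  The size heuristic counts this as 27/16 + 2 = 3
   moves, well inside the budget of 7 when optimizing for speed.  */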
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
aarch64_asan_shadow_offset (void)
{
  return (HOST_WIDE_INT_1 << 36);
}
11037 aarch64_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size
,
11038 unsigned int align
,
11039 enum by_pieces_operation op
,
11042 /* STORE_BY_PIECES can be used when copying a constant string, but
11043 in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
11044 For now we always fail this and let the move_by_pieces code copy
11045 the string from read-only memory. */
11046 if (op
== STORE_BY_PIECES
)
11049 return default_use_by_pieces_infrastructure_p (size
, align
, op
, speed_p
);
11052 static enum machine_mode
11053 aarch64_code_to_ccmode (enum rtx_code code
)
11076 return CC_DLEUmode
;
11079 return CC_DLTUmode
;
11082 return CC_DGEUmode
;
11085 return CC_DGTUmode
;
11093 aarch64_gen_ccmp_first (rtx
*prep_seq
, rtx
*gen_seq
,
11094 int code
, tree treeop0
, tree treeop1
)
11096 enum machine_mode op_mode
, cmp_mode
, cc_mode
;
11097 rtx op0
, op1
, cmp
, target
;
11098 int unsignedp
= TYPE_UNSIGNED (TREE_TYPE (treeop0
));
11099 enum insn_code icode
;
11100 struct expand_operand ops
[4];
11102 cc_mode
= aarch64_code_to_ccmode ((enum rtx_code
) code
);
11103 if (cc_mode
== CCmode
)
11107 expand_operands (treeop0
, treeop1
, NULL_RTX
, &op0
, &op1
, EXPAND_NORMAL
);
11109 op_mode
= GET_MODE (op0
);
11110 if (op_mode
== VOIDmode
)
11111 op_mode
= GET_MODE (op1
);
11119 icode
= CODE_FOR_cmpsi
;
11124 icode
= CODE_FOR_cmpdi
;
11132 op0
= prepare_operand (icode
, op0
, 2, op_mode
, cmp_mode
, unsignedp
);
11133 op1
= prepare_operand (icode
, op1
, 3, op_mode
, cmp_mode
, unsignedp
);
11139 *prep_seq
= get_insns ();
11142 cmp
= gen_rtx_fmt_ee ((enum rtx_code
) code
, cmp_mode
, op0
, op1
);
11143 target
= gen_rtx_REG (CCmode
, CC_REGNUM
);
11145 create_output_operand (&ops
[0], target
, CCmode
);
11146 create_fixed_operand (&ops
[1], cmp
);
11147 create_fixed_operand (&ops
[2], op0
);
11148 create_fixed_operand (&ops
[3], op1
);
11151 if (!maybe_expand_insn (icode
, 4, ops
))
11156 *gen_seq
= get_insns ();
11159 return gen_rtx_REG (cc_mode
, CC_REGNUM
);
11163 aarch64_gen_ccmp_next (rtx
*prep_seq
, rtx
*gen_seq
, rtx prev
, int cmp_code
,
11164 tree treeop0
, tree treeop1
, int bit_code
)
11166 rtx op0
, op1
, cmp0
, cmp1
, target
;
11167 enum machine_mode op_mode
, cmp_mode
, cc_mode
;
11168 int unsignedp
= TYPE_UNSIGNED (TREE_TYPE (treeop0
));
11169 enum insn_code icode
= CODE_FOR_ccmp_andsi
;
11170 struct expand_operand ops
[6];
11172 cc_mode
= aarch64_code_to_ccmode ((enum rtx_code
) cmp_code
);
11173 if (cc_mode
== CCmode
)
11176 push_to_sequence ((rtx_insn
*) *prep_seq
);
11177 expand_operands (treeop0
, treeop1
, NULL_RTX
, &op0
, &op1
, EXPAND_NORMAL
);
11179 op_mode
= GET_MODE (op0
);
11180 if (op_mode
== VOIDmode
)
11181 op_mode
= GET_MODE (op1
);
11189 icode
= (enum rtx_code
) bit_code
== AND
? CODE_FOR_ccmp_andsi
11190 : CODE_FOR_ccmp_iorsi
;
11195 icode
= (enum rtx_code
) bit_code
== AND
? CODE_FOR_ccmp_anddi
11196 : CODE_FOR_ccmp_iordi
;
11204 op0
= prepare_operand (icode
, op0
, 2, op_mode
, cmp_mode
, unsignedp
);
11205 op1
= prepare_operand (icode
, op1
, 3, op_mode
, cmp_mode
, unsignedp
);
11211 *prep_seq
= get_insns ();
11214 target
= gen_rtx_REG (cc_mode
, CC_REGNUM
);
11215 cmp1
= gen_rtx_fmt_ee ((enum rtx_code
) cmp_code
, cmp_mode
, op0
, op1
);
11216 cmp0
= gen_rtx_fmt_ee (NE
, cmp_mode
, prev
, const0_rtx
);
11218 create_fixed_operand (&ops
[0], prev
);
11219 create_fixed_operand (&ops
[1], target
);
11220 create_fixed_operand (&ops
[2], op0
);
11221 create_fixed_operand (&ops
[3], op1
);
11222 create_fixed_operand (&ops
[4], cmp0
);
11223 create_fixed_operand (&ops
[5], cmp1
);
11225 push_to_sequence ((rtx_insn
*) *gen_seq
);
11226 if (!maybe_expand_insn (icode
, 6, ops
))
11232 *gen_seq
= get_insns ();
11238 #undef TARGET_GEN_CCMP_FIRST
11239 #define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first
11241 #undef TARGET_GEN_CCMP_NEXT
11242 #define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next
/* Implement TARGET_SCHED_MACRO_FUSION_P.  Return true if target supports
   instruction fusion of some sort.  */

static bool
aarch64_macro_fusion_p (void)
{
  return aarch64_tune_params.fusible_ops != AARCH64_FUSE_NOTHING;
}
11254 /* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
11255 should be kept together during scheduling. */
11258 aarch_macro_fusion_pair_p (rtx_insn
*prev
, rtx_insn
*curr
)
11261 rtx prev_set
= single_set (prev
);
11262 rtx curr_set
= single_set (curr
);
11263 /* prev and curr are simple SET insns i.e. no flag setting or branching. */
11264 bool simple_sets_p
= prev_set
&& curr_set
&& !any_condjump_p (curr
);
11266 if (!aarch64_macro_fusion_p ())
11270 && (aarch64_tune_params
.fusible_ops
& AARCH64_FUSE_MOV_MOVK
))
11272 /* We are trying to match:
11273 prev (mov) == (set (reg r0) (const_int imm16))
11274 curr (movk) == (set (zero_extract (reg r0)
11277 (const_int imm16_1)) */
11279 set_dest
= SET_DEST (curr_set
);
11281 if (GET_CODE (set_dest
) == ZERO_EXTRACT
11282 && CONST_INT_P (SET_SRC (curr_set
))
11283 && CONST_INT_P (SET_SRC (prev_set
))
11284 && CONST_INT_P (XEXP (set_dest
, 2))
11285 && INTVAL (XEXP (set_dest
, 2)) == 16
11286 && REG_P (XEXP (set_dest
, 0))
11287 && REG_P (SET_DEST (prev_set
))
11288 && REGNO (XEXP (set_dest
, 0)) == REGNO (SET_DEST (prev_set
)))
11295 && (aarch64_tune_params
.fusible_ops
& AARCH64_FUSE_ADRP_ADD
))
11298 /* We're trying to match:
11299 prev (adrp) == (set (reg r1)
11300 (high (symbol_ref ("SYM"))))
11301 curr (add) == (set (reg r0)
11303 (symbol_ref ("SYM"))))
11304 Note that r0 need not necessarily be the same as r1, especially
11305 during pre-regalloc scheduling. */
11307 if (satisfies_constraint_Ush (SET_SRC (prev_set
))
11308 && REG_P (SET_DEST (prev_set
)) && REG_P (SET_DEST (curr_set
)))
11310 if (GET_CODE (SET_SRC (curr_set
)) == LO_SUM
11311 && REG_P (XEXP (SET_SRC (curr_set
), 0))
11312 && REGNO (XEXP (SET_SRC (curr_set
), 0))
11313 == REGNO (SET_DEST (prev_set
))
11314 && rtx_equal_p (XEXP (SET_SRC (prev_set
), 0),
11315 XEXP (SET_SRC (curr_set
), 1)))
11321 && (aarch64_tune_params
.fusible_ops
& AARCH64_FUSE_MOVK_MOVK
))
11324 /* We're trying to match:
11325 prev (movk) == (set (zero_extract (reg r0)
11328 (const_int imm16_1))
11329 curr (movk) == (set (zero_extract (reg r0)
11332 (const_int imm16_2)) */
11334 if (GET_CODE (SET_DEST (prev_set
)) == ZERO_EXTRACT
11335 && GET_CODE (SET_DEST (curr_set
)) == ZERO_EXTRACT
11336 && REG_P (XEXP (SET_DEST (prev_set
), 0))
11337 && REG_P (XEXP (SET_DEST (curr_set
), 0))
11338 && REGNO (XEXP (SET_DEST (prev_set
), 0))
11339 == REGNO (XEXP (SET_DEST (curr_set
), 0))
11340 && CONST_INT_P (XEXP (SET_DEST (prev_set
), 2))
11341 && CONST_INT_P (XEXP (SET_DEST (curr_set
), 2))
11342 && INTVAL (XEXP (SET_DEST (prev_set
), 2)) == 32
11343 && INTVAL (XEXP (SET_DEST (curr_set
), 2)) == 48
11344 && CONST_INT_P (SET_SRC (prev_set
))
11345 && CONST_INT_P (SET_SRC (curr_set
)))
11350 && (aarch64_tune_params
.fusible_ops
& AARCH64_FUSE_ADRP_LDR
))
11352 /* We're trying to match:
11353 prev (adrp) == (set (reg r0)
11354 (high (symbol_ref ("SYM"))))
11355 curr (ldr) == (set (reg r1)
11356 (mem (lo_sum (reg r0)
11357 (symbol_ref ("SYM")))))
11359 curr (ldr) == (set (reg r1)
11362 (symbol_ref ("SYM")))))) */
11363 if (satisfies_constraint_Ush (SET_SRC (prev_set
))
11364 && REG_P (SET_DEST (prev_set
)) && REG_P (SET_DEST (curr_set
)))
11366 rtx curr_src
= SET_SRC (curr_set
);
11368 if (GET_CODE (curr_src
) == ZERO_EXTEND
)
11369 curr_src
= XEXP (curr_src
, 0);
11371 if (MEM_P (curr_src
) && GET_CODE (XEXP (curr_src
, 0)) == LO_SUM
11372 && REG_P (XEXP (XEXP (curr_src
, 0), 0))
11373 && REGNO (XEXP (XEXP (curr_src
, 0), 0))
11374 == REGNO (SET_DEST (prev_set
))
11375 && rtx_equal_p (XEXP (XEXP (curr_src
, 0), 1),
11376 XEXP (SET_SRC (prev_set
), 0)))
11381 if ((aarch64_tune_params
.fusible_ops
& AARCH64_FUSE_CMP_BRANCH
)
11382 && any_condjump_p (curr
))
11384 enum attr_type prev_type
= get_attr_type (prev
);
      /* FIXME: this misses some cases that ThunderX considers simple
	 arithmetic instructions.  Simple shifts are also missed here.  */
11388 if (prev_type
== TYPE_ALUS_SREG
11389 || prev_type
== TYPE_ALUS_IMM
11390 || prev_type
== TYPE_LOGICS_REG
11391 || prev_type
== TYPE_LOGICS_IMM
)
11398 /* If MEM is in the form of [base+offset], extract the two parts
11399 of address and set to BASE and OFFSET, otherwise return false
11400 after clearing BASE and OFFSET. */
11403 extract_base_offset_in_addr (rtx mem
, rtx
*base
, rtx
*offset
)
11407 gcc_assert (MEM_P (mem
));
11409 addr
= XEXP (mem
, 0);
11414 *offset
= const0_rtx
;
11418 if (GET_CODE (addr
) == PLUS
11419 && REG_P (XEXP (addr
, 0)) && CONST_INT_P (XEXP (addr
, 1)))
11421 *base
= XEXP (addr
, 0);
11422 *offset
= XEXP (addr
, 1);
11427 *offset
= NULL_RTX
;
11432 /* Types for scheduling fusion. */
11433 enum sched_fusion_type
11435 SCHED_FUSION_NONE
= 0,
11436 SCHED_FUSION_LD_SIGN_EXTEND
,
11437 SCHED_FUSION_LD_ZERO_EXTEND
,
11443 /* If INSN is a load or store of address in the form of [base+offset],
11444 extract the two parts and set to BASE and OFFSET. Return scheduling
11445 fusion type this INSN is. */
11447 static enum sched_fusion_type
11448 fusion_load_store (rtx_insn
*insn
, rtx
*base
, rtx
*offset
)
11451 enum sched_fusion_type fusion
= SCHED_FUSION_LD
;
11453 gcc_assert (INSN_P (insn
));
11454 x
= PATTERN (insn
);
11455 if (GET_CODE (x
) != SET
)
11456 return SCHED_FUSION_NONE
;
11459 dest
= SET_DEST (x
);
11461 if (GET_MODE (dest
) != SImode
&& GET_MODE (dest
) != DImode
11462 && GET_MODE (dest
) != SFmode
&& GET_MODE (dest
) != DFmode
)
11463 return SCHED_FUSION_NONE
;
11465 if (GET_CODE (src
) == SIGN_EXTEND
)
11467 fusion
= SCHED_FUSION_LD_SIGN_EXTEND
;
11468 src
= XEXP (src
, 0);
11469 if (GET_CODE (src
) != MEM
|| GET_MODE (src
) != SImode
)
11470 return SCHED_FUSION_NONE
;
11472 else if (GET_CODE (src
) == ZERO_EXTEND
)
11474 fusion
= SCHED_FUSION_LD_ZERO_EXTEND
;
11475 src
= XEXP (src
, 0);
11476 if (GET_CODE (src
) != MEM
|| GET_MODE (src
) != SImode
)
11477 return SCHED_FUSION_NONE
;
11480 if (GET_CODE (src
) == MEM
&& REG_P (dest
))
11481 extract_base_offset_in_addr (src
, base
, offset
);
11482 else if (GET_CODE (dest
) == MEM
&& (REG_P (src
) || src
== const0_rtx
))
11484 fusion
= SCHED_FUSION_ST
;
11485 extract_base_offset_in_addr (dest
, base
, offset
);
11488 return SCHED_FUSION_NONE
;
11490 if (*base
== NULL_RTX
|| *offset
== NULL_RTX
)
11491 fusion
= SCHED_FUSION_NONE
;
11496 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
11498 Currently we only support to fuse ldr or str instructions, so FUSION_PRI
11499 and PRI are only calculated for these instructions. For other instruction,
11500 FUSION_PRI and PRI are simply set to MAX_PRI - 1. In the future, other
11501 type instruction fusion can be added by returning different priorities.
11503 It's important that irrelevant instructions get the largest FUSION_PRI. */
11506 aarch64_sched_fusion_priority (rtx_insn
*insn
, int max_pri
,
11507 int *fusion_pri
, int *pri
)
11511 enum sched_fusion_type fusion
;
11513 gcc_assert (INSN_P (insn
));
11516 fusion
= fusion_load_store (insn
, &base
, &offset
);
11517 if (fusion
== SCHED_FUSION_NONE
)
11524 /* Set FUSION_PRI according to fusion type and base register. */
11525 *fusion_pri
= tmp
- fusion
* FIRST_PSEUDO_REGISTER
- REGNO (base
);
11527 /* Calculate PRI. */
11530 /* INSN with smaller offset goes first. */
11531 off_val
= (int)(INTVAL (offset
));
11533 tmp
-= (off_val
& 0xfffff);
11535 tmp
+= ((- off_val
) & 0xfffff);
11541 /* Given OPERANDS of consecutive load/store, check if we can merge
11542 them into ldp/stp. LOAD is true if they are load instructions.
11543 MODE is the mode of memory operands. */
11546 aarch64_operands_ok_for_ldpstp (rtx
*operands
, bool load
,
11547 enum machine_mode mode
)
11549 HOST_WIDE_INT offval_1
, offval_2
, msize
;
11550 enum reg_class rclass_1
, rclass_2
;
11551 rtx mem_1
, mem_2
, reg_1
, reg_2
, base_1
, base_2
, offset_1
, offset_2
;
11555 mem_1
= operands
[1];
11556 mem_2
= operands
[3];
11557 reg_1
= operands
[0];
11558 reg_2
= operands
[2];
11559 gcc_assert (REG_P (reg_1
) && REG_P (reg_2
));
11560 if (REGNO (reg_1
) == REGNO (reg_2
))
11565 mem_1
= operands
[0];
11566 mem_2
= operands
[2];
11567 reg_1
= operands
[1];
11568 reg_2
= operands
[3];
11571 /* The mems cannot be volatile. */
11572 if (MEM_VOLATILE_P (mem_1
) || MEM_VOLATILE_P (mem_2
))
11575 /* Check if the addresses are in the form of [base+offset]. */
11576 extract_base_offset_in_addr (mem_1
, &base_1
, &offset_1
);
11577 if (base_1
== NULL_RTX
|| offset_1
== NULL_RTX
)
11579 extract_base_offset_in_addr (mem_2
, &base_2
, &offset_2
);
11580 if (base_2
== NULL_RTX
|| offset_2
== NULL_RTX
)
11583 /* Check if the bases are same. */
11584 if (!rtx_equal_p (base_1
, base_2
))
11587 offval_1
= INTVAL (offset_1
);
11588 offval_2
= INTVAL (offset_2
);
11589 msize
= GET_MODE_SIZE (mode
);
11590 /* Check if the offsets are consecutive. */
11591 if (offval_1
!= (offval_2
+ msize
) && offval_2
!= (offval_1
+ msize
))
11594 /* Check if the addresses are clobbered by load. */
11597 if (reg_mentioned_p (reg_1
, mem_1
))
11600 /* In increasing order, the last load can clobber the address. */
11601 if (offval_1
> offval_2
&& reg_mentioned_p (reg_2
, mem_2
))
11605 if (REG_P (reg_1
) && FP_REGNUM_P (REGNO (reg_1
)))
11606 rclass_1
= FP_REGS
;
11608 rclass_1
= GENERAL_REGS
;
11610 if (REG_P (reg_2
) && FP_REGNUM_P (REGNO (reg_2
)))
11611 rclass_2
= FP_REGS
;
11613 rclass_2
= GENERAL_REGS
;
11615 /* Check if the registers are of same class. */
11616 if (rclass_1
!= rclass_2
)
11622 /* Given OPERANDS of consecutive load/store, check if we can merge
11623 them into ldp/stp by adjusting the offset. LOAD is true if they
11624 are load instructions. MODE is the mode of memory operands.
11626 Given below consecutive stores:
11628 str w1, [xb, 0x100]
11629 str w1, [xb, 0x104]
11630 str w1, [xb, 0x108]
11631 str w1, [xb, 0x10c]
11633 Though the offsets are out of the range supported by stp, we can
11634 still pair them after adjusting the offset, like:
11636 add scratch, xb, 0x100
11637 stp w1, w1, [scratch]
11638 stp w1, w1, [scratch, 0x8]
11640 The peephole patterns detecting this opportunity should guarantee
   the scratch register is available.  */
11644 aarch64_operands_adjust_ok_for_ldpstp (rtx
*operands
, bool load
,
11645 enum machine_mode mode
)
11647 enum reg_class rclass_1
, rclass_2
, rclass_3
, rclass_4
;
11648 HOST_WIDE_INT offval_1
, offval_2
, offval_3
, offval_4
, msize
;
11649 rtx mem_1
, mem_2
, mem_3
, mem_4
, reg_1
, reg_2
, reg_3
, reg_4
;
11650 rtx base_1
, base_2
, base_3
, base_4
, offset_1
, offset_2
, offset_3
, offset_4
;
11654 reg_1
= operands
[0];
11655 mem_1
= operands
[1];
11656 reg_2
= operands
[2];
11657 mem_2
= operands
[3];
11658 reg_3
= operands
[4];
11659 mem_3
= operands
[5];
11660 reg_4
= operands
[6];
11661 mem_4
= operands
[7];
11662 gcc_assert (REG_P (reg_1
) && REG_P (reg_2
)
11663 && REG_P (reg_3
) && REG_P (reg_4
));
11664 if (REGNO (reg_1
) == REGNO (reg_2
) || REGNO (reg_3
) == REGNO (reg_4
))
11669 mem_1
= operands
[0];
11670 reg_1
= operands
[1];
11671 mem_2
= operands
[2];
11672 reg_2
= operands
[3];
11673 mem_3
= operands
[4];
11674 reg_3
= operands
[5];
11675 mem_4
= operands
[6];
11676 reg_4
= operands
[7];
  /* Skip if the memory operand is by itself valid for ldp/stp.  */
11679 if (!MEM_P (mem_1
) || aarch64_mem_pair_operand (mem_1
, mode
))
11682 /* The mems cannot be volatile. */
11683 if (MEM_VOLATILE_P (mem_1
) || MEM_VOLATILE_P (mem_2
)
11684 || MEM_VOLATILE_P (mem_3
) ||MEM_VOLATILE_P (mem_4
))
11687 /* Check if the addresses are in the form of [base+offset]. */
11688 extract_base_offset_in_addr (mem_1
, &base_1
, &offset_1
);
11689 if (base_1
== NULL_RTX
|| offset_1
== NULL_RTX
)
11691 extract_base_offset_in_addr (mem_2
, &base_2
, &offset_2
);
11692 if (base_2
== NULL_RTX
|| offset_2
== NULL_RTX
)
11694 extract_base_offset_in_addr (mem_3
, &base_3
, &offset_3
);
11695 if (base_3
== NULL_RTX
|| offset_3
== NULL_RTX
)
11697 extract_base_offset_in_addr (mem_4
, &base_4
, &offset_4
);
11698 if (base_4
== NULL_RTX
|| offset_4
== NULL_RTX
)
11701 /* Check if the bases are same. */
11702 if (!rtx_equal_p (base_1
, base_2
)
11703 || !rtx_equal_p (base_2
, base_3
)
11704 || !rtx_equal_p (base_3
, base_4
))
11707 offval_1
= INTVAL (offset_1
);
11708 offval_2
= INTVAL (offset_2
);
11709 offval_3
= INTVAL (offset_3
);
11710 offval_4
= INTVAL (offset_4
);
11711 msize
= GET_MODE_SIZE (mode
);
11712 /* Check if the offsets are consecutive. */
11713 if ((offval_1
!= (offval_2
+ msize
)
11714 || offval_1
!= (offval_3
+ msize
* 2)
11715 || offval_1
!= (offval_4
+ msize
* 3))
11716 && (offval_4
!= (offval_3
+ msize
)
11717 || offval_4
!= (offval_2
+ msize
* 2)
11718 || offval_4
!= (offval_1
+ msize
* 3)))
11721 /* Check if the addresses are clobbered by load. */
11724 if (reg_mentioned_p (reg_1
, mem_1
)
11725 || reg_mentioned_p (reg_2
, mem_2
)
11726 || reg_mentioned_p (reg_3
, mem_3
))
11729 /* In increasing order, the last load can clobber the address. */
11730 if (offval_1
> offval_2
&& reg_mentioned_p (reg_4
, mem_4
))
11734 if (REG_P (reg_1
) && FP_REGNUM_P (REGNO (reg_1
)))
11735 rclass_1
= FP_REGS
;
11737 rclass_1
= GENERAL_REGS
;
11739 if (REG_P (reg_2
) && FP_REGNUM_P (REGNO (reg_2
)))
11740 rclass_2
= FP_REGS
;
11742 rclass_2
= GENERAL_REGS
;
11744 if (REG_P (reg_3
) && FP_REGNUM_P (REGNO (reg_3
)))
11745 rclass_3
= FP_REGS
;
11747 rclass_3
= GENERAL_REGS
;
11749 if (REG_P (reg_4
) && FP_REGNUM_P (REGNO (reg_4
)))
11750 rclass_4
= FP_REGS
;
11752 rclass_4
= GENERAL_REGS
;
11754 /* Check if the registers are of same class. */
11755 if (rclass_1
!= rclass_2
|| rclass_2
!= rclass_3
|| rclass_3
!= rclass_4
)
11761 /* Given OPERANDS of consecutive load/store, this function pairs them
11762 into ldp/stp after adjusting the offset. It depends on the fact
11763 that addresses of load/store instructions are in increasing order.
11764 MODE is the mode of memory operands. CODE is the rtl operator
11765 which should be applied to all memory operands, it's SIGN_EXTEND,
11766 ZERO_EXTEND or UNKNOWN. */
11769 aarch64_gen_adjusted_ldpstp (rtx
*operands
, bool load
,
11770 enum machine_mode mode
, RTX_CODE code
)
11772 rtx base
, offset
, t1
, t2
;
11773 rtx mem_1
, mem_2
, mem_3
, mem_4
;
11774 HOST_WIDE_INT off_val
, abs_off
, adj_off
, new_off
, stp_off_limit
, msize
;
11778 mem_1
= operands
[1];
11779 mem_2
= operands
[3];
11780 mem_3
= operands
[5];
11781 mem_4
= operands
[7];
11785 mem_1
= operands
[0];
11786 mem_2
= operands
[2];
11787 mem_3
= operands
[4];
11788 mem_4
= operands
[6];
11789 gcc_assert (code
== UNKNOWN
);
11792 extract_base_offset_in_addr (mem_1
, &base
, &offset
);
11793 gcc_assert (base
!= NULL_RTX
&& offset
!= NULL_RTX
);
11795 /* Adjust offset thus it can fit in ldp/stp instruction. */
11796 msize
= GET_MODE_SIZE (mode
);
11797 stp_off_limit
= msize
* 0x40;
11798 off_val
= INTVAL (offset
);
11799 abs_off
= (off_val
< 0) ? -off_val
: off_val
;
11800 new_off
= abs_off
% stp_off_limit
;
11801 adj_off
= abs_off
- new_off
;
11803 /* Further adjust to make sure all offsets are OK. */
11804 if ((new_off
+ msize
* 2) >= stp_off_limit
)
11806 adj_off
+= stp_off_limit
;
11807 new_off
-= stp_off_limit
;
11810 /* Make sure the adjustment can be done with ADD/SUB instructions. */
11811 if (adj_off
>= 0x1000)
11816 adj_off
= -adj_off
;
11817 new_off
= -new_off
;
11820 /* Create new memory references. */
11821 mem_1
= change_address (mem_1
, VOIDmode
,
11822 plus_constant (DImode
, operands
[8], new_off
));
11824 /* Check if the adjusted address is OK for ldp/stp. */
11825 if (!aarch64_mem_pair_operand (mem_1
, mode
))
11828 msize
= GET_MODE_SIZE (mode
);
11829 mem_2
= change_address (mem_2
, VOIDmode
,
11830 plus_constant (DImode
,
11833 mem_3
= change_address (mem_3
, VOIDmode
,
11834 plus_constant (DImode
,
11836 new_off
+ msize
* 2));
11837 mem_4
= change_address (mem_4
, VOIDmode
,
11838 plus_constant (DImode
,
11840 new_off
+ msize
* 3));
11842 if (code
== ZERO_EXTEND
)
11844 mem_1
= gen_rtx_ZERO_EXTEND (DImode
, mem_1
);
11845 mem_2
= gen_rtx_ZERO_EXTEND (DImode
, mem_2
);
11846 mem_3
= gen_rtx_ZERO_EXTEND (DImode
, mem_3
);
11847 mem_4
= gen_rtx_ZERO_EXTEND (DImode
, mem_4
);
11849 else if (code
== SIGN_EXTEND
)
11851 mem_1
= gen_rtx_SIGN_EXTEND (DImode
, mem_1
);
11852 mem_2
= gen_rtx_SIGN_EXTEND (DImode
, mem_2
);
11853 mem_3
= gen_rtx_SIGN_EXTEND (DImode
, mem_3
);
11854 mem_4
= gen_rtx_SIGN_EXTEND (DImode
, mem_4
);
11859 operands
[1] = mem_1
;
11860 operands
[3] = mem_2
;
11861 operands
[5] = mem_3
;
11862 operands
[7] = mem_4
;
11866 operands
[0] = mem_1
;
11867 operands
[2] = mem_2
;
11868 operands
[4] = mem_3
;
11869 operands
[6] = mem_4
;
11872 /* Emit adjusting instruction. */
11873 emit_insn (gen_rtx_SET (operands
[8], plus_constant (DImode
, base
, adj_off
)));
11874 /* Emit ldp/stp instructions. */
11875 t1
= gen_rtx_SET (operands
[0], operands
[1]);
11876 t2
= gen_rtx_SET (operands
[2], operands
[3]);
11877 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, t1
, t2
)));
11878 t1
= gen_rtx_SET (operands
[4], operands
[5]);
11879 t2
= gen_rtx_SET (operands
[6], operands
[7]);
11880 emit_insn (gen_rtx_PARALLEL (VOIDmode
, gen_rtvec (2, t1
, t2
)));
/* Return 1 if pseudo register should be created and used to hold
   GOT address for PIC code.  */

static bool
aarch64_use_pseudo_pic_reg (void)
{
  return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC;
}
11893 #undef TARGET_ADDRESS_COST
11894 #define TARGET_ADDRESS_COST aarch64_address_cost
11896 /* This hook will determines whether unnamed bitfields affect the alignment
11897 of the containing structure. The hook returns true if the structure
11898 should inherit the alignment requirements of an unnamed bitfield's
11900 #undef TARGET_ALIGN_ANON_BITFIELD
11901 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
11903 #undef TARGET_ASM_ALIGNED_DI_OP
11904 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
11906 #undef TARGET_ASM_ALIGNED_HI_OP
11907 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
11909 #undef TARGET_ASM_ALIGNED_SI_OP
11910 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
11912 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
11913 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
11914 hook_bool_const_tree_hwi_hwi_const_tree_true
11916 #undef TARGET_ASM_FILE_START
11917 #define TARGET_ASM_FILE_START aarch64_start_file
11919 #undef TARGET_ASM_OUTPUT_MI_THUNK
11920 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
11922 #undef TARGET_ASM_SELECT_RTX_SECTION
11923 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
11925 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
11926 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
11928 #undef TARGET_BUILD_BUILTIN_VA_LIST
11929 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
11931 #undef TARGET_CALLEE_COPIES
11932 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
11934 #undef TARGET_CAN_ELIMINATE
11935 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
11937 #undef TARGET_CANNOT_FORCE_CONST_MEM
11938 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
11940 #undef TARGET_CONDITIONAL_REGISTER_USAGE
11941 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
11943 /* Only the least significant bit is used for initialization guard
11945 #undef TARGET_CXX_GUARD_MASK_BIT
11946 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
11948 #undef TARGET_C_MODE_FOR_SUFFIX
11949 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
11951 #ifdef TARGET_BIG_ENDIAN_DEFAULT
11952 #undef TARGET_DEFAULT_TARGET_FLAGS
11953 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
11956 #undef TARGET_CLASS_MAX_NREGS
11957 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
11959 #undef TARGET_BUILTIN_DECL
11960 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
11962 #undef TARGET_EXPAND_BUILTIN
11963 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
11965 #undef TARGET_EXPAND_BUILTIN_VA_START
11966 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
11968 #undef TARGET_FOLD_BUILTIN
11969 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
11971 #undef TARGET_FUNCTION_ARG
11972 #define TARGET_FUNCTION_ARG aarch64_function_arg
11974 #undef TARGET_FUNCTION_ARG_ADVANCE
11975 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
11977 #undef TARGET_FUNCTION_ARG_BOUNDARY
11978 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
11980 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
11981 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
11983 #undef TARGET_FUNCTION_VALUE
11984 #define TARGET_FUNCTION_VALUE aarch64_function_value
11986 #undef TARGET_FUNCTION_VALUE_REGNO_P
11987 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
11989 #undef TARGET_FRAME_POINTER_REQUIRED
11990 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
11992 #undef TARGET_GIMPLE_FOLD_BUILTIN
11993 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
11995 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
11996 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
11998 #undef TARGET_INIT_BUILTINS
11999 #define TARGET_INIT_BUILTINS aarch64_init_builtins
12001 #undef TARGET_LEGITIMATE_ADDRESS_P
12002 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
12004 #undef TARGET_LEGITIMATE_CONSTANT_P
12005 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
12007 #undef TARGET_LIBGCC_CMP_RETURN_MODE
12008 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
12010 #undef TARGET_LRA_P
12011 #define TARGET_LRA_P hook_bool_void_true
12013 #undef TARGET_MANGLE_TYPE
12014 #define TARGET_MANGLE_TYPE aarch64_mangle_type
12016 #undef TARGET_MEMORY_MOVE_COST
12017 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
12019 #undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
12020 #define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul
12022 #undef TARGET_MUST_PASS_IN_STACK
12023 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
12025 /* This target hook should return true if accesses to volatile bitfields
12026 should use the narrowest mode possible. It should return false if these
12027 accesses should use the bitfield container type. */
12028 #undef TARGET_NARROW_VOLATILE_BITFIELD
12029 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE aarch64_override_options

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  aarch64_override_options_after_change

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB aarch64_return_in_msb

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  aarch64_sched_first_cycle_multipass_dfa_lookahead

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p

#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  aarch64_builtin_vectorization_cost

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  aarch64_builtin_vectorized_function

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  aarch64_autovectorize_vector_sizes

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
  aarch64_atomic_assign_expand_fenv

/* Section anchor support.  */

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -256

/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
   byte offset; we can do much more for larger data types, but have no way
   to determine the size of the access.  We assume accesses are aligned.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095
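
/* Illustrative sketch (an assumption for exposition, not part of this file):
   with -fsection-anchors, nearby globals

       int a, b;

   can both be reached from one base register holding the block's anchor
   symbol, roughly

       adrp x0, .LANCHOR0
       add  x0, x0, :lo12:.LANCHOR0
       ldr  w1, [x0, #OFFSET_A]   // offsets must stay within [-256, 4095]
       ldr  w2, [x0, #OFFSET_B]

   where .LANCHOR0, OFFSET_A and OFFSET_B are placeholders; the range is
   capped at the byte-addressing limits above because the size of the
   eventual access is not known when the anchors are laid out.  */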
#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  aarch64_simd_vector_alignment_reachable

/* vec_perm support.  */

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  aarch64_vectorize_vec_perm_const_ok

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs

#undef TARGET_FLAGS_REGNUM
#define TARGET_FLAGS_REGNUM CC_REGNUM

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address

#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
  aarch64_use_by_pieces_infrastructure_p

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority

#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG aarch64_use_pseudo_pic_reg
struct gcc_target targetm = TARGET_INITIALIZER;
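
/* Illustrative sketch (hypothetical names, not GCC source): the pattern used
   throughout the block above is that the shared target-def.h header supplies
   a default for every TARGET_* hook macro plus an initializer that expands to
   all of them, so a back end only overrides what it needs, e.g.

       struct hooks { bool (*lra_p) (void); int max_anchor; };
       #define HOOK_LRA_P      default_lra_p
       #define HOOK_MAX_ANCHOR 0
       #undef  HOOK_MAX_ANCHOR            // override a single default
       #define HOOK_MAX_ANCHOR 4095
       #define HOOKS_INITIALIZER { HOOK_LRA_P, HOOK_MAX_ANCHOR }
       struct hooks hooks = HOOKS_INITIALIZER;

   TARGET_INITIALIZER plays the role of HOOKS_INITIALIZER and targetm the
   role of 'hooks'; 'default_lra_p' and the HOOK_* names are placeholders
   for this illustration only.  */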

#include "gt-aarch64.h"