/* Machine description for AArch64 architecture.
   Copyright (C) 2009-2015 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "stringpool.h"
#include "diagnostic.h"
#include "insn-attr.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "langhooks.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "aarch64-cost-tables.h"
#include "tm-constrs.h"
#include "sched-int.h"
#include "cortex-a57-fma-steering.h"
#include "target-globals.h"

/* This file should be included last.  */
#include "target-def.h"
/* Defined for convenience.  */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
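/* For example, under the default LP64 ABI, POINTER_SIZE is 64 and
   BITS_PER_UNIT is 8, so POINTER_BYTES evaluates to 8; under -mabi=ilp32
   it evaluates to 4.  */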
/* Classifies an address.

   ADDRESS_REG_IMM
       A simple base register plus immediate offset.

   ADDRESS_REG_WB
       A base register indexed by immediate offset with writeback.

   ADDRESS_REG_REG
       A base register indexed by (optionally scaled) register.

   ADDRESS_REG_UXTW
       A base register indexed by (optionally scaled) zero-extended register.

   ADDRESS_REG_SXTW
       A base register indexed by (optionally scaled) sign-extended register.

   ADDRESS_LO_SUM
       A LO_SUM rtx with a base register and "LO12" symbol relocation.

   ADDRESS_SYMBOLIC
       A constant symbolic address, in pc-relative literal pool.  */

enum aarch64_address_type
{
  ADDRESS_REG_IMM,
  ADDRESS_REG_WB,
  ADDRESS_REG_REG,
  ADDRESS_REG_UXTW,
  ADDRESS_REG_SXTW,
  ADDRESS_LO_SUM,
  ADDRESS_SYMBOLIC
};

struct aarch64_address_info
{
  enum aarch64_address_type type;
  rtx base;
  rtx offset;
  int shift;
  enum aarch64_symbol_type symbol_type;
};
struct simd_immediate_info
{
  rtx value;
  int shift;
  int element_width;
  bool mvn;
  bool msl;
};
/* The current code model.  */
enum aarch64_code_model aarch64_cmodel;

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif
static bool aarch64_composite_type_p (const_tree, machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
						     const_tree,
						     machine_mode *, int *,
						     bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (machine_mode);
static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
						 const unsigned char *sel);
static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
/* Major revision number of the ARM Architecture implemented by the target.  */
unsigned aarch64_architecture_version;

/* The processor for which instructions should be scheduled.  */
enum aarch64_processor aarch64_tune = cortexa53;

/* Mask to specify which instruction scheduling options should be used.  */
unsigned long aarch64_tune_flags = 0;

/* Global flag for PC relative loads.  */
bool aarch64_nopcrelative_literal_loads;
/* Support for command line parsing of boolean flags in the tuning
   structures.  */
struct aarch64_flag_desc
{
  const char* name;
  unsigned int flag;
};
#define AARCH64_FUSION_PAIR(name, internal_name) \
  { name, AARCH64_FUSE_##internal_name },
static const struct aarch64_flag_desc aarch64_fusible_pairs[] =
{
  { "none", AARCH64_FUSE_NOTHING },
#include "aarch64-fusion-pairs.def"
  { "all", AARCH64_FUSE_ALL },
  { NULL, AARCH64_FUSE_NOTHING }
};
#undef AARCH64_FUSION_PAIR
#define AARCH64_EXTRA_TUNING_OPTION(name, internal_name) \
  { name, AARCH64_EXTRA_TUNE_##internal_name },
static const struct aarch64_flag_desc aarch64_tuning_flags[] =
{
  { "none", AARCH64_EXTRA_TUNE_NONE },
#include "aarch64-tuning-flags.def"
  { "all", AARCH64_EXTRA_TUNE_ALL },
  { NULL, AARCH64_EXTRA_TUNE_NONE }
};
#undef AARCH64_EXTRA_TUNING_OPTION
/* Tuning parameters.  */

static const struct cpu_addrcost_table generic_addrcost_table =
{
    {
      0, /* hi  */
      0, /* si  */
      0, /* di  */
      0, /* ti  */
    },
  0, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  0, /* register_sextend  */
  0, /* register_zextend  */
  0 /* imm_offset  */
};

static const struct cpu_addrcost_table cortexa57_addrcost_table =
{
    {
      1, /* hi  */
      0, /* si  */
      0, /* di  */
      1, /* ti  */
    },
  0, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  0, /* register_sextend  */
  0, /* register_zextend  */
  0 /* imm_offset  */
};

static const struct cpu_addrcost_table xgene1_addrcost_table =
{
    {
      1, /* hi  */
      0, /* si  */
      0, /* di  */
      1, /* ti  */
    },
  1, /* pre_modify  */
  0, /* post_modify  */
  0, /* register_offset  */
  1, /* register_sextend  */
  1, /* register_zextend  */
  0 /* imm_offset  */
};
static const struct cpu_regmove_cost generic_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost cortexa57_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost cortexa53_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  5, /* GP2FP  */
  5, /* FP2GP  */
  2 /* FP2FP  */
};

static const struct cpu_regmove_cost thunderx_regmove_cost =
{
  2, /* GP2GP  */
  2, /* GP2FP  */
  6, /* FP2GP  */
  4 /* FP2FP  */
};

static const struct cpu_regmove_cost xgene1_regmove_cost =
{
  1, /* GP2GP  */
  /* Avoid the use of slow int<->fp moves for spilling by setting
     their cost higher than memmov_cost.  */
  8, /* GP2FP  */
  8, /* FP2GP  */
  2 /* FP2FP  */
};
/* Generic costs for vector insn classes.  */
static const struct cpu_vector_cost generic_vector_cost =
{
  1, /* scalar_stmt_cost  */
  1, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  1, /* vec_stmt_cost  */
  1, /* vec_to_scalar_cost  */
  1, /* scalar_to_vec_cost  */
  1, /* vec_align_load_cost  */
  1, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  3, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Cortex-A57 costs for vector insn classes.  */
static const struct cpu_vector_cost cortexa57_vector_cost =
{
  1, /* scalar_stmt_cost  */
  4, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  3, /* vec_stmt_cost  */
  8, /* vec_to_scalar_cost  */
  8, /* scalar_to_vec_cost  */
  5, /* vec_align_load_cost  */
  5, /* vec_unalign_load_cost  */
  1, /* vec_unalign_store_cost  */
  1, /* vec_store_cost  */
  1, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* XGene-1 costs for vector insn classes.  */
static const struct cpu_vector_cost xgene1_vector_cost =
{
  1, /* scalar_stmt_cost  */
  5, /* scalar_load_cost  */
  1, /* scalar_store_cost  */
  2, /* vec_stmt_cost  */
  4, /* vec_to_scalar_cost  */
  4, /* scalar_to_vec_cost  */
  10, /* vec_align_load_cost  */
  10, /* vec_unalign_load_cost  */
  2, /* vec_unalign_store_cost  */
  2, /* vec_store_cost  */
  2, /* cond_taken_branch_cost  */
  1 /* cond_not_taken_branch_cost  */
};

/* Generic costs for branch instructions.  */
static const struct cpu_branch_cost generic_branch_cost =
{
  2, /* Predictable.  */
  2 /* Unpredictable.  */
};
static const struct tune_params generic_tunings =
{
  &cortexa57_extra_costs,
  &generic_addrcost_table,
  &generic_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  4, /* memmov_cost  */
  2, /* issue_rate  */
  AARCH64_FUSE_NOTHING, /* fusible_ops  */
  8, /* function_align.  */
  8, /* jump_align.  */
  4, /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params cortexa53_tunings =
{
  &cortexa53_extra_costs,
  &generic_addrcost_table,
  &cortexa53_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  4, /* memmov_cost  */
  2, /* issue_rate  */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
  8, /* function_align.  */
  8, /* jump_align.  */
  4, /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params cortexa57_tunings =
{
  &cortexa57_extra_costs,
  &cortexa57_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  4, /* memmov_cost  */
  3, /* issue_rate  */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
  16, /* function_align.  */
  8, /* jump_align.  */
  4, /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS
   | AARCH64_EXTRA_TUNE_RECIP_SQRT) /* tune_flags.  */
};

static const struct tune_params cortexa72_tunings =
{
  &cortexa57_extra_costs,
  &cortexa57_addrcost_table,
  &cortexa57_regmove_cost,
  &cortexa57_vector_cost,
  &generic_branch_cost,
  4, /* memmov_cost  */
  3, /* issue_rate  */
  (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
   | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
  16, /* function_align.  */
  8, /* jump_align.  */
  4, /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params thunderx_tunings =
{
  &thunderx_extra_costs,
  &generic_addrcost_table,
  &thunderx_regmove_cost,
  &generic_vector_cost,
  &generic_branch_cost,
  6, /* memmov_cost  */
  2, /* issue_rate  */
  AARCH64_FUSE_CMP_BRANCH, /* fusible_ops  */
  8, /* function_align.  */
  8, /* jump_align.  */
  8, /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_NONE) /* tune_flags.  */
};

static const struct tune_params xgene1_tunings =
{
  &xgene1_extra_costs,
  &xgene1_addrcost_table,
  &xgene1_regmove_cost,
  &xgene1_vector_cost,
  &generic_branch_cost,
  6, /* memmov_cost  */
  4, /* issue_rate  */
  AARCH64_FUSE_NOTHING, /* fusible_ops  */
  16, /* function_align.  */
  8, /* jump_align.  */
  16, /* loop_align.  */
  2, /* int_reassoc_width.  */
  4, /* fp_reassoc_width.  */
  1, /* vec_reassoc_width.  */
  2, /* min_div_recip_mul_sf.  */
  2, /* min_div_recip_mul_df.  */
  tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
  (AARCH64_EXTRA_TUNE_RECIP_SQRT) /* tune_flags.  */
};
/* Support for fine-grained override of the tuning structures.  */
struct aarch64_tuning_override_function
{
  const char* name;
  void (*parse_override)(const char*, struct tune_params*);
};

static void aarch64_parse_fuse_string (const char*, struct tune_params*);
static void aarch64_parse_tune_string (const char*, struct tune_params*);

static const struct aarch64_tuning_override_function
aarch64_tuning_override_functions[] =
{
  { "fuse", aarch64_parse_fuse_string },
  { "tune", aarch64_parse_tune_string },
  { NULL, NULL }
};
/* A processor implementing AArch64.  */
struct processor
{
  const char *const name;
  enum aarch64_processor ident;
  enum aarch64_processor sched_core;
  enum aarch64_arch arch;
  unsigned architecture_version;
  const unsigned long flags;
  const struct tune_params *const tune;
};
/* Architectures implementing AArch64.  */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
  {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, ARCH_REV, FLAGS, NULL},
#include "aarch64-arches.def"
#undef AARCH64_ARCH
  {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
};
/* Processor cores implementing AArch64.  */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART) \
  {NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH,				\
   all_architectures[AARCH64_ARCH_##ARCH].architecture_version,	\
   FLAGS, &COSTS##_tunings},
#include "aarch64-cores.def"
#undef AARCH64_CORE
  {"generic", generic, cortexa53, AARCH64_ARCH_8A, 8,
   AARCH64_FL_FOR_ARCH8, &generic_tunings},
  {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
};
/* Target specification.  These are populated by the -march, -mtune, -mcpu
   handling code or by target attributes.  */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;

/* The current tuning set.  */
struct tune_params aarch64_tune_params = generic_tunings;

#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
/* An ISA extension in the co-processor and main instruction set space.  */
struct aarch64_option_extension
{
  const char *const name;
  const unsigned long flags_on;
  const unsigned long flags_off;
};

/* ISA extensions in AArch64.  */
static const struct aarch64_option_extension all_extensions[] =
{
#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF, FEATURE_STRING) \
  {NAME, FLAGS_ON, FLAGS_OFF},
#include "aarch64-option-extensions.def"
#undef AARCH64_OPT_EXTENSION
  {NULL, 0, 0}
};
typedef enum aarch64_cond_code
{
  AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
  AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
  AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
} aarch64_cc;

#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))

/* The condition codes of the processor, and the inverse function.  */
static const char * const aarch64_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
/* Generate code to enable conditional branches in functions over 1 MiB.  */
const char *
aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest,
			const char * branch_format)
{
  rtx_code_label * tmp_label = gen_label_rtx ();
  char label_buf[256];
  char buffer[128];
  ASM_GENERATE_INTERNAL_LABEL (label_buf, dest,
			       CODE_LABEL_NUMBER (tmp_label));
  const char *label_ptr = targetm.strip_name_encoding (label_buf);
  rtx dest_label = operands[pos_label];
  operands[pos_label] = tmp_label;

  snprintf (buffer, sizeof (buffer), "%s%s", branch_format, label_ptr);
  output_asm_insn (buffer, operands);

  snprintf (buffer, sizeof (buffer), "b\t%%l%d\n%s:", pos_label, label_ptr);
  operands[pos_label] = dest_label;
  output_asm_insn (buffer, operands);
  return "";
}
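/* A sketch of the sequence emitted above for an out-of-range conditional
   branch, assuming BRANCH_FORMAT carries the inverted condition:

	<inverted-branch>	.Lb<N>	// short-range branch over the far jump
	b	<destination>		// unconditional, +/-128 MiB range
   .Lb<N>:

   Only the short-range branch needs to reach the local label.  */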
/* Report an error for a use of floating-point or Advanced SIMD values
   that the current target options do not allow.  */
static void
aarch64_err_no_fpadvsimd (machine_mode mode, const char *msg)
{
  const char *mc = FLOAT_MODE_P (mode) ? "floating-point" : "vector";
  if (TARGET_GENERAL_REGS_ONLY)
    error ("%qs is incompatible with %s %s", "-mgeneral-regs-only", mc, msg);
  else
    error ("%qs feature modifier is incompatible with %s %s", "+nofp", mc,
	   msg);
}
static unsigned int
aarch64_min_divisions_for_recip_mul (enum machine_mode mode)
{
  if (GET_MODE_UNIT_SIZE (mode) == 4)
    return aarch64_tune_params.min_div_recip_mul_sf;
  return aarch64_tune_params.min_div_recip_mul_df;
}

static int
aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED,
			     enum machine_mode mode)
{
  if (VECTOR_MODE_P (mode))
    return aarch64_tune_params.vec_reassoc_width;
  if (INTEGRAL_MODE_P (mode))
    return aarch64_tune_params.int_reassoc_width;
  if (FLOAT_MODE_P (mode))
    return aarch64_tune_params.fp_reassoc_width;
  return 1;
}
/* Provide a mapping from gcc register numbers to dwarf register numbers.  */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
   if (GP_REGNUM_P (regno))
     return AARCH64_DWARF_R0 + regno - R0_REGNUM;
   else if (regno == SP_REGNUM)
     return AARCH64_DWARF_SP;
   else if (FP_REGNUM_P (regno))
     return AARCH64_DWARF_V0 + regno - V0_REGNUM;

   /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
      equivalent DWARF register.  */
   return DWARF_FRAME_REGISTERS;
}
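/* For instance, x0 maps to DWARF register 0, sp to 31 and v0 to 64, per
   the AArch64 DWARF register numbering convention.  */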
/* Return TRUE if MODE is any of the large INT modes.  */
static bool
aarch64_vect_struct_mode_p (machine_mode mode)
{
  return mode == OImode || mode == CImode || mode == XImode;
}

/* Return TRUE if MODE is any of the vector modes.  */
static bool
aarch64_vector_mode_p (machine_mode mode)
{
  return aarch64_vector_mode_supported_p (mode)
	 || aarch64_vect_struct_mode_p (mode);
}
/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P.  */
static bool
aarch64_array_mode_supported_p (machine_mode mode,
				unsigned HOST_WIDE_INT nelems)
{
  if (TARGET_SIMD
      && (AARCH64_VALID_SIMD_QREG_MODE (mode)
	  || AARCH64_VALID_SIMD_DREG_MODE (mode))
      && (nelems >= 2 && nelems <= 4))
    return true;

  return false;
}
/* Implement HARD_REGNO_NREGS.  */
int
aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
{
  switch (aarch64_regno_regclass (regno))
    {
    case FP_REGS:
    case FP_LO_REGS:
      return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
    default:
      return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    }
  gcc_unreachable ();
}
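/* For example, a 16-byte TImode value needs two X registers when held in
   the general register file, but only a single 128-bit V register when
   held in the FP register file, so the answer depends on the register
   class of REGNO.  */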
/* Implement HARD_REGNO_MODE_OK.  */
int
aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_CC)
    return regno == CC_REGNUM;

  if (regno == SP_REGNUM)
    /* The purpose of comparing with ptr_mode is to support the
       global register variable associated with the stack pointer
       register via the syntax of asm ("wsp") in ILP32.  */
    return mode == Pmode || mode == ptr_mode;

  if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
    return mode == Pmode;

  if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
    return 1;

  if (FP_REGNUM_P (regno))
    {
      if (aarch64_vect_struct_mode_p (mode))
	return
	  (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
      else
	return 1;
    }

  return 0;
}

/* Implement HARD_REGNO_CALLER_SAVE_MODE.  */
machine_mode
aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
				     machine_mode mode)
{
  /* Handle modes that fit within single registers.  */
  if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
    {
      if (GET_MODE_SIZE (mode) >= 4)
	return mode;
      else
	return SImode;
    }
  /* Fall back to generic for multi-reg and very large modes.  */
  else
    return choose_hard_reg_mode (regno, nregs, false);
}
/* Return true if calls to DECL should be treated as
   long-calls (i.e. called via a register).  */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
  return false;
}

/* Return true if calls to symbol-ref SYM should be treated as
   long-calls (i.e. called via a register).  */
bool
aarch64_is_long_call_p (rtx sym)
{
  return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}
/* Return true if calls to symbol-ref SYM should not go through
   plt stubs.  */
bool
aarch64_is_noplt_call_p (rtx sym)
{
  const_tree decl = SYMBOL_REF_DECL (sym);

  if (flag_pic
      && decl
      && (!flag_plt
	  || lookup_attribute ("noplt", DECL_ATTRIBUTES (decl)))
      && !targetm.binds_local_p (decl))
    return true;

  return false;
}
/* Return true if the offsets to a zero/sign-extract operation
   represent an expression that matches an extend operation.  The
   operands represent the parameters from

   (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)).  */
bool
aarch64_is_extend_from_extract (machine_mode mode, rtx mult_imm,
				rtx extract_imm)
{
  HOST_WIDE_INT mult_val, extract_val;

  if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
    return false;

  mult_val = INTVAL (mult_imm);
  extract_val = INTVAL (extract_imm);

  if (extract_val > 8
      && extract_val < GET_MODE_BITSIZE (mode)
      && exact_log2 (extract_val & ~7) > 0
      && (extract_val & 7) <= 4
      && mult_val == (1 << (extract_val & 7)))
    return true;

  return false;
}
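/* As a worked example: mult_val == 8 and extract_val == 35 satisfy the
   test above, since 35 & ~7 == 32 (a power of two), 35 & 7 == 3, and
   1 << 3 == 8, i.e. the extract/multiply pair behaves as an extend of a
   value shifted left by 3.  */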
/* Emit an insn that's a simple single-set.  Both the operands must be
   known to be valid.  */
inline static rtx_insn *
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (x, y));
}
/* X and Y are two things to compare using CODE.  Emit the compare insn and
   return the rtx for register 0 in the proper mode.  */
rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
  machine_mode mode = SELECT_CC_MODE (code, x, y);
  rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);

  emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
  return cc_reg;
}
/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

rtx
aarch64_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}

/* Return the TLS model to use for ADDR.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  enum tls_model tls_kind = TLS_MODEL_NONE;
  rtx sym, addend;

  if (GET_CODE (addr) == CONST)
    {
      split_const (addr, &sym, &addend);
      if (GET_CODE (sym) == SYMBOL_REF)
	tls_kind = SYMBOL_REF_TLS_MODEL (sym);
    }
  else if (GET_CODE (addr) == SYMBOL_REF)
    tls_kind = SYMBOL_REF_TLS_MODEL (addr);

  return tls_kind;
}
/* We'll allow lo_sum's in addresses in our legitimate addresses
   so that combine would take care of combining addresses where
   necessary, but for generation purposes, we'll generate the address
   as:

   RTL                               Absolute
   tmp = hi (symbol_ref);            adrp  x1, foo
   dest = lo_sum (tmp, symbol_ref);  add dest, x1, :lo_12:foo

   PIC                               TLS
   adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
   ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo
                                     bl   __tls_get_addr

   Load TLS symbol, depending on TLS mechanism and TLS access model.

   Global Dynamic - Traditional TLS:
   adrp tmp, :tlsgd:imm
   add  dest, tmp, #:tlsgd_lo12:imm
   bl   __tls_get_addr

   Global Dynamic - TLS Descriptors:
   adrp dest, :tlsdesc:imm
   ldr  tmp, [dest, #:tlsdesc_lo12:imm]
   add  dest, dest, #:tlsdesc_lo12:imm
   blr  tmp

   Initial Exec:
   adrp tmp, :gottprel:imm
   ldr  dest, [tmp, #:gottprel_lo12:imm]

   Local Exec:
   add  t0, tp, #:tprel_hi12:imm, lsl #12
   add  t0, t0, #:tprel_lo12_nc:imm
*/

static void
aarch64_load_symref_appropriately (rtx dest, rtx imm,
				   enum aarch64_symbol_type type)
{
  switch (type)
    {
    case SYMBOL_SMALL_ABSOLUTE:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode.  */
	rtx tmp_reg = dest;
	machine_mode mode = GET_MODE (dest);

	gcc_assert (mode == Pmode || mode == ptr_mode);

	if (can_create_pseudo_p ())
	  tmp_reg = gen_reg_rtx (mode);

	emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
	emit_insn (gen_add_losym (dest, tmp_reg, imm));
	return;
      }

    case SYMBOL_TINY_ABSOLUTE:
      emit_insn (gen_rtx_SET (dest, imm));
      return;

    case SYMBOL_SMALL_GOT_28K:
      {
	machine_mode mode = GET_MODE (dest);
	rtx gp_rtx = pic_offset_table_rtx;
	rtx insn;
	rtx mem;

	/* NOTE: pic_offset_table_rtx can be NULL_RTX, because we can reach
	   here before rtl expand.  Tree IVOPT will generate rtl pattern to
	   decide rtx costs, in which case pic_offset_table_rtx is not
	   initialized.  For that case no need to generate the first adrp
	   instruction as the final cost for global variable access is
	   one instruction.  */
	if (gp_rtx != NULL)
	  {
	    /* -fpic for -mcmodel=small allows a 32K GOT table size (but
	       because we are using the page base as the GOT base, the first
	       page may be wasted; in the worst scenario, there is only 28K
	       of space for the GOT).

	       The generated instruction sequence for accessing a global
	       variable is:

		 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym]

	       Only one instruction is needed.  But we must initialize
	       pic_offset_table_rtx properly.  We generate an initialization
	       insn for every global access, and allow CSE to remove all
	       redundant copies.

	       The final instruction sequence will look like the following
	       for multiple global variable accesses:

		 adrp pic_offset_table_rtx, _GLOBAL_OFFSET_TABLE_

		 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym1]
		 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym2]
		 ldr reg, [pic_offset_table_rtx, #:gotpage_lo15:sym3]
		 ...  */

	    rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
	    crtl->uses_pic_offset_table = 1;
	    emit_move_insn (gp_rtx, gen_rtx_HIGH (Pmode, s));

	    if (mode != GET_MODE (gp_rtx))
	      gp_rtx = simplify_gen_subreg (mode, gp_rtx,
					    GET_MODE (gp_rtx), 0);
	  }

	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      insn = gen_ldr_got_small_28k_di (dest, gp_rtx, imm);
	    else
	      insn = gen_ldr_got_small_28k_si (dest, gp_rtx, imm);

	    mem = XVECEXP (SET_SRC (insn), 0, 0);
	  }
	else
	  {
	    gcc_assert (mode == Pmode);

	    insn = gen_ldr_got_small_28k_sidi (dest, gp_rtx, imm);
	    mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
	  }

	/* The operand is expected to be MEM.  Whenever the related insn
	   pattern changes, the above code which calculates mem should be
	   updated.  */
	gcc_assert (GET_CODE (mem) == MEM);
	MEM_READONLY_P (mem) = 1;
	MEM_NOTRAP_P (mem) = 1;
	emit_insn (insn);
	return;
      }

    case SYMBOL_SMALL_GOT_4G:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the got entry is always of SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in the memory), it has SImode; it may have
	   DImode if dest is dereferenced to access the memory.
	   This is why we have to handle three different ldr_got_small
	   patterns here (two patterns for ILP32).  */

	rtx insn;
	rtx mem;
	rtx tmp_reg = dest;
	machine_mode mode = GET_MODE (dest);

	if (can_create_pseudo_p ())
	  tmp_reg = gen_reg_rtx (mode);

	emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      insn = gen_ldr_got_small_di (dest, tmp_reg, imm);
	    else
	      insn = gen_ldr_got_small_si (dest, tmp_reg, imm);

	    mem = XVECEXP (SET_SRC (insn), 0, 0);
	  }
	else
	  {
	    gcc_assert (mode == Pmode);

	    insn = gen_ldr_got_small_sidi (dest, tmp_reg, imm);
	    mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0);
	  }

	gcc_assert (GET_CODE (mem) == MEM);
	MEM_READONLY_P (mem) = 1;
	MEM_NOTRAP_P (mem) = 1;
	emit_insn (insn);
	return;
      }

    case SYMBOL_SMALL_TLSGD:
      {
	rtx_insn *insns;
	rtx result = gen_rtx_REG (Pmode, R0_REGNUM);

	start_sequence ();
	aarch64_emit_call_insn (gen_tlsgd_small (result, imm));
	insns = get_insns ();
	end_sequence ();

	RTL_CONST_CALL_P (insns) = 1;
	emit_libcall_block (insns, dest, result, imm);
	return;
      }

    case SYMBOL_SMALL_TLSDESC:
      {
	machine_mode mode = GET_MODE (dest);
	rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
	rtx tp;

	gcc_assert (mode == Pmode || mode == ptr_mode);

	/* In ILP32, the got entry is always of SImode size.  Unlike
	   small GOT, the dest is fixed at reg 0.  */
	if (TARGET_ILP32)
	  emit_insn (gen_tlsdesc_small_si (imm));
	else
	  emit_insn (gen_tlsdesc_small_di (imm));
	tp = aarch64_load_tp (NULL);

	if (mode != Pmode)
	  tp = gen_lowpart (mode, tp);

	emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, x0)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_SMALL_TLSIE:
      {
	/* In ILP32, the mode of dest can be either SImode or DImode,
	   while the got entry is always of SImode size.  The mode of
	   dest depends on how dest is used: if dest is assigned to a
	   pointer (e.g. in the memory), it has SImode; it may have
	   DImode if dest is dereferenced to access the memory.
	   This is why we have to handle three different tlsie_small
	   patterns here (two patterns for ILP32).  */
	machine_mode mode = GET_MODE (dest);
	rtx tmp_reg = gen_reg_rtx (mode);
	rtx tp = aarch64_load_tp (NULL);

	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      emit_insn (gen_tlsie_small_di (tmp_reg, imm));
	    else
	      {
		emit_insn (gen_tlsie_small_si (tmp_reg, imm));
		tp = gen_lowpart (mode, tp);
	      }
	  }
	else
	  {
	    gcc_assert (mode == Pmode);
	    emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
	  }

	emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_TLSLE12:
    case SYMBOL_TLSLE24:
    case SYMBOL_TLSLE32:
    case SYMBOL_TLSLE48:
      {
	machine_mode mode = GET_MODE (dest);
	rtx tp = aarch64_load_tp (NULL);

	if (mode != Pmode)
	  tp = gen_lowpart (mode, tp);

	switch (type)
	  {
	  case SYMBOL_TLSLE12:
	    emit_insn ((mode == DImode ? gen_tlsle12_di : gen_tlsle12_si)
		       (dest, tp, imm));
	    break;
	  case SYMBOL_TLSLE24:
	    emit_insn ((mode == DImode ? gen_tlsle24_di : gen_tlsle24_si)
		       (dest, tp, imm));
	    break;
	  case SYMBOL_TLSLE32:
	    emit_insn ((mode == DImode ? gen_tlsle32_di : gen_tlsle32_si)
		       (dest, imm));
	    emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
		       (dest, dest, tp));
	    break;
	  case SYMBOL_TLSLE48:
	    emit_insn ((mode == DImode ? gen_tlsle48_di : gen_tlsle48_si)
		       (dest, imm));
	    emit_insn ((mode == DImode ? gen_adddi3 : gen_addsi3)
		       (dest, dest, tp));
	    break;
	  default:
	    gcc_unreachable ();
	  }

	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    case SYMBOL_TINY_GOT:
      emit_insn (gen_ldr_got_tiny (dest, imm));
      return;

    case SYMBOL_TINY_TLSIE:
      {
	machine_mode mode = GET_MODE (dest);
	rtx tp = aarch64_load_tp (NULL);

	if (mode == ptr_mode)
	  {
	    if (mode == DImode)
	      emit_insn (gen_tlsie_tiny_di (dest, imm, tp));
	    else
	      {
		tp = gen_lowpart (mode, tp);
		emit_insn (gen_tlsie_tiny_si (dest, imm, tp));
	      }
	  }
	else
	  {
	    gcc_assert (mode == Pmode);
	    emit_insn (gen_tlsie_tiny_sidi (dest, imm, tp));
	  }

	set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
	return;
      }

    default:
      gcc_unreachable ();
    }
}
/* Emit a move from SRC to DEST.  Assume that the move expanders can
   handle all moves if !can_create_pseudo_p ().  The distinction is
   important because, unlike emit_move_insn, the move expanders know
   how to force Pmode objects into the constant pool even when the
   constant pool address is not itself legitimate.  */
static rtx
aarch64_emit_move (rtx dest, rtx src)
{
  return (can_create_pseudo_p ()
	  ? emit_move_insn (dest, src)
	  : emit_move_insn_1 (dest, src));
}
/* Split a 128-bit move operation into two 64-bit move operations,
   taking care to handle partial overlap of register to register
   copies.  Special cases are needed when moving between GP regs and
   FP regs.  SRC can be a register, constant or memory; DST a register
   or memory.  If either operand is memory it must not have any side
   effects.  */
void
aarch64_split_128bit_move (rtx dst, rtx src)
{
  rtx dst_lo, dst_hi;
  rtx src_lo, src_hi;

  machine_mode mode = GET_MODE (dst);

  gcc_assert (mode == TImode || mode == TFmode);
  gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
  gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);

  if (REG_P (dst) && REG_P (src))
    {
      int src_regno = REGNO (src);
      int dst_regno = REGNO (dst);

      /* Handle FP <-> GP regs.  */
      if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
	{
	  src_lo = gen_lowpart (word_mode, src);
	  src_hi = gen_highpart (word_mode, src);

	  if (mode == TImode)
	    {
	      emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
	      emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
	    }
	  else
	    {
	      emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
	      emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
	    }
	  return;
	}
      else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
	{
	  dst_lo = gen_lowpart (word_mode, dst);
	  dst_hi = gen_highpart (word_mode, dst);

	  if (mode == TImode)
	    {
	      emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
	      emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
	    }
	  else
	    {
	      emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
	      emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
	    }
	  return;
	}
    }

  dst_lo = gen_lowpart (word_mode, dst);
  dst_hi = gen_highpart (word_mode, dst);
  src_lo = gen_lowpart (word_mode, src);
  src_hi = gen_highpart_mode (word_mode, mode, src);

  /* At most one pairing may overlap.  */
  if (reg_overlap_mentioned_p (dst_lo, src_hi))
    {
      aarch64_emit_move (dst_hi, src_hi);
      aarch64_emit_move (dst_lo, src_lo);
    }
  else
    {
      aarch64_emit_move (dst_lo, src_lo);
      aarch64_emit_move (dst_hi, src_hi);
    }
}
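/* As an illustration of the overlap check above: for a copy such as
   (x1,x2) <- (x0,x1), dst_lo (x1) overlaps src_hi (x1), so the high half
   must be moved first; moving the low half first would clobber x1 before
   it is read.  */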
bool
aarch64_split_128bit_move_p (rtx dst, rtx src)
{
  return (! REG_P (src)
	  || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
}
/* Split a complex SIMD combine.  */

void
aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
{
  machine_mode src_mode = GET_MODE (src1);
  machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src1) && REG_P (src2))
    {
      rtx (*gen) (rtx, rtx, rtx);

      switch (src_mode)
	{
	case V8QImode:
	  gen = gen_aarch64_simd_combinev8qi;
	  break;
	case V4HImode:
	  gen = gen_aarch64_simd_combinev4hi;
	  break;
	case V2SImode:
	  gen = gen_aarch64_simd_combinev2si;
	  break;
	case V4HFmode:
	  gen = gen_aarch64_simd_combinev4hf;
	  break;
	case V2SFmode:
	  gen = gen_aarch64_simd_combinev2sf;
	  break;
	case DImode:
	  gen = gen_aarch64_simd_combinedi;
	  break;
	case DFmode:
	  gen = gen_aarch64_simd_combinedf;
	  break;
	default:
	  gcc_unreachable ();
	}

      emit_insn (gen (dst, src1, src2));
      return;
    }
  else
    gcc_unreachable ();
}
/* Split a complex SIMD move.  */

void
aarch64_split_simd_move (rtx dst, rtx src)
{
  machine_mode src_mode = GET_MODE (src);
  machine_mode dst_mode = GET_MODE (dst);

  gcc_assert (VECTOR_MODE_P (dst_mode));

  if (REG_P (dst) && REG_P (src))
    {
      rtx (*gen) (rtx, rtx);

      gcc_assert (VECTOR_MODE_P (src_mode));

      switch (src_mode)
	{
	case V16QImode:
	  gen = gen_aarch64_split_simd_movv16qi;
	  break;
	case V8HImode:
	  gen = gen_aarch64_split_simd_movv8hi;
	  break;
	case V4SImode:
	  gen = gen_aarch64_split_simd_movv4si;
	  break;
	case V2DImode:
	  gen = gen_aarch64_split_simd_movv2di;
	  break;
	case V8HFmode:
	  gen = gen_aarch64_split_simd_movv8hf;
	  break;
	case V4SFmode:
	  gen = gen_aarch64_split_simd_movv4sf;
	  break;
	case V2DFmode:
	  gen = gen_aarch64_split_simd_movv2df;
	  break;
	default:
	  gcc_unreachable ();
	}

      emit_insn (gen (dst, src));
      return;
    }
}
static rtx
aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
{
  if (can_create_pseudo_p ())
    return force_reg (mode, value);
  else
    {
      x = aarch64_emit_move (x, value);
      return x;
    }
}
static rtx
aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
{
  if (!aarch64_plus_immediate (GEN_INT (offset), mode))
    {
      rtx high;
      /* Load the full offset into a register.  This
	 might be improvable in the future.  */
      high = GEN_INT (offset);
      offset = 0;
      high = aarch64_force_temporary (mode, temp, high);
      reg = aarch64_force_temporary (mode, temp,
				     gen_rtx_PLUS (mode, high, reg));
    }
  return plus_constant (mode, reg, offset);
}
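/* For example, an offset of 16 is a valid plus-immediate and stays folded
   into the returned PLUS, whereas an offset like 1 << 20 is first loaded
   into a temporary and added with a register-register add.  */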
static int
aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
				machine_mode mode)
{
  int i;
  unsigned HOST_WIDE_INT val, val2, mask;
  int one_match, zero_match;
  int num_insns;

  val = INTVAL (imm);

  if (aarch64_move_imm (val, mode))
    {
      if (generate)
	emit_insn (gen_rtx_SET (dest, imm));
      return 1;
    }

  if ((val >> 32) == 0 || mode == SImode)
    {
      if (generate)
	{
	  emit_insn (gen_rtx_SET (dest, GEN_INT (val & 0xffff)));
	  if (mode == SImode)
	    emit_insn (gen_insv_immsi (dest, GEN_INT (16),
				       GEN_INT ((val >> 16) & 0xffff)));
	  else
	    emit_insn (gen_insv_immdi (dest, GEN_INT (16),
				       GEN_INT ((val >> 16) & 0xffff)));
	}
      return 2;
    }

  /* Remaining cases are all for DImode.  */

  mask = 0xffff;
  zero_match = ((val & mask) == 0) + ((val & (mask << 16)) == 0) +
    ((val & (mask << 32)) == 0) + ((val & (mask << 48)) == 0);
  one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
    ((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);

  if (zero_match != 2 && one_match != 2)
    {
      /* Try emitting a bitmask immediate with a movk replacing 16 bits.
	 For a 64-bit bitmask try whether changing 16 bits to all ones or
	 zeroes creates a valid bitmask.  To check any repeated bitmask,
	 try using 16 bits from the other 32-bit half of val.  */

      for (i = 0; i < 64; i += 16, mask <<= 16)
	{
	  val2 = val & ~mask;
	  if (val2 != val && aarch64_bitmask_imm (val2, mode))
	    break;
	  val2 = val | mask;
	  if (val2 != val && aarch64_bitmask_imm (val2, mode))
	    break;
	  val2 = val2 & ~mask;
	  val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask);
	  if (val2 != val && aarch64_bitmask_imm (val2, mode))
	    break;
	}
      if (i != 64)
	{
	  if (generate)
	    {
	      emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
	      emit_insn (gen_insv_immdi (dest, GEN_INT (i),
					 GEN_INT ((val >> i) & 0xffff)));
	    }
	  return 2;
	}
    }

  /* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which
     are emitted by the initial mov.  If one_match > zero_match, skip set bits,
     otherwise skip zero bits.  */

  num_insns = 1;
  mask = 0xffff;
  val2 = one_match > zero_match ? ~val : val;
  i = (val2 & mask) != 0 ? 0 : (val2 & (mask << 16)) != 0 ? 16 : 32;

  if (generate)
    emit_insn (gen_rtx_SET (dest, GEN_INT (one_match > zero_match
					   ? (val | ~(mask << i))
					   : (val & (mask << i)))));
  for (i += 16; i < 64; i += 16)
    {
      if ((val2 & (mask << i)) == 0)
	continue;
      if (generate)
	emit_insn (gen_insv_immdi (dest, GEN_INT (i),
				   GEN_INT ((val >> i) & 0xffff)));
      num_insns ++;
    }

  return num_insns;
}
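/* For instance, the DImode constant 0x1234000000005678 contains two
   all-zero 16-bit chunks, so the code above emits something like:

	mov	x0, #0x5678
	movk	x0, #0x1234, lsl #48

   instead of a full four-instruction mov/movk/movk/movk sequence.  */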
void
aarch64_expand_mov_immediate (rtx dest, rtx imm)
{
  machine_mode mode = GET_MODE (dest);

  gcc_assert (mode == SImode || mode == DImode);

  /* Check on what type of symbol it is.  */
  if (GET_CODE (imm) == SYMBOL_REF
      || GET_CODE (imm) == LABEL_REF
      || GET_CODE (imm) == CONST)
    {
      rtx mem, base, offset;
      enum aarch64_symbol_type sty;

      /* If we have (const (plus symbol offset)), separate out the offset
	 before we start classifying the symbol.  */
      split_const (imm, &base, &offset);

      sty = aarch64_classify_symbol (base, offset);
      switch (sty)
	{
	case SYMBOL_FORCE_TO_MEM:
	  if (offset != const0_rtx
	      && targetm.cannot_force_const_mem (mode, imm))
	    {
	      gcc_assert (can_create_pseudo_p ());
	      base = aarch64_force_temporary (mode, dest, base);
	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
	      aarch64_emit_move (dest, base);
	      return;
	    }

	  mem = force_const_mem (ptr_mode, imm);

	  /* If we aren't generating PC relative literals, then
	     we need to expand the literal pool access carefully.
	     This is something that needs to be done in a number
	     of places, so could well live as a separate function.  */
	  if (aarch64_nopcrelative_literal_loads)
	    {
	      gcc_assert (can_create_pseudo_p ());
	      base = gen_reg_rtx (ptr_mode);
	      aarch64_expand_mov_immediate (base, XEXP (mem, 0));
	      mem = gen_rtx_MEM (ptr_mode, base);
	    }

	  if (mode != ptr_mode)
	    mem = gen_rtx_ZERO_EXTEND (mode, mem);

	  emit_insn (gen_rtx_SET (dest, mem));

	  return;

	case SYMBOL_SMALL_TLSGD:
	case SYMBOL_SMALL_TLSDESC:
	case SYMBOL_SMALL_TLSIE:
	case SYMBOL_SMALL_GOT_28K:
	case SYMBOL_SMALL_GOT_4G:
	case SYMBOL_TINY_GOT:
	case SYMBOL_TINY_TLSIE:
	  if (offset != const0_rtx)
	    {
	      gcc_assert (can_create_pseudo_p ());
	      base = aarch64_force_temporary (mode, dest, base);
	      base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
	      aarch64_emit_move (dest, base);
	      return;
	    }
	  /* FALLTHRU */

	case SYMBOL_SMALL_ABSOLUTE:
	case SYMBOL_TINY_ABSOLUTE:
	case SYMBOL_TLSLE12:
	case SYMBOL_TLSLE24:
	case SYMBOL_TLSLE32:
	case SYMBOL_TLSLE48:
	  aarch64_load_symref_appropriately (dest, imm, sty);
	  return;

	default:
	  gcc_unreachable ();
	}
    }

  if (!CONST_INT_P (imm))
    {
      if (GET_CODE (imm) == HIGH)
	emit_insn (gen_rtx_SET (dest, imm));
      else
	{
	  rtx mem = force_const_mem (mode, imm);
	  emit_insn (gen_rtx_SET (dest, mem));
	}

      return;
    }

  aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));
}
static bool
aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
				 tree exp ATTRIBUTE_UNUSED)
{
  /* Currently, always true.  */
  return true;
}
/* Implement TARGET_PASS_BY_REFERENCE.  */

static bool
aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
			   machine_mode mode,
			   const_tree type,
			   bool named ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  machine_mode dummymode;
  int nregs;

  /* GET_MODE_SIZE (BLKmode) is useless since it is 0.  */
  size = (mode == BLKmode && type)
    ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);

  /* Aggregates are passed by reference based on their size.  */
  if (type && AGGREGATE_TYPE_P (type))
    size = int_size_in_bytes (type);

  /* Variable sized arguments are always returned by reference.  */
  if (size < 0)
    return true;

  /* Can this be a candidate to be passed in fp/simd register(s)?  */
  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
					       &dummymode, &nregs, NULL))
    return false;

  /* Arguments which are variable sized or larger than 2 registers are
     passed by reference unless they are a homogeneous floating point
     aggregate.  */
  return size > 2 * UNITS_PER_WORD;
}
/* Return TRUE if VALTYPE is padded to its least significant bits.  */
static bool
aarch64_return_in_msb (const_tree valtype)
{
  machine_mode dummy_mode;
  int dummy_int;

  /* Never happens in little-endian mode.  */
  if (!BYTES_BIG_ENDIAN)
    return false;

  /* Only composite types smaller than or equal to 16 bytes can
     be potentially returned in registers.  */
  if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
      || int_size_in_bytes (valtype) <= 0
      || int_size_in_bytes (valtype) > 16)
    return false;

  /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
     or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
     is always passed/returned in the least significant bits of fp/simd
     register(s).  */
  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
					       &dummy_mode, &dummy_int, NULL))
    return false;

  return true;
}
/* Implement TARGET_FUNCTION_VALUE.
   Define how to find the value returned by a function.  */

static rtx
aarch64_function_value (const_tree type, const_tree func,
			bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  int unsignedp;
  int count;
  machine_mode ag_mode;

  mode = TYPE_MODE (type);
  if (INTEGRAL_TYPE_P (type))
    mode = promote_function_mode (type, mode, &unsignedp, func, 1);

  if (aarch64_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);

      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
	}
    }

  if (aarch64_vfp_is_call_or_return_candidate (mode, type,
					       &ag_mode, &count, NULL))
    {
      if (!aarch64_composite_type_p (type, mode))
	{
	  gcc_assert (count == 1 && mode == ag_mode);
	  return gen_rtx_REG (mode, V0_REGNUM);
	}
      else
	{
	  int i;
	  rtx par;

	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
	  for (i = 0; i < count; i++)
	    {
	      rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				       GEN_INT (i * GET_MODE_SIZE (ag_mode)));
	      XVECEXP (par, 0, i) = tmp;
	    }
	  return par;
	}
    }
  else
    return gen_rtx_REG (mode, R0_REGNUM);
}
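/* For example, returning "struct { double a, b, c; }" (an HFA with three
   members) builds a PARALLEL of three DFmode registers v0, v1 and v2 at
   byte offsets 0, 8 and 16.  */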
/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
   Return true if REGNO is the number of a hard register in which the values
   of called function may come back.  */

static bool
aarch64_function_value_regno_p (const unsigned int regno)
{
  /* Maximum of 16 bytes can be returned in the general registers.  Examples
     of 16-byte return values are: 128-bit integers and 16-byte small
     structures (excluding homogeneous floating-point aggregates).  */
  if (regno == R0_REGNUM || regno == R1_REGNUM)
    return true;

  /* Up to four fp/simd registers can return a function value, e.g. a
     homogeneous floating-point aggregate having four members.  */
  if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
    return TARGET_FLOAT;

  return false;
}
/* Implement TARGET_RETURN_IN_MEMORY.

   If the type T of the result of a function is such that
     void func (T arg)
   would require that arg be passed as a value in a register (or set of
   registers) according to the parameter passing rules, then the result
   is returned in the same registers as would be used for such an
   argument.  */

static bool
aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT size;
  machine_mode ag_mode;
  int count;

  if (!AGGREGATE_TYPE_P (type)
      && TREE_CODE (type) != COMPLEX_TYPE
      && TREE_CODE (type) != VECTOR_TYPE)
    /* Simple scalar types always returned in registers.  */
    return false;

  if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
					       type, &ag_mode, &count, NULL))
    return false;

  /* Types larger than 2 registers returned in memory.  */
  size = int_size_in_bytes (type);
  return (size < 0 || size > 2 * UNITS_PER_WORD);
}
static bool
aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
			       const_tree type, int *nregs)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  return aarch64_vfp_is_call_or_return_candidate (mode, type,
						  &pcum->aapcs_vfp_rmode,
						  nregs, NULL);
}
/* Given MODE and TYPE of a function argument, return the alignment in
   bits.  The idea is to suppress any stronger alignment requested by
   the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
   This is a helper function for local use only.  */

static unsigned int
aarch64_function_arg_alignment (machine_mode mode, const_tree type)
{
  unsigned int alignment;

  if (type)
    {
      if (!integer_zerop (TYPE_SIZE (type)))
	{
	  if (TYPE_MODE (type) == mode)
	    alignment = TYPE_ALIGN (type);
	  else
	    alignment = GET_MODE_ALIGNMENT (mode);
	}
      else
	alignment = 0;
    }
  else
    alignment = GET_MODE_ALIGNMENT (mode);

  return alignment;
}
/* Layout a function argument according to the AAPCS64 rules.  The rule
   numbers refer to the rule numbers in the AAPCS64.  */

static void
aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
		    const_tree type,
		    bool named ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int ncrn, nvrn, nregs;
  bool allocate_ncrn, allocate_nvrn;
  HOST_WIDE_INT size;

  /* We need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  /* Size in bytes, rounded to the nearest multiple of 8 bytes.  */
  size
    = ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
		UNITS_PER_WORD);

  allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
  allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v, mode,
						 type, &nregs);

  /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
     The following code thus handles passing by SIMD/FP registers first.  */

  nvrn = pcum->aapcs_nvrn;

  /* C1 - C5 for floating point, homogeneous floating point aggregates (HFA)
     and homogeneous short-vector aggregates (HVA).  */
  if (allocate_nvrn)
    {
      if (!TARGET_FLOAT)
	aarch64_err_no_fpadvsimd (mode, "argument");

      if (nvrn + nregs <= NUM_FP_ARG_REGS)
	{
	  pcum->aapcs_nextnvrn = nvrn + nregs;
	  if (!aarch64_composite_type_p (type, mode))
	    {
	      gcc_assert (nregs == 1);
	      pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
	    }
	  else
	    {
	      rtx par;
	      int i;
	      par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
	      for (i = 0; i < nregs; i++)
		{
		  rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
					 V0_REGNUM + nvrn + i);
		  tmp = gen_rtx_EXPR_LIST
		    (VOIDmode, tmp,
		     GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
		  XVECEXP (par, 0, i) = tmp;
		}
	      pcum->aapcs_reg = par;
	    }
	  return;
	}
      else
	{
	  /* C.3 NSRN is set to 8.  */
	  pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
	  goto on_stack;
	}
    }

  ncrn = pcum->aapcs_ncrn;
  nregs = size / UNITS_PER_WORD;

  /* C6 - C9, though the sign and zero extension semantics are
     handled elsewhere.  This is the case where the argument fits
     entirely in general registers.  */
  if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
    {
      unsigned int alignment = aarch64_function_arg_alignment (mode, type);

      gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);

      /* C.8 if the argument has an alignment of 16 then the NGRN is
	 rounded up to the next even number.  */
      if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
	{
	  ++ncrn;
	  gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
	}
      /* NREGS can be 0 when e.g. an empty structure is to be passed.
	 A reg is still generated for it, but the caller should be smart
	 enough not to use it.  */
      if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
	pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
      else
	{
	  rtx par;
	  int i;

	  par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
	  for (i = 0; i < nregs; i++)
	    {
	      rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
	      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				       GEN_INT (i * UNITS_PER_WORD));
	      XVECEXP (par, 0, i) = tmp;
	    }
	  pcum->aapcs_reg = par;
	}

      pcum->aapcs_nextncrn = ncrn + nregs;
      return;
    }

  /* C.11  */
  pcum->aapcs_nextncrn = NUM_ARG_REGS;

  /* The argument is passed on stack; record the needed number of words for
     this argument and align the total size if necessary.  */
on_stack:
  pcum->aapcs_stack_words = size / UNITS_PER_WORD;
  if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
    pcum->aapcs_stack_size = ROUND_UP (pcum->aapcs_stack_size,
				       16 / UNITS_PER_WORD);
  return;
}
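/* A worked example of rule C.8 above: a 16-byte-aligned __int128 argument
   arriving when ncrn == 1 has its register number rounded up to 2, so the
   value is passed in the even/odd pair x2/x3 and x1 is left unused.  */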
/* Implement TARGET_FUNCTION_ARG.  */

static rtx
aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
		      const_tree type, bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);

  if (mode == VOIDmode)
    return NULL_RTX;

  aarch64_layout_arg (pcum_v, mode, type, named);
  return pcum->aapcs_reg;
}
void
aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
			      const_tree fntype ATTRIBUTE_UNUSED,
			      rtx libname ATTRIBUTE_UNUSED,
			      const_tree fndecl ATTRIBUTE_UNUSED,
			      unsigned n_named ATTRIBUTE_UNUSED)
{
  pcum->aapcs_ncrn = 0;
  pcum->aapcs_nvrn = 0;
  pcum->aapcs_nextncrn = 0;
  pcum->aapcs_nextnvrn = 0;
  pcum->pcs_variant = ARM_PCS_AAPCS64;
  pcum->aapcs_reg = NULL_RTX;
  pcum->aapcs_arg_processed = false;
  pcum->aapcs_stack_words = 0;
  pcum->aapcs_stack_size = 0;

  if (!TARGET_FLOAT
      && fndecl && TREE_PUBLIC (fndecl)
      && fntype && fntype != error_mark_node)
    {
      const_tree type = TREE_TYPE (fntype);
      machine_mode mode ATTRIBUTE_UNUSED; /* To pass pointer as argument.  */
      int nregs ATTRIBUTE_UNUSED; /* Likewise.  */
      if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), type,
						   &mode, &nregs, NULL))
	aarch64_err_no_fpadvsimd (TYPE_MODE (type), "return type");
    }
  return;
}
static void
aarch64_function_arg_advance (cumulative_args_t pcum_v,
			      machine_mode mode,
			      const_tree type,
			      bool named)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  if (pcum->pcs_variant == ARM_PCS_AAPCS64)
    {
      aarch64_layout_arg (pcum_v, mode, type, named);
      gcc_assert ((pcum->aapcs_reg != NULL_RTX)
		  != (pcum->aapcs_stack_words != 0));
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
      pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
      pcum->aapcs_stack_size += pcum->aapcs_stack_words;
      pcum->aapcs_stack_words = 0;
      pcum->aapcs_reg = NULL_RTX;
    }
}
bool
aarch64_function_arg_regno_p (unsigned regno)
{
  return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
	  || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
}
/* Implement FUNCTION_ARG_BOUNDARY.  Every parameter gets at least
   PARM_BOUNDARY bits of alignment, but will be given anything up
   to STACK_BOUNDARY bits if the type requires it.  This makes sure
   that both before and after the layout of each argument, the Next
   Stacked Argument Address (NSAA) will have a minimum alignment of
   8 bytes.  */

static unsigned int
aarch64_function_arg_boundary (machine_mode mode, const_tree type)
{
  unsigned int alignment = aarch64_function_arg_alignment (mode, type);

  if (alignment < PARM_BOUNDARY)
    alignment = PARM_BOUNDARY;
  if (alignment > STACK_BOUNDARY)
    alignment = STACK_BOUNDARY;
  return alignment;
}
/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).

   Return true if an argument passed on the stack should be padded upwards,
   i.e. if the least-significant byte of the stack slot has useful data.

   Small aggregate types are placed in the lowest memory address.

   The related parameter passing rules are B.4, C.3, C.5 and C.14.  */

bool
aarch64_pad_arg_upward (machine_mode mode, const_tree type)
{
  /* On little-endian targets, the least significant byte of every stack
     argument is passed at the lowest byte address of the stack slot.  */
  if (!BYTES_BIG_ENDIAN)
    return true;

  /* Otherwise, integral, floating-point and pointer types are padded downward:
     the least significant byte of a stack argument is passed at the highest
     byte address of the stack slot.  */
  if (type
      ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
	 || POINTER_TYPE_P (type))
      : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
    return false;

  /* Everything else padded upward, i.e. data in first byte of stack slot.  */
  return true;
}
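/* For example, on a big-endian target a single char passed on the stack is
   padded downward, i.e. placed at the highest byte address of its 8-byte
   slot, while a small aggregate keeps its data at the lowest address and
   is padded upward.  */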
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).

   It specifies padding for the last (may also be the only)
   element of a block move between registers and memory.  If
   assuming the block is in the memory, padding upward means that
   the last element is padded after its highest significant byte,
   while in downward padding, the last element is padded at its
   least significant byte side.

   Small aggregates and small complex types are always padded
   upwards.

   We don't need to worry about homogeneous floating-point or
   short-vector aggregates; their move is not affected by the
   padding direction determined here.  Regardless of endianness,
   each element of such an aggregate is put in the least
   significant bits of a fp/simd register.

   Return !BYTES_BIG_ENDIAN if the least significant byte of the
   register has useful data, and return the opposite if the most
   significant byte does.  */

bool
aarch64_pad_reg_upward (machine_mode mode, const_tree type,
			bool first ATTRIBUTE_UNUSED)
{

  /* Small composite types are always padded upward.  */
  if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
    {
      HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
			    : GET_MODE_SIZE (mode));
      if (size < 2 * UNITS_PER_WORD)
	return true;
    }

  /* Otherwise, use the default padding.  */
  return !BYTES_BIG_ENDIAN;
}
/* Implement TARGET_LIBGCC_CMP_RETURN_MODE.  */
static machine_mode
aarch64_libgcc_cmp_return_mode (void)
{
  return SImode;
}
static bool
aarch64_frame_pointer_required (void)
{
  /* In aarch64_override_options_after_change
     flag_omit_leaf_frame_pointer turns off the frame pointer by
     default.  Turn it back on now if we've not got a leaf
     function.  */
  if (flag_omit_leaf_frame_pointer
      && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
    return true;

  return false;
}
/* Mark the registers that need to be saved by the callee and calculate
   the size of the callee-saved registers area and frame record (both FP
   and LR may be omitted).  */
static void
aarch64_layout_frame (void)
{
  HOST_WIDE_INT offset = 0;
  int regno;

  if (reload_completed && cfun->machine->frame.laid_out)
    return;

#define SLOT_NOT_REQUIRED (-2)
#define SLOT_REQUIRED     (-1)

  cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER;
  cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER;

  /* First mark all the registers that really need to be saved...  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;

  /* ... that includes the eh data registers (if needed)...  */
  if (crtl->calls_eh_return)
    for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
      cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
	= SLOT_REQUIRED;

  /* ... and any callee saved register that dataflow says is live.  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    if (df_regs_ever_live_p (regno)
	&& (regno == R30_REGNUM
	    || !call_used_regs[regno]))
      cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    if (df_regs_ever_live_p (regno)
	&& !call_used_regs[regno])
      cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;

  if (frame_pointer_needed)
    {
      /* FP and LR are placed in the linkage record.  */
      cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
      cfun->machine->frame.wb_candidate1 = R29_REGNUM;
      cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
      cfun->machine->frame.wb_candidate2 = R30_REGNUM;
      cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
      offset += 2 * UNITS_PER_WORD;
    }

  /* Now assign stack slots for them.  */
  for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
    if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
      {
	cfun->machine->frame.reg_offset[regno] = offset;
	if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
	  cfun->machine->frame.wb_candidate1 = regno;
	else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER)
	  cfun->machine->frame.wb_candidate2 = regno;
	offset += UNITS_PER_WORD;
      }

  for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
    if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
      {
	cfun->machine->frame.reg_offset[regno] = offset;
	if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
	  cfun->machine->frame.wb_candidate1 = regno;
	else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER
		 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
	  cfun->machine->frame.wb_candidate2 = regno;
	offset += UNITS_PER_WORD;
      }

  cfun->machine->frame.padding0 =
    (ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
  offset = ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);

  cfun->machine->frame.saved_regs_size = offset;

  cfun->machine->frame.hard_fp_offset
    = ROUND_UP (cfun->machine->frame.saved_varargs_size
		+ get_frame_size ()
		+ cfun->machine->frame.saved_regs_size,
		STACK_BOUNDARY / BITS_PER_UNIT);

  cfun->machine->frame.frame_size
    = ROUND_UP (cfun->machine->frame.hard_fp_offset
		+ crtl->outgoing_args_size,
		STACK_BOUNDARY / BITS_PER_UNIT);

  cfun->machine->frame.laid_out = true;
}
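/* A sketch of the resulting callee-save layout when the frame pointer is
   needed: FP and LR occupy offsets 0 and 8 of the area, followed by any
   remaining live callee-saved general registers and then the callee-saved
   vector registers, with the total rounded up to STACK_BOUNDARY.  */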
static bool
aarch64_register_saved_on_entry (int regno)
{
  return cfun->machine->frame.reg_offset[regno] >= 0;
}

static unsigned
aarch64_next_callee_save (unsigned regno, unsigned limit)
{
  while (regno <= limit && !aarch64_register_saved_on_entry (regno))
    regno ++;
  return regno;
}
static void
aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
			   HOST_WIDE_INT adjustment)
{
  rtx base_rtx = stack_pointer_rtx;
  rtx insn, reg, mem;

  reg = gen_rtx_REG (mode, regno);
  mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
			    plus_constant (Pmode, base_rtx, -adjustment));
  mem = gen_rtx_MEM (mode, mem);

  insn = emit_move_insn (mem, reg);
  RTX_FRAME_RELATED_P (insn) = 1;
}
static rtx
aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
			  HOST_WIDE_INT adjustment)
{
  switch (mode)
    {
    case DImode:
      return gen_storewb_pairdi_di (base, base, reg, reg2,
				    GEN_INT (-adjustment),
				    GEN_INT (UNITS_PER_WORD - adjustment));
    case DFmode:
      return gen_storewb_pairdf_di (base, base, reg, reg2,
				    GEN_INT (-adjustment),
				    GEN_INT (UNITS_PER_WORD - adjustment));
    default:
      gcc_unreachable ();
    }
}
static void
aarch64_pushwb_pair_reg (machine_mode mode, unsigned regno1,
                         unsigned regno2, HOST_WIDE_INT adjustment)
{
  rtx insn;
  rtx reg1 = gen_rtx_REG (mode, regno1);
  rtx reg2 = gen_rtx_REG (mode, regno2);

  insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
                                              reg2, adjustment));
  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
  RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
  RTX_FRAME_RELATED_P (insn) = 1;
}
static rtx
aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
                         HOST_WIDE_INT adjustment)
{
  switch (mode)
    {
    case DImode:
      return gen_loadwb_pairdi_di (base, base, reg, reg2,
                                   GEN_INT (adjustment),
                                   GEN_INT (UNITS_PER_WORD));
    case DFmode:
      return gen_loadwb_pairdf_di (base, base, reg, reg2,
                                   GEN_INT (adjustment),
                                   GEN_INT (UNITS_PER_WORD));
    default:
      gcc_unreachable ();
    }
}
static rtx
aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
                        rtx reg2)
{
  switch (mode)
    {
    case DImode:
      return gen_store_pairdi (mem1, reg1, mem2, reg2);

    case DFmode:
      return gen_store_pairdf (mem1, reg1, mem2, reg2);

    default:
      gcc_unreachable ();
    }
}
static rtx
aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
                       rtx mem2)
{
  switch (mode)
    {
    case DImode:
      return gen_load_pairdi (reg1, mem1, reg2, mem2);

    case DFmode:
      return gen_load_pairdf (reg1, mem1, reg2, mem2);

    default:
      gcc_unreachable ();
    }
}
static void
aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
                           unsigned start, unsigned limit, bool skip_wb)
{
  rtx insn;
  rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
                                            ? gen_frame_mem : gen_rtx_MEM);
  unsigned regno;
  unsigned regno2;

  for (regno = aarch64_next_callee_save (start, limit);
       regno <= limit;
       regno = aarch64_next_callee_save (regno + 1, limit))
    {
      rtx reg, mem;
      HOST_WIDE_INT offset;

      if (skip_wb
          && (regno == cfun->machine->frame.wb_candidate1
              || regno == cfun->machine->frame.wb_candidate2))
        continue;

      reg = gen_rtx_REG (mode, regno);
      offset = start_offset + cfun->machine->frame.reg_offset[regno];
      mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
                                              offset));

      regno2 = aarch64_next_callee_save (regno + 1, limit);

      if (regno2 <= limit
          && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
              == cfun->machine->frame.reg_offset[regno2]))
        {
          rtx reg2 = gen_rtx_REG (mode, regno2);
          rtx mem2;

          offset = start_offset + cfun->machine->frame.reg_offset[regno2];
          mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
                                                   offset));
          insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
                                                    reg2));

          /* The first part of a frame-related parallel insn is
             always assumed to be relevant to the frame
             calculations; subsequent parts are only
             frame-related if explicitly marked.  */
          RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
          regno = regno2;
        }
      else
        insn = emit_move_insn (mem, reg);

      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
static void
aarch64_restore_callee_saves (machine_mode mode,
                              HOST_WIDE_INT start_offset, unsigned start,
                              unsigned limit, bool skip_wb, rtx *cfi_ops)
{
  rtx base_rtx = stack_pointer_rtx;
  rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
                                            ? gen_frame_mem : gen_rtx_MEM);
  unsigned regno;
  unsigned regno2;
  HOST_WIDE_INT offset;

  for (regno = aarch64_next_callee_save (start, limit);
       regno <= limit;
       regno = aarch64_next_callee_save (regno + 1, limit))
    {
      rtx reg, mem;

      if (skip_wb
          && (regno == cfun->machine->frame.wb_candidate1
              || regno == cfun->machine->frame.wb_candidate2))
        continue;

      reg = gen_rtx_REG (mode, regno);
      offset = start_offset + cfun->machine->frame.reg_offset[regno];
      mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));

      regno2 = aarch64_next_callee_save (regno + 1, limit);

      if (regno2 <= limit
          && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
              == cfun->machine->frame.reg_offset[regno2]))
        {
          rtx reg2 = gen_rtx_REG (mode, regno2);
          rtx mem2;

          offset = start_offset + cfun->machine->frame.reg_offset[regno2];
          mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
          emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));

          *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
          regno = regno2;
        }
      else
        emit_move_insn (reg, mem);

      *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
    }
}
/* AArch64 stack frames generated by this compiler look like:

	+-------------------------------+
	|                               |
	|  incoming stack arguments     |
	|                               |
	+-------------------------------+
	|                               | <-- incoming stack pointer (aligned)
	|  callee-allocated save area   |
	|  for register varargs         |
	|                               |
	+-------------------------------+
	|  local variables              | <-- frame_pointer_rtx
	|                               |
	+-------------------------------+
	|  padding0                     | \
	+-------------------------------+  |
	|  callee-saved registers       |  | frame.saved_regs_size
	+-------------------------------+  |
	|  LR'                          |  |
	+-------------------------------+  |
	|  FP'                          | / <- hard_frame_pointer_rtx (aligned)
	+-------------------------------+
	|  dynamic allocation           |
	+-------------------------------+
	|  padding                      |
	+-------------------------------+
	|  outgoing stack arguments     | <-- arg_pointer
	|                               |
	+-------------------------------+
	|                               | <-- stack_pointer_rtx (aligned)

   Dynamic stack allocations via alloca() decrease stack_pointer_rtx
   but leave frame_pointer_rtx and hard_frame_pointer_rtx
   unchanged.  */
/* Generate the prologue instructions for entry into a function.
   Establish the stack frame by decreasing the stack pointer with a
   properly calculated size and, if necessary, create a frame record
   filled with the values of LR and previous frame pointer.  The
   current FP is also set up if it is in use.  */

void
aarch64_expand_prologue (void)
{
  /* sub sp, sp, #<frame_size>
     stp {fp, lr}, [sp, #<frame_size> - 16]
     add fp, sp, #<frame_size> - hardfp_offset
     stp {cs_reg}, [fp, #-16] etc.

     sub sp, sp, <final_adjustment_if_any>  */
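
  /* Illustrative instance of the scheme above, assuming a 96-byte frame
     with hardfp_offset == 16 and no final adjustment:
	 sub  sp, sp, #96
	 stp  x29, x30, [sp, #80]
	 add  x29, sp, #80  */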
  HOST_WIDE_INT frame_size, offset;
  HOST_WIDE_INT fp_offset;		/* Offset from hard FP to SP.  */
  HOST_WIDE_INT hard_fp_offset;
  rtx insn;

  aarch64_layout_frame ();

  offset = frame_size = cfun->machine->frame.frame_size;
  hard_fp_offset = cfun->machine->frame.hard_fp_offset;
  fp_offset = frame_size - hard_fp_offset;

  if (flag_stack_usage_info)
    current_function_static_stack_size = frame_size;
  /* Store pairs and load pairs have a range only -512 to 504.  */
  if (offset >= 512)
    {
      /* When the frame has a large size, an initial decrease is done on
         the stack pointer to jump over the callee-allocated save area for
         register varargs, the local variable area and/or the callee-saved
         register area.  This will allow the pre-index write-back
         store pair instructions to be used for setting up the stack frame
         efficiently.  */
      offset = hard_fp_offset;
      if (offset >= 512)
        offset = cfun->machine->frame.saved_regs_size;

      frame_size -= (offset + crtl->outgoing_args_size);
      fp_offset = 0;

      if (frame_size >= 0x1000000)
        {
          rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
          emit_move_insn (op0, GEN_INT (-frame_size));
          insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));

          add_reg_note (insn, REG_CFA_ADJUST_CFA,
                        gen_rtx_SET (stack_pointer_rtx,
                                     plus_constant (Pmode, stack_pointer_rtx,
                                                    -frame_size)));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
      else if (frame_size > 0)
        {
          int hi_ofs = frame_size & 0xfff000;
          int lo_ofs = frame_size & 0x000fff;

          if (hi_ofs)
            {
              insn = emit_insn (gen_add2_insn
                                (stack_pointer_rtx, GEN_INT (-hi_ofs)));
              RTX_FRAME_RELATED_P (insn) = 1;
            }
          if (lo_ofs)
            {
              insn = emit_insn (gen_add2_insn
                                (stack_pointer_rtx, GEN_INT (-lo_ofs)));
              RTX_FRAME_RELATED_P (insn) = 1;
            }
        }
    }
  else
    frame_size = -1;
  if (offset > 0)
    {
      bool skip_wb = false;

      if (frame_pointer_needed)
        {
          skip_wb = true;

          if (fp_offset)
            {
              insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
                                               GEN_INT (-offset)));
              RTX_FRAME_RELATED_P (insn) = 1;

              aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM,
                                         R30_REGNUM, false);
            }
          else
            aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset);

          /* Set up frame pointer to point to the location of the
             previous frame pointer on the stack.  */
          insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
                                           stack_pointer_rtx,
                                           GEN_INT (fp_offset)));
          RTX_FRAME_RELATED_P (insn) = 1;
          emit_insn (gen_stack_tie (stack_pointer_rtx,
                                    hard_frame_pointer_rtx));
        }
      else
        {
          unsigned reg1 = cfun->machine->frame.wb_candidate1;
          unsigned reg2 = cfun->machine->frame.wb_candidate2;

          if (fp_offset
              || reg1 == FIRST_PSEUDO_REGISTER
              || (reg2 == FIRST_PSEUDO_REGISTER
                  && offset >= 256))
            {
              insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
                                               GEN_INT (-offset)));
              RTX_FRAME_RELATED_P (insn) = 1;
            }
          else
            {
              machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;

              skip_wb = true;

              if (reg2 == FIRST_PSEUDO_REGISTER)
                aarch64_pushwb_single_reg (mode1, reg1, offset);
              else
                aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset);
            }
        }

      aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
                                 skip_wb);
      aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
                                 skip_wb);
    }

  /* when offset >= 512,
     sub sp, sp, #<outgoing_args_size> */
  if (frame_size > -1)
    {
      if (crtl->outgoing_args_size > 0)
        {
          insn = emit_insn (gen_add2_insn
                            (stack_pointer_rtx,
                             GEN_INT (- crtl->outgoing_args_size)));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }
}
/* Return TRUE if we can use a simple_return insn.

   This function checks whether the callee saved stack is empty, which
   means no restore actions are needed.  The pro_and_epilogue pass will
   use this to check whether the shrink-wrapping optimization is
   feasible.  */

bool
aarch64_use_return_insn_p (void)
{
  if (!reload_completed)
    return false;

  if (crtl->profile)
    return false;

  aarch64_layout_frame ();

  return cfun->machine->frame.frame_size == 0;
}
/* Generate the epilogue instructions for returning from a function.  */
void
aarch64_expand_epilogue (bool for_sibcall)
{
  HOST_WIDE_INT frame_size, offset;
  HOST_WIDE_INT fp_offset;
  HOST_WIDE_INT hard_fp_offset;
  rtx insn;
  /* We need to add a memory barrier to prevent reads from deallocated
     stack.  */
  bool need_barrier_p = (get_frame_size () != 0
                         || cfun->machine->frame.saved_varargs_size);

  aarch64_layout_frame ();

  offset = frame_size = cfun->machine->frame.frame_size;
  hard_fp_offset = cfun->machine->frame.hard_fp_offset;
  fp_offset = frame_size - hard_fp_offset;

  /* Store pairs and load pairs have a range only -512 to 504.  */
  if (offset >= 512)
    {
      offset = hard_fp_offset;
      if (offset >= 512)
        offset = cfun->machine->frame.saved_regs_size;

      frame_size -= (offset + crtl->outgoing_args_size);
      fp_offset = 0;
      if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
        {
          insn = emit_insn (gen_add2_insn
                            (stack_pointer_rtx,
                             GEN_INT (crtl->outgoing_args_size)));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
    }
  else
    frame_size = -1;
  /* If there were outgoing arguments or we've done dynamic stack
     allocation, then restore the stack pointer from the frame
     pointer.  This is at most one insn and more efficient than using
     GCC's internal mechanism.  */
  if (frame_pointer_needed
      && (crtl->outgoing_args_size || cfun->calls_alloca))
    {
      if (cfun->calls_alloca)
        emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));

      insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
                                       hard_frame_pointer_rtx,
                                       GEN_INT (0)));
      offset = offset - fp_offset;
    }
  if (offset > 0)
    {
      unsigned reg1 = cfun->machine->frame.wb_candidate1;
      unsigned reg2 = cfun->machine->frame.wb_candidate2;
      bool skip_wb = true;
      rtx cfi_ops = NULL;

      if (frame_pointer_needed)
        fp_offset = 0;
      else if (fp_offset
               || reg1 == FIRST_PSEUDO_REGISTER
               || (reg2 == FIRST_PSEUDO_REGISTER
                   && offset >= 256))
        skip_wb = false;

      aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
                                    skip_wb, &cfi_ops);
      aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
                                    skip_wb, &cfi_ops);

      if (need_barrier_p)
        emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));

      if (skip_wb)
        {
          machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
          rtx rreg1 = gen_rtx_REG (mode1, reg1);

          cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops);
          if (reg2 == FIRST_PSEUDO_REGISTER)
            {
              rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset);
              mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
              mem = gen_rtx_MEM (mode1, mem);
              insn = emit_move_insn (rreg1, mem);
            }
          else
            {
              rtx rreg2 = gen_rtx_REG (mode1, reg2);

              cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops);
              insn = emit_insn (aarch64_gen_loadwb_pair
                                (mode1, stack_pointer_rtx, rreg1,
                                 rreg2, offset));
            }
        }
      else
        insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
                                         GEN_INT (offset)));

      /* Reset the CFA to be SP + FRAME_SIZE.  */
      rtx new_cfa = stack_pointer_rtx;
      if (frame_size > 0)
        new_cfa = plus_constant (Pmode, new_cfa, frame_size);
      cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
      REG_NOTES (insn) = cfi_ops;
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  if (frame_size > 0)
    {
      if (need_barrier_p)
        emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));

      if (frame_size >= 0x1000000)
        {
          rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
          emit_move_insn (op0, GEN_INT (frame_size));
          insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
        }
      else
        {
          int hi_ofs = frame_size & 0xfff000;
          int lo_ofs = frame_size & 0x000fff;

          if (hi_ofs && lo_ofs)
            {
              insn = emit_insn (gen_add2_insn
                                (stack_pointer_rtx, GEN_INT (hi_ofs)));
              RTX_FRAME_RELATED_P (insn) = 1;
              frame_size = lo_ofs;
            }
          insn = emit_insn (gen_add2_insn
                            (stack_pointer_rtx, GEN_INT (frame_size)));
        }

      /* Reset the CFA to be SP + 0.  */
      add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  /* Stack adjustment for exception handler.  */
  if (crtl->calls_eh_return)
    {
      /* We need to unwind the stack by the offset computed by
         EH_RETURN_STACKADJ_RTX.  We have already reset the CFA
         to be SP; letting the CFA move during this adjustment
         is just as correct as retaining the CFA from the body
         of the function.  Therefore, do nothing special.  */
      emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
    }

  emit_use (gen_rtx_REG (DImode, LR_REGNUM));
  if (!for_sibcall)
    emit_jump_insn (ret_rtx);
}
/* Return the place to copy the exception unwinding return address to.
   This will probably be a stack slot, but could (in theory) be the
   return register.  */
rtx
aarch64_final_eh_return_addr (void)
{
  HOST_WIDE_INT fp_offset;

  aarch64_layout_frame ();

  fp_offset = cfun->machine->frame.frame_size
              - cfun->machine->frame.hard_fp_offset;

  if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
    return gen_rtx_REG (DImode, LR_REGNUM);

  /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This can
     result in a store to save LR introduced by builtin_eh_return () being
     incorrectly deleted because the alias is not detected.
     So in the calculation of the address to copy the exception unwinding
     return address to, we note 2 cases.
     If FP is needed and the fp_offset is 0, it means that SP = FP and hence
     we return a SP-relative location since all the addresses are SP-relative
     in this case.  This prevents the store from being optimized away.
     If the fp_offset is not 0, then the addresses will be FP-relative and
     therefore we return a FP-relative location.  */

  if (frame_pointer_needed)
    {
      if (fp_offset)
        return gen_frame_mem (DImode,
                              plus_constant (Pmode,
                                             hard_frame_pointer_rtx,
                                             UNITS_PER_WORD));
      else
        return gen_frame_mem (DImode,
                              plus_constant (Pmode,
                                             stack_pointer_rtx,
                                             UNITS_PER_WORD));
    }

  /* If FP is not needed, we calculate the location of LR, which would be
     at the top of the saved registers block.  */

  return gen_frame_mem (DImode,
                        plus_constant (Pmode,
                                       stack_pointer_rtx,
                                       fp_offset
                                       + cfun->machine->frame.saved_regs_size
                                       - 2 * UNITS_PER_WORD));
}
/* Possibly output code to build up a constant in a register.  For
   the benefit of the costs infrastructure, returns the number of
   instructions which would be emitted.  GENERATE inhibits or
   enables code generation.  */

static int
aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
{
  int insns = 1;

  if (aarch64_bitmask_imm (val, DImode))
    {
      if (generate)
        emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
    }
  else
    {
      int i;
      int ncount = 0;
      int zcount = 0;
      HOST_WIDE_INT valp = val >> 16;
      HOST_WIDE_INT valm;
      HOST_WIDE_INT tval;

      for (i = 16; i < 64; i += 16)
        {
          valm = (valp & 0xffff);

          if (valm != 0)
            ++zcount;

          if (valm != 0xffff)
            ++ncount;

          valp >>= 16;
        }

      /* zcount contains the number of additional MOVK instructions
         required if the constant is built up with an initial MOVZ instruction,
         while ncount is the number of MOVK instructions required if starting
         with a MOVN instruction.  Choose the sequence that yields the fewest
         number of instructions, preferring MOVZ instructions when they are
         both the same.  */
      if (ncount < zcount)
        {
          if (generate)
            emit_move_insn (gen_rtx_REG (Pmode, regnum),
                            GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
          tval = 0xffff;
        }
      else
        {
          if (generate)
            emit_move_insn (gen_rtx_REG (Pmode, regnum),
                            GEN_INT (val & 0xffff));
          tval = 0;
        }

      val >>= 16;

      for (i = 16; i < 64; i += 16)
        {
          if ((val & 0xffff) != tval)
            {
              if (generate)
                emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
                                           GEN_INT (i),
                                           GEN_INT (val & 0xffff)));
              insns++;
            }
          val >>= 16;
        }
    }

  return insns;
}
static void
aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
{
  HOST_WIDE_INT mdelta = delta;
  rtx this_rtx = gen_rtx_REG (Pmode, regnum);
  rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);

  if (mdelta < 0)
    mdelta = -mdelta;

  if (mdelta >= 4096 * 4096)
    {
      (void) aarch64_build_constant (scratchreg, delta, true);
      emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
    }
  else if (mdelta > 0)
    {
      if (mdelta >= 4096)
        {
          emit_insn (gen_rtx_SET (scratch_rtx, GEN_INT (mdelta / 4096)));
          rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
          if (delta < 0)
            emit_insn (gen_rtx_SET (this_rtx,
                                    gen_rtx_MINUS (Pmode, this_rtx, shift)));
          else
            emit_insn (gen_rtx_SET (this_rtx,
                                    gen_rtx_PLUS (Pmode, this_rtx, shift)));
        }
      if (mdelta % 4096 != 0)
        {
          scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
          emit_insn (gen_rtx_SET (this_rtx,
                                  gen_rtx_PLUS (Pmode, this_rtx,
                                                scratch_rtx)));
        }
    }
}
/* Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */
static void
aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
                         HOST_WIDE_INT delta,
                         HOST_WIDE_INT vcall_offset,
                         tree function)
{
  /* The this pointer is always in x0.  Note that this differs from
     Arm where the this pointer may be bumped to r1 if r0 is required
     to return a pointer to an aggregate.  On AArch64 a result value
     pointer will be in x8.  */
  int this_regno = R0_REGNUM;
  rtx this_rtx, temp0, temp1, addr, funexp;
  rtx insn;

  reload_completed = 1;
  emit_note (NOTE_INSN_PROLOGUE_END);

  if (vcall_offset == 0)
    aarch64_add_constant (this_regno, IP1_REGNUM, delta);
  else
    {
      gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);

      this_rtx = gen_rtx_REG (Pmode, this_regno);
      temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
      temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);

      addr = this_rtx;
      if (delta != 0)
        {
          if (delta >= -256 && delta < 256)
            addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
                                       plus_constant (Pmode, this_rtx,
                                                      delta));
          else
            aarch64_add_constant (this_regno, IP1_REGNUM, delta);
        }

      if (Pmode == ptr_mode)
        aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
      else
        aarch64_emit_move (temp0,
                           gen_rtx_ZERO_EXTEND (Pmode,
                                                gen_rtx_MEM (ptr_mode,
                                                             addr)));

      if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
        addr = plus_constant (Pmode, temp0, vcall_offset);
      else
        {
          (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
          addr = gen_rtx_PLUS (Pmode, temp0, temp1);
        }

      if (Pmode == ptr_mode)
        aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode, addr));
      else
        aarch64_emit_move (temp1,
                           gen_rtx_SIGN_EXTEND (Pmode,
                                                gen_rtx_MEM (ptr_mode,
                                                             addr)));

      emit_insn (gen_add2_insn (this_rtx, temp1));
    }

  /* Generate a tail call to the target function.  */
  if (!TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
  SIBLING_CALL_P (insn) = 1;

  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  /* Stop pretending to be a post-reload pass.  */
  reload_completed = 0;
}
static bool
aarch64_tls_referenced_p (rtx x)
{
  if (!TARGET_HAVE_TLS)
    return false;
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
        return true;
      /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
         TLS offsets, not real symbol references.  */
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
        iter.skip_subrtxes ();
    }
  return false;
}
/* Return true if val can be encoded as a 12-bit unsigned immediate with
   a left shift of 0 or 12 bits.  */
bool
aarch64_uimm12_shift (HOST_WIDE_INT val)
{
  return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
          || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val);
}
3131 register by a MOVZ instruction. */
3133 aarch64_movw_imm (HOST_WIDE_INT val
, machine_mode mode
)
3135 if (GET_MODE_SIZE (mode
) > 4)
3137 if ((val
& (((HOST_WIDE_INT
) 0xffff) << 32)) == val
3138 || (val
& (((HOST_WIDE_INT
) 0xffff) << 48)) == val
)
3143 /* Ignore sign extension. */
3144 val
&= (HOST_WIDE_INT
) 0xffffffff;
3146 return ((val
& (((HOST_WIDE_INT
) 0xffff) << 0)) == val
3147 || (val
& (((HOST_WIDE_INT
) 0xffff) << 16)) == val
);
/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2.  */

static const unsigned HOST_WIDE_INT bitmask_imm_mul[] =
  {
    0x0000000100000001ull,
    0x0001000100010001ull,
    0x0101010101010101ull,
    0x1111111111111111ull,
    0x5555555555555555ull,
  };
/* Return true if val is a valid bitmask immediate.  */
bool
aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
{
  unsigned HOST_WIDE_INT val, tmp, mask, first_one, next_one;
  int bits;

  /* Check for a single sequence of one bits and return quickly if so.
     The special cases of all ones and all zeroes return false.  */
  val = (unsigned HOST_WIDE_INT) val_in;
  tmp = val + (val & -val);

  if (tmp == (tmp & -tmp))
    return (val + 1) > 1;

  /* Replicate 32-bit immediates so we can treat them as 64-bit.  */
  if (mode == SImode)
    val = (val << 32) | (val & 0xffffffff);

  /* Invert if the immediate doesn't start with a zero bit - this means we
     only need to search for sequences of one bits.  */
  if (val & 1)
    val = ~val;

  /* Find the first set bit and set tmp to val with the first sequence of one
     bits removed.  Return success if there is a single sequence of ones.  */
  first_one = val & -val;
  tmp = val & (val + first_one);

  if (tmp == 0)
    return true;

  /* Find the next set bit and compute the difference in bit position.  */
  next_one = tmp & -tmp;
  bits = clz_hwi (first_one) - clz_hwi (next_one);
  mask = val ^ tmp;

  /* Check the bit position difference is a power of 2, and that the first
     sequence of one bits fits within 'bits' bits.  */
  if ((mask >> bits) != 0 || bits != (bits & -bits))
    return false;

  /* Check the sequence of one bits is repeated 64/bits times.  */
  return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26];
}
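
/* Worked example: val_in == 0x00ff00ff00ff00ff starts with a one bit
   and is inverted to 0xff00ff00ff00ff00; then first_one == 1 << 8,
   next_one == 1 << 24, so bits == 16 and mask == 0xff00, and the
   final check confirms 0xff00 * 0x0001000100010001 equals the
   inverted value, so this is a valid bitmask immediate.  */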
/* Return true if val is an immediate that can be loaded into a
   register in a single instruction.  */
bool
aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
{
  if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
    return 1;
  return aarch64_bitmask_imm (val, mode);
}
static bool
aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  if (GET_CODE (x) == HIGH)
    return true;

  split_const (x, &base, &offset);
  if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
    {
      if (aarch64_classify_symbol (base, offset)
          != SYMBOL_FORCE_TO_MEM)
        return true;
      else
        /* Avoid generating a 64-bit relocation in ILP32; leave
           to aarch64_expand_mov_immediate to handle it properly.  */
        return mode != ptr_mode;
    }

  return aarch64_tls_referenced_p (x);
}
/* Return true if register REGNO is a valid index register.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
aarch64_regno_ok_for_index_p (int regno, bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
        return true;

      if (!reg_renumber)
        return false;

      regno = reg_renumber[regno];
    }
  return GP_REGNUM_P (regno);
}
/* Return true if register REGNO is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

bool
aarch64_regno_ok_for_base_p (int regno, bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
        return true;

      if (!reg_renumber)
        return false;

      regno = reg_renumber[regno];
    }

  /* The fake registers will be eliminated to either the stack or
     hard frame pointer, both of which are usually valid base registers.
     Reload deals with the cases where the eliminated form isn't valid.  */
  return (GP_REGNUM_P (regno)
          || regno == SP_REGNUM
          || regno == FRAME_POINTER_REGNUM
          || regno == ARG_POINTER_REGNUM);
}
/* Return true if X is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

static bool
aarch64_base_register_rtx_p (rtx x, bool strict_p)
{
  if (!strict_p && GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);

  return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
}
/* Return true if address offset is a valid index.  If it is, fill in INFO
   appropriately.  STRICT_P is true if REG_OK_STRICT is in effect.  */

static bool
aarch64_classify_index (struct aarch64_address_info *info, rtx x,
                        machine_mode mode, bool strict_p)
{
  enum aarch64_address_type type;
  rtx index;
  int shift;

  /* (reg:P) */
  if ((REG_P (x) || GET_CODE (x) == SUBREG)
      && GET_MODE (x) == Pmode)
    {
      type = ADDRESS_REG_REG;
      index = x;
      shift = 0;
    }
  /* (sign_extend:DI (reg:SI)) */
  else if ((GET_CODE (x) == SIGN_EXTEND
            || GET_CODE (x) == ZERO_EXTEND)
           && GET_MODE (x) == DImode
           && GET_MODE (XEXP (x, 0)) == SImode)
    {
      type = (GET_CODE (x) == SIGN_EXTEND)
        ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (x, 0);
      shift = 0;
    }
  /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
  else if (GET_CODE (x) == MULT
           && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
               || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
           && GET_MODE (XEXP (x, 0)) == DImode
           && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
           && CONST_INT_P (XEXP (x, 1)))
    {
      type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
        ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = exact_log2 (INTVAL (XEXP (x, 1)));
    }
  /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
  else if (GET_CODE (x) == ASHIFT
           && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
               || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
           && GET_MODE (XEXP (x, 0)) == DImode
           && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
           && CONST_INT_P (XEXP (x, 1)))
    {
      type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
        ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = INTVAL (XEXP (x, 1));
    }
  /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
  else if ((GET_CODE (x) == SIGN_EXTRACT
            || GET_CODE (x) == ZERO_EXTRACT)
           && GET_MODE (x) == DImode
           && GET_CODE (XEXP (x, 0)) == MULT
           && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
           && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
    {
      type = (GET_CODE (x) == SIGN_EXTRACT)
        ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
      if (INTVAL (XEXP (x, 1)) != 32 + shift
          || INTVAL (XEXP (x, 2)) != 0)
        shift = -1;
    }
  /* (and:DI (mult:DI (reg:DI) (const_int scale))
     (const_int 0xffffffff<<shift)) */
  else if (GET_CODE (x) == AND
           && GET_MODE (x) == DImode
           && GET_CODE (XEXP (x, 0)) == MULT
           && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
           && CONST_INT_P (XEXP (XEXP (x, 0), 1))
           && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
      if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
        shift = -1;
    }
  /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
  else if ((GET_CODE (x) == SIGN_EXTRACT
            || GET_CODE (x) == ZERO_EXTRACT)
           && GET_MODE (x) == DImode
           && GET_CODE (XEXP (x, 0)) == ASHIFT
           && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
           && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
    {
      type = (GET_CODE (x) == SIGN_EXTRACT)
        ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = INTVAL (XEXP (XEXP (x, 0), 1));
      if (INTVAL (XEXP (x, 1)) != 32 + shift
          || INTVAL (XEXP (x, 2)) != 0)
        shift = -1;
    }
  /* (and:DI (ashift:DI (reg:DI) (const_int shift))
     (const_int 0xffffffff<<shift)) */
  else if (GET_CODE (x) == AND
           && GET_MODE (x) == DImode
           && GET_CODE (XEXP (x, 0)) == ASHIFT
           && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
           && CONST_INT_P (XEXP (XEXP (x, 0), 1))
           && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_UXTW;
      index = XEXP (XEXP (x, 0), 0);
      shift = INTVAL (XEXP (XEXP (x, 0), 1));
      if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
        shift = -1;
    }
  /* (mult:P (reg:P) (const_int scale)) */
  else if (GET_CODE (x) == MULT
           && GET_MODE (x) == Pmode
           && GET_MODE (XEXP (x, 0)) == Pmode
           && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_REG;
      index = XEXP (x, 0);
      shift = exact_log2 (INTVAL (XEXP (x, 1)));
    }
  /* (ashift:P (reg:P) (const_int shift)) */
  else if (GET_CODE (x) == ASHIFT
           && GET_MODE (x) == Pmode
           && GET_MODE (XEXP (x, 0)) == Pmode
           && CONST_INT_P (XEXP (x, 1)))
    {
      type = ADDRESS_REG_REG;
      index = XEXP (x, 0);
      shift = INTVAL (XEXP (x, 1));
    }
  else
    return false;

  if (GET_CODE (index) == SUBREG)
    index = SUBREG_REG (index);

  if ((shift == 0
       || (shift > 0 && shift <= 3
           && (1 << shift) == GET_MODE_SIZE (mode)))
      && REG_P (index)
      && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
    {
      info->type = type;
      info->offset = index;
      info->shift = shift;
      return true;
    }

  return false;
}
static inline bool
aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
{
  return (offset >= -64 * GET_MODE_SIZE (mode)
          && offset < 64 * GET_MODE_SIZE (mode)
          && offset % GET_MODE_SIZE (mode) == 0);
}

static inline bool
offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
                               HOST_WIDE_INT offset)
{
  return offset >= -256 && offset < 256;
}

static inline bool
offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
{
  return (offset >= 0
          && offset < 4096 * GET_MODE_SIZE (mode)
          && offset % GET_MODE_SIZE (mode) == 0);
}
/* Return true if MODE is one of the modes for which we
   support LDP/STP operations.  */

static bool
aarch64_mode_valid_for_sched_fusion_p (machine_mode mode)
{
  return mode == SImode || mode == DImode
         || mode == SFmode || mode == DFmode
         || (aarch64_vector_mode_supported_p (mode)
             && GET_MODE_SIZE (mode) == 8);
}
/* Return true if X is a valid address for machine mode MODE.  If it is,
   fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
   effect.  OUTER_CODE is PARALLEL for a load/store pair.  */

static bool
aarch64_classify_address (struct aarch64_address_info *info,
                          rtx x, machine_mode mode,
                          RTX_CODE outer_code, bool strict_p)
{
  enum rtx_code code = GET_CODE (x);
  rtx op0, op1;

  /* On BE, we use load/store pair for all large int mode load/stores.  */
  bool load_store_pair_p = (outer_code == PARALLEL
                            || (BYTES_BIG_ENDIAN
                                && aarch64_vect_struct_mode_p (mode)));

  bool allow_reg_index_p =
    !load_store_pair_p
    && (GET_MODE_SIZE (mode) != 16 || aarch64_vector_mode_supported_p (mode))
    && !aarch64_vect_struct_mode_p (mode);

  /* On LE, for AdvSIMD, don't support anything other than POST_INC or
     REG addressing.  */
  if (aarch64_vect_struct_mode_p (mode) && !BYTES_BIG_ENDIAN
      && (code != POST_INC && code != REG))
    return false;

  switch (code)
    {
    case REG:
    case SUBREG:
      info->type = ADDRESS_REG_IMM;
      info->base = x;
      info->offset = const0_rtx;
      return aarch64_base_register_rtx_p (x, strict_p);

    case PLUS:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (!strict_p
          && REG_P (op0)
          && (op0 == virtual_stack_vars_rtx
              || op0 == frame_pointer_rtx
              || op0 == arg_pointer_rtx)
          && CONST_INT_P (op1))
        {
          info->type = ADDRESS_REG_IMM;
          info->base = op0;
          info->offset = op1;

          return true;
        }

      if (GET_MODE_SIZE (mode) != 0
          && CONST_INT_P (op1)
          && aarch64_base_register_rtx_p (op0, strict_p))
        {
          HOST_WIDE_INT offset = INTVAL (op1);

          info->type = ADDRESS_REG_IMM;
          info->base = op0;
          info->offset = op1;

          /* TImode and TFmode values are allowed in both pairs of X
             registers and individual Q registers.  The available
             address modes are:
             X,X: 7-bit signed scaled offset
             Q:   9-bit signed offset
             We conservatively require an offset representable in either
             mode.  */
          if (mode == TImode || mode == TFmode)
            return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
                    && offset_9bit_signed_unscaled_p (mode, offset));

          /* A 7bit offset check because OImode will emit a ldp/stp
             instruction (only big endian will get here).
             For ldp/stp instructions, the offset is scaled for the size of a
             single element of the pair.  */
          if (mode == OImode)
            return aarch64_offset_7bit_signed_scaled_p (TImode, offset);

          /* Three 9/12 bit offsets checks because CImode will emit three
             ldr/str instructions (only big endian will get here).  */
          if (mode == CImode)
            return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
                    && (offset_9bit_signed_unscaled_p (V16QImode,
                                                       offset + 32)
                        || offset_12bit_unsigned_scaled_p (V16QImode,
                                                           offset + 32)));

          /* Two 7bit offsets checks because XImode will emit two ldp/stp
             instructions (only big endian will get here).  */
          if (mode == XImode)
            return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
                    && aarch64_offset_7bit_signed_scaled_p (TImode,
                                                            offset + 32));

          if (load_store_pair_p)
            return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
                    && aarch64_offset_7bit_signed_scaled_p (mode, offset));
          else
            return (offset_9bit_signed_unscaled_p (mode, offset)
                    || offset_12bit_unsigned_scaled_p (mode, offset));
        }

      if (allow_reg_index_p)
        {
          /* Look for base + (scaled/extended) index register.  */
          if (aarch64_base_register_rtx_p (op0, strict_p)
              && aarch64_classify_index (info, op1, mode, strict_p))
            {
              info->base = op0;
              return true;
            }
          if (aarch64_base_register_rtx_p (op1, strict_p)
              && aarch64_classify_index (info, op0, mode, strict_p))
            {
              info->base = op1;
              return true;
            }
        }

      return false;

    case POST_INC:
    case POST_DEC:
    case PRE_INC:
    case PRE_DEC:
      info->type = ADDRESS_REG_WB;
      info->base = XEXP (x, 0);
      info->offset = NULL_RTX;
      return aarch64_base_register_rtx_p (info->base, strict_p);

    case POST_MODIFY:
    case PRE_MODIFY:
      info->type = ADDRESS_REG_WB;
      info->base = XEXP (x, 0);
      if (GET_CODE (XEXP (x, 1)) == PLUS
          && CONST_INT_P (XEXP (XEXP (x, 1), 1))
          && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
          && aarch64_base_register_rtx_p (info->base, strict_p))
        {
          HOST_WIDE_INT offset;
          info->offset = XEXP (XEXP (x, 1), 1);
          offset = INTVAL (info->offset);

          /* TImode and TFmode values are allowed in both pairs of X
             registers and individual Q registers.  The available
             address modes are:
             X,X: 7-bit signed scaled offset
             Q:   9-bit signed offset
             We conservatively require an offset representable in either
             mode.  */
          if (mode == TImode || mode == TFmode)
            return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
                    && offset_9bit_signed_unscaled_p (mode, offset));

          if (load_store_pair_p)
            return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
                    && aarch64_offset_7bit_signed_scaled_p (mode, offset));
          else
            return offset_9bit_signed_unscaled_p (mode, offset);
        }
      return false;

    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      /* load literal: pc-relative constant pool entry.  Only supported
         for SI mode or larger.  */
      info->type = ADDRESS_SYMBOLIC;

      if (!load_store_pair_p && GET_MODE_SIZE (mode) >= 4)
        {
          rtx sym, addend;

          split_const (x, &sym, &addend);
          return ((GET_CODE (sym) == LABEL_REF
                   || (GET_CODE (sym) == SYMBOL_REF
                       && CONSTANT_POOL_ADDRESS_P (sym)
                       && !aarch64_nopcrelative_literal_loads)));
        }
      return false;

    case LO_SUM:
      info->type = ADDRESS_LO_SUM;
      info->base = XEXP (x, 0);
      info->offset = XEXP (x, 1);
      if (allow_reg_index_p
          && aarch64_base_register_rtx_p (info->base, strict_p))
        {
          rtx sym, offs;
          split_const (info->offset, &sym, &offs);
          if (GET_CODE (sym) == SYMBOL_REF
              && (aarch64_classify_symbol (sym, offs)
                  == SYMBOL_SMALL_ABSOLUTE))
            {
              /* The symbol and offset must be aligned to the access
                 size.  */
              unsigned int align;
              unsigned int ref_size;

              if (CONSTANT_POOL_ADDRESS_P (sym))
                align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
              else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
                {
                  tree exp = SYMBOL_REF_DECL (sym);
                  align = TYPE_ALIGN (TREE_TYPE (exp));
                  align = CONSTANT_ALIGNMENT (exp, align);
                }
              else if (SYMBOL_REF_DECL (sym))
                align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
              else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
                       && SYMBOL_REF_BLOCK (sym) != NULL)
                align = SYMBOL_REF_BLOCK (sym)->alignment;
              else
                align = BITS_PER_UNIT;

              ref_size = GET_MODE_SIZE (mode);
              if (ref_size == 0)
                ref_size = GET_MODE_SIZE (DImode);

              return ((INTVAL (offs) & (ref_size - 1)) == 0
                      && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
            }
        }
      return false;

    default:
      return false;
    }
}
bool
aarch64_symbolic_address_p (rtx x)
{
  rtx offset;

  split_const (x, &x, &offset);
  return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
}

/* Classify the base of symbolic expression X.  */

enum aarch64_symbol_type
aarch64_classify_symbolic_expression (rtx x)
{
  rtx offset;

  split_const (x, &x, &offset);
  return aarch64_classify_symbol (x, offset);
}
/* Return TRUE if X is a legitimate address for accessing memory in
   mode MODE.  */
static bool
aarch64_legitimate_address_hook_p (machine_mode mode, rtx x, bool strict_p)
{
  struct aarch64_address_info addr;

  return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
}

/* Return TRUE if X is a legitimate address for accessing memory in
   mode MODE.  OUTER_CODE will be PARALLEL if this is a load/store
   pair operation.  */
bool
aarch64_legitimate_address_p (machine_mode mode, rtx x,
                              RTX_CODE outer_code, bool strict_p)
{
  struct aarch64_address_info addr;

  return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
}
/* Return TRUE if rtx X is immediate constant 0.0.  */
bool
aarch64_float_const_zero_rtx_p (rtx x)
{
  if (GET_MODE (x) == VOIDmode)
    return false;

  if (REAL_VALUE_MINUS_ZERO (*CONST_DOUBLE_REAL_VALUE (x)))
    return !HONOR_SIGNED_ZEROS (GET_MODE (x));
  return real_equal (CONST_DOUBLE_REAL_VALUE (x), &dconst0);
}
/* Return the fixed registers used for condition codes.  */

static bool
aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = CC_REGNUM;
  *p2 = INVALID_REGNUM;
  return true;
}
/* Emit call insn with PAT and do aarch64-specific handling.  */

void
aarch64_emit_call_insn (rtx pat)
{
  rtx insn = emit_call_insn (pat);

  rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
  clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
  clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
}
machine_mode
aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
{
  /* All floating point compares return CCFP if it is an equality
     comparison, and CCFPE otherwise.  */
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    {
      switch (code)
        {
        case EQ:
        case NE:
        case UNORDERED:
        case ORDERED:
        case UNLT:
        case UNLE:
        case UNGT:
        case UNGE:
        case UNEQ:
        case LTGT:
          return CCFPmode;

        case LT:
        case LE:
        case GT:
        case GE:
          return CCFPEmode;

        default:
          gcc_unreachable ();
        }
    }

  if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
      && y == const0_rtx
      && (code == EQ || code == NE || code == LT || code == GE)
      && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
          || GET_CODE (x) == NEG))
    return CC_NZmode;

  /* A compare with a shifted operand.  Because of canonicalization,
     the comparison will have to be swapped when we emit the assembly
     code.  */
  if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
      && (REG_P (y) || GET_CODE (y) == SUBREG)
      && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
          || GET_CODE (x) == LSHIFTRT
          || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
    return CC_SWPmode;

  /* Similarly for a negated operand, but we can only do this for
     equalities.  */
  if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
      && (REG_P (y) || GET_CODE (y) == SUBREG)
      && (code == EQ || code == NE)
      && GET_CODE (x) == NEG)
    return CC_Zmode;

  /* A compare of a mode narrower than SI mode against zero can be done
     by extending the value in the comparison.  */
  if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
      && y == const0_rtx)
    /* Only use sign-extension if we really need it.  */
    return ((code == GT || code == GE || code == LE || code == LT)
            ? CC_SESWPmode : CC_ZESWPmode);

  /* For everything else, return CCmode.  */
  return CCmode;
}
static int
aarch64_get_condition_code_1 (enum machine_mode, enum rtx_code);

int
aarch64_get_condition_code (rtx x)
{
  machine_mode mode = GET_MODE (XEXP (x, 0));
  enum rtx_code comp_code = GET_CODE (x);

  if (GET_MODE_CLASS (mode) != MODE_CC)
    mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
  return aarch64_get_condition_code_1 (mode, comp_code);
}
static int
aarch64_get_condition_code_1 (enum machine_mode mode, enum rtx_code comp_code)
{
  int ne = -1, eq = -1;
  switch (mode)
    {
    case CCFPmode:
    case CCFPEmode:
      switch (comp_code)
        {
        case GE: return AARCH64_GE;
        case GT: return AARCH64_GT;
        case LE: return AARCH64_LS;
        case LT: return AARCH64_MI;
        case NE: return AARCH64_NE;
        case EQ: return AARCH64_EQ;
        case ORDERED: return AARCH64_VC;
        case UNORDERED: return AARCH64_VS;
        case UNLT: return AARCH64_LT;
        case UNLE: return AARCH64_LE;
        case UNGT: return AARCH64_HI;
        case UNGE: return AARCH64_PL;
        default: return -1;
        }
      break;

    case CC_DNEmode:
      ne = AARCH64_NE;
      eq = AARCH64_EQ;
      break;

    case CC_DEQmode:
      ne = AARCH64_EQ;
      eq = AARCH64_NE;
      break;

    case CC_DGEmode:
      ne = AARCH64_GE;
      eq = AARCH64_LT;
      break;

    case CC_DGTmode:
      ne = AARCH64_GT;
      eq = AARCH64_LE;
      break;

    case CC_DLEmode:
      ne = AARCH64_LE;
      eq = AARCH64_GT;
      break;

    case CC_DLTmode:
      ne = AARCH64_LT;
      eq = AARCH64_GE;
      break;

    case CC_DGEUmode:
      ne = AARCH64_CS;
      eq = AARCH64_CC;
      break;

    case CC_DGTUmode:
      ne = AARCH64_HI;
      eq = AARCH64_LS;
      break;

    case CC_DLEUmode:
      ne = AARCH64_LS;
      eq = AARCH64_HI;
      break;

    case CC_DLTUmode:
      ne = AARCH64_CC;
      eq = AARCH64_CS;
      break;

    case CCmode:
      switch (comp_code)
        {
        case NE: return AARCH64_NE;
        case EQ: return AARCH64_EQ;
        case GE: return AARCH64_GE;
        case GT: return AARCH64_GT;
        case LE: return AARCH64_LE;
        case LT: return AARCH64_LT;
        case GEU: return AARCH64_CS;
        case GTU: return AARCH64_HI;
        case LEU: return AARCH64_LS;
        case LTU: return AARCH64_CC;
        default: return -1;
        }
      break;

    case CC_SWPmode:
    case CC_ZESWPmode:
    case CC_SESWPmode:
      switch (comp_code)
        {
        case NE: return AARCH64_NE;
        case EQ: return AARCH64_EQ;
        case GE: return AARCH64_LE;
        case GT: return AARCH64_LT;
        case LE: return AARCH64_GE;
        case LT: return AARCH64_GT;
        case GEU: return AARCH64_LS;
        case GTU: return AARCH64_CC;
        case LEU: return AARCH64_CS;
        case LTU: return AARCH64_HI;
        default: return -1;
        }
      break;

    case CC_NZmode:
      switch (comp_code)
        {
        case NE: return AARCH64_NE;
        case EQ: return AARCH64_EQ;
        case GE: return AARCH64_PL;
        case LT: return AARCH64_MI;
        default: return -1;
        }
      break;

    case CC_Zmode:
      switch (comp_code)
        {
        case NE: return AARCH64_NE;
        case EQ: return AARCH64_EQ;
        default: return -1;
        }
      break;

    default:
      return -1;
    }

  if (comp_code == NE)
    return ne;

  if (comp_code == EQ)
    return eq;

  return -1;
}
bool
aarch64_const_vec_all_same_in_range_p (rtx x,
                                       HOST_WIDE_INT minval,
                                       HOST_WIDE_INT maxval)
{
  HOST_WIDE_INT firstval;
  int count, i;

  if (GET_CODE (x) != CONST_VECTOR
      || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
    return false;

  firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
  if (firstval < minval || firstval > maxval)
    return false;

  count = CONST_VECTOR_NUNITS (x);
  for (i = 1; i < count; i++)
    if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
      return false;

  return true;
}

bool
aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
{
  return aarch64_const_vec_all_same_in_range_p (x, val, val);
}
#define AARCH64_CC_V 1
#define AARCH64_CC_C (1 << 1)
#define AARCH64_CC_Z (1 << 2)
#define AARCH64_CC_N (1 << 3)

/* N Z C V flags for ccmp.  The first code is for AND op and the other
   is for IOR op.  Indexed by AARCH64_COND_CODE.  */
static const int aarch64_nzcv_codes[][2] =
{
  {AARCH64_CC_Z, 0}, /* EQ, Z == 1.  */
  {0, AARCH64_CC_Z}, /* NE, Z == 0.  */
  {AARCH64_CC_C, 0}, /* CS, C == 1.  */
  {0, AARCH64_CC_C}, /* CC, C == 0.  */
  {AARCH64_CC_N, 0}, /* MI, N == 1.  */
  {0, AARCH64_CC_N}, /* PL, N == 0.  */
  {AARCH64_CC_V, 0}, /* VS, V == 1.  */
  {0, AARCH64_CC_V}, /* VC, V == 0.  */
  {AARCH64_CC_C, 0}, /* HI, C == 1 && Z == 0.  */
  {0, AARCH64_CC_C}, /* LS, !(C == 1 && Z == 0).  */
  {0, AARCH64_CC_V}, /* GE, N == V.  */
  {AARCH64_CC_V, 0}, /* LT, N != V.  */
  {0, AARCH64_CC_Z}, /* GT, Z == 0 && N == V.  */
  {AARCH64_CC_Z, 0}, /* LE, !(Z == 0 && N == V).  */
  {0, 0}, /* AL, Any.  */
  {0, 0}, /* NV, Any.  */
};
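
/* For example, the immediate #4 (AARCH64_CC_Z) sets Z and clears N, C
   and V, which satisfies EQ, while #2 (AARCH64_CC_C) sets only the
   carry flag, which satisfies CS.  */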
static enum rtx_code
aarch64_ccmp_mode_to_code (enum machine_mode mode)
{
  switch (mode)
    {
    case CC_DNEmode:
      return NE;

    case CC_DEQmode:
      return EQ;

    case CC_DGEmode:
      return GE;

    case CC_DGTmode:
      return GT;

    case CC_DLEmode:
      return LE;

    case CC_DLTmode:
      return LT;

    case CC_DGEUmode:
      return GEU;

    case CC_DGTUmode:
      return GTU;

    case CC_DLEUmode:
      return LEU;

    case CC_DLTUmode:
      return LTU;

    default:
      gcc_unreachable ();
    }
}
static void
aarch64_print_operand (FILE *f, rtx x, int code)
{
  switch (code)
    {
    case 'c':
      /* An integer or symbol address without a preceding # sign.  */
      switch (GET_CODE (x))
        {
        case CONST_INT:
          fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
          break;

        case SYMBOL_REF:
          output_addr_const (f, x);
          break;

        case CONST:
          if (GET_CODE (XEXP (x, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
            {
              output_addr_const (f, x);
              break;
            }
          /* Fall through.  */

        default:
          output_operand_lossage ("Unsupported operand for code '%c'", code);
        }
      break;
    case 'e':
      /* Print the sign/zero-extend size as a character 8->b, 16->h,
         32->w.  */
      {
        int n;

        if (!CONST_INT_P (x)
            || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
          {
            output_operand_lossage ("invalid operand for '%%%c'", code);
            return;
          }

        switch (n)
          {
          case 3:
            fputc ('b', f);
            break;
          case 4:
            fputc ('h', f);
            break;
          case 5:
            fputc ('w', f);
            break;
          default:
            output_operand_lossage ("invalid operand for '%%%c'", code);
            return;
          }
      }
      break;

    case 'p':
      {
        int n;

        /* Print N such that 2^N == X.  */
        if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
          {
            output_operand_lossage ("invalid operand for '%%%c'", code);
            return;
          }

        asm_fprintf (f, "%d", n);
      }
      break;

    case 'P':
      /* Print the number of non-zero bits in X (a const_int).  */
      if (!CONST_INT_P (x))
        {
          output_operand_lossage ("invalid operand for '%%%c'", code);
          return;
        }

      asm_fprintf (f, "%u", popcount_hwi (INTVAL (x)));
      break;

    case 'H':
      /* Print the higher numbered register of a pair (TImode) of regs.  */
      if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
        {
          output_operand_lossage ("invalid operand for '%%%c'", code);
          return;
        }

      asm_fprintf (f, "%s", reg_names[REGNO (x) + 1]);
      break;
    case 'm':
      {
        int cond_code;
        /* Print a condition (eq, ne, etc).  */

        /* CONST_TRUE_RTX means always -- that's the default.  */
        if (x == const_true_rtx)
          return;

        if (!COMPARISON_P (x))
          {
            output_operand_lossage ("invalid operand for '%%%c'", code);
            return;
          }

        cond_code = aarch64_get_condition_code (x);
        gcc_assert (cond_code >= 0);
        fputs (aarch64_condition_codes[cond_code], f);
      }
      break;

    case 'M':
      {
        int cond_code;
        /* Print the inverse of a condition (eq <-> ne, etc).  */

        /* CONST_TRUE_RTX means never -- that's the default.  */
        if (x == const_true_rtx)
          {
            fputs ("nv", f);
            return;
          }

        if (!COMPARISON_P (x))
          {
            output_operand_lossage ("invalid operand for '%%%c'", code);
            return;
          }

        cond_code = aarch64_get_condition_code (x);
        gcc_assert (cond_code >= 0);
        fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
                                       (cond_code)], f);
      }
      break;
    case 'b':
    case 'h':
    case 's':
    case 'd':
    case 'q':
      /* Print a scalar FP/SIMD register name.  */
      if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
        {
          output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
          return;
        }
      asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
      break;

    case 'S':
    case 'T':
    case 'U':
    case 'V':
      /* Print the first FP/SIMD register name in a list.  */
      if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
        {
          output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
          return;
        }
      asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
      break;

    case 'R':
      /* Print a scalar FP/SIMD register name + 1.  */
      if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
        {
          output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
          return;
        }
      asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
      break;

    case 'X':
      /* Print bottom 16 bits of integer constant in hex.  */
      if (!CONST_INT_P (x))
        {
          output_operand_lossage ("invalid operand for '%%%c'", code);
          return;
        }
      asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
      break;
    case 'w':
    case 'x':
      /* Print a general register name or the zero register (32-bit or
         64-bit).  */
      if (x == const0_rtx
          || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
        {
          asm_fprintf (f, "%czr", code);
          break;
        }

      if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
        {
          asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
          break;
        }

      if (REG_P (x) && REGNO (x) == SP_REGNUM)
        {
          asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
          break;
        }

      /* Fall through */

    case 0:
      /* Print a normal operand, if it's a general register, then we
         assume DImode.  */
      if (x == NULL)
        {
          output_operand_lossage ("missing operand");
          return;
        }

      switch (GET_CODE (x))
        {
        case REG:
          asm_fprintf (f, "%s", reg_names[REGNO (x)]);
          break;

        case MEM:
          output_address (GET_MODE (x), XEXP (x, 0));
          break;

        case CONST:
        case LABEL_REF:
        case SYMBOL_REF:
          output_addr_const (asm_out_file, x);
          break;

        case CONST_INT:
          asm_fprintf (f, "%wd", INTVAL (x));
          break;

        case CONST_VECTOR:
          if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
            {
              gcc_assert (
                aarch64_const_vec_all_same_in_range_p (x,
                                                       HOST_WIDE_INT_MIN,
                                                       HOST_WIDE_INT_MAX));
              asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
            }
          else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
            {
              fputc ('0', f);
            }
          else
            gcc_unreachable ();
          break;

        case CONST_DOUBLE:
          /* Since we define TARGET_SUPPORTS_WIDE_INT we shouldn't ever
             be getting CONST_DOUBLEs holding integers.  */
          gcc_assert (GET_MODE (x) != VOIDmode);
          if (aarch64_float_const_zero_rtx_p (x))
            {
              fputc ('0', f);
              break;
            }
          else if (aarch64_float_const_representable_p (x))
            {
#define buf_size 20
              char float_buf[buf_size] = {'\0'};
              real_to_decimal_for_mode (float_buf,
                                        CONST_DOUBLE_REAL_VALUE (x),
                                        buf_size, buf_size,
                                        1, GET_MODE (x));
              asm_fprintf (asm_out_file, "%s", float_buf);
              break;
#undef buf_size
            }
          output_operand_lossage ("invalid constant");
          return;

        default:
          output_operand_lossage ("invalid operand");
          return;
        }
      break;
    case 'A':
      if (GET_CODE (x) == HIGH)
        x = XEXP (x, 0);

      switch (aarch64_classify_symbolic_expression (x))
        {
        case SYMBOL_SMALL_GOT_4G:
          asm_fprintf (asm_out_file, ":got:");
          break;

        case SYMBOL_SMALL_TLSGD:
          asm_fprintf (asm_out_file, ":tlsgd:");
          break;

        case SYMBOL_SMALL_TLSDESC:
          asm_fprintf (asm_out_file, ":tlsdesc:");
          break;

        case SYMBOL_SMALL_TLSIE:
          asm_fprintf (asm_out_file, ":gottprel:");
          break;

        case SYMBOL_TLSLE24:
          asm_fprintf (asm_out_file, ":tprel:");
          break;

        case SYMBOL_TINY_GOT:
          gcc_unreachable ();
          break;

        default:
          break;
        }
      output_addr_const (asm_out_file, x);
      break;
    case 'L':
      switch (aarch64_classify_symbolic_expression (x))
        {
        case SYMBOL_SMALL_GOT_4G:
          asm_fprintf (asm_out_file, ":lo12:");
          break;

        case SYMBOL_SMALL_TLSGD:
          asm_fprintf (asm_out_file, ":tlsgd_lo12:");
          break;

        case SYMBOL_SMALL_TLSDESC:
          asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
          break;

        case SYMBOL_SMALL_TLSIE:
          asm_fprintf (asm_out_file, ":gottprel_lo12:");
          break;

        case SYMBOL_TLSLE12:
          asm_fprintf (asm_out_file, ":tprel_lo12:");
          break;

        case SYMBOL_TLSLE24:
          asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
          break;

        case SYMBOL_TINY_GOT:
          asm_fprintf (asm_out_file, ":got:");
          break;

        case SYMBOL_TINY_TLSIE:
          asm_fprintf (asm_out_file, ":gottprel:");
          break;

        default:
          break;
        }
      output_addr_const (asm_out_file, x);
      break;
    case 'G':
      switch (aarch64_classify_symbolic_expression (x))
        {
        case SYMBOL_TLSLE24:
          asm_fprintf (asm_out_file, ":tprel_hi12:");
          break;

        default:
          break;
        }
      output_addr_const (asm_out_file, x);
      break;

    case 'K':
      {
        int cond_code;

        if (!COMPARISON_P (x))
          {
            output_operand_lossage ("invalid operand for '%%%c'", code);
            return;
          }

        cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
        gcc_assert (cond_code >= 0);
        asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][0]);
      }
      break;

    case 'k':
      {
        int cond_code;

        if (!COMPARISON_P (x))
          {
            output_operand_lossage ("invalid operand for '%%%c'", code);
            return;
          }

        cond_code = aarch64_get_condition_code_1 (CCmode, GET_CODE (x));
        gcc_assert (cond_code >= 0);
        asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code][1]);
      }
      break;

    default:
      output_operand_lossage ("invalid operand prefix '%%%c'", code);
      return;
    }
}
static void
aarch64_print_operand_address (FILE *f, machine_mode mode, rtx x)
{
  struct aarch64_address_info addr;

  if (aarch64_classify_address (&addr, x, mode, MEM, true))
    switch (addr.type)
      {
      case ADDRESS_REG_IMM:
        if (addr.offset == const0_rtx)
          asm_fprintf (f, "[%s]", reg_names[REGNO (addr.base)]);
        else
          asm_fprintf (f, "[%s, %wd]", reg_names[REGNO (addr.base)],
                       INTVAL (addr.offset));
        return;

      case ADDRESS_REG_REG:
        if (addr.shift == 0)
          asm_fprintf (f, "[%s, %s]", reg_names[REGNO (addr.base)],
                       reg_names[REGNO (addr.offset)]);
        else
          asm_fprintf (f, "[%s, %s, lsl %u]", reg_names[REGNO (addr.base)],
                       reg_names[REGNO (addr.offset)], addr.shift);
        return;

      case ADDRESS_REG_UXTW:
        if (addr.shift == 0)
          asm_fprintf (f, "[%s, w%d, uxtw]", reg_names[REGNO (addr.base)],
                       REGNO (addr.offset) - R0_REGNUM);
        else
          asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names[REGNO (addr.base)],
                       REGNO (addr.offset) - R0_REGNUM, addr.shift);
        return;

      case ADDRESS_REG_SXTW:
        if (addr.shift == 0)
          asm_fprintf (f, "[%s, w%d, sxtw]", reg_names[REGNO (addr.base)],
                       REGNO (addr.offset) - R0_REGNUM);
        else
          asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names[REGNO (addr.base)],
                       REGNO (addr.offset) - R0_REGNUM, addr.shift);
        return;

      case ADDRESS_REG_WB:
        switch (GET_CODE (x))
          {
          case PRE_INC:
            asm_fprintf (f, "[%s, %d]!", reg_names[REGNO (addr.base)],
                         GET_MODE_SIZE (mode));
            return;
          case POST_INC:
            asm_fprintf (f, "[%s], %d", reg_names[REGNO (addr.base)],
                         GET_MODE_SIZE (mode));
            return;
          case PRE_DEC:
            asm_fprintf (f, "[%s, -%d]!", reg_names[REGNO (addr.base)],
                         GET_MODE_SIZE (mode));
            return;
          case POST_DEC:
            asm_fprintf (f, "[%s], -%d", reg_names[REGNO (addr.base)],
                         GET_MODE_SIZE (mode));
            return;
          case PRE_MODIFY:
            asm_fprintf (f, "[%s, %wd]!", reg_names[REGNO (addr.base)],
                         INTVAL (addr.offset));
            return;
          case POST_MODIFY:
            asm_fprintf (f, "[%s], %wd", reg_names[REGNO (addr.base)],
                         INTVAL (addr.offset));
            return;
          default:
            break;
          }
        break;

      case ADDRESS_LO_SUM:
        asm_fprintf (f, "[%s, #:lo12:", reg_names[REGNO (addr.base)]);
        output_addr_const (f, addr.offset);
        asm_fprintf (f, "]");
        return;

      case ADDRESS_SYMBOLIC:
        break;
      }

  output_addr_const (f, x);
}
bool
aarch64_label_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == LABEL_REF)
    return true;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
     referencing instruction, but they are constant offsets, not
     symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return false;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
        {
          int j;

          for (j = XVECLEN (x, i) - 1; j >= 0; j--)
            if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
              return 1;
        }
      else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
        return 1;
    }

  return 0;
}
4676 aarch64_regno_regclass (unsigned regno
)
4678 if (GP_REGNUM_P (regno
))
4679 return GENERAL_REGS
;
4681 if (regno
== SP_REGNUM
)
4684 if (regno
== FRAME_POINTER_REGNUM
4685 || regno
== ARG_POINTER_REGNUM
)
4686 return POINTER_REGS
;
4688 if (FP_REGNUM_P (regno
))
4689 return FP_LO_REGNUM_P (regno
) ? FP_LO_REGS
: FP_REGS
;
static rtx
aarch64_legitimize_address (rtx x, rtx /* orig_x  */, machine_mode mode)
{
  /* Try to split X+CONST into Y=X+(CONST & ~mask), Y+(CONST&mask),
     where mask is selected by alignment and size of the offset.
     We try to pick as large a range for the offset as possible to
     maximize the chance of a CSE.  However, for aligned addresses
     we limit the range to 4k so that structures with different sized
     elements are likely to use the same base.  */

  if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
    {
      HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
      HOST_WIDE_INT base_offset;

      /* Does it look like we'll need a load/store-pair operation?  */
      if (GET_MODE_SIZE (mode) > 16
          || mode == TImode)
        base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
                       & ~((128 * GET_MODE_SIZE (mode)) - 1));
      /* For offsets that aren't a multiple of the access size, the limit is
         -256...255.  */
      else if (offset & (GET_MODE_SIZE (mode) - 1))
        base_offset = (offset + 0x100) & ~0x1ff;
      else
        base_offset = offset & ~0xfff;

      if (base_offset == 0)
        return x;

      offset -= base_offset;
      rtx base_reg = gen_reg_rtx (Pmode);
      rtx val = force_operand (plus_constant (Pmode, XEXP (x, 0),
                                              base_offset),
                               NULL_RTX);
      emit_move_insn (base_reg, val);
      x = plus_constant (Pmode, base_reg, offset);
    }

  return x;
}
4735 /* Try a machine-dependent way of reloading an illegitimate address
4736 operand. If we find one, push the reload and return the new rtx. */
4739 aarch64_legitimize_reload_address (rtx
*x_p
,
4741 int opnum
, int type
,
4742 int ind_levels ATTRIBUTE_UNUSED
)
4746 /* Do not allow mem (plus (reg, const)) if vector struct mode. */
4747 if (aarch64_vect_struct_mode_p (mode
)
4748 && GET_CODE (x
) == PLUS
4749 && REG_P (XEXP (x
, 0))
4750 && CONST_INT_P (XEXP (x
, 1)))
4754 push_reload (orig_rtx
, NULL_RTX
, x_p
, NULL
,
4755 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
4756 opnum
, (enum reload_type
) type
);
4760 /* We must recognize output that we have already generated ourselves. */
4761 if (GET_CODE (x
) == PLUS
4762 && GET_CODE (XEXP (x
, 0)) == PLUS
4763 && REG_P (XEXP (XEXP (x
, 0), 0))
4764 && CONST_INT_P (XEXP (XEXP (x
, 0), 1))
4765 && CONST_INT_P (XEXP (x
, 1)))
4767 push_reload (XEXP (x
, 0), NULL_RTX
, &XEXP (x
, 0), NULL
,
4768 BASE_REG_CLASS
, GET_MODE (x
), VOIDmode
, 0, 0,
4769 opnum
, (enum reload_type
) type
);
  /* We wish to handle large displacements off a base register by splitting
     the addend across an add and the mem insn.  This can cut the number of
     extra insns needed from 3 to 1.  It is only useful for load/store of a
     single register with 12 bit offset field.  */
  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1))
      && HARD_REGISTER_P (XEXP (x, 0))
      && mode != TImode
      && mode != TFmode
      && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
    {
      HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
      HOST_WIDE_INT low = val & 0xfff;
      HOST_WIDE_INT high = val - low;
      HOST_WIDE_INT offs;
      rtx cst;
      machine_mode xmode = GET_MODE (x);

      /* In ILP32, xmode can be either DImode or SImode.  */
      gcc_assert (xmode == DImode || xmode == SImode);

      /* Reload non-zero BLKmode offsets.  This is because we cannot ascertain
	 BLKmode alignment.  */
      if (GET_MODE_SIZE (mode) == 0)
	return NULL_RTX;

      offs = low % GET_MODE_SIZE (mode);

      /* Align misaligned offset by adjusting high part to compensate.  */
      if (offs != 0)
	{
	  if (aarch64_uimm12_shift (high + offs))
	    {
	      /* Align down.  */
	      low = low - offs;
	      high = high + offs;
	    }
	  else
	    {
	      /* Align up.  */
	      offs = GET_MODE_SIZE (mode) - offs;
	      low = low + offs;
	      high = high + (low & 0x1000) - offs;
	      low &= 0xfff;
	    }
	}

      /* Check for overflow.  */
      if (high + low != val)
	return NULL_RTX;

      cst = GEN_INT (high);
      if (!aarch64_uimm12_shift (high))
	cst = force_const_mem (xmode, cst);

      /* Reload high part into base reg, leaving the low part
	 in the mem instruction.
	 Note that replacing this gen_rtx_PLUS with plus_constant is
	 wrong in this case because we rely on the
	 (plus (plus reg c1) c2) structure being preserved so that
	 XEXP (*p, 0) in push_reload below uses the correct term.  */
      x = gen_rtx_PLUS (xmode,
			gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
			GEN_INT (low));

      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      return x;
    }

  return NULL_RTX;
}
/* Return the reload icode required for a constant pool in mode.  */
static enum insn_code
aarch64_constant_pool_reload_icode (machine_mode mode)
{
  switch (mode)
    {
    case SFmode:
      return CODE_FOR_aarch64_reload_movcpsfdi;

    case DFmode:
      return CODE_FOR_aarch64_reload_movcpdfdi;

    case TFmode:
      return CODE_FOR_aarch64_reload_movcptfdi;

    case V8QImode:
      return CODE_FOR_aarch64_reload_movcpv8qidi;

    case V16QImode:
      return CODE_FOR_aarch64_reload_movcpv16qidi;

    case V4HImode:
      return CODE_FOR_aarch64_reload_movcpv4hidi;

    case V8HImode:
      return CODE_FOR_aarch64_reload_movcpv8hidi;

    case V2SImode:
      return CODE_FOR_aarch64_reload_movcpv2sidi;

    case V4SImode:
      return CODE_FOR_aarch64_reload_movcpv4sidi;

    case V2DImode:
      return CODE_FOR_aarch64_reload_movcpv2didi;

    case V2DFmode:
      return CODE_FOR_aarch64_reload_movcpv2dfdi;

    default:
      gcc_unreachable ();
    }
}
static reg_class_t
aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
			  reg_class_t rclass,
			  machine_mode mode,
			  secondary_reload_info *sri)
{
  /* If we have to disable direct literal pool loads and stores because the
     function is too big, then we need a scratch register.  */
  if (MEM_P (x) && GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)
      && (SCALAR_FLOAT_MODE_P (GET_MODE (x))
	  || targetm.vector_mode_supported_p (GET_MODE (x)))
      && aarch64_nopcrelative_literal_loads)
    {
      sri->icode = aarch64_constant_pool_reload_icode (mode);
      return NO_REGS;
    }

  /* Without the TARGET_SIMD instructions we cannot move a Q register
     to a Q register directly.  We need a scratch.  */
  if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
      && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
      && reg_class_subset_p (rclass, FP_REGS))
    {
      if (mode == TFmode)
	sri->icode = CODE_FOR_aarch64_reload_movtf;
      else if (mode == TImode)
	sri->icode = CODE_FOR_aarch64_reload_movti;
      return NO_REGS;
    }

  /* A TFmode or TImode memory access should be handled via FP_REGS
     because AArch64 has richer addressing modes for LDR/STR instructions
     than LDP/STP instructions.  */
  if (TARGET_FLOAT && rclass == GENERAL_REGS
      && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
    return FP_REGS;

  if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P (x))
    return GENERAL_REGS;

  return NO_REGS;
}
bool
aarch64_can_eliminate (const int from, const int to)
{
  /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
     HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM.  */

  if (frame_pointer_needed)
    {
      if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
	return true;
      if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
	return false;
      if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
	  && !cfun->calls_alloca)
	return true;
      if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
	return true;

      return false;
    }
  else
    {
      /* If we decided that we didn't need a leaf frame pointer but then used
	 LR in the function, then we'll want a frame pointer after all, so
	 prevent this elimination to ensure a frame pointer is used.  */
      if (to == STACK_POINTER_REGNUM
	  && flag_omit_leaf_frame_pointer
	  && df_regs_ever_live_p (LR_REGNUM))
	return false;
    }

  return true;
}
HOST_WIDE_INT
aarch64_initial_elimination_offset (unsigned from, unsigned to)
{
  aarch64_layout_frame ();

  if (to == HARD_FRAME_POINTER_REGNUM)
    {
      if (from == ARG_POINTER_REGNUM)
	return cfun->machine->frame.frame_size - crtl->outgoing_args_size;

      if (from == FRAME_POINTER_REGNUM)
	return (cfun->machine->frame.hard_fp_offset
		- cfun->machine->frame.saved_varargs_size);
    }

  if (to == STACK_POINTER_REGNUM)
    {
      if (from == FRAME_POINTER_REGNUM)
	return (cfun->machine->frame.frame_size
		- cfun->machine->frame.saved_varargs_size);
    }

  return cfun->machine->frame.frame_size;
}
/* Implement RETURN_ADDR_RTX.  We do not support moving back to a
   previous frame.  */

rtx
aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return const0_rtx;
  return get_hard_reg_initial_val (Pmode, LR_REGNUM);
}
static void
aarch64_asm_trampoline_template (FILE *f)
{
  if (TARGET_ILP32)
    {
      asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
      asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
    }
  else
    {
      asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names[IP1_REGNUM]);
      asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names[STATIC_CHAIN_REGNUM]);
    }
  asm_fprintf (f, "\tbr\t%s\n", reg_names[IP1_REGNUM]);
  assemble_aligned_integer (4, const0_rtx);
  assemble_aligned_integer (POINTER_BYTES, const0_rtx);
  assemble_aligned_integer (POINTER_BYTES, const0_rtx);
}
static void
aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;
  const int tramp_code_sz = 16;

  /* Don't need to copy the trailing D-words, we fill those in below.  */
  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
  mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
  fnaddr = XEXP (DECL_RTL (fndecl), 0);
  if (GET_MODE (fnaddr) != ptr_mode)
    fnaddr = convert_memory_address (ptr_mode, fnaddr);
  emit_move_insn (mem, fnaddr);

  mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
  emit_move_insn (mem, chain_value);

  /* XXX We should really define a "clear_cache" pattern and use
     gen_clear_cache().  */
  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
		     LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
		     plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
		     ptr_mode);
}
static unsigned char
aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
{
  switch (regclass)
    {
    case CALLER_SAVE_REGS:
    case POINTER_REGS:
    case GENERAL_REGS:
    case ALL_REGS:
    case FP_REGS:
    case FP_LO_REGS:
      return
	aarch64_vector_mode_p (mode)
	  ? (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG
	  : (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
    case STACK_REG:
      return 1;

    case NO_REGS:
      return 0;

    default:
      break;
    }
  gcc_unreachable ();
}
static reg_class_t
aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
{
  if (regclass == POINTER_REGS)
    return GENERAL_REGS;

  if (regclass == STACK_REG)
    {
      if (REG_P (x)
	  && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
	return regclass;

      return NO_REGS;
    }

  /* If it's an integer immediate that MOVI can't handle, then
     FP_REGS is not an option, so we return NO_REGS instead.  */
  if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
      && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
    return NO_REGS;

  /* Register elimination can result in a request for
     SP+constant->FP_REGS.  We cannot support such operations which
     use SP as source and an FP_REG as destination, so reject them
     outright.  */
  if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
    {
      rtx lhs = XEXP (x, 0);

      /* Look through a possible SUBREG introduced by ILP32.  */
      if (GET_CODE (lhs) == SUBREG)
	lhs = SUBREG_REG (lhs);

      gcc_assert (REG_P (lhs));
      gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
				      POINTER_REGS));
      return NO_REGS;
    }

  return regclass;
}
void
aarch64_asm_output_labelref (FILE* f, const char *name)
{
  asm_fprintf (f, "%U%s", name);
}
static void
aarch64_elf_asm_constructor (rtx symbol, int priority)
{
  if (priority == DEFAULT_INIT_PRIORITY)
    default_ctor_section_asm_out_constructor (symbol, priority);
  else
    {
      section *s;
      char buf[18];
      snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
      s = get_section (buf, SECTION_WRITE, NULL);
      switch_to_section (s);
      assemble_align (POINTER_SIZE);
      assemble_aligned_integer (POINTER_BYTES, symbol);
    }
}
static void
aarch64_elf_asm_destructor (rtx symbol, int priority)
{
  if (priority == DEFAULT_INIT_PRIORITY)
    default_dtor_section_asm_out_destructor (symbol, priority);
  else
    {
      section *s;
      char buf[18];
      snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
      s = get_section (buf, SECTION_WRITE, NULL);
      switch_to_section (s);
      assemble_align (POINTER_SIZE);
      assemble_aligned_integer (POINTER_BYTES, symbol);
    }
}
const char *
aarch64_output_casesi (rtx *operands)
{
  char buf[100];
  char label[100];
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
  int index;
  static const char *const patterns[4][2] =
  {
    {
      "ldrb\t%w3, [%0,%w1,uxtw]",
      "add\t%3, %4, %w3, sxtb #2"
    },
    {
      "ldrh\t%w3, [%0,%w1,uxtw #1]",
      "add\t%3, %4, %w3, sxth #2"
    },
    {
      "ldr\t%w3, [%0,%w1,uxtw #2]",
      "add\t%3, %4, %w3, sxtw #2"
    },
    /* We assume that DImode is only generated when not optimizing and
       that we don't really need 64-bit address offsets.  That would
       imply an object file with 8GB of code in a single function!  */
    {
      "ldr\t%w3, [%0,%w1,uxtw #2]",
      "add\t%3, %4, %w3, sxtw #2"
    }
  };

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));

  gcc_assert (index >= 0 && index <= 3);

  /* Need to implement table size reduction, by changing the code below.  */
  output_asm_insn (patterns[index][0], operands);
  ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
  snprintf (buf, sizeof (buf),
	    "adr\t%%4, %s", targetm.strip_name_encoding (label));
  output_asm_insn (buf, operands);
  output_asm_insn (patterns[index][1], operands);
  output_asm_insn ("br\t%3", operands);
  assemble_label (asm_out_file, label);
  return "";
}
/* Return size in bits of an arithmetic operand which is shifted/scaled and
   masked such that it is suitable for a UXTB, UXTH, or UXTW extend
   operator.  */
int
aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
{
  if (shift >= 0 && shift <= 3)
    {
      int size;
      for (size = 8; size <= 32; size *= 2)
	{
	  HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
	  if (mask == bits << shift)
	    return size;
	}
    }
  return 0;
}
/* Constant pools are per function only when PC relative
   literal loads are true or we are in the large memory
   model.  */

static inline bool
aarch64_can_use_per_function_literal_pools_p (void)
{
  return (!aarch64_nopcrelative_literal_loads
	  || aarch64_cmodel == AARCH64_CMODEL_LARGE);
}

static bool
aarch64_use_blocks_for_constant_p (machine_mode, const_rtx)
{
  /* We can't use blocks for constants when we're using a per-function
     literal pool.  */
  return !aarch64_can_use_per_function_literal_pools_p ();
}

/* Select appropriate section for constants depending
   on where we place literal pools.  */

static section *
aarch64_select_rtx_section (machine_mode mode,
			    rtx x,
			    unsigned HOST_WIDE_INT align)
{
  if (aarch64_can_use_per_function_literal_pools_p ())
    return function_section (current_function_decl);

  return default_elf_select_rtx_section (mode, x, align);
}
/* Helper function for rtx cost calculation.  Strip a shift expression
   from X.  Returns the inner operand if successful, or the original
   expression on failure.  */
static rtx
aarch64_strip_shift (rtx x)
{
  rtx op = x;

  /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
     we can convert both to ROR during final output.  */
  if ((GET_CODE (op) == ASHIFT
       || GET_CODE (op) == ASHIFTRT
       || GET_CODE (op) == LSHIFTRT
       || GET_CODE (op) == ROTATERT
       || GET_CODE (op) == ROTATE)
      && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);

  if (GET_CODE (op) == MULT
      && CONST_INT_P (XEXP (op, 1))
      && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
    return XEXP (op, 0);

  return x;
}
/* Helper function for rtx cost calculation.  Strip an extend
   expression from X.  Returns the inner operand if successful, or the
   original expression on failure.  We deal with a number of possible
   canonicalization variations here.  */
static rtx
aarch64_strip_extend (rtx x)
{
  rtx op = x;

  /* Zero and sign extraction of a widened value.  */
  if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
      && XEXP (op, 2) == const0_rtx
      && GET_CODE (XEXP (op, 0)) == MULT
      && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
					 XEXP (op, 1)))
    return XEXP (XEXP (op, 0), 0);

  /* It can also be represented (for zero-extend) as an AND with an
     immediate.  */
  if (GET_CODE (op) == AND
      && GET_CODE (XEXP (op, 0)) == MULT
      && CONST_INT_P (XEXP (XEXP (op, 0), 1))
      && CONST_INT_P (XEXP (op, 1))
      && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
			   INTVAL (XEXP (op, 1))) != 0)
    return XEXP (XEXP (op, 0), 0);

  /* Now handle extended register, as this may also have an optional
     left shift by 1..4.  */
  if (GET_CODE (op) == ASHIFT
      && CONST_INT_P (XEXP (op, 1))
      && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
    op = XEXP (op, 0);

  if (GET_CODE (op) == ZERO_EXTEND
      || GET_CODE (op) == SIGN_EXTEND)
    op = XEXP (op, 0);

  if (op != x)
    return op;

  return x;
}
/* Return true iff CODE is a shift supported in combination
   with arithmetic instructions.  */
static bool
aarch64_shift_p (enum rtx_code code)
{
  return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
}
/* Helper function for rtx cost calculation.  Calculate the cost of
   a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
   Return the calculated cost of the expression, recursing manually in to
   operands where needed.  */

static int
aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
{
  rtx op0, op1;
  const struct cpu_cost_table *extra_cost
    = aarch64_tune_params.insn_extra_cost;
  int cost = 0;
  bool compound_p = (outer == PLUS || outer == MINUS);
  machine_mode mode = GET_MODE (x);

  gcc_checking_assert (code == MULT);

  op0 = XEXP (x, 0);
  op1 = XEXP (x, 1);

  if (VECTOR_MODE_P (mode))
    mode = GET_MODE_INNER (mode);

  /* Integer multiply/fma.  */
  if (GET_MODE_CLASS (mode) == MODE_INT)
    {
      /* The multiply will be canonicalized as a shift, cost it as such.  */
      if (aarch64_shift_p (GET_CODE (x))
	  || (CONST_INT_P (op1)
	      && exact_log2 (INTVAL (op1)) > 0))
	{
	  bool is_extend = GET_CODE (op0) == ZERO_EXTEND
			   || GET_CODE (op0) == SIGN_EXTEND;
	  if (speed)
	    {
	      if (compound_p)
		{
		  if (REG_P (op1))
		    /* ARITH + shift-by-register.  */
		    cost += extra_cost->alu.arith_shift_reg;
		  else if (is_extend)
		    /* ARITH + extended register.  We don't have a cost field
		       for ARITH+EXTEND+SHIFT, so use extend_arith here.  */
		    cost += extra_cost->alu.extend_arith;
		  else
		    /* ARITH + shift-by-immediate.  */
		    cost += extra_cost->alu.arith_shift;
		}
	      else
		/* LSL (immediate).  */
		cost += extra_cost->alu.shift;
	    }

	  /* Strip extends as we will have costed them in the case above.  */
	  if (is_extend)
	    op0 = aarch64_strip_extend (op0);

	  cost += rtx_cost (op0, VOIDmode, code, 0, speed);

	  return cost;
	}

      /* MNEG or [US]MNEGL.  Extract the NEG operand and indicate that it's a
	 compound and let the below cases handle it.  After all, MNEG is a
	 special-case alias of MSUB.  */
      if (GET_CODE (op0) == NEG)
	{
	  op0 = XEXP (op0, 0);
	  compound_p = true;
	}

      /* Integer multiplies or FMAs have zero/sign extending variants.  */
      if ((GET_CODE (op0) == ZERO_EXTEND
	   && GET_CODE (op1) == ZERO_EXTEND)
	  || (GET_CODE (op0) == SIGN_EXTEND
	      && GET_CODE (op1) == SIGN_EXTEND))
	{
	  cost += rtx_cost (XEXP (op0, 0), VOIDmode, MULT, 0, speed);
	  cost += rtx_cost (XEXP (op1, 0), VOIDmode, MULT, 1, speed);

	  if (speed)
	    {
	      if (compound_p)
		/* SMADDL/UMADDL/UMSUBL/SMSUBL.  */
		cost += extra_cost->mult[0].extend_add;
	      else
		/* MUL/SMULL/UMULL.  */
		cost += extra_cost->mult[0].extend;
	    }

	  return cost;
	}

      /* This is either an integer multiply or a MADD.  In both cases
	 we want to recurse and cost the operands.  */
      cost += rtx_cost (op0, mode, MULT, 0, speed);
      cost += rtx_cost (op1, mode, MULT, 1, speed);

      if (speed)
	{
	  if (compound_p)
	    /* MADD/MSUB.  */
	    cost += extra_cost->mult[mode == DImode].add;
	  else
	    /* MUL.  */
	    cost += extra_cost->mult[mode == DImode].simple;
	}

      return cost;
    }
  else
    {
      if (speed)
	{
	  /* Floating-point FMA/FMUL can also support negations of the
	     operands, unless the rounding mode is upward or downward in
	     which case FNMUL differs from FMUL with operand negation.  */
	  bool neg0 = GET_CODE (op0) == NEG;
	  bool neg1 = GET_CODE (op1) == NEG;
	  if (compound_p || !flag_rounding_math || (neg0 && neg1))
	    {
	      if (neg0)
		op0 = XEXP (op0, 0);
	      if (neg1)
		op1 = XEXP (op1, 0);
	    }

	  if (compound_p)
	    /* FMADD/FNMADD/FNMSUB/FMSUB.  */
	    cost += extra_cost->fp[mode == DFmode].fma;
	  else
	    /* FMUL/FNMUL.  */
	    cost += extra_cost->fp[mode == DFmode].mult;
	}

      cost += rtx_cost (op0, mode, MULT, 0, speed);
      cost += rtx_cost (op1, mode, MULT, 1, speed);
      return cost;
    }
}
static int
aarch64_address_cost (rtx x,
		      machine_mode mode,
		      addr_space_t as ATTRIBUTE_UNUSED,
		      bool speed)
{
  enum rtx_code c = GET_CODE (x);
  const struct cpu_addrcost_table *addr_cost = aarch64_tune_params.addr_cost;
  struct aarch64_address_info info;
  int cost = 0;
  info.shift = 0;

  if (!aarch64_classify_address (&info, x, mode, c, false))
    {
      if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
	{
	  /* This is a CONST or SYMBOL ref which will be split
	     in a different way depending on the code model in use.
	     Cost it through the generic infrastructure.  */
	  int cost_symbol_ref = rtx_cost (x, Pmode, MEM, 1, speed);
	  /* Divide through by the cost of one instruction to
	     bring it to the same units as the address costs.  */
	  cost_symbol_ref /= COSTS_N_INSNS (1);
	  /* The cost is then the cost of preparing the address,
	     followed by an immediate (possibly 0) offset.  */
	  return cost_symbol_ref + addr_cost->imm_offset;
	}
      else
	{
	  /* This is most likely a jump table from a case
	     statement.  */
	  return addr_cost->register_offset;
	}
    }

  switch (info.type)
    {
    case ADDRESS_LO_SUM:
    case ADDRESS_SYMBOLIC:
    case ADDRESS_REG_IMM:
      cost += addr_cost->imm_offset;
      break;

    case ADDRESS_REG_WB:
      if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
	cost += addr_cost->pre_modify;
      else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
	cost += addr_cost->post_modify;
      else
	gcc_unreachable ();
      break;

    case ADDRESS_REG_REG:
      cost += addr_cost->register_offset;
      break;

    case ADDRESS_REG_SXTW:
      cost += addr_cost->register_sextend;
      break;

    case ADDRESS_REG_UXTW:
      cost += addr_cost->register_zextend;
      break;

    default:
      gcc_unreachable ();
    }

  if (info.shift > 0)
    {
      /* For the sake of calculating the cost of the shifted register
	 component, we can treat same sized modes in the same way.  */
      switch (GET_MODE_BITSIZE (mode))
	{
	case 16:
	  cost += addr_cost->addr_scale_costs.hi;
	  break;

	case 32:
	  cost += addr_cost->addr_scale_costs.si;
	  break;

	case 64:
	  cost += addr_cost->addr_scale_costs.di;
	  break;

	/* We can't tell, or this is a 128-bit vector.  */
	default:
	  cost += addr_cost->addr_scale_costs.ti;
	  break;
	}
    }

  return cost;
}
/* Return the cost of a branch.  If SPEED_P is true then the compiler is
   optimizing for speed.  If PREDICTABLE_P is true then the branch is predicted
   to be taken.  */

int
aarch64_branch_cost (bool speed_p, bool predictable_p)
{
  /* When optimizing for speed, use the cost of unpredictable branches.  */
  const struct cpu_branch_cost *branch_costs =
    aarch64_tune_params.branch_costs;

  if (!speed_p || predictable_p)
    return branch_costs->predictable;
  else
    return branch_costs->unpredictable;
}
/* Return true if the RTX X in mode MODE is a zero or sign extract
   usable in an ADD or SUB (extended register) instruction.  */
static bool
aarch64_rtx_arith_op_extract_p (rtx x, machine_mode mode)
{
  /* Catch add with a sign extract.
     This is add_<optab><mode>_multp2.  */
  if (GET_CODE (x) == SIGN_EXTRACT
      || GET_CODE (x) == ZERO_EXTRACT)
    {
      rtx op0 = XEXP (x, 0);
      rtx op1 = XEXP (x, 1);
      rtx op2 = XEXP (x, 2);

      if (GET_CODE (op0) == MULT
	  && CONST_INT_P (op1)
	  && op2 == const0_rtx
	  && CONST_INT_P (XEXP (op0, 1))
	  && aarch64_is_extend_from_extract (mode,
					     XEXP (op0, 1),
					     op1))
	return true;
    }
  /* The simple case <ARITH>, XD, XN, XM, [us]xt.
     No shift.  */
  else if (GET_CODE (x) == SIGN_EXTEND
	   || GET_CODE (x) == ZERO_EXTEND)
    return REG_P (XEXP (x, 0));

  return false;
}
/* Return true iff U is one of the FRINT* round-to-integral unspecs.  */
static bool
aarch64_frint_unspec_p (unsigned int u)
{
  switch (u)
    {
    case UNSPEC_FRINTZ:
    case UNSPEC_FRINTP:
    case UNSPEC_FRINTM:
    case UNSPEC_FRINTA:
    case UNSPEC_FRINTN:
    case UNSPEC_FRINTX:
    case UNSPEC_FRINTI:
      return true;

    default:
      return false;
    }
}
/* Return true iff X is an rtx that will match an extr instruction
   i.e. as described in the *extr<mode>5_insn family of patterns.
   OP0 and OP1 will be set to the operands of the shifts involved
   on success and will be NULL_RTX otherwise.  */

static bool
aarch64_extr_rtx_p (rtx x, rtx *res_op0, rtx *res_op1)
{
  rtx op0, op1;
  machine_mode mode = GET_MODE (x);

  *res_op0 = NULL_RTX;
  *res_op1 = NULL_RTX;

  if (GET_CODE (x) != IOR)
    return false;

  op0 = XEXP (x, 0);
  op1 = XEXP (x, 1);

  if ((GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
      || (GET_CODE (op1) == ASHIFT && GET_CODE (op0) == LSHIFTRT))
    {
      /* Canonicalise locally to ashift in op0, lshiftrt in op1.  */
      if (GET_CODE (op1) == ASHIFT)
	std::swap (op0, op1);

      if (!CONST_INT_P (XEXP (op0, 1)) || !CONST_INT_P (XEXP (op1, 1)))
	return false;

      unsigned HOST_WIDE_INT shft_amnt_0 = UINTVAL (XEXP (op0, 1));
      unsigned HOST_WIDE_INT shft_amnt_1 = UINTVAL (XEXP (op1, 1));

      if (shft_amnt_0 < GET_MODE_BITSIZE (mode)
	  && shft_amnt_0 + shft_amnt_1 == GET_MODE_BITSIZE (mode))
	{
	  *res_op0 = XEXP (op0, 0);
	  *res_op1 = XEXP (op1, 0);
	  return true;
	}
    }

  return false;
}
/* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
   storing it in *COST.  Result is true if the total cost of the operation
   has now been calculated.  */
static bool
aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
{
  rtx inner;
  rtx comparator;
  enum rtx_code cmpcode;

  if (COMPARISON_P (op0))
    {
      inner = XEXP (op0, 0);
      comparator = XEXP (op0, 1);
      cmpcode = GET_CODE (op0);
    }
  else
    {
      inner = op0;
      comparator = const0_rtx;
      cmpcode = NE;
    }

  if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
    {
      /* Conditional branch.  */
      if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
	return true;
      else
	{
	  if (cmpcode == NE || cmpcode == EQ)
	    {
	      if (comparator == const0_rtx)
		{
		  /* TBZ/TBNZ/CBZ/CBNZ.  */
		  if (GET_CODE (inner) == ZERO_EXTRACT)
		    /* TBZ/TBNZ.  */
		    *cost += rtx_cost (XEXP (inner, 0), VOIDmode,
				       ZERO_EXTRACT, 0, speed);
		  else
		    /* CBZ/CBNZ.  */
		    *cost += rtx_cost (inner, VOIDmode, cmpcode, 0, speed);

		  return true;
		}
	    }
	  else if (cmpcode == LT || cmpcode == GE)
	    {
	      /* TBZ/TBNZ.  */
	      if (comparator == const0_rtx)
		return true;
	    }
	}
    }
  else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
    {
      /* It's a conditional operation based on the status flags,
	 so it must be some flavor of CSEL.  */

      /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL.  */
      if (GET_CODE (op1) == NEG
	  || GET_CODE (op1) == NOT
	  || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
	op1 = XEXP (op1, 0);

      *cost += rtx_cost (op1, VOIDmode, IF_THEN_ELSE, 1, speed);
      *cost += rtx_cost (op2, VOIDmode, IF_THEN_ELSE, 2, speed);
      return true;
    }

  /* We don't know what this is, cost all operands.  */
  return false;
}
/* Calculate the cost of calculating X, storing it in *COST.  Result
   is true if the total cost of the operation has now been calculated.  */
static bool
aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
		   int param ATTRIBUTE_UNUSED, int *cost, bool speed)
{
  rtx op0, op1, op2;
  const struct cpu_cost_table *extra_cost
    = aarch64_tune_params.insn_extra_cost;
  int code = GET_CODE (x);

  /* By default, assume that everything has equivalent cost to the
     cheapest instruction.  Any additional costs are applied as a delta
     above this default.  */
  *cost = COSTS_N_INSNS (1);
  switch (code)
    {
    case SET:
      /* The cost depends entirely on the operands to SET.  */
      *cost = 0;
      op0 = SET_DEST (x);
      op1 = SET_SRC (x);

      switch (GET_CODE (op0))
	{
	case MEM:
	  if (speed)
	    {
	      rtx address = XEXP (op0, 0);
	      if (VECTOR_MODE_P (mode))
		*cost += extra_cost->ldst.storev;
	      else if (GET_MODE_CLASS (mode) == MODE_INT)
		*cost += extra_cost->ldst.store;
	      else if (mode == SFmode)
		*cost += extra_cost->ldst.storef;
	      else if (mode == DFmode)
		*cost += extra_cost->ldst.stored;

	      *cost +=
		COSTS_N_INSNS (aarch64_address_cost (address, mode,
						     0, speed));
	    }

	  *cost += rtx_cost (op1, mode, SET, 1, speed);
	  return true;

	case SUBREG:
	  if (! REG_P (SUBREG_REG (op0)))
	    *cost += rtx_cost (SUBREG_REG (op0), VOIDmode, SET, 0, speed);

	  /* Fall through.  */
	case REG:
	  /* The cost is one per vector-register copied.  */
	  if (VECTOR_MODE_P (GET_MODE (op0)) && REG_P (op1))
	    {
	      int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
			      / GET_MODE_SIZE (V4SImode);
	      *cost = COSTS_N_INSNS (n_minus_1 + 1);
	    }
	  /* const0_rtx is in general free, but we will use an
	     instruction to set a register to 0.  */
	  else if (REG_P (op1) || op1 == const0_rtx)
	    {
	      /* The cost is 1 per register copied.  */
	      int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
			      / UNITS_PER_WORD;
	      *cost = COSTS_N_INSNS (n_minus_1 + 1);
	    }
	  else
	    /* Cost is just the cost of the RHS of the set.  */
	    *cost += rtx_cost (op1, mode, SET, 1, speed);
	  return true;

	case ZERO_EXTRACT:
	case SIGN_EXTRACT:
	  /* Bit-field insertion.  Strip any redundant widening of
	     the RHS to meet the width of the target.  */
	  if (GET_CODE (op1) == SUBREG)
	    op1 = SUBREG_REG (op1);
	  if ((GET_CODE (op1) == ZERO_EXTEND
	       || GET_CODE (op1) == SIGN_EXTEND)
	      && CONST_INT_P (XEXP (op0, 1))
	      && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
		  >= INTVAL (XEXP (op0, 1))))
	    op1 = XEXP (op1, 0);

	  if (CONST_INT_P (op1))
	    {
	      /* MOV immediate is assumed to always be cheap.  */
	      *cost = COSTS_N_INSNS (1);
	    }
	  else
	    {
	      /* BFM.  */
	      if (speed)
		*cost += extra_cost->alu.bfi;
	      *cost += rtx_cost (op1, VOIDmode, (enum rtx_code) code, 1, speed);
	    }

	  return true;

	default:
	  /* We can't make sense of this, assume default cost.  */
	  *cost = COSTS_N_INSNS (1);
	  return false;
	}
      return false;
    case CONST_INT:
      /* If an instruction can incorporate a constant within the
	 instruction, the instruction's expression avoids calling
	 rtx_cost() on the constant.  If rtx_cost() is called on a
	 constant, then it is usually because the constant must be
	 moved into a register by one or more instructions.

	 The exception is constant 0, which can be expressed
	 as XZR/WZR and is therefore free.  The exception to this is
	 if we have (set (reg) (const0_rtx)) in which case we must cost
	 the move.  However, we can catch that when we cost the SET, so
	 we don't need to consider that here.  */
      if (x == const0_rtx)
	*cost = 0;
      else
	{
	  /* To an approximation, building any other constant is
	     proportionally expensive to the number of instructions
	     required to build that constant.  This is true whether we
	     are compiling for SPEED or otherwise.  */
	  *cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
				 (NULL_RTX, x, false, mode));
	}
      return true;
    case CONST_DOUBLE:
      if (speed)
	{
	  /* mov[df,sf]_aarch64.  */
	  if (aarch64_float_const_representable_p (x))
	    /* FMOV (scalar immediate).  */
	    *cost += extra_cost->fp[mode == DFmode].fpconst;
	  else if (!aarch64_float_const_zero_rtx_p (x))
	    {
	      /* This will be a load from memory.  */
	      if (mode == DFmode)
		*cost += extra_cost->ldst.loadd;
	      else
		*cost += extra_cost->ldst.loadf;
	    }
	  /* Otherwise this is +0.0.  We get this using MOVI d0, #0
	     or MOV v0.s[0], wzr - neither of which are modeled by the
	     cost tables.  Just use the default cost.  */
	}

      return true;
    case MEM:
      if (speed)
	{
	  /* For loads we want the base cost of a load, plus an
	     approximation for the additional cost of the addressing
	     mode.  */
	  rtx address = XEXP (x, 0);
	  if (VECTOR_MODE_P (mode))
	    *cost += extra_cost->ldst.loadv;
	  else if (GET_MODE_CLASS (mode) == MODE_INT)
	    *cost += extra_cost->ldst.load;
	  else if (mode == SFmode)
	    *cost += extra_cost->ldst.loadf;
	  else if (mode == DFmode)
	    *cost += extra_cost->ldst.loadd;

	  *cost +=
	    COSTS_N_INSNS (aarch64_address_cost (address, mode,
						 0, speed));
	}

      return true;
    case NEG:
      op0 = XEXP (x, 0);

      if (VECTOR_MODE_P (mode))
	{
	  if (speed)
	    /* FNEG.  */
	    *cost += extra_cost->vect.alu;
	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_INT)
	{
	  if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
	      || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
	    {
	      /* CSETM.  */
	      *cost += rtx_cost (XEXP (op0, 0), VOIDmode, NEG, 0, speed);
	      return true;
	    }

	  /* Cost this as SUB wzr, X.  */
	  op0 = CONST0_RTX (mode);
	  op1 = XEXP (x, 0);
	  goto cost_minus;
	}

      if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  /* Support (neg(fma...)) as a single instruction only if
	     sign of zeros is unimportant.  This matches the decision
	     making in aarch64.md.  */
	  if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
	    {
	      /* FNMADD.  */
	      *cost = rtx_cost (op0, mode, NEG, 0, speed);
	      return true;
	    }
	  if (GET_CODE (op0) == MULT)
	    {
	      /* FNMUL.  */
	      *cost = rtx_cost (op0, mode, NEG, 0, speed);
	      return true;
	    }
	  if (speed)
	    /* FNEG.  */
	    *cost += extra_cost->fp[mode == DFmode].neg;
	  return false;
	}

      return false;

    case CLRSB:
    case CLZ:
      if (speed)
	{
	  if (VECTOR_MODE_P (mode))
	    *cost += extra_cost->vect.alu;
	  else
	    *cost += extra_cost->alu.clz;
	}

      return false;
    case COMPARE:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (op1 == const0_rtx
	  && GET_CODE (op0) == AND)
	{
	  x = op0;
	  mode = GET_MODE (op0);
	  goto cost_logic;
	}

      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	{
	  /* TODO: A write to the CC flags possibly costs extra, this
	     needs encoding in the cost tables.  */

	  /* CC_ZESWPmode supports zero extend for free.  */
	  if (mode == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
	    op0 = XEXP (op0, 0);

	  mode = GET_MODE (op0);
	  /* ANDS.  */
	  if (GET_CODE (op0) == AND)
	    {
	      x = op0;
	      goto cost_logic;
	    }

	  if (GET_CODE (op0) == PLUS)
	    {
	      /* ADDS (and CMN alias).  */
	      x = op0;
	      goto cost_plus;
	    }

	  if (GET_CODE (op0) == MINUS)
	    {
	      /* SUBS.  */
	      x = op0;
	      goto cost_minus;
	    }

	  if (GET_CODE (op1) == NEG)
	    {
	      /* CMN.  */
	      if (speed)
		*cost += extra_cost->alu.arith;

	      *cost += rtx_cost (op0, mode, COMPARE, 0, speed);
	      *cost += rtx_cost (XEXP (op1, 0), mode, NEG, 1, speed);
	      return true;
	    }

	  /* CMP.

	     Compare can freely swap the order of operands, and
	     canonicalization puts the more complex operation first.
	     But the integer MINUS logic expects the shift/extend
	     operation in op1.  */
	  if (! (REG_P (op0)
		 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
	    {
	      op0 = XEXP (x, 1);
	      op1 = XEXP (x, 0);
	    }
	  goto cost_minus;
	}

      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
	{
	  /* FCMP.  */
	  if (speed)
	    *cost += extra_cost->fp[mode == DFmode].compare;

	  if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
	    {
	      *cost += rtx_cost (op0, VOIDmode, COMPARE, 0, speed);
	      /* FCMP supports constant 0.0 for no extra cost.  */
	      return true;
	    }
	  return false;
	}

      if (VECTOR_MODE_P (mode))
	{
	  /* Vector compare.  */
	  if (speed)
	    *cost += extra_cost->vect.alu;

	  if (aarch64_float_const_zero_rtx_p (op1))
	    /* Vector cm (eq|ge|gt|lt|le) supports constant 0.0 for no extra
	       cost.  */
	    return true;

	  return false;
	}
      return false;
    case MINUS:
      {
	op0 = XEXP (x, 0);
	op1 = XEXP (x, 1);

cost_minus:
	*cost += rtx_cost (op0, mode, MINUS, 0, speed);

	/* Detect valid immediates.  */
	if ((GET_MODE_CLASS (mode) == MODE_INT
	     || (GET_MODE_CLASS (mode) == MODE_CC
		 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
	    && CONST_INT_P (op1)
	    && aarch64_uimm12_shift (INTVAL (op1)))
	  {
	    if (speed)
	      /* SUB(S) (immediate).  */
	      *cost += extra_cost->alu.arith;
	    return true;
	  }

	/* Look for SUB (extended register).  */
	if (aarch64_rtx_arith_op_extract_p (op1, mode))
	  {
	    if (speed)
	      *cost += extra_cost->alu.extend_arith;

	    op1 = aarch64_strip_extend (op1);
	    *cost += rtx_cost (op1, VOIDmode,
			       (enum rtx_code) GET_CODE (op1), 0, speed);
	    return true;
	  }

	rtx new_op1 = aarch64_strip_extend (op1);

	/* Cost this as an FMA-alike operation.  */
	if ((GET_CODE (new_op1) == MULT
	     || aarch64_shift_p (GET_CODE (new_op1)))
	    && code != COMPARE)
	  {
	    *cost += aarch64_rtx_mult_cost (new_op1, MULT,
					    (enum rtx_code) code,
					    speed);
	    return true;
	  }

	*cost += rtx_cost (new_op1, VOIDmode, MINUS, 1, speed);

	if (VECTOR_MODE_P (mode))
	  {
	    /* Vector SUB.  */
	    if (speed)
	      *cost += extra_cost->vect.alu;
	  }
	else if (GET_MODE_CLASS (mode) == MODE_INT)
	  {
	    /* SUB(S).  */
	    if (speed)
	      *cost += extra_cost->alu.arith;
	  }
	else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	  {
	    /* FSUB.  */
	    if (speed)
	      *cost += extra_cost->fp[mode == DFmode].addsub;
	  }
	return true;
      }
    case PLUS:
      {
	rtx new_op0;

	op0 = XEXP (x, 0);
	op1 = XEXP (x, 1);

cost_plus:
	if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
	    || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
	  {
	    /* CSINC.  */
	    *cost += rtx_cost (XEXP (op0, 0), mode, PLUS, 0, speed);
	    *cost += rtx_cost (op1, mode, PLUS, 1, speed);
	    return true;
	  }

	if (GET_MODE_CLASS (mode) == MODE_INT
	    && CONST_INT_P (op1)
	    && aarch64_uimm12_shift (INTVAL (op1)))
	  {
	    *cost += rtx_cost (op0, mode, PLUS, 0, speed);

	    if (speed)
	      /* ADD (immediate).  */
	      *cost += extra_cost->alu.arith;
	    return true;
	  }

	*cost += rtx_cost (op1, mode, PLUS, 1, speed);

	/* Look for ADD (extended register).  */
	if (aarch64_rtx_arith_op_extract_p (op0, mode))
	  {
	    if (speed)
	      *cost += extra_cost->alu.extend_arith;

	    op0 = aarch64_strip_extend (op0);
	    *cost += rtx_cost (op0, VOIDmode,
			       (enum rtx_code) GET_CODE (op0), 0, speed);
	    return true;
	  }

	/* Strip any extend, leave shifts behind as we will
	   cost them through mult_cost.  */
	new_op0 = aarch64_strip_extend (op0);

	if (GET_CODE (new_op0) == MULT
	    || aarch64_shift_p (GET_CODE (new_op0)))
	  {
	    *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
					    speed);
	    return true;
	  }

	*cost += rtx_cost (new_op0, VOIDmode, PLUS, 0, speed);

	if (VECTOR_MODE_P (mode))
	  {
	    /* Vector ADD.  */
	    if (speed)
	      *cost += extra_cost->vect.alu;
	  }
	else if (GET_MODE_CLASS (mode) == MODE_INT)
	  {
	    /* ADD.  */
	    if (speed)
	      *cost += extra_cost->alu.arith;
	  }
	else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	  {
	    /* FADD.  */
	    if (speed)
	      *cost += extra_cost->fp[mode == DFmode].addsub;
	  }
	return true;
      }
    case BSWAP:
      *cost = COSTS_N_INSNS (1);

      if (speed)
	{
	  if (VECTOR_MODE_P (mode))
	    *cost += extra_cost->vect.alu;
	  else
	    *cost += extra_cost->alu.rev;
	}
      return false;

    case IOR:
      if (aarch_rev16_p (x))
	{
	  *cost = COSTS_N_INSNS (1);

	  if (speed)
	    {
	      if (VECTOR_MODE_P (mode))
		*cost += extra_cost->vect.alu;
	      else
		*cost += extra_cost->alu.rev;
	    }
	  return true;
	}

      if (aarch64_extr_rtx_p (x, &op0, &op1))
	{
	  *cost += rtx_cost (op0, mode, IOR, 0, speed);
	  *cost += rtx_cost (op1, mode, IOR, 1, speed);
	  if (speed)
	    *cost += extra_cost->alu.shift;

	  return true;
	}
      /* Fall through.  */
    case XOR:
    case AND:
    cost_logic:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (VECTOR_MODE_P (mode))
	{
	  if (speed)
	    *cost += extra_cost->vect.alu;
	  return true;
	}

      if (code == AND
	  && GET_CODE (op0) == MULT
	  && CONST_INT_P (XEXP (op0, 1))
	  && CONST_INT_P (op1)
	  && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
			       INTVAL (op1)) != 0)
	{
	  /* This is a UBFM/SBFM.  */
	  *cost += rtx_cost (XEXP (op0, 0), mode, ZERO_EXTRACT, 0, speed);
	  if (speed)
	    *cost += extra_cost->alu.bfx;
	  return true;
	}

      if (GET_MODE_CLASS (mode) == MODE_INT)
	{
	  /* We possibly get the immediate for free, this is not
	     modelled.  */
	  if (CONST_INT_P (op1)
	      && aarch64_bitmask_imm (INTVAL (op1), mode))
	    {
	      *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);

	      if (speed)
		*cost += extra_cost->alu.logical;

	      return true;
	    }
	  else
	    {
	      rtx new_op0 = op0;

	      /* Handle ORN, EON, or BIC.  */
	      if (GET_CODE (op0) == NOT)
		op0 = XEXP (op0, 0);

	      new_op0 = aarch64_strip_shift (op0);

	      /* If we had a shift on op0 then this is a logical-shift-
		 by-register/immediate operation.  Otherwise, this is just
		 a logical operation.  */
	      if (speed)
		{
		  if (new_op0 != op0)
		    {
		      /* Shift by immediate.  */
		      if (CONST_INT_P (XEXP (op0, 1)))
			*cost += extra_cost->alu.log_shift;
		      else
			*cost += extra_cost->alu.log_shift_reg;
		    }
		  else
		    *cost += extra_cost->alu.logical;
		}

	      /* In both cases we want to cost both operands.  */
	      *cost += rtx_cost (new_op0, mode, (enum rtx_code) code, 0, speed);
	      *cost += rtx_cost (op1, mode, (enum rtx_code) code, 1, speed);

	      return true;
	    }
	}
      return false;
    case NOT:
      x = XEXP (x, 0);
      op0 = aarch64_strip_shift (x);

      if (VECTOR_MODE_P (mode))
	{
	  /* Vector NOT.  */
	  *cost += extra_cost->vect.alu;
	  return false;
	}

      /* MVN-shifted-reg.  */
      if (op0 != x)
	{
	  *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);

	  if (speed)
	    *cost += extra_cost->alu.log_shift;

	  return true;
	}
      /* EON can have two forms: (xor (not a) b) but also (not (xor a b)).
	 Handle the second form here taking care that 'a' in the above can
	 be a shift.  */
      else if (GET_CODE (op0) == XOR)
	{
	  rtx newop0 = XEXP (op0, 0);
	  rtx newop1 = XEXP (op0, 1);
	  rtx op0_stripped = aarch64_strip_shift (newop0);

	  *cost += rtx_cost (newop1, mode, (enum rtx_code) code, 1, speed);
	  *cost += rtx_cost (op0_stripped, mode, XOR, 0, speed);

	  if (speed)
	    {
	      if (op0_stripped != newop0)
		*cost += extra_cost->alu.log_shift;
	      else
		*cost += extra_cost->alu.logical;
	    }

	  return true;
	}
      /* MVN.  */
      if (speed)
	*cost += extra_cost->alu.logical;

      return false;
    case ZERO_EXTEND:

      op0 = XEXP (x, 0);
      /* If a value is written in SI mode, then zero extended to DI
	 mode, the operation will in general be free as a write to
	 a 'w' register implicitly zeroes the upper bits of an 'x'
	 register.  However, if this is

	   (set (reg) (zero_extend (reg)))

	 we must cost the explicit register move.  */
      if (mode == DImode
	  && GET_MODE (op0) == SImode
	  && outer == SET)
	{
	  int op_cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, 0, speed);

	  /* MOV.  */
	  if (!op_cost && speed)
	    *cost += extra_cost->alu.extend;
	  else
	    /* Free, the cost is that of the SI mode operation.  */
	    *cost = op_cost;

	  return true;
	}
      else if (MEM_P (op0))
	{
	  /* All loads can zero extend to any size for free.  */
	  *cost = rtx_cost (op0, VOIDmode, ZERO_EXTEND, param, speed);
	  return true;
	}

      if (speed)
	{
	  if (VECTOR_MODE_P (mode))
	    /* UMOV.  */
	    *cost += extra_cost->vect.alu;
	  else
	    /* UXTB/UXTH.  */
	    *cost += extra_cost->alu.extend;
	}

      return false;
    case SIGN_EXTEND:
      if (MEM_P (XEXP (x, 0)))
	{
	  /* LDRSH.  */
	  if (speed)
	    {
	      rtx address = XEXP (XEXP (x, 0), 0);
	      *cost += extra_cost->ldst.load_sign_extend;

	      *cost +=
		COSTS_N_INSNS (aarch64_address_cost (address, mode,
						     0, speed));
	    }
	  return true;
	}

      if (speed)
	{
	  if (VECTOR_MODE_P (mode))
	    *cost += extra_cost->vect.alu;
	  else
	    *cost += extra_cost->alu.extend;
	}
      return false;
    case ASHIFT:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (CONST_INT_P (op1))
	{
	  if (speed)
	    {
	      if (VECTOR_MODE_P (mode))
		/* Vector shift (immediate).  */
		*cost += extra_cost->vect.alu;
	      else
		/* LSL (immediate), UBMF, UBFIZ and friends.  These are all
		   aliases.  */
		*cost += extra_cost->alu.shift;
	    }

	  /* We can incorporate zero/sign extend for free.  */
	  if (GET_CODE (op0) == ZERO_EXTEND
	      || GET_CODE (op0) == SIGN_EXTEND)
	    op0 = XEXP (op0, 0);

	  *cost += rtx_cost (op0, VOIDmode, ASHIFT, 0, speed);
	  return true;
	}
      else
	{
	  if (speed)
	    {
	      if (VECTOR_MODE_P (mode))
		/* Vector shift (register).  */
		*cost += extra_cost->vect.alu;
	      else
		/* LSLV.  */
		*cost += extra_cost->alu.shift_reg;
	    }
	  return false;  /* All arguments need to be in registers.  */
	}
    case ROTATE:
    case ROTATERT:
    case LSHIFTRT:
    case ASHIFTRT:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);

      if (CONST_INT_P (op1))
	{
	  /* ASR (immediate) and friends.  */
	  if (speed)
	    {
	      if (VECTOR_MODE_P (mode))
		*cost += extra_cost->vect.alu;
	      else
		*cost += extra_cost->alu.shift;
	    }

	  *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
	  return true;
	}
      else
	{
	  /* ASR (register) and friends.  */
	  if (speed)
	    {
	      if (VECTOR_MODE_P (mode))
		*cost += extra_cost->vect.alu;
	      else
		*cost += extra_cost->alu.shift_reg;
	    }
	  return false;  /* All arguments need to be in registers.  */
	}
    case CONST:
    case SYMBOL_REF:
      if (aarch64_cmodel == AARCH64_CMODEL_LARGE
	  || aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC)
	{
	  /* LDR.  */
	  if (speed)
	    *cost += extra_cost->ldst.load;
	}
      else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
	       || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
	{
	  /* ADRP, followed by ADD.  */
	  *cost += COSTS_N_INSNS (1);
	  if (speed)
	    *cost += 2 * extra_cost->alu.arith;
	}
      else if (aarch64_cmodel == AARCH64_CMODEL_TINY
	       || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
	{
	  /* ADR.  */
	  if (speed)
	    *cost += extra_cost->alu.arith;
	}

      if (flag_pic)
	{
	  /* One extra load instruction, after accessing the GOT.  */
	  *cost += COSTS_N_INSNS (1);
	  if (speed)
	    *cost += extra_cost->ldst.load;
	}
      return true;

    case HIGH:
    case LO_SUM:
      /* ADRP/ADD (immediate).  */
      if (speed)
	*cost += extra_cost->alu.arith;
      return true;
    case ZERO_EXTRACT:
    case SIGN_EXTRACT:
      /* UBFX/SBFX.  */
      if (speed)
	{
	  if (VECTOR_MODE_P (mode))
	    *cost += extra_cost->vect.alu;
	  else
	    *cost += extra_cost->alu.bfx;
	}

      /* We can trust that the immediates used will be correct (there
	 are no by-register forms), so we need only cost op0.  */
      *cost += rtx_cost (XEXP (x, 0), VOIDmode, (enum rtx_code) code, 0, speed);
      return true;

    case MULT:
      *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
      /* aarch64_rtx_mult_cost always handles recursion to its
	 operands.  */
      return true;
    case MOD:
      /* We can expand signed mod by power of 2 using a NEGS, two parallel
	 ANDs and a CSNEG.  Assume here that CSNEG is the same as the cost of
	 an unconditional negate.  This case should only ever be reached through
	 the set_smod_pow2_cheap check in expmed.c.  */
      if (CONST_INT_P (XEXP (x, 1))
	  && exact_log2 (INTVAL (XEXP (x, 1))) > 0
	  && (mode == SImode || mode == DImode))
	{
	  /* We expand to 4 instructions.  Reset the baseline.  */
	  *cost = COSTS_N_INSNS (4);

	  if (speed)
	    *cost += 2 * extra_cost->alu.logical
		     + 2 * extra_cost->alu.arith;

	  return true;
	}

    /* Fall-through.  */
    case UMOD:
      if (speed)
	{
	  if (VECTOR_MODE_P (mode))
	    *cost += extra_cost->vect.alu;
	  else if (GET_MODE_CLASS (mode) == MODE_INT)
	    *cost += (extra_cost->mult[mode == DImode].add
		      + extra_cost->mult[mode == DImode].idiv);
	  else if (mode == DFmode)
	    *cost += (extra_cost->fp[1].mult
		      + extra_cost->fp[1].div);
	  else if (mode == SFmode)
	    *cost += (extra_cost->fp[0].mult
		      + extra_cost->fp[0].div);
	}
      return false;  /* All arguments need to be in registers.  */

    case DIV:
    case UDIV:
    case SQRT:
      if (speed)
	{
	  if (VECTOR_MODE_P (mode))
	    *cost += extra_cost->vect.alu;
	  else if (GET_MODE_CLASS (mode) == MODE_INT)
	    /* There is no integer SQRT, so only DIV and UDIV can get
	       here.  */
	    *cost += extra_cost->mult[mode == DImode].idiv;
	  else
	    *cost += extra_cost->fp[mode == DFmode].div;
	}
      return false;  /* All arguments need to be in registers.  */
    case IF_THEN_ELSE:
      return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
					 XEXP (x, 2), cost, speed);

    case EQ:
    case NE:
    case GT:
    case GTU:
    case LT:
    case LTU:
    case GE:
    case GEU:
    case LE:
    case LEU:
      return false;  /* All arguments must be in registers.  */
    case FMA:
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      op2 = XEXP (x, 2);

      if (speed)
	{
	  if (VECTOR_MODE_P (mode))
	    *cost += extra_cost->vect.alu;
	  else
	    *cost += extra_cost->fp[mode == DFmode].fma;
	}

      /* FMSUB, FNMADD, and FNMSUB are free.  */
      if (GET_CODE (op0) == NEG)
	op0 = XEXP (op0, 0);

      if (GET_CODE (op2) == NEG)
	op2 = XEXP (op2, 0);

      /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
	 and the by-element operand as operand 0.  */
      if (GET_CODE (op1) == NEG)
	op1 = XEXP (op1, 0);

      /* Catch vector-by-element operations.  The by-element operand can
	 either be (vec_duplicate (vec_select (x))) or just
	 (vec_select (x)), depending on whether we are multiplying by
	 a vector or a scalar.

	 Canonicalization is not very good in these cases, FMA4 will put the
	 by-element operand as operand 0, FNMA4 will have it as operand 1.  */
      if (GET_CODE (op0) == VEC_DUPLICATE)
	op0 = XEXP (op0, 0);
      else if (GET_CODE (op1) == VEC_DUPLICATE)
	op1 = XEXP (op1, 0);

      if (GET_CODE (op0) == VEC_SELECT)
	op0 = XEXP (op0, 0);
      else if (GET_CODE (op1) == VEC_SELECT)
	op1 = XEXP (op1, 0);

      /* If the remaining parameters are not registers,
	 get the cost to put them into registers.  */
      *cost += rtx_cost (op0, mode, FMA, 0, speed);
      *cost += rtx_cost (op1, mode, FMA, 1, speed);
      *cost += rtx_cost (op2, mode, FMA, 2, speed);
      return true;
    case FLOAT:
    case UNSIGNED_FLOAT:
      if (speed)
	*cost += extra_cost->fp[mode == DFmode].fromint;
      return false;

    case FLOAT_EXTEND:
      if (speed)
	{
	  if (VECTOR_MODE_P (mode))
	    /* Vector truncate.  */
	    *cost += extra_cost->vect.alu;
	  else
	    *cost += extra_cost->fp[mode == DFmode].widen;
	}
      return false;

    case FLOAT_TRUNCATE:
      if (speed)
	{
	  if (VECTOR_MODE_P (mode))
	    /* Vector conversion.  */
	    *cost += extra_cost->vect.alu;
	  else
	    *cost += extra_cost->fp[mode == DFmode].narrow;
	}
      return false;
    case FIX:
    case UNSIGNED_FIX:
      x = XEXP (x, 0);
      /* Strip the rounding part.  They will all be implemented
	 by the fcvt* family of instructions anyway.  */
      if (GET_CODE (x) == UNSPEC)
	{
	  unsigned int uns_code = XINT (x, 1);

	  if (uns_code == UNSPEC_FRINTA
	      || uns_code == UNSPEC_FRINTM
	      || uns_code == UNSPEC_FRINTN
	      || uns_code == UNSPEC_FRINTP
	      || uns_code == UNSPEC_FRINTZ)
	    x = XVECEXP (x, 0, 0);
	}

      if (speed)
	{
	  if (VECTOR_MODE_P (mode))
	    *cost += extra_cost->vect.alu;
	  else
	    *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
	}

      /* We can combine fmul by a power of 2 followed by a fcvt into a single
	 fixed-point fcvt.  */
      if (GET_CODE (x) == MULT
	  && ((VECTOR_MODE_P (mode)
	       && aarch64_vec_fpconst_pow_of_2 (XEXP (x, 1)) > 0)
	      || aarch64_fpconst_pow_of_2 (XEXP (x, 1)) > 0))
	{
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, (rtx_code) code,
			     0, speed);
	  return true;
	}

      *cost += rtx_cost (x, VOIDmode, (enum rtx_code) code, 0, speed);
      return true;
    case ABS:
      if (VECTOR_MODE_P (mode))
	{
	  /* ABS (vector).  */
	  if (speed)
	    *cost += extra_cost->vect.alu;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  op0 = XEXP (x, 0);

	  /* FABD, which is analogous to FADD.  */
	  if (GET_CODE (op0) == MINUS)
	    {
	      *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed);
	      *cost += rtx_cost (XEXP (op0, 1), mode, MINUS, 1, speed);
	      if (speed)
		*cost += extra_cost->fp[mode == DFmode].addsub;

	      return true;
	    }
	  /* Simple FABS is analogous to FNEG.  */
	  if (speed)
	    *cost += extra_cost->fp[mode == DFmode].neg;
	}
      else
	{
	  /* Integer ABS will either be split to
	     two arithmetic instructions, or will be an ABS
	     (scalar), which we don't model.  */
	  *cost = COSTS_N_INSNS (2);
	  if (speed)
	    *cost += 2 * extra_cost->alu.arith;
	}
      return false;
    case SMAX:
    case SMIN:
      if (speed)
	{
	  if (VECTOR_MODE_P (mode))
	    *cost += extra_cost->vect.alu;
	  else
	    /* FMAXNM/FMINNM/FMAX/FMIN.
	       TODO: This may not be accurate for all implementations, but
	       we do not model this in the cost tables.  */
	    *cost += extra_cost->fp[mode == DFmode].addsub;
	}
      return false;
    case UNSPEC:
      /* The floating point round to integer frint* instructions.  */
      if (aarch64_frint_unspec_p (XINT (x, 1)))
	{
	  if (speed)
	    *cost += extra_cost->fp[mode == DFmode].roundint;

	  return false;
	}

      if (XINT (x, 1) == UNSPEC_RBIT)
	{
	  /* RBIT.  */
	  if (speed)
	    *cost += extra_cost->alu.rev;

	  return false;
	}
      break;
    case TRUNCATE:

      /* Decompose <su>muldi3_highpart.  */
      if (/* (truncate:DI  */
	  mode == DImode
	  /*   (lshiftrt:TI  */
	  && GET_MODE (XEXP (x, 0)) == TImode
	  && GET_CODE (XEXP (x, 0)) == LSHIFTRT
	  /*      (mult:TI  */
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	  /*        (ANY_EXTEND:TI (reg:DI))
		    (ANY_EXTEND:TI (reg:DI)))  */
	  && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
	       && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
	      || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
		  && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
	  && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
	  && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
	  /* (const_int 64)  */
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
	{
	  /* UMULH/SMULH.  */
	  if (speed)
	    *cost += extra_cost->mult[mode == DImode].extend;
	  *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
			     mode, MULT, 0, speed);
	  *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
			     mode, MULT, 1, speed);
	  return true;
	}

      /* Fall through.  */
    default:
      break;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
	     "\nFailed to cost RTX.  Assuming default cost.\n");

  return true;
}
/* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
   calculated for X.  This cost is stored in *COST.  Returns true
   if the total cost of X was calculated.  */
static bool
aarch64_rtx_costs_wrapper (rtx x, machine_mode mode, int outer,
			   int param, int *cost, bool speed)
{
  bool result = aarch64_rtx_costs (x, mode, outer, param, cost, speed);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      print_rtl_single (dump_file, x);
      fprintf (dump_file, "\n%s cost: %d (%s)\n",
	       speed ? "Hot" : "Cold",
	       *cost, result ? "final" : "partial");
    }

  return result;
}
static int
aarch64_register_move_cost (machine_mode mode,
			    reg_class_t from_i, reg_class_t to_i)
{
  enum reg_class from = (enum reg_class) from_i;
  enum reg_class to = (enum reg_class) to_i;
  const struct cpu_regmove_cost *regmove_cost
    = aarch64_tune_params.regmove_cost;

  /* Caller save and pointer regs are equivalent to GENERAL_REGS.  */
  if (to == CALLER_SAVE_REGS || to == POINTER_REGS)
    to = GENERAL_REGS;

  if (from == CALLER_SAVE_REGS || from == POINTER_REGS)
    from = GENERAL_REGS;

  /* Moving between GPR and stack cost is the same as GP2GP.  */
  if ((from == GENERAL_REGS && to == STACK_REG)
      || (to == GENERAL_REGS && from == STACK_REG))
    return regmove_cost->GP2GP;

  /* To/From the stack register, we move via the gprs.  */
  if (to == STACK_REG || from == STACK_REG)
    return aarch64_register_move_cost (mode, from, GENERAL_REGS)
	   + aarch64_register_move_cost (mode, GENERAL_REGS, to);

  if (GET_MODE_SIZE (mode) == 16)
    {
      /* 128-bit operations on general registers require 2 instructions.  */
      if (from == GENERAL_REGS && to == GENERAL_REGS)
	return regmove_cost->GP2GP * 2;
      else if (from == GENERAL_REGS)
	return regmove_cost->GP2FP * 2;
      else if (to == GENERAL_REGS)
	return regmove_cost->FP2GP * 2;

      /* When AdvSIMD instructions are disabled it is not possible to move
	 a 128-bit value directly between Q registers.  This is handled in
	 secondary reload.  A general register is used as a scratch to move
	 the upper DI value and the lower DI value is moved directly,
	 hence the cost is the sum of three moves.  */
      if (! TARGET_SIMD)
	return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;

      return regmove_cost->FP2FP;
    }

  if (from == GENERAL_REGS && to == GENERAL_REGS)
    return regmove_cost->GP2GP;
  else if (from == GENERAL_REGS)
    return regmove_cost->GP2FP;
  else if (to == GENERAL_REGS)
    return regmove_cost->FP2GP;

  return regmove_cost->FP2FP;
}
static int
aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
			  reg_class_t rclass ATTRIBUTE_UNUSED,
			  bool in ATTRIBUTE_UNUSED)
{
  return aarch64_tune_params.memmov_cost;
}
/* Function to decide when to use
   reciprocal square root builtins.  */

static tree
aarch64_builtin_reciprocal (unsigned int fn,
			    bool md_fn,
			    bool)
{
  if (flag_trapping_math
      || !flag_unsafe_math_optimizations
      || optimize_size
      || ! (aarch64_tune_params.extra_tuning_flags
	    & AARCH64_EXTRA_TUNE_RECIP_SQRT))
    return NULL_TREE;

  return aarch64_builtin_rsqrt (fn, md_fn);
}
typedef rtx (*rsqrte_type) (rtx, rtx);

/* Select reciprocal square root initial estimate
   insn depending on machine mode.  */

static rsqrte_type
get_rsqrte_type (machine_mode mode)
{
  switch (mode)
    {
    case DFmode:   return gen_aarch64_rsqrte_df2;
    case SFmode:   return gen_aarch64_rsqrte_sf2;
    case V2DFmode: return gen_aarch64_rsqrte_v2df2;
    case V2SFmode: return gen_aarch64_rsqrte_v2sf2;
    case V4SFmode: return gen_aarch64_rsqrte_v4sf2;
    default: gcc_unreachable ();
    }
}
typedef rtx (*rsqrts_type) (rtx, rtx, rtx);

/* Select reciprocal square root Newton-Raphson step
   insn depending on machine mode.  */

static rsqrts_type
get_rsqrts_type (machine_mode mode)
{
  switch (mode)
    {
    case DFmode:   return gen_aarch64_rsqrts_df3;
    case SFmode:   return gen_aarch64_rsqrts_sf3;
    case V2DFmode: return gen_aarch64_rsqrts_v2df3;
    case V2SFmode: return gen_aarch64_rsqrts_v2sf3;
    case V4SFmode: return gen_aarch64_rsqrts_v4sf3;
    default: gcc_unreachable ();
    }
}
/* Emit instruction sequence to compute
   reciprocal square root.  Use two Newton-Raphson steps
   for single precision and three for double precision.  */

void
aarch64_emit_swrsqrt (rtx dst, rtx src)
{
  machine_mode mode = GET_MODE (src);
  gcc_assert (
    mode == SFmode || mode == V2SFmode || mode == V4SFmode
    || mode == DFmode || mode == V2DFmode);

  rtx xsrc = gen_reg_rtx (mode);
  emit_move_insn (xsrc, src);
  rtx x0 = gen_reg_rtx (mode);

  emit_insn ((*get_rsqrte_type (mode)) (x0, xsrc));

  bool double_mode = (mode == DFmode || mode == V2DFmode);

  int iterations = double_mode ? 3 : 2;

  if (flag_mrecip_low_precision_sqrt)
    iterations--;

  for (int i = 0; i < iterations; ++i)
    {
      rtx x1 = gen_reg_rtx (mode);
      rtx x2 = gen_reg_rtx (mode);
      rtx x3 = gen_reg_rtx (mode);
      emit_set_insn (x2, gen_rtx_MULT (mode, x0, x0));

      emit_insn ((*get_rsqrts_type (mode)) (x3, xsrc, x2));

      emit_set_insn (x1, gen_rtx_MULT (mode, x0, x3));
      x0 = x1;
    }

  emit_move_insn (dst, x0);
}
/* Return the number of instructions that can be issued per cycle.  */
static int
aarch64_sched_issue_rate (void)
{
  return aarch64_tune_params.issue_rate;
}

static int
aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
{
  int issue_rate = aarch64_sched_issue_rate ();

  return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
}

/* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD as
   autopref_multipass_dfa_lookahead_guard from haifa-sched.c.  It only
   has an effect if PARAM_SCHED_AUTOPREF_QUEUE_DEPTH > 0.  */
static int
aarch64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn,
						   int ready_index)
{
  return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
}
/* Vectorizer cost model target hooks.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				    tree vectype,
				    int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
    case scalar_stmt:
      return aarch64_tune_params.vec_costs->scalar_stmt_cost;

    case scalar_load:
      return aarch64_tune_params.vec_costs->scalar_load_cost;

    case scalar_store:
      return aarch64_tune_params.vec_costs->scalar_store_cost;

    case vector_stmt:
      return aarch64_tune_params.vec_costs->vec_stmt_cost;

    case vector_load:
      return aarch64_tune_params.vec_costs->vec_align_load_cost;

    case vector_store:
      return aarch64_tune_params.vec_costs->vec_store_cost;

    case vec_to_scalar:
      return aarch64_tune_params.vec_costs->vec_to_scalar_cost;

    case scalar_to_vec:
      return aarch64_tune_params.vec_costs->scalar_to_vec_cost;

    case unaligned_load:
      return aarch64_tune_params.vec_costs->vec_unalign_load_cost;

    case unaligned_store:
      return aarch64_tune_params.vec_costs->vec_unalign_store_cost;

    case cond_branch_taken:
      return aarch64_tune_params.vec_costs->cond_taken_branch_cost;

    case cond_branch_not_taken:
      return aarch64_tune_params.vec_costs->cond_not_taken_branch_cost;

    case vec_perm:
    case vec_promote_demote:
      return aarch64_tune_params.vec_costs->vec_stmt_cost;

    case vec_construct:
      elements = TYPE_VECTOR_SUBPARTS (vectype);
      return elements / 2 + 1;

    default:
      gcc_unreachable ();
    }
}
/* Implement targetm.vectorize.add_stmt_cost.  */
static unsigned
aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
		       struct _stmt_vec_info *stmt_info, int misalign,
		       enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost =
	    aarch64_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
	 vectorized are weighted more heavily.  The value here is
	 arbitrary and could potentially be improved with analysis.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
	count *= 50; /*  FIXME  */

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}
static void initialize_aarch64_code_model (struct gcc_options *);

/* Enum describing the various ways that the
   aarch64_parse_{arch,tune,cpu,extension} functions can fail.
   This way their callers can choose what kind of error to give.  */

enum aarch64_parse_opt_result
{
  AARCH64_PARSE_OK,			/* Parsing was successful.  */
  AARCH64_PARSE_MISSING_ARG,		/* Missing argument.  */
  AARCH64_PARSE_INVALID_FEATURE,	/* Invalid feature modifier.  */
  AARCH64_PARSE_INVALID_ARG		/* Invalid arch, tune, cpu arg.  */
};
7277 aarch64_parse_extension (char *str
, unsigned long *isa_flags
)
7279 /* The extension string is parsed left to right. */
7280 const struct aarch64_option_extension
*opt
= NULL
;
7282 /* Flag to say whether we are adding or removing an extension. */
7283 int adding_ext
= -1;
7285 while (str
!= NULL
&& *str
!= 0)
7291 ext
= strchr (str
, '+');
7298 if (len
>= 2 && strncmp (str
, "no", 2) == 0)
7308 return AARCH64_PARSE_MISSING_ARG
;
7311 /* Scan over the extensions table trying to find an exact match. */
7312 for (opt
= all_extensions
; opt
->name
!= NULL
; opt
++)
7314 if (strlen (opt
->name
) == len
&& strncmp (opt
->name
, str
, len
) == 0)
7316 /* Add or remove the extension. */
7318 *isa_flags
|= opt
->flags_on
;
7320 *isa_flags
&= ~(opt
->flags_off
);
7325 if (opt
->name
== NULL
)
7327 /* Extension not found in list. */
7328 return AARCH64_PARSE_INVALID_FEATURE
;
7334 return AARCH64_PARSE_OK
;
/* Parse the TO_PARSE string and put the architecture struct that it
   selects into RES and the architectural features into ISA_FLAGS.
   Return an aarch64_parse_opt_result describing the parse result.
   If there is an error parsing, RES and ISA_FLAGS are left unchanged.  */

static enum aarch64_parse_opt_result
aarch64_parse_arch (const char *to_parse, const struct processor **res,
		    unsigned long *isa_flags)
{
  char *ext;
  const struct processor *arch;
  char *str = (char *) alloca (strlen (to_parse) + 1);
  size_t len;

  strcpy (str, to_parse);

  ext = strchr (str, '+');

  if (ext != NULL)
    len = ext - str;
  else
    len = strlen (str);

  if (len == 0)
    return AARCH64_PARSE_MISSING_ARG;

  /* Loop through the list of supported ARCHes to find a match.  */
  for (arch = all_architectures; arch->name != NULL; arch++)
    {
      if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
	{
	  unsigned long isa_temp = arch->flags;

	  if (ext != NULL)
	    {
	      /* TO_PARSE string contains at least one extension.  */
	      enum aarch64_parse_opt_result ext_res
		= aarch64_parse_extension (ext, &isa_temp);

	      if (ext_res != AARCH64_PARSE_OK)
		return ext_res;
	    }
	  /* Extension parsing was successful.  Confirm the result
	     arch and ISA flags.  */
	  *res = arch;
	  *isa_flags = isa_temp;
	  return AARCH64_PARSE_OK;
	}
    }

  /* ARCH name not found in list.  */
  return AARCH64_PARSE_INVALID_ARG;
}
/* Parse the TO_PARSE string and put the result tuning in RES and the
   architecture flags in ISA_FLAGS.  Return an aarch64_parse_opt_result
   describing the parse result.  If there is an error parsing, RES and
   ISA_FLAGS are left unchanged.  */

static enum aarch64_parse_opt_result
aarch64_parse_cpu (const char *to_parse, const struct processor **res,
		   unsigned long *isa_flags)
{
  char *ext;
  const struct processor *cpu;
  char *str = (char *) alloca (strlen (to_parse) + 1);
  size_t len;

  strcpy (str, to_parse);

  ext = strchr (str, '+');

  if (ext != NULL)
    len = ext - str;
  else
    len = strlen (str);

  if (len == 0)
    return AARCH64_PARSE_MISSING_ARG;

  /* Loop through the list of supported CPUs to find a match.  */
  for (cpu = all_cores; cpu->name != NULL; cpu++)
    {
      if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
	{
	  unsigned long isa_temp = cpu->flags;

	  if (ext != NULL)
	    {
	      /* TO_PARSE string contains at least one extension.  */
	      enum aarch64_parse_opt_result ext_res
		= aarch64_parse_extension (ext, &isa_temp);

	      if (ext_res != AARCH64_PARSE_OK)
		return ext_res;
	    }
	  /* Extension parsing was successful.  Confirm the result
	     cpu and ISA flags.  */
	  *res = cpu;
	  *isa_flags = isa_temp;
	  return AARCH64_PARSE_OK;
	}
    }

  /* CPU name not found in list.  */
  return AARCH64_PARSE_INVALID_ARG;
}
/* Parse the TO_PARSE string and put the cpu it selects into RES.
   Return an aarch64_parse_opt_result describing the parse result.
   If the parsing fails, RES does not change.  */

static enum aarch64_parse_opt_result
aarch64_parse_tune (const char *to_parse, const struct processor **res)
{
  const struct processor *cpu;
  char *str = (char *) alloca (strlen (to_parse) + 1);

  strcpy (str, to_parse);

  /* Loop through the list of supported CPUs to find a match.  */
  for (cpu = all_cores; cpu->name != NULL; cpu++)
    {
      if (strcmp (cpu->name, str) == 0)
	{
	  *res = cpu;
	  return AARCH64_PARSE_OK;
	}
    }

  /* CPU name not found in list.  */
  return AARCH64_PARSE_INVALID_ARG;
}
/* Parse TOKEN, which has length LENGTH, to see if it is an option
   described in FLAG.  If it is, return the index bit for that fusion type.
   If not, error (printing OPTION_NAME) and return zero.  */

static unsigned int
aarch64_parse_one_option_token (const char *token,
				size_t length,
				const struct aarch64_flag_desc *flag,
				const char *option_name)
{
  for (; flag->name != NULL; flag++)
    {
      if (length == strlen (flag->name)
	  && !strncmp (flag->name, token, length))
	return flag->flag;
    }

  error ("unknown flag passed in -moverride=%s (%s)", option_name, token);
  return 0;
}
/* Parse OPTION which is a comma-separated list of flags to enable.
   FLAGS gives the list of flags we understand, INITIAL_STATE gives any
   default state we inherit from the CPU tuning structures.  OPTION_NAME
   gives the top-level option we are parsing in the -moverride string,
   for use in error messages.  */

static unsigned int
aarch64_parse_boolean_options (const char *option,
			       const struct aarch64_flag_desc *flags,
			       unsigned int initial_state,
			       const char *option_name)
{
  const char separator = '.';
  const char* specs = option;
  const char* ntoken = option;
  unsigned int found_flags = initial_state;

  while ((ntoken = strchr (specs, separator)))
    {
      size_t token_length = ntoken - specs;
      unsigned token_ops = aarch64_parse_one_option_token (specs,
							   token_length,
							   flags,
							   option_name);
      /* If we find "none" (or, for simplicity's sake, an error) anywhere
	 in the token stream, reset the supported operations.  So:

	   adrp+add.cmp+branch.none.adrp+add

	 would have the result of turning on only adrp+add fusion.  */
      if (!token_ops)
	found_flags = 0;

      found_flags |= token_ops;
      specs = ++ntoken;
    }

  /* We ended with a trailing separator, print something.  */
  if (!(*specs))
    {
      error ("%s string ill-formed\n", option_name);
      return 0;
    }

  /* We still have one more token to parse.  */
  size_t token_length = strlen (specs);
  unsigned token_ops = aarch64_parse_one_option_token (specs,
						       token_length,
						       flags,
						       option_name);
  if (!token_ops)
    found_flags = 0;

  found_flags |= token_ops;
  return found_flags;
}
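
/* Illustrative sketch, not part of GCC: the reset-on-"none" behaviour
   described above.  Resolving this string against aarch64_fusible_pairs
   leaves only the adrp+add bit set, because "none" clears everything
   accumulated before it.  "example_fused" is hypothetical.  */
#if 0
static void
example_boolean_options_usage (void)
{
  unsigned int example_fused
    = aarch64_parse_boolean_options ("adrp+add.cmp+branch.none.adrp+add",
				     aarch64_fusible_pairs,
				     AARCH64_FUSE_NOTHING, "fuse=");
}
#endif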
/* Support for overriding instruction fusion.  */

static void
aarch64_parse_fuse_string (const char *fuse_string,
			   struct tune_params *tune)
{
  tune->fusible_ops = aarch64_parse_boolean_options (fuse_string,
						     aarch64_fusible_pairs,
						     tune->fusible_ops,
						     "fuse=");
}
/* Support for overriding other tuning flags.  */

static void
aarch64_parse_tune_string (const char *tune_string,
			   struct tune_params *tune)
{
  tune->extra_tuning_flags
    = aarch64_parse_boolean_options (tune_string,
				     aarch64_tuning_flags,
				     tune->extra_tuning_flags,
				     "tune=");
}
/* Parse TOKEN, which has length LENGTH, to see if it is a tuning option
   we understand.  If it is, extract the option string and hand off to
   the appropriate function.  */

static void
aarch64_parse_one_override_token (const char* token,
				  size_t length,
				  struct tune_params *tune)
{
  const struct aarch64_tuning_override_function *fn
    = aarch64_tuning_override_functions;

  const char *option_part = strchr (token, '=');
  if (!option_part)
    {
      error ("tuning string missing in option (%s)", token);
      return;
    }

  /* Get the length of the option name.  */
  length = option_part - token;
  /* Skip the '=' to get to the option string.  */
  option_part++;

  for (; fn->name != NULL; fn++)
    {
      if (!strncmp (fn->name, token, length))
	{
	  fn->parse_override (option_part, tune);
	  return;
	}
    }

  error ("unknown tuning option (%s)", token);
  return;
}
/* A checking mechanism for the implementation of the tls size.  */

static void
initialize_aarch64_tls_size (struct gcc_options *opts)
{
  if (aarch64_tls_size == 0)
    aarch64_tls_size = 24;

  switch (opts->x_aarch64_cmodel_var)
    {
    case AARCH64_CMODEL_TINY:
      /* Both the default and maximum TLS size allowed under tiny is 1M which
	 needs two instructions to address, so we clamp the size to 24.  */
      if (aarch64_tls_size > 24)
	aarch64_tls_size = 24;
      break;
    case AARCH64_CMODEL_SMALL:
      /* The maximum TLS size allowed under small is 4G.  */
      if (aarch64_tls_size > 32)
	aarch64_tls_size = 32;
      break;
    case AARCH64_CMODEL_LARGE:
      /* The maximum TLS size allowed under large is 16E.
	 FIXME: 16E should be 64bit, we only support 48bit offset now.  */
      if (aarch64_tls_size > 48)
	aarch64_tls_size = 48;
      break;
    default:
      gcc_unreachable ();
    }

  return;
}
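
/* Illustrative sketch, not part of GCC: the small-model clamp above in
   action.  A 48-bit TLS size request is cut back to the 32 bits (4G)
   that the small code model can address.  */
#if 0
static void
example_tls_size_clamp (void)
{
  aarch64_tls_size = 48;			/* As if -mtls-size=48.  */
  global_options.x_aarch64_cmodel_var = AARCH64_CMODEL_SMALL;
  initialize_aarch64_tls_size (&global_options);
  gcc_assert (aarch64_tls_size == 32);
}
#endif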
/* Parse STRING looking for options in the format:
     string	:: option:string
     option	:: name=substring
     name	:: {a-z}
     substring	:: defined by option.  */

static void
aarch64_parse_override_string (const char* input_string,
			       struct tune_params* tune)
{
  const char separator = ':';
  size_t string_length = strlen (input_string) + 1;
  char *string_root = (char *) xmalloc (sizeof (*string_root) * string_length);
  char *string = string_root;
  strncpy (string, input_string, string_length);
  string[string_length - 1] = '\0';

  char* ntoken = string;

  while ((ntoken = strchr (string, separator)))
    {
      size_t token_length = ntoken - string;
      /* Make this substring look like a string.  */
      *ntoken = '\0';
      aarch64_parse_one_override_token (string, token_length, tune);
      string = ++ntoken;
    }

  /* One last option to parse.  */
  aarch64_parse_one_override_token (string, strlen (string), tune);
  free (string_root);
}
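
/* Illustrative sketch, not part of GCC: the grammar above corresponds to
   command lines such as -moverride=fuse=adrp+add:tune=rename_fma_regs
   (assuming the tuning flag name rename_fma_regs from
   aarch64-tuning-flags.def).  Each colon-separated token is handed to the
   matching override function.  "example_tune" is hypothetical.  */
#if 0
static void
example_override_string_usage (void)
{
  struct tune_params example_tune = aarch64_tune_params;
  aarch64_parse_override_string ("fuse=adrp+add:tune=rename_fma_regs",
				 &example_tune);
}
#endif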
static void
aarch64_override_options_after_change_1 (struct gcc_options *opts)
{
  if (opts->x_flag_omit_frame_pointer)
    opts->x_flag_omit_leaf_frame_pointer = false;
  else if (opts->x_flag_omit_leaf_frame_pointer)
    opts->x_flag_omit_frame_pointer = true;

  /* If not optimizing for size, set the default
     alignment to what the target wants.  */
  if (!opts->x_optimize_size)
    {
      if (opts->x_align_loops <= 0)
	opts->x_align_loops = aarch64_tune_params.loop_align;
      if (opts->x_align_jumps <= 0)
	opts->x_align_jumps = aarch64_tune_params.jump_align;
      if (opts->x_align_functions <= 0)
	opts->x_align_functions = aarch64_tune_params.function_align;
    }

  /* If nopcrelative_literal_loads is set on the command line, this
     implies that the user asked for PC relative literal loads.  */
  if (opts->x_nopcrelative_literal_loads == 1)
    aarch64_nopcrelative_literal_loads = false;

  /* If it is not set on the command line, we default to no
     pc relative literal loads.  */
  if (opts->x_nopcrelative_literal_loads == 2)
    aarch64_nopcrelative_literal_loads = true;

  /* In the tiny memory model it makes no sense
     to disallow non PC relative literal pool loads
     as many other things will break anyway.  */
  if (opts->x_nopcrelative_literal_loads
      && (aarch64_cmodel == AARCH64_CMODEL_TINY
	  || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC))
    aarch64_nopcrelative_literal_loads = false;
}
/* 'Unpack' up the internal tuning structs and update the options
   in OPTS.  The caller must have set up selected_tune and selected_arch
   as all the other target-specific codegen decisions are
   derived from them.  */

void
aarch64_override_options_internal (struct gcc_options *opts)
{
  aarch64_tune_flags = selected_tune->flags;
  aarch64_tune = selected_tune->sched_core;
  /* Make a copy of the tuning parameters attached to the core, which
     we may later overwrite.  */
  aarch64_tune_params = *(selected_tune->tune);
  aarch64_architecture_version = selected_arch->architecture_version;

  if (opts->x_aarch64_override_tune_string)
    aarch64_parse_override_string (opts->x_aarch64_override_tune_string,
				   &aarch64_tune_params);

  /* This target defaults to strict volatile bitfields.  */
  if (opts->x_flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
    opts->x_flag_strict_volatile_bitfields = 1;

  /* -mgeneral-regs-only sets a mask in target_flags, make sure that
     aarch64_isa_flags does not contain the FP/SIMD/Crypto feature flags
     in case some code tries reading aarch64_isa_flags directly to check if
     FP is available.  Reuse the aarch64_parse_extension machinery since it
     knows how to disable any other flags that fp implies.  */
  if (TARGET_GENERAL_REGS_ONLY_P (opts->x_target_flags))
    {
      /* aarch64_parse_extension takes char* rather than const char* because
	 it is usually called from within other parsing functions.  */
      char tmp_str[] = "+nofp";
      aarch64_parse_extension (tmp_str, &opts->x_aarch64_isa_flags);
    }

  initialize_aarch64_code_model (opts);
  initialize_aarch64_tls_size (opts);

  int queue_depth = 0;
  switch (aarch64_tune_params.autoprefetcher_model)
    {
    case tune_params::AUTOPREFETCHER_OFF:
      queue_depth = -1;
      break;
    case tune_params::AUTOPREFETCHER_WEAK:
      queue_depth = 0;
      break;
    case tune_params::AUTOPREFETCHER_STRONG:
      queue_depth = max_insn_queue_index + 1;
      break;
    default:
      gcc_unreachable ();
    }

  /* We don't mind passing in global_options_set here as we don't use
     the *options_set structs anyway.  */
  maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
			 queue_depth,
			 opts->x_param_values,
			 global_options_set.x_param_values);

  aarch64_override_options_after_change_1 (opts);
}
/* Validate a command-line -mcpu option.  Parse the cpu and extensions (if any)
   specified in STR and throw errors if appropriate.  Put the results if
   they are valid in RES and ISA_FLAGS.  Return whether the option is
   valid.  */

static bool
aarch64_validate_mcpu (const char *str, const struct processor **res,
		       unsigned long *isa_flags)
{
  enum aarch64_parse_opt_result parse_res
    = aarch64_parse_cpu (str, res, isa_flags);

  if (parse_res == AARCH64_PARSE_OK)
    return true;

  switch (parse_res)
    {
    case AARCH64_PARSE_MISSING_ARG:
      error ("missing cpu name in -mcpu=%qs", str);
      break;
    case AARCH64_PARSE_INVALID_ARG:
      error ("unknown value %qs for -mcpu", str);
      break;
    case AARCH64_PARSE_INVALID_FEATURE:
      error ("invalid feature modifier in -mcpu=%qs", str);
      break;
    default:
      gcc_unreachable ();
    }

  return false;
}
/* Validate a command-line -march option.  Parse the arch and extensions
   (if any) specified in STR and throw errors if appropriate.  Put the
   results, if they are valid, in RES and ISA_FLAGS.  Return whether the
   option is valid.  */

static bool
aarch64_validate_march (const char *str, const struct processor **res,
			unsigned long *isa_flags)
{
  enum aarch64_parse_opt_result parse_res
    = aarch64_parse_arch (str, res, isa_flags);

  if (parse_res == AARCH64_PARSE_OK)
    return true;

  switch (parse_res)
    {
    case AARCH64_PARSE_MISSING_ARG:
      error ("missing arch name in -march=%qs", str);
      break;
    case AARCH64_PARSE_INVALID_ARG:
      error ("unknown value %qs for -march", str);
      break;
    case AARCH64_PARSE_INVALID_FEATURE:
      error ("invalid feature modifier in -march=%qs", str);
      break;
    default:
      gcc_unreachable ();
    }

  return false;
}
/* Validate a command-line -mtune option.  Parse the cpu
   specified in STR and throw errors if appropriate.  Put the
   result, if it is valid, in RES.  Return whether the option is
   valid.  */

static bool
aarch64_validate_mtune (const char *str, const struct processor **res)
{
  enum aarch64_parse_opt_result parse_res
    = aarch64_parse_tune (str, res);

  if (parse_res == AARCH64_PARSE_OK)
    return true;

  switch (parse_res)
    {
    case AARCH64_PARSE_MISSING_ARG:
      error ("missing cpu name in -mtune=%qs", str);
      break;
    case AARCH64_PARSE_INVALID_ARG:
      error ("unknown value %qs for -mtune", str);
      break;
    default:
      gcc_unreachable ();
    }

  return false;
}
/* Return the CPU corresponding to the enum CPU.
   If it doesn't specify a cpu, return the default.  */

static const struct processor *
aarch64_get_tune_cpu (enum aarch64_processor cpu)
{
  if (cpu != aarch64_none)
    return &all_cores[cpu];

  /* The & 0x3f is to extract the bottom 6 bits that encode the
     default cpu as selected by the --with-cpu GCC configure option
     in config.gcc.
     ???: The whole TARGET_CPU_DEFAULT and AARCH64_CPU_DEFAULT_FLAGS
     flags mechanism should be reworked to make it more sane.  */
  return &all_cores[TARGET_CPU_DEFAULT & 0x3f];
}
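
/* Illustrative sketch, not part of GCC: how TARGET_CPU_DEFAULT is packed.
   The configure-time CPU sits in the low 6 bits and the default ISA
   flags sit above them, which is why this file masks with 0x3f here and
   shifts right by 6 in aarch64_override_options.  */
#if 0
static void
example_cpu_default_encoding (void)
{
  unsigned long packed = TARGET_CPU_DEFAULT;
  enum aarch64_processor default_cpu
    = (enum aarch64_processor) (packed & 0x3f);
  unsigned long default_isa_flags = packed >> 6;
}
#endif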
/* Return the architecture corresponding to the enum ARCH.
   If it doesn't specify a valid architecture, return the default.  */

static const struct processor *
aarch64_get_arch (enum aarch64_arch arch)
{
  if (arch != aarch64_no_arch)
    return &all_architectures[arch];

  const struct processor *cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];

  return &all_architectures[cpu->arch];
}
/* Implement TARGET_OPTION_OVERRIDE.  This is called once in the beginning
   and is used to parse the -m{cpu,tune,arch} strings and setup the initial
   tuning structs.  In particular it must set selected_tune and
   aarch64_isa_flags that define the available ISA features and tuning
   decisions.  It must also set selected_arch as this will be used to
   output the .arch asm tags for each function.  */

static void
aarch64_override_options (void)
{
  unsigned long cpu_isa = 0;
  unsigned long arch_isa = 0;
  aarch64_isa_flags = 0;

  bool valid_cpu = true;
  bool valid_tune = true;
  bool valid_arch = true;

  selected_cpu = NULL;
  selected_arch = NULL;
  selected_tune = NULL;

  /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
     If either of -march or -mtune is given, they override their
     respective component of -mcpu.  */
  if (aarch64_cpu_string)
    valid_cpu = aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu,
				       &cpu_isa);

  if (aarch64_arch_string)
    valid_arch = aarch64_validate_march (aarch64_arch_string, &selected_arch,
					 &arch_isa);

  if (aarch64_tune_string)
    valid_tune = aarch64_validate_mtune (aarch64_tune_string, &selected_tune);

  /* If the user did not specify a processor, choose the default
     one for them.  This will be the CPU set during configuration using
     --with-cpu, otherwise it is "generic".  */
  if (!selected_cpu)
    {
      if (selected_arch)
	{
	  selected_cpu = &all_cores[selected_arch->ident];
	  aarch64_isa_flags = arch_isa;
	  explicit_arch = selected_arch->arch;
	}
      else
	{
	  /* Get default configure-time CPU.  */
	  selected_cpu = aarch64_get_tune_cpu (aarch64_none);
	  aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
	}

      if (selected_tune)
	explicit_tune_core = selected_tune->ident;
    }
  /* If both -mcpu and -march are specified check that they are architecturally
     compatible, warn if they're not and prefer the -march ISA flags.  */
  else if (selected_arch)
    {
      if (selected_arch->arch != selected_cpu->arch)
	{
	  warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
		   all_architectures[selected_cpu->arch].name,
		   selected_arch->name);
	}
      aarch64_isa_flags = arch_isa;
      explicit_arch = selected_arch->arch;
      explicit_tune_core = selected_tune ? selected_tune->ident
					 : selected_cpu->ident;
    }
  else
    {
      /* -mcpu but no -march.  */
      aarch64_isa_flags = cpu_isa;
      explicit_tune_core = selected_tune ? selected_tune->ident
					 : selected_cpu->ident;
      gcc_assert (selected_cpu);
      selected_arch = &all_architectures[selected_cpu->arch];
      explicit_arch = selected_arch->arch;
    }

  /* Set the arch as well as we will need it when outputting
     the .arch directive in assembly.  */
  if (!selected_arch)
    {
      gcc_assert (selected_cpu);
      selected_arch = &all_architectures[selected_cpu->arch];
    }

  if (!selected_tune)
    selected_tune = selected_cpu;

#ifndef HAVE_AS_MABI_OPTION
  /* The compiler may have been configured with 2.23.* binutils, which does
     not have support for ILP32.  */
  if (TARGET_ILP32)
    error ("Assembler does not support -mabi=ilp32");
#endif

  /* Make sure we properly set up the explicit options.  */
  if ((aarch64_cpu_string && valid_cpu)
      || (aarch64_tune_string && valid_tune))
    gcc_assert (explicit_tune_core != aarch64_none);

  if ((aarch64_cpu_string && valid_cpu)
      || (aarch64_arch_string && valid_arch))
    gcc_assert (explicit_arch != aarch64_no_arch);

  aarch64_override_options_internal (&global_options);

  /* Save these options as the default ones in case we push and pop them later
     while processing functions with potential target attributes.  */
  target_option_default_node = target_option_current_node
    = build_target_option_node (&global_options);

  aarch64_register_fma_steering ();
}
/* Implement targetm.override_options_after_change.  */

static void
aarch64_override_options_after_change (void)
{
  aarch64_override_options_after_change_1 (&global_options);
}
static struct machine_function *
aarch64_init_machine_status (void)
{
  struct machine_function *machine;
  machine = ggc_cleared_alloc<machine_function> ();
  return machine;
}

void
aarch64_init_expanders (void)
{
  init_machine_status = aarch64_init_machine_status;
}
/* A checking mechanism for the implementation of the various code models.  */

static void
initialize_aarch64_code_model (struct gcc_options *opts)
{
  if (opts->x_flag_pic)
    {
      switch (opts->x_aarch64_cmodel_var)
	{
	case AARCH64_CMODEL_TINY:
	  aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
	  break;
	case AARCH64_CMODEL_SMALL:
#ifdef HAVE_AS_SMALL_PIC_RELOCS
	  aarch64_cmodel = (flag_pic == 2
			    ? AARCH64_CMODEL_SMALL_PIC
			    : AARCH64_CMODEL_SMALL_SPIC);
#else
	  aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
#endif
	  break;
	case AARCH64_CMODEL_LARGE:
	  sorry ("code model %qs with -f%s", "large",
		 opts->x_flag_pic > 1 ? "PIC" : "pic");
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    aarch64_cmodel = opts->x_aarch64_cmodel_var;
}
/* Implement TARGET_OPTION_SAVE.  */

static void
aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
{
  ptr->x_aarch64_override_tune_string = opts->x_aarch64_override_tune_string;
}
/* Implements TARGET_OPTION_RESTORE.  Restore the backend codegen decisions
   using the information saved in PTR.  */

static void
aarch64_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
{
  opts->x_explicit_tune_core = ptr->x_explicit_tune_core;
  selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
  opts->x_explicit_arch = ptr->x_explicit_arch;
  selected_arch = aarch64_get_arch (ptr->x_explicit_arch);
  opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string;

  aarch64_override_options_internal (opts);
}
/* Implement TARGET_OPTION_PRINT.  */

static void
aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr)
{
  const struct processor *cpu
    = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
  unsigned long isa_flags = ptr->x_aarch64_isa_flags;
  const struct processor *arch = aarch64_get_arch (ptr->x_explicit_arch);
  std::string extension
    = aarch64_get_extension_string_for_isa_flags (isa_flags);

  fprintf (file, "%*sselected tune = %s\n", indent, "", cpu->name);
  fprintf (file, "%*sselected arch = %s%s\n", indent, "",
	   arch->name, extension.c_str ());
}
static GTY(()) tree aarch64_previous_fndecl;

void
aarch64_reset_previous_fndecl (void)
{
  aarch64_previous_fndecl = NULL;
}
/* Implement TARGET_SET_CURRENT_FUNCTION.  Unpack the codegen decisions
   like tuning and ISA features from the DECL_FUNCTION_SPECIFIC_TARGET
   of the function, if such exists.  This function may be called multiple
   times on a single function so use aarch64_previous_fndecl to avoid
   setting up identical state.  */

static void
aarch64_set_current_function (tree fndecl)
{
  tree old_tree = (aarch64_previous_fndecl
		   ? DECL_FUNCTION_SPECIFIC_TARGET (aarch64_previous_fndecl)
		   : NULL_TREE);

  tree new_tree = (fndecl
		   ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
		   : NULL_TREE);

  if (fndecl && fndecl != aarch64_previous_fndecl)
    {
      aarch64_previous_fndecl = fndecl;
      if (old_tree == new_tree)
	;
      else if (new_tree && new_tree != target_option_default_node)
	{
	  cl_target_option_restore (&global_options,
				    TREE_TARGET_OPTION (new_tree));
	  if (TREE_TARGET_GLOBALS (new_tree))
	    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
	  else
	    TREE_TARGET_GLOBALS (new_tree)
	      = save_target_globals_default_opts ();
	}
      else if (old_tree && old_tree != target_option_default_node)
	{
	  new_tree = target_option_current_node;
	  cl_target_option_restore (&global_options,
				    TREE_TARGET_OPTION (new_tree));
	  if (TREE_TARGET_GLOBALS (new_tree))
	    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
	  else if (new_tree == target_option_default_node)
	    restore_target_globals (&default_target_globals);
	  else
	    TREE_TARGET_GLOBALS (new_tree)
	      = save_target_globals_default_opts ();
	}
    }

  if (!fndecl)
    return;

  /* If we turned on SIMD make sure that any vector parameters are re-laid out
     so that they use proper vector modes.  */
  if (TARGET_SIMD)
    {
      tree parms = DECL_ARGUMENTS (fndecl);
      for (; parms && parms != void_list_node; parms = TREE_CHAIN (parms))
	{
	  if (TREE_CODE (parms) == PARM_DECL
	      && VECTOR_TYPE_P (TREE_TYPE (parms))
	      && DECL_MODE (parms) != TYPE_MODE (TREE_TYPE (parms)))
	    relayout_decl (parms);
	}
    }
}
/* Enum describing the various ways we can handle attributes.
   In many cases we can reuse the generic option handling machinery.  */

enum aarch64_attr_opt_type
{
  aarch64_attr_mask,	/* Attribute should set a bit in target_flags.  */
  aarch64_attr_bool,	/* Attribute sets or unsets a boolean variable.  */
  aarch64_attr_enum,	/* Attribute sets an enum variable.  */
  aarch64_attr_custom	/* Attribute requires a custom handling function.  */
};

/* All the information needed to handle a target attribute.
   NAME is the name of the attribute.
   ATTR_TYPE specifies the type of behaviour of the attribute as described
   in the definition of enum aarch64_attr_opt_type.
   ALLOW_NEG is true if the attribute supports a "no-" form.
   HANDLER is the function that takes the attribute string and whether
   it is a pragma or attribute and handles the option.  It is needed only
   when the ATTR_TYPE is aarch64_attr_custom.
   OPT_NUM is the enum specifying the option that the attribute modifies.
   This is needed for attributes that mirror the behaviour of a command-line
   option, that is it has ATTR_TYPE aarch64_attr_mask, aarch64_attr_bool or
   aarch64_attr_enum.  */

struct aarch64_attribute_info
{
  const char *name;
  enum aarch64_attr_opt_type attr_type;
  bool allow_neg;
  bool (*handler) (const char *, const char *);
  enum opt_code opt_num;
};
/* Handle the ARCH_STR argument to the arch= target attribute.
   PRAGMA_OR_ATTR is used in potential error messages.  */

static bool
aarch64_handle_attr_arch (const char *str, const char *pragma_or_attr)
{
  const struct processor *tmp_arch = NULL;
  enum aarch64_parse_opt_result parse_res
    = aarch64_parse_arch (str, &tmp_arch, &aarch64_isa_flags);

  if (parse_res == AARCH64_PARSE_OK)
    {
      gcc_assert (tmp_arch);
      selected_arch = tmp_arch;
      explicit_arch = selected_arch->arch;
      return true;
    }

  switch (parse_res)
    {
    case AARCH64_PARSE_MISSING_ARG:
      error ("missing architecture name in 'arch' target %s", pragma_or_attr);
      break;
    case AARCH64_PARSE_INVALID_ARG:
      error ("unknown value %qs for 'arch' target %s", str, pragma_or_attr);
      break;
    case AARCH64_PARSE_INVALID_FEATURE:
      error ("invalid feature modifier %qs for 'arch' target %s",
	     str, pragma_or_attr);
      break;
    default:
      gcc_unreachable ();
    }

  return false;
}
/* Handle the argument CPU_STR to the cpu= target attribute.
   PRAGMA_OR_ATTR is used in potential error messages.  */

static bool
aarch64_handle_attr_cpu (const char *str, const char *pragma_or_attr)
{
  const struct processor *tmp_cpu = NULL;
  enum aarch64_parse_opt_result parse_res
    = aarch64_parse_cpu (str, &tmp_cpu, &aarch64_isa_flags);

  if (parse_res == AARCH64_PARSE_OK)
    {
      gcc_assert (tmp_cpu);
      selected_tune = tmp_cpu;
      explicit_tune_core = selected_tune->ident;

      selected_arch = &all_architectures[tmp_cpu->arch];
      explicit_arch = selected_arch->arch;
      return true;
    }

  switch (parse_res)
    {
    case AARCH64_PARSE_MISSING_ARG:
      error ("missing cpu name in 'cpu' target %s", pragma_or_attr);
      break;
    case AARCH64_PARSE_INVALID_ARG:
      error ("unknown value %qs for 'cpu' target %s", str, pragma_or_attr);
      break;
    case AARCH64_PARSE_INVALID_FEATURE:
      error ("invalid feature modifier %qs for 'cpu' target %s",
	     str, pragma_or_attr);
      break;
    default:
      gcc_unreachable ();
    }

  return false;
}
/* Handle the argument STR to the tune= target attribute.
   PRAGMA_OR_ATTR is used in potential error messages.  */

static bool
aarch64_handle_attr_tune (const char *str, const char *pragma_or_attr)
{
  const struct processor *tmp_tune = NULL;
  enum aarch64_parse_opt_result parse_res
    = aarch64_parse_tune (str, &tmp_tune);

  if (parse_res == AARCH64_PARSE_OK)
    {
      gcc_assert (tmp_tune);
      selected_tune = tmp_tune;
      explicit_tune_core = selected_tune->ident;
      return true;
    }

  switch (parse_res)
    {
    case AARCH64_PARSE_INVALID_ARG:
      error ("unknown value %qs for 'tune' target %s", str, pragma_or_attr);
      break;
    default:
      gcc_unreachable ();
    }

  return false;
}
/* Parse an architecture extensions target attribute string specified in STR.
   For example "+fp+nosimd".  Show any errors if needed.  Return TRUE
   if successful.  Update aarch64_isa_flags to reflect the ISA features
   turned on in case of success.
   PRAGMA_OR_ATTR is used in potential error messages.  */

static bool
aarch64_handle_attr_isa_flags (char *str, const char *pragma_or_attr)
{
  enum aarch64_parse_opt_result parse_res;
  unsigned long isa_flags = aarch64_isa_flags;

  /* We allow "+nothing" in the beginning to clear out all architectural
     features if the user wants to handpick specific features.  */
  if (strncmp ("+nothing", str, 8) == 0)
    {
      isa_flags = 0;
      str += 8;
    }

  parse_res = aarch64_parse_extension (str, &isa_flags);

  if (parse_res == AARCH64_PARSE_OK)
    {
      aarch64_isa_flags = isa_flags;
      return true;
    }

  switch (parse_res)
    {
    case AARCH64_PARSE_MISSING_ARG:
      error ("missing feature modifier in target %s %qs",
	     pragma_or_attr, str);
      break;

    case AARCH64_PARSE_INVALID_FEATURE:
      error ("invalid feature modifier in target %s %qs",
	     pragma_or_attr, str);
      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
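
/* Illustrative sketch, not part of GCC: "+nothing" as handled above lets
   a user clear the slate and handpick features.  */
#if 0
__attribute__ ((target ("+nothing+fp")))
int example_handpicked_fp (void) { return 0; }
#endif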
/* The target attributes that we support.  On top of these we also support just
   ISA extensions, like  __attribute__ ((target ("+crc"))), but that case is
   handled explicitly in aarch64_process_one_target_attr.  */

static const struct aarch64_attribute_info aarch64_attributes[] =
{
  { "general-regs-only", aarch64_attr_mask, false, NULL,
     OPT_mgeneral_regs_only },
  { "fix-cortex-a53-835769", aarch64_attr_bool, true, NULL,
     OPT_mfix_cortex_a53_835769 },
  { "cmodel", aarch64_attr_enum, false, NULL, OPT_mcmodel_ },
  { "strict-align", aarch64_attr_mask, false, NULL, OPT_mstrict_align },
  { "omit-leaf-frame-pointer", aarch64_attr_bool, true, NULL,
     OPT_momit_leaf_frame_pointer },
  { "tls-dialect", aarch64_attr_enum, false, NULL, OPT_mtls_dialect_ },
  { "arch", aarch64_attr_custom, false, aarch64_handle_attr_arch,
     OPT_march_ },
  { "cpu", aarch64_attr_custom, false, aarch64_handle_attr_cpu, OPT_mcpu_ },
  { "tune", aarch64_attr_custom, false, aarch64_handle_attr_tune,
     OPT_mtune_ },
  { NULL, aarch64_attr_custom, false, NULL, OPT____ }
};
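
/* Illustrative sketch, not part of GCC: source-level forms that the
   table above accepts, dispatched by aarch64_process_one_target_attr
   below.  */
#if 0
__attribute__ ((target ("arch=armv8-a+crc")))
int example_custom_handler (void) { return 0; }

__attribute__ ((target ("no-omit-leaf-frame-pointer")))
int example_negated_bool (void) { return 1; }
#endif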
/* Parse ARG_STR which contains the definition of one target attribute.
   Show appropriate errors if any or return true if the attribute is valid.
   PRAGMA_OR_ATTR holds the string to use in error messages about whether
   we're processing a target attribute or pragma.  */

static bool
aarch64_process_one_target_attr (char *arg_str, const char* pragma_or_attr)
{
  bool invert = false;

  size_t len = strlen (arg_str);

  if (len == 0)
    {
      error ("malformed target %s", pragma_or_attr);
      return false;
    }

  char *str_to_check = (char *) alloca (len + 1);
  strcpy (str_to_check, arg_str);

  /* Skip leading whitespace.  */
  while (*str_to_check == ' ' || *str_to_check == '\t')
    str_to_check++;

  /* We have something like __attribute__ ((target ("+fp+nosimd"))).
     It is easier to detect and handle it explicitly here rather than going
     through the machinery for the rest of the target attributes in this
     function.  */
  if (*str_to_check == '+')
    return aarch64_handle_attr_isa_flags (str_to_check, pragma_or_attr);

  if (len > 3 && strncmp (str_to_check, "no-", 3) == 0)
    {
      invert = true;
      str_to_check += 3;
    }
  char *arg = strchr (str_to_check, '=');

  /* If we found opt=foo then terminate STR_TO_CHECK at the '='
     and point ARG to "foo".  */
  if (arg)
    {
      *arg = '\0';
      arg++;
    }
  const struct aarch64_attribute_info *p_attr;
  for (p_attr = aarch64_attributes; p_attr->name; p_attr++)
    {
      /* If the names don't match up, or the user has given an argument
	 to an attribute that doesn't accept one, or didn't give an argument
	 to an attribute that expects one, fail to match.  */
      if (strcmp (str_to_check, p_attr->name) != 0)
	continue;

      bool attr_need_arg_p = p_attr->attr_type == aarch64_attr_custom
			     || p_attr->attr_type == aarch64_attr_enum;

      if (attr_need_arg_p ^ (arg != NULL))
	{
	  error ("target %s %qs does not accept an argument",
		 pragma_or_attr, str_to_check);
	  return false;
	}

      /* If the name matches but the attribute does not allow "no-" versions
	 then we can't match.  */
      if (invert && !p_attr->allow_neg)
	{
	  error ("target %s %qs does not allow a negated form",
		 pragma_or_attr, str_to_check);
	  return false;
	}

      switch (p_attr->attr_type)
	{
	/* Has a custom handler registered.
	   For example, cpu=, arch=, tune=.  */
	case aarch64_attr_custom:
	  gcc_assert (p_attr->handler);
	  if (!p_attr->handler (arg, pragma_or_attr))
	    return false;
	  break;

	/* Either set or unset a boolean option.  */
	case aarch64_attr_bool:
	  {
	    struct cl_decoded_option decoded;

	    generate_option (p_attr->opt_num, NULL, !invert,
			     CL_TARGET, &decoded);
	    aarch64_handle_option (&global_options, &global_options_set,
				   &decoded, input_location);
	    break;
	  }
	/* Set or unset a bit in the target_flags.  aarch64_handle_option
	   should know what mask to apply given the option number.  */
	case aarch64_attr_mask:
	  {
	    struct cl_decoded_option decoded;
	    /* We only need to specify the option number.
	       aarch64_handle_option will know which mask to apply.  */
	    decoded.opt_index = p_attr->opt_num;
	    decoded.value = !invert;
	    aarch64_handle_option (&global_options, &global_options_set,
				   &decoded, input_location);
	    break;
	  }
	/* Use the option setting machinery to set an option to an enum.  */
	case aarch64_attr_enum:
	  {
	    gcc_assert (arg);
	    bool valid;
	    int value;
	    valid = opt_enum_arg_to_value (p_attr->opt_num, arg,
					   &value, CL_TARGET);
	    if (valid)
	      set_option (&global_options, NULL, p_attr->opt_num, value,
			  NULL, DK_UNSPECIFIED, input_location,
			  global_dc);
	    else
	      error ("target %s %s=%s is not valid",
		     pragma_or_attr, str_to_check, arg);
	    break;
	  }
	default:
	  gcc_unreachable ();
	}
    }

  return true;
}
/* Count how many times the character C appears in
   NULL-terminated string STR.  */

static unsigned int
num_occurences_in_str (char c, char *str)
{
  unsigned int res = 0;
  while (*str != '\0')
    {
      if (*str == c)
	res++;

      str++;
    }

  return res;
}
/* Parse the tree in ARGS that contains the target attribute information
   and update the global target options space.  PRAGMA_OR_ATTR is a string
   to be used in error messages, specifying whether this is processing
   a target attribute or a target pragma.  */

bool
aarch64_process_target_attr (tree args, const char* pragma_or_attr)
{
  if (TREE_CODE (args) == TREE_LIST)
    {
      do
	{
	  tree head = TREE_VALUE (args);
	  if (head)
	    {
	      if (!aarch64_process_target_attr (head, pragma_or_attr))
		return false;
	    }
	  args = TREE_CHAIN (args);
	} while (args);

      return true;
    }
  /* We expect to find a string to parse.  */
  gcc_assert (TREE_CODE (args) == STRING_CST);

  size_t len = strlen (TREE_STRING_POINTER (args));
  char *str_to_check = (char *) alloca (len + 1);
  strcpy (str_to_check, TREE_STRING_POINTER (args));

  if (len == 0)
    {
      error ("malformed target %s value", pragma_or_attr);
      return false;
    }

  /* Used to catch empty spaces between commas i.e.
     attribute ((target ("attr1,,attr2"))).  */
  unsigned int num_commas = num_occurences_in_str (',', str_to_check);

  /* Handle multiple target attributes separated by ','.  */
  char *token = strtok (str_to_check, ",");

  unsigned int num_attrs = 0;
  while (token)
    {
      num_attrs++;
      if (!aarch64_process_one_target_attr (token, pragma_or_attr))
	{
	  error ("target %s %qs is invalid", pragma_or_attr, token);
	  return false;
	}

      token = strtok (NULL, ",");
    }

  if (num_attrs != num_commas + 1)
    {
      error ("malformed target %s list %qs",
	     pragma_or_attr, TREE_STRING_POINTER (args));
      return false;
    }

  return true;
}
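
/* Illustrative sketch, not part of GCC: why the comma bookkeeping above
   matters.  For "attr1,,attr2" there are two commas, but strtok collapses
   the empty field and returns only two tokens, so num_attrs (2) differs
   from num_commas + 1 (3) and the list is rejected as malformed.  A valid
   list looks like:  */
#if 0
__attribute__ ((target ("strict-align,omit-leaf-frame-pointer")))
int example_comma_separated_attrs (void) { return 0; }
#endif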
/* Implement TARGET_OPTION_VALID_ATTRIBUTE_P.  This is used to
   process attribute ((target ("..."))).  */

static bool
aarch64_option_valid_attribute_p (tree fndecl, tree, tree args, int)
{
  struct cl_target_option cur_target;
  bool ret;
  tree old_optimize;
  tree new_target, new_optimize;
  tree existing_target = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  /* If what we're processing is the current pragma string then the
     target option node is already stored in target_option_current_node
     by aarch64_pragma_target_parse in aarch64-c.c.  Use that to avoid
     having to re-parse the string.  This is especially useful to keep
     arm_neon.h compile times down since that header contains a lot
     of intrinsics enclosed in pragmas.  */
  if (!existing_target && args == current_target_pragma)
    {
      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = target_option_current_node;
      return true;
    }
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  old_optimize = build_optimization_node (&global_options);
  func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  /* If the function changed the optimization levels as well as setting
     target options, start with the optimizations specified.  */
  if (func_optimize && func_optimize != old_optimize)
    cl_optimization_restore (&global_options,
			     TREE_OPTIMIZATION (func_optimize));

  /* Save the current target options to restore at the end.  */
  cl_target_option_save (&cur_target, &global_options);

  /* If fndecl already has some target attributes applied to it, unpack
     them so that we add this attribute on top of them, rather than
     overwriting them.  */
  if (existing_target)
    {
      struct cl_target_option *existing_options
	= TREE_TARGET_OPTION (existing_target);

      if (existing_options)
	cl_target_option_restore (&global_options, existing_options);
    }
  else
    cl_target_option_restore (&global_options,
			      TREE_TARGET_OPTION (target_option_current_node));

  ret = aarch64_process_target_attr (args, "attribute");

  /* Set up any additional state.  */
  if (ret)
    {
      aarch64_override_options_internal (&global_options);
      /* Initialize SIMD builtins if we haven't already.
	 Set current_target_pragma to NULL for the duration so that
	 the builtin initialization code doesn't try to tag the functions
	 being built with the attributes specified by any current pragma, thus
	 going into an infinite recursion.  */
      if (TARGET_SIMD)
	{
	  tree saved_current_target_pragma = current_target_pragma;
	  current_target_pragma = NULL;
	  aarch64_init_simd_builtins ();
	  current_target_pragma = saved_current_target_pragma;
	}
      new_target = build_target_option_node (&global_options);
    }
  else
    new_target = NULL;

  new_optimize = build_optimization_node (&global_options);

  if (fndecl && ret)
    {
      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;

      if (old_optimize != new_optimize)
	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
    }

  cl_target_option_restore (&global_options, &cur_target);

  if (old_optimize != new_optimize)
    cl_optimization_restore (&global_options,
			     TREE_OPTIMIZATION (old_optimize));
  return ret;
}
/* Helper for aarch64_can_inline_p.  In the case where CALLER and CALLEE are
   tri-bool options (yes, no, don't care) and the default value is
   DEF, determine whether to reject inlining.  */

static bool
aarch64_tribools_ok_for_inlining_p (int caller, int callee,
				    int dont_care, int def)
{
  /* If the callee doesn't care, always allow inlining.  */
  if (callee == dont_care)
    return true;

  /* If the caller doesn't care, always allow inlining.  */
  if (caller == dont_care)
    return true;

  /* Otherwise, allow inlining if either the callee and caller values
     agree, or if the callee is using the default value.  */
  return (callee == caller || callee == def);
}
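
/* Illustrative sketch, not part of GCC: the tri-bool rule above, using
   the encoding employed for -momit-leaf-frame-pointer below (don't-care
   value 2, default 1).  */
#if 0
static void
example_tribool_checks (void)
{
  gcc_assert (aarch64_tribools_ok_for_inlining_p (1, 2, 2, 1));	/* Callee unset.  */
  gcc_assert (aarch64_tribools_ok_for_inlining_p (1, 1, 2, 1));	/* Values agree.  */
  gcc_assert (!aarch64_tribools_ok_for_inlining_p (1, 0, 2, 1)); /* Mismatch.  */
}
#endif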
/* Implement TARGET_CAN_INLINE_P.  Decide whether it is valid
   to inline CALLEE into CALLER based on target-specific info.
   Make sure that the caller and callee have compatible architectural
   features.  Then go through the other possible target attributes
   and see if they can block inlining.  Try not to reject always_inline
   callees unless they are incompatible architecturally.  */

static bool
aarch64_can_inline_p (tree caller, tree callee)
{
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* If callee has no option attributes, then it is ok to inline.  */
  if (!callee_tree)
    return true;

  struct cl_target_option *caller_opts
    = TREE_TARGET_OPTION (caller_tree ? caller_tree
				      : target_option_default_node);

  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);

  /* Callee's ISA flags should be a subset of the caller's.  */
  if ((caller_opts->x_aarch64_isa_flags & callee_opts->x_aarch64_isa_flags)
      != callee_opts->x_aarch64_isa_flags)
    return false;

  /* Allow non-strict aligned functions inlining into strict
     aligned ones.  */
  if ((TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)
       != TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags))
      && !(!TARGET_STRICT_ALIGN_P (callee_opts->x_target_flags)
	   && TARGET_STRICT_ALIGN_P (caller_opts->x_target_flags)))
    return false;

  bool always_inline = lookup_attribute ("always_inline",
					 DECL_ATTRIBUTES (callee));

  /* If the architectural features match up and the callee is always_inline
     then the other attributes don't matter.  */
  if (always_inline)
    return true;

  if (caller_opts->x_aarch64_cmodel_var
      != callee_opts->x_aarch64_cmodel_var)
    return false;

  if (caller_opts->x_aarch64_tls_dialect
      != callee_opts->x_aarch64_tls_dialect)
    return false;

  /* Honour explicit requests to workaround errata.  */
  if (!aarch64_tribools_ok_for_inlining_p (
	  caller_opts->x_aarch64_fix_a53_err835769,
	  callee_opts->x_aarch64_fix_a53_err835769,
	  2, TARGET_FIX_ERR_A53_835769_DEFAULT))
    return false;

  /* If the user explicitly specified -momit-leaf-frame-pointer for the
     caller and callee and they don't match up, reject inlining.  */
  if (!aarch64_tribools_ok_for_inlining_p (
	  caller_opts->x_flag_omit_leaf_frame_pointer,
	  callee_opts->x_flag_omit_leaf_frame_pointer,
	  2, 1))
    return false;

  /* If the callee has specific tuning overrides, respect them.  */
  if (callee_opts->x_aarch64_override_tune_string != NULL
      && caller_opts->x_aarch64_override_tune_string == NULL)
    return false;

  /* If the user specified tuning override strings for the
     caller and callee and they don't match up, reject inlining.
     We just do a string compare here, we don't analyze the meaning
     of the string, as it would be too costly for little gain.  */
  if (callee_opts->x_aarch64_override_tune_string
      && caller_opts->x_aarch64_override_tune_string
      && (strcmp (callee_opts->x_aarch64_override_tune_string,
		  caller_opts->x_aarch64_override_tune_string) != 0))
    return false;

  return true;
}
/* Return true if SYMBOL_REF X binds locally.  */

static bool
aarch64_symbol_binds_local_p (const_rtx x)
{
  return (SYMBOL_REF_DECL (x)
	  ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
	  : SYMBOL_REF_LOCAL_P (x));
}
/* Return true if SYMBOL_REF X is thread local.  */
static bool
aarch64_tls_symbol_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  if (GET_CODE (x) != SYMBOL_REF)
    return false;

  return SYMBOL_REF_TLS_MODEL (x) != 0;
}
/* Classify a TLS symbol into one of the TLS kinds.  */
enum aarch64_symbol_type
aarch64_classify_tls_symbol (rtx x)
{
  enum tls_model tls_kind = tls_symbolic_operand_type (x);

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
    case TLS_MODEL_LOCAL_DYNAMIC:
      return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;

    case TLS_MODEL_INITIAL_EXEC:
      switch (aarch64_cmodel)
	{
	case AARCH64_CMODEL_TINY:
	case AARCH64_CMODEL_TINY_PIC:
	  return SYMBOL_TINY_TLSIE;
	default:
	  return SYMBOL_SMALL_TLSIE;
	}

    case TLS_MODEL_LOCAL_EXEC:
      if (aarch64_tls_size == 12)
	return SYMBOL_TLSLE12;
      else if (aarch64_tls_size == 24)
	return SYMBOL_TLSLE24;
      else if (aarch64_tls_size == 32)
	return SYMBOL_TLSLE32;
      else if (aarch64_tls_size == 48)
	return SYMBOL_TLSLE48;
      else
	gcc_unreachable ();

    case TLS_MODEL_EMULATED:
    case TLS_MODEL_NONE:
      return SYMBOL_FORCE_TO_MEM;

    default:
      gcc_unreachable ();
    }
}
/* Return the method that should be used to access SYMBOL_REF or
   LABEL_REF X.  */

enum aarch64_symbol_type
aarch64_classify_symbol (rtx x, rtx offset)
{
  if (GET_CODE (x) == LABEL_REF)
    {
      switch (aarch64_cmodel)
	{
	case AARCH64_CMODEL_LARGE:
	  return SYMBOL_FORCE_TO_MEM;

	case AARCH64_CMODEL_TINY_PIC:
	case AARCH64_CMODEL_TINY:
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL_SPIC:
	case AARCH64_CMODEL_SMALL_PIC:
	case AARCH64_CMODEL_SMALL:
	  return SYMBOL_SMALL_ABSOLUTE;

	default:
	  gcc_unreachable ();
	}
    }

  if (GET_CODE (x) == SYMBOL_REF)
    {
      if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
	{
	  /* This is alright even in PIC code as the constant
	     pool reference is always PC relative and within
	     the same translation unit.  */
	  if (nopcrelative_literal_loads
	      && CONSTANT_POOL_ADDRESS_P (x))
	    return SYMBOL_SMALL_ABSOLUTE;
	  else
	    return SYMBOL_FORCE_TO_MEM;
	}

      if (aarch64_tls_symbol_p (x))
	return aarch64_classify_tls_symbol (x);

      switch (aarch64_cmodel)
	{
	case AARCH64_CMODEL_TINY:
	  /* When we retrieve symbol + offset address, we have to make sure
	     the offset does not cause overflow of the final address.  But
	     we have no way of knowing the address of symbol at compile time
	     so we can't accurately say if the distance between the PC and
	     symbol + offset is outside the addressable range of +/-1M in the
	     TINY code model.  So we rely on images not being greater than
	     1M and cap the offset at 1M and anything beyond 1M will have to
	     be loaded using an alternative mechanism.  */
	  if (SYMBOL_REF_WEAK (x)
	      || INTVAL (offset) < -1048575 || INTVAL (offset) > 1048575)
	    return SYMBOL_FORCE_TO_MEM;
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL:
	  /* Same reasoning as the tiny code model, but the offset cap here is
	     4G.  */
	  if (SYMBOL_REF_WEAK (x)
	      || !IN_RANGE (INTVAL (offset), HOST_WIDE_INT_C (-4294967263),
			    HOST_WIDE_INT_C (4294967264)))
	    return SYMBOL_FORCE_TO_MEM;
	  return SYMBOL_SMALL_ABSOLUTE;

	case AARCH64_CMODEL_TINY_PIC:
	  if (!aarch64_symbol_binds_local_p (x))
	    return SYMBOL_TINY_GOT;
	  return SYMBOL_TINY_ABSOLUTE;

	case AARCH64_CMODEL_SMALL_SPIC:
	case AARCH64_CMODEL_SMALL_PIC:
	  if (!aarch64_symbol_binds_local_p (x))
	    return (aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC
		    ? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G);
	  return SYMBOL_SMALL_ABSOLUTE;

	default:
	  gcc_unreachable ();
	}
    }

  /* By default push everything into the constant pool.  */
  return SYMBOL_FORCE_TO_MEM;
}
bool
aarch64_constant_address_p (rtx x)
{
  return (CONSTANT_P (x) && memory_address_p (DImode, x));
}

bool
aarch64_legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return false;

  return true;
}
/* Return true if X holds either a quarter-precision or
     floating-point +0.0 constant.  */
static bool
aarch64_valid_floating_const (machine_mode mode, rtx x)
{
  if (!CONST_DOUBLE_P (x))
    return false;

  if (aarch64_float_const_zero_rtx_p (x))
    return true;

  /* We only handle moving 0.0 to a TFmode register.  */
  if (!(mode == SFmode || mode == DFmode))
    return false;

  return aarch64_float_const_representable_p (x);
}
static bool
aarch64_legitimate_constant_p (machine_mode mode, rtx x)
{
  /* Do not allow vector struct mode constants.  We could support
     0 and -1 easily, but they need support in aarch64-simd.md.  */
  if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
    return false;

  /* This could probably go away because
     we now decompose CONST_INTs according to expand_mov_immediate.  */
  if ((GET_CODE (x) == CONST_VECTOR
       && aarch64_simd_valid_immediate (x, mode, false, NULL))
      || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
    return !targetm.cannot_force_const_mem (mode, x);

  if (GET_CODE (x) == HIGH
      && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
    return true;

  return aarch64_constant_address_p (x);
}
rtx
aarch64_load_tp (rtx target)
{
  if (!target
      || GET_MODE (target) != Pmode
      || !register_operand (target, Pmode))
    target = gen_reg_rtx (Pmode);

  /* Can return in any reg.  */
  emit_insn (gen_aarch64_load_tp_hard (target));
  return target;
}
/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Implement TARGET_BUILD_BUILTIN_VA_LIST.
   Return the type to use as __builtin_va_list.

   AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:

   struct __va_list
   {
     void *__stack;
     void *__gr_top;
     void *__vr_top;
     int   __gr_offs;
     int   __vr_offs;
   };  */

static tree
aarch64_build_builtin_va_list (void)
{
  tree va_list_name;
  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;

  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;

  /* Create the fields.  */
  f_stack = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__stack"),
			ptr_type_node);
  f_grtop = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__gr_top"),
			ptr_type_node);
  f_vrtop = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__vr_top"),
			ptr_type_node);
  f_groff = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__gr_offs"),
			integer_type_node);
  f_vroff = build_decl (BUILTINS_LOCATION,
			FIELD_DECL, get_identifier ("__vr_offs"),
			integer_type_node);

  DECL_ARTIFICIAL (f_stack) = 1;
  DECL_ARTIFICIAL (f_grtop) = 1;
  DECL_ARTIFICIAL (f_vrtop) = 1;
  DECL_ARTIFICIAL (f_groff) = 1;
  DECL_ARTIFICIAL (f_vroff) = 1;

  DECL_FIELD_CONTEXT (f_stack) = va_list_type;
  DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
  DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
  DECL_FIELD_CONTEXT (f_groff) = va_list_type;
  DECL_FIELD_CONTEXT (f_vroff) = va_list_type;

  TYPE_FIELDS (va_list_type) = f_stack;
  DECL_CHAIN (f_stack) = f_grtop;
  DECL_CHAIN (f_grtop) = f_vrtop;
  DECL_CHAIN (f_vrtop) = f_groff;
  DECL_CHAIN (f_groff) = f_vroff;

  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
{
  const CUMULATIVE_ARGS *cum;
  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
  tree stack, grtop, vrtop, groff, vroff;
  tree t;
  int gr_save_area_size;
  int vr_save_area_size;
  int vr_offset;

  cum = &crtl->args.info;
  gr_save_area_size
    = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
  vr_save_area_size
    = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;

  if (!TARGET_FLOAT)
    {
      gcc_assert (cum->aapcs_nvrn == 0);
      vr_save_area_size = 0;
    }

  f_stack = TYPE_FIELDS (va_list_type_node);
  f_grtop = DECL_CHAIN (f_stack);
  f_vrtop = DECL_CHAIN (f_grtop);
  f_groff = DECL_CHAIN (f_vrtop);
  f_vroff = DECL_CHAIN (f_groff);

  stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
		  NULL_TREE);
  grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
		  NULL_TREE);
  vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
		  NULL_TREE);
  groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
		  NULL_TREE);
  vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
		  NULL_TREE);

  /* Emit code to initialize STACK, which points to the next varargs stack
     argument.  CUM->AAPCS_STACK_SIZE gives the number of stack words used
     by named arguments.  STACK is 8-byte aligned.  */
  t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
  if (cum->aapcs_stack_size > 0)
    t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
  t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize GRTOP, the top of the GR save area.
     virtual_incoming_args_rtx should have been 16 byte aligned.  */
  t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
  t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize VRTOP, the top of the VR save area.
     This address is gr_save_area_bytes below GRTOP, rounded
     down to the next 16-byte boundary.  */
  t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
  vr_offset = ROUND_UP (gr_save_area_size,
			STACK_BOUNDARY / BITS_PER_UNIT);

  if (vr_offset)
    t = fold_build_pointer_plus_hwi (t, -vr_offset);
  t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Emit code to initialize GROFF, the offset from GRTOP of the
     next GPR argument.  */
  t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
	      build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Likewise emit code to initialize VROFF, the offset from FTOP
     of the next VR argument.  */
  t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
	      build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
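
/* Illustrative sketch, not part of GCC: the state the expansion above
   sets up for a hypothetical call with two named GPR arguments, one
   named FP argument and no named stack arguments:

     __stack   = virtual incoming args pointer
     __gr_top  = virtual incoming args pointer
     __vr_top  = __gr_top - ROUND_UP (6 * 8, 16)
     __gr_offs = -(6 * 8)	   six x-registers remain to be saved
     __vr_offs = -(7 * 16)	   seven q-registers remain to be saved  */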
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */

static tree
aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			      gimple_seq *post_p ATTRIBUTE_UNUSED)
{
  tree addr;
  bool indirect_p;
  bool is_ha;		/* is HFA or HVA.  */
  bool dw_align;	/* double-word align.  */
  machine_mode ag_mode = VOIDmode;
  int nregs;
  machine_mode mode;

  tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
  tree stack, f_top, f_off, off, arg, roundup, on_stack;
  HOST_WIDE_INT size, rsize, adjust, align;
  tree t, u, cond1, cond2;

  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
  if (indirect_p)
    type = build_pointer_type (type);

  mode = TYPE_MODE (type);

  f_stack = TYPE_FIELDS (va_list_type_node);
  f_grtop = DECL_CHAIN (f_stack);
  f_vrtop = DECL_CHAIN (f_grtop);
  f_groff = DECL_CHAIN (f_vrtop);
  f_vroff = DECL_CHAIN (f_groff);

  stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
		  f_stack, NULL_TREE);
  size = int_size_in_bytes (type);
  align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;

  dw_align = false;
  adjust = 0;
  if (aarch64_vfp_is_call_or_return_candidate (mode,
					       type,
					       &ag_mode,
					       &nregs,
					       &is_ha))
    {
      /* TYPE passed in fp/simd registers.  */
      if (!TARGET_FLOAT)
	aarch64_err_no_fpadvsimd (mode, "varargs");

      f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
		      unshare_expr (valist), f_vrtop, NULL_TREE);
      f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
		      unshare_expr (valist), f_vroff, NULL_TREE);

      rsize = nregs * UNITS_PER_VREG;

      if (is_ha)
	{
	  if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
	    adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
	}
      else if (BLOCK_REG_PADDING (mode, type, 1) == downward
	       && size < UNITS_PER_VREG)
	{
	  adjust = UNITS_PER_VREG - size;
	}
    }
  else
    {
      /* TYPE passed in general registers.  */
      f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
		      unshare_expr (valist), f_grtop, NULL_TREE);
      f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
		      unshare_expr (valist), f_groff, NULL_TREE);
      rsize = ROUND_UP (size, UNITS_PER_WORD);
      nregs = rsize / UNITS_PER_WORD;

      if (align > 8)
	dw_align = true;

      if (BLOCK_REG_PADDING (mode, type, 1) == downward
	  && size < UNITS_PER_WORD)
	{
	  adjust = UNITS_PER_WORD - size;
	}
    }

  /* Get a local temporary for the field value.  */
  off = get_initialized_tmp_var (f_off, pre_p, NULL);

  /* Emit code to branch if off >= 0.  */
  t = build2 (GE_EXPR, boolean_type_node, off,
	      build_int_cst (TREE_TYPE (off), 0));
  cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);

  if (dw_align)
    {
      /* Emit: offs = (offs + 15) & -16.  */
      t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
		  build_int_cst (TREE_TYPE (off), 15));
      t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
		  build_int_cst (TREE_TYPE (off), -16));
      roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
    }
  else
    roundup = NULL;

  /* Update ap.__[g|v]r_offs  */
  t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
	      build_int_cst (TREE_TYPE (off), rsize));
  t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);

  /* String up.  */
  if (roundup)
    t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);

  /* [cond2] if (ap.__[g|v]r_offs > 0)  */
  u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
	      build_int_cst (TREE_TYPE (f_off), 0));
  cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);

  /* String up: make sure the assignment happens before the use.  */
  t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
  COND_EXPR_ELSE (cond1) = t;

  /* Prepare the trees handling the argument that is passed on the stack;
     the top level node will store in ON_STACK.  */
  arg = get_initialized_tmp_var (stack, pre_p, NULL);
  if (align > 8)
    {
      /* if (alignof(type) > 8) (arg = arg + 15) & -16;  */
      t = fold_convert (intDI_type_node, arg);
      t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), 15));
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -16));
      t = fold_convert (TREE_TYPE (arg), t);
      roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
    }
  else
    roundup = NULL;
  /* Advance ap.__stack  */
  t = fold_convert (intDI_type_node, arg);
  t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
	      build_int_cst (TREE_TYPE (t), size + 7));
  t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
	      build_int_cst (TREE_TYPE (t), -8));
  t = fold_convert (TREE_TYPE (arg), t);
  t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
  /* String up roundup and advance.  */
  if (roundup)
    t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
  /* String up with arg  */
  on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
  /* Big-endianness related address adjustment.  */
  if (BLOCK_REG_PADDING (mode, type, 1) == downward
      && size < UNITS_PER_WORD)
    {
      t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
		  size_int (UNITS_PER_WORD - size));
      on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
    }

  COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
  COND_EXPR_THEN (cond2) = unshare_expr (on_stack);

  /* Adjustment to OFFSET in the case of BIG_ENDIAN.  */
  t = off;
  if (adjust)
    t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
		build_int_cst (TREE_TYPE (off), adjust));

  t = fold_convert (sizetype, t);
  t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);

  if (is_ha)
    {
      /* type ha; // treat as "struct {ftype field[n];}"
         ... [computing offs]
         for (i = 0; i <nregs; ++i, offs += 16)
	   ha.field[i] = *((ftype *)(ap.__vr_top + offs));
	 return ha;  */
      int i;
      tree tmp_ha, field_t, field_ptr_t;

      /* Declare a local variable.  */
      tmp_ha = create_tmp_var_raw (type, "ha");
      gimple_add_tmp_var (tmp_ha);

      /* Establish the base type.  */
      switch (ag_mode)
	{
	case SFmode:
	  field_t = float_type_node;
	  field_ptr_t = float_ptr_type_node;
	  break;
	case DFmode:
	  field_t = double_type_node;
	  field_ptr_t = double_ptr_type_node;
	  break;
	case TFmode:
	  field_t = long_double_type_node;
	  field_ptr_t = long_double_ptr_type_node;
	  break;
/* The half precision and quad precision are not fully supported yet.  Enable
   the following code after the support is complete.  Need to find the correct
   type node for __fp16 *.  */
#if 0
	case HFmode:
	  field_t = float_type_node;
	  field_ptr_t = float_ptr_type_node;
	  break;
#endif
	case V2SImode:
	case V4SImode:
	  {
	    tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
	    field_t = build_vector_type_for_mode (innertype, ag_mode);
	    field_ptr_t = build_pointer_type (field_t);
	  }
	  break;
	default:
	  gcc_assert (0);
	}

      /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area  */
      tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
      addr = t;
      t = fold_convert (field_ptr_t, addr);
      t = build2 (MODIFY_EXPR, field_t,
		  build1 (INDIRECT_REF, field_t, tmp_ha),
		  build1 (INDIRECT_REF, field_t, t));

      /* ha.field[i] = *((field_ptr_t)vr_saved_area + i)  */
      for (i = 1; i < nregs; ++i)
	{
	  addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
	  u = fold_convert (field_ptr_t, addr);
	  u = build2 (MODIFY_EXPR, field_t,
		      build2 (MEM_REF, field_t, tmp_ha,
			      build_int_cst (field_ptr_t,
					     (i *
					      int_size_in_bytes (field_t)))),
		      build1 (INDIRECT_REF, field_t, u));
	  t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
	}

      u = fold_convert (TREE_TYPE (f_top), tmp_ha);
      t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
    }

  COND_EXPR_ELSE (cond2) = t;
  addr = fold_convert (build_pointer_type (type), cond1);
  addr = build_va_arg_indirect_ref (addr);

  if (indirect_p)
    addr = build_va_arg_indirect_ref (addr);

  return addr;
}
/* Implement TARGET_SETUP_INCOMING_VARARGS.  */

static void
aarch64_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
				tree type, int *pretend_size ATTRIBUTE_UNUSED,
				int no_rtl)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  CUMULATIVE_ARGS local_cum;
  int gr_saved, vr_saved;

  /* The caller has advanced CUM up to, but not beyond, the last named
     argument.  Advance a local copy of CUM past the last "real" named
     argument, to find out how many registers are left over.  */
  local_cum = *cum;
  aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);

  /* Found out how many registers we need to save.  */
  gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
  vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;

  if (!TARGET_FLOAT)
    {
      gcc_assert (local_cum.aapcs_nvrn == 0);
      vr_saved = 0;
    }

  if (!no_rtl)
    {
      if (gr_saved > 0)
	{
	  rtx ptr, mem;

	  /* virtual_incoming_args_rtx should have been 16-byte aligned.  */
	  ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
			       - gr_saved * UNITS_PER_WORD);
	  mem = gen_frame_mem (BLKmode, ptr);
	  set_mem_alias_set (mem, get_varargs_alias_set ());

	  move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
			       mem, gr_saved);
	}
      if (vr_saved > 0)
	{
	  /* We can't use move_block_from_reg, because it will use
	     the wrong mode, storing D regs only.  */
	  machine_mode mode = TImode;
	  int off, i;

	  /* Set OFF to the offset from virtual_incoming_args_rtx of
	     the first vector register.  The VR save area lies below
	     the GR one, and is aligned to 16 bytes.  */
	  off = -ROUND_UP (gr_saved * UNITS_PER_WORD,
			   STACK_BOUNDARY / BITS_PER_UNIT);
	  off -= vr_saved * UNITS_PER_VREG;

	  for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
	    {
	      rtx ptr, mem;

	      ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
	      mem = gen_frame_mem (mode, ptr);
	      set_mem_alias_set (mem, get_varargs_alias_set ());
	      aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
	      off += UNITS_PER_VREG;
	    }
	}
    }

  /* We don't save the size into *PRETEND_SIZE because we want to avoid
     any complication of having crtl->args.pretend_args_size changed.  */
  cfun->machine->frame.saved_varargs_size
    = (ROUND_UP (gr_saved * UNITS_PER_WORD,
		 STACK_BOUNDARY / BITS_PER_UNIT)
       + vr_saved * UNITS_PER_VREG);
}
static void
aarch64_conditional_register_usage (void)
{
  int i;
  if (!TARGET_FLOAT)
    {
      for (i = V0_REGNUM; i <= V31_REGNUM; i++)
	{
	  fixed_regs[i] = 1;
	  call_used_regs[i] = 1;
	}
    }
}
/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.  */
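/* For example, given

     struct quad { double a, b, c, d; };

   the walk finds four consecutive DFmode elements and returns 4 with
   *MODEP set to DFmode, so the struct is a candidate homogeneous
   floating-point aggregate that can be passed in d0-d3 (a sketch of the
   AAPCS64 rules; the authoritative logic is the function below).  */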
static int
aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
{
  machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode && mode != TFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode && mode != TFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
	 and 128-bit vector types.  */
      size = int_size_in_bytes (type);
      switch (size)
	{
	case 8:
	  mode = V2SImode;
	  break;
	case 16:
	  mode = V4SImode;
	  break;
	default:
	  return -1;
	}

      if (*modep == VOIDmode)
	*modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
	 equivalent for the purposes of being homogeneous aggregates
	 if they are the same size.  */
      if (*modep == mode)
	return 1;

      break;

    case ARRAY_TYPE:
      {
	int count;
	tree index = TYPE_DOMAIN (type);

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
	if (count == -1
	    || !index
	    || !TYPE_MAX_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
	    || !TYPE_MIN_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index)))
	  return -1;

	count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
		      - tree_to_uhwi (TYPE_MIN_VALUE (index)));

	/* There must be no padding.  */
	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case RECORD_TYPE:
      {
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count += sub_count;
	  }

	/* There must be no padding.  */
	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
	/* These aren't very interesting except in a degenerate case.  */
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count = count > sub_count ? count : sub_count;
	  }

	/* There must be no padding.  */
	if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
	  return -1;

	return count;
      }

    default:
      break;
    }

  return -1;
}
/* Return TRUE if the type, as described by TYPE and MODE, is a short vector
   type as described in AAPCS64 \S 4.1.2.

   See the comment above aarch64_composite_type_p for the notes on MODE.  */
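/* For instance, the ACLE types int32x2_t (8 bytes) and int32x4_t
   (16 bytes) are short vectors in this sense, while a 32-byte GNU
   vector type would not be.  */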
static bool
aarch64_short_vector_p (const_tree type,
			machine_mode mode)
{
  HOST_WIDE_INT size = -1;

  if (type && TREE_CODE (type) == VECTOR_TYPE)
    size = int_size_in_bytes (type);
  else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    size = GET_MODE_SIZE (mode);

  return (size == 8 || size == 16);
}
/* Return TRUE if the type, as described by TYPE and MODE, is a composite
   type as described in AAPCS64 \S 4.3.  This includes aggregate, union and
   array types.  The C99 floating-point complex types are also considered
   as composite types, according to AAPCS64 \S 7.1.1.  The complex integer
   types, which are GCC extensions and out of the scope of AAPCS64, are
   treated as composite types here as well.

   Note that MODE itself is not sufficient in determining whether a type
   is such a composite type or not.  This is because
   stor-layout.c:compute_record_mode may have already changed the MODE
   (BLKmode) of a RECORD_TYPE TYPE to some other mode.  For example, a
   structure with only one field may have its MODE set to the mode of the
   field.  Also an integer mode whose size matches the size of the
   RECORD_TYPE type may be used to substitute the original mode
   (i.e. BLKmode) in certain circumstances.  In other words, MODE cannot be
   solely relied on.  */
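/* As an illustration of the note above:

     struct wrapped_float { float f; };

   may end up with TYPE_MODE SFmode after stor-layout, yet it is still a
   composite type for the AAPCS64, so the check below must consult the
   tree and not just the mode.  */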
static bool
aarch64_composite_type_p (const_tree type,
			  machine_mode mode)
{
  if (aarch64_short_vector_p (type, mode))
    return false;

  if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
    return true;

  if (mode == BLKmode
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return true;

  return false;
}
/* Return TRUE if an argument, whose type is described by TYPE and MODE,
   shall be passed or returned in simd/fp register(s) (providing these
   parameter passing registers are available).

   Upon successful return, *COUNT returns the number of needed registers,
   *BASE_MODE returns the mode of the individual register and when IS_HA
   is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
   floating-point aggregate or a homogeneous short-vector aggregate.  */

static bool
aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
					 const_tree type,
					 machine_mode *base_mode,
					 int *count,
					 bool *is_ha)
{
  machine_mode new_mode = VOIDmode;
  bool composite_p = aarch64_composite_type_p (type, mode);

  if (is_ha != NULL) *is_ha = false;

  if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
      || aarch64_short_vector_p (type, mode))
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      if (is_ha != NULL) *is_ha = true;
      *count = 2;
      new_mode = GET_MODE_INNER (mode);
    }
  else if (type && composite_p)
    {
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);

      if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
	{
	  if (is_ha != NULL) *is_ha = true;
	  *count = ag_count;
	}
      else
	return false;
    }
  else
    return false;

  *base_mode = new_mode;
  return true;
}
/* Implement TARGET_STRUCT_VALUE_RTX.  */

static rtx
aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
			  int incoming ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
}
/* Implements target hook vector_mode_supported_p.  */
static bool
aarch64_vector_mode_supported_p (machine_mode mode)
{
  if (TARGET_SIMD
      && (mode == V4SImode  || mode == V8HImode
	  || mode == V16QImode || mode == V2DImode
	  || mode == V2SImode  || mode == V4HImode
	  || mode == V8QImode || mode == V2SFmode
	  || mode == V4SFmode || mode == V2DFmode
	  || mode == V4HFmode || mode == V8HFmode
	  || mode == V1DFmode))
    return true;

  return false;
}
/* Return appropriate SIMD container
   for MODE within a vector of WIDTH bits.  */
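/* For example, under this scheme aarch64_simd_container_mode (SImode, 128)
   is V4SImode and aarch64_simd_container_mode (HImode, 64) is V4HImode;
   element modes with no SIMD container fall back to word_mode (a sketch
   of the mapping implemented below).  */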
static machine_mode
aarch64_simd_container_mode (machine_mode mode, unsigned width)
{
  gcc_assert (width == 64 || width == 128);
  if (TARGET_SIMD)
    {
      if (width == 128)
	switch (mode)
	  {
	  case DFmode:
	    return V2DFmode;
	  case SFmode:
	    return V4SFmode;
	  case SImode:
	    return V4SImode;
	  case HImode:
	    return V8HImode;
	  case QImode:
	    return V16QImode;
	  case DImode:
	    return V2DImode;
	  default:
	    break;
	  }
      else
	switch (mode)
	  {
	  case SFmode:
	    return V2SFmode;
	  case SImode:
	    return V2SImode;
	  case HImode:
	    return V4HImode;
	  case QImode:
	    return V8QImode;
	  default:
	    break;
	  }
    }

  return word_mode;
}
/* Return 128-bit container as the preferred SIMD mode for MODE.  */
static machine_mode
aarch64_preferred_simd_mode (machine_mode mode)
{
  return aarch64_simd_container_mode (mode, 128);
}

/* Return the bitmask of possible vector sizes for the vectorizer
   to iterate over.  */
static unsigned int
aarch64_autovectorize_vector_sizes (void)
{
  return (16 | 8);
}

/* Implement TARGET_MANGLE_TYPE.  */

static const char *
aarch64_mangle_type (const_tree type)
{
  /* The AArch64 ABI documents say that "__va_list" has to be
     mangled as if it is in the "std" namespace.  */
  if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
    return "St9__va_list";

  /* Half-precision float.  */
  if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
    return "Dh";

  /* Mangle AArch64-specific internal types.  TYPE_NAME is non-NULL_TREE for
     builtin types.  */
  if (TYPE_NAME (type) != NULL)
    return aarch64_mangle_builtin_type (type);

  /* Use the default mangling.  */
  return NULL;
}
/* Return true if the rtx_insn contains a MEM RTX somewhere
   in it.  */

static bool
has_memory_op (rtx_insn *mem_insn)
{
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (mem_insn), ALL)
    if (MEM_P (*iter))
      return true;

  return false;
}

/* Find the first rtx_insn before insn that will generate an assembly
   instruction.  */

static rtx_insn *
aarch64_prev_real_insn (rtx_insn *insn)
{
  if (!insn)
    return NULL;

  do
    {
      insn = prev_real_insn (insn);
    }
  while (insn && recog_memoized (insn) < 0);

  return insn;
}

static bool
is_madd_op (enum attr_type t1)
{
  unsigned int i;
  /* A number of these may be AArch32 only.  */
  enum attr_type mlatypes[] = {
    TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
    TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
    TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS, TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
  };

  for (i = 0; i < sizeof (mlatypes) / sizeof (enum attr_type); i++)
    {
      if (t1 == mlatypes[i])
	return true;
    }

  return false;
}

/* Check if there is a register dependency between a load and the insn
   for which we hold recog_data.  */

static bool
dep_between_memop_and_curr (rtx memop)
{
  rtx load_reg;
  int opno;

  gcc_assert (GET_CODE (memop) == SET);

  if (!REG_P (SET_DEST (memop)))
    return false;

  load_reg = SET_DEST (memop);
  for (opno = 1; opno < recog_data.n_operands; opno++)
    {
      rtx operand = recog_data.operand[opno];
      if (REG_P (operand)
	  && reg_overlap_mentioned_p (load_reg, operand))
	return true;
    }

  return false;
}
/* When working around the Cortex-A53 erratum 835769,
   given rtx_insn INSN, return true if it is a 64-bit multiply-accumulate
   instruction and has a preceding memory instruction such that a NOP
   should be inserted between them.  */
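/* The problematic sequence is, roughly:

       ldr  x1, [x2]           // 64-bit memory operation
       madd x3, x4, x5, x6     // 64-bit multiply-accumulate

   for which the workaround emits:

       ldr  x1, [x2]
       nop
       madd x3, x4, x5, x6

   (an illustrative sketch; the precise conditions are encoded below).  */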
bool
aarch64_madd_needs_nop (rtx_insn* insn)
{
  enum attr_type attr_type;
  rtx_insn *prev;
  rtx body;

  if (!TARGET_FIX_ERR_A53_835769)
    return false;

  if (recog_memoized (insn) < 0)
    return false;

  attr_type = get_attr_type (insn);
  if (!is_madd_op (attr_type))
    return false;

  prev = aarch64_prev_real_insn (insn);
  /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
     Restore recog state to INSN to avoid state corruption.  */
  extract_constrain_insn_cached (insn);

  if (!prev || !has_memory_op (prev))
    return false;

  body = single_set (prev);

  /* If the previous insn is a memory op and there is no dependency between
     it and the DImode madd, emit a NOP between them.  If body is NULL then we
     have a complex memory operation, probably a load/store pair.
     Be conservative for now and emit a NOP.  */
  if (GET_MODE (recog_data.operand[0]) == DImode
      && (!body || !dep_between_memop_and_curr (body)))
    return true;

  return false;
}

/* Implement FINAL_PRESCAN_INSN.  */

void
aarch64_final_prescan_insn (rtx_insn *insn)
{
  if (aarch64_madd_needs_nop (insn))
    fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
}
/* Return the equivalent letter for size.  */
static char
sizetochar (int size)
{
  switch (size)
    {
    case 64: return 'd';
    case 32: return 's';
    case 16: return 'h';
    case 8 : return 'b';
    default: gcc_unreachable ();
    }
}

/* Return true iff x is a uniform vector of floating-point
   constants, and the constant can be represented in
   quarter-precision form.  Note, as aarch64_float_const_representable
   rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0.  */
static bool
aarch64_vect_float_const_representable_p (rtx x)
{
  rtx elt;
  return (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_FLOAT
	  && const_vec_duplicate_p (x, &elt)
	  && aarch64_float_const_representable_p (elt));
}
/* Return true for valid and false for invalid.  */
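/* As a rough guide to the patterns recognized below: a V4SImode vector
   whose every element is 0x0000ab00 replicates the byte sequence
   { 0, 0xab, 0, 0 } and is matched as "movi Vd.4s, 0xab, lsl 8";
   the byte-inverted form would instead be matched as MVNI.  */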
bool
aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
			      struct simd_immediate_info *info)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG)	\
  matches = 1;						\
  for (i = 0; i < idx; i += (STRIDE))			\
    if (!(TEST))					\
      matches = 0;					\
  if (matches)						\
    {							\
      immtype = (CLASS);				\
      elsize = (ELSIZE);				\
      eshift = (SHIFT);					\
      emvn = (NEG);					\
      break;						\
    }

  unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
  unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
  unsigned char bytes[16];
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;
  int eshift, emvn;

  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      if (! (aarch64_simd_imm_zero_p (op, mode)
	     || aarch64_vect_float_const_representable_p (op)))
	return false;

      if (info)
	{
	  info->value = CONST_VECTOR_ELT (op, 0);
	  info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
	  info->mvn = false;
	  info->shift = 0;
	}

      return true;
    }

  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      /* The vector is provided in gcc endian-neutral fashion.  For aarch64_be,
	 it must be laid out in the vector register in reverse order.  */
      rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
      unsigned HOST_WIDE_INT elpart;
      unsigned int part, parts;

      if (CONST_INT_P (el))
	{
	  elpart = INTVAL (el);
	  parts = 1;
	}
      else if (GET_CODE (el) == CONST_DOUBLE)
	{
	  elpart = CONST_DOUBLE_LOW (el);
	  parts = 2;
	}
      else
	gcc_unreachable ();

      for (part = 0; part < parts; part++)
	{
	  unsigned int byte;
	  for (byte = 0; byte < innersize; byte++)
	    {
	      bytes[idx++] = (elpart & 0xff) ^ invmask;
	      elpart >>= BITS_PER_UNIT;
	    }
	  if (GET_CODE (el) == CONST_DOUBLE)
	    elpart = CONST_DOUBLE_HIGH (el);
	}
    }

  /* Sanity check.  */
  gcc_assert (idx == GET_MODE_SIZE (mode));

  do
    {
      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);

      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);

      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);

      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
	     && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);

      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);

      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);

      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);

      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);

      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);

      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
	     && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);

      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);

      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);

      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);

      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);

      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);

      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);

      CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);

      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
	     && bytes[i] == bytes[(i + 8) % idx], 0, 0);
    }
  while (0);

  if (immtype == -1)
    return false;

  if (info)
    {
      info->element_width = elsize;
      info->mvn = emvn != 0;
      info->shift = eshift;

      unsigned HOST_WIDE_INT imm = 0;

      if (immtype >= 12 && immtype <= 15)
	info->msl = true;

      /* Un-invert bytes of recognized vector, if necessary.  */
      if (invmask != 0)
	for (i = 0; i < idx; i++)
	  bytes[i] ^= invmask;

      if (immtype == 17)
	{
	  /* FIXME: Broken on 32-bit H_W_I hosts.  */
	  gcc_assert (sizeof (HOST_WIDE_INT) == 8);

	  for (i = 0; i < 8; i++)
	    imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
	      << (i * BITS_PER_UNIT);

	  info->value = GEN_INT (imm);
	}
      else
	{
	  for (i = 0; i < elsize / BITS_PER_UNIT; i++)
	    imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

	  /* Construct 'abcdefgh' because the assembler cannot handle
	     generic constants.  */
	  if (info->mvn)
	    imm = ~imm;
	  imm = (imm >> info->shift) & 0xff;
	  info->value = GEN_INT (imm);
	}
    }

  return true;
#undef CHECK
}
/* Check if immediate shift constants are within range.  */
bool
aarch64_simd_shift_imm_p (rtx x, machine_mode mode, bool left)
{
  int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
  if (left)
    return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
  else
    return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
}

/* Return true if X is a uniform vector where all elements
   are either the floating-point constant 0.0 or the
   integer constant 0.  */
bool
aarch64_simd_imm_zero_p (rtx x, machine_mode mode)
{
  return x == CONST0_RTX (mode);
}

bool
aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT imm = INTVAL (x);
  int i;

  for (i = 0; i < 8; i++)
    {
      unsigned int byte = imm & 0xff;
      if (byte != 0xff && byte != 0)
	return false;
      imm >>= 8;
    }

  return true;
}

bool
aarch64_mov_operand_p (rtx x, machine_mode mode)
{
  if (GET_CODE (x) == HIGH
      && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
    return true;

  if (CONST_INT_P (x))
    return true;

  if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
    return true;

  return aarch64_classify_symbolic_expression (x)
    == SYMBOL_TINY_ABSOLUTE;
}

/* Return a const_int vector of VAL.  */
rtx
aarch64_simd_gen_const_vector_dup (machine_mode mode, int val)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits);
  int i;

  for (i=0; i < nunits; i++)
    RTVEC_ELT (v, i) = GEN_INT (val);

  return gen_rtx_CONST_VECTOR (mode, v);
}

/* Check OP is a legal scalar immediate for the MOVI instruction.  */

bool
aarch64_simd_scalar_immediate_valid_for_move (rtx op, machine_mode mode)
{
  machine_mode vmode;

  gcc_assert (!VECTOR_MODE_P (mode));
  vmode = aarch64_preferred_simd_mode (mode);
  rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
  return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
}
/* Construct and return a PARALLEL RTX vector with elements numbering the
   lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
   the vector - from the perspective of the architecture.  This does not
   line up with GCC's perspective on lane numbers, so we end up with
   different masks depending on our target endian-ness.  The diagram
   below may help.  We must draw the distinction when building masks
   which select one half of the vector.  An instruction selecting
   architectural low-lanes for a big-endian target, must be described using
   a mask selecting GCC high-lanes.

		 Big-Endian             Little-Endian

GCC             0   1   2   3           3   2   1   0
              | x | x | x | x |       | x | x | x | x |
Architecture    3   2   1   0           3   2   1   0

Low Mask:         { 2, 3 }                { 0, 1 }
High Mask:        { 0, 1 }                { 2, 3 }
*/
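/* For example, on little-endian,
   aarch64_simd_vect_par_cnst_half (V4SImode, false) yields the PARALLEL
   [0 1] and aarch64_simd_vect_par_cnst_half (V4SImode, true) yields
   [2 3]; on big-endian the two masks are swapped, per the diagram
   above.  */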
rtx
aarch64_simd_vect_par_cnst_half (machine_mode mode, bool high)
{
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits / 2);
  int high_base = nunits / 2;
  int low_base = 0;
  int base;
  rtx t1;
  int i;

  if (BYTES_BIG_ENDIAN)
    base = high ? low_base : high_base;
  else
    base = high ? high_base : low_base;

  for (i = 0; i < nunits / 2; i++)
    RTVEC_ELT (v, i) = GEN_INT (base + i);

  t1 = gen_rtx_PARALLEL (mode, v);
  return t1;
}

/* Check OP for validity as a PARALLEL RTX vector with elements
   numbering the lanes of either the high (HIGH == TRUE) or low lanes,
   from the perspective of the architecture.  See the diagram above
   aarch64_simd_vect_par_cnst_half for more details.  */

bool
aarch64_simd_check_vect_par_cnst_half (rtx op, machine_mode mode,
				       bool high)
{
  rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high);
  HOST_WIDE_INT count_op = XVECLEN (op, 0);
  HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
  int i = 0;

  if (!VECTOR_MODE_P (mode))
    return false;

  if (count_op != count_ideal)
    return false;

  for (i = 0; i < count_ideal; i++)
    {
      rtx elt_op = XVECEXP (op, 0, i);
      rtx elt_ideal = XVECEXP (ideal, 0, i);

      if (!CONST_INT_P (elt_op)
	  || INTVAL (elt_ideal) != INTVAL (elt_op))
	return false;
    }
  return true;
}
/* Bounds-check lanes.  Ensure OPERAND lies between LOW (inclusive) and
   HIGH (exclusive).  */
void
aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
			  const_tree exp)
{
  HOST_WIDE_INT lane;
  gcc_assert (CONST_INT_P (operand));
  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    {
      if (exp)
	error ("%Klane %wd out of range %wd - %wd", exp, lane, low, high - 1);
      else
	error ("lane %wd out of range %wd - %wd", lane, low, high - 1);
    }
}

/* Return TRUE if OP is a valid vector addressing mode.  */
bool
aarch64_simd_mem_operand_p (rtx op)
{
  return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
			|| REG_P (XEXP (op, 0)));
}
/* Emit a register copy from operand to operand, taking care not to
   early-clobber source registers in the process.

   COUNT is the number of components into which the copy needs to be
   decomposed.  */
void
aarch64_simd_emit_reg_reg_move (rtx *operands, enum machine_mode mode,
				unsigned int count)
{
  unsigned int i;
  int rdest = REGNO (operands[0]);
  int rsrc = REGNO (operands[1]);

  if (!reg_overlap_mentioned_p (operands[0], operands[1])
      || rdest < rsrc)
    for (i = 0; i < count; i++)
      emit_move_insn (gen_rtx_REG (mode, rdest + i),
		      gen_rtx_REG (mode, rsrc + i));
  else
    for (i = 0; i < count; i++)
      emit_move_insn (gen_rtx_REG (mode, rdest + count - i - 1),
		      gen_rtx_REG (mode, rsrc + count - i - 1));
}

/* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
   one of VSTRUCT modes: OI, CI or XI.  */
int
aarch64_simd_attr_length_move (rtx_insn *insn)
{
  machine_mode mode;

  extract_insn_cached (insn);

  if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
    {
      mode = GET_MODE (recog_data.operand[0]);
      switch (mode)
	{
	case OImode:
	  return 8;
	case CImode:
	  return 12;
	case XImode:
	  return 16;
	default:
	  gcc_unreachable ();
	}
    }
  return 4;
}

/* Compute and return the length of aarch64_simd_reglist<mode>, where <mode> is
   one of VSTRUCT modes: OI, CI, or XI.  */
int
aarch64_simd_attr_length_rglist (enum machine_mode mode)
{
  return (GET_MODE_SIZE (mode) / UNITS_PER_VREG) * 4;
}

/* Implement target hook TARGET_VECTOR_ALIGNMENT.  The AAPCS64 sets the maximum
   alignment of a vector to 128 bits.  */
static HOST_WIDE_INT
aarch64_simd_vector_alignment (const_tree type)
{
  HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
  return MIN (align, 128);
}
/* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE.  */
static bool
aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
{
  if (is_packed)
    return false;

  /* We guarantee alignment for vectors up to 128-bits.  */
  if (tree_int_cst_compare (TYPE_SIZE (type),
			    bitsize_int (BIGGEST_ALIGNMENT)) > 0)
    return false;

  /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned.  */
  return true;
}

/* If VALS is a vector constant that can be loaded into a register
   using DUP, generate instructions to do so and return an RTX to
   assign to the register.  Otherwise return NULL_RTX.  */
static rtx
aarch64_simd_dup_constant (rtx vals)
{
  machine_mode mode = GET_MODE (vals);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  rtx x;

  if (!const_vec_duplicate_p (vals, &x))
    return NULL_RTX;

  /* We can load this constant by using DUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */
  x = copy_to_mode_reg (inner_mode, x);
  return gen_rtx_VEC_DUPLICATE (mode, x);
}

/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that can not be converted into a CONST_VECTOR.  */
static rtx
aarch64_simd_make_constant (rtx vals)
{
  machine_mode mode = GET_MODE (vals);
  rtx const_dup;
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);
  int n_const = 0;
  int i;

  if (GET_CODE (vals) == CONST_VECTOR)
    const_vec = vals;
  else if (GET_CODE (vals) == PARALLEL)
    {
      /* A CONST_VECTOR must contain only CONST_INTs and
	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
	 Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	    n_const++;
	}
      if (n_const == n_elts)
	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    }
  else
    gcc_unreachable ();

  if (const_vec != NULL_RTX
      && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
    /* Load using MOVI/MVNI.  */
    return const_vec;
  else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
    /* Loaded using DUP.  */
    return const_dup;
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  We can not take advantage of single-cycle
       LD1 because we need a PC-relative addressing mode.  */
    return const_vec;
  else
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We can not construct an initializer.  */
    return NULL_RTX;
}
void
aarch64_expand_vector_init (rtx target, rtx vals)
{
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0;
  rtx any_const = NULL_RTX;
  bool all_same = true;

  for (int i = 0; i < n_elts; ++i)
    {
      rtx x = XVECEXP (vals, 0, i);
      if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
	++n_var;
      else
	any_const = x;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (n_var == 0)
    {
      rtx constant = aarch64_simd_make_constant (vals);
      if (constant != NULL_RTX)
	{
	  emit_move_insn (target, constant);
	  return;
	}
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same)
    {
      rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
      return;
    }

  /* Half the fields (or less) are non-constant.  Load constant then overwrite
     varying fields.  Hope that this is more efficient than using the stack.  */
  if (n_var <= n_elts/2)
    {
      rtx copy = copy_rtx (vals);

      /* Load constant part of vector.  We really don't care what goes into the
	 parts we will overwrite, but we're more likely to be able to load the
	 constant efficiently if it has fewer, larger, repeating parts
	 (see aarch64_simd_valid_immediate).  */
      for (int i = 0; i < n_elts; i++)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	    continue;
	  rtx subst = any_const;
	  for (int bit = n_elts / 2; bit > 0; bit /= 2)
	    {
	      /* Look in the copied vector, as more elements are const.  */
	      rtx test = XVECEXP (copy, 0, i ^ bit);
	      if (CONST_INT_P (test) || CONST_DOUBLE_P (test))
		{
		  subst = test;
		  break;
		}
	    }
	  XVECEXP (copy, 0, i) = subst;
	}
      aarch64_expand_vector_init (target, copy);

      /* Insert variables.  */
      enum insn_code icode = optab_handler (vec_set_optab, mode);
      gcc_assert (icode != CODE_FOR_nothing);

      for (int i = 0; i < n_elts; i++)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	    continue;
	  x = copy_to_mode_reg (inner_mode, x);
	  emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
	}
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (int i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}

static unsigned HOST_WIDE_INT
aarch64_shift_truncation_mask (machine_mode mode)
{
  return
    (!SHIFT_COUNT_TRUNCATED
     || aarch64_vector_mode_supported_p (mode)
     || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
}
/* Select a format to encode pointers in exception handling data.  */
int
aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
{
   int type;
   switch (aarch64_cmodel)
     {
     case AARCH64_CMODEL_TINY:
     case AARCH64_CMODEL_TINY_PIC:
     case AARCH64_CMODEL_SMALL:
     case AARCH64_CMODEL_SMALL_PIC:
     case AARCH64_CMODEL_SMALL_SPIC:
       /* text+got+data < 4Gb.  4-byte signed relocs are sufficient
	  for everything.  */
       type = DW_EH_PE_sdata4;
       break;
     default:
       /* No assumptions here.  8-byte relocs required.  */
       type = DW_EH_PE_sdata8;
       break;
     }
   return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
}
/* Implement ASM_DECLARE_FUNCTION_NAME.  Output the ISA features used
   by the function fndecl.  */

void
aarch64_declare_function_name (FILE *stream, const char* name,
			       tree fndecl)
{
  tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);

  struct cl_target_option *targ_options;
  if (target_parts)
    targ_options = TREE_TARGET_OPTION (target_parts);
  else
    targ_options = TREE_TARGET_OPTION (target_option_current_node);
  gcc_assert (targ_options);

  const struct processor *this_arch
    = aarch64_get_arch (targ_options->x_explicit_arch);

  unsigned long isa_flags = targ_options->x_aarch64_isa_flags;
  std::string extension
    = aarch64_get_extension_string_for_isa_flags (isa_flags);
  asm_fprintf (asm_out_file, "\t.arch %s%s\n",
	       this_arch->name, extension.c_str ());

  /* Print the cpu name we're tuning for in the comments, might be
     useful to readers of the generated asm.  */

  const struct processor *this_tune
    = aarch64_get_tune_cpu (targ_options->x_explicit_tune_core);

  asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune %s\n",
	       this_tune->name);

  /* Don't forget the type directive for ELF.  */
  ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
  ASM_OUTPUT_LABEL (stream, name);
}
/* Emit load exclusive.  */

static void
aarch64_emit_load_exclusive (machine_mode mode, rtx rval,
			     rtx mem, rtx model_rtx)
{
  rtx (*gen) (rtx, rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_aarch64_load_exclusiveqi; break;
    case HImode: gen = gen_aarch64_load_exclusivehi; break;
    case SImode: gen = gen_aarch64_load_exclusivesi; break;
    case DImode: gen = gen_aarch64_load_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (rval, mem, model_rtx));
}

/* Emit store exclusive.  */

static void
aarch64_emit_store_exclusive (machine_mode mode, rtx bval,
			      rtx rval, rtx mem, rtx model_rtx)
{
  rtx (*gen) (rtx, rtx, rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_aarch64_store_exclusiveqi; break;
    case HImode: gen = gen_aarch64_store_exclusivehi; break;
    case SImode: gen = gen_aarch64_store_exclusivesi; break;
    case DImode: gen = gen_aarch64_store_exclusivedi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (bval, rval, mem, model_rtx));
}

/* Mark the previous jump instruction as unlikely.  */

static void
aarch64_emit_unlikely_jump (rtx insn)
{
  int very_unlikely = REG_BR_PROB_BASE / 100 - 1;

  insn = emit_jump_insn (insn);
  add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
}
/* Expand a compare and swap pattern.  */

void
aarch64_expand_compare_and_swap (rtx operands[])
{
  rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
  machine_mode mode, cmp_mode;
  typedef rtx (*gen_cas_fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
  int idx;
  gen_cas_fn gen;
  const gen_cas_fn split_cas[] =
  {
    gen_aarch64_compare_and_swapqi,
    gen_aarch64_compare_and_swaphi,
    gen_aarch64_compare_and_swapsi,
    gen_aarch64_compare_and_swapdi
  };
  const gen_cas_fn atomic_cas[] =
  {
    gen_aarch64_compare_and_swapqi_lse,
    gen_aarch64_compare_and_swaphi_lse,
    gen_aarch64_compare_and_swapsi_lse,
    gen_aarch64_compare_and_swapdi_lse
  };

  bval = operands[0];
  rval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = operands[5];
  mod_s = operands[6];
  mod_f = operands[7];
  mode = GET_MODE (mem);
  cmp_mode = mode;

  /* Normally the succ memory model must be stronger than fail, but in the
     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */

  if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
      && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
    mod_s = GEN_INT (MEMMODEL_ACQ_REL);

  switch (mode)
    {
    case QImode:
    case HImode:
      /* For short modes, we're going to perform the comparison in SImode,
	 so do the zero-extension now.  */
      cmp_mode = SImode;
      rval = gen_reg_rtx (SImode);
      oldval = convert_modes (SImode, mode, oldval, true);
      /* Fall through.  */

    case SImode:
    case DImode:
      /* Force the value into a register if needed.  */
      if (!aarch64_plus_operand (oldval, mode))
	oldval = force_reg (cmp_mode, oldval);
      break;

    default:
      gcc_unreachable ();
    }

  switch (mode)
    {
    case QImode: idx = 0; break;
    case HImode: idx = 1; break;
    case SImode: idx = 2; break;
    case DImode: idx = 3; break;
    default:
      gcc_unreachable ();
    }
  if (TARGET_LSE)
    gen = atomic_cas[idx];
  else
    gen = split_cas[idx];

  emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));

  if (mode == QImode || mode == HImode)
    emit_move_insn (operands[1], gen_lowpart (mode, rval));

  x = gen_rtx_REG (CCmode, CC_REGNUM);
  x = gen_rtx_EQ (SImode, x, const0_rtx);
  emit_insn (gen_rtx_SET (bval, x));
}
/* Test whether the target supports using an atomic load-operate instruction.
   CODE is the operation and AFTER is TRUE if the data in memory after the
   operation should be returned and FALSE if the data before the operation
   should be returned.  Returns FALSE if the operation isn't supported by the
   architecture.  */

bool
aarch64_atomic_ldop_supported_p (enum rtx_code code)
{
  if (!TARGET_LSE)
    return false;

  switch (code)
    {
    case SET:
    case AND:
    case IOR:
    case XOR:
    case MINUS:
    case PLUS:
      return true;
    default:
      return false;
    }
}

/* Emit a barrier, that is appropriate for memory model MODEL, at the end of a
   sequence implementing an atomic operation.  */

static void
aarch64_emit_post_barrier (enum memmodel model)
{
  const enum memmodel base_model = memmodel_base (model);

  if (is_mm_sync (model)
      && (base_model == MEMMODEL_ACQUIRE
	  || base_model == MEMMODEL_ACQ_REL
	  || base_model == MEMMODEL_SEQ_CST))
    {
      emit_insn (gen_mem_thread_fence (GEN_INT (MEMMODEL_SEQ_CST)));
    }
}
/* Emit an atomic compare-and-swap operation.  RVAL is the destination register
   for the data in memory.  EXPECTED is the value expected to be in memory.
   DESIRED is the value to store to memory.  MEM is the memory location.  MODEL
   is the memory ordering to use.  */

void
aarch64_gen_atomic_cas (rtx rval, rtx mem,
			rtx expected, rtx desired,
			rtx model)
{
  machine_mode mode;
  rtx (*gen) (rtx, rtx, rtx, rtx);

  mode = GET_MODE (mem);

  switch (mode)
    {
    case QImode: gen = gen_aarch64_atomic_casqi; break;
    case HImode: gen = gen_aarch64_atomic_cashi; break;
    case SImode: gen = gen_aarch64_atomic_cassi; break;
    case DImode: gen = gen_aarch64_atomic_casdi; break;
    default:
      gcc_unreachable ();
    }

  /* Move the expected value into the CAS destination register.  */
  emit_insn (gen_rtx_SET (rval, expected));

  /* Emit the CAS.  */
  emit_insn (gen (rval, mem, desired, model));

  /* Compare the expected value with the value loaded by the CAS, to establish
     whether the swap was made.  */
  aarch64_gen_compare_reg (EQ, rval, expected);
}
/* Split a compare and swap pattern.  */
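/* The split form expands to a load-exclusive/store-exclusive loop,
   roughly (for a strong SImode compare-and-swap):

     1:  ldaxr  w0, [x1]
	 cmp    w0, w2
	 b.ne   2f
	 stlxr  w3, w4, [x1]
	 cbnz   w3, 1b
     2:

   (a sketch; the exact instructions depend on the memory model and
   operand modes).  */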
void
aarch64_split_compare_and_swap (rtx operands[])
{
  rtx rval, mem, oldval, newval, scratch;
  machine_mode mode;
  bool is_weak;
  rtx_code_label *label1, *label2;
  rtx x, cond;
  rtx model_rtx;
  enum memmodel model;

  rval = operands[0];
  mem = operands[1];
  oldval = operands[2];
  newval = operands[3];
  is_weak = (operands[4] != const0_rtx);
  model_rtx = operands[5];
  scratch = operands[7];
  mode = GET_MODE (mem);
  model = memmodel_from_int (INTVAL (model_rtx));

  label1 = NULL;
  if (!is_weak)
    {
      label1 = gen_label_rtx ();
      emit_label (label1);
    }
  label2 = gen_label_rtx ();

  /* The initial load can be relaxed for a __sync operation since a final
     barrier will be emitted to stop code hoisting.  */
  if (is_mm_sync (model))
    aarch64_emit_load_exclusive (mode, rval, mem,
				 GEN_INT (MEMMODEL_RELAXED));
  else
    aarch64_emit_load_exclusive (mode, rval, mem, model_rtx);

  cond = aarch64_gen_compare_reg (NE, rval, oldval);
  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
  aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));

  aarch64_emit_store_exclusive (mode, scratch, mem, newval, model_rtx);

  if (!is_weak)
    {
      x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
				gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
      aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
    }
  else
    {
      cond = gen_rtx_REG (CCmode, CC_REGNUM);
      x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
      emit_insn (gen_rtx_SET (cond, x));
    }

  emit_label (label2);

  /* Emit any final barrier needed for a __sync operation.  */
  if (is_mm_sync (model))
    aarch64_emit_post_barrier (model);
}
/* Emit a BIC instruction.  */

static void
aarch64_emit_bic (machine_mode mode, rtx dst, rtx s1, rtx s2, int shift)
{
  rtx shift_rtx = GEN_INT (shift);
  rtx (*gen) (rtx, rtx, rtx, rtx);

  switch (mode)
    {
    case SImode: gen = gen_and_one_cmpl_lshrsi3; break;
    case DImode: gen = gen_and_one_cmpl_lshrdi3; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (dst, s2, shift_rtx, s1));
}

/* Emit an atomic swap.  */

static void
aarch64_emit_atomic_swap (machine_mode mode, rtx dst, rtx value,
			  rtx mem, rtx model)
{
  rtx (*gen) (rtx, rtx, rtx, rtx);

  switch (mode)
    {
    case QImode: gen = gen_aarch64_atomic_swpqi; break;
    case HImode: gen = gen_aarch64_atomic_swphi; break;
    case SImode: gen = gen_aarch64_atomic_swpsi; break;
    case DImode: gen = gen_aarch64_atomic_swpdi; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (dst, mem, value, model));
}
/* Operations supported by aarch64_emit_atomic_load_op.  */

enum aarch64_atomic_load_op_code
{
  AARCH64_LDOP_PLUS,	/* A + B  */
  AARCH64_LDOP_XOR,	/* A ^ B  */
  AARCH64_LDOP_OR,	/* A | B  */
  AARCH64_LDOP_BIC	/* A & ~B  */
};

/* Emit an atomic load-operate.  */

static void
aarch64_emit_atomic_load_op (enum aarch64_atomic_load_op_code code,
			     machine_mode mode, rtx dst, rtx src,
			     rtx mem, rtx model)
{
  typedef rtx (*aarch64_atomic_load_op_fn) (rtx, rtx, rtx, rtx);
  const aarch64_atomic_load_op_fn plus[] =
  {
    gen_aarch64_atomic_loadaddqi,
    gen_aarch64_atomic_loadaddhi,
    gen_aarch64_atomic_loadaddsi,
    gen_aarch64_atomic_loadadddi
  };
  const aarch64_atomic_load_op_fn eor[] =
  {
    gen_aarch64_atomic_loadeorqi,
    gen_aarch64_atomic_loadeorhi,
    gen_aarch64_atomic_loadeorsi,
    gen_aarch64_atomic_loadeordi
  };
  const aarch64_atomic_load_op_fn ior[] =
  {
    gen_aarch64_atomic_loadsetqi,
    gen_aarch64_atomic_loadsethi,
    gen_aarch64_atomic_loadsetsi,
    gen_aarch64_atomic_loadsetdi
  };
  const aarch64_atomic_load_op_fn bic[] =
  {
    gen_aarch64_atomic_loadclrqi,
    gen_aarch64_atomic_loadclrhi,
    gen_aarch64_atomic_loadclrsi,
    gen_aarch64_atomic_loadclrdi
  };
  aarch64_atomic_load_op_fn gen;
  int idx = 0;

  switch (mode)
    {
    case QImode: idx = 0; break;
    case HImode: idx = 1; break;
    case SImode: idx = 2; break;
    case DImode: idx = 3; break;
    default:
      gcc_unreachable ();
    }

  switch (code)
    {
    case AARCH64_LDOP_PLUS: gen = plus[idx]; break;
    case AARCH64_LDOP_XOR: gen = eor[idx]; break;
    case AARCH64_LDOP_OR: gen = ior[idx]; break;
    case AARCH64_LDOP_BIC: gen = bic[idx]; break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen (dst, mem, src, model));
}
/* Emit an atomic load+operate.  CODE is the operation.  OUT_DATA is the
   location to store the data read from memory.  OUT_RESULT is the location to
   store the result of the operation.  MEM is the memory location to read and
   modify.  MODEL_RTX is the memory ordering to use.  VALUE is the second
   operand for the operation.  Either OUT_DATA or OUT_RESULT, but not both, can
   be NULL.  */
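/* For example, an atomic fetch-and-sub is implemented by negating VALUE
   into the destination register and issuing LDADD, since there is no
   LDSUB; similarly AND is implemented by inverting VALUE and issuing
   LDCLR (a BIC-style operation).  */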
void
aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data, rtx out_result,
			 rtx mem, rtx value, rtx model_rtx)
{
  machine_mode mode = GET_MODE (mem);
  machine_mode wmode = (mode == DImode ? DImode : SImode);
  const bool short_mode = (mode < SImode);
  aarch64_atomic_load_op_code ldop_code;
  rtx src;
  rtx x;

  if (out_data)
    out_data = gen_lowpart (mode, out_data);

  if (out_result)
    out_result = gen_lowpart (mode, out_result);

  /* Make sure the value is in a register, putting it into a destination
     register if it needs to be manipulated.  */
  if (!register_operand (value, mode)
      || code == AND || code == MINUS)
    {
      src = out_result ? out_result : out_data;
      emit_move_insn (src, gen_lowpart (mode, value));
    }
  else
    src = value;
  gcc_assert (register_operand (src, mode));

  /* Preprocess the data for the operation as necessary.  If the operation is
     a SET then emit a swap instruction and finish.  */
  switch (code)
    {
    case SET:
      aarch64_emit_atomic_swap (mode, out_data, src, mem, model_rtx);
      return;

    case MINUS:
      /* Negate the value and treat it as a PLUS.  */
      {
	rtx neg_src;

	/* Resize the value if necessary.  */
	if (short_mode)
	  src = gen_lowpart (wmode, src);

	neg_src = gen_rtx_NEG (wmode, src);
	emit_insn (gen_rtx_SET (src, neg_src));

	if (short_mode)
	  src = gen_lowpart (mode, src);
      }
      /* Fall-through.  */
    case PLUS:
      ldop_code = AARCH64_LDOP_PLUS;
      break;

    case IOR:
      ldop_code = AARCH64_LDOP_OR;
      break;

    case XOR:
      ldop_code = AARCH64_LDOP_XOR;
      break;

    case AND:
      {
	rtx not_src;

	/* Resize the value if necessary.  */
	if (short_mode)
	  src = gen_lowpart (wmode, src);

	not_src = gen_rtx_NOT (wmode, src);
	emit_insn (gen_rtx_SET (src, not_src));

	if (short_mode)
	  src = gen_lowpart (mode, src);
      }
      ldop_code = AARCH64_LDOP_BIC;
      break;

    default:
      /* The operation can't be done with atomic instructions.  */
      gcc_unreachable ();
    }

  aarch64_emit_atomic_load_op (ldop_code, mode, out_data, src, mem, model_rtx);

  /* If necessary, calculate the data in memory after the update by redoing the
     operation from values in registers.  */
  if (!out_result)
    return;

  if (short_mode)
    {
      src = gen_lowpart (wmode, src);
      out_data = gen_lowpart (wmode, out_data);
      out_result = gen_lowpart (wmode, out_result);
    }

  x = NULL_RTX;

  switch (code)
    {
    case MINUS:
    case PLUS:
      x = gen_rtx_PLUS (wmode, out_data, src);
      break;
    case IOR:
      x = gen_rtx_IOR (wmode, out_data, src);
      break;
    case XOR:
      x = gen_rtx_XOR (wmode, out_data, src);
      break;
    case AND:
      aarch64_emit_bic (wmode, out_result, out_data, src, 0);
      return;
    default:
      gcc_unreachable ();
    }

  emit_set_insn (out_result, x);

  return;
}
/* Split an atomic operation.  */

void
aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
			 rtx value, rtx model_rtx, rtx cond)
{
  machine_mode mode = GET_MODE (mem);
  machine_mode wmode = (mode == DImode ? DImode : SImode);
  const enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
  const bool is_sync = is_mm_sync (model);
  rtx_code_label *label;
  rtx x;

  /* Split the atomic operation into a sequence.  */
  label = gen_label_rtx ();
  emit_label (label);

  if (new_out)
    new_out = gen_lowpart (wmode, new_out);
  if (old_out)
    old_out = gen_lowpart (wmode, old_out);
  else
    old_out = new_out;
  value = simplify_gen_subreg (wmode, value, mode, 0);

  /* The initial load can be relaxed for a __sync operation since a final
     barrier will be emitted to stop code hoisting.  */
  if (is_sync)
    aarch64_emit_load_exclusive (mode, old_out, mem,
				 GEN_INT (MEMMODEL_RELAXED));
  else
    aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);

  switch (code)
    {
    case SET:
      new_out = value;
      break;

    case NOT:
      x = gen_rtx_AND (wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      x = gen_rtx_NOT (wmode, new_out);
      emit_insn (gen_rtx_SET (new_out, x));
      break;

    case MINUS:
      if (CONST_INT_P (value))
	{
	  value = GEN_INT (-INTVAL (value));
	  code = PLUS;
	}
      /* Fall through.  */

    default:
      x = gen_rtx_fmt_ee (code, wmode, old_out, value);
      emit_insn (gen_rtx_SET (new_out, x));
      break;
    }

  aarch64_emit_store_exclusive (mode, cond, mem,
				gen_lowpart (mode, new_out), model_rtx);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
			    gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
  aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));

  /* Emit any final barrier needed for a __sync operation.  */
  if (is_sync)
    aarch64_emit_post_barrier (model);
}
static void
aarch64_init_libfuncs (void)
{
  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting to SFmode.  */

  /* Conversions.  */
  set_conv_libfunc (trunc_optab, HFmode, SFmode, "__gnu_f2h_ieee");
  set_conv_libfunc (sext_optab, SFmode, HFmode, "__gnu_h2f_ieee");

  /* Arithmetic.  */
  set_optab_libfunc (add_optab, HFmode, NULL);
  set_optab_libfunc (sdiv_optab, HFmode, NULL);
  set_optab_libfunc (smul_optab, HFmode, NULL);
  set_optab_libfunc (neg_optab, HFmode, NULL);
  set_optab_libfunc (sub_optab, HFmode, NULL);

  /* Comparisons.  */
  set_optab_libfunc (eq_optab, HFmode, NULL);
  set_optab_libfunc (ne_optab, HFmode, NULL);
  set_optab_libfunc (lt_optab, HFmode, NULL);
  set_optab_libfunc (le_optab, HFmode, NULL);
  set_optab_libfunc (ge_optab, HFmode, NULL);
  set_optab_libfunc (gt_optab, HFmode, NULL);
  set_optab_libfunc (unord_optab, HFmode, NULL);
}
/* Target hook for c_mode_for_suffix.  */
static machine_mode
aarch64_c_mode_for_suffix (char suffix)
{
  if (suffix == 'q')
    return TFmode;

  return VOIDmode;
}
/* We can only represent floating point constants which will fit in
   "quarter-precision" values.  These values are characterised by
   a sign bit, a 4-bit mantissa and a 3-bit exponent.  And are given
   by:

   (-1)^s * (n/16) * 2^r

   Where:
     's' is the sign bit.
     'n' is an integer in the range 16 <= n <= 31.
     'r' is an integer in the range -3 <= r <= 4.  */
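/* A worked example: 0.5 = (-1)^0 * (16/16) * 2^-1 and
   1.9375 = (-1)^0 * (31/16) * 2^0, so both are representable, whereas
   0.1 has no (n/16) * 2^r form with n in [16, 31] and r in [-3, 4]
   and is rejected.  */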
/* Return true iff X can be represented by a quarter-precision
   floating point immediate operand X.  Note, we cannot represent 0.0.  */
bool
aarch64_float_const_representable_p (rtx x)
{
  /* This represents our current view of how many bits
     make up the mantissa.  */
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
  int exponent;
  unsigned HOST_WIDE_INT mantissa, mask;
  REAL_VALUE_TYPE r, m;
  bool fail;

  if (!CONST_DOUBLE_P (x))
    return false;

  /* We don't support HFmode constants yet.  */
  if (GET_MODE (x) == VOIDmode || GET_MODE (x) == HFmode)
    return false;

  r = *CONST_DOUBLE_REAL_VALUE (x);

  /* We cannot represent infinities, NaNs or +/-zero.  We won't
     know if we have +zero until we analyse the mantissa, but we
     can reject the other invalid values.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
      || REAL_VALUE_MINUS_ZERO (r))
    return false;

  /* Extract exponent.  */
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);

  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     m1 holds the low part of the mantissa, m2 the high part.
     WARNING: If we ever have a representation using more than 2 * H_W_I - 1
     bits for the mantissa, this can fail (low bits will be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);

  /* If the low part of the mantissa has bits set we cannot represent
     the value.  */
  if (w.elt (0) != 0)
    return false;
  /* We have rejected the lower HOST_WIDE_INT, so update our
     understanding of how many bits lie in the mantissa and
     look only at the high HOST_WIDE_INT.  */
  mantissa = w.elt (1);
  point_pos -= HOST_BITS_PER_WIDE_INT;

  /* We can only represent values with a mantissa of the form 1.xxxx.  */
  mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return false;

  /* Having filtered unrepresentable values, we may now remove all
     but the highest 5 bits.  */
  mantissa >>= point_pos - 5;

  /* We cannot represent the value 0.0, so reject it.  This is handled
     elsewhere.  */
  if (mantissa == 0)
    return false;

  /* Then, as bit 4 is always set, we can mask it off, leaving
     the mantissa in the range [0, 15].  */
  mantissa &= ~(1 << 4);
  gcc_assert (mantissa <= 15);

  /* GCC internally does not use IEEE754-like encoding (where normalized
     significands are in the range [1, 2).  GCC uses [0.5, 1) (see real.c).
     Our mantissa values are shifted 4 places to the left relative to
     normalized IEEE754 so we must modify the exponent returned by REAL_EXP
     by 5 places to correct for GCC's representation.  */
  exponent = 5 - exponent;

  return (exponent >= 0 && exponent <= 7);
}
char*
aarch64_output_simd_mov_immediate (rtx const_vector,
				   machine_mode mode,
				   unsigned width)
{
  bool is_valid;
  static char templ[40];
  const char *mnemonic;
  const char *shift_op;
  unsigned int lane_count = 0;
  char element_char;

  struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };

  /* This will return true to show const_vector is legal for use as either
     an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
     also update INFO to show how the immediate should be generated.  */
  is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
  gcc_assert (is_valid);

  element_char = sizetochar (info.element_width);
  lane_count = width / info.element_width;

  mode = GET_MODE_INNER (mode);
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      gcc_assert (info.shift == 0 && ! info.mvn);
      /* For FP zero change it to a CONST_INT 0 and use the integer SIMD
	 move immediate path.  */
      if (aarch64_float_const_zero_rtx_p (info.value))
	info.value = GEN_INT (0);
      else
	{
#define buf_size 20
	  char float_buf[buf_size] = {'\0'};
	  real_to_decimal_for_mode (float_buf,
				    CONST_DOUBLE_REAL_VALUE (info.value),
				    buf_size, buf_size, 1, mode);
#undef buf_size

	  if (lane_count == 1)
	    snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
	  else
	    snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
		      lane_count, element_char, float_buf);
	  return templ;
	}
    }

  mnemonic = info.mvn ? "mvni" : "movi";
  shift_op = info.msl ? "msl" : "lsl";

  gcc_assert (CONST_INT_P (info.value));
  if (lane_count == 1)
    snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
	      mnemonic, UINTVAL (info.value));
  else if (info.shift)
    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
	      ", %s %d", mnemonic, lane_count, element_char,
	      UINTVAL (info.value), shift_op, info.shift);
  else
    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
	      mnemonic, lane_count, element_char, UINTVAL (info.value));
  return templ;
}

char*
aarch64_output_scalar_simd_mov_immediate (rtx immediate,
					  machine_mode mode)
{
  machine_mode vmode;

  gcc_assert (!VECTOR_MODE_P (mode));
  vmode = aarch64_simd_container_mode (mode, 64);
  rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
  return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
}
/* Split operands into moves from op[1] + op[2] into op[0].  */

void
aarch64_split_combinev16qi (rtx operands[3])
{
  unsigned int dest = REGNO (operands[0]);
  unsigned int src1 = REGNO (operands[1]);
  unsigned int src2 = REGNO (operands[2]);
  machine_mode halfmode = GET_MODE (operands[1]);
  unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
  rtx destlo, desthi;

  gcc_assert (halfmode == V16QImode);

  if (src1 == dest && src2 == dest + halfregs)
    {
      /* No-op move.  Can't split to nothing; emit something.  */
      emit_note (NOTE_INSN_DELETED);
      return;
    }

  /* Preserve register attributes for variable tracking.  */
  destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
  desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
			       GET_MODE_SIZE (halfmode));

  /* Special case of reversed high/low parts.  */
  if (reg_overlap_mentioned_p (operands[2], destlo)
      && reg_overlap_mentioned_p (operands[1], desthi))
    {
      emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
      emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
      emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
    }
  else if (!reg_overlap_mentioned_p (operands[2], destlo))
    {
      /* Try to avoid unnecessary moves if part of the result
	 is in the right place already.  */
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
    }
  else
    {
      if (src2 != dest + halfregs)
	emit_move_insn (desthi, operands[2]);
      if (src1 != dest)
	emit_move_insn (destlo, operands[1]);
    }
}
/* vec_perm support.  */

#define MAX_VECT_LEN 16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  machine_mode vmode;
  unsigned char nelt;
  bool one_vector_p;
  bool testing_p;
};
/* Generate a variable permutation.  */
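/* For example, reversing the bytes of a V8QImode vector can be done by
   feeding TBL a selector of { 7, 6, 5, 4, 3, 2, 1, 0 }; out-of-range
   selector bytes yield 0 in the corresponding result lane (a sketch of
   the TBL semantics this expansion relies on).  */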
11684 aarch64_expand_vec_perm_1 (rtx target
, rtx op0
, rtx op1
, rtx sel
)
11686 machine_mode vmode
= GET_MODE (target
);
11687 bool one_vector_p
= rtx_equal_p (op0
, op1
);
11689 gcc_checking_assert (vmode
== V8QImode
|| vmode
== V16QImode
);
11690 gcc_checking_assert (GET_MODE (op0
) == vmode
);
11691 gcc_checking_assert (GET_MODE (op1
) == vmode
);
11692 gcc_checking_assert (GET_MODE (sel
) == vmode
);
11693 gcc_checking_assert (TARGET_SIMD
);
11697 if (vmode
== V8QImode
)
11699 /* Expand the argument to a V16QI mode by duplicating it. */
11700 rtx pair
= gen_reg_rtx (V16QImode
);
11701 emit_insn (gen_aarch64_combinev8qi (pair
, op0
, op0
));
11702 emit_insn (gen_aarch64_tbl1v8qi (target
, pair
, sel
));
11706 emit_insn (gen_aarch64_tbl1v16qi (target
, op0
, sel
));
11713 if (vmode
== V8QImode
)
11715 pair
= gen_reg_rtx (V16QImode
);
11716 emit_insn (gen_aarch64_combinev8qi (pair
, op0
, op1
));
11717 emit_insn (gen_aarch64_tbl1v8qi (target
, pair
, sel
));
11721 pair
= gen_reg_rtx (OImode
);
11722 emit_insn (gen_aarch64_combinev16qi (pair
, op0
, op1
));
11723 emit_insn (gen_aarch64_tbl2v16qi (target
, pair
, sel
));
void
aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
{
  machine_mode vmode = GET_MODE (target);
  unsigned int nelt = GET_MODE_NUNITS (vmode);
  bool one_vector_p = rtx_equal_p (op0, op1);
  rtx mask;

  /* The TBL instruction does not use a modulo index, so we must take care
     of that ourselves.  */
  mask = aarch64_simd_gen_const_vector_dup (vmode,
					    one_vector_p ? nelt - 1
							 : 2 * nelt - 1);
  sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);

  /* For big-endian, we also need to reverse the index within the vector
     (but not which vector).  */
  if (BYTES_BIG_ENDIAN)
    {
      /* If one_vector_p, mask is a vector of (nelt - 1)'s already.  */
      if (!one_vector_p)
	mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
      sel = expand_simple_binop (vmode, XOR, sel, mask,
				 NULL, 0, OPTAB_LIB_WIDEN);
    }
  aarch64_expand_vec_perm_1 (target, op0, op1, sel);
}
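
/* Worked example of the masking above: with two V16QImode inputs (32 lanes
   in total) the AND reduces a selector lane of 37 to 37 & 31 = 5, giving
   the modulo behaviour the vec_perm optab requires but TBL itself does not
   provide.  */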
/* Recognize patterns suitable for the TRN instructions.  */
static bool
aarch64_evpc_trn (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i += 2)
    {
      if (d->perm[i] != i + odd)
	return false;
      if (d->perm[i + 1] != ((i + nelt + odd) & mask))
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }
  out = d->target;

  if (odd)
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_trn2v16qi; break;
	case V8QImode: gen = gen_aarch64_trn2v8qi; break;
	case V8HImode: gen = gen_aarch64_trn2v8hi; break;
	case V4HImode: gen = gen_aarch64_trn2v4hi; break;
	case V4SImode: gen = gen_aarch64_trn2v4si; break;
	case V2SImode: gen = gen_aarch64_trn2v2si; break;
	case V2DImode: gen = gen_aarch64_trn2v2di; break;
	case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
	case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
	case V2DFmode: gen = gen_aarch64_trn2v2df; break;
	default:
	  return false;
	}
    }
  else
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_trn1v16qi; break;
	case V8QImode: gen = gen_aarch64_trn1v8qi; break;
	case V8HImode: gen = gen_aarch64_trn1v8hi; break;
	case V4HImode: gen = gen_aarch64_trn1v4hi; break;
	case V4SImode: gen = gen_aarch64_trn1v4si; break;
	case V2SImode: gen = gen_aarch64_trn1v2si; break;
	case V2DImode: gen = gen_aarch64_trn1v2di; break;
	case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
	case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
	case V2DFmode: gen = gen_aarch64_trn1v2df; break;
	default:
	  return false;
	}
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
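
/* For example, on V4SImode with two input vectors, TRN1 matches the index
   vector {0, 4, 2, 6} (even-indexed lanes of the two inputs interleaved)
   and TRN2 matches {1, 5, 3, 7} (the odd-indexed lanes).  */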
/* Recognize patterns suitable for the UZP instructions.  */
static bool
aarch64_evpc_uzp (struct expand_vec_perm_d *d)
{
  unsigned int i, odd, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  if (d->perm[0] == 0)
    odd = 0;
  else if (d->perm[0] == 1)
    odd = 1;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt; i++)
    {
      unsigned elt = (i * 2 + odd) & mask;
      if (d->perm[i] != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      odd = !odd;
    }
  out = d->target;

  if (odd)
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
	case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
	case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
	case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
	case V4SImode: gen = gen_aarch64_uzp2v4si; break;
	case V2SImode: gen = gen_aarch64_uzp2v2si; break;
	case V2DImode: gen = gen_aarch64_uzp2v2di; break;
	case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
	case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
	case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
	default:
	  return false;
	}
    }
  else
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
	case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
	case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
	case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
	case V4SImode: gen = gen_aarch64_uzp1v4si; break;
	case V2SImode: gen = gen_aarch64_uzp1v2si; break;
	case V2DImode: gen = gen_aarch64_uzp1v2di; break;
	case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
	case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
	case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
	default:
	  return false;
	}
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
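
/* For example, on V4SImode with two input vectors, UZP1 matches {0, 2, 4, 6}
   (the even-indexed elements of the concatenated inputs) and UZP2 matches
   {1, 3, 5, 7} (the odd-indexed elements).  */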
/* Recognize patterns suitable for the ZIP instructions.  */
static bool
aarch64_evpc_zip (struct expand_vec_perm_d *d)
{
  unsigned int i, high, mask, nelt = d->nelt;
  rtx out, in0, in1, x;
  rtx (*gen) (rtx, rtx, rtx);
  machine_mode vmode = d->vmode;

  if (GET_MODE_UNIT_SIZE (vmode) > 8)
    return false;

  /* Note that these are little-endian tests.
     We correct for big-endian later.  */
  high = nelt / 2;
  if (d->perm[0] == high)
    ;
  else if (d->perm[0] == 0)
    high = 0;
  else
    return false;
  mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);

  for (i = 0; i < nelt / 2; i++)
    {
      unsigned elt = (i + high) & mask;
      if (d->perm[i * 2] != elt)
	return false;
      elt = (elt + nelt) & mask;
      if (d->perm[i * 2 + 1] != elt)
	return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  in0 = d->op0;
  in1 = d->op1;
  if (BYTES_BIG_ENDIAN)
    {
      x = in0, in0 = in1, in1 = x;
      high = !high;
    }
  out = d->target;

  if (high)
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_zip2v16qi; break;
	case V8QImode: gen = gen_aarch64_zip2v8qi; break;
	case V8HImode: gen = gen_aarch64_zip2v8hi; break;
	case V4HImode: gen = gen_aarch64_zip2v4hi; break;
	case V4SImode: gen = gen_aarch64_zip2v4si; break;
	case V2SImode: gen = gen_aarch64_zip2v2si; break;
	case V2DImode: gen = gen_aarch64_zip2v2di; break;
	case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
	case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
	case V2DFmode: gen = gen_aarch64_zip2v2df; break;
	default:
	  return false;
	}
    }
  else
    {
      switch (vmode)
	{
	case V16QImode: gen = gen_aarch64_zip1v16qi; break;
	case V8QImode: gen = gen_aarch64_zip1v8qi; break;
	case V8HImode: gen = gen_aarch64_zip1v8hi; break;
	case V4HImode: gen = gen_aarch64_zip1v4hi; break;
	case V4SImode: gen = gen_aarch64_zip1v4si; break;
	case V2SImode: gen = gen_aarch64_zip1v2si; break;
	case V2DImode: gen = gen_aarch64_zip1v2di; break;
	case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
	case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
	case V2DFmode: gen = gen_aarch64_zip1v2df; break;
	default:
	  return false;
	}
    }

  emit_insn (gen (out, in0, in1));
  return true;
}
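
/* For example, on V4SImode with two input vectors, ZIP1 matches {0, 4, 1, 5}
   (the low halves of the inputs interleaved) and ZIP2 matches {2, 6, 3, 7}
   (the high halves interleaved).  */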
/* Recognize patterns for the EXT insn.  */

static bool
aarch64_evpc_ext (struct expand_vec_perm_d *d)
{
  unsigned int i, nelt = d->nelt;
  rtx (*gen) (rtx, rtx, rtx, rtx);
  rtx offset;

  unsigned int location = d->perm[0]; /* Always < nelt.  */

  /* Check if the extracted indices are increasing by one.  */
  for (i = 1; i < nelt; i++)
    {
      unsigned int required = location + i;
      if (d->one_vector_p)
	{
	  /* We'll pass the same vector in twice, so allow indices to wrap.  */
	  required &= (nelt - 1);
	}
      if (d->perm[i] != required)
	return false;
    }

  switch (d->vmode)
    {
    case V16QImode: gen = gen_aarch64_extv16qi; break;
    case V8QImode: gen = gen_aarch64_extv8qi; break;
    case V4HImode: gen = gen_aarch64_extv4hi; break;
    case V8HImode: gen = gen_aarch64_extv8hi; break;
    case V2SImode: gen = gen_aarch64_extv2si; break;
    case V4SImode: gen = gen_aarch64_extv4si; break;
    case V2SFmode: gen = gen_aarch64_extv2sf; break;
    case V4SFmode: gen = gen_aarch64_extv4sf; break;
    case V2DImode: gen = gen_aarch64_extv2di; break;
    case V2DFmode: gen = gen_aarch64_extv2df; break;
    default:
      return false;
    }

  /* Success!  */
  if (d->testing_p)
    return true;

  /* The case where (location == 0) is a no-op for both big- and little-endian,
     and is removed by the mid-end at optimization levels -O1 and higher.  */

  if (BYTES_BIG_ENDIAN && (location != 0))
    {
      /* After setup, we want the high elements of the first vector (stored
	 at the LSB end of the register), and the low elements of the second
	 vector (stored at the MSB end of the register).  So swap.  */
      std::swap (d->op0, d->op1);
      /* location != 0 (above), so safe to assume (nelt - location) < nelt.  */
      location = nelt - location;
    }

  offset = GEN_INT (location);
  emit_insn (gen (d->target, d->op0, d->op1, offset));
  return true;
}
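
/* For example, on V4SImode the two-vector selector {1, 2, 3, 4} matches EXT
   with a lane offset of 1; with a single input, {3, 0, 1, 2} also matches
   because the indices may wrap, which implements a lane rotation.  */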
/* Recognize patterns for the REV insns.  */

static bool
aarch64_evpc_rev (struct expand_vec_perm_d *d)
{
  unsigned int i, j, diff, nelt = d->nelt;
  rtx (*gen) (rtx, rtx);

  if (!d->one_vector_p)
    return false;

  diff = d->perm[0];
  switch (diff)
    {
    case 7:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_aarch64_rev64v16qi; break;
	case V8QImode: gen = gen_aarch64_rev64v8qi;  break;
	default:
	  return false;
	}
      break;
    case 3:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_aarch64_rev32v16qi; break;
	case V8QImode: gen = gen_aarch64_rev32v8qi;  break;
	case V8HImode: gen = gen_aarch64_rev64v8hi;  break;
	case V4HImode: gen = gen_aarch64_rev64v4hi;  break;
	default:
	  return false;
	}
      break;
    case 1:
      switch (d->vmode)
	{
	case V16QImode: gen = gen_aarch64_rev16v16qi; break;
	case V8QImode: gen = gen_aarch64_rev16v8qi;  break;
	case V8HImode: gen = gen_aarch64_rev32v8hi;  break;
	case V4HImode: gen = gen_aarch64_rev32v4hi;  break;
	case V4SImode: gen = gen_aarch64_rev64v4si;  break;
	case V2SImode: gen = gen_aarch64_rev64v2si;  break;
	case V4SFmode: gen = gen_aarch64_rev64v4sf;  break;
	case V2SFmode: gen = gen_aarch64_rev64v2sf;  break;
	default:
	  return false;
	}
      break;
    default:
      return false;
    }

  for (i = 0; i < nelt; i += diff + 1)
    for (j = 0; j <= diff; j += 1)
      {
	/* This is guaranteed to be true as the value of diff
	   is 7, 3, 1 and we should have enough elements in the
	   queue to generate this.  Getting a vector mask with a
	   value of diff other than these values implies that
	   something is wrong by the time we get here.  */
	gcc_assert (i + j < nelt);
	if (d->perm[i + j] != i + diff - j)
	  return false;
      }

  /* Success!  */
  if (d->testing_p)
    return true;

  emit_insn (gen (d->target, d->op0));
  return true;
}
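
/* For example, on V4SImode the selector {1, 0, 3, 2} has diff == 1 and maps
   to REV64, swapping adjacent 32-bit elements within each 64-bit chunk.  */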
static bool
aarch64_evpc_dup (struct expand_vec_perm_d *d)
{
  rtx (*gen) (rtx, rtx, rtx);
  rtx out = d->target;
  rtx in0;
  machine_mode vmode = d->vmode;
  unsigned int i, elt, nelt = d->nelt;
  rtx lane;

  elt = d->perm[0];
  for (i = 1; i < nelt; i++)
    {
      if (elt != d->perm[i])
	return false;
    }

  /* The generic preparation in aarch64_expand_vec_perm_const_1
     swaps the operand order and the permute indices if it finds
     d->perm[0] to be in the second operand.  Thus, we can always
     use d->op0 and need not do any extra arithmetic to get the
     correct lane number.  */
  in0 = d->op0;
  lane = GEN_INT (elt); /* The pattern corrects for big-endian.  */

  switch (vmode)
    {
    case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
    case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
    case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
    case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
    case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
    case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
    case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
    case V8HFmode: gen = gen_aarch64_dup_lanev8hf; break;
    case V4HFmode: gen = gen_aarch64_dup_lanev4hf; break;
    case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
    case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
    case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
    default:
      return false;
    }

  emit_insn (gen (out, in0, lane));
  return true;
}
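
/* For example, the V4SImode selector {2, 2, 2, 2} broadcasts lane 2 of the
   input, i.e. a single DUP of the .s[2] element into all four lanes.  */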
static bool
aarch64_evpc_tbl (struct expand_vec_perm_d *d)
{
  rtx rperm[MAX_VECT_LEN], sel;
  machine_mode vmode = d->vmode;
  unsigned int i, nelt = d->nelt;

  if (d->testing_p)
    return true;

  /* Generic code will try constant permutation twice.  Once with the
     original mode and again with the elements lowered to QImode.
     So wait and don't do the selector expansion ourselves.  */
  if (vmode != V8QImode && vmode != V16QImode)
    return false;

  for (i = 0; i < nelt; ++i)
    {
      int nunits = GET_MODE_NUNITS (vmode);

      /* If big-endian and two vectors we end up with a weird mixed-endian
	 mode on NEON.  Reverse the index within each word but not the word
	 itself.  */
      rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
					   : d->perm[i]);
    }
  sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
  sel = force_reg (vmode, sel);

  aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
  return true;
}
static bool
aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
{
  /* The pattern matching functions above are written to look for a small
     number to begin the sequence (0, 1, N/2).  If we begin with an index
     from the second operand, we can swap the operands.  */
  if (d->perm[0] >= d->nelt)
    {
      unsigned i, nelt = d->nelt;

      gcc_assert (nelt == (nelt & -nelt));
      for (i = 0; i < nelt; ++i)
	d->perm[i] ^= nelt; /* Keep the same index, but in the other vector.  */

      std::swap (d->op0, d->op1);
    }

  if (TARGET_SIMD)
    {
      if (aarch64_evpc_rev (d))
	return true;
      else if (aarch64_evpc_ext (d))
	return true;
      else if (aarch64_evpc_dup (d))
	return true;
      else if (aarch64_evpc_zip (d))
	return true;
      else if (aarch64_evpc_uzp (d))
	return true;
      else if (aarch64_evpc_trn (d))
	return true;
      return aarch64_evpc_tbl (d);
    }

  return false;
}
/* Expand a vec_perm_const pattern.  */

bool
aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
{
  struct expand_vec_perm_d d;
  int i, nelt, which;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = GET_MODE (target);
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = which = 0; i < nelt; ++i)
    {
      rtx e = XVECEXP (sel, 0, i);
      int ei = INTVAL (e) & (2 * nelt - 1);
      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      d.one_vector_p = false;
      if (!rtx_equal_p (op0, op1))
	break;

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      /* Fall Through.  */
    case 2:
      for (i = 0; i < nelt; ++i)
	d.perm[i] &= nelt - 1;
      d.op0 = op1;
      d.one_vector_p = true;
      break;

    case 1:
      d.op1 = op0;
      d.one_vector_p = true;
      break;
    }

  return aarch64_expand_vec_perm_const_1 (&d);
}
static bool
aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
				     const unsigned char *sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt, which;
  bool ret;

  d.vmode = vmode;
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;
  memcpy (d.perm, sel, nelt);

  /* Calculate whether all elements are in one vector.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned char e = d.perm[i];
      gcc_assert (e < 2 * nelt);
      which |= (e < nelt ? 1 : 2);
    }

  /* If all elements are from the second vector, reindex as if from the
     first vector.  */
  if (which == 2)
    for (i = 0; i < nelt; ++i)
      d.perm[i] -= nelt;

  /* Check whether the mask can be applied to a single vector.  */
  d.one_vector_p = (which != 3);

  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!d.one_vector_p)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = aarch64_expand_vec_perm_const_1 (&d);
  end_sequence ();

  return ret;
}
rtx
aarch64_reverse_mask (enum machine_mode mode)
{
  /* We have to reverse each vector because we don't have
     a permuted load that can reverse-load according to ABI rules.  */
  rtx mask;
  rtvec v = rtvec_alloc (16);
  int i, j;
  int nunits = GET_MODE_NUNITS (mode);
  int usize = GET_MODE_UNIT_SIZE (mode);

  gcc_assert (BYTES_BIG_ENDIAN);
  gcc_assert (AARCH64_VALID_SIMD_QREG_MODE (mode));

  for (i = 0; i < nunits; i++)
    for (j = 0; j < usize; j++)
      RTVEC_ELT (v, i * usize + j) = GEN_INT ((i + 1) * usize - 1 - j);
  mask = gen_rtx_CONST_VECTOR (V16QImode, v);
  return force_reg (V16QImode, mask);
}
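
/* For example, for V4SImode (nunits = 4, usize = 4) this builds the byte
   selector {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12}, which
   reverses the bytes within each 4-byte element while keeping the element
   order unchanged.  */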
/* Implement MODES_TIEABLE_P.  */

bool
aarch64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
    return true;

  /* We specifically want to allow elements of "structure" modes to
     be tieable to the structure.  This more general condition allows
     other rarer situations too.  */
  if (TARGET_SIMD
      && aarch64_vector_mode_p (mode1)
      && aarch64_vector_mode_p (mode2))
    return true;

  return false;
}
/* Return a new RTX holding the result of moving POINTER forward by
   AMOUNT bytes.  */

static rtx
aarch64_move_pointer (rtx pointer, int amount)
{
  rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);

  return adjust_automodify_address (pointer, GET_MODE (pointer),
				    next, amount);
}

/* Return a new RTX holding the result of moving POINTER forward by the
   size of the mode it points to.  */

static rtx
aarch64_progress_pointer (rtx pointer)
{
  HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));

  return aarch64_move_pointer (pointer, amount);
}
/* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
   MODE bytes.  */

static void
aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
					      machine_mode mode)
{
  rtx reg = gen_reg_rtx (mode);

  /* "Cast" the pointers to the correct mode.  */
  *src = adjust_address (*src, mode, 0);
  *dst = adjust_address (*dst, mode, 0);
  /* Emit the memcpy.  */
  emit_move_insn (reg, *src);
  emit_move_insn (*dst, reg);
  /* Move the pointers forward.  */
  *src = aarch64_progress_pointer (*src);
  *dst = aarch64_progress_pointer (*dst);
}
/* Expand movmem, as if from a __builtin_memcpy.  Return true if
   we succeed, otherwise return false.  */

bool
aarch64_expand_movmem (rtx *operands)
{
  int n;
  rtx dst = operands[0];
  rtx src = operands[1];
  rtx base;
  bool speed_p = !optimize_function_for_size_p (cfun);

  /* When optimizing for size, give a better estimate of the length of a
     memcpy call, but use the default otherwise.  */
  unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;

  /* We can't do anything smart if the amount to copy is not constant.  */
  if (!CONST_INT_P (operands[2]))
    return false;

  n = UINTVAL (operands[2]);

  /* Try to keep the number of instructions low.  For cases below 16 bytes we
     need to make at most two moves.  For cases above 16 bytes it will be one
     move for each 16 byte chunk, then at most two additional moves.  */
  if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
    return false;

  base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
  dst = adjust_automodify_address (dst, VOIDmode, base, 0);

  base = copy_to_mode_reg (Pmode, XEXP (src, 0));
  src = adjust_automodify_address (src, VOIDmode, base, 0);

  /* Simple cases.  Copy 0-3 bytes, as (if applicable) a 2-byte, then a
     1-byte chunk.  */
  if (n < 4)
    {
      if (n >= 2)
	{
	  aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
	  n -= 2;
	}

      if (n == 1)
	aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);

      return true;
    }

  /* Copy 4-8 bytes.  First a 4-byte chunk, then (if applicable) a second
     4-byte chunk, partially overlapping with the previously copied chunk.  */
  if (n < 8)
    {
      aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
      n -= 4;
      if (n > 0)
	{
	  int move = n - 4;

	  src = aarch64_move_pointer (src, move);
	  dst = aarch64_move_pointer (dst, move);
	  aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
	}
      return true;
    }

  /* Copy more than 8 bytes.  Copy chunks of 16 bytes until we run out of
     them, then (if applicable) an 8-byte chunk.  */
  while (n >= 8)
    {
      if (n / 16)
	{
	  aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
	  n -= 16;
	}
      else
	{
	  aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
	  n -= 8;
	}
    }

  /* Finish the final bytes of the copy.  We can always do this in one
     instruction.  We either copy the exact amount we need, or partially
     overlap with the previous chunk we copied and copy 8-bytes.  */
  if (n == 0)
    return true;
  else if (n == 1)
    aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
  else if (n == 2)
    aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
  else if (n == 4)
    aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
  else if (n == 3)
    {
      src = aarch64_move_pointer (src, -1);
      dst = aarch64_move_pointer (dst, -1);
      aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
    }
  else
    {
      int move = n - 8;

      src = aarch64_move_pointer (src, move);
      dst = aarch64_move_pointer (dst, move);
      aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
    }

  return true;
}
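
/* Worked example: a 27-byte copy emits one 16-byte (TImode) block copy and
   one 8-byte (DImode) block copy, leaving 3 bytes; the tail then backs both
   pointers up by one and issues a 4-byte (SImode) copy that overlaps the
   previous chunk by a single byte.  */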
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
aarch64_asan_shadow_offset (void)
{
  return (HOST_WIDE_INT_1 << 36);
}
static bool
aarch64_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
					unsigned int align,
					enum by_pieces_operation op,
					bool speed_p)
{
  /* STORE_BY_PIECES can be used when copying a constant string, but
     in that case each 64-bit chunk takes 5 insns instead of 2 (LDR/STR).
     For now we always fail this and let the move_by_pieces code copy
     the string from read-only memory.  */
  if (op == STORE_BY_PIECES)
    return false;

  return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
}
static enum machine_mode
aarch64_code_to_ccmode (enum rtx_code code)
{
  switch (code)
    {
    case NE:
      return CC_DNEmode;

    case EQ:
      return CC_DEQmode;

    case LE:
      return CC_DLEmode;

    case LT:
      return CC_DLTmode;

    case GE:
      return CC_DGEmode;

    case GT:
      return CC_DGTmode;

    case LEU:
      return CC_DLEUmode;

    case LTU:
      return CC_DLTUmode;

    case GEU:
      return CC_DGEUmode;

    case GTU:
      return CC_DGTUmode;

    default:
      return CCmode;
    }
}
static rtx
aarch64_gen_ccmp_first (rtx *prep_seq, rtx *gen_seq,
			int code, tree treeop0, tree treeop1)
{
  enum machine_mode op_mode, cmp_mode, cc_mode;
  rtx op0, op1, cmp, target;
  int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
  enum insn_code icode;
  struct expand_operand ops[4];

  cc_mode = aarch64_code_to_ccmode ((enum rtx_code) code);
  if (cc_mode == CCmode)
    return NULL_RTX;

  start_sequence ();
  expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);

  op_mode = GET_MODE (op0);
  if (op_mode == VOIDmode)
    op_mode = GET_MODE (op1);

  switch (op_mode)
    {
    case QImode:
    case HImode:
    case SImode:
      cmp_mode = SImode;
      icode = CODE_FOR_cmpsi;
      break;

    case DImode:
      cmp_mode = DImode;
      icode = CODE_FOR_cmpdi;
      break;

    default:
      end_sequence ();
      return NULL_RTX;
    }

  op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
  op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
  if (!op0 || !op1)
    {
      end_sequence ();
      return NULL_RTX;
    }
  *prep_seq = get_insns ();
  end_sequence ();

  cmp = gen_rtx_fmt_ee ((enum rtx_code) code, cmp_mode, op0, op1);
  target = gen_rtx_REG (CCmode, CC_REGNUM);

  create_output_operand (&ops[0], target, CCmode);
  create_fixed_operand (&ops[1], cmp);
  create_fixed_operand (&ops[2], op0);
  create_fixed_operand (&ops[3], op1);

  start_sequence ();
  if (!maybe_expand_insn (icode, 4, ops))
    {
      end_sequence ();
      return NULL_RTX;
    }
  *gen_seq = get_insns ();
  end_sequence ();

  return gen_rtx_REG (cc_mode, CC_REGNUM);
}
static rtx
aarch64_gen_ccmp_next (rtx *prep_seq, rtx *gen_seq, rtx prev, int cmp_code,
		       tree treeop0, tree treeop1, int bit_code)
{
  rtx op0, op1, cmp0, cmp1, target;
  enum machine_mode op_mode, cmp_mode, cc_mode;
  int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
  enum insn_code icode = CODE_FOR_ccmp_andsi;
  struct expand_operand ops[6];

  cc_mode = aarch64_code_to_ccmode ((enum rtx_code) cmp_code);
  if (cc_mode == CCmode)
    return NULL_RTX;

  push_to_sequence ((rtx_insn*) *prep_seq);
  expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);

  op_mode = GET_MODE (op0);
  if (op_mode == VOIDmode)
    op_mode = GET_MODE (op1);

  switch (op_mode)
    {
    case QImode:
    case HImode:
    case SImode:
      cmp_mode = SImode;
      icode = (enum rtx_code) bit_code == AND ? CODE_FOR_ccmp_andsi
					      : CODE_FOR_ccmp_iorsi;
      break;

    case DImode:
      cmp_mode = DImode;
      icode = (enum rtx_code) bit_code == AND ? CODE_FOR_ccmp_anddi
					      : CODE_FOR_ccmp_iordi;
      break;

    default:
      end_sequence ();
      return NULL_RTX;
    }

  op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
  op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
  if (!op0 || !op1)
    {
      end_sequence ();
      return NULL_RTX;
    }
  *prep_seq = get_insns ();
  end_sequence ();

  target = gen_rtx_REG (cc_mode, CC_REGNUM);
  cmp1 = gen_rtx_fmt_ee ((enum rtx_code) cmp_code, cmp_mode, op0, op1);
  cmp0 = gen_rtx_fmt_ee (NE, cmp_mode, prev, const0_rtx);

  create_fixed_operand (&ops[0], prev);
  create_fixed_operand (&ops[1], target);
  create_fixed_operand (&ops[2], op0);
  create_fixed_operand (&ops[3], op1);
  create_fixed_operand (&ops[4], cmp0);
  create_fixed_operand (&ops[5], cmp1);

  push_to_sequence ((rtx_insn*) *gen_seq);
  if (!maybe_expand_insn (icode, 6, ops))
    {
      end_sequence ();
      return NULL_RTX;
    }

  *gen_seq = get_insns ();
  end_sequence ();

  return target;
}
#undef TARGET_GEN_CCMP_FIRST
#define TARGET_GEN_CCMP_FIRST aarch64_gen_ccmp_first

#undef TARGET_GEN_CCMP_NEXT
#define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next
/* Implement TARGET_SCHED_MACRO_FUSION_P.  Return true if target supports
   instruction fusion of some sort.  */

static bool
aarch64_macro_fusion_p (void)
{
  return aarch64_tune_params.fusible_ops != AARCH64_FUSE_NOTHING;
}
/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P.  Return true if PREV and CURR
   should be kept together during scheduling.  */

static bool
aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
{
  rtx set_dest;
  rtx prev_set = single_set (prev);
  rtx curr_set = single_set (curr);
  /* prev and curr are simple SET insns i.e. no flag setting or branching.  */
  bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);

  if (!aarch64_macro_fusion_p ())
    return false;

  if (simple_sets_p
      && (aarch64_tune_params.fusible_ops & AARCH64_FUSE_MOV_MOVK))
    {
      /* We are trying to match:
         prev (mov)  == (set (reg r0) (const_int imm16))
         curr (movk) == (set (zero_extract (reg r0)
                                           (const_int 16)
                                           (const_int 16))
                             (const_int imm16_1))  */

      set_dest = SET_DEST (curr_set);

      if (GET_CODE (set_dest) == ZERO_EXTRACT
	  && CONST_INT_P (SET_SRC (curr_set))
	  && CONST_INT_P (SET_SRC (prev_set))
	  && CONST_INT_P (XEXP (set_dest, 2))
	  && INTVAL (XEXP (set_dest, 2)) == 16
	  && REG_P (XEXP (set_dest, 0))
	  && REG_P (SET_DEST (prev_set))
	  && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
	return true;
    }

  if (simple_sets_p
      && (aarch64_tune_params.fusible_ops & AARCH64_FUSE_ADRP_ADD))
    {
      /* We're trying to match:
         prev (adrp) == (set (reg r1)
                             (high (symbol_ref ("SYM"))))
         curr (add)  == (set (reg r0)
                             (lo_sum (reg r1)
                                     (symbol_ref ("SYM"))))
         Note that r0 need not necessarily be the same as r1, especially
         during pre-regalloc scheduling.  */

      if (satisfies_constraint_Ush (SET_SRC (prev_set))
	  && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
	{
	  if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
	      && REG_P (XEXP (SET_SRC (curr_set), 0))
	      && REGNO (XEXP (SET_SRC (curr_set), 0))
		 == REGNO (SET_DEST (prev_set))
	      && rtx_equal_p (XEXP (SET_SRC (prev_set), 0),
			      XEXP (SET_SRC (curr_set), 1)))
	    return true;
	}
    }

  if (simple_sets_p
      && (aarch64_tune_params.fusible_ops & AARCH64_FUSE_MOVK_MOVK))
    {
      /* We're trying to match:
         prev (movk) == (set (zero_extract (reg r0)
                                           (const_int 16)
                                           (const_int 32))
                             (const_int imm16_1))
         curr (movk) == (set (zero_extract (reg r0)
                                           (const_int 16)
                                           (const_int 48))
                             (const_int imm16_2))  */

      if (GET_CODE (SET_DEST (prev_set)) == ZERO_EXTRACT
	  && GET_CODE (SET_DEST (curr_set)) == ZERO_EXTRACT
	  && REG_P (XEXP (SET_DEST (prev_set), 0))
	  && REG_P (XEXP (SET_DEST (curr_set), 0))
	  && REGNO (XEXP (SET_DEST (prev_set), 0))
	     == REGNO (XEXP (SET_DEST (curr_set), 0))
	  && CONST_INT_P (XEXP (SET_DEST (prev_set), 2))
	  && CONST_INT_P (XEXP (SET_DEST (curr_set), 2))
	  && INTVAL (XEXP (SET_DEST (prev_set), 2)) == 32
	  && INTVAL (XEXP (SET_DEST (curr_set), 2)) == 48
	  && CONST_INT_P (SET_SRC (prev_set))
	  && CONST_INT_P (SET_SRC (curr_set)))
	return true;
    }

  if (simple_sets_p
      && (aarch64_tune_params.fusible_ops & AARCH64_FUSE_ADRP_LDR))
    {
      /* We're trying to match:
         prev (adrp) == (set (reg r0)
                             (high (symbol_ref ("SYM"))))
         curr (ldr)  == (set (reg r1)
                             (mem (lo_sum (reg r0)
                                          (symbol_ref ("SYM")))))
         or
         curr (ldr)  == (set (reg r1)
                             (zero_extend (mem
                                   (lo_sum (reg r0)
                                           (symbol_ref ("SYM"))))))  */
      if (satisfies_constraint_Ush (SET_SRC (prev_set))
	  && REG_P (SET_DEST (prev_set)) && REG_P (SET_DEST (curr_set)))
	{
	  rtx curr_src = SET_SRC (curr_set);

	  if (GET_CODE (curr_src) == ZERO_EXTEND)
	    curr_src = XEXP (curr_src, 0);

	  if (MEM_P (curr_src) && GET_CODE (XEXP (curr_src, 0)) == LO_SUM
	      && REG_P (XEXP (XEXP (curr_src, 0), 0))
	      && REGNO (XEXP (XEXP (curr_src, 0), 0))
		 == REGNO (SET_DEST (prev_set))
	      && rtx_equal_p (XEXP (XEXP (curr_src, 0), 1),
			      XEXP (SET_SRC (prev_set), 0)))
	    return true;
	}
    }

  if ((aarch64_tune_params.fusible_ops & AARCH64_FUSE_CMP_BRANCH)
      && any_condjump_p (curr))
    {
      enum attr_type prev_type = get_attr_type (prev);

      /* FIXME: this misses some instructions which are considered simple
	 arithmetic for ThunderX.  Simple shifts are missed here.  */
      if (prev_type == TYPE_ALUS_SREG
	  || prev_type == TYPE_ALUS_IMM
	  || prev_type == TYPE_LOGICS_REG
	  || prev_type == TYPE_LOGICS_IMM)
	return true;
    }

  return false;
}
/* If MEM is in the form of [base+offset], extract the two parts of the
   address and store them in BASE and OFFSET; otherwise return false
   after clearing BASE and OFFSET.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  if (REG_P (addr))
    {
      *base = addr;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
    {
      *base = XEXP (addr, 0);
      *offset = XEXP (addr, 1);
      return true;
    }

  *base = NULL_RTX;
  *offset = NULL_RTX;

  return false;
}
/* Types for scheduling fusion.  */
enum sched_fusion_type
{
  SCHED_FUSION_NONE = 0,
  SCHED_FUSION_LD_SIGN_EXTEND,
  SCHED_FUSION_LD_ZERO_EXTEND,
  SCHED_FUSION_LD,
  SCHED_FUSION_ST,
  SCHED_FUSION_NUM
};
/* If INSN is a load or store of an address in the form of [base+offset],
   extract the two parts and store them in BASE and OFFSET.  Return the
   scheduling fusion type of this INSN.  */

static enum sched_fusion_type
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
{
  rtx x, dest, src;
  enum sched_fusion_type fusion = SCHED_FUSION_LD;

  gcc_assert (INSN_P (insn));
  x = PATTERN (insn);
  if (GET_CODE (x) != SET)
    return SCHED_FUSION_NONE;

  src = SET_SRC (x);
  dest = SET_DEST (x);

  machine_mode dest_mode = GET_MODE (dest);

  if (!aarch64_mode_valid_for_sched_fusion_p (dest_mode))
    return SCHED_FUSION_NONE;

  if (GET_CODE (src) == SIGN_EXTEND)
    {
      fusion = SCHED_FUSION_LD_SIGN_EXTEND;
      src = XEXP (src, 0);
      if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
	return SCHED_FUSION_NONE;
    }
  else if (GET_CODE (src) == ZERO_EXTEND)
    {
      fusion = SCHED_FUSION_LD_ZERO_EXTEND;
      src = XEXP (src, 0);
      if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
	return SCHED_FUSION_NONE;
    }

  if (GET_CODE (src) == MEM && REG_P (dest))
    extract_base_offset_in_addr (src, base, offset);
  else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
    {
      fusion = SCHED_FUSION_ST;
      extract_base_offset_in_addr (dest, base, offset);
    }
  else
    return SCHED_FUSION_NONE;

  if (*base == NULL_RTX || *offset == NULL_RTX)
    fusion = SCHED_FUSION_NONE;

  return fusion;
}
/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.

   Currently we only support fusing ldr or str instructions, so FUSION_PRI
   and PRI are only calculated for these instructions.  For other
   instructions, FUSION_PRI and PRI are simply set to MAX_PRI - 1.  In the
   future, other kinds of instruction fusion can be added by returning
   different priorities.

   It's important that irrelevant instructions get the largest FUSION_PRI.  */

static void
aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
			       int *fusion_pri, int *pri)
{
  int tmp, off_val;
  rtx base, offset;
  enum sched_fusion_type fusion;

  gcc_assert (INSN_P (insn));

  tmp = max_pri - 1;
  fusion = fusion_load_store (insn, &base, &offset);
  if (fusion == SCHED_FUSION_NONE)
    {
      *pri = tmp;
      *fusion_pri = tmp;
      return;
    }

  /* Set FUSION_PRI according to fusion type and base register.  */
  *fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);

  /* Calculate PRI.  */
  tmp /= 2;

  /* INSN with smaller offset goes first.  */
  off_val = (int)(INTVAL (offset));
  if (off_val >= 0)
    tmp -= (off_val & 0xfffff);
  else
    tmp += ((- off_val) & 0xfffff);

  *pri = tmp;
  return;
}
/* Given OPERANDS of consecutive load/store, check if we can merge
   them into ldp/stp.  LOAD is true if they are load instructions.
   MODE is the mode of memory operands.  */

bool
aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
				enum machine_mode mode)
{
  HOST_WIDE_INT offval_1, offval_2, msize;
  enum reg_class rclass_1, rclass_2;
  rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;

  if (load)
    {
      mem_1 = operands[1];
      mem_2 = operands[3];
      reg_1 = operands[0];
      reg_2 = operands[2];
      gcc_assert (REG_P (reg_1) && REG_P (reg_2));
      if (REGNO (reg_1) == REGNO (reg_2))
	return false;
    }
  else
    {
      mem_1 = operands[0];
      mem_2 = operands[2];
      reg_1 = operands[1];
      reg_2 = operands[3];
    }

  /* The mems cannot be volatile.  */
  if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2))
    return false;

  /* Check if the addresses are in the form of [base+offset].  */
  extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
  if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
    return false;
  extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
  if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
    return false;

  /* Check if the bases are the same.  */
  if (!rtx_equal_p (base_1, base_2))
    return false;

  offval_1 = INTVAL (offset_1);
  offval_2 = INTVAL (offset_2);
  msize = GET_MODE_SIZE (mode);
  /* Check if the offsets are consecutive.  */
  if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
    return false;

  /* Check if the addresses are clobbered by load.  */
  if (load)
    {
      if (reg_mentioned_p (reg_1, mem_1))
	return false;

      /* In increasing order, the last load can clobber the address.  */
      if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
	return false;
    }

  if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
    rclass_1 = FP_REGS;
  else
    rclass_1 = GENERAL_REGS;

  if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
    rclass_2 = FP_REGS;
  else
    rclass_2 = GENERAL_REGS;

  /* Check if the registers are of the same class.  */
  if (rclass_1 != rclass_2)
    return false;

  return true;
}
/* Given OPERANDS of consecutive load/store, check if we can merge
   them into ldp/stp by adjusting the offset.  LOAD is true if they
   are load instructions.  MODE is the mode of memory operands.

   Given the below consecutive stores:

     str  w1, [xb, 0x100]
     str  w1, [xb, 0x104]
     str  w1, [xb, 0x108]
     str  w1, [xb, 0x10c]

   Though the offsets are out of the range supported by stp, we can
   still pair them after adjusting the offset, like:

     add  scratch, xb, 0x100
     stp  w1, w1, [scratch]
     stp  w1, w1, [scratch, 0x8]

   The peephole patterns detecting this opportunity should guarantee
   the scratch register is available.  */

bool
aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
				       enum machine_mode mode)
{
  enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
  HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize;
  rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4;
  rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4;

  if (load)
    {
      reg_1 = operands[0];
      mem_1 = operands[1];
      reg_2 = operands[2];
      mem_2 = operands[3];
      reg_3 = operands[4];
      mem_3 = operands[5];
      reg_4 = operands[6];
      mem_4 = operands[7];
      gcc_assert (REG_P (reg_1) && REG_P (reg_2)
		  && REG_P (reg_3) && REG_P (reg_4));
      if (REGNO (reg_1) == REGNO (reg_2) || REGNO (reg_3) == REGNO (reg_4))
	return false;
    }
  else
    {
      mem_1 = operands[0];
      reg_1 = operands[1];
      mem_2 = operands[2];
      reg_2 = operands[3];
      mem_3 = operands[4];
      reg_3 = operands[5];
      mem_4 = operands[6];
      reg_4 = operands[7];
    }
  /* Skip if memory operand is by itself valid for ldp/stp.  */
  if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
    return false;

  /* The mems cannot be volatile.  */
  if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2)
      || MEM_VOLATILE_P (mem_3) || MEM_VOLATILE_P (mem_4))
    return false;

  /* Check if the addresses are in the form of [base+offset].  */
  extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
  if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
    return false;
  extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
  if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
    return false;
  extract_base_offset_in_addr (mem_3, &base_3, &offset_3);
  if (base_3 == NULL_RTX || offset_3 == NULL_RTX)
    return false;
  extract_base_offset_in_addr (mem_4, &base_4, &offset_4);
  if (base_4 == NULL_RTX || offset_4 == NULL_RTX)
    return false;

  /* Check if the bases are the same.  */
  if (!rtx_equal_p (base_1, base_2)
      || !rtx_equal_p (base_2, base_3)
      || !rtx_equal_p (base_3, base_4))
    return false;

  offval_1 = INTVAL (offset_1);
  offval_2 = INTVAL (offset_2);
  offval_3 = INTVAL (offset_3);
  offval_4 = INTVAL (offset_4);
  msize = GET_MODE_SIZE (mode);
  /* Check if the offsets are consecutive.  */
  if ((offval_1 != (offval_2 + msize)
       || offval_1 != (offval_3 + msize * 2)
       || offval_1 != (offval_4 + msize * 3))
      && (offval_4 != (offval_3 + msize)
	  || offval_4 != (offval_2 + msize * 2)
	  || offval_4 != (offval_1 + msize * 3)))
    return false;

  /* Check if the addresses are clobbered by load.  */
  if (load)
    {
      if (reg_mentioned_p (reg_1, mem_1)
	  || reg_mentioned_p (reg_2, mem_2)
	  || reg_mentioned_p (reg_3, mem_3))
	return false;

      /* In increasing order, the last load can clobber the address.  */
      if (offval_1 > offval_2 && reg_mentioned_p (reg_4, mem_4))
	return false;
    }

  if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
    rclass_1 = FP_REGS;
  else
    rclass_1 = GENERAL_REGS;

  if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
    rclass_2 = FP_REGS;
  else
    rclass_2 = GENERAL_REGS;

  if (REG_P (reg_3) && FP_REGNUM_P (REGNO (reg_3)))
    rclass_3 = FP_REGS;
  else
    rclass_3 = GENERAL_REGS;

  if (REG_P (reg_4) && FP_REGNUM_P (REGNO (reg_4)))
    rclass_4 = FP_REGS;
  else
    rclass_4 = GENERAL_REGS;

  /* Check if the registers are of the same class.  */
  if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 != rclass_4)
    return false;

  return true;
}
/* Given OPERANDS of consecutive load/store, this function pairs them
   into ldp/stp after adjusting the offset.  It depends on the fact
   that the addresses of load/store instructions are in increasing order.
   MODE is the mode of memory operands.  CODE is the rtl operator
   which should be applied to all memory operands, it's SIGN_EXTEND,
   ZERO_EXTEND or UNKNOWN.  */

bool
aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
			     enum machine_mode mode, RTX_CODE code)
{
  rtx base, offset, t1, t2;
  rtx mem_1, mem_2, mem_3, mem_4;
  HOST_WIDE_INT off_val, abs_off, adj_off, new_off, stp_off_limit, msize;

  if (load)
    {
      mem_1 = operands[1];
      mem_2 = operands[3];
      mem_3 = operands[5];
      mem_4 = operands[7];
    }
  else
    {
      mem_1 = operands[0];
      mem_2 = operands[2];
      mem_3 = operands[4];
      mem_4 = operands[6];
      gcc_assert (code == UNKNOWN);
    }

  extract_base_offset_in_addr (mem_1, &base, &offset);
  gcc_assert (base != NULL_RTX && offset != NULL_RTX);

  /* Adjust offset so it can fit in ldp/stp instruction.  */
  msize = GET_MODE_SIZE (mode);
  stp_off_limit = msize * 0x40;
  off_val = INTVAL (offset);
  abs_off = (off_val < 0) ? -off_val : off_val;
  new_off = abs_off % stp_off_limit;
  adj_off = abs_off - new_off;

  /* Further adjust to make sure all offsets are OK.  */
  if ((new_off + msize * 2) >= stp_off_limit)
    {
      adj_off += stp_off_limit;
      new_off -= stp_off_limit;
    }

  /* Make sure the adjustment can be done with ADD/SUB instructions.  */
  if (adj_off >= 0x1000)
    return false;

  if (off_val < 0)
    {
      adj_off = -adj_off;
      new_off = -new_off;
    }

  /* Create new memory references.  */
  mem_1 = change_address (mem_1, VOIDmode,
			  plus_constant (DImode, operands[8], new_off));

  /* Check if the adjusted address is OK for ldp/stp.  */
  if (!aarch64_mem_pair_operand (mem_1, mode))
    return false;

  msize = GET_MODE_SIZE (mode);
  mem_2 = change_address (mem_2, VOIDmode,
			  plus_constant (DImode,
					 operands[8],
					 new_off + msize));
  mem_3 = change_address (mem_3, VOIDmode,
			  plus_constant (DImode,
					 operands[8],
					 new_off + msize * 2));
  mem_4 = change_address (mem_4, VOIDmode,
			  plus_constant (DImode,
					 operands[8],
					 new_off + msize * 3));

  if (code == ZERO_EXTEND)
    {
      mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
      mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
      mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
      mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
    }
  else if (code == SIGN_EXTEND)
    {
      mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
      mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
      mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
      mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
    }

  if (load)
    {
      operands[1] = mem_1;
      operands[3] = mem_2;
      operands[5] = mem_3;
      operands[7] = mem_4;
    }
  else
    {
      operands[0] = mem_1;
      operands[2] = mem_2;
      operands[4] = mem_3;
      operands[6] = mem_4;
    }

  /* Emit adjusting instruction.  */
  emit_insn (gen_rtx_SET (operands[8], plus_constant (DImode, base, adj_off)));
  /* Emit ldp/stp instructions.  */
  t1 = gen_rtx_SET (operands[0], operands[1]);
  t2 = gen_rtx_SET (operands[2], operands[3]);
  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
  t1 = gen_rtx_SET (operands[4], operands[5]);
  t2 = gen_rtx_SET (operands[6], operands[7]);
  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
  return true;
}
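
/* Tracing the arithmetic on the SImode-store example shown before
   aarch64_operands_adjust_ok_for_ldpstp: msize = 4, so stp_off_limit is
   0x100; off_val = 0x100 gives new_off = 0 and adj_off = 0x100, which is
   below the 0x1000 ADD-immediate limit.  The code therefore emits
   "add scratch, xb, 0x100" followed by stp pairs at [scratch] and
   [scratch, 8].  */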
/* Return 1 if pseudo register should be created and used to hold
   GOT address for PIC code.  */

static bool
aarch64_use_pseudo_pic_reg (void)
{
  return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC;
}
/* Implement TARGET_UNSPEC_MAY_TRAP_P.  */

static int
aarch64_unspec_may_trap_p (const_rtx x, unsigned flags)
{
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTSMALLPIC:
    case UNSPEC_GOTSMALLPIC28K:
    case UNSPEC_GOTTINYPIC:
      return 0;
    default:
      break;
    }

  return default_unspec_may_trap_p (x, flags);
}
/* If X is a positive CONST_DOUBLE with a value that is a power of 2
   return the log2 of that value.  Otherwise return -1.  */

int
aarch64_fpconst_pow_of_2 (rtx x)
{
  const REAL_VALUE_TYPE *r;

  if (!CONST_DOUBLE_P (x))
    return -1;

  r = CONST_DOUBLE_REAL_VALUE (x);

  if (REAL_VALUE_NEGATIVE (*r)
      || REAL_VALUE_ISNAN (*r)
      || REAL_VALUE_ISINF (*r)
      || !real_isinteger (r, DFmode))
    return -1;

  return exact_log2 (real_to_integer (r));
}
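
/* For example, 8.0 yields 3 and 1.0 yields 0, while 6.0 (not a power of 2),
   0.5 (not an integer) and -4.0 (negative) all yield -1.  */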
/* If X is a vector of equal CONST_DOUBLE values and that value is
   Y, return the aarch64_fpconst_pow_of_2 of Y.  Otherwise return -1.  */

int
aarch64_vec_fpconst_pow_of_2 (rtx x)
{
  if (GET_CODE (x) != CONST_VECTOR)
    return -1;

  if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
    return -1;

  int firstval = aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, 0));
  if (firstval <= 0)
    return -1;

  for (int i = 1; i < CONST_VECTOR_NUNITS (x); i++)
    if (aarch64_fpconst_pow_of_2 (CONST_VECTOR_ELT (x, i)) != firstval)
      return -1;

  return firstval;
}
/* Implement TARGET_PROMOTED_TYPE to promote __fp16 to float.  */
static tree
aarch64_promoted_type (const_tree t)
{
  if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
    return float_type_node;
  return NULL_TREE;
}
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST aarch64_address_cost

/* This hook determines whether unnamed bitfields affect the alignment
   of the containing structure.  The hook returns true if the structure
   should inherit the alignment requirements of an unnamed bitfield's
   type.  */
#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
  hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk

#undef TARGET_ASM_SELECT_RTX_SECTION
#define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list

#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE aarch64_can_eliminate

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P aarch64_can_inline_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage

/* Only the least significant bit is used for initialization guard
   variables.  */
#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix

#ifdef TARGET_BIG_ENDIAN_DEFAULT
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
#endif

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL aarch64_builtin_decl

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL aarch64_builtin_reciprocal

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN aarch64_fold_builtin

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG aarch64_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance

#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE aarch64_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS aarch64_init_builtins

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode

#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_true

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE aarch64_mangle_type

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost

#undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

/* This target hook should return true if accesses to volatile bitfields
   should use the narrowest mode possible.  It should return false if these
   accesses should use the bitfield container type.  */
#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE aarch64_override_options

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  aarch64_override_options_after_change

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE aarch64_option_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE aarch64_option_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT aarch64_option_print

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P aarch64_option_valid_attribute_p

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION aarch64_set_current_function

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class

#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH aarch64_reassociation_width

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE aarch64_promoted_type

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB aarch64_return_in_msb

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  aarch64_sched_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  aarch64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p

#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  aarch64_builtin_vectorization_cost

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  aarch64_builtin_vectorized_function

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  aarch64_autovectorize_vector_sizes

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
  aarch64_atomic_assign_expand_fenv

/* Section anchor support.  */

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -256

/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
   byte offset; we can do much more for larger data types, but have no way
   to determine the size of the access.  We assume accesses are aligned.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  aarch64_simd_vector_alignment_reachable

/* vec_perm support.  */

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  aarch64_vectorize_vec_perm_const_ok

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs

#undef TARGET_FLAGS_REGNUM
#define TARGET_FLAGS_REGNUM CC_REGNUM

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET aarch64_asan_shadow_offset

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS aarch64_legitimize_address

#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
  aarch64_use_by_pieces_infrastructure_p

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority

#undef TARGET_UNSPEC_MAY_TRAP_P
#define TARGET_UNSPEC_MAY_TRAP_P aarch64_unspec_may_trap_p

#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG aarch64_use_pseudo_pic_reg

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND aarch64_print_operand

#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS aarch64_print_operand_address

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-aarch64.h"