[AArch64] Unify vector and core register save/restore code.
[official-gcc.git] / gcc / config / aarch64 / aarch64.c
blob e3a0c570b762ae216c13efc721574a0a20b387e4
1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2014 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "tree.h"
29 #include "stringpool.h"
30 #include "stor-layout.h"
31 #include "calls.h"
32 #include "varasm.h"
33 #include "regs.h"
34 #include "df.h"
35 #include "hard-reg-set.h"
36 #include "output.h"
37 #include "expr.h"
38 #include "reload.h"
39 #include "toplev.h"
40 #include "target.h"
41 #include "target-def.h"
42 #include "targhooks.h"
43 #include "ggc.h"
44 #include "function.h"
45 #include "tm_p.h"
46 #include "recog.h"
47 #include "langhooks.h"
48 #include "diagnostic-core.h"
49 #include "pointer-set.h"
50 #include "hash-table.h"
51 #include "vec.h"
52 #include "basic-block.h"
53 #include "tree-ssa-alias.h"
54 #include "internal-fn.h"
55 #include "gimple-fold.h"
56 #include "tree-eh.h"
57 #include "gimple-expr.h"
58 #include "is-a.h"
59 #include "gimple.h"
60 #include "gimplify.h"
61 #include "optabs.h"
62 #include "dwarf2.h"
63 #include "cfgloop.h"
64 #include "tree-vectorizer.h"
65 #include "config/arm/aarch-cost-tables.h"
66 #include "dumpfile.h"
67 #include "builtins.h"
69 /* Defined for convenience. */
70 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
72 /* Classifies an address.
74 ADDRESS_REG_IMM
75 A simple base register plus immediate offset.
77 ADDRESS_REG_WB
78 A base register indexed by immediate offset with writeback.
80 ADDRESS_REG_REG
81 A base register indexed by (optionally scaled) register.
83 ADDRESS_REG_UXTW
84 A base register indexed by (optionally scaled) zero-extended register.
86 ADDRESS_REG_SXTW
87 A base register indexed by (optionally scaled) sign-extended register.
89 ADDRESS_LO_SUM
90 A LO_SUM rtx with a base register and "LO12" symbol relocation.
92 ADDRESS_SYMBOLIC:
93 A constant symbolic address, in pc-relative literal pool. */
95 enum aarch64_address_type {
96 ADDRESS_REG_IMM,
97 ADDRESS_REG_WB,
98 ADDRESS_REG_REG,
99 ADDRESS_REG_UXTW,
100 ADDRESS_REG_SXTW,
101 ADDRESS_LO_SUM,
102 ADDRESS_SYMBOLIC
105 struct aarch64_address_info {
106 enum aarch64_address_type type;
107 rtx base;
108 rtx offset;
109 int shift;
110 enum aarch64_symbol_type symbol_type;
113 struct simd_immediate_info
115 rtx value;
116 int shift;
117 int element_width;
118 bool mvn;
119 bool msl;
122 /* The current code model. */
123 enum aarch64_code_model aarch64_cmodel;
125 #ifdef HAVE_AS_TLS
126 #undef TARGET_HAVE_TLS
127 #define TARGET_HAVE_TLS 1
128 #endif
130 static bool aarch64_lra_p (void);
131 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
132 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
133 const_tree,
134 enum machine_mode *, int *,
135 bool *);
136 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
137 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
138 static void aarch64_override_options_after_change (void);
139 static bool aarch64_vector_mode_supported_p (enum machine_mode);
140 static unsigned bit_count (unsigned HOST_WIDE_INT);
141 static bool aarch64_const_vec_all_same_int_p (rtx,
142 HOST_WIDE_INT, HOST_WIDE_INT);
144 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
145 const unsigned char *sel);
146 static int aarch64_address_cost (rtx, enum machine_mode, addr_space_t, bool);
148 /* The processor for which instructions should be scheduled. */
149 enum aarch64_processor aarch64_tune = cortexa53;
151 /* The current tuning set. */
152 const struct tune_params *aarch64_tune_params;
154 /* Mask to specify which instructions we are allowed to generate. */
155 unsigned long aarch64_isa_flags = 0;
157 /* Mask to specify which instruction scheduling options should be used. */
158 unsigned long aarch64_tune_flags = 0;
160 /* Tuning parameters. */
162 #if HAVE_DESIGNATED_INITIALIZERS
163 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
164 #else
165 #define NAMED_PARAM(NAME, VAL) (VAL)
166 #endif
168 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
169 __extension__
170 #endif
172 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
173 __extension__
174 #endif
175 static const struct cpu_addrcost_table generic_addrcost_table =
177 #if HAVE_DESIGNATED_INITIALIZERS
178 .addr_scale_costs =
179 #endif
181 NAMED_PARAM (qi, 0),
182 NAMED_PARAM (hi, 0),
183 NAMED_PARAM (si, 0),
184 NAMED_PARAM (ti, 0),
186 NAMED_PARAM (pre_modify, 0),
187 NAMED_PARAM (post_modify, 0),
188 NAMED_PARAM (register_offset, 0),
189 NAMED_PARAM (register_extend, 0),
190 NAMED_PARAM (imm_offset, 0)
193 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
194 __extension__
195 #endif
196 static const struct cpu_addrcost_table cortexa57_addrcost_table =
198 #if HAVE_DESIGNATED_INITIALIZERS
199 .addr_scale_costs =
200 #endif
202 NAMED_PARAM (qi, 0),
203 NAMED_PARAM (hi, 1),
204 NAMED_PARAM (si, 0),
205 NAMED_PARAM (ti, 1),
207 NAMED_PARAM (pre_modify, 0),
208 NAMED_PARAM (post_modify, 0),
209 NAMED_PARAM (register_offset, 0),
210 NAMED_PARAM (register_extend, 0),
211 NAMED_PARAM (imm_offset, 0),
214 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
215 __extension__
216 #endif
217 static const struct cpu_regmove_cost generic_regmove_cost =
219 NAMED_PARAM (GP2GP, 1),
220 NAMED_PARAM (GP2FP, 2),
221 NAMED_PARAM (FP2GP, 2),
222 /* We currently do not provide direct support for TFmode Q->Q move.
223 Therefore we need to raise the cost above 2 in order to have
224 reload handle the situation. */
225 NAMED_PARAM (FP2FP, 4)
228 /* Generic costs for vector insn classes. */
229 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
230 __extension__
231 #endif
232 static const struct cpu_vector_cost generic_vector_cost =
234 NAMED_PARAM (scalar_stmt_cost, 1),
235 NAMED_PARAM (scalar_load_cost, 1),
236 NAMED_PARAM (scalar_store_cost, 1),
237 NAMED_PARAM (vec_stmt_cost, 1),
238 NAMED_PARAM (vec_to_scalar_cost, 1),
239 NAMED_PARAM (scalar_to_vec_cost, 1),
240 NAMED_PARAM (vec_align_load_cost, 1),
241 NAMED_PARAM (vec_unalign_load_cost, 1),
242 NAMED_PARAM (vec_unalign_store_cost, 1),
243 NAMED_PARAM (vec_store_cost, 1),
244 NAMED_PARAM (cond_taken_branch_cost, 3),
245 NAMED_PARAM (cond_not_taken_branch_cost, 1)
248 /* Cortex-A57 costs for vector insn classes. */
249 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
250 __extension__
251 #endif
252 static const struct cpu_vector_cost cortexa57_vector_cost =
254 NAMED_PARAM (scalar_stmt_cost, 1),
255 NAMED_PARAM (scalar_load_cost, 4),
256 NAMED_PARAM (scalar_store_cost, 1),
257 NAMED_PARAM (vec_stmt_cost, 3),
258 NAMED_PARAM (vec_to_scalar_cost, 8),
259 NAMED_PARAM (scalar_to_vec_cost, 8),
260 NAMED_PARAM (vec_align_load_cost, 5),
261 NAMED_PARAM (vec_unalign_load_cost, 5),
262 NAMED_PARAM (vec_unalign_store_cost, 1),
263 NAMED_PARAM (vec_store_cost, 1),
264 NAMED_PARAM (cond_taken_branch_cost, 1),
265 NAMED_PARAM (cond_not_taken_branch_cost, 1)
268 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
269 __extension__
270 #endif
271 static const struct tune_params generic_tunings =
273 &cortexa57_extra_costs,
274 &generic_addrcost_table,
275 &generic_regmove_cost,
276 &generic_vector_cost,
277 NAMED_PARAM (memmov_cost, 4),
278 NAMED_PARAM (issue_rate, 2)
281 static const struct tune_params cortexa53_tunings =
283 &cortexa53_extra_costs,
284 &generic_addrcost_table,
285 &generic_regmove_cost,
286 &generic_vector_cost,
287 NAMED_PARAM (memmov_cost, 4),
288 NAMED_PARAM (issue_rate, 2)
291 static const struct tune_params cortexa57_tunings =
293 &cortexa57_extra_costs,
294 &cortexa57_addrcost_table,
295 &generic_regmove_cost,
296 &cortexa57_vector_cost,
297 NAMED_PARAM (memmov_cost, 4),
298 NAMED_PARAM (issue_rate, 3)
301 /* A processor implementing AArch64. */
302 struct processor
304 const char *const name;
305 enum aarch64_processor core;
306 const char *arch;
307 const unsigned long flags;
308 const struct tune_params *const tune;
311 /* Processor cores implementing AArch64. */
312 static const struct processor all_cores[] =
314 #define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
315 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
316 #include "aarch64-cores.def"
317 #undef AARCH64_CORE
318 {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
319 {NULL, aarch64_none, NULL, 0, NULL}
322 /* Architectures implementing AArch64. */
323 static const struct processor all_architectures[] =
325 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
326 {NAME, CORE, #ARCH, FLAGS, NULL},
327 #include "aarch64-arches.def"
328 #undef AARCH64_ARCH
329 {NULL, aarch64_none, NULL, 0, NULL}
332 /* Target specification. These are populated as command-line arguments
333 are processed, or NULL if not specified. */
334 static const struct processor *selected_arch;
335 static const struct processor *selected_cpu;
336 static const struct processor *selected_tune;
338 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
340 /* An ISA extension in the co-processor and main instruction set space. */
341 struct aarch64_option_extension
343 const char *const name;
344 const unsigned long flags_on;
345 const unsigned long flags_off;
348 /* ISA extensions in AArch64. */
349 static const struct aarch64_option_extension all_extensions[] =
351 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
352 {NAME, FLAGS_ON, FLAGS_OFF},
353 #include "aarch64-option-extensions.def"
354 #undef AARCH64_OPT_EXTENSION
355 {NULL, 0, 0}
358 /* Used to track the size of an address when generating a pre/post
359 increment address. */
360 static enum machine_mode aarch64_memory_reference_mode;
362 /* Used to force GTY into this file. */
363 static GTY(()) int gty_dummy;
365 /* A table of valid AArch64 "bitmask immediate" values for
366 logical instructions. */
368 #define AARCH64_NUM_BITMASKS 5334
369 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
371 typedef enum aarch64_cond_code
373 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
374 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
375 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
377 aarch64_cc;
379 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
381 /* The condition codes of the processor, and the inverse function. */
382 static const char * const aarch64_condition_codes[] =
384 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
385 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
388 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
389 unsigned
390 aarch64_dbx_register_number (unsigned regno)
392 if (GP_REGNUM_P (regno))
393 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
394 else if (regno == SP_REGNUM)
395 return AARCH64_DWARF_SP;
396 else if (FP_REGNUM_P (regno))
397 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
399 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
400 equivalent DWARF register. */
401 return DWARF_FRAME_REGISTERS;
404 /* Return TRUE if MODE is any of the large INT modes. */
405 static bool
406 aarch64_vect_struct_mode_p (enum machine_mode mode)
408 return mode == OImode || mode == CImode || mode == XImode;
411 /* Return TRUE if MODE is any of the vector modes. */
412 static bool
413 aarch64_vector_mode_p (enum machine_mode mode)
415 return aarch64_vector_mode_supported_p (mode)
416 || aarch64_vect_struct_mode_p (mode);
419 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
420 static bool
421 aarch64_array_mode_supported_p (enum machine_mode mode,
422 unsigned HOST_WIDE_INT nelems)
424 if (TARGET_SIMD
425 && AARCH64_VALID_SIMD_QREG_MODE (mode)
426 && (nelems >= 2 && nelems <= 4))
427 return true;
429 return false;
432 /* Implement HARD_REGNO_NREGS. */
435 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
437 switch (aarch64_regno_regclass (regno))
439 case FP_REGS:
440 case FP_LO_REGS:
441 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
442 default:
443 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
445 gcc_unreachable ();
448 /* Implement HARD_REGNO_MODE_OK. */
451 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
453 if (GET_MODE_CLASS (mode) == MODE_CC)
454 return regno == CC_REGNUM;
456 if (regno == SP_REGNUM)
457 /* The purpose of comparing with ptr_mode is to support the
458 global register variable associated with the stack pointer
459 register via the syntax of asm ("wsp") in ILP32. */
460 return mode == Pmode || mode == ptr_mode;
462 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
463 return mode == Pmode;
465 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
466 return 1;
468 if (FP_REGNUM_P (regno))
470 if (aarch64_vect_struct_mode_p (mode))
471 return
472 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
473 else
474 return 1;
477 return 0;
480 /* Implement HARD_REGNO_CALLER_SAVE_MODE. */
481 enum machine_mode
482 aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
483 enum machine_mode mode)
485 /* Handle modes that fit within single registers. */
486 if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
488 if (GET_MODE_SIZE (mode) >= 4)
489 return mode;
490 else
491 return SImode;
493 /* Fall back to generic for multi-reg and very large modes. */
494 else
495 return choose_hard_reg_mode (regno, nregs, false);
498 /* Return true if calls to DECL should be treated as
499 long-calls (i.e. called via a register). */
500 static bool
501 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
503 return false;
506 /* Return true if calls to symbol-ref SYM should be treated as
507 long-calls (i.e. called via a register). */
508 bool
509 aarch64_is_long_call_p (rtx sym)
511 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
514 /* Return true if the offsets to a zero/sign-extract operation
515 represent an expression that matches an extend operation. The
516 operands represent the parameters from
518 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
519 bool
520 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
521 rtx extract_imm)
523 HOST_WIDE_INT mult_val, extract_val;
525 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
526 return false;
528 mult_val = INTVAL (mult_imm);
529 extract_val = INTVAL (extract_imm);
531 if (extract_val > 8
532 && extract_val < GET_MODE_BITSIZE (mode)
533 && exact_log2 (extract_val & ~7) > 0
534 && (extract_val & 7) <= 4
535 && mult_val == (1 << (extract_val & 7)))
536 return true;
538 return false;
541 /* Emit an insn that's a simple single-set. Both the operands must be
542 known to be valid. */
543 inline static rtx
544 emit_set_insn (rtx x, rtx y)
546 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
549 /* X and Y are two things to compare using CODE. Emit the compare insn and
550 return the rtx for the CC register in the proper mode. */
552 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
554 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
555 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
557 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
558 return cc_reg;
561 /* Build the SYMBOL_REF for __tls_get_addr. */
563 static GTY(()) rtx tls_get_addr_libfunc;
566 aarch64_tls_get_addr (void)
568 if (!tls_get_addr_libfunc)
569 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
570 return tls_get_addr_libfunc;
573 /* Return the TLS model to use for ADDR. */
575 static enum tls_model
576 tls_symbolic_operand_type (rtx addr)
578 enum tls_model tls_kind = TLS_MODEL_NONE;
579 rtx sym, addend;
581 if (GET_CODE (addr) == CONST)
583 split_const (addr, &sym, &addend);
584 if (GET_CODE (sym) == SYMBOL_REF)
585 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
587 else if (GET_CODE (addr) == SYMBOL_REF)
588 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
590 return tls_kind;
593 /* We'll allow lo_sum's in our legitimate addresses
594 so that combine can take care of combining addresses where
595 necessary, but for generation purposes, we'll generate the address
596 as:
597 RTL Absolute
598 tmp = hi (symbol_ref); adrp x1, foo
599 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
602 PIC TLS
603 adrp x1, :got:foo adrp tmp, :tlsgd:foo
604 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
605 bl __tls_get_addr
608 Load TLS symbol, depending on TLS mechanism and TLS access model.
610 Global Dynamic - Traditional TLS:
611 adrp tmp, :tlsgd:imm
612 add dest, tmp, #:tlsgd_lo12:imm
613 bl __tls_get_addr
615 Global Dynamic - TLS Descriptors:
616 adrp dest, :tlsdesc:imm
617 ldr tmp, [dest, #:tlsdesc_lo12:imm]
618 add dest, dest, #:tlsdesc_lo12:imm
619 blr tmp
620 mrs tp, tpidr_el0
621 add dest, dest, tp
623 Initial Exec:
624 mrs tp, tpidr_el0
625 adrp tmp, :gottprel:imm
626 ldr dest, [tmp, #:gottprel_lo12:imm]
627 add dest, dest, tp
629 Local Exec:
630 mrs tp, tpidr_el0
631 add t0, tp, #:tprel_hi12:imm
632 add t0, #:tprel_lo12_nc:imm
635 static void
636 aarch64_load_symref_appropriately (rtx dest, rtx imm,
637 enum aarch64_symbol_type type)
639 switch (type)
641 case SYMBOL_SMALL_ABSOLUTE:
643 /* In ILP32, the mode of dest can be either SImode or DImode. */
644 rtx tmp_reg = dest;
645 enum machine_mode mode = GET_MODE (dest);
647 gcc_assert (mode == Pmode || mode == ptr_mode);
649 if (can_create_pseudo_p ())
650 tmp_reg = gen_reg_rtx (mode);
652 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
653 emit_insn (gen_add_losym (dest, tmp_reg, imm));
654 return;
657 case SYMBOL_TINY_ABSOLUTE:
658 emit_insn (gen_rtx_SET (Pmode, dest, imm));
659 return;
661 case SYMBOL_SMALL_GOT:
663 /* In ILP32, the mode of dest can be either SImode or DImode,
664 while the got entry is always of SImode size. The mode of
665 dest depends on how dest is used: if dest is assigned to a
666 pointer (e.g. in the memory), it has SImode; it may have
667 DImode if dest is dereferenced to access the memory.
668 This is why we have to handle three different ldr_got_small
669 patterns here (two patterns for ILP32). */
670 rtx tmp_reg = dest;
671 enum machine_mode mode = GET_MODE (dest);
673 if (can_create_pseudo_p ())
674 tmp_reg = gen_reg_rtx (mode);
676 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
677 if (mode == ptr_mode)
679 if (mode == DImode)
680 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
681 else
682 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
684 else
686 gcc_assert (mode == Pmode);
687 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
690 return;
693 case SYMBOL_SMALL_TLSGD:
695 rtx insns;
696 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
698 start_sequence ();
699 aarch64_emit_call_insn (gen_tlsgd_small (result, imm));
700 insns = get_insns ();
701 end_sequence ();
703 RTL_CONST_CALL_P (insns) = 1;
704 emit_libcall_block (insns, dest, result, imm);
705 return;
708 case SYMBOL_SMALL_TLSDESC:
710 enum machine_mode mode = GET_MODE (dest);
711 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
712 rtx tp;
714 gcc_assert (mode == Pmode || mode == ptr_mode);
716 /* In ILP32, the got entry is always of SImode size. Unlike
717 small GOT, the dest is fixed at reg 0. */
718 if (TARGET_ILP32)
719 emit_insn (gen_tlsdesc_small_si (imm));
720 else
721 emit_insn (gen_tlsdesc_small_di (imm));
722 tp = aarch64_load_tp (NULL);
724 if (mode != Pmode)
725 tp = gen_lowpart (mode, tp);
727 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
728 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
729 return;
732 case SYMBOL_SMALL_GOTTPREL:
734 /* In ILP32, the mode of dest can be either SImode or DImode,
735 while the got entry is always of SImode size. The mode of
736 dest depends on how dest is used: if dest is assigned to a
737 pointer (e.g. in the memory), it has SImode; it may have
738 DImode if dest is dereferenced to access the memory.
739 This is why we have to handle three different tlsie_small
740 patterns here (two patterns for ILP32). */
741 enum machine_mode mode = GET_MODE (dest);
742 rtx tmp_reg = gen_reg_rtx (mode);
743 rtx tp = aarch64_load_tp (NULL);
745 if (mode == ptr_mode)
747 if (mode == DImode)
748 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
749 else
751 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
752 tp = gen_lowpart (mode, tp);
755 else
757 gcc_assert (mode == Pmode);
758 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
761 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
762 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
763 return;
766 case SYMBOL_SMALL_TPREL:
768 rtx tp = aarch64_load_tp (NULL);
769 emit_insn (gen_tlsle_small (dest, tp, imm));
770 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
771 return;
774 case SYMBOL_TINY_GOT:
775 emit_insn (gen_ldr_got_tiny (dest, imm));
776 return;
778 default:
779 gcc_unreachable ();
783 /* Emit a move from SRC to DEST. Assume that the move expanders can
784 handle all moves if !can_create_pseudo_p (). The distinction is
785 important because, unlike emit_move_insn, the move expanders know
786 how to force Pmode objects into the constant pool even when the
787 constant pool address is not itself legitimate. */
788 static rtx
789 aarch64_emit_move (rtx dest, rtx src)
791 return (can_create_pseudo_p ()
792 ? emit_move_insn (dest, src)
793 : emit_move_insn_1 (dest, src));
796 /* Split a 128-bit move operation into two 64-bit move operations,
797 taking care to handle partial overlap of register to register
798 copies. Special cases are needed when moving between GP regs and
799 FP regs. SRC can be a register, constant or memory; DST a register
800 or memory. If either operand is memory it must not have any side
801 effects. */
802 void
803 aarch64_split_128bit_move (rtx dst, rtx src)
805 rtx dst_lo, dst_hi;
806 rtx src_lo, src_hi;
808 enum machine_mode mode = GET_MODE (dst);
810 gcc_assert (mode == TImode || mode == TFmode);
811 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
812 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
814 if (REG_P (dst) && REG_P (src))
816 int src_regno = REGNO (src);
817 int dst_regno = REGNO (dst);
819 /* Handle FP <-> GP regs. */
820 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
822 src_lo = gen_lowpart (word_mode, src);
823 src_hi = gen_highpart (word_mode, src);
825 if (mode == TImode)
827 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
828 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
830 else
832 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
833 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
835 return;
837 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
839 dst_lo = gen_lowpart (word_mode, dst);
840 dst_hi = gen_highpart (word_mode, dst);
842 if (mode == TImode)
844 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
845 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
847 else
849 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
850 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
852 return;
856 dst_lo = gen_lowpart (word_mode, dst);
857 dst_hi = gen_highpart (word_mode, dst);
858 src_lo = gen_lowpart (word_mode, src);
859 src_hi = gen_highpart_mode (word_mode, mode, src);
861 /* At most one pairing may overlap. */
862 if (reg_overlap_mentioned_p (dst_lo, src_hi))
864 aarch64_emit_move (dst_hi, src_hi);
865 aarch64_emit_move (dst_lo, src_lo);
867 else
869 aarch64_emit_move (dst_lo, src_lo);
870 aarch64_emit_move (dst_hi, src_hi);
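/* Return true if moving SRC into DST must be split into two 64-bit
   halves; only an FP-register to FP-register copy can be left as a
   single 128-bit move.  */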
874 bool
875 aarch64_split_128bit_move_p (rtx dst, rtx src)
877 return (! REG_P (src)
878 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
881 /* Split a complex SIMD combine. */
883 void
884 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
886 enum machine_mode src_mode = GET_MODE (src1);
887 enum machine_mode dst_mode = GET_MODE (dst);
889 gcc_assert (VECTOR_MODE_P (dst_mode));
891 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
893 rtx (*gen) (rtx, rtx, rtx);
895 switch (src_mode)
897 case V8QImode:
898 gen = gen_aarch64_simd_combinev8qi;
899 break;
900 case V4HImode:
901 gen = gen_aarch64_simd_combinev4hi;
902 break;
903 case V2SImode:
904 gen = gen_aarch64_simd_combinev2si;
905 break;
906 case V2SFmode:
907 gen = gen_aarch64_simd_combinev2sf;
908 break;
909 case DImode:
910 gen = gen_aarch64_simd_combinedi;
911 break;
912 case DFmode:
913 gen = gen_aarch64_simd_combinedf;
914 break;
915 default:
916 gcc_unreachable ();
919 emit_insn (gen (dst, src1, src2));
920 return;
924 /* Split a complex SIMD move. */
926 void
927 aarch64_split_simd_move (rtx dst, rtx src)
929 enum machine_mode src_mode = GET_MODE (src);
930 enum machine_mode dst_mode = GET_MODE (dst);
932 gcc_assert (VECTOR_MODE_P (dst_mode));
934 if (REG_P (dst) && REG_P (src))
936 rtx (*gen) (rtx, rtx);
938 gcc_assert (VECTOR_MODE_P (src_mode));
940 switch (src_mode)
942 case V16QImode:
943 gen = gen_aarch64_split_simd_movv16qi;
944 break;
945 case V8HImode:
946 gen = gen_aarch64_split_simd_movv8hi;
947 break;
948 case V4SImode:
949 gen = gen_aarch64_split_simd_movv4si;
950 break;
951 case V2DImode:
952 gen = gen_aarch64_split_simd_movv2di;
953 break;
954 case V4SFmode:
955 gen = gen_aarch64_split_simd_movv4sf;
956 break;
957 case V2DFmode:
958 gen = gen_aarch64_split_simd_movv2df;
959 break;
960 default:
961 gcc_unreachable ();
964 emit_insn (gen (dst, src));
965 return;
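/* Force VALUE into a register of MODE, reusing X as the destination
   when new pseudo registers cannot be created (e.g. during or after
   reload).  */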
969 static rtx
970 aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
972 if (can_create_pseudo_p ())
973 return force_reg (mode, value);
974 else
976 x = aarch64_emit_move (x, value);
977 return x;
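/* Return a legitimate expression for REG plus OFFSET in MODE.  If OFFSET
   is not a valid add immediate, load it into TEMP first.  */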
982 static rtx
983 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
985 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
987 rtx high;
988 /* Load the full offset into a register. This
989 might be improvable in the future. */
990 high = GEN_INT (offset);
991 offset = 0;
992 high = aarch64_force_temporary (mode, temp, high);
993 reg = aarch64_force_temporary (mode, temp,
994 gen_rtx_PLUS (mode, high, reg));
996 return plus_constant (mode, reg, offset);
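/* Expand a move of immediate IMM (a symbolic or integer constant) into
   register DEST of SImode or DImode.  Integer constants are synthesised
   from moves, bit-field inserts and arithmetic/logical combinations of
   bitmask immediates.  */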
999 void
1000 aarch64_expand_mov_immediate (rtx dest, rtx imm)
1002 enum machine_mode mode = GET_MODE (dest);
1003 unsigned HOST_WIDE_INT mask;
1004 int i;
1005 bool first;
1006 unsigned HOST_WIDE_INT val;
1007 bool subtargets;
1008 rtx subtarget;
1009 int one_match, zero_match;
1011 gcc_assert (mode == SImode || mode == DImode);
1013 /* Check what type of symbol it is. */
1014 if (GET_CODE (imm) == SYMBOL_REF
1015 || GET_CODE (imm) == LABEL_REF
1016 || GET_CODE (imm) == CONST)
1018 rtx mem, base, offset;
1019 enum aarch64_symbol_type sty;
1021 /* If we have (const (plus symbol offset)), separate out the offset
1022 before we start classifying the symbol. */
1023 split_const (imm, &base, &offset);
1025 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
1026 switch (sty)
1028 case SYMBOL_FORCE_TO_MEM:
1029 if (offset != const0_rtx
1030 && targetm.cannot_force_const_mem (mode, imm))
1032 gcc_assert (can_create_pseudo_p ());
1033 base = aarch64_force_temporary (mode, dest, base);
1034 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1035 aarch64_emit_move (dest, base);
1036 return;
1038 mem = force_const_mem (ptr_mode, imm);
1039 gcc_assert (mem);
1040 if (mode != ptr_mode)
1041 mem = gen_rtx_ZERO_EXTEND (mode, mem);
1042 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1043 return;
1045 case SYMBOL_SMALL_TLSGD:
1046 case SYMBOL_SMALL_TLSDESC:
1047 case SYMBOL_SMALL_GOTTPREL:
1048 case SYMBOL_SMALL_GOT:
1049 case SYMBOL_TINY_GOT:
1050 if (offset != const0_rtx)
1052 gcc_assert(can_create_pseudo_p ());
1053 base = aarch64_force_temporary (mode, dest, base);
1054 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1055 aarch64_emit_move (dest, base);
1056 return;
1058 /* FALLTHRU */
1060 case SYMBOL_SMALL_TPREL:
1061 case SYMBOL_SMALL_ABSOLUTE:
1062 case SYMBOL_TINY_ABSOLUTE:
1063 aarch64_load_symref_appropriately (dest, imm, sty);
1064 return;
1066 default:
1067 gcc_unreachable ();
1071 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
1073 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1074 return;
1077 if (!CONST_INT_P (imm))
1079 if (GET_CODE (imm) == HIGH)
1080 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1081 else
1083 rtx mem = force_const_mem (mode, imm);
1084 gcc_assert (mem);
1085 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1088 return;
1091 if (mode == SImode)
1093 /* We know we can't do this in 1 insn, and we must be able to do it
1094 in two; so don't mess around looking for sequences that don't buy
1095 us anything. */
1096 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
1097 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1098 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
1099 return;
1102 /* Remaining cases are all for DImode. */
1104 val = INTVAL (imm);
1105 subtargets = optimize && can_create_pseudo_p ();
1107 one_match = 0;
1108 zero_match = 0;
1109 mask = 0xffff;
1111 for (i = 0; i < 64; i += 16, mask <<= 16)
1113 if ((val & mask) == 0)
1114 zero_match++;
1115 else if ((val & mask) == mask)
1116 one_match++;
1119 if (one_match == 2)
1121 mask = 0xffff;
1122 for (i = 0; i < 64; i += 16, mask <<= 16)
1124 if ((val & mask) != mask)
1126 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1127 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1128 GEN_INT ((val >> i) & 0xffff)));
1129 return;
1132 gcc_unreachable ();
1135 if (zero_match == 2)
1136 goto simple_sequence;
1138 mask = 0x0ffff0000UL;
1139 for (i = 16; i < 64; i += 16, mask <<= 16)
1141 HOST_WIDE_INT comp = mask & ~(mask - 1);
1143 if (aarch64_uimm12_shift (val - (val & mask)))
1145 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1147 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1148 emit_insn (gen_adddi3 (dest, subtarget,
1149 GEN_INT (val - (val & mask))));
1150 return;
1152 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1154 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1156 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1157 GEN_INT ((val + comp) & mask)));
1158 emit_insn (gen_adddi3 (dest, subtarget,
1159 GEN_INT (val - ((val + comp) & mask))));
1160 return;
1162 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1164 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1166 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1167 GEN_INT ((val - comp) | ~mask)));
1168 emit_insn (gen_adddi3 (dest, subtarget,
1169 GEN_INT (val - ((val - comp) | ~mask))));
1170 return;
1172 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1174 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1176 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1177 GEN_INT (val | ~mask)));
1178 emit_insn (gen_adddi3 (dest, subtarget,
1179 GEN_INT (val - (val | ~mask))));
1180 return;
1184 /* See if we can do it by arithmetically combining two
1185 immediates. */
1186 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1188 int j;
1189 mask = 0xffff;
1191 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1192 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1194 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1195 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1196 GEN_INT (aarch64_bitmasks[i])));
1197 emit_insn (gen_adddi3 (dest, subtarget,
1198 GEN_INT (val - aarch64_bitmasks[i])));
1199 return;
1202 for (j = 0; j < 64; j += 16, mask <<= 16)
1204 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1206 emit_insn (gen_rtx_SET (VOIDmode, dest,
1207 GEN_INT (aarch64_bitmasks[i])));
1208 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1209 GEN_INT ((val >> j) & 0xffff)));
1210 return;
1215 /* See if we can do it by logically combining two immediates. */
1216 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1218 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1220 int j;
1222 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1223 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1225 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1226 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1227 GEN_INT (aarch64_bitmasks[i])));
1228 emit_insn (gen_iordi3 (dest, subtarget,
1229 GEN_INT (aarch64_bitmasks[j])));
1230 return;
1233 else if ((val & aarch64_bitmasks[i]) == val)
1235 int j;
1237 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1238 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1241 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1242 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1243 GEN_INT (aarch64_bitmasks[j])));
1244 emit_insn (gen_anddi3 (dest, subtarget,
1245 GEN_INT (aarch64_bitmasks[i])));
1246 return;
1251 simple_sequence:
1252 first = true;
1253 mask = 0xffff;
1254 for (i = 0; i < 64; i += 16, mask <<= 16)
1256 if ((val & mask) != 0)
1258 if (first)
1260 emit_insn (gen_rtx_SET (VOIDmode, dest,
1261 GEN_INT (val & mask)));
1262 first = false;
1264 else
1265 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1266 GEN_INT ((val >> i) & 0xffff)));
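/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL.  */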
1271 static bool
1272 aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
1273 tree exp ATTRIBUTE_UNUSED)
1275 /* Currently, always true. */
1276 return true;
1279 /* Implement TARGET_PASS_BY_REFERENCE. */
1281 static bool
1282 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1283 enum machine_mode mode,
1284 const_tree type,
1285 bool named ATTRIBUTE_UNUSED)
1287 HOST_WIDE_INT size;
1288 enum machine_mode dummymode;
1289 int nregs;
1291 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1292 size = (mode == BLKmode && type)
1293 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1295 /* Aggregates are passed by reference based on their size. */
1296 if (type && AGGREGATE_TYPE_P (type))
1298 size = int_size_in_bytes (type);
1301 /* Variable-sized arguments are always passed by reference. */
1302 if (size < 0)
1303 return true;
1305 /* Can this be a candidate to be passed in fp/simd register(s)? */
1306 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1307 &dummymode, &nregs,
1308 NULL))
1309 return false;
1311 /* Arguments which are variable sized or larger than 2 registers are
1312 passed by reference unless they are a homogeneous floating-point
1313 aggregate. */
1314 return size > 2 * UNITS_PER_WORD;
1317 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1318 static bool
1319 aarch64_return_in_msb (const_tree valtype)
1321 enum machine_mode dummy_mode;
1322 int dummy_int;
1324 /* Never happens in little-endian mode. */
1325 if (!BYTES_BIG_ENDIAN)
1326 return false;
1328 /* Only composite types smaller than or equal to 16 bytes can
1329 be potentially returned in registers. */
1330 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1331 || int_size_in_bytes (valtype) <= 0
1332 || int_size_in_bytes (valtype) > 16)
1333 return false;
1335 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1336 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1337 is always passed/returned in the least significant bits of fp/simd
1338 register(s). */
1339 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1340 &dummy_mode, &dummy_int, NULL))
1341 return false;
1343 return true;
1346 /* Implement TARGET_FUNCTION_VALUE.
1347 Define how to find the value returned by a function. */
1349 static rtx
1350 aarch64_function_value (const_tree type, const_tree func,
1351 bool outgoing ATTRIBUTE_UNUSED)
1353 enum machine_mode mode;
1354 int unsignedp;
1355 int count;
1356 enum machine_mode ag_mode;
1358 mode = TYPE_MODE (type);
1359 if (INTEGRAL_TYPE_P (type))
1360 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1362 if (aarch64_return_in_msb (type))
1364 HOST_WIDE_INT size = int_size_in_bytes (type);
1366 if (size % UNITS_PER_WORD != 0)
1368 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1369 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1373 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1374 &ag_mode, &count, NULL))
1376 if (!aarch64_composite_type_p (type, mode))
1378 gcc_assert (count == 1 && mode == ag_mode);
1379 return gen_rtx_REG (mode, V0_REGNUM);
1381 else
1383 int i;
1384 rtx par;
1386 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1387 for (i = 0; i < count; i++)
1389 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1390 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1391 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1392 XVECEXP (par, 0, i) = tmp;
1394 return par;
1397 else
1398 return gen_rtx_REG (mode, R0_REGNUM);
1401 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1402 Return true if REGNO is the number of a hard register in which the values
1403 of called function may come back. */
1405 static bool
1406 aarch64_function_value_regno_p (const unsigned int regno)
1408 /* Maximum of 16 bytes can be returned in the general registers. Examples
1409 of 16-byte return values are: 128-bit integers and 16-byte small
1410 structures (excluding homogeneous floating-point aggregates). */
1411 if (regno == R0_REGNUM || regno == R1_REGNUM)
1412 return true;
1414 /* Up to four fp/simd registers can return a function value, e.g. a
1415 homogeneous floating-point aggregate having four members. */
1416 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1417 return !TARGET_GENERAL_REGS_ONLY;
1419 return false;
1422 /* Implement TARGET_RETURN_IN_MEMORY.
1424 If the type T of the result of a function is such that
1425 void func (T arg)
1426 would require that arg be passed as a value in a register (or set of
1427 registers) according to the parameter passing rules, then the result
1428 is returned in the same registers as would be used for such an
1429 argument. */
1431 static bool
1432 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1434 HOST_WIDE_INT size;
1435 enum machine_mode ag_mode;
1436 int count;
1438 if (!AGGREGATE_TYPE_P (type)
1439 && TREE_CODE (type) != COMPLEX_TYPE
1440 && TREE_CODE (type) != VECTOR_TYPE)
1441 /* Simple scalar types always returned in registers. */
1442 return false;
1444 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1445 type,
1446 &ag_mode,
1447 &count,
1448 NULL))
1449 return false;
1451 /* Types larger than 2 registers returned in memory. */
1452 size = int_size_in_bytes (type);
1453 return (size < 0 || size > 2 * UNITS_PER_WORD);
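/* Return true if an argument of MODE and TYPE is a candidate for passing
   in SIMD/FP registers, setting *NREGS to the number of registers
   required.  */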
1456 static bool
1457 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1458 const_tree type, int *nregs)
1460 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1461 return aarch64_vfp_is_call_or_return_candidate (mode,
1462 type,
1463 &pcum->aapcs_vfp_rmode,
1464 nregs,
1465 NULL);
1468 /* Given MODE and TYPE of a function argument, return the alignment in
1469 bits. The idea is to suppress any stronger alignment requested by
1470 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1471 This is a helper function for local use only. */
1473 static unsigned int
1474 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1476 unsigned int alignment;
1478 if (type)
1480 if (!integer_zerop (TYPE_SIZE (type)))
1482 if (TYPE_MODE (type) == mode)
1483 alignment = TYPE_ALIGN (type);
1484 else
1485 alignment = GET_MODE_ALIGNMENT (mode);
1487 else
1488 alignment = 0;
1490 else
1491 alignment = GET_MODE_ALIGNMENT (mode);
1493 return alignment;
1496 /* Layout a function argument according to the AAPCS64 rules. The rule
1497 numbers refer to the rule numbers in the AAPCS64. */
1499 static void
1500 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1501 const_tree type,
1502 bool named ATTRIBUTE_UNUSED)
1504 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1505 int ncrn, nvrn, nregs;
1506 bool allocate_ncrn, allocate_nvrn;
1507 HOST_WIDE_INT size;
1509 /* We need to do this once per argument. */
1510 if (pcum->aapcs_arg_processed)
1511 return;
1513 pcum->aapcs_arg_processed = true;
1515 /* Size in bytes, rounded to the nearest multiple of 8 bytes. */
1516 size
1517 = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
1518 UNITS_PER_WORD);
1520 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1521 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1522 mode,
1523 type,
1524 &nregs);
1526 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1527 The following code thus handles passing by SIMD/FP registers first. */
1529 nvrn = pcum->aapcs_nvrn;
1531 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
1532 and homogeneous short-vector aggregates (HVA). */
1533 if (allocate_nvrn)
1535 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1537 pcum->aapcs_nextnvrn = nvrn + nregs;
1538 if (!aarch64_composite_type_p (type, mode))
1540 gcc_assert (nregs == 1);
1541 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1543 else
1545 rtx par;
1546 int i;
1547 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1548 for (i = 0; i < nregs; i++)
1550 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1551 V0_REGNUM + nvrn + i);
1552 tmp = gen_rtx_EXPR_LIST
1553 (VOIDmode, tmp,
1554 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1555 XVECEXP (par, 0, i) = tmp;
1557 pcum->aapcs_reg = par;
1559 return;
1561 else
1563 /* C.3 NSRN is set to 8. */
1564 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1565 goto on_stack;
1569 ncrn = pcum->aapcs_ncrn;
1570 nregs = size / UNITS_PER_WORD;
1572 /* C6 - C9, though the sign and zero extension semantics are
1573 handled elsewhere. This is the case where the argument fits
1574 entirely in general registers. */
1575 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1577 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1579 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1581 /* C.8 if the argument has an alignment of 16 then the NGRN is
1582 rounded up to the next even number. */
1583 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1585 ++ncrn;
1586 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1588 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1589 A reg is still generated for it, but the caller should be smart
1590 enough not to use it. */
1591 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1593 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1595 else
1597 rtx par;
1598 int i;
1600 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1601 for (i = 0; i < nregs; i++)
1603 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1604 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1605 GEN_INT (i * UNITS_PER_WORD));
1606 XVECEXP (par, 0, i) = tmp;
1608 pcum->aapcs_reg = par;
1611 pcum->aapcs_nextncrn = ncrn + nregs;
1612 return;
1615 /* C.11 */
1616 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1618 /* The argument is passed on the stack; record the needed number of words for
1619 this argument and align the total size if necessary. */
1620 on_stack:
1621 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
1622 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1623 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1624 16 / UNITS_PER_WORD);
1625 return;
1628 /* Implement TARGET_FUNCTION_ARG. */
1630 static rtx
1631 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1632 const_tree type, bool named)
1634 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1635 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1637 if (mode == VOIDmode)
1638 return NULL_RTX;
1640 aarch64_layout_arg (pcum_v, mode, type, named);
1641 return pcum->aapcs_reg;
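/* Initialize the per-call cumulative argument state in PCUM for the
   AAPCS64 calling convention.  */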
1644 void
1645 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1646 const_tree fntype ATTRIBUTE_UNUSED,
1647 rtx libname ATTRIBUTE_UNUSED,
1648 const_tree fndecl ATTRIBUTE_UNUSED,
1649 unsigned n_named ATTRIBUTE_UNUSED)
1651 pcum->aapcs_ncrn = 0;
1652 pcum->aapcs_nvrn = 0;
1653 pcum->aapcs_nextncrn = 0;
1654 pcum->aapcs_nextnvrn = 0;
1655 pcum->pcs_variant = ARM_PCS_AAPCS64;
1656 pcum->aapcs_reg = NULL_RTX;
1657 pcum->aapcs_arg_processed = false;
1658 pcum->aapcs_stack_words = 0;
1659 pcum->aapcs_stack_size = 0;
1661 return;
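/* Implement TARGET_FUNCTION_ARG_ADVANCE.  */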
1664 static void
1665 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1666 enum machine_mode mode,
1667 const_tree type,
1668 bool named)
1670 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1671 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1673 aarch64_layout_arg (pcum_v, mode, type, named);
1674 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1675 != (pcum->aapcs_stack_words != 0));
1676 pcum->aapcs_arg_processed = false;
1677 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1678 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1679 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1680 pcum->aapcs_stack_words = 0;
1681 pcum->aapcs_reg = NULL_RTX;
1685 bool
1686 aarch64_function_arg_regno_p (unsigned regno)
1688 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1689 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1692 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1693 PARM_BOUNDARY bits of alignment, but will be given anything up
1694 to STACK_BOUNDARY bits if the type requires it. This makes sure
1695 that both before and after the layout of each argument, the Next
1696 Stacked Argument Address (NSAA) will have a minimum alignment of
1697 8 bytes. */
1699 static unsigned int
1700 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1702 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1704 if (alignment < PARM_BOUNDARY)
1705 alignment = PARM_BOUNDARY;
1706 if (alignment > STACK_BOUNDARY)
1707 alignment = STACK_BOUNDARY;
1708 return alignment;
1711 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1713 Return true if an argument passed on the stack should be padded upwards,
1714 i.e. if the least-significant byte of the stack slot has useful data.
1716 Small aggregate types are placed in the lowest memory address.
1718 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1720 bool
1721 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1723 /* On little-endian targets, the least significant byte of every stack
1724 argument is passed at the lowest byte address of the stack slot. */
1725 if (!BYTES_BIG_ENDIAN)
1726 return true;
1728 /* Otherwise, integral, floating-point and pointer types are padded downward:
1729 the least significant byte of a stack argument is passed at the highest
1730 byte address of the stack slot. */
1731 if (type
1732 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1733 || POINTER_TYPE_P (type))
1734 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1735 return false;
1737 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1738 return true;
1741 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1743 It specifies padding for the last (may also be the only)
1744 element of a block move between registers and memory. Assuming
1745 the block is in memory, padding upward means that
1746 the last element is padded after its most significant byte,
1747 while with downward padding, the last element is padded at
1748 its least significant byte side.
1750 Small aggregates and small complex types are always padded
1751 upwards.
1753 We don't need to worry about homogeneous floating-point or
1754 short-vector aggregates; their move is not affected by the
1755 padding direction determined here. Regardless of endianness,
1756 each element of such an aggregate is put in the least
1757 significant bits of a fp/simd register.
1759 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1760 register has useful data, and return the opposite if the most
1761 significant byte does. */
1763 bool
1764 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1765 bool first ATTRIBUTE_UNUSED)
1768 /* Small composite types are always padded upward. */
1769 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1771 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1772 : GET_MODE_SIZE (mode));
1773 if (size < 2 * UNITS_PER_WORD)
1774 return true;
1777 /* Otherwise, use the default padding. */
1778 return !BYTES_BIG_ENDIAN;
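/* Implement TARGET_LIBGCC_CMP_RETURN_MODE.  */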
1781 static enum machine_mode
1782 aarch64_libgcc_cmp_return_mode (void)
1784 return SImode;
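/* Implement TARGET_FRAME_POINTER_REQUIRED.  */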
1787 static bool
1788 aarch64_frame_pointer_required (void)
1790 /* If the function contains dynamic stack allocations, we need to
1791 use the frame pointer to access the static parts of the frame. */
1792 if (cfun->calls_alloca)
1793 return true;
1795 /* In aarch64_override_options_after_change
1796 flag_omit_leaf_frame_pointer turns off the frame pointer by
1797 default. Turn it back on now if we've not got a leaf
1798 function. */
1799 if (flag_omit_leaf_frame_pointer
1800 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
1801 return true;
1803 return false;
1806 /* Mark the registers that need to be saved by the callee and calculate
1807 the size of the callee-saved registers area and frame record (both FP
1808 and LR may be omitted). */
1809 static void
1810 aarch64_layout_frame (void)
1812 HOST_WIDE_INT offset = 0;
1813 int regno;
1815 if (reload_completed && cfun->machine->frame.laid_out)
1816 return;
1818 #define SLOT_NOT_REQUIRED (-2)
1819 #define SLOT_REQUIRED (-1)
1821 /* First mark all the registers that really need to be saved... */
1822 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1823 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
1825 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1826 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
1828 /* ... that includes the eh data registers (if needed)... */
1829 if (crtl->calls_eh_return)
1830 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1831 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
1832 = SLOT_REQUIRED;
1834 /* ... and any callee saved register that dataflow says is live. */
1835 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1836 if (df_regs_ever_live_p (regno)
1837 && !call_used_regs[regno])
1838 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
1840 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1841 if (df_regs_ever_live_p (regno)
1842 && !call_used_regs[regno])
1843 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
1845 if (frame_pointer_needed)
1847 /* FP and LR are placed in the linkage record. */
1848 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1849 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
1850 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1851 offset += 2 * UNITS_PER_WORD;
1854 /* Now assign stack slots for them. */
1855 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1856 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
1858 cfun->machine->frame.reg_offset[regno] = offset;
1859 offset += UNITS_PER_WORD;
1862 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1863 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
1865 cfun->machine->frame.reg_offset[regno] = offset;
1866 offset += UNITS_PER_WORD;
1869 cfun->machine->frame.padding0 =
1870 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1871 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1873 cfun->machine->frame.saved_regs_size = offset;
1875 cfun->machine->frame.hard_fp_offset
1876 = AARCH64_ROUND_UP (cfun->machine->frame.saved_varargs_size
1877 + get_frame_size ()
1878 + cfun->machine->frame.saved_regs_size,
1879 STACK_BOUNDARY / BITS_PER_UNIT);
1881 cfun->machine->frame.frame_size
1882 = AARCH64_ROUND_UP (cfun->machine->frame.hard_fp_offset
1883 + crtl->outgoing_args_size,
1884 STACK_BOUNDARY / BITS_PER_UNIT);
1886 cfun->machine->frame.laid_out = true;
1889 /* Make the last instruction frame-related and note that it performs
1890 the operation described by FRAME_PATTERN. */
1892 static void
1893 aarch64_set_frame_expr (rtx frame_pattern)
1895 rtx insn;
1897 insn = get_last_insn ();
1898 RTX_FRAME_RELATED_P (insn) = 1;
1899 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1900 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1901 frame_pattern,
1902 REG_NOTES (insn));
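/* Return true if REGNO has been allocated a save slot in the current
   frame layout.  */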
1905 static bool
1906 aarch64_register_saved_on_entry (int regno)
1908 return cfun->machine->frame.reg_offset[regno] >= 0;
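/* Return the lowest register number in the range [REGNO, LIMIT] that
   needs to be saved on entry, or a value greater than LIMIT if there is
   none.  */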
1911 static unsigned
1912 aarch64_next_callee_save (unsigned regno, unsigned limit)
1914 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
1915 regno ++;
1916 return regno;
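/* Return a store-pair pattern saving REG1 and REG2 to MEM1 and MEM2 in
   MODE (DImode for core registers, DFmode for FP registers).  */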
1919 static rtx
1920 aarch64_gen_store_pair (enum machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
1921 rtx reg2)
1923 switch (mode)
1925 case DImode:
1926 return gen_store_pairdi (mem1, reg1, mem2, reg2);
1928 case DFmode:
1929 return gen_store_pairdf (mem1, reg1, mem2, reg2);
1931 default:
1932 gcc_unreachable ();
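/* Return a load-pair pattern restoring REG1 and REG2 from MEM1 and MEM2,
   the counterpart of aarch64_gen_store_pair above.  */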
1936 static rtx
1937 aarch64_gen_load_pair (enum machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
1938 rtx mem2)
1940 switch (mode)
1942 case DImode:
1943 return gen_load_pairdi (reg1, mem1, reg2, mem2);
1945 case DFmode:
1946 return gen_load_pairdf (reg1, mem1, reg2, mem2);
1948 default:
1949 gcc_unreachable ();
1954 /* Save or restore (according to RESTORE) the callee-saved registers of MODE
1955 in the range [START, LIMIT], at START_OFFSET from the stack pointer. */
1956 static void
1957 aarch64_save_or_restore_callee_saves (enum machine_mode mode,
1958 HOST_WIDE_INT start_offset,
1959 unsigned start, unsigned limit,
1960 bool restore)
1962 rtx insn;
1963 rtx (*gen_mem_ref) (enum machine_mode, rtx) = (frame_pointer_needed
1964 ? gen_frame_mem : gen_rtx_MEM);
1965 unsigned regno;
1966 unsigned regno2;
1968 for (regno = aarch64_next_callee_save (start, limit);
1969 regno <= limit;
1970 regno = aarch64_next_callee_save (regno + 1, limit))
1972 rtx reg = gen_rtx_REG (mode, regno);
1973 rtx mem;
1975 HOST_WIDE_INT offset = start_offset
1976 + cfun->machine->frame.reg_offset[regno];
1977 mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
1978 offset));
1980 regno2 = aarch64_next_callee_save (regno + 1, limit);
1982 if (regno2 <= limit
1983 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
1984 == cfun->machine->frame.reg_offset[regno2]))
1987 rtx reg2 = gen_rtx_REG (mode, regno2);
1988 rtx mem2;
1990 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
1991 mem2 = gen_mem_ref (mode,
1992 plus_constant (Pmode, stack_pointer_rtx, offset));
1993 if (restore == false)
1994 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
1995 reg2));
1996 else
1998 insn = emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2,
1999 mem2));
2000 add_reg_note (insn, REG_CFA_RESTORE, reg);
2001 add_reg_note (insn, REG_CFA_RESTORE, reg2);
2004 /* The first part of a frame-related parallel insn is
2005 always assumed to be relevant to the frame
2006 calculations; subsequent parts are only
2007 frame-related if explicitly marked. */
2008 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2009 regno = regno2;
2011 else
2013 if (restore == false)
2014 insn = emit_move_insn (mem, reg);
2015 else
2017 insn = emit_move_insn (reg, mem);
2018 add_reg_note (insn, REG_CFA_RESTORE, reg);
2021 RTX_FRAME_RELATED_P (insn) = 1;
2025 /* AArch64 stack frames generated by this compiler look like:
2027 +-------------------------------+
2029 | incoming stack arguments |
2031 +-------------------------------+
2032 | | <-- incoming stack pointer (aligned)
2033 | callee-allocated save area |
2034 | for register varargs |
2036 +-------------------------------+
2037 | local variables | <-- frame_pointer_rtx
2039 +-------------------------------+
2040 | padding0 | \
2041 +-------------------------------+ |
2042 | callee-saved registers | | frame.saved_regs_size
2043 +-------------------------------+ |
2044 | LR' | |
2045 +-------------------------------+ |
2046 | FP' | / <- hard_frame_pointer_rtx (aligned)
2047 +-------------------------------+
2048 | dynamic allocation |
2049 +-------------------------------+
2050 | padding |
2051 +-------------------------------+
2052 | outgoing stack arguments | <-- arg_pointer
2054 +-------------------------------+
2055 | | <-- stack_pointer_rtx (aligned)
2057 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
2058 but leave frame_pointer_rtx and hard_frame_pointer_rtx
2059 unchanged. */
2061 /* Generate the prologue instructions for entry into a function.
2062 Establish the stack frame by decreasing the stack pointer with a
2063 properly calculated size and, if necessary, create a frame record
2064 filled with the values of LR and previous frame pointer. The
2065 current FP is also set up if it is in use. */
2067 void
2068 aarch64_expand_prologue (void)
2070 /* sub sp, sp, #<frame_size>
2071 stp {fp, lr}, [sp, #<frame_size> - 16]
2072 add fp, sp, #<frame_size> - hardfp_offset
2073 stp {cs_reg}, [fp, #-16] etc.
2075 sub sp, sp, <final_adjustment_if_any>  */
2077 HOST_WIDE_INT frame_size, offset;
2078 HOST_WIDE_INT fp_offset; /* Offset from hard FP to SP. */
2079 rtx insn;
2081 aarch64_layout_frame ();
2083 if (flag_stack_usage_info)
2084 current_function_static_stack_size = cfun->machine->frame.frame_size;
2086 frame_size = cfun->machine->frame.frame_size;
2087 offset = cfun->machine->frame.frame_size;
2089 fp_offset = cfun->machine->frame.frame_size
2090 - cfun->machine->frame.hard_fp_offset;
2092 /* Store-pair and load-pair instructions have an offset range of only -512 to 504. */
2093 if (offset >= 512)
2095 /* When the frame is large, the stack pointer is first decremented to
2096 step over the callee-allocated save area for register varargs, the
2097 local variable area and/or the callee-saved register area. This
2098 allows the pre-index write-back store pair instructions to be used
2099 to set up the rest of the stack frame efficiently. */
2101 offset = cfun->machine->frame.hard_fp_offset;
2102 if (offset >= 512)
2103 offset = cfun->machine->frame.saved_regs_size;
2105 frame_size -= (offset + crtl->outgoing_args_size);
2106 fp_offset = 0;
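/* An adjustment of 0x1000000 or more cannot be expressed as at most two
   add/sub immediates (each 12 bits, optionally shifted left by 12), so
   build the constant in IP0 and add it to the stack pointer.  */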
2108 if (frame_size >= 0x1000000)
2110 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2111 emit_move_insn (op0, GEN_INT (-frame_size));
2112 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2113 aarch64_set_frame_expr (gen_rtx_SET
2114 (Pmode, stack_pointer_rtx,
2115 plus_constant (Pmode,
2116 stack_pointer_rtx,
2117 -frame_size)));
2119 else if (frame_size > 0)
2121 if ((frame_size & 0xfff) != frame_size)
2123 insn = emit_insn (gen_add2_insn
2124 (stack_pointer_rtx,
2125 GEN_INT (-(frame_size
2126 & ~(HOST_WIDE_INT)0xfff))));
2127 RTX_FRAME_RELATED_P (insn) = 1;
2129 if ((frame_size & 0xfff) != 0)
2131 insn = emit_insn (gen_add2_insn
2132 (stack_pointer_rtx,
2133 GEN_INT (-(frame_size
2134 & (HOST_WIDE_INT)0xfff))));
2135 RTX_FRAME_RELATED_P (insn) = 1;
2139 else
2140 frame_size = -1;
2142 if (offset > 0)
2144 /* Save the frame pointer and lr if the frame pointer is needed
2145 first. Make the frame pointer point to the location of the
2146 old frame pointer on the stack. */
2147 if (frame_pointer_needed)
2149 rtx mem_fp, mem_lr;
2151 if (fp_offset)
2153 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2154 GEN_INT (-offset)));
2155 RTX_FRAME_RELATED_P (insn) = 1;
2156 aarch64_set_frame_expr (gen_rtx_SET
2157 (Pmode, stack_pointer_rtx,
2158 gen_rtx_MINUS (Pmode,
2159 stack_pointer_rtx,
2160 GEN_INT (offset))));
2161 mem_fp = gen_frame_mem (DImode,
2162 plus_constant (Pmode,
2163 stack_pointer_rtx,
2164 fp_offset));
2165 mem_lr = gen_frame_mem (DImode,
2166 plus_constant (Pmode,
2167 stack_pointer_rtx,
2168 fp_offset
2169 + UNITS_PER_WORD));
2170 insn = emit_insn (gen_store_pairdi (mem_fp,
2171 hard_frame_pointer_rtx,
2172 mem_lr,
2173 gen_rtx_REG (DImode,
2174 LR_REGNUM)));
2176 else
2178 insn = emit_insn (gen_storewb_pairdi_di
2179 (stack_pointer_rtx, stack_pointer_rtx,
2180 hard_frame_pointer_rtx,
2181 gen_rtx_REG (DImode, LR_REGNUM),
2182 GEN_INT (-offset),
2183 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2184 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2187 /* The first part of a frame-related parallel insn is always
2188 assumed to be relevant to the frame calculations;
2189 subsequent parts are only frame-related if explicitly
2190 marked. */
2191 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2192 RTX_FRAME_RELATED_P (insn) = 1;
2194 /* Set up frame pointer to point to the location of the
2195 previous frame pointer on the stack. */
2196 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2197 stack_pointer_rtx,
2198 GEN_INT (fp_offset)));
2199 aarch64_set_frame_expr (gen_rtx_SET
2200 (Pmode, hard_frame_pointer_rtx,
2201 plus_constant (Pmode,
2202 stack_pointer_rtx,
2203 fp_offset)));
2204 RTX_FRAME_RELATED_P (insn) = 1;
2205 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2206 hard_frame_pointer_rtx));
2208 else
2210 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2211 GEN_INT (-offset)));
2212 RTX_FRAME_RELATED_P (insn) = 1;
2215 aarch64_save_or_restore_callee_saves (DImode, fp_offset, R0_REGNUM,
2216 frame_pointer_needed
2217 ? R28_REGNUM : R30_REGNUM, false);
2218 aarch64_save_or_restore_callee_saves (DFmode, fp_offset, V0_REGNUM,
2219 V31_REGNUM, false);
2222 /* When offset >= 512, the outgoing argument area was excluded from the
2223 initial adjustment, so emit: sub sp, sp, #<outgoing_args_size> */
2224 if (frame_size > -1)
2226 if (crtl->outgoing_args_size > 0)
2228 insn = emit_insn (gen_add2_insn
2229 (stack_pointer_rtx,
2230 GEN_INT (- crtl->outgoing_args_size)));
2231 RTX_FRAME_RELATED_P (insn) = 1;
2236 /* Generate the epilogue instructions for returning from a function. */
2237 void
2238 aarch64_expand_epilogue (bool for_sibcall)
2240 HOST_WIDE_INT frame_size, offset;
2241 HOST_WIDE_INT fp_offset;
2242 rtx insn;
2243 rtx cfa_reg;
2245 aarch64_layout_frame ();
2247 offset = frame_size = cfun->machine->frame.frame_size;
2248 fp_offset = cfun->machine->frame.frame_size
2249 - cfun->machine->frame.hard_fp_offset;
2251 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2253 /* Store-pair and load-pair instructions have an offset range of only -512 to 504. */
2254 if (offset >= 512)
2256 offset = cfun->machine->frame.hard_fp_offset;
2257 if (offset >= 512)
2258 offset = cfun->machine->frame.saved_regs_size;
2260 frame_size -= (offset + crtl->outgoing_args_size);
2261 fp_offset = 0;
2262 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2264 insn = emit_insn (gen_add2_insn
2265 (stack_pointer_rtx,
2266 GEN_INT (crtl->outgoing_args_size)));
2267 RTX_FRAME_RELATED_P (insn) = 1;
2270 else
2271 frame_size = -1;
2273 /* If there were outgoing arguments or we've done dynamic stack
2274 allocation, then restore the stack pointer from the frame
2275 pointer. This is at most one insn and more efficient than using
2276 GCC's internal mechanism. */
2277 if (frame_pointer_needed
2278 && (crtl->outgoing_args_size || cfun->calls_alloca))
2280 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2281 hard_frame_pointer_rtx,
2282 GEN_INT (- fp_offset)));
2283 RTX_FRAME_RELATED_P (insn) = 1;
2284 /* As SP is set to (FP - fp_offset), according to the rules in
2285 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2286 from the value of SP from now on. */
2287 cfa_reg = stack_pointer_rtx;
2290 aarch64_save_or_restore_callee_saves (DImode, fp_offset, R0_REGNUM,
2291 frame_pointer_needed
2292 ? R28_REGNUM : R30_REGNUM, true);
2293 aarch64_save_or_restore_callee_saves (DFmode, fp_offset, V0_REGNUM,
2294 V31_REGNUM, true);
2296 /* Restore the frame pointer and lr if the frame pointer is needed. */
2297 if (offset > 0)
2299 if (frame_pointer_needed)
2301 rtx mem_fp, mem_lr;
2303 if (fp_offset)
2305 mem_fp = gen_frame_mem (DImode,
2306 plus_constant (Pmode,
2307 stack_pointer_rtx,
2308 fp_offset));
2309 mem_lr = gen_frame_mem (DImode,
2310 plus_constant (Pmode,
2311 stack_pointer_rtx,
2312 fp_offset
2313 + UNITS_PER_WORD));
2314 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2315 mem_fp,
2316 gen_rtx_REG (DImode,
2317 LR_REGNUM),
2318 mem_lr));
2320 else
2322 insn = emit_insn (gen_loadwb_pairdi_di
2323 (stack_pointer_rtx,
2324 stack_pointer_rtx,
2325 hard_frame_pointer_rtx,
2326 gen_rtx_REG (DImode, LR_REGNUM),
2327 GEN_INT (offset),
2328 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2329 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2330 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2331 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2332 plus_constant (Pmode, cfa_reg,
2333 offset))));
2336 /* The first part of a frame-related parallel insn
2337 is always assumed to be relevant to the frame
2338 calculations; subsequent parts are only
2339 frame-related if explicitly marked. */
2340 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2341 RTX_FRAME_RELATED_P (insn) = 1;
2342 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2343 add_reg_note (insn, REG_CFA_RESTORE,
2344 gen_rtx_REG (DImode, LR_REGNUM));
2346 if (fp_offset)
2348 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2349 GEN_INT (offset)));
2350 RTX_FRAME_RELATED_P (insn) = 1;
2353 else
2355 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2356 GEN_INT (offset)));
2357 RTX_FRAME_RELATED_P (insn) = 1;
2361 /* Stack adjustment for exception handler. */
2362 if (crtl->calls_eh_return)
2364 /* We need to unwind the stack by the offset computed by
2365 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2366 based on SP. Ideally we would update the SP and define the
2367 CFA along the lines of:
2369 SP = SP + EH_RETURN_STACKADJ_RTX
2370 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2372 However the dwarf emitter only understands a constant
2373 register offset.
2375 The solution chosen here is to use the otherwise unused IP0
2376 as a temporary register to hold the current SP value. The
2377 CFA is described using IP0 then SP is modified. */
2379 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2381 insn = emit_move_insn (ip0, stack_pointer_rtx);
2382 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2383 RTX_FRAME_RELATED_P (insn) = 1;
2385 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2387 /* Ensure the assignment to IP0 does not get optimized away. */
2388 emit_use (ip0);
2391 if (frame_size > -1)
2393 if (frame_size >= 0x1000000)
2395 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2396 emit_move_insn (op0, GEN_INT (frame_size));
2397 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2398 aarch64_set_frame_expr (gen_rtx_SET
2399 (Pmode, stack_pointer_rtx,
2400 plus_constant (Pmode,
2401 stack_pointer_rtx,
2402 frame_size)));
2404 else if (frame_size > 0)
2406 if ((frame_size & 0xfff) != 0)
2408 insn = emit_insn (gen_add2_insn
2409 (stack_pointer_rtx,
2410 GEN_INT ((frame_size
2411 & (HOST_WIDE_INT) 0xfff))));
2412 RTX_FRAME_RELATED_P (insn) = 1;
2414 if ((frame_size & 0xfff) != frame_size)
2416 insn = emit_insn (gen_add2_insn
2417 (stack_pointer_rtx,
2418 GEN_INT ((frame_size
2419 & ~ (HOST_WIDE_INT) 0xfff))));
2420 RTX_FRAME_RELATED_P (insn) = 1;
2424 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2425 plus_constant (Pmode,
2426 stack_pointer_rtx,
2427 offset)));
2430 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2431 if (!for_sibcall)
2432 emit_jump_insn (ret_rtx);
2435 /* Return the place to copy the exception unwinding return address to.
2436 This will probably be a stack slot, but could (in theory) be the
2437 return register. */
2439 aarch64_final_eh_return_addr (void)
2441 HOST_WIDE_INT fp_offset;
2443 aarch64_layout_frame ();
2445 fp_offset = cfun->machine->frame.frame_size
2446 - cfun->machine->frame.hard_fp_offset;
2448 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2449 return gen_rtx_REG (DImode, LR_REGNUM);
2451 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2452 result in a store to save LR introduced by builtin_eh_return () being
2453 incorrectly deleted because the alias is not detected.
2454 So in the calculation of the address to copy the exception unwinding
2455 return address to, we distinguish two cases.
2456 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2457 we return a SP-relative location since all the addresses are SP-relative
2458 in this case. This prevents the store from being optimized away.
2459 If the fp_offset is not 0, then the addresses will be FP-relative and
2460 therefore we return a FP-relative location. */
2462 if (frame_pointer_needed)
2464 if (fp_offset)
2465 return gen_frame_mem (DImode,
2466 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2467 else
2468 return gen_frame_mem (DImode,
2469 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2472 /* If FP is not needed, we calculate the location of LR, which would be
2473 at the top of the saved registers block. */
2475 return gen_frame_mem (DImode,
2476 plus_constant (Pmode,
2477 stack_pointer_rtx,
2478 fp_offset
2479 + cfun->machine->frame.saved_regs_size
2480 - 2 * UNITS_PER_WORD));
2483 /* Possibly output code to build up a constant in a register. For
2484 the benefit of the costs infrastructure, returns the number of
2485 instructions which would be emitted. GENERATE inhibits or
2486 enables code generation. */
2488 static int
2489 aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
2491 int insns = 0;
2493 if (aarch64_bitmask_imm (val, DImode))
2495 if (generate)
2496 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2497 insns = 1;
2499 else
2501 int i;
2502 int ncount = 0;
2503 int zcount = 0;
2504 HOST_WIDE_INT valp = val >> 16;
2505 HOST_WIDE_INT valm;
2506 HOST_WIDE_INT tval;
2508 for (i = 16; i < 64; i += 16)
2510 valm = (valp & 0xffff);
2512 if (valm != 0)
2513 ++ zcount;
2515 if (valm != 0xffff)
2516 ++ ncount;
2518 valp >>= 16;
2521 /* zcount contains the number of additional MOVK instructions
2522 required if the constant is built up with an initial MOVZ instruction,
2523 while ncount is the number of MOVK instructions required if starting
2524 with a MOVN instruction. Choose the sequence that yields the fewer
2525 instructions, preferring MOVZ instructions when the two counts are
2526 the same. */
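/* For example, 0xffffffffffff1234 needs just one MOVN, since every 16-bit
   chunk above the bottom one is 0xffff, whereas starting from MOVZ would
   take one MOVZ plus three MOVKs.  */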
2527 if (ncount < zcount)
2529 if (generate)
2530 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2531 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2532 tval = 0xffff;
2533 insns++;
2535 else
2537 if (generate)
2538 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2539 GEN_INT (val & 0xffff));
2540 tval = 0;
2541 insns++;
2544 val >>= 16;
2546 for (i = 16; i < 64; i += 16)
2548 if ((val & 0xffff) != tval)
2550 if (generate)
2551 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2552 GEN_INT (i),
2553 GEN_INT (val & 0xffff)));
2554 insns++;
2556 val >>= 16;
2559 return insns;
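/* Add DELTA to the register numbered REGNUM, using the register numbered
   SCRATCHREG to hold an intermediate value when DELTA is too large for an
   immediate operand.  */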
2562 static void
2563 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2565 HOST_WIDE_INT mdelta = delta;
2566 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2567 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2569 if (mdelta < 0)
2570 mdelta = -mdelta;
2572 if (mdelta >= 4096 * 4096)
2574 (void) aarch64_build_constant (scratchreg, delta, true);
2575 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2577 else if (mdelta > 0)
2579 if (mdelta >= 4096)
2581 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2582 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2583 if (delta < 0)
2584 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2585 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2586 else
2587 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2588 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2590 if (mdelta % 4096 != 0)
2592 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2593 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2594 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2599 /* Output code to add DELTA to the first argument, and then jump
2600 to FUNCTION. Used for C++ multiple inheritance. */
2601 static void
2602 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2603 HOST_WIDE_INT delta,
2604 HOST_WIDE_INT vcall_offset,
2605 tree function)
2607 /* The this pointer is always in x0. Note that this differs from
2608 Arm, where the this pointer may be bumped to r1 if r0 is required
2609 to return a pointer to an aggregate. On AArch64 a result value
2610 pointer will be in x8. */
2611 int this_regno = R0_REGNUM;
2612 rtx this_rtx, temp0, temp1, addr, insn, funexp;
2614 reload_completed = 1;
2615 emit_note (NOTE_INSN_PROLOGUE_END);
2617 if (vcall_offset == 0)
2618 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2619 else
2621 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2623 this_rtx = gen_rtx_REG (Pmode, this_regno);
2624 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2625 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2627 addr = this_rtx;
2628 if (delta != 0)
2630 if (delta >= -256 && delta < 256)
2631 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2632 plus_constant (Pmode, this_rtx, delta));
2633 else
2634 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2637 if (Pmode == ptr_mode)
2638 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2639 else
2640 aarch64_emit_move (temp0,
2641 gen_rtx_ZERO_EXTEND (Pmode,
2642 gen_rtx_MEM (ptr_mode, addr)));
2644 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2645 addr = plus_constant (Pmode, temp0, vcall_offset);
2646 else
2648 (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
2649 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2652 if (Pmode == ptr_mode)
2653 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2654 else
2655 aarch64_emit_move (temp1,
2656 gen_rtx_SIGN_EXTEND (Pmode,
2657 gen_rtx_MEM (ptr_mode, addr)));
2659 emit_insn (gen_add2_insn (this_rtx, temp1));
2662 /* Generate a tail call to the target function. */
2663 if (!TREE_USED (function))
2665 assemble_external (function);
2666 TREE_USED (function) = 1;
2668 funexp = XEXP (DECL_RTL (function), 0);
2669 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2670 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2671 SIBLING_CALL_P (insn) = 1;
2673 insn = get_insns ();
2674 shorten_branches (insn);
2675 final_start_function (insn, file, 1);
2676 final (insn, file, 1);
2677 final_end_function ();
2679 /* Stop pretending to be a post-reload pass. */
2680 reload_completed = 0;
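/* Subroutine of aarch64_tls_referenced_p, called via for_each_rtx.
   Return nonzero if *X is a SYMBOL_REF with a non-zero TLS model.  */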
2683 static int
2684 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2686 if (GET_CODE (*x) == SYMBOL_REF)
2687 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2689 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2690 TLS offsets, not real symbol references. */
2691 if (GET_CODE (*x) == UNSPEC
2692 && XINT (*x, 1) == UNSPEC_TLS)
2693 return -1;
2695 return 0;
2698 static bool
2699 aarch64_tls_referenced_p (rtx x)
2701 if (!TARGET_HAVE_TLS)
2702 return false;
2704 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
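/* Comparison function for qsort and bsearch over the table of bitmask
   immediates.  */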
2708 static int
2709 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2711 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2712 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2714 if (*imm1 < *imm2)
2715 return -1;
2716 if (*imm1 > *imm2)
2717 return +1;
2718 return 0;
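/* Fill aarch64_bitmasks with every value encodable as a logical (bitmask)
   immediate and sort the table so aarch64_bitmask_imm can use bsearch.  */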
2722 static void
2723 aarch64_build_bitmask_table (void)
2725 unsigned HOST_WIDE_INT mask, imm;
2726 unsigned int log_e, e, s, r;
2727 unsigned int nimms = 0;
2729 for (log_e = 1; log_e <= 6; log_e++)
2731 e = 1 << log_e;
2732 if (e == 64)
2733 mask = ~(HOST_WIDE_INT) 0;
2734 else
2735 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2736 for (s = 1; s < e; s++)
2738 for (r = 0; r < e; r++)
2740 /* set s consecutive bits to 1 (s < 64) */
2741 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2742 /* rotate right by r */
2743 if (r != 0)
2744 imm = ((imm >> r) | (imm << (e - r))) & mask;
2745 /* replicate the constant depending on SIMD size */
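/* Each non-final case below deliberately falls through to the next,
   doubling the replication width until the pattern fills 64 bits.  */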
2746 switch (log_e) {
2747 case 1: imm |= (imm << 2);
2748 case 2: imm |= (imm << 4);
2749 case 3: imm |= (imm << 8);
2750 case 4: imm |= (imm << 16);
2751 case 5: imm |= (imm << 32);
2752 case 6:
2753 break;
2754 default:
2755 gcc_unreachable ();
2757 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2758 aarch64_bitmasks[nimms++] = imm;
2763 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2764 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2765 aarch64_bitmasks_cmp);
2769 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2770 a left shift of 0 or 12 bits. */
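/* For example, 0xabc and 0xabc000 can be encoded, but 0xabc0 cannot,
   since it straddles the two 12-bit fields.  */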
2771 bool
2772 aarch64_uimm12_shift (HOST_WIDE_INT val)
2774 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2775 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2780 /* Return true if val is an immediate that can be loaded into a
2781 register by a MOVZ instruction. */
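/* For example, 0x12340000 can be loaded with a single
   movz Xd, #0x1234, lsl 16, while 0x12345678 has two non-zero 16-bit
   chunks and cannot.  */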
2782 static bool
2783 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2785 if (GET_MODE_SIZE (mode) > 4)
2787 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2788 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2789 return 1;
2791 else
2793 /* Ignore sign extension. */
2794 val &= (HOST_WIDE_INT) 0xffffffff;
2796 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2797 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2801 /* Return true if val is a valid bitmask immediate. */
2802 bool
2803 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2805 if (GET_MODE_SIZE (mode) < 8)
2807 /* Replicate bit pattern. */
2808 val &= (HOST_WIDE_INT) 0xffffffff;
2809 val |= val << 32;
2811 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2812 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2816 /* Return true if val is an immediate that can be loaded into a
2817 register in a single instruction. */
2818 bool
2819 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2821 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2822 return 1;
2823 return aarch64_bitmask_imm (val, mode);
2826 static bool
2827 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2829 rtx base, offset;
2831 if (GET_CODE (x) == HIGH)
2832 return true;
2834 split_const (x, &base, &offset);
2835 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2837 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2838 != SYMBOL_FORCE_TO_MEM)
2839 return true;
2840 else
2841 /* Avoid generating a 64-bit relocation in ILP32; leave
2842 to aarch64_expand_mov_immediate to handle it properly. */
2843 return mode != ptr_mode;
2846 return aarch64_tls_referenced_p (x);
2849 /* Return true if register REGNO is a valid index register.
2850 STRICT_P is true if REG_OK_STRICT is in effect. */
2852 bool
2853 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2855 if (!HARD_REGISTER_NUM_P (regno))
2857 if (!strict_p)
2858 return true;
2860 if (!reg_renumber)
2861 return false;
2863 regno = reg_renumber[regno];
2865 return GP_REGNUM_P (regno);
2868 /* Return true if register REGNO is a valid base register.
2869 STRICT_P is true if REG_OK_STRICT is in effect. */
2871 bool
2872 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2874 if (!HARD_REGISTER_NUM_P (regno))
2876 if (!strict_p)
2877 return true;
2879 if (!reg_renumber)
2880 return false;
2882 regno = reg_renumber[regno];
2885 /* The fake registers will be eliminated to either the stack or
2886 hard frame pointer, both of which are usually valid base registers.
2887 Reload deals with the cases where the eliminated form isn't valid. */
2888 return (GP_REGNUM_P (regno)
2889 || regno == SP_REGNUM
2890 || regno == FRAME_POINTER_REGNUM
2891 || regno == ARG_POINTER_REGNUM);
2894 /* Return true if X is a valid base register.
2895 STRICT_P is true if REG_OK_STRICT is in effect. */
2897 static bool
2898 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2900 if (!strict_p && GET_CODE (x) == SUBREG)
2901 x = SUBREG_REG (x);
2903 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2906 /* Return true if address offset is a valid index. If it is, fill in INFO
2907 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2909 static bool
2910 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2911 enum machine_mode mode, bool strict_p)
2913 enum aarch64_address_type type;
2914 rtx index;
2915 int shift;
2917 /* (reg:P) */
2918 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2919 && GET_MODE (x) == Pmode)
2921 type = ADDRESS_REG_REG;
2922 index = x;
2923 shift = 0;
2925 /* (sign_extend:DI (reg:SI)) */
2926 else if ((GET_CODE (x) == SIGN_EXTEND
2927 || GET_CODE (x) == ZERO_EXTEND)
2928 && GET_MODE (x) == DImode
2929 && GET_MODE (XEXP (x, 0)) == SImode)
2931 type = (GET_CODE (x) == SIGN_EXTEND)
2932 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2933 index = XEXP (x, 0);
2934 shift = 0;
2936 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2937 else if (GET_CODE (x) == MULT
2938 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2939 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2940 && GET_MODE (XEXP (x, 0)) == DImode
2941 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2942 && CONST_INT_P (XEXP (x, 1)))
2944 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2945 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2946 index = XEXP (XEXP (x, 0), 0);
2947 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2949 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2950 else if (GET_CODE (x) == ASHIFT
2951 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2952 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2953 && GET_MODE (XEXP (x, 0)) == DImode
2954 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2955 && CONST_INT_P (XEXP (x, 1)))
2957 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2958 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2959 index = XEXP (XEXP (x, 0), 0);
2960 shift = INTVAL (XEXP (x, 1));
2962 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2963 else if ((GET_CODE (x) == SIGN_EXTRACT
2964 || GET_CODE (x) == ZERO_EXTRACT)
2965 && GET_MODE (x) == DImode
2966 && GET_CODE (XEXP (x, 0)) == MULT
2967 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2968 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2970 type = (GET_CODE (x) == SIGN_EXTRACT)
2971 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2972 index = XEXP (XEXP (x, 0), 0);
2973 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2974 if (INTVAL (XEXP (x, 1)) != 32 + shift
2975 || INTVAL (XEXP (x, 2)) != 0)
2976 shift = -1;
2978 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2979 (const_int 0xffffffff<<shift)) */
2980 else if (GET_CODE (x) == AND
2981 && GET_MODE (x) == DImode
2982 && GET_CODE (XEXP (x, 0)) == MULT
2983 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2984 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2985 && CONST_INT_P (XEXP (x, 1)))
2987 type = ADDRESS_REG_UXTW;
2988 index = XEXP (XEXP (x, 0), 0);
2989 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2990 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2991 shift = -1;
2993 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2994 else if ((GET_CODE (x) == SIGN_EXTRACT
2995 || GET_CODE (x) == ZERO_EXTRACT)
2996 && GET_MODE (x) == DImode
2997 && GET_CODE (XEXP (x, 0)) == ASHIFT
2998 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2999 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3001 type = (GET_CODE (x) == SIGN_EXTRACT)
3002 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3003 index = XEXP (XEXP (x, 0), 0);
3004 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3005 if (INTVAL (XEXP (x, 1)) != 32 + shift
3006 || INTVAL (XEXP (x, 2)) != 0)
3007 shift = -1;
3009 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3010 (const_int 0xffffffff<<shift)) */
3011 else if (GET_CODE (x) == AND
3012 && GET_MODE (x) == DImode
3013 && GET_CODE (XEXP (x, 0)) == ASHIFT
3014 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3015 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3016 && CONST_INT_P (XEXP (x, 1)))
3018 type = ADDRESS_REG_UXTW;
3019 index = XEXP (XEXP (x, 0), 0);
3020 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3021 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3022 shift = -1;
3024 /* (mult:P (reg:P) (const_int scale)) */
3025 else if (GET_CODE (x) == MULT
3026 && GET_MODE (x) == Pmode
3027 && GET_MODE (XEXP (x, 0)) == Pmode
3028 && CONST_INT_P (XEXP (x, 1)))
3030 type = ADDRESS_REG_REG;
3031 index = XEXP (x, 0);
3032 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3034 /* (ashift:P (reg:P) (const_int shift)) */
3035 else if (GET_CODE (x) == ASHIFT
3036 && GET_MODE (x) == Pmode
3037 && GET_MODE (XEXP (x, 0)) == Pmode
3038 && CONST_INT_P (XEXP (x, 1)))
3040 type = ADDRESS_REG_REG;
3041 index = XEXP (x, 0);
3042 shift = INTVAL (XEXP (x, 1));
3044 else
3045 return false;
3047 if (GET_CODE (index) == SUBREG)
3048 index = SUBREG_REG (index);
3050 if ((shift == 0 ||
3051 (shift > 0 && shift <= 3
3052 && (1 << shift) == GET_MODE_SIZE (mode)))
3053 && REG_P (index)
3054 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3056 info->type = type;
3057 info->offset = index;
3058 info->shift = shift;
3059 return true;
3062 return false;
3065 static inline bool
3066 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3068 return (offset >= -64 * GET_MODE_SIZE (mode)
3069 && offset < 64 * GET_MODE_SIZE (mode)
3070 && offset % GET_MODE_SIZE (mode) == 0);
3073 static inline bool
3074 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3075 HOST_WIDE_INT offset)
3077 return offset >= -256 && offset < 256;
3080 static inline bool
3081 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3083 return (offset >= 0
3084 && offset < 4096 * GET_MODE_SIZE (mode)
3085 && offset % GET_MODE_SIZE (mode) == 0);
3088 /* Return true if X is a valid address for machine mode MODE. If it is,
3089 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3090 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3092 static bool
3093 aarch64_classify_address (struct aarch64_address_info *info,
3094 rtx x, enum machine_mode mode,
3095 RTX_CODE outer_code, bool strict_p)
3097 enum rtx_code code = GET_CODE (x);
3098 rtx op0, op1;
3099 bool allow_reg_index_p =
3100 outer_code != PARALLEL && (GET_MODE_SIZE (mode) != 16
3101 || aarch64_vector_mode_supported_p (mode));
3102 /* Don't support anything other than POST_INC or REG addressing for
3103 AdvSIMD. */
3104 if (aarch64_vect_struct_mode_p (mode)
3105 && (code != POST_INC && code != REG))
3106 return false;
3108 switch (code)
3110 case REG:
3111 case SUBREG:
3112 info->type = ADDRESS_REG_IMM;
3113 info->base = x;
3114 info->offset = const0_rtx;
3115 return aarch64_base_register_rtx_p (x, strict_p);
3117 case PLUS:
3118 op0 = XEXP (x, 0);
3119 op1 = XEXP (x, 1);
3120 if (GET_MODE_SIZE (mode) != 0
3121 && CONST_INT_P (op1)
3122 && aarch64_base_register_rtx_p (op0, strict_p))
3124 HOST_WIDE_INT offset = INTVAL (op1);
3126 info->type = ADDRESS_REG_IMM;
3127 info->base = op0;
3128 info->offset = op1;
3130 /* TImode and TFmode values are allowed in both pairs of X
3131 registers and individual Q registers. The available
3132 address modes are:
3133 X,X: 7-bit signed scaled offset
3134 Q: 9-bit signed offset
3135 We conservatively require an offset representable in either mode. */
3137 if (mode == TImode || mode == TFmode)
3138 return (offset_7bit_signed_scaled_p (mode, offset)
3139 && offset_9bit_signed_unscaled_p (mode, offset));
3141 if (outer_code == PARALLEL)
3142 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3143 && offset_7bit_signed_scaled_p (mode, offset));
3144 else
3145 return (offset_9bit_signed_unscaled_p (mode, offset)
3146 || offset_12bit_unsigned_scaled_p (mode, offset));
3149 if (allow_reg_index_p)
3151 /* Look for base + (scaled/extended) index register. */
3152 if (aarch64_base_register_rtx_p (op0, strict_p)
3153 && aarch64_classify_index (info, op1, mode, strict_p))
3155 info->base = op0;
3156 return true;
3158 if (aarch64_base_register_rtx_p (op1, strict_p)
3159 && aarch64_classify_index (info, op0, mode, strict_p))
3161 info->base = op1;
3162 return true;
3166 return false;
3168 case POST_INC:
3169 case POST_DEC:
3170 case PRE_INC:
3171 case PRE_DEC:
3172 info->type = ADDRESS_REG_WB;
3173 info->base = XEXP (x, 0);
3174 info->offset = NULL_RTX;
3175 return aarch64_base_register_rtx_p (info->base, strict_p);
3177 case POST_MODIFY:
3178 case PRE_MODIFY:
3179 info->type = ADDRESS_REG_WB;
3180 info->base = XEXP (x, 0);
3181 if (GET_CODE (XEXP (x, 1)) == PLUS
3182 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3183 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3184 && aarch64_base_register_rtx_p (info->base, strict_p))
3186 HOST_WIDE_INT offset;
3187 info->offset = XEXP (XEXP (x, 1), 1);
3188 offset = INTVAL (info->offset);
3190 /* TImode and TFmode values are allowed in both pairs of X
3191 registers and individual Q registers. The available
3192 address modes are:
3193 X,X: 7-bit signed scaled offset
3194 Q: 9-bit signed offset
3195 We conservatively require an offset representable in either mode. */
3197 if (mode == TImode || mode == TFmode)
3198 return (offset_7bit_signed_scaled_p (mode, offset)
3199 && offset_9bit_signed_unscaled_p (mode, offset));
3201 if (outer_code == PARALLEL)
3202 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3203 && offset_7bit_signed_scaled_p (mode, offset));
3204 else
3205 return offset_9bit_signed_unscaled_p (mode, offset);
3207 return false;
3209 case CONST:
3210 case SYMBOL_REF:
3211 case LABEL_REF:
3212 /* Load literal: a pc-relative constant pool entry. Only supported
3213 for SI mode or larger. */
3214 info->type = ADDRESS_SYMBOLIC;
3215 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3217 rtx sym, addend;
3219 split_const (x, &sym, &addend);
3220 return (GET_CODE (sym) == LABEL_REF
3221 || (GET_CODE (sym) == SYMBOL_REF
3222 && CONSTANT_POOL_ADDRESS_P (sym)));
3224 return false;
3226 case LO_SUM:
3227 info->type = ADDRESS_LO_SUM;
3228 info->base = XEXP (x, 0);
3229 info->offset = XEXP (x, 1);
3230 if (allow_reg_index_p
3231 && aarch64_base_register_rtx_p (info->base, strict_p))
3233 rtx sym, offs;
3234 split_const (info->offset, &sym, &offs);
3235 if (GET_CODE (sym) == SYMBOL_REF
3236 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3237 == SYMBOL_SMALL_ABSOLUTE))
3239 /* The symbol and offset must be aligned to the access size. */
3240 unsigned int align;
3241 unsigned int ref_size;
3243 if (CONSTANT_POOL_ADDRESS_P (sym))
3244 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3245 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3247 tree exp = SYMBOL_REF_DECL (sym);
3248 align = TYPE_ALIGN (TREE_TYPE (exp));
3249 align = CONSTANT_ALIGNMENT (exp, align);
3251 else if (SYMBOL_REF_DECL (sym))
3252 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3253 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3254 && SYMBOL_REF_BLOCK (sym) != NULL)
3255 align = SYMBOL_REF_BLOCK (sym)->alignment;
3256 else
3257 align = BITS_PER_UNIT;
3259 ref_size = GET_MODE_SIZE (mode);
3260 if (ref_size == 0)
3261 ref_size = GET_MODE_SIZE (DImode);
3263 return ((INTVAL (offs) & (ref_size - 1)) == 0
3264 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3267 return false;
3269 default:
3270 return false;
3274 bool
3275 aarch64_symbolic_address_p (rtx x)
3277 rtx offset;
3279 split_const (x, &x, &offset);
3280 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3283 /* Classify the base of symbolic expression X, given that X appears in
3284 context CONTEXT. */
3286 enum aarch64_symbol_type
3287 aarch64_classify_symbolic_expression (rtx x,
3288 enum aarch64_symbol_context context)
3290 rtx offset;
3292 split_const (x, &x, &offset);
3293 return aarch64_classify_symbol (x, context);
3297 /* Return TRUE if X is a legitimate address for accessing memory in
3298 mode MODE. */
3299 static bool
3300 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3302 struct aarch64_address_info addr;
3304 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3307 /* Return TRUE if X is a legitimate address for accessing memory in
3308 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3309 pair operation. */
3310 bool
3311 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3312 RTX_CODE outer_code, bool strict_p)
3314 struct aarch64_address_info addr;
3316 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3319 /* Return TRUE if rtx X is the immediate constant 0.0. */
3320 bool
3321 aarch64_float_const_zero_rtx_p (rtx x)
3323 REAL_VALUE_TYPE r;
3325 if (GET_MODE (x) == VOIDmode)
3326 return false;
3328 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3329 if (REAL_VALUE_MINUS_ZERO (r))
3330 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3331 return REAL_VALUES_EQUAL (r, dconst0);
3334 /* Return the fixed registers used for condition codes. */
3336 static bool
3337 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3339 *p1 = CC_REGNUM;
3340 *p2 = INVALID_REGNUM;
3341 return true;
3344 /* Emit call insn with PAT and do aarch64-specific handling. */
3346 void
3347 aarch64_emit_call_insn (rtx pat)
3349 rtx insn = emit_call_insn (pat);
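/* Record IP0 and IP1 in CALL_INSN_FUNCTION_USAGE as clobbered by the call;
   linker-generated code such as long-branch veneers or PLT stubs may use
   them across the call.  */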
3351 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
3352 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
3353 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
3356 enum machine_mode
3357 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3359 /* All floating point compares return CCFP if it is an equality
3360 comparison, and CCFPE otherwise. */
3361 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3363 switch (code)
3365 case EQ:
3366 case NE:
3367 case UNORDERED:
3368 case ORDERED:
3369 case UNLT:
3370 case UNLE:
3371 case UNGT:
3372 case UNGE:
3373 case UNEQ:
3374 case LTGT:
3375 return CCFPmode;
3377 case LT:
3378 case LE:
3379 case GT:
3380 case GE:
3381 return CCFPEmode;
3383 default:
3384 gcc_unreachable ();
3388 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3389 && y == const0_rtx
3390 && (code == EQ || code == NE || code == LT || code == GE)
3391 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3392 || GET_CODE (x) == NEG))
3393 return CC_NZmode;
3395 /* A compare with a shifted operand. Because of canonicalization,
3396 the comparison will have to be swapped when we emit the assembly
3397 code. */
3398 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3399 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3400 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3401 || GET_CODE (x) == LSHIFTRT
3402 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3403 return CC_SWPmode;
3405 /* Similarly for a negated operand, but we can only do this for
3406 equalities. */
3407 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3408 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3409 && (code == EQ || code == NE)
3410 && GET_CODE (x) == NEG)
3411 return CC_Zmode;
3413 /* A compare of a mode narrower than SI mode against zero can be done
3414 by extending the value in the comparison. */
3415 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3416 && y == const0_rtx)
3417 /* Only use sign-extension if we really need it. */
3418 return ((code == GT || code == GE || code == LE || code == LT)
3419 ? CC_SESWPmode : CC_ZESWPmode);
3421 /* For everything else, return CCmode. */
3422 return CCmode;
3425 static unsigned
3426 aarch64_get_condition_code (rtx x)
3428 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3429 enum rtx_code comp_code = GET_CODE (x);
3431 if (GET_MODE_CLASS (mode) != MODE_CC)
3432 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3434 switch (mode)
3436 case CCFPmode:
3437 case CCFPEmode:
3438 switch (comp_code)
3440 case GE: return AARCH64_GE;
3441 case GT: return AARCH64_GT;
3442 case LE: return AARCH64_LS;
3443 case LT: return AARCH64_MI;
3444 case NE: return AARCH64_NE;
3445 case EQ: return AARCH64_EQ;
3446 case ORDERED: return AARCH64_VC;
3447 case UNORDERED: return AARCH64_VS;
3448 case UNLT: return AARCH64_LT;
3449 case UNLE: return AARCH64_LE;
3450 case UNGT: return AARCH64_HI;
3451 case UNGE: return AARCH64_PL;
3452 default: gcc_unreachable ();
3454 break;
3456 case CCmode:
3457 switch (comp_code)
3459 case NE: return AARCH64_NE;
3460 case EQ: return AARCH64_EQ;
3461 case GE: return AARCH64_GE;
3462 case GT: return AARCH64_GT;
3463 case LE: return AARCH64_LE;
3464 case LT: return AARCH64_LT;
3465 case GEU: return AARCH64_CS;
3466 case GTU: return AARCH64_HI;
3467 case LEU: return AARCH64_LS;
3468 case LTU: return AARCH64_CC;
3469 default: gcc_unreachable ();
3471 break;
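/* The operands of the comparison were swapped when one of these modes was
   chosen (see aarch64_select_cc_mode), so the condition must be swapped
   too.  */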
3473 case CC_SWPmode:
3474 case CC_ZESWPmode:
3475 case CC_SESWPmode:
3476 switch (comp_code)
3478 case NE: return AARCH64_NE;
3479 case EQ: return AARCH64_EQ;
3480 case GE: return AARCH64_LE;
3481 case GT: return AARCH64_LT;
3482 case LE: return AARCH64_GE;
3483 case LT: return AARCH64_GT;
3484 case GEU: return AARCH64_LS;
3485 case GTU: return AARCH64_CC;
3486 case LEU: return AARCH64_CS;
3487 case LTU: return AARCH64_HI;
3488 default: gcc_unreachable ();
3490 break;
3492 case CC_NZmode:
3493 switch (comp_code)
3495 case NE: return AARCH64_NE;
3496 case EQ: return AARCH64_EQ;
3497 case GE: return AARCH64_PL;
3498 case LT: return AARCH64_MI;
3499 default: gcc_unreachable ();
3501 break;
3503 case CC_Zmode:
3504 switch (comp_code)
3506 case NE: return AARCH64_NE;
3507 case EQ: return AARCH64_EQ;
3508 default: gcc_unreachable ();
3510 break;
3512 default:
3513 gcc_unreachable ();
3514 break;
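/* Return the number of bits set in VALUE, clearing the lowest set bit on
   each iteration (Kernighan's method).  */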
3518 static unsigned
3519 bit_count (unsigned HOST_WIDE_INT value)
3521 unsigned count = 0;
3523 while (value)
3525 count++;
3526 value &= value - 1;
3529 return count;
3532 void
3533 aarch64_print_operand (FILE *f, rtx x, char code)
3535 switch (code)
3537 /* An integer or symbol address without a preceding # sign. */
3538 case 'c':
3539 switch (GET_CODE (x))
3541 case CONST_INT:
3542 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3543 break;
3545 case SYMBOL_REF:
3546 output_addr_const (f, x);
3547 break;
3549 case CONST:
3550 if (GET_CODE (XEXP (x, 0)) == PLUS
3551 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3553 output_addr_const (f, x);
3554 break;
3556 /* Fall through. */
3558 default:
3559 output_operand_lossage ("Unsupported operand for code '%c'", code);
3561 break;
3563 case 'e':
3564 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3566 int n;
3568 if (GET_CODE (x) != CONST_INT
3569 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3571 output_operand_lossage ("invalid operand for '%%%c'", code);
3572 return;
3575 switch (n)
3577 case 3:
3578 fputc ('b', f);
3579 break;
3580 case 4:
3581 fputc ('h', f);
3582 break;
3583 case 5:
3584 fputc ('w', f);
3585 break;
3586 default:
3587 output_operand_lossage ("invalid operand for '%%%c'", code);
3588 return;
3591 break;
3593 case 'p':
3595 int n;
3597 /* Print N such that 2^N == X. */
3598 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3600 output_operand_lossage ("invalid operand for '%%%c'", code);
3601 return;
3604 asm_fprintf (f, "%d", n);
3606 break;
3608 case 'P':
3609 /* Print the number of non-zero bits in X (a const_int). */
3610 if (GET_CODE (x) != CONST_INT)
3612 output_operand_lossage ("invalid operand for '%%%c'", code);
3613 return;
3616 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3617 break;
3619 case 'H':
3620 /* Print the higher numbered register of a pair (TImode) of regs. */
3621 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3623 output_operand_lossage ("invalid operand for '%%%c'", code);
3624 return;
3627 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3628 break;
3630 case 'm':
3631 /* Print a condition (eq, ne, etc). */
3633 /* CONST_TRUE_RTX means always -- that's the default. */
3634 if (x == const_true_rtx)
3635 return;
3637 if (!COMPARISON_P (x))
3639 output_operand_lossage ("invalid operand for '%%%c'", code);
3640 return;
3643 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3644 break;
3646 case 'M':
3647 /* Print the inverse of a condition (eq <-> ne, etc). */
3649 /* CONST_TRUE_RTX means never -- that's the default. */
3650 if (x == const_true_rtx)
3652 fputs ("nv", f);
3653 return;
3656 if (!COMPARISON_P (x))
3658 output_operand_lossage ("invalid operand for '%%%c'", code);
3659 return;
3662 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3663 (aarch64_get_condition_code (x))], f);
3664 break;
3666 case 'b':
3667 case 'h':
3668 case 's':
3669 case 'd':
3670 case 'q':
3671 /* Print a scalar FP/SIMD register name. */
3672 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3674 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3675 return;
3677 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3678 break;
3680 case 'S':
3681 case 'T':
3682 case 'U':
3683 case 'V':
3684 /* Print the first FP/SIMD register name in a list. */
3685 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3687 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3688 return;
3690 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3691 break;
3693 case 'X':
3694 /* Print bottom 16 bits of integer constant in hex. */
3695 if (GET_CODE (x) != CONST_INT)
3697 output_operand_lossage ("invalid operand for '%%%c'", code);
3698 return;
3700 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3701 break;
3703 case 'w':
3704 case 'x':
3705 /* Print a general register name or the zero register (32-bit or
3706 64-bit). */
3707 if (x == const0_rtx
3708 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3710 asm_fprintf (f, "%czr", code);
3711 break;
3714 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3716 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3717 break;
3720 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3722 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3723 break;
3726 /* Fall through */
3728 case 0:
3729 /* Print a normal operand. If it is a general register, then we
3730 assume DImode. */
3731 if (x == NULL)
3733 output_operand_lossage ("missing operand");
3734 return;
3737 switch (GET_CODE (x))
3739 case REG:
3740 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3741 break;
3743 case MEM:
3744 aarch64_memory_reference_mode = GET_MODE (x);
3745 output_address (XEXP (x, 0));
3746 break;
3748 case LABEL_REF:
3749 case SYMBOL_REF:
3750 output_addr_const (asm_out_file, x);
3751 break;
3753 case CONST_INT:
3754 asm_fprintf (f, "%wd", INTVAL (x));
3755 break;
3757 case CONST_VECTOR:
3758 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3760 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3761 HOST_WIDE_INT_MIN,
3762 HOST_WIDE_INT_MAX));
3763 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3765 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3767 fputc ('0', f);
3769 else
3770 gcc_unreachable ();
3771 break;
3773 case CONST_DOUBLE:
3774 /* CONST_DOUBLE can represent a double-width integer.
3775 In this case, the mode of x is VOIDmode. */
3776 if (GET_MODE (x) == VOIDmode)
3777 ; /* Do Nothing. */
3778 else if (aarch64_float_const_zero_rtx_p (x))
3780 fputc ('0', f);
3781 break;
3783 else if (aarch64_float_const_representable_p (x))
3785 #define buf_size 20
3786 char float_buf[buf_size] = {'\0'};
3787 REAL_VALUE_TYPE r;
3788 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3789 real_to_decimal_for_mode (float_buf, &r,
3790 buf_size, buf_size,
3791 1, GET_MODE (x));
3792 asm_fprintf (asm_out_file, "%s", float_buf);
3793 break;
3794 #undef buf_size
3796 output_operand_lossage ("invalid constant");
3797 return;
3798 default:
3799 output_operand_lossage ("invalid operand");
3800 return;
3802 break;
3804 case 'A':
3805 if (GET_CODE (x) == HIGH)
3806 x = XEXP (x, 0);
3808 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3810 case SYMBOL_SMALL_GOT:
3811 asm_fprintf (asm_out_file, ":got:");
3812 break;
3814 case SYMBOL_SMALL_TLSGD:
3815 asm_fprintf (asm_out_file, ":tlsgd:");
3816 break;
3818 case SYMBOL_SMALL_TLSDESC:
3819 asm_fprintf (asm_out_file, ":tlsdesc:");
3820 break;
3822 case SYMBOL_SMALL_GOTTPREL:
3823 asm_fprintf (asm_out_file, ":gottprel:");
3824 break;
3826 case SYMBOL_SMALL_TPREL:
3827 asm_fprintf (asm_out_file, ":tprel:");
3828 break;
3830 case SYMBOL_TINY_GOT:
3831 gcc_unreachable ();
3832 break;
3834 default:
3835 break;
3837 output_addr_const (asm_out_file, x);
3838 break;
3840 case 'L':
3841 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3843 case SYMBOL_SMALL_GOT:
3844 asm_fprintf (asm_out_file, ":lo12:");
3845 break;
3847 case SYMBOL_SMALL_TLSGD:
3848 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3849 break;
3851 case SYMBOL_SMALL_TLSDESC:
3852 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3853 break;
3855 case SYMBOL_SMALL_GOTTPREL:
3856 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3857 break;
3859 case SYMBOL_SMALL_TPREL:
3860 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3861 break;
3863 case SYMBOL_TINY_GOT:
3864 asm_fprintf (asm_out_file, ":got:");
3865 break;
3867 default:
3868 break;
3870 output_addr_const (asm_out_file, x);
3871 break;
3873 case 'G':
3875 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3877 case SYMBOL_SMALL_TPREL:
3878 asm_fprintf (asm_out_file, ":tprel_hi12:");
3879 break;
3880 default:
3881 break;
3883 output_addr_const (asm_out_file, x);
3884 break;
3886 default:
3887 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3888 return;
3892 void
3893 aarch64_print_operand_address (FILE *f, rtx x)
3895 struct aarch64_address_info addr;
3897 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3898 MEM, true))
3899 switch (addr.type)
3901 case ADDRESS_REG_IMM:
3902 if (addr.offset == const0_rtx)
3903 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3904 else
3905 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
3906 INTVAL (addr.offset));
3907 return;
3909 case ADDRESS_REG_REG:
3910 if (addr.shift == 0)
3911 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
3912 reg_names [REGNO (addr.offset)]);
3913 else
3914 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
3915 reg_names [REGNO (addr.offset)], addr.shift);
3916 return;
3918 case ADDRESS_REG_UXTW:
3919 if (addr.shift == 0)
3920 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
3921 REGNO (addr.offset) - R0_REGNUM);
3922 else
3923 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
3924 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3925 return;
3927 case ADDRESS_REG_SXTW:
3928 if (addr.shift == 0)
3929 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
3930 REGNO (addr.offset) - R0_REGNUM);
3931 else
3932 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
3933 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3934 return;
3936 case ADDRESS_REG_WB:
3937 switch (GET_CODE (x))
3939 case PRE_INC:
3940 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
3941 GET_MODE_SIZE (aarch64_memory_reference_mode));
3942 return;
3943 case POST_INC:
3944 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
3945 GET_MODE_SIZE (aarch64_memory_reference_mode));
3946 return;
3947 case PRE_DEC:
3948 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
3949 GET_MODE_SIZE (aarch64_memory_reference_mode));
3950 return;
3951 case POST_DEC:
3952 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
3953 GET_MODE_SIZE (aarch64_memory_reference_mode));
3954 return;
3955 case PRE_MODIFY:
3956 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
3957 INTVAL (addr.offset));
3958 return;
3959 case POST_MODIFY:
3960 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
3961 INTVAL (addr.offset));
3962 return;
3963 default:
3964 break;
3966 break;
3968 case ADDRESS_LO_SUM:
3969 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
3970 output_addr_const (f, addr.offset);
3971 asm_fprintf (f, "]");
3972 return;
3974 case ADDRESS_SYMBOLIC:
3975 break;
3978 output_addr_const (f, x);
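/* Return true if the rtx X mentions a label, ignoring the LABEL_REFs found
   inside UNSPEC_TLS, which are really constant offsets.  */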
3981 bool
3982 aarch64_label_mentioned_p (rtx x)
3984 const char *fmt;
3985 int i;
3987 if (GET_CODE (x) == LABEL_REF)
3988 return true;
3990 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3991 referencing instruction, but they are constant offsets, not
3992 symbols. */
3993 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3994 return false;
3996 fmt = GET_RTX_FORMAT (GET_CODE (x));
3997 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3999 if (fmt[i] == 'E')
4001 int j;
4003 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4004 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4005 return 1;
4007 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4008 return 1;
4011 return 0;
4014 /* Implement REGNO_REG_CLASS. */
4016 enum reg_class
4017 aarch64_regno_regclass (unsigned regno)
4019 if (GP_REGNUM_P (regno))
4020 return GENERAL_REGS;
4022 if (regno == SP_REGNUM)
4023 return STACK_REG;
4025 if (regno == FRAME_POINTER_REGNUM
4026 || regno == ARG_POINTER_REGNUM)
4027 return POINTER_REGS;
4029 if (FP_REGNUM_P (regno))
4030 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
4032 return NO_REGS;
4035 /* Try a machine-dependent way of reloading an illegitimate address
4036 operand. If we find one, push the reload and return the new rtx. */
4039 aarch64_legitimize_reload_address (rtx *x_p,
4040 enum machine_mode mode,
4041 int opnum, int type,
4042 int ind_levels ATTRIBUTE_UNUSED)
4044 rtx x = *x_p;
4046 /* Do not allow mem (plus (reg, const)) if vector struct mode. */
4047 if (aarch64_vect_struct_mode_p (mode)
4048 && GET_CODE (x) == PLUS
4049 && REG_P (XEXP (x, 0))
4050 && CONST_INT_P (XEXP (x, 1)))
4052 rtx orig_rtx = x;
4053 x = copy_rtx (x);
4054 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4055 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4056 opnum, (enum reload_type) type);
4057 return x;
4060 /* We must recognize output that we have already generated ourselves. */
4061 if (GET_CODE (x) == PLUS
4062 && GET_CODE (XEXP (x, 0)) == PLUS
4063 && REG_P (XEXP (XEXP (x, 0), 0))
4064 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4065 && CONST_INT_P (XEXP (x, 1)))
4067 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4068 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4069 opnum, (enum reload_type) type);
4070 return x;
4073 /* We wish to handle large displacements off a base register by splitting
4074 the addend across an add and the mem insn. This can cut the number of
4075 extra insns needed from 3 to 1. It is only useful for a load/store of a
4076 single register with a 12-bit offset field. */
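/* For example (hypothetical register numbers), a DImode access at offset
   0x13458 from x0 can be split as
        add x16, x0, #0x13000
        ldr x1, [x16, #0x458]
   instead of materialising the full offset in a register first.  */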
4077 if (GET_CODE (x) == PLUS
4078 && REG_P (XEXP (x, 0))
4079 && CONST_INT_P (XEXP (x, 1))
4080 && HARD_REGISTER_P (XEXP (x, 0))
4081 && mode != TImode
4082 && mode != TFmode
4083 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4085 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4086 HOST_WIDE_INT low = val & 0xfff;
4087 HOST_WIDE_INT high = val - low;
4088 HOST_WIDE_INT offs;
4089 rtx cst;
4090 enum machine_mode xmode = GET_MODE (x);
4092 /* In ILP32, xmode can be either DImode or SImode. */
4093 gcc_assert (xmode == DImode || xmode == SImode);
4095 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4096 BLKmode alignment. */
4097 if (GET_MODE_SIZE (mode) == 0)
4098 return NULL_RTX;
4100 offs = low % GET_MODE_SIZE (mode);
4102 /* Align misaligned offset by adjusting high part to compensate. */
4103 if (offs != 0)
4105 if (aarch64_uimm12_shift (high + offs))
4107 /* Align down. */
4108 low = low - offs;
4109 high = high + offs;
4111 else
4113 /* Align up. */
4114 offs = GET_MODE_SIZE (mode) - offs;
4115 low = low + offs;
4116 high = high + (low & 0x1000) - offs;
4117 low &= 0xfff;
4121 /* Check for overflow. */
4122 if (high + low != val)
4123 return NULL_RTX;
4125 cst = GEN_INT (high);
4126 if (!aarch64_uimm12_shift (high))
4127 cst = force_const_mem (xmode, cst);
4129 /* Reload high part into base reg, leaving the low part
4130 in the mem instruction.
4131 Note that replacing this gen_rtx_PLUS with plus_constant is
4132 wrong in this case because we rely on the
4133 (plus (plus reg c1) c2) structure being preserved so that
4134 XEXP (*p, 0) in push_reload below uses the correct term. */
4135 x = gen_rtx_PLUS (xmode,
4136 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4137 GEN_INT (low));
4139 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4140 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4141 opnum, (enum reload_type) type);
4142 return x;
4145 return NULL_RTX;
4149 static reg_class_t
4150 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4151 reg_class_t rclass,
4152 enum machine_mode mode,
4153 secondary_reload_info *sri)
4155 /* Without the TARGET_SIMD instructions we cannot move a Q register
4156 to a Q register directly. We need a scratch. */
4157 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4158 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4159 && reg_class_subset_p (rclass, FP_REGS))
4161 if (mode == TFmode)
4162 sri->icode = CODE_FOR_aarch64_reload_movtf;
4163 else if (mode == TImode)
4164 sri->icode = CODE_FOR_aarch64_reload_movti;
4165 return NO_REGS;
4168 /* A TFmode or TImode memory access should be handled via an FP_REGS
4169 register, because AArch64 has richer addressing modes for LDR/STR
4170 instructions than for LDP/STP instructions. */
4171 if (!TARGET_GENERAL_REGS_ONLY && rclass == GENERAL_REGS
4172 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4173 return FP_REGS;
4175 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4176 return GENERAL_REGS;
4178 return NO_REGS;
4181 static bool
4182 aarch64_can_eliminate (const int from, const int to)
4184 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4185 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4187 if (frame_pointer_needed)
4189 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4190 return true;
4191 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4192 return false;
4193 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4194 && !cfun->calls_alloca)
4195 return true;
4196 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4197 return true;
4199 return false;
4202 return true;
4205 HOST_WIDE_INT
4206 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4208 aarch64_layout_frame ();
4210 if (to == HARD_FRAME_POINTER_REGNUM)
4212 if (from == ARG_POINTER_REGNUM)
4213 return cfun->machine->frame.frame_size - crtl->outgoing_args_size;
4215 if (from == FRAME_POINTER_REGNUM)
4216 return (cfun->machine->frame.hard_fp_offset
4217 - cfun->machine->frame.saved_varargs_size);
4220 if (to == STACK_POINTER_REGNUM)
4222 if (from == FRAME_POINTER_REGNUM)
4223 return (cfun->machine->frame.frame_size
4224 - cfun->machine->frame.saved_varargs_size);
4227 return cfun->machine->frame.frame_size;
4230 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4231 previous frame. */
4234 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4236 if (count != 0)
4237 return const0_rtx;
4238 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4242 static void
4243 aarch64_asm_trampoline_template (FILE *f)
4245 if (TARGET_ILP32)
4247 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4248 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4250 else
4252 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4253 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4255 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4256 assemble_aligned_integer (4, const0_rtx);
4257 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4258 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
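/* A sketch of the LP64 trampoline this template produces (offsets and
   register names are illustrative; IP1 is normally x17 and the static
   chain register x18):

	0:  ldr	x17, .+16	// target function address
	4:  ldr	x18, .+20	// static chain value
	8:  br	x17
       12:  .word  0		// padding up to tramp_code_sz bytes
       16:  <function address>	// written by aarch64_trampoline_init
       24:  <static chain>	// likewise

   aarch64_trampoline_init copies only the code part of the template and
   then fills in the two data words for each instance.  */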
4261 static void
4262 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4264 rtx fnaddr, mem, a_tramp;
4265 const int tramp_code_sz = 16;
4267 /* Don't need to copy the trailing D-words, we fill those in below. */
4268 emit_block_move (m_tramp, assemble_trampoline_template (),
4269 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4270 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4271 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4272 if (GET_MODE (fnaddr) != ptr_mode)
4273 fnaddr = convert_memory_address (ptr_mode, fnaddr);
4274 emit_move_insn (mem, fnaddr);
4276 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4277 emit_move_insn (mem, chain_value);
4279 /* XXX We should really define a "clear_cache" pattern and use
4280 gen_clear_cache(). */
4281 a_tramp = XEXP (m_tramp, 0);
4282 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4283 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4284 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4285 ptr_mode);
4288 static unsigned char
4289 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4291 switch (regclass)
4293 case CALLER_SAVE_REGS:
4294 case POINTER_REGS:
4295 case GENERAL_REGS:
4296 case ALL_REGS:
4297 case FP_REGS:
4298 case FP_LO_REGS:
4299 return
4300 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4301 (GET_MODE_SIZE (mode) + 7) / 8;
4302 case STACK_REG:
4303 return 1;
4305 case NO_REGS:
4306 return 0;
4308 default:
4309 break;
4311 gcc_unreachable ();
4314 static reg_class_t
4315 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4317 if (regclass == POINTER_REGS)
4318 return GENERAL_REGS;
4320 if (regclass == STACK_REG)
4322 if (REG_P(x)
4323 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4324 return regclass;
4326 return NO_REGS;
4329 /* If it's an integer immediate that MOVI can't handle, then
4330 FP_REGS is not an option, so we return NO_REGS instead. */
4331 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4332 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4333 return NO_REGS;
4335 /* Register elimination can result in a request for
4336 SP+constant->FP_REGS. We cannot support such operations which
4337 use SP as source and an FP_REG as destination, so reject such
4338 cases outright. */
4339 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4341 rtx lhs = XEXP (x, 0);
4343 /* Look through a possible SUBREG introduced by ILP32. */
4344 if (GET_CODE (lhs) == SUBREG)
4345 lhs = SUBREG_REG (lhs);
4347 gcc_assert (REG_P (lhs));
4348 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4349 POINTER_REGS));
4350 return NO_REGS;
4353 return regclass;
4356 void
4357 aarch64_asm_output_labelref (FILE* f, const char *name)
4359 asm_fprintf (f, "%U%s", name);
4362 static void
4363 aarch64_elf_asm_constructor (rtx symbol, int priority)
4365 if (priority == DEFAULT_INIT_PRIORITY)
4366 default_ctor_section_asm_out_constructor (symbol, priority);
4367 else
4369 section *s;
4370 char buf[18];
4371 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4372 s = get_section (buf, SECTION_WRITE, NULL);
4373 switch_to_section (s);
4374 assemble_align (POINTER_SIZE);
4375 assemble_aligned_integer (POINTER_BYTES, symbol);
4379 static void
4380 aarch64_elf_asm_destructor (rtx symbol, int priority)
4382 if (priority == DEFAULT_INIT_PRIORITY)
4383 default_dtor_section_asm_out_destructor (symbol, priority);
4384 else
4386 section *s;
4387 char buf[18];
4388 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4389 s = get_section (buf, SECTION_WRITE, NULL);
4390 switch_to_section (s);
4391 assemble_align (POINTER_SIZE);
4392 assemble_aligned_integer (POINTER_BYTES, symbol);
4396 const char*
4397 aarch64_output_casesi (rtx *operands)
4399 char buf[100];
4400 char label[100];
4401 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
4402 int index;
4403 static const char *const patterns[4][2] =
4406 "ldrb\t%w3, [%0,%w1,uxtw]",
4407 "add\t%3, %4, %w3, sxtb #2"
4410 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4411 "add\t%3, %4, %w3, sxth #2"
4414 "ldr\t%w3, [%0,%w1,uxtw #2]",
4415 "add\t%3, %4, %w3, sxtw #2"
4417 /* We assume that DImode is only generated when not optimizing and
4418 that we don't really need 64-bit address offsets. That would
4419 imply an object file with 8GB of code in a single function! */
4421 "ldr\t%w3, [%0,%w1,uxtw #2]",
4422 "add\t%3, %4, %w3, sxtw #2"
4426 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4428 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4430 gcc_assert (index >= 0 && index <= 3);
4432 /* Need to implement table size reduction by changing the code below. */
4433 output_asm_insn (patterns[index][0], operands);
4434 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4435 snprintf (buf, sizeof (buf),
4436 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4437 output_asm_insn (buf, operands);
4438 output_asm_insn (patterns[index][1], operands);
4439 output_asm_insn ("br\t%3", operands);
4440 assemble_label (asm_out_file, label);
4441 return "";
4445 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4446 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4447 operator. */
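/* For example, shift = 1 with mask 0x1fe describes an 8-bit field and
   yields 8 (UXTB), while shift = 1 with mask 0x1fffe yields 16 (UXTH);
   any combination that is not an 8/16/32-bit field shifted by 0..3
   yields 0.  */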
4450 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4452 if (shift >= 0 && shift <= 3)
4454 int size;
4455 for (size = 8; size <= 32; size *= 2)
4457 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4458 if (mask == bits << shift)
4459 return size;
4462 return 0;
4465 static bool
4466 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4467 const_rtx x ATTRIBUTE_UNUSED)
4469 /* We can't use blocks for constants when we're using a per-function
4470 constant pool. */
4471 return false;
4474 static section *
4475 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4476 rtx x ATTRIBUTE_UNUSED,
4477 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4479 /* Force all constant pool entries into the current function section. */
4480 return function_section (current_function_decl);
4484 /* Costs. */
4486 /* Helper function for rtx cost calculation. Strip a shift expression
4487 from X. Returns the inner operand if successful, or the original
4488 expression on failure. */
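/* For instance (in informal RTL), (ashift (reg:DI x0) (const_int 3)) and
   its canonical multiply form (mult (reg:DI x0) (const_int 8)) both strip
   to (reg:DI x0), whereas a shift by a register amount is returned
   unchanged.  */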
4489 static rtx
4490 aarch64_strip_shift (rtx x)
4492 rtx op = x;
4494 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
4495 we can convert both to ROR during final output. */
4496 if ((GET_CODE (op) == ASHIFT
4497 || GET_CODE (op) == ASHIFTRT
4498 || GET_CODE (op) == LSHIFTRT
4499 || GET_CODE (op) == ROTATERT
4500 || GET_CODE (op) == ROTATE)
4501 && CONST_INT_P (XEXP (op, 1)))
4502 return XEXP (op, 0);
4504 if (GET_CODE (op) == MULT
4505 && CONST_INT_P (XEXP (op, 1))
4506 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4507 return XEXP (op, 0);
4509 return x;
4512 /* Helper function for rtx cost calculation. Strip an extend
4513 expression from X. Returns the inner operand if successful, or the
4514 original expression on failure. We deal with a number of possible
4515 canonicalization variations here. */
4516 static rtx
4517 aarch64_strip_extend (rtx x)
4519 rtx op = x;
4521 /* Zero and sign extraction of a widened value. */
4522 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4523 && XEXP (op, 2) == const0_rtx
4524 && GET_CODE (XEXP (op, 0)) == MULT
4525 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4526 XEXP (op, 1)))
4527 return XEXP (XEXP (op, 0), 0);
4529 /* It can also be represented (for zero-extend) as an AND with an
4530 immediate. */
4531 if (GET_CODE (op) == AND
4532 && GET_CODE (XEXP (op, 0)) == MULT
4533 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4534 && CONST_INT_P (XEXP (op, 1))
4535 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4536 INTVAL (XEXP (op, 1))) != 0)
4537 return XEXP (XEXP (op, 0), 0);
4539 /* Now handle extended register, as this may also have an optional
4540 left shift by 1..4. */
4541 if (GET_CODE (op) == ASHIFT
4542 && CONST_INT_P (XEXP (op, 1))
4543 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4544 op = XEXP (op, 0);
4546 if (GET_CODE (op) == ZERO_EXTEND
4547 || GET_CODE (op) == SIGN_EXTEND)
4548 op = XEXP (op, 0);
4550 if (op != x)
4551 return op;
4553 return x;
4556 /* Helper function for rtx cost calculation. Calculate the cost of
4557 a MULT, which may be part of a multiply-accumulate rtx. Return
4558 the calculated cost of the expression, recursing manually in to
4559 operands where needed. */
4561 static int
4562 aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
4564 rtx op0, op1;
4565 const struct cpu_cost_table *extra_cost
4566 = aarch64_tune_params->insn_extra_cost;
4567 int cost = 0;
4568 bool maybe_fma = (outer == PLUS || outer == MINUS);
4569 enum machine_mode mode = GET_MODE (x);
4571 gcc_checking_assert (code == MULT);
4573 op0 = XEXP (x, 0);
4574 op1 = XEXP (x, 1);
4576 if (VECTOR_MODE_P (mode))
4577 mode = GET_MODE_INNER (mode);
4579 /* Integer multiply/fma. */
4580 if (GET_MODE_CLASS (mode) == MODE_INT)
4582 /* The multiply will be canonicalized as a shift, cost it as such. */
4583 if (CONST_INT_P (op1)
4584 && exact_log2 (INTVAL (op1)) > 0)
4586 if (speed)
4588 if (maybe_fma)
4589 /* ADD (shifted register). */
4590 cost += extra_cost->alu.arith_shift;
4591 else
4592 /* LSL (immediate). */
4593 cost += extra_cost->alu.shift;
4596 cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
4598 return cost;
4601 /* Integer multiplies or FMAs have zero/sign extending variants. */
4602 if ((GET_CODE (op0) == ZERO_EXTEND
4603 && GET_CODE (op1) == ZERO_EXTEND)
4604 || (GET_CODE (op0) == SIGN_EXTEND
4605 && GET_CODE (op1) == SIGN_EXTEND))
4607 cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4608 + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
4610 if (speed)
4612 if (maybe_fma)
4613 /* MADD/SMADDL/UMADDL. */
4614 cost += extra_cost->mult[0].extend_add;
4615 else
4616 /* MUL/SMULL/UMULL. */
4617 cost += extra_cost->mult[0].extend;
4620 return cost;
4623 /* This is either an integer multiply or an FMA. In both cases
4624 we want to recurse and cost the operands. */
4625 cost += rtx_cost (op0, MULT, 0, speed)
4626 + rtx_cost (op1, MULT, 1, speed);
4628 if (speed)
4630 if (maybe_fma)
4631 /* MADD. */
4632 cost += extra_cost->mult[mode == DImode].add;
4633 else
4634 /* MUL. */
4635 cost += extra_cost->mult[mode == DImode].simple;
4638 return cost;
4640 else
4642 if (speed)
4644 /* Floating-point FMA/FMUL can also support negations of the
4645 operands. */
4646 if (GET_CODE (op0) == NEG)
4647 op0 = XEXP (op0, 0);
4648 if (GET_CODE (op1) == NEG)
4649 op1 = XEXP (op1, 0);
4651 if (maybe_fma)
4652 /* FMADD/FNMADD/FNMSUB/FMSUB. */
4653 cost += extra_cost->fp[mode == DFmode].fma;
4654 else
4655 /* FMUL/FNMUL. */
4656 cost += extra_cost->fp[mode == DFmode].mult;
4659 cost += rtx_cost (op0, MULT, 0, speed)
4660 + rtx_cost (op1, MULT, 1, speed);
4661 return cost;
4665 static int
4666 aarch64_address_cost (rtx x,
4667 enum machine_mode mode,
4668 addr_space_t as ATTRIBUTE_UNUSED,
4669 bool speed)
4671 enum rtx_code c = GET_CODE (x);
4672 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4673 struct aarch64_address_info info;
4674 int cost = 0;
4675 info.shift = 0;
4677 if (!aarch64_classify_address (&info, x, mode, c, false))
4679 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
4681 /* This is a CONST or SYMBOL ref which will be split
4682 in a different way depending on the code model in use.
4683 Cost it through the generic infrastructure. */
4684 int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
4685 /* Divide through by the cost of one instruction to
4686 bring it to the same units as the address costs. */
4687 cost_symbol_ref /= COSTS_N_INSNS (1);
4688 /* The cost is then the cost of preparing the address,
4689 followed by an immediate (possibly 0) offset. */
4690 return cost_symbol_ref + addr_cost->imm_offset;
4692 else
4694 /* This is most likely a jump table from a case
4695 statement. */
4696 return addr_cost->register_offset;
4700 switch (info.type)
4702 case ADDRESS_LO_SUM:
4703 case ADDRESS_SYMBOLIC:
4704 case ADDRESS_REG_IMM:
4705 cost += addr_cost->imm_offset;
4706 break;
4708 case ADDRESS_REG_WB:
4709 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4710 cost += addr_cost->pre_modify;
4711 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4712 cost += addr_cost->post_modify;
4713 else
4714 gcc_unreachable ();
4716 break;
4718 case ADDRESS_REG_REG:
4719 cost += addr_cost->register_offset;
4720 break;
4722 case ADDRESS_REG_UXTW:
4723 case ADDRESS_REG_SXTW:
4724 cost += addr_cost->register_extend;
4725 break;
4727 default:
4728 gcc_unreachable ();
4732 if (info.shift > 0)
4734 /* For the sake of calculating the cost of the shifted register
4735 component, we can treat same sized modes in the same way. */
4736 switch (GET_MODE_BITSIZE (mode))
4738 case 16:
4739 cost += addr_cost->addr_scale_costs.hi;
4740 break;
4742 case 32:
4743 cost += addr_cost->addr_scale_costs.si;
4744 break;
4746 case 64:
4747 cost += addr_cost->addr_scale_costs.di;
4748 break;
4750 /* We can't tell, or this is a 128-bit vector. */
4751 default:
4752 cost += addr_cost->addr_scale_costs.ti;
4753 break;
4757 return cost;
4760 /* Return true if the RTX X in mode MODE is a zero or sign extract
4761 usable in an ADD or SUB (extended register) instruction. */
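/* Informally, this is the shape produced by the add_<optab><mode>_multp2
   patterns, which map onto extended-register arithmetic such as

	add	x0, x1, w2, sxtw #2

   (register numbers illustrative); the (mult ... (const_int 4)) inside
   the extract encodes the left shift applied to the extended value.  */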
4762 static bool
4763 aarch64_rtx_arith_op_extract_p (rtx x, enum machine_mode mode)
4765 /* Catch add with a sign extract.
4766 This is add_<optab><mode>_multp2. */
4767 if (GET_CODE (x) == SIGN_EXTRACT
4768 || GET_CODE (x) == ZERO_EXTRACT)
4770 rtx op0 = XEXP (x, 0);
4771 rtx op1 = XEXP (x, 1);
4772 rtx op2 = XEXP (x, 2);
4774 if (GET_CODE (op0) == MULT
4775 && CONST_INT_P (op1)
4776 && op2 == const0_rtx
4777 && CONST_INT_P (XEXP (op0, 1))
4778 && aarch64_is_extend_from_extract (mode,
4779 XEXP (op0, 1),
4780 op1))
4782 return true;
4786 return false;
4789 static bool
4790 aarch64_frint_unspec_p (unsigned int u)
4792 switch (u)
4794 case UNSPEC_FRINTZ:
4795 case UNSPEC_FRINTP:
4796 case UNSPEC_FRINTM:
4797 case UNSPEC_FRINTA:
4798 case UNSPEC_FRINTN:
4799 case UNSPEC_FRINTX:
4800 case UNSPEC_FRINTI:
4801 return true;
4803 default:
4804 return false;
4808 /* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
4809 storing it in *COST. Result is true if the total cost of the operation
4810 has now been calculated. */
4811 static bool
4812 aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
4814 rtx inner;
4815 rtx comparator;
4816 enum rtx_code cmpcode;
4818 if (COMPARISON_P (op0))
4820 inner = XEXP (op0, 0);
4821 comparator = XEXP (op0, 1);
4822 cmpcode = GET_CODE (op0);
4824 else
4826 inner = op0;
4827 comparator = const0_rtx;
4828 cmpcode = NE;
4831 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
4833 /* Conditional branch. */
4834 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
4835 return true;
4836 else
4838 if (cmpcode == NE || cmpcode == EQ)
4840 if (comparator == const0_rtx)
4842 /* TBZ/TBNZ/CBZ/CBNZ. */
4843 if (GET_CODE (inner) == ZERO_EXTRACT)
4844 /* TBZ/TBNZ. */
4845 *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT,
4846 0, speed);
4847 else
4848 /* CBZ/CBNZ. */
4849 *cost += rtx_cost (inner, cmpcode, 0, speed);
4851 return true;
4854 else if (cmpcode == LT || cmpcode == GE)
4856 /* TBZ/TBNZ. */
4857 if (comparator == const0_rtx)
4858 return true;
4862 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
4864 /* It's a conditional operation based on the status flags,
4865 so it must be some flavor of CSEL. */
4867 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
4868 if (GET_CODE (op1) == NEG
4869 || GET_CODE (op1) == NOT
4870 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
4871 op1 = XEXP (op1, 0);
4873 *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed);
4874 *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed);
4875 return true;
4878 /* We don't know what this is, cost all operands. */
4879 return false;
4882 /* Calculate the cost of calculating X, storing it in *COST. Result
4883 is true if the total cost of the operation has now been calculated. */
4884 static bool
4885 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4886 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4888 rtx op0, op1, op2;
4889 const struct cpu_cost_table *extra_cost
4890 = aarch64_tune_params->insn_extra_cost;
4891 enum machine_mode mode = GET_MODE (x);
4893 /* By default, assume that everything has equivalent cost to the
4894 cheapest instruction. Any additional costs are applied as a delta
4895 above this default. */
4896 *cost = COSTS_N_INSNS (1);
4898 /* TODO: The cost infrastructure currently does not handle
4899 vector operations. Assume that all vector operations
4900 are equally expensive. */
4901 if (VECTOR_MODE_P (mode))
4903 if (speed)
4904 *cost += extra_cost->vect.alu;
4905 return true;
4908 switch (code)
4910 case SET:
4911 /* The cost depends entirely on the operands to SET. */
4912 *cost = 0;
4913 op0 = SET_DEST (x);
4914 op1 = SET_SRC (x);
4916 switch (GET_CODE (op0))
4918 case MEM:
4919 if (speed)
4921 rtx address = XEXP (op0, 0);
4922 if (GET_MODE_CLASS (mode) == MODE_INT)
4923 *cost += extra_cost->ldst.store;
4924 else if (mode == SFmode)
4925 *cost += extra_cost->ldst.storef;
4926 else if (mode == DFmode)
4927 *cost += extra_cost->ldst.stored;
4929 *cost +=
4930 COSTS_N_INSNS (aarch64_address_cost (address, mode,
4931 0, speed));
4934 *cost += rtx_cost (op1, SET, 1, speed);
4935 return true;
4937 case SUBREG:
4938 if (! REG_P (SUBREG_REG (op0)))
4939 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4941 /* Fall through. */
4942 case REG:
4943 /* const0_rtx is in general free, but we will use an
4944 instruction to set a register to 0. */
4945 if (REG_P (op1) || op1 == const0_rtx)
4947 /* The cost is 1 per register copied. */
4948 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
4949 / UNITS_PER_WORD;
4950 *cost = COSTS_N_INSNS (n_minus_1 + 1);
4952 else
4953 /* Cost is just the cost of the RHS of the set. */
4954 *cost += rtx_cost (op1, SET, 1, speed);
4955 return true;
4957 case ZERO_EXTRACT:
4958 case SIGN_EXTRACT:
4959 /* Bit-field insertion. Strip any redundant widening of
4960 the RHS to meet the width of the target. */
4961 if (GET_CODE (op1) == SUBREG)
4962 op1 = SUBREG_REG (op1);
4963 if ((GET_CODE (op1) == ZERO_EXTEND
4964 || GET_CODE (op1) == SIGN_EXTEND)
4965 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4966 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4967 >= INTVAL (XEXP (op0, 1))))
4968 op1 = XEXP (op1, 0);
4970 if (CONST_INT_P (op1))
4972 /* MOV immediate is assumed to always be cheap. */
4973 *cost = COSTS_N_INSNS (1);
4975 else
4977 /* BFM. */
4978 if (speed)
4979 *cost += extra_cost->alu.bfi;
4980 *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
4983 return true;
4985 default:
4986 /* We can't make sense of this, assume default cost. */
4987 *cost = COSTS_N_INSNS (1);
4988 return false;
4990 return false;
4992 case CONST_INT:
4993 /* If an instruction can incorporate a constant within the
4994 instruction, the instruction's expression avoids calling
4995 rtx_cost() on the constant. If rtx_cost() is called on a
4996 constant, then it is usually because the constant must be
4997 moved into a register by one or more instructions.
4999 The exception is constant 0, which can be expressed
5000 as XZR/WZR and is therefore free. The exception to this is
5001 if we have (set (reg) (const0_rtx)) in which case we must cost
5002 the move. However, we can catch that when we cost the SET, so
5003 we don't need to consider that here. */
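      /* As an illustration, the 16 in (plus (reg) (const_int 16)) is
	 normally absorbed by the ADD (immediate) pattern and never costed
	 here, whereas a value such as 0x123456789 has to be synthesised by
	 a MOVZ/MOVK sequence and is costed in proportion to the
	 instruction count reported by aarch64_build_constant below.  */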
5004 if (x == const0_rtx)
5005 *cost = 0;
5006 else
5008 /* To an approximation, building any other constant is
5009 proportionally expensive to the number of instructions
5010 required to build that constant. This is true whether we
5011 are compiling for SPEED or otherwise. */
5012 *cost = COSTS_N_INSNS (aarch64_build_constant (0,
5013 INTVAL (x),
5014 false));
5016 return true;
5018 case CONST_DOUBLE:
5019 if (speed)
5021 /* mov[df,sf]_aarch64. */
5022 if (aarch64_float_const_representable_p (x))
5023 /* FMOV (scalar immediate). */
5024 *cost += extra_cost->fp[mode == DFmode].fpconst;
5025 else if (!aarch64_float_const_zero_rtx_p (x))
5027 /* This will be a load from memory. */
5028 if (mode == DFmode)
5029 *cost += extra_cost->ldst.loadd;
5030 else
5031 *cost += extra_cost->ldst.loadf;
5033 else
5034 /* Otherwise this is +0.0. We get this using MOVI d0, #0
5035 or MOV v0.s[0], wzr - neither of which is modeled by the
5036 cost tables. Just use the default cost. */
5041 return true;
5043 case MEM:
5044 if (speed)
5046 /* For loads we want the base cost of a load, plus an
5047 approximation for the additional cost of the addressing
5048 mode. */
5049 rtx address = XEXP (x, 0);
5050 if (GET_MODE_CLASS (mode) == MODE_INT)
5051 *cost += extra_cost->ldst.load;
5052 else if (mode == SFmode)
5053 *cost += extra_cost->ldst.loadf;
5054 else if (mode == DFmode)
5055 *cost += extra_cost->ldst.loadd;
5057 *cost +=
5058 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5059 0, speed));
5062 return true;
5064 case NEG:
5065 op0 = XEXP (x, 0);
5067 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5069 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5070 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5072 /* CSETM. */
5073 *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
5074 return true;
5077 /* Cost this as SUB wzr, X. */
5078 op0 = CONST0_RTX (GET_MODE (x));
5079 op1 = XEXP (x, 0);
5080 goto cost_minus;
5083 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
5085 /* Support (neg(fma...)) as a single instruction only if
5086 sign of zeros is unimportant. This matches the decision
5087 making in aarch64.md. */
5088 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
5090 /* FNMADD. */
5091 *cost = rtx_cost (op0, NEG, 0, speed);
5092 return true;
5094 if (speed)
5095 /* FNEG. */
5096 *cost += extra_cost->fp[mode == DFmode].neg;
5097 return false;
5100 return false;
5102 case CLRSB:
5103 case CLZ:
5104 if (speed)
5105 *cost += extra_cost->alu.clz;
5107 return false;
5109 case COMPARE:
5110 op0 = XEXP (x, 0);
5111 op1 = XEXP (x, 1);
5113 if (op1 == const0_rtx
5114 && GET_CODE (op0) == AND)
5116 x = op0;
5117 goto cost_logic;
5120 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
5122 /* TODO: A write to the CC flags possibly costs extra, this
5123 needs encoding in the cost tables. */
5125 /* CC_ZESWPmode supports zero extend for free. */
5126 if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
5127 op0 = XEXP (op0, 0);
5129 /* ANDS. */
5130 if (GET_CODE (op0) == AND)
5132 x = op0;
5133 goto cost_logic;
5136 if (GET_CODE (op0) == PLUS)
5138 /* ADDS (and CMN alias). */
5139 x = op0;
5140 goto cost_plus;
5143 if (GET_CODE (op0) == MINUS)
5145 /* SUBS. */
5146 x = op0;
5147 goto cost_minus;
5150 if (GET_CODE (op1) == NEG)
5152 /* CMN. */
5153 if (speed)
5154 *cost += extra_cost->alu.arith;
5156 *cost += rtx_cost (op0, COMPARE, 0, speed);
5157 *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed);
5158 return true;
5161 /* CMP.
5163 Compare can freely swap the order of operands, and
5164 canonicalization puts the more complex operation first.
5165 But the integer MINUS logic expects the shift/extend
5166 operation in op1. */
5167 if (! (REG_P (op0)
5168 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
5170 op0 = XEXP (x, 1);
5171 op1 = XEXP (x, 0);
5173 goto cost_minus;
5176 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5178 /* FCMP. */
5179 if (speed)
5180 *cost += extra_cost->fp[mode == DFmode].compare;
5182 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
5184 /* FCMP supports constant 0.0 for no extra cost. */
5185 return true;
5187 return false;
5190 return false;
5192 case MINUS:
5194 op0 = XEXP (x, 0);
5195 op1 = XEXP (x, 1);
5197 cost_minus:
5198 /* Detect valid immediates. */
5199 if ((GET_MODE_CLASS (mode) == MODE_INT
5200 || (GET_MODE_CLASS (mode) == MODE_CC
5201 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
5202 && CONST_INT_P (op1)
5203 && aarch64_uimm12_shift (INTVAL (op1)))
5205 *cost += rtx_cost (op0, MINUS, 0, speed);
5207 if (speed)
5208 /* SUB(S) (immediate). */
5209 *cost += extra_cost->alu.arith;
5210 return true;
5214 /* Look for SUB (extended register). */
5215 if (aarch64_rtx_arith_op_extract_p (op1, mode))
5217 if (speed)
5218 *cost += extra_cost->alu.arith_shift;
5220 *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
5221 (enum rtx_code) GET_CODE (op1),
5222 0, speed);
5223 return true;
5226 rtx new_op1 = aarch64_strip_extend (op1);
5228 /* Cost this as an FMA-alike operation. */
5229 if ((GET_CODE (new_op1) == MULT
5230 || GET_CODE (new_op1) == ASHIFT)
5231 && code != COMPARE)
5233 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
5234 (enum rtx_code) code,
5235 speed);
5236 *cost += rtx_cost (op0, MINUS, 0, speed);
5237 return true;
5240 *cost += rtx_cost (new_op1, MINUS, 1, speed);
5242 if (speed)
5244 if (GET_MODE_CLASS (mode) == MODE_INT)
5245 /* SUB(S). */
5246 *cost += extra_cost->alu.arith;
5247 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5248 /* FSUB. */
5249 *cost += extra_cost->fp[mode == DFmode].addsub;
5251 return true;
5254 case PLUS:
5256 rtx new_op0;
5258 op0 = XEXP (x, 0);
5259 op1 = XEXP (x, 1);
5261 cost_plus:
5262 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5263 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5265 /* CSINC. */
5266 *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
5267 *cost += rtx_cost (op1, PLUS, 1, speed);
5268 return true;
5271 if (GET_MODE_CLASS (mode) == MODE_INT
5272 && CONST_INT_P (op1)
5273 && aarch64_uimm12_shift (INTVAL (op1)))
5275 *cost += rtx_cost (op0, PLUS, 0, speed);
5277 if (speed)
5278 /* ADD (immediate). */
5279 *cost += extra_cost->alu.arith;
5280 return true;
5283 /* Look for ADD (extended register). */
5284 if (aarch64_rtx_arith_op_extract_p (op0, mode))
5286 if (speed)
5287 *cost += extra_cost->alu.arith_shift;
5289 *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
5290 (enum rtx_code) GET_CODE (op0),
5291 0, speed);
5292 return true;
5295 /* Strip any extend, leave shifts behind as we will
5296 cost them through mult_cost. */
5297 new_op0 = aarch64_strip_extend (op0);
5299 if (GET_CODE (new_op0) == MULT
5300 || GET_CODE (new_op0) == ASHIFT)
5302 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
5303 speed);
5304 *cost += rtx_cost (op1, PLUS, 1, speed);
5305 return true;
5308 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
5309 + rtx_cost (op1, PLUS, 1, speed));
5311 if (speed)
5313 if (GET_MODE_CLASS (mode) == MODE_INT)
5314 /* ADD. */
5315 *cost += extra_cost->alu.arith;
5316 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5317 /* FADD. */
5318 *cost += extra_cost->fp[mode == DFmode].addsub;
5320 return true;
5323 case BSWAP:
5324 *cost = COSTS_N_INSNS (1);
5326 if (speed)
5327 *cost += extra_cost->alu.rev;
5329 return false;
5331 case IOR:
5332 if (aarch_rev16_p (x))
5334 *cost = COSTS_N_INSNS (1);
5336 if (speed)
5337 *cost += extra_cost->alu.rev;
5339 return true;
5341 /* Fall through. */
5342 case XOR:
5343 case AND:
5344 cost_logic:
5345 op0 = XEXP (x, 0);
5346 op1 = XEXP (x, 1);
5348 if (code == AND
5349 && GET_CODE (op0) == MULT
5350 && CONST_INT_P (XEXP (op0, 1))
5351 && CONST_INT_P (op1)
5352 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
5353 INTVAL (op1)) != 0)
5355 /* This is a UBFM/SBFM. */
5356 *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
5357 if (speed)
5358 *cost += extra_cost->alu.bfx;
5359 return true;
5362 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5364 /* We possibly get the immediate for free, this is not
5365 modelled. */
5366 if (CONST_INT_P (op1)
5367 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
5369 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5371 if (speed)
5372 *cost += extra_cost->alu.logical;
5374 return true;
5376 else
5378 rtx new_op0 = op0;
5380 /* Handle ORN, EON, or BIC. */
5381 if (GET_CODE (op0) == NOT)
5382 op0 = XEXP (op0, 0);
5384 new_op0 = aarch64_strip_shift (op0);
5386 /* If we had a shift on op0 then this is a logical-shift-
5387 by-register/immediate operation. Otherwise, this is just
5388 a logical operation. */
5389 if (speed)
5391 if (new_op0 != op0)
5393 /* Shift by immediate. */
5394 if (CONST_INT_P (XEXP (op0, 1)))
5395 *cost += extra_cost->alu.log_shift;
5396 else
5397 *cost += extra_cost->alu.log_shift_reg;
5399 else
5400 *cost += extra_cost->alu.logical;
5403 /* In both cases we want to cost both operands. */
5404 *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
5405 + rtx_cost (op1, (enum rtx_code) code, 1, speed);
5407 return true;
5410 return false;
5412 case NOT:
5413 /* MVN. */
5414 if (speed)
5415 *cost += extra_cost->alu.logical;
5417 /* The logical instruction could have the shifted register form,
5418 but the cost is the same if the shift is processed as a separate
5419 instruction, so we don't bother with it here. */
5420 return false;
5422 case ZERO_EXTEND:
5424 op0 = XEXP (x, 0);
5425 /* If a value is written in SI mode, then zero extended to DI
5426 mode, the operation will in general be free as a write to
5427 a 'w' register implicitly zeroes the upper bits of an 'x'
5428 register. However, if this is
5430 (set (reg) (zero_extend (reg)))
5432 we must cost the explicit register move. */
5433 if (mode == DImode
5434 && GET_MODE (op0) == SImode
5435 && outer == SET)
5437 int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
5439 if (!op_cost && speed)
5440 /* MOV. */
5441 *cost += extra_cost->alu.extend;
5442 else
5443 /* Free, the cost is that of the SI mode operation. */
5444 *cost = op_cost;
5446 return true;
5448 else if (MEM_P (XEXP (x, 0)))
5450 /* All loads can zero extend to any size for free. */
5451 *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
5452 return true;
5455 /* UXTB/UXTH. */
5456 if (speed)
5457 *cost += extra_cost->alu.extend;
5459 return false;
5461 case SIGN_EXTEND:
5462 if (MEM_P (XEXP (x, 0)))
5464 /* LDRSH. */
5465 if (speed)
5467 rtx address = XEXP (XEXP (x, 0), 0);
5468 *cost += extra_cost->ldst.load_sign_extend;
5470 *cost +=
5471 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5472 0, speed));
5474 return true;
5477 if (speed)
5478 *cost += extra_cost->alu.extend;
5479 return false;
5481 case ASHIFT:
5482 op0 = XEXP (x, 0);
5483 op1 = XEXP (x, 1);
5485 if (CONST_INT_P (op1))
5487 /* LSL (immediate), UBFM, UBFIZ and friends. These are all
5488 aliases. */
5489 if (speed)
5490 *cost += extra_cost->alu.shift;
5492 /* We can incorporate zero/sign extend for free. */
5493 if (GET_CODE (op0) == ZERO_EXTEND
5494 || GET_CODE (op0) == SIGN_EXTEND)
5495 op0 = XEXP (op0, 0);
5497 *cost += rtx_cost (op0, ASHIFT, 0, speed);
5498 return true;
5500 else
5502 /* LSLV. */
5503 if (speed)
5504 *cost += extra_cost->alu.shift_reg;
5506 return false; /* All arguments need to be in registers. */
5509 case ROTATE:
5510 case ROTATERT:
5511 case LSHIFTRT:
5512 case ASHIFTRT:
5513 op0 = XEXP (x, 0);
5514 op1 = XEXP (x, 1);
5516 if (CONST_INT_P (op1))
5518 /* ASR (immediate) and friends. */
5519 if (speed)
5520 *cost += extra_cost->alu.shift;
5522 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5523 return true;
5525 else
5528 /* ASR (register) and friends. */
5529 if (speed)
5530 *cost += extra_cost->alu.shift_reg;
5532 return false; /* All arguments need to be in registers. */
5535 case SYMBOL_REF:
5537 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
5539 /* LDR. */
5540 if (speed)
5541 *cost += extra_cost->ldst.load;
5543 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
5544 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
5546 /* ADRP, followed by ADD. */
5547 *cost += COSTS_N_INSNS (1);
5548 if (speed)
5549 *cost += 2 * extra_cost->alu.arith;
5551 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
5552 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
5554 /* ADR. */
5555 if (speed)
5556 *cost += extra_cost->alu.arith;
5559 if (flag_pic)
5561 /* One extra load instruction, after accessing the GOT. */
5562 *cost += COSTS_N_INSNS (1);
5563 if (speed)
5564 *cost += extra_cost->ldst.load;
5566 return true;
5568 case HIGH:
5569 case LO_SUM:
5570 /* ADRP/ADD (immediate). */
5571 if (speed)
5572 *cost += extra_cost->alu.arith;
5573 return true;
5575 case ZERO_EXTRACT:
5576 case SIGN_EXTRACT:
5577 /* UBFX/SBFX. */
5578 if (speed)
5579 *cost += extra_cost->alu.bfx;
5581 /* We can trust that the immediates used will be correct (there
5582 are no by-register forms), so we need only cost op0. */
5583 *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed);
5584 return true;
5586 case MULT:
5587 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
5588 /* aarch64_rtx_mult_cost always handles recursion to its
5589 operands. */
5590 return true;
5592 case MOD:
5593 case UMOD:
5594 if (speed)
5596 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5597 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
5598 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
5599 else if (GET_MODE (x) == DFmode)
5600 *cost += (extra_cost->fp[1].mult
5601 + extra_cost->fp[1].div);
5602 else if (GET_MODE (x) == SFmode)
5603 *cost += (extra_cost->fp[0].mult
5604 + extra_cost->fp[0].div);
5606 return false; /* All arguments need to be in registers. */
5608 case DIV:
5609 case UDIV:
5610 case SQRT:
5611 if (speed)
5613 if (GET_MODE_CLASS (mode) == MODE_INT)
5614 /* There is no integer SQRT, so only DIV and UDIV can get
5615 here. */
5616 *cost += extra_cost->mult[mode == DImode].idiv;
5617 else
5618 *cost += extra_cost->fp[mode == DFmode].div;
5620 return false; /* All arguments need to be in registers. */
5622 case IF_THEN_ELSE:
5623 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
5624 XEXP (x, 2), cost, speed);
5626 case EQ:
5627 case NE:
5628 case GT:
5629 case GTU:
5630 case LT:
5631 case LTU:
5632 case GE:
5633 case GEU:
5634 case LE:
5635 case LEU:
5637 return false; /* All arguments must be in registers. */
5639 case FMA:
5640 op0 = XEXP (x, 0);
5641 op1 = XEXP (x, 1);
5642 op2 = XEXP (x, 2);
5644 if (speed)
5645 *cost += extra_cost->fp[mode == DFmode].fma;
5647 /* FMSUB, FNMADD, and FNMSUB are free. */
5648 if (GET_CODE (op0) == NEG)
5649 op0 = XEXP (op0, 0);
5651 if (GET_CODE (op2) == NEG)
5652 op2 = XEXP (op2, 0);
5654 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
5655 and the by-element operand as operand 0. */
5656 if (GET_CODE (op1) == NEG)
5657 op1 = XEXP (op1, 0);
5659 /* Catch vector-by-element operations. The by-element operand can
5660 either be (vec_duplicate (vec_select (x))) or just
5661 (vec_select (x)), depending on whether we are multiplying by
5662 a vector or a scalar.
5664 Canonicalization is not very good in these cases, FMA4 will put the
5665 by-element operand as operand 0, FNMA4 will have it as operand 1. */
5666 if (GET_CODE (op0) == VEC_DUPLICATE)
5667 op0 = XEXP (op0, 0);
5668 else if (GET_CODE (op1) == VEC_DUPLICATE)
5669 op1 = XEXP (op1, 0);
5671 if (GET_CODE (op0) == VEC_SELECT)
5672 op0 = XEXP (op0, 0);
5673 else if (GET_CODE (op1) == VEC_SELECT)
5674 op1 = XEXP (op1, 0);
5676 /* If the remaining parameters are not registers,
5677 get the cost to put them into registers. */
5678 *cost += rtx_cost (op0, FMA, 0, speed);
5679 *cost += rtx_cost (op1, FMA, 1, speed);
5680 *cost += rtx_cost (op2, FMA, 2, speed);
5681 return true;
5683 case FLOAT_EXTEND:
5684 if (speed)
5685 *cost += extra_cost->fp[mode == DFmode].widen;
5686 return false;
5688 case FLOAT_TRUNCATE:
5689 if (speed)
5690 *cost += extra_cost->fp[mode == DFmode].narrow;
5691 return false;
5693 case FIX:
5694 case UNSIGNED_FIX:
5695 x = XEXP (x, 0);
5696 /* Strip the rounding part. They will all be implemented
5697 by the fcvt* family of instructions anyway. */
5698 if (GET_CODE (x) == UNSPEC)
5700 unsigned int uns_code = XINT (x, 1);
5702 if (uns_code == UNSPEC_FRINTA
5703 || uns_code == UNSPEC_FRINTM
5704 || uns_code == UNSPEC_FRINTN
5705 || uns_code == UNSPEC_FRINTP
5706 || uns_code == UNSPEC_FRINTZ)
5707 x = XVECEXP (x, 0, 0);
5710 if (speed)
5711 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
5713 *cost += rtx_cost (x, (enum rtx_code) code, 0, speed);
5714 return true;
5716 case ABS:
5717 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5719 /* FABS and FNEG are analogous. */
5720 if (speed)
5721 *cost += extra_cost->fp[mode == DFmode].neg;
5723 else
5725 /* Integer ABS will either be split to
5726 two arithmetic instructions, or will be an ABS
5727 (scalar), which we don't model. */
5728 *cost = COSTS_N_INSNS (2);
5729 if (speed)
5730 *cost += 2 * extra_cost->alu.arith;
5732 return false;
5734 case SMAX:
5735 case SMIN:
5736 if (speed)
5738 /* FMAXNM/FMINNM/FMAX/FMIN.
5739 TODO: This may not be accurate for all implementations, but
5740 we do not model this in the cost tables. */
5741 *cost += extra_cost->fp[mode == DFmode].addsub;
5743 return false;
5745 case UNSPEC:
5746 /* The floating point round to integer frint* instructions. */
5747 if (aarch64_frint_unspec_p (XINT (x, 1)))
5749 if (speed)
5750 *cost += extra_cost->fp[mode == DFmode].roundint;
5752 return false;
5755 if (XINT (x, 1) == UNSPEC_RBIT)
5757 if (speed)
5758 *cost += extra_cost->alu.rev;
5760 return false;
5762 break;
5764 case TRUNCATE:
5766 /* Decompose <su>muldi3_highpart. */
5767 if (/* (truncate:DI */
5768 mode == DImode
5769 /* (lshiftrt:TI */
5770 && GET_MODE (XEXP (x, 0)) == TImode
5771 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
5772 /* (mult:TI */
5773 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5774 /* (ANY_EXTEND:TI (reg:DI))
5775 (ANY_EXTEND:TI (reg:DI))) */
5776 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
5777 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
5778 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
5779 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
5780 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
5781 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
5782 /* (const_int 64) */
5783 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
5784 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
5786 /* UMULH/SMULH. */
5787 if (speed)
5788 *cost += extra_cost->mult[mode == DImode].extend;
5789 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
5790 MULT, 0, speed);
5791 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
5792 MULT, 1, speed);
5793 return true;
5796 /* Fall through. */
5797 default:
5798 break;
5801 if (dump_file && (dump_flags & TDF_DETAILS))
5802 fprintf (dump_file,
5803 "\nFailed to cost RTX. Assuming default cost.\n");
5805 return true;
5808 /* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
5809 calculated for X. This cost is stored in *COST. Returns true
5810 if the total cost of X was calculated. */
5811 static bool
5812 aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
5813 int param, int *cost, bool speed)
5815 bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
5817 if (dump_file && (dump_flags & TDF_DETAILS))
5819 print_rtl_single (dump_file, x);
5820 fprintf (dump_file, "\n%s cost: %d (%s)\n",
5821 speed ? "Hot" : "Cold",
5822 *cost, result ? "final" : "partial");
5825 return result;
5828 static int
5829 aarch64_register_move_cost (enum machine_mode mode,
5830 reg_class_t from_i, reg_class_t to_i)
5832 enum reg_class from = (enum reg_class) from_i;
5833 enum reg_class to = (enum reg_class) to_i;
5834 const struct cpu_regmove_cost *regmove_cost
5835 = aarch64_tune_params->regmove_cost;
5837 /* Moving between GPR and the stack register costs the same as GP2GP. */
5838 if ((from == GENERAL_REGS && to == STACK_REG)
5839 || (to == GENERAL_REGS && from == STACK_REG))
5840 return regmove_cost->GP2GP;
5842 /* To/From the stack register, we move via the gprs. */
5843 if (to == STACK_REG || from == STACK_REG)
5844 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
5845 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
5847 if (from == GENERAL_REGS && to == GENERAL_REGS)
5848 return regmove_cost->GP2GP;
5849 else if (from == GENERAL_REGS)
5850 return regmove_cost->GP2FP;
5851 else if (to == GENERAL_REGS)
5852 return regmove_cost->FP2GP;
5854 /* When AdvSIMD instructions are disabled it is not possible to move
5855 a 128-bit value directly between Q registers. This is handled in
5856 secondary reload. A general register is used as a scratch to move
5857 the upper DI value and the lower DI value is moved directly,
5858 hence the cost is the sum of three moves. */
5859 if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
5860 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
5862 return regmove_cost->FP2FP;
5865 static int
5866 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
5867 reg_class_t rclass ATTRIBUTE_UNUSED,
5868 bool in ATTRIBUTE_UNUSED)
5870 return aarch64_tune_params->memmov_cost;
5873 /* Return the number of instructions that can be issued per cycle. */
5874 static int
5875 aarch64_sched_issue_rate (void)
5877 return aarch64_tune_params->issue_rate;
5880 /* Vectorizer cost model target hooks. */
5882 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5883 static int
5884 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5885 tree vectype,
5886 int misalign ATTRIBUTE_UNUSED)
5888 unsigned elements;
5890 switch (type_of_cost)
5892 case scalar_stmt:
5893 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
5895 case scalar_load:
5896 return aarch64_tune_params->vec_costs->scalar_load_cost;
5898 case scalar_store:
5899 return aarch64_tune_params->vec_costs->scalar_store_cost;
5901 case vector_stmt:
5902 return aarch64_tune_params->vec_costs->vec_stmt_cost;
5904 case vector_load:
5905 return aarch64_tune_params->vec_costs->vec_align_load_cost;
5907 case vector_store:
5908 return aarch64_tune_params->vec_costs->vec_store_cost;
5910 case vec_to_scalar:
5911 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
5913 case scalar_to_vec:
5914 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
5916 case unaligned_load:
5917 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
5919 case unaligned_store:
5920 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
5922 case cond_branch_taken:
5923 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
5925 case cond_branch_not_taken:
5926 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
5928 case vec_perm:
5929 case vec_promote_demote:
5930 return aarch64_tune_params->vec_costs->vec_stmt_cost;
5932 case vec_construct:
5933 elements = TYPE_VECTOR_SUBPARTS (vectype);
5934 return elements / 2 + 1;
5936 default:
5937 gcc_unreachable ();
5941 /* Implement targetm.vectorize.add_stmt_cost. */
5942 static unsigned
5943 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5944 struct _stmt_vec_info *stmt_info, int misalign,
5945 enum vect_cost_model_location where)
5947 unsigned *cost = (unsigned *) data;
5948 unsigned retval = 0;
5950 if (flag_vect_cost_model)
5952 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5953 int stmt_cost =
5954 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
5956 /* Statements in an inner loop relative to the loop being
5957 vectorized are weighted more heavily. The value here is
5958 a function (linear for now) of the loop nest level. */
5959 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5961 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
5962 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
5963 unsigned nest_level = loop_depth (loop);
5965 count *= nest_level;
5968 retval = (unsigned) (count * stmt_cost);
5969 cost[where] += retval;
5972 return retval;
5975 static void initialize_aarch64_code_model (void);
5977 /* Parse the architecture extension string. */
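/* For example (option spelling assumed from the usual -mcpu/-march syntax
   rather than taken from this file), for -mcpu=cortex-a57+crc+nocrypto the
   string is the "+crc+nocrypto" tail: "+crc" switches the corresponding
   ISA flags on and "+nocrypto" switches the crypto flags off, scanned
   left to right.  */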
5979 static void
5980 aarch64_parse_extension (char *str)
5982 /* The extension string is parsed left to right. */
5983 const struct aarch64_option_extension *opt = NULL;
5985 /* Flag to say whether we are adding or removing an extension. */
5986 int adding_ext = -1;
5988 while (str != NULL && *str != 0)
5990 char *ext;
5991 size_t len;
5993 str++;
5994 ext = strchr (str, '+');
5996 if (ext != NULL)
5997 len = ext - str;
5998 else
5999 len = strlen (str);
6001 if (len >= 2 && strncmp (str, "no", 2) == 0)
6003 adding_ext = 0;
6004 len -= 2;
6005 str += 2;
6007 else if (len > 0)
6008 adding_ext = 1;
6010 if (len == 0)
6012 error ("missing feature modifier after %qs", "+no");
6013 return;
6016 /* Scan over the extensions table trying to find an exact match. */
6017 for (opt = all_extensions; opt->name != NULL; opt++)
6019 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
6021 /* Add or remove the extension. */
6022 if (adding_ext)
6023 aarch64_isa_flags |= opt->flags_on;
6024 else
6025 aarch64_isa_flags &= ~(opt->flags_off);
6026 break;
6030 if (opt->name == NULL)
6032 /* Extension not found in list. */
6033 error ("unknown feature modifier %qs", str);
6034 return;
6037 str = ext;
6040 return;
6043 /* Parse the ARCH string. */
6045 static void
6046 aarch64_parse_arch (void)
6048 char *ext;
6049 const struct processor *arch;
6050 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
6051 size_t len;
6053 strcpy (str, aarch64_arch_string);
6055 ext = strchr (str, '+');
6057 if (ext != NULL)
6058 len = ext - str;
6059 else
6060 len = strlen (str);
6062 if (len == 0)
6064 error ("missing arch name in -march=%qs", str);
6065 return;
6068 /* Loop through the list of supported ARCHs to find a match. */
6069 for (arch = all_architectures; arch->name != NULL; arch++)
6071 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
6073 selected_arch = arch;
6074 aarch64_isa_flags = selected_arch->flags;
6076 if (!selected_cpu)
6077 selected_cpu = &all_cores[selected_arch->core];
6079 if (ext != NULL)
6081 /* ARCH string contains at least one extension. */
6082 aarch64_parse_extension (ext);
6085 if (strcmp (selected_arch->arch, selected_cpu->arch))
6087 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
6088 selected_cpu->name, selected_arch->name);
6091 return;
6095 /* ARCH name not found in list. */
6096 error ("unknown value %qs for -march", str);
6097 return;
6100 /* Parse the CPU string. */
6102 static void
6103 aarch64_parse_cpu (void)
6105 char *ext;
6106 const struct processor *cpu;
6107 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
6108 size_t len;
6110 strcpy (str, aarch64_cpu_string);
6112 ext = strchr (str, '+');
6114 if (ext != NULL)
6115 len = ext - str;
6116 else
6117 len = strlen (str);
6119 if (len == 0)
6121 error ("missing cpu name in -mcpu=%qs", str);
6122 return;
6125 /* Loop through the list of supported CPUs to find a match. */
6126 for (cpu = all_cores; cpu->name != NULL; cpu++)
6128 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
6130 selected_cpu = cpu;
6131 selected_tune = cpu;
6132 aarch64_isa_flags = selected_cpu->flags;
6134 if (ext != NULL)
6136 /* CPU string contains at least one extension. */
6137 aarch64_parse_extension (ext);
6140 return;
6144 /* CPU name not found in list. */
6145 error ("unknown value %qs for -mcpu", str);
6146 return;
6149 /* Parse the TUNE string. */
6151 static void
6152 aarch64_parse_tune (void)
6154 const struct processor *cpu;
6155 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
6156 strcpy (str, aarch64_tune_string);
6158 /* Loop through the list of supported CPUs to find a match. */
6159 for (cpu = all_cores; cpu->name != NULL; cpu++)
6161 if (strcmp (cpu->name, str) == 0)
6163 selected_tune = cpu;
6164 return;
6168 /* CPU name not found in list. */
6169 error ("unknown value %qs for -mtune", str);
6170 return;
6174 /* Implement TARGET_OPTION_OVERRIDE. */
6176 static void
6177 aarch64_override_options (void)
6179 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
6180 If either of -march or -mtune is given, they override their
6181 respective component of -mcpu.
6183 So, first parse AARCH64_CPU_STRING, then the others, be careful
6184 with -march as, if -mcpu is not present on the command line, march
6185 must set a sensible default CPU. */
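  /* As a concrete illustration (flag spellings assumed rather than taken
     from this file): -mcpu=cortex-a53 acts roughly like
     -march=armv8-a -mtune=cortex-a53, while supplying an explicit
     -march=armv8-a+crc alongside it replaces only the ISA flags and the
     tuning still follows cortex-a53.  */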
6186 if (aarch64_cpu_string)
6188 aarch64_parse_cpu ();
6191 if (aarch64_arch_string)
6193 aarch64_parse_arch ();
6196 if (aarch64_tune_string)
6198 aarch64_parse_tune ();
6201 #ifndef HAVE_AS_MABI_OPTION
6202 /* The compiler may have been configured with 2.23.* binutils, which does
6203 not have support for ILP32. */
6204 if (TARGET_ILP32)
6205 error ("Assembler does not support -mabi=ilp32");
6206 #endif
6208 initialize_aarch64_code_model ();
6210 aarch64_build_bitmask_table ();
6212 /* This target defaults to strict volatile bitfields. */
6213 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
6214 flag_strict_volatile_bitfields = 1;
6216 /* If the user did not specify a processor, choose the default
6217 one for them. This will be the CPU set during configuration using
6218 --with-cpu, otherwise it is "generic". */
6219 if (!selected_cpu)
6221 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
6222 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
6225 gcc_assert (selected_cpu);
6227 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
6228 if (!selected_tune)
6229 selected_tune = &all_cores[selected_cpu->core];
6231 aarch64_tune_flags = selected_tune->flags;
6232 aarch64_tune = selected_tune->core;
6233 aarch64_tune_params = selected_tune->tune;
6235 aarch64_override_options_after_change ();
6238 /* Implement targetm.override_options_after_change. */
6240 static void
6241 aarch64_override_options_after_change (void)
6243 if (flag_omit_frame_pointer)
6244 flag_omit_leaf_frame_pointer = false;
6245 else if (flag_omit_leaf_frame_pointer)
6246 flag_omit_frame_pointer = true;
6249 static struct machine_function *
6250 aarch64_init_machine_status (void)
6252 struct machine_function *machine;
6253 machine = ggc_cleared_alloc<machine_function> ();
6254 return machine;
6257 void
6258 aarch64_init_expanders (void)
6260 init_machine_status = aarch64_init_machine_status;
6263 /* A checking mechanism for the implementation of the various code models. */
6264 static void
6265 initialize_aarch64_code_model (void)
6267 if (flag_pic)
6269 switch (aarch64_cmodel_var)
6271 case AARCH64_CMODEL_TINY:
6272 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
6273 break;
6274 case AARCH64_CMODEL_SMALL:
6275 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
6276 break;
6277 case AARCH64_CMODEL_LARGE:
6278 sorry ("code model %qs with -f%s", "large",
6279 flag_pic > 1 ? "PIC" : "pic");
6280 default:
6281 gcc_unreachable ();
6284 else
6285 aarch64_cmodel = aarch64_cmodel_var;
6288 /* Return true if SYMBOL_REF X binds locally. */
6290 static bool
6291 aarch64_symbol_binds_local_p (const_rtx x)
6293 return (SYMBOL_REF_DECL (x)
6294 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
6295 : SYMBOL_REF_LOCAL_P (x));
6298 /* Return true if SYMBOL_REF X is thread local */
6299 static bool
6300 aarch64_tls_symbol_p (rtx x)
6302 if (! TARGET_HAVE_TLS)
6303 return false;
6305 if (GET_CODE (x) != SYMBOL_REF)
6306 return false;
6308 return SYMBOL_REF_TLS_MODEL (x) != 0;
6311 /* Classify a TLS symbol into one of the TLS kinds. */
6312 enum aarch64_symbol_type
6313 aarch64_classify_tls_symbol (rtx x)
6315 enum tls_model tls_kind = tls_symbolic_operand_type (x);
6317 switch (tls_kind)
6319 case TLS_MODEL_GLOBAL_DYNAMIC:
6320 case TLS_MODEL_LOCAL_DYNAMIC:
6321 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
6323 case TLS_MODEL_INITIAL_EXEC:
6324 return SYMBOL_SMALL_GOTTPREL;
6326 case TLS_MODEL_LOCAL_EXEC:
6327 return SYMBOL_SMALL_TPREL;
6329 case TLS_MODEL_EMULATED:
6330 case TLS_MODEL_NONE:
6331 return SYMBOL_FORCE_TO_MEM;
6333 default:
6334 gcc_unreachable ();
6338 /* Return the method that should be used to access SYMBOL_REF or
6339 LABEL_REF X in context CONTEXT. */
6341 enum aarch64_symbol_type
6342 aarch64_classify_symbol (rtx x,
6343 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
6345 if (GET_CODE (x) == LABEL_REF)
6347 switch (aarch64_cmodel)
6349 case AARCH64_CMODEL_LARGE:
6350 return SYMBOL_FORCE_TO_MEM;
6352 case AARCH64_CMODEL_TINY_PIC:
6353 case AARCH64_CMODEL_TINY:
6354 return SYMBOL_TINY_ABSOLUTE;
6356 case AARCH64_CMODEL_SMALL_PIC:
6357 case AARCH64_CMODEL_SMALL:
6358 return SYMBOL_SMALL_ABSOLUTE;
6360 default:
6361 gcc_unreachable ();
6365 if (GET_CODE (x) == SYMBOL_REF)
6367 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
6368 return SYMBOL_FORCE_TO_MEM;
6370 if (aarch64_tls_symbol_p (x))
6371 return aarch64_classify_tls_symbol (x);
6373 switch (aarch64_cmodel)
6375 case AARCH64_CMODEL_TINY:
6376 if (SYMBOL_REF_WEAK (x))
6377 return SYMBOL_FORCE_TO_MEM;
6378 return SYMBOL_TINY_ABSOLUTE;
6380 case AARCH64_CMODEL_SMALL:
6381 if (SYMBOL_REF_WEAK (x))
6382 return SYMBOL_FORCE_TO_MEM;
6383 return SYMBOL_SMALL_ABSOLUTE;
6385 case AARCH64_CMODEL_TINY_PIC:
6386 if (!aarch64_symbol_binds_local_p (x))
6387 return SYMBOL_TINY_GOT;
6388 return SYMBOL_TINY_ABSOLUTE;
6390 case AARCH64_CMODEL_SMALL_PIC:
6391 if (!aarch64_symbol_binds_local_p (x))
6392 return SYMBOL_SMALL_GOT;
6393 return SYMBOL_SMALL_ABSOLUTE;
6395 default:
6396 gcc_unreachable ();
6400 /* By default push everything into the constant pool. */
6401 return SYMBOL_FORCE_TO_MEM;
6404 bool
6405 aarch64_constant_address_p (rtx x)
6407 return (CONSTANT_P (x) && memory_address_p (DImode, x));
6410 bool
6411 aarch64_legitimate_pic_operand_p (rtx x)
6413 if (GET_CODE (x) == SYMBOL_REF
6414 || (GET_CODE (x) == CONST
6415 && GET_CODE (XEXP (x, 0)) == PLUS
6416 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6417 return false;
6419 return true;
6422 /* Return true if X holds either a quarter-precision or
6423 floating-point +0.0 constant. */
6424 static bool
6425 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
6427 if (!CONST_DOUBLE_P (x))
6428 return false;
6430 /* TODO: We could handle moving 0.0 to a TFmode register,
6431 but first we would like to refactor the movtf_aarch64
6432 to be more amenable to splitting moves properly and
6433 correctly gating on TARGET_SIMD. For now, reject all
6434 constants that are not destined for SFmode or DFmode registers. */
6435 if (!(mode == SFmode || mode == DFmode))
6436 return false;
6438 if (aarch64_float_const_zero_rtx_p (x))
6439 return true;
6440 return aarch64_float_const_representable_p (x);
6443 static bool
6444 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
6446 /* Do not allow vector struct mode constants. We could support
6447 0 and -1 easily, but they need support in aarch64-simd.md. */
6448 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
6449 return false;
6451 /* This could probably go away because
6452 we now decompose CONST_INTs according to expand_mov_immediate. */
6453 if ((GET_CODE (x) == CONST_VECTOR
6454 && aarch64_simd_valid_immediate (x, mode, false, NULL))
6455 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
6456 return !targetm.cannot_force_const_mem (mode, x);
6458 if (GET_CODE (x) == HIGH
6459 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6460 return true;
6462 return aarch64_constant_address_p (x);
6466 aarch64_load_tp (rtx target)
6468 if (!target
6469 || GET_MODE (target) != Pmode
6470 || !register_operand (target, Pmode))
6471 target = gen_reg_rtx (Pmode);
6473 /* Can return in any reg. */
6474 emit_insn (gen_aarch64_load_tp_hard (target));
6475 return target;
6478 /* On AAPCS systems, this is the "struct __va_list". */
6479 static GTY(()) tree va_list_type;
6481 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
6482 Return the type to use as __builtin_va_list.
6484 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
6486 struct __va_list
6488 void *__stack;
6489 void *__gr_top;
6490 void *__vr_top;
6491 int __gr_offs;
6492 int __vr_offs;
6493 }; */
6495 static tree
6496 aarch64_build_builtin_va_list (void)
6498 tree va_list_name;
6499 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6501 /* Create the type. */
6502 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
6503 /* Give it the required name. */
6504 va_list_name = build_decl (BUILTINS_LOCATION,
6505 TYPE_DECL,
6506 get_identifier ("__va_list"),
6507 va_list_type);
6508 DECL_ARTIFICIAL (va_list_name) = 1;
6509 TYPE_NAME (va_list_type) = va_list_name;
6510 TYPE_STUB_DECL (va_list_type) = va_list_name;
6512 /* Create the fields. */
6513 f_stack = build_decl (BUILTINS_LOCATION,
6514 FIELD_DECL, get_identifier ("__stack"),
6515 ptr_type_node);
6516 f_grtop = build_decl (BUILTINS_LOCATION,
6517 FIELD_DECL, get_identifier ("__gr_top"),
6518 ptr_type_node);
6519 f_vrtop = build_decl (BUILTINS_LOCATION,
6520 FIELD_DECL, get_identifier ("__vr_top"),
6521 ptr_type_node);
6522 f_groff = build_decl (BUILTINS_LOCATION,
6523 FIELD_DECL, get_identifier ("__gr_offs"),
6524 integer_type_node);
6525 f_vroff = build_decl (BUILTINS_LOCATION,
6526 FIELD_DECL, get_identifier ("__vr_offs"),
6527 integer_type_node);
6529 DECL_ARTIFICIAL (f_stack) = 1;
6530 DECL_ARTIFICIAL (f_grtop) = 1;
6531 DECL_ARTIFICIAL (f_vrtop) = 1;
6532 DECL_ARTIFICIAL (f_groff) = 1;
6533 DECL_ARTIFICIAL (f_vroff) = 1;
6535 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
6536 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
6537 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
6538 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
6539 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
6541 TYPE_FIELDS (va_list_type) = f_stack;
6542 DECL_CHAIN (f_stack) = f_grtop;
6543 DECL_CHAIN (f_grtop) = f_vrtop;
6544 DECL_CHAIN (f_vrtop) = f_groff;
6545 DECL_CHAIN (f_groff) = f_vroff;
6547 /* Compute its layout. */
6548 layout_type (va_list_type);
6550 return va_list_type;
6553 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
6554 static void
6555 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6557 const CUMULATIVE_ARGS *cum;
6558 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6559 tree stack, grtop, vrtop, groff, vroff;
6560 tree t;
6561 int gr_save_area_size;
6562 int vr_save_area_size;
6563 int vr_offset;
6565 cum = &crtl->args.info;
6566 gr_save_area_size
6567 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
6568 vr_save_area_size
6569 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
6571 if (TARGET_GENERAL_REGS_ONLY)
6573 if (cum->aapcs_nvrn > 0)
6574 sorry ("%qs and floating point or vector arguments",
6575 "-mgeneral-regs-only");
6576 vr_save_area_size = 0;
6579 f_stack = TYPE_FIELDS (va_list_type_node);
6580 f_grtop = DECL_CHAIN (f_stack);
6581 f_vrtop = DECL_CHAIN (f_grtop);
6582 f_groff = DECL_CHAIN (f_vrtop);
6583 f_vroff = DECL_CHAIN (f_groff);
6585 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
6586 NULL_TREE);
6587 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
6588 NULL_TREE);
6589 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
6590 NULL_TREE);
6591 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
6592 NULL_TREE);
6593 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
6594 NULL_TREE);
6596 /* Emit code to initialize STACK, which points to the next varargs stack
6597 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
6598 by named arguments. STACK is 8-byte aligned. */
6599 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
6600 if (cum->aapcs_stack_size > 0)
6601 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
6602 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
6603 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6605 /* Emit code to initialize GRTOP, the top of the GR save area.
6606 virtual_incoming_args_rtx should have been 16-byte aligned. */
6607 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
6608 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
6609 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6611 /* Emit code to initialize VRTOP, the top of the VR save area.
6612 This address is gr_save_area_size bytes below GRTOP, rounded
6613 down to the next 16-byte boundary. */
6614 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
6615 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
6616 STACK_BOUNDARY / BITS_PER_UNIT);
6618 if (vr_offset)
6619 t = fold_build_pointer_plus_hwi (t, -vr_offset);
6620 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
6621 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6623 /* Emit code to initialize GROFF, the offset from GRTOP of the
6624 next GPR argument. */
6625 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
6626 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
6627 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6629 /* Likewise emit code to initialize VROFF, the offset from VRTOP
6630 of the next VR argument. */
6631 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
6632 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
6633 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
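/* For illustration: for a variadic callee such as "int f (int n, ...)",
   the single named argument uses one integer argument register and no
   FP/SIMD ones, so the code above initializes the va_list roughly as

     __stack   = <incoming arg pointer>   (no named stack arguments)
     __gr_top  = <incoming arg pointer>
     __vr_top  = __gr_top - 64            (56 rounded up to 16 bytes)
     __gr_offs = -56                      (7 unused X registers * 8)
     __vr_offs = -128                     (8 unused V registers * 16)

   assuming NUM_ARG_REGS == 8 and NUM_FP_ARG_REGS == 8.  */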
6636 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
6638 static tree
6639 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6640 gimple_seq *post_p ATTRIBUTE_UNUSED)
6642 tree addr;
6643 bool indirect_p;
6644 bool is_ha; /* is HFA or HVA. */
6645 bool dw_align; /* double-word align. */
6646 enum machine_mode ag_mode = VOIDmode;
6647 int nregs;
6648 enum machine_mode mode;
6650 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6651 tree stack, f_top, f_off, off, arg, roundup, on_stack;
6652 HOST_WIDE_INT size, rsize, adjust, align;
6653 tree t, u, cond1, cond2;
6655 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6656 if (indirect_p)
6657 type = build_pointer_type (type);
6659 mode = TYPE_MODE (type);
6661 f_stack = TYPE_FIELDS (va_list_type_node);
6662 f_grtop = DECL_CHAIN (f_stack);
6663 f_vrtop = DECL_CHAIN (f_grtop);
6664 f_groff = DECL_CHAIN (f_vrtop);
6665 f_vroff = DECL_CHAIN (f_groff);
6667 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
6668 f_stack, NULL_TREE);
6669 size = int_size_in_bytes (type);
6670 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
6672 dw_align = false;
6673 adjust = 0;
6674 if (aarch64_vfp_is_call_or_return_candidate (mode,
6675 type,
6676 &ag_mode,
6677 &nregs,
6678 &is_ha))
6680 /* TYPE passed in fp/simd registers. */
6681 if (TARGET_GENERAL_REGS_ONLY)
6682 sorry ("%qs and floating point or vector arguments",
6683 "-mgeneral-regs-only");
6685 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
6686 unshare_expr (valist), f_vrtop, NULL_TREE);
6687 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
6688 unshare_expr (valist), f_vroff, NULL_TREE);
6690 rsize = nregs * UNITS_PER_VREG;
6692 if (is_ha)
6694 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
6695 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
6697 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
6698 && size < UNITS_PER_VREG)
6700 adjust = UNITS_PER_VREG - size;
6703 else
6705 /* TYPE passed in general registers. */
6706 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
6707 unshare_expr (valist), f_grtop, NULL_TREE);
6708 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
6709 unshare_expr (valist), f_groff, NULL_TREE);
6710 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6711 nregs = rsize / UNITS_PER_WORD;
6713 if (align > 8)
6714 dw_align = true;
6716 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6717 && size < UNITS_PER_WORD)
6719 adjust = UNITS_PER_WORD - size;
6723 /* Get a local temporary for the field value. */
6724 off = get_initialized_tmp_var (f_off, pre_p, NULL);
6726 /* Emit code to branch if off >= 0. */
6727 t = build2 (GE_EXPR, boolean_type_node, off,
6728 build_int_cst (TREE_TYPE (off), 0));
6729 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
6731 if (dw_align)
6733 /* Emit: offs = (offs + 15) & -16. */
6734 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6735 build_int_cst (TREE_TYPE (off), 15));
6736 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
6737 build_int_cst (TREE_TYPE (off), -16));
6738 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
6740 else
6741 roundup = NULL;
6743 /* Update ap.__[g|v]r_offs */
6744 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6745 build_int_cst (TREE_TYPE (off), rsize));
6746 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
6748 /* String up. */
6749 if (roundup)
6750 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6752 /* [cond2] if (ap.__[g|v]r_offs > 0) */
6753 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
6754 build_int_cst (TREE_TYPE (f_off), 0));
6755 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
6757 /* String up: make sure the assignment happens before the use. */
6758 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
6759 COND_EXPR_ELSE (cond1) = t;
6761 /* Prepare the trees handling the argument that is passed on the stack;
6762 the top-level node will be stored in ON_STACK. */
6763 arg = get_initialized_tmp_var (stack, pre_p, NULL);
6764 if (align > 8)
6766 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
6767 t = fold_convert (intDI_type_node, arg);
6768 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6769 build_int_cst (TREE_TYPE (t), 15));
6770 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6771 build_int_cst (TREE_TYPE (t), -16));
6772 t = fold_convert (TREE_TYPE (arg), t);
6773 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
6775 else
6776 roundup = NULL;
6777 /* Advance ap.__stack */
6778 t = fold_convert (intDI_type_node, arg);
6779 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6780 build_int_cst (TREE_TYPE (t), size + 7));
6781 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6782 build_int_cst (TREE_TYPE (t), -8));
6783 t = fold_convert (TREE_TYPE (arg), t);
6784 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
6785 /* String up roundup and advance. */
6786 if (roundup)
6787 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6788 /* String up with arg */
6789 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
6790 /* Big-endianness related address adjustment. */
6791 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6792 && size < UNITS_PER_WORD)
6794 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
6795 size_int (UNITS_PER_WORD - size));
6796 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
6799 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
6800 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
6802 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
6803 t = off;
6804 if (adjust)
6805 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
6806 build_int_cst (TREE_TYPE (off), adjust));
6808 t = fold_convert (sizetype, t);
6809 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
6811 if (is_ha)
6813 /* type ha; // treat as "struct {ftype field[n];}"
6814 ... [computing offs]
6815 for (i = 0; i <nregs; ++i, offs += 16)
6816 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
6817 return ha; */
6818 int i;
6819 tree tmp_ha, field_t, field_ptr_t;
6821 /* Declare a local variable. */
6822 tmp_ha = create_tmp_var_raw (type, "ha");
6823 gimple_add_tmp_var (tmp_ha);
6825 /* Establish the base type. */
6826 switch (ag_mode)
6828 case SFmode:
6829 field_t = float_type_node;
6830 field_ptr_t = float_ptr_type_node;
6831 break;
6832 case DFmode:
6833 field_t = double_type_node;
6834 field_ptr_t = double_ptr_type_node;
6835 break;
6836 case TFmode:
6837 field_t = long_double_type_node;
6838 field_ptr_t = long_double_ptr_type_node;
6839 break;
6840 /* Half-precision and quad-precision floating point are not fully supported
6841 yet. Enable the following code once that support is complete; the correct
6842 type node for __fp16 * still needs to be found. */
6843 #if 0
6844 case HFmode:
6845 field_t = float_type_node;
6846 field_ptr_t = float_ptr_type_node;
6847 break;
6848 #endif
6849 case V2SImode:
6850 case V4SImode:
6852 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
6853 field_t = build_vector_type_for_mode (innertype, ag_mode);
6854 field_ptr_t = build_pointer_type (field_t);
6856 break;
6857 default:
6858 gcc_assert (0);
6861 /* *((field_ptr_t)&ha) = *((field_ptr_t)vr_saved_area) */
6862 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
6863 addr = t;
6864 t = fold_convert (field_ptr_t, addr);
6865 t = build2 (MODIFY_EXPR, field_t,
6866 build1 (INDIRECT_REF, field_t, tmp_ha),
6867 build1 (INDIRECT_REF, field_t, t));
6869 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
6870 for (i = 1; i < nregs; ++i)
6872 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
6873 u = fold_convert (field_ptr_t, addr);
6874 u = build2 (MODIFY_EXPR, field_t,
6875 build2 (MEM_REF, field_t, tmp_ha,
6876 build_int_cst (field_ptr_t,
6877 (i *
6878 int_size_in_bytes (field_t)))),
6879 build1 (INDIRECT_REF, field_t, u));
6880 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
6883 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
6884 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
6887 COND_EXPR_ELSE (cond2) = t;
6888 addr = fold_convert (build_pointer_type (type), cond1);
6889 addr = build_va_arg_indirect_ref (addr);
6891 if (indirect_p)
6892 addr = build_va_arg_indirect_ref (addr);
6894 return addr;
6897 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
6899 static void
6900 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
6901 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6902 int no_rtl)
6904 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6905 CUMULATIVE_ARGS local_cum;
6906 int gr_saved, vr_saved;
6908 /* The caller has advanced CUM up to, but not beyond, the last named
6909 argument. Advance a local copy of CUM past the last "real" named
6910 argument, to find out how many registers are left over. */
6911 local_cum = *cum;
6912 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
6914 /* Find out how many registers we need to save. */
6915 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
6916 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
6918 if (TARGET_GENERAL_REGS_ONLY)
6920 if (local_cum.aapcs_nvrn > 0)
6921 sorry ("%qs and floating point or vector arguments",
6922 "-mgeneral-regs-only");
6923 vr_saved = 0;
6926 if (!no_rtl)
6928 if (gr_saved > 0)
6930 rtx ptr, mem;
6932 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
6933 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
6934 - gr_saved * UNITS_PER_WORD);
6935 mem = gen_frame_mem (BLKmode, ptr);
6936 set_mem_alias_set (mem, get_varargs_alias_set ());
6938 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
6939 mem, gr_saved);
6941 if (vr_saved > 0)
6943 /* We can't use move_block_from_reg, because it will use
6944 the wrong mode, storing D regs only. */
6945 enum machine_mode mode = TImode;
6946 int off, i;
6948 /* Set OFF to the offset from virtual_incoming_args_rtx of
6949 the first vector register. The VR save area lies below
6950 the GR one, and is aligned to 16 bytes. */
6951 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
6952 STACK_BOUNDARY / BITS_PER_UNIT);
6953 off -= vr_saved * UNITS_PER_VREG;
6955 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
6957 rtx ptr, mem;
6959 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
6960 mem = gen_frame_mem (mode, ptr);
6961 set_mem_alias_set (mem, get_varargs_alias_set ());
6962 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
6963 off += UNITS_PER_VREG;
6968 /* We don't save the size into *PRETEND_SIZE because we want to avoid
6969 any complication of having crtl->args.pretend_args_size changed. */
6970 cfun->machine->frame.saved_varargs_size
6971 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
6972 STACK_BOUNDARY / BITS_PER_UNIT)
6973 + vr_saved * UNITS_PER_VREG);
6976 static void
6977 aarch64_conditional_register_usage (void)
6979 int i;
6980 if (!TARGET_FLOAT)
6982 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
6984 fixed_regs[i] = 1;
6985 call_used_regs[i] = 1;
6990 /* Walk down the type tree of TYPE counting consecutive base elements.
6991 If *MODEP is VOIDmode, then set it to the first valid floating point
6992 type. If a non-floating point type is found, or if a floating point
6993 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6994 otherwise return the count in the sub-tree. */
6995 static int
6996 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
6998 enum machine_mode mode;
6999 HOST_WIDE_INT size;
7001 switch (TREE_CODE (type))
7003 case REAL_TYPE:
7004 mode = TYPE_MODE (type);
7005 if (mode != DFmode && mode != SFmode && mode != TFmode)
7006 return -1;
7008 if (*modep == VOIDmode)
7009 *modep = mode;
7011 if (*modep == mode)
7012 return 1;
7014 break;
7016 case COMPLEX_TYPE:
7017 mode = TYPE_MODE (TREE_TYPE (type));
7018 if (mode != DFmode && mode != SFmode && mode != TFmode)
7019 return -1;
7021 if (*modep == VOIDmode)
7022 *modep = mode;
7024 if (*modep == mode)
7025 return 2;
7027 break;
7029 case VECTOR_TYPE:
7030 /* Use V2SImode and V4SImode as representatives of all 64-bit
7031 and 128-bit vector types. */
7032 size = int_size_in_bytes (type);
7033 switch (size)
7035 case 8:
7036 mode = V2SImode;
7037 break;
7038 case 16:
7039 mode = V4SImode;
7040 break;
7041 default:
7042 return -1;
7045 if (*modep == VOIDmode)
7046 *modep = mode;
7048 /* Vector modes are considered to be opaque: two vectors are
7049 equivalent for the purposes of being homogeneous aggregates
7050 if they are the same size. */
7051 if (*modep == mode)
7052 return 1;
7054 break;
7056 case ARRAY_TYPE:
7058 int count;
7059 tree index = TYPE_DOMAIN (type);
7061 /* Can't handle incomplete types nor sizes that are not
7062 fixed. */
7063 if (!COMPLETE_TYPE_P (type)
7064 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7065 return -1;
7067 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
7068 if (count == -1
7069 || !index
7070 || !TYPE_MAX_VALUE (index)
7071 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
7072 || !TYPE_MIN_VALUE (index)
7073 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
7074 || count < 0)
7075 return -1;
7077 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
7078 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
7080 /* There must be no padding. */
7081 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7082 return -1;
7084 return count;
7087 case RECORD_TYPE:
7089 int count = 0;
7090 int sub_count;
7091 tree field;
7093 /* Can't handle incomplete types nor sizes that are not
7094 fixed. */
7095 if (!COMPLETE_TYPE_P (type)
7096 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7097 return -1;
7099 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7101 if (TREE_CODE (field) != FIELD_DECL)
7102 continue;
7104 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7105 if (sub_count < 0)
7106 return -1;
7107 count += sub_count;
7110 /* There must be no padding. */
7111 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7112 return -1;
7114 return count;
7117 case UNION_TYPE:
7118 case QUAL_UNION_TYPE:
7120 /* These aren't very interesting except in a degenerate case. */
7121 int count = 0;
7122 int sub_count;
7123 tree field;
7125 /* Can't handle incomplete types nor sizes that are not
7126 fixed. */
7127 if (!COMPLETE_TYPE_P (type)
7128 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7129 return -1;
7131 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7133 if (TREE_CODE (field) != FIELD_DECL)
7134 continue;
7136 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7137 if (sub_count < 0)
7138 return -1;
7139 count = count > sub_count ? count : sub_count;
7142 /* There must be no padding. */
7143 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7144 return -1;
7146 return count;
7149 default:
7150 break;
7153 return -1;
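/* Illustrative classifications for the walk above, assuming the usual
   type layouts:

     struct { float x, y, z; }       -> 3, *modep == SFmode (an HFA)
     struct { double re, im; }       -> 2, *modep == DFmode
     float32x4_t a[2]                -> 2, *modep == V4SImode
     struct { float f; double d; }   -> -1 (mismatched element modes)
     struct { float f; int i; }      -> -1 (non-floating-point member)  */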
7156 /* Return true if we use LRA instead of the reload pass. */
7157 static bool
7158 aarch64_lra_p (void)
7160 return aarch64_lra_flag;
7163 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
7164 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
7165 array types. The C99 floating-point complex types are also considered
7166 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
7167 types, which are GCC extensions and outside the scope of AAPCS64, are
7168 treated as composite types here as well.
7170 Note that MODE itself is not sufficient in determining whether a type
7171 is such a composite type or not. This is because
7172 stor-layout.c:compute_record_mode may have already changed the MODE
7173 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
7174 structure with only one field may have its MODE set to the mode of the
7175 field. Also an integer mode whose size matches the size of the
7176 RECORD_TYPE type may be used in place of the original mode
7177 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
7178 solely relied on. */
7180 static bool
7181 aarch64_composite_type_p (const_tree type,
7182 enum machine_mode mode)
7184 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
7185 return true;
7187 if (mode == BLKmode
7188 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7189 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
7190 return true;
7192 return false;
7195 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
7196 type as described in AAPCS64 \S 4.1.2.
7198 See the comment above aarch64_composite_type_p for the notes on MODE. */
7200 static bool
7201 aarch64_short_vector_p (const_tree type,
7202 enum machine_mode mode)
7204 HOST_WIDE_INT size = -1;
7206 if (type && TREE_CODE (type) == VECTOR_TYPE)
7207 size = int_size_in_bytes (type);
7208 else if (!aarch64_composite_type_p (type, mode)
7209 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
7210 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
7211 size = GET_MODE_SIZE (mode);
7213 return (size == 8 || size == 16) ? true : false;
7216 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
7217 shall be passed or returned in simd/fp register(s) (providing these
7218 parameter passing registers are available).
7220 Upon successful return, *COUNT returns the number of needed registers,
7221 *BASE_MODE returns the mode of the individual register and when IS_HA
7222 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
7223 floating-point aggregate or a homogeneous short-vector aggregate. */
7225 static bool
7226 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
7227 const_tree type,
7228 enum machine_mode *base_mode,
7229 int *count,
7230 bool *is_ha)
7232 enum machine_mode new_mode = VOIDmode;
7233 bool composite_p = aarch64_composite_type_p (type, mode);
7235 if (is_ha != NULL) *is_ha = false;
7237 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
7238 || aarch64_short_vector_p (type, mode))
7240 *count = 1;
7241 new_mode = mode;
7243 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7245 if (is_ha != NULL) *is_ha = true;
7246 *count = 2;
7247 new_mode = GET_MODE_INNER (mode);
7249 else if (type && composite_p)
7251 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
7253 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
7255 if (is_ha != NULL) *is_ha = true;
7256 *count = ag_count;
7258 else
7259 return false;
7261 else
7262 return false;
7264 *base_mode = new_mode;
7265 return true;
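/* For example: a plain "double" gives *count == 1 and *base_mode == DFmode;
   a "_Complex double" gives *count == 2, *base_mode == DFmode and *is_ha
   set; a structure of four floats gives *count == 4, *base_mode == SFmode
   and *is_ha set, so it can be passed in s0-s3 when enough registers
   remain.  */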
7268 /* Implement TARGET_STRUCT_VALUE_RTX. */
7270 static rtx
7271 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
7272 int incoming ATTRIBUTE_UNUSED)
7274 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
7277 /* Implements target hook vector_mode_supported_p. */
7278 static bool
7279 aarch64_vector_mode_supported_p (enum machine_mode mode)
7281 if (TARGET_SIMD
7282 && (mode == V4SImode || mode == V8HImode
7283 || mode == V16QImode || mode == V2DImode
7284 || mode == V2SImode || mode == V4HImode
7285 || mode == V8QImode || mode == V2SFmode
7286 || mode == V4SFmode || mode == V2DFmode
7287 || mode == V1DFmode))
7288 return true;
7290 return false;
7293 /* Return appropriate SIMD container
7294 for MODE within a vector of WIDTH bits. */
7295 static enum machine_mode
7296 aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
7298 gcc_assert (width == 64 || width == 128);
7299 if (TARGET_SIMD)
7301 if (width == 128)
7302 switch (mode)
7304 case DFmode:
7305 return V2DFmode;
7306 case SFmode:
7307 return V4SFmode;
7308 case SImode:
7309 return V4SImode;
7310 case HImode:
7311 return V8HImode;
7312 case QImode:
7313 return V16QImode;
7314 case DImode:
7315 return V2DImode;
7316 default:
7317 break;
7319 else
7320 switch (mode)
7322 case SFmode:
7323 return V2SFmode;
7324 case SImode:
7325 return V2SImode;
7326 case HImode:
7327 return V4HImode;
7328 case QImode:
7329 return V8QImode;
7330 default:
7331 break;
7334 return word_mode;
7337 /* Return 128-bit container as the preferred SIMD mode for MODE. */
7338 static enum machine_mode
7339 aarch64_preferred_simd_mode (enum machine_mode mode)
7341 return aarch64_simd_container_mode (mode, 128);
7344 /* Return the bitmask of possible vector sizes for the vectorizer
7345 to iterate over. */
7346 static unsigned int
7347 aarch64_autovectorize_vector_sizes (void)
7349 return (16 | 8);
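/* The value above is a bitmask of vector sizes in bytes; 16 | 8 lets the
   vectorizer consider both 128-bit and 64-bit Advanced SIMD vectors.  */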
7352 /* A table to help perform AArch64-specific name mangling for AdvSIMD
7353 vector types in order to conform to the AAPCS64 (see "Procedure
7354 Call Standard for the ARM 64-bit Architecture", Appendix A). To
7355 qualify for emission with the mangled names defined in that document,
7356 a vector type must not only be of the correct mode but also be
7357 composed of AdvSIMD vector element types (e.g.
7358 __builtin_aarch64_simd_qi); these types are registered by
7359 aarch64_init_simd_builtins (). In other words, vector types defined
7360 in other ways e.g. via vector_size attribute will get default
7361 mangled names. */
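/* For example, int8x8_t from <arm_neon.h> has element type
   __builtin_aarch64_simd_qi and mode V8QImode, so the table below maps it
   to "10__Int8x8_t"; a C++ function "void f (int8x8_t)" would therefore
   mangle to something like _Z1f10__Int8x8_t.  */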
7362 typedef struct
7364 enum machine_mode mode;
7365 const char *element_type_name;
7366 const char *mangled_name;
7367 } aarch64_simd_mangle_map_entry;
7369 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
7370 /* 64-bit containerized types. */
7371 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
7372 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
7373 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
7374 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
7375 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
7376 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
7377 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
7378 { DImode, "__builtin_aarch64_simd_di", "11__Int64x1_t" },
7379 { DImode, "__builtin_aarch64_simd_udi", "12__Uint64x1_t" },
7380 { V1DFmode, "__builtin_aarch64_simd_df", "13__Float64x1_t" },
7381 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
7382 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
7383 /* 128-bit containerized types. */
7384 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
7385 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
7386 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
7387 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
7388 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
7389 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
7390 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
7391 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
7392 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
7393 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
7394 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
7395 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
7396 { V2DImode, "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
7397 { VOIDmode, NULL, NULL }
7400 /* Implement TARGET_MANGLE_TYPE. */
7402 static const char *
7403 aarch64_mangle_type (const_tree type)
7405 /* The AArch64 ABI documents say that "__va_list" has to be
7406 mangled as if it is in the "std" namespace. */
7407 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
7408 return "St9__va_list";
7410 /* Check the mode of the vector type, and the name of the vector
7411 element type, against the table. */
7412 if (TREE_CODE (type) == VECTOR_TYPE)
7414 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
7416 while (pos->mode != VOIDmode)
7418 tree elt_type = TREE_TYPE (type);
7420 if (pos->mode == TYPE_MODE (type)
7421 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
7422 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
7423 pos->element_type_name))
7424 return pos->mangled_name;
7426 pos++;
7430 /* Use the default mangling. */
7431 return NULL;
7434 /* Return the equivalent letter for size. */
7435 static char
7436 sizetochar (int size)
7438 switch (size)
7440 case 64: return 'd';
7441 case 32: return 's';
7442 case 16: return 'h';
7443 case 8 : return 'b';
7444 default: gcc_unreachable ();
7448 /* Return true iff X is a uniform vector of floating-point
7449 constants, and the constant can be represented in
7450 quarter-precision form. Note, as aarch64_float_const_representable_p
7451 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
7452 static bool
7453 aarch64_vect_float_const_representable_p (rtx x)
7455 int i = 0;
7456 REAL_VALUE_TYPE r0, ri;
7457 rtx x0, xi;
7459 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
7460 return false;
7462 x0 = CONST_VECTOR_ELT (x, 0);
7463 if (!CONST_DOUBLE_P (x0))
7464 return false;
7466 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
7468 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
7470 xi = CONST_VECTOR_ELT (x, i);
7471 if (!CONST_DOUBLE_P (xi))
7472 return false;
7474 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
7475 if (!REAL_VALUES_EQUAL (r0, ri))
7476 return false;
7479 return aarch64_float_const_representable_p (x0);
7482 /* Return true for valid and false for invalid. */
7483 bool
7484 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
7485 struct simd_immediate_info *info)
7487 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
7488 matches = 1; \
7489 for (i = 0; i < idx; i += (STRIDE)) \
7490 if (!(TEST)) \
7491 matches = 0; \
7492 if (matches) \
7494 immtype = (CLASS); \
7495 elsize = (ELSIZE); \
7496 eshift = (SHIFT); \
7497 emvn = (NEG); \
7498 break; \
7501 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7502 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7503 unsigned char bytes[16];
7504 int immtype = -1, matches;
7505 unsigned int invmask = inverse ? 0xff : 0;
7506 int eshift, emvn;
7508 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7510 if (! (aarch64_simd_imm_zero_p (op, mode)
7511 || aarch64_vect_float_const_representable_p (op)))
7512 return false;
7514 if (info)
7516 info->value = CONST_VECTOR_ELT (op, 0);
7517 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
7518 info->mvn = false;
7519 info->shift = 0;
7522 return true;
7525 /* Splat vector constant out into a byte vector. */
7526 for (i = 0; i < n_elts; i++)
7528 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
7529 it must be laid out in the vector register in reverse order. */
7530 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
7531 unsigned HOST_WIDE_INT elpart;
7532 unsigned int part, parts;
7534 if (GET_CODE (el) == CONST_INT)
7536 elpart = INTVAL (el);
7537 parts = 1;
7539 else if (GET_CODE (el) == CONST_DOUBLE)
7541 elpart = CONST_DOUBLE_LOW (el);
7542 parts = 2;
7544 else
7545 gcc_unreachable ();
7547 for (part = 0; part < parts; part++)
7549 unsigned int byte;
7550 for (byte = 0; byte < innersize; byte++)
7552 bytes[idx++] = (elpart & 0xff) ^ invmask;
7553 elpart >>= BITS_PER_UNIT;
7555 if (GET_CODE (el) == CONST_DOUBLE)
7556 elpart = CONST_DOUBLE_HIGH (el);
7560 /* Sanity check. */
7561 gcc_assert (idx == GET_MODE_SIZE (mode));
7565 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
7566 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
7568 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7569 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7571 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
7572 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7574 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
7575 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
7577 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
7579 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
7581 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
7582 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
7584 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7585 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7587 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
7588 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7590 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
7591 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
7593 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
7595 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
7597 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7598 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7600 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7601 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7603 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
7604 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7606 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
7607 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7609 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
7611 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
7612 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
7614 while (0);
7616 if (immtype == -1)
7617 return false;
7619 if (info)
7621 info->element_width = elsize;
7622 info->mvn = emvn != 0;
7623 info->shift = eshift;
7625 unsigned HOST_WIDE_INT imm = 0;
7627 if (immtype >= 12 && immtype <= 15)
7628 info->msl = true;
7630 /* Un-invert bytes of recognized vector, if necessary. */
7631 if (invmask != 0)
7632 for (i = 0; i < idx; i++)
7633 bytes[i] ^= invmask;
7635 if (immtype == 17)
7637 /* FIXME: Broken on 32-bit H_W_I hosts. */
7638 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
7640 for (i = 0; i < 8; i++)
7641 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
7642 << (i * BITS_PER_UNIT);
7645 info->value = GEN_INT (imm);
7647 else
7649 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
7650 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
7652 /* Construct 'abcdefgh' because the assembler cannot handle
7653 generic constants. */
7654 if (info->mvn)
7655 imm = ~imm;
7656 imm = (imm >> info->shift) & 0xff;
7657 info->value = GEN_INT (imm);
7661 return true;
7662 #undef CHECK
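/* A worked example of the recognition above: a V4SImode vector whose
   elements are all 0x00ff0000 splats to the byte pattern 00 00 ff 00
   (least-significant byte first) repeated four times, which matches the
   elsize-32, shift-16 case, so INFO is filled in with value 0xff,
   shift 16 and mvn false -- i.e. the constant can be built with
   "movi v0.4s, #0xff, lsl #16".  */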
7665 static bool
7666 aarch64_const_vec_all_same_int_p (rtx x,
7667 HOST_WIDE_INT minval,
7668 HOST_WIDE_INT maxval)
7670 HOST_WIDE_INT firstval;
7671 int count, i;
7673 if (GET_CODE (x) != CONST_VECTOR
7674 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
7675 return false;
7677 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
7678 if (firstval < minval || firstval > maxval)
7679 return false;
7681 count = CONST_VECTOR_NUNITS (x);
7682 for (i = 1; i < count; i++)
7683 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
7684 return false;
7686 return true;
7689 /* Check whether immediate shift constants are within range. */
7690 bool
7691 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
7693 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
7694 if (left)
7695 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
7696 else
7697 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
7700 /* Return true if X is a uniform vector where all elements
7701 are either the floating-point constant 0.0 or the
7702 integer constant 0. */
7703 bool
7704 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
7706 return x == CONST0_RTX (mode);
7709 bool
7710 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
7712 HOST_WIDE_INT imm = INTVAL (x);
7713 int i;
7715 for (i = 0; i < 8; i++)
7717 unsigned int byte = imm & 0xff;
7718 if (byte != 0xff && byte != 0)
7719 return false;
7720 imm >>= 8;
7723 return true;
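/* For example, 0x00ff00ff00ff00ff and 0xffffffff00000000 are accepted
   (every byte is either 0x00 or 0xff) while 0x0000000000000001 is not;
   these are exactly the scalars the 64-bit byte-mask form of MOVI can
   materialize.  */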
7726 bool
7727 aarch64_mov_operand_p (rtx x,
7728 enum aarch64_symbol_context context,
7729 enum machine_mode mode)
7731 if (GET_CODE (x) == HIGH
7732 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
7733 return true;
7735 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
7736 return true;
7738 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
7739 return true;
7741 return aarch64_classify_symbolic_expression (x, context)
7742 == SYMBOL_TINY_ABSOLUTE;
7745 /* Return a const_int vector of VAL. */
7747 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
7749 int nunits = GET_MODE_NUNITS (mode);
7750 rtvec v = rtvec_alloc (nunits);
7751 int i;
7753 for (i=0; i < nunits; i++)
7754 RTVEC_ELT (v, i) = GEN_INT (val);
7756 return gen_rtx_CONST_VECTOR (mode, v);
7759 /* Check OP is a legal scalar immediate for the MOVI instruction. */
7761 bool
7762 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
7764 enum machine_mode vmode;
7766 gcc_assert (!VECTOR_MODE_P (mode));
7767 vmode = aarch64_preferred_simd_mode (mode);
7768 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
7769 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
7772 /* Construct and return a PARALLEL RTX vector. */
7774 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
7776 int nunits = GET_MODE_NUNITS (mode);
7777 rtvec v = rtvec_alloc (nunits / 2);
7778 int base = high ? nunits / 2 : 0;
7779 rtx t1;
7780 int i;
7782 for (i=0; i < nunits / 2; i++)
7783 RTVEC_ELT (v, i) = GEN_INT (base + i);
7785 t1 = gen_rtx_PARALLEL (mode, v);
7786 return t1;
7789 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
7790 HIGH (exclusive). */
7791 void
7792 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7794 HOST_WIDE_INT lane;
7795 gcc_assert (GET_CODE (operand) == CONST_INT);
7796 lane = INTVAL (operand);
7798 if (lane < low || lane >= high)
7799 error ("lane out of range");
7802 void
7803 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7805 gcc_assert (GET_CODE (operand) == CONST_INT);
7806 HOST_WIDE_INT lane = INTVAL (operand);
7808 if (lane < low || lane >= high)
7809 error ("constant out of range");
7812 /* Emit code to reinterpret one AdvSIMD type as another,
7813 without altering bits. */
7814 void
7815 aarch64_simd_reinterpret (rtx dest, rtx src)
7817 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
7820 /* Emit code to place an AdvSIMD pair result in memory locations (with equal
7821 registers). */
7822 void
7823 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
7824 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
7825 rtx op1)
7827 rtx mem = gen_rtx_MEM (mode, destaddr);
7828 rtx tmp1 = gen_reg_rtx (mode);
7829 rtx tmp2 = gen_reg_rtx (mode);
7831 emit_insn (intfn (tmp1, op1, tmp2));
7833 emit_move_insn (mem, tmp1);
7834 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
7835 emit_move_insn (mem, tmp2);
7838 /* Return TRUE if OP is a valid vector addressing mode. */
7839 bool
7840 aarch64_simd_mem_operand_p (rtx op)
7842 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
7843 || GET_CODE (XEXP (op, 0)) == REG);
7846 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
7847 not to early-clobber SRC registers in the process.
7849 We assume that the operands described by SRC and DEST represent a
7850 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
7851 number of components into which the copy has been decomposed. */
7852 void
7853 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
7854 rtx *src, unsigned int count)
7856 unsigned int i;
7858 if (!reg_overlap_mentioned_p (operands[0], operands[1])
7859 || REGNO (operands[0]) < REGNO (operands[1]))
7861 for (i = 0; i < count; i++)
7863 operands[2 * i] = dest[i];
7864 operands[2 * i + 1] = src[i];
7867 else
7869 for (i = 0; i < count; i++)
7871 operands[2 * i] = dest[count - i - 1];
7872 operands[2 * i + 1] = src[count - i - 1];
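/* An example of the ordering above: a copy of {V1, V2} into {V0, V1} can
   be emitted forwards (V0 := V1, then V1 := V2), but a copy of {V0, V1}
   into {V1, V2} must be emitted backwards (V2 := V1, then V1 := V0) so
   that V1 is read before it is overwritten.  */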
7877 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
7878 one of the VSTRUCT modes: OI, CI or XI. */
7880 aarch64_simd_attr_length_move (rtx insn)
7882 enum machine_mode mode;
7884 extract_insn_cached (insn);
7886 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
7888 mode = GET_MODE (recog_data.operand[0]);
7889 switch (mode)
7891 case OImode:
7892 return 8;
7893 case CImode:
7894 return 12;
7895 case XImode:
7896 return 16;
7897 default:
7898 gcc_unreachable ();
7901 return 4;
7904 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
7905 alignment of a vector to 128 bits. */
7906 static HOST_WIDE_INT
7907 aarch64_simd_vector_alignment (const_tree type)
7909 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
7910 return MIN (align, 128);
7913 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
7914 static bool
7915 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
7917 if (is_packed)
7918 return false;
7920 /* We guarantee alignment for vectors up to 128-bits. */
7921 if (tree_int_cst_compare (TYPE_SIZE (type),
7922 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
7923 return false;
7925 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
7926 return true;
7929 /* If VALS is a vector constant that can be loaded into a register
7930 using DUP, generate instructions to do so and return an RTX to
7931 assign to the register. Otherwise return NULL_RTX. */
7932 static rtx
7933 aarch64_simd_dup_constant (rtx vals)
7935 enum machine_mode mode = GET_MODE (vals);
7936 enum machine_mode inner_mode = GET_MODE_INNER (mode);
7937 int n_elts = GET_MODE_NUNITS (mode);
7938 bool all_same = true;
7939 rtx x;
7940 int i;
7942 if (GET_CODE (vals) != CONST_VECTOR)
7943 return NULL_RTX;
7945 for (i = 1; i < n_elts; ++i)
7947 x = CONST_VECTOR_ELT (vals, i);
7948 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
7949 all_same = false;
7952 if (!all_same)
7953 return NULL_RTX;
7955 /* We can load this constant by using DUP and a constant in a
7956 single general-purpose register. This will be cheaper than a vector
7957 load. */
7958 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
7959 return gen_rtx_VEC_DUPLICATE (mode, x);
7963 /* Generate code to load VALS, which is a PARALLEL containing only
7964 constants (for vec_init) or CONST_VECTOR, efficiently into a
7965 register. Returns an RTX to copy into the register, or NULL_RTX
7966 for a PARALLEL that can not be converted into a CONST_VECTOR. */
7967 static rtx
7968 aarch64_simd_make_constant (rtx vals)
7970 enum machine_mode mode = GET_MODE (vals);
7971 rtx const_dup;
7972 rtx const_vec = NULL_RTX;
7973 int n_elts = GET_MODE_NUNITS (mode);
7974 int n_const = 0;
7975 int i;
7977 if (GET_CODE (vals) == CONST_VECTOR)
7978 const_vec = vals;
7979 else if (GET_CODE (vals) == PARALLEL)
7981 /* A CONST_VECTOR must contain only CONST_INTs and
7982 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
7983 Only store valid constants in a CONST_VECTOR. */
7984 for (i = 0; i < n_elts; ++i)
7986 rtx x = XVECEXP (vals, 0, i);
7987 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
7988 n_const++;
7990 if (n_const == n_elts)
7991 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
7993 else
7994 gcc_unreachable ();
7996 if (const_vec != NULL_RTX
7997 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
7998 /* Load using MOVI/MVNI. */
7999 return const_vec;
8000 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
8001 /* Loaded using DUP. */
8002 return const_dup;
8003 else if (const_vec != NULL_RTX)
8004 /* Load from constant pool. We can not take advantage of single-cycle
8005 LD1 because we need a PC-relative addressing mode. */
8006 return const_vec;
8007 else
8008 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8009 We can not construct an initializer. */
8010 return NULL_RTX;
8013 void
8014 aarch64_expand_vector_init (rtx target, rtx vals)
8016 enum machine_mode mode = GET_MODE (target);
8017 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8018 int n_elts = GET_MODE_NUNITS (mode);
8019 int n_var = 0, one_var = -1;
8020 bool all_same = true;
8021 rtx x, mem;
8022 int i;
8024 x = XVECEXP (vals, 0, 0);
8025 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8026 n_var = 1, one_var = 0;
8028 for (i = 1; i < n_elts; ++i)
8030 x = XVECEXP (vals, 0, i);
8031 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8032 ++n_var, one_var = i;
8034 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8035 all_same = false;
8038 if (n_var == 0)
8040 rtx constant = aarch64_simd_make_constant (vals);
8041 if (constant != NULL_RTX)
8043 emit_move_insn (target, constant);
8044 return;
8048 /* Splat a single non-constant element if we can. */
8049 if (all_same)
8051 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8052 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
8053 return;
8056 /* One field is non-constant. Load constant then overwrite varying
8057 field. This is more efficient than using the stack. */
8058 if (n_var == 1)
8060 rtx copy = copy_rtx (vals);
8061 rtx index = GEN_INT (one_var);
8062 enum insn_code icode;
8064 /* Load constant part of vector, substitute neighboring value for
8065 varying element. */
8066 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
8067 aarch64_expand_vector_init (target, copy);
8069 /* Insert variable. */
8070 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8071 icode = optab_handler (vec_set_optab, mode);
8072 gcc_assert (icode != CODE_FOR_nothing);
8073 emit_insn (GEN_FCN (icode) (target, x, index));
8074 return;
8077 /* Construct the vector in memory one field at a time
8078 and load the whole vector. */
8079 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
8080 for (i = 0; i < n_elts; i++)
8081 emit_move_insn (adjust_address_nv (mem, inner_mode,
8082 i * GET_MODE_SIZE (inner_mode)),
8083 XVECEXP (vals, 0, i));
8084 emit_move_insn (target, mem);
8088 static unsigned HOST_WIDE_INT
8089 aarch64_shift_truncation_mask (enum machine_mode mode)
8091 return
8092 (aarch64_vector_mode_supported_p (mode)
8093 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
8096 #ifndef TLS_SECTION_ASM_FLAG
8097 #define TLS_SECTION_ASM_FLAG 'T'
8098 #endif
8100 void
8101 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
8102 tree decl ATTRIBUTE_UNUSED)
8104 char flagchars[10], *f = flagchars;
8106 /* If we have already declared this section, we can use an
8107 abbreviated form to switch back to it -- unless this section is
8108 part of a COMDAT group, in which case GAS requires the full
8109 declaration every time. */
8110 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8111 && (flags & SECTION_DECLARED))
8113 fprintf (asm_out_file, "\t.section\t%s\n", name);
8114 return;
8117 if (!(flags & SECTION_DEBUG))
8118 *f++ = 'a';
8119 if (flags & SECTION_WRITE)
8120 *f++ = 'w';
8121 if (flags & SECTION_CODE)
8122 *f++ = 'x';
8123 if (flags & SECTION_SMALL)
8124 *f++ = 's';
8125 if (flags & SECTION_MERGE)
8126 *f++ = 'M';
8127 if (flags & SECTION_STRINGS)
8128 *f++ = 'S';
8129 if (flags & SECTION_TLS)
8130 *f++ = TLS_SECTION_ASM_FLAG;
8131 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8132 *f++ = 'G';
8133 *f = '\0';
8135 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
8137 if (!(flags & SECTION_NOTYPE))
8139 const char *type;
8140 const char *format;
8142 if (flags & SECTION_BSS)
8143 type = "nobits";
8144 else
8145 type = "progbits";
8147 #ifdef TYPE_OPERAND_FMT
8148 format = "," TYPE_OPERAND_FMT;
8149 #else
8150 format = ",@%s";
8151 #endif
8153 fprintf (asm_out_file, format, type);
8155 if (flags & SECTION_ENTSIZE)
8156 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
8157 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8159 if (TREE_CODE (decl) == IDENTIFIER_NODE)
8160 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
8161 else
8162 fprintf (asm_out_file, ",%s,comdat",
8163 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
8167 putc ('\n', asm_out_file);
8170 /* Select a format to encode pointers in exception handling data. */
8172 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
8174 int type;
8175 switch (aarch64_cmodel)
8177 case AARCH64_CMODEL_TINY:
8178 case AARCH64_CMODEL_TINY_PIC:
8179 case AARCH64_CMODEL_SMALL:
8180 case AARCH64_CMODEL_SMALL_PIC:
8181 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
8182 for everything. */
8183 type = DW_EH_PE_sdata4;
8184 break;
8185 default:
8186 /* No assumptions here. 8-byte relocs required. */
8187 type = DW_EH_PE_sdata8;
8188 break;
8190 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
8193 /* Emit load exclusive. */
8195 static void
8196 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
8197 rtx mem, rtx model_rtx)
8199 rtx (*gen) (rtx, rtx, rtx);
8201 switch (mode)
8203 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
8204 case HImode: gen = gen_aarch64_load_exclusivehi; break;
8205 case SImode: gen = gen_aarch64_load_exclusivesi; break;
8206 case DImode: gen = gen_aarch64_load_exclusivedi; break;
8207 default:
8208 gcc_unreachable ();
8211 emit_insn (gen (rval, mem, model_rtx));
8214 /* Emit store exclusive. */
8216 static void
8217 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
8218 rtx rval, rtx mem, rtx model_rtx)
8220 rtx (*gen) (rtx, rtx, rtx, rtx);
8222 switch (mode)
8224 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
8225 case HImode: gen = gen_aarch64_store_exclusivehi; break;
8226 case SImode: gen = gen_aarch64_store_exclusivesi; break;
8227 case DImode: gen = gen_aarch64_store_exclusivedi; break;
8228 default:
8229 gcc_unreachable ();
8232 emit_insn (gen (bval, rval, mem, model_rtx));
8235 /* Emit INSN as a jump and mark it as very unlikely to be taken. */
8237 static void
8238 aarch64_emit_unlikely_jump (rtx insn)
8240 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
8242 insn = emit_jump_insn (insn);
8243 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
8246 /* Expand a compare and swap pattern. */
8248 void
8249 aarch64_expand_compare_and_swap (rtx operands[])
8251 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
8252 enum machine_mode mode, cmp_mode;
8253 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
8255 bval = operands[0];
8256 rval = operands[1];
8257 mem = operands[2];
8258 oldval = operands[3];
8259 newval = operands[4];
8260 is_weak = operands[5];
8261 mod_s = operands[6];
8262 mod_f = operands[7];
8263 mode = GET_MODE (mem);
8264 cmp_mode = mode;
8266 /* Normally the succ memory model must be stronger than fail, but in the
8267 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
8268 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
8270 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
8271 && INTVAL (mod_s) == MEMMODEL_RELEASE)
8272 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
8274 switch (mode)
8276 case QImode:
8277 case HImode:
8278 /* For short modes, we're going to perform the comparison in SImode,
8279 so do the zero-extension now. */
8280 cmp_mode = SImode;
8281 rval = gen_reg_rtx (SImode);
8282 oldval = convert_modes (SImode, mode, oldval, true);
8283 /* Fall through. */
8285 case SImode:
8286 case DImode:
8287 /* Force the value into a register if needed. */
8288 if (!aarch64_plus_operand (oldval, mode))
8289 oldval = force_reg (cmp_mode, oldval);
8290 break;
8292 default:
8293 gcc_unreachable ();
8296 switch (mode)
8298 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
8299 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
8300 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
8301 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
8302 default:
8303 gcc_unreachable ();
8306 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
8308 if (mode == QImode || mode == HImode)
8309 emit_move_insn (operands[1], gen_lowpart (mode, rval));
8311 x = gen_rtx_REG (CCmode, CC_REGNUM);
8312 x = gen_rtx_EQ (SImode, x, const0_rtx);
8313 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
8316 /* Split a compare and swap pattern. */
8318 void
8319 aarch64_split_compare_and_swap (rtx operands[])
8321 rtx rval, mem, oldval, newval, scratch;
8322 enum machine_mode mode;
8323 bool is_weak;
8324 rtx label1, label2, x, cond;
8326 rval = operands[0];
8327 mem = operands[1];
8328 oldval = operands[2];
8329 newval = operands[3];
8330 is_weak = (operands[4] != const0_rtx);
8331 scratch = operands[7];
8332 mode = GET_MODE (mem);
8334 label1 = NULL_RTX;
8335 if (!is_weak)
8337 label1 = gen_label_rtx ();
8338 emit_label (label1);
8340 label2 = gen_label_rtx ();
8342 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
8344 cond = aarch64_gen_compare_reg (NE, rval, oldval);
8345 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8346 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8347 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
8348 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8350 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
8352 if (!is_weak)
8354 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
8355 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8356 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
8357 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8359 else
8361 cond = gen_rtx_REG (CCmode, CC_REGNUM);
8362 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
8363 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
8366 emit_label (label2);
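/* The split above produces, roughly, the canonical load/store-exclusive
   loop; the exact acquire/release forms depend on the memory model:

     label1:                          (only for a strong CAS)
       ldaxr   rval, [mem]
       cmp     rval, oldval
       b.ne    label2
       stlxr   w_scratch, newval, [mem]
       cbnz    w_scratch, label1      (retry if the exclusive store failed)
     label2:                                                              */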
8369 /* Split an atomic operation. */
8371 void
8372 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
8373 rtx value, rtx model_rtx, rtx cond)
8375 enum machine_mode mode = GET_MODE (mem);
8376 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
8377 rtx label, x;
8379 label = gen_label_rtx ();
8380 emit_label (label);
8382 if (new_out)
8383 new_out = gen_lowpart (wmode, new_out);
8384 if (old_out)
8385 old_out = gen_lowpart (wmode, old_out);
8386 else
8387 old_out = new_out;
8388 value = simplify_gen_subreg (wmode, value, mode, 0);
8390 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
8392 switch (code)
8394 case SET:
8395 new_out = value;
8396 break;
8398 case NOT:
8399 x = gen_rtx_AND (wmode, old_out, value);
8400 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8401 x = gen_rtx_NOT (wmode, new_out);
8402 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8403 break;
8405 case MINUS:
8406 if (CONST_INT_P (value))
8408 value = GEN_INT (-INTVAL (value));
8409 code = PLUS;
8411 /* Fall through. */
8413 default:
8414 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
8415 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8416 break;
8419 aarch64_emit_store_exclusive (mode, cond, mem,
8420 gen_lowpart (mode, new_out), model_rtx);
8422 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8423 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8424 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
8425 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
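/* Illustrative sketch only (not generated verbatim; register choices are
   arbitrary): for an SImode atomic fetch-and-add with a relaxed memory
   model, the loop built above comes out roughly as

	.Lretry:
	  ldxr	w0, [x2]	// load-exclusive OLD_OUT
	  add	w1, w0, w3	// NEW_OUT = OLD_OUT + VALUE
	  stxr	w4, w1, [x2]	// store-exclusive, w4 <- COND
	  cbnz	w4, .Lretry	// retry until the store-exclusive succeeds

   The NOT case computes ~(OLD_OUT & VALUE), i.e. the atomic NAND
   operations, and MINUS of a constant is folded into PLUS of the negated
   constant, as handled in the switch above.  */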
8428 static void
8429 aarch64_print_extension (void)
8431 const struct aarch64_option_extension *opt = NULL;
8433 for (opt = all_extensions; opt->name != NULL; opt++)
8434 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
8435 asm_fprintf (asm_out_file, "+%s", opt->name);
8437 asm_fprintf (asm_out_file, "\n");
8440 static void
8441 aarch64_start_file (void)
8443 if (selected_arch)
8445 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
8446 aarch64_print_extension ();
8448 else if (selected_cpu)
8450 const char *truncated_name
8451 = aarch64_rewrite_selected_cpu (selected_cpu->name);
8452 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
8453 aarch64_print_extension ();
8455 default_file_start ();
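/* Illustrative note: the directives emitted above look something like

	.arch armv8-a+fp+simd+crc
   or
	.cpu cortex-a57+fp+simd+crc

   with the exact spelling depending on the selected architecture, CPU and
   extensions.  aarch64_rewrite_selected_cpu exists so that big.LITTLE
   tunings such as "cortex-a57.cortex-a53" are reduced to a single CPU
   name the assembler will accept.  */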
8458 /* Target hook for c_mode_for_suffix. */
8459 static enum machine_mode
8460 aarch64_c_mode_for_suffix (char suffix)
8462 if (suffix == 'q')
8463 return TFmode;
8465 return VOIDmode;
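/* Illustrative note: returning TFmode for 'q' lets floating-point
   constants written with a q/Q suffix (e.g. 1.0q) be given the 128-bit
   IEEE quad format, the same format AArch64 uses for long double.  */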
8468 /* We can only represent floating point constants which will fit in
8469 "quarter-precision" values. These values are characterised by
8470 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given by:
8473 (-1)^s * (n/16) * 2^r
8475 Where:
8476 's' is the sign bit.
8477 'n' is an integer in the range 16 <= n <= 31.
8478 'r' is an integer in the range -3 <= r <= 4. */
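/* Some illustrative values: 1.0 is encodable as s = 0, n = 16, r = 0;
   0.25 as s = 0, n = 16, r = -2; and -31.0 as s = 1, n = 31, r = 4.
   The representable magnitudes therefore run from (16/16) * 2^-3 = 0.125
   up to (31/16) * 2^4 = 31.0, matching the immediate range of the AArch64
   FMOV (immediate) instruction.  */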
8480 /* Return true iff X can be represented by a quarter-precision
8481 floating point immediate operand.  Note, we cannot represent 0.0. */
8482 bool
8483 aarch64_float_const_representable_p (rtx x)
8485 /* This represents our current view of how many bits
8486 make up the mantissa. */
8487 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8488 int exponent;
8489 unsigned HOST_WIDE_INT mantissa, mask;
8490 REAL_VALUE_TYPE r, m;
8491 bool fail;
8493 if (!CONST_DOUBLE_P (x))
8494 return false;
8496 if (GET_MODE (x) == VOIDmode)
8497 return false;
8499 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8501 /* We cannot represent infinities, NaNs or +/-zero. We won't
8502 know if we have +zero until we analyse the mantissa, but we
8503 can reject the other invalid values. */
8504 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
8505 || REAL_VALUE_MINUS_ZERO (r))
8506 return false;
8508 /* Extract exponent. */
8509 r = real_value_abs (&r);
8510 exponent = REAL_EXP (&r);
8512 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8513 highest (sign) bit, with a fixed binary point at bit point_pos.
8514 w.elt (0) holds the low part of the mantissa, w.elt (1) the high part.
8515 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
8516 bits for the mantissa, this can fail (low bits will be lost). */
8517 real_ldexp (&m, &r, point_pos - exponent);
8518 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
8520 /* If the low part of the mantissa has bits set we cannot represent
8521 the value. */
8522 if (w.elt (0) != 0)
8523 return false;
8524 /* We have rejected the lower HOST_WIDE_INT, so update our
8525 understanding of how many bits lie in the mantissa and
8526 look only at the high HOST_WIDE_INT. */
8527 mantissa = w.elt (1);
8528 point_pos -= HOST_BITS_PER_WIDE_INT;
8530 /* We can only represent values with a mantissa of the form 1.xxxx. */
8531 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8532 if ((mantissa & mask) != 0)
8533 return false;
8535 /* Having filtered unrepresentable values, we may now remove all
8536 but the highest 5 bits. */
8537 mantissa >>= point_pos - 5;
8539 /* We cannot represent the value 0.0, so reject it. This is handled
8540 elsewhere. */
8541 if (mantissa == 0)
8542 return false;
8544 /* Then, as bit 4 is always set, we can mask it off, leaving
8545 the mantissa in the range [0, 15]. */
8546 mantissa &= ~(1 << 4);
8547 gcc_assert (mantissa <= 15);
8549 /* GCC internally does not use IEEE754-like encoding (where normalized
8550 significands are in the range [1, 2)).  GCC uses [0.5, 1) (see real.c).
8551 Our mantissa values are shifted 4 places to the left relative to
8552 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
8553 by 5 places to correct for GCC's representation. */
8554 exponent = 5 - exponent;
8556 return (exponent >= 0 && exponent <= 7);
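/* A minimal standalone sketch (not part of GCC; assumes <math.h> and
   exact double arithmetic, which holds for these values) that checks the
   same property by brute force over the (-1)^s * (n/16) * 2^r encoding:

     #include <math.h>

     static int
     quarter_precision_representable (double x)
     {
       double a = fabs (x);
       for (int r = -3; r <= 4; r++)
	 for (int n = 16; n <= 31; n++)
	   if (a == (n / 16.0) * ldexp (1.0, r))
	     return 1;
       return 0;   // also rejects 0.0, as the function above does
     }
*/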
8559 char*
8560 aarch64_output_simd_mov_immediate (rtx const_vector,
8561 enum machine_mode mode,
8562 unsigned width)
8564 bool is_valid;
8565 static char templ[40];
8566 const char *mnemonic;
8567 const char *shift_op;
8568 unsigned int lane_count = 0;
8569 char element_char;
8571 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
8573 /* This will return true to show const_vector is legal for use as either
8574 an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
8575 also update INFO to show how the immediate should be generated. */
8576 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
8577 gcc_assert (is_valid);
8579 element_char = sizetochar (info.element_width);
8580 lane_count = width / info.element_width;
8582 mode = GET_MODE_INNER (mode);
8583 if (mode == SFmode || mode == DFmode)
8585 gcc_assert (info.shift == 0 && ! info.mvn);
8586 if (aarch64_float_const_zero_rtx_p (info.value))
8587 info.value = GEN_INT (0);
8588 else
8590 #define buf_size 20
8591 REAL_VALUE_TYPE r;
8592 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
8593 char float_buf[buf_size] = {'\0'};
8594 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
8595 #undef buf_size
8597 if (lane_count == 1)
8598 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
8599 else
8600 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
8601 lane_count, element_char, float_buf);
8602 return templ;
8606 mnemonic = info.mvn ? "mvni" : "movi";
8607 shift_op = info.msl ? "msl" : "lsl";
8609 if (lane_count == 1)
8610 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
8611 mnemonic, UINTVAL (info.value));
8612 else if (info.shift)
8613 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
8614 ", %s %d", mnemonic, lane_count, element_char,
8615 UINTVAL (info.value), shift_op, info.shift);
8616 else
8617 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
8618 mnemonic, lane_count, element_char, UINTVAL (info.value));
8619 return templ;
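/* Illustrative examples of the templates built above (operand 0 depends
   on the final register allocation):
     a V4SI vector of 0xab000000 per lane  ->  "movi  v0.4s, 0xab, lsl 24"
     a V2DF vector of 1.0 per lane         ->  "fmov  v0.2d, 1.0e+0"
     a single 64-bit lane of byte masks    ->  "movi  d0, 0xff00ff00ff00ff"
   The exact hexadecimal and decimal spellings come from
   HOST_WIDE_INT_PRINT_HEX and real_to_decimal_for_mode respectively.  */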
8622 char*
8623 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
8624 enum machine_mode mode)
8626 enum machine_mode vmode;
8628 gcc_assert (!VECTOR_MODE_P (mode));
8629 vmode = aarch64_simd_container_mode (mode, 64);
8630 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
8631 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
8634 /* Split operands into moves from op[1] + op[2] into op[0]. */
8636 void
8637 aarch64_split_combinev16qi (rtx operands[3])
8639 unsigned int dest = REGNO (operands[0]);
8640 unsigned int src1 = REGNO (operands[1]);
8641 unsigned int src2 = REGNO (operands[2]);
8642 enum machine_mode halfmode = GET_MODE (operands[1]);
8643 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
8644 rtx destlo, desthi;
8646 gcc_assert (halfmode == V16QImode);
8648 if (src1 == dest && src2 == dest + halfregs)
8650 /* No-op move. Can't split to nothing; emit something. */
8651 emit_note (NOTE_INSN_DELETED);
8652 return;
8655 /* Preserve register attributes for variable tracking. */
8656 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
8657 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
8658 GET_MODE_SIZE (halfmode));
8660 /* Special case of reversed high/low parts. */
8661 if (reg_overlap_mentioned_p (operands[2], destlo)
8662 && reg_overlap_mentioned_p (operands[1], desthi))
8664 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8665 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
8666 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8668 else if (!reg_overlap_mentioned_p (operands[2], destlo))
8670 /* Try to avoid unnecessary moves if part of the result
8671 is in the right place already. */
8672 if (src1 != dest)
8673 emit_move_insn (destlo, operands[1]);
8674 if (src2 != dest + halfregs)
8675 emit_move_insn (desthi, operands[2]);
8677 else
8679 if (src2 != dest + halfregs)
8680 emit_move_insn (desthi, operands[2]);
8681 if (src1 != dest)
8682 emit_move_insn (destlo, operands[1]);
8686 /* vec_perm support. */
8688 #define MAX_VECT_LEN 16
8690 struct expand_vec_perm_d
8692 rtx target, op0, op1;
8693 unsigned char perm[MAX_VECT_LEN];
8694 enum machine_mode vmode;
8695 unsigned char nelt;
8696 bool one_vector_p;
8697 bool testing_p;
8700 /* Generate a variable permutation. */
8702 static void
8703 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
8705 enum machine_mode vmode = GET_MODE (target);
8706 bool one_vector_p = rtx_equal_p (op0, op1);
8708 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
8709 gcc_checking_assert (GET_MODE (op0) == vmode);
8710 gcc_checking_assert (GET_MODE (op1) == vmode);
8711 gcc_checking_assert (GET_MODE (sel) == vmode);
8712 gcc_checking_assert (TARGET_SIMD);
8714 if (one_vector_p)
8716 if (vmode == V8QImode)
8718 /* Expand the argument to a V16QI mode by duplicating it. */
8719 rtx pair = gen_reg_rtx (V16QImode);
8720 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
8721 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8723 else
8725 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
8728 else
8730 rtx pair;
8732 if (vmode == V8QImode)
8734 pair = gen_reg_rtx (V16QImode);
8735 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
8736 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8738 else
8740 pair = gen_reg_rtx (OImode);
8741 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
8742 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
8747 void
8748 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
8750 enum machine_mode vmode = GET_MODE (target);
8751 unsigned int nelt = GET_MODE_NUNITS (vmode);
8752 bool one_vector_p = rtx_equal_p (op0, op1);
8753 rtx mask;
8755 /* The TBL instruction does not use a modulo index, so we must take care
8756 of that ourselves. */
8757 mask = aarch64_simd_gen_const_vector_dup (vmode,
8758 one_vector_p ? nelt - 1 : 2 * nelt - 1);
8759 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
8761 /* For big-endian, we also need to reverse the index within the vector
8762 (but not which vector). */
8763 if (BYTES_BIG_ENDIAN)
8765 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
8766 if (!one_vector_p)
8767 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
8768 sel = expand_simple_binop (vmode, XOR, sel, mask,
8769 NULL, 0, OPTAB_LIB_WIDEN);
8771 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
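/* Illustrative example: for a two-vector V16QImode permute the selector
   is ANDed with 31 so that every index addresses one of the 32 input
   bytes, and with 15 in the single-vector case.  This provides the
   modulo behaviour TBL itself lacks; out-of-range TBL indices would
   produce zero bytes instead.  */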
8774 /* Recognize patterns suitable for the TRN instructions. */
8775 static bool
8776 aarch64_evpc_trn (struct expand_vec_perm_d *d)
8778 unsigned int i, odd, mask, nelt = d->nelt;
8779 rtx out, in0, in1, x;
8780 rtx (*gen) (rtx, rtx, rtx);
8781 enum machine_mode vmode = d->vmode;
8783 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8784 return false;
8786 /* Note that these are little-endian tests.
8787 We correct for big-endian later. */
8788 if (d->perm[0] == 0)
8789 odd = 0;
8790 else if (d->perm[0] == 1)
8791 odd = 1;
8792 else
8793 return false;
8794 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8796 for (i = 0; i < nelt; i += 2)
8798 if (d->perm[i] != i + odd)
8799 return false;
8800 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
8801 return false;
8804 /* Success! */
8805 if (d->testing_p)
8806 return true;
8808 in0 = d->op0;
8809 in1 = d->op1;
8810 if (BYTES_BIG_ENDIAN)
8812 x = in0, in0 = in1, in1 = x;
8813 odd = !odd;
8815 out = d->target;
8817 if (odd)
8819 switch (vmode)
8821 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
8822 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
8823 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
8824 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
8825 case V4SImode: gen = gen_aarch64_trn2v4si; break;
8826 case V2SImode: gen = gen_aarch64_trn2v2si; break;
8827 case V2DImode: gen = gen_aarch64_trn2v2di; break;
8828 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
8829 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
8830 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
8831 default:
8832 return false;
8835 else
8837 switch (vmode)
8839 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
8840 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
8841 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
8842 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
8843 case V4SImode: gen = gen_aarch64_trn1v4si; break;
8844 case V2SImode: gen = gen_aarch64_trn1v2si; break;
8845 case V2DImode: gen = gen_aarch64_trn1v2di; break;
8846 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
8847 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
8848 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
8849 default:
8850 return false;
8854 emit_insn (gen (out, in0, in1));
8855 return true;
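/* Illustrative example: with V4SImode inputs A = {a0,a1,a2,a3} and
   B = {b0,b1,b2,b3}, the index vectors recognised here are
     {0, 4, 2, 6} -> TRN1, giving {a0, b0, a2, b2}
     {1, 5, 3, 7} -> TRN2, giving {a1, b1, a3, b3}
   (little-endian numbering; the operands are swapped for big-endian as
   above).  */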
8858 /* Recognize patterns suitable for the UZP instructions. */
8859 static bool
8860 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
8862 unsigned int i, odd, mask, nelt = d->nelt;
8863 rtx out, in0, in1, x;
8864 rtx (*gen) (rtx, rtx, rtx);
8865 enum machine_mode vmode = d->vmode;
8867 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8868 return false;
8870 /* Note that these are little-endian tests.
8871 We correct for big-endian later. */
8872 if (d->perm[0] == 0)
8873 odd = 0;
8874 else if (d->perm[0] == 1)
8875 odd = 1;
8876 else
8877 return false;
8878 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8880 for (i = 0; i < nelt; i++)
8882 unsigned elt = (i * 2 + odd) & mask;
8883 if (d->perm[i] != elt)
8884 return false;
8887 /* Success! */
8888 if (d->testing_p)
8889 return true;
8891 in0 = d->op0;
8892 in1 = d->op1;
8893 if (BYTES_BIG_ENDIAN)
8895 x = in0, in0 = in1, in1 = x;
8896 odd = !odd;
8898 out = d->target;
8900 if (odd)
8902 switch (vmode)
8904 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
8905 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
8906 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
8907 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
8908 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
8909 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
8910 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
8911 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
8912 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
8913 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
8914 default:
8915 return false;
8918 else
8920 switch (vmode)
8922 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
8923 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
8924 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
8925 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
8926 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
8927 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
8928 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
8929 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
8930 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
8931 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
8932 default:
8933 return false;
8937 emit_insn (gen (out, in0, in1));
8938 return true;
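/* Illustrative example: with V4SImode inputs A = {a0,a1,a2,a3} and
   B = {b0,b1,b2,b3}, the index vectors recognised here are
     {0, 2, 4, 6} -> UZP1, giving {a0, a2, b0, b2}
     {1, 3, 5, 7} -> UZP2, giving {a1, a3, b1, b3}
   i.e. the even- and odd-numbered elements of the concatenated inputs.  */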
8941 /* Recognize patterns suitable for the ZIP instructions. */
8942 static bool
8943 aarch64_evpc_zip (struct expand_vec_perm_d *d)
8945 unsigned int i, high, mask, nelt = d->nelt;
8946 rtx out, in0, in1, x;
8947 rtx (*gen) (rtx, rtx, rtx);
8948 enum machine_mode vmode = d->vmode;
8950 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8951 return false;
8953 /* Note that these are little-endian tests.
8954 We correct for big-endian later. */
8955 high = nelt / 2;
8956 if (d->perm[0] == high)
8957 /* Do Nothing. */
8959 else if (d->perm[0] == 0)
8960 high = 0;
8961 else
8962 return false;
8963 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8965 for (i = 0; i < nelt / 2; i++)
8967 unsigned elt = (i + high) & mask;
8968 if (d->perm[i * 2] != elt)
8969 return false;
8970 elt = (elt + nelt) & mask;
8971 if (d->perm[i * 2 + 1] != elt)
8972 return false;
8975 /* Success! */
8976 if (d->testing_p)
8977 return true;
8979 in0 = d->op0;
8980 in1 = d->op1;
8981 if (BYTES_BIG_ENDIAN)
8983 x = in0, in0 = in1, in1 = x;
8984 high = !high;
8986 out = d->target;
8988 if (high)
8990 switch (vmode)
8992 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
8993 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
8994 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
8995 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
8996 case V4SImode: gen = gen_aarch64_zip2v4si; break;
8997 case V2SImode: gen = gen_aarch64_zip2v2si; break;
8998 case V2DImode: gen = gen_aarch64_zip2v2di; break;
8999 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
9000 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
9001 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
9002 default:
9003 return false;
9006 else
9008 switch (vmode)
9010 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
9011 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
9012 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
9013 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
9014 case V4SImode: gen = gen_aarch64_zip1v4si; break;
9015 case V2SImode: gen = gen_aarch64_zip1v2si; break;
9016 case V2DImode: gen = gen_aarch64_zip1v2di; break;
9017 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
9018 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
9019 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
9020 default:
9021 return false;
9025 emit_insn (gen (out, in0, in1));
9026 return true;
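/* Illustrative example: with V4SImode inputs A = {a0,a1,a2,a3} and
   B = {b0,b1,b2,b3}, the index vectors recognised here are
     {0, 4, 1, 5} -> ZIP1, giving {a0, b0, a1, b1}
     {2, 6, 3, 7} -> ZIP2, giving {a2, b2, a3, b3}
   i.e. an interleave of the low halves and of the high halves.  */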
9029 /* Recognize patterns for the EXT insn. */
9031 static bool
9032 aarch64_evpc_ext (struct expand_vec_perm_d *d)
9034 unsigned int i, nelt = d->nelt;
9035 rtx (*gen) (rtx, rtx, rtx, rtx);
9036 rtx offset;
9038 unsigned int location = d->perm[0]; /* Always < nelt. */
9040 /* Check if the extracted indices are increasing by one. */
9041 for (i = 1; i < nelt; i++)
9043 unsigned int required = location + i;
9044 if (d->one_vector_p)
9046 /* We'll pass the same vector in twice, so allow indices to wrap. */
9047 required &= (nelt - 1);
9049 if (d->perm[i] != required)
9050 return false;
9053 switch (d->vmode)
9055 case V16QImode: gen = gen_aarch64_extv16qi; break;
9056 case V8QImode: gen = gen_aarch64_extv8qi; break;
9057 case V4HImode: gen = gen_aarch64_extv4hi; break;
9058 case V8HImode: gen = gen_aarch64_extv8hi; break;
9059 case V2SImode: gen = gen_aarch64_extv2si; break;
9060 case V4SImode: gen = gen_aarch64_extv4si; break;
9061 case V2SFmode: gen = gen_aarch64_extv2sf; break;
9062 case V4SFmode: gen = gen_aarch64_extv4sf; break;
9063 case V2DImode: gen = gen_aarch64_extv2di; break;
9064 case V2DFmode: gen = gen_aarch64_extv2df; break;
9065 default:
9066 return false;
9069 /* Success! */
9070 if (d->testing_p)
9071 return true;
9073 /* The case where (location == 0) is a no-op for both big- and little-endian,
9074 and is removed by the mid-end at optimization levels -O1 and higher. */
9076 if (BYTES_BIG_ENDIAN && (location != 0))
9078 /* After setup, we want the high elements of the first vector (stored
9079 at the LSB end of the register), and the low elements of the second
9080 vector (stored at the MSB end of the register). So swap. */
9081 rtx temp = d->op0;
9082 d->op0 = d->op1;
9083 d->op1 = temp;
9084 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
9085 location = nelt - location;
9088 offset = GEN_INT (location);
9089 emit_insn (gen (d->target, d->op0, d->op1, offset));
9090 return true;
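/* Illustrative examples: for V4SImode, the index vector {1, 2, 3, 4}
   takes the last three elements of the first operand followed by the
   first element of the second, i.e. EXT with an offset of one element;
   with a single input, {3, 0, 1, 2} is a rotation implemented as EXT of
   the vector with itself at offset 3.  */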
9093 /* Recognize patterns for the REV insns. */
9095 static bool
9096 aarch64_evpc_rev (struct expand_vec_perm_d *d)
9098 unsigned int i, j, diff, nelt = d->nelt;
9099 rtx (*gen) (rtx, rtx);
9101 if (!d->one_vector_p)
9102 return false;
9104 diff = d->perm[0];
9105 switch (diff)
9107 case 7:
9108 switch (d->vmode)
9110 case V16QImode: gen = gen_aarch64_rev64v16qi; break;
9111 case V8QImode: gen = gen_aarch64_rev64v8qi; break;
9112 default:
9113 return false;
9115 break;
9116 case 3:
9117 switch (d->vmode)
9119 case V16QImode: gen = gen_aarch64_rev32v16qi; break;
9120 case V8QImode: gen = gen_aarch64_rev32v8qi; break;
9121 case V8HImode: gen = gen_aarch64_rev64v8hi; break;
9122 case V4HImode: gen = gen_aarch64_rev64v4hi; break;
9123 default:
9124 return false;
9126 break;
9127 case 1:
9128 switch (d->vmode)
9130 case V16QImode: gen = gen_aarch64_rev16v16qi; break;
9131 case V8QImode: gen = gen_aarch64_rev16v8qi; break;
9132 case V8HImode: gen = gen_aarch64_rev32v8hi; break;
9133 case V4HImode: gen = gen_aarch64_rev32v4hi; break;
9134 case V4SImode: gen = gen_aarch64_rev64v4si; break;
9135 case V2SImode: gen = gen_aarch64_rev64v2si; break;
9136 case V4SFmode: gen = gen_aarch64_rev64v4sf; break;
9137 case V2SFmode: gen = gen_aarch64_rev64v2sf; break;
9138 default:
9139 return false;
9141 break;
9142 default:
9143 return false;
9146 for (i = 0; i < nelt ; i += diff + 1)
9147 for (j = 0; j <= diff; j += 1)
9149 /* This is guaranteed to be true, as the value of diff
9150 is 7, 3 or 1, and we should have enough elements in the
9151 queue to generate this.  Getting a vector mask with a
9152 value of diff other than these implies that
9153 something has gone wrong by the time we get here. */
9154 gcc_assert (i + j < nelt);
9155 if (d->perm[i + j] != i + diff - j)
9156 return false;
9159 /* Success! */
9160 if (d->testing_p)
9161 return true;
9163 emit_insn (gen (d->target, d->op0));
9164 return true;
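/* Illustrative examples for V8QImode: the index vector
   {3,2,1,0,7,6,5,4} (diff == 3) reverses the bytes within each 32-bit
   word and maps to REV32, {1,0,3,2,5,4,7,6} (diff == 1) maps to REV16,
   and a diff of 7 (byte reversal within each 64-bit word) is only
   handled for the QI vector modes and maps to REV64.  */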
9167 static bool
9168 aarch64_evpc_dup (struct expand_vec_perm_d *d)
9170 rtx (*gen) (rtx, rtx, rtx);
9171 rtx out = d->target;
9172 rtx in0;
9173 enum machine_mode vmode = d->vmode;
9174 unsigned int i, elt, nelt = d->nelt;
9175 rtx lane;
9177 /* TODO: This may not be big-endian safe. */
9178 if (BYTES_BIG_ENDIAN)
9179 return false;
9181 elt = d->perm[0];
9182 for (i = 1; i < nelt; i++)
9184 if (elt != d->perm[i])
9185 return false;
9188 /* The generic preparation in aarch64_expand_vec_perm_const_1
9189 swaps the operand order and the permute indices if it finds
9190 d->perm[0] to be in the second operand. Thus, we can always
9191 use d->op0 and need not do any extra arithmetic to get the
9192 correct lane number. */
9193 in0 = d->op0;
9194 lane = GEN_INT (elt);
9196 switch (vmode)
9198 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
9199 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
9200 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
9201 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
9202 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
9203 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
9204 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
9205 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
9206 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
9207 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
9208 default:
9209 return false;
9212 emit_insn (gen (out, in0, lane));
9213 return true;
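/* Illustrative example: a V4SImode index vector of {2, 2, 2, 2} is
   matched here and expanded as a DUP of lane 2 of the first operand,
   i.e. something like "dup v0.4s, v1.s[2]".  */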
9216 static bool
9217 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
9219 rtx rperm[MAX_VECT_LEN], sel;
9220 enum machine_mode vmode = d->vmode;
9221 unsigned int i, nelt = d->nelt;
9223 if (d->testing_p)
9224 return true;
9226 /* Generic code will try constant permutation twice: once with the
9227 original mode and again with the elements lowered to QImode.
9228 So wait and don't do the selector expansion ourselves. */
9229 if (vmode != V8QImode && vmode != V16QImode)
9230 return false;
9232 for (i = 0; i < nelt; ++i)
9234 int nunits = GET_MODE_NUNITS (vmode);
9236 /* If big-endian and two vectors we end up with a weird mixed-endian
9237 mode on NEON. Reverse the index within each word but not the word
9238 itself. */
9239 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
9240 : d->perm[i]);
9242 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
9243 sel = force_reg (vmode, sel);
9245 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
9246 return true;
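/* Illustrative note: a byte shuffle not caught by the matchers above,
   e.g. {5, 1, 6, 0, 3, 7, 4, 2} on V8QImode, ends up here; the index
   vector is materialised as a constant and a single TBL (or a TBL over a
   two-register table for the two-vector V16QImode case) performs the
   permute.  */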
9249 static bool
9250 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
9252 /* The pattern matching functions above are written to look for a small
9253 number to begin the sequence (0, 1, N/2). If we begin with an index
9254 from the second operand, we can swap the operands. */
9255 if (d->perm[0] >= d->nelt)
9257 unsigned i, nelt = d->nelt;
9258 rtx x;
9260 gcc_assert (nelt == (nelt & -nelt));
9261 for (i = 0; i < nelt; ++i)
9262 d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
9264 x = d->op0;
9265 d->op0 = d->op1;
9266 d->op1 = x;
9269 if (TARGET_SIMD)
9271 if (aarch64_evpc_rev (d))
9272 return true;
9273 else if (aarch64_evpc_ext (d))
9274 return true;
9275 else if (aarch64_evpc_zip (d))
9276 return true;
9277 else if (aarch64_evpc_uzp (d))
9278 return true;
9279 else if (aarch64_evpc_trn (d))
9280 return true;
9281 else if (aarch64_evpc_dup (d))
9282 return true;
9283 return aarch64_evpc_tbl (d);
9285 return false;
9288 /* Expand a vec_perm_const pattern. */
9290 bool
9291 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
9293 struct expand_vec_perm_d d;
9294 int i, nelt, which;
9296 d.target = target;
9297 d.op0 = op0;
9298 d.op1 = op1;
9300 d.vmode = GET_MODE (target);
9301 gcc_assert (VECTOR_MODE_P (d.vmode));
9302 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
9303 d.testing_p = false;
9305 for (i = which = 0; i < nelt; ++i)
9307 rtx e = XVECEXP (sel, 0, i);
9308 int ei = INTVAL (e) & (2 * nelt - 1);
9309 which |= (ei < nelt ? 1 : 2);
9310 d.perm[i] = ei;
9313 switch (which)
9315 default:
9316 gcc_unreachable ();
9318 case 3:
9319 d.one_vector_p = false;
9320 if (!rtx_equal_p (op0, op1))
9321 break;
9323 /* The elements of PERM do not suggest that only the first operand
9324 is used, but both operands are identical. Allow easier matching
9325 of the permutation by folding the permutation into the single
9326 input vector. */
9327 /* Fall Through. */
9328 case 2:
9329 for (i = 0; i < nelt; ++i)
9330 d.perm[i] &= nelt - 1;
9331 d.op0 = op1;
9332 d.one_vector_p = true;
9333 break;
9335 case 1:
9336 d.op1 = op0;
9337 d.one_vector_p = true;
9338 break;
9341 return aarch64_expand_vec_perm_const_1 (&d);
9344 static bool
9345 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
9346 const unsigned char *sel)
9348 struct expand_vec_perm_d d;
9349 unsigned int i, nelt, which;
9350 bool ret;
9352 d.vmode = vmode;
9353 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
9354 d.testing_p = true;
9355 memcpy (d.perm, sel, nelt);
9357 /* Calculate whether all elements are in one vector. */
9358 for (i = which = 0; i < nelt; ++i)
9360 unsigned char e = d.perm[i];
9361 gcc_assert (e < 2 * nelt);
9362 which |= (e < nelt ? 1 : 2);
9365 /* If all elements are from the second vector, reindex as if from the
9366 first vector. */
9367 if (which == 2)
9368 for (i = 0; i < nelt; ++i)
9369 d.perm[i] -= nelt;
9371 /* Check whether the mask can be applied to a single vector. */
9372 d.one_vector_p = (which != 3);
9374 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
9375 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
9376 if (!d.one_vector_p)
9377 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
9379 start_sequence ();
9380 ret = aarch64_expand_vec_perm_const_1 (&d);
9381 end_sequence ();
9383 return ret;
9386 /* Implement target hook CANNOT_CHANGE_MODE_CLASS. */
9387 bool
9388 aarch64_cannot_change_mode_class (enum machine_mode from,
9389 enum machine_mode to,
9390 enum reg_class rclass)
9392 /* Full-reg subregs are allowed on general regs or any class if they are
9393 the same size. */
9394 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
9395 || !reg_classes_intersect_p (FP_REGS, rclass))
9396 return false;
9398 /* Limited combinations of subregs are safe on FPREGs. Particularly,
9399 1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
9400 2. Scalar to Scalar for integer modes or same size float modes.
9401 3. Vector to Vector modes.
9402 4. On little-endian only, Vector-Structure to Vector modes. */
9403 if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
9405 if (aarch64_vector_mode_supported_p (from)
9406 && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
9407 return false;
9409 if (GET_MODE_NUNITS (from) == 1
9410 && GET_MODE_NUNITS (to) == 1
9411 && (GET_MODE_CLASS (from) == MODE_INT
9412 || from == to))
9413 return false;
9415 if (aarch64_vector_mode_supported_p (from)
9416 && aarch64_vector_mode_supported_p (to))
9417 return false;
9419 /* Within a vector structure straddling multiple vector registers
9420 we are in a mixed-endian representation. As such, we can't
9421 easily change modes for BYTES_BIG_ENDIAN. Otherwise, we can
9422 switch between vectors and vector structures cheaply. */
9423 if (!BYTES_BIG_ENDIAN)
9424 if ((aarch64_vector_mode_supported_p (from)
9425 && aarch64_vect_struct_mode_p (to))
9426 || (aarch64_vector_mode_supported_p (to)
9427 && aarch64_vect_struct_mode_p (from)))
9428 return false;
9431 return true;
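/* Illustrative examples: a DImode subreg of a V2DImode value accesses one
   unit of the vector and is allowed (case 1 above), whereas on big-endian
   a change from a vector-structure mode such as OImode to V4SImode is
   rejected, while on little-endian it is permitted (case 4 above).  */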
9434 /* Implement MODES_TIEABLE_P. */
9436 bool
9437 aarch64_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
9439 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
9440 return true;
9442 /* We specifically want to allow elements of "structure" modes to
9443 be tieable to the structure. This more general condition allows
9444 other rarer situations too. */
9445 if (TARGET_SIMD
9446 && aarch64_vector_mode_p (mode1)
9447 && aarch64_vector_mode_p (mode2))
9448 return true;
9450 return false;
9453 /* Return a new RTX holding the result of moving POINTER forward by
9454 AMOUNT bytes. */
9456 static rtx
9457 aarch64_move_pointer (rtx pointer, int amount)
9459 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
9461 return adjust_automodify_address (pointer, GET_MODE (pointer),
9462 next, amount);
9465 /* Return a new RTX holding the result of moving POINTER forward by the
9466 size of the mode it points to. */
9468 static rtx
9469 aarch64_progress_pointer (rtx pointer)
9471 HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
9473 return aarch64_move_pointer (pointer, amount);
9476 /* Copy one MODE-sized block from SRC to DST, then progress SRC and DST by
9477 MODE bytes. */
9479 static void
9480 aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
9481 enum machine_mode mode)
9483 rtx reg = gen_reg_rtx (mode);
9485 /* "Cast" the pointers to the correct mode. */
9486 *src = adjust_address (*src, mode, 0);
9487 *dst = adjust_address (*dst, mode, 0);
9488 /* Emit the memcpy. */
9489 emit_move_insn (reg, *src);
9490 emit_move_insn (*dst, reg);
9491 /* Move the pointers forward. */
9492 *src = aarch64_progress_pointer (*src);
9493 *dst = aarch64_progress_pointer (*dst);
9496 /* Expand movmem, as if from a __builtin_memcpy. Return true if
9497 we succeed, otherwise return false. */
9499 bool
9500 aarch64_expand_movmem (rtx *operands)
9502 unsigned int n;
9503 rtx dst = operands[0];
9504 rtx src = operands[1];
9505 rtx base;
9506 bool speed_p = !optimize_function_for_size_p (cfun);
9508 /* When optimizing for size, give a better estimate of the length of a
9509 memcpy call, but use the default otherwise. */
9510 unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;
9512 /* We can't do anything smart if the amount to copy is not constant. */
9513 if (!CONST_INT_P (operands[2]))
9514 return false;
9516 n = UINTVAL (operands[2]);
9518 /* Try to keep the number of instructions low. For cases below 16 bytes we
9519 need to make at most two moves. For cases above 16 bytes it will be one
9520 move for each 16-byte chunk, then at most two additional moves.
9521 if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
9522 return false;
9524 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9525 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
9527 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
9528 src = adjust_automodify_address (src, VOIDmode, base, 0);
9530 /* Simple cases.  Copy 0-3 bytes as (if applicable) a 2-byte chunk, then a
9531 1-byte chunk. */
9532 if (n < 4)
9534 if (n >= 2)
9536 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
9537 n -= 2;
9540 if (n == 1)
9541 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
9543 return true;
9546 /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second
9547 4-byte chunk, partially overlapping with the previously copied chunk. */
9548 if (n < 8)
9550 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9551 n -= 4;
9552 if (n > 0)
9554 int move = n - 4;
9556 src = aarch64_move_pointer (src, move);
9557 dst = aarch64_move_pointer (dst, move);
9558 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9560 return true;
9563 /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of
9564 them, then (if applicable) an 8-byte chunk. */
9565 while (n >= 8)
9567 if (n / 16)
9569 aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
9570 n -= 16;
9572 else
9574 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
9575 n -= 8;
9579 /* Finish the final bytes of the copy. We can always do this in one
9580 instruction. We either copy the exact amount we need, or partially
9581 overlap with the previous chunk we copied and copy 8 bytes. */
9582 if (n == 0)
9583 return true;
9584 else if (n == 1)
9585 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
9586 else if (n == 2)
9587 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
9588 else if (n == 4)
9589 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9590 else
9592 if (n == 3)
9594 src = aarch64_move_pointer (src, -1);
9595 dst = aarch64_move_pointer (dst, -1);
9596 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9598 else
9600 int move = n - 8;
9602 src = aarch64_move_pointer (src, move);
9603 dst = aarch64_move_pointer (dst, move);
9604 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
9608 return true;
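/* Illustrative example: a constant 23-byte copy is emitted as one 16-byte
   (TImode) block move, leaving n == 7, followed by the tail handling
   above, which moves both pointers back one byte and issues an 8-byte
   (DImode) block move that overlaps the byte already copied.  */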
9611 #undef TARGET_ADDRESS_COST
9612 #define TARGET_ADDRESS_COST aarch64_address_cost
9614 /* This hook determines whether unnamed bitfields affect the alignment
9615 of the containing structure. The hook returns true if the structure
9616 should inherit the alignment requirements of an unnamed bitfield's
9617 type. */
9618 #undef TARGET_ALIGN_ANON_BITFIELD
9619 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
9621 #undef TARGET_ASM_ALIGNED_DI_OP
9622 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
9624 #undef TARGET_ASM_ALIGNED_HI_OP
9625 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
9627 #undef TARGET_ASM_ALIGNED_SI_OP
9628 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
9630 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
9631 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
9632 hook_bool_const_tree_hwi_hwi_const_tree_true
9634 #undef TARGET_ASM_FILE_START
9635 #define TARGET_ASM_FILE_START aarch64_start_file
9637 #undef TARGET_ASM_OUTPUT_MI_THUNK
9638 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
9640 #undef TARGET_ASM_SELECT_RTX_SECTION
9641 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
9643 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
9644 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
9646 #undef TARGET_BUILD_BUILTIN_VA_LIST
9647 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
9649 #undef TARGET_CALLEE_COPIES
9650 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
9652 #undef TARGET_CAN_ELIMINATE
9653 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
9655 #undef TARGET_CANNOT_FORCE_CONST_MEM
9656 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
9658 #undef TARGET_CONDITIONAL_REGISTER_USAGE
9659 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
9661 /* Only the least significant bit is used for initialization guard
9662 variables. */
9663 #undef TARGET_CXX_GUARD_MASK_BIT
9664 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
9666 #undef TARGET_C_MODE_FOR_SUFFIX
9667 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
9669 #ifdef TARGET_BIG_ENDIAN_DEFAULT
9670 #undef TARGET_DEFAULT_TARGET_FLAGS
9671 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
9672 #endif
9674 #undef TARGET_CLASS_MAX_NREGS
9675 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
9677 #undef TARGET_BUILTIN_DECL
9678 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
9680 #undef TARGET_EXPAND_BUILTIN
9681 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
9683 #undef TARGET_EXPAND_BUILTIN_VA_START
9684 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
9686 #undef TARGET_FOLD_BUILTIN
9687 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
9689 #undef TARGET_FUNCTION_ARG
9690 #define TARGET_FUNCTION_ARG aarch64_function_arg
9692 #undef TARGET_FUNCTION_ARG_ADVANCE
9693 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
9695 #undef TARGET_FUNCTION_ARG_BOUNDARY
9696 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
9698 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
9699 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
9701 #undef TARGET_FUNCTION_VALUE
9702 #define TARGET_FUNCTION_VALUE aarch64_function_value
9704 #undef TARGET_FUNCTION_VALUE_REGNO_P
9705 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
9707 #undef TARGET_FRAME_POINTER_REQUIRED
9708 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
9710 #undef TARGET_GIMPLE_FOLD_BUILTIN
9711 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
9713 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
9714 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
9716 #undef TARGET_INIT_BUILTINS
9717 #define TARGET_INIT_BUILTINS aarch64_init_builtins
9719 #undef TARGET_LEGITIMATE_ADDRESS_P
9720 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
9722 #undef TARGET_LEGITIMATE_CONSTANT_P
9723 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
9725 #undef TARGET_LIBGCC_CMP_RETURN_MODE
9726 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
9728 #undef TARGET_LRA_P
9729 #define TARGET_LRA_P aarch64_lra_p
9731 #undef TARGET_MANGLE_TYPE
9732 #define TARGET_MANGLE_TYPE aarch64_mangle_type
9734 #undef TARGET_MEMORY_MOVE_COST
9735 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
9737 #undef TARGET_MUST_PASS_IN_STACK
9738 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
9740 /* This target hook should return true if accesses to volatile bitfields
9741 should use the narrowest mode possible. It should return false if these
9742 accesses should use the bitfield container type. */
9743 #undef TARGET_NARROW_VOLATILE_BITFIELD
9744 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
9746 #undef TARGET_OPTION_OVERRIDE
9747 #define TARGET_OPTION_OVERRIDE aarch64_override_options
9749 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
9750 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
9751 aarch64_override_options_after_change
9753 #undef TARGET_PASS_BY_REFERENCE
9754 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
9756 #undef TARGET_PREFERRED_RELOAD_CLASS
9757 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
9759 #undef TARGET_SECONDARY_RELOAD
9760 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
9762 #undef TARGET_SHIFT_TRUNCATION_MASK
9763 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
9765 #undef TARGET_SETUP_INCOMING_VARARGS
9766 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
9768 #undef TARGET_STRUCT_VALUE_RTX
9769 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
9771 #undef TARGET_REGISTER_MOVE_COST
9772 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
9774 #undef TARGET_RETURN_IN_MEMORY
9775 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
9777 #undef TARGET_RETURN_IN_MSB
9778 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
9780 #undef TARGET_RTX_COSTS
9781 #define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
9783 #undef TARGET_SCHED_ISSUE_RATE
9784 #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
9786 #undef TARGET_TRAMPOLINE_INIT
9787 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
9789 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
9790 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
9792 #undef TARGET_VECTOR_MODE_SUPPORTED_P
9793 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
9795 #undef TARGET_ARRAY_MODE_SUPPORTED_P
9796 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
9798 #undef TARGET_VECTORIZE_ADD_STMT_COST
9799 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
9801 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
9802 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
9803 aarch64_builtin_vectorization_cost
9805 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
9806 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
9808 #undef TARGET_VECTORIZE_BUILTINS
9809 #define TARGET_VECTORIZE_BUILTINS
9811 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
9812 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
9813 aarch64_builtin_vectorized_function
9815 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
9816 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
9817 aarch64_autovectorize_vector_sizes
9819 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
9820 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
9821 aarch64_atomic_assign_expand_fenv
9823 /* Section anchor support. */
9825 #undef TARGET_MIN_ANCHOR_OFFSET
9826 #define TARGET_MIN_ANCHOR_OFFSET -256
9828 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
9829 byte offset; we can do much more for larger data types, but have no way
9830 to determine the size of the access. We assume accesses are aligned. */
9831 #undef TARGET_MAX_ANCHOR_OFFSET
9832 #define TARGET_MAX_ANCHOR_OFFSET 4095
9834 #undef TARGET_VECTOR_ALIGNMENT
9835 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
9837 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
9838 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
9839 aarch64_simd_vector_alignment_reachable
9841 /* vec_perm support. */
9843 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
9844 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
9845 aarch64_vectorize_vec_perm_const_ok
9848 #undef TARGET_FIXED_CONDITION_CODE_REGS
9849 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
9851 #undef TARGET_FLAGS_REGNUM
9852 #define TARGET_FLAGS_REGNUM CC_REGNUM
9854 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
9855 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
9857 struct gcc_target targetm = TARGET_INITIALIZER;
9859 #include "gt-aarch64.h"