[AArch64] Use helper functions to handle multiple modes.
gcc/config/aarch64/aarch64.c
1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2014 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "tree.h"
29 #include "stringpool.h"
30 #include "stor-layout.h"
31 #include "calls.h"
32 #include "varasm.h"
33 #include "regs.h"
34 #include "df.h"
35 #include "hard-reg-set.h"
36 #include "output.h"
37 #include "expr.h"
38 #include "reload.h"
39 #include "toplev.h"
40 #include "target.h"
41 #include "target-def.h"
42 #include "targhooks.h"
43 #include "ggc.h"
44 #include "function.h"
45 #include "tm_p.h"
46 #include "recog.h"
47 #include "langhooks.h"
48 #include "diagnostic-core.h"
49 #include "pointer-set.h"
50 #include "hash-table.h"
51 #include "vec.h"
52 #include "basic-block.h"
53 #include "tree-ssa-alias.h"
54 #include "internal-fn.h"
55 #include "gimple-fold.h"
56 #include "tree-eh.h"
57 #include "gimple-expr.h"
58 #include "is-a.h"
59 #include "gimple.h"
60 #include "gimplify.h"
61 #include "optabs.h"
62 #include "dwarf2.h"
63 #include "cfgloop.h"
64 #include "tree-vectorizer.h"
65 #include "config/arm/aarch-cost-tables.h"
66 #include "dumpfile.h"
67 #include "builtins.h"
69 /* Defined for convenience. */
70 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
72 /* Classifies an address.
74 ADDRESS_REG_IMM
75 A simple base register plus immediate offset.
77 ADDRESS_REG_WB
78 A base register indexed by immediate offset with writeback.
80 ADDRESS_REG_REG
81 A base register indexed by (optionally scaled) register.
83 ADDRESS_REG_UXTW
84 A base register indexed by (optionally scaled) zero-extended register.
86 ADDRESS_REG_SXTW
87 A base register indexed by (optionally scaled) sign-extended register.
89 ADDRESS_LO_SUM
90 A LO_SUM rtx with a base register and "LO12" symbol relocation.
92 ADDRESS_SYMBOLIC:
93 A constant symbolic address, in pc-relative literal pool. */
95 enum aarch64_address_type {
96 ADDRESS_REG_IMM,
97 ADDRESS_REG_WB,
98 ADDRESS_REG_REG,
99 ADDRESS_REG_UXTW,
100 ADDRESS_REG_SXTW,
101 ADDRESS_LO_SUM,
102 ADDRESS_SYMBOLIC
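/* As an illustration, these classifications correspond roughly to the
   following assembler addressing forms:

     ADDRESS_REG_IMM   [x0, #16]
     ADDRESS_REG_WB    [x0, #16]! or [x0], #16
     ADDRESS_REG_REG   [x0, x1, lsl #3]
     ADDRESS_REG_UXTW  [x0, w1, uxtw #2]
     ADDRESS_REG_SXTW  [x0, w1, sxtw #2]
     ADDRESS_LO_SUM    [x0, #:lo12:foo]
     ADDRESS_SYMBOLIC  ldr x0, .Lpool_entry  */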
105 struct aarch64_address_info {
106 enum aarch64_address_type type;
107 rtx base;
108 rtx offset;
109 int shift;
110 enum aarch64_symbol_type symbol_type;
113 struct simd_immediate_info
115 rtx value;
116 int shift;
117 int element_width;
118 bool mvn;
119 bool msl;
122 /* The current code model. */
123 enum aarch64_code_model aarch64_cmodel;
125 #ifdef HAVE_AS_TLS
126 #undef TARGET_HAVE_TLS
127 #define TARGET_HAVE_TLS 1
128 #endif
130 static bool aarch64_lra_p (void);
131 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
132 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
133 const_tree,
134 enum machine_mode *, int *,
135 bool *);
136 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
137 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
138 static void aarch64_override_options_after_change (void);
139 static bool aarch64_vector_mode_supported_p (enum machine_mode);
140 static unsigned bit_count (unsigned HOST_WIDE_INT);
141 static bool aarch64_const_vec_all_same_int_p (rtx,
142 HOST_WIDE_INT, HOST_WIDE_INT);
144 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
145 const unsigned char *sel);
146 static int aarch64_address_cost (rtx, enum machine_mode, addr_space_t, bool);
148 /* The processor for which instructions should be scheduled. */
149 enum aarch64_processor aarch64_tune = cortexa53;
151 /* The current tuning set. */
152 const struct tune_params *aarch64_tune_params;
154 /* Mask to specify which instructions we are allowed to generate. */
155 unsigned long aarch64_isa_flags = 0;
157 /* Mask to specify which instruction scheduling options should be used. */
158 unsigned long aarch64_tune_flags = 0;
160 /* Tuning parameters. */
162 #if HAVE_DESIGNATED_INITIALIZERS
163 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
164 #else
165 #define NAMED_PARAM(NAME, VAL) (VAL)
166 #endif
168 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
169 __extension__
170 #endif
172 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
173 __extension__
174 #endif
175 static const struct cpu_addrcost_table generic_addrcost_table =
177 #if HAVE_DESIGNATED_INITIALIZERS
178 .addr_scale_costs =
179 #endif
181 NAMED_PARAM (qi, 0),
182 NAMED_PARAM (hi, 0),
183 NAMED_PARAM (si, 0),
184 NAMED_PARAM (ti, 0),
186 NAMED_PARAM (pre_modify, 0),
187 NAMED_PARAM (post_modify, 0),
188 NAMED_PARAM (register_offset, 0),
189 NAMED_PARAM (register_extend, 0),
190 NAMED_PARAM (imm_offset, 0)
193 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
194 __extension__
195 #endif
196 static const struct cpu_addrcost_table cortexa57_addrcost_table =
198 #if HAVE_DESIGNATED_INITIALIZERS
199 .addr_scale_costs =
200 #endif
202 NAMED_PARAM (qi, 0),
203 NAMED_PARAM (hi, 1),
204 NAMED_PARAM (si, 0),
205 NAMED_PARAM (ti, 1),
207 NAMED_PARAM (pre_modify, 0),
208 NAMED_PARAM (post_modify, 0),
209 NAMED_PARAM (register_offset, 0),
210 NAMED_PARAM (register_extend, 0),
211 NAMED_PARAM (imm_offset, 0),
214 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
215 __extension__
216 #endif
217 static const struct cpu_regmove_cost generic_regmove_cost =
219 NAMED_PARAM (GP2GP, 1),
220 NAMED_PARAM (GP2FP, 2),
221 NAMED_PARAM (FP2GP, 2),
222 /* We currently do not provide direct support for TFmode Q->Q move.
223 Therefore we need to raise the cost above 2 in order to have
224 reload handle the situation. */
225 NAMED_PARAM (FP2FP, 4)
228 /* Generic costs for vector insn classes. */
229 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
230 __extension__
231 #endif
232 static const struct cpu_vector_cost generic_vector_cost =
234 NAMED_PARAM (scalar_stmt_cost, 1),
235 NAMED_PARAM (scalar_load_cost, 1),
236 NAMED_PARAM (scalar_store_cost, 1),
237 NAMED_PARAM (vec_stmt_cost, 1),
238 NAMED_PARAM (vec_to_scalar_cost, 1),
239 NAMED_PARAM (scalar_to_vec_cost, 1),
240 NAMED_PARAM (vec_align_load_cost, 1),
241 NAMED_PARAM (vec_unalign_load_cost, 1),
242 NAMED_PARAM (vec_unalign_store_cost, 1),
243 NAMED_PARAM (vec_store_cost, 1),
244 NAMED_PARAM (cond_taken_branch_cost, 3),
245 NAMED_PARAM (cond_not_taken_branch_cost, 1)
248 /* Generic costs for vector insn classes. */
249 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
250 __extension__
251 #endif
252 static const struct cpu_vector_cost cortexa57_vector_cost =
254 NAMED_PARAM (scalar_stmt_cost, 1),
255 NAMED_PARAM (scalar_load_cost, 4),
256 NAMED_PARAM (scalar_store_cost, 1),
257 NAMED_PARAM (vec_stmt_cost, 3),
258 NAMED_PARAM (vec_to_scalar_cost, 8),
259 NAMED_PARAM (scalar_to_vec_cost, 8),
260 NAMED_PARAM (vec_align_load_cost, 5),
261 NAMED_PARAM (vec_unalign_load_cost, 5),
262 NAMED_PARAM (vec_unalign_store_cost, 1),
263 NAMED_PARAM (vec_store_cost, 1),
264 NAMED_PARAM (cond_taken_branch_cost, 1),
265 NAMED_PARAM (cond_not_taken_branch_cost, 1)
268 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
269 __extension__
270 #endif
271 static const struct tune_params generic_tunings =
273 &cortexa57_extra_costs,
274 &generic_addrcost_table,
275 &generic_regmove_cost,
276 &generic_vector_cost,
277 NAMED_PARAM (memmov_cost, 4),
278 NAMED_PARAM (issue_rate, 2)
281 static const struct tune_params cortexa53_tunings =
283 &cortexa53_extra_costs,
284 &generic_addrcost_table,
285 &generic_regmove_cost,
286 &generic_vector_cost,
287 NAMED_PARAM (memmov_cost, 4),
288 NAMED_PARAM (issue_rate, 2)
291 static const struct tune_params cortexa57_tunings =
293 &cortexa57_extra_costs,
294 &cortexa57_addrcost_table,
295 &generic_regmove_cost,
296 &cortexa57_vector_cost,
297 NAMED_PARAM (memmov_cost, 4),
298 NAMED_PARAM (issue_rate, 3)
301 /* A processor implementing AArch64. */
302 struct processor
304 const char *const name;
305 enum aarch64_processor core;
306 const char *arch;
307 const unsigned long flags;
308 const struct tune_params *const tune;
311 /* Processor cores implementing AArch64. */
312 static const struct processor all_cores[] =
314 #define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
315 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
316 #include "aarch64-cores.def"
317 #undef AARCH64_CORE
318 {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
319 {NULL, aarch64_none, NULL, 0, NULL}
322 /* Architectures implementing AArch64. */
323 static const struct processor all_architectures[] =
325 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
326 {NAME, CORE, #ARCH, FLAGS, NULL},
327 #include "aarch64-arches.def"
328 #undef AARCH64_ARCH
329 {NULL, aarch64_none, NULL, 0, NULL}
332 /* Target specification.  These are populated as command-line arguments
333 are processed, or NULL if not specified. */
334 static const struct processor *selected_arch;
335 static const struct processor *selected_cpu;
336 static const struct processor *selected_tune;
338 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
340 /* An ISA extension in the co-processor and main instruction set space. */
341 struct aarch64_option_extension
343 const char *const name;
344 const unsigned long flags_on;
345 const unsigned long flags_off;
348 /* ISA extensions in AArch64. */
349 static const struct aarch64_option_extension all_extensions[] =
351 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
352 {NAME, FLAGS_ON, FLAGS_OFF},
353 #include "aarch64-option-extensions.def"
354 #undef AARCH64_OPT_EXTENSION
355 {NULL, 0, 0}
358 /* Used to track the size of an address when generating a pre/post
359 increment address. */
360 static enum machine_mode aarch64_memory_reference_mode;
362 /* Used to force GTY into this file. */
363 static GTY(()) int gty_dummy;
365 /* A table of valid AArch64 "bitmask immediate" values for
366 logical instructions. */
368 #define AARCH64_NUM_BITMASKS 5334
369 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
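/* Each entry is a value encodable in the immediate field of the logical
   instructions AND, ORR and EOR: a contiguous run of set bits, rotated and
   replicated across the register, e.g. 0x00ff00ff00ff00ff or
   0x3ffc000000000000.  There are 5334 distinct such 64-bit values, hence
   AARCH64_NUM_BITMASKS above.  */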
371 typedef enum aarch64_cond_code
373 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
374 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
375 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
377 aarch64_cc;
379 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
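/* The enumeration above is ordered so that each even/odd pair are logical
   inverses (EQ/NE, CS/CC, MI/PL, ...); flipping the low bit therefore yields
   the inverse condition, e.g.
   AARCH64_INVERSE_CONDITION_CODE (AARCH64_GE) == AARCH64_LT.  */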
381 /* The condition codes of the processor, and the inverse function. */
382 static const char * const aarch64_condition_codes[] =
384 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
385 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
388 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
389 unsigned
390 aarch64_dbx_register_number (unsigned regno)
392 if (GP_REGNUM_P (regno))
393 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
394 else if (regno == SP_REGNUM)
395 return AARCH64_DWARF_SP;
396 else if (FP_REGNUM_P (regno))
397 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
399 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
400 equivalent DWARF register. */
401 return DWARF_FRAME_REGISTERS;
404 /* Return TRUE if MODE is any of the large integer vector structure modes (OImode, CImode or XImode). */
405 static bool
406 aarch64_vect_struct_mode_p (enum machine_mode mode)
408 return mode == OImode || mode == CImode || mode == XImode;
411 /* Return TRUE if MODE is any of the vector modes. */
412 static bool
413 aarch64_vector_mode_p (enum machine_mode mode)
415 return aarch64_vector_mode_supported_p (mode)
416 || aarch64_vect_struct_mode_p (mode);
419 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
420 static bool
421 aarch64_array_mode_supported_p (enum machine_mode mode,
422 unsigned HOST_WIDE_INT nelems)
424 if (TARGET_SIMD
425 && AARCH64_VALID_SIMD_QREG_MODE (mode)
426 && (nelems >= 2 && nelems <= 4))
427 return true;
429 return false;
432 /* Implement HARD_REGNO_NREGS. */
435 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
437 switch (aarch64_regno_regclass (regno))
439 case FP_REGS:
440 case FP_LO_REGS:
441 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
442 default:
443 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
445 gcc_unreachable ();
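/* For example, a 32-byte OImode value occupies 2 FP/SIMD registers
   (UNITS_PER_VREG is 16 bytes) but would need 4 general registers
   (UNITS_PER_WORD is 8 bytes).  */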
448 /* Implement HARD_REGNO_MODE_OK. */
451 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
453 if (GET_MODE_CLASS (mode) == MODE_CC)
454 return regno == CC_REGNUM;
456 if (regno == SP_REGNUM)
457 /* The purpose of comparing with ptr_mode is to support the
458 global register variable associated with the stack pointer
459 register via the syntax of asm ("wsp") in ILP32. */
460 return mode == Pmode || mode == ptr_mode;
462 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
463 return mode == Pmode;
465 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
466 return 1;
468 if (FP_REGNUM_P (regno))
470 if (aarch64_vect_struct_mode_p (mode))
471 return
472 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
473 else
474 return 1;
477 return 0;
480 /* Implement HARD_REGNO_CALLER_SAVE_MODE. */
481 enum machine_mode
482 aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
483 enum machine_mode mode)
485 /* Handle modes that fit within single registers. */
486 if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
488 if (GET_MODE_SIZE (mode) >= 4)
489 return mode;
490 else
491 return SImode;
493 /* Fall back to generic for multi-reg and very large modes. */
494 else
495 return choose_hard_reg_mode (regno, nregs, false);
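/* For instance, a one-register SFmode or SImode value keeps its own mode,
   while QImode and HImode values are widened to SImode so that a full
   32-bit register is saved and restored around the call.  */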
498 /* Return true if calls to DECL should be treated as
499 long-calls (i.e. called via a register). */
500 static bool
501 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
503 return false;
506 /* Return true if calls to symbol-ref SYM should be treated as
507 long-calls (i.e. called via a register). */
508 bool
509 aarch64_is_long_call_p (rtx sym)
511 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
514 /* Return true if the offsets to a zero/sign-extract operation
515 represent an expression that matches an extend operation. The
516 operands represent the parameters from
518 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
519 bool
520 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
521 rtx extract_imm)
523 HOST_WIDE_INT mult_val, extract_val;
525 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
526 return false;
528 mult_val = INTVAL (mult_imm);
529 extract_val = INTVAL (extract_imm);
531 if (extract_val > 8
532 && extract_val < GET_MODE_BITSIZE (mode)
533 && exact_log2 (extract_val & ~7) > 0
534 && (extract_val & 7) <= 4
535 && mult_val == (1 << (extract_val & 7)))
536 return true;
538 return false;
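/* For example, in DImode the pair MULT_IMM == 4, EXTRACT_IMM == 34 is
   accepted: 34 & ~7 == 32 selects a 32-bit extend, 34 & 7 == 2 is the shift
   amount, and 4 == 1 << 2, i.e. the extract describes a 32-bit value
   extended and then shifted left by two.  */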
541 /* Emit an insn that's a simple single-set. Both the operands must be
542 known to be valid. */
543 inline static rtx
544 emit_set_insn (rtx x, rtx y)
546 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
549 /* X and Y are two things to compare using CODE. Emit the compare insn and
550 return the rtx for register 0 in the proper mode. */
552 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
554 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
555 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
557 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
558 return cc_reg;
561 /* Build the SYMBOL_REF for __tls_get_addr. */
563 static GTY(()) rtx tls_get_addr_libfunc;
566 aarch64_tls_get_addr (void)
568 if (!tls_get_addr_libfunc)
569 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
570 return tls_get_addr_libfunc;
573 /* Return the TLS model to use for ADDR. */
575 static enum tls_model
576 tls_symbolic_operand_type (rtx addr)
578 enum tls_model tls_kind = TLS_MODEL_NONE;
579 rtx sym, addend;
581 if (GET_CODE (addr) == CONST)
583 split_const (addr, &sym, &addend);
584 if (GET_CODE (sym) == SYMBOL_REF)
585 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
587 else if (GET_CODE (addr) == SYMBOL_REF)
588 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
590 return tls_kind;
593 /* We allow LO_SUMs in our legitimate addresses so that combine can
594 take care of combining addresses where necessary, but for generation
595 purposes we generate the address as:
597 RTL Absolute
598 tmp = hi (symbol_ref); adrp x1, foo
599 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
602 PIC TLS
603 adrp x1, :got:foo adrp tmp, :tlsgd:foo
604 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
605 bl __tls_get_addr
608 Load TLS symbol, depending on TLS mechanism and TLS access model.
610 Global Dynamic - Traditional TLS:
611 adrp tmp, :tlsgd:imm
612 add dest, tmp, #:tlsgd_lo12:imm
613 bl __tls_get_addr
615 Global Dynamic - TLS Descriptors:
616 adrp dest, :tlsdesc:imm
617 ldr tmp, [dest, #:tlsdesc_lo12:imm]
618 add dest, dest, #:tlsdesc_lo12:imm
619 blr tmp
620 mrs tp, tpidr_el0
621 add dest, dest, tp
623 Initial Exec:
624 mrs tp, tpidr_el0
625 adrp tmp, :gottprel:imm
626 ldr dest, [tmp, #:gottprel_lo12:imm]
627 add dest, dest, tp
629 Local Exec:
630 mrs tp, tpidr_el0
631 add t0, tp, #:tprel_hi12:imm
632 add t0, #:tprel_lo12_nc:imm
635 static void
636 aarch64_load_symref_appropriately (rtx dest, rtx imm,
637 enum aarch64_symbol_type type)
639 switch (type)
641 case SYMBOL_SMALL_ABSOLUTE:
643 /* In ILP32, the mode of dest can be either SImode or DImode. */
644 rtx tmp_reg = dest;
645 enum machine_mode mode = GET_MODE (dest);
647 gcc_assert (mode == Pmode || mode == ptr_mode);
649 if (can_create_pseudo_p ())
650 tmp_reg = gen_reg_rtx (mode);
652 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
653 emit_insn (gen_add_losym (dest, tmp_reg, imm));
654 return;
657 case SYMBOL_TINY_ABSOLUTE:
658 emit_insn (gen_rtx_SET (Pmode, dest, imm));
659 return;
661 case SYMBOL_SMALL_GOT:
663 /* In ILP32, the mode of dest can be either SImode or DImode,
664 while the got entry is always of SImode size. The mode of
665 dest depends on how dest is used: if dest is assigned to a
666 pointer (e.g. in the memory), it has SImode; it may have
667 DImode if dest is dereferenced to access the memory.
668 This is why we have to handle three different ldr_got_small
669 patterns here (two patterns for ILP32). */
670 rtx tmp_reg = dest;
671 enum machine_mode mode = GET_MODE (dest);
673 if (can_create_pseudo_p ())
674 tmp_reg = gen_reg_rtx (mode);
676 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
677 if (mode == ptr_mode)
679 if (mode == DImode)
680 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
681 else
682 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
684 else
686 gcc_assert (mode == Pmode);
687 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
690 return;
693 case SYMBOL_SMALL_TLSGD:
695 rtx insns;
696 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
698 start_sequence ();
699 aarch64_emit_call_insn (gen_tlsgd_small (result, imm));
700 insns = get_insns ();
701 end_sequence ();
703 RTL_CONST_CALL_P (insns) = 1;
704 emit_libcall_block (insns, dest, result, imm);
705 return;
708 case SYMBOL_SMALL_TLSDESC:
710 enum machine_mode mode = GET_MODE (dest);
711 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
712 rtx tp;
714 gcc_assert (mode == Pmode || mode == ptr_mode);
716 /* In ILP32, the got entry is always of SImode size. Unlike
717 small GOT, the dest is fixed at reg 0. */
718 if (TARGET_ILP32)
719 emit_insn (gen_tlsdesc_small_si (imm));
720 else
721 emit_insn (gen_tlsdesc_small_di (imm));
722 tp = aarch64_load_tp (NULL);
724 if (mode != Pmode)
725 tp = gen_lowpart (mode, tp);
727 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
728 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
729 return;
732 case SYMBOL_SMALL_GOTTPREL:
734 /* In ILP32, the mode of dest can be either SImode or DImode,
735 while the got entry is always of SImode size. The mode of
736 dest depends on how dest is used: if dest is assigned to a
737 pointer (e.g. in the memory), it has SImode; it may have
738 DImode if dest is dereferenced to access the memory.
739 This is why we have to handle three different tlsie_small
740 patterns here (two patterns for ILP32). */
741 enum machine_mode mode = GET_MODE (dest);
742 rtx tmp_reg = gen_reg_rtx (mode);
743 rtx tp = aarch64_load_tp (NULL);
745 if (mode == ptr_mode)
747 if (mode == DImode)
748 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
749 else
751 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
752 tp = gen_lowpart (mode, tp);
755 else
757 gcc_assert (mode == Pmode);
758 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
761 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
762 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
763 return;
766 case SYMBOL_SMALL_TPREL:
768 rtx tp = aarch64_load_tp (NULL);
769 emit_insn (gen_tlsle_small (dest, tp, imm));
770 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
771 return;
774 case SYMBOL_TINY_GOT:
775 emit_insn (gen_ldr_got_tiny (dest, imm));
776 return;
778 default:
779 gcc_unreachable ();
783 /* Emit a move from SRC to DEST. Assume that the move expanders can
784 handle all moves if !can_create_pseudo_p (). The distinction is
785 important because, unlike emit_move_insn, the move expanders know
786 how to force Pmode objects into the constant pool even when the
787 constant pool address is not itself legitimate. */
788 static rtx
789 aarch64_emit_move (rtx dest, rtx src)
791 return (can_create_pseudo_p ()
792 ? emit_move_insn (dest, src)
793 : emit_move_insn_1 (dest, src));
796 /* Split a 128-bit move operation into two 64-bit move operations,
797 taking care to handle partial overlap of register to register
798 copies. Special cases are needed when moving between GP regs and
799 FP regs. SRC can be a register, constant or memory; DST a register
800 or memory. If either operand is memory it must not have any side
801 effects. */
802 void
803 aarch64_split_128bit_move (rtx dst, rtx src)
805 rtx dst_lo, dst_hi;
806 rtx src_lo, src_hi;
808 enum machine_mode mode = GET_MODE (dst);
810 gcc_assert (mode == TImode || mode == TFmode);
811 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
812 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
814 if (REG_P (dst) && REG_P (src))
816 int src_regno = REGNO (src);
817 int dst_regno = REGNO (dst);
819 /* Handle FP <-> GP regs. */
820 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
822 src_lo = gen_lowpart (word_mode, src);
823 src_hi = gen_highpart (word_mode, src);
825 if (mode == TImode)
827 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
828 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
830 else
832 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
833 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
835 return;
837 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
839 dst_lo = gen_lowpart (word_mode, dst);
840 dst_hi = gen_highpart (word_mode, dst);
842 if (mode == TImode)
844 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
845 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
847 else
849 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
850 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
852 return;
856 dst_lo = gen_lowpart (word_mode, dst);
857 dst_hi = gen_highpart (word_mode, dst);
858 src_lo = gen_lowpart (word_mode, src);
859 src_hi = gen_highpart_mode (word_mode, mode, src);
861 /* At most one pairing may overlap. */
862 if (reg_overlap_mentioned_p (dst_lo, src_hi))
864 aarch64_emit_move (dst_hi, src_hi);
865 aarch64_emit_move (dst_lo, src_lo);
867 else
869 aarch64_emit_move (dst_lo, src_lo);
870 aarch64_emit_move (dst_hi, src_hi);
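/* For instance, when moving a TImode value from {x0, x1} to {x1, x2},
   DST_LO (x1) overlaps SRC_HI (x1), so the high halves are copied first
   (x2 <- x1) and the low halves second (x1 <- x0); copying low-first would
   clobber the source high half.  */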
874 bool
875 aarch64_split_128bit_move_p (rtx dst, rtx src)
877 return (! REG_P (src)
878 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
881 /* Split a complex SIMD combine. */
883 void
884 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
886 enum machine_mode src_mode = GET_MODE (src1);
887 enum machine_mode dst_mode = GET_MODE (dst);
889 gcc_assert (VECTOR_MODE_P (dst_mode));
891 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
893 rtx (*gen) (rtx, rtx, rtx);
895 switch (src_mode)
897 case V8QImode:
898 gen = gen_aarch64_simd_combinev8qi;
899 break;
900 case V4HImode:
901 gen = gen_aarch64_simd_combinev4hi;
902 break;
903 case V2SImode:
904 gen = gen_aarch64_simd_combinev2si;
905 break;
906 case V2SFmode:
907 gen = gen_aarch64_simd_combinev2sf;
908 break;
909 case DImode:
910 gen = gen_aarch64_simd_combinedi;
911 break;
912 case DFmode:
913 gen = gen_aarch64_simd_combinedf;
914 break;
915 default:
916 gcc_unreachable ();
919 emit_insn (gen (dst, src1, src2));
920 return;
924 /* Split a complex SIMD move. */
926 void
927 aarch64_split_simd_move (rtx dst, rtx src)
929 enum machine_mode src_mode = GET_MODE (src);
930 enum machine_mode dst_mode = GET_MODE (dst);
932 gcc_assert (VECTOR_MODE_P (dst_mode));
934 if (REG_P (dst) && REG_P (src))
936 rtx (*gen) (rtx, rtx);
938 gcc_assert (VECTOR_MODE_P (src_mode));
940 switch (src_mode)
942 case V16QImode:
943 gen = gen_aarch64_split_simd_movv16qi;
944 break;
945 case V8HImode:
946 gen = gen_aarch64_split_simd_movv8hi;
947 break;
948 case V4SImode:
949 gen = gen_aarch64_split_simd_movv4si;
950 break;
951 case V2DImode:
952 gen = gen_aarch64_split_simd_movv2di;
953 break;
954 case V4SFmode:
955 gen = gen_aarch64_split_simd_movv4sf;
956 break;
957 case V2DFmode:
958 gen = gen_aarch64_split_simd_movv2df;
959 break;
960 default:
961 gcc_unreachable ();
964 emit_insn (gen (dst, src));
965 return;
969 static rtx
970 aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
972 if (can_create_pseudo_p ())
973 return force_reg (mode, value);
974 else
976 x = aarch64_emit_move (x, value);
977 return x;
982 static rtx
983 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
985 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
987 rtx high;
988 /* Load the full offset into a register. This
989 might be improvable in the future. */
990 high = GEN_INT (offset);
991 offset = 0;
992 high = aarch64_force_temporary (mode, temp, high);
993 reg = aarch64_force_temporary (mode, temp,
994 gen_rtx_PLUS (mode, high, reg));
996 return plus_constant (mode, reg, offset);
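/* For example, an offset of 0x123456 does not fit an ADD immediate
   (a 12-bit value, optionally shifted left by 12), so it is first loaded
   into TEMP and added with a register-register ADD, whereas 0x1000 is
   encodable and is simply folded into the returned PLUS.  */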
999 void
1000 aarch64_expand_mov_immediate (rtx dest, rtx imm)
1002 enum machine_mode mode = GET_MODE (dest);
1003 unsigned HOST_WIDE_INT mask;
1004 int i;
1005 bool first;
1006 unsigned HOST_WIDE_INT val;
1007 bool subtargets;
1008 rtx subtarget;
1009 int one_match, zero_match;
1011 gcc_assert (mode == SImode || mode == DImode);
1013 /* Check on what type of symbol it is. */
1014 if (GET_CODE (imm) == SYMBOL_REF
1015 || GET_CODE (imm) == LABEL_REF
1016 || GET_CODE (imm) == CONST)
1018 rtx mem, base, offset;
1019 enum aarch64_symbol_type sty;
1021 /* If we have (const (plus symbol offset)), separate out the offset
1022 before we start classifying the symbol. */
1023 split_const (imm, &base, &offset);
1025 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
1026 switch (sty)
1028 case SYMBOL_FORCE_TO_MEM:
1029 if (offset != const0_rtx
1030 && targetm.cannot_force_const_mem (mode, imm))
1032 gcc_assert (can_create_pseudo_p ());
1033 base = aarch64_force_temporary (mode, dest, base);
1034 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1035 aarch64_emit_move (dest, base);
1036 return;
1038 mem = force_const_mem (ptr_mode, imm);
1039 gcc_assert (mem);
1040 if (mode != ptr_mode)
1041 mem = gen_rtx_ZERO_EXTEND (mode, mem);
1042 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1043 return;
1045 case SYMBOL_SMALL_TLSGD:
1046 case SYMBOL_SMALL_TLSDESC:
1047 case SYMBOL_SMALL_GOTTPREL:
1048 case SYMBOL_SMALL_GOT:
1049 case SYMBOL_TINY_GOT:
1050 if (offset != const0_rtx)
1052 gcc_assert(can_create_pseudo_p ());
1053 base = aarch64_force_temporary (mode, dest, base);
1054 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1055 aarch64_emit_move (dest, base);
1056 return;
1058 /* FALLTHRU */
1060 case SYMBOL_SMALL_TPREL:
1061 case SYMBOL_SMALL_ABSOLUTE:
1062 case SYMBOL_TINY_ABSOLUTE:
1063 aarch64_load_symref_appropriately (dest, imm, sty);
1064 return;
1066 default:
1067 gcc_unreachable ();
1071 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
1073 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1074 return;
1077 if (!CONST_INT_P (imm))
1079 if (GET_CODE (imm) == HIGH)
1080 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1081 else
1083 rtx mem = force_const_mem (mode, imm);
1084 gcc_assert (mem);
1085 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1088 return;
1091 if (mode == SImode)
1093 /* We know we can't do this in 1 insn, and we must be able to do it
1094 in two; so don't mess around looking for sequences that don't buy
1095 us anything. */
1096 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
1097 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1098 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
1099 return;
1102 /* Remaining cases are all for DImode. */
1104 val = INTVAL (imm);
1105 subtargets = optimize && can_create_pseudo_p ();
1107 one_match = 0;
1108 zero_match = 0;
1109 mask = 0xffff;
1111 for (i = 0; i < 64; i += 16, mask <<= 16)
1113 if ((val & mask) == 0)
1114 zero_match++;
1115 else if ((val & mask) == mask)
1116 one_match++;
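/* For example, 0xffffffff12345678 has halfwords 0x5678, 0x1234, 0xffff and
   0xffff, giving one_match == 2 and zero_match == 0; the code below then
   materializes it as a MOVN-class move of 0xffffffff1234ffff followed by a
   single MOVK of 0x5678.  */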
1119 if (one_match == 2)
1121 mask = 0xffff;
1122 for (i = 0; i < 64; i += 16, mask <<= 16)
1124 if ((val & mask) != mask)
1126 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1127 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1128 GEN_INT ((val >> i) & 0xffff)));
1129 return;
1132 gcc_unreachable ();
1135 if (zero_match == 2)
1136 goto simple_sequence;
1138 mask = 0x0ffff0000UL;
1139 for (i = 16; i < 64; i += 16, mask <<= 16)
1141 HOST_WIDE_INT comp = mask & ~(mask - 1);
1143 if (aarch64_uimm12_shift (val - (val & mask)))
1145 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1147 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1148 emit_insn (gen_adddi3 (dest, subtarget,
1149 GEN_INT (val - (val & mask))));
1150 return;
1152 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1154 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1156 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1157 GEN_INT ((val + comp) & mask)));
1158 emit_insn (gen_adddi3 (dest, subtarget,
1159 GEN_INT (val - ((val + comp) & mask))));
1160 return;
1162 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1164 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1166 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1167 GEN_INT ((val - comp) | ~mask)));
1168 emit_insn (gen_adddi3 (dest, subtarget,
1169 GEN_INT (val - ((val - comp) | ~mask))));
1170 return;
1172 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1174 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1176 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1177 GEN_INT (val | ~mask)));
1178 emit_insn (gen_adddi3 (dest, subtarget,
1179 GEN_INT (val - (val | ~mask))));
1180 return;
1184 /* See if we can do it by arithmetically combining two
1185 immediates. */
1186 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1188 int j;
1189 mask = 0xffff;
1191 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1192 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1194 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1195 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1196 GEN_INT (aarch64_bitmasks[i])));
1197 emit_insn (gen_adddi3 (dest, subtarget,
1198 GEN_INT (val - aarch64_bitmasks[i])));
1199 return;
1202 for (j = 0; j < 64; j += 16, mask <<= 16)
1204 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1206 emit_insn (gen_rtx_SET (VOIDmode, dest,
1207 GEN_INT (aarch64_bitmasks[i])));
1208 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1209 GEN_INT ((val >> j) & 0xffff)));
1210 return;
1215 /* See if we can do it by logically combining two immediates. */
1216 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1218 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1220 int j;
1222 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1223 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1225 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1226 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1227 GEN_INT (aarch64_bitmasks[i])));
1228 emit_insn (gen_iordi3 (dest, subtarget,
1229 GEN_INT (aarch64_bitmasks[j])));
1230 return;
1233 else if ((val & aarch64_bitmasks[i]) == val)
1235 int j;
1237 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1238 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1241 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1242 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1243 GEN_INT (aarch64_bitmasks[j])));
1244 emit_insn (gen_anddi3 (dest, subtarget,
1245 GEN_INT (aarch64_bitmasks[i])));
1246 return;
1251 simple_sequence:
1252 first = true;
1253 mask = 0xffff;
1254 for (i = 0; i < 64; i += 16, mask <<= 16)
1256 if ((val & mask) != 0)
1258 if (first)
1260 emit_insn (gen_rtx_SET (VOIDmode, dest,
1261 GEN_INT (val & mask)));
1262 first = false;
1264 else
1265 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1266 GEN_INT ((val >> i) & 0xffff)));
1271 static bool
1272 aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
1273 tree exp ATTRIBUTE_UNUSED)
1275 /* Currently, always true. */
1276 return true;
1279 /* Implement TARGET_PASS_BY_REFERENCE. */
1281 static bool
1282 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1283 enum machine_mode mode,
1284 const_tree type,
1285 bool named ATTRIBUTE_UNUSED)
1287 HOST_WIDE_INT size;
1288 enum machine_mode dummymode;
1289 int nregs;
1291 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1292 size = (mode == BLKmode && type)
1293 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1295 /* Aggregates are passed by reference based on their size. */
1296 if (type && AGGREGATE_TYPE_P (type))
1298 size = int_size_in_bytes (type);
1301 /* Variable-sized arguments are always passed by reference. */
1302 if (size < 0)
1303 return true;
1305 /* Can this be a candidate to be passed in fp/simd register(s)? */
1306 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1307 &dummymode, &nregs,
1308 NULL))
1309 return false;
1311 /* Arguments which are variable sized or larger than 2 registers are
1312 passed by reference unless they are a homogeneous floating-point
1313 aggregate. */
1314 return size > 2 * UNITS_PER_WORD;
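/* For illustration, under these rules

     struct hfa { double a, b, c; };   -- 24 bytes, an HFA: passed by value
                                          in three FP registers
     struct big { long a, b, c; };     -- 24 bytes, not an HFA: copied by the
                                          caller and passed by reference

   the HFA case is what the aarch64_vfp_is_call_or_return_candidate check
   above accepts.  */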
1317 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1318 static bool
1319 aarch64_return_in_msb (const_tree valtype)
1321 enum machine_mode dummy_mode;
1322 int dummy_int;
1324 /* Never happens in little-endian mode. */
1325 if (!BYTES_BIG_ENDIAN)
1326 return false;
1328 /* Only composite types smaller than or equal to 16 bytes can
1329 be potentially returned in registers. */
1330 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1331 || int_size_in_bytes (valtype) <= 0
1332 || int_size_in_bytes (valtype) > 16)
1333 return false;
1335 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1336 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1337 is always passed/returned in the least significant bits of fp/simd
1338 register(s). */
1339 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1340 &dummy_mode, &dummy_int, NULL))
1341 return false;
1343 return true;
1346 /* Implement TARGET_FUNCTION_VALUE.
1347 Define how to find the value returned by a function. */
1349 static rtx
1350 aarch64_function_value (const_tree type, const_tree func,
1351 bool outgoing ATTRIBUTE_UNUSED)
1353 enum machine_mode mode;
1354 int unsignedp;
1355 int count;
1356 enum machine_mode ag_mode;
1358 mode = TYPE_MODE (type);
1359 if (INTEGRAL_TYPE_P (type))
1360 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1362 if (aarch64_return_in_msb (type))
1364 HOST_WIDE_INT size = int_size_in_bytes (type);
1366 if (size % UNITS_PER_WORD != 0)
1368 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1369 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1373 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1374 &ag_mode, &count, NULL))
1376 if (!aarch64_composite_type_p (type, mode))
1378 gcc_assert (count == 1 && mode == ag_mode);
1379 return gen_rtx_REG (mode, V0_REGNUM);
1381 else
1383 int i;
1384 rtx par;
1386 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1387 for (i = 0; i < count; i++)
1389 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1390 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1391 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1392 XVECEXP (par, 0, i) = tmp;
1394 return par;
1397 else
1398 return gen_rtx_REG (mode, R0_REGNUM);
1401 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1402 Return true if REGNO is the number of a hard register in which the values
1403 of called function may come back. */
1405 static bool
1406 aarch64_function_value_regno_p (const unsigned int regno)
1408 /* Maximum of 16 bytes can be returned in the general registers. Examples
1409 of 16-byte return values are: 128-bit integers and 16-byte small
1410 structures (excluding homogeneous floating-point aggregates). */
1411 if (regno == R0_REGNUM || regno == R1_REGNUM)
1412 return true;
1414 /* Up to four fp/simd registers can return a function value, e.g. a
1415 homogeneous floating-point aggregate having four members. */
1416 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1417 return !TARGET_GENERAL_REGS_ONLY;
1419 return false;
1422 /* Implement TARGET_RETURN_IN_MEMORY.
1424 If the type T of the result of a function is such that
1425 void func (T arg)
1426 would require that arg be passed as a value in a register (or set of
1427 registers) according to the parameter passing rules, then the result
1428 is returned in the same registers as would be used for such an
1429 argument. */
1431 static bool
1432 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1434 HOST_WIDE_INT size;
1435 enum machine_mode ag_mode;
1436 int count;
1438 if (!AGGREGATE_TYPE_P (type)
1439 && TREE_CODE (type) != COMPLEX_TYPE
1440 && TREE_CODE (type) != VECTOR_TYPE)
1441 /* Simple scalar types are always returned in registers. */
1442 return false;
1444 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1445 type,
1446 &ag_mode,
1447 &count,
1448 NULL))
1449 return false;
1451 /* Types larger than 2 registers are returned in memory. */
1452 size = int_size_in_bytes (type);
1453 return (size < 0 || size > 2 * UNITS_PER_WORD);
1456 static bool
1457 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1458 const_tree type, int *nregs)
1460 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1461 return aarch64_vfp_is_call_or_return_candidate (mode,
1462 type,
1463 &pcum->aapcs_vfp_rmode,
1464 nregs,
1465 NULL);
1468 /* Given MODE and TYPE of a function argument, return the alignment in
1469 bits. The idea is to suppress any stronger alignment requested by
1470 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1471 This is a helper function for local use only. */
1473 static unsigned int
1474 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1476 unsigned int alignment;
1478 if (type)
1480 if (!integer_zerop (TYPE_SIZE (type)))
1482 if (TYPE_MODE (type) == mode)
1483 alignment = TYPE_ALIGN (type);
1484 else
1485 alignment = GET_MODE_ALIGNMENT (mode);
1487 else
1488 alignment = 0;
1490 else
1491 alignment = GET_MODE_ALIGNMENT (mode);
1493 return alignment;
1496 /* Layout a function argument according to the AAPCS64 rules. The rule
1497 numbers refer to the rule numbers in the AAPCS64. */
1499 static void
1500 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1501 const_tree type,
1502 bool named ATTRIBUTE_UNUSED)
1504 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1505 int ncrn, nvrn, nregs;
1506 bool allocate_ncrn, allocate_nvrn;
1507 HOST_WIDE_INT size;
1509 /* We need to do this once per argument. */
1510 if (pcum->aapcs_arg_processed)
1511 return;
1513 pcum->aapcs_arg_processed = true;
1515 /* Size in bytes, rounded to the nearest multiple of 8 bytes. */
1516 size
1517 = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
1518 UNITS_PER_WORD);
1520 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1521 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1522 mode,
1523 type,
1524 &nregs);
1526 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1527 The following code thus handles passing by SIMD/FP registers first. */
1529 nvrn = pcum->aapcs_nvrn;
1531 /* C.1 - C.5 for floating point, homogeneous floating-point aggregates (HFA)
1532 and homogeneous short-vector aggregates (HVA). */
1533 if (allocate_nvrn)
1535 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1537 pcum->aapcs_nextnvrn = nvrn + nregs;
1538 if (!aarch64_composite_type_p (type, mode))
1540 gcc_assert (nregs == 1);
1541 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1543 else
1545 rtx par;
1546 int i;
1547 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1548 for (i = 0; i < nregs; i++)
1550 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1551 V0_REGNUM + nvrn + i);
1552 tmp = gen_rtx_EXPR_LIST
1553 (VOIDmode, tmp,
1554 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1555 XVECEXP (par, 0, i) = tmp;
1557 pcum->aapcs_reg = par;
1559 return;
1561 else
1563 /* C.3 NSRN is set to 8. */
1564 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1565 goto on_stack;
1569 ncrn = pcum->aapcs_ncrn;
1570 nregs = size / UNITS_PER_WORD;
1572 /* C.6 - C.9, though the sign and zero extension semantics are
1573 handled elsewhere. This is the case where the argument fits
1574 entirely in general registers. */
1575 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1577 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1579 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1581 /* C.8 if the argument has an alignment of 16 then the NGRN is
1582 rounded up to the next even number. */
1583 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1585 ++ncrn;
1586 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1588 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1589 A reg is still generated for it, but the caller should be smart
1590 enough not to use it. */
1591 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1593 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1595 else
1597 rtx par;
1598 int i;
1600 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1601 for (i = 0; i < nregs; i++)
1603 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1604 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1605 GEN_INT (i * UNITS_PER_WORD));
1606 XVECEXP (par, 0, i) = tmp;
1608 pcum->aapcs_reg = par;
1611 pcum->aapcs_nextncrn = ncrn + nregs;
1612 return;
1615 /* C.11 */
1616 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1618 /* The argument is passed on the stack; record the needed number of words for
1619 this argument and align the total size if necessary. */
1620 on_stack:
1621 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
1622 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1623 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1624 16 / UNITS_PER_WORD);
1625 return;
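/* For example, an __int128 argument (16-byte alignment, two registers)
   arriving when the NGRN is odd skips one core register so that it starts
   at an even register number (rule C.8), while an HFA of four floats is
   allocated to four consecutive FP/SIMD registers.  */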
1628 /* Implement TARGET_FUNCTION_ARG. */
1630 static rtx
1631 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1632 const_tree type, bool named)
1634 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1635 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1637 if (mode == VOIDmode)
1638 return NULL_RTX;
1640 aarch64_layout_arg (pcum_v, mode, type, named);
1641 return pcum->aapcs_reg;
1644 void
1645 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1646 const_tree fntype ATTRIBUTE_UNUSED,
1647 rtx libname ATTRIBUTE_UNUSED,
1648 const_tree fndecl ATTRIBUTE_UNUSED,
1649 unsigned n_named ATTRIBUTE_UNUSED)
1651 pcum->aapcs_ncrn = 0;
1652 pcum->aapcs_nvrn = 0;
1653 pcum->aapcs_nextncrn = 0;
1654 pcum->aapcs_nextnvrn = 0;
1655 pcum->pcs_variant = ARM_PCS_AAPCS64;
1656 pcum->aapcs_reg = NULL_RTX;
1657 pcum->aapcs_arg_processed = false;
1658 pcum->aapcs_stack_words = 0;
1659 pcum->aapcs_stack_size = 0;
1661 return;
1664 static void
1665 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1666 enum machine_mode mode,
1667 const_tree type,
1668 bool named)
1670 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1671 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1673 aarch64_layout_arg (pcum_v, mode, type, named);
1674 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1675 != (pcum->aapcs_stack_words != 0));
1676 pcum->aapcs_arg_processed = false;
1677 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1678 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1679 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1680 pcum->aapcs_stack_words = 0;
1681 pcum->aapcs_reg = NULL_RTX;
1685 bool
1686 aarch64_function_arg_regno_p (unsigned regno)
1688 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1689 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1692 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1693 PARM_BOUNDARY bits of alignment, but will be given anything up
1694 to STACK_BOUNDARY bits if the type requires it. This makes sure
1695 that both before and after the layout of each argument, the Next
1696 Stacked Argument Address (NSAA) will have a minimum alignment of
1697 8 bytes. */
1699 static unsigned int
1700 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1702 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1704 if (alignment < PARM_BOUNDARY)
1705 alignment = PARM_BOUNDARY;
1706 if (alignment > STACK_BOUNDARY)
1707 alignment = STACK_BOUNDARY;
1708 return alignment;
1711 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1713 Return true if an argument passed on the stack should be padded upwards,
1714 i.e. if the least-significant byte of the stack slot has useful data.
1716 Small aggregate types are placed in the lowest memory address.
1718 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1720 bool
1721 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1723 /* On little-endian targets, the least significant byte of every stack
1724 argument is passed at the lowest byte address of the stack slot. */
1725 if (!BYTES_BIG_ENDIAN)
1726 return true;
1728 /* Otherwise, integral, floating-point and pointer types are padded downward:
1729 the least significant byte of a stack argument is passed at the highest
1730 byte address of the stack slot. */
1731 if (type
1732 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1733 || POINTER_TYPE_P (type))
1734 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1735 return false;
1737 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1738 return true;
1741 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1743 It specifies padding for the last (may also be the only)
1744 element of a block move between registers and memory. If
1745 assuming the block is in the memory, padding upward means that
1746 the last element is padded after its most significant byte,
1747 while in downward padding, the last element is padded at its
1748 least significant byte side.
1750 Small aggregates and small complex types are always padded
1751 upwards.
1753 We don't need to worry about homogeneous floating-point or
1754 short-vector aggregates; their move is not affected by the
1755 padding direction determined here. Regardless of endianness,
1756 each element of such an aggregate is put in the least
1757 significant bits of a fp/simd register.
1759 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1760 register has useful data, and return the opposite if the most
1761 significant byte does. */
1763 bool
1764 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1765 bool first ATTRIBUTE_UNUSED)
1768 /* Small composite types are always padded upward. */
1769 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1771 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1772 : GET_MODE_SIZE (mode));
1773 if (size < 2 * UNITS_PER_WORD)
1774 return true;
1777 /* Otherwise, use the default padding. */
1778 return !BYTES_BIG_ENDIAN;
1781 static enum machine_mode
1782 aarch64_libgcc_cmp_return_mode (void)
1784 return SImode;
1787 static bool
1788 aarch64_frame_pointer_required (void)
1790 /* If the function contains dynamic stack allocations, we need to
1791 use the frame pointer to access the static parts of the frame. */
1792 if (cfun->calls_alloca)
1793 return true;
1795 /* In aarch64_override_options_after_change
1796 flag_omit_leaf_frame_pointer turns off the frame pointer by
1797 default. Turn it back on now if we've not got a leaf
1798 function. */
1799 if (flag_omit_leaf_frame_pointer
1800 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
1801 return true;
1803 return false;
1806 /* Mark the registers that need to be saved by the callee and calculate
1807 the size of the callee-saved registers area and frame record (both FP
1808 and LR may be omitted). */
1809 static void
1810 aarch64_layout_frame (void)
1812 HOST_WIDE_INT offset = 0;
1813 int regno;
1815 if (reload_completed && cfun->machine->frame.laid_out)
1816 return;
1818 #define SLOT_NOT_REQUIRED (-2)
1819 #define SLOT_REQUIRED (-1)
1821 /* First mark all the registers that really need to be saved... */
1822 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1823 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
1825 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1826 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
1828 /* ... that includes the eh data registers (if needed)... */
1829 if (crtl->calls_eh_return)
1830 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1831 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
1832 = SLOT_REQUIRED;
1834 /* ... and any callee saved register that dataflow says is live. */
1835 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1836 if (df_regs_ever_live_p (regno)
1837 && !call_used_regs[regno])
1838 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
1840 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1841 if (df_regs_ever_live_p (regno)
1842 && !call_used_regs[regno])
1843 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
1845 if (frame_pointer_needed)
1847 /* FP and LR are placed in the linkage record. */
1848 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1849 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
1850 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1851 offset += 2 * UNITS_PER_WORD;
1854 /* Now assign stack slots for them. */
1855 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1856 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
1858 cfun->machine->frame.reg_offset[regno] = offset;
1859 offset += UNITS_PER_WORD;
1862 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1863 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
1865 cfun->machine->frame.reg_offset[regno] = offset;
1866 offset += UNITS_PER_WORD;
1869 cfun->machine->frame.padding0 =
1870 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1871 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1873 cfun->machine->frame.saved_regs_size = offset;
1875 cfun->machine->frame.hard_fp_offset
1876 = AARCH64_ROUND_UP (cfun->machine->frame.saved_varargs_size
1877 + get_frame_size ()
1878 + cfun->machine->frame.saved_regs_size,
1879 STACK_BOUNDARY / BITS_PER_UNIT);
1881 cfun->machine->frame.frame_size
1882 = AARCH64_ROUND_UP (cfun->machine->frame.hard_fp_offset
1883 + crtl->outgoing_args_size,
1884 STACK_BOUNDARY / BITS_PER_UNIT);
1886 cfun->machine->frame.laid_out = true;
1889 /* Make the last instruction frame-related and note that it performs
1890 the operation described by FRAME_PATTERN. */
1892 static void
1893 aarch64_set_frame_expr (rtx frame_pattern)
1895 rtx insn;
1897 insn = get_last_insn ();
1898 RTX_FRAME_RELATED_P (insn) = 1;
1899 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1900 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1901 frame_pattern,
1902 REG_NOTES (insn));
1905 static bool
1906 aarch64_register_saved_on_entry (int regno)
1908 return cfun->machine->frame.reg_offset[regno] >= 0;
1911 static unsigned
1912 aarch64_next_callee_save (unsigned regno, unsigned limit)
1914 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
1915 regno ++;
1916 return regno;
1919 static rtx
1920 aarch64_gen_store_pair (enum machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
1921 rtx reg2)
1923 switch (mode)
1925 case DImode:
1926 return gen_store_pairdi (mem1, reg1, mem2, reg2);
1928 case DFmode:
1929 return gen_store_pairdf (mem1, reg1, mem2, reg2);
1931 default:
1932 gcc_unreachable ();
1936 static rtx
1937 aarch64_gen_load_pair (enum machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
1938 rtx mem2)
1940 switch (mode)
1942 case DImode:
1943 return gen_load_pairdi (reg1, mem1, reg2, mem2);
1945 case DFmode:
1946 return gen_load_pairdf (reg1, mem1, reg2, mem2);
1948 default:
1949 gcc_unreachable ();
1953 static void
1954 aarch64_save_or_restore_fprs (HOST_WIDE_INT start_offset, bool restore)
1956 unsigned regno;
1957 unsigned regno2;
1958 rtx insn;
1959 rtx (*gen_mem_ref) (enum machine_mode, rtx)
1960 = frame_pointer_needed ? gen_frame_mem : gen_rtx_MEM;
1963 for (regno = aarch64_next_callee_save (V0_REGNUM, V31_REGNUM);
1964 regno <= V31_REGNUM;
1965 regno = aarch64_next_callee_save (regno + 1, V31_REGNUM))
1967 rtx reg = gen_rtx_REG (DFmode, regno);
1968 rtx mem;
1970 HOST_WIDE_INT offset = start_offset
1971 + cfun->machine->frame.reg_offset[regno];
1972 mem = gen_mem_ref (DFmode, plus_constant (Pmode, stack_pointer_rtx,
1973 offset));
1975 regno2 = aarch64_next_callee_save (regno + 1, V31_REGNUM);
1977 if (regno2 <= V31_REGNUM)
1979 rtx reg2 = gen_rtx_REG (DFmode, regno2);
1980 rtx mem2;
1982 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
1983 mem2 = gen_mem_ref (DFmode,
1984 plus_constant (Pmode, stack_pointer_rtx, offset));
1985 if (restore == false)
1986 insn = emit_insn (aarch64_gen_store_pair (DFmode, mem, reg, mem2, reg2));
1987 else
1989 insn = emit_insn (aarch64_gen_load_pair (DFmode, reg, mem, reg2, mem2));
1990 add_reg_note (insn, REG_CFA_RESTORE, reg);
1991 add_reg_note (insn, REG_CFA_RESTORE, reg2);
1994 /* The first part of a frame-related parallel insn is
1995 always assumed to be relevant to the frame
1996 calculations; subsequent parts are only
1997 frame-related if explicitly marked. */
1998 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1999 regno = regno2;
2001 else
2003 if (restore == false)
2004 insn = emit_move_insn (mem, reg);
2005 else
2007 insn = emit_move_insn (reg, mem);
2008 add_reg_note (insn, REG_CFA_RESTORE, reg);
2011 RTX_FRAME_RELATED_P (insn) = 1;
2016 /* START_OFFSET is the offset from the stack pointer at which the
2017 saves and restores take place. */
2018 static void
2019 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT start_offset,
2020 bool restore)
2022 rtx insn;
2023 rtx (*gen_mem_ref) (enum machine_mode, rtx) = (frame_pointer_needed
2024 ? gen_frame_mem : gen_rtx_MEM);
2025 unsigned limit = frame_pointer_needed ? R28_REGNUM : R30_REGNUM;
2026 unsigned regno;
2027 unsigned regno2;
2029 for (regno = aarch64_next_callee_save (R0_REGNUM, limit);
2030 regno <= limit;
2031 regno = aarch64_next_callee_save (regno + 1, limit))
2033 rtx reg = gen_rtx_REG (DImode, regno);
2034 rtx mem;
2036 HOST_WIDE_INT offset = start_offset
2037 + cfun->machine->frame.reg_offset[regno];
2038 mem = gen_mem_ref (Pmode, plus_constant (Pmode, stack_pointer_rtx,
2039 offset));
2041 regno2 = aarch64_next_callee_save (regno + 1, limit);
2043 if (regno2 <= limit
2044 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2045 == cfun->machine->frame.reg_offset[regno2]))
2048 rtx reg2 = gen_rtx_REG (DImode, regno2);
2049 rtx mem2;
2051 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2052 mem2 = gen_mem_ref (Pmode,
2053 plus_constant (Pmode, stack_pointer_rtx, offset));
2054 if (restore == false)
2055 insn = emit_insn (aarch64_gen_store_pair (DImode, mem, reg, mem2, reg2));
2056 else
2058 insn = emit_insn (aarch64_gen_load_pair (DImode, reg, mem, reg2, mem2));
2059 add_reg_note (insn, REG_CFA_RESTORE, reg);
2060 add_reg_note (insn, REG_CFA_RESTORE, reg2);
2063 /* The first part of a frame-related parallel insn is
2064 always assumed to be relevant to the frame
2065 calculations; subsequent parts are only
2066 frame-related if explicitly marked. */
2067 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2068 regno = regno2;
2070 else
2072 if (restore == false)
2073 insn = emit_move_insn (mem, reg);
2074 else
2076 insn = emit_move_insn (reg, mem);
2077 add_reg_note (insn, REG_CFA_RESTORE, reg);
2080 RTX_FRAME_RELATED_P (insn) = 1;
2082 aarch64_save_or_restore_fprs (start_offset, restore);
2085 /* AArch64 stack frames generated by this compiler look like:
2087 +-------------------------------+
2089 | incoming stack arguments |
2091 +-------------------------------+
2092 | | <-- incoming stack pointer (aligned)
2093 | callee-allocated save area |
2094 | for register varargs |
2096 +-------------------------------+
2097 | local variables | <-- frame_pointer_rtx
2099 +-------------------------------+
2100 | padding0 | \
2101 +-------------------------------+ |
2102 | callee-saved registers | | frame.saved_regs_size
2103 +-------------------------------+ |
2104 | LR' | |
2105 +-------------------------------+ |
2106 | FP' | / <- hard_frame_pointer_rtx (aligned)
2107 +-------------------------------+
2108 | dynamic allocation |
2109 +-------------------------------+
2110 | padding |
2111 +-------------------------------+
2112 | outgoing stack arguments | <-- arg_pointer
2114 +-------------------------------+
2115 | | <-- stack_pointer_rtx (aligned)
2117 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
2118 but leave frame_pointer_rtx and hard_frame_pointer_rtx
2119 unchanged. */
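/* A hedged illustration (register choices and offsets are hypothetical;
   aarch64_layout_frame decides the real layout): a function with 16 bytes
   of locals, a frame record and callee saves x19/x20, and no outgoing
   arguments might be entered with

	stp	x29, x30, [sp, -48]!	// frame record, pre-index writeback
	add	x29, sp, 0		// hard_frame_pointer_rtx
	stp	x19, x20, [sp, 16]	// callee-saved registers

   and left again through the matching load-pair/post-index sequence emitted
   by aarch64_expand_epilogue.  */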
2121 /* Generate the prologue instructions for entry into a function.
2122 Establish the stack frame by decreasing the stack pointer with a
2123 properly calculated size and, if necessary, create a frame record
2124 filled with the values of LR and previous frame pointer. The
2125 current FP is also set up if it is in use. */
2127 void
2128 aarch64_expand_prologue (void)
2130 /* sub sp, sp, #<frame_size>
2131 stp {fp, lr}, [sp, #<frame_size> - 16]
2132 add fp, sp, #<frame_size> - hardfp_offset
2133 stp {cs_reg}, [fp, #-16] etc.
2135 sub sp, sp, <final_adjustment_if_any>
2137 HOST_WIDE_INT frame_size, offset;
2138 HOST_WIDE_INT fp_offset; /* Offset from hard FP to SP. */
2139 rtx insn;
2141 aarch64_layout_frame ();
2143 if (flag_stack_usage_info)
2144 current_function_static_stack_size = cfun->machine->frame.frame_size;
2146 frame_size = cfun->machine->frame.frame_size;
2147 offset = cfun->machine->frame.frame_size;
2149 fp_offset = cfun->machine->frame.frame_size
2150 - cfun->machine->frame.hard_fp_offset;
2152 /* Store-pair and load-pair instructions have an offset range of only -512 to 504. */
2153 if (offset >= 512)
2155 /* When the frame has a large size, an initial decrease is done on
2156 the stack pointer to jump over the callee-allocated save area for
2157 register varargs, the local variable area and/or the callee-saved
2158 register area. This will allow the pre-index write-back
2159 store pair instructions to be used for setting up the stack frame
2160 efficiently. */
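/* For illustration, with hypothetical numbers and assuming a frame pointer
   is in use: if frame_size is 2064, hard_fp_offset is 80 and
   outgoing_args_size is 32, OFFSET becomes 80, the initial "sub sp" below
   covers 2064 - 80 - 32 = 1952 bytes, the frame record and callee saves are
   written with an 80-byte pre-index writeback, and the remaining 32 bytes
   are subtracted at the end.  */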
2161 offset = cfun->machine->frame.hard_fp_offset;
2162 if (offset >= 512)
2163 offset = cfun->machine->frame.saved_regs_size;
2165 frame_size -= (offset + crtl->outgoing_args_size);
2166 fp_offset = 0;
2168 if (frame_size >= 0x1000000)
2170 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2171 emit_move_insn (op0, GEN_INT (-frame_size));
2172 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2173 aarch64_set_frame_expr (gen_rtx_SET
2174 (Pmode, stack_pointer_rtx,
2175 plus_constant (Pmode,
2176 stack_pointer_rtx,
2177 -frame_size)));
2179 else if (frame_size > 0)
2181 if ((frame_size & 0xfff) != frame_size)
2183 insn = emit_insn (gen_add2_insn
2184 (stack_pointer_rtx,
2185 GEN_INT (-(frame_size
2186 & ~(HOST_WIDE_INT)0xfff))));
2187 RTX_FRAME_RELATED_P (insn) = 1;
2189 if ((frame_size & 0xfff) != 0)
2191 insn = emit_insn (gen_add2_insn
2192 (stack_pointer_rtx,
2193 GEN_INT (-(frame_size
2194 & (HOST_WIDE_INT)0xfff))));
2195 RTX_FRAME_RELATED_P (insn) = 1;
2199 else
2200 frame_size = -1;
2202 if (offset > 0)
2204 /* Save the frame pointer and lr if the frame pointer is needed
2205 first. Make the frame pointer point to the location of the
2206 old frame pointer on the stack. */
2207 if (frame_pointer_needed)
2209 rtx mem_fp, mem_lr;
2211 if (fp_offset)
2213 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2214 GEN_INT (-offset)));
2215 RTX_FRAME_RELATED_P (insn) = 1;
2216 aarch64_set_frame_expr (gen_rtx_SET
2217 (Pmode, stack_pointer_rtx,
2218 gen_rtx_MINUS (Pmode,
2219 stack_pointer_rtx,
2220 GEN_INT (offset))));
2221 mem_fp = gen_frame_mem (DImode,
2222 plus_constant (Pmode,
2223 stack_pointer_rtx,
2224 fp_offset));
2225 mem_lr = gen_frame_mem (DImode,
2226 plus_constant (Pmode,
2227 stack_pointer_rtx,
2228 fp_offset
2229 + UNITS_PER_WORD));
2230 insn = emit_insn (gen_store_pairdi (mem_fp,
2231 hard_frame_pointer_rtx,
2232 mem_lr,
2233 gen_rtx_REG (DImode,
2234 LR_REGNUM)));
2236 else
2238 insn = emit_insn (gen_storewb_pairdi_di
2239 (stack_pointer_rtx, stack_pointer_rtx,
2240 hard_frame_pointer_rtx,
2241 gen_rtx_REG (DImode, LR_REGNUM),
2242 GEN_INT (-offset),
2243 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2244 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2247 /* The first part of a frame-related parallel insn is always
2248 assumed to be relevant to the frame calculations;
2249 subsequent parts are only frame-related if explicitly
2250 marked. */
2251 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2252 RTX_FRAME_RELATED_P (insn) = 1;
2254 /* Set up frame pointer to point to the location of the
2255 previous frame pointer on the stack. */
2256 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2257 stack_pointer_rtx,
2258 GEN_INT (fp_offset)));
2259 aarch64_set_frame_expr (gen_rtx_SET
2260 (Pmode, hard_frame_pointer_rtx,
2261 plus_constant (Pmode,
2262 stack_pointer_rtx,
2263 fp_offset)));
2264 RTX_FRAME_RELATED_P (insn) = 1;
2265 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2266 hard_frame_pointer_rtx));
2268 else
2270 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2271 GEN_INT (-offset)));
2272 RTX_FRAME_RELATED_P (insn) = 1;
2275 aarch64_save_or_restore_callee_save_registers (fp_offset, 0);
2278 /* When offset >= 512,
2279 sub sp, sp, #<outgoing_args_size> */
2280 if (frame_size > -1)
2282 if (crtl->outgoing_args_size > 0)
2284 insn = emit_insn (gen_add2_insn
2285 (stack_pointer_rtx,
2286 GEN_INT (- crtl->outgoing_args_size)));
2287 RTX_FRAME_RELATED_P (insn) = 1;
2292 /* Generate the epilogue instructions for returning from a function. */
2293 void
2294 aarch64_expand_epilogue (bool for_sibcall)
2296 HOST_WIDE_INT frame_size, offset;
2297 HOST_WIDE_INT fp_offset;
2298 rtx insn;
2299 rtx cfa_reg;
2301 aarch64_layout_frame ();
2303 offset = frame_size = cfun->machine->frame.frame_size;
2304 fp_offset = cfun->machine->frame.frame_size
2305 - cfun->machine->frame.hard_fp_offset;
2307 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2309 /* Store-pair and load-pair instructions have an offset range of only -512 to 504. */
2310 if (offset >= 512)
2312 offset = cfun->machine->frame.hard_fp_offset;
2313 if (offset >= 512)
2314 offset = cfun->machine->frame.saved_regs_size;
2316 frame_size -= (offset + crtl->outgoing_args_size);
2317 fp_offset = 0;
2318 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2320 insn = emit_insn (gen_add2_insn
2321 (stack_pointer_rtx,
2322 GEN_INT (crtl->outgoing_args_size)));
2323 RTX_FRAME_RELATED_P (insn) = 1;
2326 else
2327 frame_size = -1;
2329 /* If there were outgoing arguments or we've done dynamic stack
2330 allocation, then restore the stack pointer from the frame
2331 pointer. This is at most one insn and more efficient than using
2332 GCC's internal mechanism. */
2333 if (frame_pointer_needed
2334 && (crtl->outgoing_args_size || cfun->calls_alloca))
2336 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2337 hard_frame_pointer_rtx,
2338 GEN_INT (- fp_offset)));
2339 RTX_FRAME_RELATED_P (insn) = 1;
2340 /* As SP is set to (FP - fp_offset), according to the rules in
2341 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2342 from the value of SP from now on. */
2343 cfa_reg = stack_pointer_rtx;
2346 aarch64_save_or_restore_callee_save_registers (fp_offset, 1);
2348 /* Restore the frame pointer and lr if the frame pointer is needed. */
2349 if (offset > 0)
2351 if (frame_pointer_needed)
2353 rtx mem_fp, mem_lr;
2355 if (fp_offset)
2357 mem_fp = gen_frame_mem (DImode,
2358 plus_constant (Pmode,
2359 stack_pointer_rtx,
2360 fp_offset));
2361 mem_lr = gen_frame_mem (DImode,
2362 plus_constant (Pmode,
2363 stack_pointer_rtx,
2364 fp_offset
2365 + UNITS_PER_WORD));
2366 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2367 mem_fp,
2368 gen_rtx_REG (DImode,
2369 LR_REGNUM),
2370 mem_lr));
2372 else
2374 insn = emit_insn (gen_loadwb_pairdi_di
2375 (stack_pointer_rtx,
2376 stack_pointer_rtx,
2377 hard_frame_pointer_rtx,
2378 gen_rtx_REG (DImode, LR_REGNUM),
2379 GEN_INT (offset),
2380 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2381 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2382 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2383 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2384 plus_constant (Pmode, cfa_reg,
2385 offset))));
2388 /* The first part of a frame-related parallel insn
2389 is always assumed to be relevant to the frame
2390 calculations; subsequent parts are only
2391 frame-related if explicitly marked. */
2392 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2393 RTX_FRAME_RELATED_P (insn) = 1;
2394 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2395 add_reg_note (insn, REG_CFA_RESTORE,
2396 gen_rtx_REG (DImode, LR_REGNUM));
2398 if (fp_offset)
2400 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2401 GEN_INT (offset)));
2402 RTX_FRAME_RELATED_P (insn) = 1;
2405 else
2407 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2408 GEN_INT (offset)));
2409 RTX_FRAME_RELATED_P (insn) = 1;
2413 /* Stack adjustment for exception handler. */
2414 if (crtl->calls_eh_return)
2416 /* We need to unwind the stack by the offset computed by
2417 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2418 based on SP. Ideally we would update the SP and define the
2419 CFA along the lines of:
2421 SP = SP + EH_RETURN_STACKADJ_RTX
2422 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2424 However the dwarf emitter only understands a constant
2425 register offset.
2427 The solution chosen here is to use the otherwise unused IP0
2428 as a temporary register to hold the current SP value. The
2429 CFA is described using IP0 then SP is modified. */
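/* Illustrative sketch of the sequence emitted below (IP0 is x16; <adj>
   stands for EH_RETURN_STACKADJ_RTX):

	mov	x16, sp		// CFA is now defined in terms of x16
	add	sp, sp, <adj>
	(use x16)		// keeps the copy to x16 from being deleted  */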
2431 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2433 insn = emit_move_insn (ip0, stack_pointer_rtx);
2434 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2435 RTX_FRAME_RELATED_P (insn) = 1;
2437 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2439 /* Ensure the assignment to IP0 does not get optimized away. */
2440 emit_use (ip0);
2443 if (frame_size > -1)
2445 if (frame_size >= 0x1000000)
2447 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2448 emit_move_insn (op0, GEN_INT (frame_size));
2449 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2450 aarch64_set_frame_expr (gen_rtx_SET
2451 (Pmode, stack_pointer_rtx,
2452 plus_constant (Pmode,
2453 stack_pointer_rtx,
2454 frame_size)));
2456 else if (frame_size > 0)
2458 if ((frame_size & 0xfff) != 0)
2460 insn = emit_insn (gen_add2_insn
2461 (stack_pointer_rtx,
2462 GEN_INT ((frame_size
2463 & (HOST_WIDE_INT) 0xfff))));
2464 RTX_FRAME_RELATED_P (insn) = 1;
2466 if ((frame_size & 0xfff) != frame_size)
2468 insn = emit_insn (gen_add2_insn
2469 (stack_pointer_rtx,
2470 GEN_INT ((frame_size
2471 & ~ (HOST_WIDE_INT) 0xfff))));
2472 RTX_FRAME_RELATED_P (insn) = 1;
2476 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2477 plus_constant (Pmode,
2478 stack_pointer_rtx,
2479 offset)));
2482 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2483 if (!for_sibcall)
2484 emit_jump_insn (ret_rtx);
2487 /* Return the place to copy the exception unwinding return address to.
2488 This will probably be a stack slot, but could (in theory) be the
2489 return register. */
2491 aarch64_final_eh_return_addr (void)
2493 HOST_WIDE_INT fp_offset;
2495 aarch64_layout_frame ();
2497 fp_offset = cfun->machine->frame.frame_size
2498 - cfun->machine->frame.hard_fp_offset;
2500 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2501 return gen_rtx_REG (DImode, LR_REGNUM);
2503 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2504 result in a store to save LR introduced by builtin_eh_return () being
2505 incorrectly deleted because the alias is not detected.
2506 So in the calculation of the address to copy the exception unwinding
2507 return address to, we note 2 cases.
2508 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2509 we return a SP-relative location since all the addresses are SP-relative
2510 in this case. This prevents the store from being optimized away.
2511 If the fp_offset is not 0, then the addresses will be FP-relative and
2512 therefore we return a FP-relative location. */
2514 if (frame_pointer_needed)
2516 if (fp_offset)
2517 return gen_frame_mem (DImode,
2518 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2519 else
2520 return gen_frame_mem (DImode,
2521 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2524 /* If FP is not needed, we calculate the location of LR, which would be
2525 at the top of the saved registers block. */
2527 return gen_frame_mem (DImode,
2528 plus_constant (Pmode,
2529 stack_pointer_rtx,
2530 fp_offset
2531 + cfun->machine->frame.saved_regs_size
2532 - 2 * UNITS_PER_WORD));
2535 /* Possibly output code to build up a constant in a register. For
2536 the benefit of the costs infrastructure, returns the number of
2537 instructions which would be emitted. GENERATE inhibits or
2538 enables code generation. */
2540 static int
2541 aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
2543 int insns = 0;
2545 if (aarch64_bitmask_imm (val, DImode))
2547 if (generate)
2548 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2549 insns = 1;
2551 else
2553 int i;
2554 int ncount = 0;
2555 int zcount = 0;
2556 HOST_WIDE_INT valp = val >> 16;
2557 HOST_WIDE_INT valm;
2558 HOST_WIDE_INT tval;
2560 for (i = 16; i < 64; i += 16)
2562 valm = (valp & 0xffff);
2564 if (valm != 0)
2565 ++ zcount;
2567 if (valm != 0xffff)
2568 ++ ncount;
2570 valp >>= 16;
2573 /* zcount contains the number of additional MOVK instructions
2574 required if the constant is built up with an initial MOVZ instruction,
2575 while ncount is the number of MOVK instructions required if starting
2576 with a MOVN instruction. Choose the sequence that requires the fewer
2577 instructions, preferring a MOVZ-based sequence when the two counts
2578 are equal. */
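/* A worked example (illustrative): for VAL == 0xffffffff12345678 the three
   upper halfwords are 0x1234, 0xffff and 0xffff, so zcount == 3 and
   ncount == 1.  The MOVN-based sequence wins: one MOVN producing
   0xffffffffffff5678 plus a single MOVK inserting 0x1234 at bit 16, i.e.
   two instructions instead of four.  */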
2579 if (ncount < zcount)
2581 if (generate)
2582 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2583 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2584 tval = 0xffff;
2585 insns++;
2587 else
2589 if (generate)
2590 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2591 GEN_INT (val & 0xffff));
2592 tval = 0;
2593 insns++;
2596 val >>= 16;
2598 for (i = 16; i < 64; i += 16)
2600 if ((val & 0xffff) != tval)
2602 if (generate)
2603 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2604 GEN_INT (i),
2605 GEN_INT (val & 0xffff)));
2606 insns++;
2608 val >>= 16;
2611 return insns;
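/* Add DELTA to the register REGNUM, clobbering SCRATCHREG where the
   adjustment cannot be expressed directly: very large deltas are built up
   with aarch64_build_constant, otherwise the delta is split into a
   4096-scaled part and a remainder, each applied with a single add/sub.  */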
2614 static void
2615 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2617 HOST_WIDE_INT mdelta = delta;
2618 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2619 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2621 if (mdelta < 0)
2622 mdelta = -mdelta;
2624 if (mdelta >= 4096 * 4096)
2626 (void) aarch64_build_constant (scratchreg, delta, true);
2627 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2629 else if (mdelta > 0)
2631 if (mdelta >= 4096)
2633 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2634 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2635 if (delta < 0)
2636 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2637 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2638 else
2639 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2640 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2642 if (mdelta % 4096 != 0)
2644 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2645 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2646 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2651 /* Output code to add DELTA to the first argument, and then jump
2652 to FUNCTION. Used for C++ multiple inheritance. */
2653 static void
2654 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2655 HOST_WIDE_INT delta,
2656 HOST_WIDE_INT vcall_offset,
2657 tree function)
2659 /* The this pointer is always in x0. Note that this differs from
2660 Arm where the this pointer may be bumped to r1 if r0 is required
2661 to return a pointer to an aggregate. On AArch64 a result value
2662 pointer will be in x8. */
2663 int this_regno = R0_REGNUM;
2664 rtx this_rtx, temp0, temp1, addr, insn, funexp;
2666 reload_completed = 1;
2667 emit_note (NOTE_INSN_PROLOGUE_END);
2669 if (vcall_offset == 0)
2670 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2671 else
2673 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2675 this_rtx = gen_rtx_REG (Pmode, this_regno);
2676 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2677 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2679 addr = this_rtx;
2680 if (delta != 0)
2682 if (delta >= -256 && delta < 256)
2683 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2684 plus_constant (Pmode, this_rtx, delta));
2685 else
2686 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2689 if (Pmode == ptr_mode)
2690 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2691 else
2692 aarch64_emit_move (temp0,
2693 gen_rtx_ZERO_EXTEND (Pmode,
2694 gen_rtx_MEM (ptr_mode, addr)));
2696 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2697 addr = plus_constant (Pmode, temp0, vcall_offset);
2698 else
2700 (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
2701 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2704 if (Pmode == ptr_mode)
2705 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2706 else
2707 aarch64_emit_move (temp1,
2708 gen_rtx_SIGN_EXTEND (Pmode,
2709 gen_rtx_MEM (ptr_mode, addr)));
2711 emit_insn (gen_add2_insn (this_rtx, temp1));
2714 /* Generate a tail call to the target function. */
2715 if (!TREE_USED (function))
2717 assemble_external (function);
2718 TREE_USED (function) = 1;
2720 funexp = XEXP (DECL_RTL (function), 0);
2721 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2722 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2723 SIBLING_CALL_P (insn) = 1;
2725 insn = get_insns ();
2726 shorten_branches (insn);
2727 final_start_function (insn, file, 1);
2728 final (insn, file, 1);
2729 final_end_function ();
2731 /* Stop pretending to be a post-reload pass. */
2732 reload_completed = 0;
2735 static int
2736 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2738 if (GET_CODE (*x) == SYMBOL_REF)
2739 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2741 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2742 TLS offsets, not real symbol references. */
2743 if (GET_CODE (*x) == UNSPEC
2744 && XINT (*x, 1) == UNSPEC_TLS)
2745 return -1;
2747 return 0;
2750 static bool
2751 aarch64_tls_referenced_p (rtx x)
2753 if (!TARGET_HAVE_TLS)
2754 return false;
2756 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2760 static int
2761 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2763 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2764 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2766 if (*imm1 < *imm2)
2767 return -1;
2768 if (*imm1 > *imm2)
2769 return +1;
2770 return 0;
2774 static void
2775 aarch64_build_bitmask_table (void)
2777 unsigned HOST_WIDE_INT mask, imm;
2778 unsigned int log_e, e, s, r;
2779 unsigned int nimms = 0;
2781 for (log_e = 1; log_e <= 6; log_e++)
2783 e = 1 << log_e;
2784 if (e == 64)
2785 mask = ~(HOST_WIDE_INT) 0;
2786 else
2787 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2788 for (s = 1; s < e; s++)
2790 for (r = 0; r < e; r++)
2792 /* Set S consecutive bits to 1 (S < 64). */
2793 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2794 /* Rotate right by R. */
2795 if (r != 0)
2796 imm = ((imm >> r) | (imm << (e - r))) & mask;
2797 /* Replicate the constant depending on SIMD size. */
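/* The switch below falls through deliberately: each case doubles the
   element until the pattern fills all 64 bits, e.g. an 8-bit element is
   widened 8 -> 16 -> 32 -> 64.  */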
2798 switch (log_e) {
2799 case 1: imm |= (imm << 2);
2800 case 2: imm |= (imm << 4);
2801 case 3: imm |= (imm << 8);
2802 case 4: imm |= (imm << 16);
2803 case 5: imm |= (imm << 32);
2804 case 6:
2805 break;
2806 default:
2807 gcc_unreachable ();
2809 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2810 aarch64_bitmasks[nimms++] = imm;
2815 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2816 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2817 aarch64_bitmasks_cmp);
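/* As an illustration of the encoding enumerated above: log_e == 4 (a 16-bit
   element) with s == 4 and r == 0 yields the element 0x000f, replicated to
   the 64-bit bitmask immediate 0x000f000f000f000f.  */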
2821 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2822 a left shift of 0 or 12 bits. */
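/* For example, 0xabc and 0xabc000 are accepted, while 0xabc00 and
   0x1abc000 are not.  */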
2823 bool
2824 aarch64_uimm12_shift (HOST_WIDE_INT val)
2826 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2827 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2832 /* Return true if val is an immediate that can be loaded into a
2833 register by a MOVZ instruction. */
2834 static bool
2835 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2837 if (GET_MODE_SIZE (mode) > 4)
2839 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2840 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2841 return 1;
2843 else
2845 /* Ignore sign extension. */
2846 val &= (HOST_WIDE_INT) 0xffffffff;
2848 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2849 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2853 /* Return true if val is a valid bitmask immediate. */
2854 bool
2855 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2857 if (GET_MODE_SIZE (mode) < 8)
2859 /* Replicate bit pattern. */
2860 val &= (HOST_WIDE_INT) 0xffffffff;
2861 val |= val << 32;
2863 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2864 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2868 /* Return true if val is an immediate that can be loaded into a
2869 register in a single instruction. */
2870 bool
2871 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2873 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2874 return 1;
2875 return aarch64_bitmask_imm (val, mode);
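/* For example (DImode), all of the following are accepted by
   aarch64_move_imm above: 0x12340000 (a single MOVZ), 0xffffffffffff1234
   (a single MOVN) and 0x00ff00ff00ff00ff (a single bitmask/ORR immediate);
   none of them needs a MOVK sequence or a literal load.  */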
2878 static bool
2879 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2881 rtx base, offset;
2883 if (GET_CODE (x) == HIGH)
2884 return true;
2886 split_const (x, &base, &offset);
2887 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2889 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2890 != SYMBOL_FORCE_TO_MEM)
2891 return true;
2892 else
2893 /* Avoid generating a 64-bit relocation in ILP32; leave
2894 to aarch64_expand_mov_immediate to handle it properly. */
2895 return mode != ptr_mode;
2898 return aarch64_tls_referenced_p (x);
2901 /* Return true if register REGNO is a valid index register.
2902 STRICT_P is true if REG_OK_STRICT is in effect. */
2904 bool
2905 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2907 if (!HARD_REGISTER_NUM_P (regno))
2909 if (!strict_p)
2910 return true;
2912 if (!reg_renumber)
2913 return false;
2915 regno = reg_renumber[regno];
2917 return GP_REGNUM_P (regno);
2920 /* Return true if register REGNO is a valid base register.
2921 STRICT_P is true if REG_OK_STRICT is in effect. */
2923 bool
2924 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2926 if (!HARD_REGISTER_NUM_P (regno))
2928 if (!strict_p)
2929 return true;
2931 if (!reg_renumber)
2932 return false;
2934 regno = reg_renumber[regno];
2937 /* The fake registers will be eliminated to either the stack or
2938 hard frame pointer, both of which are usually valid base registers.
2939 Reload deals with the cases where the eliminated form isn't valid. */
2940 return (GP_REGNUM_P (regno)
2941 || regno == SP_REGNUM
2942 || regno == FRAME_POINTER_REGNUM
2943 || regno == ARG_POINTER_REGNUM);
2946 /* Return true if X is a valid base register.
2947 STRICT_P is true if REG_OK_STRICT is in effect. */
2949 static bool
2950 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2952 if (!strict_p && GET_CODE (x) == SUBREG)
2953 x = SUBREG_REG (x);
2955 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2958 /* Return true if address offset is a valid index. If it is, fill in INFO
2959 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2961 static bool
2962 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2963 enum machine_mode mode, bool strict_p)
2965 enum aarch64_address_type type;
2966 rtx index;
2967 int shift;
2969 /* (reg:P) */
2970 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2971 && GET_MODE (x) == Pmode)
2973 type = ADDRESS_REG_REG;
2974 index = x;
2975 shift = 0;
2977 /* (sign_extend:DI (reg:SI)) */
2978 else if ((GET_CODE (x) == SIGN_EXTEND
2979 || GET_CODE (x) == ZERO_EXTEND)
2980 && GET_MODE (x) == DImode
2981 && GET_MODE (XEXP (x, 0)) == SImode)
2983 type = (GET_CODE (x) == SIGN_EXTEND)
2984 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2985 index = XEXP (x, 0);
2986 shift = 0;
2988 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2989 else if (GET_CODE (x) == MULT
2990 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2991 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2992 && GET_MODE (XEXP (x, 0)) == DImode
2993 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2994 && CONST_INT_P (XEXP (x, 1)))
2996 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2997 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2998 index = XEXP (XEXP (x, 0), 0);
2999 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3001 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3002 else if (GET_CODE (x) == ASHIFT
3003 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3004 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3005 && GET_MODE (XEXP (x, 0)) == DImode
3006 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3007 && CONST_INT_P (XEXP (x, 1)))
3009 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3010 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3011 index = XEXP (XEXP (x, 0), 0);
3012 shift = INTVAL (XEXP (x, 1));
3014 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3015 else if ((GET_CODE (x) == SIGN_EXTRACT
3016 || GET_CODE (x) == ZERO_EXTRACT)
3017 && GET_MODE (x) == DImode
3018 && GET_CODE (XEXP (x, 0)) == MULT
3019 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3020 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3022 type = (GET_CODE (x) == SIGN_EXTRACT)
3023 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3024 index = XEXP (XEXP (x, 0), 0);
3025 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3026 if (INTVAL (XEXP (x, 1)) != 32 + shift
3027 || INTVAL (XEXP (x, 2)) != 0)
3028 shift = -1;
3030 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3031 (const_int 0xffffffff<<shift)) */
3032 else if (GET_CODE (x) == AND
3033 && GET_MODE (x) == DImode
3034 && GET_CODE (XEXP (x, 0)) == MULT
3035 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3036 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3037 && CONST_INT_P (XEXP (x, 1)))
3039 type = ADDRESS_REG_UXTW;
3040 index = XEXP (XEXP (x, 0), 0);
3041 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3042 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3043 shift = -1;
3045 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3046 else if ((GET_CODE (x) == SIGN_EXTRACT
3047 || GET_CODE (x) == ZERO_EXTRACT)
3048 && GET_MODE (x) == DImode
3049 && GET_CODE (XEXP (x, 0)) == ASHIFT
3050 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3051 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3053 type = (GET_CODE (x) == SIGN_EXTRACT)
3054 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3055 index = XEXP (XEXP (x, 0), 0);
3056 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3057 if (INTVAL (XEXP (x, 1)) != 32 + shift
3058 || INTVAL (XEXP (x, 2)) != 0)
3059 shift = -1;
3061 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3062 (const_int 0xffffffff<<shift)) */
3063 else if (GET_CODE (x) == AND
3064 && GET_MODE (x) == DImode
3065 && GET_CODE (XEXP (x, 0)) == ASHIFT
3066 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3067 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3068 && CONST_INT_P (XEXP (x, 1)))
3070 type = ADDRESS_REG_UXTW;
3071 index = XEXP (XEXP (x, 0), 0);
3072 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3073 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3074 shift = -1;
3076 /* (mult:P (reg:P) (const_int scale)) */
3077 else if (GET_CODE (x) == MULT
3078 && GET_MODE (x) == Pmode
3079 && GET_MODE (XEXP (x, 0)) == Pmode
3080 && CONST_INT_P (XEXP (x, 1)))
3082 type = ADDRESS_REG_REG;
3083 index = XEXP (x, 0);
3084 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3086 /* (ashift:P (reg:P) (const_int shift)) */
3087 else if (GET_CODE (x) == ASHIFT
3088 && GET_MODE (x) == Pmode
3089 && GET_MODE (XEXP (x, 0)) == Pmode
3090 && CONST_INT_P (XEXP (x, 1)))
3092 type = ADDRESS_REG_REG;
3093 index = XEXP (x, 0);
3094 shift = INTVAL (XEXP (x, 1));
3096 else
3097 return false;
3099 if (GET_CODE (index) == SUBREG)
3100 index = SUBREG_REG (index);
3102 if ((shift == 0 ||
3103 (shift > 0 && shift <= 3
3104 && (1 << shift) == GET_MODE_SIZE (mode)))
3105 && REG_P (index)
3106 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3108 info->type = type;
3109 info->offset = index;
3110 info->shift = shift;
3111 return true;
3114 return false;
3117 static inline bool
3118 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3120 return (offset >= -64 * GET_MODE_SIZE (mode)
3121 && offset < 64 * GET_MODE_SIZE (mode)
3122 && offset % GET_MODE_SIZE (mode) == 0);
3125 static inline bool
3126 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3127 HOST_WIDE_INT offset)
3129 return offset >= -256 && offset < 256;
3132 static inline bool
3133 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3135 return (offset >= 0
3136 && offset < 4096 * GET_MODE_SIZE (mode)
3137 && offset % GET_MODE_SIZE (mode) == 0);
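/* For a DImode (8-byte) access the three helpers above therefore accept,
   respectively, multiples of 8 in [-512, 504], any offset in [-256, 255],
   and multiples of 8 in [0, 32760].  */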
3140 /* Return true if X is a valid address for machine mode MODE. If it is,
3141 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3142 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3144 static bool
3145 aarch64_classify_address (struct aarch64_address_info *info,
3146 rtx x, enum machine_mode mode,
3147 RTX_CODE outer_code, bool strict_p)
3149 enum rtx_code code = GET_CODE (x);
3150 rtx op0, op1;
3151 bool allow_reg_index_p =
3152 outer_code != PARALLEL && (GET_MODE_SIZE (mode) != 16
3153 || aarch64_vector_mode_supported_p (mode));
3154 /* Don't support anything other than POST_INC or REG addressing for
3155 AdvSIMD. */
3156 if (aarch64_vect_struct_mode_p (mode)
3157 && (code != POST_INC && code != REG))
3158 return false;
3160 switch (code)
3162 case REG:
3163 case SUBREG:
3164 info->type = ADDRESS_REG_IMM;
3165 info->base = x;
3166 info->offset = const0_rtx;
3167 return aarch64_base_register_rtx_p (x, strict_p);
3169 case PLUS:
3170 op0 = XEXP (x, 0);
3171 op1 = XEXP (x, 1);
3172 if (GET_MODE_SIZE (mode) != 0
3173 && CONST_INT_P (op1)
3174 && aarch64_base_register_rtx_p (op0, strict_p))
3176 HOST_WIDE_INT offset = INTVAL (op1);
3178 info->type = ADDRESS_REG_IMM;
3179 info->base = op0;
3180 info->offset = op1;
3182 /* TImode and TFmode values are allowed in both pairs of X
3183 registers and individual Q registers. The available
3184 address modes are:
3185 X,X: 7-bit signed scaled offset
3186 Q: 9-bit signed offset
3187 We conservatively require an offset representable in both modes.
3189 if (mode == TImode || mode == TFmode)
3190 return (offset_7bit_signed_scaled_p (mode, offset)
3191 && offset_9bit_signed_unscaled_p (mode, offset));
3193 if (outer_code == PARALLEL)
3194 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3195 && offset_7bit_signed_scaled_p (mode, offset));
3196 else
3197 return (offset_9bit_signed_unscaled_p (mode, offset)
3198 || offset_12bit_unsigned_scaled_p (mode, offset));
3201 if (allow_reg_index_p)
3203 /* Look for base + (scaled/extended) index register. */
3204 if (aarch64_base_register_rtx_p (op0, strict_p)
3205 && aarch64_classify_index (info, op1, mode, strict_p))
3207 info->base = op0;
3208 return true;
3210 if (aarch64_base_register_rtx_p (op1, strict_p)
3211 && aarch64_classify_index (info, op0, mode, strict_p))
3213 info->base = op1;
3214 return true;
3218 return false;
3220 case POST_INC:
3221 case POST_DEC:
3222 case PRE_INC:
3223 case PRE_DEC:
3224 info->type = ADDRESS_REG_WB;
3225 info->base = XEXP (x, 0);
3226 info->offset = NULL_RTX;
3227 return aarch64_base_register_rtx_p (info->base, strict_p);
3229 case POST_MODIFY:
3230 case PRE_MODIFY:
3231 info->type = ADDRESS_REG_WB;
3232 info->base = XEXP (x, 0);
3233 if (GET_CODE (XEXP (x, 1)) == PLUS
3234 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3235 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3236 && aarch64_base_register_rtx_p (info->base, strict_p))
3238 HOST_WIDE_INT offset;
3239 info->offset = XEXP (XEXP (x, 1), 1);
3240 offset = INTVAL (info->offset);
3242 /* TImode and TFmode values are allowed in both pairs of X
3243 registers and individual Q registers. The available
3244 address modes are:
3245 X,X: 7-bit signed scaled offset
3246 Q: 9-bit signed offset
3247 We conservatively require an offset representable in both modes.
3249 if (mode == TImode || mode == TFmode)
3250 return (offset_7bit_signed_scaled_p (mode, offset)
3251 && offset_9bit_signed_unscaled_p (mode, offset));
3253 if (outer_code == PARALLEL)
3254 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3255 && offset_7bit_signed_scaled_p (mode, offset));
3256 else
3257 return offset_9bit_signed_unscaled_p (mode, offset);
3259 return false;
3261 case CONST:
3262 case SYMBOL_REF:
3263 case LABEL_REF:
3264 /* Load literal: pc-relative constant pool entry. Only supported
3265 for SI mode or larger. */
3266 info->type = ADDRESS_SYMBOLIC;
3267 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3269 rtx sym, addend;
3271 split_const (x, &sym, &addend);
3272 return (GET_CODE (sym) == LABEL_REF
3273 || (GET_CODE (sym) == SYMBOL_REF
3274 && CONSTANT_POOL_ADDRESS_P (sym)));
3276 return false;
3278 case LO_SUM:
3279 info->type = ADDRESS_LO_SUM;
3280 info->base = XEXP (x, 0);
3281 info->offset = XEXP (x, 1);
3282 if (allow_reg_index_p
3283 && aarch64_base_register_rtx_p (info->base, strict_p))
3285 rtx sym, offs;
3286 split_const (info->offset, &sym, &offs);
3287 if (GET_CODE (sym) == SYMBOL_REF
3288 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3289 == SYMBOL_SMALL_ABSOLUTE))
3291 /* The symbol and offset must be aligned to the access size. */
3292 unsigned int align;
3293 unsigned int ref_size;
3295 if (CONSTANT_POOL_ADDRESS_P (sym))
3296 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3297 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3299 tree exp = SYMBOL_REF_DECL (sym);
3300 align = TYPE_ALIGN (TREE_TYPE (exp));
3301 align = CONSTANT_ALIGNMENT (exp, align);
3303 else if (SYMBOL_REF_DECL (sym))
3304 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3305 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3306 && SYMBOL_REF_BLOCK (sym) != NULL)
3307 align = SYMBOL_REF_BLOCK (sym)->alignment;
3308 else
3309 align = BITS_PER_UNIT;
3311 ref_size = GET_MODE_SIZE (mode);
3312 if (ref_size == 0)
3313 ref_size = GET_MODE_SIZE (DImode);
3315 return ((INTVAL (offs) & (ref_size - 1)) == 0
3316 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3319 return false;
3321 default:
3322 return false;
3326 bool
3327 aarch64_symbolic_address_p (rtx x)
3329 rtx offset;
3331 split_const (x, &x, &offset);
3332 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3335 /* Classify the base of symbolic expression X, given that X appears in
3336 context CONTEXT. */
3338 enum aarch64_symbol_type
3339 aarch64_classify_symbolic_expression (rtx x,
3340 enum aarch64_symbol_context context)
3342 rtx offset;
3344 split_const (x, &x, &offset);
3345 return aarch64_classify_symbol (x, context);
3349 /* Return TRUE if X is a legitimate address for accessing memory in
3350 mode MODE. */
3351 static bool
3352 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3354 struct aarch64_address_info addr;
3356 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3359 /* Return TRUE if X is a legitimate address for accessing memory in
3360 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3361 pair operation. */
3362 bool
3363 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3364 RTX_CODE outer_code, bool strict_p)
3366 struct aarch64_address_info addr;
3368 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3371 /* Return TRUE if rtx X is the immediate constant 0.0. */
3372 bool
3373 aarch64_float_const_zero_rtx_p (rtx x)
3375 REAL_VALUE_TYPE r;
3377 if (GET_MODE (x) == VOIDmode)
3378 return false;
3380 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3381 if (REAL_VALUE_MINUS_ZERO (r))
3382 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3383 return REAL_VALUES_EQUAL (r, dconst0);
3386 /* Return the fixed registers used for condition codes. */
3388 static bool
3389 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3391 *p1 = CC_REGNUM;
3392 *p2 = INVALID_REGNUM;
3393 return true;
3396 /* Emit call insn with PAT and do aarch64-specific handling. */
3398 void
3399 aarch64_emit_call_insn (rtx pat)
3401 rtx insn = emit_call_insn (pat);
3403 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
3404 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
3405 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
3408 enum machine_mode
3409 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3411 /* All floating point compares return CCFP if it is an equality
3412 comparison, and CCFPE otherwise. */
3413 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3415 switch (code)
3417 case EQ:
3418 case NE:
3419 case UNORDERED:
3420 case ORDERED:
3421 case UNLT:
3422 case UNLE:
3423 case UNGT:
3424 case UNGE:
3425 case UNEQ:
3426 case LTGT:
3427 return CCFPmode;
3429 case LT:
3430 case LE:
3431 case GT:
3432 case GE:
3433 return CCFPEmode;
3435 default:
3436 gcc_unreachable ();
3440 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3441 && y == const0_rtx
3442 && (code == EQ || code == NE || code == LT || code == GE)
3443 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3444 || GET_CODE (x) == NEG))
3445 return CC_NZmode;
3447 /* A compare with a shifted operand. Because of canonicalization,
3448 the comparison will have to be swapped when we emit the assembly
3449 code. */
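/* For example, for (compare (ashift x 2) y) the assembly output has to be
   "cmp y, x, lsl 2", with the shifted operand second, so the condition
   being tested is swapped as well; the CC_SWPmode entries in
   aarch64_get_condition_code provide that swapped mapping.  */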
3450 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3451 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3452 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3453 || GET_CODE (x) == LSHIFTRT
3454 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3455 return CC_SWPmode;
3457 /* Similarly for a negated operand, but we can only do this for
3458 equalities. */
3459 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3460 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3461 && (code == EQ || code == NE)
3462 && GET_CODE (x) == NEG)
3463 return CC_Zmode;
3465 /* A compare of a mode narrower than SI mode against zero can be done
3466 by extending the value in the comparison. */
3467 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3468 && y == const0_rtx)
3469 /* Only use sign-extension if we really need it. */
3470 return ((code == GT || code == GE || code == LE || code == LT)
3471 ? CC_SESWPmode : CC_ZESWPmode);
3473 /* For everything else, return CCmode. */
3474 return CCmode;
3477 static unsigned
3478 aarch64_get_condition_code (rtx x)
3480 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3481 enum rtx_code comp_code = GET_CODE (x);
3483 if (GET_MODE_CLASS (mode) != MODE_CC)
3484 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3486 switch (mode)
3488 case CCFPmode:
3489 case CCFPEmode:
3490 switch (comp_code)
3492 case GE: return AARCH64_GE;
3493 case GT: return AARCH64_GT;
3494 case LE: return AARCH64_LS;
3495 case LT: return AARCH64_MI;
3496 case NE: return AARCH64_NE;
3497 case EQ: return AARCH64_EQ;
3498 case ORDERED: return AARCH64_VC;
3499 case UNORDERED: return AARCH64_VS;
3500 case UNLT: return AARCH64_LT;
3501 case UNLE: return AARCH64_LE;
3502 case UNGT: return AARCH64_HI;
3503 case UNGE: return AARCH64_PL;
3504 default: gcc_unreachable ();
3506 break;
3508 case CCmode:
3509 switch (comp_code)
3511 case NE: return AARCH64_NE;
3512 case EQ: return AARCH64_EQ;
3513 case GE: return AARCH64_GE;
3514 case GT: return AARCH64_GT;
3515 case LE: return AARCH64_LE;
3516 case LT: return AARCH64_LT;
3517 case GEU: return AARCH64_CS;
3518 case GTU: return AARCH64_HI;
3519 case LEU: return AARCH64_LS;
3520 case LTU: return AARCH64_CC;
3521 default: gcc_unreachable ();
3523 break;
3525 case CC_SWPmode:
3526 case CC_ZESWPmode:
3527 case CC_SESWPmode:
3528 switch (comp_code)
3530 case NE: return AARCH64_NE;
3531 case EQ: return AARCH64_EQ;
3532 case GE: return AARCH64_LE;
3533 case GT: return AARCH64_LT;
3534 case LE: return AARCH64_GE;
3535 case LT: return AARCH64_GT;
3536 case GEU: return AARCH64_LS;
3537 case GTU: return AARCH64_CC;
3538 case LEU: return AARCH64_CS;
3539 case LTU: return AARCH64_HI;
3540 default: gcc_unreachable ();
3542 break;
3544 case CC_NZmode:
3545 switch (comp_code)
3547 case NE: return AARCH64_NE;
3548 case EQ: return AARCH64_EQ;
3549 case GE: return AARCH64_PL;
3550 case LT: return AARCH64_MI;
3551 default: gcc_unreachable ();
3553 break;
3555 case CC_Zmode:
3556 switch (comp_code)
3558 case NE: return AARCH64_NE;
3559 case EQ: return AARCH64_EQ;
3560 default: gcc_unreachable ();
3562 break;
3564 default:
3565 gcc_unreachable ();
3566 break;
3570 static unsigned
3571 bit_count (unsigned HOST_WIDE_INT value)
3573 unsigned count = 0;
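/* Clear the lowest set bit on each iteration (value &= value - 1), so the
   loop runs once per set bit.  */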
3575 while (value)
3577 count++;
3578 value &= value - 1;
3581 return count;
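/* Print operand X to stream F, interpreted according to the operand
   modifier CODE; the supported modifiers are documented case by case
   below.  */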
3584 void
3585 aarch64_print_operand (FILE *f, rtx x, char code)
3587 switch (code)
3589 /* An integer or symbol address without a preceding # sign. */
3590 case 'c':
3591 switch (GET_CODE (x))
3593 case CONST_INT:
3594 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3595 break;
3597 case SYMBOL_REF:
3598 output_addr_const (f, x);
3599 break;
3601 case CONST:
3602 if (GET_CODE (XEXP (x, 0)) == PLUS
3603 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3605 output_addr_const (f, x);
3606 break;
3608 /* Fall through. */
3610 default:
3611 output_operand_lossage ("Unsupported operand for code '%c'", code);
3613 break;
3615 case 'e':
3616 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3618 int n;
3620 if (GET_CODE (x) != CONST_INT
3621 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3623 output_operand_lossage ("invalid operand for '%%%c'", code);
3624 return;
3627 switch (n)
3629 case 3:
3630 fputc ('b', f);
3631 break;
3632 case 4:
3633 fputc ('h', f);
3634 break;
3635 case 5:
3636 fputc ('w', f);
3637 break;
3638 default:
3639 output_operand_lossage ("invalid operand for '%%%c'", code);
3640 return;
3643 break;
3645 case 'p':
3647 int n;
3649 /* Print N such that 2^N == X. */
3650 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3652 output_operand_lossage ("invalid operand for '%%%c'", code);
3653 return;
3656 asm_fprintf (f, "%d", n);
3658 break;
3660 case 'P':
3661 /* Print the number of non-zero bits in X (a const_int). */
3662 if (GET_CODE (x) != CONST_INT)
3664 output_operand_lossage ("invalid operand for '%%%c'", code);
3665 return;
3668 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3669 break;
3671 case 'H':
3672 /* Print the higher numbered register of a pair (TImode) of regs. */
3673 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3675 output_operand_lossage ("invalid operand for '%%%c'", code);
3676 return;
3679 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3680 break;
3682 case 'm':
3683 /* Print a condition (eq, ne, etc). */
3685 /* CONST_TRUE_RTX means always -- that's the default. */
3686 if (x == const_true_rtx)
3687 return;
3689 if (!COMPARISON_P (x))
3691 output_operand_lossage ("invalid operand for '%%%c'", code);
3692 return;
3695 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3696 break;
3698 case 'M':
3699 /* Print the inverse of a condition (eq <-> ne, etc). */
3701 /* CONST_TRUE_RTX means never -- that's the default. */
3702 if (x == const_true_rtx)
3704 fputs ("nv", f);
3705 return;
3708 if (!COMPARISON_P (x))
3710 output_operand_lossage ("invalid operand for '%%%c'", code);
3711 return;
3714 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3715 (aarch64_get_condition_code (x))], f);
3716 break;
3718 case 'b':
3719 case 'h':
3720 case 's':
3721 case 'd':
3722 case 'q':
3723 /* Print a scalar FP/SIMD register name. */
3724 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3726 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3727 return;
3729 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3730 break;
3732 case 'S':
3733 case 'T':
3734 case 'U':
3735 case 'V':
3736 /* Print the first FP/SIMD register name in a list. */
3737 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3739 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3740 return;
3742 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3743 break;
3745 case 'X':
3746 /* Print bottom 16 bits of integer constant in hex. */
3747 if (GET_CODE (x) != CONST_INT)
3749 output_operand_lossage ("invalid operand for '%%%c'", code);
3750 return;
3752 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3753 break;
3755 case 'w':
3756 case 'x':
3757 /* Print a general register name or the zero register (32-bit or
3758 64-bit). */
3759 if (x == const0_rtx
3760 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3762 asm_fprintf (f, "%czr", code);
3763 break;
3766 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3768 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3769 break;
3772 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3774 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3775 break;
3778 /* Fall through */
3780 case 0:
3781 /* Print a normal operand. If it's a general register, then we
3782 assume DImode. */
3783 if (x == NULL)
3785 output_operand_lossage ("missing operand");
3786 return;
3789 switch (GET_CODE (x))
3791 case REG:
3792 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3793 break;
3795 case MEM:
3796 aarch64_memory_reference_mode = GET_MODE (x);
3797 output_address (XEXP (x, 0));
3798 break;
3800 case LABEL_REF:
3801 case SYMBOL_REF:
3802 output_addr_const (asm_out_file, x);
3803 break;
3805 case CONST_INT:
3806 asm_fprintf (f, "%wd", INTVAL (x));
3807 break;
3809 case CONST_VECTOR:
3810 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3812 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3813 HOST_WIDE_INT_MIN,
3814 HOST_WIDE_INT_MAX));
3815 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3817 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3819 fputc ('0', f);
3821 else
3822 gcc_unreachable ();
3823 break;
3825 case CONST_DOUBLE:
3826 /* CONST_DOUBLE can represent a double-width integer.
3827 In this case, the mode of x is VOIDmode. */
3828 if (GET_MODE (x) == VOIDmode)
3829 ; /* Do Nothing. */
3830 else if (aarch64_float_const_zero_rtx_p (x))
3832 fputc ('0', f);
3833 break;
3835 else if (aarch64_float_const_representable_p (x))
3837 #define buf_size 20
3838 char float_buf[buf_size] = {'\0'};
3839 REAL_VALUE_TYPE r;
3840 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3841 real_to_decimal_for_mode (float_buf, &r,
3842 buf_size, buf_size,
3843 1, GET_MODE (x));
3844 asm_fprintf (asm_out_file, "%s", float_buf);
3845 break;
3846 #undef buf_size
3848 output_operand_lossage ("invalid constant");
3849 return;
3850 default:
3851 output_operand_lossage ("invalid operand");
3852 return;
3854 break;
3856 case 'A':
3857 if (GET_CODE (x) == HIGH)
3858 x = XEXP (x, 0);
3860 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3862 case SYMBOL_SMALL_GOT:
3863 asm_fprintf (asm_out_file, ":got:");
3864 break;
3866 case SYMBOL_SMALL_TLSGD:
3867 asm_fprintf (asm_out_file, ":tlsgd:");
3868 break;
3870 case SYMBOL_SMALL_TLSDESC:
3871 asm_fprintf (asm_out_file, ":tlsdesc:");
3872 break;
3874 case SYMBOL_SMALL_GOTTPREL:
3875 asm_fprintf (asm_out_file, ":gottprel:");
3876 break;
3878 case SYMBOL_SMALL_TPREL:
3879 asm_fprintf (asm_out_file, ":tprel:");
3880 break;
3882 case SYMBOL_TINY_GOT:
3883 gcc_unreachable ();
3884 break;
3886 default:
3887 break;
3889 output_addr_const (asm_out_file, x);
3890 break;
3892 case 'L':
3893 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3895 case SYMBOL_SMALL_GOT:
3896 asm_fprintf (asm_out_file, ":lo12:");
3897 break;
3899 case SYMBOL_SMALL_TLSGD:
3900 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3901 break;
3903 case SYMBOL_SMALL_TLSDESC:
3904 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3905 break;
3907 case SYMBOL_SMALL_GOTTPREL:
3908 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3909 break;
3911 case SYMBOL_SMALL_TPREL:
3912 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3913 break;
3915 case SYMBOL_TINY_GOT:
3916 asm_fprintf (asm_out_file, ":got:");
3917 break;
3919 default:
3920 break;
3922 output_addr_const (asm_out_file, x);
3923 break;
3925 case 'G':
3927 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3929 case SYMBOL_SMALL_TPREL:
3930 asm_fprintf (asm_out_file, ":tprel_hi12:");
3931 break;
3932 default:
3933 break;
3935 output_addr_const (asm_out_file, x);
3936 break;
3938 default:
3939 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3940 return;
3944 void
3945 aarch64_print_operand_address (FILE *f, rtx x)
3947 struct aarch64_address_info addr;
3949 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3950 MEM, true))
3951 switch (addr.type)
3953 case ADDRESS_REG_IMM:
3954 if (addr.offset == const0_rtx)
3955 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3956 else
3957 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
3958 INTVAL (addr.offset));
3959 return;
3961 case ADDRESS_REG_REG:
3962 if (addr.shift == 0)
3963 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
3964 reg_names [REGNO (addr.offset)]);
3965 else
3966 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
3967 reg_names [REGNO (addr.offset)], addr.shift);
3968 return;
3970 case ADDRESS_REG_UXTW:
3971 if (addr.shift == 0)
3972 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
3973 REGNO (addr.offset) - R0_REGNUM);
3974 else
3975 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
3976 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3977 return;
3979 case ADDRESS_REG_SXTW:
3980 if (addr.shift == 0)
3981 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
3982 REGNO (addr.offset) - R0_REGNUM);
3983 else
3984 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
3985 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3986 return;
3988 case ADDRESS_REG_WB:
3989 switch (GET_CODE (x))
3991 case PRE_INC:
3992 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
3993 GET_MODE_SIZE (aarch64_memory_reference_mode));
3994 return;
3995 case POST_INC:
3996 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
3997 GET_MODE_SIZE (aarch64_memory_reference_mode));
3998 return;
3999 case PRE_DEC:
4000 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
4001 GET_MODE_SIZE (aarch64_memory_reference_mode));
4002 return;
4003 case POST_DEC:
4004 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
4005 GET_MODE_SIZE (aarch64_memory_reference_mode));
4006 return;
4007 case PRE_MODIFY:
4008 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
4009 INTVAL (addr.offset));
4010 return;
4011 case POST_MODIFY:
4012 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
4013 INTVAL (addr.offset));
4014 return;
4015 default:
4016 break;
4018 break;
4020 case ADDRESS_LO_SUM:
4021 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
4022 output_addr_const (f, addr.offset);
4023 asm_fprintf (f, "]");
4024 return;
4026 case ADDRESS_SYMBOLIC:
4027 break;
4030 output_addr_const (f, x);
4033 bool
4034 aarch64_label_mentioned_p (rtx x)
4036 const char *fmt;
4037 int i;
4039 if (GET_CODE (x) == LABEL_REF)
4040 return true;
4042 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4043 referencing instruction, but they are constant offsets, not
4044 symbols. */
4045 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4046 return false;
4048 fmt = GET_RTX_FORMAT (GET_CODE (x));
4049 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4051 if (fmt[i] == 'E')
4053 int j;
4055 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4056 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4057 return 1;
4059 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4060 return 1;
4063 return 0;
4066 /* Implement REGNO_REG_CLASS. */
4068 enum reg_class
4069 aarch64_regno_regclass (unsigned regno)
4071 if (GP_REGNUM_P (regno))
4072 return GENERAL_REGS;
4074 if (regno == SP_REGNUM)
4075 return STACK_REG;
4077 if (regno == FRAME_POINTER_REGNUM
4078 || regno == ARG_POINTER_REGNUM)
4079 return POINTER_REGS;
4081 if (FP_REGNUM_P (regno))
4082 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
4084 return NO_REGS;
4087 /* Try a machine-dependent way of reloading an illegitimate address
4088 operand. If we find one, push the reload and return the new rtx. */
4091 aarch64_legitimize_reload_address (rtx *x_p,
4092 enum machine_mode mode,
4093 int opnum, int type,
4094 int ind_levels ATTRIBUTE_UNUSED)
4096 rtx x = *x_p;
4098 /* Do not allow mem (plus (reg, const)) if vector struct mode. */
4099 if (aarch64_vect_struct_mode_p (mode)
4100 && GET_CODE (x) == PLUS
4101 && REG_P (XEXP (x, 0))
4102 && CONST_INT_P (XEXP (x, 1)))
4104 rtx orig_rtx = x;
4105 x = copy_rtx (x);
4106 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4107 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4108 opnum, (enum reload_type) type);
4109 return x;
4112 /* We must recognize output that we have already generated ourselves. */
4113 if (GET_CODE (x) == PLUS
4114 && GET_CODE (XEXP (x, 0)) == PLUS
4115 && REG_P (XEXP (XEXP (x, 0), 0))
4116 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4117 && CONST_INT_P (XEXP (x, 1)))
4119 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4120 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4121 opnum, (enum reload_type) type);
4122 return x;
4125 /* We wish to handle large displacements off a base register by splitting
4126 the addend across an add and the mem insn. This can cut the number of
4127 extra insns needed from 3 to 1. It is only useful for load/store of a
4128 single register with a 12-bit offset field. */
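/* Sketch (illustrative only; register numbers invented): for a DImode
   access at x0 + 0x13008 the offset is split as high == 0x13000 (an
   aarch64_uimm12_shift value) and low == 8; the high part is reloaded into
   the base register and the mem keeps the small offset, e.g.

	add	x1, x0, 0x13000
	ldr	x2, [x1, 8]
  */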
4129 if (GET_CODE (x) == PLUS
4130 && REG_P (XEXP (x, 0))
4131 && CONST_INT_P (XEXP (x, 1))
4132 && HARD_REGISTER_P (XEXP (x, 0))
4133 && mode != TImode
4134 && mode != TFmode
4135 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4137 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4138 HOST_WIDE_INT low = val & 0xfff;
4139 HOST_WIDE_INT high = val - low;
4140 HOST_WIDE_INT offs;
4141 rtx cst;
4142 enum machine_mode xmode = GET_MODE (x);
4144 /* In ILP32, xmode can be either DImode or SImode. */
4145 gcc_assert (xmode == DImode || xmode == SImode);
4147 /* Punt on BLKmode: we cannot ascertain BLKmode alignment, so let any
4148 non-zero offset be reloaded in full rather than split here. */
4149 if (GET_MODE_SIZE (mode) == 0)
4150 return NULL_RTX;
4152 offs = low % GET_MODE_SIZE (mode);
4154 /* Align misaligned offset by adjusting high part to compensate. */
4155 if (offs != 0)
4157 if (aarch64_uimm12_shift (high + offs))
4159 /* Align down. */
4160 low = low - offs;
4161 high = high + offs;
4163 else
4165 /* Align up. */
4166 offs = GET_MODE_SIZE (mode) - offs;
4167 low = low + offs;
4168 high = high + (low & 0x1000) - offs;
4169 low &= 0xfff;
4173 /* Check for overflow. */
4174 if (high + low != val)
4175 return NULL_RTX;
4177 cst = GEN_INT (high);
4178 if (!aarch64_uimm12_shift (high))
4179 cst = force_const_mem (xmode, cst);
4181 /* Reload high part into base reg, leaving the low part
4182 in the mem instruction.
4183 Note that replacing this gen_rtx_PLUS with plus_constant is
4184 wrong in this case because we rely on the
4185 (plus (plus reg c1) c2) structure being preserved so that
4186 XEXP (*p, 0) in push_reload below uses the correct term. */
4187 x = gen_rtx_PLUS (xmode,
4188 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4189 GEN_INT (low));
4191 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4192 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4193 opnum, (enum reload_type) type);
4194 return x;
4197 return NULL_RTX;
4201 static reg_class_t
4202 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4203 reg_class_t rclass,
4204 enum machine_mode mode,
4205 secondary_reload_info *sri)
4207 /* Without the TARGET_SIMD instructions we cannot move a Q register
4208 to a Q register directly. We need a scratch. */
4209 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4210 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4211 && reg_class_subset_p (rclass, FP_REGS))
4213 if (mode == TFmode)
4214 sri->icode = CODE_FOR_aarch64_reload_movtf;
4215 else if (mode == TImode)
4216 sri->icode = CODE_FOR_aarch64_reload_movti;
4217 return NO_REGS;
4220 /* A TFmode or TImode memory access should be handled via an FP_REG
4221 because AArch64 has richer addressing modes for LDR/STR instructions
4222 than LDP/STP instructions. */
4223 if (!TARGET_GENERAL_REGS_ONLY && rclass == GENERAL_REGS
4224 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4225 return FP_REGS;
4227 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P (x))
4228 return GENERAL_REGS;
4230 return NO_REGS;
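/* Illustrative note (an assumption for exposition, not from the original
   source): when AdvSIMD is disabled but FP registers are still enabled
   (e.g. +nosimd), a copy such as (set (reg:TI v1) (reg:TI v0)) cannot be
   a single Q-register move, so the hook above selects
   aarch64_reload_movti, which performs the copy with the help of a
   general-register scratch.  */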
4233 static bool
4234 aarch64_can_eliminate (const int from, const int to)
4236 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4237 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4239 if (frame_pointer_needed)
4241 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4242 return true;
4243 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4244 return false;
4245 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4246 && !cfun->calls_alloca)
4247 return true;
4248 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4249 return true;
4251 return false;
4254 return true;
4257 HOST_WIDE_INT
4258 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4260 aarch64_layout_frame ();
4262 if (to == HARD_FRAME_POINTER_REGNUM)
4264 if (from == ARG_POINTER_REGNUM)
4265 return cfun->machine->frame.frame_size - crtl->outgoing_args_size;
4267 if (from == FRAME_POINTER_REGNUM)
4268 return (cfun->machine->frame.hard_fp_offset
4269 - cfun->machine->frame.saved_varargs_size);
4272 if (to == STACK_POINTER_REGNUM)
4274 if (from == FRAME_POINTER_REGNUM)
4275 return (cfun->machine->frame.frame_size
4276 - cfun->machine->frame.saved_varargs_size);
4279 return cfun->machine->frame.frame_size;
4282 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4283 previous frame. */
4286 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4288 if (count != 0)
4289 return const0_rtx;
4290 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4294 static void
4295 aarch64_asm_trampoline_template (FILE *f)
4297 if (TARGET_ILP32)
4299 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4300 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4302 else
4304 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4305 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4307 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4308 assemble_aligned_integer (4, const0_rtx);
4309 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4310 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
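/* Rough sketch of the LP64 trampoline laid out above (an illustration; it
   assumes IP1 is x17 and the static chain register is x18, which may
   differ between configurations):

       ldr   x17, .+16            // target function address
       ldr   x18, .+20            // static chain value
       br    x17
       .word 0                    // pad the code part to 16 bytes
       .xword <function address>  // filled in by aarch64_trampoline_init
       .xword <static chain>      // likewise
   */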
4313 static void
4314 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4316 rtx fnaddr, mem, a_tramp;
4317 const int tramp_code_sz = 16;
4319 /* Don't need to copy the trailing D-words, we fill those in below. */
4320 emit_block_move (m_tramp, assemble_trampoline_template (),
4321 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4322 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4323 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4324 if (GET_MODE (fnaddr) != ptr_mode)
4325 fnaddr = convert_memory_address (ptr_mode, fnaddr);
4326 emit_move_insn (mem, fnaddr);
4328 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4329 emit_move_insn (mem, chain_value);
4331 /* XXX We should really define a "clear_cache" pattern and use
4332 gen_clear_cache(). */
4333 a_tramp = XEXP (m_tramp, 0);
4334 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4335 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4336 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4337 ptr_mode);
4340 static unsigned char
4341 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4343 switch (regclass)
4345 case CALLER_SAVE_REGS:
4346 case POINTER_REGS:
4347 case GENERAL_REGS:
4348 case ALL_REGS:
4349 case FP_REGS:
4350 case FP_LO_REGS:
4351 return
4352 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4353 (GET_MODE_SIZE (mode) + 7) / 8;
4354 case STACK_REG:
4355 return 1;
4357 case NO_REGS:
4358 return 0;
4360 default:
4361 break;
4363 gcc_unreachable ();
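/* Worked example (illustrative, not from the original source): a 16-byte
   scalar such as TImode needs (16 + 7) / 8 = 2 registers in GENERAL_REGS,
   while a 16-byte vector mode such as V4SImode needs (16 + 15) / 16 = 1,
   i.e. a single Q register, in FP_REGS.  */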
4366 static reg_class_t
4367 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4369 if (regclass == POINTER_REGS)
4370 return GENERAL_REGS;
4372 if (regclass == STACK_REG)
4374 if (REG_P (x)
4375 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4376 return regclass;
4378 return NO_REGS;
4381 /* If it's an integer immediate that MOVI can't handle, then
4382 FP_REGS is not an option, so we return NO_REGS instead. */
4383 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4384 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4385 return NO_REGS;
4387 /* Register elimination can result in a request for
4388 SP+constant->FP_REGS. We cannot support such operations, which
4389 use SP as source and an FP_REG as destination, so reject them
4390 outright. */
4391 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4393 rtx lhs = XEXP (x, 0);
4395 /* Look through a possible SUBREG introduced by ILP32. */
4396 if (GET_CODE (lhs) == SUBREG)
4397 lhs = SUBREG_REG (lhs);
4399 gcc_assert (REG_P (lhs));
4400 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4401 POINTER_REGS));
4402 return NO_REGS;
4405 return regclass;
4408 void
4409 aarch64_asm_output_labelref (FILE* f, const char *name)
4411 asm_fprintf (f, "%U%s", name);
4414 static void
4415 aarch64_elf_asm_constructor (rtx symbol, int priority)
4417 if (priority == DEFAULT_INIT_PRIORITY)
4418 default_ctor_section_asm_out_constructor (symbol, priority);
4419 else
4421 section *s;
4422 char buf[18];
4423 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4424 s = get_section (buf, SECTION_WRITE, NULL);
4425 switch_to_section (s);
4426 assemble_align (POINTER_SIZE);
4427 assemble_aligned_integer (POINTER_BYTES, symbol);
4431 static void
4432 aarch64_elf_asm_destructor (rtx symbol, int priority)
4434 if (priority == DEFAULT_INIT_PRIORITY)
4435 default_dtor_section_asm_out_destructor (symbol, priority);
4436 else
4438 section *s;
4439 char buf[18];
4440 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4441 s = get_section (buf, SECTION_WRITE, NULL);
4442 switch_to_section (s);
4443 assemble_align (POINTER_SIZE);
4444 assemble_aligned_integer (POINTER_BYTES, symbol);
4448 const char*
4449 aarch64_output_casesi (rtx *operands)
4451 char buf[100];
4452 char label[100];
4453 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
4454 int index;
4455 static const char *const patterns[4][2] =
4458 "ldrb\t%w3, [%0,%w1,uxtw]",
4459 "add\t%3, %4, %w3, sxtb #2"
4462 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4463 "add\t%3, %4, %w3, sxth #2"
4466 "ldr\t%w3, [%0,%w1,uxtw #2]",
4467 "add\t%3, %4, %w3, sxtw #2"
4469 /* We assume that DImode is only generated when not optimizing and
4470 that we don't really need 64-bit address offsets. That would
4471 imply an object file with 8GB of code in a single function! */
4473 "ldr\t%w3, [%0,%w1,uxtw #2]",
4474 "add\t%3, %4, %w3, sxtw #2"
4478 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4480 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4482 gcc_assert (index >= 0 && index <= 3);
4484 /* Need to implement table size reduction, by changing the code below. */
4485 output_asm_insn (patterns[index][0], operands);
4486 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4487 snprintf (buf, sizeof (buf),
4488 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4489 output_asm_insn (buf, operands);
4490 output_asm_insn (patterns[index][1], operands);
4491 output_asm_insn ("br\t%3", operands);
4492 assemble_label (asm_out_file, label);
4493 return "";
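/* For a table of SImode entries the sequence emitted above looks roughly
   like this (an illustrative sketch, not from the original source):

       ldr   w3, [x0, w1, uxtw #2]   // load the table entry
       adr   x4, .Lrtx<N>            // anchor label emitted below
       add   x3, x4, w3, sxtw #2     // entries are word-scaled offsets
       br    x3
   .Lrtx<N>:

   where operand 0 is the table base, operand 1 the index and operands 3
   and 4 are scratch registers.  */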
4497 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4498 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4499 operator. */
4502 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4504 if (shift >= 0 && shift <= 3)
4506 int size;
4507 for (size = 8; size <= 32; size *= 2)
4509 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4510 if (mask == bits << shift)
4511 return size;
4514 return 0;
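/* Example (illustrative, not from the original source):
   aarch64_uxt_size (1, 0x1fe) returns 8 because 0x1fe == 0xff << 1, i.e.
   an 8-bit field shifted left by one, which matches a UXTB operand; a
   mask that is not an 8/16/32-bit field at the given shift yields 0.  */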
4517 static bool
4518 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4519 const_rtx x ATTRIBUTE_UNUSED)
4521 /* We can't use blocks for constants when we're using a per-function
4522 constant pool. */
4523 return false;
4526 static section *
4527 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4528 rtx x ATTRIBUTE_UNUSED,
4529 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4531 /* Force all constant pool entries into the current function section. */
4532 return function_section (current_function_decl);
4536 /* Costs. */
4538 /* Helper function for rtx cost calculation. Strip a shift expression
4539 from X. Returns the inner operand if successful, or the original
4540 expression on failure. */
4541 static rtx
4542 aarch64_strip_shift (rtx x)
4544 rtx op = x;
4546 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
4547 we can convert both to ROR during final output. */
4548 if ((GET_CODE (op) == ASHIFT
4549 || GET_CODE (op) == ASHIFTRT
4550 || GET_CODE (op) == LSHIFTRT
4551 || GET_CODE (op) == ROTATERT
4552 || GET_CODE (op) == ROTATE)
4553 && CONST_INT_P (XEXP (op, 1)))
4554 return XEXP (op, 0);
4556 if (GET_CODE (op) == MULT
4557 && CONST_INT_P (XEXP (op, 1))
4558 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4559 return XEXP (op, 0);
4561 return x;
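/* For instance (illustrative, not from the original source), both
   (ashift (reg) (const_int 2)) and its canonical multiply form
   (mult (reg) (const_int 4)) are stripped down to (reg), since either
   can be folded into the shifted-register form of the outer operation.  */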
4564 /* Helper function for rtx cost calculation. Strip an extend
4565 expression from X. Returns the inner operand if successful, or the
4566 original expression on failure. We deal with a number of possible
4567 canonicalization variations here. */
4568 static rtx
4569 aarch64_strip_extend (rtx x)
4571 rtx op = x;
4573 /* Zero and sign extraction of a widened value. */
4574 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4575 && XEXP (op, 2) == const0_rtx
4576 && GET_CODE (XEXP (op, 0)) == MULT
4577 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4578 XEXP (op, 1)))
4579 return XEXP (XEXP (op, 0), 0);
4581 /* It can also be represented (for zero-extend) as an AND with an
4582 immediate. */
4583 if (GET_CODE (op) == AND
4584 && GET_CODE (XEXP (op, 0)) == MULT
4585 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4586 && CONST_INT_P (XEXP (op, 1))
4587 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4588 INTVAL (XEXP (op, 1))) != 0)
4589 return XEXP (XEXP (op, 0), 0);
4591 /* Now handle extended register, as this may also have an optional
4592 left shift by 1..4. */
4593 if (GET_CODE (op) == ASHIFT
4594 && CONST_INT_P (XEXP (op, 1))
4595 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4596 op = XEXP (op, 0);
4598 if (GET_CODE (op) == ZERO_EXTEND
4599 || GET_CODE (op) == SIGN_EXTEND)
4600 op = XEXP (op, 0);
4602 if (op != x)
4603 return op;
4605 return x;
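/* For instance (illustrative, not from the original source),
   (ashift (zero_extend:DI (reg:SI)) (const_int 2)) strips down to
   (reg:SI): both the extend and the small left shift can be absorbed by
   the extended-register forms of ADD/SUB, so only the inner register
   needs to be costed.  */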
4608 /* Helper function for rtx cost calculation. Calculate the cost of
4609 a MULT, which may be part of a multiply-accumulate rtx. Return
4610 the calculated cost of the expression, recursing manually in to
4611 operands where needed. */
4613 static int
4614 aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
4616 rtx op0, op1;
4617 const struct cpu_cost_table *extra_cost
4618 = aarch64_tune_params->insn_extra_cost;
4619 int cost = 0;
4620 bool maybe_fma = (outer == PLUS || outer == MINUS);
4621 enum machine_mode mode = GET_MODE (x);
4623 gcc_checking_assert (code == MULT);
4625 op0 = XEXP (x, 0);
4626 op1 = XEXP (x, 1);
4628 if (VECTOR_MODE_P (mode))
4629 mode = GET_MODE_INNER (mode);
4631 /* Integer multiply/fma. */
4632 if (GET_MODE_CLASS (mode) == MODE_INT)
4634 /* The multiply will be canonicalized as a shift, so cost it as such. */
4635 if (CONST_INT_P (op1)
4636 && exact_log2 (INTVAL (op1)) > 0)
4638 if (speed)
4640 if (maybe_fma)
4641 /* ADD (shifted register). */
4642 cost += extra_cost->alu.arith_shift;
4643 else
4644 /* LSL (immediate). */
4645 cost += extra_cost->alu.shift;
4648 cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
4650 return cost;
4653 /* Integer multiplies or FMAs have zero/sign extending variants. */
4654 if ((GET_CODE (op0) == ZERO_EXTEND
4655 && GET_CODE (op1) == ZERO_EXTEND)
4656 || (GET_CODE (op0) == SIGN_EXTEND
4657 && GET_CODE (op1) == SIGN_EXTEND))
4659 cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4660 + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
4662 if (speed)
4664 if (maybe_fma)
4665 /* MADD/SMADDL/UMADDL. */
4666 cost += extra_cost->mult[0].extend_add;
4667 else
4668 /* MUL/SMULL/UMULL. */
4669 cost += extra_cost->mult[0].extend;
4672 return cost;
4675 /* This is either an integer multiply or an FMA. In both cases
4676 we want to recurse and cost the operands. */
4677 cost += rtx_cost (op0, MULT, 0, speed)
4678 + rtx_cost (op1, MULT, 1, speed);
4680 if (speed)
4682 if (maybe_fma)
4683 /* MADD. */
4684 cost += extra_cost->mult[mode == DImode].add;
4685 else
4686 /* MUL. */
4687 cost += extra_cost->mult[mode == DImode].simple;
4690 return cost;
4692 else
4694 if (speed)
4696 /* Floating-point FMA/FMUL can also support negations of the
4697 operands. */
4698 if (GET_CODE (op0) == NEG)
4699 op0 = XEXP (op0, 0);
4700 if (GET_CODE (op1) == NEG)
4701 op1 = XEXP (op1, 0);
4703 if (maybe_fma)
4704 /* FMADD/FNMADD/FNMSUB/FMSUB. */
4705 cost += extra_cost->fp[mode == DFmode].fma;
4706 else
4707 /* FMUL/FNMUL. */
4708 cost += extra_cost->fp[mode == DFmode].mult;
4711 cost += rtx_cost (op0, MULT, 0, speed)
4712 + rtx_cost (op1, MULT, 1, speed);
4713 return cost;
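/* For example (illustrative, not from the original source), costing
   (plus (mult (reg) (reg)) (reg)) reaches the code above with
   outer == PLUS, so the multiply is priced as the MADD part of a
   multiply-accumulate rather than as a separate MUL followed by ADD.  */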
4717 static int
4718 aarch64_address_cost (rtx x,
4719 enum machine_mode mode,
4720 addr_space_t as ATTRIBUTE_UNUSED,
4721 bool speed)
4723 enum rtx_code c = GET_CODE (x);
4724 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4725 struct aarch64_address_info info;
4726 int cost = 0;
4727 info.shift = 0;
4729 if (!aarch64_classify_address (&info, x, mode, c, false))
4731 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
4733 /* This is a CONST or SYMBOL ref which will be split
4734 in a different way depending on the code model in use.
4735 Cost it through the generic infrastructure. */
4736 int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
4737 /* Divide through by the cost of one instruction to
4738 bring it to the same units as the address costs. */
4739 cost_symbol_ref /= COSTS_N_INSNS (1);
4740 /* The cost is then the cost of preparing the address,
4741 followed by an immediate (possibly 0) offset. */
4742 return cost_symbol_ref + addr_cost->imm_offset;
4744 else
4746 /* This is most likely a jump table from a case
4747 statement. */
4748 return addr_cost->register_offset;
4752 switch (info.type)
4754 case ADDRESS_LO_SUM:
4755 case ADDRESS_SYMBOLIC:
4756 case ADDRESS_REG_IMM:
4757 cost += addr_cost->imm_offset;
4758 break;
4760 case ADDRESS_REG_WB:
4761 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4762 cost += addr_cost->pre_modify;
4763 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4764 cost += addr_cost->post_modify;
4765 else
4766 gcc_unreachable ();
4768 break;
4770 case ADDRESS_REG_REG:
4771 cost += addr_cost->register_offset;
4772 break;
4774 case ADDRESS_REG_UXTW:
4775 case ADDRESS_REG_SXTW:
4776 cost += addr_cost->register_extend;
4777 break;
4779 default:
4780 gcc_unreachable ();
4784 if (info.shift > 0)
4786 /* For the sake of calculating the cost of the shifted register
4787 component, we can treat same sized modes in the same way. */
4788 switch (GET_MODE_BITSIZE (mode))
4790 case 16:
4791 cost += addr_cost->addr_scale_costs.hi;
4792 break;
4794 case 32:
4795 cost += addr_cost->addr_scale_costs.si;
4796 break;
4798 case 64:
4799 cost += addr_cost->addr_scale_costs.di;
4800 break;
4802 /* We can't tell, or this is a 128-bit vector. */
4803 default:
4804 cost += addr_cost->addr_scale_costs.ti;
4805 break;
4809 return cost;
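/* Example (illustrative, not from the original source): an SImode access
   through (plus (reg) (mult (reg) (const_int 4))), i.e. [x0, x1, lsl #2],
   costs register_offset plus the SImode scale cost addr_scale_costs.si,
   whereas a plain [x0, #imm] access costs only imm_offset.  */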
4812 /* Return true if the RTX X in mode MODE is a zero or sign extract
4813 usable in an ADD or SUB (extended register) instruction. */
4814 static bool
4815 aarch64_rtx_arith_op_extract_p (rtx x, enum machine_mode mode)
4817 /* Catch add with a sign extract.
4818 This is add_<optab><mode>_multp2. */
4819 if (GET_CODE (x) == SIGN_EXTRACT
4820 || GET_CODE (x) == ZERO_EXTRACT)
4822 rtx op0 = XEXP (x, 0);
4823 rtx op1 = XEXP (x, 1);
4824 rtx op2 = XEXP (x, 2);
4826 if (GET_CODE (op0) == MULT
4827 && CONST_INT_P (op1)
4828 && op2 == const0_rtx
4829 && CONST_INT_P (XEXP (op0, 1))
4830 && aarch64_is_extend_from_extract (mode,
4831 XEXP (op0, 1),
4832 op1))
4834 return true;
4838 return false;
4841 static bool
4842 aarch64_frint_unspec_p (unsigned int u)
4844 switch (u)
4846 case UNSPEC_FRINTZ:
4847 case UNSPEC_FRINTP:
4848 case UNSPEC_FRINTM:
4849 case UNSPEC_FRINTA:
4850 case UNSPEC_FRINTN:
4851 case UNSPEC_FRINTX:
4852 case UNSPEC_FRINTI:
4853 return true;
4855 default:
4856 return false;
4860 /* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
4861 storing it in *COST. Result is true if the total cost of the operation
4862 has now been calculated. */
4863 static bool
4864 aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
4866 rtx inner;
4867 rtx comparator;
4868 enum rtx_code cmpcode;
4870 if (COMPARISON_P (op0))
4872 inner = XEXP (op0, 0);
4873 comparator = XEXP (op0, 1);
4874 cmpcode = GET_CODE (op0);
4876 else
4878 inner = op0;
4879 comparator = const0_rtx;
4880 cmpcode = NE;
4883 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
4885 /* Conditional branch. */
4886 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
4887 return true;
4888 else
4890 if (cmpcode == NE || cmpcode == EQ)
4892 if (comparator == const0_rtx)
4894 /* TBZ/TBNZ/CBZ/CBNZ. */
4895 if (GET_CODE (inner) == ZERO_EXTRACT)
4896 /* TBZ/TBNZ. */
4897 *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT,
4898 0, speed);
4899 else
4900 /* CBZ/CBNZ. */
4901 *cost += rtx_cost (inner, cmpcode, 0, speed);
4903 return true;
4906 else if (cmpcode == LT || cmpcode == GE)
4908 /* TBZ/TBNZ. */
4909 if (comparator == const0_rtx)
4910 return true;
4914 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
4916 /* It's a conditional operation based on the status flags,
4917 so it must be some flavor of CSEL. */
4919 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
4920 if (GET_CODE (op1) == NEG
4921 || GET_CODE (op1) == NOT
4922 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
4923 op1 = XEXP (op1, 0);
4925 *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed);
4926 *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed);
4927 return true;
4930 /* We don't know what this is; cost all operands. */
4931 return false;
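/* Examples of the shapes handled above (illustrative, not from the
   original source):

       (if_then_else (ne (reg) (const_int 0)) (label_ref ...) (pc))
	  -- a CBNZ-style conditional branch on a register,
       (if_then_else (eq (reg:CC CC_REGNUM) (const_int 0)) (reg) (reg))
	  -- a CSEL-style conditional select on the status flags.  */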
4934 /* Calculate the cost of calculating X, storing it in *COST. Result
4935 is true if the total cost of the operation has now been calculated. */
4936 static bool
4937 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4938 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4940 rtx op0, op1, op2;
4941 const struct cpu_cost_table *extra_cost
4942 = aarch64_tune_params->insn_extra_cost;
4943 enum machine_mode mode = GET_MODE (x);
4945 /* By default, assume that everything has equivalent cost to the
4946 cheapest instruction. Any additional costs are applied as a delta
4947 above this default. */
4948 *cost = COSTS_N_INSNS (1);
4950 /* TODO: The cost infrastructure currently does not handle
4951 vector operations. Assume that all vector operations
4952 are equally expensive. */
4953 if (VECTOR_MODE_P (mode))
4955 if (speed)
4956 *cost += extra_cost->vect.alu;
4957 return true;
4960 switch (code)
4962 case SET:
4963 /* The cost depends entirely on the operands to SET. */
4964 *cost = 0;
4965 op0 = SET_DEST (x);
4966 op1 = SET_SRC (x);
4968 switch (GET_CODE (op0))
4970 case MEM:
4971 if (speed)
4973 rtx address = XEXP (op0, 0);
4974 if (GET_MODE_CLASS (mode) == MODE_INT)
4975 *cost += extra_cost->ldst.store;
4976 else if (mode == SFmode)
4977 *cost += extra_cost->ldst.storef;
4978 else if (mode == DFmode)
4979 *cost += extra_cost->ldst.stored;
4981 *cost +=
4982 COSTS_N_INSNS (aarch64_address_cost (address, mode,
4983 0, speed));
4986 *cost += rtx_cost (op1, SET, 1, speed);
4987 return true;
4989 case SUBREG:
4990 if (! REG_P (SUBREG_REG (op0)))
4991 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4993 /* Fall through. */
4994 case REG:
4995 /* const0_rtx is in general free, but we will use an
4996 instruction to set a register to 0. */
4997 if (REG_P (op1) || op1 == const0_rtx)
4999 /* The cost is 1 per register copied. */
5000 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
5001 / UNITS_PER_WORD;
5002 *cost = COSTS_N_INSNS (n_minus_1 + 1);
5004 else
5005 /* Cost is just the cost of the RHS of the set. */
5006 *cost += rtx_cost (op1, SET, 1, speed);
5007 return true;
5009 case ZERO_EXTRACT:
5010 case SIGN_EXTRACT:
5011 /* Bit-field insertion. Strip any redundant widening of
5012 the RHS to meet the width of the target. */
5013 if (GET_CODE (op1) == SUBREG)
5014 op1 = SUBREG_REG (op1);
5015 if ((GET_CODE (op1) == ZERO_EXTEND
5016 || GET_CODE (op1) == SIGN_EXTEND)
5017 && GET_CODE (XEXP (op0, 1)) == CONST_INT
5018 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
5019 >= INTVAL (XEXP (op0, 1))))
5020 op1 = XEXP (op1, 0);
5022 if (CONST_INT_P (op1))
5024 /* MOV immediate is assumed to always be cheap. */
5025 *cost = COSTS_N_INSNS (1);
5027 else
5029 /* BFM. */
5030 if (speed)
5031 *cost += extra_cost->alu.bfi;
5032 *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
5035 return true;
5037 default:
5038 /* We can't make sense of this, assume default cost. */
5039 *cost = COSTS_N_INSNS (1);
5040 return false;
5042 return false;
5044 case CONST_INT:
5045 /* If an instruction can incorporate a constant within the
5046 instruction, the instruction's expression avoids calling
5047 rtx_cost() on the constant. If rtx_cost() is called on a
5048 constant, then it is usually because the constant must be
5049 moved into a register by one or more instructions.
5051 The exception is constant 0, which can be expressed
5052 as XZR/WZR and is therefore free. The exception to this is
5053 if we have (set (reg) (const0_rtx)) in which case we must cost
5054 the move. However, we can catch that when we cost the SET, so
5055 we don't need to consider that here. */
5056 if (x == const0_rtx)
5057 *cost = 0;
5058 else
5060 /* To an approximation, building any other constant is
5061 proportionally expensive to the number of instructions
5062 required to build that constant. This is true whether we
5063 are compiling for SPEED or otherwise. */
5064 *cost = COSTS_N_INSNS (aarch64_build_constant (0,
5065 INTVAL (x),
5066 false));
5068 return true;
5070 case CONST_DOUBLE:
5071 if (speed)
5073 /* mov[df,sf]_aarch64. */
5074 if (aarch64_float_const_representable_p (x))
5075 /* FMOV (scalar immediate). */
5076 *cost += extra_cost->fp[mode == DFmode].fpconst;
5077 else if (!aarch64_float_const_zero_rtx_p (x))
5079 /* This will be a load from memory. */
5080 if (mode == DFmode)
5081 *cost += extra_cost->ldst.loadd;
5082 else
5083 *cost += extra_cost->ldst.loadf;
5085 else
5086 /* Otherwise this is +0.0. We get this using MOVI d0, #0
5087 or MOV v0.s[0], wzr - neither of which is modeled by the
5088 cost tables. Just use the default cost. */
5093 return true;
5095 case MEM:
5096 if (speed)
5098 /* For loads we want the base cost of a load, plus an
5099 approximation for the additional cost of the addressing
5100 mode. */
5101 rtx address = XEXP (x, 0);
5102 if (GET_MODE_CLASS (mode) == MODE_INT)
5103 *cost += extra_cost->ldst.load;
5104 else if (mode == SFmode)
5105 *cost += extra_cost->ldst.loadf;
5106 else if (mode == DFmode)
5107 *cost += extra_cost->ldst.loadd;
5109 *cost +=
5110 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5111 0, speed));
5114 return true;
5116 case NEG:
5117 op0 = XEXP (x, 0);
5119 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5121 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5122 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5124 /* CSETM. */
5125 *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
5126 return true;
5129 /* Cost this as SUB wzr, X. */
5130 op0 = CONST0_RTX (GET_MODE (x));
5131 op1 = XEXP (x, 0);
5132 goto cost_minus;
5135 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
5137 /* Support (neg(fma...)) as a single instruction only if
5138 sign of zeros is unimportant. This matches the decision
5139 making in aarch64.md. */
5140 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
5142 /* FNMADD. */
5143 *cost = rtx_cost (op0, NEG, 0, speed);
5144 return true;
5146 if (speed)
5147 /* FNEG. */
5148 *cost += extra_cost->fp[mode == DFmode].neg;
5149 return false;
5152 return false;
5154 case CLRSB:
5155 case CLZ:
5156 if (speed)
5157 *cost += extra_cost->alu.clz;
5159 return false;
5161 case COMPARE:
5162 op0 = XEXP (x, 0);
5163 op1 = XEXP (x, 1);
5165 if (op1 == const0_rtx
5166 && GET_CODE (op0) == AND)
5168 x = op0;
5169 goto cost_logic;
5172 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
5174 /* TODO: A write to the CC flags possibly costs extra, this
5175 needs encoding in the cost tables. */
5177 /* CC_ZESWPmode supports zero extend for free. */
5178 if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
5179 op0 = XEXP (op0, 0);
5181 /* ANDS. */
5182 if (GET_CODE (op0) == AND)
5184 x = op0;
5185 goto cost_logic;
5188 if (GET_CODE (op0) == PLUS)
5190 /* ADDS (and CMN alias). */
5191 x = op0;
5192 goto cost_plus;
5195 if (GET_CODE (op0) == MINUS)
5197 /* SUBS. */
5198 x = op0;
5199 goto cost_minus;
5202 if (GET_CODE (op1) == NEG)
5204 /* CMN. */
5205 if (speed)
5206 *cost += extra_cost->alu.arith;
5208 *cost += rtx_cost (op0, COMPARE, 0, speed);
5209 *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed);
5210 return true;
5213 /* CMP.
5215 Compare can freely swap the order of operands, and
5216 canonicalization puts the more complex operation first.
5217 But the integer MINUS logic expects the shift/extend
5218 operation in op1. */
5219 if (! (REG_P (op0)
5220 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
5222 op0 = XEXP (x, 1);
5223 op1 = XEXP (x, 0);
5225 goto cost_minus;
5228 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5230 /* FCMP. */
5231 if (speed)
5232 *cost += extra_cost->fp[mode == DFmode].compare;
5234 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
5236 /* FCMP supports constant 0.0 for no extra cost. */
5237 return true;
5239 return false;
5242 return false;
5244 case MINUS:
5246 op0 = XEXP (x, 0);
5247 op1 = XEXP (x, 1);
5249 cost_minus:
5250 /* Detect valid immediates. */
5251 if ((GET_MODE_CLASS (mode) == MODE_INT
5252 || (GET_MODE_CLASS (mode) == MODE_CC
5253 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
5254 && CONST_INT_P (op1)
5255 && aarch64_uimm12_shift (INTVAL (op1)))
5257 *cost += rtx_cost (op0, MINUS, 0, speed);
5259 if (speed)
5260 /* SUB(S) (immediate). */
5261 *cost += extra_cost->alu.arith;
5262 return true;
5266 /* Look for SUB (extended register). */
5267 if (aarch64_rtx_arith_op_extract_p (op1, mode))
5269 if (speed)
5270 *cost += extra_cost->alu.arith_shift;
5272 *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
5273 (enum rtx_code) GET_CODE (op1),
5274 0, speed);
5275 return true;
5278 rtx new_op1 = aarch64_strip_extend (op1);
5280 /* Cost this as an FMA-alike operation. */
5281 if ((GET_CODE (new_op1) == MULT
5282 || GET_CODE (new_op1) == ASHIFT)
5283 && code != COMPARE)
5285 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
5286 (enum rtx_code) code,
5287 speed);
5288 *cost += rtx_cost (op0, MINUS, 0, speed);
5289 return true;
5292 *cost += rtx_cost (new_op1, MINUS, 1, speed);
5294 if (speed)
5296 if (GET_MODE_CLASS (mode) == MODE_INT)
5297 /* SUB(S). */
5298 *cost += extra_cost->alu.arith;
5299 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5300 /* FSUB. */
5301 *cost += extra_cost->fp[mode == DFmode].addsub;
5303 return true;
5306 case PLUS:
5308 rtx new_op0;
5310 op0 = XEXP (x, 0);
5311 op1 = XEXP (x, 1);
5313 cost_plus:
5314 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5315 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5317 /* CSINC. */
5318 *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
5319 *cost += rtx_cost (op1, PLUS, 1, speed);
5320 return true;
5323 if (GET_MODE_CLASS (mode) == MODE_INT
5324 && CONST_INT_P (op1)
5325 && aarch64_uimm12_shift (INTVAL (op1)))
5327 *cost += rtx_cost (op0, PLUS, 0, speed);
5329 if (speed)
5330 /* ADD (immediate). */
5331 *cost += extra_cost->alu.arith;
5332 return true;
5335 /* Look for ADD (extended register). */
5336 if (aarch64_rtx_arith_op_extract_p (op0, mode))
5338 if (speed)
5339 *cost += extra_cost->alu.arith_shift;
5341 *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
5342 (enum rtx_code) GET_CODE (op0),
5343 0, speed);
5344 return true;
5347 /* Strip any extend, leave shifts behind as we will
5348 cost them through mult_cost. */
5349 new_op0 = aarch64_strip_extend (op0);
5351 if (GET_CODE (new_op0) == MULT
5352 || GET_CODE (new_op0) == ASHIFT)
5354 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
5355 speed);
5356 *cost += rtx_cost (op1, PLUS, 1, speed);
5357 return true;
5360 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
5361 + rtx_cost (op1, PLUS, 1, speed));
5363 if (speed)
5365 if (GET_MODE_CLASS (mode) == MODE_INT)
5366 /* ADD. */
5367 *cost += extra_cost->alu.arith;
5368 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5369 /* FADD. */
5370 *cost += extra_cost->fp[mode == DFmode].addsub;
5372 return true;
5375 case BSWAP:
5376 *cost = COSTS_N_INSNS (1);
5378 if (speed)
5379 *cost += extra_cost->alu.rev;
5381 return false;
5383 case IOR:
5384 if (aarch_rev16_p (x))
5386 *cost = COSTS_N_INSNS (1);
5388 if (speed)
5389 *cost += extra_cost->alu.rev;
5391 return true;
5393 /* Fall through. */
5394 case XOR:
5395 case AND:
5396 cost_logic:
5397 op0 = XEXP (x, 0);
5398 op1 = XEXP (x, 1);
5400 if (code == AND
5401 && GET_CODE (op0) == MULT
5402 && CONST_INT_P (XEXP (op0, 1))
5403 && CONST_INT_P (op1)
5404 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
5405 INTVAL (op1)) != 0)
5407 /* This is a UBFM/SBFM. */
5408 *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
5409 if (speed)
5410 *cost += extra_cost->alu.bfx;
5411 return true;
5414 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5416 /* We possibly get the immediate for free; this is not
5417 modelled. */
5418 if (CONST_INT_P (op1)
5419 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
5421 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5423 if (speed)
5424 *cost += extra_cost->alu.logical;
5426 return true;
5428 else
5430 rtx new_op0 = op0;
5432 /* Handle ORN, EON, or BIC. */
5433 if (GET_CODE (op0) == NOT)
5434 op0 = XEXP (op0, 0);
5436 new_op0 = aarch64_strip_shift (op0);
5438 /* If we had a shift on op0 then this is a logical-shift-
5439 by-register/immediate operation. Otherwise, this is just
5440 a logical operation. */
5441 if (speed)
5443 if (new_op0 != op0)
5445 /* Shift by immediate. */
5446 if (CONST_INT_P (XEXP (op0, 1)))
5447 *cost += extra_cost->alu.log_shift;
5448 else
5449 *cost += extra_cost->alu.log_shift_reg;
5451 else
5452 *cost += extra_cost->alu.logical;
5455 /* In both cases we want to cost both operands. */
5456 *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
5457 + rtx_cost (op1, (enum rtx_code) code, 1, speed);
5459 return true;
5462 return false;
5464 case NOT:
5465 /* MVN. */
5466 if (speed)
5467 *cost += extra_cost->alu.logical;
5469 /* The logical instruction could have the shifted register form,
5470 but the cost is the same if the shift is processed as a separate
5471 instruction, so we don't bother with it here. */
5472 return false;
5474 case ZERO_EXTEND:
5476 op0 = XEXP (x, 0);
5477 /* If a value is written in SI mode, then zero extended to DI
5478 mode, the operation will in general be free as a write to
5479 a 'w' register implicitly zeroes the upper bits of an 'x'
5480 register. However, if this is
5482 (set (reg) (zero_extend (reg)))
5484 we must cost the explicit register move. */
5485 if (mode == DImode
5486 && GET_MODE (op0) == SImode
5487 && outer == SET)
5489 int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
5491 if (!op_cost && speed)
5492 /* MOV. */
5493 *cost += extra_cost->alu.extend;
5494 else
5495 /* Free, the cost is that of the SI mode operation. */
5496 *cost = op_cost;
5498 return true;
5500 else if (MEM_P (XEXP (x, 0)))
5502 /* All loads can zero extend to any size for free. */
5503 *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
5504 return true;
5507 /* UXTB/UXTH. */
5508 if (speed)
5509 *cost += extra_cost->alu.extend;
5511 return false;
5513 case SIGN_EXTEND:
5514 if (MEM_P (XEXP (x, 0)))
5516 /* LDRSH. */
5517 if (speed)
5519 rtx address = XEXP (XEXP (x, 0), 0);
5520 *cost += extra_cost->ldst.load_sign_extend;
5522 *cost +=
5523 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5524 0, speed));
5526 return true;
5529 if (speed)
5530 *cost += extra_cost->alu.extend;
5531 return false;
5533 case ASHIFT:
5534 op0 = XEXP (x, 0);
5535 op1 = XEXP (x, 1);
5537 if (CONST_INT_P (op1))
5539 /* LSL (immediate), UBFM, UBFIZ and friends. These are all
5540 aliases. */
5541 if (speed)
5542 *cost += extra_cost->alu.shift;
5544 /* We can incorporate zero/sign extend for free. */
5545 if (GET_CODE (op0) == ZERO_EXTEND
5546 || GET_CODE (op0) == SIGN_EXTEND)
5547 op0 = XEXP (op0, 0);
5549 *cost += rtx_cost (op0, ASHIFT, 0, speed);
5550 return true;
5552 else
5554 /* LSLV. */
5555 if (speed)
5556 *cost += extra_cost->alu.shift_reg;
5558 return false; /* All arguments need to be in registers. */
5561 case ROTATE:
5562 case ROTATERT:
5563 case LSHIFTRT:
5564 case ASHIFTRT:
5565 op0 = XEXP (x, 0);
5566 op1 = XEXP (x, 1);
5568 if (CONST_INT_P (op1))
5570 /* ASR (immediate) and friends. */
5571 if (speed)
5572 *cost += extra_cost->alu.shift;
5574 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5575 return true;
5577 else
5580 /* ASR (register) and friends. */
5581 if (speed)
5582 *cost += extra_cost->alu.shift_reg;
5584 return false; /* All arguments need to be in registers. */
5587 case SYMBOL_REF:
5589 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
5591 /* LDR. */
5592 if (speed)
5593 *cost += extra_cost->ldst.load;
5595 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
5596 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
5598 /* ADRP, followed by ADD. */
5599 *cost += COSTS_N_INSNS (1);
5600 if (speed)
5601 *cost += 2 * extra_cost->alu.arith;
5603 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
5604 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
5606 /* ADR. */
5607 if (speed)
5608 *cost += extra_cost->alu.arith;
5611 if (flag_pic)
5613 /* One extra load instruction, after accessing the GOT. */
5614 *cost += COSTS_N_INSNS (1);
5615 if (speed)
5616 *cost += extra_cost->ldst.load;
5618 return true;
5620 case HIGH:
5621 case LO_SUM:
5622 /* ADRP/ADD (immediate). */
5623 if (speed)
5624 *cost += extra_cost->alu.arith;
5625 return true;
5627 case ZERO_EXTRACT:
5628 case SIGN_EXTRACT:
5629 /* UBFX/SBFX. */
5630 if (speed)
5631 *cost += extra_cost->alu.bfx;
5633 /* We can trust that the immediates used will be correct (there
5634 are no by-register forms), so we need only cost op0. */
5635 *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed);
5636 return true;
5638 case MULT:
5639 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
5640 /* aarch64_rtx_mult_cost always handles recursion to its
5641 operands. */
5642 return true;
5644 case MOD:
5645 case UMOD:
5646 if (speed)
5648 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5649 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
5650 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
5651 else if (GET_MODE (x) == DFmode)
5652 *cost += (extra_cost->fp[1].mult
5653 + extra_cost->fp[1].div);
5654 else if (GET_MODE (x) == SFmode)
5655 *cost += (extra_cost->fp[0].mult
5656 + extra_cost->fp[0].div);
5658 return false; /* All arguments need to be in registers. */
5660 case DIV:
5661 case UDIV:
5662 case SQRT:
5663 if (speed)
5665 if (GET_MODE_CLASS (mode) == MODE_INT)
5666 /* There is no integer SQRT, so only DIV and UDIV can get
5667 here. */
5668 *cost += extra_cost->mult[mode == DImode].idiv;
5669 else
5670 *cost += extra_cost->fp[mode == DFmode].div;
5672 return false; /* All arguments need to be in registers. */
5674 case IF_THEN_ELSE:
5675 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
5676 XEXP (x, 2), cost, speed);
5678 case EQ:
5679 case NE:
5680 case GT:
5681 case GTU:
5682 case LT:
5683 case LTU:
5684 case GE:
5685 case GEU:
5686 case LE:
5687 case LEU:
5689 return false; /* All arguments must be in registers. */
5691 case FMA:
5692 op0 = XEXP (x, 0);
5693 op1 = XEXP (x, 1);
5694 op2 = XEXP (x, 2);
5696 if (speed)
5697 *cost += extra_cost->fp[mode == DFmode].fma;
5699 /* FMSUB, FNMADD, and FNMSUB are free. */
5700 if (GET_CODE (op0) == NEG)
5701 op0 = XEXP (op0, 0);
5703 if (GET_CODE (op2) == NEG)
5704 op2 = XEXP (op2, 0);
5706 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
5707 and the by-element operand as operand 0. */
5708 if (GET_CODE (op1) == NEG)
5709 op1 = XEXP (op1, 0);
5711 /* Catch vector-by-element operations. The by-element operand can
5712 either be (vec_duplicate (vec_select (x))) or just
5713 (vec_select (x)), depending on whether we are multiplying by
5714 a vector or a scalar.
5716 Canonicalization is not very good in these cases: FMA4 will put the
5717 by-element operand as operand 0, while FNMA4 will have it as operand 1. */
5718 if (GET_CODE (op0) == VEC_DUPLICATE)
5719 op0 = XEXP (op0, 0);
5720 else if (GET_CODE (op1) == VEC_DUPLICATE)
5721 op1 = XEXP (op1, 0);
5723 if (GET_CODE (op0) == VEC_SELECT)
5724 op0 = XEXP (op0, 0);
5725 else if (GET_CODE (op1) == VEC_SELECT)
5726 op1 = XEXP (op1, 0);
5728 /* If the remaining parameters are not registers,
5729 get the cost to put them into registers. */
5730 *cost += rtx_cost (op0, FMA, 0, speed);
5731 *cost += rtx_cost (op1, FMA, 1, speed);
5732 *cost += rtx_cost (op2, FMA, 2, speed);
5733 return true;
5735 case FLOAT_EXTEND:
5736 if (speed)
5737 *cost += extra_cost->fp[mode == DFmode].widen;
5738 return false;
5740 case FLOAT_TRUNCATE:
5741 if (speed)
5742 *cost += extra_cost->fp[mode == DFmode].narrow;
5743 return false;
5745 case FIX:
5746 case UNSIGNED_FIX:
5747 x = XEXP (x, 0);
5748 /* Strip the rounding part. They will all be implemented
5749 by the fcvt* family of instructions anyway. */
5750 if (GET_CODE (x) == UNSPEC)
5752 unsigned int uns_code = XINT (x, 1);
5754 if (uns_code == UNSPEC_FRINTA
5755 || uns_code == UNSPEC_FRINTM
5756 || uns_code == UNSPEC_FRINTN
5757 || uns_code == UNSPEC_FRINTP
5758 || uns_code == UNSPEC_FRINTZ)
5759 x = XVECEXP (x, 0, 0);
5762 if (speed)
5763 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
5765 *cost += rtx_cost (x, (enum rtx_code) code, 0, speed);
5766 return true;
5768 case ABS:
5769 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5771 /* FABS and FNEG are analogous. */
5772 if (speed)
5773 *cost += extra_cost->fp[mode == DFmode].neg;
5775 else
5777 /* Integer ABS will either be split to
5778 two arithmetic instructions, or will be an ABS
5779 (scalar), which we don't model. */
5780 *cost = COSTS_N_INSNS (2);
5781 if (speed)
5782 *cost += 2 * extra_cost->alu.arith;
5784 return false;
5786 case SMAX:
5787 case SMIN:
5788 if (speed)
5790 /* FMAXNM/FMINNM/FMAX/FMIN.
5791 TODO: This may not be accurate for all implementations, but
5792 we do not model this in the cost tables. */
5793 *cost += extra_cost->fp[mode == DFmode].addsub;
5795 return false;
5797 case UNSPEC:
5798 /* The floating point round to integer frint* instructions. */
5799 if (aarch64_frint_unspec_p (XINT (x, 1)))
5801 if (speed)
5802 *cost += extra_cost->fp[mode == DFmode].roundint;
5804 return false;
5807 if (XINT (x, 1) == UNSPEC_RBIT)
5809 if (speed)
5810 *cost += extra_cost->alu.rev;
5812 return false;
5814 break;
5816 case TRUNCATE:
5818 /* Decompose <su>muldi3_highpart. */
5819 if (/* (truncate:DI */
5820 mode == DImode
5821 /* (lshiftrt:TI */
5822 && GET_MODE (XEXP (x, 0)) == TImode
5823 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
5824 /* (mult:TI */
5825 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5826 /* (ANY_EXTEND:TI (reg:DI))
5827 (ANY_EXTEND:TI (reg:DI))) */
5828 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
5829 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
5830 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
5831 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
5832 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
5833 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
5834 /* (const_int 64) */
5835 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
5836 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
5838 /* UMULH/SMULH. */
5839 if (speed)
5840 *cost += extra_cost->mult[mode == DImode].extend;
5841 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
5842 MULT, 0, speed);
5843 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
5844 MULT, 1, speed);
5845 return true;
5848 /* Fall through. */
5849 default:
5850 break;
5853 if (dump_file && (dump_flags & TDF_DETAILS))
5854 fprintf (dump_file,
5855 "\nFailed to cost RTX. Assuming default cost.\n");
5857 return true;
5860 /* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
5861 calculated for X. This cost is stored in *COST. Returns true
5862 if the total cost of X was calculated. */
5863 static bool
5864 aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
5865 int param, int *cost, bool speed)
5867 bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
5869 if (dump_file && (dump_flags & TDF_DETAILS))
5871 print_rtl_single (dump_file, x);
5872 fprintf (dump_file, "\n%s cost: %d (%s)\n",
5873 speed ? "Hot" : "Cold",
5874 *cost, result ? "final" : "partial");
5877 return result;
5880 static int
5881 aarch64_register_move_cost (enum machine_mode mode,
5882 reg_class_t from_i, reg_class_t to_i)
5884 enum reg_class from = (enum reg_class) from_i;
5885 enum reg_class to = (enum reg_class) to_i;
5886 const struct cpu_regmove_cost *regmove_cost
5887 = aarch64_tune_params->regmove_cost;
5889 /* The cost of moving between a GPR and the stack register is the same as GP2GP. */
5890 if ((from == GENERAL_REGS && to == STACK_REG)
5891 || (to == GENERAL_REGS && from == STACK_REG))
5892 return regmove_cost->GP2GP;
5894 /* To/From the stack register, we move via the gprs. */
5895 if (to == STACK_REG || from == STACK_REG)
5896 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
5897 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
5899 if (from == GENERAL_REGS && to == GENERAL_REGS)
5900 return regmove_cost->GP2GP;
5901 else if (from == GENERAL_REGS)
5902 return regmove_cost->GP2FP;
5903 else if (to == GENERAL_REGS)
5904 return regmove_cost->FP2GP;
5906 /* When AdvSIMD instructions are disabled it is not possible to move
5907 a 128-bit value directly between Q registers. This is handled in
5908 secondary reload. A general register is used as a scratch to move
5909 the upper DI value and the lower DI value is moved directly,
5910 hence the cost is the sum of three moves. */
5911 if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
5912 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
5914 return regmove_cost->FP2FP;
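/* Example (illustrative, not from the original source): a DImode move
   from GENERAL_REGS to FP_REGS costs GP2FP and the reverse costs FP2GP,
   while a 16-byte (128-bit) FP-to-FP move with AdvSIMD disabled is
   priced as the sum of the three moves described above.  */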
5917 static int
5918 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
5919 reg_class_t rclass ATTRIBUTE_UNUSED,
5920 bool in ATTRIBUTE_UNUSED)
5922 return aarch64_tune_params->memmov_cost;
5925 /* Return the number of instructions that can be issued per cycle. */
5926 static int
5927 aarch64_sched_issue_rate (void)
5929 return aarch64_tune_params->issue_rate;
5932 /* Vectorizer cost model target hooks. */
5934 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5935 static int
5936 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5937 tree vectype,
5938 int misalign ATTRIBUTE_UNUSED)
5940 unsigned elements;
5942 switch (type_of_cost)
5944 case scalar_stmt:
5945 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
5947 case scalar_load:
5948 return aarch64_tune_params->vec_costs->scalar_load_cost;
5950 case scalar_store:
5951 return aarch64_tune_params->vec_costs->scalar_store_cost;
5953 case vector_stmt:
5954 return aarch64_tune_params->vec_costs->vec_stmt_cost;
5956 case vector_load:
5957 return aarch64_tune_params->vec_costs->vec_align_load_cost;
5959 case vector_store:
5960 return aarch64_tune_params->vec_costs->vec_store_cost;
5962 case vec_to_scalar:
5963 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
5965 case scalar_to_vec:
5966 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
5968 case unaligned_load:
5969 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
5971 case unaligned_store:
5972 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
5974 case cond_branch_taken:
5975 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
5977 case cond_branch_not_taken:
5978 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
5980 case vec_perm:
5981 case vec_promote_demote:
5982 return aarch64_tune_params->vec_costs->vec_stmt_cost;
5984 case vec_construct:
5985 elements = TYPE_VECTOR_SUBPARTS (vectype);
5986 return elements / 2 + 1;
5988 default:
5989 gcc_unreachable ();
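/* Example (illustrative, not from the original source): constructing a
   V4SI vector element by element is costed as
   elements / 2 + 1 = 4 / 2 + 1 = 3, a rough proxy for the number of
   insert-style operations needed.  */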
5993 /* Implement targetm.vectorize.add_stmt_cost. */
5994 static unsigned
5995 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5996 struct _stmt_vec_info *stmt_info, int misalign,
5997 enum vect_cost_model_location where)
5999 unsigned *cost = (unsigned *) data;
6000 unsigned retval = 0;
6002 if (flag_vect_cost_model)
6004 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6005 int stmt_cost =
6006 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
6008 /* Statements in an inner loop relative to the loop being
6009 vectorized are weighted more heavily. The value here is
6010 a function (linear for now) of the loop nest level. */
6011 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6013 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6014 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
6015 unsigned nest_level = loop_depth (loop);
6017 count *= nest_level;
6020 retval = (unsigned) (count * stmt_cost);
6021 cost[where] += retval;
6024 return retval;
6027 static void initialize_aarch64_code_model (void);
6029 /* Parse the architecture extension string. */
6031 static void
6032 aarch64_parse_extension (char *str)
6034 /* The extension string is parsed left to right. */
6035 const struct aarch64_option_extension *opt = NULL;
6037 /* Flag to say whether we are adding or removing an extension. */
6038 int adding_ext = -1;
6040 while (str != NULL && *str != 0)
6042 char *ext;
6043 size_t len;
6045 str++;
6046 ext = strchr (str, '+');
6048 if (ext != NULL)
6049 len = ext - str;
6050 else
6051 len = strlen (str);
6053 if (len >= 2 && strncmp (str, "no", 2) == 0)
6055 adding_ext = 0;
6056 len -= 2;
6057 str += 2;
6059 else if (len > 0)
6060 adding_ext = 1;
6062 if (len == 0)
6064 error ("missing feature modifier after %qs", "+no");
6065 return;
6068 /* Scan over the extensions table trying to find an exact match. */
6069 for (opt = all_extensions; opt->name != NULL; opt++)
6071 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
6073 /* Add or remove the extension. */
6074 if (adding_ext)
6075 aarch64_isa_flags |= opt->flags_on;
6076 else
6077 aarch64_isa_flags &= ~(opt->flags_off);
6078 break;
6082 if (opt->name == NULL)
6084 /* Extension not found in list. */
6085 error ("unknown feature modifier %qs", str);
6086 return;
6089 str = ext;
6092 return;
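/* Example (illustrative, not from the original source): for the suffix
   "+crypto+nofp" the loop above first sets the flags for "crypto" and
   then clears the flags for "fp"; an unrecognized name such as "+bogus"
   is reported via the "unknown feature modifier" error.  */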
6095 /* Parse the ARCH string. */
6097 static void
6098 aarch64_parse_arch (void)
6100 char *ext;
6101 const struct processor *arch;
6102 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
6103 size_t len;
6105 strcpy (str, aarch64_arch_string);
6107 ext = strchr (str, '+');
6109 if (ext != NULL)
6110 len = ext - str;
6111 else
6112 len = strlen (str);
6114 if (len == 0)
6116 error ("missing arch name in -march=%qs", str);
6117 return;
6120 /* Loop through the list of supported ARCHs to find a match. */
6121 for (arch = all_architectures; arch->name != NULL; arch++)
6123 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
6125 selected_arch = arch;
6126 aarch64_isa_flags = selected_arch->flags;
6128 if (!selected_cpu)
6129 selected_cpu = &all_cores[selected_arch->core];
6131 if (ext != NULL)
6133 /* ARCH string contains at least one extension. */
6134 aarch64_parse_extension (ext);
6137 if (strcmp (selected_arch->arch, selected_cpu->arch))
6139 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
6140 selected_cpu->name, selected_arch->name);
6143 return;
6147 /* ARCH name not found in list. */
6148 error ("unknown value %qs for -march", str);
6149 return;
6152 /* Parse the CPU string. */
6154 static void
6155 aarch64_parse_cpu (void)
6157 char *ext;
6158 const struct processor *cpu;
6159 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
6160 size_t len;
6162 strcpy (str, aarch64_cpu_string);
6164 ext = strchr (str, '+');
6166 if (ext != NULL)
6167 len = ext - str;
6168 else
6169 len = strlen (str);
6171 if (len == 0)
6173 error ("missing cpu name in -mcpu=%qs", str);
6174 return;
6177 /* Loop through the list of supported CPUs to find a match. */
6178 for (cpu = all_cores; cpu->name != NULL; cpu++)
6180 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
6182 selected_cpu = cpu;
6183 selected_tune = cpu;
6184 aarch64_isa_flags = selected_cpu->flags;
6186 if (ext != NULL)
6188 /* CPU string contains at least one extension. */
6189 aarch64_parse_extension (ext);
6192 return;
6196 /* CPU name not found in list. */
6197 error ("unknown value %qs for -mcpu", str);
6198 return;
6201 /* Parse the TUNE string. */
6203 static void
6204 aarch64_parse_tune (void)
6206 const struct processor *cpu;
6207 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
6208 strcpy (str, aarch64_tune_string);
6210 /* Loop through the list of supported CPUs to find a match. */
6211 for (cpu = all_cores; cpu->name != NULL; cpu++)
6213 if (strcmp (cpu->name, str) == 0)
6215 selected_tune = cpu;
6216 return;
6220 /* CPU name not found in list. */
6221 error ("unknown value %qs for -mtune", str);
6222 return;
6226 /* Implement TARGET_OPTION_OVERRIDE. */
6228 static void
6229 aarch64_override_options (void)
6231 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
6232 If either of -march or -mtune is given, they override their
6233 respective component of -mcpu.
6235 So, first parse AARCH64_CPU_STRING, then the others; be careful
6236 with -march because, if -mcpu is not present on the command line,
6237 -march must set a sensible default CPU. */
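/* Example of this precedence (illustrative, not from the original
   source): with "-mcpu=cortex-a57 -mtune=cortex-a53" the -mcpu string
   selects the architecture and an initial tuning of cortex-a57, after
   which the explicit -mtune switches the tuning to cortex-a53.  */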
6238 if (aarch64_cpu_string)
6240 aarch64_parse_cpu ();
6243 if (aarch64_arch_string)
6245 aarch64_parse_arch ();
6248 if (aarch64_tune_string)
6250 aarch64_parse_tune ();
6253 #ifndef HAVE_AS_MABI_OPTION
6254 /* The compiler may have been configured with 2.23.* binutils, which does
6255 not have support for ILP32. */
6256 if (TARGET_ILP32)
6257 error ("Assembler does not support -mabi=ilp32");
6258 #endif
6260 initialize_aarch64_code_model ();
6262 aarch64_build_bitmask_table ();
6264 /* This target defaults to strict volatile bitfields. */
6265 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
6266 flag_strict_volatile_bitfields = 1;
6268 /* If the user did not specify a processor, choose the default
6269 one for them. This will be the CPU set during configuration using
6270 --with-cpu, otherwise it is "generic". */
6271 if (!selected_cpu)
6273 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
6274 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
6277 gcc_assert (selected_cpu);
6279 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
6280 if (!selected_tune)
6281 selected_tune = &all_cores[selected_cpu->core];
6283 aarch64_tune_flags = selected_tune->flags;
6284 aarch64_tune = selected_tune->core;
6285 aarch64_tune_params = selected_tune->tune;
6287 aarch64_override_options_after_change ();
6290 /* Implement targetm.override_options_after_change. */
6292 static void
6293 aarch64_override_options_after_change (void)
6295 if (flag_omit_frame_pointer)
6296 flag_omit_leaf_frame_pointer = false;
6297 else if (flag_omit_leaf_frame_pointer)
6298 flag_omit_frame_pointer = true;
6301 static struct machine_function *
6302 aarch64_init_machine_status (void)
6304 struct machine_function *machine;
6305 machine = ggc_cleared_alloc<machine_function> ();
6306 return machine;
6309 void
6310 aarch64_init_expanders (void)
6312 init_machine_status = aarch64_init_machine_status;
6315 /* A checking mechanism for the implementation of the various code models. */
6316 static void
6317 initialize_aarch64_code_model (void)
6319 if (flag_pic)
6321 switch (aarch64_cmodel_var)
6323 case AARCH64_CMODEL_TINY:
6324 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
6325 break;
6326 case AARCH64_CMODEL_SMALL:
6327 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
6328 break;
6329 case AARCH64_CMODEL_LARGE:
6330 sorry ("code model %qs with -f%s", "large",
6331 flag_pic > 1 ? "PIC" : "pic");
6332 default:
6333 gcc_unreachable ();
6336 else
6337 aarch64_cmodel = aarch64_cmodel_var;
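/* Example (illustrative, not from the original source): compiling with
   "-mcmodel=small -fpic" maps AARCH64_CMODEL_SMALL to
   AARCH64_CMODEL_SMALL_PIC above, while "-mcmodel=large -fpic" is
   rejected with the "sorry" diagnostic.  */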
6340 /* Return true if SYMBOL_REF X binds locally. */
6342 static bool
6343 aarch64_symbol_binds_local_p (const_rtx x)
6345 return (SYMBOL_REF_DECL (x)
6346 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
6347 : SYMBOL_REF_LOCAL_P (x));
6350 /* Return true if SYMBOL_REF X is thread local. */
6351 static bool
6352 aarch64_tls_symbol_p (rtx x)
6354 if (! TARGET_HAVE_TLS)
6355 return false;
6357 if (GET_CODE (x) != SYMBOL_REF)
6358 return false;
6360 return SYMBOL_REF_TLS_MODEL (x) != 0;
6363 /* Classify a TLS symbol into one of the TLS kinds. */
6364 enum aarch64_symbol_type
6365 aarch64_classify_tls_symbol (rtx x)
6367 enum tls_model tls_kind = tls_symbolic_operand_type (x);
6369 switch (tls_kind)
6371 case TLS_MODEL_GLOBAL_DYNAMIC:
6372 case TLS_MODEL_LOCAL_DYNAMIC:
6373 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
6375 case TLS_MODEL_INITIAL_EXEC:
6376 return SYMBOL_SMALL_GOTTPREL;
6378 case TLS_MODEL_LOCAL_EXEC:
6379 return SYMBOL_SMALL_TPREL;
6381 case TLS_MODEL_EMULATED:
6382 case TLS_MODEL_NONE:
6383 return SYMBOL_FORCE_TO_MEM;
6385 default:
6386 gcc_unreachable ();
6390 /* Return the method that should be used to access SYMBOL_REF or
6391 LABEL_REF X in context CONTEXT. */
6393 enum aarch64_symbol_type
6394 aarch64_classify_symbol (rtx x,
6395 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
6397 if (GET_CODE (x) == LABEL_REF)
6399 switch (aarch64_cmodel)
6401 case AARCH64_CMODEL_LARGE:
6402 return SYMBOL_FORCE_TO_MEM;
6404 case AARCH64_CMODEL_TINY_PIC:
6405 case AARCH64_CMODEL_TINY:
6406 return SYMBOL_TINY_ABSOLUTE;
6408 case AARCH64_CMODEL_SMALL_PIC:
6409 case AARCH64_CMODEL_SMALL:
6410 return SYMBOL_SMALL_ABSOLUTE;
6412 default:
6413 gcc_unreachable ();
6417 if (GET_CODE (x) == SYMBOL_REF)
6419 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
6420 return SYMBOL_FORCE_TO_MEM;
6422 if (aarch64_tls_symbol_p (x))
6423 return aarch64_classify_tls_symbol (x);
6425 switch (aarch64_cmodel)
6427 case AARCH64_CMODEL_TINY:
6428 if (SYMBOL_REF_WEAK (x))
6429 return SYMBOL_FORCE_TO_MEM;
6430 return SYMBOL_TINY_ABSOLUTE;
6432 case AARCH64_CMODEL_SMALL:
6433 if (SYMBOL_REF_WEAK (x))
6434 return SYMBOL_FORCE_TO_MEM;
6435 return SYMBOL_SMALL_ABSOLUTE;
6437 case AARCH64_CMODEL_TINY_PIC:
6438 if (!aarch64_symbol_binds_local_p (x))
6439 return SYMBOL_TINY_GOT;
6440 return SYMBOL_TINY_ABSOLUTE;
6442 case AARCH64_CMODEL_SMALL_PIC:
6443 if (!aarch64_symbol_binds_local_p (x))
6444 return SYMBOL_SMALL_GOT;
6445 return SYMBOL_SMALL_ABSOLUTE;
6447 default:
6448 gcc_unreachable ();
6452 /* By default push everything into the constant pool. */
6453 return SYMBOL_FORCE_TO_MEM;
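/* Editorial examples of the classification above (a reading of the code,
   not an additional contract): under the default small code model a
   locally binding SYMBOL_REF is SYMBOL_SMALL_ABSOLUTE, the same symbol
   under -fpic when it does not bind locally is SYMBOL_SMALL_GOT, a weak
   symbol under the tiny or small model is SYMBOL_FORCE_TO_MEM, and any
   symbol under -mcmodel=large is SYMBOL_FORCE_TO_MEM.  */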
6456 bool
6457 aarch64_constant_address_p (rtx x)
6459 return (CONSTANT_P (x) && memory_address_p (DImode, x));
6462 bool
6463 aarch64_legitimate_pic_operand_p (rtx x)
6465 if (GET_CODE (x) == SYMBOL_REF
6466 || (GET_CODE (x) == CONST
6467 && GET_CODE (XEXP (x, 0)) == PLUS
6468 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6469 return false;
6471 return true;
6474 /* Return true if X holds either a valid quarter-precision floating-point
6475 constant or the floating-point constant +0.0 (SFmode and DFmode only). */
6476 static bool
6477 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
6479 if (!CONST_DOUBLE_P (x))
6480 return false;
6482 /* TODO: We could handle moving 0.0 to a TFmode register,
6483 but first we would like to refactor the movtf_aarch64
6484 to be more amenable to splitting moves properly and
6485 correctly gating on TARGET_SIMD. For now, reject all
6486 constants that are not destined for SFmode or DFmode registers. */
6487 if (!(mode == SFmode || mode == DFmode))
6488 return false;
6490 if (aarch64_float_const_zero_rtx_p (x))
6491 return true;
6492 return aarch64_float_const_representable_p (x);
6495 static bool
6496 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
6498 /* Do not allow vector struct mode constants. We could support
6499 0 and -1 easily, but they need support in aarch64-simd.md. */
6500 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
6501 return false;
6503 /* This could probably go away because
6504 we now decompose CONST_INTs according to expand_mov_immediate. */
6505 if ((GET_CODE (x) == CONST_VECTOR
6506 && aarch64_simd_valid_immediate (x, mode, false, NULL))
6507 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
6508 return !targetm.cannot_force_const_mem (mode, x);
6510 if (GET_CODE (x) == HIGH
6511 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6512 return true;
6514 return aarch64_constant_address_p (x);
6518 aarch64_load_tp (rtx target)
6520 if (!target
6521 || GET_MODE (target) != Pmode
6522 || !register_operand (target, Pmode))
6523 target = gen_reg_rtx (Pmode);
6525 /* Can return in any reg. */
6526 emit_insn (gen_aarch64_load_tp_hard (target));
6527 return target;
6530 /* On AAPCS systems, this is the "struct __va_list". */
6531 static GTY(()) tree va_list_type;
6533 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
6534 Return the type to use as __builtin_va_list.
6536 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
6538 struct __va_list
6540 void *__stack;
6541 void *__gr_top;
6542 void *__vr_top;
6543 int __gr_offs;
6544 int __vr_offs;
6545 }; */
6547 static tree
6548 aarch64_build_builtin_va_list (void)
6550 tree va_list_name;
6551 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6553 /* Create the type. */
6554 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
6555 /* Give it the required name. */
6556 va_list_name = build_decl (BUILTINS_LOCATION,
6557 TYPE_DECL,
6558 get_identifier ("__va_list"),
6559 va_list_type);
6560 DECL_ARTIFICIAL (va_list_name) = 1;
6561 TYPE_NAME (va_list_type) = va_list_name;
6562 TYPE_STUB_DECL (va_list_type) = va_list_name;
6564 /* Create the fields. */
6565 f_stack = build_decl (BUILTINS_LOCATION,
6566 FIELD_DECL, get_identifier ("__stack"),
6567 ptr_type_node);
6568 f_grtop = build_decl (BUILTINS_LOCATION,
6569 FIELD_DECL, get_identifier ("__gr_top"),
6570 ptr_type_node);
6571 f_vrtop = build_decl (BUILTINS_LOCATION,
6572 FIELD_DECL, get_identifier ("__vr_top"),
6573 ptr_type_node);
6574 f_groff = build_decl (BUILTINS_LOCATION,
6575 FIELD_DECL, get_identifier ("__gr_offs"),
6576 integer_type_node);
6577 f_vroff = build_decl (BUILTINS_LOCATION,
6578 FIELD_DECL, get_identifier ("__vr_offs"),
6579 integer_type_node);
6581 DECL_ARTIFICIAL (f_stack) = 1;
6582 DECL_ARTIFICIAL (f_grtop) = 1;
6583 DECL_ARTIFICIAL (f_vrtop) = 1;
6584 DECL_ARTIFICIAL (f_groff) = 1;
6585 DECL_ARTIFICIAL (f_vroff) = 1;
6587 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
6588 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
6589 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
6590 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
6591 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
6593 TYPE_FIELDS (va_list_type) = f_stack;
6594 DECL_CHAIN (f_stack) = f_grtop;
6595 DECL_CHAIN (f_grtop) = f_vrtop;
6596 DECL_CHAIN (f_vrtop) = f_groff;
6597 DECL_CHAIN (f_groff) = f_vroff;
6599 /* Compute its layout. */
6600 layout_type (va_list_type);
6602 return va_list_type;
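/* Editorial sketch of the resulting layout, assuming LP64 (8-byte
   pointers, 4-byte int): __stack at offset 0, __gr_top at 8, __vr_top at
   16, __gr_offs at 24 and __vr_offs at 28, giving a 32-byte va_list.
   This is derived from the field order above and AAPCS64 \S 7.1.4, not
   an extra invariant enforced here.  */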
6605 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
6606 static void
6607 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6609 const CUMULATIVE_ARGS *cum;
6610 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6611 tree stack, grtop, vrtop, groff, vroff;
6612 tree t;
6613 int gr_save_area_size;
6614 int vr_save_area_size;
6615 int vr_offset;
6617 cum = &crtl->args.info;
6618 gr_save_area_size
6619 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
6620 vr_save_area_size
6621 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
6623 if (TARGET_GENERAL_REGS_ONLY)
6625 if (cum->aapcs_nvrn > 0)
6626 sorry ("%qs and floating point or vector arguments",
6627 "-mgeneral-regs-only");
6628 vr_save_area_size = 0;
6631 f_stack = TYPE_FIELDS (va_list_type_node);
6632 f_grtop = DECL_CHAIN (f_stack);
6633 f_vrtop = DECL_CHAIN (f_grtop);
6634 f_groff = DECL_CHAIN (f_vrtop);
6635 f_vroff = DECL_CHAIN (f_groff);
6637 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
6638 NULL_TREE);
6639 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
6640 NULL_TREE);
6641 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
6642 NULL_TREE);
6643 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
6644 NULL_TREE);
6645 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
6646 NULL_TREE);
6648 /* Emit code to initialize STACK, which points to the next varargs stack
6649 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
6650 by named arguments. STACK is 8-byte aligned. */
6651 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
6652 if (cum->aapcs_stack_size > 0)
6653 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
6654 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
6655 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6657 /* Emit code to initialize GRTOP, the top of the GR save area.
6658 virtual_incoming_args_rtx should have been 16-byte aligned. */
6659 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
6660 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
6661 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6663 /* Emit code to initialize VRTOP, the top of the VR save area.
6664 This address is gr_save_area_bytes below GRTOP, rounded
6665 down to the next 16-byte boundary. */
6666 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
6667 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
6668 STACK_BOUNDARY / BITS_PER_UNIT);
6670 if (vr_offset)
6671 t = fold_build_pointer_plus_hwi (t, -vr_offset);
6672 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
6673 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6675 /* Emit code to initialize GROFF, the offset from GRTOP of the
6676 next GPR argument. */
6677 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
6678 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
6679 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6681 /* Likewise emit code to initialize VROFF, the offset from VRTOP
6682 of the next VR argument. */
6683 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
6684 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
6685 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
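/* Editorial summary of the state established above by va_start (ap, ...),
   written as pseudo-C for readers (a sketch of what the expanders emit,
   not a separate ABI statement):

     ap.__stack   = virtual_incoming_args + cum->aapcs_stack_size * UNITS_PER_WORD;
     ap.__gr_top  = virtual_incoming_args;
     ap.__vr_top  = virtual_incoming_args - AARCH64_ROUND_UP (gr_save_area_size, 16);
     ap.__gr_offs = -gr_save_area_size;
     ap.__vr_offs = -vr_save_area_size;  */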
6688 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
6690 static tree
6691 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6692 gimple_seq *post_p ATTRIBUTE_UNUSED)
6694 tree addr;
6695 bool indirect_p;
6696 bool is_ha; /* is HFA or HVA. */
6697 bool dw_align; /* double-word align. */
6698 enum machine_mode ag_mode = VOIDmode;
6699 int nregs;
6700 enum machine_mode mode;
6702 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6703 tree stack, f_top, f_off, off, arg, roundup, on_stack;
6704 HOST_WIDE_INT size, rsize, adjust, align;
6705 tree t, u, cond1, cond2;
6707 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6708 if (indirect_p)
6709 type = build_pointer_type (type);
6711 mode = TYPE_MODE (type);
6713 f_stack = TYPE_FIELDS (va_list_type_node);
6714 f_grtop = DECL_CHAIN (f_stack);
6715 f_vrtop = DECL_CHAIN (f_grtop);
6716 f_groff = DECL_CHAIN (f_vrtop);
6717 f_vroff = DECL_CHAIN (f_groff);
6719 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
6720 f_stack, NULL_TREE);
6721 size = int_size_in_bytes (type);
6722 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
6724 dw_align = false;
6725 adjust = 0;
6726 if (aarch64_vfp_is_call_or_return_candidate (mode,
6727 type,
6728 &ag_mode,
6729 &nregs,
6730 &is_ha))
6732 /* TYPE passed in fp/simd registers. */
6733 if (TARGET_GENERAL_REGS_ONLY)
6734 sorry ("%qs and floating point or vector arguments",
6735 "-mgeneral-regs-only");
6737 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
6738 unshare_expr (valist), f_vrtop, NULL_TREE);
6739 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
6740 unshare_expr (valist), f_vroff, NULL_TREE);
6742 rsize = nregs * UNITS_PER_VREG;
6744 if (is_ha)
6746 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
6747 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
6749 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
6750 && size < UNITS_PER_VREG)
6752 adjust = UNITS_PER_VREG - size;
6755 else
6757 /* TYPE passed in general registers. */
6758 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
6759 unshare_expr (valist), f_grtop, NULL_TREE);
6760 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
6761 unshare_expr (valist), f_groff, NULL_TREE);
6762 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6763 nregs = rsize / UNITS_PER_WORD;
6765 if (align > 8)
6766 dw_align = true;
6768 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6769 && size < UNITS_PER_WORD)
6771 adjust = UNITS_PER_WORD - size;
6775 /* Get a local temporary for the field value. */
6776 off = get_initialized_tmp_var (f_off, pre_p, NULL);
6778 /* Emit code to branch if off >= 0. */
6779 t = build2 (GE_EXPR, boolean_type_node, off,
6780 build_int_cst (TREE_TYPE (off), 0));
6781 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
6783 if (dw_align)
6785 /* Emit: offs = (offs + 15) & -16. */
6786 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6787 build_int_cst (TREE_TYPE (off), 15));
6788 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
6789 build_int_cst (TREE_TYPE (off), -16));
6790 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
6792 else
6793 roundup = NULL;
6795 /* Update ap.__[g|v]r_offs */
6796 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6797 build_int_cst (TREE_TYPE (off), rsize));
6798 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
6800 /* String up. */
6801 if (roundup)
6802 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6804 /* [cond2] if (ap.__[g|v]r_offs > 0) */
6805 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
6806 build_int_cst (TREE_TYPE (f_off), 0));
6807 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
6809 /* String up: make sure the assignment happens before the use. */
6810 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
6811 COND_EXPR_ELSE (cond1) = t;
6813 /* Prepare the trees handling the argument that is passed on the stack;
6814 the top level node will store in ON_STACK. */
6815 arg = get_initialized_tmp_var (stack, pre_p, NULL);
6816 if (align > 8)
6818 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
6819 t = fold_convert (intDI_type_node, arg);
6820 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6821 build_int_cst (TREE_TYPE (t), 15));
6822 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6823 build_int_cst (TREE_TYPE (t), -16));
6824 t = fold_convert (TREE_TYPE (arg), t);
6825 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
6827 else
6828 roundup = NULL;
6829 /* Advance ap.__stack */
6830 t = fold_convert (intDI_type_node, arg);
6831 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6832 build_int_cst (TREE_TYPE (t), size + 7));
6833 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6834 build_int_cst (TREE_TYPE (t), -8));
6835 t = fold_convert (TREE_TYPE (arg), t);
6836 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
6837 /* String up roundup and advance. */
6838 if (roundup)
6839 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6840 /* String up with arg */
6841 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
6842 /* Big-endianness related address adjustment. */
6843 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6844 && size < UNITS_PER_WORD)
6846 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
6847 size_int (UNITS_PER_WORD - size));
6848 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
6851 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
6852 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
6854 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
6855 t = off;
6856 if (adjust)
6857 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
6858 build_int_cst (TREE_TYPE (off), adjust));
6860 t = fold_convert (sizetype, t);
6861 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
6863 if (is_ha)
6865 /* type ha; // treat as "struct {ftype field[n];}"
6866 ... [computing offs]
6867 for (i = 0; i < nregs; ++i, offs += 16)
6868 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
6869 return ha; */
6870 int i;
6871 tree tmp_ha, field_t, field_ptr_t;
6873 /* Declare a local variable. */
6874 tmp_ha = create_tmp_var_raw (type, "ha");
6875 gimple_add_tmp_var (tmp_ha);
6877 /* Establish the base type. */
6878 switch (ag_mode)
6880 case SFmode:
6881 field_t = float_type_node;
6882 field_ptr_t = float_ptr_type_node;
6883 break;
6884 case DFmode:
6885 field_t = double_type_node;
6886 field_ptr_t = double_ptr_type_node;
6887 break;
6888 case TFmode:
6889 field_t = long_double_type_node;
6890 field_ptr_t = long_double_ptr_type_node;
6891 break;
6892 /* Half-precision and quad-precision floats are not fully supported yet.
6893 Enable the following code once support is complete; the correct type
6894 node for __fp16 * still needs to be found. */
6895 #if 0
6896 case HFmode:
6897 field_t = float_type_node;
6898 field_ptr_t = float_ptr_type_node;
6899 break;
6900 #endif
6901 case V2SImode:
6902 case V4SImode:
6904 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
6905 field_t = build_vector_type_for_mode (innertype, ag_mode);
6906 field_ptr_t = build_pointer_type (field_t);
6908 break;
6909 default:
6910 gcc_assert (0);
6913 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area). */
6914 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
6915 addr = t;
6916 t = fold_convert (field_ptr_t, addr);
6917 t = build2 (MODIFY_EXPR, field_t,
6918 build1 (INDIRECT_REF, field_t, tmp_ha),
6919 build1 (INDIRECT_REF, field_t, t));
6921 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
6922 for (i = 1; i < nregs; ++i)
6924 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
6925 u = fold_convert (field_ptr_t, addr);
6926 u = build2 (MODIFY_EXPR, field_t,
6927 build2 (MEM_REF, field_t, tmp_ha,
6928 build_int_cst (field_ptr_t,
6929 (i *
6930 int_size_in_bytes (field_t)))),
6931 build1 (INDIRECT_REF, field_t, u));
6932 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
6935 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
6936 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
6939 COND_EXPR_ELSE (cond2) = t;
6940 addr = fold_convert (build_pointer_type (type), cond1);
6941 addr = build_va_arg_indirect_ref (addr);
6943 if (indirect_p)
6944 addr = build_va_arg_indirect_ref (addr);
6946 return addr;
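/* Editorial pseudo-C sketch of the tree built above for an argument taken
   from the general-register save area (alignment, big-endian and HFA
   handling omitted; an aid to reading the code, not a specification):

     off = ap.__gr_offs;
     if (off >= 0)
       addr = <on_stack sequence>;
     else
       {
         ap.__gr_offs = off + rsize;
         if (ap.__gr_offs > 0)
           addr = <on_stack sequence>;
         else
           addr = ap.__gr_top + off;
       }
     arg = *(TYPE *) addr;

   The FP/SIMD path is analogous, using __vr_top and __vr_offs.  */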
6949 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
6951 static void
6952 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
6953 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6954 int no_rtl)
6956 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6957 CUMULATIVE_ARGS local_cum;
6958 int gr_saved, vr_saved;
6960 /* The caller has advanced CUM up to, but not beyond, the last named
6961 argument. Advance a local copy of CUM past the last "real" named
6962 argument, to find out how many registers are left over. */
6963 local_cum = *cum;
6964 aarch64_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, true);
6966 /* Find out how many registers we need to save. */
6967 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
6968 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
6970 if (TARGET_GENERAL_REGS_ONLY)
6972 if (local_cum.aapcs_nvrn > 0)
6973 sorry ("%qs and floating point or vector arguments",
6974 "-mgeneral-regs-only");
6975 vr_saved = 0;
6978 if (!no_rtl)
6980 if (gr_saved > 0)
6982 rtx ptr, mem;
6984 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
6985 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
6986 - gr_saved * UNITS_PER_WORD);
6987 mem = gen_frame_mem (BLKmode, ptr);
6988 set_mem_alias_set (mem, get_varargs_alias_set ());
6990 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
6991 mem, gr_saved);
6993 if (vr_saved > 0)
6995 /* We can't use move_block_from_reg, because it will use
6996 the wrong mode, storing D regs only. */
6997 enum machine_mode mode = TImode;
6998 int off, i;
7000 /* Set OFF to the offset from virtual_incoming_args_rtx of
7001 the first vector register. The VR save area lies below
7002 the GR one, and is aligned to 16 bytes. */
7003 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7004 STACK_BOUNDARY / BITS_PER_UNIT);
7005 off -= vr_saved * UNITS_PER_VREG;
7007 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
7009 rtx ptr, mem;
7011 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
7012 mem = gen_frame_mem (mode, ptr);
7013 set_mem_alias_set (mem, get_varargs_alias_set ());
7014 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
7015 off += UNITS_PER_VREG;
7020 /* We don't save the size into *PRETEND_SIZE because we want to avoid
7021 any complication of having crtl->args.pretend_args_size changed. */
7022 cfun->machine->frame.saved_varargs_size
7023 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7024 STACK_BOUNDARY / BITS_PER_UNIT)
7025 + vr_saved * UNITS_PER_VREG);
7028 static void
7029 aarch64_conditional_register_usage (void)
7031 int i;
7032 if (!TARGET_FLOAT)
7034 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
7036 fixed_regs[i] = 1;
7037 call_used_regs[i] = 1;
7042 /* Walk down the type tree of TYPE counting consecutive base elements.
7043 If *MODEP is VOIDmode, then set it to the first valid floating point
7044 type. If a non-floating point type is found, or if a floating point
7045 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
7046 otherwise return the count in the sub-tree. */
7047 static int
7048 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
7050 enum machine_mode mode;
7051 HOST_WIDE_INT size;
7053 switch (TREE_CODE (type))
7055 case REAL_TYPE:
7056 mode = TYPE_MODE (type);
7057 if (mode != DFmode && mode != SFmode && mode != TFmode)
7058 return -1;
7060 if (*modep == VOIDmode)
7061 *modep = mode;
7063 if (*modep == mode)
7064 return 1;
7066 break;
7068 case COMPLEX_TYPE:
7069 mode = TYPE_MODE (TREE_TYPE (type));
7070 if (mode != DFmode && mode != SFmode && mode != TFmode)
7071 return -1;
7073 if (*modep == VOIDmode)
7074 *modep = mode;
7076 if (*modep == mode)
7077 return 2;
7079 break;
7081 case VECTOR_TYPE:
7082 /* Use V2SImode and V4SImode as representatives of all 64-bit
7083 and 128-bit vector types. */
7084 size = int_size_in_bytes (type);
7085 switch (size)
7087 case 8:
7088 mode = V2SImode;
7089 break;
7090 case 16:
7091 mode = V4SImode;
7092 break;
7093 default:
7094 return -1;
7097 if (*modep == VOIDmode)
7098 *modep = mode;
7100 /* Vector modes are considered to be opaque: two vectors are
7101 equivalent for the purposes of being homogeneous aggregates
7102 if they are the same size. */
7103 if (*modep == mode)
7104 return 1;
7106 break;
7108 case ARRAY_TYPE:
7110 int count;
7111 tree index = TYPE_DOMAIN (type);
7113 /* Can't handle incomplete types or sizes that are not
7114 fixed. */
7115 if (!COMPLETE_TYPE_P (type)
7116 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7117 return -1;
7119 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
7120 if (count == -1
7121 || !index
7122 || !TYPE_MAX_VALUE (index)
7123 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
7124 || !TYPE_MIN_VALUE (index)
7125 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
7126 || count < 0)
7127 return -1;
7129 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
7130 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
7132 /* There must be no padding. */
7133 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7134 return -1;
7136 return count;
7139 case RECORD_TYPE:
7141 int count = 0;
7142 int sub_count;
7143 tree field;
7145 /* Can't handle incomplete types or sizes that are not
7146 fixed. */
7147 if (!COMPLETE_TYPE_P (type)
7148 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7149 return -1;
7151 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7153 if (TREE_CODE (field) != FIELD_DECL)
7154 continue;
7156 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7157 if (sub_count < 0)
7158 return -1;
7159 count += sub_count;
7162 /* There must be no padding. */
7163 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7164 return -1;
7166 return count;
7169 case UNION_TYPE:
7170 case QUAL_UNION_TYPE:
7172 /* These aren't very interesting except in a degenerate case. */
7173 int count = 0;
7174 int sub_count;
7175 tree field;
7177 /* Can't handle incomplete types or sizes that are not
7178 fixed. */
7179 if (!COMPLETE_TYPE_P (type)
7180 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7181 return -1;
7183 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7185 if (TREE_CODE (field) != FIELD_DECL)
7186 continue;
7188 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7189 if (sub_count < 0)
7190 return -1;
7191 count = count > sub_count ? count : sub_count;
7194 /* There must be no padding. */
7195 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7196 return -1;
7198 return count;
7201 default:
7202 break;
7205 return -1;
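/* Editorial examples of the classification above (not from the original
   sources): "struct { double x, y, z; }" returns 3 with *MODEP set to
   DFmode, "_Complex float" returns 2 with SFmode, a two-element array of
   a 16-byte vector type returns 2 with V4SImode, while
   "struct { float f; double d; }" mixes base modes and returns -1.  */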
7208 /* Return true if we use LRA instead of reload pass. */
7209 static bool
7210 aarch64_lra_p (void)
7212 return aarch64_lra_flag;
7215 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
7216 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
7217 array types. The C99 floating-point complex types are also considered
7218 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
7219 types, which are GCC extensions and out of the scope of AAPCS64, are
7220 treated as composite types here as well.
7222 Note that MODE itself is not sufficient in determining whether a type
7223 is such a composite type or not. This is because
7224 stor-layout.c:compute_record_mode may have already changed the MODE
7225 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
7226 structure with only one field may have its MODE set to the mode of the
7227 field. Also an integer mode whose size matches the size of the
7228 RECORD_TYPE type may be used to substitute the original mode
7229 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
7230 solely relied on. */
7232 static bool
7233 aarch64_composite_type_p (const_tree type,
7234 enum machine_mode mode)
7236 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
7237 return true;
7239 if (mode == BLKmode
7240 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7241 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
7242 return true;
7244 return false;
7247 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
7248 type as described in AAPCS64 \S 4.1.2.
7250 See the comment above aarch64_composite_type_p for the notes on MODE. */
7252 static bool
7253 aarch64_short_vector_p (const_tree type,
7254 enum machine_mode mode)
7256 HOST_WIDE_INT size = -1;
7258 if (type && TREE_CODE (type) == VECTOR_TYPE)
7259 size = int_size_in_bytes (type);
7260 else if (!aarch64_composite_type_p (type, mode)
7261 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
7262 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
7263 size = GET_MODE_SIZE (mode);
7265 return size == 8 || size == 16;
7268 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
7269 shall be passed or returned in simd/fp register(s) (providing these
7270 parameter passing registers are available).
7272 Upon successful return, *COUNT returns the number of needed registers,
7273 *BASE_MODE returns the mode of the individual register and when IS_HA
7274 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
7275 floating-point aggregate or a homogeneous short-vector aggregate. */
7277 static bool
7278 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
7279 const_tree type,
7280 enum machine_mode *base_mode,
7281 int *count,
7282 bool *is_ha)
7284 enum machine_mode new_mode = VOIDmode;
7285 bool composite_p = aarch64_composite_type_p (type, mode);
7287 if (is_ha != NULL) *is_ha = false;
7289 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
7290 || aarch64_short_vector_p (type, mode))
7292 *count = 1;
7293 new_mode = mode;
7295 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7297 if (is_ha != NULL) *is_ha = true;
7298 *count = 2;
7299 new_mode = GET_MODE_INNER (mode);
7301 else if (type && composite_p)
7303 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
7305 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
7307 if (is_ha != NULL) *is_ha = true;
7308 *count = ag_count;
7310 else
7311 return false;
7313 else
7314 return false;
7316 *base_mode = new_mode;
7317 return true;
7320 /* Implement TARGET_STRUCT_VALUE_RTX. */
7322 static rtx
7323 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
7324 int incoming ATTRIBUTE_UNUSED)
7326 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
7329 /* Implements target hook vector_mode_supported_p. */
7330 static bool
7331 aarch64_vector_mode_supported_p (enum machine_mode mode)
7333 if (TARGET_SIMD
7334 && (mode == V4SImode || mode == V8HImode
7335 || mode == V16QImode || mode == V2DImode
7336 || mode == V2SImode || mode == V4HImode
7337 || mode == V8QImode || mode == V2SFmode
7338 || mode == V4SFmode || mode == V2DFmode
7339 || mode == V1DFmode))
7340 return true;
7342 return false;
7345 /* Return appropriate SIMD container
7346 for MODE within a vector of WIDTH bits. */
7347 static enum machine_mode
7348 aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
7350 gcc_assert (width == 64 || width == 128);
7351 if (TARGET_SIMD)
7353 if (width == 128)
7354 switch (mode)
7356 case DFmode:
7357 return V2DFmode;
7358 case SFmode:
7359 return V4SFmode;
7360 case SImode:
7361 return V4SImode;
7362 case HImode:
7363 return V8HImode;
7364 case QImode:
7365 return V16QImode;
7366 case DImode:
7367 return V2DImode;
7368 default:
7369 break;
7371 else
7372 switch (mode)
7374 case SFmode:
7375 return V2SFmode;
7376 case SImode:
7377 return V2SImode;
7378 case HImode:
7379 return V4HImode;
7380 case QImode:
7381 return V8QImode;
7382 default:
7383 break;
7386 return word_mode;
7389 /* Return 128-bit container as the preferred SIMD mode for MODE. */
7390 static enum machine_mode
7391 aarch64_preferred_simd_mode (enum machine_mode mode)
7393 return aarch64_simd_container_mode (mode, 128);
7396 /* Return the bitmask of possible vector sizes for the vectorizer
7397 to iterate over. */
7398 static unsigned int
7399 aarch64_autovectorize_vector_sizes (void)
7401 return (16 | 8);
7404 /* A table to help perform AArch64-specific name mangling for AdvSIMD
7405 vector types in order to conform to the AAPCS64 (see "Procedure
7406 Call Standard for the ARM 64-bit Architecture", Appendix A). To
7407 qualify for emission with the mangled names defined in that document,
7408 a vector type must not only be of the correct mode but also be
7409 composed of AdvSIMD vector element types (e.g.
7410 __builtin_aarch64_simd_qi); these types are registered by
7411 aarch64_init_simd_builtins (). In other words, vector types defined
7412 in other ways, e.g. via the vector_size attribute, will get default
7413 mangled names. */
7414 typedef struct
7416 enum machine_mode mode;
7417 const char *element_type_name;
7418 const char *mangled_name;
7419 } aarch64_simd_mangle_map_entry;
7421 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
7422 /* 64-bit containerized types. */
7423 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
7424 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
7425 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
7426 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
7427 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
7428 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
7429 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
7430 { DImode, "__builtin_aarch64_simd_di", "11__Int64x1_t" },
7431 { DImode, "__builtin_aarch64_simd_udi", "12__Uint64x1_t" },
7432 { V1DFmode, "__builtin_aarch64_simd_df", "13__Float64x1_t" },
7433 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
7434 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
7435 /* 128-bit containerized types. */
7436 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
7437 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
7438 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
7439 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
7440 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
7441 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
7442 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
7443 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
7444 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
7445 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
7446 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
7447 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
7448 { V2DImode, "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
7449 { VOIDmode, NULL, NULL }
7452 /* Implement TARGET_MANGLE_TYPE. */
7454 static const char *
7455 aarch64_mangle_type (const_tree type)
7457 /* The AArch64 ABI documents say that "__va_list" has to be
7458 mangled as if it were in the "std" namespace. */
7459 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
7460 return "St9__va_list";
7462 /* Check the mode of the vector type, and the name of the vector
7463 element type, against the table. */
7464 if (TREE_CODE (type) == VECTOR_TYPE)
7466 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
7468 while (pos->mode != VOIDmode)
7470 tree elt_type = TREE_TYPE (type);
7472 if (pos->mode == TYPE_MODE (type)
7473 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
7474 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
7475 pos->element_type_name))
7476 return pos->mangled_name;
7478 pos++;
7482 /* Use the default mangling. */
7483 return NULL;
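/* Editorial example (not part of the original sources): given the table
   above, a C++ function "void f (int32x4_t)" declared in terms of the
   AdvSIMD element types mangles as "_Z1f11__Int32x4_t", whereas a
   V4SImode type built with __attribute__ ((vector_size (16))) on int
   does not match any entry and receives the default mangling.  */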
7486 /* Return the equivalent letter for size. */
7487 static char
7488 sizetochar (int size)
7490 switch (size)
7492 case 64: return 'd';
7493 case 32: return 's';
7494 case 16: return 'h';
7495 case 8 : return 'b';
7496 default: gcc_unreachable ();
7500 /* Return true iff x is a uniform vector of floating-point
7501 constants, and the constant can be represented in
7502 quarter-precision form. Note, as aarch64_float_const_representable_p
7503 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
7504 static bool
7505 aarch64_vect_float_const_representable_p (rtx x)
7507 int i = 0;
7508 REAL_VALUE_TYPE r0, ri;
7509 rtx x0, xi;
7511 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
7512 return false;
7514 x0 = CONST_VECTOR_ELT (x, 0);
7515 if (!CONST_DOUBLE_P (x0))
7516 return false;
7518 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
7520 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
7522 xi = CONST_VECTOR_ELT (x, i);
7523 if (!CONST_DOUBLE_P (xi))
7524 return false;
7526 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
7527 if (!REAL_VALUES_EQUAL (r0, ri))
7528 return false;
7531 return aarch64_float_const_representable_p (x0);
7534 /* Return true if OP is a valid AdvSIMD immediate for MODE, false otherwise; if INFO is nonnull, describe the recognised encoding in *INFO. */
7535 bool
7536 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
7537 struct simd_immediate_info *info)
7539 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
7540 matches = 1; \
7541 for (i = 0; i < idx; i += (STRIDE)) \
7542 if (!(TEST)) \
7543 matches = 0; \
7544 if (matches) \
7546 immtype = (CLASS); \
7547 elsize = (ELSIZE); \
7548 eshift = (SHIFT); \
7549 emvn = (NEG); \
7550 break; \
7553 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7554 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7555 unsigned char bytes[16];
7556 int immtype = -1, matches;
7557 unsigned int invmask = inverse ? 0xff : 0;
7558 int eshift, emvn;
7560 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7562 if (! (aarch64_simd_imm_zero_p (op, mode)
7563 || aarch64_vect_float_const_representable_p (op)))
7564 return false;
7566 if (info)
7568 info->value = CONST_VECTOR_ELT (op, 0);
7569 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
7570 info->mvn = false;
7571 info->shift = 0;
7574 return true;
7577 /* Splat vector constant out into a byte vector. */
7578 for (i = 0; i < n_elts; i++)
7580 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
7581 it must be laid out in the vector register in reverse order. */
7582 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
7583 unsigned HOST_WIDE_INT elpart;
7584 unsigned int part, parts;
7586 if (GET_CODE (el) == CONST_INT)
7588 elpart = INTVAL (el);
7589 parts = 1;
7591 else if (GET_CODE (el) == CONST_DOUBLE)
7593 elpart = CONST_DOUBLE_LOW (el);
7594 parts = 2;
7596 else
7597 gcc_unreachable ();
7599 for (part = 0; part < parts; part++)
7601 unsigned int byte;
7602 for (byte = 0; byte < innersize; byte++)
7604 bytes[idx++] = (elpart & 0xff) ^ invmask;
7605 elpart >>= BITS_PER_UNIT;
7607 if (GET_CODE (el) == CONST_DOUBLE)
7608 elpart = CONST_DOUBLE_HIGH (el);
7612 /* Sanity check. */
7613 gcc_assert (idx == GET_MODE_SIZE (mode));
7617 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
7618 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
7620 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7621 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7623 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
7624 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7626 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
7627 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
7629 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
7631 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
7633 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
7634 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
7636 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7637 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7639 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
7640 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7642 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
7643 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
7645 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
7647 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
7649 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7650 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7652 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7653 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7655 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
7656 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7658 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
7659 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7661 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
7663 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
7664 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
7666 while (0);
7668 if (immtype == -1)
7669 return false;
7671 if (info)
7673 info->element_width = elsize;
7674 info->mvn = emvn != 0;
7675 info->shift = eshift;
7677 unsigned HOST_WIDE_INT imm = 0;
7679 if (immtype >= 12 && immtype <= 15)
7680 info->msl = true;
7682 /* Un-invert bytes of recognized vector, if necessary. */
7683 if (invmask != 0)
7684 for (i = 0; i < idx; i++)
7685 bytes[i] ^= invmask;
7687 if (immtype == 17)
7689 /* FIXME: Broken on 32-bit H_W_I hosts. */
7690 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
7692 for (i = 0; i < 8; i++)
7693 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
7694 << (i * BITS_PER_UNIT);
7697 info->value = GEN_INT (imm);
7699 else
7701 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
7702 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
7704 /* Construct 'abcdefgh' because the assembler cannot handle
7705 generic constants. */
7706 if (info->mvn)
7707 imm = ~imm;
7708 imm = (imm >> info->shift) & 0xff;
7709 info->value = GEN_INT (imm);
7713 return true;
7714 #undef CHECK
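/* Editorial worked example of the CHECK table above: a V4SImode constant
   with every element equal to 0x00ab0000 splats to the byte pattern
   { 0x00, 0x00, 0xab, 0x00 } repeated, which matches immtype 2 (ELSIZE
   32, SHIFT 16, no inversion); the returned simd_immediate_info then
   holds value 0xab and shift 16, i.e. the operands of a
   "movi Vd.4s, #0xab, lsl #16" style instruction.  This is a reading of
   the code above, not a full description of the AdvSIMD immediate
   formats.  */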
7717 static bool
7718 aarch64_const_vec_all_same_int_p (rtx x,
7719 HOST_WIDE_INT minval,
7720 HOST_WIDE_INT maxval)
7722 HOST_WIDE_INT firstval;
7723 int count, i;
7725 if (GET_CODE (x) != CONST_VECTOR
7726 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
7727 return false;
7729 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
7730 if (firstval < minval || firstval > maxval)
7731 return false;
7733 count = CONST_VECTOR_NUNITS (x);
7734 for (i = 1; i < count; i++)
7735 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
7736 return false;
7738 return true;
7741 /* Check whether immediate shift constants are within range. */
7742 bool
7743 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
7745 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
7746 if (left)
7747 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
7748 else
7749 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
7752 /* Return true if X is a uniform vector where all elements
7753 are either the floating-point constant 0.0 or the
7754 integer constant 0. */
7755 bool
7756 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
7758 return x == CONST0_RTX (mode);
7761 bool
7762 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
7764 HOST_WIDE_INT imm = INTVAL (x);
7765 int i;
7767 for (i = 0; i < 8; i++)
7769 unsigned int byte = imm & 0xff;
7770 if (byte != 0xff && byte != 0)
7771 return false;
7772 imm >>= 8;
7775 return true;
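/* Editorial example: the loop above accepts any 64-bit value whose eight
   bytes are each 0x00 or 0xff, e.g. 0xff00ff00ff00ff00, which is the
   byte-mask immediate class of the 64-bit MOVI form; a value containing
   any other byte, such as 0x12, is rejected.  */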
7778 bool
7779 aarch64_mov_operand_p (rtx x,
7780 enum aarch64_symbol_context context,
7781 enum machine_mode mode)
7783 if (GET_CODE (x) == HIGH
7784 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
7785 return true;
7787 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
7788 return true;
7790 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
7791 return true;
7793 return aarch64_classify_symbolic_expression (x, context)
7794 == SYMBOL_TINY_ABSOLUTE;
7797 /* Return a const_int vector of VAL. */
7799 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
7801 int nunits = GET_MODE_NUNITS (mode);
7802 rtvec v = rtvec_alloc (nunits);
7803 int i;
7805 for (i = 0; i < nunits; i++)
7806 RTVEC_ELT (v, i) = GEN_INT (val);
7808 return gen_rtx_CONST_VECTOR (mode, v);
7811 /* Check OP is a legal scalar immediate for the MOVI instruction. */
7813 bool
7814 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
7816 enum machine_mode vmode;
7818 gcc_assert (!VECTOR_MODE_P (mode));
7819 vmode = aarch64_preferred_simd_mode (mode);
7820 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
7821 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
7824 /* Construct and return a PARALLEL RTX vector. */
7826 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
7828 int nunits = GET_MODE_NUNITS (mode);
7829 rtvec v = rtvec_alloc (nunits / 2);
7830 int base = high ? nunits / 2 : 0;
7831 rtx t1;
7832 int i;
7834 for (i = 0; i < nunits / 2; i++)
7835 RTVEC_ELT (v, i) = GEN_INT (base + i);
7837 t1 = gen_rtx_PARALLEL (mode, v);
7838 return t1;
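/* Editorial example: for V4SImode, HIGH == false yields
   (parallel [(const_int 0) (const_int 1)]) and HIGH == true yields
   (parallel [(const_int 2) (const_int 3)]), i.e. lane selectors for the
   low and high halves of the vector.  */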
7841 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
7842 HIGH (exclusive). */
7843 void
7844 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7846 HOST_WIDE_INT lane;
7847 gcc_assert (GET_CODE (operand) == CONST_INT);
7848 lane = INTVAL (operand);
7850 if (lane < low || lane >= high)
7851 error ("lane out of range");
7854 void
7855 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7857 gcc_assert (GET_CODE (operand) == CONST_INT);
7858 HOST_WIDE_INT lane = INTVAL (operand);
7860 if (lane < low || lane >= high)
7861 error ("constant out of range");
7864 /* Emit code to reinterpret one AdvSIMD type as another,
7865 without altering bits. */
7866 void
7867 aarch64_simd_reinterpret (rtx dest, rtx src)
7869 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
7872 /* Emit code to place an AdvSIMD pair result in memory locations (with equal
7873 registers). */
7874 void
7875 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
7876 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
7877 rtx op1)
7879 rtx mem = gen_rtx_MEM (mode, destaddr);
7880 rtx tmp1 = gen_reg_rtx (mode);
7881 rtx tmp2 = gen_reg_rtx (mode);
7883 emit_insn (intfn (tmp1, op1, tmp2));
7885 emit_move_insn (mem, tmp1);
7886 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
7887 emit_move_insn (mem, tmp2);
7890 /* Return TRUE if OP is a valid vector addressing mode. */
7891 bool
7892 aarch64_simd_mem_operand_p (rtx op)
7894 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
7895 || GET_CODE (XEXP (op, 0)) == REG);
7898 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
7899 not to early-clobber SRC registers in the process.
7901 We assume that the operands described by SRC and DEST represent a
7902 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
7903 number of components into which the copy has been decomposed. */
7904 void
7905 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
7906 rtx *src, unsigned int count)
7908 unsigned int i;
7910 if (!reg_overlap_mentioned_p (operands[0], operands[1])
7911 || REGNO (operands[0]) < REGNO (operands[1]))
7913 for (i = 0; i < count; i++)
7915 operands[2 * i] = dest[i];
7916 operands[2 * i + 1] = src[i];
7919 else
7921 for (i = 0; i < count; i++)
7923 operands[2 * i] = dest[count - i - 1];
7924 operands[2 * i + 1] = src[count - i - 1];
7929 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
7930 one of the VSTRUCT modes: OI, CI or XI. */
7932 aarch64_simd_attr_length_move (rtx insn)
7934 enum machine_mode mode;
7936 extract_insn_cached (insn);
7938 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
7940 mode = GET_MODE (recog_data.operand[0]);
7941 switch (mode)
7943 case OImode:
7944 return 8;
7945 case CImode:
7946 return 12;
7947 case XImode:
7948 return 16;
7949 default:
7950 gcc_unreachable ();
7953 return 4;
7956 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
7957 alignment of a vector to 128 bits. */
7958 static HOST_WIDE_INT
7959 aarch64_simd_vector_alignment (const_tree type)
7961 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
7962 return MIN (align, 128);
7965 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
7966 static bool
7967 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
7969 if (is_packed)
7970 return false;
7972 /* We guarantee alignment for vectors up to 128 bits. */
7973 if (tree_int_cst_compare (TYPE_SIZE (type),
7974 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
7975 return false;
7977 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
7978 return true;
7981 /* If VALS is a vector constant that can be loaded into a register
7982 using DUP, generate instructions to do so and return an RTX to
7983 assign to the register. Otherwise return NULL_RTX. */
7984 static rtx
7985 aarch64_simd_dup_constant (rtx vals)
7987 enum machine_mode mode = GET_MODE (vals);
7988 enum machine_mode inner_mode = GET_MODE_INNER (mode);
7989 int n_elts = GET_MODE_NUNITS (mode);
7990 bool all_same = true;
7991 rtx x;
7992 int i;
7994 if (GET_CODE (vals) != CONST_VECTOR)
7995 return NULL_RTX;
7997 for (i = 1; i < n_elts; ++i)
7999 x = CONST_VECTOR_ELT (vals, i);
8000 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
8001 all_same = false;
8004 if (!all_same)
8005 return NULL_RTX;
8007 /* We can load this constant by using DUP and a constant in a
8008 single general-purpose register. This will be cheaper than a vector
8009 load. */
8010 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
8011 return gen_rtx_VEC_DUPLICATE (mode, x);
8015 /* Generate code to load VALS, which is a PARALLEL containing only
8016 constants (for vec_init) or CONST_VECTOR, efficiently into a
8017 register. Returns an RTX to copy into the register, or NULL_RTX
8018 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
8019 static rtx
8020 aarch64_simd_make_constant (rtx vals)
8022 enum machine_mode mode = GET_MODE (vals);
8023 rtx const_dup;
8024 rtx const_vec = NULL_RTX;
8025 int n_elts = GET_MODE_NUNITS (mode);
8026 int n_const = 0;
8027 int i;
8029 if (GET_CODE (vals) == CONST_VECTOR)
8030 const_vec = vals;
8031 else if (GET_CODE (vals) == PARALLEL)
8033 /* A CONST_VECTOR must contain only CONST_INTs and
8034 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8035 Only store valid constants in a CONST_VECTOR. */
8036 for (i = 0; i < n_elts; ++i)
8038 rtx x = XVECEXP (vals, 0, i);
8039 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
8040 n_const++;
8042 if (n_const == n_elts)
8043 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8045 else
8046 gcc_unreachable ();
8048 if (const_vec != NULL_RTX
8049 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
8050 /* Load using MOVI/MVNI. */
8051 return const_vec;
8052 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
8053 /* Loaded using DUP. */
8054 return const_dup;
8055 else if (const_vec != NULL_RTX)
8056 /* Load from constant pool. We cannot take advantage of single-cycle
8057 LD1 because we need a PC-relative addressing mode. */
8058 return const_vec;
8059 else
8060 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8061 We cannot construct an initializer. */
8062 return NULL_RTX;
8065 void
8066 aarch64_expand_vector_init (rtx target, rtx vals)
8068 enum machine_mode mode = GET_MODE (target);
8069 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8070 int n_elts = GET_MODE_NUNITS (mode);
8071 int n_var = 0, one_var = -1;
8072 bool all_same = true;
8073 rtx x, mem;
8074 int i;
8076 x = XVECEXP (vals, 0, 0);
8077 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8078 n_var = 1, one_var = 0;
8080 for (i = 1; i < n_elts; ++i)
8082 x = XVECEXP (vals, 0, i);
8083 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8084 ++n_var, one_var = i;
8086 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8087 all_same = false;
8090 if (n_var == 0)
8092 rtx constant = aarch64_simd_make_constant (vals);
8093 if (constant != NULL_RTX)
8095 emit_move_insn (target, constant);
8096 return;
8100 /* Splat a single non-constant element if we can. */
8101 if (all_same)
8103 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8104 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
8105 return;
8108 /* One field is non-constant. Load constant then overwrite varying
8109 field. This is more efficient than using the stack. */
8110 if (n_var == 1)
8112 rtx copy = copy_rtx (vals);
8113 rtx index = GEN_INT (one_var);
8114 enum insn_code icode;
8116 /* Load constant part of vector, substitute neighboring value for
8117 varying element. */
8118 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
8119 aarch64_expand_vector_init (target, copy);
8121 /* Insert variable. */
8122 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8123 icode = optab_handler (vec_set_optab, mode);
8124 gcc_assert (icode != CODE_FOR_nothing);
8125 emit_insn (GEN_FCN (icode) (target, x, index));
8126 return;
8129 /* Construct the vector in memory one field at a time
8130 and load the whole vector. */
8131 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
8132 for (i = 0; i < n_elts; i++)
8133 emit_move_insn (adjust_address_nv (mem, inner_mode,
8134 i * GET_MODE_SIZE (inner_mode)),
8135 XVECEXP (vals, 0, i));
8136 emit_move_insn (target, mem);
8140 static unsigned HOST_WIDE_INT
8141 aarch64_shift_truncation_mask (enum machine_mode mode)
8143 return
8144 (aarch64_vector_mode_supported_p (mode)
8145 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
8148 #ifndef TLS_SECTION_ASM_FLAG
8149 #define TLS_SECTION_ASM_FLAG 'T'
8150 #endif
8152 void
8153 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
8154 tree decl ATTRIBUTE_UNUSED)
8156 char flagchars[10], *f = flagchars;
8158 /* If we have already declared this section, we can use an
8159 abbreviated form to switch back to it -- unless this section is
8160 part of a COMDAT group, in which case GAS requires the full
8161 declaration every time. */
8162 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8163 && (flags & SECTION_DECLARED))
8165 fprintf (asm_out_file, "\t.section\t%s\n", name);
8166 return;
8169 if (!(flags & SECTION_DEBUG))
8170 *f++ = 'a';
8171 if (flags & SECTION_WRITE)
8172 *f++ = 'w';
8173 if (flags & SECTION_CODE)
8174 *f++ = 'x';
8175 if (flags & SECTION_SMALL)
8176 *f++ = 's';
8177 if (flags & SECTION_MERGE)
8178 *f++ = 'M';
8179 if (flags & SECTION_STRINGS)
8180 *f++ = 'S';
8181 if (flags & SECTION_TLS)
8182 *f++ = TLS_SECTION_ASM_FLAG;
8183 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8184 *f++ = 'G';
8185 *f = '\0';
8187 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
8189 if (!(flags & SECTION_NOTYPE))
8191 const char *type;
8192 const char *format;
8194 if (flags & SECTION_BSS)
8195 type = "nobits";
8196 else
8197 type = "progbits";
8199 #ifdef TYPE_OPERAND_FMT
8200 format = "," TYPE_OPERAND_FMT;
8201 #else
8202 format = ",@%s";
8203 #endif
8205 fprintf (asm_out_file, format, type);
8207 if (flags & SECTION_ENTSIZE)
8208 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
8209 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8211 if (TREE_CODE (decl) == IDENTIFIER_NODE)
8212 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
8213 else
8214 fprintf (asm_out_file, ",%s,comdat",
8215 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
8219 putc ('\n', asm_out_file);
8222 /* Select a format to encode pointers in exception handling data. */
8224 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
8226 int type;
8227 switch (aarch64_cmodel)
8229 case AARCH64_CMODEL_TINY:
8230 case AARCH64_CMODEL_TINY_PIC:
8231 case AARCH64_CMODEL_SMALL:
8232 case AARCH64_CMODEL_SMALL_PIC:
8233 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
8234 for everything. */
8235 type = DW_EH_PE_sdata4;
8236 break;
8237 default:
8238 /* No assumptions here. 8-byte relocs required. */
8239 type = DW_EH_PE_sdata8;
8240 break;
8242 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
8245 /* Emit load exclusive. */
8247 static void
8248 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
8249 rtx mem, rtx model_rtx)
8251 rtx (*gen) (rtx, rtx, rtx);
8253 switch (mode)
8255 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
8256 case HImode: gen = gen_aarch64_load_exclusivehi; break;
8257 case SImode: gen = gen_aarch64_load_exclusivesi; break;
8258 case DImode: gen = gen_aarch64_load_exclusivedi; break;
8259 default:
8260 gcc_unreachable ();
8263 emit_insn (gen (rval, mem, model_rtx));
8266 /* Emit store exclusive. */
8268 static void
8269 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
8270 rtx rval, rtx mem, rtx model_rtx)
8272 rtx (*gen) (rtx, rtx, rtx, rtx);
8274 switch (mode)
8276 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
8277 case HImode: gen = gen_aarch64_store_exclusivehi; break;
8278 case SImode: gen = gen_aarch64_store_exclusivesi; break;
8279 case DImode: gen = gen_aarch64_store_exclusivedi; break;
8280 default:
8281 gcc_unreachable ();
8284 emit_insn (gen (bval, rval, mem, model_rtx));
8287 /* Emit the jump insn INSN and mark it as very unlikely to be taken. */
8289 static void
8290 aarch64_emit_unlikely_jump (rtx insn)
8292 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
8294 insn = emit_jump_insn (insn);
8295 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
8298 /* Expand a compare and swap pattern. */
8300 void
8301 aarch64_expand_compare_and_swap (rtx operands[])
8303 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
8304 enum machine_mode mode, cmp_mode;
8305 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
8307 bval = operands[0];
8308 rval = operands[1];
8309 mem = operands[2];
8310 oldval = operands[3];
8311 newval = operands[4];
8312 is_weak = operands[5];
8313 mod_s = operands[6];
8314 mod_f = operands[7];
8315 mode = GET_MODE (mem);
8316 cmp_mode = mode;
8318 /* Normally the succ memory model must be stronger than fail, but in the
8319 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
8320 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
8322 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
8323 && INTVAL (mod_s) == MEMMODEL_RELEASE)
8324 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
8326 switch (mode)
8328 case QImode:
8329 case HImode:
8330 /* For short modes, we're going to perform the comparison in SImode,
8331 so do the zero-extension now. */
8332 cmp_mode = SImode;
8333 rval = gen_reg_rtx (SImode);
8334 oldval = convert_modes (SImode, mode, oldval, true);
8335 /* Fall through. */
8337 case SImode:
8338 case DImode:
8339 /* Force the value into a register if needed. */
8340 if (!aarch64_plus_operand (oldval, mode))
8341 oldval = force_reg (cmp_mode, oldval);
8342 break;
8344 default:
8345 gcc_unreachable ();
8348 switch (mode)
8350 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
8351 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
8352 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
8353 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
8354 default:
8355 gcc_unreachable ();
8358 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
8360 if (mode == QImode || mode == HImode)
8361 emit_move_insn (operands[1], gen_lowpart (mode, rval));
8363 x = gen_rtx_REG (CCmode, CC_REGNUM);
8364 x = gen_rtx_EQ (SImode, x, const0_rtx);
8365 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
8368 /* Split a compare and swap pattern. */
8370 void
8371 aarch64_split_compare_and_swap (rtx operands[])
8373 rtx rval, mem, oldval, newval, scratch;
8374 enum machine_mode mode;
8375 bool is_weak;
8376 rtx label1, label2, x, cond;
8378 rval = operands[0];
8379 mem = operands[1];
8380 oldval = operands[2];
8381 newval = operands[3];
8382 is_weak = (operands[4] != const0_rtx);
8383 scratch = operands[7];
8384 mode = GET_MODE (mem);
8386 label1 = NULL_RTX;
8387 if (!is_weak)
8389 label1 = gen_label_rtx ();
8390 emit_label (label1);
8392 label2 = gen_label_rtx ();
8394 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
8396 cond = aarch64_gen_compare_reg (NE, rval, oldval);
8397 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8398 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8399 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
8400 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8402 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
8404 if (!is_weak)
8406 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
8407 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8408 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
8409 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8411 else
8413 cond = gen_rtx_REG (CCmode, CC_REGNUM);
8414 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
8415 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
8418 emit_label (label2);
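/* As an illustration (register numbers arbitrary, exact mnemonics depending
   on the memory model), the strong form of the split above typically yields
   a load-exclusive/store-exclusive retry loop roughly like:

       .Lretry:
         ldaxr   w0, [x1]          // load-exclusive the current value
         cmp     w0, w2            // compare against the expected value
         b.ne    .Ldone            // mismatch: fail without storing
         stlxr   w3, w4, [x1]      // try to store the new value
         cbnz    w3, .Lretry       // store-exclusive failed: retry
       .Ldone:

   The weak form omits the backward branch and instead sets the condition
   flags directly from the store-exclusive result.  */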
8421 /* Split an atomic operation. */
8423 void
8424 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
8425 rtx value, rtx model_rtx, rtx cond)
8427 enum machine_mode mode = GET_MODE (mem);
8428 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
8429 rtx label, x;
8431 label = gen_label_rtx ();
8432 emit_label (label);
8434 if (new_out)
8435 new_out = gen_lowpart (wmode, new_out);
8436 if (old_out)
8437 old_out = gen_lowpart (wmode, old_out);
8438 else
8439 old_out = new_out;
8440 value = simplify_gen_subreg (wmode, value, mode, 0);
8442 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
8444 switch (code)
8446 case SET:
8447 new_out = value;
8448 break;
8450 case NOT:
8451 x = gen_rtx_AND (wmode, old_out, value);
8452 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8453 x = gen_rtx_NOT (wmode, new_out);
8454 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8455 break;
8457 case MINUS:
8458 if (CONST_INT_P (value))
8460 value = GEN_INT (-INTVAL (value));
8461 code = PLUS;
8463 /* Fall through. */
8465 default:
8466 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
8467 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8468 break;
8471 aarch64_emit_store_exclusive (mode, cond, mem,
8472 gen_lowpart (mode, new_out), model_rtx);
8474 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8475 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8476 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
8477 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
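/* As an illustration, an atomic fetch-and-add split this way expands to
   roughly the following loop (sequentially consistent variant shown,
   register numbers arbitrary):

       .Lretry:
         ldaxr   w0, [x2]          // old_out = *mem  (load-exclusive)
         add     w1, w0, w3        // new_out = old_out + value
         stlxr   w4, w1, [x2]      // try to store new_out
         cbnz    w4, .Lretry       // retry if the exclusive store failed  */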
8480 static void
8481 aarch64_print_extension (void)
8483 const struct aarch64_option_extension *opt = NULL;
8485 for (opt = all_extensions; opt->name != NULL; opt++)
8486 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
8487 asm_fprintf (asm_out_file, "+%s", opt->name);
8489 asm_fprintf (asm_out_file, "\n");
8492 static void
8493 aarch64_start_file (void)
8495 if (selected_arch)
8497 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
8498 aarch64_print_extension ();
8500 else if (selected_cpu)
8502 const char *truncated_name
8503 = aarch64_rewrite_selected_cpu (selected_cpu->name);
8504 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
8505 aarch64_print_extension ();
8507 default_file_start();
8510 /* Target hook for c_mode_for_suffix. */
8511 static enum machine_mode
8512 aarch64_c_mode_for_suffix (char suffix)
8514 if (suffix == 'q')
8515 return TFmode;
8517 return VOIDmode;
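/* For example, with this hook a constant written as 1.0q in C source is
   given TFmode, the 128-bit IEEE quad-precision mode.  */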
8520 /* We can only represent floating point constants which will fit in
8521 "quarter-precision" values. These values are characterised by
8522 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given by:
8525 (-1)^s * (n/16) * 2^r
8527 Where:
8528 's' is the sign bit.
8529 'n' is an integer in the range 16 <= n <= 31.
8530 'r' is an integer in the range -3 <= r <= 4. */
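/* For example, 0.5 = (16/16) * 2^-1 (s = 0, n = 16, r = -1) and
   31.0 = (31/16) * 2^4 are representable, so the representable positive
   values run from 0.125 up to 31.0; a value such as 0.1 has no (n, r)
   pair of this form and so cannot be used as an FMOV immediate.  */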
8532 /* Return true iff X can be represented by a quarter-precision
8533 floating point immediate operand. Note, we cannot represent 0.0. */
8534 bool
8535 aarch64_float_const_representable_p (rtx x)
8537 /* This represents our current view of how many bits
8538 make up the mantissa. */
8539 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8540 int exponent;
8541 unsigned HOST_WIDE_INT mantissa, mask;
8542 REAL_VALUE_TYPE r, m;
8543 bool fail;
8545 if (!CONST_DOUBLE_P (x))
8546 return false;
8548 if (GET_MODE (x) == VOIDmode)
8549 return false;
8551 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8553 /* We cannot represent infinities, NaNs or +/-zero. We won't
8554 know if we have +zero until we analyse the mantissa, but we
8555 can reject the other invalid values. */
8556 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
8557 || REAL_VALUE_MINUS_ZERO (r))
8558 return false;
8560 /* Extract exponent. */
8561 r = real_value_abs (&r);
8562 exponent = REAL_EXP (&r);
8564 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8565 highest (sign) bit, with a fixed binary point at bit point_pos.
8566 The low element of the wide_int W below holds the low part of the mantissa, the high element the high part.
8567 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
8568 bits for the mantissa, this can fail (low bits will be lost). */
8569 real_ldexp (&m, &r, point_pos - exponent);
8570 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
8572 /* If the low part of the mantissa has bits set we cannot represent
8573 the value. */
8574 if (w.elt (0) != 0)
8575 return false;
8576 /* We have rejected the lower HOST_WIDE_INT, so update our
8577 understanding of how many bits lie in the mantissa and
8578 look only at the high HOST_WIDE_INT. */
8579 mantissa = w.elt (1);
8580 point_pos -= HOST_BITS_PER_WIDE_INT;
8582 /* We can only represent values with a mantissa of the form 1.xxxx. */
8583 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8584 if ((mantissa & mask) != 0)
8585 return false;
8587 /* Having filtered unrepresentable values, we may now remove all
8588 but the highest 5 bits. */
8589 mantissa >>= point_pos - 5;
8591 /* We cannot represent the value 0.0, so reject it. This is handled
8592 elsewhere. */
8593 if (mantissa == 0)
8594 return false;
8596 /* Then, as bit 4 is always set, we can mask it off, leaving
8597 the mantissa in the range [0, 15]. */
8598 mantissa &= ~(1 << 4);
8599 gcc_assert (mantissa <= 15);
8601 /* GCC internally does not use IEEE754-like encoding (where normalized
8602 significands are in the range [1, 2)). GCC uses [0.5, 1) (see real.c).
8603 Our mantissa values are shifted 4 places to the left relative to
8604 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
8605 by 5 places to correct for GCC's representation. */
8606 exponent = 5 - exponent;
8608 return (exponent >= 0 && exponent <= 7);
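/* For instance, for the value 1.0 REAL_EXP returns 1 (1.0 is 0.5 * 2^1 in
   GCC's [0.5, 1) normalisation), giving exponent = 5 - 1 = 4, which sits
   inside the accepted [0, 7] range and corresponds to r = 0 in the
   quarter-precision formula above.  */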
8611 char*
8612 aarch64_output_simd_mov_immediate (rtx const_vector,
8613 enum machine_mode mode,
8614 unsigned width)
8616 bool is_valid;
8617 static char templ[40];
8618 const char *mnemonic;
8619 const char *shift_op;
8620 unsigned int lane_count = 0;
8621 char element_char;
8623 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
8625 /* This will return true to show CONST_VECTOR is legal for use as
8626 an AdvSIMD MOVI (or, implicitly, MVNI) immediate. It will
8627 also update INFO to show how the immediate should be generated. */
8628 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
8629 gcc_assert (is_valid);
8631 element_char = sizetochar (info.element_width);
8632 lane_count = width / info.element_width;
8634 mode = GET_MODE_INNER (mode);
8635 if (mode == SFmode || mode == DFmode)
8637 gcc_assert (info.shift == 0 && ! info.mvn);
8638 if (aarch64_float_const_zero_rtx_p (info.value))
8639 info.value = GEN_INT (0);
8640 else
8642 #define buf_size 20
8643 REAL_VALUE_TYPE r;
8644 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
8645 char float_buf[buf_size] = {'\0'};
8646 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
8647 #undef buf_size
8649 if (lane_count == 1)
8650 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
8651 else
8652 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
8653 lane_count, element_char, float_buf);
8654 return templ;
8658 mnemonic = info.mvn ? "mvni" : "movi";
8659 shift_op = info.msl ? "msl" : "lsl";
8661 if (lane_count == 1)
8662 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
8663 mnemonic, UINTVAL (info.value));
8664 else if (info.shift)
8665 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
8666 ", %s %d", mnemonic, lane_count, element_char,
8667 UINTVAL (info.value), shift_op, info.shift);
8668 else
8669 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
8670 mnemonic, lane_count, element_char, UINTVAL (info.value));
8671 return templ;
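/* As a rough example of the strings built above, a V4SI vector with every
   lane equal to 256 would typically be emitted as
   "movi  v0.4s, 0x1, lsl 8", while a V2DF vector of 1.0 takes the
   floating-point path and emits an "fmov" of the decimal value.  */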
8674 char*
8675 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
8676 enum machine_mode mode)
8678 enum machine_mode vmode;
8680 gcc_assert (!VECTOR_MODE_P (mode));
8681 vmode = aarch64_simd_container_mode (mode, 64);
8682 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
8683 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
8686 /* Split operands into moves from op[1] + op[2] into op[0]. */
8688 void
8689 aarch64_split_combinev16qi (rtx operands[3])
8691 unsigned int dest = REGNO (operands[0]);
8692 unsigned int src1 = REGNO (operands[1]);
8693 unsigned int src2 = REGNO (operands[2]);
8694 enum machine_mode halfmode = GET_MODE (operands[1]);
8695 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
8696 rtx destlo, desthi;
8698 gcc_assert (halfmode == V16QImode);
8700 if (src1 == dest && src2 == dest + halfregs)
8702 /* No-op move. Can't split to nothing; emit something. */
8703 emit_note (NOTE_INSN_DELETED);
8704 return;
8707 /* Preserve register attributes for variable tracking. */
8708 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
8709 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
8710 GET_MODE_SIZE (halfmode));
8712 /* Special case of reversed high/low parts. */
8713 if (reg_overlap_mentioned_p (operands[2], destlo)
8714 && reg_overlap_mentioned_p (operands[1], desthi))
8716 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8717 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
8718 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
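/* The three XORs above are the classic XOR-swap: they exchange the two
   source registers in place without needing a scratch vector register.  */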
8720 else if (!reg_overlap_mentioned_p (operands[2], destlo))
8722 /* Try to avoid unnecessary moves if part of the result
8723 is in the right place already. */
8724 if (src1 != dest)
8725 emit_move_insn (destlo, operands[1]);
8726 if (src2 != dest + halfregs)
8727 emit_move_insn (desthi, operands[2]);
8729 else
8731 if (src2 != dest + halfregs)
8732 emit_move_insn (desthi, operands[2]);
8733 if (src1 != dest)
8734 emit_move_insn (destlo, operands[1]);
8738 /* vec_perm support. */
8740 #define MAX_VECT_LEN 16
8742 struct expand_vec_perm_d
8744 rtx target, op0, op1;
8745 unsigned char perm[MAX_VECT_LEN];
8746 enum machine_mode vmode;
8747 unsigned char nelt;
8748 bool one_vector_p;
8749 bool testing_p;
8752 /* Generate a variable permutation. */
8754 static void
8755 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
8757 enum machine_mode vmode = GET_MODE (target);
8758 bool one_vector_p = rtx_equal_p (op0, op1);
8760 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
8761 gcc_checking_assert (GET_MODE (op0) == vmode);
8762 gcc_checking_assert (GET_MODE (op1) == vmode);
8763 gcc_checking_assert (GET_MODE (sel) == vmode);
8764 gcc_checking_assert (TARGET_SIMD);
8766 if (one_vector_p)
8768 if (vmode == V8QImode)
8770 /* Expand the argument to a V16QI mode by duplicating it. */
8771 rtx pair = gen_reg_rtx (V16QImode);
8772 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
8773 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8775 else
8777 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
8780 else
8782 rtx pair;
8784 if (vmode == V8QImode)
8786 pair = gen_reg_rtx (V16QImode);
8787 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
8788 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8790 else
8792 pair = gen_reg_rtx (OImode);
8793 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
8794 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
8799 void
8800 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
8802 enum machine_mode vmode = GET_MODE (target);
8803 unsigned int nelt = GET_MODE_NUNITS (vmode);
8804 bool one_vector_p = rtx_equal_p (op0, op1);
8805 rtx mask;
8807 /* The TBL instruction does not use a modulo index, so we must take care
8808 of that ourselves. */
8809 mask = aarch64_simd_gen_const_vector_dup (vmode,
8810 one_vector_p ? nelt - 1 : 2 * nelt - 1);
8811 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
8813 /* For big-endian, we also need to reverse the index within the vector
8814 (but not which vector). */
8815 if (BYTES_BIG_ENDIAN)
8817 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
8818 if (!one_vector_p)
8819 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
8820 sel = expand_simple_binop (vmode, XOR, sel, mask,
8821 NULL, 0, OPTAB_LIB_WIDEN);
8823 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
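/* For example, for a two-operand V8QI permute nelt is 8, so the mask built
   above is a vector of 15s and each selector byte is reduced modulo 16
   before the TBL; on big-endian the additional XOR with 7 then reverses
   the index within each vector.  */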
8826 /* Recognize patterns suitable for the TRN instructions. */
8827 static bool
8828 aarch64_evpc_trn (struct expand_vec_perm_d *d)
8830 unsigned int i, odd, mask, nelt = d->nelt;
8831 rtx out, in0, in1, x;
8832 rtx (*gen) (rtx, rtx, rtx);
8833 enum machine_mode vmode = d->vmode;
8835 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8836 return false;
8838 /* Note that these are little-endian tests.
8839 We correct for big-endian later. */
8840 if (d->perm[0] == 0)
8841 odd = 0;
8842 else if (d->perm[0] == 1)
8843 odd = 1;
8844 else
8845 return false;
8846 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8848 for (i = 0; i < nelt; i += 2)
8850 if (d->perm[i] != i + odd)
8851 return false;
8852 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
8853 return false;
8856 /* Success! */
8857 if (d->testing_p)
8858 return true;
8860 in0 = d->op0;
8861 in1 = d->op1;
8862 if (BYTES_BIG_ENDIAN)
8864 x = in0, in0 = in1, in1 = x;
8865 odd = !odd;
8867 out = d->target;
8869 if (odd)
8871 switch (vmode)
8873 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
8874 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
8875 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
8876 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
8877 case V4SImode: gen = gen_aarch64_trn2v4si; break;
8878 case V2SImode: gen = gen_aarch64_trn2v2si; break;
8879 case V2DImode: gen = gen_aarch64_trn2v2di; break;
8880 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
8881 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
8882 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
8883 default:
8884 return false;
8887 else
8889 switch (vmode)
8891 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
8892 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
8893 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
8894 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
8895 case V4SImode: gen = gen_aarch64_trn1v4si; break;
8896 case V2SImode: gen = gen_aarch64_trn1v2si; break;
8897 case V2DImode: gen = gen_aarch64_trn1v2di; break;
8898 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
8899 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
8900 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
8901 default:
8902 return false;
8906 emit_insn (gen (out, in0, in1));
8907 return true;
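/* For example, for V4SI inputs the (little-endian) index vector
   { 0, 4, 2, 6 } is matched as TRN1 and { 1, 5, 3, 7 } as TRN2.  */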
8910 /* Recognize patterns suitable for the UZP instructions. */
8911 static bool
8912 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
8914 unsigned int i, odd, mask, nelt = d->nelt;
8915 rtx out, in0, in1, x;
8916 rtx (*gen) (rtx, rtx, rtx);
8917 enum machine_mode vmode = d->vmode;
8919 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8920 return false;
8922 /* Note that these are little-endian tests.
8923 We correct for big-endian later. */
8924 if (d->perm[0] == 0)
8925 odd = 0;
8926 else if (d->perm[0] == 1)
8927 odd = 1;
8928 else
8929 return false;
8930 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8932 for (i = 0; i < nelt; i++)
8934 unsigned elt = (i * 2 + odd) & mask;
8935 if (d->perm[i] != elt)
8936 return false;
8939 /* Success! */
8940 if (d->testing_p)
8941 return true;
8943 in0 = d->op0;
8944 in1 = d->op1;
8945 if (BYTES_BIG_ENDIAN)
8947 x = in0, in0 = in1, in1 = x;
8948 odd = !odd;
8950 out = d->target;
8952 if (odd)
8954 switch (vmode)
8956 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
8957 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
8958 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
8959 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
8960 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
8961 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
8962 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
8963 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
8964 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
8965 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
8966 default:
8967 return false;
8970 else
8972 switch (vmode)
8974 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
8975 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
8976 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
8977 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
8978 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
8979 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
8980 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
8981 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
8982 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
8983 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
8984 default:
8985 return false;
8989 emit_insn (gen (out, in0, in1));
8990 return true;
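/* For example, for V4SI inputs the (little-endian) index vector
   { 0, 2, 4, 6 } is matched as UZP1 and { 1, 3, 5, 7 } as UZP2.  */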
8993 /* Recognize patterns suitable for the ZIP instructions. */
8994 static bool
8995 aarch64_evpc_zip (struct expand_vec_perm_d *d)
8997 unsigned int i, high, mask, nelt = d->nelt;
8998 rtx out, in0, in1, x;
8999 rtx (*gen) (rtx, rtx, rtx);
9000 enum machine_mode vmode = d->vmode;
9002 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9003 return false;
9005 /* Note that these are little-endian tests.
9006 We correct for big-endian later. */
9007 high = nelt / 2;
9008 if (d->perm[0] == high)
9009 /* Do Nothing. */
9011 else if (d->perm[0] == 0)
9012 high = 0;
9013 else
9014 return false;
9015 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9017 for (i = 0; i < nelt / 2; i++)
9019 unsigned elt = (i + high) & mask;
9020 if (d->perm[i * 2] != elt)
9021 return false;
9022 elt = (elt + nelt) & mask;
9023 if (d->perm[i * 2 + 1] != elt)
9024 return false;
9027 /* Success! */
9028 if (d->testing_p)
9029 return true;
9031 in0 = d->op0;
9032 in1 = d->op1;
9033 if (BYTES_BIG_ENDIAN)
9035 x = in0, in0 = in1, in1 = x;
9036 high = !high;
9038 out = d->target;
9040 if (high)
9042 switch (vmode)
9044 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
9045 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
9046 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
9047 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
9048 case V4SImode: gen = gen_aarch64_zip2v4si; break;
9049 case V2SImode: gen = gen_aarch64_zip2v2si; break;
9050 case V2DImode: gen = gen_aarch64_zip2v2di; break;
9051 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
9052 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
9053 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
9054 default:
9055 return false;
9058 else
9060 switch (vmode)
9062 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
9063 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
9064 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
9065 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
9066 case V4SImode: gen = gen_aarch64_zip1v4si; break;
9067 case V2SImode: gen = gen_aarch64_zip1v2si; break;
9068 case V2DImode: gen = gen_aarch64_zip1v2di; break;
9069 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
9070 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
9071 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
9072 default:
9073 return false;
9077 emit_insn (gen (out, in0, in1));
9078 return true;
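/* For example, for V4SI inputs the (little-endian) index vector
   { 0, 4, 1, 5 } is matched as ZIP1 and { 2, 6, 3, 7 } as ZIP2.  */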
9081 /* Recognize patterns for the EXT insn. */
9083 static bool
9084 aarch64_evpc_ext (struct expand_vec_perm_d *d)
9086 unsigned int i, nelt = d->nelt;
9087 rtx (*gen) (rtx, rtx, rtx, rtx);
9088 rtx offset;
9090 unsigned int location = d->perm[0]; /* Always < nelt. */
9092 /* Check if the extracted indices are increasing by one. */
9093 for (i = 1; i < nelt; i++)
9095 unsigned int required = location + i;
9096 if (d->one_vector_p)
9098 /* We'll pass the same vector in twice, so allow indices to wrap. */
9099 required &= (nelt - 1);
9101 if (d->perm[i] != required)
9102 return false;
9105 switch (d->vmode)
9107 case V16QImode: gen = gen_aarch64_extv16qi; break;
9108 case V8QImode: gen = gen_aarch64_extv8qi; break;
9109 case V4HImode: gen = gen_aarch64_extv4hi; break;
9110 case V8HImode: gen = gen_aarch64_extv8hi; break;
9111 case V2SImode: gen = gen_aarch64_extv2si; break;
9112 case V4SImode: gen = gen_aarch64_extv4si; break;
9113 case V2SFmode: gen = gen_aarch64_extv2sf; break;
9114 case V4SFmode: gen = gen_aarch64_extv4sf; break;
9115 case V2DImode: gen = gen_aarch64_extv2di; break;
9116 case V2DFmode: gen = gen_aarch64_extv2df; break;
9117 default:
9118 return false;
9121 /* Success! */
9122 if (d->testing_p)
9123 return true;
9125 /* The case where (location == 0) is a no-op for both big- and little-endian,
9126 and is removed by the mid-end at optimization levels -O1 and higher. */
9128 if (BYTES_BIG_ENDIAN && (location != 0))
9130 /* After setup, we want the high elements of the first vector (stored
9131 at the LSB end of the register), and the low elements of the second
9132 vector (stored at the MSB end of the register). So swap. */
9133 rtx temp = d->op0;
9134 d->op0 = d->op1;
9135 d->op1 = temp;
9136 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
9137 location = nelt - location;
9140 offset = GEN_INT (location);
9141 emit_insn (gen (d->target, d->op0, d->op1, offset));
9142 return true;
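/* For example, the two-operand V4SI index vector { 1, 2, 3, 4 } is matched
   here and, on little-endian, emitted as an EXT with offset 1; in the
   single-operand case indices may wrap, so { 3, 0, 1, 2 } on V4SI becomes
   an EXT of the vector with itself and offset 3.  */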
9145 /* Recognize patterns for the REV insns. */
9147 static bool
9148 aarch64_evpc_rev (struct expand_vec_perm_d *d)
9150 unsigned int i, j, diff, nelt = d->nelt;
9151 rtx (*gen) (rtx, rtx);
9153 if (!d->one_vector_p)
9154 return false;
9156 diff = d->perm[0];
9157 switch (diff)
9159 case 7:
9160 switch (d->vmode)
9162 case V16QImode: gen = gen_aarch64_rev64v16qi; break;
9163 case V8QImode: gen = gen_aarch64_rev64v8qi; break;
9164 default:
9165 return false;
9167 break;
9168 case 3:
9169 switch (d->vmode)
9171 case V16QImode: gen = gen_aarch64_rev32v16qi; break;
9172 case V8QImode: gen = gen_aarch64_rev32v8qi; break;
9173 case V8HImode: gen = gen_aarch64_rev64v8hi; break;
9174 case V4HImode: gen = gen_aarch64_rev64v4hi; break;
9175 default:
9176 return false;
9178 break;
9179 case 1:
9180 switch (d->vmode)
9182 case V16QImode: gen = gen_aarch64_rev16v16qi; break;
9183 case V8QImode: gen = gen_aarch64_rev16v8qi; break;
9184 case V8HImode: gen = gen_aarch64_rev32v8hi; break;
9185 case V4HImode: gen = gen_aarch64_rev32v4hi; break;
9186 case V4SImode: gen = gen_aarch64_rev64v4si; break;
9187 case V2SImode: gen = gen_aarch64_rev64v2si; break;
9188 case V4SFmode: gen = gen_aarch64_rev64v4sf; break;
9189 case V2SFmode: gen = gen_aarch64_rev64v2sf; break;
9190 default:
9191 return false;
9193 break;
9194 default:
9195 return false;
9198 for (i = 0; i < nelt ; i += diff + 1)
9199 for (j = 0; j <= diff; j += 1)
9201 /* This assertion is guaranteed to hold because the value of diff
9202 is 7, 3 or 1, so each group of diff + 1 indices fits within the
9203 vector and we always have enough elements to check. A permutation
9204 mask whose diff is any other value has already been rejected by
9205 the switch above, so reaching here with one would indicate a bug. */
9206 gcc_assert (i + j < nelt);
9207 if (d->perm[i + j] != i + diff - j)
9208 return false;
9211 /* Success! */
9212 if (d->testing_p)
9213 return true;
9215 emit_insn (gen (d->target, d->op0));
9216 return true;
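/* For example, the single-operand V8QI index vector
   { 7, 6, 5, 4, 3, 2, 1, 0 } (diff == 7) is matched as REV64, while
   { 3, 2, 1, 0, 7, 6, 5, 4 } (diff == 3) is matched as REV32.  */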
9219 static bool
9220 aarch64_evpc_dup (struct expand_vec_perm_d *d)
9222 rtx (*gen) (rtx, rtx, rtx);
9223 rtx out = d->target;
9224 rtx in0;
9225 enum machine_mode vmode = d->vmode;
9226 unsigned int i, elt, nelt = d->nelt;
9227 rtx lane;
9229 /* TODO: This may not be big-endian safe. */
9230 if (BYTES_BIG_ENDIAN)
9231 return false;
9233 elt = d->perm[0];
9234 for (i = 1; i < nelt; i++)
9236 if (elt != d->perm[i])
9237 return false;
9240 /* The generic preparation in aarch64_expand_vec_perm_const_1
9241 swaps the operand order and the permute indices if it finds
9242 d->perm[0] to be in the second operand. Thus, we can always
9243 use d->op0 and need not do any extra arithmetic to get the
9244 correct lane number. */
9245 in0 = d->op0;
9246 lane = GEN_INT (elt);
9248 switch (vmode)
9250 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
9251 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
9252 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
9253 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
9254 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
9255 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
9256 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
9257 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
9258 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
9259 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
9260 default:
9261 return false;
9264 emit_insn (gen (out, in0, lane));
9265 return true;
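/* For example, a V4SI permutation whose indices are all 2 is emitted as a
   DUP of lane 2 of the first operand.  */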
9268 static bool
9269 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
9271 rtx rperm[MAX_VECT_LEN], sel;
9272 enum machine_mode vmode = d->vmode;
9273 unsigned int i, nelt = d->nelt;
9275 if (d->testing_p)
9276 return true;
9278 /* Generic code will try constant permutation twice: once with the
9279 original mode and again with the elements lowered to QImode.
9280 So wait for that retry rather than expanding the selector ourselves. */
9281 if (vmode != V8QImode && vmode != V16QImode)
9282 return false;
9284 for (i = 0; i < nelt; ++i)
9286 int nunits = GET_MODE_NUNITS (vmode);
9288 /* If big-endian and two vectors are used, we end up with a weird
9289 mixed-endian mode in the AdvSIMD register file. Reverse the index
9290 within each word but not the word itself. */
9291 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
9292 : d->perm[i]);
9294 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
9295 sel = force_reg (vmode, sel);
9297 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
9298 return true;
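/* For example, a V8QI permutation that none of the earlier matchers accept,
   say { 0, 3, 1, 4, 2, 5, 6, 7 }, falls through to here and is emitted as a
   TBL with that constant selector forced into a register (each index XORed
   with 7 on big-endian).  */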
9301 static bool
9302 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
9304 /* The pattern matching functions above are written to look for a small
9305 number to begin the sequence (0, 1, N/2). If we begin with an index
9306 from the second operand, we can swap the operands. */
9307 if (d->perm[0] >= d->nelt)
9309 unsigned i, nelt = d->nelt;
9310 rtx x;
9312 gcc_assert (nelt == (nelt & -nelt));
9313 for (i = 0; i < nelt; ++i)
9314 d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
9316 x = d->op0;
9317 d->op0 = d->op1;
9318 d->op1 = x;
9321 if (TARGET_SIMD)
9323 if (aarch64_evpc_rev (d))
9324 return true;
9325 else if (aarch64_evpc_ext (d))
9326 return true;
9327 else if (aarch64_evpc_zip (d))
9328 return true;
9329 else if (aarch64_evpc_uzp (d))
9330 return true;
9331 else if (aarch64_evpc_trn (d))
9332 return true;
9333 else if (aarch64_evpc_dup (d))
9334 return true;
9335 return aarch64_evpc_tbl (d);
9337 return false;
9340 /* Expand a vec_perm_const pattern. */
9342 bool
9343 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
9345 struct expand_vec_perm_d d;
9346 int i, nelt, which;
9348 d.target = target;
9349 d.op0 = op0;
9350 d.op1 = op1;
9352 d.vmode = GET_MODE (target);
9353 gcc_assert (VECTOR_MODE_P (d.vmode));
9354 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
9355 d.testing_p = false;
9357 for (i = which = 0; i < nelt; ++i)
9359 rtx e = XVECEXP (sel, 0, i);
9360 int ei = INTVAL (e) & (2 * nelt - 1);
9361 which |= (ei < nelt ? 1 : 2);
9362 d.perm[i] = ei;
9365 switch (which)
9367 default:
9368 gcc_unreachable ();
9370 case 3:
9371 d.one_vector_p = false;
9372 if (!rtx_equal_p (op0, op1))
9373 break;
9375 /* The elements of PERM do not suggest that only the first operand
9376 is used, but both operands are identical. Allow easier matching
9377 of the permutation by folding the permutation into the single
9378 input vector. */
9379 /* Fall Through. */
9380 case 2:
9381 for (i = 0; i < nelt; ++i)
9382 d.perm[i] &= nelt - 1;
9383 d.op0 = op1;
9384 d.one_vector_p = true;
9385 break;
9387 case 1:
9388 d.op1 = op0;
9389 d.one_vector_p = true;
9390 break;
9393 return aarch64_expand_vec_perm_const_1 (&d);
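/* For example, a V4SI selector of { 4, 5, 6, 7 } only ever references the
   second operand, so WHICH is 2: the indices are rebased to { 0, 1, 2, 3 }
   and op1 is treated as the single input vector.  */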
9396 static bool
9397 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
9398 const unsigned char *sel)
9400 struct expand_vec_perm_d d;
9401 unsigned int i, nelt, which;
9402 bool ret;
9404 d.vmode = vmode;
9405 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
9406 d.testing_p = true;
9407 memcpy (d.perm, sel, nelt);
9409 /* Calculate whether all elements are in one vector. */
9410 for (i = which = 0; i < nelt; ++i)
9412 unsigned char e = d.perm[i];
9413 gcc_assert (e < 2 * nelt);
9414 which |= (e < nelt ? 1 : 2);
9417 /* If all elements are from the second vector, reindex as if from the
9418 first vector. */
9419 if (which == 2)
9420 for (i = 0; i < nelt; ++i)
9421 d.perm[i] -= nelt;
9423 /* Check whether the mask can be applied to a single vector. */
9424 d.one_vector_p = (which != 3);
9426 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
9427 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
9428 if (!d.one_vector_p)
9429 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
9431 start_sequence ();
9432 ret = aarch64_expand_vec_perm_const_1 (&d);
9433 end_sequence ();
9435 return ret;
9438 /* Implement target hook CANNOT_CHANGE_MODE_CLASS. */
9439 bool
9440 aarch64_cannot_change_mode_class (enum machine_mode from,
9441 enum machine_mode to,
9442 enum reg_class rclass)
9444 /* Full-reg subregs are allowed on general regs or any class if they are
9445 the same size. */
9446 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
9447 || !reg_classes_intersect_p (FP_REGS, rclass))
9448 return false;
9450 /* Limited combinations of subregs are safe on FPREGs. Particularly,
9451 1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
9452 2. Scalar to Scalar for integer modes or same size float modes.
9453 3. Vector to Vector modes.
9454 4. On little-endian only, Vector-Structure to Vector modes. */
9455 if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
9457 if (aarch64_vector_mode_supported_p (from)
9458 && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
9459 return false;
9461 if (GET_MODE_NUNITS (from) == 1
9462 && GET_MODE_NUNITS (to) == 1
9463 && (GET_MODE_CLASS (from) == MODE_INT
9464 || from == to))
9465 return false;
9467 if (aarch64_vector_mode_supported_p (from)
9468 && aarch64_vector_mode_supported_p (to))
9469 return false;
9471 /* Within a vector structure straddling multiple vector registers
9472 we are in a mixed-endian representation. As such, we can't
9473 easily change modes for BYTES_BIG_ENDIAN. Otherwise, we can
9474 switch between vectors and vector structures cheaply. */
9475 if (!BYTES_BIG_ENDIAN)
9476 if ((aarch64_vector_mode_supported_p (from)
9477 && aarch64_vect_struct_mode_p (to))
9478 || (aarch64_vector_mode_supported_p (to)
9479 && aarch64_vect_struct_mode_p (from)))
9480 return false;
9483 return true;
9486 /* Implement MODES_TIEABLE_P. */
9488 bool
9489 aarch64_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
9491 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
9492 return true;
9494 /* We specifically want to allow elements of "structure" modes to
9495 be tieable to the structure. This more general condition allows
9496 other rarer situations too. */
9497 if (TARGET_SIMD
9498 && aarch64_vector_mode_p (mode1)
9499 && aarch64_vector_mode_p (mode2))
9500 return true;
9502 return false;
9505 /* Return a new RTX holding the result of moving POINTER forward by
9506 AMOUNT bytes. */
9508 static rtx
9509 aarch64_move_pointer (rtx pointer, int amount)
9511 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
9513 return adjust_automodify_address (pointer, GET_MODE (pointer),
9514 next, amount);
9517 /* Return a new RTX holding the result of moving POINTER forward by the
9518 size of the mode it points to. */
9520 static rtx
9521 aarch64_progress_pointer (rtx pointer)
9523 HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
9525 return aarch64_move_pointer (pointer, amount);
9528 /* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
9529 MODE bytes. */
9531 static void
9532 aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
9533 enum machine_mode mode)
9535 rtx reg = gen_reg_rtx (mode);
9537 /* "Cast" the pointers to the correct mode. */
9538 *src = adjust_address (*src, mode, 0);
9539 *dst = adjust_address (*dst, mode, 0);
9540 /* Emit the memcpy. */
9541 emit_move_insn (reg, *src);
9542 emit_move_insn (*dst, reg);
9543 /* Move the pointers forward. */
9544 *src = aarch64_progress_pointer (*src);
9545 *dst = aarch64_progress_pointer (*dst);
9548 /* Expand movmem, as if from a __builtin_memcpy. Return true if
9549 we succeed, otherwise return false. */
9551 bool
9552 aarch64_expand_movmem (rtx *operands)
9554 unsigned int n;
9555 rtx dst = operands[0];
9556 rtx src = operands[1];
9557 rtx base;
9558 bool speed_p = !optimize_function_for_size_p (cfun);
9560 /* When optimizing for size, give a better estimate of the length of a
9561 memcpy call, but use the default otherwise. */
9562 unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;
9564 /* We can't do anything smart if the amount to copy is not constant. */
9565 if (!CONST_INT_P (operands[2]))
9566 return false;
9568 n = UINTVAL (operands[2]);
9570 /* Try to keep the number of instructions low. For cases below 16 bytes we
9571 need to make at most two moves. For cases above 16 bytes it will be one
9572 move for each 16 byte chunk, then at most two additional moves. */
9573 if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
9574 return false;
9576 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9577 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
9579 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
9580 src = adjust_automodify_address (src, VOIDmode, base, 0);
9582 /* Simple cases. Copy 0-3 bytes, as (if applicable) a 2-byte, then a
9583 1-byte chunk. */
9584 if (n < 4)
9586 if (n >= 2)
9588 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
9589 n -= 2;
9592 if (n == 1)
9593 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
9595 return true;
9598 /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second
9599 4-byte chunk, partially overlapping with the previously copied chunk. */
9600 if (n < 8)
9602 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9603 n -= 4;
9604 if (n > 0)
9606 int move = n - 4;
9608 src = aarch64_move_pointer (src, move);
9609 dst = aarch64_move_pointer (dst, move);
9610 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9612 return true;
9615 /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of
9616 them, then (if applicable) an 8-byte chunk. */
9617 while (n >= 8)
9619 if (n / 16)
9621 aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
9622 n -= 16;
9624 else
9626 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
9627 n -= 8;
9631 /* Finish the final bytes of the copy. We can always do this in one
9632 instruction. We either copy the exact amount we need, or partially
9633 overlap with the previous chunk we copied and copy a wider (4- or 8-byte) chunk. */
9634 if (n == 0)
9635 return true;
9636 else if (n == 1)
9637 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
9638 else if (n == 2)
9639 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
9640 else if (n == 4)
9641 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9642 else
9644 if (n == 3)
9646 src = aarch64_move_pointer (src, -1);
9647 dst = aarch64_move_pointer (dst, -1);
9648 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9650 else
9652 int move = n - 8;
9654 src = aarch64_move_pointer (src, move);
9655 dst = aarch64_move_pointer (dst, move);
9656 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
9660 return true;
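/* As a worked example, a constant 27-byte copy is expanded as one 16-byte
   TImode move, one 8-byte DImode move, and then (since 3 bytes remain) the
   pointers are backed up by one byte and a final overlapping 4-byte SImode
   move covers the tail: three load/store pairs in total, well under the
   instruction budget computed above.  */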
9663 #undef TARGET_ADDRESS_COST
9664 #define TARGET_ADDRESS_COST aarch64_address_cost
9666 /* This hook determines whether unnamed bitfields affect the alignment
9667 of the containing structure. The hook returns true if the structure
9668 should inherit the alignment requirements of an unnamed bitfield's
9669 type. */
9670 #undef TARGET_ALIGN_ANON_BITFIELD
9671 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
9673 #undef TARGET_ASM_ALIGNED_DI_OP
9674 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
9676 #undef TARGET_ASM_ALIGNED_HI_OP
9677 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
9679 #undef TARGET_ASM_ALIGNED_SI_OP
9680 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
9682 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
9683 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
9684 hook_bool_const_tree_hwi_hwi_const_tree_true
9686 #undef TARGET_ASM_FILE_START
9687 #define TARGET_ASM_FILE_START aarch64_start_file
9689 #undef TARGET_ASM_OUTPUT_MI_THUNK
9690 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
9692 #undef TARGET_ASM_SELECT_RTX_SECTION
9693 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
9695 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
9696 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
9698 #undef TARGET_BUILD_BUILTIN_VA_LIST
9699 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
9701 #undef TARGET_CALLEE_COPIES
9702 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
9704 #undef TARGET_CAN_ELIMINATE
9705 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
9707 #undef TARGET_CANNOT_FORCE_CONST_MEM
9708 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
9710 #undef TARGET_CONDITIONAL_REGISTER_USAGE
9711 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
9713 /* Only the least significant bit is used for initialization guard
9714 variables. */
9715 #undef TARGET_CXX_GUARD_MASK_BIT
9716 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
9718 #undef TARGET_C_MODE_FOR_SUFFIX
9719 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
9721 #ifdef TARGET_BIG_ENDIAN_DEFAULT
9722 #undef TARGET_DEFAULT_TARGET_FLAGS
9723 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
9724 #endif
9726 #undef TARGET_CLASS_MAX_NREGS
9727 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
9729 #undef TARGET_BUILTIN_DECL
9730 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
9732 #undef TARGET_EXPAND_BUILTIN
9733 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
9735 #undef TARGET_EXPAND_BUILTIN_VA_START
9736 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
9738 #undef TARGET_FOLD_BUILTIN
9739 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
9741 #undef TARGET_FUNCTION_ARG
9742 #define TARGET_FUNCTION_ARG aarch64_function_arg
9744 #undef TARGET_FUNCTION_ARG_ADVANCE
9745 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
9747 #undef TARGET_FUNCTION_ARG_BOUNDARY
9748 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
9750 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
9751 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
9753 #undef TARGET_FUNCTION_VALUE
9754 #define TARGET_FUNCTION_VALUE aarch64_function_value
9756 #undef TARGET_FUNCTION_VALUE_REGNO_P
9757 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
9759 #undef TARGET_FRAME_POINTER_REQUIRED
9760 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
9762 #undef TARGET_GIMPLE_FOLD_BUILTIN
9763 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
9765 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
9766 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
9768 #undef TARGET_INIT_BUILTINS
9769 #define TARGET_INIT_BUILTINS aarch64_init_builtins
9771 #undef TARGET_LEGITIMATE_ADDRESS_P
9772 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
9774 #undef TARGET_LEGITIMATE_CONSTANT_P
9775 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
9777 #undef TARGET_LIBGCC_CMP_RETURN_MODE
9778 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
9780 #undef TARGET_LRA_P
9781 #define TARGET_LRA_P aarch64_lra_p
9783 #undef TARGET_MANGLE_TYPE
9784 #define TARGET_MANGLE_TYPE aarch64_mangle_type
9786 #undef TARGET_MEMORY_MOVE_COST
9787 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
9789 #undef TARGET_MUST_PASS_IN_STACK
9790 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
9792 /* This target hook should return true if accesses to volatile bitfields
9793 should use the narrowest mode possible. It should return false if these
9794 accesses should use the bitfield container type. */
9795 #undef TARGET_NARROW_VOLATILE_BITFIELD
9796 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
9798 #undef TARGET_OPTION_OVERRIDE
9799 #define TARGET_OPTION_OVERRIDE aarch64_override_options
9801 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
9802 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
9803 aarch64_override_options_after_change
9805 #undef TARGET_PASS_BY_REFERENCE
9806 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
9808 #undef TARGET_PREFERRED_RELOAD_CLASS
9809 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
9811 #undef TARGET_SECONDARY_RELOAD
9812 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
9814 #undef TARGET_SHIFT_TRUNCATION_MASK
9815 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
9817 #undef TARGET_SETUP_INCOMING_VARARGS
9818 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
9820 #undef TARGET_STRUCT_VALUE_RTX
9821 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
9823 #undef TARGET_REGISTER_MOVE_COST
9824 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
9826 #undef TARGET_RETURN_IN_MEMORY
9827 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
9829 #undef TARGET_RETURN_IN_MSB
9830 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
9832 #undef TARGET_RTX_COSTS
9833 #define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
9835 #undef TARGET_SCHED_ISSUE_RATE
9836 #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
9838 #undef TARGET_TRAMPOLINE_INIT
9839 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
9841 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
9842 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
9844 #undef TARGET_VECTOR_MODE_SUPPORTED_P
9845 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
9847 #undef TARGET_ARRAY_MODE_SUPPORTED_P
9848 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
9850 #undef TARGET_VECTORIZE_ADD_STMT_COST
9851 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
9853 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
9854 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
9855 aarch64_builtin_vectorization_cost
9857 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
9858 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
9860 #undef TARGET_VECTORIZE_BUILTINS
9861 #define TARGET_VECTORIZE_BUILTINS
9863 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
9864 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
9865 aarch64_builtin_vectorized_function
9867 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
9868 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
9869 aarch64_autovectorize_vector_sizes
9871 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
9872 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
9873 aarch64_atomic_assign_expand_fenv
9875 /* Section anchor support. */
9877 #undef TARGET_MIN_ANCHOR_OFFSET
9878 #define TARGET_MIN_ANCHOR_OFFSET -256
9880 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
9881 byte offset; we can do much more for larger data types, but have no way
9882 to determine the size of the access. We assume accesses are aligned. */
9883 #undef TARGET_MAX_ANCHOR_OFFSET
9884 #define TARGET_MAX_ANCHOR_OFFSET 4095
9886 #undef TARGET_VECTOR_ALIGNMENT
9887 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
9889 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
9890 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
9891 aarch64_simd_vector_alignment_reachable
9893 /* vec_perm support. */
9895 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
9896 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
9897 aarch64_vectorize_vec_perm_const_ok
9900 #undef TARGET_FIXED_CONDITION_CODE_REGS
9901 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
9903 #undef TARGET_FLAGS_REGNUM
9904 #define TARGET_FLAGS_REGNUM CC_REGNUM
9906 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
9907 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
9909 struct gcc_target targetm = TARGET_INITIALIZER;
9911 #include "gt-aarch64.h"