[official-gcc.git] / gcc / config / aarch64 / aarch64.c
blob: 11654283055996a59a17f721948d0f3cdbb8229b
1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2014 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "tree.h"
29 #include "stringpool.h"
30 #include "stor-layout.h"
31 #include "calls.h"
32 #include "varasm.h"
33 #include "regs.h"
34 #include "df.h"
35 #include "hard-reg-set.h"
36 #include "output.h"
37 #include "expr.h"
38 #include "reload.h"
39 #include "toplev.h"
40 #include "target.h"
41 #include "target-def.h"
42 #include "targhooks.h"
43 #include "ggc.h"
44 #include "function.h"
45 #include "tm_p.h"
46 #include "recog.h"
47 #include "langhooks.h"
48 #include "diagnostic-core.h"
49 #include "pointer-set.h"
50 #include "hash-table.h"
51 #include "vec.h"
52 #include "basic-block.h"
53 #include "tree-ssa-alias.h"
54 #include "internal-fn.h"
55 #include "gimple-fold.h"
56 #include "tree-eh.h"
57 #include "gimple-expr.h"
58 #include "is-a.h"
59 #include "gimple.h"
60 #include "gimplify.h"
61 #include "optabs.h"
62 #include "dwarf2.h"
63 #include "cfgloop.h"
64 #include "tree-vectorizer.h"
65 #include "config/arm/aarch-cost-tables.h"
66 #include "dumpfile.h"
67 #include "builtins.h"
69 /* Defined for convenience. */
70 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
72 /* Classifies an address.
74 ADDRESS_REG_IMM
75 A simple base register plus immediate offset.
77 ADDRESS_REG_WB
78 A base register indexed by immediate offset with writeback.
80 ADDRESS_REG_REG
81 A base register indexed by (optionally scaled) register.
83 ADDRESS_REG_UXTW
84 A base register indexed by (optionally scaled) zero-extended register.
86 ADDRESS_REG_SXTW
87 A base register indexed by (optionally scaled) sign-extended register.
89 ADDRESS_LO_SUM
90 A LO_SUM rtx with a base register and "LO12" symbol relocation.
92 ADDRESS_SYMBOLIC
93 A constant symbolic address, in a PC-relative literal pool. */
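/* Illustrative assembly forms for each class, added as a reading aid;
   the exact operand choices are assumptions based on the AArch64 ISA
   rather than anything computed in this file:
     ADDRESS_REG_IMM     ldr x0, [x1, #16]
     ADDRESS_REG_WB      ldr x0, [x1, #16]!   or   ldr x0, [x1], #16
     ADDRESS_REG_REG     ldr x0, [x1, x2, lsl #3]
     ADDRESS_REG_UXTW    ldr x0, [x1, w2, uxtw #3]
     ADDRESS_REG_SXTW    ldr x0, [x1, w2, sxtw #3]
     ADDRESS_LO_SUM      add x0, x0, #:lo12:sym  (paired with adrp x0, sym)
     ADDRESS_SYMBOLIC    ldr x0, <literal>  (PC-relative literal pool load)  */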
95 enum aarch64_address_type {
96 ADDRESS_REG_IMM,
97 ADDRESS_REG_WB,
98 ADDRESS_REG_REG,
99 ADDRESS_REG_UXTW,
100 ADDRESS_REG_SXTW,
101 ADDRESS_LO_SUM,
102 ADDRESS_SYMBOLIC
105 struct aarch64_address_info {
106 enum aarch64_address_type type;
107 rtx base;
108 rtx offset;
109 int shift;
110 enum aarch64_symbol_type symbol_type;
113 struct simd_immediate_info
115 rtx value;
116 int shift;
117 int element_width;
118 bool mvn;
119 bool msl;
122 /* The current code model. */
123 enum aarch64_code_model aarch64_cmodel;
125 #ifdef HAVE_AS_TLS
126 #undef TARGET_HAVE_TLS
127 #define TARGET_HAVE_TLS 1
128 #endif
130 static bool aarch64_lra_p (void);
131 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
132 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
133 const_tree,
134 enum machine_mode *, int *,
135 bool *);
136 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
137 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
138 static void aarch64_override_options_after_change (void);
139 static bool aarch64_vector_mode_supported_p (enum machine_mode);
140 static unsigned bit_count (unsigned HOST_WIDE_INT);
141 static bool aarch64_const_vec_all_same_int_p (rtx,
142 HOST_WIDE_INT, HOST_WIDE_INT);
144 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
145 const unsigned char *sel);
146 static int aarch64_address_cost (rtx, enum machine_mode, addr_space_t, bool);
148 /* The processor for which instructions should be scheduled. */
149 enum aarch64_processor aarch64_tune = cortexa53;
151 /* The current tuning set. */
152 const struct tune_params *aarch64_tune_params;
154 /* Mask to specify which instructions we are allowed to generate. */
155 unsigned long aarch64_isa_flags = 0;
157 /* Mask to specify which instruction scheduling options should be used. */
158 unsigned long aarch64_tune_flags = 0;
160 /* Tuning parameters. */
162 #if HAVE_DESIGNATED_INITIALIZERS
163 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
164 #else
165 #define NAMED_PARAM(NAME, VAL) (VAL)
166 #endif
168 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
169 __extension__
170 #endif
172 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
173 __extension__
174 #endif
175 static const struct cpu_addrcost_table generic_addrcost_table =
177 #if HAVE_DESIGNATED_INITIALIZERS
178 .addr_scale_costs =
179 #endif
181 NAMED_PARAM (qi, 0),
182 NAMED_PARAM (hi, 0),
183 NAMED_PARAM (si, 0),
184 NAMED_PARAM (ti, 0),
186 NAMED_PARAM (pre_modify, 0),
187 NAMED_PARAM (post_modify, 0),
188 NAMED_PARAM (register_offset, 0),
189 NAMED_PARAM (register_extend, 0),
190 NAMED_PARAM (imm_offset, 0)
193 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
194 __extension__
195 #endif
196 static const struct cpu_addrcost_table cortexa57_addrcost_table =
198 #if HAVE_DESIGNATED_INITIALIZERS
199 .addr_scale_costs =
200 #endif
202 NAMED_PARAM (qi, 0),
203 NAMED_PARAM (hi, 1),
204 NAMED_PARAM (si, 0),
205 NAMED_PARAM (ti, 1),
207 NAMED_PARAM (pre_modify, 0),
208 NAMED_PARAM (post_modify, 0),
209 NAMED_PARAM (register_offset, 0),
210 NAMED_PARAM (register_extend, 0),
211 NAMED_PARAM (imm_offset, 0),
214 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
215 __extension__
216 #endif
217 static const struct cpu_regmove_cost generic_regmove_cost =
219 NAMED_PARAM (GP2GP, 1),
220 NAMED_PARAM (GP2FP, 2),
221 NAMED_PARAM (FP2GP, 2),
222 /* We currently do not provide direct support for TFmode Q->Q move.
223 Therefore we need to raise the cost above 2 in order to have
224 reload handle the situation. */
225 NAMED_PARAM (FP2FP, 4)
228 /* Generic costs for vector insn classes. */
229 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
230 __extension__
231 #endif
232 static const struct cpu_vector_cost generic_vector_cost =
234 NAMED_PARAM (scalar_stmt_cost, 1),
235 NAMED_PARAM (scalar_load_cost, 1),
236 NAMED_PARAM (scalar_store_cost, 1),
237 NAMED_PARAM (vec_stmt_cost, 1),
238 NAMED_PARAM (vec_to_scalar_cost, 1),
239 NAMED_PARAM (scalar_to_vec_cost, 1),
240 NAMED_PARAM (vec_align_load_cost, 1),
241 NAMED_PARAM (vec_unalign_load_cost, 1),
242 NAMED_PARAM (vec_unalign_store_cost, 1),
243 NAMED_PARAM (vec_store_cost, 1),
244 NAMED_PARAM (cond_taken_branch_cost, 3),
245 NAMED_PARAM (cond_not_taken_branch_cost, 1)
248 /* Cortex-A57 costs for vector insn classes. */
249 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
250 __extension__
251 #endif
252 static const struct cpu_vector_cost cortexa57_vector_cost =
254 NAMED_PARAM (scalar_stmt_cost, 1),
255 NAMED_PARAM (scalar_load_cost, 4),
256 NAMED_PARAM (scalar_store_cost, 1),
257 NAMED_PARAM (vec_stmt_cost, 3),
258 NAMED_PARAM (vec_to_scalar_cost, 8),
259 NAMED_PARAM (scalar_to_vec_cost, 8),
260 NAMED_PARAM (vec_align_load_cost, 5),
261 NAMED_PARAM (vec_unalign_load_cost, 5),
262 NAMED_PARAM (vec_unalign_store_cost, 1),
263 NAMED_PARAM (vec_store_cost, 1),
264 NAMED_PARAM (cond_taken_branch_cost, 1),
265 NAMED_PARAM (cond_not_taken_branch_cost, 1)
268 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
269 __extension__
270 #endif
271 static const struct tune_params generic_tunings =
273 &cortexa57_extra_costs,
274 &generic_addrcost_table,
275 &generic_regmove_cost,
276 &generic_vector_cost,
277 NAMED_PARAM (memmov_cost, 4),
278 NAMED_PARAM (issue_rate, 2)
281 static const struct tune_params cortexa53_tunings =
283 &cortexa53_extra_costs,
284 &generic_addrcost_table,
285 &generic_regmove_cost,
286 &generic_vector_cost,
287 NAMED_PARAM (memmov_cost, 4),
288 NAMED_PARAM (issue_rate, 2)
291 static const struct tune_params cortexa57_tunings =
293 &cortexa57_extra_costs,
294 &cortexa57_addrcost_table,
295 &generic_regmove_cost,
296 &cortexa57_vector_cost,
297 NAMED_PARAM (memmov_cost, 4),
298 NAMED_PARAM (issue_rate, 3)
301 /* A processor implementing AArch64. */
302 struct processor
304 const char *const name;
305 enum aarch64_processor core;
306 const char *arch;
307 const unsigned long flags;
308 const struct tune_params *const tune;
311 /* Processor cores implementing AArch64. */
312 static const struct processor all_cores[] =
314 #define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
315 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
316 #include "aarch64-cores.def"
317 #undef AARCH64_CORE
318 {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
319 {NULL, aarch64_none, NULL, 0, NULL}
322 /* Architectures implementing AArch64. */
323 static const struct processor all_architectures[] =
325 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
326 {NAME, CORE, #ARCH, FLAGS, NULL},
327 #include "aarch64-arches.def"
328 #undef AARCH64_ARCH
329 {NULL, aarch64_none, NULL, 0, NULL}
332 /* Target specification. These are populated as command-line arguments
333 are processed, or NULL if not specified. */
334 static const struct processor *selected_arch;
335 static const struct processor *selected_cpu;
336 static const struct processor *selected_tune;
338 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
340 /* An ISA extension in the co-processor and main instruction set space. */
341 struct aarch64_option_extension
343 const char *const name;
344 const unsigned long flags_on;
345 const unsigned long flags_off;
348 /* ISA extensions in AArch64. */
349 static const struct aarch64_option_extension all_extensions[] =
351 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
352 {NAME, FLAGS_ON, FLAGS_OFF},
353 #include "aarch64-option-extensions.def"
354 #undef AARCH64_OPT_EXTENSION
355 {NULL, 0, 0}
358 /* Used to track the size of an address when generating a pre/post
359 increment address. */
360 static enum machine_mode aarch64_memory_reference_mode;
362 /* Used to force GTY into this file. */
363 static GTY(()) int gty_dummy;
365 /* A table of valid AArch64 "bitmask immediate" values for
366 logical instructions. */
368 #define AARCH64_NUM_BITMASKS 5334
369 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
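/* For orientation (a description of the AArch64 encoding, assumed
   rather than derived from this table): a "bitmask immediate" is a
   64-bit value made up of identical 2-, 4-, 8-, 16-, 32- or 64-bit
   elements, each element being a rotated run of contiguous ones.
   E.g. 0x00ff00ff00ff00ff and 0x0000fffffffe0000 are encodable in a
   single AND/ORR/EOR, while 0x1234 is not; 5334 is the number of
   distinct such 64-bit values.  */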
371 typedef enum aarch64_cond_code
373 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
374 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
375 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
377 aarch64_cc;
379 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
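/* This works because the enum above lists the conditions in
   complementary pairs, so flipping bit 0 yields the inverse, e.g.
   AARCH64_EQ (0) <-> AARCH64_NE (1) and AARCH64_GE (10) <-> AARCH64_LT (11).  */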
381 /* The condition codes of the processor, and the inverse function. */
382 static const char * const aarch64_condition_codes[] =
384 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
385 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
388 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
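/* For illustration, assuming the register numbering from the AArch64
   DWARF ABI (x0-x30 -> 0-30, sp -> 31, v0-v31 -> 64-95): x5 maps to
   DWARF register 5, sp to 31 and v3 to 67.  */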
389 unsigned
390 aarch64_dbx_register_number (unsigned regno)
392 if (GP_REGNUM_P (regno))
393 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
394 else if (regno == SP_REGNUM)
395 return AARCH64_DWARF_SP;
396 else if (FP_REGNUM_P (regno))
397 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
399 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
400 equivalent DWARF register. */
401 return DWARF_FRAME_REGISTERS;
404 /* Return TRUE if MODE is any of the large INT modes. */
405 static bool
406 aarch64_vect_struct_mode_p (enum machine_mode mode)
408 return mode == OImode || mode == CImode || mode == XImode;
411 /* Return TRUE if MODE is any of the vector modes. */
412 static bool
413 aarch64_vector_mode_p (enum machine_mode mode)
415 return aarch64_vector_mode_supported_p (mode)
416 || aarch64_vect_struct_mode_p (mode);
419 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
420 static bool
421 aarch64_array_mode_supported_p (enum machine_mode mode,
422 unsigned HOST_WIDE_INT nelems)
424 if (TARGET_SIMD
425 && AARCH64_VALID_SIMD_QREG_MODE (mode)
426 && (nelems >= 2 && nelems <= 4))
427 return true;
429 return false;
432 /* Implement HARD_REGNO_NREGS. */
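/* A worked example, assuming UNITS_PER_WORD == 8 and
   UNITS_PER_VREG == 16 on this target: TImode (16 bytes) occupies two
   X registers but a single V register, while OImode (32 bytes)
   occupies two V registers.  */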
435 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
437 switch (aarch64_regno_regclass (regno))
439 case FP_REGS:
440 case FP_LO_REGS:
441 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
442 default:
443 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
445 gcc_unreachable ();
448 /* Implement HARD_REGNO_MODE_OK. */
451 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
453 if (GET_MODE_CLASS (mode) == MODE_CC)
454 return regno == CC_REGNUM;
456 if (regno == SP_REGNUM)
457 /* The purpose of comparing with ptr_mode is to support the
458 global register variable associated with the stack pointer
459 register via the syntax of asm ("wsp") in ILP32. */
460 return mode == Pmode || mode == ptr_mode;
462 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
463 return mode == Pmode;
465 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
466 return 1;
468 if (FP_REGNUM_P (regno))
470 if (aarch64_vect_struct_mode_p (mode))
471 return
472 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
473 else
474 return 1;
477 return 0;
480 /* Implement HARD_REGNO_CALLER_SAVE_MODE. */
481 enum machine_mode
482 aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
483 enum machine_mode mode)
485 /* Handle modes that fit within single registers. */
486 if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
488 if (GET_MODE_SIZE (mode) >= 4)
489 return mode;
490 else
491 return SImode;
493 /* Fall back to generic for multi-reg and very large modes. */
494 else
495 return choose_hard_reg_mode (regno, nregs, false);
498 /* Return true if calls to DECL should be treated as
499 long-calls (i.e. called via a register). */
500 static bool
501 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
503 return false;
506 /* Return true if calls to symbol-ref SYM should be treated as
507 long-calls (i.e. called via a register). */
508 bool
509 aarch64_is_long_call_p (rtx sym)
511 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
514 /* Return true if the offsets to a zero/sign-extract operation
515 represent an expression that matches an extend operation. The
516 operands represent the parameters from
518 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
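/* An assumed example to make the checks below concrete: with
   MODE == DImode, MULT_IMM == 4 and EXTRACT_IMM == 34 we have
   34 & ~7 == 32 (a power of two), 34 & 7 == 2 and 4 == 1 << 2, so the
   expression describes a 32-bit value extended and shifted left by 2,
   i.e. an {u,s}xtw #2 style extend.  */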
519 bool
520 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
521 rtx extract_imm)
523 HOST_WIDE_INT mult_val, extract_val;
525 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
526 return false;
528 mult_val = INTVAL (mult_imm);
529 extract_val = INTVAL (extract_imm);
531 if (extract_val > 8
532 && extract_val < GET_MODE_BITSIZE (mode)
533 && exact_log2 (extract_val & ~7) > 0
534 && (extract_val & 7) <= 4
535 && mult_val == (1 << (extract_val & 7)))
536 return true;
538 return false;
541 /* Emit an insn that's a simple single-set. Both the operands must be
542 known to be valid. */
543 inline static rtx
544 emit_set_insn (rtx x, rtx y)
546 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
549 /* X and Y are two things to compare using CODE. Emit the compare insn and
550 return the rtx for the CC register in the proper mode. */
552 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
554 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
555 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
557 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
558 return cc_reg;
561 /* Build the SYMBOL_REF for __tls_get_addr. */
563 static GTY(()) rtx tls_get_addr_libfunc;
566 aarch64_tls_get_addr (void)
568 if (!tls_get_addr_libfunc)
569 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
570 return tls_get_addr_libfunc;
573 /* Return the TLS model to use for ADDR. */
575 static enum tls_model
576 tls_symbolic_operand_type (rtx addr)
578 enum tls_model tls_kind = TLS_MODEL_NONE;
579 rtx sym, addend;
581 if (GET_CODE (addr) == CONST)
583 split_const (addr, &sym, &addend);
584 if (GET_CODE (sym) == SYMBOL_REF)
585 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
587 else if (GET_CODE (addr) == SYMBOL_REF)
588 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
590 return tls_kind;
593 /* We allow LO_SUMs in our legitimate addresses so that combine
594 can take care of combining addresses where necessary; for
595 generation purposes, however, we generate the address
596 as:
597 RTL Absolute
598 tmp = hi (symbol_ref); adrp x1, foo
599 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
602 PIC TLS
603 adrp x1, :got:foo adrp tmp, :tlsgd:foo
604 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
605 bl __tls_get_addr
608 Load TLS symbol, depending on TLS mechanism and TLS access model.
610 Global Dynamic - Traditional TLS:
611 adrp tmp, :tlsgd:imm
612 add dest, tmp, #:tlsgd_lo12:imm
613 bl __tls_get_addr
615 Global Dynamic - TLS Descriptors:
616 adrp dest, :tlsdesc:imm
617 ldr tmp, [dest, #:tlsdesc_lo12:imm]
618 add dest, dest, #:tlsdesc_lo12:imm
619 blr tmp
620 mrs tp, tpidr_el0
621 add dest, dest, tp
623 Initial Exec:
624 mrs tp, tpidr_el0
625 adrp tmp, :gottprel:imm
626 ldr dest, [tmp, #:gottprel_lo12:imm]
627 add dest, dest, tp
629 Local Exec:
630 mrs tp, tpidr_el0
631 add t0, tp, #:tprel_hi12:imm
632 add t0, #:tprel_lo12_nc:imm
635 static void
636 aarch64_load_symref_appropriately (rtx dest, rtx imm,
637 enum aarch64_symbol_type type)
639 switch (type)
641 case SYMBOL_SMALL_ABSOLUTE:
643 /* In ILP32, the mode of dest can be either SImode or DImode. */
644 rtx tmp_reg = dest;
645 enum machine_mode mode = GET_MODE (dest);
647 gcc_assert (mode == Pmode || mode == ptr_mode);
649 if (can_create_pseudo_p ())
650 tmp_reg = gen_reg_rtx (mode);
652 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
653 emit_insn (gen_add_losym (dest, tmp_reg, imm));
654 return;
657 case SYMBOL_TINY_ABSOLUTE:
658 emit_insn (gen_rtx_SET (Pmode, dest, imm));
659 return;
661 case SYMBOL_SMALL_GOT:
663 /* In ILP32, the mode of dest can be either SImode or DImode,
664 while the got entry is always of SImode size. The mode of
665 dest depends on how dest is used: if dest is assigned to a
666 pointer (e.g. in memory), it has SImode; it may have
667 DImode if dest is dereferenced to access the memory.
668 This is why we have to handle three different ldr_got_small
669 patterns here (two patterns for ILP32). */
670 rtx tmp_reg = dest;
671 enum machine_mode mode = GET_MODE (dest);
673 if (can_create_pseudo_p ())
674 tmp_reg = gen_reg_rtx (mode);
676 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
677 if (mode == ptr_mode)
679 if (mode == DImode)
680 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
681 else
682 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
684 else
686 gcc_assert (mode == Pmode);
687 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
690 return;
693 case SYMBOL_SMALL_TLSGD:
695 rtx insns;
696 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
698 start_sequence ();
699 aarch64_emit_call_insn (gen_tlsgd_small (result, imm));
700 insns = get_insns ();
701 end_sequence ();
703 RTL_CONST_CALL_P (insns) = 1;
704 emit_libcall_block (insns, dest, result, imm);
705 return;
708 case SYMBOL_SMALL_TLSDESC:
710 enum machine_mode mode = GET_MODE (dest);
711 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
712 rtx tp;
714 gcc_assert (mode == Pmode || mode == ptr_mode);
716 /* In ILP32, the got entry is always of SImode size. Unlike
717 small GOT, the dest is fixed at reg 0. */
718 if (TARGET_ILP32)
719 emit_insn (gen_tlsdesc_small_si (imm));
720 else
721 emit_insn (gen_tlsdesc_small_di (imm));
722 tp = aarch64_load_tp (NULL);
724 if (mode != Pmode)
725 tp = gen_lowpart (mode, tp);
727 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
728 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
729 return;
732 case SYMBOL_SMALL_GOTTPREL:
734 /* In ILP32, the mode of dest can be either SImode or DImode,
735 while the got entry is always of SImode size. The mode of
736 dest depends on how dest is used: if dest is assigned to a
737 pointer (e.g. in memory), it has SImode; it may have
738 DImode if dest is dereferenced to access the memory.
739 This is why we have to handle three different tlsie_small
740 patterns here (two patterns for ILP32). */
741 enum machine_mode mode = GET_MODE (dest);
742 rtx tmp_reg = gen_reg_rtx (mode);
743 rtx tp = aarch64_load_tp (NULL);
745 if (mode == ptr_mode)
747 if (mode == DImode)
748 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
749 else
751 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
752 tp = gen_lowpart (mode, tp);
755 else
757 gcc_assert (mode == Pmode);
758 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
761 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
762 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
763 return;
766 case SYMBOL_SMALL_TPREL:
768 rtx tp = aarch64_load_tp (NULL);
769 emit_insn (gen_tlsle_small (dest, tp, imm));
770 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
771 return;
774 case SYMBOL_TINY_GOT:
775 emit_insn (gen_ldr_got_tiny (dest, imm));
776 return;
778 default:
779 gcc_unreachable ();
783 /* Emit a move from SRC to DEST. Assume that the move expanders can
784 handle all moves if !can_create_pseudo_p (). The distinction is
785 important because, unlike emit_move_insn, the move expanders know
786 how to force Pmode objects into the constant pool even when the
787 constant pool address is not itself legitimate. */
788 static rtx
789 aarch64_emit_move (rtx dest, rtx src)
791 return (can_create_pseudo_p ()
792 ? emit_move_insn (dest, src)
793 : emit_move_insn_1 (dest, src));
796 /* Split a 128-bit move operation into two 64-bit move operations,
797 taking care to handle partial overlap of register to register
798 copies. Special cases are needed when moving between GP regs and
799 FP regs. SRC can be a register, constant or memory; DST a register
800 or memory. If either operand is memory it must not have any side
801 effects. */
802 void
803 aarch64_split_128bit_move (rtx dst, rtx src)
805 rtx dst_lo, dst_hi;
806 rtx src_lo, src_hi;
808 enum machine_mode mode = GET_MODE (dst);
810 gcc_assert (mode == TImode || mode == TFmode);
811 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
812 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
814 if (REG_P (dst) && REG_P (src))
816 int src_regno = REGNO (src);
817 int dst_regno = REGNO (dst);
819 /* Handle FP <-> GP regs. */
820 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
822 src_lo = gen_lowpart (word_mode, src);
823 src_hi = gen_highpart (word_mode, src);
825 if (mode == TImode)
827 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
828 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
830 else
832 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
833 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
835 return;
837 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
839 dst_lo = gen_lowpart (word_mode, dst);
840 dst_hi = gen_highpart (word_mode, dst);
842 if (mode == TImode)
844 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
845 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
847 else
849 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
850 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
852 return;
856 dst_lo = gen_lowpart (word_mode, dst);
857 dst_hi = gen_highpart (word_mode, dst);
858 src_lo = gen_lowpart (word_mode, src);
859 src_hi = gen_highpart_mode (word_mode, mode, src);
861 /* At most one pairing may overlap. */
862 if (reg_overlap_mentioned_p (dst_lo, src_hi))
864 aarch64_emit_move (dst_hi, src_hi);
865 aarch64_emit_move (dst_lo, src_lo);
867 else
869 aarch64_emit_move (dst_lo, src_lo);
870 aarch64_emit_move (dst_hi, src_hi);
874 bool
875 aarch64_split_128bit_move_p (rtx dst, rtx src)
877 return (! REG_P (src)
878 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
881 /* Split a complex SIMD combine. */
883 void
884 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
886 enum machine_mode src_mode = GET_MODE (src1);
887 enum machine_mode dst_mode = GET_MODE (dst);
889 gcc_assert (VECTOR_MODE_P (dst_mode));
891 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
893 rtx (*gen) (rtx, rtx, rtx);
895 switch (src_mode)
897 case V8QImode:
898 gen = gen_aarch64_simd_combinev8qi;
899 break;
900 case V4HImode:
901 gen = gen_aarch64_simd_combinev4hi;
902 break;
903 case V2SImode:
904 gen = gen_aarch64_simd_combinev2si;
905 break;
906 case V2SFmode:
907 gen = gen_aarch64_simd_combinev2sf;
908 break;
909 case DImode:
910 gen = gen_aarch64_simd_combinedi;
911 break;
912 case DFmode:
913 gen = gen_aarch64_simd_combinedf;
914 break;
915 default:
916 gcc_unreachable ();
919 emit_insn (gen (dst, src1, src2));
920 return;
924 /* Split a complex SIMD move. */
926 void
927 aarch64_split_simd_move (rtx dst, rtx src)
929 enum machine_mode src_mode = GET_MODE (src);
930 enum machine_mode dst_mode = GET_MODE (dst);
932 gcc_assert (VECTOR_MODE_P (dst_mode));
934 if (REG_P (dst) && REG_P (src))
936 rtx (*gen) (rtx, rtx);
938 gcc_assert (VECTOR_MODE_P (src_mode));
940 switch (src_mode)
942 case V16QImode:
943 gen = gen_aarch64_split_simd_movv16qi;
944 break;
945 case V8HImode:
946 gen = gen_aarch64_split_simd_movv8hi;
947 break;
948 case V4SImode:
949 gen = gen_aarch64_split_simd_movv4si;
950 break;
951 case V2DImode:
952 gen = gen_aarch64_split_simd_movv2di;
953 break;
954 case V4SFmode:
955 gen = gen_aarch64_split_simd_movv4sf;
956 break;
957 case V2DFmode:
958 gen = gen_aarch64_split_simd_movv2df;
959 break;
960 default:
961 gcc_unreachable ();
964 emit_insn (gen (dst, src));
965 return;
969 static rtx
970 aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
972 if (can_create_pseudo_p ())
973 return force_reg (mode, value);
974 else
976 x = aarch64_emit_move (x, value);
977 return x;
982 static rtx
983 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
985 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
987 rtx high;
988 /* Load the full offset into a register. This
989 might be improvable in the future. */
990 high = GEN_INT (offset);
991 offset = 0;
992 high = aarch64_force_temporary (mode, temp, high);
993 reg = aarch64_force_temporary (mode, temp,
994 gen_rtx_PLUS (mode, high, reg));
996 return plus_constant (mode, reg, offset);
999 void
1000 aarch64_expand_mov_immediate (rtx dest, rtx imm)
1002 enum machine_mode mode = GET_MODE (dest);
1003 unsigned HOST_WIDE_INT mask;
1004 int i;
1005 bool first;
1006 unsigned HOST_WIDE_INT val;
1007 bool subtargets;
1008 rtx subtarget;
1009 int one_match, zero_match;
1011 gcc_assert (mode == SImode || mode == DImode);
1013 /* Check on what type of symbol it is. */
1014 if (GET_CODE (imm) == SYMBOL_REF
1015 || GET_CODE (imm) == LABEL_REF
1016 || GET_CODE (imm) == CONST)
1018 rtx mem, base, offset;
1019 enum aarch64_symbol_type sty;
1021 /* If we have (const (plus symbol offset)), separate out the offset
1022 before we start classifying the symbol. */
1023 split_const (imm, &base, &offset);
1025 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
1026 switch (sty)
1028 case SYMBOL_FORCE_TO_MEM:
1029 if (offset != const0_rtx
1030 && targetm.cannot_force_const_mem (mode, imm))
1032 gcc_assert (can_create_pseudo_p ());
1033 base = aarch64_force_temporary (mode, dest, base);
1034 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1035 aarch64_emit_move (dest, base);
1036 return;
1038 mem = force_const_mem (ptr_mode, imm);
1039 gcc_assert (mem);
1040 if (mode != ptr_mode)
1041 mem = gen_rtx_ZERO_EXTEND (mode, mem);
1042 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1043 return;
1045 case SYMBOL_SMALL_TLSGD:
1046 case SYMBOL_SMALL_TLSDESC:
1047 case SYMBOL_SMALL_GOTTPREL:
1048 case SYMBOL_SMALL_GOT:
1049 case SYMBOL_TINY_GOT:
1050 if (offset != const0_rtx)
1052 gcc_assert(can_create_pseudo_p ());
1053 base = aarch64_force_temporary (mode, dest, base);
1054 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1055 aarch64_emit_move (dest, base);
1056 return;
1058 /* FALLTHRU */
1060 case SYMBOL_SMALL_TPREL:
1061 case SYMBOL_SMALL_ABSOLUTE:
1062 case SYMBOL_TINY_ABSOLUTE:
1063 aarch64_load_symref_appropriately (dest, imm, sty);
1064 return;
1066 default:
1067 gcc_unreachable ();
1071 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
1073 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1074 return;
1077 if (!CONST_INT_P (imm))
1079 if (GET_CODE (imm) == HIGH)
1080 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1081 else
1083 rtx mem = force_const_mem (mode, imm);
1084 gcc_assert (mem);
1085 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1088 return;
1091 if (mode == SImode)
1093 /* We know we can't do this in 1 insn, and we must be able to do it
1094 in two; so don't mess around looking for sequences that don't buy
1095 us anything. */
1096 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
1097 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1098 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
1099 return;
1102 /* Remaining cases are all for DImode. */
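/* A rough road map of the DImode strategies tried below, restating the
   code rather than adding to it: first count the 16-bit chunks of VAL
   that are all-ones or all-zeros; two all-ones chunks suggest a
   MOVN-style start plus one insertion, two all-zero chunks go straight
   to the MOVZ/MOVK fallback; otherwise try a nearby value plus a
   single 12-bit ADD/SUB, then arithmetic or logical combinations with
   a bitmask immediate, and finally MOVZ plus up to three MOVK
   insertions.  */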
1104 val = INTVAL (imm);
1105 subtargets = optimize && can_create_pseudo_p ();
1107 one_match = 0;
1108 zero_match = 0;
1109 mask = 0xffff;
1111 for (i = 0; i < 64; i += 16, mask <<= 16)
1113 if ((val & mask) == 0)
1114 zero_match++;
1115 else if ((val & mask) == mask)
1116 one_match++;
1119 if (one_match == 2)
1121 mask = 0xffff;
1122 for (i = 0; i < 64; i += 16, mask <<= 16)
1124 if ((val & mask) != mask)
1126 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1127 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1128 GEN_INT ((val >> i) & 0xffff)));
1129 return;
1132 gcc_unreachable ();
1135 if (zero_match == 2)
1136 goto simple_sequence;
1138 mask = 0x0ffff0000UL;
1139 for (i = 16; i < 64; i += 16, mask <<= 16)
1141 HOST_WIDE_INT comp = mask & ~(mask - 1);
1143 if (aarch64_uimm12_shift (val - (val & mask)))
1145 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1147 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1148 emit_insn (gen_adddi3 (dest, subtarget,
1149 GEN_INT (val - (val & mask))));
1150 return;
1152 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1154 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1156 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1157 GEN_INT ((val + comp) & mask)));
1158 emit_insn (gen_adddi3 (dest, subtarget,
1159 GEN_INT (val - ((val + comp) & mask))));
1160 return;
1162 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1164 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1166 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1167 GEN_INT ((val - comp) | ~mask)));
1168 emit_insn (gen_adddi3 (dest, subtarget,
1169 GEN_INT (val - ((val - comp) | ~mask))));
1170 return;
1172 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1174 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1176 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1177 GEN_INT (val | ~mask)));
1178 emit_insn (gen_adddi3 (dest, subtarget,
1179 GEN_INT (val - (val | ~mask))));
1180 return;
1184 /* See if we can do it by arithmetically combining two
1185 immediates. */
1186 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1188 int j;
1189 mask = 0xffff;
1191 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1192 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1194 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1195 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1196 GEN_INT (aarch64_bitmasks[i])));
1197 emit_insn (gen_adddi3 (dest, subtarget,
1198 GEN_INT (val - aarch64_bitmasks[i])));
1199 return;
1202 for (j = 0; j < 64; j += 16, mask <<= 16)
1204 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1206 emit_insn (gen_rtx_SET (VOIDmode, dest,
1207 GEN_INT (aarch64_bitmasks[i])));
1208 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1209 GEN_INT ((val >> j) & 0xffff)));
1210 return;
1215 /* See if we can do it by logically combining two immediates. */
1216 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1218 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1220 int j;
1222 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1223 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1225 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1226 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1227 GEN_INT (aarch64_bitmasks[i])));
1228 emit_insn (gen_iordi3 (dest, subtarget,
1229 GEN_INT (aarch64_bitmasks[j])));
1230 return;
1233 else if ((val & aarch64_bitmasks[i]) == val)
1235 int j;
1237 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1238 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1241 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1242 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1243 GEN_INT (aarch64_bitmasks[j])));
1244 emit_insn (gen_anddi3 (dest, subtarget,
1245 GEN_INT (aarch64_bitmasks[i])));
1246 return;
1251 simple_sequence:
1252 first = true;
1253 mask = 0xffff;
1254 for (i = 0; i < 64; i += 16, mask <<= 16)
1256 if ((val & mask) != 0)
1258 if (first)
1260 emit_insn (gen_rtx_SET (VOIDmode, dest,
1261 GEN_INT (val & mask)));
1262 first = false;
1264 else
1265 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1266 GEN_INT ((val >> i) & 0xffff)));
1271 static bool
1272 aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
1273 tree exp ATTRIBUTE_UNUSED)
1275 /* Currently, always true. */
1276 return true;
1279 /* Implement TARGET_PASS_BY_REFERENCE. */
1281 static bool
1282 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1283 enum machine_mode mode,
1284 const_tree type,
1285 bool named ATTRIBUTE_UNUSED)
1287 HOST_WIDE_INT size;
1288 enum machine_mode dummymode;
1289 int nregs;
1291 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1292 size = (mode == BLKmode && type)
1293 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1295 /* Aggregates are passed by reference based on their size. */
1296 if (type && AGGREGATE_TYPE_P (type))
1298 size = int_size_in_bytes (type);
1301 /* Variable-sized arguments are always passed by reference. */
1302 if (size < 0)
1303 return true;
1305 /* Can this be a candidate to be passed in fp/simd register(s)? */
1306 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1307 &dummymode, &nregs,
1308 NULL))
1309 return false;
1311 /* Arguments which are variable sized or larger than 2 registers are
1312 passed by reference unless they are a homogeneous floating-point
1313 aggregate. */
1314 return size > 2 * UNITS_PER_WORD;
1317 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1318 static bool
1319 aarch64_return_in_msb (const_tree valtype)
1321 enum machine_mode dummy_mode;
1322 int dummy_int;
1324 /* Never happens in little-endian mode. */
1325 if (!BYTES_BIG_ENDIAN)
1326 return false;
1328 /* Only composite types smaller than or equal to 16 bytes can
1329 be potentially returned in registers. */
1330 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1331 || int_size_in_bytes (valtype) <= 0
1332 || int_size_in_bytes (valtype) > 16)
1333 return false;
1335 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1336 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1337 is always passed/returned in the least significant bits of fp/simd
1338 register(s). */
1339 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1340 &dummy_mode, &dummy_int, NULL))
1341 return false;
1343 return true;
1346 /* Implement TARGET_FUNCTION_VALUE.
1347 Define how to find the value returned by a function. */
1349 static rtx
1350 aarch64_function_value (const_tree type, const_tree func,
1351 bool outgoing ATTRIBUTE_UNUSED)
1353 enum machine_mode mode;
1354 int unsignedp;
1355 int count;
1356 enum machine_mode ag_mode;
1358 mode = TYPE_MODE (type);
1359 if (INTEGRAL_TYPE_P (type))
1360 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1362 if (aarch64_return_in_msb (type))
1364 HOST_WIDE_INT size = int_size_in_bytes (type);
1366 if (size % UNITS_PER_WORD != 0)
1368 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1369 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1373 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1374 &ag_mode, &count, NULL))
1376 if (!aarch64_composite_type_p (type, mode))
1378 gcc_assert (count == 1 && mode == ag_mode);
1379 return gen_rtx_REG (mode, V0_REGNUM);
1381 else
1383 int i;
1384 rtx par;
1386 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1387 for (i = 0; i < count; i++)
1389 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1390 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1391 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1392 XVECEXP (par, 0, i) = tmp;
1394 return par;
1397 else
1398 return gen_rtx_REG (mode, R0_REGNUM);
1401 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1402 Return true if REGNO is the number of a hard register in which the values
1403 of called function may come back. */
1405 static bool
1406 aarch64_function_value_regno_p (const unsigned int regno)
1408 /* A maximum of 16 bytes can be returned in the general registers. Examples
1409 of 16-byte return values are: 128-bit integers and 16-byte small
1410 structures (excluding homogeneous floating-point aggregates). */
1411 if (regno == R0_REGNUM || regno == R1_REGNUM)
1412 return true;
1414 /* Up to four fp/simd registers can return a function value, e.g. a
1415 homogeneous floating-point aggregate having four members. */
1416 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1417 return !TARGET_GENERAL_REGS_ONLY;
1419 return false;
1422 /* Implement TARGET_RETURN_IN_MEMORY.
1424 If the type T of the result of a function is such that
1425 void func (T arg)
1426 would require that arg be passed as a value in a register (or set of
1427 registers) according to the parameter passing rules, then the result
1428 is returned in the same registers as would be used for such an
1429 argument. */
1431 static bool
1432 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1434 HOST_WIDE_INT size;
1435 enum machine_mode ag_mode;
1436 int count;
1438 if (!AGGREGATE_TYPE_P (type)
1439 && TREE_CODE (type) != COMPLEX_TYPE
1440 && TREE_CODE (type) != VECTOR_TYPE)
1441 /* Simple scalar types are always returned in registers. */
1442 return false;
1444 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1445 type,
1446 &ag_mode,
1447 &count,
1448 NULL))
1449 return false;
1451 /* Types larger than 2 registers are returned in memory. */
1452 size = int_size_in_bytes (type);
1453 return (size < 0 || size > 2 * UNITS_PER_WORD);
1456 static bool
1457 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1458 const_tree type, int *nregs)
1460 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1461 return aarch64_vfp_is_call_or_return_candidate (mode,
1462 type,
1463 &pcum->aapcs_vfp_rmode,
1464 nregs,
1465 NULL);
1468 /* Given MODE and TYPE of a function argument, return the alignment in
1469 bits. The idea is to suppress any stronger alignment requested by
1470 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1471 This is a helper function for local use only. */
1473 static unsigned int
1474 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1476 unsigned int alignment;
1478 if (type)
1480 if (!integer_zerop (TYPE_SIZE (type)))
1482 if (TYPE_MODE (type) == mode)
1483 alignment = TYPE_ALIGN (type);
1484 else
1485 alignment = GET_MODE_ALIGNMENT (mode);
1487 else
1488 alignment = 0;
1490 else
1491 alignment = GET_MODE_ALIGNMENT (mode);
1493 return alignment;
1496 /* Layout a function argument according to the AAPCS64 rules. The rule
1497 numbers refer to the rule numbers in the AAPCS64. */
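/* Some assumed examples of how these rules play out: a double is
   assigned the next SIMD/FP register; an HFA of four floats takes four
   consecutive SIMD/FP registers, or goes to the stack with the NSRN
   set to 8 if fewer than four remain; a 16-byte struct of integers
   takes a pair of general registers, with the NGRN rounded up to an
   even number first if the struct is 16-byte aligned; larger
   aggregates arrive here already replaced by a pointer via the
   pass-by-reference rules above.  */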
1499 static void
1500 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1501 const_tree type,
1502 bool named ATTRIBUTE_UNUSED)
1504 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1505 int ncrn, nvrn, nregs;
1506 bool allocate_ncrn, allocate_nvrn;
1507 HOST_WIDE_INT size;
1509 /* We need to do this once per argument. */
1510 if (pcum->aapcs_arg_processed)
1511 return;
1513 pcum->aapcs_arg_processed = true;
1515 /* Size in bytes, rounded to the nearest multiple of 8 bytes. */
1516 size
1517 = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
1518 UNITS_PER_WORD);
1520 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1521 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1522 mode,
1523 type,
1524 &nregs);
1526 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1527 The following code thus handles passing by SIMD/FP registers first. */
1529 nvrn = pcum->aapcs_nvrn;
1531 /* C.1 - C.5 for floating point, homogeneous floating-point aggregates (HFA)
1532 and homogeneous short-vector aggregates (HVA). */
1533 if (allocate_nvrn)
1535 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1537 pcum->aapcs_nextnvrn = nvrn + nregs;
1538 if (!aarch64_composite_type_p (type, mode))
1540 gcc_assert (nregs == 1);
1541 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1543 else
1545 rtx par;
1546 int i;
1547 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1548 for (i = 0; i < nregs; i++)
1550 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1551 V0_REGNUM + nvrn + i);
1552 tmp = gen_rtx_EXPR_LIST
1553 (VOIDmode, tmp,
1554 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1555 XVECEXP (par, 0, i) = tmp;
1557 pcum->aapcs_reg = par;
1559 return;
1561 else
1563 /* C.3 NSRN is set to 8. */
1564 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1565 goto on_stack;
1569 ncrn = pcum->aapcs_ncrn;
1570 nregs = size / UNITS_PER_WORD;
1572 /* C.6 - C.9, though the sign and zero extension semantics are
1573 handled elsewhere. This is the case where the argument fits
1574 entirely in general registers. */
1575 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1577 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1579 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1581 /* C.8 if the argument has an alignment of 16 then the NGRN is
1582 rounded up to the next even number. */
1583 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1585 ++ncrn;
1586 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1588 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1589 A reg is still generated for it, but the caller should be smart
1590 enough not to use it. */
1591 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1593 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1595 else
1597 rtx par;
1598 int i;
1600 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1601 for (i = 0; i < nregs; i++)
1603 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1604 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1605 GEN_INT (i * UNITS_PER_WORD));
1606 XVECEXP (par, 0, i) = tmp;
1608 pcum->aapcs_reg = par;
1611 pcum->aapcs_nextncrn = ncrn + nregs;
1612 return;
1615 /* C.11 */
1616 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1618 /* The argument is passed on stack; record the needed number of words for
1619 this argument and align the total size if necessary. */
1620 on_stack:
1621 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
1622 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1623 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1624 16 / UNITS_PER_WORD);
1625 return;
1628 /* Implement TARGET_FUNCTION_ARG. */
1630 static rtx
1631 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1632 const_tree type, bool named)
1634 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1635 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1637 if (mode == VOIDmode)
1638 return NULL_RTX;
1640 aarch64_layout_arg (pcum_v, mode, type, named);
1641 return pcum->aapcs_reg;
1644 void
1645 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1646 const_tree fntype ATTRIBUTE_UNUSED,
1647 rtx libname ATTRIBUTE_UNUSED,
1648 const_tree fndecl ATTRIBUTE_UNUSED,
1649 unsigned n_named ATTRIBUTE_UNUSED)
1651 pcum->aapcs_ncrn = 0;
1652 pcum->aapcs_nvrn = 0;
1653 pcum->aapcs_nextncrn = 0;
1654 pcum->aapcs_nextnvrn = 0;
1655 pcum->pcs_variant = ARM_PCS_AAPCS64;
1656 pcum->aapcs_reg = NULL_RTX;
1657 pcum->aapcs_arg_processed = false;
1658 pcum->aapcs_stack_words = 0;
1659 pcum->aapcs_stack_size = 0;
1661 return;
1664 static void
1665 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1666 enum machine_mode mode,
1667 const_tree type,
1668 bool named)
1670 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1671 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1673 aarch64_layout_arg (pcum_v, mode, type, named);
1674 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1675 != (pcum->aapcs_stack_words != 0));
1676 pcum->aapcs_arg_processed = false;
1677 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1678 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1679 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1680 pcum->aapcs_stack_words = 0;
1681 pcum->aapcs_reg = NULL_RTX;
1685 bool
1686 aarch64_function_arg_regno_p (unsigned regno)
1688 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1689 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1692 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1693 PARM_BOUNDARY bits of alignment, but will be given anything up
1694 to STACK_BOUNDARY bits if the type requires it. This makes sure
1695 that both before and after the layout of each argument, the Next
1696 Stacked Argument Address (NSAA) will have a minimum alignment of
1697 8 bytes. */
1699 static unsigned int
1700 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1702 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1704 if (alignment < PARM_BOUNDARY)
1705 alignment = PARM_BOUNDARY;
1706 if (alignment > STACK_BOUNDARY)
1707 alignment = STACK_BOUNDARY;
1708 return alignment;
1711 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1713 Return true if an argument passed on the stack should be padded upwards,
1714 i.e. if the least-significant byte of the stack slot has useful data.
1716 Small aggregate types are placed at the lowest memory address.
1718 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
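/* An assumed illustration: on a big-endian target a char argument
   placed on the stack is padded downward, so its byte occupies the
   highest byte address of its 8-byte slot, whereas a 3-byte struct is
   padded upward and occupies the three lowest byte addresses of the
   slot.  */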
1720 bool
1721 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1723 /* On little-endian targets, the least significant byte of every stack
1724 argument is passed at the lowest byte address of the stack slot. */
1725 if (!BYTES_BIG_ENDIAN)
1726 return true;
1728 /* Otherwise, integral, floating-point and pointer types are padded downward:
1729 the least significant byte of a stack argument is passed at the highest
1730 byte address of the stack slot. */
1731 if (type
1732 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1733 || POINTER_TYPE_P (type))
1734 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1735 return false;
1737 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1738 return true;
1741 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1743 It specifies padding for the last (may also be the only)
1744 element of a block move between registers and memory. Assuming
1745 the block is in memory, padding upward means that the last
1746 element is padded after its most significant byte, while with
1747 downward padding the last element is padded on its least
1748 significant byte side.
1750 Small aggregates and small complex types are always padded
1751 upwards.
1753 We don't need to worry about homogeneous floating-point or
1754 short-vector aggregates; their move is not affected by the
1755 padding direction determined here. Regardless of endianness,
1756 each element of such an aggregate is put in the least
1757 significant bits of a fp/simd register.
1759 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1760 register has useful data, and return the opposite if the most
1761 significant byte does. */
1763 bool
1764 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1765 bool first ATTRIBUTE_UNUSED)
1768 /* Small composite types are always padded upward. */
1769 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1771 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1772 : GET_MODE_SIZE (mode));
1773 if (size < 2 * UNITS_PER_WORD)
1774 return true;
1777 /* Otherwise, use the default padding. */
1778 return !BYTES_BIG_ENDIAN;
1781 static enum machine_mode
1782 aarch64_libgcc_cmp_return_mode (void)
1784 return SImode;
1787 static bool
1788 aarch64_frame_pointer_required (void)
1790 /* If the function contains dynamic stack allocations, we need to
1791 use the frame pointer to access the static parts of the frame. */
1792 if (cfun->calls_alloca)
1793 return true;
1795 /* In aarch64_override_options_after_change
1796 flag_omit_leaf_frame_pointer turns off the frame pointer by
1797 default. Turn it back on now if we've not got a leaf
1798 function. */
1799 if (flag_omit_leaf_frame_pointer
1800 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
1801 return true;
1803 return false;
1806 /* Mark the registers that need to be saved by the callee and calculate
1807 the size of the callee-saved registers area and frame record (both FP
1808 and LR may be omitted). */
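/* In outline, restating the computation below: saved_regs_size is the
   rounded size of the callee-save area (including the FP/LR frame
   record when the frame pointer is needed); hard_fp_offset adds the
   varargs save area and the local frame on top of that; frame_size
   additionally covers the outgoing argument area, with each total
   rounded to STACK_BOUNDARY.  */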
1809 static void
1810 aarch64_layout_frame (void)
1812 HOST_WIDE_INT offset = 0;
1813 int regno;
1815 if (reload_completed && cfun->machine->frame.laid_out)
1816 return;
1818 #define SLOT_NOT_REQUIRED (-2)
1819 #define SLOT_REQUIRED (-1)
1821 cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER;
1822 cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER;
1824 /* First mark all the registers that really need to be saved... */
1825 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1826 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
1828 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1829 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
1831 /* ... that includes the eh data registers (if needed)... */
1832 if (crtl->calls_eh_return)
1833 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1834 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
1835 = SLOT_REQUIRED;
1837 /* ... and any callee saved register that dataflow says is live. */
1838 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1839 if (df_regs_ever_live_p (regno)
1840 && !call_used_regs[regno])
1841 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
1843 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1844 if (df_regs_ever_live_p (regno)
1845 && !call_used_regs[regno])
1846 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
1848 if (frame_pointer_needed)
1850 /* FP and LR are placed in the linkage record. */
1851 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1852 cfun->machine->frame.wb_candidate1 = R29_REGNUM;
1853 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
1854 cfun->machine->frame.wb_candidate2 = R30_REGNUM;
1855 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1856 offset += 2 * UNITS_PER_WORD;
1859 /* Now assign stack slots for them. */
1860 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1861 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
1863 cfun->machine->frame.reg_offset[regno] = offset;
1864 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
1865 cfun->machine->frame.wb_candidate1 = regno;
1866 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER)
1867 cfun->machine->frame.wb_candidate2 = regno;
1868 offset += UNITS_PER_WORD;
1871 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1872 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
1874 cfun->machine->frame.reg_offset[regno] = offset;
1875 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
1876 cfun->machine->frame.wb_candidate1 = regno;
1877 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER
1878 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
1879 cfun->machine->frame.wb_candidate2 = regno;
1880 offset += UNITS_PER_WORD;
1883 cfun->machine->frame.padding0 =
1884 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1885 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1887 cfun->machine->frame.saved_regs_size = offset;
1889 cfun->machine->frame.hard_fp_offset
1890 = AARCH64_ROUND_UP (cfun->machine->frame.saved_varargs_size
1891 + get_frame_size ()
1892 + cfun->machine->frame.saved_regs_size,
1893 STACK_BOUNDARY / BITS_PER_UNIT);
1895 cfun->machine->frame.frame_size
1896 = AARCH64_ROUND_UP (cfun->machine->frame.hard_fp_offset
1897 + crtl->outgoing_args_size,
1898 STACK_BOUNDARY / BITS_PER_UNIT);
1900 cfun->machine->frame.laid_out = true;
1903 /* Make the last instruction frame-related and note that it performs
1904 the operation described by FRAME_PATTERN. */
1906 static void
1907 aarch64_set_frame_expr (rtx frame_pattern)
1909 rtx insn;
1911 insn = get_last_insn ();
1912 RTX_FRAME_RELATED_P (insn) = 1;
1913 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1914 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1915 frame_pattern,
1916 REG_NOTES (insn));
1919 static bool
1920 aarch64_register_saved_on_entry (int regno)
1922 return cfun->machine->frame.reg_offset[regno] >= 0;
1925 static unsigned
1926 aarch64_next_callee_save (unsigned regno, unsigned limit)
1928 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
1929 regno ++;
1930 return regno;
1933 static void
1934 aarch64_pushwb_single_reg (enum machine_mode mode, unsigned regno,
1935 HOST_WIDE_INT adjustment)
1937 rtx base_rtx = stack_pointer_rtx;
1938 rtx insn, reg, mem;
1940 reg = gen_rtx_REG (mode, regno);
1941 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
1942 plus_constant (Pmode, base_rtx, -adjustment));
1943 mem = gen_rtx_MEM (mode, mem);
1945 insn = emit_move_insn (mem, reg);
1946 RTX_FRAME_RELATED_P (insn) = 1;
1949 static void
1950 aarch64_popwb_single_reg (enum machine_mode mode, unsigned regno,
1951 HOST_WIDE_INT adjustment)
1953 rtx base_rtx = stack_pointer_rtx;
1954 rtx insn, reg, mem;
1956 reg = gen_rtx_REG (mode, regno);
1957 mem = gen_rtx_POST_MODIFY (Pmode, base_rtx,
1958 plus_constant (Pmode, base_rtx, adjustment));
1959 mem = gen_rtx_MEM (mode, mem);
1961 insn = emit_move_insn (reg, mem);
1962 add_reg_note (insn, REG_CFA_RESTORE, reg);
1963 RTX_FRAME_RELATED_P (insn) = 1;
1966 static rtx
1967 aarch64_gen_storewb_pair (enum machine_mode mode, rtx base, rtx reg, rtx reg2,
1968 HOST_WIDE_INT adjustment)
1970 switch (mode)
1972 case DImode:
1973 return gen_storewb_pairdi_di (base, base, reg, reg2,
1974 GEN_INT (-adjustment),
1975 GEN_INT (UNITS_PER_WORD - adjustment));
1976 case DFmode:
1977 return gen_storewb_pairdf_di (base, base, reg, reg2,
1978 GEN_INT (-adjustment),
1979 GEN_INT (UNITS_PER_WORD - adjustment));
1980 default:
1981 gcc_unreachable ();
1985 static void
1986 aarch64_pushwb_pair_reg (enum machine_mode mode, unsigned regno1,
1987 unsigned regno2, HOST_WIDE_INT adjustment)
1989 rtx insn;
1990 rtx reg1 = gen_rtx_REG (mode, regno1);
1991 rtx reg2 = gen_rtx_REG (mode, regno2);
1993 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
1994 reg2, adjustment));
1995 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
1997 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1998 RTX_FRAME_RELATED_P (insn) = 1;
2001 static rtx
2002 aarch64_gen_loadwb_pair (enum machine_mode mode, rtx base, rtx reg, rtx reg2,
2003 HOST_WIDE_INT adjustment)
2005 switch (mode)
2007 case DImode:
2008 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
2009 GEN_INT (UNITS_PER_WORD));
2010 case DFmode:
2011 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
2012 GEN_INT (UNITS_PER_WORD));
2013 default:
2014 gcc_unreachable ();
2018 static void
2019 aarch64_popwb_pair_reg (enum machine_mode mode, unsigned regno1,
2020 unsigned regno2, HOST_WIDE_INT adjustment, rtx cfa)
2022 rtx insn;
2023 rtx reg1 = gen_rtx_REG (mode, regno1);
2024 rtx reg2 = gen_rtx_REG (mode, regno2);
2026 insn = emit_insn (aarch64_gen_loadwb_pair (mode, stack_pointer_rtx, reg1,
2027 reg2, adjustment));
2028 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2029 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2030 RTX_FRAME_RELATED_P (insn) = 1;
2032 if (cfa)
2033 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2034 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2035 plus_constant (Pmode, cfa, adjustment))));
2037 add_reg_note (insn, REG_CFA_RESTORE, reg1);
2038 add_reg_note (insn, REG_CFA_RESTORE, reg2);
2041 static rtx
2042 aarch64_gen_store_pair (enum machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
2043 rtx reg2)
2045 switch (mode)
2047 case DImode:
2048 return gen_store_pairdi (mem1, reg1, mem2, reg2);
2050 case DFmode:
2051 return gen_store_pairdf (mem1, reg1, mem2, reg2);
2053 default:
2054 gcc_unreachable ();
2058 static rtx
2059 aarch64_gen_load_pair (enum machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
2060 rtx mem2)
2062 switch (mode)
2064 case DImode:
2065 return gen_load_pairdi (reg1, mem1, reg2, mem2);
2067 case DFmode:
2068 return gen_load_pairdf (reg1, mem1, reg2, mem2);
2070 default:
2071 gcc_unreachable ();
2076 static void
2077 aarch64_save_callee_saves (enum machine_mode mode, HOST_WIDE_INT start_offset,
2078 unsigned start, unsigned limit, bool skip_wb)
2080 rtx insn;
2081 rtx (*gen_mem_ref) (enum machine_mode, rtx) = (frame_pointer_needed
2082 ? gen_frame_mem : gen_rtx_MEM);
2083 unsigned regno;
2084 unsigned regno2;
2086 for (regno = aarch64_next_callee_save (start, limit);
2087 regno <= limit;
2088 regno = aarch64_next_callee_save (regno + 1, limit))
2090 rtx reg, mem;
2091 HOST_WIDE_INT offset;
2093 if (skip_wb
2094 && (regno == cfun->machine->frame.wb_candidate1
2095 || regno == cfun->machine->frame.wb_candidate2))
2096 continue;
2098 reg = gen_rtx_REG (mode, regno);
2099 offset = start_offset + cfun->machine->frame.reg_offset[regno];
2100 mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2101 offset));
2103 regno2 = aarch64_next_callee_save (regno + 1, limit);
2105 if (regno2 <= limit
2106 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2107 == cfun->machine->frame.reg_offset[regno2]))
2110 rtx reg2 = gen_rtx_REG (mode, regno2);
2111 rtx mem2;
2113 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2114 mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2115 offset));
2116 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
2117 reg2));
2119 /* The first part of a frame-related parallel insn is
2120 always assumed to be relevant to the frame
2121 calculations; subsequent parts are only
2122 frame-related if explicitly marked. */
2123 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2124 regno = regno2;
2126 else
2127 insn = emit_move_insn (mem, reg);
2129 RTX_FRAME_RELATED_P (insn) = 1;
2133 static void
2134 aarch64_restore_callee_saves (enum machine_mode mode,
2135 HOST_WIDE_INT start_offset, unsigned start,
2136 unsigned limit, bool skip_wb)
2138 rtx insn;
2139 rtx base_rtx = stack_pointer_rtx;
2140 rtx (*gen_mem_ref) (enum machine_mode, rtx) = (frame_pointer_needed
2141 ? gen_frame_mem : gen_rtx_MEM);
2142 unsigned regno;
2143 unsigned regno2;
2144 HOST_WIDE_INT offset;
2146 for (regno = aarch64_next_callee_save (start, limit);
2147 regno <= limit;
2148 regno = aarch64_next_callee_save (regno + 1, limit))
2150 rtx reg, mem;
2152 if (skip_wb
2153 && (regno == cfun->machine->frame.wb_candidate1
2154 || regno == cfun->machine->frame.wb_candidate2))
2155 continue;
2157 reg = gen_rtx_REG (mode, regno);
2158 offset = start_offset + cfun->machine->frame.reg_offset[regno];
2159 mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2161 regno2 = aarch64_next_callee_save (regno + 1, limit);
2163 if (regno2 <= limit
2164 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2165 == cfun->machine->frame.reg_offset[regno2]))
2167 rtx reg2 = gen_rtx_REG (mode, regno2);
2168 rtx mem2;
2170 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2171 mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2172 insn = emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2,
2173 mem2));
2174 add_reg_note (insn, REG_CFA_RESTORE, reg);
2175 add_reg_note (insn, REG_CFA_RESTORE, reg2);
2177 /* The first part of a frame-related parallel insn is
2178 always assumed to be relevant to the frame
2179 calculations; subsequent parts are only
2180 frame-related if explicitly marked. */
2181 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2182 regno = regno2;
2184 else
2186 insn = emit_move_insn (reg, mem);
2187 add_reg_note (insn, REG_CFA_RESTORE, reg);
2190 RTX_FRAME_RELATED_P (insn) = 1;
2194 /* AArch64 stack frames generated by this compiler look like:
2196 +-------------------------------+
2198 |  incoming stack arguments     |
2200 +-------------------------------+
2201 |                               | <-- incoming stack pointer (aligned)
2202 |  callee-allocated save area   |
2203 |  for register varargs         |
2205 +-------------------------------+
2206 |  local variables              | <-- frame_pointer_rtx
2208 +-------------------------------+
2209 |  padding0                     | \
2210 +-------------------------------+  |
2211 |  callee-saved registers       |  | frame.saved_regs_size
2212 +-------------------------------+  |
2213 |  LR'                          |  |
2214 +-------------------------------+  |
2215 |  FP'                          | / <- hard_frame_pointer_rtx (aligned)
2216 +-------------------------------+
2217 |  dynamic allocation           |
2218 +-------------------------------+
2219 |  padding                      |
2220 +-------------------------------+
2221 |  outgoing stack arguments     | <-- arg_pointer
2223 +-------------------------------+
2224 |                               | <-- stack_pointer_rtx (aligned)
2226 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
2227 but leave frame_pointer_rtx and hard_frame_pointer_rtx
2228 unchanged. */
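/* In the prologue and epilogue code below, frame.frame_size is the
   distance from the incoming (aligned) stack pointer down to the
   outgoing stack pointer, frame.hard_fp_offset is the distance from
   the incoming stack pointer down to the frame record (FP'/LR'), and
   fp_offset (frame_size - hard_fp_offset) is therefore the distance
   from the final stack pointer up to the frame record.  */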
2230 /* Generate the prologue instructions for entry into a function.
2231 Establish the stack frame by decreasing the stack pointer with a
2232 properly calculated size and, if necessary, create a frame record
2233 filled with the values of LR and previous frame pointer. The
2234 current FP is also set up if it is in use. */
2236 void
2237 aarch64_expand_prologue (void)
2239 /* sub sp, sp, #<frame_size>
2240 stp {fp, lr}, [sp, #<frame_size> - 16]
2241 add fp, sp, #<frame_size> - hardfp_offset
2242 stp {cs_reg}, [fp, #-16] etc.
2244 sub sp, sp, <final_adjustment_if_any>  */
2246 HOST_WIDE_INT frame_size, offset;
2247 HOST_WIDE_INT fp_offset; /* Offset from hard FP to SP. */
2248 rtx insn;
2250 aarch64_layout_frame ();
2252 if (flag_stack_usage_info)
2253 current_function_static_stack_size = cfun->machine->frame.frame_size;
2255 frame_size = cfun->machine->frame.frame_size;
2256 offset = cfun->machine->frame.frame_size;
2258 fp_offset = cfun->machine->frame.frame_size
2259 - cfun->machine->frame.hard_fp_offset;
2261 /* Store pairs and load pairs have a range of only -512 to 504. */
2262 if (offset >= 512)
2264 /* When the frame has a large size, the stack pointer is first
2265 decreased to skip over the callee-allocated save area for
2266 register varargs, the local variable area and/or the callee-saved
2267 register area. This allows the pre-index write-back
2268 store pair instructions to be used for setting up the stack frame
2269 efficiently. */
2270 offset = cfun->machine->frame.hard_fp_offset;
2271 if (offset >= 512)
2272 offset = cfun->machine->frame.saved_regs_size;
2274 frame_size -= (offset + crtl->outgoing_args_size);
2275 fp_offset = 0;
2277 if (frame_size >= 0x1000000)
2279 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2280 emit_move_insn (op0, GEN_INT (-frame_size));
2281 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2282 aarch64_set_frame_expr (gen_rtx_SET
2283 (Pmode, stack_pointer_rtx,
2284 plus_constant (Pmode,
2285 stack_pointer_rtx,
2286 -frame_size)));
2288 else if (frame_size > 0)
2290 if ((frame_size & 0xfff) != frame_size)
2292 insn = emit_insn (gen_add2_insn
2293 (stack_pointer_rtx,
2294 GEN_INT (-(frame_size
2295 & ~(HOST_WIDE_INT)0xfff))));
2296 RTX_FRAME_RELATED_P (insn) = 1;
2298 if ((frame_size & 0xfff) != 0)
2300 insn = emit_insn (gen_add2_insn
2301 (stack_pointer_rtx,
2302 GEN_INT (-(frame_size
2303 & (HOST_WIDE_INT)0xfff))));
2304 RTX_FRAME_RELATED_P (insn) = 1;
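/* For example, a remaining frame_size of 0x1234 is handled by the two
   SUBs above as 0x1000 followed by 0x234, since the SUB immediate
   field is limited to 12 bits, optionally shifted left by 12.  */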
2308 else
2309 frame_size = -1;
2311 if (offset > 0)
2313 bool skip_wb = false;
2315 if (frame_pointer_needed)
2317 skip_wb = true;
2319 if (fp_offset)
2321 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2322 GEN_INT (-offset)));
2323 RTX_FRAME_RELATED_P (insn) = 1;
2324 aarch64_set_frame_expr (gen_rtx_SET
2325 (Pmode, stack_pointer_rtx,
2326 gen_rtx_MINUS (Pmode, stack_pointer_rtx,
2327 GEN_INT (offset))));
2329 aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM,
2330 R30_REGNUM, false);
2332 else
2333 aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset);
2335 /* Set up frame pointer to point to the location of the
2336 previous frame pointer on the stack. */
2337 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2338 stack_pointer_rtx,
2339 GEN_INT (fp_offset)));
2340 aarch64_set_frame_expr (gen_rtx_SET
2341 (Pmode, hard_frame_pointer_rtx,
2342 plus_constant (Pmode,
2343 stack_pointer_rtx,
2344 fp_offset)));
2345 RTX_FRAME_RELATED_P (insn) = 1;
2346 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2347 hard_frame_pointer_rtx));
2349 else
2351 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2352 unsigned reg2 = cfun->machine->frame.wb_candidate2;
2354 if (fp_offset
2355 || reg1 == FIRST_PSEUDO_REGISTER
2356 || (reg2 == FIRST_PSEUDO_REGISTER
2357 && offset >= 256))
2359 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2360 GEN_INT (-offset)));
2361 RTX_FRAME_RELATED_P (insn) = 1;
2363 else
2365 enum machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
2367 skip_wb = true;
2369 if (reg2 == FIRST_PSEUDO_REGISTER)
2370 aarch64_pushwb_single_reg (mode1, reg1, offset);
2371 else
2372 aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset);
2376 aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
2377 skip_wb);
2378 aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
2379 skip_wb);
2382 /* When offset >= 512,
2383 sub sp, sp, #<outgoing_args_size> */
2384 if (frame_size > -1)
2386 if (crtl->outgoing_args_size > 0)
2388 insn = emit_insn (gen_add2_insn
2389 (stack_pointer_rtx,
2390 GEN_INT (- crtl->outgoing_args_size)));
2391 RTX_FRAME_RELATED_P (insn) = 1;
2396 /* Generate the epilogue instructions for returning from a function. */
2397 void
2398 aarch64_expand_epilogue (bool for_sibcall)
2400 HOST_WIDE_INT frame_size, offset;
2401 HOST_WIDE_INT fp_offset;
2402 rtx insn;
2403 rtx cfa_reg;
2405 aarch64_layout_frame ();
2407 offset = frame_size = cfun->machine->frame.frame_size;
2408 fp_offset = cfun->machine->frame.frame_size
2409 - cfun->machine->frame.hard_fp_offset;
2411 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2413 /* Store pairs and load pairs have a range of only -512 to 504. */
2414 if (offset >= 512)
2416 offset = cfun->machine->frame.hard_fp_offset;
2417 if (offset >= 512)
2418 offset = cfun->machine->frame.saved_regs_size;
2420 frame_size -= (offset + crtl->outgoing_args_size);
2421 fp_offset = 0;
2422 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2424 insn = emit_insn (gen_add2_insn
2425 (stack_pointer_rtx,
2426 GEN_INT (crtl->outgoing_args_size)));
2427 RTX_FRAME_RELATED_P (insn) = 1;
2430 else
2431 frame_size = -1;
2433 /* If there were outgoing arguments or we've done dynamic stack
2434 allocation, then restore the stack pointer from the frame
2435 pointer. This is at most one insn and more efficient than using
2436 GCC's internal mechanism. */
2437 if (frame_pointer_needed
2438 && (crtl->outgoing_args_size || cfun->calls_alloca))
2440 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2441 hard_frame_pointer_rtx,
2442 GEN_INT (0)));
2443 offset = offset - fp_offset;
2444 RTX_FRAME_RELATED_P (insn) = 1;
2445 /* As SP is set to (FP - fp_offset), according to the rules in
2446 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2447 from the value of SP from now on. */
2448 cfa_reg = stack_pointer_rtx;
2451 if (offset > 0)
2453 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2454 unsigned reg2 = cfun->machine->frame.wb_candidate2;
2455 bool skip_wb = true;
2457 if (frame_pointer_needed)
2458 fp_offset = 0;
2459 else if (fp_offset
2460 || reg1 == FIRST_PSEUDO_REGISTER
2461 || (reg2 == FIRST_PSEUDO_REGISTER
2462 && offset >= 256))
2463 skip_wb = false;
2465 aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
2466 skip_wb);
2467 aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
2468 skip_wb);
2470 if (skip_wb)
2472 enum machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
2474 if (reg2 == FIRST_PSEUDO_REGISTER)
2475 aarch64_popwb_single_reg (mode1, reg1, offset);
2476 else
2478 if (reg1 != HARD_FRAME_POINTER_REGNUM)
2479 cfa_reg = NULL;
2481 aarch64_popwb_pair_reg (mode1, reg1, reg2, offset, cfa_reg);
2484 else
2486 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2487 GEN_INT (offset)));
2488 RTX_FRAME_RELATED_P (insn) = 1;
2492 /* Stack adjustment for exception handler. */
2493 if (crtl->calls_eh_return)
2495 /* We need to unwind the stack by the offset computed by
2496 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2497 based on SP. Ideally we would update the SP and define the
2498 CFA along the lines of:
2500 SP = SP + EH_RETURN_STACKADJ_RTX
2501 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2503 However, the DWARF emitter only understands a constant
2504 register offset.
2506 The solution chosen here is to use the otherwise unused IP0
2507 as a temporary register to hold the current SP value. The
2508 CFA is described using IP0 then SP is modified. */
2510 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2512 insn = emit_move_insn (ip0, stack_pointer_rtx);
2513 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2514 RTX_FRAME_RELATED_P (insn) = 1;
2516 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2518 /* Ensure the assignment to IP0 does not get optimized away. */
2519 emit_use (ip0);
2522 if (frame_size > -1)
2524 if (frame_size >= 0x1000000)
2526 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2527 emit_move_insn (op0, GEN_INT (frame_size));
2528 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2529 aarch64_set_frame_expr (gen_rtx_SET
2530 (Pmode, stack_pointer_rtx,
2531 plus_constant (Pmode,
2532 stack_pointer_rtx,
2533 frame_size)));
2535 else if (frame_size > 0)
2537 if ((frame_size & 0xfff) != 0)
2539 insn = emit_insn (gen_add2_insn
2540 (stack_pointer_rtx,
2541 GEN_INT ((frame_size
2542 & (HOST_WIDE_INT) 0xfff))));
2543 RTX_FRAME_RELATED_P (insn) = 1;
2545 if ((frame_size & 0xfff) != frame_size)
2547 insn = emit_insn (gen_add2_insn
2548 (stack_pointer_rtx,
2549 GEN_INT ((frame_size
2550 & ~ (HOST_WIDE_INT) 0xfff))));
2551 RTX_FRAME_RELATED_P (insn) = 1;
2555 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2556 plus_constant (Pmode,
2557 stack_pointer_rtx,
2558 offset)));
2561 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2562 if (!for_sibcall)
2563 emit_jump_insn (ret_rtx);
2566 /* Return the place to copy the exception unwinding return address to.
2567 This will probably be a stack slot, but could (in theory) be the
2568 return register. */
2570 rtx aarch64_final_eh_return_addr (void)
2572 HOST_WIDE_INT fp_offset;
2574 aarch64_layout_frame ();
2576 fp_offset = cfun->machine->frame.frame_size
2577 - cfun->machine->frame.hard_fp_offset;
2579 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2580 return gen_rtx_REG (DImode, LR_REGNUM);
2582 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2583 result in a store to save LR introduced by builtin_eh_return () being
2584 incorrectly deleted because the alias is not detected.
2585 So in the calculation of the address to copy the exception unwinding
2586 return address to, we distinguish two cases.
2587 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2588 we return a SP-relative location since all the addresses are SP-relative
2589 in this case. This prevents the store from being optimized away.
2590 If the fp_offset is not 0, then the addresses will be FP-relative and
2591 therefore we return a FP-relative location. */
2593 if (frame_pointer_needed)
2595 if (fp_offset)
2596 return gen_frame_mem (DImode,
2597 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2598 else
2599 return gen_frame_mem (DImode,
2600 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2603 /* If FP is not needed, we calculate the location of LR, which would be
2604 at the top of the saved registers block. */
2606 return gen_frame_mem (DImode,
2607 plus_constant (Pmode,
2608 stack_pointer_rtx,
2609 fp_offset
2610 + cfun->machine->frame.saved_regs_size
2611 - 2 * UNITS_PER_WORD));
2614 /* Possibly output code to build up a constant in a register. For
2615 the benefit of the costs infrastructure, returns the number of
2616 instructions which would be emitted. GENERATE inhibits or
2617 enables code generation. */
2619 static int
2620 aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
2622 int insns = 0;
2624 if (aarch64_bitmask_imm (val, DImode))
2626 if (generate)
2627 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2628 insns = 1;
2630 else
2632 int i;
2633 int ncount = 0;
2634 int zcount = 0;
2635 HOST_WIDE_INT valp = val >> 16;
2636 HOST_WIDE_INT valm;
2637 HOST_WIDE_INT tval;
2639 for (i = 16; i < 64; i += 16)
2641 valm = (valp & 0xffff);
2643 if (valm != 0)
2644 ++ zcount;
2646 if (valm != 0xffff)
2647 ++ ncount;
2649 valp >>= 16;
2652 /* zcount contains the number of additional MOVK instructions
2653 required if the constant is built up with an initial MOVZ instruction,
2654 while ncount is the number of MOVK instructions required if starting
2655 with a MOVN instruction. Choose the sequence that yields the fewest
2656 number of instructions, preferring MOVZ instructions when they are both
2657 the same. */
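/* For example, for 0xffffffffffff1234 the three upper 16-bit chunks
   are all 0xffff, so ncount == 0 and zcount == 3: starting with MOVN
   needs no trailing MOVKs, whereas starting with MOVZ would need
   three.  */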
2658 if (ncount < zcount)
2660 if (generate)
2661 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2662 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2663 tval = 0xffff;
2664 insns++;
2666 else
2668 if (generate)
2669 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2670 GEN_INT (val & 0xffff));
2671 tval = 0;
2672 insns++;
2675 val >>= 16;
2677 for (i = 16; i < 64; i += 16)
2679 if ((val & 0xffff) != tval)
2681 if (generate)
2682 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2683 GEN_INT (i),
2684 GEN_INT (val & 0xffff)));
2685 insns++;
2687 val >>= 16;
2690 return insns;
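/* Add DELTA to register REGNUM, using register SCRATCHREG as a
   temporary where needed.  Absolute deltas below 4096 * 4096 are added
   as SCRATCHREG = delta / 4096 shifted left by 12 plus the low 12-bit
   remainder; for example a delta of 0x5010 is added as SCRATCHREG = 5,
   REGNUM += SCRATCHREG << 12, REGNUM += 0x10.  Larger deltas are first
   built into SCRATCHREG with aarch64_build_constant and then added.  */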
2693 static void
2694 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2696 HOST_WIDE_INT mdelta = delta;
2697 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2698 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2700 if (mdelta < 0)
2701 mdelta = -mdelta;
2703 if (mdelta >= 4096 * 4096)
2705 (void) aarch64_build_constant (scratchreg, delta, true);
2706 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2708 else if (mdelta > 0)
2710 if (mdelta >= 4096)
2712 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2713 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2714 if (delta < 0)
2715 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2716 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2717 else
2718 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2719 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2721 if (mdelta % 4096 != 0)
2723 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2724 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2725 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2730 /* Output code to add DELTA to the first argument, and then jump
2731 to FUNCTION. Used for C++ multiple inheritance. */
2732 static void
2733 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2734 HOST_WIDE_INT delta,
2735 HOST_WIDE_INT vcall_offset,
2736 tree function)
2738 /* The this pointer is always in x0. Note that this differs from
2739 Arm where the this pointer may be bumped to r1 if r0 is required
2740 to return a pointer to an aggregate. On AArch64 a result value
2741 pointer will be in x8. */
2742 int this_regno = R0_REGNUM;
2743 rtx this_rtx, temp0, temp1, addr, insn, funexp;
2745 reload_completed = 1;
2746 emit_note (NOTE_INSN_PROLOGUE_END);
2748 if (vcall_offset == 0)
2749 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2750 else
2752 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2754 this_rtx = gen_rtx_REG (Pmode, this_regno);
2755 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2756 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2758 addr = this_rtx;
2759 if (delta != 0)
2761 if (delta >= -256 && delta < 256)
2762 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2763 plus_constant (Pmode, this_rtx, delta));
2764 else
2765 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2768 if (Pmode == ptr_mode)
2769 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2770 else
2771 aarch64_emit_move (temp0,
2772 gen_rtx_ZERO_EXTEND (Pmode,
2773 gen_rtx_MEM (ptr_mode, addr)));
2775 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2776 addr = plus_constant (Pmode, temp0, vcall_offset);
2777 else
2779 (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
2780 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2783 if (Pmode == ptr_mode)
2784 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode, addr));
2785 else
2786 aarch64_emit_move (temp1,
2787 gen_rtx_SIGN_EXTEND (Pmode,
2788 gen_rtx_MEM (ptr_mode, addr)));
2790 emit_insn (gen_add2_insn (this_rtx, temp1));
2793 /* Generate a tail call to the target function. */
2794 if (!TREE_USED (function))
2796 assemble_external (function);
2797 TREE_USED (function) = 1;
2799 funexp = XEXP (DECL_RTL (function), 0);
2800 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2801 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2802 SIBLING_CALL_P (insn) = 1;
2804 insn = get_insns ();
2805 shorten_branches (insn);
2806 final_start_function (insn, file, 1);
2807 final (insn, file, 1);
2808 final_end_function ();
2810 /* Stop pretending to be a post-reload pass. */
2811 reload_completed = 0;
2814 static int
2815 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2817 if (GET_CODE (*x) == SYMBOL_REF)
2818 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2820 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2821 TLS offsets, not real symbol references. */
2822 if (GET_CODE (*x) == UNSPEC
2823 && XINT (*x, 1) == UNSPEC_TLS)
2824 return -1;
2826 return 0;
2829 static bool
2830 aarch64_tls_referenced_p (rtx x)
2832 if (!TARGET_HAVE_TLS)
2833 return false;
2835 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2839 static int
2840 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2842 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2843 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2845 if (*imm1 < *imm2)
2846 return -1;
2847 if (*imm1 > *imm2)
2848 return +1;
2849 return 0;
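/* Fill aarch64_bitmasks with the valid AArch64 bitmask (logical)
   immediates: for each element size E of 2, 4, ..., 64 bits, a run of
   S ones (0 < S < E) rotated right by R and replicated across 64 bits.
   For example E == 16, S == 8, R == 0 gives 0x00ff00ff00ff00ff.  The
   table is sorted so that aarch64_bitmask_imm can use bsearch.  */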
2853 static void
2854 aarch64_build_bitmask_table (void)
2856 unsigned HOST_WIDE_INT mask, imm;
2857 unsigned int log_e, e, s, r;
2858 unsigned int nimms = 0;
2860 for (log_e = 1; log_e <= 6; log_e++)
2862 e = 1 << log_e;
2863 if (e == 64)
2864 mask = ~(HOST_WIDE_INT) 0;
2865 else
2866 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2867 for (s = 1; s < e; s++)
2869 for (r = 0; r < e; r++)
2871 /* Set s consecutive bits to 1 (s < 64). */
2872 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2873 /* Rotate right by r. */
2874 if (r != 0)
2875 imm = ((imm >> r) | (imm << (e - r))) & mask;
2876 /* Replicate the constant depending on SIMD size. */
2877 switch (log_e) {
2878 case 1: imm |= (imm << 2);
2879 case 2: imm |= (imm << 4);
2880 case 3: imm |= (imm << 8);
2881 case 4: imm |= (imm << 16);
2882 case 5: imm |= (imm << 32);
2883 case 6:
2884 break;
2885 default:
2886 gcc_unreachable ();
2888 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2889 aarch64_bitmasks[nimms++] = imm;
2894 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2895 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2896 aarch64_bitmasks_cmp);
2900 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2901 a left shift of 0 or 12 bits. */
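/* For example, 0xabc and 0xabc000 can be encoded (shift 0 and shift 12
   respectively), while 0x1001 cannot, since its set bits span both
   halves.  */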
2902 bool
2903 aarch64_uimm12_shift (HOST_WIDE_INT val)
2905 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2906 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val);
2911 /* Return true if val is an immediate that can be loaded into a
2912 register by a MOVZ instruction. */
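/* For example, 0x12340000 (a single 16-bit chunk at bit 16) can be
   loaded with one MOVZ, while 0x12345 cannot, since it spans two
   16-bit chunks.  */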
2913 static bool
2914 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2916 if (GET_MODE_SIZE (mode) > 4)
2918 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2919 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2920 return 1;
2922 else
2924 /* Ignore sign extension. */
2925 val &= (HOST_WIDE_INT) 0xffffffff;
2927 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2928 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2932 /* Return true if val is a valid bitmask immediate. */
2933 bool
2934 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2936 if (GET_MODE_SIZE (mode) < 8)
2938 /* Replicate bit pattern. */
2939 val &= (HOST_WIDE_INT) 0xffffffff;
2940 val |= val << 32;
2942 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2943 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2947 /* Return true if val is an immediate that can be loaded into a
2948 register in a single instruction. */
2949 bool
2950 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2952 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2953 return 1;
2954 return aarch64_bitmask_imm (val, mode);
2957 static bool
2958 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2960 rtx base, offset;
2962 if (GET_CODE (x) == HIGH)
2963 return true;
2965 split_const (x, &base, &offset);
2966 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2968 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2969 != SYMBOL_FORCE_TO_MEM)
2970 return true;
2971 else
2972 /* Avoid generating a 64-bit relocation in ILP32; leave it for
2973 aarch64_expand_mov_immediate to handle properly. */
2974 return mode != ptr_mode;
2977 return aarch64_tls_referenced_p (x);
2980 /* Return true if register REGNO is a valid index register.
2981 STRICT_P is true if REG_OK_STRICT is in effect. */
2983 bool
2984 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2986 if (!HARD_REGISTER_NUM_P (regno))
2988 if (!strict_p)
2989 return true;
2991 if (!reg_renumber)
2992 return false;
2994 regno = reg_renumber[regno];
2996 return GP_REGNUM_P (regno);
2999 /* Return true if register REGNO is a valid base register for mode MODE.
3000 STRICT_P is true if REG_OK_STRICT is in effect. */
3002 bool
3003 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
3005 if (!HARD_REGISTER_NUM_P (regno))
3007 if (!strict_p)
3008 return true;
3010 if (!reg_renumber)
3011 return false;
3013 regno = reg_renumber[regno];
3016 /* The fake registers will be eliminated to either the stack or
3017 hard frame pointer, both of which are usually valid base registers.
3018 Reload deals with the cases where the eliminated form isn't valid. */
3019 return (GP_REGNUM_P (regno)
3020 || regno == SP_REGNUM
3021 || regno == FRAME_POINTER_REGNUM
3022 || regno == ARG_POINTER_REGNUM);
3025 /* Return true if X is a valid base register for mode MODE.
3026 STRICT_P is true if REG_OK_STRICT is in effect. */
3028 static bool
3029 aarch64_base_register_rtx_p (rtx x, bool strict_p)
3031 if (!strict_p && GET_CODE (x) == SUBREG)
3032 x = SUBREG_REG (x);
3034 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
3037 /* Return true if address offset is a valid index. If it is, fill in INFO
3038 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
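/* For example, (mult:DI (sign_extend:DI (reg:SI)) (const_int 8)) is a
   valid index for an 8-byte access: it is classified as
   ADDRESS_REG_SXTW with shift 3 and printed as "[base, wN, sxtw 3]"
   by aarch64_print_operand_address.  */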
3040 static bool
3041 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
3042 enum machine_mode mode, bool strict_p)
3044 enum aarch64_address_type type;
3045 rtx index;
3046 int shift;
3048 /* (reg:P) */
3049 if ((REG_P (x) || GET_CODE (x) == SUBREG)
3050 && GET_MODE (x) == Pmode)
3052 type = ADDRESS_REG_REG;
3053 index = x;
3054 shift = 0;
3056 /* (sign_extend:DI (reg:SI)) */
3057 else if ((GET_CODE (x) == SIGN_EXTEND
3058 || GET_CODE (x) == ZERO_EXTEND)
3059 && GET_MODE (x) == DImode
3060 && GET_MODE (XEXP (x, 0)) == SImode)
3062 type = (GET_CODE (x) == SIGN_EXTEND)
3063 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3064 index = XEXP (x, 0);
3065 shift = 0;
3067 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3068 else if (GET_CODE (x) == MULT
3069 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3070 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3071 && GET_MODE (XEXP (x, 0)) == DImode
3072 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3073 && CONST_INT_P (XEXP (x, 1)))
3075 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3076 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3077 index = XEXP (XEXP (x, 0), 0);
3078 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3080 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3081 else if (GET_CODE (x) == ASHIFT
3082 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3083 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3084 && GET_MODE (XEXP (x, 0)) == DImode
3085 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3086 && CONST_INT_P (XEXP (x, 1)))
3088 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3089 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3090 index = XEXP (XEXP (x, 0), 0);
3091 shift = INTVAL (XEXP (x, 1));
3093 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3094 else if ((GET_CODE (x) == SIGN_EXTRACT
3095 || GET_CODE (x) == ZERO_EXTRACT)
3096 && GET_MODE (x) == DImode
3097 && GET_CODE (XEXP (x, 0)) == MULT
3098 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3099 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3101 type = (GET_CODE (x) == SIGN_EXTRACT)
3102 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3103 index = XEXP (XEXP (x, 0), 0);
3104 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3105 if (INTVAL (XEXP (x, 1)) != 32 + shift
3106 || INTVAL (XEXP (x, 2)) != 0)
3107 shift = -1;
3109 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3110 (const_int 0xffffffff<<shift)) */
3111 else if (GET_CODE (x) == AND
3112 && GET_MODE (x) == DImode
3113 && GET_CODE (XEXP (x, 0)) == MULT
3114 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3115 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3116 && CONST_INT_P (XEXP (x, 1)))
3118 type = ADDRESS_REG_UXTW;
3119 index = XEXP (XEXP (x, 0), 0);
3120 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3121 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3122 shift = -1;
3124 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3125 else if ((GET_CODE (x) == SIGN_EXTRACT
3126 || GET_CODE (x) == ZERO_EXTRACT)
3127 && GET_MODE (x) == DImode
3128 && GET_CODE (XEXP (x, 0)) == ASHIFT
3129 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3130 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3132 type = (GET_CODE (x) == SIGN_EXTRACT)
3133 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3134 index = XEXP (XEXP (x, 0), 0);
3135 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3136 if (INTVAL (XEXP (x, 1)) != 32 + shift
3137 || INTVAL (XEXP (x, 2)) != 0)
3138 shift = -1;
3140 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3141 (const_int 0xffffffff<<shift)) */
3142 else if (GET_CODE (x) == AND
3143 && GET_MODE (x) == DImode
3144 && GET_CODE (XEXP (x, 0)) == ASHIFT
3145 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3146 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3147 && CONST_INT_P (XEXP (x, 1)))
3149 type = ADDRESS_REG_UXTW;
3150 index = XEXP (XEXP (x, 0), 0);
3151 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3152 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3153 shift = -1;
3155 /* (mult:P (reg:P) (const_int scale)) */
3156 else if (GET_CODE (x) == MULT
3157 && GET_MODE (x) == Pmode
3158 && GET_MODE (XEXP (x, 0)) == Pmode
3159 && CONST_INT_P (XEXP (x, 1)))
3161 type = ADDRESS_REG_REG;
3162 index = XEXP (x, 0);
3163 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3165 /* (ashift:P (reg:P) (const_int shift)) */
3166 else if (GET_CODE (x) == ASHIFT
3167 && GET_MODE (x) == Pmode
3168 && GET_MODE (XEXP (x, 0)) == Pmode
3169 && CONST_INT_P (XEXP (x, 1)))
3171 type = ADDRESS_REG_REG;
3172 index = XEXP (x, 0);
3173 shift = INTVAL (XEXP (x, 1));
3175 else
3176 return false;
3178 if (GET_CODE (index) == SUBREG)
3179 index = SUBREG_REG (index);
3181 if ((shift == 0 ||
3182 (shift > 0 && shift <= 3
3183 && (1 << shift) == GET_MODE_SIZE (mode)))
3184 && REG_P (index)
3185 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3187 info->type = type;
3188 info->offset = index;
3189 info->shift = shift;
3190 return true;
3193 return false;
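/* Return true if OFFSET is a signed 7-bit value scaled by the size of
   MODE; for DImode this accepts multiples of 8 in the range -512 to
   504, matching the store-pair/load-pair offset range noted above.  */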
3196 bool
3197 aarch64_offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3199 return (offset >= -64 * GET_MODE_SIZE (mode)
3200 && offset < 64 * GET_MODE_SIZE (mode)
3201 && offset % GET_MODE_SIZE (mode) == 0);
3204 static inline bool
3205 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3206 HOST_WIDE_INT offset)
3208 return offset >= -256 && offset < 256;
3211 static inline bool
3212 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3214 return (offset >= 0
3215 && offset < 4096 * GET_MODE_SIZE (mode)
3216 && offset % GET_MODE_SIZE (mode) == 0);
3219 /* Return true if X is a valid address for machine mode MODE. If it is,
3220 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3221 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3223 static bool
3224 aarch64_classify_address (struct aarch64_address_info *info,
3225 rtx x, enum machine_mode mode,
3226 RTX_CODE outer_code, bool strict_p)
3228 enum rtx_code code = GET_CODE (x);
3229 rtx op0, op1;
3230 bool allow_reg_index_p =
3231 outer_code != PARALLEL && (GET_MODE_SIZE (mode) != 16
3232 || aarch64_vector_mode_supported_p (mode));
3233 /* Don't support anything other than POST_INC or REG addressing for
3234 AdvSIMD. */
3235 if (aarch64_vect_struct_mode_p (mode)
3236 && (code != POST_INC && code != REG))
3237 return false;
3239 switch (code)
3241 case REG:
3242 case SUBREG:
3243 info->type = ADDRESS_REG_IMM;
3244 info->base = x;
3245 info->offset = const0_rtx;
3246 return aarch64_base_register_rtx_p (x, strict_p);
3248 case PLUS:
3249 op0 = XEXP (x, 0);
3250 op1 = XEXP (x, 1);
3252 if (! strict_p
3253 && REG_P (op0)
3254 && (op0 == virtual_stack_vars_rtx
3255 || op0 == frame_pointer_rtx
3256 || op0 == arg_pointer_rtx)
3257 && CONST_INT_P (op1))
3259 info->type = ADDRESS_REG_IMM;
3260 info->base = op0;
3261 info->offset = op1;
3263 return true;
3266 if (GET_MODE_SIZE (mode) != 0
3267 && CONST_INT_P (op1)
3268 && aarch64_base_register_rtx_p (op0, strict_p))
3270 HOST_WIDE_INT offset = INTVAL (op1);
3272 info->type = ADDRESS_REG_IMM;
3273 info->base = op0;
3274 info->offset = op1;
3276 /* TImode and TFmode values are allowed in both pairs of X
3277 registers and individual Q registers. The available
3278 address modes are:
3279 X,X: 7-bit signed scaled offset
3280 Q: 9-bit signed offset
3281 We conservatively require an offset representable in either mode.  */
3283 if (mode == TImode || mode == TFmode)
3284 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
3285 && offset_9bit_signed_unscaled_p (mode, offset));
3287 if (outer_code == PARALLEL)
3288 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3289 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
3290 else
3291 return (offset_9bit_signed_unscaled_p (mode, offset)
3292 || offset_12bit_unsigned_scaled_p (mode, offset));
3295 if (allow_reg_index_p)
3297 /* Look for base + (scaled/extended) index register. */
3298 if (aarch64_base_register_rtx_p (op0, strict_p)
3299 && aarch64_classify_index (info, op1, mode, strict_p))
3301 info->base = op0;
3302 return true;
3304 if (aarch64_base_register_rtx_p (op1, strict_p)
3305 && aarch64_classify_index (info, op0, mode, strict_p))
3307 info->base = op1;
3308 return true;
3312 return false;
3314 case POST_INC:
3315 case POST_DEC:
3316 case PRE_INC:
3317 case PRE_DEC:
3318 info->type = ADDRESS_REG_WB;
3319 info->base = XEXP (x, 0);
3320 info->offset = NULL_RTX;
3321 return aarch64_base_register_rtx_p (info->base, strict_p);
3323 case POST_MODIFY:
3324 case PRE_MODIFY:
3325 info->type = ADDRESS_REG_WB;
3326 info->base = XEXP (x, 0);
3327 if (GET_CODE (XEXP (x, 1)) == PLUS
3328 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3329 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3330 && aarch64_base_register_rtx_p (info->base, strict_p))
3332 HOST_WIDE_INT offset;
3333 info->offset = XEXP (XEXP (x, 1), 1);
3334 offset = INTVAL (info->offset);
3336 /* TImode and TFmode values are allowed in both pairs of X
3337 registers and individual Q registers. The available
3338 address modes are:
3339 X,X: 7-bit signed scaled offset
3340 Q: 9-bit signed offset
3341 We conservatively require an offset representable in either mode.  */
3343 if (mode == TImode || mode == TFmode)
3344 return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
3345 && offset_9bit_signed_unscaled_p (mode, offset));
3347 if (outer_code == PARALLEL)
3348 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3349 && aarch64_offset_7bit_signed_scaled_p (mode, offset));
3350 else
3351 return offset_9bit_signed_unscaled_p (mode, offset);
3353 return false;
3355 case CONST:
3356 case SYMBOL_REF:
3357 case LABEL_REF:
3358 /* load literal: pc-relative constant pool entry. Only supported
3359 for SI mode or larger. */
3360 info->type = ADDRESS_SYMBOLIC;
3361 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3363 rtx sym, addend;
3365 split_const (x, &sym, &addend);
3366 return (GET_CODE (sym) == LABEL_REF
3367 || (GET_CODE (sym) == SYMBOL_REF
3368 && CONSTANT_POOL_ADDRESS_P (sym)));
3370 return false;
3372 case LO_SUM:
3373 info->type = ADDRESS_LO_SUM;
3374 info->base = XEXP (x, 0);
3375 info->offset = XEXP (x, 1);
3376 if (allow_reg_index_p
3377 && aarch64_base_register_rtx_p (info->base, strict_p))
3379 rtx sym, offs;
3380 split_const (info->offset, &sym, &offs);
3381 if (GET_CODE (sym) == SYMBOL_REF
3382 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3383 == SYMBOL_SMALL_ABSOLUTE))
3385 /* The symbol and offset must be aligned to the access size. */
3386 unsigned int align;
3387 unsigned int ref_size;
3389 if (CONSTANT_POOL_ADDRESS_P (sym))
3390 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3391 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3393 tree exp = SYMBOL_REF_DECL (sym);
3394 align = TYPE_ALIGN (TREE_TYPE (exp));
3395 align = CONSTANT_ALIGNMENT (exp, align);
3397 else if (SYMBOL_REF_DECL (sym))
3398 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3399 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3400 && SYMBOL_REF_BLOCK (sym) != NULL)
3401 align = SYMBOL_REF_BLOCK (sym)->alignment;
3402 else
3403 align = BITS_PER_UNIT;
3405 ref_size = GET_MODE_SIZE (mode);
3406 if (ref_size == 0)
3407 ref_size = GET_MODE_SIZE (DImode);
3409 return ((INTVAL (offs) & (ref_size - 1)) == 0
3410 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3413 return false;
3415 default:
3416 return false;
3420 bool
3421 aarch64_symbolic_address_p (rtx x)
3423 rtx offset;
3425 split_const (x, &x, &offset);
3426 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3429 /* Classify the base of symbolic expression X, given that X appears in
3430 context CONTEXT. */
3432 enum aarch64_symbol_type
3433 aarch64_classify_symbolic_expression (rtx x,
3434 enum aarch64_symbol_context context)
3436 rtx offset;
3438 split_const (x, &x, &offset);
3439 return aarch64_classify_symbol (x, context);
3443 /* Return TRUE if X is a legitimate address for accessing memory in
3444 mode MODE. */
3445 static bool
3446 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3448 struct aarch64_address_info addr;
3450 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3453 /* Return TRUE if X is a legitimate address for accessing memory in
3454 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3455 pair operation. */
3456 bool
3457 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3458 RTX_CODE outer_code, bool strict_p)
3460 struct aarch64_address_info addr;
3462 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3465 /* Return TRUE if rtx X is the immediate constant 0.0. */
3466 bool
3467 aarch64_float_const_zero_rtx_p (rtx x)
3469 REAL_VALUE_TYPE r;
3471 if (GET_MODE (x) == VOIDmode)
3472 return false;
3474 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3475 if (REAL_VALUE_MINUS_ZERO (r))
3476 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3477 return REAL_VALUES_EQUAL (r, dconst0);
3480 /* Return the fixed registers used for condition codes. */
3482 static bool
3483 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3485 *p1 = CC_REGNUM;
3486 *p2 = INVALID_REGNUM;
3487 return true;
3490 /* Emit call insn with PAT and do aarch64-specific handling. */
3492 void
3493 aarch64_emit_call_insn (rtx pat)
3495 rtx insn = emit_call_insn (pat);
3497 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
3498 clobber_reg (fusage, gen_rtx_REG (word_mode, IP0_REGNUM));
3499 clobber_reg (fusage, gen_rtx_REG (word_mode, IP1_REGNUM));
3502 enum machine_mode
3503 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3505 /* All floating point compares return CCFP if it is an equality
3506 comparison, and CCFPE otherwise. */
3507 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3509 switch (code)
3511 case EQ:
3512 case NE:
3513 case UNORDERED:
3514 case ORDERED:
3515 case UNLT:
3516 case UNLE:
3517 case UNGT:
3518 case UNGE:
3519 case UNEQ:
3520 case LTGT:
3521 return CCFPmode;
3523 case LT:
3524 case LE:
3525 case GT:
3526 case GE:
3527 return CCFPEmode;
3529 default:
3530 gcc_unreachable ();
3534 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3535 && y == const0_rtx
3536 && (code == EQ || code == NE || code == LT || code == GE)
3537 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3538 || GET_CODE (x) == NEG))
3539 return CC_NZmode;
3541 /* A compare with a shifted operand. Because of canonicalization,
3542 the comparison will have to be swapped when we emit the assembly
3543 code. */
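/* For example, (compare (ashift x 2) y) is output with the operands
   swapped, so the tested condition must be swapped as well; the
   CC_SWPmode case in aarch64_get_condition_code below maps LT to GT,
   GE to LE, and so on.  */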
3544 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3545 && (REG_P (y) || GET_CODE (y) == SUBREG)
3546 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3547 || GET_CODE (x) == LSHIFTRT
3548 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3549 return CC_SWPmode;
3551 /* Similarly for a negated operand, but we can only do this for
3552 equalities. */
3553 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3554 && (REG_P (y) || GET_CODE (y) == SUBREG)
3555 && (code == EQ || code == NE)
3556 && GET_CODE (x) == NEG)
3557 return CC_Zmode;
3559 /* A compare of a mode narrower than SI mode against zero can be done
3560 by extending the value in the comparison. */
3561 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3562 && y == const0_rtx)
3563 /* Only use sign-extension if we really need it. */
3564 return ((code == GT || code == GE || code == LE || code == LT)
3565 ? CC_SESWPmode : CC_ZESWPmode);
3567 /* For everything else, return CCmode. */
3568 return CCmode;
3571 static unsigned
3572 aarch64_get_condition_code (rtx x)
3574 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3575 enum rtx_code comp_code = GET_CODE (x);
3577 if (GET_MODE_CLASS (mode) != MODE_CC)
3578 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3580 switch (mode)
3582 case CCFPmode:
3583 case CCFPEmode:
3584 switch (comp_code)
3586 case GE: return AARCH64_GE;
3587 case GT: return AARCH64_GT;
3588 case LE: return AARCH64_LS;
3589 case LT: return AARCH64_MI;
3590 case NE: return AARCH64_NE;
3591 case EQ: return AARCH64_EQ;
3592 case ORDERED: return AARCH64_VC;
3593 case UNORDERED: return AARCH64_VS;
3594 case UNLT: return AARCH64_LT;
3595 case UNLE: return AARCH64_LE;
3596 case UNGT: return AARCH64_HI;
3597 case UNGE: return AARCH64_PL;
3598 default: gcc_unreachable ();
3600 break;
3602 case CCmode:
3603 switch (comp_code)
3605 case NE: return AARCH64_NE;
3606 case EQ: return AARCH64_EQ;
3607 case GE: return AARCH64_GE;
3608 case GT: return AARCH64_GT;
3609 case LE: return AARCH64_LE;
3610 case LT: return AARCH64_LT;
3611 case GEU: return AARCH64_CS;
3612 case GTU: return AARCH64_HI;
3613 case LEU: return AARCH64_LS;
3614 case LTU: return AARCH64_CC;
3615 default: gcc_unreachable ();
3617 break;
3619 case CC_SWPmode:
3620 case CC_ZESWPmode:
3621 case CC_SESWPmode:
3622 switch (comp_code)
3624 case NE: return AARCH64_NE;
3625 case EQ: return AARCH64_EQ;
3626 case GE: return AARCH64_LE;
3627 case GT: return AARCH64_LT;
3628 case LE: return AARCH64_GE;
3629 case LT: return AARCH64_GT;
3630 case GEU: return AARCH64_LS;
3631 case GTU: return AARCH64_CC;
3632 case LEU: return AARCH64_CS;
3633 case LTU: return AARCH64_HI;
3634 default: gcc_unreachable ();
3636 break;
3638 case CC_NZmode:
3639 switch (comp_code)
3641 case NE: return AARCH64_NE;
3642 case EQ: return AARCH64_EQ;
3643 case GE: return AARCH64_PL;
3644 case LT: return AARCH64_MI;
3645 default: gcc_unreachable ();
3647 break;
3649 case CC_Zmode:
3650 switch (comp_code)
3652 case NE: return AARCH64_NE;
3653 case EQ: return AARCH64_EQ;
3654 default: gcc_unreachable ();
3656 break;
3658 default:
3659 gcc_unreachable ();
3660 break;
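/* Return the number of bits set in VALUE; each iteration of the loop
   clears the lowest set bit.  */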
3664 static unsigned
3665 bit_count (unsigned HOST_WIDE_INT value)
3667 unsigned count = 0;
3669 while (value)
3671 count++;
3672 value &= value - 1;
3675 return count;
3678 void
3679 aarch64_print_operand (FILE *f, rtx x, char code)
3681 switch (code)
3683 /* An integer or symbol address without a preceding # sign. */
3684 case 'c':
3685 switch (GET_CODE (x))
3687 case CONST_INT:
3688 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3689 break;
3691 case SYMBOL_REF:
3692 output_addr_const (f, x);
3693 break;
3695 case CONST:
3696 if (GET_CODE (XEXP (x, 0)) == PLUS
3697 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3699 output_addr_const (f, x);
3700 break;
3702 /* Fall through. */
3704 default:
3705 output_operand_lossage ("Unsupported operand for code '%c'", code);
3707 break;
3709 case 'e':
3710 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3712 int n;
3714 if (!CONST_INT_P (x)
3715 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3717 output_operand_lossage ("invalid operand for '%%%c'", code);
3718 return;
3721 switch (n)
3723 case 3:
3724 fputc ('b', f);
3725 break;
3726 case 4:
3727 fputc ('h', f);
3728 break;
3729 case 5:
3730 fputc ('w', f);
3731 break;
3732 default:
3733 output_operand_lossage ("invalid operand for '%%%c'", code);
3734 return;
3737 break;
3739 case 'p':
3741 int n;
3743 /* Print N such that 2^N == X. */
3744 if (!CONST_INT_P (x) || (n = exact_log2 (INTVAL (x))) < 0)
3746 output_operand_lossage ("invalid operand for '%%%c'", code);
3747 return;
3750 asm_fprintf (f, "%d", n);
3752 break;
3754 case 'P':
3755 /* Print the number of non-zero bits in X (a const_int). */
3756 if (!CONST_INT_P (x))
3758 output_operand_lossage ("invalid operand for '%%%c'", code);
3759 return;
3762 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3763 break;
3765 case 'H':
3766 /* Print the higher numbered register of a pair (TImode) of regs. */
3767 if (!REG_P (x) || !GP_REGNUM_P (REGNO (x) + 1))
3769 output_operand_lossage ("invalid operand for '%%%c'", code);
3770 return;
3773 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3774 break;
3776 case 'm':
3777 /* Print a condition (eq, ne, etc). */
3779 /* CONST_TRUE_RTX means always -- that's the default. */
3780 if (x == const_true_rtx)
3781 return;
3783 if (!COMPARISON_P (x))
3785 output_operand_lossage ("invalid operand for '%%%c'", code);
3786 return;
3789 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3790 break;
3792 case 'M':
3793 /* Print the inverse of a condition (eq <-> ne, etc). */
3795 /* CONST_TRUE_RTX means never -- that's the default. */
3796 if (x == const_true_rtx)
3798 fputs ("nv", f);
3799 return;
3802 if (!COMPARISON_P (x))
3804 output_operand_lossage ("invalid operand for '%%%c'", code);
3805 return;
3808 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3809 (aarch64_get_condition_code (x))], f);
3810 break;
3812 case 'b':
3813 case 'h':
3814 case 's':
3815 case 'd':
3816 case 'q':
3817 /* Print a scalar FP/SIMD register name. */
3818 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3820 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3821 return;
3823 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3824 break;
3826 case 'S':
3827 case 'T':
3828 case 'U':
3829 case 'V':
3830 /* Print the first FP/SIMD register name in a list. */
3831 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3833 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3834 return;
3836 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3837 break;
3839 case 'X':
3840 /* Print bottom 16 bits of integer constant in hex. */
3841 if (!CONST_INT_P (x))
3843 output_operand_lossage ("invalid operand for '%%%c'", code);
3844 return;
3846 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3847 break;
3849 case 'w':
3850 case 'x':
3851 /* Print a general register name or the zero register (32-bit or
3852 64-bit). */
3853 if (x == const0_rtx
3854 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3856 asm_fprintf (f, "%czr", code);
3857 break;
3860 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3862 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3863 break;
3866 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3868 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3869 break;
3872 /* Fall through */
3874 case 0:
3875 /* Print a normal operand; if it's a general register, we
3876 assume DImode. */
3877 if (x == NULL)
3879 output_operand_lossage ("missing operand");
3880 return;
3883 switch (GET_CODE (x))
3885 case REG:
3886 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3887 break;
3889 case MEM:
3890 aarch64_memory_reference_mode = GET_MODE (x);
3891 output_address (XEXP (x, 0));
3892 break;
3894 case LABEL_REF:
3895 case SYMBOL_REF:
3896 output_addr_const (asm_out_file, x);
3897 break;
3899 case CONST_INT:
3900 asm_fprintf (f, "%wd", INTVAL (x));
3901 break;
3903 case CONST_VECTOR:
3904 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3906 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3907 HOST_WIDE_INT_MIN,
3908 HOST_WIDE_INT_MAX));
3909 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3911 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3913 fputc ('0', f);
3915 else
3916 gcc_unreachable ();
3917 break;
3919 case CONST_DOUBLE:
3920 /* CONST_DOUBLE can represent a double-width integer.
3921 In this case, the mode of x is VOIDmode. */
3922 if (GET_MODE (x) == VOIDmode)
3923 ; /* Do Nothing. */
3924 else if (aarch64_float_const_zero_rtx_p (x))
3926 fputc ('0', f);
3927 break;
3929 else if (aarch64_float_const_representable_p (x))
3931 #define buf_size 20
3932 char float_buf[buf_size] = {'\0'};
3933 REAL_VALUE_TYPE r;
3934 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3935 real_to_decimal_for_mode (float_buf, &r,
3936 buf_size, buf_size,
3937 1, GET_MODE (x));
3938 asm_fprintf (asm_out_file, "%s", float_buf);
3939 break;
3940 #undef buf_size
3942 output_operand_lossage ("invalid constant");
3943 return;
3944 default:
3945 output_operand_lossage ("invalid operand");
3946 return;
3948 break;
3950 case 'A':
3951 if (GET_CODE (x) == HIGH)
3952 x = XEXP (x, 0);
3954 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3956 case SYMBOL_SMALL_GOT:
3957 asm_fprintf (asm_out_file, ":got:");
3958 break;
3960 case SYMBOL_SMALL_TLSGD:
3961 asm_fprintf (asm_out_file, ":tlsgd:");
3962 break;
3964 case SYMBOL_SMALL_TLSDESC:
3965 asm_fprintf (asm_out_file, ":tlsdesc:");
3966 break;
3968 case SYMBOL_SMALL_GOTTPREL:
3969 asm_fprintf (asm_out_file, ":gottprel:");
3970 break;
3972 case SYMBOL_SMALL_TPREL:
3973 asm_fprintf (asm_out_file, ":tprel:");
3974 break;
3976 case SYMBOL_TINY_GOT:
3977 gcc_unreachable ();
3978 break;
3980 default:
3981 break;
3983 output_addr_const (asm_out_file, x);
3984 break;
3986 case 'L':
3987 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3989 case SYMBOL_SMALL_GOT:
3990 asm_fprintf (asm_out_file, ":lo12:");
3991 break;
3993 case SYMBOL_SMALL_TLSGD:
3994 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3995 break;
3997 case SYMBOL_SMALL_TLSDESC:
3998 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3999 break;
4001 case SYMBOL_SMALL_GOTTPREL:
4002 asm_fprintf (asm_out_file, ":gottprel_lo12:");
4003 break;
4005 case SYMBOL_SMALL_TPREL:
4006 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
4007 break;
4009 case SYMBOL_TINY_GOT:
4010 asm_fprintf (asm_out_file, ":got:");
4011 break;
4013 default:
4014 break;
4016 output_addr_const (asm_out_file, x);
4017 break;
4019 case 'G':
4021 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
4023 case SYMBOL_SMALL_TPREL:
4024 asm_fprintf (asm_out_file, ":tprel_hi12:");
4025 break;
4026 default:
4027 break;
4029 output_addr_const (asm_out_file, x);
4030 break;
4032 default:
4033 output_operand_lossage ("invalid operand prefix '%%%c'", code);
4034 return;
4038 void
4039 aarch64_print_operand_address (FILE *f, rtx x)
4041 struct aarch64_address_info addr;
4043 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
4044 MEM, true))
4045 switch (addr.type)
4047 case ADDRESS_REG_IMM:
4048 if (addr.offset == const0_rtx)
4049 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
4050 else
4051 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
4052 INTVAL (addr.offset));
4053 return;
4055 case ADDRESS_REG_REG:
4056 if (addr.shift == 0)
4057 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
4058 reg_names [REGNO (addr.offset)]);
4059 else
4060 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
4061 reg_names [REGNO (addr.offset)], addr.shift);
4062 return;
4064 case ADDRESS_REG_UXTW:
4065 if (addr.shift == 0)
4066 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
4067 REGNO (addr.offset) - R0_REGNUM);
4068 else
4069 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
4070 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4071 return;
4073 case ADDRESS_REG_SXTW:
4074 if (addr.shift == 0)
4075 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
4076 REGNO (addr.offset) - R0_REGNUM);
4077 else
4078 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
4079 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4080 return;
4082 case ADDRESS_REG_WB:
4083 switch (GET_CODE (x))
4085 case PRE_INC:
4086 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
4087 GET_MODE_SIZE (aarch64_memory_reference_mode));
4088 return;
4089 case POST_INC:
4090 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
4091 GET_MODE_SIZE (aarch64_memory_reference_mode));
4092 return;
4093 case PRE_DEC:
4094 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
4095 GET_MODE_SIZE (aarch64_memory_reference_mode));
4096 return;
4097 case POST_DEC:
4098 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
4099 GET_MODE_SIZE (aarch64_memory_reference_mode));
4100 return;
4101 case PRE_MODIFY:
4102 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
4103 INTVAL (addr.offset));
4104 return;
4105 case POST_MODIFY:
4106 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
4107 INTVAL (addr.offset));
4108 return;
4109 default:
4110 break;
4112 break;
4114 case ADDRESS_LO_SUM:
4115 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
4116 output_addr_const (f, addr.offset);
4117 asm_fprintf (f, "]");
4118 return;
4120 case ADDRESS_SYMBOLIC:
4121 break;
4124 output_addr_const (f, x);
4127 bool
4128 aarch64_label_mentioned_p (rtx x)
4130 const char *fmt;
4131 int i;
4133 if (GET_CODE (x) == LABEL_REF)
4134 return true;
4136 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4137 referencing instruction, but they are constant offsets, not
4138 symbols. */
4139 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4140 return false;
4142 fmt = GET_RTX_FORMAT (GET_CODE (x));
4143 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4145 if (fmt[i] == 'E')
4147 int j;
4149 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4150 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4151 return 1;
4153 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4154 return 1;
4157 return 0;
4160 /* Implement REGNO_REG_CLASS. */
4162 enum reg_class
4163 aarch64_regno_regclass (unsigned regno)
4165 if (GP_REGNUM_P (regno))
4166 return GENERAL_REGS;
4168 if (regno == SP_REGNUM)
4169 return STACK_REG;
4171 if (regno == FRAME_POINTER_REGNUM
4172 || regno == ARG_POINTER_REGNUM)
4173 return POINTER_REGS;
4175 if (FP_REGNUM_P (regno))
4176 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
4178 return NO_REGS;
4181 /* Try a machine-dependent way of reloading an illegitimate address
4182 operand. If we find one, push the reload and return the new rtx. */
4185 aarch64_legitimize_reload_address (rtx *x_p,
4186 enum machine_mode mode,
4187 int opnum, int type,
4188 int ind_levels ATTRIBUTE_UNUSED)
4190 rtx x = *x_p;
4192 /* Do not allow mem (plus (reg, const)) if vector struct mode. */
4193 if (aarch64_vect_struct_mode_p (mode)
4194 && GET_CODE (x) == PLUS
4195 && REG_P (XEXP (x, 0))
4196 && CONST_INT_P (XEXP (x, 1)))
4198 rtx orig_rtx = x;
4199 x = copy_rtx (x);
4200 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4201 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4202 opnum, (enum reload_type) type);
4203 return x;
4206 /* We must recognize output that we have already generated ourselves. */
4207 if (GET_CODE (x) == PLUS
4208 && GET_CODE (XEXP (x, 0)) == PLUS
4209 && REG_P (XEXP (XEXP (x, 0), 0))
4210 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4211 && CONST_INT_P (XEXP (x, 1)))
4213 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4214 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4215 opnum, (enum reload_type) type);
4216 return x;
4219 /* We wish to handle large displacements off a base register by splitting
4220 the addend across an add and the mem insn. This can cut the number of
4221 extra insns needed from 3 to 1. It is only useful for load/store of a
4222 single register with 12 bit offset field. */
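/* For example, a 4-byte load at base + 0x12344 can be handled by reloading
   high = 0x12000 (a 12-bit immediate shifted by 12) into a scratch register
   and leaving low = 0x344 in the memory reference, giving something like
   add xscratch, xbase, #0x12000
   ldr w0, [xscratch, #0x344]
   where xscratch and xbase stand for whichever registers reload picks. */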
4223 if (GET_CODE (x) == PLUS
4224 && REG_P (XEXP (x, 0))
4225 && CONST_INT_P (XEXP (x, 1))
4226 && HARD_REGISTER_P (XEXP (x, 0))
4227 && mode != TImode
4228 && mode != TFmode
4229 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4231 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4232 HOST_WIDE_INT low = val & 0xfff;
4233 HOST_WIDE_INT high = val - low;
4234 HOST_WIDE_INT offs;
4235 rtx cst;
4236 enum machine_mode xmode = GET_MODE (x);
4238 /* In ILP32, xmode can be either DImode or SImode. */
4239 gcc_assert (xmode == DImode || xmode == SImode);
4241 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4242 BLKmode alignment. */
4243 if (GET_MODE_SIZE (mode) == 0)
4244 return NULL_RTX;
4246 offs = low % GET_MODE_SIZE (mode);
4248 /* Align misaligned offset by adjusting high part to compensate. */
4249 if (offs != 0)
4251 if (aarch64_uimm12_shift (high + offs))
4253 /* Align down. */
4254 low = low - offs;
4255 high = high + offs;
4257 else
4259 /* Align up. */
4260 offs = GET_MODE_SIZE (mode) - offs;
4261 low = low + offs;
4262 high = high + (low & 0x1000) - offs;
4263 low &= 0xfff;
4267 /* Check for overflow. */
4268 if (high + low != val)
4269 return NULL_RTX;
4271 cst = GEN_INT (high);
4272 if (!aarch64_uimm12_shift (high))
4273 cst = force_const_mem (xmode, cst);
4275 /* Reload high part into base reg, leaving the low part
4276 in the mem instruction.
4277 Note that replacing this gen_rtx_PLUS with plus_constant is
4278 wrong in this case because we rely on the
4279 (plus (plus reg c1) c2) structure being preserved so that
4280 XEXP (*p, 0) in push_reload below uses the correct term. */
4281 x = gen_rtx_PLUS (xmode,
4282 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4283 GEN_INT (low));
4285 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4286 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4287 opnum, (enum reload_type) type);
4288 return x;
4291 return NULL_RTX;
4295 static reg_class_t
4296 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4297 reg_class_t rclass,
4298 enum machine_mode mode,
4299 secondary_reload_info *sri)
4301 /* Without the TARGET_SIMD instructions we cannot move a Q register
4302 to a Q register directly. We need a scratch. */
4303 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4304 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4305 && reg_class_subset_p (rclass, FP_REGS))
4307 if (mode == TFmode)
4308 sri->icode = CODE_FOR_aarch64_reload_movtf;
4309 else if (mode == TImode)
4310 sri->icode = CODE_FOR_aarch64_reload_movti;
4311 return NO_REGS;
4314 /* A TFmode or TImode memory access should be handled via FP_REGS
4315 because AArch64 has richer addressing modes for LDR/STR instructions
4316 than LDP/STP instructions. */
4317 if (!TARGET_GENERAL_REGS_ONLY && rclass == GENERAL_REGS
4318 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4319 return FP_REGS;
4321 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4322 return GENERAL_REGS;
4324 return NO_REGS;
4327 static bool
4328 aarch64_can_eliminate (const int from, const int to)
4330 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4331 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4333 if (frame_pointer_needed)
4335 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4336 return true;
4337 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4338 return false;
4339 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4340 && !cfun->calls_alloca)
4341 return true;
4342 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4343 return true;
4345 return false;
4348 return true;
4351 HOST_WIDE_INT
4352 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4354 aarch64_layout_frame ();
4356 if (to == HARD_FRAME_POINTER_REGNUM)
4358 if (from == ARG_POINTER_REGNUM)
4359 return cfun->machine->frame.frame_size - crtl->outgoing_args_size;
4361 if (from == FRAME_POINTER_REGNUM)
4362 return (cfun->machine->frame.hard_fp_offset
4363 - cfun->machine->frame.saved_varargs_size);
4366 if (to == STACK_POINTER_REGNUM)
4368 if (from == FRAME_POINTER_REGNUM)
4369 return (cfun->machine->frame.frame_size
4370 - cfun->machine->frame.saved_varargs_size);
4373 return cfun->machine->frame.frame_size;
4376 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4377 previous frame. */
4380 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4382 if (count != 0)
4383 return const0_rtx;
4384 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4388 static void
4389 aarch64_asm_trampoline_template (FILE *f)
4391 if (TARGET_ILP32)
4393 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4394 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4396 else
4398 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4399 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4401 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4402 assemble_aligned_integer (4, const0_rtx);
4403 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4404 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
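/* The template above is therefore 16 bytes of code (two loads, a branch and
   a padding word) followed by two pointer-sized data slots: the target
   function address and the static chain value, which aarch64_trampoline_init
   below fills in. */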
4407 static void
4408 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4410 rtx fnaddr, mem, a_tramp;
4411 const int tramp_code_sz = 16;
4413 /* Don't need to copy the trailing D-words, we fill those in below. */
4414 emit_block_move (m_tramp, assemble_trampoline_template (),
4415 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4416 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4417 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4418 if (GET_MODE (fnaddr) != ptr_mode)
4419 fnaddr = convert_memory_address (ptr_mode, fnaddr);
4420 emit_move_insn (mem, fnaddr);
4422 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4423 emit_move_insn (mem, chain_value);
4425 /* XXX We should really define a "clear_cache" pattern and use
4426 gen_clear_cache(). */
4427 a_tramp = XEXP (m_tramp, 0);
4428 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4429 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4430 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4431 ptr_mode);
4434 static unsigned char
4435 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4437 switch (regclass)
4439 case CALLER_SAVE_REGS:
4440 case POINTER_REGS:
4441 case GENERAL_REGS:
4442 case ALL_REGS:
4443 case FP_REGS:
4444 case FP_LO_REGS:
4445 return
4446 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4447 (GET_MODE_SIZE (mode) + 7) / 8;
4448 case STACK_REG:
4449 return 1;
4451 case NO_REGS:
4452 return 0;
4454 default:
4455 break;
4457 gcc_unreachable ();
4460 static reg_class_t
4461 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4463 if (regclass == POINTER_REGS)
4464 return GENERAL_REGS;
4466 if (regclass == STACK_REG)
4468 if (REG_P (x)
4469 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4470 return regclass;
4472 return NO_REGS;
4475 /* If it's an integer immediate that MOVI can't handle, then
4476 FP_REGS is not an option, so we return NO_REGS instead. */
4477 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4478 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4479 return NO_REGS;
4481 /* Register elimination can result in a request for
4482 SP+constant->FP_REGS. We cannot support such operations which
4483 use SP as source and an FP_REG as destination, so reject such
4484 requests outright. */
4485 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4487 rtx lhs = XEXP (x, 0);
4489 /* Look through a possible SUBREG introduced by ILP32. */
4490 if (GET_CODE (lhs) == SUBREG)
4491 lhs = SUBREG_REG (lhs);
4493 gcc_assert (REG_P (lhs));
4494 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4495 POINTER_REGS));
4496 return NO_REGS;
4499 return regclass;
4502 void
4503 aarch64_asm_output_labelref (FILE* f, const char *name)
4505 asm_fprintf (f, "%U%s", name);
4508 static void
4509 aarch64_elf_asm_constructor (rtx symbol, int priority)
4511 if (priority == DEFAULT_INIT_PRIORITY)
4512 default_ctor_section_asm_out_constructor (symbol, priority);
4513 else
4515 section *s;
4516 char buf[18];
4517 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4518 s = get_section (buf, SECTION_WRITE, NULL);
4519 switch_to_section (s);
4520 assemble_align (POINTER_SIZE);
4521 assemble_aligned_integer (POINTER_BYTES, symbol);
4525 static void
4526 aarch64_elf_asm_destructor (rtx symbol, int priority)
4528 if (priority == DEFAULT_INIT_PRIORITY)
4529 default_dtor_section_asm_out_destructor (symbol, priority);
4530 else
4532 section *s;
4533 char buf[18];
4534 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4535 s = get_section (buf, SECTION_WRITE, NULL);
4536 switch_to_section (s);
4537 assemble_align (POINTER_SIZE);
4538 assemble_aligned_integer (POINTER_BYTES, symbol);
4542 const char*
4543 aarch64_output_casesi (rtx *operands)
4545 char buf[100];
4546 char label[100];
4547 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
4548 int index;
4549 static const char *const patterns[4][2] =
4552 "ldrb\t%w3, [%0,%w1,uxtw]",
4553 "add\t%3, %4, %w3, sxtb #2"
4556 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4557 "add\t%3, %4, %w3, sxth #2"
4560 "ldr\t%w3, [%0,%w1,uxtw #2]",
4561 "add\t%3, %4, %w3, sxtw #2"
4563 /* We assume that DImode is only generated when not optimizing and
4564 that we don't really need 64-bit address offsets. That would
4565 imply an object file with 8GB of code in a single function! */
4567 "ldr\t%w3, [%0,%w1,uxtw #2]",
4568 "add\t%3, %4, %w3, sxtw #2"
4572 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4574 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4576 gcc_assert (index >= 0 && index <= 3);
4578 /* Need to implement table size reduction, by changing the code below. */
4579 output_asm_insn (patterns[index][0], operands);
4580 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4581 snprintf (buf, sizeof (buf),
4582 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4583 output_asm_insn (buf, operands);
4584 output_asm_insn (patterns[index][1], operands);
4585 output_asm_insn ("br\t%3", operands);
4586 assemble_label (asm_out_file, label);
4587 return "";
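/* For a HImode dispatch table, and assuming the table base is in x0, the
   index in w1 and the scratch operands are x3 and x4, the sequence above
   comes out roughly as
   ldrh w3, [x0, w1, uxtw #1]
   adr x4, .Lrtx<N>
   add x3, x4, w3, sxth #2
   br x3
   where .Lrtx<N> is the label emitted just after the branch, at the start
   of the table. */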
4591 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4592 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4593 operator. */
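/* For example, shift == 1 with mask == 0x1fe corresponds to (x & 0xff) << 1,
   i.e. a UXTB operand shifted left by one, so the result is 8; a mask that
   does not line up with an 8, 16 or 32-bit pattern yields 0. */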
4596 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4598 if (shift >= 0 && shift <= 3)
4600 int size;
4601 for (size = 8; size <= 32; size *= 2)
4603 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4604 if (mask == bits << shift)
4605 return size;
4608 return 0;
4611 static bool
4612 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4613 const_rtx x ATTRIBUTE_UNUSED)
4615 /* We can't use blocks for constants when we're using a per-function
4616 constant pool. */
4617 return false;
4620 static section *
4621 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4622 rtx x ATTRIBUTE_UNUSED,
4623 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4625 /* Force all constant pool entries into the current function section. */
4626 return function_section (current_function_decl);
4630 /* Costs. */
4632 /* Helper function for rtx cost calculation. Strip a shift expression
4633 from X. Returns the inner operand if successful, or the original
4634 expression on failure. */
4635 static rtx
4636 aarch64_strip_shift (rtx x)
4638 rtx op = x;
4640 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
4641 we can convert both to ROR during final output. */
4642 if ((GET_CODE (op) == ASHIFT
4643 || GET_CODE (op) == ASHIFTRT
4644 || GET_CODE (op) == LSHIFTRT
4645 || GET_CODE (op) == ROTATERT
4646 || GET_CODE (op) == ROTATE)
4647 && CONST_INT_P (XEXP (op, 1)))
4648 return XEXP (op, 0);
4650 if (GET_CODE (op) == MULT
4651 && CONST_INT_P (XEXP (op, 1))
4652 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4653 return XEXP (op, 0);
4655 return x;
4658 /* Helper function for rtx cost calculation. Strip an extend
4659 expression from X. Returns the inner operand if successful, or the
4660 original expression on failure. We deal with a number of possible
4661 canonicalization variations here. */
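/* For example, both (zero_extend (reg X)) and
   (ashift (zero_extend (reg X)) (const_int 2)) strip down to (reg X),
   matching the extended-register forms of the arithmetic instructions. */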
4662 static rtx
4663 aarch64_strip_extend (rtx x)
4665 rtx op = x;
4667 /* Zero and sign extraction of a widened value. */
4668 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4669 && XEXP (op, 2) == const0_rtx
4670 && GET_CODE (XEXP (op, 0)) == MULT
4671 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4672 XEXP (op, 1)))
4673 return XEXP (XEXP (op, 0), 0);
4675 /* It can also be represented (for zero-extend) as an AND with an
4676 immediate. */
4677 if (GET_CODE (op) == AND
4678 && GET_CODE (XEXP (op, 0)) == MULT
4679 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4680 && CONST_INT_P (XEXP (op, 1))
4681 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4682 INTVAL (XEXP (op, 1))) != 0)
4683 return XEXP (XEXP (op, 0), 0);
4685 /* Now handle extended register, as this may also have an optional
4686 left shift by 1..4. */
4687 if (GET_CODE (op) == ASHIFT
4688 && CONST_INT_P (XEXP (op, 1))
4689 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4690 op = XEXP (op, 0);
4692 if (GET_CODE (op) == ZERO_EXTEND
4693 || GET_CODE (op) == SIGN_EXTEND)
4694 op = XEXP (op, 0);
4696 if (op != x)
4697 return op;
4699 return x;
4702 /* Helper function for rtx cost calculation. Calculate the cost of
4703 a MULT, which may be part of a multiply-accumulate rtx. Return
4704 the calculated cost of the expression, recursing manually in to
4705 operands where needed. */
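/* For example, (plus (mult (reg A) (reg B)) (reg C)) in an integer mode is
   costed as a single MADD, while a multiplication by a power of two is
   costed as a shift, or as a shifted ADD when it feeds a PLUS or MINUS. */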
4707 static int
4708 aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
4710 rtx op0, op1;
4711 const struct cpu_cost_table *extra_cost
4712 = aarch64_tune_params->insn_extra_cost;
4713 int cost = 0;
4714 bool maybe_fma = (outer == PLUS || outer == MINUS);
4715 enum machine_mode mode = GET_MODE (x);
4717 gcc_checking_assert (code == MULT);
4719 op0 = XEXP (x, 0);
4720 op1 = XEXP (x, 1);
4722 if (VECTOR_MODE_P (mode))
4723 mode = GET_MODE_INNER (mode);
4725 /* Integer multiply/fma. */
4726 if (GET_MODE_CLASS (mode) == MODE_INT)
4728 /* The multiply will be canonicalized as a shift, cost it as such. */
4729 if (CONST_INT_P (op1)
4730 && exact_log2 (INTVAL (op1)) > 0)
4732 if (speed)
4734 if (maybe_fma)
4735 /* ADD (shifted register). */
4736 cost += extra_cost->alu.arith_shift;
4737 else
4738 /* LSL (immediate). */
4739 cost += extra_cost->alu.shift;
4742 cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
4744 return cost;
4747 /* Integer multiplies or FMAs have zero/sign extending variants. */
4748 if ((GET_CODE (op0) == ZERO_EXTEND
4749 && GET_CODE (op1) == ZERO_EXTEND)
4750 || (GET_CODE (op0) == SIGN_EXTEND
4751 && GET_CODE (op1) == SIGN_EXTEND))
4753 cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4754 + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
4756 if (speed)
4758 if (maybe_fma)
4759 /* MADD/SMADDL/UMADDL. */
4760 cost += extra_cost->mult[0].extend_add;
4761 else
4762 /* MUL/SMULL/UMULL. */
4763 cost += extra_cost->mult[0].extend;
4766 return cost;
4769 /* This is either an integer multiply or an FMA. In both cases
4770 we want to recurse and cost the operands. */
4771 cost += rtx_cost (op0, MULT, 0, speed)
4772 + rtx_cost (op1, MULT, 1, speed);
4774 if (speed)
4776 if (maybe_fma)
4777 /* MADD. */
4778 cost += extra_cost->mult[mode == DImode].add;
4779 else
4780 /* MUL. */
4781 cost += extra_cost->mult[mode == DImode].simple;
4784 return cost;
4786 else
4788 if (speed)
4790 /* Floating-point FMA/FMUL can also support negations of the
4791 operands. */
4792 if (GET_CODE (op0) == NEG)
4793 op0 = XEXP (op0, 0);
4794 if (GET_CODE (op1) == NEG)
4795 op1 = XEXP (op1, 0);
4797 if (maybe_fma)
4798 /* FMADD/FNMADD/FNMSUB/FMSUB. */
4799 cost += extra_cost->fp[mode == DFmode].fma;
4800 else
4801 /* FMUL/FNMUL. */
4802 cost += extra_cost->fp[mode == DFmode].mult;
4805 cost += rtx_cost (op0, MULT, 0, speed)
4806 + rtx_cost (op1, MULT, 1, speed);
4807 return cost;
4811 static int
4812 aarch64_address_cost (rtx x,
4813 enum machine_mode mode,
4814 addr_space_t as ATTRIBUTE_UNUSED,
4815 bool speed)
4817 enum rtx_code c = GET_CODE (x);
4818 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4819 struct aarch64_address_info info;
4820 int cost = 0;
4821 info.shift = 0;
4823 if (!aarch64_classify_address (&info, x, mode, c, false))
4825 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
4827 /* This is a CONST or SYMBOL ref which will be split
4828 in a different way depending on the code model in use.
4829 Cost it through the generic infrastructure. */
4830 int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
4831 /* Divide through by the cost of one instruction to
4832 bring it to the same units as the address costs. */
4833 cost_symbol_ref /= COSTS_N_INSNS (1);
4834 /* The cost is then the cost of preparing the address,
4835 followed by an immediate (possibly 0) offset. */
4836 return cost_symbol_ref + addr_cost->imm_offset;
4838 else
4840 /* This is most likely a jump table from a case
4841 statement. */
4842 return addr_cost->register_offset;
4846 switch (info.type)
4848 case ADDRESS_LO_SUM:
4849 case ADDRESS_SYMBOLIC:
4850 case ADDRESS_REG_IMM:
4851 cost += addr_cost->imm_offset;
4852 break;
4854 case ADDRESS_REG_WB:
4855 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4856 cost += addr_cost->pre_modify;
4857 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4858 cost += addr_cost->post_modify;
4859 else
4860 gcc_unreachable ();
4862 break;
4864 case ADDRESS_REG_REG:
4865 cost += addr_cost->register_offset;
4866 break;
4868 case ADDRESS_REG_UXTW:
4869 case ADDRESS_REG_SXTW:
4870 cost += addr_cost->register_extend;
4871 break;
4873 default:
4874 gcc_unreachable ();
4878 if (info.shift > 0)
4880 /* For the sake of calculating the cost of the shifted register
4881 component, we can treat same sized modes in the same way. */
4882 switch (GET_MODE_BITSIZE (mode))
4884 case 16:
4885 cost += addr_cost->addr_scale_costs.hi;
4886 break;
4888 case 32:
4889 cost += addr_cost->addr_scale_costs.si;
4890 break;
4892 case 64:
4893 cost += addr_cost->addr_scale_costs.di;
4894 break;
4896 /* We can't tell, or this is a 128-bit vector. */
4897 default:
4898 cost += addr_cost->addr_scale_costs.ti;
4899 break;
4903 return cost;
4906 /* Return true if the RTX X in mode MODE is a zero or sign extract
4907 usable in an ADD or SUB (extended register) instruction. */
4908 static bool
4909 aarch64_rtx_arith_op_extract_p (rtx x, enum machine_mode mode)
4911 /* Catch add with a sign extract.
4912 This is add_<optab><mode>_multp2. */
4913 if (GET_CODE (x) == SIGN_EXTRACT
4914 || GET_CODE (x) == ZERO_EXTRACT)
4916 rtx op0 = XEXP (x, 0);
4917 rtx op1 = XEXP (x, 1);
4918 rtx op2 = XEXP (x, 2);
4920 if (GET_CODE (op0) == MULT
4921 && CONST_INT_P (op1)
4922 && op2 == const0_rtx
4923 && CONST_INT_P (XEXP (op0, 1))
4924 && aarch64_is_extend_from_extract (mode,
4925 XEXP (op0, 1),
4926 op1))
4928 return true;
4932 return false;
4935 static bool
4936 aarch64_frint_unspec_p (unsigned int u)
4938 switch (u)
4940 case UNSPEC_FRINTZ:
4941 case UNSPEC_FRINTP:
4942 case UNSPEC_FRINTM:
4943 case UNSPEC_FRINTA:
4944 case UNSPEC_FRINTN:
4945 case UNSPEC_FRINTX:
4946 case UNSPEC_FRINTI:
4947 return true;
4949 default:
4950 return false;
4954 /* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
4955 storing it in *COST. Result is true if the total cost of the operation
4956 has now been calculated. */
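/* For example, (if_then_else (ne (reg X) (const_int 0)) (label_ref L) (pc))
   is treated as a CBZ/CBNZ-style branch and only the tested operand is
   costed, whereas a MODE_CC comparison selecting between two register
   operands is costed as some flavour of CSEL. */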
4957 static bool
4958 aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
4960 rtx inner;
4961 rtx comparator;
4962 enum rtx_code cmpcode;
4964 if (COMPARISON_P (op0))
4966 inner = XEXP (op0, 0);
4967 comparator = XEXP (op0, 1);
4968 cmpcode = GET_CODE (op0);
4970 else
4972 inner = op0;
4973 comparator = const0_rtx;
4974 cmpcode = NE;
4977 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
4979 /* Conditional branch. */
4980 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
4981 return true;
4982 else
4984 if (cmpcode == NE || cmpcode == EQ)
4986 if (comparator == const0_rtx)
4988 /* TBZ/TBNZ/CBZ/CBNZ. */
4989 if (GET_CODE (inner) == ZERO_EXTRACT)
4990 /* TBZ/TBNZ. */
4991 *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT,
4992 0, speed);
4993 else
4994 /* CBZ/CBNZ. */
4995 *cost += rtx_cost (inner, cmpcode, 0, speed);
4997 return true;
5000 else if (cmpcode == LT || cmpcode == GE)
5002 /* TBZ/TBNZ. */
5003 if (comparator == const0_rtx)
5004 return true;
5008 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
5010 /* It's a conditional operation based on the status flags,
5011 so it must be some flavor of CSEL. */
5013 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
5014 if (GET_CODE (op1) == NEG
5015 || GET_CODE (op1) == NOT
5016 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
5017 op1 = XEXP (op1, 0);
5019 *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed);
5020 *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed);
5021 return true;
5024 /* We don't know what this is, cost all operands. */
5025 return false;
5028 /* Calculate the cost of calculating X, storing it in *COST. Result
5029 is true if the total cost of the operation has now been calculated. */
5030 static bool
5031 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
5032 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
5034 rtx op0, op1, op2;
5035 const struct cpu_cost_table *extra_cost
5036 = aarch64_tune_params->insn_extra_cost;
5037 enum machine_mode mode = GET_MODE (x);
5039 /* By default, assume that everything has equivalent cost to the
5040 cheapest instruction. Any additional costs are applied as a delta
5041 above this default. */
5042 *cost = COSTS_N_INSNS (1);
5044 /* TODO: The cost infrastructure currently does not handle
5045 vector operations. Assume that all vector operations
5046 are equally expensive. */
5047 if (VECTOR_MODE_P (mode))
5049 if (speed)
5050 *cost += extra_cost->vect.alu;
5051 return true;
5054 switch (code)
5056 case SET:
5057 /* The cost depends entirely on the operands to SET. */
5058 *cost = 0;
5059 op0 = SET_DEST (x);
5060 op1 = SET_SRC (x);
5062 switch (GET_CODE (op0))
5064 case MEM:
5065 if (speed)
5067 rtx address = XEXP (op0, 0);
5068 if (GET_MODE_CLASS (mode) == MODE_INT)
5069 *cost += extra_cost->ldst.store;
5070 else if (mode == SFmode)
5071 *cost += extra_cost->ldst.storef;
5072 else if (mode == DFmode)
5073 *cost += extra_cost->ldst.stored;
5075 *cost +=
5076 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5077 0, speed));
5080 *cost += rtx_cost (op1, SET, 1, speed);
5081 return true;
5083 case SUBREG:
5084 if (! REG_P (SUBREG_REG (op0)))
5085 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
5087 /* Fall through. */
5088 case REG:
5089 /* const0_rtx is in general free, but we will use an
5090 instruction to set a register to 0. */
5091 if (REG_P (op1) || op1 == const0_rtx)
5093 /* The cost is 1 per register copied. */
5094 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
5095 / UNITS_PER_WORD;
5096 *cost = COSTS_N_INSNS (n_minus_1 + 1);
5098 else
5099 /* Cost is just the cost of the RHS of the set. */
5100 *cost += rtx_cost (op1, SET, 1, speed);
5101 return true;
5103 case ZERO_EXTRACT:
5104 case SIGN_EXTRACT:
5105 /* Bit-field insertion. Strip any redundant widening of
5106 the RHS to meet the width of the target. */
5107 if (GET_CODE (op1) == SUBREG)
5108 op1 = SUBREG_REG (op1);
5109 if ((GET_CODE (op1) == ZERO_EXTEND
5110 || GET_CODE (op1) == SIGN_EXTEND)
5111 && CONST_INT_P (XEXP (op0, 1))
5112 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
5113 >= INTVAL (XEXP (op0, 1))))
5114 op1 = XEXP (op1, 0);
5116 if (CONST_INT_P (op1))
5118 /* MOV immediate is assumed to always be cheap. */
5119 *cost = COSTS_N_INSNS (1);
5121 else
5123 /* BFM. */
5124 if (speed)
5125 *cost += extra_cost->alu.bfi;
5126 *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
5129 return true;
5131 default:
5132 /* We can't make sense of this, assume default cost. */
5133 *cost = COSTS_N_INSNS (1);
5134 return false;
5136 return false;
5138 case CONST_INT:
5139 /* If an instruction can incorporate a constant within the
5140 instruction, the instruction's expression avoids calling
5141 rtx_cost() on the constant. If rtx_cost() is called on a
5142 constant, then it is usually because the constant must be
5143 moved into a register by one or more instructions.
5145 The exception is constant 0, which can be expressed
5146 as XZR/WZR and is therefore free. The exception to that is
5147 (set (reg) (const0_rtx)), in which case we must cost
5148 the move. However, we can catch that when we cost the SET, so
5149 we don't need to consider it here. */
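/* For example, a 64-bit constant that takes a MOVZ plus two MOVK
   instructions to build is costed here as three instructions. */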
5150 if (x == const0_rtx)
5151 *cost = 0;
5152 else
5154 /* To an approximation, building any other constant is
5155 proportionally expensive to the number of instructions
5156 required to build that constant. This is true whether we
5157 are compiling for SPEED or otherwise. */
5158 *cost = COSTS_N_INSNS (aarch64_build_constant (0,
5159 INTVAL (x),
5160 false));
5162 return true;
5164 case CONST_DOUBLE:
5165 if (speed)
5167 /* mov[df,sf]_aarch64. */
5168 if (aarch64_float_const_representable_p (x))
5169 /* FMOV (scalar immediate). */
5170 *cost += extra_cost->fp[mode == DFmode].fpconst;
5171 else if (!aarch64_float_const_zero_rtx_p (x))
5173 /* This will be a load from memory. */
5174 if (mode == DFmode)
5175 *cost += extra_cost->ldst.loadd;
5176 else
5177 *cost += extra_cost->ldst.loadf;
5179 else
5180 /* Otherwise this is +0.0. We get this using MOVI d0, #0
5181 or MOV v0.s[0], wzr - neither of which is modeled by the
5182 cost tables. Just use the default cost. */
5187 return true;
5189 case MEM:
5190 if (speed)
5192 /* For loads we want the base cost of a load, plus an
5193 approximation for the additional cost of the addressing
5194 mode. */
5195 rtx address = XEXP (x, 0);
5196 if (GET_MODE_CLASS (mode) == MODE_INT)
5197 *cost += extra_cost->ldst.load;
5198 else if (mode == SFmode)
5199 *cost += extra_cost->ldst.loadf;
5200 else if (mode == DFmode)
5201 *cost += extra_cost->ldst.loadd;
5203 *cost +=
5204 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5205 0, speed));
5208 return true;
5210 case NEG:
5211 op0 = XEXP (x, 0);
5213 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5215 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5216 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5218 /* CSETM. */
5219 *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
5220 return true;
5223 /* Cost this as SUB wzr, X. */
5224 op0 = CONST0_RTX (GET_MODE (x));
5225 op1 = XEXP (x, 0);
5226 goto cost_minus;
5229 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
5231 /* Support (neg(fma...)) as a single instruction only if
5232 sign of zeros is unimportant. This matches the decision
5233 making in aarch64.md. */
5234 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
5236 /* FNMADD. */
5237 *cost = rtx_cost (op0, NEG, 0, speed);
5238 return true;
5240 if (speed)
5241 /* FNEG. */
5242 *cost += extra_cost->fp[mode == DFmode].neg;
5243 return false;
5246 return false;
5248 case CLRSB:
5249 case CLZ:
5250 if (speed)
5251 *cost += extra_cost->alu.clz;
5253 return false;
5255 case COMPARE:
5256 op0 = XEXP (x, 0);
5257 op1 = XEXP (x, 1);
5259 if (op1 == const0_rtx
5260 && GET_CODE (op0) == AND)
5262 x = op0;
5263 goto cost_logic;
5266 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
5268 /* TODO: A write to the CC flags possibly costs extra, this
5269 needs encoding in the cost tables. */
5271 /* CC_ZESWPmode supports zero extend for free. */
5272 if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
5273 op0 = XEXP (op0, 0);
5275 /* ANDS. */
5276 if (GET_CODE (op0) == AND)
5278 x = op0;
5279 goto cost_logic;
5282 if (GET_CODE (op0) == PLUS)
5284 /* ADDS (and CMN alias). */
5285 x = op0;
5286 goto cost_plus;
5289 if (GET_CODE (op0) == MINUS)
5291 /* SUBS. */
5292 x = op0;
5293 goto cost_minus;
5296 if (GET_CODE (op1) == NEG)
5298 /* CMN. */
5299 if (speed)
5300 *cost += extra_cost->alu.arith;
5302 *cost += rtx_cost (op0, COMPARE, 0, speed);
5303 *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed);
5304 return true;
5307 /* CMP.
5309 Compare can freely swap the order of operands, and
5310 canonicalization puts the more complex operation first.
5311 But the integer MINUS logic expects the shift/extend
5312 operation in op1. */
5313 if (! (REG_P (op0)
5314 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
5316 op0 = XEXP (x, 1);
5317 op1 = XEXP (x, 0);
5319 goto cost_minus;
5322 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5324 /* FCMP. */
5325 if (speed)
5326 *cost += extra_cost->fp[mode == DFmode].compare;
5328 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
5330 /* FCMP supports constant 0.0 for no extra cost. */
5331 return true;
5333 return false;
5336 return false;
5338 case MINUS:
5340 op0 = XEXP (x, 0);
5341 op1 = XEXP (x, 1);
5343 cost_minus:
5344 /* Detect valid immediates. */
5345 if ((GET_MODE_CLASS (mode) == MODE_INT
5346 || (GET_MODE_CLASS (mode) == MODE_CC
5347 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
5348 && CONST_INT_P (op1)
5349 && aarch64_uimm12_shift (INTVAL (op1)))
5351 *cost += rtx_cost (op0, MINUS, 0, speed);
5353 if (speed)
5354 /* SUB(S) (immediate). */
5355 *cost += extra_cost->alu.arith;
5356 return true;
5360 /* Look for SUB (extended register). */
5361 if (aarch64_rtx_arith_op_extract_p (op1, mode))
5363 if (speed)
5364 *cost += extra_cost->alu.arith_shift;
5366 *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
5367 (enum rtx_code) GET_CODE (op1),
5368 0, speed);
5369 return true;
5372 rtx new_op1 = aarch64_strip_extend (op1);
5374 /* Cost this as an FMA-alike operation. */
5375 if ((GET_CODE (new_op1) == MULT
5376 || GET_CODE (new_op1) == ASHIFT)
5377 && code != COMPARE)
5379 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
5380 (enum rtx_code) code,
5381 speed);
5382 *cost += rtx_cost (op0, MINUS, 0, speed);
5383 return true;
5386 *cost += rtx_cost (new_op1, MINUS, 1, speed);
5388 if (speed)
5390 if (GET_MODE_CLASS (mode) == MODE_INT)
5391 /* SUB(S). */
5392 *cost += extra_cost->alu.arith;
5393 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5394 /* FSUB. */
5395 *cost += extra_cost->fp[mode == DFmode].addsub;
5397 return true;
5400 case PLUS:
5402 rtx new_op0;
5404 op0 = XEXP (x, 0);
5405 op1 = XEXP (x, 1);
5407 cost_plus:
5408 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5409 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5411 /* CSINC. */
5412 *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
5413 *cost += rtx_cost (op1, PLUS, 1, speed);
5414 return true;
5417 if (GET_MODE_CLASS (mode) == MODE_INT
5418 && CONST_INT_P (op1)
5419 && aarch64_uimm12_shift (INTVAL (op1)))
5421 *cost += rtx_cost (op0, PLUS, 0, speed);
5423 if (speed)
5424 /* ADD (immediate). */
5425 *cost += extra_cost->alu.arith;
5426 return true;
5429 /* Look for ADD (extended register). */
5430 if (aarch64_rtx_arith_op_extract_p (op0, mode))
5432 if (speed)
5433 *cost += extra_cost->alu.arith_shift;
5435 *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
5436 (enum rtx_code) GET_CODE (op0),
5437 0, speed);
5438 return true;
5441 /* Strip any extend, leave shifts behind as we will
5442 cost them through mult_cost. */
5443 new_op0 = aarch64_strip_extend (op0);
5445 if (GET_CODE (new_op0) == MULT
5446 || GET_CODE (new_op0) == ASHIFT)
5448 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
5449 speed);
5450 *cost += rtx_cost (op1, PLUS, 1, speed);
5451 return true;
5454 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
5455 + rtx_cost (op1, PLUS, 1, speed));
5457 if (speed)
5459 if (GET_MODE_CLASS (mode) == MODE_INT)
5460 /* ADD. */
5461 *cost += extra_cost->alu.arith;
5462 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5463 /* FADD. */
5464 *cost += extra_cost->fp[mode == DFmode].addsub;
5466 return true;
5469 case BSWAP:
5470 *cost = COSTS_N_INSNS (1);
5472 if (speed)
5473 *cost += extra_cost->alu.rev;
5475 return false;
5477 case IOR:
5478 if (aarch_rev16_p (x))
5480 *cost = COSTS_N_INSNS (1);
5482 if (speed)
5483 *cost += extra_cost->alu.rev;
5485 return true;
5487 /* Fall through. */
5488 case XOR:
5489 case AND:
5490 cost_logic:
5491 op0 = XEXP (x, 0);
5492 op1 = XEXP (x, 1);
5494 if (code == AND
5495 && GET_CODE (op0) == MULT
5496 && CONST_INT_P (XEXP (op0, 1))
5497 && CONST_INT_P (op1)
5498 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
5499 INTVAL (op1)) != 0)
5501 /* This is a UBFM/SBFM. */
5502 *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
5503 if (speed)
5504 *cost += extra_cost->alu.bfx;
5505 return true;
5508 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5510 /* We possibly get the immediate for free; this is not
5511 modelled. */
5512 if (CONST_INT_P (op1)
5513 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
5515 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5517 if (speed)
5518 *cost += extra_cost->alu.logical;
5520 return true;
5522 else
5524 rtx new_op0 = op0;
5526 /* Handle ORN, EON, or BIC. */
5527 if (GET_CODE (op0) == NOT)
5528 op0 = XEXP (op0, 0);
5530 new_op0 = aarch64_strip_shift (op0);
5532 /* If we had a shift on op0 then this is a logical-shift-
5533 by-register/immediate operation. Otherwise, this is just
5534 a logical operation. */
5535 if (speed)
5537 if (new_op0 != op0)
5539 /* Shift by immediate. */
5540 if (CONST_INT_P (XEXP (op0, 1)))
5541 *cost += extra_cost->alu.log_shift;
5542 else
5543 *cost += extra_cost->alu.log_shift_reg;
5545 else
5546 *cost += extra_cost->alu.logical;
5549 /* In both cases we want to cost both operands. */
5550 *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
5551 + rtx_cost (op1, (enum rtx_code) code, 1, speed);
5553 return true;
5556 return false;
5558 case NOT:
5559 /* MVN. */
5560 if (speed)
5561 *cost += extra_cost->alu.logical;
5563 /* The logical instruction could have the shifted register form,
5564 but the cost is the same if the shift is processed as a separate
5565 instruction, so we don't bother with it here. */
5566 return false;
5568 case ZERO_EXTEND:
5570 op0 = XEXP (x, 0);
5571 /* If a value is written in SI mode, then zero extended to DI
5572 mode, the operation will in general be free as a write to
5573 a 'w' register implicitly zeroes the upper bits of an 'x'
5574 register. However, if this is
5576 (set (reg) (zero_extend (reg)))
5578 we must cost the explicit register move. */
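/* For example, (set (reg:DI X) (zero_extend:DI (reg:SI Y))) still needs an
   explicit move, whereas zero-extending the result of a load or of another
   SImode operation costs nothing extra. */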
5579 if (mode == DImode
5580 && GET_MODE (op0) == SImode
5581 && outer == SET)
5583 int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
5585 if (!op_cost && speed)
5586 /* MOV. */
5587 *cost += extra_cost->alu.extend;
5588 else
5589 /* Free, the cost is that of the SI mode operation. */
5590 *cost = op_cost;
5592 return true;
5594 else if (MEM_P (XEXP (x, 0)))
5596 /* All loads can zero extend to any size for free. */
5597 *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
5598 return true;
5601 /* UXTB/UXTH. */
5602 if (speed)
5603 *cost += extra_cost->alu.extend;
5605 return false;
5607 case SIGN_EXTEND:
5608 if (MEM_P (XEXP (x, 0)))
5610 /* LDRSH. */
5611 if (speed)
5613 rtx address = XEXP (XEXP (x, 0), 0);
5614 *cost += extra_cost->ldst.load_sign_extend;
5616 *cost +=
5617 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5618 0, speed));
5620 return true;
5623 if (speed)
5624 *cost += extra_cost->alu.extend;
5625 return false;
5627 case ASHIFT:
5628 op0 = XEXP (x, 0);
5629 op1 = XEXP (x, 1);
5631 if (CONST_INT_P (op1))
5633 /* LSL (immediate), UBFM, UBFIZ and friends. These are all
5634 aliases. */
5635 if (speed)
5636 *cost += extra_cost->alu.shift;
5638 /* We can incorporate zero/sign extend for free. */
5639 if (GET_CODE (op0) == ZERO_EXTEND
5640 || GET_CODE (op0) == SIGN_EXTEND)
5641 op0 = XEXP (op0, 0);
5643 *cost += rtx_cost (op0, ASHIFT, 0, speed);
5644 return true;
5646 else
5648 /* LSLV. */
5649 if (speed)
5650 *cost += extra_cost->alu.shift_reg;
5652 return false; /* All arguments need to be in registers. */
5655 case ROTATE:
5656 case ROTATERT:
5657 case LSHIFTRT:
5658 case ASHIFTRT:
5659 op0 = XEXP (x, 0);
5660 op1 = XEXP (x, 1);
5662 if (CONST_INT_P (op1))
5664 /* ASR (immediate) and friends. */
5665 if (speed)
5666 *cost += extra_cost->alu.shift;
5668 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5669 return true;
5671 else
5674 /* ASR (register) and friends. */
5675 if (speed)
5676 *cost += extra_cost->alu.shift_reg;
5678 return false; /* All arguments need to be in registers. */
5681 case SYMBOL_REF:
5683 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
5685 /* LDR. */
5686 if (speed)
5687 *cost += extra_cost->ldst.load;
5689 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
5690 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
5692 /* ADRP, followed by ADD. */
5693 *cost += COSTS_N_INSNS (1);
5694 if (speed)
5695 *cost += 2 * extra_cost->alu.arith;
5697 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
5698 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
5700 /* ADR. */
5701 if (speed)
5702 *cost += extra_cost->alu.arith;
5705 if (flag_pic)
5707 /* One extra load instruction, after accessing the GOT. */
5708 *cost += COSTS_N_INSNS (1);
5709 if (speed)
5710 *cost += extra_cost->ldst.load;
5712 return true;
5714 case HIGH:
5715 case LO_SUM:
5716 /* ADRP/ADD (immediate). */
5717 if (speed)
5718 *cost += extra_cost->alu.arith;
5719 return true;
5721 case ZERO_EXTRACT:
5722 case SIGN_EXTRACT:
5723 /* UBFX/SBFX. */
5724 if (speed)
5725 *cost += extra_cost->alu.bfx;
5727 /* We can trust that the immediates used will be correct (there
5728 are no by-register forms), so we need only cost op0. */
5729 *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed);
5730 return true;
5732 case MULT:
5733 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
5734 /* aarch64_rtx_mult_cost always handles recursion to its
5735 operands. */
5736 return true;
5738 case MOD:
5739 case UMOD:
5740 if (speed)
5742 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5743 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
5744 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
5745 else if (GET_MODE (x) == DFmode)
5746 *cost += (extra_cost->fp[1].mult
5747 + extra_cost->fp[1].div);
5748 else if (GET_MODE (x) == SFmode)
5749 *cost += (extra_cost->fp[0].mult
5750 + extra_cost->fp[0].div);
5752 return false; /* All arguments need to be in registers. */
5754 case DIV:
5755 case UDIV:
5756 case SQRT:
5757 if (speed)
5759 if (GET_MODE_CLASS (mode) == MODE_INT)
5760 /* There is no integer SQRT, so only DIV and UDIV can get
5761 here. */
5762 *cost += extra_cost->mult[mode == DImode].idiv;
5763 else
5764 *cost += extra_cost->fp[mode == DFmode].div;
5766 return false; /* All arguments need to be in registers. */
5768 case IF_THEN_ELSE:
5769 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
5770 XEXP (x, 2), cost, speed);
5772 case EQ:
5773 case NE:
5774 case GT:
5775 case GTU:
5776 case LT:
5777 case LTU:
5778 case GE:
5779 case GEU:
5780 case LE:
5781 case LEU:
5783 return false; /* All arguments must be in registers. */
5785 case FMA:
5786 op0 = XEXP (x, 0);
5787 op1 = XEXP (x, 1);
5788 op2 = XEXP (x, 2);
5790 if (speed)
5791 *cost += extra_cost->fp[mode == DFmode].fma;
5793 /* FMSUB, FNMADD, and FNMSUB are free. */
5794 if (GET_CODE (op0) == NEG)
5795 op0 = XEXP (op0, 0);
5797 if (GET_CODE (op2) == NEG)
5798 op2 = XEXP (op2, 0);
5800 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
5801 and the by-element operand as operand 0. */
5802 if (GET_CODE (op1) == NEG)
5803 op1 = XEXP (op1, 0);
5805 /* Catch vector-by-element operations. The by-element operand can
5806 either be (vec_duplicate (vec_select (x))) or just
5807 (vec_select (x)), depending on whether we are multiplying by
5808 a vector or a scalar.
5810 Canonicalization is not very good in these cases, FMA4 will put the
5811 by-element operand as operand 0, FNMA4 will have it as operand 1. */
5812 if (GET_CODE (op0) == VEC_DUPLICATE)
5813 op0 = XEXP (op0, 0);
5814 else if (GET_CODE (op1) == VEC_DUPLICATE)
5815 op1 = XEXP (op1, 0);
5817 if (GET_CODE (op0) == VEC_SELECT)
5818 op0 = XEXP (op0, 0);
5819 else if (GET_CODE (op1) == VEC_SELECT)
5820 op1 = XEXP (op1, 0);
5822 /* If the remaining parameters are not registers,
5823 get the cost to put them into registers. */
5824 *cost += rtx_cost (op0, FMA, 0, speed);
5825 *cost += rtx_cost (op1, FMA, 1, speed);
5826 *cost += rtx_cost (op2, FMA, 2, speed);
5827 return true;
5829 case FLOAT_EXTEND:
5830 if (speed)
5831 *cost += extra_cost->fp[mode == DFmode].widen;
5832 return false;
5834 case FLOAT_TRUNCATE:
5835 if (speed)
5836 *cost += extra_cost->fp[mode == DFmode].narrow;
5837 return false;
5839 case FIX:
5840 case UNSIGNED_FIX:
5841 x = XEXP (x, 0);
5842 /* Strip the rounding part. They will all be implemented
5843 by the fcvt* family of instructions anyway. */
5844 if (GET_CODE (x) == UNSPEC)
5846 unsigned int uns_code = XINT (x, 1);
5848 if (uns_code == UNSPEC_FRINTA
5849 || uns_code == UNSPEC_FRINTM
5850 || uns_code == UNSPEC_FRINTN
5851 || uns_code == UNSPEC_FRINTP
5852 || uns_code == UNSPEC_FRINTZ)
5853 x = XVECEXP (x, 0, 0);
5856 if (speed)
5857 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
5859 *cost += rtx_cost (x, (enum rtx_code) code, 0, speed);
5860 return true;
5862 case ABS:
5863 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5865 /* FABS and FNEG are analogous. */
5866 if (speed)
5867 *cost += extra_cost->fp[mode == DFmode].neg;
5869 else
5871 /* Integer ABS will either be split to
5872 two arithmetic instructions, or will be an ABS
5873 (scalar), which we don't model. */
5874 *cost = COSTS_N_INSNS (2);
5875 if (speed)
5876 *cost += 2 * extra_cost->alu.arith;
5878 return false;
5880 case SMAX:
5881 case SMIN:
5882 if (speed)
5884 /* FMAXNM/FMINNM/FMAX/FMIN.
5885 TODO: This may not be accurate for all implementations, but
5886 we do not model this in the cost tables. */
5887 *cost += extra_cost->fp[mode == DFmode].addsub;
5889 return false;
5891 case UNSPEC:
5892 /* The floating point round to integer frint* instructions. */
5893 if (aarch64_frint_unspec_p (XINT (x, 1)))
5895 if (speed)
5896 *cost += extra_cost->fp[mode == DFmode].roundint;
5898 return false;
5901 if (XINT (x, 1) == UNSPEC_RBIT)
5903 if (speed)
5904 *cost += extra_cost->alu.rev;
5906 return false;
5908 break;
5910 case TRUNCATE:
5912 /* Decompose <su>muldi3_highpart. */
5913 if (/* (truncate:DI */
5914 mode == DImode
5915 /* (lshiftrt:TI */
5916 && GET_MODE (XEXP (x, 0)) == TImode
5917 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
5918 /* (mult:TI */
5919 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5920 /* (ANY_EXTEND:TI (reg:DI))
5921 (ANY_EXTEND:TI (reg:DI))) */
5922 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
5923 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
5924 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
5925 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
5926 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
5927 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
5928 /* (const_int 64) */
5929 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
5930 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
5932 /* UMULH/SMULH. */
5933 if (speed)
5934 *cost += extra_cost->mult[mode == DImode].extend;
5935 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
5936 MULT, 0, speed);
5937 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
5938 MULT, 1, speed);
5939 return true;
5942 /* Fall through. */
5943 default:
5944 break;
5947 if (dump_file && (dump_flags & TDF_DETAILS))
5948 fprintf (dump_file,
5949 "\nFailed to cost RTX. Assuming default cost.\n");
5951 return true;
5954 /* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
5955 calculated for X. This cost is stored in *COST. Returns true
5956 if the total cost of X was calculated. */
5957 static bool
5958 aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
5959 int param, int *cost, bool speed)
5961 bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
5963 if (dump_file && (dump_flags & TDF_DETAILS))
5965 print_rtl_single (dump_file, x);
5966 fprintf (dump_file, "\n%s cost: %d (%s)\n",
5967 speed ? "Hot" : "Cold",
5968 *cost, result ? "final" : "partial");
5971 return result;
5974 static int
5975 aarch64_register_move_cost (enum machine_mode mode,
5976 reg_class_t from_i, reg_class_t to_i)
5978 enum reg_class from = (enum reg_class) from_i;
5979 enum reg_class to = (enum reg_class) to_i;
5980 const struct cpu_regmove_cost *regmove_cost
5981 = aarch64_tune_params->regmove_cost;
5983 /* Moving between GPR and stack cost is the same as GP2GP. */
5984 if ((from == GENERAL_REGS && to == STACK_REG)
5985 || (to == GENERAL_REGS && from == STACK_REG))
5986 return regmove_cost->GP2GP;
5988 /* To/From the stack register, we move via the gprs. */
5989 if (to == STACK_REG || from == STACK_REG)
5990 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
5991 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
5993 if (from == GENERAL_REGS && to == GENERAL_REGS)
5994 return regmove_cost->GP2GP;
5995 else if (from == GENERAL_REGS)
5996 return regmove_cost->GP2FP;
5997 else if (to == GENERAL_REGS)
5998 return regmove_cost->FP2GP;
6000 /* When AdvSIMD instructions are disabled it is not possible to move
6001 a 128-bit value directly between Q registers. This is handled in
6002 secondary reload. A general register is used as a scratch to move
6003 the upper DI value and the lower DI value is moved directly,
6004 hence the cost is the sum of three moves. */
6005 if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
6006 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
6008 return regmove_cost->FP2FP;
6011 static int
6012 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
6013 reg_class_t rclass ATTRIBUTE_UNUSED,
6014 bool in ATTRIBUTE_UNUSED)
6016 return aarch64_tune_params->memmov_cost;
6019 /* Return the number of instructions that can be issued per cycle. */
6020 static int
6021 aarch64_sched_issue_rate (void)
6023 return aarch64_tune_params->issue_rate;
6026 /* Vectorizer cost model target hooks. */
6028 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6029 static int
6030 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6031 tree vectype,
6032 int misalign ATTRIBUTE_UNUSED)
6034 unsigned elements;
6036 switch (type_of_cost)
6038 case scalar_stmt:
6039 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
6041 case scalar_load:
6042 return aarch64_tune_params->vec_costs->scalar_load_cost;
6044 case scalar_store:
6045 return aarch64_tune_params->vec_costs->scalar_store_cost;
6047 case vector_stmt:
6048 return aarch64_tune_params->vec_costs->vec_stmt_cost;
6050 case vector_load:
6051 return aarch64_tune_params->vec_costs->vec_align_load_cost;
6053 case vector_store:
6054 return aarch64_tune_params->vec_costs->vec_store_cost;
6056 case vec_to_scalar:
6057 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
6059 case scalar_to_vec:
6060 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
6062 case unaligned_load:
6063 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
6065 case unaligned_store:
6066 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
6068 case cond_branch_taken:
6069 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
6071 case cond_branch_not_taken:
6072 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
6074 case vec_perm:
6075 case vec_promote_demote:
6076 return aarch64_tune_params->vec_costs->vec_stmt_cost;
6078 case vec_construct:
6079 elements = TYPE_VECTOR_SUBPARTS (vectype);
6080 return elements / 2 + 1;
6082 default:
6083 gcc_unreachable ();
6087 /* Implement targetm.vectorize.add_stmt_cost. */
6088 static unsigned
6089 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6090 struct _stmt_vec_info *stmt_info, int misalign,
6091 enum vect_cost_model_location where)
6093 unsigned *cost = (unsigned *) data;
6094 unsigned retval = 0;
6096 if (flag_vect_cost_model)
6098 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6099 int stmt_cost =
6100 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
6102 /* Statements in an inner loop relative to the loop being
6103 vectorized are weighted more heavily. The value here is
6104 a function (linear for now) of the loop nest level. */
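/* For example, a statement whose containing loop sits at depth 2 in the
   loop tree has its cost doubled before being added to the vect_body
   bucket. */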
6105 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6107 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6108 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
6109 unsigned nest_level = loop_depth (loop);
6111 count *= nest_level;
6114 retval = (unsigned) (count * stmt_cost);
6115 cost[where] += retval;
6118 return retval;
6121 static void initialize_aarch64_code_model (void);
6123 /* Parse the architecture extension string. */
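/* For example, given the "+crypto+nofp" tail of an -march or -mcpu option,
   the first pass through the loop below turns on the crypto feature flags
   and the second pass, seeing the "no" prefix, clears the fp flags. */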
6125 static void
6126 aarch64_parse_extension (char *str)
6128 /* The extension string is parsed left to right. */
6129 const struct aarch64_option_extension *opt = NULL;
6131 /* Flag to say whether we are adding or removing an extension. */
6132 int adding_ext = -1;
6134 while (str != NULL && *str != 0)
6136 char *ext;
6137 size_t len;
6139 str++;
6140 ext = strchr (str, '+');
6142 if (ext != NULL)
6143 len = ext - str;
6144 else
6145 len = strlen (str);
6147 if (len >= 2 && strncmp (str, "no", 2) == 0)
6149 adding_ext = 0;
6150 len -= 2;
6151 str += 2;
6153 else if (len > 0)
6154 adding_ext = 1;
6156 if (len == 0)
6158 error ("missing feature modifier after %qs", "+no");
6159 return;
6162 /* Scan over the extensions table trying to find an exact match. */
6163 for (opt = all_extensions; opt->name != NULL; opt++)
6165 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
6167 /* Add or remove the extension. */
6168 if (adding_ext)
6169 aarch64_isa_flags |= opt->flags_on;
6170 else
6171 aarch64_isa_flags &= ~(opt->flags_off);
6172 break;
6176 if (opt->name == NULL)
6178 /* Extension not found in list. */
6179 error ("unknown feature modifier %qs", str);
6180 return;
6183 str = ext;
6186 return;
6189 /* Parse the ARCH string. */
6191 static void
6192 aarch64_parse_arch (void)
6194 char *ext;
6195 const struct processor *arch;
6196 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
6197 size_t len;
6199 strcpy (str, aarch64_arch_string);
6201 ext = strchr (str, '+');
6203 if (ext != NULL)
6204 len = ext - str;
6205 else
6206 len = strlen (str);
6208 if (len == 0)
6210 error ("missing arch name in -march=%qs", str);
6211 return;
6214 /* Loop through the list of supported ARCHs to find a match. */
6215 for (arch = all_architectures; arch->name != NULL; arch++)
6217 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
6219 selected_arch = arch;
6220 aarch64_isa_flags = selected_arch->flags;
6222 if (!selected_cpu)
6223 selected_cpu = &all_cores[selected_arch->core];
6225 if (ext != NULL)
6227 /* ARCH string contains at least one extension. */
6228 aarch64_parse_extension (ext);
6231 if (strcmp (selected_arch->arch, selected_cpu->arch))
6233 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
6234 selected_cpu->name, selected_arch->name);
6237 return;
6241 /* ARCH name not found in list. */
6242 error ("unknown value %qs for -march", str);
6243 return;
6246 /* Parse the CPU string. */
6248 static void
6249 aarch64_parse_cpu (void)
6251 char *ext;
6252 const struct processor *cpu;
6253 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
6254 size_t len;
6256 strcpy (str, aarch64_cpu_string);
6258 ext = strchr (str, '+');
6260 if (ext != NULL)
6261 len = ext - str;
6262 else
6263 len = strlen (str);
6265 if (len == 0)
6267 error ("missing cpu name in -mcpu=%qs", str);
6268 return;
6271 /* Loop through the list of supported CPUs to find a match. */
6272 for (cpu = all_cores; cpu->name != NULL; cpu++)
6274 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
6276 selected_cpu = cpu;
6277 selected_tune = cpu;
6278 aarch64_isa_flags = selected_cpu->flags;
6280 if (ext != NULL)
6282 /* CPU string contains at least one extension. */
6283 aarch64_parse_extension (ext);
6286 return;
6290 /* CPU name not found in list. */
6291 error ("unknown value %qs for -mcpu", str);
6292 return;
6295 /* Parse the TUNE string. */
6297 static void
6298 aarch64_parse_tune (void)
6300 const struct processor *cpu;
6301 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
6302 strcpy (str, aarch64_tune_string);
6304 /* Loop through the list of supported CPUs to find a match. */
6305 for (cpu = all_cores; cpu->name != NULL; cpu++)
6307 if (strcmp (cpu->name, str) == 0)
6309 selected_tune = cpu;
6310 return;
6314 /* CPU name not found in list. */
6315 error ("unknown value %qs for -mtune", str);
6316 return;
6320 /* Implement TARGET_OPTION_OVERRIDE. */
6322 static void
6323 aarch64_override_options (void)
6325 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
6326 If either of -march or -mtune is given, they override their
6327 respective component of -mcpu.
6329 So, first parse AARCH64_CPU_STRING, then the others. Be careful
6330 with -march: if -mcpu is not present on the command line, -march
6331 must set a sensible default CPU. */
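/* For example, -mcpu=cortex-a57 -mtune=cortex-a53 keeps the architecture and
   ISA flags of cortex-a57 but uses the tuning tables of cortex-a53. */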
6332 if (aarch64_cpu_string)
6334 aarch64_parse_cpu ();
6337 if (aarch64_arch_string)
6339 aarch64_parse_arch ();
6342 if (aarch64_tune_string)
6344 aarch64_parse_tune ();
6347 #ifndef HAVE_AS_MABI_OPTION
6348 /* The compiler may have been configured with 2.23.* binutils, which does
6349 not have support for ILP32. */
6350 if (TARGET_ILP32)
6351 error ("Assembler does not support -mabi=ilp32");
6352 #endif
6354 initialize_aarch64_code_model ();
6356 aarch64_build_bitmask_table ();
6358 /* This target defaults to strict volatile bitfields. */
6359 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
6360 flag_strict_volatile_bitfields = 1;
6362 /* If the user did not specify a processor, choose the default
6363 one for them. This will be the CPU set during configuration using
6364 --with-cpu, otherwise it is "generic". */
6365 if (!selected_cpu)
6367 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
6368 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
6371 gcc_assert (selected_cpu);
6373 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
6374 if (!selected_tune)
6375 selected_tune = &all_cores[selected_cpu->core];
6377 aarch64_tune_flags = selected_tune->flags;
6378 aarch64_tune = selected_tune->core;
6379 aarch64_tune_params = selected_tune->tune;
6381 aarch64_override_options_after_change ();
6384 /* Implement targetm.override_options_after_change. */
6386 static void
6387 aarch64_override_options_after_change (void)
6389 if (flag_omit_frame_pointer)
6390 flag_omit_leaf_frame_pointer = false;
6391 else if (flag_omit_leaf_frame_pointer)
6392 flag_omit_frame_pointer = true;
6395 static struct machine_function *
6396 aarch64_init_machine_status (void)
6398 struct machine_function *machine;
6399 machine = ggc_cleared_alloc<machine_function> ();
6400 return machine;
6403 void
6404 aarch64_init_expanders (void)
6406 init_machine_status = aarch64_init_machine_status;
6409 /* Initialize aarch64_cmodel from the requested code model, adjusting it for PIC and rejecting unsupported combinations. */
6410 static void
6411 initialize_aarch64_code_model (void)
6413 if (flag_pic)
6415 switch (aarch64_cmodel_var)
6417 case AARCH64_CMODEL_TINY:
6418 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
6419 break;
6420 case AARCH64_CMODEL_SMALL:
6421 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
6422 break;
6423 case AARCH64_CMODEL_LARGE:
6424 sorry ("code model %qs with -f%s", "large",
6425 flag_pic > 1 ? "PIC" : "pic");
6426 default:
6427 gcc_unreachable ();
6430 else
6431 aarch64_cmodel = aarch64_cmodel_var;
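/* For illustration, the mapping implemented above (a sketch; option
   spellings are the usual driver ones):

     -mcmodel=tiny                 -> AARCH64_CMODEL_TINY
     -mcmodel=tiny  -fpic/-fPIC    -> AARCH64_CMODEL_TINY_PIC
     -mcmodel=small                -> AARCH64_CMODEL_SMALL
     -mcmodel=small -fpic/-fPIC    -> AARCH64_CMODEL_SMALL_PIC
     -mcmodel=large -fpic/-fPIC    -> rejected with "sorry"  */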
6434 /* Return true if SYMBOL_REF X binds locally. */
6436 static bool
6437 aarch64_symbol_binds_local_p (const_rtx x)
6439 return (SYMBOL_REF_DECL (x)
6440 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
6441 : SYMBOL_REF_LOCAL_P (x));
6444 /* Return true if SYMBOL_REF X is thread-local. */
6445 static bool
6446 aarch64_tls_symbol_p (rtx x)
6448 if (! TARGET_HAVE_TLS)
6449 return false;
6451 if (GET_CODE (x) != SYMBOL_REF)
6452 return false;
6454 return SYMBOL_REF_TLS_MODEL (x) != 0;
6457 /* Classify a TLS symbol into one of the TLS kinds. */
6458 enum aarch64_symbol_type
6459 aarch64_classify_tls_symbol (rtx x)
6461 enum tls_model tls_kind = tls_symbolic_operand_type (x);
6463 switch (tls_kind)
6465 case TLS_MODEL_GLOBAL_DYNAMIC:
6466 case TLS_MODEL_LOCAL_DYNAMIC:
6467 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
6469 case TLS_MODEL_INITIAL_EXEC:
6470 return SYMBOL_SMALL_GOTTPREL;
6472 case TLS_MODEL_LOCAL_EXEC:
6473 return SYMBOL_SMALL_TPREL;
6475 case TLS_MODEL_EMULATED:
6476 case TLS_MODEL_NONE:
6477 return SYMBOL_FORCE_TO_MEM;
6479 default:
6480 gcc_unreachable ();
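/* For example, a global-dynamic access such as

     extern __thread int counter;
     int get (void) { return counter; }

   classifies as SYMBOL_SMALL_TLSDESC when TARGET_TLS_DESC is enabled (and as
   SYMBOL_SMALL_TLSGD otherwise), whereas the same variable accessed with the
   local-exec model classifies as SYMBOL_SMALL_TPREL.  Illustrative only; the
   model itself is decided earlier by tls_symbolic_operand_type.  */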
6484 /* Return the method that should be used to access SYMBOL_REF or
6485 LABEL_REF X in context CONTEXT. */
6487 enum aarch64_symbol_type
6488 aarch64_classify_symbol (rtx x,
6489 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
6491 if (GET_CODE (x) == LABEL_REF)
6493 switch (aarch64_cmodel)
6495 case AARCH64_CMODEL_LARGE:
6496 return SYMBOL_FORCE_TO_MEM;
6498 case AARCH64_CMODEL_TINY_PIC:
6499 case AARCH64_CMODEL_TINY:
6500 return SYMBOL_TINY_ABSOLUTE;
6502 case AARCH64_CMODEL_SMALL_PIC:
6503 case AARCH64_CMODEL_SMALL:
6504 return SYMBOL_SMALL_ABSOLUTE;
6506 default:
6507 gcc_unreachable ();
6511 if (GET_CODE (x) == SYMBOL_REF)
6513 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
6514 return SYMBOL_FORCE_TO_MEM;
6516 if (aarch64_tls_symbol_p (x))
6517 return aarch64_classify_tls_symbol (x);
6519 switch (aarch64_cmodel)
6521 case AARCH64_CMODEL_TINY:
6522 if (SYMBOL_REF_WEAK (x))
6523 return SYMBOL_FORCE_TO_MEM;
6524 return SYMBOL_TINY_ABSOLUTE;
6526 case AARCH64_CMODEL_SMALL:
6527 if (SYMBOL_REF_WEAK (x))
6528 return SYMBOL_FORCE_TO_MEM;
6529 return SYMBOL_SMALL_ABSOLUTE;
6531 case AARCH64_CMODEL_TINY_PIC:
6532 if (!aarch64_symbol_binds_local_p (x))
6533 return SYMBOL_TINY_GOT;
6534 return SYMBOL_TINY_ABSOLUTE;
6536 case AARCH64_CMODEL_SMALL_PIC:
6537 if (!aarch64_symbol_binds_local_p (x))
6538 return SYMBOL_SMALL_GOT;
6539 return SYMBOL_SMALL_ABSOLUTE;
6541 default:
6542 gcc_unreachable ();
6546 /* By default push everything into the constant pool. */
6547 return SYMBOL_FORCE_TO_MEM;
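/* Illustrative classifications under the (default) small code model, a
   sketch of the switch above:

     int global_var;                          -> SYMBOL_SMALL_ABSOLUTE
     __attribute__ ((weak)) int weak_var;     -> SYMBOL_FORCE_TO_MEM
     extern int ext_var;   with -fpic         -> SYMBOL_SMALL_GOT
     static int local_var; with -fpic         -> SYMBOL_SMALL_ABSOLUTE

   The -fpic rows assume the symbol does or does not bind locally as
   indicated.  */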
6550 bool
6551 aarch64_constant_address_p (rtx x)
6553 return (CONSTANT_P (x) && memory_address_p (DImode, x));
6556 bool
6557 aarch64_legitimate_pic_operand_p (rtx x)
6559 if (GET_CODE (x) == SYMBOL_REF
6560 || (GET_CODE (x) == CONST
6561 && GET_CODE (XEXP (x, 0)) == PLUS
6562 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6563 return false;
6565 return true;
6568 /* Return true if X holds a floating-point constant that is either
6569 +0.0 or representable in quarter-precision form. */
6570 static bool
6571 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
6573 if (!CONST_DOUBLE_P (x))
6574 return false;
6576 /* TODO: We could handle moving 0.0 to a TFmode register,
6577 but first we would like to refactor movtf_aarch64 to be
6578 more amenable to splitting moves properly and to gating
6579 correctly on TARGET_SIMD. For now, reject all constants
6580 that are not destined for SFmode or DFmode registers. */
6581 if (!(mode == SFmode || mode == DFmode))
6582 return false;
6584 if (aarch64_float_const_zero_rtx_p (x))
6585 return true;
6586 return aarch64_float_const_representable_p (x);
6589 static bool
6590 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
6592 /* Do not allow vector struct mode constants. We could support
6593 0 and -1 easily, but they need support in aarch64-simd.md. */
6594 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
6595 return false;
6597 /* This could probably go away because
6598 we now decompose CONST_INTs according to expand_mov_immediate. */
6599 if ((GET_CODE (x) == CONST_VECTOR
6600 && aarch64_simd_valid_immediate (x, mode, false, NULL))
6601 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
6602 return !targetm.cannot_force_const_mem (mode, x);
6604 if (GET_CODE (x) == HIGH
6605 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6606 return true;
6608 return aarch64_constant_address_p (x);
6612 aarch64_load_tp (rtx target)
6614 if (!target
6615 || GET_MODE (target) != Pmode
6616 || !register_operand (target, Pmode))
6617 target = gen_reg_rtx (Pmode);
6619 /* Can return in any reg. */
6620 emit_insn (gen_aarch64_load_tp_hard (target));
6621 return target;
6624 /* On AAPCS systems, this is the "struct __va_list". */
6625 static GTY(()) tree va_list_type;
6627 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
6628 Return the type to use as __builtin_va_list.
6630 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
6632 struct __va_list
6634 void *__stack;
6635 void *__gr_top;
6636 void *__vr_top;
6637 int __gr_offs;
6638 int __vr_offs;
6639 }; */
6641 static tree
6642 aarch64_build_builtin_va_list (void)
6644 tree va_list_name;
6645 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6647 /* Create the type. */
6648 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
6649 /* Give it the required name. */
6650 va_list_name = build_decl (BUILTINS_LOCATION,
6651 TYPE_DECL,
6652 get_identifier ("__va_list"),
6653 va_list_type);
6654 DECL_ARTIFICIAL (va_list_name) = 1;
6655 TYPE_NAME (va_list_type) = va_list_name;
6656 TYPE_STUB_DECL (va_list_type) = va_list_name;
6658 /* Create the fields. */
6659 f_stack = build_decl (BUILTINS_LOCATION,
6660 FIELD_DECL, get_identifier ("__stack"),
6661 ptr_type_node);
6662 f_grtop = build_decl (BUILTINS_LOCATION,
6663 FIELD_DECL, get_identifier ("__gr_top"),
6664 ptr_type_node);
6665 f_vrtop = build_decl (BUILTINS_LOCATION,
6666 FIELD_DECL, get_identifier ("__vr_top"),
6667 ptr_type_node);
6668 f_groff = build_decl (BUILTINS_LOCATION,
6669 FIELD_DECL, get_identifier ("__gr_offs"),
6670 integer_type_node);
6671 f_vroff = build_decl (BUILTINS_LOCATION,
6672 FIELD_DECL, get_identifier ("__vr_offs"),
6673 integer_type_node);
6675 DECL_ARTIFICIAL (f_stack) = 1;
6676 DECL_ARTIFICIAL (f_grtop) = 1;
6677 DECL_ARTIFICIAL (f_vrtop) = 1;
6678 DECL_ARTIFICIAL (f_groff) = 1;
6679 DECL_ARTIFICIAL (f_vroff) = 1;
6681 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
6682 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
6683 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
6684 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
6685 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
6687 TYPE_FIELDS (va_list_type) = f_stack;
6688 DECL_CHAIN (f_stack) = f_grtop;
6689 DECL_CHAIN (f_grtop) = f_vrtop;
6690 DECL_CHAIN (f_vrtop) = f_groff;
6691 DECL_CHAIN (f_groff) = f_vroff;
6693 /* Compute its layout. */
6694 layout_type (va_list_type);
6696 return va_list_type;
6699 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
6700 static void
6701 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6703 const CUMULATIVE_ARGS *cum;
6704 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6705 tree stack, grtop, vrtop, groff, vroff;
6706 tree t;
6707 int gr_save_area_size;
6708 int vr_save_area_size;
6709 int vr_offset;
6711 cum = &crtl->args.info;
6712 gr_save_area_size
6713 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
6714 vr_save_area_size
6715 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
6717 if (TARGET_GENERAL_REGS_ONLY)
6719 if (cum->aapcs_nvrn > 0)
6720 sorry ("%qs and floating point or vector arguments",
6721 "-mgeneral-regs-only");
6722 vr_save_area_size = 0;
6725 f_stack = TYPE_FIELDS (va_list_type_node);
6726 f_grtop = DECL_CHAIN (f_stack);
6727 f_vrtop = DECL_CHAIN (f_grtop);
6728 f_groff = DECL_CHAIN (f_vrtop);
6729 f_vroff = DECL_CHAIN (f_groff);
6731 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
6732 NULL_TREE);
6733 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
6734 NULL_TREE);
6735 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
6736 NULL_TREE);
6737 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
6738 NULL_TREE);
6739 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
6740 NULL_TREE);
6742 /* Emit code to initialize STACK, which points to the next varargs stack
6743 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
6744 by named arguments. STACK is 8-byte aligned. */
6745 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
6746 if (cum->aapcs_stack_size > 0)
6747 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
6748 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
6749 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6751 /* Emit code to initialize GRTOP, the top of the GR save area.
6752 virtual_incoming_args_rtx should have been 16 byte aligned. */
6753 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
6754 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
6755 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6757 /* Emit code to initialize VRTOP, the top of the VR save area.
6758 This address is gr_save_area_bytes below GRTOP, rounded
6759 down to the next 16-byte boundary. */
6760 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
6761 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
6762 STACK_BOUNDARY / BITS_PER_UNIT);
6764 if (vr_offset)
6765 t = fold_build_pointer_plus_hwi (t, -vr_offset);
6766 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
6767 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6769 /* Emit code to initialize GROFF, the offset from GRTOP of the
6770 next GPR argument. */
6771 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
6772 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
6773 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6775 /* Likewise emit code to initialize VROFF, the offset from VRTOP
6776 of the next VR argument. */
6777 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
6778 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
6779 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
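/* A rough C-level picture of what the expansion above initializes, with
   incoming_args standing for virtual_incoming_args_rtx (a sketch only):

     ap.__stack   = incoming_args + cum->aapcs_stack_size * UNITS_PER_WORD;
     ap.__gr_top  = incoming_args;
     ap.__vr_top  = incoming_args - AARCH64_ROUND_UP (gr_save_area_size, 16);
     ap.__gr_offs = -gr_save_area_size;
     ap.__vr_offs = -vr_save_area_size;

   where 16 stands for STACK_BOUNDARY / BITS_PER_UNIT.  */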
6782 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
6784 static tree
6785 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6786 gimple_seq *post_p ATTRIBUTE_UNUSED)
6788 tree addr;
6789 bool indirect_p;
6790 bool is_ha; /* is HFA or HVA. */
6791 bool dw_align; /* double-word align. */
6792 enum machine_mode ag_mode = VOIDmode;
6793 int nregs;
6794 enum machine_mode mode;
6796 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6797 tree stack, f_top, f_off, off, arg, roundup, on_stack;
6798 HOST_WIDE_INT size, rsize, adjust, align;
6799 tree t, u, cond1, cond2;
6801 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6802 if (indirect_p)
6803 type = build_pointer_type (type);
6805 mode = TYPE_MODE (type);
6807 f_stack = TYPE_FIELDS (va_list_type_node);
6808 f_grtop = DECL_CHAIN (f_stack);
6809 f_vrtop = DECL_CHAIN (f_grtop);
6810 f_groff = DECL_CHAIN (f_vrtop);
6811 f_vroff = DECL_CHAIN (f_groff);
6813 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
6814 f_stack, NULL_TREE);
6815 size = int_size_in_bytes (type);
6816 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
6818 dw_align = false;
6819 adjust = 0;
6820 if (aarch64_vfp_is_call_or_return_candidate (mode,
6821 type,
6822 &ag_mode,
6823 &nregs,
6824 &is_ha))
6826 /* TYPE passed in fp/simd registers. */
6827 if (TARGET_GENERAL_REGS_ONLY)
6828 sorry ("%qs and floating point or vector arguments",
6829 "-mgeneral-regs-only");
6831 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
6832 unshare_expr (valist), f_vrtop, NULL_TREE);
6833 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
6834 unshare_expr (valist), f_vroff, NULL_TREE);
6836 rsize = nregs * UNITS_PER_VREG;
6838 if (is_ha)
6840 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
6841 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
6843 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
6844 && size < UNITS_PER_VREG)
6846 adjust = UNITS_PER_VREG - size;
6849 else
6851 /* TYPE passed in general registers. */
6852 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
6853 unshare_expr (valist), f_grtop, NULL_TREE);
6854 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
6855 unshare_expr (valist), f_groff, NULL_TREE);
6856 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6857 nregs = rsize / UNITS_PER_WORD;
6859 if (align > 8)
6860 dw_align = true;
6862 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6863 && size < UNITS_PER_WORD)
6865 adjust = UNITS_PER_WORD - size;
6869 /* Get a local temporary for the field value. */
6870 off = get_initialized_tmp_var (f_off, pre_p, NULL);
6872 /* Emit code to branch if off >= 0. */
6873 t = build2 (GE_EXPR, boolean_type_node, off,
6874 build_int_cst (TREE_TYPE (off), 0));
6875 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
6877 if (dw_align)
6879 /* Emit: offs = (offs + 15) & -16. */
6880 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6881 build_int_cst (TREE_TYPE (off), 15));
6882 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
6883 build_int_cst (TREE_TYPE (off), -16));
6884 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
6886 else
6887 roundup = NULL;
6889 /* Update ap.__[g|v]r_offs */
6890 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6891 build_int_cst (TREE_TYPE (off), rsize));
6892 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
6894 /* String up. */
6895 if (roundup)
6896 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6898 /* [cond2] if (ap.__[g|v]r_offs > 0) */
6899 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
6900 build_int_cst (TREE_TYPE (f_off), 0));
6901 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
6903 /* String up: make sure the assignment happens before the use. */
6904 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
6905 COND_EXPR_ELSE (cond1) = t;
6907 /* Prepare the trees handling the argument that is passed on the stack;
6908 the top level node will store in ON_STACK. */
6909 arg = get_initialized_tmp_var (stack, pre_p, NULL);
6910 if (align > 8)
6912 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
6913 t = fold_convert (intDI_type_node, arg);
6914 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6915 build_int_cst (TREE_TYPE (t), 15));
6916 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6917 build_int_cst (TREE_TYPE (t), -16));
6918 t = fold_convert (TREE_TYPE (arg), t);
6919 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
6921 else
6922 roundup = NULL;
6923 /* Advance ap.__stack */
6924 t = fold_convert (intDI_type_node, arg);
6925 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6926 build_int_cst (TREE_TYPE (t), size + 7));
6927 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6928 build_int_cst (TREE_TYPE (t), -8));
6929 t = fold_convert (TREE_TYPE (arg), t);
6930 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
6931 /* String up roundup and advance. */
6932 if (roundup)
6933 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6934 /* String up with arg */
6935 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
6936 /* Big-endianness related address adjustment. */
6937 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6938 && size < UNITS_PER_WORD)
6940 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
6941 size_int (UNITS_PER_WORD - size));
6942 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
6945 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
6946 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
6948 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
6949 t = off;
6950 if (adjust)
6951 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
6952 build_int_cst (TREE_TYPE (off), adjust));
6954 t = fold_convert (sizetype, t);
6955 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
6957 if (is_ha)
6959 /* type ha; // treat as "struct {ftype field[n];}"
6960 ... [computing offs]
6961 for (i = 0; i <nregs; ++i, offs += 16)
6962 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
6963 return ha; */
6964 int i;
6965 tree tmp_ha, field_t, field_ptr_t;
6967 /* Declare a local variable. */
6968 tmp_ha = create_tmp_var_raw (type, "ha");
6969 gimple_add_tmp_var (tmp_ha);
6971 /* Establish the base type. */
6972 switch (ag_mode)
6974 case SFmode:
6975 field_t = float_type_node;
6976 field_ptr_t = float_ptr_type_node;
6977 break;
6978 case DFmode:
6979 field_t = double_type_node;
6980 field_ptr_t = double_ptr_type_node;
6981 break;
6982 case TFmode:
6983 field_t = long_double_type_node;
6984 field_ptr_t = long_double_ptr_type_node;
6985 break;
6986 /* Half-precision and quad-precision floats are not fully supported yet.
6987 Enable the following code once support is complete; the correct type
6988 node for __fp16 * still needs to be found. */
6989 #if 0
6990 case HFmode:
6991 field_t = float_type_node;
6992 field_ptr_t = float_ptr_type_node;
6993 break;
6994 #endif
6995 case V2SImode:
6996 case V4SImode:
6998 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
6999 field_t = build_vector_type_for_mode (innertype, ag_mode);
7000 field_ptr_t = build_pointer_type (field_t);
7002 break;
7003 default:
7004 gcc_assert (0);
7007 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area) */
7008 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
7009 addr = t;
7010 t = fold_convert (field_ptr_t, addr);
7011 t = build2 (MODIFY_EXPR, field_t,
7012 build1 (INDIRECT_REF, field_t, tmp_ha),
7013 build1 (INDIRECT_REF, field_t, t));
7015 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
7016 for (i = 1; i < nregs; ++i)
7018 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
7019 u = fold_convert (field_ptr_t, addr);
7020 u = build2 (MODIFY_EXPR, field_t,
7021 build2 (MEM_REF, field_t, tmp_ha,
7022 build_int_cst (field_ptr_t,
7023 (i *
7024 int_size_in_bytes (field_t)))),
7025 build1 (INDIRECT_REF, field_t, u));
7026 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
7029 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
7030 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
7033 COND_EXPR_ELSE (cond2) = t;
7034 addr = fold_convert (build_pointer_type (type), cond1);
7035 addr = build_va_arg_indirect_ref (addr);
7037 if (indirect_p)
7038 addr = build_va_arg_indirect_ref (addr);
7040 return addr;
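/* A C-level sketch of the code generated above for an argument taken from
   register save area X (gr or vr); HFA expansion, big-endian padding and the
   exact stack rounding are simplified, and next_stack_slot is only shorthand
   for the __stack advance emitted above:

     offs = ap.__Xr_offs;
     if (offs >= 0)
       addr = next_stack_slot (&ap);            already spilled to the stack
     else
       {
         if (double-word aligned GR argument)
           offs = (offs + 15) & -16;
         ap.__Xr_offs = offs + rsize;
         if (ap.__Xr_offs > 0)
           addr = next_stack_slot (&ap);        save area exhausted
         else
           addr = ap.__Xr_top + offs;           argument is in the save area
       }  */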
7043 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
7045 static void
7046 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
7047 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7048 int no_rtl)
7050 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7051 CUMULATIVE_ARGS local_cum;
7052 int gr_saved, vr_saved;
7054 /* The caller has advanced CUM up to, but not beyond, the last named
7055 argument. Advance a local copy of CUM past the last "real" named
7056 argument, to find out how many registers are left over. */
7057 local_cum = *cum;
7058 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
7060 /* Find out how many registers we need to save. */
7061 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
7062 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
7064 if (TARGET_GENERAL_REGS_ONLY)
7066 if (local_cum.aapcs_nvrn > 0)
7067 sorry ("%qs and floating point or vector arguments",
7068 "-mgeneral-regs-only");
7069 vr_saved = 0;
7072 if (!no_rtl)
7074 if (gr_saved > 0)
7076 rtx ptr, mem;
7078 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
7079 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
7080 - gr_saved * UNITS_PER_WORD);
7081 mem = gen_frame_mem (BLKmode, ptr);
7082 set_mem_alias_set (mem, get_varargs_alias_set ());
7084 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
7085 mem, gr_saved);
7087 if (vr_saved > 0)
7089 /* We can't use move_block_from_reg, because it will use
7090 the wrong mode, storing D regs only. */
7091 enum machine_mode mode = TImode;
7092 int off, i;
7094 /* Set OFF to the offset from virtual_incoming_args_rtx of
7095 the first vector register. The VR save area lies below
7096 the GR one, and is aligned to 16 bytes. */
7097 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7098 STACK_BOUNDARY / BITS_PER_UNIT);
7099 off -= vr_saved * UNITS_PER_VREG;
7101 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
7103 rtx ptr, mem;
7105 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
7106 mem = gen_frame_mem (mode, ptr);
7107 set_mem_alias_set (mem, get_varargs_alias_set ());
7108 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
7109 off += UNITS_PER_VREG;
7114 /* We don't save the size into *PRETEND_SIZE because we want to avoid
7115 any complication of having crtl->args.pretend_args_size changed. */
7116 cfun->machine->frame.saved_varargs_size
7117 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7118 STACK_BOUNDARY / BITS_PER_UNIT)
7119 + vr_saved * UNITS_PER_VREG);
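/* Worked example: for a variadic function such as

     int f (int n, ...);

   one general register is consumed by the named argument, so gr_saved == 7
   and vr_saved == 8 (when FP/SIMD is enabled), giving
   saved_varargs_size = AARCH64_ROUND_UP (7 * 8, 16) + 8 * 16 = 64 + 128 = 192
   bytes of register save area.  */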
7122 static void
7123 aarch64_conditional_register_usage (void)
7125 int i;
7126 if (!TARGET_FLOAT)
7128 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
7130 fixed_regs[i] = 1;
7131 call_used_regs[i] = 1;
7136 /* Walk down the type tree of TYPE counting consecutive base elements.
7137 If *MODEP is VOIDmode, then set it to the first valid floating point
7138 type. If a non-floating point type is found, or if a floating point
7139 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
7140 otherwise return the count in the sub-tree. */
7141 static int
7142 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
7144 enum machine_mode mode;
7145 HOST_WIDE_INT size;
7147 switch (TREE_CODE (type))
7149 case REAL_TYPE:
7150 mode = TYPE_MODE (type);
7151 if (mode != DFmode && mode != SFmode && mode != TFmode)
7152 return -1;
7154 if (*modep == VOIDmode)
7155 *modep = mode;
7157 if (*modep == mode)
7158 return 1;
7160 break;
7162 case COMPLEX_TYPE:
7163 mode = TYPE_MODE (TREE_TYPE (type));
7164 if (mode != DFmode && mode != SFmode && mode != TFmode)
7165 return -1;
7167 if (*modep == VOIDmode)
7168 *modep = mode;
7170 if (*modep == mode)
7171 return 2;
7173 break;
7175 case VECTOR_TYPE:
7176 /* Use V2SImode and V4SImode as representatives of all 64-bit
7177 and 128-bit vector types. */
7178 size = int_size_in_bytes (type);
7179 switch (size)
7181 case 8:
7182 mode = V2SImode;
7183 break;
7184 case 16:
7185 mode = V4SImode;
7186 break;
7187 default:
7188 return -1;
7191 if (*modep == VOIDmode)
7192 *modep = mode;
7194 /* Vector modes are considered to be opaque: two vectors are
7195 equivalent for the purposes of being homogeneous aggregates
7196 if they are the same size. */
7197 if (*modep == mode)
7198 return 1;
7200 break;
7202 case ARRAY_TYPE:
7204 int count;
7205 tree index = TYPE_DOMAIN (type);
7207 /* Can't handle incomplete types nor sizes that are not
7208 fixed. */
7209 if (!COMPLETE_TYPE_P (type)
7210 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7211 return -1;
7213 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
7214 if (count == -1
7215 || !index
7216 || !TYPE_MAX_VALUE (index)
7217 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
7218 || !TYPE_MIN_VALUE (index)
7219 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
7220 || count < 0)
7221 return -1;
7223 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
7224 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
7226 /* There must be no padding. */
7227 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7228 return -1;
7230 return count;
7233 case RECORD_TYPE:
7235 int count = 0;
7236 int sub_count;
7237 tree field;
7239 /* Can't handle incomplete types nor sizes that are not
7240 fixed. */
7241 if (!COMPLETE_TYPE_P (type)
7242 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7243 return -1;
7245 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7247 if (TREE_CODE (field) != FIELD_DECL)
7248 continue;
7250 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7251 if (sub_count < 0)
7252 return -1;
7253 count += sub_count;
7256 /* There must be no padding. */
7257 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7258 return -1;
7260 return count;
7263 case UNION_TYPE:
7264 case QUAL_UNION_TYPE:
7266 /* These aren't very interesting except in a degenerate case. */
7267 int count = 0;
7268 int sub_count;
7269 tree field;
7271 /* Can't handle incomplete types nor sizes that are not
7272 fixed. */
7273 if (!COMPLETE_TYPE_P (type)
7274 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
7275 return -1;
7277 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7279 if (TREE_CODE (field) != FIELD_DECL)
7280 continue;
7282 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7283 if (sub_count < 0)
7284 return -1;
7285 count = count > sub_count ? count : sub_count;
7288 /* There must be no padding. */
7289 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
7290 return -1;
7292 return count;
7295 default:
7296 break;
7299 return -1;
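/* Illustrative examples of what the walk above produces:

     struct { float x, y, z; }        ->  3, *modep == SFmode   (an HFA)
     struct { double d[2]; }          ->  2, *modep == DFmode   (an HFA)
     struct { _Complex float c; }     ->  2, *modep == SFmode
     struct { float f; double d; }    -> -1  (mixed base types)
     struct { float f; int i; }       -> -1  (non-FP member)  */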
7302 /* Return true if we use LRA instead of the reload pass. */
7303 static bool
7304 aarch64_lra_p (void)
7306 return aarch64_lra_flag;
7309 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
7310 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
7311 array types. The C99 floating-point complex types are also considered
7312 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
7313 types, which are GCC extensions and out of the scope of AAPCS64, are
7314 treated as composite types here as well.
7316 Note that MODE itself is not sufficient for determining whether a type
7317 is such a composite type or not. This is because
7318 stor-layout.c:compute_record_mode may have already changed the MODE
7319 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
7320 structure with only one field may have its MODE set to the mode of the
7321 field. Also an integer mode whose size matches the size of the
7322 RECORD_TYPE type may be used to substitute the original mode
7323 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
7324 solely relied on. */
7326 static bool
7327 aarch64_composite_type_p (const_tree type,
7328 enum machine_mode mode)
7330 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
7331 return true;
7333 if (mode == BLKmode
7334 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7335 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
7336 return true;
7338 return false;
7341 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
7342 type as described in AAPCS64 \S 4.1.2.
7344 See the comment above aarch64_composite_type_p for the notes on MODE. */
7346 static bool
7347 aarch64_short_vector_p (const_tree type,
7348 enum machine_mode mode)
7350 HOST_WIDE_INT size = -1;
7352 if (type && TREE_CODE (type) == VECTOR_TYPE)
7353 size = int_size_in_bytes (type);
7354 else if (!aarch64_composite_type_p (type, mode)
7355 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
7356 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
7357 size = GET_MODE_SIZE (mode);
7359 return (size == 8 || size == 16) ? true : false;
7362 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
7363 shall be passed or returned in simd/fp register(s) (providing these
7364 parameter passing registers are available).
7366 Upon successful return, *COUNT returns the number of needed registers,
7367 *BASE_MODE returns the mode of the individual register and, when IS_HA
7368 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
7369 floating-point aggregate or a homogeneous short-vector aggregate. */
7371 static bool
7372 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
7373 const_tree type,
7374 enum machine_mode *base_mode,
7375 int *count,
7376 bool *is_ha)
7378 enum machine_mode new_mode = VOIDmode;
7379 bool composite_p = aarch64_composite_type_p (type, mode);
7381 if (is_ha != NULL) *is_ha = false;
7383 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
7384 || aarch64_short_vector_p (type, mode))
7386 *count = 1;
7387 new_mode = mode;
7389 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7391 if (is_ha != NULL) *is_ha = true;
7392 *count = 2;
7393 new_mode = GET_MODE_INNER (mode);
7395 else if (type && composite_p)
7397 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
7399 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
7401 if (is_ha != NULL) *is_ha = true;
7402 *count = ag_count;
7404 else
7405 return false;
7407 else
7408 return false;
7410 *base_mode = new_mode;
7411 return true;
7414 /* Implement TARGET_STRUCT_VALUE_RTX. */
7416 static rtx
7417 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
7418 int incoming ATTRIBUTE_UNUSED)
7420 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
7423 /* Implements target hook vector_mode_supported_p. */
7424 static bool
7425 aarch64_vector_mode_supported_p (enum machine_mode mode)
7427 if (TARGET_SIMD
7428 && (mode == V4SImode || mode == V8HImode
7429 || mode == V16QImode || mode == V2DImode
7430 || mode == V2SImode || mode == V4HImode
7431 || mode == V8QImode || mode == V2SFmode
7432 || mode == V4SFmode || mode == V2DFmode
7433 || mode == V1DFmode))
7434 return true;
7436 return false;
7439 /* Return appropriate SIMD container
7440 for MODE within a vector of WIDTH bits. */
7441 static enum machine_mode
7442 aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
7444 gcc_assert (width == 64 || width == 128);
7445 if (TARGET_SIMD)
7447 if (width == 128)
7448 switch (mode)
7450 case DFmode:
7451 return V2DFmode;
7452 case SFmode:
7453 return V4SFmode;
7454 case SImode:
7455 return V4SImode;
7456 case HImode:
7457 return V8HImode;
7458 case QImode:
7459 return V16QImode;
7460 case DImode:
7461 return V2DImode;
7462 default:
7463 break;
7465 else
7466 switch (mode)
7468 case SFmode:
7469 return V2SFmode;
7470 case SImode:
7471 return V2SImode;
7472 case HImode:
7473 return V4HImode;
7474 case QImode:
7475 return V8QImode;
7476 default:
7477 break;
7480 return word_mode;
7483 /* Return 128-bit container as the preferred SIMD mode for MODE. */
7484 static enum machine_mode
7485 aarch64_preferred_simd_mode (enum machine_mode mode)
7487 return aarch64_simd_container_mode (mode, 128);
7490 /* Return the bitmask of possible vector sizes for the vectorizer
7491 to iterate over. */
7492 static unsigned int
7493 aarch64_autovectorize_vector_sizes (void)
7495 return (16 | 8);
7498 /* A table to help perform AArch64-specific name mangling for AdvSIMD
7499 vector types in order to conform to the AAPCS64 (see "Procedure
7500 Call Standard for the ARM 64-bit Architecture", Appendix A). To
7501 qualify for emission with the mangled names defined in that document,
7502 a vector type must not only be of the correct mode but also be
7503 composed of AdvSIMD vector element types (e.g.
7504 __builtin_aarch64_simd_qi); these types are registered by
7505 aarch64_init_simd_builtins (). In other words, vector types defined
7506 in other ways e.g. via vector_size attribute will get default
7507 mangled names. */
7508 typedef struct
7510 enum machine_mode mode;
7511 const char *element_type_name;
7512 const char *mangled_name;
7513 } aarch64_simd_mangle_map_entry;
7515 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
7516 /* 64-bit containerized types. */
7517 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
7518 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
7519 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
7520 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
7521 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
7522 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
7523 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
7524 { DImode, "__builtin_aarch64_simd_di", "11__Int64x1_t" },
7525 { DImode, "__builtin_aarch64_simd_udi", "12__Uint64x1_t" },
7526 { V1DFmode, "__builtin_aarch64_simd_df", "13__Float64x1_t" },
7527 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
7528 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
7529 /* 128-bit containerized types. */
7530 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
7531 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
7532 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
7533 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
7534 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
7535 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
7536 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
7537 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
7538 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
7539 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
7540 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
7541 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
7542 { V2DImode, "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
7543 { VOIDmode, NULL, NULL }
7546 /* Implement TARGET_MANGLE_TYPE. */
7548 static const char *
7549 aarch64_mangle_type (const_tree type)
7551 /* The AArch64 ABI documents say that "__va_list" has to be
7552 mangled as if it were in the "std" namespace. */
7553 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
7554 return "St9__va_list";
7556 /* Check the mode of the vector type, and the name of the vector
7557 element type, against the table. */
7558 if (TREE_CODE (type) == VECTOR_TYPE)
7560 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
7562 while (pos->mode != VOIDmode)
7564 tree elt_type = TREE_TYPE (type);
7566 if (pos->mode == TYPE_MODE (type)
7567 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
7568 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
7569 pos->element_type_name))
7570 return pos->mangled_name;
7572 pos++;
7576 /* Use the default mangling. */
7577 return NULL;
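/* For example, given the table above, a C++ declaration such as

     void f (int8x8_t);

   (where int8x8_t is the arm_neon.h type whose element type is
   __builtin_aarch64_simd_qi and whose mode is V8QImode) is mangled as
   _Z1f10__Int8x8_t instead of using the generic vector mangling.
   Illustrative only; the registered types come from
   aarch64_init_simd_builtins.  */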
7580 /* Return the equivalent letter for size. */
7581 static char
7582 sizetochar (int size)
7584 switch (size)
7586 case 64: return 'd';
7587 case 32: return 's';
7588 case 16: return 'h';
7589 case 8 : return 'b';
7590 default: gcc_unreachable ();
7594 /* Return true iff x is a uniform vector of floating-point
7595 constants, and the constant can be represented in
7596 quarter-precision form. Note that, since aarch64_float_const_representable_p
7597 rejects both +0.0 and -0.0, this function rejects them as well. */
7598 static bool
7599 aarch64_vect_float_const_representable_p (rtx x)
7601 int i = 0;
7602 REAL_VALUE_TYPE r0, ri;
7603 rtx x0, xi;
7605 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
7606 return false;
7608 x0 = CONST_VECTOR_ELT (x, 0);
7609 if (!CONST_DOUBLE_P (x0))
7610 return false;
7612 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
7614 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
7616 xi = CONST_VECTOR_ELT (x, i);
7617 if (!CONST_DOUBLE_P (xi))
7618 return false;
7620 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
7621 if (!REAL_VALUES_EQUAL (r0, ri))
7622 return false;
7625 return aarch64_float_const_representable_p (x0);
7628 /* Return true if OP is a valid SIMD immediate for MODE, false otherwise. */
7629 bool
7630 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
7631 struct simd_immediate_info *info)
7633 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
7634 matches = 1; \
7635 for (i = 0; i < idx; i += (STRIDE)) \
7636 if (!(TEST)) \
7637 matches = 0; \
7638 if (matches) \
7640 immtype = (CLASS); \
7641 elsize = (ELSIZE); \
7642 eshift = (SHIFT); \
7643 emvn = (NEG); \
7644 break; \
7647 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7648 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7649 unsigned char bytes[16];
7650 int immtype = -1, matches;
7651 unsigned int invmask = inverse ? 0xff : 0;
7652 int eshift, emvn;
7654 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7656 if (! (aarch64_simd_imm_zero_p (op, mode)
7657 || aarch64_vect_float_const_representable_p (op)))
7658 return false;
7660 if (info)
7662 info->value = CONST_VECTOR_ELT (op, 0);
7663 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
7664 info->mvn = false;
7665 info->shift = 0;
7668 return true;
7671 /* Splat vector constant out into a byte vector. */
7672 for (i = 0; i < n_elts; i++)
7674 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
7675 it must be laid out in the vector register in reverse order. */
7676 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
7677 unsigned HOST_WIDE_INT elpart;
7678 unsigned int part, parts;
7680 if (CONST_INT_P (el))
7682 elpart = INTVAL (el);
7683 parts = 1;
7685 else if (GET_CODE (el) == CONST_DOUBLE)
7687 elpart = CONST_DOUBLE_LOW (el);
7688 parts = 2;
7690 else
7691 gcc_unreachable ();
7693 for (part = 0; part < parts; part++)
7695 unsigned int byte;
7696 for (byte = 0; byte < innersize; byte++)
7698 bytes[idx++] = (elpart & 0xff) ^ invmask;
7699 elpart >>= BITS_PER_UNIT;
7701 if (GET_CODE (el) == CONST_DOUBLE)
7702 elpart = CONST_DOUBLE_HIGH (el);
7706 /* Sanity check. */
7707 gcc_assert (idx == GET_MODE_SIZE (mode));
7711 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
7712 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
7714 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7715 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7717 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
7718 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7720 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
7721 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
7723 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
7725 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
7727 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
7728 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
7730 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7731 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7733 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
7734 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7736 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
7737 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
7739 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
7741 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
7743 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7744 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7746 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7747 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7749 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
7750 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7752 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
7753 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7755 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
7757 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
7758 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
7760 while (0);
7762 if (immtype == -1)
7763 return false;
7765 if (info)
7767 info->element_width = elsize;
7768 info->mvn = emvn != 0;
7769 info->shift = eshift;
7771 unsigned HOST_WIDE_INT imm = 0;
7773 if (immtype >= 12 && immtype <= 15)
7774 info->msl = true;
7776 /* Un-invert bytes of recognized vector, if necessary. */
7777 if (invmask != 0)
7778 for (i = 0; i < idx; i++)
7779 bytes[i] ^= invmask;
7781 if (immtype == 17)
7783 /* FIXME: Broken on 32-bit H_W_I hosts. */
7784 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
7786 for (i = 0; i < 8; i++)
7787 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
7788 << (i * BITS_PER_UNIT);
7791 info->value = GEN_INT (imm);
7793 else
7795 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
7796 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
7798 /* Construct 'abcdefgh' because the assembler cannot handle
7799 generic constants. */
7800 if (info->mvn)
7801 imm = ~imm;
7802 imm = (imm >> info->shift) & 0xff;
7803 info->value = GEN_INT (imm);
7807 return true;
7808 #undef CHECK
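/* Two examples of constants the CHECKs above recognize (a sketch; the final
   instruction choice is made by the callers from *INFO):

     V16QI, every byte == 0x45             immtype 16, elsize 8, shift 0,
                                           roughly "movi v0.16b, 0x45"
     V4SI, every element == 0x00004500     immtype 1, elsize 32, shift 8,
                                           roughly "movi v0.4s, 0x45, lsl 8"  */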
7811 static bool
7812 aarch64_const_vec_all_same_int_p (rtx x,
7813 HOST_WIDE_INT minval,
7814 HOST_WIDE_INT maxval)
7816 HOST_WIDE_INT firstval;
7817 int count, i;
7819 if (GET_CODE (x) != CONST_VECTOR
7820 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
7821 return false;
7823 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
7824 if (firstval < minval || firstval > maxval)
7825 return false;
7827 count = CONST_VECTOR_NUNITS (x);
7828 for (i = 1; i < count; i++)
7829 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
7830 return false;
7832 return true;
7835 /* Check whether immediate shift constants are within range. */
7836 bool
7837 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
7839 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
7840 if (left)
7841 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
7842 else
7843 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
7846 /* Return true if X is a uniform vector where all elements
7847 are either the floating-point constant 0.0 or the
7848 integer constant 0. */
7849 bool
7850 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
7852 return x == CONST0_RTX (mode);
7855 bool
7856 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
7858 HOST_WIDE_INT imm = INTVAL (x);
7859 int i;
7861 for (i = 0; i < 8; i++)
7863 unsigned int byte = imm & 0xff;
7864 if (byte != 0xff && byte != 0)
7865 return false;
7866 imm >>= 8;
7869 return true;
7872 bool
7873 aarch64_mov_operand_p (rtx x,
7874 enum aarch64_symbol_context context,
7875 enum machine_mode mode)
7877 if (GET_CODE (x) == HIGH
7878 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
7879 return true;
7881 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
7882 return true;
7884 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
7885 return true;
7887 return aarch64_classify_symbolic_expression (x, context)
7888 == SYMBOL_TINY_ABSOLUTE;
7891 /* Return a const_int vector of VAL. */
7893 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
7895 int nunits = GET_MODE_NUNITS (mode);
7896 rtvec v = rtvec_alloc (nunits);
7897 int i;
7899 for (i=0; i < nunits; i++)
7900 RTVEC_ELT (v, i) = GEN_INT (val);
7902 return gen_rtx_CONST_VECTOR (mode, v);
7905 /* Check OP is a legal scalar immediate for the MOVI instruction. */
7907 bool
7908 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
7910 enum machine_mode vmode;
7912 gcc_assert (!VECTOR_MODE_P (mode));
7913 vmode = aarch64_preferred_simd_mode (mode);
7914 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
7915 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
7918 /* Construct and return a PARALLEL RTX vector with elements numbering the
7919 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
7920 the vector - from the perspective of the architecture. This does not
7921 line up with GCC's perspective on lane numbers, so we end up with
7922 different masks depending on our target endianness. The diagram
7923 below may help. We must draw the distinction when building masks
7924 which select one half of the vector. An instruction selecting
7925 architectural low-lanes for a big-endian target must be described using
7926 a mask selecting GCC high-lanes.
7928 Big-Endian Little-Endian
7930 GCC 0 1 2 3 3 2 1 0
7931 | x | x | x | x | | x | x | x | x |
7932 Architecture 3 2 1 0 3 2 1 0
7934 Low Mask: { 2, 3 } { 0, 1 }
7935 High Mask: { 0, 1 } { 2, 3 }
7939 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
7941 int nunits = GET_MODE_NUNITS (mode);
7942 rtvec v = rtvec_alloc (nunits / 2);
7943 int high_base = nunits / 2;
7944 int low_base = 0;
7945 int base;
7946 rtx t1;
7947 int i;
7949 if (BYTES_BIG_ENDIAN)
7950 base = high ? low_base : high_base;
7951 else
7952 base = high ? high_base : low_base;
7954 for (i = 0; i < nunits / 2; i++)
7955 RTVEC_ELT (v, i) = GEN_INT (base + i);
7957 t1 = gen_rtx_PARALLEL (mode, v);
7958 return t1;
7961 /* Check OP for validity as a PARALLEL RTX vector with elements
7962 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
7963 from the perspective of the architecture. See the diagram above
7964 aarch64_simd_vect_par_cnst_half for more details. */
7966 bool
7967 aarch64_simd_check_vect_par_cnst_half (rtx op, enum machine_mode mode,
7968 bool high)
7970 rtx ideal = aarch64_simd_vect_par_cnst_half (mode, high);
7971 HOST_WIDE_INT count_op = XVECLEN (op, 0);
7972 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
7973 int i = 0;
7975 if (!VECTOR_MODE_P (mode))
7976 return false;
7978 if (count_op != count_ideal)
7979 return false;
7981 for (i = 0; i < count_ideal; i++)
7983 rtx elt_op = XVECEXP (op, 0, i);
7984 rtx elt_ideal = XVECEXP (ideal, 0, i);
7986 if (!CONST_INT_P (elt_op)
7987 || INTVAL (elt_ideal) != INTVAL (elt_op))
7988 return false;
7990 return true;
7993 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
7994 HIGH (exclusive). */
7995 void
7996 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7998 HOST_WIDE_INT lane;
7999 gcc_assert (CONST_INT_P (operand));
8000 lane = INTVAL (operand);
8002 if (lane < low || lane >= high)
8003 error ("lane out of range");
8006 void
8007 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
8009 gcc_assert (CONST_INT_P (operand));
8010 HOST_WIDE_INT lane = INTVAL (operand);
8012 if (lane < low || lane >= high)
8013 error ("constant out of range");
8016 /* Emit code to reinterpret one AdvSIMD type as another,
8017 without altering bits. */
8018 void
8019 aarch64_simd_reinterpret (rtx dest, rtx src)
8021 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
8024 /* Emit code to place an AdvSIMD pair result in memory locations (with equal
8025 registers). */
8026 void
8027 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
8028 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
8029 rtx op1)
8031 rtx mem = gen_rtx_MEM (mode, destaddr);
8032 rtx tmp1 = gen_reg_rtx (mode);
8033 rtx tmp2 = gen_reg_rtx (mode);
8035 emit_insn (intfn (tmp1, op1, tmp2));
8037 emit_move_insn (mem, tmp1);
8038 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
8039 emit_move_insn (mem, tmp2);
8042 /* Return TRUE if OP is a valid vector addressing mode. */
8043 bool
8044 aarch64_simd_mem_operand_p (rtx op)
8046 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
8047 || REG_P (XEXP (op, 0)));
8050 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
8051 not to early-clobber SRC registers in the process.
8053 We assume that the operands described by SRC and DEST represent a
8054 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
8055 number of components into which the copy has been decomposed. */
8056 void
8057 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
8058 rtx *src, unsigned int count)
8060 unsigned int i;
8062 if (!reg_overlap_mentioned_p (operands[0], operands[1])
8063 || REGNO (operands[0]) < REGNO (operands[1]))
8065 for (i = 0; i < count; i++)
8067 operands[2 * i] = dest[i];
8068 operands[2 * i + 1] = src[i];
8071 else
8073 for (i = 0; i < count; i++)
8075 operands[2 * i] = dest[count - i - 1];
8076 operands[2 * i + 1] = src[count - i - 1];
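/* Example (register numbers purely illustrative): splitting a copy of
   { V1, V2 } into { V2, V3 } must emit V3 <- V2 before V2 <- V1, otherwise
   the first move would clobber V2 while it is still needed as a source;
   the REGNO comparison above selects that reversed ordering.  */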
8081 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
8082 one of VSTRUCT modes: OI, CI or XI. */
8084 aarch64_simd_attr_length_move (rtx insn)
8086 enum machine_mode mode;
8088 extract_insn_cached (insn);
8090 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
8092 mode = GET_MODE (recog_data.operand[0]);
8093 switch (mode)
8095 case OImode:
8096 return 8;
8097 case CImode:
8098 return 12;
8099 case XImode:
8100 return 16;
8101 default:
8102 gcc_unreachable ();
8105 return 4;
8108 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
8109 alignment of a vector to 128 bits. */
8110 static HOST_WIDE_INT
8111 aarch64_simd_vector_alignment (const_tree type)
8113 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
8114 return MIN (align, 128);
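/* For example, a generic vector type declared as

     typedef int v8si __attribute__ ((vector_size (32)));

   is 256 bits wide, but its alignment is capped at 128 bits here, as
   AAPCS64 requires.  */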
8117 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
8118 static bool
8119 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
8121 if (is_packed)
8122 return false;
8124 /* We guarantee alignment for vectors up to 128-bits. */
8125 if (tree_int_cst_compare (TYPE_SIZE (type),
8126 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
8127 return false;
8129 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
8130 return true;
8133 /* If VALS is a vector constant that can be loaded into a register
8134 using DUP, generate instructions to do so and return an RTX to
8135 assign to the register. Otherwise return NULL_RTX. */
8136 static rtx
8137 aarch64_simd_dup_constant (rtx vals)
8139 enum machine_mode mode = GET_MODE (vals);
8140 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8141 int n_elts = GET_MODE_NUNITS (mode);
8142 bool all_same = true;
8143 rtx x;
8144 int i;
8146 if (GET_CODE (vals) != CONST_VECTOR)
8147 return NULL_RTX;
8149 for (i = 1; i < n_elts; ++i)
8151 x = CONST_VECTOR_ELT (vals, i);
8152 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
8153 all_same = false;
8156 if (!all_same)
8157 return NULL_RTX;
8159 /* We can load this constant by using DUP and a constant in a
8160 single general-purpose register. This will be cheaper than a vector
8161 load. */
8162 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
8163 return gen_rtx_VEC_DUPLICATE (mode, x);
8167 /* Generate code to load VALS, which is a PARALLEL containing only
8168 constants (for vec_init) or CONST_VECTOR, efficiently into a
8169 register. Returns an RTX to copy into the register, or NULL_RTX
8170 for a PARALLEL that can not be converted into a CONST_VECTOR. */
8171 static rtx
8172 aarch64_simd_make_constant (rtx vals)
8174 enum machine_mode mode = GET_MODE (vals);
8175 rtx const_dup;
8176 rtx const_vec = NULL_RTX;
8177 int n_elts = GET_MODE_NUNITS (mode);
8178 int n_const = 0;
8179 int i;
8181 if (GET_CODE (vals) == CONST_VECTOR)
8182 const_vec = vals;
8183 else if (GET_CODE (vals) == PARALLEL)
8185 /* A CONST_VECTOR must contain only CONST_INTs and
8186 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8187 Only store valid constants in a CONST_VECTOR. */
8188 for (i = 0; i < n_elts; ++i)
8190 rtx x = XVECEXP (vals, 0, i);
8191 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
8192 n_const++;
8194 if (n_const == n_elts)
8195 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8197 else
8198 gcc_unreachable ();
8200 if (const_vec != NULL_RTX
8201 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
8202 /* Load using MOVI/MVNI. */
8203 return const_vec;
8204 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
8205 /* Loaded using DUP. */
8206 return const_dup;
8207 else if (const_vec != NULL_RTX)
8208 /* Load from constant pool. We can not take advantage of single-cycle
8209 LD1 because we need a PC-relative addressing mode. */
8210 return const_vec;
8211 else
8212 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8213 We can not construct an initializer. */
8214 return NULL_RTX;
8217 void
8218 aarch64_expand_vector_init (rtx target, rtx vals)
8220 enum machine_mode mode = GET_MODE (target);
8221 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8222 int n_elts = GET_MODE_NUNITS (mode);
8223 int n_var = 0, one_var = -1;
8224 bool all_same = true;
8225 rtx x, mem;
8226 int i;
8228 x = XVECEXP (vals, 0, 0);
8229 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8230 n_var = 1, one_var = 0;
8232 for (i = 1; i < n_elts; ++i)
8234 x = XVECEXP (vals, 0, i);
8235 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8236 ++n_var, one_var = i;
8238 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8239 all_same = false;
8242 if (n_var == 0)
8244 rtx constant = aarch64_simd_make_constant (vals);
8245 if (constant != NULL_RTX)
8247 emit_move_insn (target, constant);
8248 return;
8252 /* Splat a single non-constant element if we can. */
8253 if (all_same)
8255 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8256 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
8257 return;
8260 /* One field is non-constant. Load constant then overwrite varying
8261 field. This is more efficient than using the stack. */
8262 if (n_var == 1)
8264 rtx copy = copy_rtx (vals);
8265 rtx index = GEN_INT (one_var);
8266 enum insn_code icode;
8268 /* Load constant part of vector, substitute neighboring value for
8269 varying element. */
8270 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
8271 aarch64_expand_vector_init (target, copy);
8273 /* Insert variable. */
8274 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8275 icode = optab_handler (vec_set_optab, mode);
8276 gcc_assert (icode != CODE_FOR_nothing);
8277 emit_insn (GEN_FCN (icode) (target, x, index));
8278 return;
8281 /* Construct the vector in memory one field at a time
8282 and load the whole vector. */
8283 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
8284 for (i = 0; i < n_elts; i++)
8285 emit_move_insn (adjust_address_nv (mem, inner_mode,
8286 i * GET_MODE_SIZE (inner_mode)),
8287 XVECEXP (vals, 0, i));
8288 emit_move_insn (target, mem);
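/* Illustrative strategy selection for a V4SI initializer (a sketch; the
   instructions shown are only indicative):

     { 1, 1, 1, 1 }   all constant          -> MOVI or constant-pool load
     { x, x, x, x }   all same, variable    -> dup  v0.4s, w0
     { 1, 2, 3, x }   one variable element  -> build { 1, 2, 3, 3 }, then
                                               ins  v0.s[3], w0
     { x, y, z, w }   general case          -> store the elements to a stack
                                               temporary and load the whole
                                               vector  */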
8292 static unsigned HOST_WIDE_INT
8293 aarch64_shift_truncation_mask (enum machine_mode mode)
8295 return
8296 (aarch64_vector_mode_supported_p (mode)
8297 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
8300 #ifndef TLS_SECTION_ASM_FLAG
8301 #define TLS_SECTION_ASM_FLAG 'T'
8302 #endif
8304 void
8305 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
8306 tree decl ATTRIBUTE_UNUSED)
8308 char flagchars[10], *f = flagchars;
8310 /* If we have already declared this section, we can use an
8311 abbreviated form to switch back to it -- unless this section is
8312 part of a COMDAT group, in which case GAS requires the full
8313 declaration every time. */
8314 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8315 && (flags & SECTION_DECLARED))
8317 fprintf (asm_out_file, "\t.section\t%s\n", name);
8318 return;
8321 if (!(flags & SECTION_DEBUG))
8322 *f++ = 'a';
8323 if (flags & SECTION_WRITE)
8324 *f++ = 'w';
8325 if (flags & SECTION_CODE)
8326 *f++ = 'x';
8327 if (flags & SECTION_SMALL)
8328 *f++ = 's';
8329 if (flags & SECTION_MERGE)
8330 *f++ = 'M';
8331 if (flags & SECTION_STRINGS)
8332 *f++ = 'S';
8333 if (flags & SECTION_TLS)
8334 *f++ = TLS_SECTION_ASM_FLAG;
8335 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8336 *f++ = 'G';
8337 *f = '\0';
8339 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
8341 if (!(flags & SECTION_NOTYPE))
8343 const char *type;
8344 const char *format;
8346 if (flags & SECTION_BSS)
8347 type = "nobits";
8348 else
8349 type = "progbits";
8351 #ifdef TYPE_OPERAND_FMT
8352 format = "," TYPE_OPERAND_FMT;
8353 #else
8354 format = ",@%s";
8355 #endif
8357 fprintf (asm_out_file, format, type);
8359 if (flags & SECTION_ENTSIZE)
8360 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
8361 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8363 if (TREE_CODE (decl) == IDENTIFIER_NODE)
8364 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
8365 else
8366 fprintf (asm_out_file, ",%s,comdat",
8367 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
8371 putc ('\n', asm_out_file);
8374 /* Select a format to encode pointers in exception handling data. */
8376 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
8378 int type;
8379 switch (aarch64_cmodel)
8381 case AARCH64_CMODEL_TINY:
8382 case AARCH64_CMODEL_TINY_PIC:
8383 case AARCH64_CMODEL_SMALL:
8384 case AARCH64_CMODEL_SMALL_PIC:
8385 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
8386 for everything. */
8387 type = DW_EH_PE_sdata4;
8388 break;
8389 default:
8390 /* No assumptions here. 8-byte relocs required. */
8391 type = DW_EH_PE_sdata8;
8392 break;
8394 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
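/* For example, under the small code model a global symbol gets the
   encoding DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4
   (0x80 | 0x10 | 0x0b == 0x9b), while a local one gets 0x1b.  */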
8397 /* Emit load exclusive. */
8399 static void
8400 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
8401 rtx mem, rtx model_rtx)
8403 rtx (*gen) (rtx, rtx, rtx);
8405 switch (mode)
8407 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
8408 case HImode: gen = gen_aarch64_load_exclusivehi; break;
8409 case SImode: gen = gen_aarch64_load_exclusivesi; break;
8410 case DImode: gen = gen_aarch64_load_exclusivedi; break;
8411 default:
8412 gcc_unreachable ();
8415 emit_insn (gen (rval, mem, model_rtx));
8418 /* Emit store exclusive. */
8420 static void
8421 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
8422 rtx rval, rtx mem, rtx model_rtx)
8424 rtx (*gen) (rtx, rtx, rtx, rtx);
8426 switch (mode)
8428 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
8429 case HImode: gen = gen_aarch64_store_exclusivehi; break;
8430 case SImode: gen = gen_aarch64_store_exclusivesi; break;
8431 case DImode: gen = gen_aarch64_store_exclusivedi; break;
8432 default:
8433 gcc_unreachable ();
8436 emit_insn (gen (bval, rval, mem, model_rtx));
8439 /* Mark the previous jump instruction as unlikely. */
8441 static void
8442 aarch64_emit_unlikely_jump (rtx insn)
8444 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
8446 insn = emit_jump_insn (insn);
8447 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
8450 /* Expand a compare and swap pattern. */
8452 void
8453 aarch64_expand_compare_and_swap (rtx operands[])
8455 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
8456 enum machine_mode mode, cmp_mode;
8457 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
8459 bval = operands[0];
8460 rval = operands[1];
8461 mem = operands[2];
8462 oldval = operands[3];
8463 newval = operands[4];
8464 is_weak = operands[5];
8465 mod_s = operands[6];
8466 mod_f = operands[7];
8467 mode = GET_MODE (mem);
8468 cmp_mode = mode;
8470 /* Normally the succ memory model must be stronger than fail, but in the
8471 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
8472 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
8474 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
8475 && INTVAL (mod_s) == MEMMODEL_RELEASE)
8476 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
8478 switch (mode)
8480 case QImode:
8481 case HImode:
8482 /* For short modes, we're going to perform the comparison in SImode,
8483 so do the zero-extension now. */
8484 cmp_mode = SImode;
8485 rval = gen_reg_rtx (SImode);
8486 oldval = convert_modes (SImode, mode, oldval, true);
8487 /* Fall through. */
8489 case SImode:
8490 case DImode:
8491 /* Force the value into a register if needed. */
8492 if (!aarch64_plus_operand (oldval, mode))
8493 oldval = force_reg (cmp_mode, oldval);
8494 break;
8496 default:
8497 gcc_unreachable ();
8500 switch (mode)
8502 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
8503 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
8504 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
8505 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
8506 default:
8507 gcc_unreachable ();
8510 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
8512 if (mode == QImode || mode == HImode)
8513 emit_move_insn (operands[1], gen_lowpart (mode, rval));
8515 x = gen_rtx_REG (CCmode, CC_REGNUM);
8516 x = gen_rtx_EQ (SImode, x, const0_rtx);
8517 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
8520 /* Split a compare and swap pattern. */
8522 void
8523 aarch64_split_compare_and_swap (rtx operands[])
8525 rtx rval, mem, oldval, newval, scratch;
8526 enum machine_mode mode;
8527 bool is_weak;
8528 rtx label1, label2, x, cond;
8530 rval = operands[0];
8531 mem = operands[1];
8532 oldval = operands[2];
8533 newval = operands[3];
8534 is_weak = (operands[4] != const0_rtx);
8535 scratch = operands[7];
8536 mode = GET_MODE (mem);
8538 label1 = NULL_RTX;
8539 if (!is_weak)
8541 label1 = gen_label_rtx ();
8542 emit_label (label1);
8544 label2 = gen_label_rtx ();
8546 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
8548 cond = aarch64_gen_compare_reg (NE, rval, oldval);
8549 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8550 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8551 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
8552 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8554 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
8556 if (!is_weak)
8558 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
8559 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8560 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
8561 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8563 else
8565 cond = gen_rtx_REG (CCmode, CC_REGNUM);
8566 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
8567 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
8570 emit_label (label2);
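/* Schematically (an illustrative sketch, not the exact output; the
   acquire/release variants of the exclusive pair depend on the memory
   model), the strong compare-and-swap split above yields roughly:

     .L1:  ldaxr  rval, [mem]
           cmp    rval, oldval
           b.ne   .L2
           stlxr  scratch, newval, [mem]
           cbnz   scratch, .L1
     .L2:                                                              */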
8573 /* Split an atomic operation. */
8575 void
8576 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
8577 rtx value, rtx model_rtx, rtx cond)
8579 enum machine_mode mode = GET_MODE (mem);
8580 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
8581 rtx label, x;
8583 label = gen_label_rtx ();
8584 emit_label (label);
8586 if (new_out)
8587 new_out = gen_lowpart (wmode, new_out);
8588 if (old_out)
8589 old_out = gen_lowpart (wmode, old_out);
8590 else
8591 old_out = new_out;
8592 value = simplify_gen_subreg (wmode, value, mode, 0);
8594 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
8596 switch (code)
8598 case SET:
8599 new_out = value;
8600 break;
8602 case NOT:
8603 x = gen_rtx_AND (wmode, old_out, value);
8604 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8605 x = gen_rtx_NOT (wmode, new_out);
8606 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8607 break;
8609 case MINUS:
8610 if (CONST_INT_P (value))
8612 value = GEN_INT (-INTVAL (value));
8613 code = PLUS;
8615 /* Fall through. */
8617 default:
8618 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
8619 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8620 break;
8623 aarch64_emit_store_exclusive (mode, cond, mem,
8624 gen_lowpart (mode, new_out), model_rtx);
8626 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8627 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8628 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
8629 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
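/* For illustration (schematic only), splitting an atomic fetch-and-add
   produces a loop of roughly this shape:

     .L1:  ldxr   old, [mem]
           add    new, old, value
           stxr   cond, new, [mem]
           cbnz   cond, .L1

   with acquire/release forms of the exclusive pair selected according
   to MODEL_RTX.  */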
8632 static void
8633 aarch64_print_extension (void)
8635 const struct aarch64_option_extension *opt = NULL;
8637 for (opt = all_extensions; opt->name != NULL; opt++)
8638 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
8639 asm_fprintf (asm_out_file, "+%s", opt->name);
8641 asm_fprintf (asm_out_file, "\n");
8644 static void
8645 aarch64_start_file (void)
8647 if (selected_arch)
8649 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
8650 aarch64_print_extension ();
8652 else if (selected_cpu)
8654 const char *truncated_name
8655 = aarch64_rewrite_selected_cpu (selected_cpu->name);
8656 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
8657 aarch64_print_extension ();
8659 default_file_start();
8662 /* Target hook for c_mode_for_suffix. */
8663 static enum machine_mode
8664 aarch64_c_mode_for_suffix (char suffix)
8666 if (suffix == 'q')
8667 return TFmode;
8669 return VOIDmode;
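/* For example, this is what gives a literal written with a 'q' suffix
   (such as 1.5q) TFmode, i.e. the 128-bit IEEE quad format.  */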
8672 /* We can only represent floating point constants which will fit in
8673 "quarter-precision" values. These values are characterised by
8674 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given by the formula:
8677 (-1)^s * (n/16) * 2^r
8679 Where:
8680 's' is the sign bit.
8681 'n' is an integer in the range 16 <= n <= 31.
8682 'r' is an integer in the range -3 <= r <= 4. */
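/* For illustration: 1.0 = (16/16) * 2^0 and 0.25 = (16/16) * 2^-2 are
   representable; the smallest positive value is (16/16) * 2^-3 = 0.125
   and the largest is (31/16) * 2^4 = 31.0.  A value such as 0.1 is not
   representable.  */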
8684 /* Return true iff X can be represented by a quarter-precision
8685 floating point immediate operand. Note, we cannot represent 0.0. */
8686 bool
8687 aarch64_float_const_representable_p (rtx x)
8689 /* This represents our current view of how many bits
8690 make up the mantissa. */
8691 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8692 int exponent;
8693 unsigned HOST_WIDE_INT mantissa, mask;
8694 REAL_VALUE_TYPE r, m;
8695 bool fail;
8697 if (!CONST_DOUBLE_P (x))
8698 return false;
8700 if (GET_MODE (x) == VOIDmode)
8701 return false;
8703 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8705 /* We cannot represent infinities, NaNs or +/-zero. We won't
8706 know if we have +zero until we analyse the mantissa, but we
8707 can reject the other invalid values. */
8708 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
8709 || REAL_VALUE_MINUS_ZERO (r))
8710 return false;
8712 /* Extract exponent. */
8713 r = real_value_abs (&r);
8714 exponent = REAL_EXP (&r);
8716 /* For the mantissa, we expand into two HOST_WIDE_INTs, apart from the
8717 highest (sign) bit, with a fixed binary point at bit point_pos.
8718 w.elt (0) holds the low part of the mantissa, w.elt (1) the high part.
8719 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
8720 bits for the mantissa, this can fail (low bits will be lost). */
8721 real_ldexp (&m, &r, point_pos - exponent);
8722 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
8724 /* If the low part of the mantissa has bits set we cannot represent
8725 the value. */
8726 if (w.elt (0) != 0)
8727 return false;
8728 /* We have rejected the lower HOST_WIDE_INT, so update our
8729 understanding of how many bits lie in the mantissa and
8730 look only at the high HOST_WIDE_INT. */
8731 mantissa = w.elt (1);
8732 point_pos -= HOST_BITS_PER_WIDE_INT;
8734 /* We can only represent values with a mantissa of the form 1.xxxx. */
8735 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8736 if ((mantissa & mask) != 0)
8737 return false;
8739 /* Having filtered unrepresentable values, we may now remove all
8740 but the highest 5 bits. */
8741 mantissa >>= point_pos - 5;
8743 /* We cannot represent the value 0.0, so reject it. This is handled
8744 elsewhere. */
8745 if (mantissa == 0)
8746 return false;
8748 /* Then, as bit 4 is always set, we can mask it off, leaving
8749 the mantissa in the range [0, 15]. */
8750 mantissa &= ~(1 << 4);
8751 gcc_assert (mantissa <= 15);
8753 /* GCC internally does not use IEEE754-like encoding (where normalized
8754 significands are in the range [1, 2)). GCC uses [0.5, 1) (see real.c).
8755 Our mantissa values are shifted 4 places to the left relative to
8756 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
8757 by 5 places to correct for GCC's representation. */
8758 exponent = 5 - exponent;
8760 return (exponent >= 0 && exponent <= 7);
8763 char*
8764 aarch64_output_simd_mov_immediate (rtx const_vector,
8765 enum machine_mode mode,
8766 unsigned width)
8768 bool is_valid;
8769 static char templ[40];
8770 const char *mnemonic;
8771 const char *shift_op;
8772 unsigned int lane_count = 0;
8773 char element_char;
8775 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
8777 /* This will return true to show const_vector is legal for use as
8778 an AdvSIMD MOVI (or, implicitly, MVNI) immediate. It will
8779 also update INFO to show how the immediate should be generated. */
8780 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
8781 gcc_assert (is_valid);
8783 element_char = sizetochar (info.element_width);
8784 lane_count = width / info.element_width;
8786 mode = GET_MODE_INNER (mode);
8787 if (mode == SFmode || mode == DFmode)
8789 gcc_assert (info.shift == 0 && ! info.mvn);
8790 if (aarch64_float_const_zero_rtx_p (info.value))
8791 info.value = GEN_INT (0);
8792 else
8794 #define buf_size 20
8795 REAL_VALUE_TYPE r;
8796 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
8797 char float_buf[buf_size] = {'\0'};
8798 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
8799 #undef buf_size
8801 if (lane_count == 1)
8802 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
8803 else
8804 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
8805 lane_count, element_char, float_buf);
8806 return templ;
8810 mnemonic = info.mvn ? "mvni" : "movi";
8811 shift_op = info.msl ? "msl" : "lsl";
8813 if (lane_count == 1)
8814 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
8815 mnemonic, UINTVAL (info.value));
8816 else if (info.shift)
8817 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
8818 ", %s %d", mnemonic, lane_count, element_char,
8819 UINTVAL (info.value), shift_op, info.shift);
8820 else
8821 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
8822 mnemonic, lane_count, element_char, UINTVAL (info.value));
8823 return templ;
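/* For illustration (assumed shapes, not verbatim compiler output), the
   templates above produce instructions along the lines of
       movi  v0.4s, 0x12, lsl 8
       mvni  v0.8h, 0x5
       fmov  v0.2d, 1.0e+0
   depending on the immediate value and mode.  */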
8826 char*
8827 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
8828 enum machine_mode mode)
8830 enum machine_mode vmode;
8832 gcc_assert (!VECTOR_MODE_P (mode));
8833 vmode = aarch64_simd_container_mode (mode, 64);
8834 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
8835 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
8838 /* Split operands into moves from op[1] + op[2] into op[0]. */
8840 void
8841 aarch64_split_combinev16qi (rtx operands[3])
8843 unsigned int dest = REGNO (operands[0]);
8844 unsigned int src1 = REGNO (operands[1]);
8845 unsigned int src2 = REGNO (operands[2]);
8846 enum machine_mode halfmode = GET_MODE (operands[1]);
8847 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
8848 rtx destlo, desthi;
8850 gcc_assert (halfmode == V16QImode);
8852 if (src1 == dest && src2 == dest + halfregs)
8854 /* No-op move. Can't split to nothing; emit something. */
8855 emit_note (NOTE_INSN_DELETED);
8856 return;
8859 /* Preserve register attributes for variable tracking. */
8860 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
8861 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
8862 GET_MODE_SIZE (halfmode));
8864 /* Special case of reversed high/low parts. */
8865 if (reg_overlap_mentioned_p (operands[2], destlo)
8866 && reg_overlap_mentioned_p (operands[1], desthi))
8868 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8869 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
8870 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8872 else if (!reg_overlap_mentioned_p (operands[2], destlo))
8874 /* Try to avoid unnecessary moves if part of the result
8875 is in the right place already. */
8876 if (src1 != dest)
8877 emit_move_insn (destlo, operands[1]);
8878 if (src2 != dest + halfregs)
8879 emit_move_insn (desthi, operands[2]);
8881 else
8883 if (src2 != dest + halfregs)
8884 emit_move_insn (desthi, operands[2]);
8885 if (src1 != dest)
8886 emit_move_insn (destlo, operands[1]);
8890 /* vec_perm support. */
8892 #define MAX_VECT_LEN 16
8894 struct expand_vec_perm_d
8896 rtx target, op0, op1;
8897 unsigned char perm[MAX_VECT_LEN];
8898 enum machine_mode vmode;
8899 unsigned char nelt;
8900 bool one_vector_p;
8901 bool testing_p;
8904 /* Generate a variable permutation. */
8906 static void
8907 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
8909 enum machine_mode vmode = GET_MODE (target);
8910 bool one_vector_p = rtx_equal_p (op0, op1);
8912 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
8913 gcc_checking_assert (GET_MODE (op0) == vmode);
8914 gcc_checking_assert (GET_MODE (op1) == vmode);
8915 gcc_checking_assert (GET_MODE (sel) == vmode);
8916 gcc_checking_assert (TARGET_SIMD);
8918 if (one_vector_p)
8920 if (vmode == V8QImode)
8922 /* Expand the argument to a V16QI mode by duplicating it. */
8923 rtx pair = gen_reg_rtx (V16QImode);
8924 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
8925 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8927 else
8929 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
8932 else
8934 rtx pair;
8936 if (vmode == V8QImode)
8938 pair = gen_reg_rtx (V16QImode);
8939 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
8940 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8942 else
8944 pair = gen_reg_rtx (OImode);
8945 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
8946 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
8951 void
8952 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
8954 enum machine_mode vmode = GET_MODE (target);
8955 unsigned int nelt = GET_MODE_NUNITS (vmode);
8956 bool one_vector_p = rtx_equal_p (op0, op1);
8957 rtx mask;
8959 /* The TBL instruction does not use a modulo index, so we must take care
8960 of that ourselves. */
8961 mask = aarch64_simd_gen_const_vector_dup (vmode,
8962 one_vector_p ? nelt - 1 : 2 * nelt - 1);
8963 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
8965 /* For big-endian, we also need to reverse the index within the vector
8966 (but not which vector). */
8967 if (BYTES_BIG_ENDIAN)
8969 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
8970 if (!one_vector_p)
8971 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
8972 sel = expand_simple_binop (vmode, XOR, sel, mask,
8973 NULL, 0, OPTAB_LIB_WIDEN);
8975 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
8978 /* Recognize patterns suitable for the TRN instructions. */
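/* For example (little-endian element indices into the concatenation of
   the two inputs): on V4SI, TRN1 corresponds to the selector
   { 0, 4, 2, 6 } and TRN2 to { 1, 5, 3, 7 }.  */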
8979 static bool
8980 aarch64_evpc_trn (struct expand_vec_perm_d *d)
8982 unsigned int i, odd, mask, nelt = d->nelt;
8983 rtx out, in0, in1, x;
8984 rtx (*gen) (rtx, rtx, rtx);
8985 enum machine_mode vmode = d->vmode;
8987 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8988 return false;
8990 /* Note that these are little-endian tests.
8991 We correct for big-endian later. */
8992 if (d->perm[0] == 0)
8993 odd = 0;
8994 else if (d->perm[0] == 1)
8995 odd = 1;
8996 else
8997 return false;
8998 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9000 for (i = 0; i < nelt; i += 2)
9002 if (d->perm[i] != i + odd)
9003 return false;
9004 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
9005 return false;
9008 /* Success! */
9009 if (d->testing_p)
9010 return true;
9012 in0 = d->op0;
9013 in1 = d->op1;
9014 if (BYTES_BIG_ENDIAN)
9016 x = in0, in0 = in1, in1 = x;
9017 odd = !odd;
9019 out = d->target;
9021 if (odd)
9023 switch (vmode)
9025 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
9026 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
9027 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
9028 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
9029 case V4SImode: gen = gen_aarch64_trn2v4si; break;
9030 case V2SImode: gen = gen_aarch64_trn2v2si; break;
9031 case V2DImode: gen = gen_aarch64_trn2v2di; break;
9032 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
9033 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
9034 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
9035 default:
9036 return false;
9039 else
9041 switch (vmode)
9043 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
9044 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
9045 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
9046 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
9047 case V4SImode: gen = gen_aarch64_trn1v4si; break;
9048 case V2SImode: gen = gen_aarch64_trn1v2si; break;
9049 case V2DImode: gen = gen_aarch64_trn1v2di; break;
9050 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
9051 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
9052 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
9053 default:
9054 return false;
9058 emit_insn (gen (out, in0, in1));
9059 return true;
9062 /* Recognize patterns suitable for the UZP instructions. */
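/* For example: on V4SI, UZP1 corresponds to the selector { 0, 2, 4, 6 }
   (the even elements of the concatenation) and UZP2 to { 1, 3, 5, 7 }.  */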
9063 static bool
9064 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
9066 unsigned int i, odd, mask, nelt = d->nelt;
9067 rtx out, in0, in1, x;
9068 rtx (*gen) (rtx, rtx, rtx);
9069 enum machine_mode vmode = d->vmode;
9071 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9072 return false;
9074 /* Note that these are little-endian tests.
9075 We correct for big-endian later. */
9076 if (d->perm[0] == 0)
9077 odd = 0;
9078 else if (d->perm[0] == 1)
9079 odd = 1;
9080 else
9081 return false;
9082 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9084 for (i = 0; i < nelt; i++)
9086 unsigned elt = (i * 2 + odd) & mask;
9087 if (d->perm[i] != elt)
9088 return false;
9091 /* Success! */
9092 if (d->testing_p)
9093 return true;
9095 in0 = d->op0;
9096 in1 = d->op1;
9097 if (BYTES_BIG_ENDIAN)
9099 x = in0, in0 = in1, in1 = x;
9100 odd = !odd;
9102 out = d->target;
9104 if (odd)
9106 switch (vmode)
9108 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
9109 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
9110 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
9111 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
9112 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
9113 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
9114 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
9115 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
9116 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
9117 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
9118 default:
9119 return false;
9122 else
9124 switch (vmode)
9126 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
9127 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
9128 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
9129 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
9130 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
9131 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
9132 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
9133 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
9134 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
9135 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
9136 default:
9137 return false;
9141 emit_insn (gen (out, in0, in1));
9142 return true;
9145 /* Recognize patterns suitable for the ZIP instructions. */
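/* For example: on V4SI, ZIP1 corresponds to the selector { 0, 4, 1, 5 }
   (interleaving the low halves of the inputs) and ZIP2 to { 2, 6, 3, 7 }.  */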
9146 static bool
9147 aarch64_evpc_zip (struct expand_vec_perm_d *d)
9149 unsigned int i, high, mask, nelt = d->nelt;
9150 rtx out, in0, in1, x;
9151 rtx (*gen) (rtx, rtx, rtx);
9152 enum machine_mode vmode = d->vmode;
9154 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9155 return false;
9157 /* Note that these are little-endian tests.
9158 We correct for big-endian later. */
9159 high = nelt / 2;
9160 if (d->perm[0] == high)
9161 /* Do Nothing. */
9163 else if (d->perm[0] == 0)
9164 high = 0;
9165 else
9166 return false;
9167 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9169 for (i = 0; i < nelt / 2; i++)
9171 unsigned elt = (i + high) & mask;
9172 if (d->perm[i * 2] != elt)
9173 return false;
9174 elt = (elt + nelt) & mask;
9175 if (d->perm[i * 2 + 1] != elt)
9176 return false;
9179 /* Success! */
9180 if (d->testing_p)
9181 return true;
9183 in0 = d->op0;
9184 in1 = d->op1;
9185 if (BYTES_BIG_ENDIAN)
9187 x = in0, in0 = in1, in1 = x;
9188 high = !high;
9190 out = d->target;
9192 if (high)
9194 switch (vmode)
9196 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
9197 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
9198 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
9199 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
9200 case V4SImode: gen = gen_aarch64_zip2v4si; break;
9201 case V2SImode: gen = gen_aarch64_zip2v2si; break;
9202 case V2DImode: gen = gen_aarch64_zip2v2di; break;
9203 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
9204 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
9205 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
9206 default:
9207 return false;
9210 else
9212 switch (vmode)
9214 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
9215 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
9216 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
9217 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
9218 case V4SImode: gen = gen_aarch64_zip1v4si; break;
9219 case V2SImode: gen = gen_aarch64_zip1v2si; break;
9220 case V2DImode: gen = gen_aarch64_zip1v2di; break;
9221 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
9222 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
9223 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
9224 default:
9225 return false;
9229 emit_insn (gen (out, in0, in1));
9230 return true;
9233 /* Recognize patterns for the EXT insn. */
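/* For example: on V4SI a selector of { 1, 2, 3, 4 } (consecutive elements
   starting at index 1 of the two-vector concatenation) maps to EXT with
   an element offset of 1.  */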
9235 static bool
9236 aarch64_evpc_ext (struct expand_vec_perm_d *d)
9238 unsigned int i, nelt = d->nelt;
9239 rtx (*gen) (rtx, rtx, rtx, rtx);
9240 rtx offset;
9242 unsigned int location = d->perm[0]; /* Always < nelt. */
9244 /* Check if the extracted indices are increasing by one. */
9245 for (i = 1; i < nelt; i++)
9247 unsigned int required = location + i;
9248 if (d->one_vector_p)
9250 /* We'll pass the same vector in twice, so allow indices to wrap. */
9251 required &= (nelt - 1);
9253 if (d->perm[i] != required)
9254 return false;
9257 switch (d->vmode)
9259 case V16QImode: gen = gen_aarch64_extv16qi; break;
9260 case V8QImode: gen = gen_aarch64_extv8qi; break;
9261 case V4HImode: gen = gen_aarch64_extv4hi; break;
9262 case V8HImode: gen = gen_aarch64_extv8hi; break;
9263 case V2SImode: gen = gen_aarch64_extv2si; break;
9264 case V4SImode: gen = gen_aarch64_extv4si; break;
9265 case V2SFmode: gen = gen_aarch64_extv2sf; break;
9266 case V4SFmode: gen = gen_aarch64_extv4sf; break;
9267 case V2DImode: gen = gen_aarch64_extv2di; break;
9268 case V2DFmode: gen = gen_aarch64_extv2df; break;
9269 default:
9270 return false;
9273 /* Success! */
9274 if (d->testing_p)
9275 return true;
9277 /* The case where (location == 0) is a no-op for both big- and little-endian,
9278 and is removed by the mid-end at optimization levels -O1 and higher. */
9280 if (BYTES_BIG_ENDIAN && (location != 0))
9282 /* After setup, we want the high elements of the first vector (stored
9283 at the LSB end of the register), and the low elements of the second
9284 vector (stored at the MSB end of the register). So swap. */
9285 rtx temp = d->op0;
9286 d->op0 = d->op1;
9287 d->op1 = temp;
9288 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
9289 location = nelt - location;
9292 offset = GEN_INT (location);
9293 emit_insn (gen (d->target, d->op0, d->op1, offset));
9294 return true;
9297 /* Recognize patterns for the REV insns. */
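/* For example: on V4SI the selector { 1, 0, 3, 2 } (diff == 1) maps to
   REV64, and on V8QI the selector { 3, 2, 1, 0, 7, 6, 5, 4 } (diff == 3)
   maps to REV32.  */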
9299 static bool
9300 aarch64_evpc_rev (struct expand_vec_perm_d *d)
9302 unsigned int i, j, diff, nelt = d->nelt;
9303 rtx (*gen) (rtx, rtx);
9305 if (!d->one_vector_p)
9306 return false;
9308 diff = d->perm[0];
9309 switch (diff)
9311 case 7:
9312 switch (d->vmode)
9314 case V16QImode: gen = gen_aarch64_rev64v16qi; break;
9315 case V8QImode: gen = gen_aarch64_rev64v8qi; break;
9316 default:
9317 return false;
9319 break;
9320 case 3:
9321 switch (d->vmode)
9323 case V16QImode: gen = gen_aarch64_rev32v16qi; break;
9324 case V8QImode: gen = gen_aarch64_rev32v8qi; break;
9325 case V8HImode: gen = gen_aarch64_rev64v8hi; break;
9326 case V4HImode: gen = gen_aarch64_rev64v4hi; break;
9327 default:
9328 return false;
9330 break;
9331 case 1:
9332 switch (d->vmode)
9334 case V16QImode: gen = gen_aarch64_rev16v16qi; break;
9335 case V8QImode: gen = gen_aarch64_rev16v8qi; break;
9336 case V8HImode: gen = gen_aarch64_rev32v8hi; break;
9337 case V4HImode: gen = gen_aarch64_rev32v4hi; break;
9338 case V4SImode: gen = gen_aarch64_rev64v4si; break;
9339 case V2SImode: gen = gen_aarch64_rev64v2si; break;
9340 case V4SFmode: gen = gen_aarch64_rev64v4sf; break;
9341 case V2SFmode: gen = gen_aarch64_rev64v2sf; break;
9342 default:
9343 return false;
9345 break;
9346 default:
9347 return false;
9350 for (i = 0; i < nelt ; i += diff + 1)
9351 for (j = 0; j <= diff; j += 1)
9353 /* This is guaranteed to be true, as diff is one of 7, 3 or 1,
9354 so there are always enough elements in the vector to satisfy
9355 this index. A permutation mask with any other value of diff
9356 would mean something had gone wrong by the time we get
9357 here. */
9358 gcc_assert (i + j < nelt);
9359 if (d->perm[i + j] != i + diff - j)
9360 return false;
9363 /* Success! */
9364 if (d->testing_p)
9365 return true;
9367 emit_insn (gen (d->target, d->op0));
9368 return true;
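/* Recognize broadcast permutations suitable for the DUP instruction,
   e.g. the V4SI selector { 2, 2, 2, 2 } maps to a DUP from lane 2.  */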
9371 static bool
9372 aarch64_evpc_dup (struct expand_vec_perm_d *d)
9374 rtx (*gen) (rtx, rtx, rtx);
9375 rtx out = d->target;
9376 rtx in0;
9377 enum machine_mode vmode = d->vmode;
9378 unsigned int i, elt, nelt = d->nelt;
9379 rtx lane;
9381 elt = d->perm[0];
9382 for (i = 1; i < nelt; i++)
9384 if (elt != d->perm[i])
9385 return false;
9388 /* The generic preparation in aarch64_expand_vec_perm_const_1
9389 swaps the operand order and the permute indices if it finds
9390 d->perm[0] to be in the second operand. Thus, we can always
9391 use d->op0 and need not do any extra arithmetic to get the
9392 correct lane number. */
9393 in0 = d->op0;
9394 lane = GEN_INT (elt); /* The pattern corrects for big-endian. */
9396 switch (vmode)
9398 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
9399 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
9400 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
9401 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
9402 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
9403 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
9404 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
9405 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
9406 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
9407 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
9408 default:
9409 return false;
9412 emit_insn (gen (out, in0, lane));
9413 return true;
9416 static bool
9417 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
9419 rtx rperm[MAX_VECT_LEN], sel;
9420 enum machine_mode vmode = d->vmode;
9421 unsigned int i, nelt = d->nelt;
9423 if (d->testing_p)
9424 return true;
9426 /* Generic code will try constant permutation twice. Once with the
9427 original mode and again with the elements lowered to QImode.
9428 So wait and don't do the selector expansion ourselves. */
9429 if (vmode != V8QImode && vmode != V16QImode)
9430 return false;
9432 for (i = 0; i < nelt; ++i)
9434 int nunits = GET_MODE_NUNITS (vmode);
9436 /* If big-endian and two vectors, we end up with a weird mixed-endian
9437 mode on NEON. Reverse the index within each word but not the word
9438 itself. */
9439 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
9440 : d->perm[i]);
9442 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
9443 sel = force_reg (vmode, sel);
9445 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
9446 return true;
9449 static bool
9450 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
9452 /* The pattern matching functions above are written to look for a small
9453 number to begin the sequence (0, 1, N/2). If we begin with an index
9454 from the second operand, we can swap the operands. */
9455 if (d->perm[0] >= d->nelt)
9457 unsigned i, nelt = d->nelt;
9458 rtx x;
9460 gcc_assert (nelt == (nelt & -nelt));
9461 for (i = 0; i < nelt; ++i)
9462 d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
9464 x = d->op0;
9465 d->op0 = d->op1;
9466 d->op1 = x;
9469 if (TARGET_SIMD)
9471 if (aarch64_evpc_rev (d))
9472 return true;
9473 else if (aarch64_evpc_ext (d))
9474 return true;
9475 else if (aarch64_evpc_dup (d))
9476 return true;
9477 else if (aarch64_evpc_zip (d))
9478 return true;
9479 else if (aarch64_evpc_uzp (d))
9480 return true;
9481 else if (aarch64_evpc_trn (d))
9482 return true;
9483 return aarch64_evpc_tbl (d);
9485 return false;
9488 /* Expand a vec_perm_const pattern. */
9490 bool
9491 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
9493 struct expand_vec_perm_d d;
9494 int i, nelt, which;
9496 d.target = target;
9497 d.op0 = op0;
9498 d.op1 = op1;
9500 d.vmode = GET_MODE (target);
9501 gcc_assert (VECTOR_MODE_P (d.vmode));
9502 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
9503 d.testing_p = false;
9505 for (i = which = 0; i < nelt; ++i)
9507 rtx e = XVECEXP (sel, 0, i);
9508 int ei = INTVAL (e) & (2 * nelt - 1);
9509 which |= (ei < nelt ? 1 : 2);
9510 d.perm[i] = ei;
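/* Bit 0 of WHICH records that some element came from the first input,
   bit 1 that some element came from the second.  */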
9513 switch (which)
9515 default:
9516 gcc_unreachable ();
9518 case 3:
9519 d.one_vector_p = false;
9520 if (!rtx_equal_p (op0, op1))
9521 break;
9523 /* The elements of PERM do not suggest that only the first operand
9524 is used, but both operands are identical. Allow easier matching
9525 of the permutation by folding the permutation into the single
9526 input vector. */
9527 /* Fall Through. */
9528 case 2:
9529 for (i = 0; i < nelt; ++i)
9530 d.perm[i] &= nelt - 1;
9531 d.op0 = op1;
9532 d.one_vector_p = true;
9533 break;
9535 case 1:
9536 d.op1 = op0;
9537 d.one_vector_p = true;
9538 break;
9541 return aarch64_expand_vec_perm_const_1 (&d);
9544 static bool
9545 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
9546 const unsigned char *sel)
9548 struct expand_vec_perm_d d;
9549 unsigned int i, nelt, which;
9550 bool ret;
9552 d.vmode = vmode;
9553 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
9554 d.testing_p = true;
9555 memcpy (d.perm, sel, nelt);
9557 /* Calculate whether all elements are in one vector. */
9558 for (i = which = 0; i < nelt; ++i)
9560 unsigned char e = d.perm[i];
9561 gcc_assert (e < 2 * nelt);
9562 which |= (e < nelt ? 1 : 2);
9565 /* If all elements are from the second vector, reindex as if from the
9566 first vector. */
9567 if (which == 2)
9568 for (i = 0; i < nelt; ++i)
9569 d.perm[i] -= nelt;
9571 /* Check whether the mask can be applied to a single vector. */
9572 d.one_vector_p = (which != 3);
9574 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
9575 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
9576 if (!d.one_vector_p)
9577 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
9579 start_sequence ();
9580 ret = aarch64_expand_vec_perm_const_1 (&d);
9581 end_sequence ();
9583 return ret;
9586 /* Implement target hook CANNOT_CHANGE_MODE_CLASS. */
9587 bool
9588 aarch64_cannot_change_mode_class (enum machine_mode from,
9589 enum machine_mode to,
9590 enum reg_class rclass)
9592 /* Full-reg subregs are allowed on general regs or any class if they are
9593 the same size. */
9594 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
9595 || !reg_classes_intersect_p (FP_REGS, rclass))
9596 return false;
9598 /* Limited combinations of subregs are safe on FPREGs. Particularly,
9599 1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
9600 2. Scalar to Scalar for integer modes or same size float modes.
9601 3. Vector to Vector modes.
9602 4. On little-endian only, Vector-Structure to Vector modes. */
9603 if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
9605 if (aarch64_vector_mode_supported_p (from)
9606 && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
9607 return false;
9609 if (GET_MODE_NUNITS (from) == 1
9610 && GET_MODE_NUNITS (to) == 1
9611 && (GET_MODE_CLASS (from) == MODE_INT
9612 || from == to))
9613 return false;
9615 if (aarch64_vector_mode_supported_p (from)
9616 && aarch64_vector_mode_supported_p (to))
9617 return false;
9619 /* Within a vector structure straddling multiple vector registers
9620 we are in a mixed-endian representation. As such, we can't
9621 easily change modes for BYTES_BIG_ENDIAN. Otherwise, we can
9622 switch between vectors and vector structures cheaply. */
9623 if (!BYTES_BIG_ENDIAN)
9624 if ((aarch64_vector_mode_supported_p (from)
9625 && aarch64_vect_struct_mode_p (to))
9626 || (aarch64_vector_mode_supported_p (to)
9627 && aarch64_vect_struct_mode_p (from)))
9628 return false;
9631 return true;
9634 /* Implement MODES_TIEABLE_P. */
9636 bool
9637 aarch64_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
9639 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
9640 return true;
9642 /* We specifically want to allow elements of "structure" modes to
9643 be tieable to the structure. This more general condition allows
9644 other rarer situations too. */
9645 if (TARGET_SIMD
9646 && aarch64_vector_mode_p (mode1)
9647 && aarch64_vector_mode_p (mode2))
9648 return true;
9650 return false;
9653 /* Return a new RTX holding the result of moving POINTER forward by
9654 AMOUNT bytes. */
9656 static rtx
9657 aarch64_move_pointer (rtx pointer, int amount)
9659 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
9661 return adjust_automodify_address (pointer, GET_MODE (pointer),
9662 next, amount);
9665 /* Return a new RTX holding the result of moving POINTER forward by the
9666 size of the mode it points to. */
9668 static rtx
9669 aarch64_progress_pointer (rtx pointer)
9671 HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
9673 return aarch64_move_pointer (pointer, amount);
9676 /* Copy one MODE sized block from SRC to DST, then advance SRC and DST
9677 by the size of MODE. */
9679 static void
9680 aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
9681 enum machine_mode mode)
9683 rtx reg = gen_reg_rtx (mode);
9685 /* "Cast" the pointers to the correct mode. */
9686 *src = adjust_address (*src, mode, 0);
9687 *dst = adjust_address (*dst, mode, 0);
9688 /* Emit the memcpy. */
9689 emit_move_insn (reg, *src);
9690 emit_move_insn (*dst, reg);
9691 /* Move the pointers forward. */
9692 *src = aarch64_progress_pointer (*src);
9693 *dst = aarch64_progress_pointer (*dst);
9696 /* Expand movmem, as if from a __builtin_memcpy. Return true if
9697 we succeed, otherwise return false. */
9699 bool
9700 aarch64_expand_movmem (rtx *operands)
9702 unsigned int n;
9703 rtx dst = operands[0];
9704 rtx src = operands[1];
9705 rtx base;
9706 bool speed_p = !optimize_function_for_size_p (cfun);
9708 /* When optimizing for size, give a better estimate of the length of a
9709 memcpy call, but use the default otherwise. */
9710 unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;
9712 /* We can't do anything smart if the amount to copy is not constant. */
9713 if (!CONST_INT_P (operands[2]))
9714 return false;
9716 n = UINTVAL (operands[2]);
9718 /* Try to keep the number of instructions low. For cases below 16 bytes we
9719 need to make at most two moves. For cases above 16 bytes it will be one
9720 move for each 16 byte chunk, then at most two additional moves. */
9721 if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
9722 return false;
9724 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9725 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
9727 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
9728 src = adjust_automodify_address (src, VOIDmode, base, 0);
9730 /* Simple cases. Copy 0-3 bytes: (if applicable) a 2-byte chunk,
9731 then a 1-byte chunk. */
9732 if (n < 4)
9734 if (n >= 2)
9736 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
9737 n -= 2;
9740 if (n == 1)
9741 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
9743 return true;
9746 /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second
9747 4-byte chunk, partially overlapping with the previously copied chunk. */
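/* For example, a 7-byte copy becomes an SImode load/store of bytes 0-3
   followed by an SImode load/store of bytes 3-6, overlapping by one
   byte.  */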
9748 if (n < 8)
9750 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9751 n -= 4;
9752 if (n > 0)
9754 int move = n - 4;
9756 src = aarch64_move_pointer (src, move);
9757 dst = aarch64_move_pointer (dst, move);
9758 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9760 return true;
9763 /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of
9764 them, then (if applicable) an 8-byte chunk. */
9765 while (n >= 8)
9767 if (n / 16)
9769 aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
9770 n -= 16;
9772 else
9774 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
9775 n -= 8;
9779 /* Finish the final bytes of the copy. We can always do this in one
9780 instruction. We either copy the exact amount we need, or partially
9781 overlap with the previous chunk we copied and copy 8 bytes. */
9782 if (n == 0)
9783 return true;
9784 else if (n == 1)
9785 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
9786 else if (n == 2)
9787 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
9788 else if (n == 4)
9789 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9790 else
9792 if (n == 3)
9794 src = aarch64_move_pointer (src, -1);
9795 dst = aarch64_move_pointer (dst, -1);
9796 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9798 else
9800 int move = n - 8;
9802 src = aarch64_move_pointer (src, move);
9803 dst = aarch64_move_pointer (dst, move);
9804 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
9808 return true;
9811 #undef TARGET_ADDRESS_COST
9812 #define TARGET_ADDRESS_COST aarch64_address_cost
9814 /* This hook determines whether unnamed bitfields affect the alignment
9815 of the containing structure. The hook returns true if the structure
9816 should inherit the alignment requirements of an unnamed bitfield's
9817 type. */
9818 #undef TARGET_ALIGN_ANON_BITFIELD
9819 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
9821 #undef TARGET_ASM_ALIGNED_DI_OP
9822 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
9824 #undef TARGET_ASM_ALIGNED_HI_OP
9825 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
9827 #undef TARGET_ASM_ALIGNED_SI_OP
9828 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
9830 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
9831 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
9832 hook_bool_const_tree_hwi_hwi_const_tree_true
9834 #undef TARGET_ASM_FILE_START
9835 #define TARGET_ASM_FILE_START aarch64_start_file
9837 #undef TARGET_ASM_OUTPUT_MI_THUNK
9838 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
9840 #undef TARGET_ASM_SELECT_RTX_SECTION
9841 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
9843 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
9844 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
9846 #undef TARGET_BUILD_BUILTIN_VA_LIST
9847 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
9849 #undef TARGET_CALLEE_COPIES
9850 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
9852 #undef TARGET_CAN_ELIMINATE
9853 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
9855 #undef TARGET_CANNOT_FORCE_CONST_MEM
9856 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
9858 #undef TARGET_CONDITIONAL_REGISTER_USAGE
9859 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
9861 /* Only the least significant bit is used for initialization guard
9862 variables. */
9863 #undef TARGET_CXX_GUARD_MASK_BIT
9864 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
9866 #undef TARGET_C_MODE_FOR_SUFFIX
9867 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
9869 #ifdef TARGET_BIG_ENDIAN_DEFAULT
9870 #undef TARGET_DEFAULT_TARGET_FLAGS
9871 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
9872 #endif
9874 #undef TARGET_CLASS_MAX_NREGS
9875 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
9877 #undef TARGET_BUILTIN_DECL
9878 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
9880 #undef TARGET_EXPAND_BUILTIN
9881 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
9883 #undef TARGET_EXPAND_BUILTIN_VA_START
9884 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
9886 #undef TARGET_FOLD_BUILTIN
9887 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
9889 #undef TARGET_FUNCTION_ARG
9890 #define TARGET_FUNCTION_ARG aarch64_function_arg
9892 #undef TARGET_FUNCTION_ARG_ADVANCE
9893 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
9895 #undef TARGET_FUNCTION_ARG_BOUNDARY
9896 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
9898 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
9899 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
9901 #undef TARGET_FUNCTION_VALUE
9902 #define TARGET_FUNCTION_VALUE aarch64_function_value
9904 #undef TARGET_FUNCTION_VALUE_REGNO_P
9905 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
9907 #undef TARGET_FRAME_POINTER_REQUIRED
9908 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
9910 #undef TARGET_GIMPLE_FOLD_BUILTIN
9911 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
9913 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
9914 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
9916 #undef TARGET_INIT_BUILTINS
9917 #define TARGET_INIT_BUILTINS aarch64_init_builtins
9919 #undef TARGET_LEGITIMATE_ADDRESS_P
9920 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
9922 #undef TARGET_LEGITIMATE_CONSTANT_P
9923 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
9925 #undef TARGET_LIBGCC_CMP_RETURN_MODE
9926 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
9928 #undef TARGET_LRA_P
9929 #define TARGET_LRA_P aarch64_lra_p
9931 #undef TARGET_MANGLE_TYPE
9932 #define TARGET_MANGLE_TYPE aarch64_mangle_type
9934 #undef TARGET_MEMORY_MOVE_COST
9935 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
9937 #undef TARGET_MUST_PASS_IN_STACK
9938 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
9940 /* This target hook should return true if accesses to volatile bitfields
9941 should use the narrowest mode possible. It should return false if these
9942 accesses should use the bitfield container type. */
9943 #undef TARGET_NARROW_VOLATILE_BITFIELD
9944 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
9946 #undef TARGET_OPTION_OVERRIDE
9947 #define TARGET_OPTION_OVERRIDE aarch64_override_options
9949 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
9950 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
9951 aarch64_override_options_after_change
9953 #undef TARGET_PASS_BY_REFERENCE
9954 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
9956 #undef TARGET_PREFERRED_RELOAD_CLASS
9957 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
9959 #undef TARGET_SECONDARY_RELOAD
9960 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
9962 #undef TARGET_SHIFT_TRUNCATION_MASK
9963 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
9965 #undef TARGET_SETUP_INCOMING_VARARGS
9966 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
9968 #undef TARGET_STRUCT_VALUE_RTX
9969 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
9971 #undef TARGET_REGISTER_MOVE_COST
9972 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
9974 #undef TARGET_RETURN_IN_MEMORY
9975 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
9977 #undef TARGET_RETURN_IN_MSB
9978 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
9980 #undef TARGET_RTX_COSTS
9981 #define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
9983 #undef TARGET_SCHED_ISSUE_RATE
9984 #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
9986 #undef TARGET_TRAMPOLINE_INIT
9987 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
9989 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
9990 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
9992 #undef TARGET_VECTOR_MODE_SUPPORTED_P
9993 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
9995 #undef TARGET_ARRAY_MODE_SUPPORTED_P
9996 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
9998 #undef TARGET_VECTORIZE_ADD_STMT_COST
9999 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
10001 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
10002 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
10003 aarch64_builtin_vectorization_cost
10005 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
10006 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
10008 #undef TARGET_VECTORIZE_BUILTINS
10009 #define TARGET_VECTORIZE_BUILTINS
10011 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
10012 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
10013 aarch64_builtin_vectorized_function
10015 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
10016 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
10017 aarch64_autovectorize_vector_sizes
10019 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
10020 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
10021 aarch64_atomic_assign_expand_fenv
10023 /* Section anchor support. */
10025 #undef TARGET_MIN_ANCHOR_OFFSET
10026 #define TARGET_MIN_ANCHOR_OFFSET -256
10028 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
10029 byte offset; we can do much more for larger data types, but have no way
10030 to determine the size of the access. We assume accesses are aligned. */
10031 #undef TARGET_MAX_ANCHOR_OFFSET
10032 #define TARGET_MAX_ANCHOR_OFFSET 4095
10034 #undef TARGET_VECTOR_ALIGNMENT
10035 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
10037 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
10038 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
10039 aarch64_simd_vector_alignment_reachable
10041 /* vec_perm support. */
10043 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
10044 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
10045 aarch64_vectorize_vec_perm_const_ok
10048 #undef TARGET_FIXED_CONDITION_CODE_REGS
10049 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
10051 #undef TARGET_FLAGS_REGNUM
10052 #define TARGET_FLAGS_REGNUM CC_REGNUM
10054 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
10055 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
10057 struct gcc_target targetm = TARGET_INITIALIZER;
10059 #include "gt-aarch64.h"