[AArch64 costs 10/18] Improve costs for sign/zero extend operations
[official-gcc.git] / gcc / config / aarch64 / aarch64.c
blob 76ef7ce89bfde8c5c1875933b3792ec92e2fe4dc
1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2014 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "tree.h"
29 #include "stringpool.h"
30 #include "stor-layout.h"
31 #include "calls.h"
32 #include "varasm.h"
33 #include "regs.h"
34 #include "df.h"
35 #include "hard-reg-set.h"
36 #include "output.h"
37 #include "expr.h"
38 #include "reload.h"
39 #include "toplev.h"
40 #include "target.h"
41 #include "target-def.h"
42 #include "targhooks.h"
43 #include "ggc.h"
44 #include "function.h"
45 #include "tm_p.h"
46 #include "recog.h"
47 #include "langhooks.h"
48 #include "diagnostic-core.h"
49 #include "pointer-set.h"
50 #include "hash-table.h"
51 #include "vec.h"
52 #include "basic-block.h"
53 #include "tree-ssa-alias.h"
54 #include "internal-fn.h"
55 #include "gimple-fold.h"
56 #include "tree-eh.h"
57 #include "gimple-expr.h"
58 #include "is-a.h"
59 #include "gimple.h"
60 #include "gimplify.h"
61 #include "optabs.h"
62 #include "dwarf2.h"
63 #include "cfgloop.h"
64 #include "tree-vectorizer.h"
65 #include "config/arm/aarch-cost-tables.h"
66 #include "dumpfile.h"
68 /* Defined for convenience. */
69 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
71 /* Classifies an address.
73 ADDRESS_REG_IMM
74 A simple base register plus immediate offset.
76 ADDRESS_REG_WB
77 A base register indexed by immediate offset with writeback.
79 ADDRESS_REG_REG
80 A base register indexed by (optionally scaled) register.
82 ADDRESS_REG_UXTW
83 A base register indexed by (optionally scaled) zero-extended register.
85 ADDRESS_REG_SXTW
86 A base register indexed by (optionally scaled) sign-extended register.
88 ADDRESS_LO_SUM
89 A LO_SUM rtx with a base register and "LO12" symbol relocation.
91 ADDRESS_SYMBOLIC:
92 A constant symbolic address, in pc-relative literal pool. */
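/* Illustrative examples of each class, in typical AArch64 assembly
   syntax (a rough sketch, not an exhaustive list of accepted forms):

     ADDRESS_REG_IMM     ldr  x0, [x1, #16]
     ADDRESS_REG_WB      ldr  x0, [x1, #16]!     (pre-indexed)
                         ldr  x0, [x1], #16      (post-indexed)
     ADDRESS_REG_REG     ldr  x0, [x1, x2, lsl #3]
     ADDRESS_REG_UXTW    ldr  x0, [x1, w2, uxtw #3]
     ADDRESS_REG_SXTW    ldr  x0, [x1, w2, sxtw #3]
     ADDRESS_LO_SUM      ldr  x0, [x1, #:lo12:sym]   (x1 set up by adrp)
     ADDRESS_SYMBOLIC    ldr  x0, .Lpool_entry       (PC-relative literal)  */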
94 enum aarch64_address_type {
95 ADDRESS_REG_IMM,
96 ADDRESS_REG_WB,
97 ADDRESS_REG_REG,
98 ADDRESS_REG_UXTW,
99 ADDRESS_REG_SXTW,
100 ADDRESS_LO_SUM,
101 ADDRESS_SYMBOLIC
104 struct aarch64_address_info {
105 enum aarch64_address_type type;
106 rtx base;
107 rtx offset;
108 int shift;
109 enum aarch64_symbol_type symbol_type;
112 struct simd_immediate_info
114 rtx value;
115 int shift;
116 int element_width;
117 bool mvn;
118 bool msl;
121 /* The current code model. */
122 enum aarch64_code_model aarch64_cmodel;
124 #ifdef HAVE_AS_TLS
125 #undef TARGET_HAVE_TLS
126 #define TARGET_HAVE_TLS 1
127 #endif
129 static bool aarch64_lra_p (void);
130 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
131 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
132 const_tree,
133 enum machine_mode *, int *,
134 bool *);
135 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
136 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
137 static void aarch64_override_options_after_change (void);
138 static bool aarch64_vector_mode_supported_p (enum machine_mode);
139 static unsigned bit_count (unsigned HOST_WIDE_INT);
140 static bool aarch64_const_vec_all_same_int_p (rtx,
141 HOST_WIDE_INT, HOST_WIDE_INT);
143 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
144 const unsigned char *sel);
145 static int aarch64_address_cost (rtx, enum machine_mode, addr_space_t, bool);
147 /* The processor for which instructions should be scheduled. */
148 enum aarch64_processor aarch64_tune = cortexa53;
150 /* The current tuning set. */
151 const struct tune_params *aarch64_tune_params;
153 /* Mask to specify which instructions we are allowed to generate. */
154 unsigned long aarch64_isa_flags = 0;
156 /* Mask to specify which instruction scheduling options should be used. */
157 unsigned long aarch64_tune_flags = 0;
159 /* Tuning parameters. */
161 #if HAVE_DESIGNATED_INITIALIZERS
162 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
163 #else
164 #define NAMED_PARAM(NAME, VAL) (VAL)
165 #endif
167 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
168 __extension__
169 #endif
171 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
172 __extension__
173 #endif
174 static const struct cpu_addrcost_table generic_addrcost_table =
176 #if HAVE_DESIGNATED_INITIALIZERS
177 .addr_scale_costs =
178 #endif
180 NAMED_PARAM (qi, 0),
181 NAMED_PARAM (hi, 0),
182 NAMED_PARAM (si, 0),
183 NAMED_PARAM (ti, 0),
185 NAMED_PARAM (pre_modify, 0),
186 NAMED_PARAM (post_modify, 0),
187 NAMED_PARAM (register_offset, 0),
188 NAMED_PARAM (register_extend, 0),
189 NAMED_PARAM (imm_offset, 0)
192 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
193 __extension__
194 #endif
195 static const struct cpu_addrcost_table cortexa57_addrcost_table =
197 #if HAVE_DESIGNATED_INITIALIZERS
198 .addr_scale_costs =
199 #endif
201 NAMED_PARAM (qi, 0),
202 NAMED_PARAM (hi, 1),
203 NAMED_PARAM (si, 0),
204 NAMED_PARAM (ti, 1),
206 NAMED_PARAM (pre_modify, 0),
207 NAMED_PARAM (post_modify, 0),
208 NAMED_PARAM (register_offset, 0),
209 NAMED_PARAM (register_extend, 0),
210 NAMED_PARAM (imm_offset, 0),
213 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
214 __extension__
215 #endif
216 static const struct cpu_regmove_cost generic_regmove_cost =
218 NAMED_PARAM (GP2GP, 1),
219 NAMED_PARAM (GP2FP, 2),
220 NAMED_PARAM (FP2GP, 2),
221 /* We currently do not provide direct support for TFmode Q->Q move.
222 Therefore we need to raise the cost above 2 in order to have
223 reload handle the situation. */
224 NAMED_PARAM (FP2FP, 4)
227 /* Generic costs for vector insn classes. */
228 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
229 __extension__
230 #endif
231 static const struct cpu_vector_cost generic_vector_cost =
233 NAMED_PARAM (scalar_stmt_cost, 1),
234 NAMED_PARAM (scalar_load_cost, 1),
235 NAMED_PARAM (scalar_store_cost, 1),
236 NAMED_PARAM (vec_stmt_cost, 1),
237 NAMED_PARAM (vec_to_scalar_cost, 1),
238 NAMED_PARAM (scalar_to_vec_cost, 1),
239 NAMED_PARAM (vec_align_load_cost, 1),
240 NAMED_PARAM (vec_unalign_load_cost, 1),
241 NAMED_PARAM (vec_unalign_store_cost, 1),
242 NAMED_PARAM (vec_store_cost, 1),
243 NAMED_PARAM (cond_taken_branch_cost, 3),
244 NAMED_PARAM (cond_not_taken_branch_cost, 1)
247 /* Generic costs for vector insn classes. */
248 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
249 __extension__
250 #endif
251 static const struct cpu_vector_cost cortexa57_vector_cost =
253 NAMED_PARAM (scalar_stmt_cost, 1),
254 NAMED_PARAM (scalar_load_cost, 4),
255 NAMED_PARAM (scalar_store_cost, 1),
256 NAMED_PARAM (vec_stmt_cost, 3),
257 NAMED_PARAM (vec_to_scalar_cost, 8),
258 NAMED_PARAM (scalar_to_vec_cost, 8),
259 NAMED_PARAM (vec_align_load_cost, 5),
260 NAMED_PARAM (vec_unalign_load_cost, 5),
261 NAMED_PARAM (vec_unalign_store_cost, 1),
262 NAMED_PARAM (vec_store_cost, 1),
263 NAMED_PARAM (cond_taken_branch_cost, 1),
264 NAMED_PARAM (cond_not_taken_branch_cost, 1)
267 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
268 __extension__
269 #endif
270 static const struct tune_params generic_tunings =
272 &cortexa57_extra_costs,
273 &generic_addrcost_table,
274 &generic_regmove_cost,
275 &generic_vector_cost,
276 NAMED_PARAM (memmov_cost, 4),
277 NAMED_PARAM (issue_rate, 2)
280 static const struct tune_params cortexa53_tunings =
282 &cortexa53_extra_costs,
283 &generic_addrcost_table,
284 &generic_regmove_cost,
285 &generic_vector_cost,
286 NAMED_PARAM (memmov_cost, 4),
287 NAMED_PARAM (issue_rate, 2)
290 static const struct tune_params cortexa57_tunings =
292 &cortexa57_extra_costs,
293 &cortexa57_addrcost_table,
294 &generic_regmove_cost,
295 &cortexa57_vector_cost,
296 NAMED_PARAM (memmov_cost, 4),
297 NAMED_PARAM (issue_rate, 3)
300 /* A processor implementing AArch64. */
301 struct processor
303 const char *const name;
304 enum aarch64_processor core;
305 const char *arch;
306 const unsigned long flags;
307 const struct tune_params *const tune;
310 /* Processor cores implementing AArch64. */
311 static const struct processor all_cores[] =
313 #define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
314 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
315 #include "aarch64-cores.def"
316 #undef AARCH64_CORE
317 {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
318 {NULL, aarch64_none, NULL, 0, NULL}
321 /* Architectures implementing AArch64. */
322 static const struct processor all_architectures[] =
324 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
325 {NAME, CORE, #ARCH, FLAGS, NULL},
326 #include "aarch64-arches.def"
327 #undef AARCH64_ARCH
328 {NULL, aarch64_none, NULL, 0, NULL}
331 /* Target specification. These are populated as commandline arguments
332 are processed, or NULL if not specified. */
333 static const struct processor *selected_arch;
334 static const struct processor *selected_cpu;
335 static const struct processor *selected_tune;
337 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
339 /* An ISA extension in the co-processor and main instruction set space. */
340 struct aarch64_option_extension
342 const char *const name;
343 const unsigned long flags_on;
344 const unsigned long flags_off;
347 /* ISA extensions in AArch64. */
348 static const struct aarch64_option_extension all_extensions[] =
350 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
351 {NAME, FLAGS_ON, FLAGS_OFF},
352 #include "aarch64-option-extensions.def"
353 #undef AARCH64_OPT_EXTENSION
354 {NULL, 0, 0}
357 /* Used to track the size of an address when generating a pre/post
358 increment address. */
359 static enum machine_mode aarch64_memory_reference_mode;
361 /* Used to force GTY into this file. */
362 static GTY(()) int gty_dummy;
364 /* A table of valid AArch64 "bitmask immediate" values for
365 logical instructions. */
367 #define AARCH64_NUM_BITMASKS 5334
368 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
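/* Roughly speaking, a 64-bit bitmask immediate is obtained by
   replicating a 2, 4, 8, 16, 32 or 64-bit element that contains a
   single contiguous (possibly rotated) run of set bits; zero and
   all-ones are not representable.  For example:

     0x5555555555555555   valid   (2-bit element 0b01 replicated)
     0x00ff00ff00ff00ff   valid   (16-bit element, run of 8 ones)
     0x0003fffffffffffc   valid   (single run within a 64-bit element)
     0x0000000000001234   invalid (bits are not one contiguous run)

   There are 5334 such values in total, hence AARCH64_NUM_BITMASKS.  */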
370 typedef enum aarch64_cond_code
372 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
373 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
374 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
376 aarch64_cc;
378 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
380 /* The condition codes of the processor, and the inverse function. */
381 static const char * const aarch64_condition_codes[] =
383 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
384 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
387 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
388 unsigned
389 aarch64_dbx_register_number (unsigned regno)
391 if (GP_REGNUM_P (regno))
392 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
393 else if (regno == SP_REGNUM)
394 return AARCH64_DWARF_SP;
395 else if (FP_REGNUM_P (regno))
396 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
398 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
399 equivalent DWARF register. */
400 return DWARF_FRAME_REGISTERS;
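/* As a concrete illustration, using the usual AArch64 DWARF numbering
   (AARCH64_DWARF_R0 == 0, AARCH64_DWARF_SP == 31, AARCH64_DWARF_V0 == 64):

     aarch64_dbx_register_number (R0_REGNUM)  == 0    (x0)
     aarch64_dbx_register_number (SP_REGNUM)  == 31   (sp)
     aarch64_dbx_register_number (V0_REGNUM)  == 64   (v0)
     aarch64_dbx_register_number (CC_REGNUM)  == DWARF_FRAME_REGISTERS,
       i.e. the condition flags have no DWARF equivalent.  */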
403 /* Return TRUE if MODE is any of the large INT modes. */
404 static bool
405 aarch64_vect_struct_mode_p (enum machine_mode mode)
407 return mode == OImode || mode == CImode || mode == XImode;
410 /* Return TRUE if MODE is any of the vector modes. */
411 static bool
412 aarch64_vector_mode_p (enum machine_mode mode)
414 return aarch64_vector_mode_supported_p (mode)
415 || aarch64_vect_struct_mode_p (mode);
418 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
419 static bool
420 aarch64_array_mode_supported_p (enum machine_mode mode,
421 unsigned HOST_WIDE_INT nelems)
423 if (TARGET_SIMD
424 && AARCH64_VALID_SIMD_QREG_MODE (mode)
425 && (nelems >= 2 && nelems <= 4))
426 return true;
428 return false;
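/* For example, with TARGET_SIMD this hook accepts (V4SImode, 3), which
   lets an array of three Q-register vectors live in a single CImode
   pseudo.  A sketch of the kind of source this helps (vld3q_s32 is the
   ACLE intrinsic from <arm_neon.h>):

     int32x4x3_t t = vld3q_s32 (p);    /- one ld3, kept in registers -/

   OImode, CImode and XImode correspond to arrays of 2, 3 and 4
   Q-register vectors respectively.  */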
431 /* Implement HARD_REGNO_NREGS. */
434 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
436 switch (aarch64_regno_regclass (regno))
438 case FP_REGS:
439 case FP_LO_REGS:
440 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
441 default:
442 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
444 gcc_unreachable ();
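/* Worked examples, assuming UNITS_PER_WORD == 8 and UNITS_PER_VREG == 16:

     TImode (16 bytes) in a GP register class  -> 2 registers (e.g. x0/x1)
     TFmode (16 bytes) in FP_REGS              -> 1 register  (one Q register)
     OImode (32 bytes) in FP_REGS              -> 2 registers (a Q-register pair)  */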
447 /* Implement HARD_REGNO_MODE_OK. */
450 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
452 if (GET_MODE_CLASS (mode) == MODE_CC)
453 return regno == CC_REGNUM;
455 if (regno == SP_REGNUM)
456 /* The purpose of comparing with ptr_mode is to support the
457 global register variable associated with the stack pointer
458 register via the syntax of asm ("wsp") in ILP32. */
459 return mode == Pmode || mode == ptr_mode;
461 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
462 return mode == Pmode;
464 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
465 return 1;
467 if (FP_REGNUM_P (regno))
469 if (aarch64_vect_struct_mode_p (mode))
470 return
471 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
472 else
473 return 1;
476 return 0;
479 /* Implement HARD_REGNO_CALLER_SAVE_MODE. */
480 enum machine_mode
481 aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
482 enum machine_mode mode)
484 /* Handle modes that fit within single registers. */
485 if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
487 if (GET_MODE_SIZE (mode) >= 4)
488 return mode;
489 else
490 return SImode;
492 /* Fall back to generic for multi-reg and very large modes. */
493 else
494 return choose_hard_reg_mode (regno, nregs, false);
497 /* Return true if calls to DECL should be treated as
498 long-calls (i.e. called via a register). */
499 static bool
500 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
502 return false;
505 /* Return true if calls to symbol-ref SYM should be treated as
506 long-calls (i.e. called via a register). */
507 bool
508 aarch64_is_long_call_p (rtx sym)
510 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
513 /* Return true if the offsets to a zero/sign-extract operation
514 represent an expression that matches an extend operation. The
515 operands represent the parameters from
517 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
518 bool
519 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
520 rtx extract_imm)
522 HOST_WIDE_INT mult_val, extract_val;
524 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
525 return false;
527 mult_val = INTVAL (mult_imm);
528 extract_val = INTVAL (extract_imm);
530 if (extract_val > 8
531 && extract_val < GET_MODE_BITSIZE (mode)
532 && exact_log2 (extract_val & ~7) > 0
533 && (extract_val & 7) <= 4
534 && mult_val == (1 << (extract_val & 7)))
535 return true;
537 return false;
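/* An illustrative reading of the test above, for MODE == DImode:
   MULT_IMM == 4 and EXTRACT_IMM == 34 are accepted, because taking the
   low 34 bits of (reg * 4) is the same as zero-extending the low 32
   bits of REG and then shifting left by 2, which is the
   "add x0, x1, w2, uxtw #2" style of extended operand.  */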
540 /* Emit an insn that's a simple single-set. Both the operands must be
541 known to be valid. */
542 inline static rtx
543 emit_set_insn (rtx x, rtx y)
545 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
548 /* X and Y are two things to compare using CODE. Emit the compare insn and
549 return the rtx for register 0 in the proper mode. */
551 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
553 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
554 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
556 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
557 return cc_reg;
560 /* Build the SYMBOL_REF for __tls_get_addr. */
562 static GTY(()) rtx tls_get_addr_libfunc;
565 aarch64_tls_get_addr (void)
567 if (!tls_get_addr_libfunc)
568 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
569 return tls_get_addr_libfunc;
572 /* Return the TLS model to use for ADDR. */
574 static enum tls_model
575 tls_symbolic_operand_type (rtx addr)
577 enum tls_model tls_kind = TLS_MODEL_NONE;
578 rtx sym, addend;
580 if (GET_CODE (addr) == CONST)
582 split_const (addr, &sym, &addend);
583 if (GET_CODE (sym) == SYMBOL_REF)
584 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
586 else if (GET_CODE (addr) == SYMBOL_REF)
587 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
589 return tls_kind;
592 /* We'll allow LO_SUMs in our legitimate addresses so that
593 combine can take care of combining addresses where
594 necessary, but for generation purposes we'll generate the address
595 as:
596 RTL Absolute
597 tmp = hi (symbol_ref); adrp x1, foo
598 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
601 PIC TLS
602 adrp x1, :got:foo adrp tmp, :tlsgd:foo
603 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
604 bl __tls_get_addr
607 Load TLS symbol, depending on TLS mechanism and TLS access model.
609 Global Dynamic - Traditional TLS:
610 adrp tmp, :tlsgd:imm
611 add dest, tmp, #:tlsgd_lo12:imm
612 bl __tls_get_addr
614 Global Dynamic - TLS Descriptors:
615 adrp dest, :tlsdesc:imm
616 ldr tmp, [dest, #:tlsdesc_lo12:imm]
617 add dest, dest, #:tlsdesc_lo12:imm
618 blr tmp
619 mrs tp, tpidr_el0
620 add dest, dest, tp
622 Initial Exec:
623 mrs tp, tpidr_el0
624 adrp tmp, :gottprel:imm
625 ldr dest, [tmp, #:gottprel_lo12:imm]
626 add dest, dest, tp
628 Local Exec:
629 mrs tp, tpidr_el0
630 add t0, tp, #:tprel_hi12:imm
631 add t0, #:tprel_lo12_nc:imm
634 static void
635 aarch64_load_symref_appropriately (rtx dest, rtx imm,
636 enum aarch64_symbol_type type)
638 switch (type)
640 case SYMBOL_SMALL_ABSOLUTE:
642 /* In ILP32, the mode of dest can be either SImode or DImode. */
643 rtx tmp_reg = dest;
644 enum machine_mode mode = GET_MODE (dest);
646 gcc_assert (mode == Pmode || mode == ptr_mode);
648 if (can_create_pseudo_p ())
649 tmp_reg = gen_reg_rtx (mode);
651 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
652 emit_insn (gen_add_losym (dest, tmp_reg, imm));
653 return;
656 case SYMBOL_TINY_ABSOLUTE:
657 emit_insn (gen_rtx_SET (Pmode, dest, imm));
658 return;
660 case SYMBOL_SMALL_GOT:
662 /* In ILP32, the mode of dest can be either SImode or DImode,
663 while the got entry is always of SImode size. The mode of
664 dest depends on how dest is used: if dest is assigned to a
665 pointer (e.g. in the memory), it has SImode; it may have
666 DImode if dest is dereferenced to access the memory.
667 This is why we have to handle three different ldr_got_small
668 patterns here (two patterns for ILP32). */
669 rtx tmp_reg = dest;
670 enum machine_mode mode = GET_MODE (dest);
672 if (can_create_pseudo_p ())
673 tmp_reg = gen_reg_rtx (mode);
675 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
676 if (mode == ptr_mode)
678 if (mode == DImode)
679 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
680 else
681 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
683 else
685 gcc_assert (mode == Pmode);
686 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
689 return;
692 case SYMBOL_SMALL_TLSGD:
694 rtx insns;
695 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
697 start_sequence ();
698 emit_call_insn (gen_tlsgd_small (result, imm));
699 insns = get_insns ();
700 end_sequence ();
702 RTL_CONST_CALL_P (insns) = 1;
703 emit_libcall_block (insns, dest, result, imm);
704 return;
707 case SYMBOL_SMALL_TLSDESC:
709 enum machine_mode mode = GET_MODE (dest);
710 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
711 rtx tp;
713 gcc_assert (mode == Pmode || mode == ptr_mode);
715 /* In ILP32, the got entry is always of SImode size. Unlike
716 small GOT, the dest is fixed at reg 0. */
717 if (TARGET_ILP32)
718 emit_insn (gen_tlsdesc_small_si (imm));
719 else
720 emit_insn (gen_tlsdesc_small_di (imm));
721 tp = aarch64_load_tp (NULL);
723 if (mode != Pmode)
724 tp = gen_lowpart (mode, tp);
726 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
727 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
728 return;
731 case SYMBOL_SMALL_GOTTPREL:
733 /* In ILP32, the mode of dest can be either SImode or DImode,
734 while the got entry is always of SImode size. The mode of
735 dest depends on how dest is used: if dest is assigned to a
736 pointer (e.g. in the memory), it has SImode; it may have
737 DImode if dest is dereferenced to access the memory.
738 This is why we have to handle three different tlsie_small
739 patterns here (two patterns for ILP32). */
740 enum machine_mode mode = GET_MODE (dest);
741 rtx tmp_reg = gen_reg_rtx (mode);
742 rtx tp = aarch64_load_tp (NULL);
744 if (mode == ptr_mode)
746 if (mode == DImode)
747 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
748 else
750 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
751 tp = gen_lowpart (mode, tp);
754 else
756 gcc_assert (mode == Pmode);
757 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
760 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
761 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
762 return;
765 case SYMBOL_SMALL_TPREL:
767 rtx tp = aarch64_load_tp (NULL);
768 emit_insn (gen_tlsle_small (dest, tp, imm));
769 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
770 return;
773 case SYMBOL_TINY_GOT:
774 emit_insn (gen_ldr_got_tiny (dest, imm));
775 return;
777 default:
778 gcc_unreachable ();
782 /* Emit a move from SRC to DEST. Assume that the move expanders can
783 handle all moves if !can_create_pseudo_p (). The distinction is
784 important because, unlike emit_move_insn, the move expanders know
785 how to force Pmode objects into the constant pool even when the
786 constant pool address is not itself legitimate. */
787 static rtx
788 aarch64_emit_move (rtx dest, rtx src)
790 return (can_create_pseudo_p ()
791 ? emit_move_insn (dest, src)
792 : emit_move_insn_1 (dest, src));
795 /* Split a 128-bit move operation into two 64-bit move operations,
796 taking care to handle partial overlap of register to register
797 copies. Special cases are needed when moving between GP regs and
798 FP regs. SRC can be a register, constant or memory; DST a register
799 or memory. If either operand is memory it must not have any side
800 effects. */
801 void
802 aarch64_split_128bit_move (rtx dst, rtx src)
804 rtx dst_lo, dst_hi;
805 rtx src_lo, src_hi;
807 enum machine_mode mode = GET_MODE (dst);
809 gcc_assert (mode == TImode || mode == TFmode);
810 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
811 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
813 if (REG_P (dst) && REG_P (src))
815 int src_regno = REGNO (src);
816 int dst_regno = REGNO (dst);
818 /* Handle FP <-> GP regs. */
819 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
821 src_lo = gen_lowpart (word_mode, src);
822 src_hi = gen_highpart (word_mode, src);
824 if (mode == TImode)
826 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
827 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
829 else
831 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
832 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
834 return;
836 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
838 dst_lo = gen_lowpart (word_mode, dst);
839 dst_hi = gen_highpart (word_mode, dst);
841 if (mode == TImode)
843 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
844 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
846 else
848 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
849 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
851 return;
855 dst_lo = gen_lowpart (word_mode, dst);
856 dst_hi = gen_highpart (word_mode, dst);
857 src_lo = gen_lowpart (word_mode, src);
858 src_hi = gen_highpart_mode (word_mode, mode, src);
860 /* At most one pairing may overlap. */
861 if (reg_overlap_mentioned_p (dst_lo, src_hi))
863 aarch64_emit_move (dst_hi, src_hi);
864 aarch64_emit_move (dst_lo, src_lo);
866 else
868 aarch64_emit_move (dst_lo, src_lo);
869 aarch64_emit_move (dst_hi, src_hi);
873 bool
874 aarch64_split_128bit_move_p (rtx dst, rtx src)
876 return (! REG_P (src)
877 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
880 /* Split a complex SIMD combine. */
882 void
883 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
885 enum machine_mode src_mode = GET_MODE (src1);
886 enum machine_mode dst_mode = GET_MODE (dst);
888 gcc_assert (VECTOR_MODE_P (dst_mode));
890 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
892 rtx (*gen) (rtx, rtx, rtx);
894 switch (src_mode)
896 case V8QImode:
897 gen = gen_aarch64_simd_combinev8qi;
898 break;
899 case V4HImode:
900 gen = gen_aarch64_simd_combinev4hi;
901 break;
902 case V2SImode:
903 gen = gen_aarch64_simd_combinev2si;
904 break;
905 case V2SFmode:
906 gen = gen_aarch64_simd_combinev2sf;
907 break;
908 case DImode:
909 gen = gen_aarch64_simd_combinedi;
910 break;
911 case DFmode:
912 gen = gen_aarch64_simd_combinedf;
913 break;
914 default:
915 gcc_unreachable ();
918 emit_insn (gen (dst, src1, src2));
919 return;
923 /* Split a complex SIMD move. */
925 void
926 aarch64_split_simd_move (rtx dst, rtx src)
928 enum machine_mode src_mode = GET_MODE (src);
929 enum machine_mode dst_mode = GET_MODE (dst);
931 gcc_assert (VECTOR_MODE_P (dst_mode));
933 if (REG_P (dst) && REG_P (src))
935 rtx (*gen) (rtx, rtx);
937 gcc_assert (VECTOR_MODE_P (src_mode));
939 switch (src_mode)
941 case V16QImode:
942 gen = gen_aarch64_split_simd_movv16qi;
943 break;
944 case V8HImode:
945 gen = gen_aarch64_split_simd_movv8hi;
946 break;
947 case V4SImode:
948 gen = gen_aarch64_split_simd_movv4si;
949 break;
950 case V2DImode:
951 gen = gen_aarch64_split_simd_movv2di;
952 break;
953 case V4SFmode:
954 gen = gen_aarch64_split_simd_movv4sf;
955 break;
956 case V2DFmode:
957 gen = gen_aarch64_split_simd_movv2df;
958 break;
959 default:
960 gcc_unreachable ();
963 emit_insn (gen (dst, src));
964 return;
968 static rtx
969 aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
971 if (can_create_pseudo_p ())
972 return force_reg (mode, value);
973 else
975 x = aarch64_emit_move (x, value);
976 return x;
981 static rtx
982 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
984 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
986 rtx high;
987 /* Load the full offset into a register. This
988 might be improvable in the future. */
989 high = GEN_INT (offset);
990 offset = 0;
991 high = aarch64_force_temporary (mode, temp, high);
992 reg = aarch64_force_temporary (mode, temp,
993 gen_rtx_PLUS (mode, high, reg));
995 return plus_constant (mode, reg, offset);
998 void
999 aarch64_expand_mov_immediate (rtx dest, rtx imm)
1001 enum machine_mode mode = GET_MODE (dest);
1002 unsigned HOST_WIDE_INT mask;
1003 int i;
1004 bool first;
1005 unsigned HOST_WIDE_INT val;
1006 bool subtargets;
1007 rtx subtarget;
1008 int one_match, zero_match;
1010 gcc_assert (mode == SImode || mode == DImode);
1012 /* Check on what type of symbol it is. */
1013 if (GET_CODE (imm) == SYMBOL_REF
1014 || GET_CODE (imm) == LABEL_REF
1015 || GET_CODE (imm) == CONST)
1017 rtx mem, base, offset;
1018 enum aarch64_symbol_type sty;
1020 /* If we have (const (plus symbol offset)), separate out the offset
1021 before we start classifying the symbol. */
1022 split_const (imm, &base, &offset);
1024 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
1025 switch (sty)
1027 case SYMBOL_FORCE_TO_MEM:
1028 if (offset != const0_rtx
1029 && targetm.cannot_force_const_mem (mode, imm))
1031 gcc_assert (can_create_pseudo_p ());
1032 base = aarch64_force_temporary (mode, dest, base);
1033 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1034 aarch64_emit_move (dest, base);
1035 return;
1037 mem = force_const_mem (ptr_mode, imm);
1038 gcc_assert (mem);
1039 if (mode != ptr_mode)
1040 mem = gen_rtx_ZERO_EXTEND (mode, mem);
1041 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1042 return;
1044 case SYMBOL_SMALL_TLSGD:
1045 case SYMBOL_SMALL_TLSDESC:
1046 case SYMBOL_SMALL_GOTTPREL:
1047 case SYMBOL_SMALL_GOT:
1048 case SYMBOL_TINY_GOT:
1049 if (offset != const0_rtx)
1051 gcc_assert(can_create_pseudo_p ());
1052 base = aarch64_force_temporary (mode, dest, base);
1053 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1054 aarch64_emit_move (dest, base);
1055 return;
1057 /* FALLTHRU */
1059 case SYMBOL_SMALL_TPREL:
1060 case SYMBOL_SMALL_ABSOLUTE:
1061 case SYMBOL_TINY_ABSOLUTE:
1062 aarch64_load_symref_appropriately (dest, imm, sty);
1063 return;
1065 default:
1066 gcc_unreachable ();
1070 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
1072 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1073 return;
1076 if (!CONST_INT_P (imm))
1078 if (GET_CODE (imm) == HIGH)
1079 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1080 else
1082 rtx mem = force_const_mem (mode, imm);
1083 gcc_assert (mem);
1084 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1087 return;
1090 if (mode == SImode)
1092 /* We know we can't do this in 1 insn, and we must be able to do it
1093 in two; so don't mess around looking for sequences that don't buy
1094 us anything. */
1095 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
1096 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1097 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
1098 return;
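  /* For example, the SImode constant 0x12345678 is built as

       mov  w0, #0x5678
       movk w0, #0x1234, lsl #16

     the second instruction being the insv_immsi pattern used above.  */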
1101 /* Remaining cases are all for DImode. */
1103 val = INTVAL (imm);
1104 subtargets = optimize && can_create_pseudo_p ();
1106 one_match = 0;
1107 zero_match = 0;
1108 mask = 0xffff;
1110 for (i = 0; i < 64; i += 16, mask <<= 16)
1112 if ((val & mask) == 0)
1113 zero_match++;
1114 else if ((val & mask) == mask)
1115 one_match++;
1118 if (one_match == 2)
1120 mask = 0xffff;
1121 for (i = 0; i < 64; i += 16, mask <<= 16)
1123 if ((val & mask) != mask)
1125 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1126 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1127 GEN_INT ((val >> i) & 0xffff)));
1128 return;
1131 gcc_unreachable ();
1134 if (zero_match == 2)
1135 goto simple_sequence;
1137 mask = 0x0ffff0000UL;
1138 for (i = 16; i < 64; i += 16, mask <<= 16)
1140 HOST_WIDE_INT comp = mask & ~(mask - 1);
1142 if (aarch64_uimm12_shift (val - (val & mask)))
1144 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1146 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1147 emit_insn (gen_adddi3 (dest, subtarget,
1148 GEN_INT (val - (val & mask))));
1149 return;
1151 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1153 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1155 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1156 GEN_INT ((val + comp) & mask)));
1157 emit_insn (gen_adddi3 (dest, subtarget,
1158 GEN_INT (val - ((val + comp) & mask))));
1159 return;
1161 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1163 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1165 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1166 GEN_INT ((val - comp) | ~mask)));
1167 emit_insn (gen_adddi3 (dest, subtarget,
1168 GEN_INT (val - ((val - comp) | ~mask))));
1169 return;
1171 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1173 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1175 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1176 GEN_INT (val | ~mask)));
1177 emit_insn (gen_adddi3 (dest, subtarget,
1178 GEN_INT (val - (val | ~mask))));
1179 return;
1183 /* See if we can do it by arithmetically combining two
1184 immediates. */
1185 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1187 int j;
1188 mask = 0xffff;
1190 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1191 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1193 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1194 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1195 GEN_INT (aarch64_bitmasks[i])));
1196 emit_insn (gen_adddi3 (dest, subtarget,
1197 GEN_INT (val - aarch64_bitmasks[i])));
1198 return;
1201 for (j = 0; j < 64; j += 16, mask <<= 16)
1203 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1205 emit_insn (gen_rtx_SET (VOIDmode, dest,
1206 GEN_INT (aarch64_bitmasks[i])));
1207 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1208 GEN_INT ((val >> j) & 0xffff)));
1209 return;
1214 /* See if we can do it by logically combining two immediates. */
1215 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1217 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1219 int j;
1221 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1222 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1224 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1225 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1226 GEN_INT (aarch64_bitmasks[i])));
1227 emit_insn (gen_iordi3 (dest, subtarget,
1228 GEN_INT (aarch64_bitmasks[j])));
1229 return;
1232 else if ((val & aarch64_bitmasks[i]) == val)
1234 int j;
1236 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1237 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1240 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1241 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1242 GEN_INT (aarch64_bitmasks[j])));
1243 emit_insn (gen_anddi3 (dest, subtarget,
1244 GEN_INT (aarch64_bitmasks[i])));
1245 return;
1250 simple_sequence:
1251 first = true;
1252 mask = 0xffff;
1253 for (i = 0; i < 64; i += 16, mask <<= 16)
1255 if ((val & mask) != 0)
1257 if (first)
1259 emit_insn (gen_rtx_SET (VOIDmode, dest,
1260 GEN_INT (val & mask)));
1261 first = false;
1263 else
1264 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1265 GEN_INT ((val >> i) & 0xffff)));
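/* As a rough illustration of the fall-back sequence above: the DImode
   constant 0x0001000200030004 has no zero and no 0xffff halfwords and
   is not reachable by the earlier shortcuts, so it is built as

     mov  x0, #0x4
     movk x0, #0x3, lsl #16
     movk x0, #0x2, lsl #32
     movk x0, #0x1, lsl #48  */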
1270 static bool
1271 aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1273 /* Indirect calls are not currently supported. */
1274 if (decl == NULL)
1275 return false;
1277 /* Cannot tail-call to long-calls, since these are outside of the
1278 range of a branch instruction (we could handle this if we added
1279 support for indirect tail-calls). */
1280 if (aarch64_decl_is_long_call_p (decl))
1281 return false;
1283 return true;
1286 /* Implement TARGET_PASS_BY_REFERENCE. */
1288 static bool
1289 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1290 enum machine_mode mode,
1291 const_tree type,
1292 bool named ATTRIBUTE_UNUSED)
1294 HOST_WIDE_INT size;
1295 enum machine_mode dummymode;
1296 int nregs;
1298 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1299 size = (mode == BLKmode && type)
1300 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1302 /* Aggregates are passed by reference based on their size. */
1303 if (type && AGGREGATE_TYPE_P (type))
1305 size = int_size_in_bytes (type);
1308 /* Variable sized arguments are always returned by reference. */
1309 if (size < 0)
1310 return true;
1312 /* Can this be a candidate to be passed in fp/simd register(s)? */
1313 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1314 &dummymode, &nregs,
1315 NULL))
1316 return false;
1318 /* Arguments which are variable sized or larger than 2 registers are
1319 passed by reference unless they are a homogeneous floating-point
1320 aggregate. */
1321 return size > 2 * UNITS_PER_WORD;
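/* Some consequences of the rules above, assuming LP64 (a sketch, not a
   normative statement of the AAPCS64):

     struct s1 { long a, b; };          16 bytes: passed in registers
     struct s2 { long a, b, c; };       24 bytes: passed by reference
     struct s3 { double a, b, c, d; };  32 bytes, but an HFA: passed in
                                        d0-d3 when those are available,
                                        never by reference.  */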
1324 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1325 static bool
1326 aarch64_return_in_msb (const_tree valtype)
1328 enum machine_mode dummy_mode;
1329 int dummy_int;
1331 /* Never happens in little-endian mode. */
1332 if (!BYTES_BIG_ENDIAN)
1333 return false;
1335 /* Only composite types smaller than or equal to 16 bytes can
1336 be potentially returned in registers. */
1337 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1338 || int_size_in_bytes (valtype) <= 0
1339 || int_size_in_bytes (valtype) > 16)
1340 return false;
1342 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1343 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1344 is always passed/returned in the least significant bits of fp/simd
1345 register(s). */
1346 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1347 &dummy_mode, &dummy_int, NULL))
1348 return false;
1350 return true;
1353 /* Implement TARGET_FUNCTION_VALUE.
1354 Define how to find the value returned by a function. */
1356 static rtx
1357 aarch64_function_value (const_tree type, const_tree func,
1358 bool outgoing ATTRIBUTE_UNUSED)
1360 enum machine_mode mode;
1361 int unsignedp;
1362 int count;
1363 enum machine_mode ag_mode;
1365 mode = TYPE_MODE (type);
1366 if (INTEGRAL_TYPE_P (type))
1367 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1369 if (aarch64_return_in_msb (type))
1371 HOST_WIDE_INT size = int_size_in_bytes (type);
1373 if (size % UNITS_PER_WORD != 0)
1375 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1376 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1380 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1381 &ag_mode, &count, NULL))
1383 if (!aarch64_composite_type_p (type, mode))
1385 gcc_assert (count == 1 && mode == ag_mode);
1386 return gen_rtx_REG (mode, V0_REGNUM);
1388 else
1390 int i;
1391 rtx par;
1393 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1394 for (i = 0; i < count; i++)
1396 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1397 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1398 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1399 XVECEXP (par, 0, i) = tmp;
1401 return par;
1404 else
1405 return gen_rtx_REG (mode, R0_REGNUM);
1408 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1409 Return true if REGNO is the number of a hard register in which the values
1410 of called function may come back. */
1412 static bool
1413 aarch64_function_value_regno_p (const unsigned int regno)
1415 /* Maximum of 16 bytes can be returned in the general registers. Examples
1416 of 16-byte return values are: 128-bit integers and 16-byte small
1417 structures (excluding homogeneous floating-point aggregates). */
1418 if (regno == R0_REGNUM || regno == R1_REGNUM)
1419 return true;
1421 /* Up to four fp/simd registers can return a function value, e.g. a
1422 homogeneous floating-point aggregate having four members. */
1423 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1424 return !TARGET_GENERAL_REGS_ONLY;
1426 return false;
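/* Illustrative cases (assuming FP/SIMD is available):

     __int128 return value           -> x0/x1, i.e. R0_REGNUM/R1_REGNUM
     struct { float a, b, c, d; }    -> an HFA returned in s0-s3,
                                        i.e. V0_REGNUM..V3_REGNUM
     x2 or v4                        -> never carry a return value.  */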
1429 /* Implement TARGET_RETURN_IN_MEMORY.
1431 If the type T of the result of a function is such that
1432 void func (T arg)
1433 would require that arg be passed as a value in a register (or set of
1434 registers) according to the parameter passing rules, then the result
1435 is returned in the same registers as would be used for such an
1436 argument. */
1438 static bool
1439 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1441 HOST_WIDE_INT size;
1442 enum machine_mode ag_mode;
1443 int count;
1445 if (!AGGREGATE_TYPE_P (type)
1446 && TREE_CODE (type) != COMPLEX_TYPE
1447 && TREE_CODE (type) != VECTOR_TYPE)
1448 /* Simple scalar types always returned in registers. */
1449 return false;
1451 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1452 type,
1453 &ag_mode,
1454 &count,
1455 NULL))
1456 return false;
1458 /* Types larger than 2 registers returned in memory. */
1459 size = int_size_in_bytes (type);
1460 return (size < 0 || size > 2 * UNITS_PER_WORD);
1463 static bool
1464 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1465 const_tree type, int *nregs)
1467 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1468 return aarch64_vfp_is_call_or_return_candidate (mode,
1469 type,
1470 &pcum->aapcs_vfp_rmode,
1471 nregs,
1472 NULL);
1475 /* Given MODE and TYPE of a function argument, return the alignment in
1476 bits. The idea is to suppress any stronger alignment requested by
1477 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1478 This is a helper function for local use only. */
1480 static unsigned int
1481 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1483 unsigned int alignment;
1485 if (type)
1487 if (!integer_zerop (TYPE_SIZE (type)))
1489 if (TYPE_MODE (type) == mode)
1490 alignment = TYPE_ALIGN (type);
1491 else
1492 alignment = GET_MODE_ALIGNMENT (mode);
1494 else
1495 alignment = 0;
1497 else
1498 alignment = GET_MODE_ALIGNMENT (mode);
1500 return alignment;
1503 /* Layout a function argument according to the AAPCS64 rules. The rule
1504 numbers refer to the rule numbers in the AAPCS64. */
1506 static void
1507 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1508 const_tree type,
1509 bool named ATTRIBUTE_UNUSED)
1511 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1512 int ncrn, nvrn, nregs;
1513 bool allocate_ncrn, allocate_nvrn;
1515 /* We need to do this once per argument. */
1516 if (pcum->aapcs_arg_processed)
1517 return;
1519 pcum->aapcs_arg_processed = true;
1521 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1522 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1523 mode,
1524 type,
1525 &nregs);
1527 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1528 The following code thus handles passing by SIMD/FP registers first. */
1530 nvrn = pcum->aapcs_nvrn;
1532 /* C.1 - C.5 for floating point, homogeneous floating-point aggregates (HFA)
1533 and homogeneous short-vector aggregates (HVA). */
1534 if (allocate_nvrn)
1536 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1538 pcum->aapcs_nextnvrn = nvrn + nregs;
1539 if (!aarch64_composite_type_p (type, mode))
1541 gcc_assert (nregs == 1);
1542 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1544 else
1546 rtx par;
1547 int i;
1548 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1549 for (i = 0; i < nregs; i++)
1551 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1552 V0_REGNUM + nvrn + i);
1553 tmp = gen_rtx_EXPR_LIST
1554 (VOIDmode, tmp,
1555 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1556 XVECEXP (par, 0, i) = tmp;
1558 pcum->aapcs_reg = par;
1560 return;
1562 else
1564 /* C.3 NSRN is set to 8. */
1565 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1566 goto on_stack;
1570 ncrn = pcum->aapcs_ncrn;
1571 nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1572 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1575 /* C.6 - C.9, though the sign and zero extension semantics are
1576 handled elsewhere. This is the case where the argument fits
1577 entirely in general registers. */
1578 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1580 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1582 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1584 /* C.8 if the argument has an alignment of 16 then the NGRN is
1585 rounded up to the next even number. */
1586 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1588 ++ncrn;
1589 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1591 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1592 A reg is still generated for it, but the caller should be smart
1593 enough not to use it. */
1594 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1596 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1598 else
1600 rtx par;
1601 int i;
1603 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1604 for (i = 0; i < nregs; i++)
1606 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1607 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1608 GEN_INT (i * UNITS_PER_WORD));
1609 XVECEXP (par, 0, i) = tmp;
1611 pcum->aapcs_reg = par;
1614 pcum->aapcs_nextncrn = ncrn + nregs;
1615 return;
1618 /* C.11 */
1619 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1621 /* The argument is passed on stack; record the needed number of words for
1622 this argument (we can re-use NREGS) and align the total size if
1623 necessary. */
1624 on_stack:
1625 pcum->aapcs_stack_words = nregs;
1626 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1627 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1628 16 / UNITS_PER_WORD) + 1;
1629 return;
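/* An illustration of rule C.8 above (a sketch of the AAPCS64 behaviour,
   not a normative statement): for

     void f (int a, __int128 b);

   A is allocated to w0; B has 16-byte alignment and needs two
   registers, so the NGRN is rounded up from 1 to 2 and B is passed in
   x2/x3, leaving x1 unused.  */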
1632 /* Implement TARGET_FUNCTION_ARG. */
1634 static rtx
1635 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1636 const_tree type, bool named)
1638 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1639 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1641 if (mode == VOIDmode)
1642 return NULL_RTX;
1644 aarch64_layout_arg (pcum_v, mode, type, named);
1645 return pcum->aapcs_reg;
1648 void
1649 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1650 const_tree fntype ATTRIBUTE_UNUSED,
1651 rtx libname ATTRIBUTE_UNUSED,
1652 const_tree fndecl ATTRIBUTE_UNUSED,
1653 unsigned n_named ATTRIBUTE_UNUSED)
1655 pcum->aapcs_ncrn = 0;
1656 pcum->aapcs_nvrn = 0;
1657 pcum->aapcs_nextncrn = 0;
1658 pcum->aapcs_nextnvrn = 0;
1659 pcum->pcs_variant = ARM_PCS_AAPCS64;
1660 pcum->aapcs_reg = NULL_RTX;
1661 pcum->aapcs_arg_processed = false;
1662 pcum->aapcs_stack_words = 0;
1663 pcum->aapcs_stack_size = 0;
1665 return;
1668 static void
1669 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1670 enum machine_mode mode,
1671 const_tree type,
1672 bool named)
1674 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1675 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1677 aarch64_layout_arg (pcum_v, mode, type, named);
1678 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1679 != (pcum->aapcs_stack_words != 0));
1680 pcum->aapcs_arg_processed = false;
1681 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1682 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1683 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1684 pcum->aapcs_stack_words = 0;
1685 pcum->aapcs_reg = NULL_RTX;
1689 bool
1690 aarch64_function_arg_regno_p (unsigned regno)
1692 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1693 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1696 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1697 PARM_BOUNDARY bits of alignment, but will be given anything up
1698 to STACK_BOUNDARY bits if the type requires it. This makes sure
1699 that both before and after the layout of each argument, the Next
1700 Stacked Argument Address (NSAA) will have a minimum alignment of
1701 8 bytes. */
1703 static unsigned int
1704 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1706 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1708 if (alignment < PARM_BOUNDARY)
1709 alignment = PARM_BOUNDARY;
1710 if (alignment > STACK_BOUNDARY)
1711 alignment = STACK_BOUNDARY;
1712 return alignment;
1715 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1717 Return true if an argument passed on the stack should be padded upwards,
1718 i.e. if the least-significant byte of the stack slot has useful data.
1720 Small aggregate types are placed in the lowest memory address.
1722 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1724 bool
1725 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1727 /* On little-endian targets, the least significant byte of every stack
1728 argument is passed at the lowest byte address of the stack slot. */
1729 if (!BYTES_BIG_ENDIAN)
1730 return true;
1732 /* Otherwise, integral, floating-point and pointer types are padded downward:
1733 the least significant byte of a stack argument is passed at the highest
1734 byte address of the stack slot. */
1735 if (type
1736 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1737 || POINTER_TYPE_P (type))
1738 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1739 return false;
1741 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1742 return true;
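/* For example, on a big-endian target a SImode argument that ends up
   on the stack is padded downward: its four data bytes occupy the
   highest-addressed four bytes of the eight-byte slot, so the least
   significant byte sits at the highest address.  On a little-endian
   target the data always starts at the lowest address of the slot.  */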
1745 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1747 It specifies padding for the last (may also be the only)
1748 element of a block move between registers and memory. If
1749 assuming the block is in the memory, padding upward means that
1750 the last element is padded after its highest significant byte,
1751 while in downward padding, the last element is padded at
1752 its least significant byte side.
1754 Small aggregates and small complex types are always padded
1755 upwards.
1757 We don't need to worry about homogeneous floating-point or
1758 short-vector aggregates; their move is not affected by the
1759 padding direction determined here. Regardless of endianness,
1760 each element of such an aggregate is put in the least
1761 significant bits of a fp/simd register.
1763 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1764 register has useful data, and return the opposite if the most
1765 significant byte does. */
1767 bool
1768 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1769 bool first ATTRIBUTE_UNUSED)
1772 /* Small composite types are always padded upward. */
1773 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1775 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1776 : GET_MODE_SIZE (mode));
1777 if (size < 2 * UNITS_PER_WORD)
1778 return true;
1781 /* Otherwise, use the default padding. */
1782 return !BYTES_BIG_ENDIAN;
1785 static enum machine_mode
1786 aarch64_libgcc_cmp_return_mode (void)
1788 return SImode;
1791 static bool
1792 aarch64_frame_pointer_required (void)
1794 /* If the function contains dynamic stack allocations, we need to
1795 use the frame pointer to access the static parts of the frame. */
1796 if (cfun->calls_alloca)
1797 return true;
1799 /* In aarch64_override_options_after_change
1800 flag_omit_leaf_frame_pointer turns off the frame pointer by
1801 default. Turn it back on now if we've not got a leaf
1802 function. */
1803 if (flag_omit_leaf_frame_pointer
1804 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
1805 return true;
1807 return false;
1810 /* Mark the registers that need to be saved by the callee and calculate
1811 the size of the callee-saved registers area and frame record (both FP
1812 and LR may be omitted). */
1813 static void
1814 aarch64_layout_frame (void)
1816 HOST_WIDE_INT offset = 0;
1817 int regno;
1819 if (reload_completed && cfun->machine->frame.laid_out)
1820 return;
1822 /* First mark all the registers that really need to be saved... */
1823 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1824 cfun->machine->frame.reg_offset[regno] = -1;
1826 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1827 cfun->machine->frame.reg_offset[regno] = -1;
1829 /* ... that includes the eh data registers (if needed)... */
1830 if (crtl->calls_eh_return)
1831 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1832 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1834 /* ... and any callee saved register that dataflow says is live. */
1835 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1836 if (df_regs_ever_live_p (regno)
1837 && !call_used_regs[regno])
1838 cfun->machine->frame.reg_offset[regno] = 0;
1840 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1841 if (df_regs_ever_live_p (regno)
1842 && !call_used_regs[regno])
1843 cfun->machine->frame.reg_offset[regno] = 0;
1845 if (frame_pointer_needed)
1847 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1848 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1849 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1852 /* Now assign stack slots for them. */
1853 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1854 if (cfun->machine->frame.reg_offset[regno] != -1)
1856 cfun->machine->frame.reg_offset[regno] = offset;
1857 offset += UNITS_PER_WORD;
1860 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1861 if (cfun->machine->frame.reg_offset[regno] != -1)
1863 cfun->machine->frame.reg_offset[regno] = offset;
1864 offset += UNITS_PER_WORD;
1867 if (frame_pointer_needed)
1869 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1870 offset += UNITS_PER_WORD;
1873 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1875 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1876 offset += UNITS_PER_WORD;
1879 cfun->machine->frame.padding0 =
1880 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1881 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1883 cfun->machine->frame.saved_regs_size = offset;
1884 cfun->machine->frame.laid_out = true;
1887 /* Make the last instruction frame-related and note that it performs
1888 the operation described by FRAME_PATTERN. */
1890 static void
1891 aarch64_set_frame_expr (rtx frame_pattern)
1893 rtx insn;
1895 insn = get_last_insn ();
1896 RTX_FRAME_RELATED_P (insn) = 1;
1897 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1898 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1899 frame_pattern,
1900 REG_NOTES (insn));
1903 static bool
1904 aarch64_register_saved_on_entry (int regno)
1906 return cfun->machine->frame.reg_offset[regno] != -1;
1910 static void
1911 aarch64_save_or_restore_fprs (int start_offset, int increment,
1912 bool restore, rtx base_rtx)
1915 unsigned regno;
1916 unsigned regno2;
1917 rtx insn;
1918 rtx (*gen_mem_ref)(enum machine_mode, rtx)
1919 = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1922 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1924 if (aarch64_register_saved_on_entry (regno))
1926 rtx mem;
1927 mem = gen_mem_ref (DFmode,
1928 plus_constant (Pmode,
1929 base_rtx,
1930 start_offset));
1932 for (regno2 = regno + 1;
1933 regno2 <= V31_REGNUM
1934 && !aarch64_register_saved_on_entry (regno2);
1935 regno2++)
1937 /* Empty loop. */
1939 if (regno2 <= V31_REGNUM &&
1940 aarch64_register_saved_on_entry (regno2))
1942 rtx mem2;
1943 /* Next highest register to be saved. */
1944 mem2 = gen_mem_ref (DFmode,
1945 plus_constant
1946 (Pmode,
1947 base_rtx,
1948 start_offset + increment));
1949 if (restore == false)
1951 insn = emit_insn
1952 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1953 mem2, gen_rtx_REG (DFmode, regno2)));
1956 else
1958 insn = emit_insn
1959 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1960 gen_rtx_REG (DFmode, regno2), mem2));
1962 add_reg_note (insn, REG_CFA_RESTORE,
1963 gen_rtx_REG (DFmode, regno));
1964 add_reg_note (insn, REG_CFA_RESTORE,
1965 gen_rtx_REG (DFmode, regno2));
1968 /* The first part of a frame-related parallel insn
1969 is always assumed to be relevant to the frame
1970 calculations; subsequent parts are only
1971 frame-related if explicitly marked. */
1972 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1973 regno = regno2;
1974 start_offset += increment * 2;
1976 else
1978 if (restore == false)
1979 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1980 else
1982 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1983 add_reg_note (insn, REG_CFA_RESTORE,
1984 gen_rtx_REG (DImode, regno));
1986 start_offset += increment;
1988 RTX_FRAME_RELATED_P (insn) = 1;
1995 /* Offset from the stack pointer of where the saves and
1996 restores have to happen. */
1997 static void
1998 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1999 bool restore)
2001 rtx insn;
2002 rtx base_rtx = stack_pointer_rtx;
2003 HOST_WIDE_INT start_offset = offset;
2004 HOST_WIDE_INT increment = UNITS_PER_WORD;
2005 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
2006 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
2007 unsigned regno;
2008 unsigned regno2;
2010 for (regno = R0_REGNUM; regno <= limit; regno++)
2012 if (aarch64_register_saved_on_entry (regno))
2014 rtx mem;
2015 mem = gen_mem_ref (Pmode,
2016 plus_constant (Pmode,
2017 base_rtx,
2018 start_offset));
2020 for (regno2 = regno + 1;
2021 regno2 <= limit
2022 && !aarch64_register_saved_on_entry (regno2);
2023 regno2++)
2025 /* Empty loop. */
2027 if (regno2 <= limit &&
2028 aarch64_register_saved_on_entry (regno2))
2030 rtx mem2;
2031 /* Next highest register to be saved. */
2032 mem2 = gen_mem_ref (Pmode,
2033 plus_constant
2034 (Pmode,
2035 base_rtx,
2036 start_offset + increment));
2037 if (restore == false)
2039 insn = emit_insn
2040 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
2041 mem2, gen_rtx_REG (DImode, regno2)));
2044 else
2046 insn = emit_insn
2047 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
2048 gen_rtx_REG (DImode, regno2), mem2));
2050 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
2051 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
2054 /* The first part of a frame-related parallel insn
2055 is always assumed to be relevant to the frame
2056 calculations; subsequent parts are only
2057 frame-related if explicitly marked. */
2058 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
2059 1)) = 1;
2060 regno = regno2;
2061 start_offset += increment * 2;
2063 else
2065 if (restore == false)
2066 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
2067 else
2069 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
2070 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
2072 start_offset += increment;
2074 RTX_FRAME_RELATED_P (insn) = 1;
2078 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
2082 /* AArch64 stack frames generated by this compiler look like:
2084 +-------------------------------+
2086 | incoming stack arguments |
2088 +-------------------------------+ <-- arg_pointer_rtx
2090 | callee-allocated save area |
2091 | for register varargs |
2093 +-------------------------------+ <-- frame_pointer_rtx
2095 | local variables |
2097 +-------------------------------+
2098 | padding0 | \
2099 +-------------------------------+ |
2100 | | |
2101 | | |
2102 | callee-saved registers | | frame.saved_regs_size
2103 | | |
2104 +-------------------------------+ |
2105 | LR' | |
2106 +-------------------------------+ |
2107 | FP' | /
2108 P +-------------------------------+ <-- hard_frame_pointer_rtx
2109 | dynamic allocation |
2110 +-------------------------------+
2112 | outgoing stack arguments |
2114 +-------------------------------+ <-- stack_pointer_rtx
2116 Dynamic stack allocations such as alloca insert data at point P.
2117 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
2118 hard_frame_pointer_rtx unchanged. */
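/* Illustrative example (assumed for exposition, not taken from the
   sources): a function with 16 bytes of local variables, no outgoing
   arguments and x19/x20 live across a call gets a 48-byte frame:
   FP'/LR' at [sp, 0..15], x19/x20 at [sp, 16..31] and the locals at
   [sp, 32..47], so the prologue is roughly
     stp  x29, x30, [sp, -48]!
     add  x29, sp, 0
     stp  x19, x20, [sp, 16]
   with hard_frame_pointer_rtx (x29) left pointing at the saved FP'.  */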
2120 /* Generate the prologue instructions for entry into a function.
2121 Establish the stack frame by decreasing the stack pointer with a
2122 properly calculated size and, if necessary, create a frame record
2123 filled with the values of LR and previous frame pointer. The
2124 current FP is also set up if it is in use. */
2126 void
2127 aarch64_expand_prologue (void)
2129 /* sub sp, sp, #<frame_size>
2130 stp {fp, lr}, [sp, #<frame_size> - 16]
2131 add fp, sp, #<frame_size> - hardfp_offset
2132 stp {cs_reg}, [fp, #-16] etc.
2134 sub sp, sp, <final_adjustment_if_any>  */
2136 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
2137 HOST_WIDE_INT frame_size, offset;
2138 HOST_WIDE_INT fp_offset; /* FP offset from SP */
2139 rtx insn;
2141 aarch64_layout_frame ();
2142 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2143 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
2144 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
2145 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2146 + crtl->outgoing_args_size);
2147 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2148 STACK_BOUNDARY / BITS_PER_UNIT);
2150 if (flag_stack_usage_info)
2151 current_function_static_stack_size = frame_size;
2153 fp_offset = (offset
2154 - original_frame_size
2155 - cfun->machine->frame.saved_regs_size);
2157 /* Store pairs and load pairs have a range of only -512 to 504.  */
2158 if (offset >= 512)
2160 /* When the frame has a large size, an initial decrease is done on
2161 the stack pointer to jump over the callee-allocated save area for
2162 register varargs, the local variable area and/or the callee-saved
2163 register area. This will allow the pre-index write-back
2164 store pair instructions to be used for setting up the stack frame
2165 efficiently. */
2166 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2167 if (offset >= 512)
2168 offset = cfun->machine->frame.saved_regs_size;
2170 frame_size -= (offset + crtl->outgoing_args_size);
2171 fp_offset = 0;
2173 if (frame_size >= 0x1000000)
2175 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2176 emit_move_insn (op0, GEN_INT (-frame_size));
2177 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2178 aarch64_set_frame_expr (gen_rtx_SET
2179 (Pmode, stack_pointer_rtx,
2180 plus_constant (Pmode,
2181 stack_pointer_rtx,
2182 -frame_size)));
2184 else if (frame_size > 0)
2186 if ((frame_size & 0xfff) != frame_size)
2188 insn = emit_insn (gen_add2_insn
2189 (stack_pointer_rtx,
2190 GEN_INT (-(frame_size
2191 & ~(HOST_WIDE_INT)0xfff))));
2192 RTX_FRAME_RELATED_P (insn) = 1;
2194 if ((frame_size & 0xfff) != 0)
2196 insn = emit_insn (gen_add2_insn
2197 (stack_pointer_rtx,
2198 GEN_INT (-(frame_size
2199 & (HOST_WIDE_INT)0xfff))));
2200 RTX_FRAME_RELATED_P (insn) = 1;
2204 else
2205 frame_size = -1;
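/* Illustration with made-up numbers: a residual frame_size of 0x12345
   cannot be encoded in one add/sub immediate, so the code above emits
   two adjustments, roughly
     sub  sp, sp, #0x12000
     sub  sp, sp, #0x345
   i.e. the 12-bit-shifted part followed by the low 12 bits.  */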
2207 if (offset > 0)
2209 /* Save the frame pointer and lr if the frame pointer is needed
2210 first. Make the frame pointer point to the location of the
2211 old frame pointer on the stack. */
2212 if (frame_pointer_needed)
2214 rtx mem_fp, mem_lr;
2216 if (fp_offset)
2218 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2219 GEN_INT (-offset)));
2220 RTX_FRAME_RELATED_P (insn) = 1;
2221 aarch64_set_frame_expr (gen_rtx_SET
2222 (Pmode, stack_pointer_rtx,
2223 gen_rtx_MINUS (Pmode,
2224 stack_pointer_rtx,
2225 GEN_INT (offset))));
2226 mem_fp = gen_frame_mem (DImode,
2227 plus_constant (Pmode,
2228 stack_pointer_rtx,
2229 fp_offset));
2230 mem_lr = gen_frame_mem (DImode,
2231 plus_constant (Pmode,
2232 stack_pointer_rtx,
2233 fp_offset
2234 + UNITS_PER_WORD));
2235 insn = emit_insn (gen_store_pairdi (mem_fp,
2236 hard_frame_pointer_rtx,
2237 mem_lr,
2238 gen_rtx_REG (DImode,
2239 LR_REGNUM)));
2241 else
2243 insn = emit_insn (gen_storewb_pairdi_di
2244 (stack_pointer_rtx, stack_pointer_rtx,
2245 hard_frame_pointer_rtx,
2246 gen_rtx_REG (DImode, LR_REGNUM),
2247 GEN_INT (-offset),
2248 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2249 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2252 /* The first part of a frame-related parallel insn is always
2253 assumed to be relevant to the frame calculations;
2254 subsequent parts are only frame-related if explicitly
2255 marked. */
2256 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2257 RTX_FRAME_RELATED_P (insn) = 1;
2259 /* Set up frame pointer to point to the location of the
2260 previous frame pointer on the stack. */
2261 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2262 stack_pointer_rtx,
2263 GEN_INT (fp_offset)));
2264 aarch64_set_frame_expr (gen_rtx_SET
2265 (Pmode, hard_frame_pointer_rtx,
2266 plus_constant (Pmode,
2267 stack_pointer_rtx,
2268 fp_offset)));
2269 RTX_FRAME_RELATED_P (insn) = 1;
2270 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2271 hard_frame_pointer_rtx));
2273 else
2275 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2276 GEN_INT (-offset)));
2277 RTX_FRAME_RELATED_P (insn) = 1;
2280 aarch64_save_or_restore_callee_save_registers
2281 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2284 /* When offset >= 512,
2285 sub sp, sp, #<outgoing_args_size> */
2286 if (frame_size > -1)
2288 if (crtl->outgoing_args_size > 0)
2290 insn = emit_insn (gen_add2_insn
2291 (stack_pointer_rtx,
2292 GEN_INT (- crtl->outgoing_args_size)));
2293 RTX_FRAME_RELATED_P (insn) = 1;
2298 /* Generate the epilogue instructions for returning from a function. */
2299 void
2300 aarch64_expand_epilogue (bool for_sibcall)
2302 HOST_WIDE_INT original_frame_size, frame_size, offset;
2303 HOST_WIDE_INT fp_offset;
2304 rtx insn;
2305 rtx cfa_reg;
2307 aarch64_layout_frame ();
2308 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2309 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2310 + crtl->outgoing_args_size);
2311 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2312 STACK_BOUNDARY / BITS_PER_UNIT);
2314 fp_offset = (offset
2315 - original_frame_size
2316 - cfun->machine->frame.saved_regs_size);
2318 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2320 /* Store pairs and load pairs have a range of only -512 to 504.  */
2321 if (offset >= 512)
2323 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2324 if (offset >= 512)
2325 offset = cfun->machine->frame.saved_regs_size;
2327 frame_size -= (offset + crtl->outgoing_args_size);
2328 fp_offset = 0;
2329 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2331 insn = emit_insn (gen_add2_insn
2332 (stack_pointer_rtx,
2333 GEN_INT (crtl->outgoing_args_size)));
2334 RTX_FRAME_RELATED_P (insn) = 1;
2337 else
2338 frame_size = -1;
2340 /* If there were outgoing arguments or we've done dynamic stack
2341 allocation, then restore the stack pointer from the frame
2342 pointer. This is at most one insn and more efficient than using
2343 GCC's internal mechanism. */
2344 if (frame_pointer_needed
2345 && (crtl->outgoing_args_size || cfun->calls_alloca))
2347 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2348 hard_frame_pointer_rtx,
2349 GEN_INT (- fp_offset)));
2350 RTX_FRAME_RELATED_P (insn) = 1;
2351 /* As SP is set to (FP - fp_offset), according to the rules in
2352 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2353 from the value of SP from now on. */
2354 cfa_reg = stack_pointer_rtx;
2357 aarch64_save_or_restore_callee_save_registers
2358 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2360 /* Restore the frame pointer and lr if the frame pointer is needed. */
2361 if (offset > 0)
2363 if (frame_pointer_needed)
2365 rtx mem_fp, mem_lr;
2367 if (fp_offset)
2369 mem_fp = gen_frame_mem (DImode,
2370 plus_constant (Pmode,
2371 stack_pointer_rtx,
2372 fp_offset));
2373 mem_lr = gen_frame_mem (DImode,
2374 plus_constant (Pmode,
2375 stack_pointer_rtx,
2376 fp_offset
2377 + UNITS_PER_WORD));
2378 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2379 mem_fp,
2380 gen_rtx_REG (DImode,
2381 LR_REGNUM),
2382 mem_lr));
2384 else
2386 insn = emit_insn (gen_loadwb_pairdi_di
2387 (stack_pointer_rtx,
2388 stack_pointer_rtx,
2389 hard_frame_pointer_rtx,
2390 gen_rtx_REG (DImode, LR_REGNUM),
2391 GEN_INT (offset),
2392 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2393 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2394 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2395 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2396 plus_constant (Pmode, cfa_reg,
2397 offset))));
2400 /* The first part of a frame-related parallel insn
2401 is always assumed to be relevant to the frame
2402 calculations; subsequent parts are only
2403 frame-related if explicitly marked. */
2404 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2405 RTX_FRAME_RELATED_P (insn) = 1;
2406 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2407 add_reg_note (insn, REG_CFA_RESTORE,
2408 gen_rtx_REG (DImode, LR_REGNUM));
2410 if (fp_offset)
2412 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2413 GEN_INT (offset)));
2414 RTX_FRAME_RELATED_P (insn) = 1;
2417 else
2419 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2420 GEN_INT (offset)));
2421 RTX_FRAME_RELATED_P (insn) = 1;
2425 /* Stack adjustment for exception handler. */
2426 if (crtl->calls_eh_return)
2428 /* We need to unwind the stack by the offset computed by
2429 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2430 based on SP. Ideally we would update the SP and define the
2431 CFA along the lines of:
2433 SP = SP + EH_RETURN_STACKADJ_RTX
2434 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2436 However the dwarf emitter only understands a constant
2437 register offset.
2439 The solution chosen here is to use the otherwise unused IP0
2440 as a temporary register to hold the current SP value. The
2441 CFA is described using IP0; then SP is modified.  */
2443 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2445 insn = emit_move_insn (ip0, stack_pointer_rtx);
2446 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2447 RTX_FRAME_RELATED_P (insn) = 1;
2449 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2451 /* Ensure the assignment to IP0 does not get optimized away. */
2452 emit_use (ip0);
2455 if (frame_size > -1)
2457 if (frame_size >= 0x1000000)
2459 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2460 emit_move_insn (op0, GEN_INT (frame_size));
2461 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2462 aarch64_set_frame_expr (gen_rtx_SET
2463 (Pmode, stack_pointer_rtx,
2464 plus_constant (Pmode,
2465 stack_pointer_rtx,
2466 frame_size)));
2468 else if (frame_size > 0)
2470 if ((frame_size & 0xfff) != 0)
2472 insn = emit_insn (gen_add2_insn
2473 (stack_pointer_rtx,
2474 GEN_INT ((frame_size
2475 & (HOST_WIDE_INT) 0xfff))));
2476 RTX_FRAME_RELATED_P (insn) = 1;
2478 if ((frame_size & 0xfff) != frame_size)
2480 insn = emit_insn (gen_add2_insn
2481 (stack_pointer_rtx,
2482 GEN_INT ((frame_size
2483 & ~ (HOST_WIDE_INT) 0xfff))));
2484 RTX_FRAME_RELATED_P (insn) = 1;
2488 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2489 plus_constant (Pmode,
2490 stack_pointer_rtx,
2491 offset)));
2494 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2495 if (!for_sibcall)
2496 emit_jump_insn (ret_rtx);
2499 /* Return the place to copy the exception unwinding return address to.
2500 This will probably be a stack slot, but could (in theory) be the
2501 return register.  */
2503 rtx aarch64_final_eh_return_addr (void)
2505 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2506 aarch64_layout_frame ();
2507 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2508 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2509 + crtl->outgoing_args_size);
2510 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2511 STACK_BOUNDARY / BITS_PER_UNIT);
2512 fp_offset = offset
2513 - original_frame_size
2514 - cfun->machine->frame.saved_regs_size;
2516 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2517 return gen_rtx_REG (DImode, LR_REGNUM);
2519 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2520 result in a store to save LR introduced by builtin_eh_return () being
2521 incorrectly deleted because the alias is not detected.
2522 So in the calculation of the address to copy the exception unwinding
2523 return address to, we distinguish two cases.
2524 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2525 we return a SP-relative location since all the addresses are SP-relative
2526 in this case. This prevents the store from being optimized away.
2527 If the fp_offset is not 0, then the addresses will be FP-relative and
2528 therefore we return a FP-relative location. */
2530 if (frame_pointer_needed)
2532 if (fp_offset)
2533 return gen_frame_mem (DImode,
2534 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2535 else
2536 return gen_frame_mem (DImode,
2537 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2540 /* If FP is not needed, we calculate the location of LR, which would be
2541 at the top of the saved registers block. */
2543 return gen_frame_mem (DImode,
2544 plus_constant (Pmode,
2545 stack_pointer_rtx,
2546 fp_offset
2547 + cfun->machine->frame.saved_regs_size
2548 - 2 * UNITS_PER_WORD));
2551 /* Possibly output code to build up a constant in a register. For
2552 the benefit of the costs infrastructure, returns the number of
2553 instructions which would be emitted. GENERATE inhibits or
2554 enables code generation. */
2556 static int
2557 aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
2559 int insns = 0;
2561 if (aarch64_bitmask_imm (val, DImode))
2563 if (generate)
2564 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2565 insns = 1;
2567 else
2569 int i;
2570 int ncount = 0;
2571 int zcount = 0;
2572 HOST_WIDE_INT valp = val >> 16;
2573 HOST_WIDE_INT valm;
2574 HOST_WIDE_INT tval;
2576 for (i = 16; i < 64; i += 16)
2578 valm = (valp & 0xffff);
2580 if (valm != 0)
2581 ++ zcount;
2583 if (valm != 0xffff)
2584 ++ ncount;
2586 valp >>= 16;
2589 /* zcount contains the number of additional MOVK instructions
2590 required if the constant is built up with an initial MOVZ instruction,
2591 while ncount is the number of MOVK instructions required if starting
2592 with a MOVN instruction.  Choose the sequence that requires the fewest
2593 instructions, preferring a MOVZ-based sequence when both require the
2594 same number.  */
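/* Illustration (not from the original sources): for
   val = 0xffffffffffff1234 every upper 16-bit chunk is 0xffff, so
   ncount == 0 and zcount == 3; the MOVN-based sequence wins and the
   value is built with a single move (assembled as MOVN x<regnum>,
   #0xedcb), whereas the MOVZ-based sequence would need one MOVZ plus
   three MOVKs.  */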
2595 if (ncount < zcount)
2597 if (generate)
2598 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2599 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2600 tval = 0xffff;
2601 insns++;
2603 else
2605 if (generate)
2606 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2607 GEN_INT (val & 0xffff));
2608 tval = 0;
2609 insns++;
2612 val >>= 16;
2614 for (i = 16; i < 64; i += 16)
2616 if ((val & 0xffff) != tval)
2618 if (generate)
2619 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2620 GEN_INT (i),
2621 GEN_INT (val & 0xffff)));
2622 insns++;
2624 val >>= 16;
2627 return insns;
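/* Add DELTA to the register REGNUM.  If the adjustment cannot be done
   with a single add/sub immediate, SCRATCHREG is used as a temporary:
   it holds DELTA / 4096 for an add/sub with a 12-bit left shift, or,
   for very large deltas, the full constant built by
   aarch64_build_constant.  */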
2630 static void
2631 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2633 HOST_WIDE_INT mdelta = delta;
2634 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2635 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2637 if (mdelta < 0)
2638 mdelta = -mdelta;
2640 if (mdelta >= 4096 * 4096)
2642 (void) aarch64_build_constant (scratchreg, delta, true);
2643 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2645 else if (mdelta > 0)
2647 if (mdelta >= 4096)
2649 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2650 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2651 if (delta < 0)
2652 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2653 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2654 else
2655 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2656 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2658 if (mdelta % 4096 != 0)
2660 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2661 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2662 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2667 /* Output code to add DELTA to the first argument, and then jump
2668 to FUNCTION. Used for C++ multiple inheritance. */
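/* For the simplest case (VCALL_OFFSET == 0 and a small positive DELTA)
   the generated thunk is roughly
     add  x0, x0, #delta
     b    <function>
   (an illustration only; the exact sequence depends on DELTA and
   VCALL_OFFSET).  */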
2669 static void
2670 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2671 HOST_WIDE_INT delta,
2672 HOST_WIDE_INT vcall_offset,
2673 tree function)
2675 /* The this pointer is always in x0. Note that this differs from
2676 Arm where the this pointer may be bumped to r1 if r0 is required
2677 to return a pointer to an aggregate. On AArch64 a result value
2678 pointer will be in x8. */
2679 int this_regno = R0_REGNUM;
2680 rtx this_rtx, temp0, temp1, addr, insn, funexp;
2682 reload_completed = 1;
2683 emit_note (NOTE_INSN_PROLOGUE_END);
2685 if (vcall_offset == 0)
2686 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2687 else
2689 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2691 this_rtx = gen_rtx_REG (Pmode, this_regno);
2692 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2693 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2695 addr = this_rtx;
2696 if (delta != 0)
2698 if (delta >= -256 && delta < 256)
2699 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2700 plus_constant (Pmode, this_rtx, delta));
2701 else
2702 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2705 if (Pmode == ptr_mode)
2706 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2707 else
2708 aarch64_emit_move (temp0,
2709 gen_rtx_ZERO_EXTEND (Pmode,
2710 gen_rtx_MEM (ptr_mode, addr)));
2712 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2713 addr = plus_constant (Pmode, temp0, vcall_offset);
2714 else
2716 (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
2717 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2720 if (Pmode == ptr_mode)
2721 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode, addr));
2722 else
2723 aarch64_emit_move (temp1,
2724 gen_rtx_SIGN_EXTEND (Pmode,
2725 gen_rtx_MEM (ptr_mode, addr)));
2727 emit_insn (gen_add2_insn (this_rtx, temp1));
2730 /* Generate a tail call to the target function. */
2731 if (!TREE_USED (function))
2733 assemble_external (function);
2734 TREE_USED (function) = 1;
2736 funexp = XEXP (DECL_RTL (function), 0);
2737 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2738 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2739 SIBLING_CALL_P (insn) = 1;
2741 insn = get_insns ();
2742 shorten_branches (insn);
2743 final_start_function (insn, file, 1);
2744 final (insn, file, 1);
2745 final_end_function ();
2747 /* Stop pretending to be a post-reload pass. */
2748 reload_completed = 0;
2751 static int
2752 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2754 if (GET_CODE (*x) == SYMBOL_REF)
2755 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2757 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2758 TLS offsets, not real symbol references. */
2759 if (GET_CODE (*x) == UNSPEC
2760 && XINT (*x, 1) == UNSPEC_TLS)
2761 return -1;
2763 return 0;
2766 static bool
2767 aarch64_tls_referenced_p (rtx x)
2769 if (!TARGET_HAVE_TLS)
2770 return false;
2772 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
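/* qsort/bsearch comparison callback for the aarch64_bitmasks table
   built below.  */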
2776 static int
2777 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2779 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2780 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2782 if (*imm1 < *imm2)
2783 return -1;
2784 if (*imm1 > *imm2)
2785 return +1;
2786 return 0;
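/* Build the table of every immediate representable by the AArch64
   logical (bitmask) instructions: for each element size E in
   {2, 4, ..., 64}, a run of S set bits rotated right by R and then
   replicated across the 64-bit word.  For example (illustration only),
   E = 8, S = 3, R = 1 gives the byte 0x83, replicated to
   0x8383838383838383.  The table is sorted so that aarch64_bitmask_imm
   can use bsearch on it.  */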
2790 static void
2791 aarch64_build_bitmask_table (void)
2793 unsigned HOST_WIDE_INT mask, imm;
2794 unsigned int log_e, e, s, r;
2795 unsigned int nimms = 0;
2797 for (log_e = 1; log_e <= 6; log_e++)
2799 e = 1 << log_e;
2800 if (e == 64)
2801 mask = ~(HOST_WIDE_INT) 0;
2802 else
2803 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2804 for (s = 1; s < e; s++)
2806 for (r = 0; r < e; r++)
2808 /* Set S consecutive bits to 1 (S < 64).  */
2809 imm = ((unsigned HOST_WIDE_INT) 1 << s) - 1;
2810 /* Rotate right by R.  */
2811 if (r != 0)
2812 imm = ((imm >> r) | (imm << (e - r))) & mask;
2813 /* Replicate the pattern to fill 64 bits, depending on the element size.  */
2814 switch (log_e) {
2815 case 1: imm |= (imm << 2);
2816 case 2: imm |= (imm << 4);
2817 case 3: imm |= (imm << 8);
2818 case 4: imm |= (imm << 16);
2819 case 5: imm |= (imm << 32);
2820 case 6:
2821 break;
2822 default:
2823 gcc_unreachable ();
2825 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2826 aarch64_bitmasks[nimms++] = imm;
2831 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2832 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2833 aarch64_bitmasks_cmp);
2837 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2838 a left shift of 0 or 12 bits. */
2839 bool
2840 aarch64_uimm12_shift (HOST_WIDE_INT val)
2842 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2843 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val);
2848 /* Return true if val is an immediate that can be loaded into a
2849 register by a MOVZ instruction. */
2850 static bool
2851 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2853 if (GET_MODE_SIZE (mode) > 4)
2855 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2856 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2857 return 1;
2859 else
2861 /* Ignore sign extension. */
2862 val &= (HOST_WIDE_INT) 0xffffffff;
2864 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2865 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2869 /* Return true if val is a valid bitmask immediate. */
2870 bool
2871 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2873 if (GET_MODE_SIZE (mode) < 8)
2875 /* Replicate bit pattern. */
2876 val &= (HOST_WIDE_INT) 0xffffffff;
2877 val |= val << 32;
2879 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2880 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2884 /* Return true if val is an immediate that can be loaded into a
2885 register in a single instruction. */
2886 bool
2887 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2889 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2890 return 1;
2891 return aarch64_bitmask_imm (val, mode);
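/* Illustration (not from the original sources): 0xffffffffffff0123 is
   neither a MOVZ nor a bitmask immediate, but its complement
   0x000000000000fedc is MOVZ-representable, so the value is accepted
   here and can be loaded with a single MOVN.  */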
2894 static bool
2895 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2897 rtx base, offset;
2899 if (GET_CODE (x) == HIGH)
2900 return true;
2902 split_const (x, &base, &offset);
2903 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2905 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2906 != SYMBOL_FORCE_TO_MEM)
2907 return true;
2908 else
2909 /* Avoid generating a 64-bit relocation in ILP32; leave it
2910 to aarch64_expand_mov_immediate to handle properly.  */
2911 return mode != ptr_mode;
2914 return aarch64_tls_referenced_p (x);
2917 /* Return true if register REGNO is a valid index register.
2918 STRICT_P is true if REG_OK_STRICT is in effect. */
2920 bool
2921 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2923 if (!HARD_REGISTER_NUM_P (regno))
2925 if (!strict_p)
2926 return true;
2928 if (!reg_renumber)
2929 return false;
2931 regno = reg_renumber[regno];
2933 return GP_REGNUM_P (regno);
2936 /* Return true if register REGNO is a valid base register for mode MODE.
2937 STRICT_P is true if REG_OK_STRICT is in effect. */
2939 bool
2940 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2942 if (!HARD_REGISTER_NUM_P (regno))
2944 if (!strict_p)
2945 return true;
2947 if (!reg_renumber)
2948 return false;
2950 regno = reg_renumber[regno];
2953 /* The fake registers will be eliminated to either the stack or
2954 hard frame pointer, both of which are usually valid base registers.
2955 Reload deals with the cases where the eliminated form isn't valid. */
2956 return (GP_REGNUM_P (regno)
2957 || regno == SP_REGNUM
2958 || regno == FRAME_POINTER_REGNUM
2959 || regno == ARG_POINTER_REGNUM);
2962 /* Return true if X is a valid base register for mode MODE.
2963 STRICT_P is true if REG_OK_STRICT is in effect. */
2965 static bool
2966 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2968 if (!strict_p && GET_CODE (x) == SUBREG)
2969 x = SUBREG_REG (x);
2971 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2974 /* Return true if address offset is a valid index. If it is, fill in INFO
2975 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2977 static bool
2978 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2979 enum machine_mode mode, bool strict_p)
2981 enum aarch64_address_type type;
2982 rtx index;
2983 int shift;
2985 /* (reg:P) */
2986 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2987 && GET_MODE (x) == Pmode)
2989 type = ADDRESS_REG_REG;
2990 index = x;
2991 shift = 0;
2993 /* (sign_extend:DI (reg:SI)) */
2994 else if ((GET_CODE (x) == SIGN_EXTEND
2995 || GET_CODE (x) == ZERO_EXTEND)
2996 && GET_MODE (x) == DImode
2997 && GET_MODE (XEXP (x, 0)) == SImode)
2999 type = (GET_CODE (x) == SIGN_EXTEND)
3000 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3001 index = XEXP (x, 0);
3002 shift = 0;
3004 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3005 else if (GET_CODE (x) == MULT
3006 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3007 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3008 && GET_MODE (XEXP (x, 0)) == DImode
3009 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3010 && CONST_INT_P (XEXP (x, 1)))
3012 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3013 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3014 index = XEXP (XEXP (x, 0), 0);
3015 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3017 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3018 else if (GET_CODE (x) == ASHIFT
3019 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3020 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3021 && GET_MODE (XEXP (x, 0)) == DImode
3022 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3023 && CONST_INT_P (XEXP (x, 1)))
3025 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3026 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3027 index = XEXP (XEXP (x, 0), 0);
3028 shift = INTVAL (XEXP (x, 1));
3030 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3031 else if ((GET_CODE (x) == SIGN_EXTRACT
3032 || GET_CODE (x) == ZERO_EXTRACT)
3033 && GET_MODE (x) == DImode
3034 && GET_CODE (XEXP (x, 0)) == MULT
3035 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3036 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3038 type = (GET_CODE (x) == SIGN_EXTRACT)
3039 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3040 index = XEXP (XEXP (x, 0), 0);
3041 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3042 if (INTVAL (XEXP (x, 1)) != 32 + shift
3043 || INTVAL (XEXP (x, 2)) != 0)
3044 shift = -1;
3046 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3047 (const_int 0xffffffff<<shift)) */
3048 else if (GET_CODE (x) == AND
3049 && GET_MODE (x) == DImode
3050 && GET_CODE (XEXP (x, 0)) == MULT
3051 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3052 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3053 && CONST_INT_P (XEXP (x, 1)))
3055 type = ADDRESS_REG_UXTW;
3056 index = XEXP (XEXP (x, 0), 0);
3057 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3058 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT) 0xffffffff << shift)
3059 shift = -1;
3061 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3062 else if ((GET_CODE (x) == SIGN_EXTRACT
3063 || GET_CODE (x) == ZERO_EXTRACT)
3064 && GET_MODE (x) == DImode
3065 && GET_CODE (XEXP (x, 0)) == ASHIFT
3066 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3067 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3069 type = (GET_CODE (x) == SIGN_EXTRACT)
3070 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3071 index = XEXP (XEXP (x, 0), 0);
3072 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3073 if (INTVAL (XEXP (x, 1)) != 32 + shift
3074 || INTVAL (XEXP (x, 2)) != 0)
3075 shift = -1;
3077 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3078 (const_int 0xffffffff<<shift)) */
3079 else if (GET_CODE (x) == AND
3080 && GET_MODE (x) == DImode
3081 && GET_CODE (XEXP (x, 0)) == ASHIFT
3082 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3083 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3084 && CONST_INT_P (XEXP (x, 1)))
3086 type = ADDRESS_REG_UXTW;
3087 index = XEXP (XEXP (x, 0), 0);
3088 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3089 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT) 0xffffffff << shift)
3090 shift = -1;
3092 /* (mult:P (reg:P) (const_int scale)) */
3093 else if (GET_CODE (x) == MULT
3094 && GET_MODE (x) == Pmode
3095 && GET_MODE (XEXP (x, 0)) == Pmode
3096 && CONST_INT_P (XEXP (x, 1)))
3098 type = ADDRESS_REG_REG;
3099 index = XEXP (x, 0);
3100 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3102 /* (ashift:P (reg:P) (const_int shift)) */
3103 else if (GET_CODE (x) == ASHIFT
3104 && GET_MODE (x) == Pmode
3105 && GET_MODE (XEXP (x, 0)) == Pmode
3106 && CONST_INT_P (XEXP (x, 1)))
3108 type = ADDRESS_REG_REG;
3109 index = XEXP (x, 0);
3110 shift = INTVAL (XEXP (x, 1));
3112 else
3113 return false;
3115 if (GET_CODE (index) == SUBREG)
3116 index = SUBREG_REG (index);
3118 if ((shift == 0
3119      || (shift > 0 && shift <= 3
3120          && (1 << shift) == GET_MODE_SIZE (mode)))
3121 && REG_P (index)
3122 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3124 info->type = type;
3125 info->offset = index;
3126 info->shift = shift;
3127 return true;
3130 return false;
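/* For example (hypothetical RTL), with a 4-byte access the index
   (mult:DI (sign_extend:DI (reg:SI w2)) (const_int 4)) is classified
   as ADDRESS_REG_SXTW with shift 2, i.e. the [x1, w2, sxtw 2]
   addressing form once combined with a base register such as x1.  */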
3133 static inline bool
3134 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3136 return (offset >= -64 * GET_MODE_SIZE (mode)
3137 && offset < 64 * GET_MODE_SIZE (mode)
3138 && offset % GET_MODE_SIZE (mode) == 0);
3141 static inline bool
3142 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3143 HOST_WIDE_INT offset)
3145 return offset >= -256 && offset < 256;
3148 static inline bool
3149 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3151 return (offset >= 0
3152 && offset < 4096 * GET_MODE_SIZE (mode)
3153 && offset % GET_MODE_SIZE (mode) == 0);
3156 /* Return true if X is a valid address for machine mode MODE. If it is,
3157 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3158 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3160 static bool
3161 aarch64_classify_address (struct aarch64_address_info *info,
3162 rtx x, enum machine_mode mode,
3163 RTX_CODE outer_code, bool strict_p)
3165 enum rtx_code code = GET_CODE (x);
3166 rtx op0, op1;
3167 bool allow_reg_index_p =
3168 outer_code != PARALLEL && GET_MODE_SIZE (mode) != 16;
3170 /* Don't support anything other than POST_INC or REG addressing for
3171 AdvSIMD. */
3172 if (aarch64_vector_mode_p (mode)
3173 && (code != POST_INC && code != REG))
3174 return false;
3176 switch (code)
3178 case REG:
3179 case SUBREG:
3180 info->type = ADDRESS_REG_IMM;
3181 info->base = x;
3182 info->offset = const0_rtx;
3183 return aarch64_base_register_rtx_p (x, strict_p);
3185 case PLUS:
3186 op0 = XEXP (x, 0);
3187 op1 = XEXP (x, 1);
3188 if (GET_MODE_SIZE (mode) != 0
3189 && CONST_INT_P (op1)
3190 && aarch64_base_register_rtx_p (op0, strict_p))
3192 HOST_WIDE_INT offset = INTVAL (op1);
3194 info->type = ADDRESS_REG_IMM;
3195 info->base = op0;
3196 info->offset = op1;
3198 /* TImode and TFmode values are allowed in both pairs of X
3199 registers and individual Q registers. The available
3200 address modes are:
3201 X,X: 7-bit signed scaled offset
3202 Q: 9-bit signed offset
3203 We conservatively require an offset representable in either mode.  */
3205 if (mode == TImode || mode == TFmode)
3206 return (offset_7bit_signed_scaled_p (mode, offset)
3207 && offset_9bit_signed_unscaled_p (mode, offset));
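/* e.g. for TImode the accepted offsets are the multiples of 16 in
   [-256, 240]: the intersection of the 7-bit scaled range
   [-1024, 1008] with the 9-bit unscaled range [-256, 255].
   (Illustrative note, not in the original sources.)  */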
3209 if (outer_code == PARALLEL)
3210 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3211 && offset_7bit_signed_scaled_p (mode, offset));
3212 else
3213 return (offset_9bit_signed_unscaled_p (mode, offset)
3214 || offset_12bit_unsigned_scaled_p (mode, offset));
3217 if (allow_reg_index_p)
3219 /* Look for base + (scaled/extended) index register. */
3220 if (aarch64_base_register_rtx_p (op0, strict_p)
3221 && aarch64_classify_index (info, op1, mode, strict_p))
3223 info->base = op0;
3224 return true;
3226 if (aarch64_base_register_rtx_p (op1, strict_p)
3227 && aarch64_classify_index (info, op0, mode, strict_p))
3229 info->base = op1;
3230 return true;
3234 return false;
3236 case POST_INC:
3237 case POST_DEC:
3238 case PRE_INC:
3239 case PRE_DEC:
3240 info->type = ADDRESS_REG_WB;
3241 info->base = XEXP (x, 0);
3242 info->offset = NULL_RTX;
3243 return aarch64_base_register_rtx_p (info->base, strict_p);
3245 case POST_MODIFY:
3246 case PRE_MODIFY:
3247 info->type = ADDRESS_REG_WB;
3248 info->base = XEXP (x, 0);
3249 if (GET_CODE (XEXP (x, 1)) == PLUS
3250 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3251 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3252 && aarch64_base_register_rtx_p (info->base, strict_p))
3254 HOST_WIDE_INT offset;
3255 info->offset = XEXP (XEXP (x, 1), 1);
3256 offset = INTVAL (info->offset);
3258 /* TImode and TFmode values are allowed in both pairs of X
3259 registers and individual Q registers. The available
3260 address modes are:
3261 X,X: 7-bit signed scaled offset
3262 Q: 9-bit signed offset
3263 We conservatively require an offset representable in either mode.  */
3265 if (mode == TImode || mode == TFmode)
3266 return (offset_7bit_signed_scaled_p (mode, offset)
3267 && offset_9bit_signed_unscaled_p (mode, offset));
3269 if (outer_code == PARALLEL)
3270 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3271 && offset_7bit_signed_scaled_p (mode, offset));
3272 else
3273 return offset_9bit_signed_unscaled_p (mode, offset);
3275 return false;
3277 case CONST:
3278 case SYMBOL_REF:
3279 case LABEL_REF:
3280 /* Load literal: a pc-relative constant pool entry.  Only supported
3281 for SI mode or larger. */
3282 info->type = ADDRESS_SYMBOLIC;
3283 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3285 rtx sym, addend;
3287 split_const (x, &sym, &addend);
3288 return (GET_CODE (sym) == LABEL_REF
3289 || (GET_CODE (sym) == SYMBOL_REF
3290 && CONSTANT_POOL_ADDRESS_P (sym)));
3292 return false;
3294 case LO_SUM:
3295 info->type = ADDRESS_LO_SUM;
3296 info->base = XEXP (x, 0);
3297 info->offset = XEXP (x, 1);
3298 if (allow_reg_index_p
3299 && aarch64_base_register_rtx_p (info->base, strict_p))
3301 rtx sym, offs;
3302 split_const (info->offset, &sym, &offs);
3303 if (GET_CODE (sym) == SYMBOL_REF
3304 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3305 == SYMBOL_SMALL_ABSOLUTE))
3307 /* The symbol and offset must be aligned to the access size. */
3308 unsigned int align;
3309 unsigned int ref_size;
3311 if (CONSTANT_POOL_ADDRESS_P (sym))
3312 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3313 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3315 tree exp = SYMBOL_REF_DECL (sym);
3316 align = TYPE_ALIGN (TREE_TYPE (exp));
3317 align = CONSTANT_ALIGNMENT (exp, align);
3319 else if (SYMBOL_REF_DECL (sym))
3320 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3321 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3322 && SYMBOL_REF_BLOCK (sym) != NULL)
3323 align = SYMBOL_REF_BLOCK (sym)->alignment;
3324 else
3325 align = BITS_PER_UNIT;
3327 ref_size = GET_MODE_SIZE (mode);
3328 if (ref_size == 0)
3329 ref_size = GET_MODE_SIZE (DImode);
3331 return ((INTVAL (offs) & (ref_size - 1)) == 0
3332 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3335 return false;
3337 default:
3338 return false;
3342 bool
3343 aarch64_symbolic_address_p (rtx x)
3345 rtx offset;
3347 split_const (x, &x, &offset);
3348 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3351 /* Classify the base of symbolic expression X, given that X appears in
3352 context CONTEXT. */
3354 enum aarch64_symbol_type
3355 aarch64_classify_symbolic_expression (rtx x,
3356 enum aarch64_symbol_context context)
3358 rtx offset;
3360 split_const (x, &x, &offset);
3361 return aarch64_classify_symbol (x, context);
3365 /* Return TRUE if X is a legitimate address for accessing memory in
3366 mode MODE. */
3367 static bool
3368 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3370 struct aarch64_address_info addr;
3372 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3375 /* Return TRUE if X is a legitimate address for accessing memory in
3376 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3377 pair operation. */
3378 bool
3379 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3380 RTX_CODE outer_code, bool strict_p)
3382 struct aarch64_address_info addr;
3384 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3387 /* Return TRUE if rtx X is immediate constant 0.0 */
3388 bool
3389 aarch64_float_const_zero_rtx_p (rtx x)
3391 REAL_VALUE_TYPE r;
3393 if (GET_MODE (x) == VOIDmode)
3394 return false;
3396 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3397 if (REAL_VALUE_MINUS_ZERO (r))
3398 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3399 return REAL_VALUES_EQUAL (r, dconst0);
3402 /* Return the fixed registers used for condition codes. */
3404 static bool
3405 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3407 *p1 = CC_REGNUM;
3408 *p2 = INVALID_REGNUM;
3409 return true;
3412 enum machine_mode
3413 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3415 /* All floating point compares return CCFP if it is an equality
3416 comparison, and CCFPE otherwise. */
3417 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3419 switch (code)
3421 case EQ:
3422 case NE:
3423 case UNORDERED:
3424 case ORDERED:
3425 case UNLT:
3426 case UNLE:
3427 case UNGT:
3428 case UNGE:
3429 case UNEQ:
3430 case LTGT:
3431 return CCFPmode;
3433 case LT:
3434 case LE:
3435 case GT:
3436 case GE:
3437 return CCFPEmode;
3439 default:
3440 gcc_unreachable ();
3444 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3445 && y == const0_rtx
3446 && (code == EQ || code == NE || code == LT || code == GE)
3447 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3448 || GET_CODE (x) == NEG))
3449 return CC_NZmode;
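/* Example (illustrative): comparing (plus:DI x y) against const0_rtx
   with EQ selects CC_NZmode, so the addition can be emitted as ADDS
   and the branch only needs the N and Z flags.  */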
3451 /* A compare with a shifted operand. Because of canonicalization,
3452 the comparison will have to be swapped when we emit the assembly
3453 code. */
3454 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3455 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3456 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3457 || GET_CODE (x) == LSHIFTRT
3458 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3459 return CC_SWPmode;
3461 /* Similarly for a negated operand, but we can only do this for
3462 equalities. */
3463 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3464 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3465 && (code == EQ || code == NE)
3466 && GET_CODE (x) == NEG)
3467 return CC_Zmode;
3469 /* A compare of a mode narrower than SI mode against zero can be done
3470 by extending the value in the comparison. */
3471 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3472 && y == const0_rtx)
3473 /* Only use sign-extension if we really need it. */
3474 return ((code == GT || code == GE || code == LE || code == LT)
3475 ? CC_SESWPmode : CC_ZESWPmode);
3477 /* For everything else, return CCmode. */
3478 return CCmode;
3481 static unsigned
3482 aarch64_get_condition_code (rtx x)
3484 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3485 enum rtx_code comp_code = GET_CODE (x);
3487 if (GET_MODE_CLASS (mode) != MODE_CC)
3488 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3490 switch (mode)
3492 case CCFPmode:
3493 case CCFPEmode:
3494 switch (comp_code)
3496 case GE: return AARCH64_GE;
3497 case GT: return AARCH64_GT;
3498 case LE: return AARCH64_LS;
3499 case LT: return AARCH64_MI;
3500 case NE: return AARCH64_NE;
3501 case EQ: return AARCH64_EQ;
3502 case ORDERED: return AARCH64_VC;
3503 case UNORDERED: return AARCH64_VS;
3504 case UNLT: return AARCH64_LT;
3505 case UNLE: return AARCH64_LE;
3506 case UNGT: return AARCH64_HI;
3507 case UNGE: return AARCH64_PL;
3508 default: gcc_unreachable ();
3510 break;
3512 case CCmode:
3513 switch (comp_code)
3515 case NE: return AARCH64_NE;
3516 case EQ: return AARCH64_EQ;
3517 case GE: return AARCH64_GE;
3518 case GT: return AARCH64_GT;
3519 case LE: return AARCH64_LE;
3520 case LT: return AARCH64_LT;
3521 case GEU: return AARCH64_CS;
3522 case GTU: return AARCH64_HI;
3523 case LEU: return AARCH64_LS;
3524 case LTU: return AARCH64_CC;
3525 default: gcc_unreachable ();
3527 break;
3529 case CC_SWPmode:
3530 case CC_ZESWPmode:
3531 case CC_SESWPmode:
3532 switch (comp_code)
3534 case NE: return AARCH64_NE;
3535 case EQ: return AARCH64_EQ;
3536 case GE: return AARCH64_LE;
3537 case GT: return AARCH64_LT;
3538 case LE: return AARCH64_GE;
3539 case LT: return AARCH64_GT;
3540 case GEU: return AARCH64_LS;
3541 case GTU: return AARCH64_CC;
3542 case LEU: return AARCH64_CS;
3543 case LTU: return AARCH64_HI;
3544 default: gcc_unreachable ();
3546 break;
3548 case CC_NZmode:
3549 switch (comp_code)
3551 case NE: return AARCH64_NE;
3552 case EQ: return AARCH64_EQ;
3553 case GE: return AARCH64_PL;
3554 case LT: return AARCH64_MI;
3555 default: gcc_unreachable ();
3557 break;
3559 case CC_Zmode:
3560 switch (comp_code)
3562 case NE: return AARCH64_NE;
3563 case EQ: return AARCH64_EQ;
3564 default: gcc_unreachable ();
3566 break;
3568 default:
3569 gcc_unreachable ();
3570 break;
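/* Return the number of set bits in VALUE; each loop iteration clears
   the lowest set bit (Kernighan's method).  */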
3574 static unsigned
3575 bit_count (unsigned HOST_WIDE_INT value)
3577 unsigned count = 0;
3579 while (value)
3581 count++;
3582 value &= value - 1;
3585 return count;
3588 void
3589 aarch64_print_operand (FILE *f, rtx x, char code)
3591 switch (code)
3593 /* An integer or symbol address without a preceding # sign. */
3594 case 'c':
3595 switch (GET_CODE (x))
3597 case CONST_INT:
3598 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3599 break;
3601 case SYMBOL_REF:
3602 output_addr_const (f, x);
3603 break;
3605 case CONST:
3606 if (GET_CODE (XEXP (x, 0)) == PLUS
3607 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3609 output_addr_const (f, x);
3610 break;
3612 /* Fall through. */
3614 default:
3615 output_operand_lossage ("Unsupported operand for code '%c'", code);
3617 break;
3619 case 'e':
3620 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3622 int n;
3624 if (GET_CODE (x) != CONST_INT
3625 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3627 output_operand_lossage ("invalid operand for '%%%c'", code);
3628 return;
3631 switch (n)
3633 case 3:
3634 fputc ('b', f);
3635 break;
3636 case 4:
3637 fputc ('h', f);
3638 break;
3639 case 5:
3640 fputc ('w', f);
3641 break;
3642 default:
3643 output_operand_lossage ("invalid operand for '%%%c'", code);
3644 return;
3647 break;
3649 case 'p':
3651 int n;
3653 /* Print N such that 2^N == X. */
3654 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3656 output_operand_lossage ("invalid operand for '%%%c'", code);
3657 return;
3660 asm_fprintf (f, "%d", n);
3662 break;
3664 case 'P':
3665 /* Print the number of non-zero bits in X (a const_int). */
3666 if (GET_CODE (x) != CONST_INT)
3668 output_operand_lossage ("invalid operand for '%%%c'", code);
3669 return;
3672 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3673 break;
3675 case 'H':
3676 /* Print the higher numbered register of a pair (TImode) of regs. */
3677 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3679 output_operand_lossage ("invalid operand for '%%%c'", code);
3680 return;
3683 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3684 break;
3686 case 'm':
3687 /* Print a condition (eq, ne, etc). */
3689 /* CONST_TRUE_RTX means always -- that's the default. */
3690 if (x == const_true_rtx)
3691 return;
3693 if (!COMPARISON_P (x))
3695 output_operand_lossage ("invalid operand for '%%%c'", code);
3696 return;
3699 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3700 break;
3702 case 'M':
3703 /* Print the inverse of a condition (eq <-> ne, etc). */
3705 /* CONST_TRUE_RTX means never -- that's the default. */
3706 if (x == const_true_rtx)
3708 fputs ("nv", f);
3709 return;
3712 if (!COMPARISON_P (x))
3714 output_operand_lossage ("invalid operand for '%%%c'", code);
3715 return;
3718 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3719 (aarch64_get_condition_code (x))], f);
3720 break;
3722 case 'b':
3723 case 'h':
3724 case 's':
3725 case 'd':
3726 case 'q':
3727 /* Print a scalar FP/SIMD register name. */
3728 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3730 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3731 return;
3733 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3734 break;
3736 case 'S':
3737 case 'T':
3738 case 'U':
3739 case 'V':
3740 /* Print the first FP/SIMD register name in a list. */
3741 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3743 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3744 return;
3746 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3747 break;
3749 case 'X':
3750 /* Print bottom 16 bits of integer constant in hex. */
3751 if (GET_CODE (x) != CONST_INT)
3753 output_operand_lossage ("invalid operand for '%%%c'", code);
3754 return;
3756 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3757 break;
3759 case 'w':
3760 case 'x':
3761 /* Print a general register name or the zero register (32-bit or
3762 64-bit). */
3763 if (x == const0_rtx
3764 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3766 asm_fprintf (f, "%czr", code);
3767 break;
3770 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3772 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3773 break;
3776 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3778 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3779 break;
3782 /* Fall through */
3784 case 0:
3785 /* Print a normal operand.  If it's a general register, then we
3786 assume DImode.  */
3787 if (x == NULL)
3789 output_operand_lossage ("missing operand");
3790 return;
3793 switch (GET_CODE (x))
3795 case REG:
3796 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3797 break;
3799 case MEM:
3800 aarch64_memory_reference_mode = GET_MODE (x);
3801 output_address (XEXP (x, 0));
3802 break;
3804 case LABEL_REF:
3805 case SYMBOL_REF:
3806 output_addr_const (asm_out_file, x);
3807 break;
3809 case CONST_INT:
3810 asm_fprintf (f, "%wd", INTVAL (x));
3811 break;
3813 case CONST_VECTOR:
3814 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3816 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3817 HOST_WIDE_INT_MIN,
3818 HOST_WIDE_INT_MAX));
3819 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3821 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3823 fputc ('0', f);
3825 else
3826 gcc_unreachable ();
3827 break;
3829 case CONST_DOUBLE:
3830 /* CONST_DOUBLE can represent a double-width integer.
3831 In this case, the mode of x is VOIDmode. */
3832 if (GET_MODE (x) == VOIDmode)
3833 ; /* Do Nothing. */
3834 else if (aarch64_float_const_zero_rtx_p (x))
3836 fputc ('0', f);
3837 break;
3839 else if (aarch64_float_const_representable_p (x))
3841 #define buf_size 20
3842 char float_buf[buf_size] = {'\0'};
3843 REAL_VALUE_TYPE r;
3844 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3845 real_to_decimal_for_mode (float_buf, &r,
3846 buf_size, buf_size,
3847 1, GET_MODE (x));
3848 asm_fprintf (asm_out_file, "%s", float_buf);
3849 break;
3850 #undef buf_size
3852 output_operand_lossage ("invalid constant");
3853 return;
3854 default:
3855 output_operand_lossage ("invalid operand");
3856 return;
3858 break;
3860 case 'A':
3861 if (GET_CODE (x) == HIGH)
3862 x = XEXP (x, 0);
3864 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3866 case SYMBOL_SMALL_GOT:
3867 asm_fprintf (asm_out_file, ":got:");
3868 break;
3870 case SYMBOL_SMALL_TLSGD:
3871 asm_fprintf (asm_out_file, ":tlsgd:");
3872 break;
3874 case SYMBOL_SMALL_TLSDESC:
3875 asm_fprintf (asm_out_file, ":tlsdesc:");
3876 break;
3878 case SYMBOL_SMALL_GOTTPREL:
3879 asm_fprintf (asm_out_file, ":gottprel:");
3880 break;
3882 case SYMBOL_SMALL_TPREL:
3883 asm_fprintf (asm_out_file, ":tprel:");
3884 break;
3886 case SYMBOL_TINY_GOT:
3887 gcc_unreachable ();
3888 break;
3890 default:
3891 break;
3893 output_addr_const (asm_out_file, x);
3894 break;
3896 case 'L':
3897 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3899 case SYMBOL_SMALL_GOT:
3900 asm_fprintf (asm_out_file, ":lo12:");
3901 break;
3903 case SYMBOL_SMALL_TLSGD:
3904 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3905 break;
3907 case SYMBOL_SMALL_TLSDESC:
3908 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3909 break;
3911 case SYMBOL_SMALL_GOTTPREL:
3912 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3913 break;
3915 case SYMBOL_SMALL_TPREL:
3916 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3917 break;
3919 case SYMBOL_TINY_GOT:
3920 asm_fprintf (asm_out_file, ":got:");
3921 break;
3923 default:
3924 break;
3926 output_addr_const (asm_out_file, x);
3927 break;
3929 case 'G':
3931 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3933 case SYMBOL_SMALL_TPREL:
3934 asm_fprintf (asm_out_file, ":tprel_hi12:");
3935 break;
3936 default:
3937 break;
3939 output_addr_const (asm_out_file, x);
3940 break;
3942 default:
3943 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3944 return;
3948 void
3949 aarch64_print_operand_address (FILE *f, rtx x)
3951 struct aarch64_address_info addr;
3953 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3954 MEM, true))
3955 switch (addr.type)
3957 case ADDRESS_REG_IMM:
3958 if (addr.offset == const0_rtx)
3959 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3960 else
3961 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
3962 INTVAL (addr.offset));
3963 return;
3965 case ADDRESS_REG_REG:
3966 if (addr.shift == 0)
3967 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
3968 reg_names [REGNO (addr.offset)]);
3969 else
3970 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
3971 reg_names [REGNO (addr.offset)], addr.shift);
3972 return;
3974 case ADDRESS_REG_UXTW:
3975 if (addr.shift == 0)
3976 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
3977 REGNO (addr.offset) - R0_REGNUM);
3978 else
3979 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
3980 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3981 return;
3983 case ADDRESS_REG_SXTW:
3984 if (addr.shift == 0)
3985 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
3986 REGNO (addr.offset) - R0_REGNUM);
3987 else
3988 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
3989 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3990 return;
3992 case ADDRESS_REG_WB:
3993 switch (GET_CODE (x))
3995 case PRE_INC:
3996 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
3997 GET_MODE_SIZE (aarch64_memory_reference_mode));
3998 return;
3999 case POST_INC:
4000 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
4001 GET_MODE_SIZE (aarch64_memory_reference_mode));
4002 return;
4003 case PRE_DEC:
4004 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
4005 GET_MODE_SIZE (aarch64_memory_reference_mode));
4006 return;
4007 case POST_DEC:
4008 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
4009 GET_MODE_SIZE (aarch64_memory_reference_mode));
4010 return;
4011 case PRE_MODIFY:
4012 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
4013 INTVAL (addr.offset));
4014 return;
4015 case POST_MODIFY:
4016 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
4017 INTVAL (addr.offset));
4018 return;
4019 default:
4020 break;
4022 break;
4024 case ADDRESS_LO_SUM:
4025 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
4026 output_addr_const (f, addr.offset);
4027 asm_fprintf (f, "]");
4028 return;
4030 case ADDRESS_SYMBOLIC:
4031 break;
4034 output_addr_const (f, x);
4037 bool
4038 aarch64_label_mentioned_p (rtx x)
4040 const char *fmt;
4041 int i;
4043 if (GET_CODE (x) == LABEL_REF)
4044 return true;
4046 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4047 referencing instruction, but they are constant offsets, not
4048 symbols. */
4049 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4050 return false;
4052 fmt = GET_RTX_FORMAT (GET_CODE (x));
4053 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4055 if (fmt[i] == 'E')
4057 int j;
4059 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4060 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4061 return 1;
4063 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4064 return 1;
4067 return 0;
4070 /* Implement REGNO_REG_CLASS. */
4072 enum reg_class
4073 aarch64_regno_regclass (unsigned regno)
4075 if (GP_REGNUM_P (regno))
4076 return CORE_REGS;
4078 if (regno == SP_REGNUM)
4079 return STACK_REG;
4081 if (regno == FRAME_POINTER_REGNUM
4082 || regno == ARG_POINTER_REGNUM)
4083 return POINTER_REGS;
4085 if (FP_REGNUM_P (regno))
4086 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
4088 return NO_REGS;
4091 /* Try a machine-dependent way of reloading an illegitimate address
4092 operand. If we find one, push the reload and return the new rtx. */
4095 rtx aarch64_legitimize_reload_address (rtx *x_p,
4096 enum machine_mode mode,
4097 int opnum, int type,
4098 int ind_levels ATTRIBUTE_UNUSED)
4100 rtx x = *x_p;
4102 /* Do not allow mem (plus (reg, const)) if vector mode. */
4103 if (aarch64_vector_mode_p (mode)
4104 && GET_CODE (x) == PLUS
4105 && REG_P (XEXP (x, 0))
4106 && CONST_INT_P (XEXP (x, 1)))
4108 rtx orig_rtx = x;
4109 x = copy_rtx (x);
4110 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4111 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4112 opnum, (enum reload_type) type);
4113 return x;
4116 /* We must recognize output that we have already generated ourselves. */
4117 if (GET_CODE (x) == PLUS
4118 && GET_CODE (XEXP (x, 0)) == PLUS
4119 && REG_P (XEXP (XEXP (x, 0), 0))
4120 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4121 && CONST_INT_P (XEXP (x, 1)))
4123 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4124 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4125 opnum, (enum reload_type) type);
4126 return x;
4129 /* We wish to handle large displacements off a base register by splitting
4130 the addend across an add and the mem insn. This can cut the number of
4131 extra insns needed from 3 to 1. It is only useful for load/store of a
4132 single register with a 12-bit offset field.  */
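/* Worked example (hypothetical): an SImode access at
   (plus (reg x1) (const_int 0x3004)) is split into high = 0x3000,
   which is reloaded into the base register with a single add, and
   low = 0x4, which stays in the instruction's 12-bit offset field.  */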
4133 if (GET_CODE (x) == PLUS
4134 && REG_P (XEXP (x, 0))
4135 && CONST_INT_P (XEXP (x, 1))
4136 && HARD_REGISTER_P (XEXP (x, 0))
4137 && mode != TImode
4138 && mode != TFmode
4139 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4141 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4142 HOST_WIDE_INT low = val & 0xfff;
4143 HOST_WIDE_INT high = val - low;
4144 HOST_WIDE_INT offs;
4145 rtx cst;
4146 enum machine_mode xmode = GET_MODE (x);
4148 /* In ILP32, xmode can be either DImode or SImode. */
4149 gcc_assert (xmode == DImode || xmode == SImode);
4151 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4152 BLKmode alignment. */
4153 if (GET_MODE_SIZE (mode) == 0)
4154 return NULL_RTX;
4156 offs = low % GET_MODE_SIZE (mode);
4158 /* Align misaligned offset by adjusting high part to compensate. */
4159 if (offs != 0)
4161 if (aarch64_uimm12_shift (high + offs))
4163 /* Align down. */
4164 low = low - offs;
4165 high = high + offs;
4167 else
4169 /* Align up. */
4170 offs = GET_MODE_SIZE (mode) - offs;
4171 low = low + offs;
4172 high = high + (low & 0x1000) - offs;
4173 low &= 0xfff;
4177 /* Check for overflow. */
4178 if (high + low != val)
4179 return NULL_RTX;
4181 cst = GEN_INT (high);
4182 if (!aarch64_uimm12_shift (high))
4183 cst = force_const_mem (xmode, cst);
4185 /* Reload high part into base reg, leaving the low part
4186 in the mem instruction.
4187 Note that replacing this gen_rtx_PLUS with plus_constant is
4188 wrong in this case because we rely on the
4189 (plus (plus reg c1) c2) structure being preserved so that
4190 XEXP (*p, 0) in push_reload below uses the correct term. */
4191 x = gen_rtx_PLUS (xmode,
4192 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4193 GEN_INT (low));
4195 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4196 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4197 opnum, (enum reload_type) type);
4198 return x;
4201 return NULL_RTX;
4205 static reg_class_t
4206 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4207 reg_class_t rclass,
4208 enum machine_mode mode,
4209 secondary_reload_info *sri)
4211 /* Without the AdvSIMD (TARGET_SIMD) instructions we cannot move a Q
4212 register directly to another Q register. We need a scratch. */
4213 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4214 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4215 && reg_class_subset_p (rclass, FP_REGS))
4217 if (mode == TFmode)
4218 sri->icode = CODE_FOR_aarch64_reload_movtf;
4219 else if (mode == TImode)
4220 sri->icode = CODE_FOR_aarch64_reload_movti;
4221 return NO_REGS;
4224 /* A TFmode or TImode memory access should be handled via FP_REGS,
4225 because AArch64 has richer addressing modes for LDR/STR instructions
4226 than LDP/STP instructions. */
4227 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
4228 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4229 return FP_REGS;
4231 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4232 return CORE_REGS;
4234 return NO_REGS;
4237 static bool
4238 aarch64_can_eliminate (const int from, const int to)
4240 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4241 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4243 if (frame_pointer_needed)
4245 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4246 return true;
4247 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4248 return false;
4249 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4250 && !cfun->calls_alloca)
4251 return true;
4252 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4253 return true;
4255 return false;
4258 return true;
4261 HOST_WIDE_INT
4262 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4264 HOST_WIDE_INT frame_size;
4265 HOST_WIDE_INT offset;
4267 aarch64_layout_frame ();
4268 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4269 + crtl->outgoing_args_size
4270 + cfun->machine->saved_varargs_size);
4272 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4273 offset = frame_size;
4275 if (to == HARD_FRAME_POINTER_REGNUM)
4277 if (from == ARG_POINTER_REGNUM)
4278 return offset - crtl->outgoing_args_size;
4280 if (from == FRAME_POINTER_REGNUM)
4281 return cfun->machine->frame.saved_regs_size + get_frame_size ();
4284 if (to == STACK_POINTER_REGNUM)
4286 if (from == FRAME_POINTER_REGNUM)
4288 HOST_WIDE_INT elim = crtl->outgoing_args_size
4289 + cfun->machine->frame.saved_regs_size
4290 + get_frame_size ();
4291 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4292 return elim;
4296 return offset;
4300 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4301 previous frame. */
4304 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4306 if (count != 0)
4307 return const0_rtx;
4308 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4312 static void
4313 aarch64_asm_trampoline_template (FILE *f)
4315 if (TARGET_ILP32)
4317 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4318 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4320 else
4322 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4323 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4325 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4326 assemble_aligned_integer (4, const0_rtx);
4327 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4328 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
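/* Together with aarch64_trampoline_init below, the template above lays a
   trampoline out (illustratively, for LP64) as 16 bytes of code -- two
   loads from the literal words that follow, a branch through IP1 and four
   bytes of padding -- followed by two pointer-sized data words that
   aarch64_trampoline_init fills with the target function address and the
   static chain value.  */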
4331 static void
4332 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4334 rtx fnaddr, mem, a_tramp;
4335 const int tramp_code_sz = 16;
4337 /* We don't need to copy the trailing D-words, as we fill those in below. */
4338 emit_block_move (m_tramp, assemble_trampoline_template (),
4339 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4340 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4341 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4342 if (GET_MODE (fnaddr) != ptr_mode)
4343 fnaddr = convert_memory_address (ptr_mode, fnaddr);
4344 emit_move_insn (mem, fnaddr);
4346 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4347 emit_move_insn (mem, chain_value);
4349 /* XXX We should really define a "clear_cache" pattern and use
4350 gen_clear_cache(). */
4351 a_tramp = XEXP (m_tramp, 0);
4352 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4353 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4354 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4355 ptr_mode);
4358 static unsigned char
4359 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4361 switch (regclass)
4363 case CORE_REGS:
4364 case POINTER_REGS:
4365 case GENERAL_REGS:
4366 case ALL_REGS:
4367 case FP_REGS:
4368 case FP_LO_REGS:
4369 return
4370 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4371 (GET_MODE_SIZE (mode) + 7) / 8;
4372 case STACK_REG:
4373 return 1;
4375 case NO_REGS:
4376 return 0;
4378 default:
4379 break;
4381 gcc_unreachable ();
4384 static reg_class_t
4385 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4387 if (regclass == POINTER_REGS)
4388 return GENERAL_REGS;
4390 if (regclass == STACK_REG)
4392 if (REG_P(x)
4393 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4394 return regclass;
4396 return NO_REGS;
4399 /* If it's an integer immediate that MOVI can't handle, then
4400 FP_REGS is not an option, so we return NO_REGS instead. */
4401 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4402 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4403 return NO_REGS;
4405 /* Register elimination can result in a request for
4406 SP+constant->FP_REGS. We cannot support such operations, which
4407 use SP as the source and an FP_REG as the destination, so reject
4408 them outright. */
4409 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4411 rtx lhs = XEXP (x, 0);
4413 /* Look through a possible SUBREG introduced by ILP32. */
4414 if (GET_CODE (lhs) == SUBREG)
4415 lhs = SUBREG_REG (lhs);
4417 gcc_assert (REG_P (lhs));
4418 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4419 POINTER_REGS));
4420 return NO_REGS;
4423 return regclass;
4426 void
4427 aarch64_asm_output_labelref (FILE* f, const char *name)
4429 asm_fprintf (f, "%U%s", name);
4432 static void
4433 aarch64_elf_asm_constructor (rtx symbol, int priority)
4435 if (priority == DEFAULT_INIT_PRIORITY)
4436 default_ctor_section_asm_out_constructor (symbol, priority);
4437 else
4439 section *s;
4440 char buf[18];
4441 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4442 s = get_section (buf, SECTION_WRITE, NULL);
4443 switch_to_section (s);
4444 assemble_align (POINTER_SIZE);
4445 assemble_aligned_integer (POINTER_BYTES, symbol);
4449 static void
4450 aarch64_elf_asm_destructor (rtx symbol, int priority)
4452 if (priority == DEFAULT_INIT_PRIORITY)
4453 default_dtor_section_asm_out_destructor (symbol, priority);
4454 else
4456 section *s;
4457 char buf[18];
4458 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4459 s = get_section (buf, SECTION_WRITE, NULL);
4460 switch_to_section (s);
4461 assemble_align (POINTER_SIZE);
4462 assemble_aligned_integer (POINTER_BYTES, symbol);
4466 const char*
4467 aarch64_output_casesi (rtx *operands)
4469 char buf[100];
4470 char label[100];
4471 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
4472 int index;
4473 static const char *const patterns[4][2] =
4476 "ldrb\t%w3, [%0,%w1,uxtw]",
4477 "add\t%3, %4, %w3, sxtb #2"
4480 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4481 "add\t%3, %4, %w3, sxth #2"
4484 "ldr\t%w3, [%0,%w1,uxtw #2]",
4485 "add\t%3, %4, %w3, sxtw #2"
4487 /* We assume that DImode is only generated when not optimizing and
4488 that we don't really need 64-bit address offsets. That would
4489 imply an object file with 8GB of code in a single function! */
4491 "ldr\t%w3, [%0,%w1,uxtw #2]",
4492 "add\t%3, %4, %w3, sxtw #2"
4496 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4498 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4500 gcc_assert (index >= 0 && index <= 3);
4502 /* Need to implement table size reduction, by changing the code below. */
4503 output_asm_insn (patterns[index][0], operands);
4504 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4505 snprintf (buf, sizeof (buf),
4506 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4507 output_asm_insn (buf, operands);
4508 output_asm_insn (patterns[index][1], operands);
4509 output_asm_insn ("br\t%3", operands);
4510 assemble_label (asm_out_file, label);
4511 return "";
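/* For example, with a HImode dispatch table the sequence emitted above is
   roughly (register numbers illustrative):

       ldrh  w3, [x0, w1, uxtw #1]
       adr   x4, .Lrtx<N>
       add   x3, x4, w3, sxth #2
       br    x3
   .Lrtx<N>:

   where <N> is the label number of the jump-table insn.  */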
4515 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4516 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4517 operator. */
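/* For example, aarch64_uxt_size (1, 0x1fe) returns 8, since 0x1fe is 0xff
   shifted left by one (a byte-sized operand scaled by 2, as used by a
   UXTB-style extend); a mask that is not a shifted 0xff, 0xffff or
   0xffffffff yields 0.  */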
4520 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4522 if (shift >= 0 && shift <= 3)
4524 int size;
4525 for (size = 8; size <= 32; size *= 2)
4527 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4528 if (mask == bits << shift)
4529 return size;
4532 return 0;
4535 static bool
4536 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4537 const_rtx x ATTRIBUTE_UNUSED)
4539 /* We can't use blocks for constants when we're using a per-function
4540 constant pool. */
4541 return false;
4544 static section *
4545 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4546 rtx x ATTRIBUTE_UNUSED,
4547 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4549 /* Force all constant pool entries into the current function section. */
4550 return function_section (current_function_decl);
4554 /* Costs. */
4556 /* Helper function for rtx cost calculation. Strip a shift expression
4557 from X. Returns the inner operand if successful, or the original
4558 expression on failure. */
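/* For example, both (ashift (reg X) (const_int 3)) and the equivalent
   (mult (reg X) (const_int 8)) are stripped down to (reg X); a shift by a
   register, or a multiply by a non-power-of-two constant, is returned
   unchanged.  */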
4559 static rtx
4560 aarch64_strip_shift (rtx x)
4562 rtx op = x;
4564 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
4565 we can convert both to ROR during final output. */
4566 if ((GET_CODE (op) == ASHIFT
4567 || GET_CODE (op) == ASHIFTRT
4568 || GET_CODE (op) == LSHIFTRT
4569 || GET_CODE (op) == ROTATERT
4570 || GET_CODE (op) == ROTATE)
4571 && CONST_INT_P (XEXP (op, 1)))
4572 return XEXP (op, 0);
4574 if (GET_CODE (op) == MULT
4575 && CONST_INT_P (XEXP (op, 1))
4576 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4577 return XEXP (op, 0);
4579 return x;
4582 /* Helper function for rtx cost calculation. Strip an extend
4583 expression from X. Returns the inner operand if successful, or the
4584 original expression on failure. We deal with a number of possible
4585 canonicalization variations here. */
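/* For example, (zero_extend:DI (reg:SI X)) and the extended-register form
   (ashift:DI (sign_extend:DI (reg:SI X)) (const_int 2)) both strip down to
   (reg:SI X); anything that is not one of the recognized extend shapes is
   returned unchanged.  */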
4586 static rtx
4587 aarch64_strip_extend (rtx x)
4589 rtx op = x;
4591 /* Zero and sign extraction of a widened value. */
4592 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4593 && XEXP (op, 2) == const0_rtx
4594 && GET_CODE (XEXP (op, 0)) == MULT
4595 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4596 XEXP (op, 1)))
4597 return XEXP (XEXP (op, 0), 0);
4599 /* It can also be represented (for zero-extend) as an AND with an
4600 immediate. */
4601 if (GET_CODE (op) == AND
4602 && GET_CODE (XEXP (op, 0)) == MULT
4603 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4604 && CONST_INT_P (XEXP (op, 1))
4605 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4606 INTVAL (XEXP (op, 1))) != 0)
4607 return XEXP (XEXP (op, 0), 0);
4609 /* Now handle extended register, as this may also have an optional
4610 left shift by 1..4. */
4611 if (GET_CODE (op) == ASHIFT
4612 && CONST_INT_P (XEXP (op, 1))
4613 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4614 op = XEXP (op, 0);
4616 if (GET_CODE (op) == ZERO_EXTEND
4617 || GET_CODE (op) == SIGN_EXTEND)
4618 op = XEXP (op, 0);
4620 if (op != x)
4621 return op;
4623 return x;
4626 /* Helper function for rtx cost calculation. Calculate the cost of
4627 a MULT, which may be part of a multiply-accumulate rtx. Return
4628 the calculated cost of the expression, recursing manually in to
4629 operands where needed. */
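/* For example, (plus:DI (mult:DI (reg A) (reg B)) (reg C)) reaches this
   function with OUTER == PLUS, so the multiply is costed as the fused MADD
   form rather than as a separate MUL plus ADD; a multiply by a power of two
   is costed as a shift instead (or as an add-with-shifted-register when
   fused).  */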
4631 static int
4632 aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
4634 rtx op0, op1;
4635 const struct cpu_cost_table *extra_cost
4636 = aarch64_tune_params->insn_extra_cost;
4637 int cost = 0;
4638 bool maybe_fma = (outer == PLUS || outer == MINUS);
4639 enum machine_mode mode = GET_MODE (x);
4641 gcc_checking_assert (code == MULT);
4643 op0 = XEXP (x, 0);
4644 op1 = XEXP (x, 1);
4646 if (VECTOR_MODE_P (mode))
4647 mode = GET_MODE_INNER (mode);
4649 /* Integer multiply/fma. */
4650 if (GET_MODE_CLASS (mode) == MODE_INT)
4652 /* The multiply will be canonicalized as a shift, cost it as such. */
4653 if (CONST_INT_P (op1)
4654 && exact_log2 (INTVAL (op1)) > 0)
4656 if (speed)
4658 if (maybe_fma)
4659 /* ADD (shifted register). */
4660 cost += extra_cost->alu.arith_shift;
4661 else
4662 /* LSL (immediate). */
4663 cost += extra_cost->alu.shift;
4666 cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
4668 return cost;
4671 /* Integer multiplies or FMAs have zero/sign extending variants. */
4672 if ((GET_CODE (op0) == ZERO_EXTEND
4673 && GET_CODE (op1) == ZERO_EXTEND)
4674 || (GET_CODE (op0) == SIGN_EXTEND
4675 && GET_CODE (op1) == SIGN_EXTEND))
4677 cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4678 + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
4680 if (speed)
4682 if (maybe_fma)
4683 /* MADD/SMADDL/UMADDL. */
4684 cost += extra_cost->mult[0].extend_add;
4685 else
4686 /* MUL/SMULL/UMULL. */
4687 cost += extra_cost->mult[0].extend;
4690 return cost;
4693 /* This is either an integer multiply or an FMA. In both cases
4694 we want to recurse and cost the operands. */
4695 cost += rtx_cost (op0, MULT, 0, speed)
4696 + rtx_cost (op1, MULT, 1, speed);
4698 if (speed)
4700 if (maybe_fma)
4701 /* MADD. */
4702 cost += extra_cost->mult[mode == DImode].add;
4703 else
4704 /* MUL. */
4705 cost += extra_cost->mult[mode == DImode].simple;
4708 return cost;
4710 else
4712 if (speed)
4714 /* Floating-point FMA can also support negations of the
4715 operands. */
4716 if (GET_CODE (op0) == NEG)
4718 maybe_fma = true;
4719 op0 = XEXP (op0, 0);
4721 if (GET_CODE (op1) == NEG)
4723 maybe_fma = true;
4724 op1 = XEXP (op1, 0);
4727 if (maybe_fma)
4728 /* FMADD/FNMADD/FNMSUB/FMSUB. */
4729 cost += extra_cost->fp[mode == DFmode].fma;
4730 else
4731 /* FMUL. */
4732 cost += extra_cost->fp[mode == DFmode].mult;
4735 cost += rtx_cost (op0, MULT, 0, speed)
4736 + rtx_cost (op1, MULT, 1, speed);
4737 return cost;
4741 static int
4742 aarch64_address_cost (rtx x,
4743 enum machine_mode mode,
4744 addr_space_t as ATTRIBUTE_UNUSED,
4745 bool speed)
4747 enum rtx_code c = GET_CODE (x);
4748 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4749 struct aarch64_address_info info;
4750 int cost = 0;
4751 info.shift = 0;
4753 if (!aarch64_classify_address (&info, x, mode, c, false))
4755 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
4757 /* This is a CONST or SYMBOL ref which will be split
4758 in a different way depending on the code model in use.
4759 Cost it through the generic infrastructure. */
4760 int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
4761 /* Divide through by the cost of one instruction to
4762 bring it to the same units as the address costs. */
4763 cost_symbol_ref /= COSTS_N_INSNS (1);
4764 /* The cost is then the cost of preparing the address,
4765 followed by an immediate (possibly 0) offset. */
4766 return cost_symbol_ref + addr_cost->imm_offset;
4768 else
4770 /* This is most likely a jump table from a case
4771 statement. */
4772 return addr_cost->register_offset;
4776 switch (info.type)
4778 case ADDRESS_LO_SUM:
4779 case ADDRESS_SYMBOLIC:
4780 case ADDRESS_REG_IMM:
4781 cost += addr_cost->imm_offset;
4782 break;
4784 case ADDRESS_REG_WB:
4785 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4786 cost += addr_cost->pre_modify;
4787 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4788 cost += addr_cost->post_modify;
4789 else
4790 gcc_unreachable ();
4792 break;
4794 case ADDRESS_REG_REG:
4795 cost += addr_cost->register_offset;
4796 break;
4798 case ADDRESS_REG_UXTW:
4799 case ADDRESS_REG_SXTW:
4800 cost += addr_cost->register_extend;
4801 break;
4803 default:
4804 gcc_unreachable ();
4808 if (info.shift > 0)
4810 /* For the sake of calculating the cost of the shifted register
4811 component, we can treat same sized modes in the same way. */
4812 switch (GET_MODE_BITSIZE (mode))
4814 case 16:
4815 cost += addr_cost->addr_scale_costs.hi;
4816 break;
4818 case 32:
4819 cost += addr_cost->addr_scale_costs.si;
4820 break;
4822 case 64:
4823 cost += addr_cost->addr_scale_costs.di;
4824 break;
4826 /* We can't tell, or this is a 128-bit vector. */
4827 default:
4828 cost += addr_cost->addr_scale_costs.ti;
4829 break;
4833 return cost;
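/* For example, a scaled register-offset SImode address such as
   (plus (reg base) (mult (reg index) (const_int 4))) pays
   register_offset plus addr_scale_costs.si above, whereas a simple
   (plus (reg base) (const_int 8)) address only pays imm_offset.  */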
4836 /* Calculate the cost of calculating X, storing it in *COST. Result
4837 is true if the total cost of the operation has now been calculated. */
4838 static bool
4839 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4840 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4842 rtx op0, op1;
4843 const struct cpu_cost_table *extra_cost
4844 = aarch64_tune_params->insn_extra_cost;
4845 enum machine_mode mode = GET_MODE (x);
4847 /* By default, assume that everything has equivalent cost to the
4848 cheapest instruction. Any additional costs are applied as a delta
4849 above this default. */
4850 *cost = COSTS_N_INSNS (1);
4852 /* TODO: The cost infrastructure currently does not handle
4853 vector operations. Assume that all vector operations
4854 are equally expensive. */
4855 if (VECTOR_MODE_P (mode))
4857 if (speed)
4858 *cost += extra_cost->vect.alu;
4859 return true;
4862 switch (code)
4864 case SET:
4865 /* The cost depends entirely on the operands to SET. */
4866 *cost = 0;
4867 op0 = SET_DEST (x);
4868 op1 = SET_SRC (x);
4870 switch (GET_CODE (op0))
4872 case MEM:
4873 if (speed)
4875 rtx address = XEXP (op0, 0);
4876 if (GET_MODE_CLASS (mode) == MODE_INT)
4877 *cost += extra_cost->ldst.store;
4878 else if (mode == SFmode)
4879 *cost += extra_cost->ldst.storef;
4880 else if (mode == DFmode)
4881 *cost += extra_cost->ldst.stored;
4883 *cost +=
4884 COSTS_N_INSNS (aarch64_address_cost (address, mode,
4885 0, speed));
4888 *cost += rtx_cost (op1, SET, 1, speed);
4889 return true;
4891 case SUBREG:
4892 if (! REG_P (SUBREG_REG (op0)))
4893 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4895 /* Fall through. */
4896 case REG:
4897 /* const0_rtx is in general free, but we will use an
4898 instruction to set a register to 0. */
4899 if (REG_P (op1) || op1 == const0_rtx)
4901 /* The cost is 1 per register copied. */
4902 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
4903 / UNITS_PER_WORD;
4904 *cost = COSTS_N_INSNS (n_minus_1 + 1);
4906 else
4907 /* Cost is just the cost of the RHS of the set. */
4908 *cost += rtx_cost (op1, SET, 1, speed);
4909 return true;
4911 case ZERO_EXTRACT:
4912 case SIGN_EXTRACT:
4913 /* Bit-field insertion. Strip any redundant widening of
4914 the RHS to meet the width of the target. */
4915 if (GET_CODE (op1) == SUBREG)
4916 op1 = SUBREG_REG (op1);
4917 if ((GET_CODE (op1) == ZERO_EXTEND
4918 || GET_CODE (op1) == SIGN_EXTEND)
4919 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4920 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4921 >= INTVAL (XEXP (op0, 1))))
4922 op1 = XEXP (op1, 0);
4924 if (CONST_INT_P (op1))
4926 /* MOV immediate is assumed to always be cheap. */
4927 *cost = COSTS_N_INSNS (1);
4929 else
4931 /* BFM. */
4932 if (speed)
4933 *cost += extra_cost->alu.bfi;
4934 *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
4937 return true;
4939 default:
4940 /* We can't make sense of this, assume default cost. */
4941 *cost = COSTS_N_INSNS (1);
4942 break;
4944 return false;
4946 case CONST_INT:
4947 /* If an instruction can incorporate a constant within the
4948 instruction, the instruction's expression avoids calling
4949 rtx_cost() on the constant. If rtx_cost() is called on a
4950 constant, then it is usually because the constant must be
4951 moved into a register by one or more instructions.
4953 The exception is constant 0, which can be expressed
4954 as XZR/WZR and is therefore free. However, if we have
4955 (set (reg) (const0_rtx)) we must still cost the move;
4956 we catch that case when we cost the SET, so we do not
4957 need to consider it here. */
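/* For example, a constant such as 0x12345678, which needs a MOVZ/MOVK
   pair to materialize, is costed here as roughly two instructions,
   whereas 0 itself is free (XZR/WZR).  */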
4958 if (x == const0_rtx)
4959 *cost = 0;
4960 else
4962 /* To an approximation, the cost of building any other constant
4963 is proportional to the number of instructions required to
4964 build it. This is true whether we are compiling for
4965 SPEED or otherwise. */
4966 *cost = COSTS_N_INSNS (aarch64_build_constant (0,
4967 INTVAL (x),
4968 false));
4970 return true;
4972 case CONST_DOUBLE:
4973 if (speed)
4975 /* mov[df,sf]_aarch64. */
4976 if (aarch64_float_const_representable_p (x))
4977 /* FMOV (scalar immediate). */
4978 *cost += extra_cost->fp[mode == DFmode].fpconst;
4979 else if (!aarch64_float_const_zero_rtx_p (x))
4981 /* This will be a load from memory. */
4982 if (mode == DFmode)
4983 *cost += extra_cost->ldst.loadd;
4984 else
4985 *cost += extra_cost->ldst.loadf;
4987 else
4988 /* Otherwise this is +0.0. We get this using MOVI d0, #0
4989 or MOV v0.s[0], wzr, neither of which is modeled by the
4990 cost tables. Just use the default cost. */
4995 return true;
4997 case MEM:
4998 if (speed)
5000 /* For loads we want the base cost of a load, plus an
5001 approximation for the additional cost of the addressing
5002 mode. */
5003 rtx address = XEXP (x, 0);
5004 if (GET_MODE_CLASS (mode) == MODE_INT)
5005 *cost += extra_cost->ldst.load;
5006 else if (mode == SFmode)
5007 *cost += extra_cost->ldst.loadf;
5008 else if (mode == DFmode)
5009 *cost += extra_cost->ldst.loadd;
5011 *cost +=
5012 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5013 0, speed));
5016 return true;
5018 case NEG:
5019 op0 = XEXP (x, 0);
5021 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5023 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5024 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5026 /* CSETM. */
5027 *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
5028 return true;
5031 /* Cost this as SUB wzr, X. */
5032 op0 = CONST0_RTX (GET_MODE (x));
5033 op1 = XEXP (x, 0);
5034 goto cost_minus;
5037 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
5039 /* Support (neg(fma...)) as a single instruction only if
5040 sign of zeros is unimportant. This matches the decision
5041 making in aarch64.md. */
5042 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
5044 /* FNMADD. */
5045 *cost = rtx_cost (op0, NEG, 0, speed);
5046 return true;
5048 if (speed)
5049 /* FNEG. */
5050 *cost += extra_cost->fp[mode == DFmode].neg;
5051 return false;
5054 return false;
5056 case COMPARE:
5057 op0 = XEXP (x, 0);
5058 op1 = XEXP (x, 1);
5060 if (op1 == const0_rtx
5061 && GET_CODE (op0) == AND)
5063 x = op0;
5064 goto cost_logic;
5067 /* Comparisons can work if the order is swapped.
5068 Canonicalization puts the more complex operation first, but
5069 we want it in op1. */
5070 if (! (REG_P (op0)
5071 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
5073 op0 = XEXP (x, 1);
5074 op1 = XEXP (x, 0);
5076 goto cost_minus;
5078 case MINUS:
5080 op0 = XEXP (x, 0);
5081 op1 = XEXP (x, 1);
5083 cost_minus:
5084 /* Detect valid immediates. */
5085 if ((GET_MODE_CLASS (mode) == MODE_INT
5086 || (GET_MODE_CLASS (mode) == MODE_CC
5087 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
5088 && CONST_INT_P (op1)
5089 && aarch64_uimm12_shift (INTVAL (op1)))
5091 *cost += rtx_cost (op0, MINUS, 0, speed);
5093 if (speed)
5094 /* SUB(S) (immediate). */
5095 *cost += extra_cost->alu.arith;
5096 return true;
5100 rtx new_op1 = aarch64_strip_extend (op1);
5102 /* Cost this as an FMA-alike operation. */
5103 if ((GET_CODE (new_op1) == MULT
5104 || GET_CODE (new_op1) == ASHIFT)
5105 && code != COMPARE)
5107 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
5108 (enum rtx_code) code,
5109 speed);
5110 *cost += rtx_cost (op0, MINUS, 0, speed);
5111 return true;
5114 *cost += rtx_cost (new_op1, MINUS, 1, speed);
5116 if (speed)
5118 if (GET_MODE_CLASS (mode) == MODE_INT)
5119 /* SUB(S). */
5120 *cost += extra_cost->alu.arith;
5121 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5122 /* FSUB. */
5123 *cost += extra_cost->fp[mode == DFmode].addsub;
5125 return true;
5128 case PLUS:
5130 rtx new_op0;
5132 op0 = XEXP (x, 0);
5133 op1 = XEXP (x, 1);
5135 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5136 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5138 /* CSINC. */
5139 *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
5140 *cost += rtx_cost (op1, PLUS, 1, speed);
5141 return true;
5144 if (GET_MODE_CLASS (mode) == MODE_INT
5145 && CONST_INT_P (op1)
5146 && aarch64_uimm12_shift (INTVAL (op1)))
5148 *cost += rtx_cost (op0, PLUS, 0, speed);
5150 if (speed)
5151 /* ADD (immediate). */
5152 *cost += extra_cost->alu.arith;
5153 return true;
5156 /* Strip any extend, leave shifts behind as we will
5157 cost them through mult_cost. */
5158 new_op0 = aarch64_strip_extend (op0);
5160 if (GET_CODE (new_op0) == MULT
5161 || GET_CODE (new_op0) == ASHIFT)
5163 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
5164 speed);
5165 *cost += rtx_cost (op1, PLUS, 1, speed);
5166 return true;
5169 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
5170 + rtx_cost (op1, PLUS, 1, speed));
5172 if (speed)
5174 if (GET_MODE_CLASS (mode) == MODE_INT)
5175 /* ADD. */
5176 *cost += extra_cost->alu.arith;
5177 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5178 /* FADD. */
5179 *cost += extra_cost->fp[mode == DFmode].addsub;
5181 return true;
5184 case BSWAP:
5185 *cost = COSTS_N_INSNS (1);
5187 if (speed)
5188 *cost += extra_cost->alu.rev;
5190 return false;
5192 case IOR:
5193 if (aarch_rev16_p (x))
5195 *cost = COSTS_N_INSNS (1);
5197 if (speed)
5198 *cost += extra_cost->alu.rev;
5200 return true;
5202 /* Fall through. */
5203 case XOR:
5204 case AND:
5205 cost_logic:
5206 op0 = XEXP (x, 0);
5207 op1 = XEXP (x, 1);
5209 if (code == AND
5210 && GET_CODE (op0) == MULT
5211 && CONST_INT_P (XEXP (op0, 1))
5212 && CONST_INT_P (op1)
5213 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
5214 INTVAL (op1)) != 0)
5216 /* This is a UBFM/SBFM. */
5217 *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
5218 if (speed)
5219 *cost += extra_cost->alu.bfx;
5220 return true;
5223 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5225 /* We may get the immediate for free; this is not
5226 modelled. */
5227 if (CONST_INT_P (op1)
5228 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
5230 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5232 if (speed)
5233 *cost += extra_cost->alu.logical;
5235 return true;
5237 else
5239 rtx new_op0 = op0;
5241 /* Handle ORN, EON, or BIC. */
5242 if (GET_CODE (op0) == NOT)
5243 op0 = XEXP (op0, 0);
5245 new_op0 = aarch64_strip_shift (op0);
5247 /* If we had a shift on op0 then this is a logical-shift-
5248 by-register/immediate operation. Otherwise, this is just
5249 a logical operation. */
5250 if (speed)
5252 if (new_op0 != op0)
5254 /* Shift by immediate. */
5255 if (CONST_INT_P (XEXP (op0, 1)))
5256 *cost += extra_cost->alu.log_shift;
5257 else
5258 *cost += extra_cost->alu.log_shift_reg;
5260 else
5261 *cost += extra_cost->alu.logical;
5264 /* In both cases we want to cost both operands. */
5265 *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
5266 + rtx_cost (op1, (enum rtx_code) code, 1, speed);
5268 return true;
5271 return false;
5273 case NOT:
5274 /* MVN. */
5275 if (speed)
5276 *cost += extra_cost->alu.logical;
5278 /* The logical instruction could have the shifted register form,
5279 but the cost is the same if the shift is processed as a separate
5280 instruction, so we don't bother with it here. */
5281 return false;
5283 case ZERO_EXTEND:
5285 op0 = XEXP (x, 0);
5286 /* If a value is written in SI mode, then zero extended to DI
5287 mode, the operation will in general be free as a write to
5288 a 'w' register implicitly zeroes the upper bits of an 'x'
5289 register. However, if this is
5291 (set (reg) (zero_extend (reg)))
5293 we must cost the explicit register move. */
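/* For example, (set (reg:DI X) (zero_extend:DI (plus:SI ...))) costs only
   the SImode addition, because a 32-bit write already clears bits 63:32,
   whereas (set (reg:DI X) (zero_extend:DI (reg:SI Y))) still needs a MOV.  */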
5294 if (mode == DImode
5295 && GET_MODE (op0) == SImode
5296 && outer == SET)
5298 int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
5300 if (!op_cost && speed)
5301 /* MOV. */
5302 *cost += extra_cost->alu.extend;
5303 else
5304 /* Free, the cost is that of the SI mode operation. */
5305 *cost = op_cost;
5307 return true;
5309 else if (MEM_P (XEXP (x, 0)))
5311 /* All loads can zero extend to any size for free. */
5312 *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
5313 return true;
5316 /* UXTB/UXTH. */
5317 if (speed)
5318 *cost += extra_cost->alu.extend;
5320 return false;
5322 case SIGN_EXTEND:
5323 if (MEM_P (XEXP (x, 0)))
5325 /* LDRSH. */
5326 if (speed)
5328 rtx address = XEXP (XEXP (x, 0), 0);
5329 *cost += extra_cost->ldst.load_sign_extend;
5331 *cost +=
5332 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5333 0, speed));
5335 return true;
5338 if (speed)
5339 *cost += extra_cost->alu.extend;
5340 return false;
5342 case ROTATE:
5343 if (!CONST_INT_P (XEXP (x, 1)))
5344 *cost += COSTS_N_INSNS (2);
5345 /* Fall through. */
5346 case ROTATERT:
5347 case LSHIFTRT:
5348 case ASHIFT:
5349 case ASHIFTRT:
5351 /* Shifting by a register often takes an extra cycle. */
5352 if (speed && !CONST_INT_P (XEXP (x, 1)))
5353 *cost += extra_cost->alu.arith_shift_reg;
5355 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
5356 return true;
5358 case HIGH:
5359 if (!CONSTANT_P (XEXP (x, 0)))
5360 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
5361 return true;
5363 case LO_SUM:
5364 if (!CONSTANT_P (XEXP (x, 1)))
5365 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
5366 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
5367 return true;
5369 case ZERO_EXTRACT:
5370 case SIGN_EXTRACT:
5371 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
5372 return true;
5374 case MULT:
5375 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
5376 /* aarch64_rtx_mult_cost always handles recursion to its
5377 operands. */
5378 return true;
5380 case MOD:
5381 case UMOD:
5382 *cost = COSTS_N_INSNS (2);
5383 if (speed)
5385 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5386 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
5387 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
5388 else if (GET_MODE (x) == DFmode)
5389 *cost += (extra_cost->fp[1].mult
5390 + extra_cost->fp[1].div);
5391 else if (GET_MODE (x) == SFmode)
5392 *cost += (extra_cost->fp[0].mult
5393 + extra_cost->fp[0].div);
5395 return false; /* All arguments need to be in registers. */
5397 case DIV:
5398 case UDIV:
5399 *cost = COSTS_N_INSNS (1);
5400 if (speed)
5402 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5403 *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv;
5404 else if (GET_MODE (x) == DFmode)
5405 *cost += extra_cost->fp[1].div;
5406 else if (GET_MODE (x) == SFmode)
5407 *cost += extra_cost->fp[0].div;
5409 return false; /* All arguments need to be in registers. */
5411 default:
5412 break;
5414 return false;
5417 /* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
5418 calculated for X. This cost is stored in *COST. Returns true
5419 if the total cost of X was calculated. */
5420 static bool
5421 aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
5422 int param, int *cost, bool speed)
5424 bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
5426 if (dump_file && (dump_flags & TDF_DETAILS))
5428 print_rtl_single (dump_file, x);
5429 fprintf (dump_file, "\n%s cost: %d (%s)\n",
5430 speed ? "Hot" : "Cold",
5431 *cost, result ? "final" : "partial");
5434 return result;
5437 static int
5438 aarch64_register_move_cost (enum machine_mode mode,
5439 reg_class_t from_i, reg_class_t to_i)
5441 enum reg_class from = (enum reg_class) from_i;
5442 enum reg_class to = (enum reg_class) to_i;
5443 const struct cpu_regmove_cost *regmove_cost
5444 = aarch64_tune_params->regmove_cost;
5446 /* Moving between a GPR and the stack register costs the same as GP2GP. */
5447 if ((from == GENERAL_REGS && to == STACK_REG)
5448 || (to == GENERAL_REGS && from == STACK_REG))
5449 return regmove_cost->GP2GP;
5451 /* To or from the stack register, we move via the GPRs. */
5452 if (to == STACK_REG || from == STACK_REG)
5453 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
5454 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
5456 if (from == GENERAL_REGS && to == GENERAL_REGS)
5457 return regmove_cost->GP2GP;
5458 else if (from == GENERAL_REGS)
5459 return regmove_cost->GP2FP;
5460 else if (to == GENERAL_REGS)
5461 return regmove_cost->FP2GP;
5463 /* When AdvSIMD instructions are disabled it is not possible to move
5464 a 128-bit value directly between Q registers. This is handled in
5465 secondary reload. A general register is used as a scratch to move
5466 the upper DI value and the lower DI value is moved directly,
5467 hence the cost is the sum of three moves. */
5468 if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
5469 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
5471 return regmove_cost->FP2FP;
5474 static int
5475 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
5476 reg_class_t rclass ATTRIBUTE_UNUSED,
5477 bool in ATTRIBUTE_UNUSED)
5479 return aarch64_tune_params->memmov_cost;
5482 /* Return the number of instructions that can be issued per cycle. */
5483 static int
5484 aarch64_sched_issue_rate (void)
5486 return aarch64_tune_params->issue_rate;
5489 /* Vectorizer cost model target hooks. */
5491 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5492 static int
5493 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5494 tree vectype,
5495 int misalign ATTRIBUTE_UNUSED)
5497 unsigned elements;
5499 switch (type_of_cost)
5501 case scalar_stmt:
5502 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
5504 case scalar_load:
5505 return aarch64_tune_params->vec_costs->scalar_load_cost;
5507 case scalar_store:
5508 return aarch64_tune_params->vec_costs->scalar_store_cost;
5510 case vector_stmt:
5511 return aarch64_tune_params->vec_costs->vec_stmt_cost;
5513 case vector_load:
5514 return aarch64_tune_params->vec_costs->vec_align_load_cost;
5516 case vector_store:
5517 return aarch64_tune_params->vec_costs->vec_store_cost;
5519 case vec_to_scalar:
5520 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
5522 case scalar_to_vec:
5523 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
5525 case unaligned_load:
5526 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
5528 case unaligned_store:
5529 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
5531 case cond_branch_taken:
5532 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
5534 case cond_branch_not_taken:
5535 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
5537 case vec_perm:
5538 case vec_promote_demote:
5539 return aarch64_tune_params->vec_costs->vec_stmt_cost;
5541 case vec_construct:
5542 elements = TYPE_VECTOR_SUBPARTS (vectype);
5543 return elements / 2 + 1;
5545 default:
5546 gcc_unreachable ();
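/* For example, with the model above a vec_construct of a four-element
   vector (e.g. V4SI) is approximated as 4 / 2 + 1 = 3 statements.  */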
5550 /* Implement targetm.vectorize.add_stmt_cost. */
5551 static unsigned
5552 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5553 struct _stmt_vec_info *stmt_info, int misalign,
5554 enum vect_cost_model_location where)
5556 unsigned *cost = (unsigned *) data;
5557 unsigned retval = 0;
5559 if (flag_vect_cost_model)
5561 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5562 int stmt_cost =
5563 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
5565 /* Statements in an inner loop relative to the loop being
5566 vectorized are weighted more heavily. The value here is
5567 a function (linear for now) of the loop nest level. */
5568 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5570 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
5571 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
5572 unsigned nest_level = loop_depth (loop);
5574 count *= nest_level;
5577 retval = (unsigned) (count * stmt_cost);
5578 cost[where] += retval;
5581 return retval;
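/* For example, a vector statement in the body of a loop at nest depth two
   relative to the loop being vectorized has its count, and hence its
   contribution to the accumulated cost, doubled by the weighting above.  */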
5584 static void initialize_aarch64_code_model (void);
5586 /* Parse the architecture extension string. */
5588 static void
5589 aarch64_parse_extension (char *str)
5591 /* The extension string is parsed left to right. */
5592 const struct aarch64_option_extension *opt = NULL;
5594 /* Flag to say whether we are adding or removing an extension. */
5595 int adding_ext = -1;
5597 while (str != NULL && *str != 0)
5599 char *ext;
5600 size_t len;
5602 str++;
5603 ext = strchr (str, '+');
5605 if (ext != NULL)
5606 len = ext - str;
5607 else
5608 len = strlen (str);
5610 if (len >= 2 && strncmp (str, "no", 2) == 0)
5612 adding_ext = 0;
5613 len -= 2;
5614 str += 2;
5616 else if (len > 0)
5617 adding_ext = 1;
5619 if (len == 0)
5621 error ("missing feature modifier after %qs", "+no");
5622 return;
5625 /* Scan over the extensions table trying to find an exact match. */
5626 for (opt = all_extensions; opt->name != NULL; opt++)
5628 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
5630 /* Add or remove the extension. */
5631 if (adding_ext)
5632 aarch64_isa_flags |= opt->flags_on;
5633 else
5634 aarch64_isa_flags &= ~(opt->flags_off);
5635 break;
5639 if (opt->name == NULL)
5641 /* Extension not found in list. */
5642 error ("unknown feature modifier %qs", str);
5643 return;
5646 str = ext;
5649 return;
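/* For example, an extension string such as "+crc+nocrypto" is processed
   left to right: the flags_on bits of the "crc" entry are OR-ed into
   aarch64_isa_flags, then the flags_off bits of the "crypto" entry are
   cleared because of the "no" prefix.  */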
5652 /* Parse the ARCH string. */
5654 static void
5655 aarch64_parse_arch (void)
5657 char *ext;
5658 const struct processor *arch;
5659 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
5660 size_t len;
5662 strcpy (str, aarch64_arch_string);
5664 ext = strchr (str, '+');
5666 if (ext != NULL)
5667 len = ext - str;
5668 else
5669 len = strlen (str);
5671 if (len == 0)
5673 error ("missing arch name in -march=%qs", str);
5674 return;
5677 /* Loop through the list of supported ARCHs to find a match. */
5678 for (arch = all_architectures; arch->name != NULL; arch++)
5680 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
5682 selected_arch = arch;
5683 aarch64_isa_flags = selected_arch->flags;
5685 if (!selected_cpu)
5686 selected_cpu = &all_cores[selected_arch->core];
5688 if (ext != NULL)
5690 /* ARCH string contains at least one extension. */
5691 aarch64_parse_extension (ext);
5694 if (strcmp (selected_arch->arch, selected_cpu->arch))
5696 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
5697 selected_cpu->name, selected_arch->name);
5700 return;
5704 /* ARCH name not found in list. */
5705 error ("unknown value %qs for -march", str);
5706 return;
5709 /* Parse the CPU string. */
5711 static void
5712 aarch64_parse_cpu (void)
5714 char *ext;
5715 const struct processor *cpu;
5716 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
5717 size_t len;
5719 strcpy (str, aarch64_cpu_string);
5721 ext = strchr (str, '+');
5723 if (ext != NULL)
5724 len = ext - str;
5725 else
5726 len = strlen (str);
5728 if (len == 0)
5730 error ("missing cpu name in -mcpu=%qs", str);
5731 return;
5734 /* Loop through the list of supported CPUs to find a match. */
5735 for (cpu = all_cores; cpu->name != NULL; cpu++)
5737 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
5739 selected_cpu = cpu;
5740 selected_tune = cpu;
5741 aarch64_isa_flags = selected_cpu->flags;
5743 if (ext != NULL)
5745 /* CPU string contains at least one extension. */
5746 aarch64_parse_extension (ext);
5749 return;
5753 /* CPU name not found in list. */
5754 error ("unknown value %qs for -mcpu", str);
5755 return;
5758 /* Parse the TUNE string. */
5760 static void
5761 aarch64_parse_tune (void)
5763 const struct processor *cpu;
5764 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
5765 strcpy (str, aarch64_tune_string);
5767 /* Loop through the list of supported CPUs to find a match. */
5768 for (cpu = all_cores; cpu->name != NULL; cpu++)
5770 if (strcmp (cpu->name, str) == 0)
5772 selected_tune = cpu;
5773 return;
5777 /* CPU name not found in list. */
5778 error ("unknown value %qs for -mtune", str);
5779 return;
5783 /* Implement TARGET_OPTION_OVERRIDE. */
5785 static void
5786 aarch64_override_options (void)
5788 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
5789 If either of -march or -mtune is given, they override their
5790 respective component of -mcpu.
5792 So, first parse AARCH64_CPU_STRING, then the others; be careful
5793 with -march because, if -mcpu is not present on the command line,
5794 -march must set a sensible default CPU. */
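/* For example (illustrative): with -mcpu=cortex-a57+crypto -mtune=cortex-a53
   the architecture and ISA flags come from the cortex-a57 entry plus the
   crypto extension, while the tuning tables come from cortex-a53.  */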
5795 if (aarch64_cpu_string)
5797 aarch64_parse_cpu ();
5800 if (aarch64_arch_string)
5802 aarch64_parse_arch ();
5805 if (aarch64_tune_string)
5807 aarch64_parse_tune ();
5810 #ifndef HAVE_AS_MABI_OPTION
5811 /* The compiler may have been configured with 2.23.* binutils, which does
5812 not have support for ILP32. */
5813 if (TARGET_ILP32)
5814 error ("Assembler does not support -mabi=ilp32");
5815 #endif
5817 initialize_aarch64_code_model ();
5819 aarch64_build_bitmask_table ();
5821 /* This target defaults to strict volatile bitfields. */
5822 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
5823 flag_strict_volatile_bitfields = 1;
5825 /* If the user did not specify a processor, choose the default
5826 one for them. This will be the CPU set during configuration using
5827 --with-cpu, otherwise it is "generic". */
5828 if (!selected_cpu)
5830 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
5831 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
5834 gcc_assert (selected_cpu);
5836 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
5837 if (!selected_tune)
5838 selected_tune = &all_cores[selected_cpu->core];
5840 aarch64_tune_flags = selected_tune->flags;
5841 aarch64_tune = selected_tune->core;
5842 aarch64_tune_params = selected_tune->tune;
5844 aarch64_override_options_after_change ();
5847 /* Implement targetm.override_options_after_change. */
5849 static void
5850 aarch64_override_options_after_change (void)
5852 if (flag_omit_frame_pointer)
5853 flag_omit_leaf_frame_pointer = false;
5854 else if (flag_omit_leaf_frame_pointer)
5855 flag_omit_frame_pointer = true;
5858 static struct machine_function *
5859 aarch64_init_machine_status (void)
5861 struct machine_function *machine;
5862 machine = ggc_alloc_cleared_machine_function ();
5863 return machine;
5866 void
5867 aarch64_init_expanders (void)
5869 init_machine_status = aarch64_init_machine_status;
5872 /* A checking mechanism for the implementation of the various code models. */
5873 static void
5874 initialize_aarch64_code_model (void)
5876 if (flag_pic)
5878 switch (aarch64_cmodel_var)
5880 case AARCH64_CMODEL_TINY:
5881 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
5882 break;
5883 case AARCH64_CMODEL_SMALL:
5884 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
5885 break;
5886 case AARCH64_CMODEL_LARGE:
5887 sorry ("code model %qs with -f%s", "large",
5888 flag_pic > 1 ? "PIC" : "pic");
5889 default:
5890 gcc_unreachable ();
5893 else
5894 aarch64_cmodel = aarch64_cmodel_var;
5897 /* Return true if SYMBOL_REF X binds locally. */
5899 static bool
5900 aarch64_symbol_binds_local_p (const_rtx x)
5902 return (SYMBOL_REF_DECL (x)
5903 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
5904 : SYMBOL_REF_LOCAL_P (x));
5907 /* Return true if SYMBOL_REF X is thread local */
5908 static bool
5909 aarch64_tls_symbol_p (rtx x)
5911 if (! TARGET_HAVE_TLS)
5912 return false;
5914 if (GET_CODE (x) != SYMBOL_REF)
5915 return false;
5917 return SYMBOL_REF_TLS_MODEL (x) != 0;
5920 /* Classify a TLS symbol into one of the TLS kinds. */
5921 enum aarch64_symbol_type
5922 aarch64_classify_tls_symbol (rtx x)
5924 enum tls_model tls_kind = tls_symbolic_operand_type (x);
5926 switch (tls_kind)
5928 case TLS_MODEL_GLOBAL_DYNAMIC:
5929 case TLS_MODEL_LOCAL_DYNAMIC:
5930 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
5932 case TLS_MODEL_INITIAL_EXEC:
5933 return SYMBOL_SMALL_GOTTPREL;
5935 case TLS_MODEL_LOCAL_EXEC:
5936 return SYMBOL_SMALL_TPREL;
5938 case TLS_MODEL_EMULATED:
5939 case TLS_MODEL_NONE:
5940 return SYMBOL_FORCE_TO_MEM;
5942 default:
5943 gcc_unreachable ();
5947 /* Return the method that should be used to access SYMBOL_REF or
5948 LABEL_REF X in context CONTEXT. */
5950 enum aarch64_symbol_type
5951 aarch64_classify_symbol (rtx x,
5952 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
5954 if (GET_CODE (x) == LABEL_REF)
5956 switch (aarch64_cmodel)
5958 case AARCH64_CMODEL_LARGE:
5959 return SYMBOL_FORCE_TO_MEM;
5961 case AARCH64_CMODEL_TINY_PIC:
5962 case AARCH64_CMODEL_TINY:
5963 return SYMBOL_TINY_ABSOLUTE;
5965 case AARCH64_CMODEL_SMALL_PIC:
5966 case AARCH64_CMODEL_SMALL:
5967 return SYMBOL_SMALL_ABSOLUTE;
5969 default:
5970 gcc_unreachable ();
5974 if (GET_CODE (x) == SYMBOL_REF)
5976 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
5977 return SYMBOL_FORCE_TO_MEM;
5979 if (aarch64_tls_symbol_p (x))
5980 return aarch64_classify_tls_symbol (x);
5982 switch (aarch64_cmodel)
5984 case AARCH64_CMODEL_TINY:
5985 if (SYMBOL_REF_WEAK (x))
5986 return SYMBOL_FORCE_TO_MEM;
5987 return SYMBOL_TINY_ABSOLUTE;
5989 case AARCH64_CMODEL_SMALL:
5990 if (SYMBOL_REF_WEAK (x))
5991 return SYMBOL_FORCE_TO_MEM;
5992 return SYMBOL_SMALL_ABSOLUTE;
5994 case AARCH64_CMODEL_TINY_PIC:
5995 if (!aarch64_symbol_binds_local_p (x))
5996 return SYMBOL_TINY_GOT;
5997 return SYMBOL_TINY_ABSOLUTE;
5999 case AARCH64_CMODEL_SMALL_PIC:
6000 if (!aarch64_symbol_binds_local_p (x))
6001 return SYMBOL_SMALL_GOT;
6002 return SYMBOL_SMALL_ABSOLUTE;
6004 default:
6005 gcc_unreachable ();
6009 /* By default push everything into the constant pool. */
6010 return SYMBOL_FORCE_TO_MEM;
6013 bool
6014 aarch64_constant_address_p (rtx x)
6016 return (CONSTANT_P (x) && memory_address_p (DImode, x));
6019 bool
6020 aarch64_legitimate_pic_operand_p (rtx x)
6022 if (GET_CODE (x) == SYMBOL_REF
6023 || (GET_CODE (x) == CONST
6024 && GET_CODE (XEXP (x, 0)) == PLUS
6025 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6026 return false;
6028 return true;
6031 /* Return true if X holds either a quarter-precision or
6032 floating-point +0.0 constant. */
6033 static bool
6034 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
6036 if (!CONST_DOUBLE_P (x))
6037 return false;
6039 /* TODO: We could handle moving 0.0 to a TFmode register,
6040 but first we would like to refactor the movtf_aarch64
6041 to be more amenable to splitting moves properly and to
6042 gate correctly on TARGET_SIMD. For now, reject all
6043 constants that are not destined for SFmode or DFmode registers. */
6044 if (!(mode == SFmode || mode == DFmode))
6045 return false;
6047 if (aarch64_float_const_zero_rtx_p (x))
6048 return true;
6049 return aarch64_float_const_representable_p (x);
6052 static bool
6053 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
6055 /* Do not allow vector struct mode constants. We could support
6056 0 and -1 easily, but they need support in aarch64-simd.md. */
6057 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
6058 return false;
6060 /* This could probably go away because
6061 we now decompose CONST_INTs according to expand_mov_immediate. */
6062 if ((GET_CODE (x) == CONST_VECTOR
6063 && aarch64_simd_valid_immediate (x, mode, false, NULL))
6064 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
6065 return !targetm.cannot_force_const_mem (mode, x);
6067 if (GET_CODE (x) == HIGH
6068 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6069 return true;
6071 return aarch64_constant_address_p (x);
6075 aarch64_load_tp (rtx target)
6077 if (!target
6078 || GET_MODE (target) != Pmode
6079 || !register_operand (target, Pmode))
6080 target = gen_reg_rtx (Pmode);
6082 /* Can return in any reg. */
6083 emit_insn (gen_aarch64_load_tp_hard (target));
6084 return target;
6087 /* On AAPCS systems, this is the "struct __va_list". */
6088 static GTY(()) tree va_list_type;
6090 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
6091 Return the type to use as __builtin_va_list.
6093 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
6095 struct __va_list
6097 void *__stack;
6098 void *__gr_top;
6099 void *__vr_top;
6100 int __gr_offs;
6101 int __vr_offs;
6102 }; */
6104 static tree
6105 aarch64_build_builtin_va_list (void)
6107 tree va_list_name;
6108 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6110 /* Create the type. */
6111 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
6112 /* Give it the required name. */
6113 va_list_name = build_decl (BUILTINS_LOCATION,
6114 TYPE_DECL,
6115 get_identifier ("__va_list"),
6116 va_list_type);
6117 DECL_ARTIFICIAL (va_list_name) = 1;
6118 TYPE_NAME (va_list_type) = va_list_name;
6119 TYPE_STUB_DECL (va_list_type) = va_list_name;
6121 /* Create the fields. */
6122 f_stack = build_decl (BUILTINS_LOCATION,
6123 FIELD_DECL, get_identifier ("__stack"),
6124 ptr_type_node);
6125 f_grtop = build_decl (BUILTINS_LOCATION,
6126 FIELD_DECL, get_identifier ("__gr_top"),
6127 ptr_type_node);
6128 f_vrtop = build_decl (BUILTINS_LOCATION,
6129 FIELD_DECL, get_identifier ("__vr_top"),
6130 ptr_type_node);
6131 f_groff = build_decl (BUILTINS_LOCATION,
6132 FIELD_DECL, get_identifier ("__gr_offs"),
6133 integer_type_node);
6134 f_vroff = build_decl (BUILTINS_LOCATION,
6135 FIELD_DECL, get_identifier ("__vr_offs"),
6136 integer_type_node);
6138 DECL_ARTIFICIAL (f_stack) = 1;
6139 DECL_ARTIFICIAL (f_grtop) = 1;
6140 DECL_ARTIFICIAL (f_vrtop) = 1;
6141 DECL_ARTIFICIAL (f_groff) = 1;
6142 DECL_ARTIFICIAL (f_vroff) = 1;
6144 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
6145 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
6146 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
6147 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
6148 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
6150 TYPE_FIELDS (va_list_type) = f_stack;
6151 DECL_CHAIN (f_stack) = f_grtop;
6152 DECL_CHAIN (f_grtop) = f_vrtop;
6153 DECL_CHAIN (f_vrtop) = f_groff;
6154 DECL_CHAIN (f_groff) = f_vroff;
6156 /* Compute its layout. */
6157 layout_type (va_list_type);
6159 return va_list_type;
6162 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
6163 static void
6164 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6166 const CUMULATIVE_ARGS *cum;
6167 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6168 tree stack, grtop, vrtop, groff, vroff;
6169 tree t;
6170 int gr_save_area_size;
6171 int vr_save_area_size;
6172 int vr_offset;
6174 cum = &crtl->args.info;
6175 gr_save_area_size
6176 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
6177 vr_save_area_size
6178 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
6180 if (TARGET_GENERAL_REGS_ONLY)
6182 if (cum->aapcs_nvrn > 0)
6183 sorry ("%qs and floating point or vector arguments",
6184 "-mgeneral-regs-only");
6185 vr_save_area_size = 0;
6188 f_stack = TYPE_FIELDS (va_list_type_node);
6189 f_grtop = DECL_CHAIN (f_stack);
6190 f_vrtop = DECL_CHAIN (f_grtop);
6191 f_groff = DECL_CHAIN (f_vrtop);
6192 f_vroff = DECL_CHAIN (f_groff);
6194 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
6195 NULL_TREE);
6196 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
6197 NULL_TREE);
6198 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
6199 NULL_TREE);
6200 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
6201 NULL_TREE);
6202 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
6203 NULL_TREE);
6205 /* Emit code to initialize STACK, which points to the next varargs stack
6206 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
6207 by named arguments. STACK is 8-byte aligned. */
6208 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
6209 if (cum->aapcs_stack_size > 0)
6210 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
6211 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
6212 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6214 /* Emit code to initialize GRTOP, the top of the GR save area.
6215 virtual_incoming_args_rtx should have been 16 byte aligned. */
6216 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
6217 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
6218 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6220 /* Emit code to initialize VRTOP, the top of the VR save area.
6221 This address is gr_save_area_bytes below GRTOP, rounded
6222 down to the next 16-byte boundary. */
6223 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
6224 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
6225 STACK_BOUNDARY / BITS_PER_UNIT);
6227 if (vr_offset)
6228 t = fold_build_pointer_plus_hwi (t, -vr_offset);
6229 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
6230 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6232 /* Emit code to initialize GROFF, the offset from GRTOP of the
6233 next GPR argument. */
6234 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
6235 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
6236 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6238 /* Likewise emit code to initialize VROFF, the offset from VRTOP
6239 of the next VR argument. */
6240 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
6241 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
6242 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
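/* To summarize the expansion above (illustratively, for LP64): __stack
   points just past the named stack arguments, __gr_top and __vr_top point
   to the tops of the general-register and FP/SIMD save areas, and
   __gr_offs/__vr_offs start at minus the size of the respective save area,
   increasing towards zero as register-passed anonymous arguments are
   consumed by va_arg.  */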
6245 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
6247 static tree
6248 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6249 gimple_seq *post_p ATTRIBUTE_UNUSED)
6251 tree addr;
6252 bool indirect_p;
6253 bool is_ha; /* is HFA or HVA. */
6254 bool dw_align; /* double-word align. */
6255 enum machine_mode ag_mode = VOIDmode;
6256 int nregs;
6257 enum machine_mode mode;
6259 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6260 tree stack, f_top, f_off, off, arg, roundup, on_stack;
6261 HOST_WIDE_INT size, rsize, adjust, align;
6262 tree t, u, cond1, cond2;
6264 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6265 if (indirect_p)
6266 type = build_pointer_type (type);
6268 mode = TYPE_MODE (type);
6270 f_stack = TYPE_FIELDS (va_list_type_node);
6271 f_grtop = DECL_CHAIN (f_stack);
6272 f_vrtop = DECL_CHAIN (f_grtop);
6273 f_groff = DECL_CHAIN (f_vrtop);
6274 f_vroff = DECL_CHAIN (f_groff);
6276 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
6277 f_stack, NULL_TREE);
6278 size = int_size_in_bytes (type);
6279 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
6281 dw_align = false;
6282 adjust = 0;
6283 if (aarch64_vfp_is_call_or_return_candidate (mode,
6284 type,
6285 &ag_mode,
6286 &nregs,
6287 &is_ha))
6289 /* TYPE passed in fp/simd registers. */
6290 if (TARGET_GENERAL_REGS_ONLY)
6291 sorry ("%qs and floating point or vector arguments",
6292 "-mgeneral-regs-only");
6294 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
6295 unshare_expr (valist), f_vrtop, NULL_TREE);
6296 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
6297 unshare_expr (valist), f_vroff, NULL_TREE);
6299 rsize = nregs * UNITS_PER_VREG;
6301 if (is_ha)
6303 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
6304 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
6306 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
6307 && size < UNITS_PER_VREG)
6309 adjust = UNITS_PER_VREG - size;
6312 else
6314 /* TYPE passed in general registers. */
6315 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
6316 unshare_expr (valist), f_grtop, NULL_TREE);
6317 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
6318 unshare_expr (valist), f_groff, NULL_TREE);
6319 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6320 nregs = rsize / UNITS_PER_WORD;
6322 if (align > 8)
6323 dw_align = true;
6325 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6326 && size < UNITS_PER_WORD)
6328 adjust = UNITS_PER_WORD - size;
6332 /* Get a local temporary for the field value. */
6333 off = get_initialized_tmp_var (f_off, pre_p, NULL);
6335 /* Emit code to branch if off >= 0. */
6336 t = build2 (GE_EXPR, boolean_type_node, off,
6337 build_int_cst (TREE_TYPE (off), 0));
6338 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
6340 if (dw_align)
6342 /* Emit: offs = (offs + 15) & -16. */
6343 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6344 build_int_cst (TREE_TYPE (off), 15));
6345 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
6346 build_int_cst (TREE_TYPE (off), -16));
6347 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
6349 else
6350 roundup = NULL;
6352 /* Update ap.__[g|v]r_offs */
6353 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6354 build_int_cst (TREE_TYPE (off), rsize));
6355 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
6357 /* String up. */
6358 if (roundup)
6359 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6361 /* [cond2] if (ap.__[g|v]r_offs > 0) */
6362 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
6363 build_int_cst (TREE_TYPE (f_off), 0));
6364 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
6366 /* String up: make sure the assignment happens before the use. */
6367 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
6368 COND_EXPR_ELSE (cond1) = t;
6370 /* Prepare the trees handling the argument that is passed on the stack;
6371 the top-level node is stored in ON_STACK. */
6372 arg = get_initialized_tmp_var (stack, pre_p, NULL);
6373 if (align > 8)
6375 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
6376 t = fold_convert (intDI_type_node, arg);
6377 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6378 build_int_cst (TREE_TYPE (t), 15));
6379 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6380 build_int_cst (TREE_TYPE (t), -16));
6381 t = fold_convert (TREE_TYPE (arg), t);
6382 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
6384 else
6385 roundup = NULL;
6386 /* Advance ap.__stack */
6387 t = fold_convert (intDI_type_node, arg);
6388 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6389 build_int_cst (TREE_TYPE (t), size + 7));
6390 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6391 build_int_cst (TREE_TYPE (t), -8));
6392 t = fold_convert (TREE_TYPE (arg), t);
6393 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
6394 /* String up roundup and advance. */
6395 if (roundup)
6396 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6397 /* String up with arg */
6398 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
6399 /* Big-endianness related address adjustment. */
6400 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6401 && size < UNITS_PER_WORD)
6403 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
6404 size_int (UNITS_PER_WORD - size));
6405 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
6408 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
6409 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
6411 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
6412 t = off;
6413 if (adjust)
6414 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
6415 build_int_cst (TREE_TYPE (off), adjust));
6417 t = fold_convert (sizetype, t);
6418 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
6420 if (is_ha)
6422 /* type ha; // treat as "struct {ftype field[n];}"
6423 ... [computing offs]
6424 for (i = 0; i < nregs; ++i, offs += 16)
6425 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
6426 return ha; */
6427 int i;
6428 tree tmp_ha, field_t, field_ptr_t;
6430 /* Declare a local variable. */
6431 tmp_ha = create_tmp_var_raw (type, "ha");
6432 gimple_add_tmp_var (tmp_ha);
6434 /* Establish the base type. */
6435 switch (ag_mode)
6437 case SFmode:
6438 field_t = float_type_node;
6439 field_ptr_t = float_ptr_type_node;
6440 break;
6441 case DFmode:
6442 field_t = double_type_node;
6443 field_ptr_t = double_ptr_type_node;
6444 break;
6445 case TFmode:
6446 field_t = long_double_type_node;
6447 field_ptr_t = long_double_ptr_type_node;
6448 break;
6449 /* Half-precision and quad-precision floats are not fully supported yet.
6450 Enable the following code once that support is complete; the correct
6451 type node for __fp16 * still needs to be found. */
6452 #if 0
6453 case HFmode:
6454 field_t = float_type_node;
6455 field_ptr_t = float_ptr_type_node;
6456 break;
6457 #endif
6458 case V2SImode:
6459 case V4SImode:
6461 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
6462 field_t = build_vector_type_for_mode (innertype, ag_mode);
6463 field_ptr_t = build_pointer_type (field_t);
6465 break;
6466 default:
6467 gcc_assert (0);
6470 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area) */
6471 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
6472 addr = t;
6473 t = fold_convert (field_ptr_t, addr);
6474 t = build2 (MODIFY_EXPR, field_t,
6475 build1 (INDIRECT_REF, field_t, tmp_ha),
6476 build1 (INDIRECT_REF, field_t, t));
6478 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
6479 for (i = 1; i < nregs; ++i)
6481 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
6482 u = fold_convert (field_ptr_t, addr);
6483 u = build2 (MODIFY_EXPR, field_t,
6484 build2 (MEM_REF, field_t, tmp_ha,
6485 build_int_cst (field_ptr_t,
6486 (i *
6487 int_size_in_bytes (field_t)))),
6488 build1 (INDIRECT_REF, field_t, u));
6489 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
6492 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
6493 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
6496 COND_EXPR_ELSE (cond2) = t;
6497 addr = fold_convert (build_pointer_type (type), cond1);
6498 addr = build_va_arg_indirect_ref (addr);
6500 if (indirect_p)
6501 addr = build_va_arg_indirect_ref (addr);
6503 return addr;
6506 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
6508 static void
6509 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
6510 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6511 int no_rtl)
6513 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6514 CUMULATIVE_ARGS local_cum;
6515 int gr_saved, vr_saved;
6517 /* The caller has advanced CUM up to, but not beyond, the last named
6518 argument. Advance a local copy of CUM past the last "real" named
6519 argument, to find out how many registers are left over. */
6520 local_cum = *cum;
6521 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
6523 /* Find out how many registers we need to save. */
6524 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
6525 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
6527 if (TARGET_GENERAL_REGS_ONLY)
6529 if (local_cum.aapcs_nvrn > 0)
6530 sorry ("%qs and floating point or vector arguments",
6531 "-mgeneral-regs-only");
6532 vr_saved = 0;
6535 if (!no_rtl)
6537 if (gr_saved > 0)
6539 rtx ptr, mem;
6541 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
6542 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
6543 - gr_saved * UNITS_PER_WORD);
6544 mem = gen_frame_mem (BLKmode, ptr);
6545 set_mem_alias_set (mem, get_varargs_alias_set ());
6547 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
6548 mem, gr_saved);
6550 if (vr_saved > 0)
6552 /* We can't use move_block_from_reg, because it will use
6553 the wrong mode, storing D regs only. */
6554 enum machine_mode mode = TImode;
6555 int off, i;
6557 /* Set OFF to the offset from virtual_incoming_args_rtx of
6558 the first vector register. The VR save area lies below
6559 the GR one, and is aligned to 16 bytes. */
6560 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
6561 STACK_BOUNDARY / BITS_PER_UNIT);
6562 off -= vr_saved * UNITS_PER_VREG;
6564 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
6566 rtx ptr, mem;
6568 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
6569 mem = gen_frame_mem (mode, ptr);
6570 set_mem_alias_set (mem, get_varargs_alias_set ());
6571 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
6572 off += UNITS_PER_VREG;
6577 /* We don't save the size into *PRETEND_SIZE because we want to avoid
6578 any complication of having crtl->args.pretend_args_size changed. */
6579 cfun->machine->saved_varargs_size
6580 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
6581 STACK_BOUNDARY / BITS_PER_UNIT)
6582 + vr_saved * UNITS_PER_VREG);
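/* For example (illustrative values only): with gr_saved == 3 and
   vr_saved == 2, saved_varargs_size would be
   AARCH64_ROUND_UP (3 * 8, 16) + 2 * 16 = 32 + 32 = 64 bytes.  */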
6585 static void
6586 aarch64_conditional_register_usage (void)
6588 int i;
6589 if (!TARGET_FLOAT)
6591 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
6593 fixed_regs[i] = 1;
6594 call_used_regs[i] = 1;
6599 /* Walk down the type tree of TYPE counting consecutive base elements.
6600 If *MODEP is VOIDmode, then set it to the first valid floating point
6601 type. If a non-floating point type is found, or if a floating point
6602 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6603 otherwise return the count in the sub-tree. */
6604 static int
6605 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
6607 enum machine_mode mode;
6608 HOST_WIDE_INT size;
6610 switch (TREE_CODE (type))
6612 case REAL_TYPE:
6613 mode = TYPE_MODE (type);
6614 if (mode != DFmode && mode != SFmode && mode != TFmode)
6615 return -1;
6617 if (*modep == VOIDmode)
6618 *modep = mode;
6620 if (*modep == mode)
6621 return 1;
6623 break;
6625 case COMPLEX_TYPE:
6626 mode = TYPE_MODE (TREE_TYPE (type));
6627 if (mode != DFmode && mode != SFmode && mode != TFmode)
6628 return -1;
6630 if (*modep == VOIDmode)
6631 *modep = mode;
6633 if (*modep == mode)
6634 return 2;
6636 break;
6638 case VECTOR_TYPE:
6639 /* Use V2SImode and V4SImode as representatives of all 64-bit
6640 and 128-bit vector types. */
6641 size = int_size_in_bytes (type);
6642 switch (size)
6644 case 8:
6645 mode = V2SImode;
6646 break;
6647 case 16:
6648 mode = V4SImode;
6649 break;
6650 default:
6651 return -1;
6654 if (*modep == VOIDmode)
6655 *modep = mode;
6657 /* Vector modes are considered to be opaque: two vectors are
6658 equivalent for the purposes of being homogeneous aggregates
6659 if they are the same size. */
6660 if (*modep == mode)
6661 return 1;
6663 break;
6665 case ARRAY_TYPE:
6667 int count;
6668 tree index = TYPE_DOMAIN (type);
6670 /* Can't handle incomplete types nor sizes that are not
6671 fixed. */
6672 if (!COMPLETE_TYPE_P (type)
6673 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6674 return -1;
6676 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
6677 if (count == -1
6678 || !index
6679 || !TYPE_MAX_VALUE (index)
6680 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6681 || !TYPE_MIN_VALUE (index)
6682 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6683 || count < 0)
6684 return -1;
6686 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6687 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6689 /* There must be no padding. */
6690 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
6691 return -1;
6693 return count;
6696 case RECORD_TYPE:
6698 int count = 0;
6699 int sub_count;
6700 tree field;
6702 /* Can't handle incomplete types nor sizes that are not
6703 fixed. */
6704 if (!COMPLETE_TYPE_P (type)
6705 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6706 return -1;
6708 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6710 if (TREE_CODE (field) != FIELD_DECL)
6711 continue;
6713 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6714 if (sub_count < 0)
6715 return -1;
6716 count += sub_count;
6719 /* There must be no padding. */
6720 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
6721 return -1;
6723 return count;
6726 case UNION_TYPE:
6727 case QUAL_UNION_TYPE:
6729 /* These aren't very interesting except in a degenerate case. */
6730 int count = 0;
6731 int sub_count;
6732 tree field;
6734 /* Can't handle incomplete types nor sizes that are not
6735 fixed. */
6736 if (!COMPLETE_TYPE_P (type)
6737 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6738 return -1;
6740 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6742 if (TREE_CODE (field) != FIELD_DECL)
6743 continue;
6745 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6746 if (sub_count < 0)
6747 return -1;
6748 count = count > sub_count ? count : sub_count;
6751 /* There must be no padding. */
6752 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
6753 return -1;
6755 return count;
6758 default:
6759 break;
6762 return -1;
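/* Worked examples (illustrative only):
     struct { double x, y; }        -> two DFmode REAL_TYPE fields, count 2;
     double[3]                      -> ARRAY_TYPE of DFmode, count 3;
     _Complex float                 -> COMPLEX_TYPE of SFmode, count 2;
     struct { float f; double d; }  -> the second field's mode differs from
                                       *MODEP, so the result is -1.  */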
6765 /* Return true if we use LRA instead of reload pass. */
6766 static bool
6767 aarch64_lra_p (void)
6769 return aarch64_lra_flag;
6772 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
6773 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
6774 array types. The C99 floating-point complex types are also considered
6775 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
6776 types, which are GCC extensions and out of the scope of AAPCS64, are
6777 treated as composite types here as well.
6779 Note that MODE itself is not sufficient in determining whether a type
6780 is such a composite type or not. This is because
6781 stor-layout.c:compute_record_mode may have already changed the MODE
6782 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
6783 structure with only one field may have its MODE set to the mode of the
6784 field. Also an integer mode whose size matches the size of the
6785 RECORD_TYPE type may be used to substitute the original mode
6786 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
6787 solely relied on. */
6789 static bool
6790 aarch64_composite_type_p (const_tree type,
6791 enum machine_mode mode)
6793 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
6794 return true;
6796 if (mode == BLKmode
6797 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6798 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
6799 return true;
6801 return false;
6804 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
6805 type as described in AAPCS64 \S 4.1.2.
6807 See the comment above aarch64_composite_type_p for the notes on MODE. */
6809 static bool
6810 aarch64_short_vector_p (const_tree type,
6811 enum machine_mode mode)
6813 HOST_WIDE_INT size = -1;
6815 if (type && TREE_CODE (type) == VECTOR_TYPE)
6816 size = int_size_in_bytes (type);
6817 else if (!aarch64_composite_type_p (type, mode)
6818 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6819 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
6820 size = GET_MODE_SIZE (mode);
6822 return (size == 8 || size == 16) ? true : false;
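/* For instance, a 16-byte GNU vector type such as
     typedef int v4si __attribute__ ((vector_size (16)));
   is a short vector in the AAPCS64 sense, whereas a vector_size (32)
   type, being 32 bytes, is not.  */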
6825 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
6826 shall be passed or returned in simd/fp register(s) (providing these
6827 parameter passing registers are available).
6829 Upon successful return, *COUNT returns the number of needed registers,
6830 *BASE_MODE returns the mode of the individual register and when IS_HA
6831 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
6832 floating-point aggregate or a homogeneous short-vector aggregate. */
6834 static bool
6835 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
6836 const_tree type,
6837 enum machine_mode *base_mode,
6838 int *count,
6839 bool *is_ha)
6841 enum machine_mode new_mode = VOIDmode;
6842 bool composite_p = aarch64_composite_type_p (type, mode);
6844 if (is_ha != NULL) *is_ha = false;
6846 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
6847 || aarch64_short_vector_p (type, mode))
6849 *count = 1;
6850 new_mode = mode;
6852 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6854 if (is_ha != NULL) *is_ha = true;
6855 *count = 2;
6856 new_mode = GET_MODE_INNER (mode);
6858 else if (type && composite_p)
6860 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6862 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
6864 if (is_ha != NULL) *is_ha = true;
6865 *count = ag_count;
6867 else
6868 return false;
6870 else
6871 return false;
6873 *base_mode = new_mode;
6874 return true;
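/* Examples (illustrative only): a plain double gives *count == 1 with
   DFmode; _Complex double gives *count == 2, DFmode, *is_ha == true;
   struct { float a, b, c, d; } gives *count == 4, SFmode, *is_ha == true;
   a struct of five doubles exceeds HA_MAX_NUM_FLDS and is rejected.  */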
6877 /* Implement TARGET_STRUCT_VALUE_RTX. */
6879 static rtx
6880 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
6881 int incoming ATTRIBUTE_UNUSED)
6883 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
6886 /* Implements target hook vector_mode_supported_p. */
6887 static bool
6888 aarch64_vector_mode_supported_p (enum machine_mode mode)
6890 if (TARGET_SIMD
6891 && (mode == V4SImode || mode == V8HImode
6892 || mode == V16QImode || mode == V2DImode
6893 || mode == V2SImode || mode == V4HImode
6894 || mode == V8QImode || mode == V2SFmode
6895 || mode == V4SFmode || mode == V2DFmode))
6896 return true;
6898 return false;
6901 /* Return appropriate SIMD container
6902 for MODE within a vector of WIDTH bits. */
6903 static enum machine_mode
6904 aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
6906 gcc_assert (width == 64 || width == 128);
6907 if (TARGET_SIMD)
6909 if (width == 128)
6910 switch (mode)
6912 case DFmode:
6913 return V2DFmode;
6914 case SFmode:
6915 return V4SFmode;
6916 case SImode:
6917 return V4SImode;
6918 case HImode:
6919 return V8HImode;
6920 case QImode:
6921 return V16QImode;
6922 case DImode:
6923 return V2DImode;
6924 default:
6925 break;
6927 else
6928 switch (mode)
6930 case SFmode:
6931 return V2SFmode;
6932 case SImode:
6933 return V2SImode;
6934 case HImode:
6935 return V4HImode;
6936 case QImode:
6937 return V8QImode;
6938 default:
6939 break;
6942 return word_mode;
6945 /* Return 128-bit container as the preferred SIMD mode for MODE. */
6946 static enum machine_mode
6947 aarch64_preferred_simd_mode (enum machine_mode mode)
6949 return aarch64_simd_container_mode (mode, 128);
6952 /* Return the bitmask of possible vector sizes for the vectorizer
6953 to iterate over. */
6954 static unsigned int
6955 aarch64_autovectorize_vector_sizes (void)
6957 return (16 | 8);
6960 /* A table to help perform AArch64-specific name mangling for AdvSIMD
6961 vector types in order to conform to the AAPCS64 (see "Procedure
6962 Call Standard for the ARM 64-bit Architecture", Appendix A). To
6963 qualify for emission with the mangled names defined in that document,
6964 a vector type must not only be of the correct mode but also be
6965 composed of AdvSIMD vector element types (e.g.
6966 __builtin_aarch64_simd_qi); these types are registered by
6967 aarch64_init_simd_builtins (). In other words, vector types defined
6968 in other ways e.g. via vector_size attribute will get default
6969 mangled names. */
6970 typedef struct
6972 enum machine_mode mode;
6973 const char *element_type_name;
6974 const char *mangled_name;
6975 } aarch64_simd_mangle_map_entry;
6977 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
6978 /* 64-bit containerized types. */
6979 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
6980 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
6981 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
6982 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
6983 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
6984 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
6985 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
6986 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
6987 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6988 /* 128-bit containerized types. */
6989 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
6990 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
6991 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
6992 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
6993 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
6994 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
6995 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
6996 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
6997 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
6998 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6999 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
7000 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
7001 { V2DImode, "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
7002 { VOIDmode, NULL, NULL }
7005 /* Implement TARGET_MANGLE_TYPE. */
7007 static const char *
7008 aarch64_mangle_type (const_tree type)
7010 /* The AArch64 ABI documents say that "__va_list" has to be
7011 mangled as if it is in the "std" namespace. */
7012 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
7013 return "St9__va_list";
7015 /* Check the mode of the vector type, and the name of the vector
7016 element type, against the table. */
7017 if (TREE_CODE (type) == VECTOR_TYPE)
7019 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
7021 while (pos->mode != VOIDmode)
7023 tree elt_type = TREE_TYPE (type);
7025 if (pos->mode == TYPE_MODE (type)
7026 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
7027 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
7028 pos->element_type_name))
7029 return pos->mangled_name;
7031 pos++;
7035 /* Use the default mangling. */
7036 return NULL;
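/* For example, the AdvSIMD type int32x4_t has mode V4SImode and element
   type __builtin_aarch64_simd_si, so it is mangled as "11__Int32x4_t";
   a C++ function such as void f (int32x4_t) would therefore (under the
   usual Itanium scheme, shown here only for illustration) come out as
   _Z1f11__Int32x4_t.  */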
7039 /* Return the equivalent letter for size. */
7040 static char
7041 sizetochar (int size)
7043 switch (size)
7045 case 64: return 'd';
7046 case 32: return 's';
7047 case 16: return 'h';
7048 case 8 : return 'b';
7049 default: gcc_unreachable ();
7053 /* Return true iff x is a uniform vector of floating-point
7054 constants, and the constant can be represented in
7055 quarter-precision form. Note that, as aarch64_float_const_representable_p
7056 rejects both +0.0 and -0.0, this function also rejects +0.0 and -0.0.
7057 static bool
7058 aarch64_vect_float_const_representable_p (rtx x)
7060 int i = 0;
7061 REAL_VALUE_TYPE r0, ri;
7062 rtx x0, xi;
7064 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
7065 return false;
7067 x0 = CONST_VECTOR_ELT (x, 0);
7068 if (!CONST_DOUBLE_P (x0))
7069 return false;
7071 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
7073 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
7075 xi = CONST_VECTOR_ELT (x, i);
7076 if (!CONST_DOUBLE_P (xi))
7077 return false;
7079 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
7080 if (!REAL_VALUES_EQUAL (r0, ri))
7081 return false;
7084 return aarch64_float_const_representable_p (x0);
7087 /* Return true for valid and false for invalid. */
7088 bool
7089 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
7090 struct simd_immediate_info *info)
7092 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
7093 matches = 1; \
7094 for (i = 0; i < idx; i += (STRIDE)) \
7095 if (!(TEST)) \
7096 matches = 0; \
7097 if (matches) \
7099 immtype = (CLASS); \
7100 elsize = (ELSIZE); \
7101 eshift = (SHIFT); \
7102 emvn = (NEG); \
7103 break; \
7106 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7107 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7108 unsigned char bytes[16];
7109 int immtype = -1, matches;
7110 unsigned int invmask = inverse ? 0xff : 0;
7111 int eshift, emvn;
7113 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7115 if (! (aarch64_simd_imm_zero_p (op, mode)
7116 || aarch64_vect_float_const_representable_p (op)))
7117 return false;
7119 if (info)
7121 info->value = CONST_VECTOR_ELT (op, 0);
7122 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
7123 info->mvn = false;
7124 info->shift = 0;
7127 return true;
7130 /* Splat vector constant out into a byte vector. */
7131 for (i = 0; i < n_elts; i++)
7133 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
7134 it must be laid out in the vector register in reverse order. */
7135 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
7136 unsigned HOST_WIDE_INT elpart;
7137 unsigned int part, parts;
7139 if (GET_CODE (el) == CONST_INT)
7141 elpart = INTVAL (el);
7142 parts = 1;
7144 else if (GET_CODE (el) == CONST_DOUBLE)
7146 elpart = CONST_DOUBLE_LOW (el);
7147 parts = 2;
7149 else
7150 gcc_unreachable ();
7152 for (part = 0; part < parts; part++)
7154 unsigned int byte;
7155 for (byte = 0; byte < innersize; byte++)
7157 bytes[idx++] = (elpart & 0xff) ^ invmask;
7158 elpart >>= BITS_PER_UNIT;
7160 if (GET_CODE (el) == CONST_DOUBLE)
7161 elpart = CONST_DOUBLE_HIGH (el);
7165 /* Sanity check. */
7166 gcc_assert (idx == GET_MODE_SIZE (mode));
7170 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
7171 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
7173 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7174 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7176 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
7177 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7179 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
7180 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
7182 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
7184 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
7186 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
7187 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
7189 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7190 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7192 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
7193 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7195 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
7196 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
7198 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
7200 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
7202 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7203 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7205 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7206 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7208 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
7209 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7211 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
7212 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7214 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
7216 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
7217 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
7219 while (0);
7221 if (immtype == -1)
7222 return false;
7224 if (info)
7226 info->element_width = elsize;
7227 info->mvn = emvn != 0;
7228 info->shift = eshift;
7230 unsigned HOST_WIDE_INT imm = 0;
7232 if (immtype >= 12 && immtype <= 15)
7233 info->msl = true;
7235 /* Un-invert bytes of recognized vector, if necessary. */
7236 if (invmask != 0)
7237 for (i = 0; i < idx; i++)
7238 bytes[i] ^= invmask;
7240 if (immtype == 17)
7242 /* FIXME: Broken on 32-bit H_W_I hosts. */
7243 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
7245 for (i = 0; i < 8; i++)
7246 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
7247 << (i * BITS_PER_UNIT);
7250 info->value = GEN_INT (imm);
7252 else
7254 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
7255 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
7257 /* Construct 'abcdefgh' because the assembler cannot handle
7258 generic constants. */
7259 if (info->mvn)
7260 imm = ~imm;
7261 imm = (imm >> info->shift) & 0xff;
7262 info->value = GEN_INT (imm);
7266 return true;
7267 #undef CHECK
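/* Worked example (illustrative): a V4SImode constant whose elements are
   all 0x00ab0000 splats to the per-element byte pattern { 00, 00, ab, 00 },
   which matches the CHECK (4, 32, 2, ...) case above, giving immtype 2,
   elsize 32, eshift 16 and no MVN.  INFO then ends up with
   element_width == 32, shift == 16 and value == 0xab, i.e. an immediate
   that can later be emitted as something like "movi v0.4s, 0xab, lsl 16".  */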
7270 static bool
7271 aarch64_const_vec_all_same_int_p (rtx x,
7272 HOST_WIDE_INT minval,
7273 HOST_WIDE_INT maxval)
7275 HOST_WIDE_INT firstval;
7276 int count, i;
7278 if (GET_CODE (x) != CONST_VECTOR
7279 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
7280 return false;
7282 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
7283 if (firstval < minval || firstval > maxval)
7284 return false;
7286 count = CONST_VECTOR_NUNITS (x);
7287 for (i = 1; i < count; i++)
7288 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
7289 return false;
7291 return true;
7294 /* Check if immediate shift constants are within range. */
7295 bool
7296 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
7298 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
7299 if (left)
7300 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
7301 else
7302 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
7305 /* Return true if X is a uniform vector where all elements
7306 are either the floating-point constant 0.0 or the
7307 integer constant 0. */
7308 bool
7309 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
7311 return x == CONST0_RTX (mode);
7314 bool
7315 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
7317 HOST_WIDE_INT imm = INTVAL (x);
7318 int i;
7320 for (i = 0; i < 8; i++)
7322 unsigned int byte = imm & 0xff;
7323 if (byte != 0xff && byte != 0)
7324 return false;
7325 imm >>= 8;
7328 return true;
7331 bool
7332 aarch64_mov_operand_p (rtx x,
7333 enum aarch64_symbol_context context,
7334 enum machine_mode mode)
7336 if (GET_CODE (x) == HIGH
7337 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
7338 return true;
7340 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
7341 return true;
7343 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
7344 return true;
7346 return aarch64_classify_symbolic_expression (x, context)
7347 == SYMBOL_TINY_ABSOLUTE;
7350 /* Return a const_int vector of VAL. */
7352 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
7354 int nunits = GET_MODE_NUNITS (mode);
7355 rtvec v = rtvec_alloc (nunits);
7356 int i;
7358 for (i=0; i < nunits; i++)
7359 RTVEC_ELT (v, i) = GEN_INT (val);
7361 return gen_rtx_CONST_VECTOR (mode, v);
7364 /* Check OP is a legal scalar immediate for the MOVI instruction. */
7366 bool
7367 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
7369 enum machine_mode vmode;
7371 gcc_assert (!VECTOR_MODE_P (mode));
7372 vmode = aarch64_preferred_simd_mode (mode);
7373 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
7374 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
7377 /* Construct and return a PARALLEL RTX vector. */
7379 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
7381 int nunits = GET_MODE_NUNITS (mode);
7382 rtvec v = rtvec_alloc (nunits / 2);
7383 int base = high ? nunits / 2 : 0;
7384 rtx t1;
7385 int i;
7387 for (i=0; i < nunits / 2; i++)
7388 RTVEC_ELT (v, i) = GEN_INT (base + i);
7390 t1 = gen_rtx_PARALLEL (mode, v);
7391 return t1;
7394 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
7395 HIGH (exclusive). */
7396 void
7397 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7399 HOST_WIDE_INT lane;
7400 gcc_assert (GET_CODE (operand) == CONST_INT);
7401 lane = INTVAL (operand);
7403 if (lane < low || lane >= high)
7404 error ("lane out of range");
7407 void
7408 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7410 gcc_assert (GET_CODE (operand) == CONST_INT);
7411 HOST_WIDE_INT lane = INTVAL (operand);
7413 if (lane < low || lane >= high)
7414 error ("constant out of range");
7417 /* Emit code to reinterpret one AdvSIMD type as another,
7418 without altering bits. */
7419 void
7420 aarch64_simd_reinterpret (rtx dest, rtx src)
7422 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
7425 /* Emit code to place an AdvSIMD pair result in memory locations (with equal
7426 registers). */
7427 void
7428 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
7429 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
7430 rtx op1)
7432 rtx mem = gen_rtx_MEM (mode, destaddr);
7433 rtx tmp1 = gen_reg_rtx (mode);
7434 rtx tmp2 = gen_reg_rtx (mode);
7436 emit_insn (intfn (tmp1, op1, tmp2));
7438 emit_move_insn (mem, tmp1);
7439 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
7440 emit_move_insn (mem, tmp2);
7443 /* Return TRUE if OP is a valid vector addressing mode. */
7444 bool
7445 aarch64_simd_mem_operand_p (rtx op)
7447 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
7448 || GET_CODE (XEXP (op, 0)) == REG);
7451 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
7452 not to early-clobber SRC registers in the process.
7454 We assume that the operands described by SRC and DEST represent a
7455 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
7456 number of components into which the copy has been decomposed. */
7457 void
7458 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
7459 rtx *src, unsigned int count)
7461 unsigned int i;
7463 if (!reg_overlap_mentioned_p (operands[0], operands[1])
7464 || REGNO (operands[0]) < REGNO (operands[1]))
7466 for (i = 0; i < count; i++)
7468 operands[2 * i] = dest[i];
7469 operands[2 * i + 1] = src[i];
7472 else
7474 for (i = 0; i < count; i++)
7476 operands[2 * i] = dest[count - i - 1];
7477 operands[2 * i + 1] = src[count - i - 1];
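/* Example (register numbers are illustrative): splitting a copy of
   { q1, q2 } into { q2, q3 } overlaps and the destination starts at a
   higher register number, so the components are emitted high part first
   (q3 <- q2, then q2 <- q1); emitting them in forward order would
   clobber q2 before it had been read.  */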
7482 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
7483 one of VSTRUCT modes: OI, CI or XI. */
7485 aarch64_simd_attr_length_move (rtx insn)
7487 enum machine_mode mode;
7489 extract_insn_cached (insn);
7491 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
7493 mode = GET_MODE (recog_data.operand[0]);
7494 switch (mode)
7496 case OImode:
7497 return 8;
7498 case CImode:
7499 return 12;
7500 case XImode:
7501 return 16;
7502 default:
7503 gcc_unreachable ();
7506 return 4;
7509 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
7510 alignment of a vector to 128 bits. */
7511 static HOST_WIDE_INT
7512 aarch64_simd_vector_alignment (const_tree type)
7514 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
7515 return MIN (align, 128);
7518 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
7519 static bool
7520 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
7522 if (is_packed)
7523 return false;
7525 /* We guarantee alignment for vectors up to 128-bits. */
7526 if (tree_int_cst_compare (TYPE_SIZE (type),
7527 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
7528 return false;
7530 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
7531 return true;
7534 /* If VALS is a vector constant that can be loaded into a register
7535 using DUP, generate instructions to do so and return an RTX to
7536 assign to the register. Otherwise return NULL_RTX. */
7537 static rtx
7538 aarch64_simd_dup_constant (rtx vals)
7540 enum machine_mode mode = GET_MODE (vals);
7541 enum machine_mode inner_mode = GET_MODE_INNER (mode);
7542 int n_elts = GET_MODE_NUNITS (mode);
7543 bool all_same = true;
7544 rtx x;
7545 int i;
7547 if (GET_CODE (vals) != CONST_VECTOR)
7548 return NULL_RTX;
7550 for (i = 1; i < n_elts; ++i)
7552 x = CONST_VECTOR_ELT (vals, i);
7553 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
7554 all_same = false;
7557 if (!all_same)
7558 return NULL_RTX;
7560 /* We can load this constant by using DUP and a constant in a
7561 single ARM register. This will be cheaper than a vector
7562 load. */
7563 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
7564 return gen_rtx_VEC_DUPLICATE (mode, x);
7568 /* Generate code to load VALS, which is a PARALLEL containing only
7569 constants (for vec_init) or CONST_VECTOR, efficiently into a
7570 register. Returns an RTX to copy into the register, or NULL_RTX
7571 for a PARALLEL that can not be converted into a CONST_VECTOR. */
7572 static rtx
7573 aarch64_simd_make_constant (rtx vals)
7575 enum machine_mode mode = GET_MODE (vals);
7576 rtx const_dup;
7577 rtx const_vec = NULL_RTX;
7578 int n_elts = GET_MODE_NUNITS (mode);
7579 int n_const = 0;
7580 int i;
7582 if (GET_CODE (vals) == CONST_VECTOR)
7583 const_vec = vals;
7584 else if (GET_CODE (vals) == PARALLEL)
7586 /* A CONST_VECTOR must contain only CONST_INTs and
7587 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
7588 Only store valid constants in a CONST_VECTOR. */
7589 for (i = 0; i < n_elts; ++i)
7591 rtx x = XVECEXP (vals, 0, i);
7592 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
7593 n_const++;
7595 if (n_const == n_elts)
7596 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
7598 else
7599 gcc_unreachable ();
7601 if (const_vec != NULL_RTX
7602 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
7603 /* Load using MOVI/MVNI. */
7604 return const_vec;
7605 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
7606 /* Loaded using DUP. */
7607 return const_dup;
7608 else if (const_vec != NULL_RTX)
7609 /* Load from constant pool. We can not take advantage of single-cycle
7610 LD1 because we need a PC-relative addressing mode. */
7611 return const_vec;
7612 else
7613 /* A PARALLEL containing something not valid inside CONST_VECTOR.
7614 We can not construct an initializer. */
7615 return NULL_RTX;
7618 void
7619 aarch64_expand_vector_init (rtx target, rtx vals)
7621 enum machine_mode mode = GET_MODE (target);
7622 enum machine_mode inner_mode = GET_MODE_INNER (mode);
7623 int n_elts = GET_MODE_NUNITS (mode);
7624 int n_var = 0, one_var = -1;
7625 bool all_same = true;
7626 rtx x, mem;
7627 int i;
7629 x = XVECEXP (vals, 0, 0);
7630 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7631 n_var = 1, one_var = 0;
7633 for (i = 1; i < n_elts; ++i)
7635 x = XVECEXP (vals, 0, i);
7636 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7637 ++n_var, one_var = i;
7639 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
7640 all_same = false;
7643 if (n_var == 0)
7645 rtx constant = aarch64_simd_make_constant (vals);
7646 if (constant != NULL_RTX)
7648 emit_move_insn (target, constant);
7649 return;
7653 /* Splat a single non-constant element if we can. */
7654 if (all_same)
7656 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
7657 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
7658 return;
7661 /* One field is non-constant. Load constant then overwrite varying
7662 field. This is more efficient than using the stack. */
7663 if (n_var == 1)
7665 rtx copy = copy_rtx (vals);
7666 rtx index = GEN_INT (one_var);
7667 enum insn_code icode;
7669 /* Load constant part of vector, substitute neighboring value for
7670 varying element. */
7671 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
7672 aarch64_expand_vector_init (target, copy);
7674 /* Insert variable. */
7675 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
7676 icode = optab_handler (vec_set_optab, mode);
7677 gcc_assert (icode != CODE_FOR_nothing);
7678 emit_insn (GEN_FCN (icode) (target, x, index));
7679 return;
7682 /* Construct the vector in memory one field at a time
7683 and load the whole vector. */
7684 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7685 for (i = 0; i < n_elts; i++)
7686 emit_move_insn (adjust_address_nv (mem, inner_mode,
7687 i * GET_MODE_SIZE (inner_mode)),
7688 XVECEXP (vals, 0, i));
7689 emit_move_insn (target, mem);
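/* To summarise the strategies above (element values are illustrative):
     { 1, 1, 1, 1 }  -> all constant, loaded with MOVI/MVNI or from the pool;
     { x, x, x, x }  -> variable splat, loaded with DUP;
     { 1, 2, 3, x }  -> constant vector { 1, 2, 3, 3 } loaded first, then
                        the variable lane inserted via vec_set;
     { x, y, z, w }  -> built field by field in a stack temporary and
                        loaded as a whole vector.  */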
7693 static unsigned HOST_WIDE_INT
7694 aarch64_shift_truncation_mask (enum machine_mode mode)
7696 return
7697 (aarch64_vector_mode_supported_p (mode)
7698 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
7701 #ifndef TLS_SECTION_ASM_FLAG
7702 #define TLS_SECTION_ASM_FLAG 'T'
7703 #endif
7705 void
7706 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
7707 tree decl ATTRIBUTE_UNUSED)
7709 char flagchars[10], *f = flagchars;
7711 /* If we have already declared this section, we can use an
7712 abbreviated form to switch back to it -- unless this section is
7713 part of a COMDAT group, in which case GAS requires the full
7714 declaration every time. */
7715 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7716 && (flags & SECTION_DECLARED))
7718 fprintf (asm_out_file, "\t.section\t%s\n", name);
7719 return;
7722 if (!(flags & SECTION_DEBUG))
7723 *f++ = 'a';
7724 if (flags & SECTION_WRITE)
7725 *f++ = 'w';
7726 if (flags & SECTION_CODE)
7727 *f++ = 'x';
7728 if (flags & SECTION_SMALL)
7729 *f++ = 's';
7730 if (flags & SECTION_MERGE)
7731 *f++ = 'M';
7732 if (flags & SECTION_STRINGS)
7733 *f++ = 'S';
7734 if (flags & SECTION_TLS)
7735 *f++ = TLS_SECTION_ASM_FLAG;
7736 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7737 *f++ = 'G';
7738 *f = '\0';
7740 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
7742 if (!(flags & SECTION_NOTYPE))
7744 const char *type;
7745 const char *format;
7747 if (flags & SECTION_BSS)
7748 type = "nobits";
7749 else
7750 type = "progbits";
7752 #ifdef TYPE_OPERAND_FMT
7753 format = "," TYPE_OPERAND_FMT;
7754 #else
7755 format = ",@%s";
7756 #endif
7758 fprintf (asm_out_file, format, type);
7760 if (flags & SECTION_ENTSIZE)
7761 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
7762 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7764 if (TREE_CODE (decl) == IDENTIFIER_NODE)
7765 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
7766 else
7767 fprintf (asm_out_file, ",%s,comdat",
7768 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
7772 putc ('\n', asm_out_file);
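/* For example (section and group names are illustrative), a writable
   COMDAT data section would come out as

       .section  .data.foo,"awG",@progbits,foo,comdat

   while re-entering an already-declared non-COMDAT section only emits
   the abbreviated "\t.section\t<name>\n" form above.  */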
7775 /* Select a format to encode pointers in exception handling data. */
7777 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
7779 int type;
7780 switch (aarch64_cmodel)
7782 case AARCH64_CMODEL_TINY:
7783 case AARCH64_CMODEL_TINY_PIC:
7784 case AARCH64_CMODEL_SMALL:
7785 case AARCH64_CMODEL_SMALL_PIC:
7786 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
7787 for everything. */
7788 type = DW_EH_PE_sdata4;
7789 break;
7790 default:
7791 /* No assumptions here. 8-byte relocs required. */
7792 type = DW_EH_PE_sdata8;
7793 break;
7795 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
7798 /* Emit load exclusive. */
7800 static void
7801 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
7802 rtx mem, rtx model_rtx)
7804 rtx (*gen) (rtx, rtx, rtx);
7806 switch (mode)
7808 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
7809 case HImode: gen = gen_aarch64_load_exclusivehi; break;
7810 case SImode: gen = gen_aarch64_load_exclusivesi; break;
7811 case DImode: gen = gen_aarch64_load_exclusivedi; break;
7812 default:
7813 gcc_unreachable ();
7816 emit_insn (gen (rval, mem, model_rtx));
7819 /* Emit store exclusive. */
7821 static void
7822 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
7823 rtx rval, rtx mem, rtx model_rtx)
7825 rtx (*gen) (rtx, rtx, rtx, rtx);
7827 switch (mode)
7829 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
7830 case HImode: gen = gen_aarch64_store_exclusivehi; break;
7831 case SImode: gen = gen_aarch64_store_exclusivesi; break;
7832 case DImode: gen = gen_aarch64_store_exclusivedi; break;
7833 default:
7834 gcc_unreachable ();
7837 emit_insn (gen (bval, rval, mem, model_rtx));
7840 /* Mark the previous jump instruction as unlikely. */
7842 static void
7843 aarch64_emit_unlikely_jump (rtx insn)
7845 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
7847 insn = emit_jump_insn (insn);
7848 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
7851 /* Expand a compare and swap pattern. */
7853 void
7854 aarch64_expand_compare_and_swap (rtx operands[])
7856 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
7857 enum machine_mode mode, cmp_mode;
7858 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
7860 bval = operands[0];
7861 rval = operands[1];
7862 mem = operands[2];
7863 oldval = operands[3];
7864 newval = operands[4];
7865 is_weak = operands[5];
7866 mod_s = operands[6];
7867 mod_f = operands[7];
7868 mode = GET_MODE (mem);
7869 cmp_mode = mode;
7871 /* Normally the succ memory model must be stronger than fail, but in the
7872 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
7873 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
7875 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
7876 && INTVAL (mod_s) == MEMMODEL_RELEASE)
7877 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
7879 switch (mode)
7881 case QImode:
7882 case HImode:
7883 /* For short modes, we're going to perform the comparison in SImode,
7884 so do the zero-extension now. */
7885 cmp_mode = SImode;
7886 rval = gen_reg_rtx (SImode);
7887 oldval = convert_modes (SImode, mode, oldval, true);
7888 /* Fall through. */
7890 case SImode:
7891 case DImode:
7892 /* Force the value into a register if needed. */
7893 if (!aarch64_plus_operand (oldval, mode))
7894 oldval = force_reg (cmp_mode, oldval);
7895 break;
7897 default:
7898 gcc_unreachable ();
7901 switch (mode)
7903 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
7904 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
7905 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
7906 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
7907 default:
7908 gcc_unreachable ();
7911 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
7913 if (mode == QImode || mode == HImode)
7914 emit_move_insn (operands[1], gen_lowpart (mode, rval));
7916 x = gen_rtx_REG (CCmode, CC_REGNUM);
7917 x = gen_rtx_EQ (SImode, x, const0_rtx);
7918 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
7921 /* Split a compare and swap pattern. */
7923 void
7924 aarch64_split_compare_and_swap (rtx operands[])
7926 rtx rval, mem, oldval, newval, scratch;
7927 enum machine_mode mode;
7928 bool is_weak;
7929 rtx label1, label2, x, cond;
7931 rval = operands[0];
7932 mem = operands[1];
7933 oldval = operands[2];
7934 newval = operands[3];
7935 is_weak = (operands[4] != const0_rtx);
7936 scratch = operands[7];
7937 mode = GET_MODE (mem);
7939 label1 = NULL_RTX;
7940 if (!is_weak)
7942 label1 = gen_label_rtx ();
7943 emit_label (label1);
7945 label2 = gen_label_rtx ();
7947 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
7949 cond = aarch64_gen_compare_reg (NE, rval, oldval);
7950 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7951 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7952 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
7953 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7955 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
7957 if (!is_weak)
7959 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
7960 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7961 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
7962 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7964 else
7966 cond = gen_rtx_REG (CCmode, CC_REGNUM);
7967 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7968 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7971 emit_label (label2);
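/* The strong-CAS split above produces a loop which, for SImode and
   sequentially consistent semantics, would look roughly like this
   (mnemonics depend on the memory model and are illustrative only):

       .Lretry:  ldaxr   w0, [x1]
                 cmp     w0, w2
                 b.ne    .Ldone
                 stlxr   w3, w4, [x1]
                 cbnz    w3, .Lretry
       .Ldone:

   The weak form omits the backward branch and instead leaves the
   store-exclusive result in the condition flags.  */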
7974 /* Split an atomic operation. */
7976 void
7977 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
7978 rtx value, rtx model_rtx, rtx cond)
7980 enum machine_mode mode = GET_MODE (mem);
7981 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7982 rtx label, x;
7984 label = gen_label_rtx ();
7985 emit_label (label);
7987 if (new_out)
7988 new_out = gen_lowpart (wmode, new_out);
7989 if (old_out)
7990 old_out = gen_lowpart (wmode, old_out);
7991 else
7992 old_out = new_out;
7993 value = simplify_gen_subreg (wmode, value, mode, 0);
7995 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7997 switch (code)
7999 case SET:
8000 new_out = value;
8001 break;
8003 case NOT:
8004 x = gen_rtx_AND (wmode, old_out, value);
8005 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8006 x = gen_rtx_NOT (wmode, new_out);
8007 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8008 break;
8010 case MINUS:
8011 if (CONST_INT_P (value))
8013 value = GEN_INT (-INTVAL (value));
8014 code = PLUS;
8016 /* Fall through. */
8018 default:
8019 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
8020 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8021 break;
8024 aarch64_emit_store_exclusive (mode, cond, mem,
8025 gen_lowpart (mode, new_out), model_rtx);
8027 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8028 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8029 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
8030 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8033 static void
8034 aarch64_print_extension (void)
8036 const struct aarch64_option_extension *opt = NULL;
8038 for (opt = all_extensions; opt->name != NULL; opt++)
8039 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
8040 asm_fprintf (asm_out_file, "+%s", opt->name);
8042 asm_fprintf (asm_out_file, "\n");
8045 static void
8046 aarch64_start_file (void)
8048 if (selected_arch)
8050 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
8051 aarch64_print_extension ();
8053 else if (selected_cpu)
8055 const char *truncated_name
8056 = aarch64_rewrite_selected_cpu (selected_cpu->name);
8057 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
8058 aarch64_print_extension ();
8060 default_file_start();
8063 /* Target hook for c_mode_for_suffix. */
8064 static enum machine_mode
8065 aarch64_c_mode_for_suffix (char suffix)
8067 if (suffix == 'q')
8068 return TFmode;
8070 return VOIDmode;
8073 /* We can only represent floating point constants which will fit in
8074 "quarter-precision" values. These values are characterised by
8075 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given by:
8078 (-1)^s * (n/16) * 2^r
8080 Where:
8081 's' is the sign bit.
8082 'n' is an integer in the range 16 <= n <= 31.
8083 'r' is an integer in the range -3 <= r <= 4. */
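/* Worked examples: 1.0 is (+1) * (16/16) * 2^0, 0.25 is (16/16) * 2^-2
   and 31.0 is (31/16) * 2^4, so all three are representable; the largest
   magnitude is 31.0 and the smallest non-zero magnitude is
   0.125 = (16/16) * 2^-3.  A value such as 0.2 has no such encoding and
   must be loaded some other way.  */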
8085 /* Return true iff X can be represented by a quarter-precision
8086 floating point immediate operand. Note that we cannot represent 0.0. */
8087 bool
8088 aarch64_float_const_representable_p (rtx x)
8090 /* This represents our current view of how many bits
8091 make up the mantissa. */
8092 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8093 int exponent;
8094 unsigned HOST_WIDE_INT mantissa, mask;
8095 REAL_VALUE_TYPE r, m;
8096 bool fail;
8098 if (!CONST_DOUBLE_P (x))
8099 return false;
8101 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8103 /* We cannot represent infinities, NaNs or +/-zero. We won't
8104 know if we have +zero until we analyse the mantissa, but we
8105 can reject the other invalid values. */
8106 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
8107 || REAL_VALUE_MINUS_ZERO (r))
8108 return false;
8110 /* Extract exponent. */
8111 r = real_value_abs (&r);
8112 exponent = REAL_EXP (&r);
8114 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8115 highest (sign) bit, with a fixed binary point at bit point_pos.
8116 m1 holds the low part of the mantissa, m2 the high part.
8117 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
8118 bits for the mantissa, this can fail (low bits will be lost). */
8119 real_ldexp (&m, &r, point_pos - exponent);
8120 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
8122 /* If the low part of the mantissa has bits set we cannot represent
8123 the value. */
8124 if (w.elt (0) != 0)
8125 return false;
8126 /* We have rejected the lower HOST_WIDE_INT, so update our
8127 understanding of how many bits lie in the mantissa and
8128 look only at the high HOST_WIDE_INT. */
8129 mantissa = w.elt (1);
8130 point_pos -= HOST_BITS_PER_WIDE_INT;
8132 /* We can only represent values with a mantissa of the form 1.xxxx. */
8133 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8134 if ((mantissa & mask) != 0)
8135 return false;
8137 /* Having filtered unrepresentable values, we may now remove all
8138 but the highest 5 bits. */
8139 mantissa >>= point_pos - 5;
8141 /* We cannot represent the value 0.0, so reject it. This is handled
8142 elsewhere. */
8143 if (mantissa == 0)
8144 return false;
8146 /* Then, as bit 4 is always set, we can mask it off, leaving
8147 the mantissa in the range [0, 15]. */
8148 mantissa &= ~(1 << 4);
8149 gcc_assert (mantissa <= 15);
8151 /* GCC internally does not use IEEE754-like encoding (where normalized
8152 significands are in the range [1, 2)). GCC uses [0.5, 1) (see real.c).
8153 Our mantissa values are shifted 4 places to the left relative to
8154 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
8155 by 5 places to correct for GCC's representation. */
8156 exponent = 5 - exponent;
8158 return (exponent >= 0 && exponent <= 7);
8161 char*
8162 aarch64_output_simd_mov_immediate (rtx const_vector,
8163 enum machine_mode mode,
8164 unsigned width)
8166 bool is_valid;
8167 static char templ[40];
8168 const char *mnemonic;
8169 const char *shift_op;
8170 unsigned int lane_count = 0;
8171 char element_char;
8173 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
8175 /* This will return true to show that CONST_VECTOR is legal for use as
8176 an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
8177 also update INFO to show how the immediate should be generated. */
8178 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
8179 gcc_assert (is_valid);
8181 element_char = sizetochar (info.element_width);
8182 lane_count = width / info.element_width;
8184 mode = GET_MODE_INNER (mode);
8185 if (mode == SFmode || mode == DFmode)
8187 gcc_assert (info.shift == 0 && ! info.mvn);
8188 if (aarch64_float_const_zero_rtx_p (info.value))
8189 info.value = GEN_INT (0);
8190 else
8192 #define buf_size 20
8193 REAL_VALUE_TYPE r;
8194 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
8195 char float_buf[buf_size] = {'\0'};
8196 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
8197 #undef buf_size
8199 if (lane_count == 1)
8200 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
8201 else
8202 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
8203 lane_count, element_char, float_buf);
8204 return templ;
8208 mnemonic = info.mvn ? "mvni" : "movi";
8209 shift_op = info.msl ? "msl" : "lsl";
8211 if (lane_count == 1)
8212 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
8213 mnemonic, UINTVAL (info.value));
8214 else if (info.shift)
8215 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
8216 ", %s %d", mnemonic, lane_count, element_char,
8217 UINTVAL (info.value), shift_op, info.shift);
8218 else
8219 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
8220 mnemonic, lane_count, element_char, UINTVAL (info.value));
8221 return templ;
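/* Illustrative outputs of the templates above: a single 64-bit lane
   yields "movi  %d0, <hex>", a shifted 32-bit immediate yields something
   like "movi  %0.4s, 0xab, lsl 16" (or "mvni"/"msl" when INFO says so),
   and the floating-point path yields "fmov  %0.4s, <decimal>" with the
   decimal text produced by real_to_decimal_for_mode.  */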
8224 char*
8225 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
8226 enum machine_mode mode)
8228 enum machine_mode vmode;
8230 gcc_assert (!VECTOR_MODE_P (mode));
8231 vmode = aarch64_simd_container_mode (mode, 64);
8232 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
8233 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
8236 /* Split operands into moves from op[1] + op[2] into op[0]. */
8238 void
8239 aarch64_split_combinev16qi (rtx operands[3])
8241 unsigned int dest = REGNO (operands[0]);
8242 unsigned int src1 = REGNO (operands[1]);
8243 unsigned int src2 = REGNO (operands[2]);
8244 enum machine_mode halfmode = GET_MODE (operands[1]);
8245 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
8246 rtx destlo, desthi;
8248 gcc_assert (halfmode == V16QImode);
8250 if (src1 == dest && src2 == dest + halfregs)
8252 /* No-op move. Can't split to nothing; emit something. */
8253 emit_note (NOTE_INSN_DELETED);
8254 return;
8257 /* Preserve register attributes for variable tracking. */
8258 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
8259 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
8260 GET_MODE_SIZE (halfmode));
8262 /* Special case of reversed high/low parts. */
8263 if (reg_overlap_mentioned_p (operands[2], destlo)
8264 && reg_overlap_mentioned_p (operands[1], desthi))
8266 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8267 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
8268 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8270 else if (!reg_overlap_mentioned_p (operands[2], destlo))
8272 /* Try to avoid unnecessary moves if part of the result
8273 is in the right place already. */
8274 if (src1 != dest)
8275 emit_move_insn (destlo, operands[1]);
8276 if (src2 != dest + halfregs)
8277 emit_move_insn (desthi, operands[2]);
8279 else
8281 if (src2 != dest + halfregs)
8282 emit_move_insn (desthi, operands[2]);
8283 if (src1 != dest)
8284 emit_move_insn (destlo, operands[1]);
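/* The reversed-halves case above uses the classic three-XOR swap: after
   a ^= b; b ^= a; a ^= b; the two V16QI halves have exchanged contents
   without needing a scratch register.  */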
8288 /* vec_perm support. */
8290 #define MAX_VECT_LEN 16
8292 struct expand_vec_perm_d
8294 rtx target, op0, op1;
8295 unsigned char perm[MAX_VECT_LEN];
8296 enum machine_mode vmode;
8297 unsigned char nelt;
8298 bool one_vector_p;
8299 bool testing_p;
8302 /* Generate a variable permutation. */
8304 static void
8305 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
8307 enum machine_mode vmode = GET_MODE (target);
8308 bool one_vector_p = rtx_equal_p (op0, op1);
8310 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
8311 gcc_checking_assert (GET_MODE (op0) == vmode);
8312 gcc_checking_assert (GET_MODE (op1) == vmode);
8313 gcc_checking_assert (GET_MODE (sel) == vmode);
8314 gcc_checking_assert (TARGET_SIMD);
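  /* TBL reads its table from one or two full 128-bit registers, so 64-bit
     inputs are first combined into a single V16QImode register; a
     two-vector V16QImode permute uses an OImode register pair.  */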
8316 if (one_vector_p)
8318 if (vmode == V8QImode)
8320 /* Expand the argument to a V16QI mode by duplicating it. */
8321 rtx pair = gen_reg_rtx (V16QImode);
8322 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
8323 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8325 else
8327 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
8330 else
8332 rtx pair;
8334 if (vmode == V8QImode)
8336 pair = gen_reg_rtx (V16QImode);
8337 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
8338 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8340 else
8342 pair = gen_reg_rtx (OImode);
8343 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
8344 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
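/* Expand a variable permutation: select elements of OP0 and OP1 according
   to SEL and store the result in TARGET.  SEL is masked first so that
   out-of-range indices wrap around, as vec_perm semantics require.  */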
8349 void
8350 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
8352 enum machine_mode vmode = GET_MODE (target);
8353 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
8354 bool one_vector_p = rtx_equal_p (op0, op1);
8355 rtx rmask[MAX_VECT_LEN], mask;
8357 gcc_checking_assert (!BYTES_BIG_ENDIAN);
8359 /* The TBL instruction does not use a modulo index, so we must take care
8360 of that ourselves. */
8361 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
8362 for (i = 0; i < nelt; ++i)
8363 rmask[i] = mask;
8364 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
8365 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
8367 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
8370 /* Recognize patterns suitable for the TRN instructions. */
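/* For example, with V4SImode operands {a0, a1, a2, a3} and {b0, b1, b2, b3},
   TRN1 produces {a0, b0, a2, b2} (selector 0, 4, 2, 6) and TRN2 produces
   {a1, b1, a3, b3} (selector 1, 5, 3, 7).  */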
8371 static bool
8372 aarch64_evpc_trn (struct expand_vec_perm_d *d)
8374 unsigned int i, odd, mask, nelt = d->nelt;
8375 rtx out, in0, in1, x;
8376 rtx (*gen) (rtx, rtx, rtx);
8377 enum machine_mode vmode = d->vmode;
8379 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8380 return false;
8382 /* Note that these are little-endian tests.
8383 We correct for big-endian later. */
8384 if (d->perm[0] == 0)
8385 odd = 0;
8386 else if (d->perm[0] == 1)
8387 odd = 1;
8388 else
8389 return false;
8390 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8392 for (i = 0; i < nelt; i += 2)
8394 if (d->perm[i] != i + odd)
8395 return false;
8396 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
8397 return false;
8400 /* Success! */
8401 if (d->testing_p)
8402 return true;
8404 in0 = d->op0;
8405 in1 = d->op1;
8406 if (BYTES_BIG_ENDIAN)
8408 x = in0, in0 = in1, in1 = x;
8409 odd = !odd;
8411 out = d->target;
8413 if (odd)
8415 switch (vmode)
8417 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
8418 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
8419 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
8420 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
8421 case V4SImode: gen = gen_aarch64_trn2v4si; break;
8422 case V2SImode: gen = gen_aarch64_trn2v2si; break;
8423 case V2DImode: gen = gen_aarch64_trn2v2di; break;
8424 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
8425 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
8426 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
8427 default:
8428 return false;
8431 else
8433 switch (vmode)
8435 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
8436 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
8437 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
8438 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
8439 case V4SImode: gen = gen_aarch64_trn1v4si; break;
8440 case V2SImode: gen = gen_aarch64_trn1v2si; break;
8441 case V2DImode: gen = gen_aarch64_trn1v2di; break;
8442 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
8443 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
8444 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
8445 default:
8446 return false;
8450 emit_insn (gen (out, in0, in1));
8451 return true;
8454 /* Recognize patterns suitable for the UZP instructions. */
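/* For example, with V4SImode operands {a0, a1, a2, a3} and {b0, b1, b2, b3},
   UZP1 produces the even-indexed elements {a0, a2, b0, b2} (selector 0, 2, 4, 6)
   and UZP2 the odd-indexed elements {a1, a3, b1, b3} (selector 1, 3, 5, 7).  */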
8455 static bool
8456 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
8458 unsigned int i, odd, mask, nelt = d->nelt;
8459 rtx out, in0, in1, x;
8460 rtx (*gen) (rtx, rtx, rtx);
8461 enum machine_mode vmode = d->vmode;
8463 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8464 return false;
8466 /* Note that these are little-endian tests.
8467 We correct for big-endian later. */
8468 if (d->perm[0] == 0)
8469 odd = 0;
8470 else if (d->perm[0] == 1)
8471 odd = 1;
8472 else
8473 return false;
8474 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8476 for (i = 0; i < nelt; i++)
8478 unsigned elt = (i * 2 + odd) & mask;
8479 if (d->perm[i] != elt)
8480 return false;
8483 /* Success! */
8484 if (d->testing_p)
8485 return true;
8487 in0 = d->op0;
8488 in1 = d->op1;
8489 if (BYTES_BIG_ENDIAN)
8491 x = in0, in0 = in1, in1 = x;
8492 odd = !odd;
8494 out = d->target;
8496 if (odd)
8498 switch (vmode)
8500 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
8501 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
8502 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
8503 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
8504 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
8505 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
8506 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
8507 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
8508 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
8509 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
8510 default:
8511 return false;
8514 else
8516 switch (vmode)
8518 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
8519 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
8520 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
8521 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
8522 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
8523 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
8524 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
8525 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
8526 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
8527 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
8528 default:
8529 return false;
8533 emit_insn (gen (out, in0, in1));
8534 return true;
8537 /* Recognize patterns suitable for the ZIP instructions. */
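/* For example, with V4SImode operands {a0, a1, a2, a3} and {b0, b1, b2, b3},
   ZIP1 interleaves the low halves to give {a0, b0, a1, b1} (selector 0, 4, 1, 5)
   and ZIP2 the high halves to give {a2, b2, a3, b3} (selector 2, 6, 3, 7).  */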
8538 static bool
8539 aarch64_evpc_zip (struct expand_vec_perm_d *d)
8541 unsigned int i, high, mask, nelt = d->nelt;
8542 rtx out, in0, in1, x;
8543 rtx (*gen) (rtx, rtx, rtx);
8544 enum machine_mode vmode = d->vmode;
8546 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8547 return false;
8549 /* Note that these are little-endian tests.
8550 We correct for big-endian later. */
8551 high = nelt / 2;
8552 if (d->perm[0] == high)
8553 /* Do Nothing. */
8555 else if (d->perm[0] == 0)
8556 high = 0;
8557 else
8558 return false;
8559 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8561 for (i = 0; i < nelt / 2; i++)
8563 unsigned elt = (i + high) & mask;
8564 if (d->perm[i * 2] != elt)
8565 return false;
8566 elt = (elt + nelt) & mask;
8567 if (d->perm[i * 2 + 1] != elt)
8568 return false;
8571 /* Success! */
8572 if (d->testing_p)
8573 return true;
8575 in0 = d->op0;
8576 in1 = d->op1;
8577 if (BYTES_BIG_ENDIAN)
8579 x = in0, in0 = in1, in1 = x;
8580 high = !high;
8582 out = d->target;
8584 if (high)
8586 switch (vmode)
8588 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
8589 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
8590 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
8591 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
8592 case V4SImode: gen = gen_aarch64_zip2v4si; break;
8593 case V2SImode: gen = gen_aarch64_zip2v2si; break;
8594 case V2DImode: gen = gen_aarch64_zip2v2di; break;
8595 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
8596 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
8597 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
8598 default:
8599 return false;
8602 else
8604 switch (vmode)
8606 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
8607 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
8608 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
8609 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
8610 case V4SImode: gen = gen_aarch64_zip1v4si; break;
8611 case V2SImode: gen = gen_aarch64_zip1v2si; break;
8612 case V2DImode: gen = gen_aarch64_zip1v2di; break;
8613 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
8614 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
8615 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
8616 default:
8617 return false;
8621 emit_insn (gen (out, in0, in1));
8622 return true;
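/* Recognize permutations that broadcast a single element, which map to the
   DUP (element) instruction.  */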
8625 static bool
8626 aarch64_evpc_dup (struct expand_vec_perm_d *d)
8628 rtx (*gen) (rtx, rtx, rtx);
8629 rtx out = d->target;
8630 rtx in0;
8631 enum machine_mode vmode = d->vmode;
8632 unsigned int i, elt, nelt = d->nelt;
8633 rtx lane;
8635 /* TODO: This may not be big-endian safe. */
8636 if (BYTES_BIG_ENDIAN)
8637 return false;
8639 elt = d->perm[0];
8640 for (i = 1; i < nelt; i++)
8642 if (elt != d->perm[i])
8643 return false;
8646 /* The generic preparation in aarch64_expand_vec_perm_const_1
8647 swaps the operand order and the permute indices if it finds
8648 d->perm[0] to be in the second operand. Thus, we can always
8649 use d->op0 and need not do any extra arithmetic to get the
8650 correct lane number. */
8651 in0 = d->op0;
8652 lane = GEN_INT (elt);
8654 switch (vmode)
8656 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
8657 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
8658 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
8659 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
8660 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
8661 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
8662 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
8663 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
8664 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
8665 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
8666 default:
8667 return false;
8670 emit_insn (gen (out, in0, lane));
8671 return true;
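/* Expand the permutation with a general TBL table lookup.  This is the
   fallback used when none of the single-instruction patterns above apply.  */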
8674 static bool
8675 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
8677 rtx rperm[MAX_VECT_LEN], sel;
8678 enum machine_mode vmode = d->vmode;
8679 unsigned int i, nelt = d->nelt;
8681 if (d->testing_p)
8682 return true;
8684 /* Generic code will try constant permutation twice: once with the
8685 original mode and again with the elements lowered to QImode.
8686 So wait and don't do the selector expansion ourselves. */
8687 if (vmode != V8QImode && vmode != V16QImode)
8688 return false;
8690 for (i = 0; i < nelt; ++i)
8692 int nunits = GET_MODE_NUNITS (vmode);
8694 /* If big-endian and two vectors, we end up with a weird mixed-endian
8695 mode on NEON. Reverse the index within each word but not the word
8696 itself. */
8697 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
8698 : d->perm[i]);
8700 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
8701 sel = force_reg (vmode, sel);
8703 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
8704 return true;
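/* Try to expand the constant permutation described by D, first with the
   single-instruction ZIP, UZP, TRN and DUP patterns and finally with a TBL
   lookup.  Return true on success.  */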
8707 static bool
8708 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
8710 /* The pattern matching functions above are written to look for a small
8711 number to begin the sequence (0, 1, N/2). If we begin with an index
8712 from the second operand, we can swap the operands. */
8713 if (d->perm[0] >= d->nelt)
8715 unsigned i, nelt = d->nelt;
8716 rtx x;
8718 gcc_assert (nelt == (nelt & -nelt));
8719 for (i = 0; i < nelt; ++i)
8720 d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
8722 x = d->op0;
8723 d->op0 = d->op1;
8724 d->op1 = x;
8727 if (TARGET_SIMD)
8729 if (aarch64_evpc_zip (d))
8730 return true;
8731 else if (aarch64_evpc_uzp (d))
8732 return true;
8733 else if (aarch64_evpc_trn (d))
8734 return true;
8735 else if (aarch64_evpc_dup (d))
8736 return true;
8737 return aarch64_evpc_tbl (d);
8739 return false;
8742 /* Expand a vec_perm_const pattern. */
8744 bool
8745 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
8747 struct expand_vec_perm_d d;
8748 int i, nelt, which;
8750 d.target = target;
8751 d.op0 = op0;
8752 d.op1 = op1;
8754 d.vmode = GET_MODE (target);
8755 gcc_assert (VECTOR_MODE_P (d.vmode));
8756 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8757 d.testing_p = false;
8759 for (i = which = 0; i < nelt; ++i)
8761 rtx e = XVECEXP (sel, 0, i);
8762 int ei = INTVAL (e) & (2 * nelt - 1);
8763 which |= (ei < nelt ? 1 : 2);
8764 d.perm[i] = ei;
8767 switch (which)
8769 default:
8770 gcc_unreachable ();
8772 case 3:
8773 d.one_vector_p = false;
8774 if (!rtx_equal_p (op0, op1))
8775 break;
8777 /* The elements of PERM do not suggest that only the first operand
8778 is used, but both operands are identical. Allow easier matching
8779 by folding the permutation into the single
8780 input vector. */
8781 /* Fall Through. */
8782 case 2:
8783 for (i = 0; i < nelt; ++i)
8784 d.perm[i] &= nelt - 1;
8785 d.op0 = op1;
8786 d.one_vector_p = true;
8787 break;
8789 case 1:
8790 d.op1 = op0;
8791 d.one_vector_p = true;
8792 break;
8795 return aarch64_expand_vec_perm_const_1 (&d);
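/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK.  Return true if the
   permutation described by SEL can be expanded for vectors of mode VMODE.  */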
8798 static bool
8799 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
8800 const unsigned char *sel)
8802 struct expand_vec_perm_d d;
8803 unsigned int i, nelt, which;
8804 bool ret;
8806 d.vmode = vmode;
8807 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8808 d.testing_p = true;
8809 memcpy (d.perm, sel, nelt);
8811 /* Calculate whether all elements are in one vector. */
8812 for (i = which = 0; i < nelt; ++i)
8814 unsigned char e = d.perm[i];
8815 gcc_assert (e < 2 * nelt);
8816 which |= (e < nelt ? 1 : 2);
8819 /* If all elements are from the second vector, reindex as if from the
8820 first vector. */
8821 if (which == 2)
8822 for (i = 0; i < nelt; ++i)
8823 d.perm[i] -= nelt;
8825 /* Check whether the mask can be applied to a single vector. */
8826 d.one_vector_p = (which != 3);
8828 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
8829 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
8830 if (!d.one_vector_p)
8831 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
8833 start_sequence ();
8834 ret = aarch64_expand_vec_perm_const_1 (&d);
8835 end_sequence ();
8837 return ret;
8840 /* Implement target hook CANNOT_CHANGE_MODE_CLASS. */
8841 bool
8842 aarch64_cannot_change_mode_class (enum machine_mode from,
8843 enum machine_mode to,
8844 enum reg_class rclass)
8846 /* Full-reg subregs are allowed on general regs or any class if they are
8847 the same size. */
8848 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
8849 || !reg_classes_intersect_p (FP_REGS, rclass))
8850 return false;
8852 /* Limited combinations of subregs are safe on FP_REGS.  In particular:
8853 1. Vector mode to scalar mode, where one unit of the vector is accessed.
8854 2. Scalar mode to scalar mode, for integer modes or same-size float modes.
8855 3. Vector mode to vector mode.
8856 4. On little-endian only, vector-structure mode to vector mode. */
8857 if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
8859 if (aarch64_vector_mode_supported_p (from)
8860 && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
8861 return false;
8863 if (GET_MODE_NUNITS (from) == 1
8864 && GET_MODE_NUNITS (to) == 1
8865 && (GET_MODE_CLASS (from) == MODE_INT
8866 || from == to))
8867 return false;
8869 if (aarch64_vector_mode_supported_p (from)
8870 && aarch64_vector_mode_supported_p (to))
8871 return false;
8873 /* Within a vector structure straddling multiple vector registers
8874 we are in a mixed-endian representation. As such, we can't
8875 easily change modes for BYTES_BIG_ENDIAN. Otherwise, we can
8876 switch between vectors and vector structures cheaply. */
8877 if (!BYTES_BIG_ENDIAN)
8878 if ((aarch64_vector_mode_supported_p (from)
8879 && aarch64_vect_struct_mode_p (to))
8880 || (aarch64_vector_mode_supported_p (to)
8881 && aarch64_vect_struct_mode_p (from)))
8882 return false;
8885 return true;
8888 /* Implement MODES_TIEABLE_P. */
8890 bool
8891 aarch64_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
8893 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
8894 return true;
8896 /* We specifically want to allow elements of "structure" modes to
8897 be tieable to the structure. This more general condition allows
8898 other rarer situations too. */
8899 if (TARGET_SIMD
8900 && aarch64_vector_mode_p (mode1)
8901 && aarch64_vector_mode_p (mode2))
8902 return true;
8904 return false;
8907 #undef TARGET_ADDRESS_COST
8908 #define TARGET_ADDRESS_COST aarch64_address_cost
8910 /* This hook determines whether unnamed bitfields affect the alignment
8911 of the containing structure. The hook returns true if the structure
8912 should inherit the alignment requirements of an unnamed bitfield's
8913 type. */
8914 #undef TARGET_ALIGN_ANON_BITFIELD
8915 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
8917 #undef TARGET_ASM_ALIGNED_DI_OP
8918 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
8920 #undef TARGET_ASM_ALIGNED_HI_OP
8921 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
8923 #undef TARGET_ASM_ALIGNED_SI_OP
8924 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
8926 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
8927 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
8928 hook_bool_const_tree_hwi_hwi_const_tree_true
8930 #undef TARGET_ASM_FILE_START
8931 #define TARGET_ASM_FILE_START aarch64_start_file
8933 #undef TARGET_ASM_OUTPUT_MI_THUNK
8934 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
8936 #undef TARGET_ASM_SELECT_RTX_SECTION
8937 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
8939 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
8940 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
8942 #undef TARGET_BUILD_BUILTIN_VA_LIST
8943 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
8945 #undef TARGET_CALLEE_COPIES
8946 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
8948 #undef TARGET_CAN_ELIMINATE
8949 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
8951 #undef TARGET_CANNOT_FORCE_CONST_MEM
8952 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
8954 #undef TARGET_CONDITIONAL_REGISTER_USAGE
8955 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
8957 /* Only the least significant bit is used for initialization guard
8958 variables. */
8959 #undef TARGET_CXX_GUARD_MASK_BIT
8960 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
8962 #undef TARGET_C_MODE_FOR_SUFFIX
8963 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
8965 #ifdef TARGET_BIG_ENDIAN_DEFAULT
8966 #undef TARGET_DEFAULT_TARGET_FLAGS
8967 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
8968 #endif
8970 #undef TARGET_CLASS_MAX_NREGS
8971 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
8973 #undef TARGET_BUILTIN_DECL
8974 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
8976 #undef TARGET_EXPAND_BUILTIN
8977 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
8979 #undef TARGET_EXPAND_BUILTIN_VA_START
8980 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
8982 #undef TARGET_FOLD_BUILTIN
8983 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
8985 #undef TARGET_FUNCTION_ARG
8986 #define TARGET_FUNCTION_ARG aarch64_function_arg
8988 #undef TARGET_FUNCTION_ARG_ADVANCE
8989 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
8991 #undef TARGET_FUNCTION_ARG_BOUNDARY
8992 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
8994 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
8995 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
8997 #undef TARGET_FUNCTION_VALUE
8998 #define TARGET_FUNCTION_VALUE aarch64_function_value
9000 #undef TARGET_FUNCTION_VALUE_REGNO_P
9001 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
9003 #undef TARGET_FRAME_POINTER_REQUIRED
9004 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
9006 #undef TARGET_GIMPLE_FOLD_BUILTIN
9007 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
9009 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
9010 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
9012 #undef TARGET_INIT_BUILTINS
9013 #define TARGET_INIT_BUILTINS aarch64_init_builtins
9015 #undef TARGET_LEGITIMATE_ADDRESS_P
9016 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
9018 #undef TARGET_LEGITIMATE_CONSTANT_P
9019 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
9021 #undef TARGET_LIBGCC_CMP_RETURN_MODE
9022 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
9024 #undef TARGET_LRA_P
9025 #define TARGET_LRA_P aarch64_lra_p
9027 #undef TARGET_MANGLE_TYPE
9028 #define TARGET_MANGLE_TYPE aarch64_mangle_type
9030 #undef TARGET_MEMORY_MOVE_COST
9031 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
9033 #undef TARGET_MUST_PASS_IN_STACK
9034 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
9036 /* This target hook should return true if accesses to volatile bitfields
9037 should use the narrowest mode possible. It should return false if these
9038 accesses should use the bitfield container type. */
9039 #undef TARGET_NARROW_VOLATILE_BITFIELD
9040 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
9042 #undef TARGET_OPTION_OVERRIDE
9043 #define TARGET_OPTION_OVERRIDE aarch64_override_options
9045 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
9046 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
9047 aarch64_override_options_after_change
9049 #undef TARGET_PASS_BY_REFERENCE
9050 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
9052 #undef TARGET_PREFERRED_RELOAD_CLASS
9053 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
9055 #undef TARGET_SECONDARY_RELOAD
9056 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
9058 #undef TARGET_SHIFT_TRUNCATION_MASK
9059 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
9061 #undef TARGET_SETUP_INCOMING_VARARGS
9062 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
9064 #undef TARGET_STRUCT_VALUE_RTX
9065 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
9067 #undef TARGET_REGISTER_MOVE_COST
9068 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
9070 #undef TARGET_RETURN_IN_MEMORY
9071 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
9073 #undef TARGET_RETURN_IN_MSB
9074 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
9076 #undef TARGET_RTX_COSTS
9077 #define TARGET_RTX_COSTS aarch64_rtx_costs
9079 #undef TARGET_SCHED_ISSUE_RATE
9080 #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
9082 #undef TARGET_TRAMPOLINE_INIT
9083 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
9085 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
9086 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
9088 #undef TARGET_VECTOR_MODE_SUPPORTED_P
9089 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
9091 #undef TARGET_ARRAY_MODE_SUPPORTED_P
9092 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
9094 #undef TARGET_VECTORIZE_ADD_STMT_COST
9095 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
9097 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
9098 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
9099 aarch64_builtin_vectorization_cost
9101 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
9102 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
9104 #undef TARGET_VECTORIZE_BUILTINS
9105 #define TARGET_VECTORIZE_BUILTINS
9107 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
9108 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
9109 aarch64_builtin_vectorized_function
9111 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
9112 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
9113 aarch64_autovectorize_vector_sizes
9115 /* Section anchor support. */
9117 #undef TARGET_MIN_ANCHOR_OFFSET
9118 #define TARGET_MIN_ANCHOR_OFFSET -256
9120 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
9121 byte offset; we can do much more for larger data types, but have no way
9122 to determine the size of the access. We assume accesses are aligned. */
9123 #undef TARGET_MAX_ANCHOR_OFFSET
9124 #define TARGET_MAX_ANCHOR_OFFSET 4095
9126 #undef TARGET_VECTOR_ALIGNMENT
9127 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
9129 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
9130 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
9131 aarch64_simd_vector_alignment_reachable
9133 /* vec_perm support. */
9135 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
9136 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
9137 aarch64_vectorize_vec_perm_const_ok
9140 #undef TARGET_FIXED_CONDITION_CODE_REGS
9141 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
9143 #undef TARGET_FLAGS_REGNUM
9144 #define TARGET_FLAGS_REGNUM CC_REGNUM
9146 struct gcc_target targetm = TARGET_INITIALIZER;
9148 #include "gt-aarch64.h"