Merge from branches/gcc-4_8-branch up to rev 207411.
[official-gcc.git] / gcc-4_8-branch / gcc / config / aarch64 / aarch64.c
blob 0e0203f8250e07cacc7f6cb5fa553d56c16eb830
1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2013 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "tree.h"
29 #include "regs.h"
30 #include "df.h"
31 #include "hard-reg-set.h"
32 #include "output.h"
33 #include "expr.h"
34 #include "reload.h"
35 #include "toplev.h"
36 #include "target.h"
37 #include "target-def.h"
38 #include "targhooks.h"
39 #include "ggc.h"
40 #include "function.h"
41 #include "tm_p.h"
42 #include "recog.h"
43 #include "langhooks.h"
44 #include "diagnostic-core.h"
45 #include "gimple.h"
46 #include "optabs.h"
47 #include "dwarf2.h"
48 #include "cfgloop.h"
49 #include "tree-vectorizer.h"
51 /* Classifies an address.
53 ADDRESS_REG_IMM
54 A simple base register plus immediate offset.
56 ADDRESS_REG_WB
57 A base register indexed by immediate offset with writeback.
59 ADDRESS_REG_REG
60 A base register indexed by (optionally scaled) register.
62 ADDRESS_REG_UXTW
63 A base register indexed by (optionally scaled) zero-extended register.
65 ADDRESS_REG_SXTW
66 A base register indexed by (optionally scaled) sign-extended register.
68 ADDRESS_LO_SUM
69 A LO_SUM rtx with a base register and "LO12" symbol relocation.
71 ADDRESS_SYMBOLIC:
72 A constant symbolic address, in pc-relative literal pool. */
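/* Illustrative assembly forms for the classes above (assumed syntax,
   for exposition only):
     ADDRESS_REG_IMM    ldr x0, [x1, #16]
     ADDRESS_REG_WB     ldr x0, [x1, #16]!   or   ldr x0, [x1], #16
     ADDRESS_REG_REG    ldr x0, [x1, x2, lsl #3]
     ADDRESS_REG_UXTW   ldr x0, [x1, w2, uxtw #3]
     ADDRESS_REG_SXTW   ldr x0, [x1, w2, sxtw #3]
     ADDRESS_LO_SUM     ldr x0, [x1, #:lo12:foo]
     ADDRESS_SYMBOLIC   ldr x0, .Lpool_entry  */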
74 enum aarch64_address_type {
75 ADDRESS_REG_IMM,
76 ADDRESS_REG_WB,
77 ADDRESS_REG_REG,
78 ADDRESS_REG_UXTW,
79 ADDRESS_REG_SXTW,
80 ADDRESS_LO_SUM,
81 ADDRESS_SYMBOLIC
84 struct aarch64_address_info {
85 enum aarch64_address_type type;
86 rtx base;
87 rtx offset;
88 int shift;
89 enum aarch64_symbol_type symbol_type;
92 struct simd_immediate_info
93 {
94 rtx value;
95 int shift;
96 int element_width;
97 bool mvn;
98 bool msl;
101 /* The current code model. */
102 enum aarch64_code_model aarch64_cmodel;
104 #ifdef HAVE_AS_TLS
105 #undef TARGET_HAVE_TLS
106 #define TARGET_HAVE_TLS 1
107 #endif
109 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
110 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
111 const_tree,
112 enum machine_mode *, int *,
113 bool *);
114 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
115 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
116 static void aarch64_override_options_after_change (void);
117 static bool aarch64_vector_mode_supported_p (enum machine_mode);
118 static unsigned bit_count (unsigned HOST_WIDE_INT);
119 static bool aarch64_const_vec_all_same_int_p (rtx,
120 HOST_WIDE_INT, HOST_WIDE_INT);
122 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
123 const unsigned char *sel);
125 /* The processor for which instructions should be scheduled. */
126 enum aarch64_processor aarch64_tune = generic;
128 /* The current tuning set. */
129 const struct tune_params *aarch64_tune_params;
131 /* Mask to specify which instructions we are allowed to generate. */
132 unsigned long aarch64_isa_flags = 0;
134 /* Mask to specify which instruction scheduling options should be used. */
135 unsigned long aarch64_tune_flags = 0;
137 /* Tuning parameters. */
139 #if HAVE_DESIGNATED_INITIALIZERS
140 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
141 #else
142 #define NAMED_PARAM(NAME, VAL) (VAL)
143 #endif
145 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
146 __extension__
147 #endif
148 static const struct cpu_rtx_cost_table generic_rtx_cost_table =
150 NAMED_PARAM (memory_load, COSTS_N_INSNS (1)),
151 NAMED_PARAM (memory_store, COSTS_N_INSNS (0)),
152 NAMED_PARAM (register_shift, COSTS_N_INSNS (1)),
153 NAMED_PARAM (int_divide, COSTS_N_INSNS (6)),
154 NAMED_PARAM (float_divide, COSTS_N_INSNS (2)),
155 NAMED_PARAM (double_divide, COSTS_N_INSNS (6)),
156 NAMED_PARAM (int_multiply, COSTS_N_INSNS (1)),
157 NAMED_PARAM (int_multiply_extend, COSTS_N_INSNS (1)),
158 NAMED_PARAM (int_multiply_add, COSTS_N_INSNS (1)),
159 NAMED_PARAM (int_multiply_extend_add, COSTS_N_INSNS (1)),
160 NAMED_PARAM (float_multiply, COSTS_N_INSNS (0)),
161 NAMED_PARAM (double_multiply, COSTS_N_INSNS (1))
164 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
165 __extension__
166 #endif
167 static const struct cpu_addrcost_table generic_addrcost_table =
169 NAMED_PARAM (pre_modify, 0),
170 NAMED_PARAM (post_modify, 0),
171 NAMED_PARAM (register_offset, 0),
172 NAMED_PARAM (register_extend, 0),
173 NAMED_PARAM (imm_offset, 0)
176 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
177 __extension__
178 #endif
179 static const struct cpu_regmove_cost generic_regmove_cost =
181 NAMED_PARAM (GP2GP, 1),
182 NAMED_PARAM (GP2FP, 2),
183 NAMED_PARAM (FP2GP, 2),
184 /* We currently do not provide direct support for TFmode Q->Q move.
185 Therefore we need to raise the cost above 2 in order to have
186 reload handle the situation. */
187 NAMED_PARAM (FP2FP, 4)
190 /* Generic costs for vector insn classes. */
191 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
192 __extension__
193 #endif
194 static const struct cpu_vector_cost generic_vector_cost =
196 NAMED_PARAM (scalar_stmt_cost, 1),
197 NAMED_PARAM (scalar_load_cost, 1),
198 NAMED_PARAM (scalar_store_cost, 1),
199 NAMED_PARAM (vec_stmt_cost, 1),
200 NAMED_PARAM (vec_to_scalar_cost, 1),
201 NAMED_PARAM (scalar_to_vec_cost, 1),
202 NAMED_PARAM (vec_align_load_cost, 1),
203 NAMED_PARAM (vec_unalign_load_cost, 1),
204 NAMED_PARAM (vec_unalign_store_cost, 1),
205 NAMED_PARAM (vec_store_cost, 1),
206 NAMED_PARAM (cond_taken_branch_cost, 3),
207 NAMED_PARAM (cond_not_taken_branch_cost, 1)
210 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
211 __extension__
212 #endif
213 static const struct tune_params generic_tunings =
215 &generic_rtx_cost_table,
216 &generic_addrcost_table,
217 &generic_regmove_cost,
218 &generic_vector_cost,
219 NAMED_PARAM (memmov_cost, 4)
222 /* A processor implementing AArch64. */
223 struct processor
225 const char *const name;
226 enum aarch64_processor core;
227 const char *arch;
228 const unsigned long flags;
229 const struct tune_params *const tune;
232 /* Processor cores implementing AArch64. */
233 static const struct processor all_cores[] =
235 #define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
236 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
237 #include "aarch64-cores.def"
238 #undef AARCH64_CORE
239 {"generic", generic, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
240 {NULL, aarch64_none, NULL, 0, NULL}
243 /* Architectures implementing AArch64. */
244 static const struct processor all_architectures[] =
246 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
247 {NAME, CORE, #ARCH, FLAGS, NULL},
248 #include "aarch64-arches.def"
249 #undef AARCH64_ARCH
250 {"generic", generic, "8", AARCH64_FL_FOR_ARCH8, NULL},
251 {NULL, aarch64_none, NULL, 0, NULL}
254 /* Target specification. These are populated as command-line arguments
255 are processed, or NULL if not specified. */
256 static const struct processor *selected_arch;
257 static const struct processor *selected_cpu;
258 static const struct processor *selected_tune;
260 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
262 /* An ISA extension in the co-processor and main instruction set space. */
263 struct aarch64_option_extension
265 const char *const name;
266 const unsigned long flags_on;
267 const unsigned long flags_off;
270 /* ISA extensions in AArch64. */
271 static const struct aarch64_option_extension all_extensions[] =
273 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
274 {NAME, FLAGS_ON, FLAGS_OFF},
275 #include "aarch64-option-extensions.def"
276 #undef AARCH64_OPT_EXTENSION
277 {NULL, 0, 0}
280 /* Used to track the size of an address when generating a pre/post
281 increment address. */
282 static enum machine_mode aarch64_memory_reference_mode;
284 /* Used to force GTY into this file. */
285 static GTY(()) int gty_dummy;
287 /* A table of valid AArch64 "bitmask immediate" values for
288 logical instructions. */
290 #define AARCH64_NUM_BITMASKS 5334
291 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
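/* Roughly speaking, a bitmask immediate is a 2-, 4-, 8-, 16-, 32- or
   64-bit element consisting of a (rotated) run of contiguous set bits,
   replicated across the register; e.g. 0x00ff00ff00ff00ff is encodable
   while 0x0000000000001234 is not.  The table above enumerates the
   AARCH64_NUM_BITMASKS distinct 64-bit values of this form.  */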
293 /* Did we set flag_omit_frame_pointer just so
294 aarch64_frame_pointer_required would be called? */
295 static bool faked_omit_frame_pointer;
297 typedef enum aarch64_cond_code
299 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
300 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
301 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
303 aarch64_cc;
305 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
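/* Flipping the low bit yields the inverse condition because the codes
   above are laid out in complementary pairs: EQ (0) <-> NE (1),
   CS (2) <-> CC (3), ..., GT (12) <-> LE (13).  */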
307 /* The condition codes of the processor, and the inverse function. */
308 static const char * const aarch64_condition_codes[] =
310 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
311 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
314 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
315 unsigned
316 aarch64_dbx_register_number (unsigned regno)
318 if (GP_REGNUM_P (regno))
319 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
320 else if (regno == SP_REGNUM)
321 return AARCH64_DWARF_SP;
322 else if (FP_REGNUM_P (regno))
323 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
325 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
326 equivalent DWARF register. */
327 return DWARF_FRAME_REGISTERS;
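/* For example, x5 maps to AARCH64_DWARF_R0 + 5, sp to AARCH64_DWARF_SP
   and v3 to AARCH64_DWARF_V0 + 3; any other register (e.g. the condition
   flags) falls through to DWARF_FRAME_REGISTERS, i.e. "no DWARF
   equivalent".  */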
330 /* Return TRUE if MODE is any of the large INT modes. */
331 static bool
332 aarch64_vect_struct_mode_p (enum machine_mode mode)
334 return mode == OImode || mode == CImode || mode == XImode;
337 /* Return TRUE if MODE is any of the vector modes. */
338 static bool
339 aarch64_vector_mode_p (enum machine_mode mode)
341 return aarch64_vector_mode_supported_p (mode)
342 || aarch64_vect_struct_mode_p (mode);
345 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
346 static bool
347 aarch64_array_mode_supported_p (enum machine_mode mode,
348 unsigned HOST_WIDE_INT nelems)
350 if (TARGET_SIMD
351 && AARCH64_VALID_SIMD_QREG_MODE (mode)
352 && (nelems >= 2 && nelems <= 4))
353 return true;
355 return false;
358 /* Implement HARD_REGNO_NREGS. */
360 int
361 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
363 switch (aarch64_regno_regclass (regno))
365 case FP_REGS:
366 case FP_LO_REGS:
367 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
368 default:
369 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
371 gcc_unreachable ();
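/* For instance, a 16-byte TImode value occupies two X registers but only
   one V register, and a 32-byte OImode SIMD structure occupies two V
   registers (assuming UNITS_PER_WORD of 8 and UNITS_PER_VREG of 16).  */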
374 /* Implement HARD_REGNO_MODE_OK. */
376 int
377 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
379 if (GET_MODE_CLASS (mode) == MODE_CC)
380 return regno == CC_REGNUM;
382 if (regno == SP_REGNUM || regno == FRAME_POINTER_REGNUM
383 || regno == ARG_POINTER_REGNUM)
384 return mode == Pmode;
386 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
387 return 1;
389 if (FP_REGNUM_P (regno))
391 if (aarch64_vect_struct_mode_p (mode))
392 return
393 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
394 else
395 return 1;
398 return 0;
401 /* Return true if calls to DECL should be treated as
402 long-calls (i.e. called via a register). */
403 static bool
404 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
406 return false;
409 /* Return true if calls to symbol-ref SYM should be treated as
410 long-calls (i.e. called via a register). */
411 bool
412 aarch64_is_long_call_p (rtx sym)
414 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
417 /* Return true if the offsets to a zero/sign-extract operation
418 represent an expression that matches an extend operation. The
419 operands represent the parameters from
421 (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */
422 bool
423 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
424 rtx extract_imm)
426 HOST_WIDE_INT mult_val, extract_val;
428 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
429 return false;
431 mult_val = INTVAL (mult_imm);
432 extract_val = INTVAL (extract_imm);
434 if (extract_val > 8
435 && extract_val < GET_MODE_BITSIZE (mode)
436 && exact_log2 (extract_val & ~7) > 0
437 && (extract_val & 7) <= 4
438 && mult_val == (1 << (extract_val & 7)))
439 return true;
441 return false;
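/* Worked example (illustrative): mult_imm == 4 and extract_imm == 34 pass
   the checks above, since the extract width rounds down to 32 bits and the
   remaining 2 bits match the multiplier 1 << 2.  That pattern describes a
   32-bit value extended and shifted left by two, as in an extended-register
   operand like "add x0, x1, w2, uxtw #2".  */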
444 /* Emit an insn that's a simple single-set. Both the operands must be
445 known to be valid. */
446 inline static rtx
447 emit_set_insn (rtx x, rtx y)
449 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
452 /* X and Y are two things to compare using CODE. Emit the compare insn and
453 return the rtx for register 0 in the proper mode. */
454 rtx
455 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
457 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
458 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
460 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
461 return cc_reg;
464 /* Build the SYMBOL_REF for __tls_get_addr. */
466 static GTY(()) rtx tls_get_addr_libfunc;
468 rtx
469 aarch64_tls_get_addr (void)
471 if (!tls_get_addr_libfunc)
472 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
473 return tls_get_addr_libfunc;
476 /* Return the TLS model to use for ADDR. */
478 static enum tls_model
479 tls_symbolic_operand_type (rtx addr)
481 enum tls_model tls_kind = TLS_MODEL_NONE;
482 rtx sym, addend;
484 if (GET_CODE (addr) == CONST)
486 split_const (addr, &sym, &addend);
487 if (GET_CODE (sym) == SYMBOL_REF)
488 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
490 else if (GET_CODE (addr) == SYMBOL_REF)
491 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
493 return tls_kind;
496 /* We'll allow lo_sum's in our legitimate addresses so that
497 combine would take care of combining addresses where
498 necessary, but for generation purposes, we'll generate the address
499 as:
500 RTL Absolute
501 tmp = hi (symbol_ref); adrp x1, foo
502 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
505 PIC TLS
506 adrp x1, :got:foo adrp tmp, :tlsgd:foo
507 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
508 bl __tls_get_addr
511 Load TLS symbol, depending on TLS mechanism and TLS access model.
513 Global Dynamic - Traditional TLS:
514 adrp tmp, :tlsgd:imm
515 add dest, tmp, #:tlsgd_lo12:imm
516 bl __tls_get_addr
518 Global Dynamic - TLS Descriptors:
519 adrp dest, :tlsdesc:imm
520 ldr tmp, [dest, #:tlsdesc_lo12:imm]
521 add dest, dest, #:tlsdesc_lo12:imm
522 blr tmp
523 mrs tp, tpidr_el0
524 add dest, dest, tp
526 Initial Exec:
527 mrs tp, tpidr_el0
528 adrp tmp, :gottprel:imm
529 ldr dest, [tmp, #:gottprel_lo12:imm]
530 add dest, dest, tp
532 Local Exec:
533 mrs tp, tpidr_el0
534 add t0, tp, #:tprel_hi12:imm
535 add t0, #:tprel_lo12_nc:imm
538 static void
539 aarch64_load_symref_appropriately (rtx dest, rtx imm,
540 enum aarch64_symbol_type type)
542 switch (type)
544 case SYMBOL_SMALL_ABSOLUTE:
546 rtx tmp_reg = dest;
547 if (can_create_pseudo_p ())
549 tmp_reg = gen_reg_rtx (Pmode);
552 emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
553 emit_insn (gen_add_losym (dest, tmp_reg, imm));
554 return;
557 case SYMBOL_TINY_ABSOLUTE:
558 emit_insn (gen_rtx_SET (Pmode, dest, imm));
559 return;
561 case SYMBOL_SMALL_GOT:
563 rtx tmp_reg = dest;
564 if (can_create_pseudo_p ())
565 tmp_reg = gen_reg_rtx (Pmode);
566 emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
567 emit_insn (gen_ldr_got_small (dest, tmp_reg, imm));
568 return;
571 case SYMBOL_SMALL_TLSGD:
573 rtx insns;
574 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
576 start_sequence ();
577 emit_call_insn (gen_tlsgd_small (result, imm));
578 insns = get_insns ();
579 end_sequence ();
581 RTL_CONST_CALL_P (insns) = 1;
582 emit_libcall_block (insns, dest, result, imm);
583 return;
586 case SYMBOL_SMALL_TLSDESC:
588 rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
589 rtx tp;
591 emit_insn (gen_tlsdesc_small (imm));
592 tp = aarch64_load_tp (NULL);
593 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
594 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
595 return;
598 case SYMBOL_SMALL_GOTTPREL:
600 rtx tmp_reg = gen_reg_rtx (Pmode);
601 rtx tp = aarch64_load_tp (NULL);
602 emit_insn (gen_tlsie_small (tmp_reg, imm));
603 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
604 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
605 return;
608 case SYMBOL_SMALL_TPREL:
610 rtx tp = aarch64_load_tp (NULL);
611 emit_insn (gen_tlsle_small (dest, tp, imm));
612 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
613 return;
616 case SYMBOL_TINY_GOT:
617 emit_insn (gen_ldr_got_tiny (dest, imm));
618 return;
620 default:
621 gcc_unreachable ();
625 /* Emit a move from SRC to DEST. Assume that the move expanders can
626 handle all moves if !can_create_pseudo_p (). The distinction is
627 important because, unlike emit_move_insn, the move expanders know
628 how to force Pmode objects into the constant pool even when the
629 constant pool address is not itself legitimate. */
630 static rtx
631 aarch64_emit_move (rtx dest, rtx src)
633 return (can_create_pseudo_p ()
634 ? emit_move_insn (dest, src)
635 : emit_move_insn_1 (dest, src));
638 void
639 aarch64_split_128bit_move (rtx dst, rtx src)
641 rtx low_dst;
643 enum machine_mode src_mode = GET_MODE (src);
644 enum machine_mode dst_mode = GET_MODE (dst);
645 int src_regno = REGNO (src);
646 int dst_regno = REGNO (dst);
648 gcc_assert (dst_mode == TImode || dst_mode == TFmode);
650 if (REG_P (dst) && REG_P (src))
652 gcc_assert (src_mode == TImode || src_mode == TFmode);
654 /* Handle r -> w, w -> r. */
655 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
657 switch (src_mode) {
658 case TImode:
659 emit_insn
660 (gen_aarch64_movtilow_di (dst, gen_lowpart (word_mode, src)));
661 emit_insn
662 (gen_aarch64_movtihigh_di (dst, gen_highpart (word_mode, src)));
663 return;
664 case TFmode:
665 emit_insn
666 (gen_aarch64_movtflow_di (dst, gen_lowpart (word_mode, src)));
667 emit_insn
668 (gen_aarch64_movtfhigh_di (dst, gen_highpart (word_mode, src)));
669 return;
670 default:
671 gcc_unreachable ();
674 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
676 switch (src_mode) {
677 case TImode:
678 emit_insn
679 (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst), src));
680 emit_insn
681 (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst), src));
682 return;
683 case TFmode:
684 emit_insn
685 (gen_aarch64_movdi_tflow (gen_lowpart (word_mode, dst), src));
686 emit_insn
687 (gen_aarch64_movdi_tfhigh (gen_highpart (word_mode, dst), src));
688 return;
689 default:
690 gcc_unreachable ();
693 /* Fall through to r -> r cases. */
696 switch (dst_mode) {
697 case TImode:
698 low_dst = gen_lowpart (word_mode, dst);
699 if (REG_P (low_dst)
700 && reg_overlap_mentioned_p (low_dst, src))
702 aarch64_emit_move (gen_highpart (word_mode, dst),
703 gen_highpart_mode (word_mode, TImode, src));
704 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
706 else
708 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
709 aarch64_emit_move (gen_highpart (word_mode, dst),
710 gen_highpart_mode (word_mode, TImode, src));
712 return;
713 case TFmode:
714 emit_move_insn (gen_rtx_REG (DFmode, dst_regno),
715 gen_rtx_REG (DFmode, src_regno));
716 emit_move_insn (gen_rtx_REG (DFmode, dst_regno + 1),
717 gen_rtx_REG (DFmode, src_regno + 1));
718 return;
719 default:
720 gcc_unreachable ();
724 bool
725 aarch64_split_128bit_move_p (rtx dst, rtx src)
727 return (! REG_P (src)
728 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
731 /* Split a complex SIMD combine. */
733 void
734 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
736 enum machine_mode src_mode = GET_MODE (src1);
737 enum machine_mode dst_mode = GET_MODE (dst);
739 gcc_assert (VECTOR_MODE_P (dst_mode));
741 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
743 rtx (*gen) (rtx, rtx, rtx);
745 switch (src_mode)
747 case V8QImode:
748 gen = gen_aarch64_simd_combinev8qi;
749 break;
750 case V4HImode:
751 gen = gen_aarch64_simd_combinev4hi;
752 break;
753 case V2SImode:
754 gen = gen_aarch64_simd_combinev2si;
755 break;
756 case V2SFmode:
757 gen = gen_aarch64_simd_combinev2sf;
758 break;
759 case DImode:
760 gen = gen_aarch64_simd_combinedi;
761 break;
762 case DFmode:
763 gen = gen_aarch64_simd_combinedf;
764 break;
765 default:
766 gcc_unreachable ();
769 emit_insn (gen (dst, src1, src2));
770 return;
774 /* Split a complex SIMD move. */
776 void
777 aarch64_split_simd_move (rtx dst, rtx src)
779 enum machine_mode src_mode = GET_MODE (src);
780 enum machine_mode dst_mode = GET_MODE (dst);
782 gcc_assert (VECTOR_MODE_P (dst_mode));
784 if (REG_P (dst) && REG_P (src))
786 rtx (*gen) (rtx, rtx);
788 gcc_assert (VECTOR_MODE_P (src_mode));
790 switch (src_mode)
792 case V16QImode:
793 gen = gen_aarch64_split_simd_movv16qi;
794 break;
795 case V8HImode:
796 gen = gen_aarch64_split_simd_movv8hi;
797 break;
798 case V4SImode:
799 gen = gen_aarch64_split_simd_movv4si;
800 break;
801 case V2DImode:
802 gen = gen_aarch64_split_simd_movv2di;
803 break;
804 case V4SFmode:
805 gen = gen_aarch64_split_simd_movv4sf;
806 break;
807 case V2DFmode:
808 gen = gen_aarch64_split_simd_movv2df;
809 break;
810 default:
811 gcc_unreachable ();
814 emit_insn (gen (dst, src));
815 return;
819 static rtx
820 aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
822 if (can_create_pseudo_p ())
823 return force_reg (mode, value);
824 else
826 x = aarch64_emit_move (x, value);
827 return x;
832 static rtx
833 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
835 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
837 rtx high;
838 /* Load the full offset into a register. This
839 might be improvable in the future. */
840 high = GEN_INT (offset);
841 offset = 0;
842 high = aarch64_force_temporary (mode, temp, high);
843 reg = aarch64_force_temporary (mode, temp,
844 gen_rtx_PLUS (mode, high, reg));
846 return plus_constant (mode, reg, offset);
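/* An ADD/SUB immediate is a 12-bit value, optionally shifted left by
   twelve; an offset such as 0x123456 (for instance) fits neither form,
   so it is loaded into the temporary above and added with a
   register-register add instead.  */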
849 void
850 aarch64_expand_mov_immediate (rtx dest, rtx imm)
852 enum machine_mode mode = GET_MODE (dest);
853 unsigned HOST_WIDE_INT mask;
854 int i;
855 bool first;
856 unsigned HOST_WIDE_INT val;
857 bool subtargets;
858 rtx subtarget;
859 int one_match, zero_match;
861 gcc_assert (mode == SImode || mode == DImode);
863 /* Check on what type of symbol it is. */
864 if (GET_CODE (imm) == SYMBOL_REF
865 || GET_CODE (imm) == LABEL_REF
866 || GET_CODE (imm) == CONST)
868 rtx mem, base, offset;
869 enum aarch64_symbol_type sty;
871 /* If we have (const (plus symbol offset)), separate out the offset
872 before we start classifying the symbol. */
873 split_const (imm, &base, &offset);
875 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
876 switch (sty)
878 case SYMBOL_FORCE_TO_MEM:
879 if (offset != const0_rtx
880 && targetm.cannot_force_const_mem (mode, imm))
882 gcc_assert(can_create_pseudo_p ());
883 base = aarch64_force_temporary (mode, dest, base);
884 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
885 aarch64_emit_move (dest, base);
886 return;
888 mem = force_const_mem (mode, imm);
889 gcc_assert (mem);
890 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
891 return;
893 case SYMBOL_SMALL_TLSGD:
894 case SYMBOL_SMALL_TLSDESC:
895 case SYMBOL_SMALL_GOTTPREL:
896 case SYMBOL_SMALL_GOT:
897 case SYMBOL_TINY_GOT:
898 if (offset != const0_rtx)
900 gcc_assert(can_create_pseudo_p ());
901 base = aarch64_force_temporary (mode, dest, base);
902 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
903 aarch64_emit_move (dest, base);
904 return;
906 /* FALLTHRU */
908 case SYMBOL_SMALL_TPREL:
909 case SYMBOL_SMALL_ABSOLUTE:
910 case SYMBOL_TINY_ABSOLUTE:
911 aarch64_load_symref_appropriately (dest, imm, sty);
912 return;
914 default:
915 gcc_unreachable ();
919 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
921 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
922 return;
925 if (!CONST_INT_P (imm))
927 if (GET_CODE (imm) == HIGH)
928 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
929 else
931 rtx mem = force_const_mem (mode, imm);
932 gcc_assert (mem);
933 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
936 return;
939 if (mode == SImode)
941 /* We know we can't do this in 1 insn, and we must be able to do it
942 in two; so don't mess around looking for sequences that don't buy
943 us anything. */
944 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
945 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
946 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
947 return;
950 /* Remaining cases are all for DImode. */
952 val = INTVAL (imm);
953 subtargets = optimize && can_create_pseudo_p ();
955 one_match = 0;
956 zero_match = 0;
957 mask = 0xffff;
959 for (i = 0; i < 64; i += 16, mask <<= 16)
961 if ((val & mask) == 0)
962 zero_match++;
963 else if ((val & mask) == mask)
964 one_match++;
967 if (one_match == 2)
969 mask = 0xffff;
970 for (i = 0; i < 64; i += 16, mask <<= 16)
972 if ((val & mask) != mask)
974 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
975 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
976 GEN_INT ((val >> i) & 0xffff)));
977 return;
980 gcc_unreachable ();
983 if (zero_match == 2)
984 goto simple_sequence;
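/* For example, val == 0x0000123400005678 has zero_match == 2, and the
   simple sequence at the end of this function emits (in effect)
   "mov dest, #0x5678" followed by "movk dest, #0x1234, lsl #32".  */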
986 mask = 0x0ffff0000UL;
987 for (i = 16; i < 64; i += 16, mask <<= 16)
989 HOST_WIDE_INT comp = mask & ~(mask - 1);
991 if (aarch64_uimm12_shift (val - (val & mask)))
993 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
995 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
996 emit_insn (gen_adddi3 (dest, subtarget,
997 GEN_INT (val - (val & mask))));
998 return;
1000 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1002 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1004 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1005 GEN_INT ((val + comp) & mask)));
1006 emit_insn (gen_adddi3 (dest, subtarget,
1007 GEN_INT (val - ((val + comp) & mask))));
1008 return;
1010 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1012 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1014 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1015 GEN_INT ((val - comp) | ~mask)));
1016 emit_insn (gen_adddi3 (dest, subtarget,
1017 GEN_INT (val - ((val - comp) | ~mask))));
1018 return;
1020 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1022 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1024 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1025 GEN_INT (val | ~mask)));
1026 emit_insn (gen_adddi3 (dest, subtarget,
1027 GEN_INT (val - (val | ~mask))));
1028 return;
1032 /* See if we can do it by arithmetically combining two
1033 immediates. */
1034 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1036 int j;
1037 mask = 0xffff;
1039 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1040 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1042 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1043 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1044 GEN_INT (aarch64_bitmasks[i])));
1045 emit_insn (gen_adddi3 (dest, subtarget,
1046 GEN_INT (val - aarch64_bitmasks[i])));
1047 return;
1050 for (j = 0; j < 64; j += 16, mask <<= 16)
1052 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1054 emit_insn (gen_rtx_SET (VOIDmode, dest,
1055 GEN_INT (aarch64_bitmasks[i])));
1056 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1057 GEN_INT ((val >> j) & 0xffff)));
1058 return;
1063 /* See if we can do it by logically combining two immediates. */
1064 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1066 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1068 int j;
1070 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1071 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1073 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1074 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1075 GEN_INT (aarch64_bitmasks[i])));
1076 emit_insn (gen_iordi3 (dest, subtarget,
1077 GEN_INT (aarch64_bitmasks[j])));
1078 return;
1081 else if ((val & aarch64_bitmasks[i]) == val)
1083 int j;
1085 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1086 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1089 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1090 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1091 GEN_INT (aarch64_bitmasks[j])));
1092 emit_insn (gen_anddi3 (dest, subtarget,
1093 GEN_INT (aarch64_bitmasks[i])));
1094 return;
1099 simple_sequence:
1100 first = true;
1101 mask = 0xffff;
1102 for (i = 0; i < 64; i += 16, mask <<= 16)
1104 if ((val & mask) != 0)
1106 if (first)
1108 emit_insn (gen_rtx_SET (VOIDmode, dest,
1109 GEN_INT (val & mask)));
1110 first = false;
1112 else
1113 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1114 GEN_INT ((val >> i) & 0xffff)));
1119 static bool
1120 aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1122 /* Indirect calls are not currently supported. */
1123 if (decl == NULL)
1124 return false;
1126 /* Cannot tail-call to long-calls, since these are outside of the
1127 range of a branch instruction (we could handle this if we added
1128 support for indirect tail-calls). */
1129 if (aarch64_decl_is_long_call_p (decl))
1130 return false;
1132 return true;
1135 /* Implement TARGET_PASS_BY_REFERENCE. */
1137 static bool
1138 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1139 enum machine_mode mode,
1140 const_tree type,
1141 bool named ATTRIBUTE_UNUSED)
1143 HOST_WIDE_INT size;
1144 enum machine_mode dummymode;
1145 int nregs;
1147 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1148 size = (mode == BLKmode && type)
1149 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1151 if (type)
1153 /* Arrays are always passed by reference. */
1154 if (TREE_CODE (type) == ARRAY_TYPE)
1155 return true;
1156 /* Other aggregates based on their size. */
1157 if (AGGREGATE_TYPE_P (type))
1158 size = int_size_in_bytes (type);
1161 /* Variable sized arguments are always passed by reference. */
1162 if (size < 0)
1163 return true;
1165 /* Can this be a candidate to be passed in fp/simd register(s)? */
1166 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1167 &dummymode, &nregs,
1168 NULL))
1169 return false;
1171 /* Arguments which are variable sized or larger than 2 registers are
1172 passed by reference unless they are a homogeneous floating-point
1173 aggregate. */
1174 return size > 2 * UNITS_PER_WORD;
1177 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1178 static bool
1179 aarch64_return_in_msb (const_tree valtype)
1181 enum machine_mode dummy_mode;
1182 int dummy_int;
1184 /* Never happens in little-endian mode. */
1185 if (!BYTES_BIG_ENDIAN)
1186 return false;
1188 /* Only composite types smaller than or equal to 16 bytes can
1189 be potentially returned in registers. */
1190 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1191 || int_size_in_bytes (valtype) <= 0
1192 || int_size_in_bytes (valtype) > 16)
1193 return false;
1195 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1196 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1197 is always passed/returned in the least significant bits of fp/simd
1198 register(s). */
1199 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1200 &dummy_mode, &dummy_int, NULL))
1201 return false;
1203 return true;
1206 /* Implement TARGET_FUNCTION_VALUE.
1207 Define how to find the value returned by a function. */
1209 static rtx
1210 aarch64_function_value (const_tree type, const_tree func,
1211 bool outgoing ATTRIBUTE_UNUSED)
1213 enum machine_mode mode;
1214 int unsignedp;
1215 int count;
1216 enum machine_mode ag_mode;
1218 mode = TYPE_MODE (type);
1219 if (INTEGRAL_TYPE_P (type))
1220 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1222 if (aarch64_return_in_msb (type))
1224 HOST_WIDE_INT size = int_size_in_bytes (type);
1226 if (size % UNITS_PER_WORD != 0)
1228 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1229 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1233 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1234 &ag_mode, &count, NULL))
1236 if (!aarch64_composite_type_p (type, mode))
1238 gcc_assert (count == 1 && mode == ag_mode);
1239 return gen_rtx_REG (mode, V0_REGNUM);
1241 else
1243 int i;
1244 rtx par;
1246 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1247 for (i = 0; i < count; i++)
1249 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1250 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1251 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1252 XVECEXP (par, 0, i) = tmp;
1254 return par;
1257 else
1258 return gen_rtx_REG (mode, R0_REGNUM);
1261 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1262 Return true if REGNO is the number of a hard register in which the values
1263 of called function may come back. */
1265 static bool
1266 aarch64_function_value_regno_p (const unsigned int regno)
1268 /* A maximum of 16 bytes can be returned in the general registers. Examples
1269 of 16-byte return values are: 128-bit integers and 16-byte small
1270 structures (excluding homogeneous floating-point aggregates). */
1271 if (regno == R0_REGNUM || regno == R1_REGNUM)
1272 return true;
1274 /* Up to four fp/simd registers can return a function value, e.g. a
1275 homogeneous floating-point aggregate having four members. */
1276 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1277 return !TARGET_GENERAL_REGS_ONLY;
1279 return false;
1282 /* Implement TARGET_RETURN_IN_MEMORY.
1284 If the type T of the result of a function is such that
1285 void func (T arg)
1286 would require that arg be passed as a value in a register (or set of
1287 registers) according to the parameter passing rules, then the result
1288 is returned in the same registers as would be used for such an
1289 argument. */
1291 static bool
1292 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1294 HOST_WIDE_INT size;
1295 enum machine_mode ag_mode;
1296 int count;
1298 if (!AGGREGATE_TYPE_P (type)
1299 && TREE_CODE (type) != COMPLEX_TYPE
1300 && TREE_CODE (type) != VECTOR_TYPE)
1301 /* Simple scalar types are always returned in registers. */
1302 return false;
1304 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1305 type,
1306 &ag_mode,
1307 &count,
1308 NULL))
1309 return false;
1311 /* Types larger than 2 registers are returned in memory. */
1312 size = int_size_in_bytes (type);
1313 return (size < 0 || size > 2 * UNITS_PER_WORD);
1316 static bool
1317 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1318 const_tree type, int *nregs)
1320 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1321 return aarch64_vfp_is_call_or_return_candidate (mode,
1322 type,
1323 &pcum->aapcs_vfp_rmode,
1324 nregs,
1325 NULL);
1328 /* Given MODE and TYPE of a function argument, return the alignment in
1329 bits. The idea is to suppress any stronger alignment requested by
1330 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1331 This is a helper function for local use only. */
1333 static unsigned int
1334 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1336 unsigned int alignment;
1338 if (type)
1340 if (!integer_zerop (TYPE_SIZE (type)))
1342 if (TYPE_MODE (type) == mode)
1343 alignment = TYPE_ALIGN (type);
1344 else
1345 alignment = GET_MODE_ALIGNMENT (mode);
1347 else
1348 alignment = 0;
1350 else
1351 alignment = GET_MODE_ALIGNMENT (mode);
1353 return alignment;
1356 /* Layout a function argument according to the AAPCS64 rules. The rule
1357 numbers refer to the rule numbers in the AAPCS64. */
1359 static void
1360 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1361 const_tree type,
1362 bool named ATTRIBUTE_UNUSED)
1364 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1365 int ncrn, nvrn, nregs;
1366 bool allocate_ncrn, allocate_nvrn;
1368 /* We need to do this once per argument. */
1369 if (pcum->aapcs_arg_processed)
1370 return;
1372 pcum->aapcs_arg_processed = true;
1374 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1375 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1376 mode,
1377 type,
1378 &nregs);
1380 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1381 The following code thus handles passing by SIMD/FP registers first. */
1383 nvrn = pcum->aapcs_nvrn;
1385 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
1386 and homogeneous short-vector aggregates (HVA). */
1387 if (allocate_nvrn)
1389 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1391 pcum->aapcs_nextnvrn = nvrn + nregs;
1392 if (!aarch64_composite_type_p (type, mode))
1394 gcc_assert (nregs == 1);
1395 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1397 else
1399 rtx par;
1400 int i;
1401 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1402 for (i = 0; i < nregs; i++)
1404 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1405 V0_REGNUM + nvrn + i);
1406 tmp = gen_rtx_EXPR_LIST
1407 (VOIDmode, tmp,
1408 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1409 XVECEXP (par, 0, i) = tmp;
1411 pcum->aapcs_reg = par;
1413 return;
1415 else
1417 /* C.3 NSRN is set to 8. */
1418 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1419 goto on_stack;
1423 ncrn = pcum->aapcs_ncrn;
1424 nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1425 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1428 /* C6 - C9, though the sign and zero extension semantics are
1429 handled elsewhere. This is the case where the argument fits
1430 entirely in general registers. */
1431 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1433 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1435 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1437 /* C.8 if the argument has an alignment of 16 then the NGRN is
1438 rounded up to the next even number. */
1439 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1441 ++ncrn;
1442 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1444 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1445 A reg is still generated for it, but the caller should be smart
1446 enough not to use it. */
1447 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1449 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1451 else
1453 rtx par;
1454 int i;
1456 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1457 for (i = 0; i < nregs; i++)
1459 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1460 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1461 GEN_INT (i * UNITS_PER_WORD));
1462 XVECEXP (par, 0, i) = tmp;
1464 pcum->aapcs_reg = par;
1467 pcum->aapcs_nextncrn = ncrn + nregs;
1468 return;
1471 /* C.11 */
1472 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1474 /* The argument is passed on stack; record the needed number of words for
1475 this argument (we can re-use NREGS) and align the total size if
1476 necessary. */
1477 on_stack:
1478 pcum->aapcs_stack_words = nregs;
1479 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1480 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1481 16 / UNITS_PER_WORD) + 1;
1482 return;
1485 /* Implement TARGET_FUNCTION_ARG. */
1487 static rtx
1488 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1489 const_tree type, bool named)
1491 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1492 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1494 if (mode == VOIDmode)
1495 return NULL_RTX;
1497 aarch64_layout_arg (pcum_v, mode, type, named);
1498 return pcum->aapcs_reg;
1501 void
1502 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1503 const_tree fntype ATTRIBUTE_UNUSED,
1504 rtx libname ATTRIBUTE_UNUSED,
1505 const_tree fndecl ATTRIBUTE_UNUSED,
1506 unsigned n_named ATTRIBUTE_UNUSED)
1508 pcum->aapcs_ncrn = 0;
1509 pcum->aapcs_nvrn = 0;
1510 pcum->aapcs_nextncrn = 0;
1511 pcum->aapcs_nextnvrn = 0;
1512 pcum->pcs_variant = ARM_PCS_AAPCS64;
1513 pcum->aapcs_reg = NULL_RTX;
1514 pcum->aapcs_arg_processed = false;
1515 pcum->aapcs_stack_words = 0;
1516 pcum->aapcs_stack_size = 0;
1518 return;
1521 static void
1522 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1523 enum machine_mode mode,
1524 const_tree type,
1525 bool named)
1527 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1528 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1530 aarch64_layout_arg (pcum_v, mode, type, named);
1531 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1532 != (pcum->aapcs_stack_words != 0));
1533 pcum->aapcs_arg_processed = false;
1534 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1535 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1536 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1537 pcum->aapcs_stack_words = 0;
1538 pcum->aapcs_reg = NULL_RTX;
1542 bool
1543 aarch64_function_arg_regno_p (unsigned regno)
1545 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1546 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
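/* Under AAPCS64 the first NUM_ARG_REGS integer arguments are passed in
   x0-x7 and the first NUM_FP_ARG_REGS floating-point/SIMD arguments in
   v0-v7, which is what the test above expresses.  */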
1549 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1550 PARM_BOUNDARY bits of alignment, but will be given anything up
1551 to STACK_BOUNDARY bits if the type requires it. This makes sure
1552 that both before and after the layout of each argument, the Next
1553 Stacked Argument Address (NSAA) will have a minimum alignment of
1554 8 bytes. */
1556 static unsigned int
1557 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1559 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1561 if (alignment < PARM_BOUNDARY)
1562 alignment = PARM_BOUNDARY;
1563 if (alignment > STACK_BOUNDARY)
1564 alignment = STACK_BOUNDARY;
1565 return alignment;
1568 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1570 Return true if an argument passed on the stack should be padded upwards,
1571 i.e. if the least-significant byte of the stack slot has useful data.
1573 Small aggregate types are placed in the lowest memory address.
1575 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1577 bool
1578 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1580 /* On little-endian targets, the least significant byte of every stack
1581 argument is passed at the lowest byte address of the stack slot. */
1582 if (!BYTES_BIG_ENDIAN)
1583 return true;
1585 /* Otherwise, integral types and floating point types are padded downward:
1586 the least significant byte of a stack argument is passed at the highest
1587 byte address of the stack slot. */
1588 if (type
1589 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type))
1590 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1591 return false;
1593 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1594 return true;
1597 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1599 It specifies padding for the last (may also be the only)
1600 element of a block move between registers and memory.
1601 Assuming the block is in memory, padding upward means that
1602 the last element is padded after its most significant byte,
1603 while in downward padding, the last element is padded at
1604 its least significant byte side.
1606 Small aggregates and small complex types are always padded
1607 upwards.
1609 We don't need to worry about homogeneous floating-point or
1610 short-vector aggregates; their move is not affected by the
1611 padding direction determined here. Regardless of endianness,
1612 each element of such an aggregate is put in the least
1613 significant bits of a fp/simd register.
1615 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1616 register has useful data, and return the opposite if the most
1617 significant byte does. */
1619 bool
1620 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1621 bool first ATTRIBUTE_UNUSED)
1624 /* Small composite types are always padded upward. */
1625 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1627 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1628 : GET_MODE_SIZE (mode));
1629 if (size < 2 * UNITS_PER_WORD)
1630 return true;
1633 /* Otherwise, use the default padding. */
1634 return !BYTES_BIG_ENDIAN;
1637 static enum machine_mode
1638 aarch64_libgcc_cmp_return_mode (void)
1640 return SImode;
1643 static bool
1644 aarch64_frame_pointer_required (void)
1646 /* If the function contains dynamic stack allocations, we need to
1647 use the frame pointer to access the static parts of the frame. */
1648 if (cfun->calls_alloca)
1649 return true;
1651 /* We may have turned flag_omit_frame_pointer on in order to have this
1652 function called; if we did, we also set the 'faked_omit_frame_pointer' flag
1653 and we'll check it here.
1654 If we really did set flag_omit_frame_pointer normally, then we return false
1655 (no frame pointer required) in all cases. */
1657 if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
1658 return false;
1659 else if (flag_omit_leaf_frame_pointer)
1660 return !crtl->is_leaf;
1661 return true;
1664 /* Mark the registers that need to be saved by the callee and calculate
1665 the size of the callee-saved registers area and frame record (both FP
1666 and LR may be omitted). */
1667 static void
1668 aarch64_layout_frame (void)
1670 HOST_WIDE_INT offset = 0;
1671 int regno;
1673 if (reload_completed && cfun->machine->frame.laid_out)
1674 return;
1676 cfun->machine->frame.fp_lr_offset = 0;
1678 /* First mark all the registers that really need to be saved... */
1679 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1680 cfun->machine->frame.reg_offset[regno] = -1;
1682 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1683 cfun->machine->frame.reg_offset[regno] = -1;
1685 /* ... that includes the eh data registers (if needed)... */
1686 if (crtl->calls_eh_return)
1687 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1688 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1690 /* ... and any callee saved register that dataflow says is live. */
1691 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1692 if (df_regs_ever_live_p (regno)
1693 && !call_used_regs[regno])
1694 cfun->machine->frame.reg_offset[regno] = 0;
1696 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1697 if (df_regs_ever_live_p (regno)
1698 && !call_used_regs[regno])
1699 cfun->machine->frame.reg_offset[regno] = 0;
1701 if (frame_pointer_needed)
1703 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1704 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1705 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1708 /* Now assign stack slots for them. */
1709 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1710 if (cfun->machine->frame.reg_offset[regno] != -1)
1712 cfun->machine->frame.reg_offset[regno] = offset;
1713 offset += UNITS_PER_WORD;
1716 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1717 if (cfun->machine->frame.reg_offset[regno] != -1)
1719 cfun->machine->frame.reg_offset[regno] = offset;
1720 offset += UNITS_PER_WORD;
1723 if (frame_pointer_needed)
1725 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1726 offset += UNITS_PER_WORD;
1727 cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1730 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1732 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1733 offset += UNITS_PER_WORD;
1734 cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1737 cfun->machine->frame.padding0 =
1738 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1739 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1741 cfun->machine->frame.saved_regs_size = offset;
1742 cfun->machine->frame.laid_out = true;
1745 /* Make the last instruction frame-related and note that it performs
1746 the operation described by FRAME_PATTERN. */
1748 static void
1749 aarch64_set_frame_expr (rtx frame_pattern)
1751 rtx insn;
1753 insn = get_last_insn ();
1754 RTX_FRAME_RELATED_P (insn) = 1;
1755 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1756 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1757 frame_pattern,
1758 REG_NOTES (insn));
1761 static bool
1762 aarch64_register_saved_on_entry (int regno)
1764 return cfun->machine->frame.reg_offset[regno] != -1;
1768 static void
1769 aarch64_save_or_restore_fprs (int start_offset, int increment,
1770 bool restore, rtx base_rtx)
1773 unsigned regno;
1774 unsigned regno2;
1775 rtx insn;
1776 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1779 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1781 if (aarch64_register_saved_on_entry (regno))
1783 rtx mem;
1784 mem = gen_mem_ref (DFmode,
1785 plus_constant (Pmode,
1786 base_rtx,
1787 start_offset));
1789 for (regno2 = regno + 1;
1790 regno2 <= V31_REGNUM
1791 && !aarch64_register_saved_on_entry (regno2);
1792 regno2++)
1794 /* Empty loop. */
1796 if (regno2 <= V31_REGNUM &&
1797 aarch64_register_saved_on_entry (regno2))
1799 rtx mem2;
1800 /* Next highest register to be saved. */
1801 mem2 = gen_mem_ref (DFmode,
1802 plus_constant
1803 (Pmode,
1804 base_rtx,
1805 start_offset + increment));
1806 if (restore == false)
1808 insn = emit_insn
1809 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1810 mem2, gen_rtx_REG (DFmode, regno2)));
1813 else
1815 insn = emit_insn
1816 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1817 gen_rtx_REG (DFmode, regno2), mem2));
1819 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno));
1820 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno2));
1823 /* The first part of a frame-related parallel insn
1824 is always assumed to be relevant to the frame
1825 calculations; subsequent parts are only
1826 frame-related if explicitly marked. */
1827 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1828 1)) = 1;
1829 regno = regno2;
1830 start_offset += increment * 2;
1832 else
1834 if (restore == false)
1835 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1836 else
1838 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1839 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1841 start_offset += increment;
1843 RTX_FRAME_RELATED_P (insn) = 1;
1850 /* Offset from the stack pointer at which the saves and
1851 restores have to happen. */
1852 static void
1853 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1854 bool restore)
1856 rtx insn;
1857 rtx base_rtx = stack_pointer_rtx;
1858 HOST_WIDE_INT start_offset = offset;
1859 HOST_WIDE_INT increment = UNITS_PER_WORD;
1860 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1861 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1862 unsigned regno;
1863 unsigned regno2;
1865 for (regno = R0_REGNUM; regno <= limit; regno++)
1867 if (aarch64_register_saved_on_entry (regno))
1869 rtx mem;
1870 mem = gen_mem_ref (Pmode,
1871 plus_constant (Pmode,
1872 base_rtx,
1873 start_offset));
1875 for (regno2 = regno + 1;
1876 regno2 <= limit
1877 && !aarch64_register_saved_on_entry (regno2);
1878 regno2++)
1880 /* Empty loop. */
1882 if (regno2 <= limit &&
1883 aarch64_register_saved_on_entry (regno2))
1885 rtx mem2;
1886 /* Next highest register to be saved. */
1887 mem2 = gen_mem_ref (Pmode,
1888 plus_constant
1889 (Pmode,
1890 base_rtx,
1891 start_offset + increment));
1892 if (restore == false)
1894 insn = emit_insn
1895 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1896 mem2, gen_rtx_REG (DImode, regno2)));
1899 else
1901 insn = emit_insn
1902 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
1903 gen_rtx_REG (DImode, regno2), mem2));
1905 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1906 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
1909 /* The first part of a frame-related parallel insn
1910 is always assumed to be relevant to the frame
1911 calculations; subsequent parts are only
1912 frame-related if explicitly marked. */
1913 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1914 1)) = 1;
1915 regno = regno2;
1916 start_offset += increment * 2;
1918 else
1920 if (restore == false)
1921 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
1922 else
1924 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
1925 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1927 start_offset += increment;
1929 RTX_FRAME_RELATED_P (insn) = 1;
1933 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
1937 /* AArch64 stack frames generated by this compiler look like:
1939 +-------------------------------+
1941 | incoming stack arguments |
1943 +-------------------------------+ <-- arg_pointer_rtx
1945 | callee-allocated save area |
1946 | for register varargs |
1948 +-------------------------------+
1950 | local variables |
1952 +-------------------------------+ <-- frame_pointer_rtx
1954 | callee-saved registers |
1956 +-------------------------------+
1957 | LR' |
1958 +-------------------------------+
1959 | FP' |
1960 P +-------------------------------+ <-- hard_frame_pointer_rtx
1961 | dynamic allocation |
1962 +-------------------------------+
1964 | outgoing stack arguments |
1966 +-------------------------------+ <-- stack_pointer_rtx
1968 Dynamic stack allocations such as alloca insert data at point P.
1969 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
1970 hard_frame_pointer_rtx unchanged. */
1972 /* Generate the prologue instructions for entry into a function.
1973 Establish the stack frame by decreasing the stack pointer with a
1974 properly calculated size and, if necessary, create a frame record
1975 filled with the values of LR and previous frame pointer. The
1976 current FP is also set up if it is in use. */
1978 void
1979 aarch64_expand_prologue (void)
1981 /* sub sp, sp, #<frame_size>
1982 stp {fp, lr}, [sp, #<frame_size> - 16]
1983 add fp, sp, #<frame_size> - hardfp_offset
1984 stp {cs_reg}, [fp, #-16] etc.
1986 sub sp, sp, <final_adjustment_if_any>
1988 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
1989 HOST_WIDE_INT frame_size, offset;
1990 HOST_WIDE_INT fp_offset; /* FP offset from SP */
1991 rtx insn;
1993 aarch64_layout_frame ();
1994 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
1995 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
1996 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
1997 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
1998 + crtl->outgoing_args_size);
1999 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2000 STACK_BOUNDARY / BITS_PER_UNIT);
2002 if (flag_stack_usage_info)
2003 current_function_static_stack_size = frame_size;
2005 fp_offset = (offset
2006 - original_frame_size
2007 - cfun->machine->frame.saved_regs_size);
2009 /* Store pairs and load pairs have a range of only -512 to 504. */
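/* (LDP/STP encode a signed 7-bit immediate scaled by the access size;
   for 8-byte registers that is 8 * [-64, 63], i.e. -512 to 504 in steps
   of 8.)  */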
2010 if (offset >= 512)
2012 /* When the frame has a large size, an initial decrease is done on
2013 the stack pointer to jump over the callee-allocated save area for
2014 register varargs, the local variable area and/or the callee-saved
2015 register area. This will allow the pre-index write-back
2016 store pair instructions to be used for setting up the stack frame
2017 efficiently. */
2018 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2019 if (offset >= 512)
2020 offset = cfun->machine->frame.saved_regs_size;
2022 frame_size -= (offset + crtl->outgoing_args_size);
2023 fp_offset = 0;
2025 if (frame_size >= 0x1000000)
2027 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2028 emit_move_insn (op0, GEN_INT (-frame_size));
2029 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2030 aarch64_set_frame_expr (gen_rtx_SET
2031 (Pmode, stack_pointer_rtx,
2032 gen_rtx_PLUS (Pmode,
2033 stack_pointer_rtx,
2034 GEN_INT (-frame_size))));
2036 else if (frame_size > 0)
2038 if ((frame_size & 0xfff) != frame_size)
2040 insn = emit_insn (gen_add2_insn
2041 (stack_pointer_rtx,
2042 GEN_INT (-(frame_size
2043 & ~(HOST_WIDE_INT)0xfff))));
2044 RTX_FRAME_RELATED_P (insn) = 1;
2046 if ((frame_size & 0xfff) != 0)
2048 insn = emit_insn (gen_add2_insn
2049 (stack_pointer_rtx,
2050 GEN_INT (-(frame_size
2051 & (HOST_WIDE_INT)0xfff))));
2052 RTX_FRAME_RELATED_P (insn) = 1;
2056 else
2057 frame_size = -1;
2059 if (offset > 0)
2061 /* If the frame pointer is needed, save it and LR first. Make the
2062 frame pointer point to the location of the old frame pointer
2063 on the stack. */
2064 if (frame_pointer_needed)
2066 rtx mem_fp, mem_lr;
2068 if (fp_offset)
2070 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2071 GEN_INT (-offset)));
2072 RTX_FRAME_RELATED_P (insn) = 1;
2073 aarch64_set_frame_expr (gen_rtx_SET
2074 (Pmode, stack_pointer_rtx,
2075 gen_rtx_MINUS (Pmode,
2076 stack_pointer_rtx,
2077 GEN_INT (offset))));
2078 mem_fp = gen_frame_mem (DImode,
2079 plus_constant (Pmode,
2080 stack_pointer_rtx,
2081 fp_offset));
2082 mem_lr = gen_frame_mem (DImode,
2083 plus_constant (Pmode,
2084 stack_pointer_rtx,
2085 fp_offset
2086 + UNITS_PER_WORD));
2087 insn = emit_insn (gen_store_pairdi (mem_fp,
2088 hard_frame_pointer_rtx,
2089 mem_lr,
2090 gen_rtx_REG (DImode,
2091 LR_REGNUM)));
2093 else
2095 insn = emit_insn (gen_storewb_pairdi_di
2096 (stack_pointer_rtx, stack_pointer_rtx,
2097 hard_frame_pointer_rtx,
2098 gen_rtx_REG (DImode, LR_REGNUM),
2099 GEN_INT (-offset),
2100 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2101 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2104 /* The first part of a frame-related parallel insn is always
2105 assumed to be relevant to the frame calculations;
2106 subsequent parts are only frame-related if explicitly
2107 marked. */
2108 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2109 RTX_FRAME_RELATED_P (insn) = 1;
2111 /* Set up frame pointer to point to the location of the
2112 previous frame pointer on the stack. */
2113 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2114 stack_pointer_rtx,
2115 GEN_INT (fp_offset)));
2116 aarch64_set_frame_expr (gen_rtx_SET
2117 (Pmode, hard_frame_pointer_rtx,
2118 gen_rtx_PLUS (Pmode,
2119 stack_pointer_rtx,
2120 GEN_INT (fp_offset))));
2121 RTX_FRAME_RELATED_P (insn) = 1;
2122 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2123 hard_frame_pointer_rtx));
2125 else
2127 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2128 GEN_INT (-offset)));
2129 RTX_FRAME_RELATED_P (insn) = 1;
2132 aarch64_save_or_restore_callee_save_registers
2133 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2136 /* when offset >= 512,
2137 sub sp, sp, #<outgoing_args_size> */
2138 if (frame_size > -1)
2140 if (crtl->outgoing_args_size > 0)
2142 insn = emit_insn (gen_add2_insn
2143 (stack_pointer_rtx,
2144 GEN_INT (- crtl->outgoing_args_size)));
2145 RTX_FRAME_RELATED_P (insn) = 1;
2150 /* Generate the epilogue instructions for returning from a function. */
2151 void
2152 aarch64_expand_epilogue (bool for_sibcall)
2154 HOST_WIDE_INT original_frame_size, frame_size, offset;
2155 HOST_WIDE_INT fp_offset;
2156 rtx insn;
2157 rtx cfa_reg;
2159 aarch64_layout_frame ();
2160 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2161 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2162 + crtl->outgoing_args_size);
2163 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2164 STACK_BOUNDARY / BITS_PER_UNIT);
2166 fp_offset = (offset
2167 - original_frame_size
2168 - cfun->machine->frame.saved_regs_size);
2170 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2172 /* Store pairs and load pairs have a range of only -512 to 504. */
2173 if (offset >= 512)
2175 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2176 if (offset >= 512)
2177 offset = cfun->machine->frame.saved_regs_size;
2179 frame_size -= (offset + crtl->outgoing_args_size);
2180 fp_offset = 0;
2181 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2183 insn = emit_insn (gen_add2_insn
2184 (stack_pointer_rtx,
2185 GEN_INT (crtl->outgoing_args_size)));
2186 RTX_FRAME_RELATED_P (insn) = 1;
2189 else
2190 frame_size = -1;
2192 /* If there were outgoing arguments or we've done dynamic stack
2193 allocation, then restore the stack pointer from the frame
2194 pointer. This is at most one insn and more efficient than using
2195 GCC's internal mechanism. */
2196 if (frame_pointer_needed
2197 && (crtl->outgoing_args_size || cfun->calls_alloca))
2199 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2200 hard_frame_pointer_rtx,
2201 GEN_INT (- fp_offset)));
2202 RTX_FRAME_RELATED_P (insn) = 1;
2203 /* As SP is set to (FP - fp_offset), according to the rules in
2204 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2205 from the value of SP from now on. */
2206 cfa_reg = stack_pointer_rtx;
2209 aarch64_save_or_restore_callee_save_registers
2210 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2212 /* Restore the frame pointer and lr if the frame pointer is needed. */
2213 if (offset > 0)
2215 if (frame_pointer_needed)
2217 rtx mem_fp, mem_lr;
2219 if (fp_offset)
2221 mem_fp = gen_frame_mem (DImode,
2222 plus_constant (Pmode,
2223 stack_pointer_rtx,
2224 fp_offset));
2225 mem_lr = gen_frame_mem (DImode,
2226 plus_constant (Pmode,
2227 stack_pointer_rtx,
2228 fp_offset
2229 + UNITS_PER_WORD));
2230 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2231 mem_fp,
2232 gen_rtx_REG (DImode,
2233 LR_REGNUM),
2234 mem_lr));
2236 else
2238 insn = emit_insn (gen_loadwb_pairdi_di
2239 (stack_pointer_rtx,
2240 stack_pointer_rtx,
2241 hard_frame_pointer_rtx,
2242 gen_rtx_REG (DImode, LR_REGNUM),
2243 GEN_INT (offset),
2244 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2245 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2246 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2247 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2248 plus_constant (Pmode, cfa_reg,
2249 offset))));
2252 /* The first part of a frame-related parallel insn
2253 is always assumed to be relevant to the frame
2254 calculations; subsequent parts are only
2255 frame-related if explicitly marked. */
2256 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2257 RTX_FRAME_RELATED_P (insn) = 1;
2258 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2259 add_reg_note (insn, REG_CFA_RESTORE,
2260 gen_rtx_REG (DImode, LR_REGNUM));
2262 if (fp_offset)
2264 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2265 GEN_INT (offset)));
2266 RTX_FRAME_RELATED_P (insn) = 1;
2269 else
2271 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2272 GEN_INT (offset)));
2273 RTX_FRAME_RELATED_P (insn) = 1;
2277 /* Stack adjustment for exception handler. */
2278 if (crtl->calls_eh_return)
2280 /* We need to unwind the stack by the offset computed by
2281 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2282 based on SP. Ideally we would update the SP and define the
2283 CFA along the lines of:
2285 SP = SP + EH_RETURN_STACKADJ_RTX
2286 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2288 However the dwarf emitter only understands a constant
2289 register offset.
2291 The solution chosen here is to use the otherwise unused IP0
2292 as a temporary register to hold the current SP value. The
2293 CFA is described using IP0 then SP is modified. */
2295 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2297 insn = emit_move_insn (ip0, stack_pointer_rtx);
2298 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2299 RTX_FRAME_RELATED_P (insn) = 1;
2301 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2303 /* Ensure the assignment to IP0 does not get optimized away. */
2304 emit_use (ip0);
2307 if (frame_size > -1)
2309 if (frame_size >= 0x1000000)
2311 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2312 emit_move_insn (op0, GEN_INT (frame_size));
2313 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2314 aarch64_set_frame_expr (gen_rtx_SET
2315 (Pmode, stack_pointer_rtx,
2316 gen_rtx_PLUS (Pmode,
2317 stack_pointer_rtx,
2318 GEN_INT (frame_size))));
2320 else if (frame_size > 0)
2322 if ((frame_size & 0xfff) != 0)
2324 insn = emit_insn (gen_add2_insn
2325 (stack_pointer_rtx,
2326 GEN_INT ((frame_size
2327 & (HOST_WIDE_INT) 0xfff))));
2328 RTX_FRAME_RELATED_P (insn) = 1;
2330 if ((frame_size & 0xfff) != frame_size)
2332 insn = emit_insn (gen_add2_insn
2333 (stack_pointer_rtx,
2334 GEN_INT ((frame_size
2335 & ~ (HOST_WIDE_INT) 0xfff))));
2336 RTX_FRAME_RELATED_P (insn) = 1;
2340 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2341 gen_rtx_PLUS (Pmode,
2342 stack_pointer_rtx,
2343 GEN_INT (offset))));
2346 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2347 if (!for_sibcall)
2348 emit_jump_insn (ret_rtx);
2351 /* Return the place to copy the exception unwinding return address to.
2352 This will probably be a stack slot, but could (in theory) be the
2353 return register. */
2355 aarch64_final_eh_return_addr (void)
2357 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2358 aarch64_layout_frame ();
2359 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2360 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2361 + crtl->outgoing_args_size);
2362 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2363 STACK_BOUNDARY / BITS_PER_UNIT);
2364 fp_offset = offset
2365 - original_frame_size
2366 - cfun->machine->frame.saved_regs_size;
2368 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2369 return gen_rtx_REG (DImode, LR_REGNUM);
2371 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2372 result in a store to save LR introduced by builtin_eh_return () being
2373 incorrectly deleted because the alias is not detected.
2374 So in the calculation of the address to copy the exception unwinding
2375 return address to, we distinguish two cases.
2376 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2377 we return a SP-relative location since all the addresses are SP-relative
2378 in this case. This prevents the store from being optimized away.
2379 If the fp_offset is not 0, then the addresses will be FP-relative and
2380 therefore we return a FP-relative location. */
2382 if (frame_pointer_needed)
2384 if (fp_offset)
2385 return gen_frame_mem (DImode,
2386 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2387 else
2388 return gen_frame_mem (DImode,
2389 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2392 /* If FP is not needed, we calculate the location of LR, which would be
2393 at the top of the saved registers block. */
2395 return gen_frame_mem (DImode,
2396 plus_constant (Pmode,
2397 stack_pointer_rtx,
2398 fp_offset
2399 + cfun->machine->frame.saved_regs_size
2400 - 2 * UNITS_PER_WORD));
2403 /* Output code to build up a constant in a register. */
2404 static void
2405 aarch64_build_constant (int regnum, HOST_WIDE_INT val)
2407 if (aarch64_bitmask_imm (val, DImode))
2408 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2409 else
2411 int i;
2412 int ncount = 0;
2413 int zcount = 0;
2414 HOST_WIDE_INT valp = val >> 16;
2415 HOST_WIDE_INT valm;
2416 HOST_WIDE_INT tval;
2418 for (i = 16; i < 64; i += 16)
2420 valm = (valp & 0xffff);
2422 if (valm != 0)
2423 ++ zcount;
2425 if (valm != 0xffff)
2426 ++ ncount;
2428 valp >>= 16;
2431 /* zcount contains the number of additional MOVK instructions
2432 required if the constant is built up with an initial MOVZ instruction,
2433 while ncount is the number of MOVK instructions required if starting
2434 with a MOVN instruction. Choose the sequence that yields the
2435 fewest instructions, preferring MOVZ when both counts are
2436 equal. */
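/* For example, for val == 0x0000ffff0000aaaa the three upper 16-bit
   chunks are 0x0000, 0xffff and 0x0000, so zcount == 1 and ncount == 2;
   the MOVZ sequence wins, needing only a MOVZ of 0xaaaa plus one MOVK
   for the 0xffff chunk, whereas a MOVN start would need two MOVKs.  */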
2437 if (ncount < zcount)
2439 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2440 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2441 tval = 0xffff;
2443 else
2445 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2446 GEN_INT (val & 0xffff));
2447 tval = 0;
2450 val >>= 16;
2452 for (i = 16; i < 64; i += 16)
2454 if ((val & 0xffff) != tval)
2455 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2456 GEN_INT (i), GEN_INT (val & 0xffff)));
2457 val >>= 16;
2462 static void
2463 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2465 HOST_WIDE_INT mdelta = delta;
2466 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2467 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2469 if (mdelta < 0)
2470 mdelta = -mdelta;
2472 if (mdelta >= 4096 * 4096)
2474 aarch64_build_constant (scratchreg, delta);
2475 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2477 else if (mdelta > 0)
2479 if (mdelta >= 4096)
2481 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2482 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2483 if (delta < 0)
2484 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2485 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2486 else
2487 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2488 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2490 if (mdelta % 4096 != 0)
2492 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2493 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2494 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2499 /* Output code to add DELTA to the first argument, and then jump
2500 to FUNCTION. Used for C++ multiple inheritance. */
2501 static void
2502 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2503 HOST_WIDE_INT delta,
2504 HOST_WIDE_INT vcall_offset,
2505 tree function)
2507 /* The this pointer is always in x0. Note that this differs from
2508 Arm where the this pointer may be bumped to r1 if r0 is required
2509 to return a pointer to an aggregate. On AArch64 a result value
2510 pointer will be in x8. */
2511 int this_regno = R0_REGNUM;
2512 rtx this_rtx, temp0, temp1, addr, insn, funexp;
2514 reload_completed = 1;
2515 emit_note (NOTE_INSN_PROLOGUE_END);
2517 if (vcall_offset == 0)
2518 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2519 else
2521 gcc_assert ((vcall_offset & 0x7) == 0);
2523 this_rtx = gen_rtx_REG (Pmode, this_regno);
2524 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2525 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2527 addr = this_rtx;
2528 if (delta != 0)
2530 if (delta >= -256 && delta < 256)
2531 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2532 plus_constant (Pmode, this_rtx, delta));
2533 else
2534 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2537 aarch64_emit_move (temp0, gen_rtx_MEM (Pmode, addr));
2539 if (vcall_offset >= -256 && vcall_offset < 32768)
2540 addr = plus_constant (Pmode, temp0, vcall_offset);
2541 else
2543 aarch64_build_constant (IP1_REGNUM, vcall_offset);
2544 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2547 aarch64_emit_move (temp1, gen_rtx_MEM (Pmode,addr));
2548 emit_insn (gen_add2_insn (this_rtx, temp1));
2551 /* Generate a tail call to the target function. */
2552 if (!TREE_USED (function))
2554 assemble_external (function);
2555 TREE_USED (function) = 1;
2557 funexp = XEXP (DECL_RTL (function), 0);
2558 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2559 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2560 SIBLING_CALL_P (insn) = 1;
2562 insn = get_insns ();
2563 shorten_branches (insn);
2564 final_start_function (insn, file, 1);
2565 final (insn, file, 1);
2566 final_end_function ();
2568 /* Stop pretending to be a post-reload pass. */
2569 reload_completed = 0;
2572 static int
2573 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2575 if (GET_CODE (*x) == SYMBOL_REF)
2576 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2578 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2579 TLS offsets, not real symbol references. */
2580 if (GET_CODE (*x) == UNSPEC
2581 && XINT (*x, 1) == UNSPEC_TLS)
2582 return -1;
2584 return 0;
2587 static bool
2588 aarch64_tls_referenced_p (rtx x)
2590 if (!TARGET_HAVE_TLS)
2591 return false;
2593 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2597 static int
2598 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2600 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2601 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2603 if (*imm1 < *imm2)
2604 return -1;
2605 if (*imm1 > *imm2)
2606 return +1;
2607 return 0;
2611 static void
2612 aarch64_build_bitmask_table (void)
2614 unsigned HOST_WIDE_INT mask, imm;
2615 unsigned int log_e, e, s, r;
2616 unsigned int nimms = 0;
2618 for (log_e = 1; log_e <= 6; log_e++)
2620 e = 1 << log_e;
2621 if (e == 64)
2622 mask = ~(HOST_WIDE_INT) 0;
2623 else
2624 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2625 for (s = 1; s < e; s++)
2627 for (r = 0; r < e; r++)
2629 /* set s consecutive bits to 1 (s < 64) */
2630 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2631 /* rotate right by r */
2632 if (r != 0)
2633 imm = ((imm >> r) | (imm << (e - r))) & mask;
2634 /* replicate the constant depending on SIMD size */
2635 switch (log_e) {
2636 case 1: imm |= (imm << 2);
2637 case 2: imm |= (imm << 4);
2638 case 3: imm |= (imm << 8);
2639 case 4: imm |= (imm << 16);
2640 case 5: imm |= (imm << 32);
2641 case 6:
2642 break;
2643 default:
2644 gcc_unreachable ();
2646 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2647 aarch64_bitmasks[nimms++] = imm;
2652 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2653 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2654 aarch64_bitmasks_cmp);
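/* For example, with log_e == 3 (e == 8), s == 3 and r == 1, the run
   0b00000111 rotates to 0x83 and the fall-through replication above
   expands it to the table entry 0x8383838383838383.  */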
2658 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2659 a left shift of 0 or 12 bits. */
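/* For example, 0xabc (shift 0) and 0xabc000 (shift 12) are accepted,
   but 0xabc00 is not, because its non-zero bits straddle both
   positions.  */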
2660 bool
2661 aarch64_uimm12_shift (HOST_WIDE_INT val)
2663 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2664 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2669 /* Return true if val is an immediate that can be loaded into a
2670 register by a MOVZ instruction. */
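/* For DImode this means a 16-bit chunk at bit position 0, 16, 32 or 48,
   e.g. 0x12340000 or 0xabcd000000000000; for SImode only positions 0
   and 16 qualify.  */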
2671 static bool
2672 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2674 if (GET_MODE_SIZE (mode) > 4)
2676 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2677 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2678 return 1;
2680 else
2682 /* Ignore sign extension. */
2683 val &= (HOST_WIDE_INT) 0xffffffff;
2685 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2686 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2690 /* Return true if val is a valid bitmask immediate. */
2691 bool
2692 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2694 if (GET_MODE_SIZE (mode) < 8)
2696 /* Replicate bit pattern. */
2697 val &= (HOST_WIDE_INT) 0xffffffff;
2698 val |= val << 32;
2700 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2701 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2705 /* Return true if val is an immediate that can be loaded into a
2706 register in a single instruction. */
2707 bool
2708 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2710 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2711 return 1;
2712 return aarch64_bitmask_imm (val, mode);
2715 static bool
2716 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2718 rtx base, offset;
2720 if (GET_CODE (x) == HIGH)
2721 return true;
2723 split_const (x, &base, &offset);
2724 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2725 return (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2726 != SYMBOL_FORCE_TO_MEM);
2728 return aarch64_tls_referenced_p (x);
2731 /* Return true if register REGNO is a valid index register.
2732 STRICT_P is true if REG_OK_STRICT is in effect. */
2734 bool
2735 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2737 if (!HARD_REGISTER_NUM_P (regno))
2739 if (!strict_p)
2740 return true;
2742 if (!reg_renumber)
2743 return false;
2745 regno = reg_renumber[regno];
2747 return GP_REGNUM_P (regno);
2750 /* Return true if register REGNO is a valid base register.
2751 STRICT_P is true if REG_OK_STRICT is in effect. */
2753 bool
2754 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2756 if (!HARD_REGISTER_NUM_P (regno))
2758 if (!strict_p)
2759 return true;
2761 if (!reg_renumber)
2762 return false;
2764 regno = reg_renumber[regno];
2767 /* The fake registers will be eliminated to either the stack or
2768 hard frame pointer, both of which are usually valid base registers.
2769 Reload deals with the cases where the eliminated form isn't valid. */
2770 return (GP_REGNUM_P (regno)
2771 || regno == SP_REGNUM
2772 || regno == FRAME_POINTER_REGNUM
2773 || regno == ARG_POINTER_REGNUM);
2776 /* Return true if X is a valid base register.
2777 STRICT_P is true if REG_OK_STRICT is in effect. */
2779 static bool
2780 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2782 if (!strict_p && GET_CODE (x) == SUBREG)
2783 x = SUBREG_REG (x);
2785 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2788 /* Return true if address offset is a valid index. If it is, fill in INFO
2789 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2791 static bool
2792 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2793 enum machine_mode mode, bool strict_p)
2795 enum aarch64_address_type type;
2796 rtx index;
2797 int shift;
2799 /* (reg:P) */
2800 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2801 && GET_MODE (x) == Pmode)
2803 type = ADDRESS_REG_REG;
2804 index = x;
2805 shift = 0;
2807 /* (sign_extend:DI (reg:SI)) */
2808 else if ((GET_CODE (x) == SIGN_EXTEND
2809 || GET_CODE (x) == ZERO_EXTEND)
2810 && GET_MODE (x) == DImode
2811 && GET_MODE (XEXP (x, 0)) == SImode)
2813 type = (GET_CODE (x) == SIGN_EXTEND)
2814 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2815 index = XEXP (x, 0);
2816 shift = 0;
2818 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2819 else if (GET_CODE (x) == MULT
2820 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2821 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2822 && GET_MODE (XEXP (x, 0)) == DImode
2823 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2824 && CONST_INT_P (XEXP (x, 1)))
2826 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2827 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2828 index = XEXP (XEXP (x, 0), 0);
2829 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2831 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2832 else if (GET_CODE (x) == ASHIFT
2833 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2834 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2835 && GET_MODE (XEXP (x, 0)) == DImode
2836 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2837 && CONST_INT_P (XEXP (x, 1)))
2839 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2840 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2841 index = XEXP (XEXP (x, 0), 0);
2842 shift = INTVAL (XEXP (x, 1));
2844 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2845 else if ((GET_CODE (x) == SIGN_EXTRACT
2846 || GET_CODE (x) == ZERO_EXTRACT)
2847 && GET_MODE (x) == DImode
2848 && GET_CODE (XEXP (x, 0)) == MULT
2849 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2850 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2852 type = (GET_CODE (x) == SIGN_EXTRACT)
2853 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2854 index = XEXP (XEXP (x, 0), 0);
2855 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2856 if (INTVAL (XEXP (x, 1)) != 32 + shift
2857 || INTVAL (XEXP (x, 2)) != 0)
2858 shift = -1;
2860 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2861 (const_int 0xffffffff<<shift)) */
2862 else if (GET_CODE (x) == AND
2863 && GET_MODE (x) == DImode
2864 && GET_CODE (XEXP (x, 0)) == MULT
2865 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2866 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2867 && CONST_INT_P (XEXP (x, 1)))
2869 type = ADDRESS_REG_UXTW;
2870 index = XEXP (XEXP (x, 0), 0);
2871 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2872 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2873 shift = -1;
2875 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2876 else if ((GET_CODE (x) == SIGN_EXTRACT
2877 || GET_CODE (x) == ZERO_EXTRACT)
2878 && GET_MODE (x) == DImode
2879 && GET_CODE (XEXP (x, 0)) == ASHIFT
2880 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2881 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2883 type = (GET_CODE (x) == SIGN_EXTRACT)
2884 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2885 index = XEXP (XEXP (x, 0), 0);
2886 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2887 if (INTVAL (XEXP (x, 1)) != 32 + shift
2888 || INTVAL (XEXP (x, 2)) != 0)
2889 shift = -1;
2891 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2892 (const_int 0xffffffff<<shift)) */
2893 else if (GET_CODE (x) == AND
2894 && GET_MODE (x) == DImode
2895 && GET_CODE (XEXP (x, 0)) == ASHIFT
2896 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2897 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2898 && CONST_INT_P (XEXP (x, 1)))
2900 type = ADDRESS_REG_UXTW;
2901 index = XEXP (XEXP (x, 0), 0);
2902 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2903 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2904 shift = -1;
2906 /* (mult:P (reg:P) (const_int scale)) */
2907 else if (GET_CODE (x) == MULT
2908 && GET_MODE (x) == Pmode
2909 && GET_MODE (XEXP (x, 0)) == Pmode
2910 && CONST_INT_P (XEXP (x, 1)))
2912 type = ADDRESS_REG_REG;
2913 index = XEXP (x, 0);
2914 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2916 /* (ashift:P (reg:P) (const_int shift)) */
2917 else if (GET_CODE (x) == ASHIFT
2918 && GET_MODE (x) == Pmode
2919 && GET_MODE (XEXP (x, 0)) == Pmode
2920 && CONST_INT_P (XEXP (x, 1)))
2922 type = ADDRESS_REG_REG;
2923 index = XEXP (x, 0);
2924 shift = INTVAL (XEXP (x, 1));
2926 else
2927 return false;
2929 if (GET_CODE (index) == SUBREG)
2930 index = SUBREG_REG (index);
2932 if ((shift == 0 ||
2933 (shift > 0 && shift <= 3
2934 && (1 << shift) == GET_MODE_SIZE (mode)))
2935 && REG_P (index)
2936 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
2938 info->type = type;
2939 info->offset = index;
2940 info->shift = shift;
2941 return true;
2944 return false;
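/* For example, for an SImode access an index such as
   (mult:DI (reg:DI) (const_int 4)) is classified as ADDRESS_REG_REG
   with shift == 2, corresponding to an address like [x1, x2, lsl 2].  */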
2947 static inline bool
2948 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2950 return (offset >= -64 * GET_MODE_SIZE (mode)
2951 && offset < 64 * GET_MODE_SIZE (mode)
2952 && offset % GET_MODE_SIZE (mode) == 0);
2955 static inline bool
2956 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
2957 HOST_WIDE_INT offset)
2959 return offset >= -256 && offset < 256;
2962 static inline bool
2963 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2965 return (offset >= 0
2966 && offset < 4096 * GET_MODE_SIZE (mode)
2967 && offset % GET_MODE_SIZE (mode) == 0);
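/* For a DImode (8-byte) access the three predicates above accept,
   respectively, multiples of 8 in [-512, 504], any offset in
   [-256, 255], and multiples of 8 in [0, 32760].  */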
2970 /* Return true if X is a valid address for machine mode MODE. If it is,
2971 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
2972 effect. OUTER_CODE is PARALLEL for a load/store pair. */
2974 static bool
2975 aarch64_classify_address (struct aarch64_address_info *info,
2976 rtx x, enum machine_mode mode,
2977 RTX_CODE outer_code, bool strict_p)
2979 enum rtx_code code = GET_CODE (x);
2980 rtx op0, op1;
2981 bool allow_reg_index_p =
2982 outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
2984 /* Don't support anything other than POST_INC or REG addressing for
2985 AdvSIMD. */
2986 if (aarch64_vector_mode_p (mode)
2987 && (code != POST_INC && code != REG))
2988 return false;
2990 switch (code)
2992 case REG:
2993 case SUBREG:
2994 info->type = ADDRESS_REG_IMM;
2995 info->base = x;
2996 info->offset = const0_rtx;
2997 return aarch64_base_register_rtx_p (x, strict_p);
2999 case PLUS:
3000 op0 = XEXP (x, 0);
3001 op1 = XEXP (x, 1);
3002 if (GET_MODE_SIZE (mode) != 0
3003 && CONST_INT_P (op1)
3004 && aarch64_base_register_rtx_p (op0, strict_p))
3006 HOST_WIDE_INT offset = INTVAL (op1);
3008 info->type = ADDRESS_REG_IMM;
3009 info->base = op0;
3010 info->offset = op1;
3012 /* TImode and TFmode values are allowed in both pairs of X
3013 registers and individual Q registers. The available
3014 address modes are:
3015 X,X: 7-bit signed scaled offset
3016 Q: 9-bit signed offset
3017 We conservatively require an offset representable in either mode.
3019 if (mode == TImode || mode == TFmode)
3020 return (offset_7bit_signed_scaled_p (mode, offset)
3021 && offset_9bit_signed_unscaled_p (mode, offset));
3023 if (outer_code == PARALLEL)
3024 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3025 && offset_7bit_signed_scaled_p (mode, offset));
3026 else
3027 return (offset_9bit_signed_unscaled_p (mode, offset)
3028 || offset_12bit_unsigned_scaled_p (mode, offset));
3031 if (allow_reg_index_p)
3033 /* Look for base + (scaled/extended) index register. */
3034 if (aarch64_base_register_rtx_p (op0, strict_p)
3035 && aarch64_classify_index (info, op1, mode, strict_p))
3037 info->base = op0;
3038 return true;
3040 if (aarch64_base_register_rtx_p (op1, strict_p)
3041 && aarch64_classify_index (info, op0, mode, strict_p))
3043 info->base = op1;
3044 return true;
3048 return false;
3050 case POST_INC:
3051 case POST_DEC:
3052 case PRE_INC:
3053 case PRE_DEC:
3054 info->type = ADDRESS_REG_WB;
3055 info->base = XEXP (x, 0);
3056 info->offset = NULL_RTX;
3057 return aarch64_base_register_rtx_p (info->base, strict_p);
3059 case POST_MODIFY:
3060 case PRE_MODIFY:
3061 info->type = ADDRESS_REG_WB;
3062 info->base = XEXP (x, 0);
3063 if (GET_CODE (XEXP (x, 1)) == PLUS
3064 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3065 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3066 && aarch64_base_register_rtx_p (info->base, strict_p))
3068 HOST_WIDE_INT offset;
3069 info->offset = XEXP (XEXP (x, 1), 1);
3070 offset = INTVAL (info->offset);
3072 /* TImode and TFmode values are allowed in both pairs of X
3073 registers and individual Q registers. The available
3074 address modes are:
3075 X,X: 7-bit signed scaled offset
3076 Q: 9-bit signed offset
3077 We conservatively require an offset representable in either mode.
3079 if (mode == TImode || mode == TFmode)
3080 return (offset_7bit_signed_scaled_p (mode, offset)
3081 && offset_9bit_signed_unscaled_p (mode, offset));
3083 if (outer_code == PARALLEL)
3084 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3085 && offset_7bit_signed_scaled_p (mode, offset));
3086 else
3087 return offset_9bit_signed_unscaled_p (mode, offset);
3089 return false;
3091 case CONST:
3092 case SYMBOL_REF:
3093 case LABEL_REF:
3094 /* load literal: pc-relative constant pool entry. Only supported
3095 for SI mode or larger. */
3096 info->type = ADDRESS_SYMBOLIC;
3097 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3099 rtx sym, addend;
3101 split_const (x, &sym, &addend);
3102 return (GET_CODE (sym) == LABEL_REF
3103 || (GET_CODE (sym) == SYMBOL_REF
3104 && CONSTANT_POOL_ADDRESS_P (sym)));
3106 return false;
3108 case LO_SUM:
3109 info->type = ADDRESS_LO_SUM;
3110 info->base = XEXP (x, 0);
3111 info->offset = XEXP (x, 1);
3112 if (allow_reg_index_p
3113 && aarch64_base_register_rtx_p (info->base, strict_p))
3115 rtx sym, offs;
3116 split_const (info->offset, &sym, &offs);
3117 if (GET_CODE (sym) == SYMBOL_REF
3118 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3119 == SYMBOL_SMALL_ABSOLUTE))
3121 /* The symbol and offset must be aligned to the access size. */
3122 unsigned int align;
3123 unsigned int ref_size;
3125 if (CONSTANT_POOL_ADDRESS_P (sym))
3126 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3127 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3129 tree exp = SYMBOL_REF_DECL (sym);
3130 align = TYPE_ALIGN (TREE_TYPE (exp));
3131 align = CONSTANT_ALIGNMENT (exp, align);
3133 else if (SYMBOL_REF_DECL (sym))
3134 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3135 else
3136 align = BITS_PER_UNIT;
3138 ref_size = GET_MODE_SIZE (mode);
3139 if (ref_size == 0)
3140 ref_size = GET_MODE_SIZE (DImode);
3142 return ((INTVAL (offs) & (ref_size - 1)) == 0
3143 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3146 return false;
3148 default:
3149 return false;
3153 bool
3154 aarch64_symbolic_address_p (rtx x)
3156 rtx offset;
3158 split_const (x, &x, &offset);
3159 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3162 /* Classify the base of symbolic expression X, given that X appears in
3163 context CONTEXT. */
3165 enum aarch64_symbol_type
3166 aarch64_classify_symbolic_expression (rtx x,
3167 enum aarch64_symbol_context context)
3169 rtx offset;
3171 split_const (x, &x, &offset);
3172 return aarch64_classify_symbol (x, context);
3176 /* Return TRUE if X is a legitimate address for accessing memory in
3177 mode MODE. */
3178 static bool
3179 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3181 struct aarch64_address_info addr;
3183 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3186 /* Return TRUE if X is a legitimate address for accessing memory in
3187 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3188 pair operation. */
3189 bool
3190 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3191 RTX_CODE outer_code, bool strict_p)
3193 struct aarch64_address_info addr;
3195 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3198 /* Return TRUE if rtx X is immediate constant 0.0 */
3199 bool
3200 aarch64_float_const_zero_rtx_p (rtx x)
3202 REAL_VALUE_TYPE r;
3204 if (GET_MODE (x) == VOIDmode)
3205 return false;
3207 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3208 if (REAL_VALUE_MINUS_ZERO (r))
3209 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3210 return REAL_VALUES_EQUAL (r, dconst0);
3213 /* Return the fixed registers used for condition codes. */
3215 static bool
3216 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3218 *p1 = CC_REGNUM;
3219 *p2 = INVALID_REGNUM;
3220 return true;
3223 enum machine_mode
3224 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3226 /* All floating point compares return CCFP if it is an equality
3227 comparison, and CCFPE otherwise. */
3228 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3230 switch (code)
3232 case EQ:
3233 case NE:
3234 case UNORDERED:
3235 case ORDERED:
3236 case UNLT:
3237 case UNLE:
3238 case UNGT:
3239 case UNGE:
3240 case UNEQ:
3241 case LTGT:
3242 return CCFPmode;
3244 case LT:
3245 case LE:
3246 case GT:
3247 case GE:
3248 return CCFPEmode;
3250 default:
3251 gcc_unreachable ();
3255 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3256 && y == const0_rtx
3257 && (code == EQ || code == NE || code == LT || code == GE)
3258 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3259 || GET_CODE (x) == NEG))
3260 return CC_NZmode;
3262 /* A compare with a shifted or negated operand. Because of canonicalization,
3263 the comparison will have to be swapped when we emit the assembly
3264 code. */
3265 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3266 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3267 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3268 || GET_CODE (x) == LSHIFTRT
3269 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND
3270 || GET_CODE (x) == NEG))
3271 return CC_SWPmode;
3273 /* A compare of a mode narrower than SI mode against zero can be done
3274 by extending the value in the comparison. */
3275 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3276 && y == const0_rtx)
3277 /* Only use sign-extension if we really need it. */
3278 return ((code == GT || code == GE || code == LE || code == LT)
3279 ? CC_SESWPmode : CC_ZESWPmode);
3281 /* For everything else, return CCmode. */
3282 return CCmode;
3285 static unsigned
3286 aarch64_get_condition_code (rtx x)
3288 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3289 enum rtx_code comp_code = GET_CODE (x);
3291 if (GET_MODE_CLASS (mode) != MODE_CC)
3292 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3294 switch (mode)
3296 case CCFPmode:
3297 case CCFPEmode:
3298 switch (comp_code)
3300 case GE: return AARCH64_GE;
3301 case GT: return AARCH64_GT;
3302 case LE: return AARCH64_LS;
3303 case LT: return AARCH64_MI;
3304 case NE: return AARCH64_NE;
3305 case EQ: return AARCH64_EQ;
3306 case ORDERED: return AARCH64_VC;
3307 case UNORDERED: return AARCH64_VS;
3308 case UNLT: return AARCH64_LT;
3309 case UNLE: return AARCH64_LE;
3310 case UNGT: return AARCH64_HI;
3311 case UNGE: return AARCH64_PL;
3312 default: gcc_unreachable ();
3314 break;
3316 case CCmode:
3317 switch (comp_code)
3319 case NE: return AARCH64_NE;
3320 case EQ: return AARCH64_EQ;
3321 case GE: return AARCH64_GE;
3322 case GT: return AARCH64_GT;
3323 case LE: return AARCH64_LE;
3324 case LT: return AARCH64_LT;
3325 case GEU: return AARCH64_CS;
3326 case GTU: return AARCH64_HI;
3327 case LEU: return AARCH64_LS;
3328 case LTU: return AARCH64_CC;
3329 default: gcc_unreachable ();
3331 break;
3333 case CC_SWPmode:
3334 case CC_ZESWPmode:
3335 case CC_SESWPmode:
3336 switch (comp_code)
3338 case NE: return AARCH64_NE;
3339 case EQ: return AARCH64_EQ;
3340 case GE: return AARCH64_LE;
3341 case GT: return AARCH64_LT;
3342 case LE: return AARCH64_GE;
3343 case LT: return AARCH64_GT;
3344 case GEU: return AARCH64_LS;
3345 case GTU: return AARCH64_CC;
3346 case LEU: return AARCH64_CS;
3347 case LTU: return AARCH64_HI;
3348 default: gcc_unreachable ();
3350 break;
3352 case CC_NZmode:
3353 switch (comp_code)
3355 case NE: return AARCH64_NE;
3356 case EQ: return AARCH64_EQ;
3357 case GE: return AARCH64_PL;
3358 case LT: return AARCH64_MI;
3359 default: gcc_unreachable ();
3361 break;
3363 default:
3364 gcc_unreachable ();
3365 break;
3369 static unsigned
3370 bit_count (unsigned HOST_WIDE_INT value)
3372 unsigned count = 0;
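/* Kernighan's method: value &= value - 1 clears the lowest set bit,
   so the loop iterates once per set bit.  */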
3374 while (value)
3376 count++;
3377 value &= value - 1;
3380 return count;
3383 void
3384 aarch64_print_operand (FILE *f, rtx x, char code)
3386 switch (code)
3388 case 'e':
3389 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3391 int n;
3393 if (GET_CODE (x) != CONST_INT
3394 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3396 output_operand_lossage ("invalid operand for '%%%c'", code);
3397 return;
3400 switch (n)
3402 case 3:
3403 fputc ('b', f);
3404 break;
3405 case 4:
3406 fputc ('h', f);
3407 break;
3408 case 5:
3409 fputc ('w', f);
3410 break;
3411 default:
3412 output_operand_lossage ("invalid operand for '%%%c'", code);
3413 return;
3416 break;
3418 case 'p':
3420 int n;
3422 /* Print N such that 2^N == X. */
3423 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3425 output_operand_lossage ("invalid operand for '%%%c'", code);
3426 return;
3429 asm_fprintf (f, "%d", n);
3431 break;
3433 case 'P':
3434 /* Print the number of non-zero bits in X (a const_int). */
3435 if (GET_CODE (x) != CONST_INT)
3437 output_operand_lossage ("invalid operand for '%%%c'", code);
3438 return;
3441 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3442 break;
3444 case 'H':
3445 /* Print the higher numbered register of a pair (TImode) of regs. */
3446 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3448 output_operand_lossage ("invalid operand for '%%%c'", code);
3449 return;
3452 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3453 break;
3455 case 'm':
3456 /* Print a condition (eq, ne, etc). */
3458 /* CONST_TRUE_RTX means always -- that's the default. */
3459 if (x == const_true_rtx)
3460 return;
3462 if (!COMPARISON_P (x))
3464 output_operand_lossage ("invalid operand for '%%%c'", code);
3465 return;
3468 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3469 break;
3471 case 'M':
3472 /* Print the inverse of a condition (eq <-> ne, etc). */
3474 /* CONST_TRUE_RTX means never -- that's the default. */
3475 if (x == const_true_rtx)
3477 fputs ("nv", f);
3478 return;
3481 if (!COMPARISON_P (x))
3483 output_operand_lossage ("invalid operand for '%%%c'", code);
3484 return;
3487 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3488 (aarch64_get_condition_code (x))], f);
3489 break;
3491 case 'b':
3492 case 'h':
3493 case 's':
3494 case 'd':
3495 case 'q':
3496 /* Print a scalar FP/SIMD register name. */
3497 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3499 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3500 return;
3502 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3503 break;
3505 case 'S':
3506 case 'T':
3507 case 'U':
3508 case 'V':
3509 /* Print the first FP/SIMD register name in a list. */
3510 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3512 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3513 return;
3515 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3516 break;
3518 case 'X':
3519 /* Print bottom 16 bits of integer constant in hex. */
3520 if (GET_CODE (x) != CONST_INT)
3522 output_operand_lossage ("invalid operand for '%%%c'", code);
3523 return;
3525 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3526 break;
3528 case 'w':
3529 case 'x':
3530 /* Print a general register name or the zero register (32-bit or
3531 64-bit). */
3532 if (x == const0_rtx
3533 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3535 asm_fprintf (f, "%czr", code);
3536 break;
3539 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3541 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3542 break;
3545 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3547 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3548 break;
3551 /* Fall through */
3553 case 0:
3554 /* Print a normal operand. If it's a general register, then we
3555 assume DImode. */
3556 if (x == NULL)
3558 output_operand_lossage ("missing operand");
3559 return;
3562 switch (GET_CODE (x))
3564 case REG:
3565 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3566 break;
3568 case MEM:
3569 aarch64_memory_reference_mode = GET_MODE (x);
3570 output_address (XEXP (x, 0));
3571 break;
3573 case LABEL_REF:
3574 case SYMBOL_REF:
3575 output_addr_const (asm_out_file, x);
3576 break;
3578 case CONST_INT:
3579 asm_fprintf (f, "%wd", INTVAL (x));
3580 break;
3582 case CONST_VECTOR:
3583 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3585 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3586 HOST_WIDE_INT_MIN,
3587 HOST_WIDE_INT_MAX));
3588 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3590 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3592 fputc ('0', f);
3594 else
3595 gcc_unreachable ();
3596 break;
3598 case CONST_DOUBLE:
3599 /* CONST_DOUBLE can represent a double-width integer.
3600 In this case, the mode of x is VOIDmode. */
3601 if (GET_MODE (x) == VOIDmode)
3602 ; /* Do Nothing. */
3603 else if (aarch64_float_const_zero_rtx_p (x))
3605 fputc ('0', f);
3606 break;
3608 else if (aarch64_float_const_representable_p (x))
3610 #define buf_size 20
3611 char float_buf[buf_size] = {'\0'};
3612 REAL_VALUE_TYPE r;
3613 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3614 real_to_decimal_for_mode (float_buf, &r,
3615 buf_size, buf_size,
3616 1, GET_MODE (x));
3617 asm_fprintf (asm_out_file, "%s", float_buf);
3618 break;
3619 #undef buf_size
3621 output_operand_lossage ("invalid constant");
3622 return;
3623 default:
3624 output_operand_lossage ("invalid operand");
3625 return;
3627 break;
3629 case 'A':
3630 if (GET_CODE (x) == HIGH)
3631 x = XEXP (x, 0);
3633 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3635 case SYMBOL_SMALL_GOT:
3636 asm_fprintf (asm_out_file, ":got:");
3637 break;
3639 case SYMBOL_SMALL_TLSGD:
3640 asm_fprintf (asm_out_file, ":tlsgd:");
3641 break;
3643 case SYMBOL_SMALL_TLSDESC:
3644 asm_fprintf (asm_out_file, ":tlsdesc:");
3645 break;
3647 case SYMBOL_SMALL_GOTTPREL:
3648 asm_fprintf (asm_out_file, ":gottprel:");
3649 break;
3651 case SYMBOL_SMALL_TPREL:
3652 asm_fprintf (asm_out_file, ":tprel:");
3653 break;
3655 case SYMBOL_TINY_GOT:
3656 gcc_unreachable ();
3657 break;
3659 default:
3660 break;
3662 output_addr_const (asm_out_file, x);
3663 break;
3665 case 'L':
3666 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3668 case SYMBOL_SMALL_GOT:
3669 asm_fprintf (asm_out_file, ":lo12:");
3670 break;
3672 case SYMBOL_SMALL_TLSGD:
3673 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3674 break;
3676 case SYMBOL_SMALL_TLSDESC:
3677 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3678 break;
3680 case SYMBOL_SMALL_GOTTPREL:
3681 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3682 break;
3684 case SYMBOL_SMALL_TPREL:
3685 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3686 break;
3688 case SYMBOL_TINY_GOT:
3689 asm_fprintf (asm_out_file, ":got:");
3690 break;
3692 default:
3693 break;
3695 output_addr_const (asm_out_file, x);
3696 break;
3698 case 'G':
3700 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3702 case SYMBOL_SMALL_TPREL:
3703 asm_fprintf (asm_out_file, ":tprel_hi12:");
3704 break;
3705 default:
3706 break;
3708 output_addr_const (asm_out_file, x);
3709 break;
3711 default:
3712 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3713 return;
3717 void
3718 aarch64_print_operand_address (FILE *f, rtx x)
3720 struct aarch64_address_info addr;
3722 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3723 MEM, true))
3724 switch (addr.type)
3726 case ADDRESS_REG_IMM:
3727 if (addr.offset == const0_rtx)
3728 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3729 else
3730 asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
3731 INTVAL (addr.offset));
3732 return;
3734 case ADDRESS_REG_REG:
3735 if (addr.shift == 0)
3736 asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3737 reg_names [REGNO (addr.offset)]);
3738 else
3739 asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3740 reg_names [REGNO (addr.offset)], addr.shift);
3741 return;
3743 case ADDRESS_REG_UXTW:
3744 if (addr.shift == 0)
3745 asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
3746 REGNO (addr.offset) - R0_REGNUM);
3747 else
3748 asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
3749 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3750 return;
3752 case ADDRESS_REG_SXTW:
3753 if (addr.shift == 0)
3754 asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
3755 REGNO (addr.offset) - R0_REGNUM);
3756 else
3757 asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
3758 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3759 return;
3761 case ADDRESS_REG_WB:
3762 switch (GET_CODE (x))
3764 case PRE_INC:
3765 asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
3766 GET_MODE_SIZE (aarch64_memory_reference_mode));
3767 return;
3768 case POST_INC:
3769 asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
3770 GET_MODE_SIZE (aarch64_memory_reference_mode));
3771 return;
3772 case PRE_DEC:
3773 asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
3774 GET_MODE_SIZE (aarch64_memory_reference_mode));
3775 return;
3776 case POST_DEC:
3777 asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
3778 GET_MODE_SIZE (aarch64_memory_reference_mode));
3779 return;
3780 case PRE_MODIFY:
3781 asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
3782 INTVAL (addr.offset));
3783 return;
3784 case POST_MODIFY:
3785 asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
3786 INTVAL (addr.offset));
3787 return;
3788 default:
3789 break;
3791 break;
3793 case ADDRESS_LO_SUM:
3794 asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
3795 output_addr_const (f, addr.offset);
3796 asm_fprintf (f, "]");
3797 return;
3799 case ADDRESS_SYMBOLIC:
3800 break;
3803 output_addr_const (f, x);
3806 bool
3807 aarch64_label_mentioned_p (rtx x)
3809 const char *fmt;
3810 int i;
3812 if (GET_CODE (x) == LABEL_REF)
3813 return true;
3815 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3816 referencing instruction, but they are constant offsets, not
3817 symbols. */
3818 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3819 return false;
3821 fmt = GET_RTX_FORMAT (GET_CODE (x));
3822 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3824 if (fmt[i] == 'E')
3826 int j;
3828 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3829 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3830 return 1;
3832 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3833 return 1;
3836 return 0;
3839 /* Implement REGNO_REG_CLASS. */
3841 enum reg_class
3842 aarch64_regno_regclass (unsigned regno)
3844 if (GP_REGNUM_P (regno))
3845 return CORE_REGS;
3847 if (regno == SP_REGNUM)
3848 return STACK_REG;
3850 if (regno == FRAME_POINTER_REGNUM
3851 || regno == ARG_POINTER_REGNUM)
3852 return CORE_REGS;
3854 if (FP_REGNUM_P (regno))
3855 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
3857 return NO_REGS;
3860 /* Try a machine-dependent way of reloading an illegitimate address
3861 operand. If we find one, push the reload and return the new rtx. */
3864 aarch64_legitimize_reload_address (rtx *x_p,
3865 enum machine_mode mode,
3866 int opnum, int type,
3867 int ind_levels ATTRIBUTE_UNUSED)
3869 rtx x = *x_p;
3871 /* Do not allow mem (plus (reg, const)) if vector mode. */
3872 if (aarch64_vector_mode_p (mode)
3873 && GET_CODE (x) == PLUS
3874 && REG_P (XEXP (x, 0))
3875 && CONST_INT_P (XEXP (x, 1)))
3877 rtx orig_rtx = x;
3878 x = copy_rtx (x);
3879 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3880 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3881 opnum, (enum reload_type) type);
3882 return x;
3885 /* We must recognize output that we have already generated ourselves. */
3886 if (GET_CODE (x) == PLUS
3887 && GET_CODE (XEXP (x, 0)) == PLUS
3888 && REG_P (XEXP (XEXP (x, 0), 0))
3889 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3890 && CONST_INT_P (XEXP (x, 1)))
3892 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3893 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3894 opnum, (enum reload_type) type);
3895 return x;
3898 /* We wish to handle large displacements off a base register by splitting
3899 the addend across an add and the mem insn. This can cut the number of
3900 extra insns needed from 3 to 1. It is only useful for load/store of a
3901 single register with a 12-bit offset field. */
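/* For example, a DImode access at base + 0x12340 splits roughly into
       add  tmp, base, #0x12000
       ldr  x0, [tmp, #0x340]
   since 0x12000 is a shifted 12-bit immediate and 0x340 is a valid
   scaled offset for the load itself (tmp and x0 are illustrative).  */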
3902 if (GET_CODE (x) == PLUS
3903 && REG_P (XEXP (x, 0))
3904 && CONST_INT_P (XEXP (x, 1))
3905 && HARD_REGISTER_P (XEXP (x, 0))
3906 && mode != TImode
3907 && mode != TFmode
3908 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
3910 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
3911 HOST_WIDE_INT low = val & 0xfff;
3912 HOST_WIDE_INT high = val - low;
3913 HOST_WIDE_INT offs;
3914 rtx cst;
3916 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
3917 BLKmode alignment. */
3918 if (GET_MODE_SIZE (mode) == 0)
3919 return NULL_RTX;
3921 offs = low % GET_MODE_SIZE (mode);
3923 /* Align misaligned offset by adjusting high part to compensate. */
3924 if (offs != 0)
3926 if (aarch64_uimm12_shift (high + offs))
3928 /* Align down. */
3929 low = low - offs;
3930 high = high + offs;
3932 else
3934 /* Align up. */
3935 offs = GET_MODE_SIZE (mode) - offs;
3936 low = low + offs;
3937 high = high + (low & 0x1000) - offs;
3938 low &= 0xfff;
3942 /* Check for overflow. */
3943 if (high + low != val)
3944 return NULL_RTX;
3946 cst = GEN_INT (high);
3947 if (!aarch64_uimm12_shift (high))
3948 cst = force_const_mem (Pmode, cst);
3950 /* Reload high part into base reg, leaving the low part
3951 in the mem instruction. */
3952 x = gen_rtx_PLUS (Pmode,
3953 gen_rtx_PLUS (Pmode, XEXP (x, 0), cst),
3954 GEN_INT (low));
3956 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3957 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
3958 opnum, (enum reload_type) type);
3959 return x;
3962 return NULL_RTX;
3966 static reg_class_t
3967 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
3968 reg_class_t rclass,
3969 enum machine_mode mode,
3970 secondary_reload_info *sri)
3972 /* Address expressions of the form PLUS (SP, large_offset) need two
3973 scratch registers, one for the constant, and one for holding a
3974 copy of SP, since SP cannot be used on the RHS of an add-reg
3975 instruction. */
3976 if (mode == DImode
3977 && GET_CODE (x) == PLUS
3978 && XEXP (x, 0) == stack_pointer_rtx
3979 && CONST_INT_P (XEXP (x, 1))
3980 && !aarch64_uimm12_shift (INTVAL (XEXP (x, 1))))
3982 sri->icode = CODE_FOR_reload_sp_immediate;
3983 return NO_REGS;
3986 /* Without the TARGET_SIMD instructions we cannot move a Q register
3987 to a Q register directly. We need a scratch. */
3988 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
3989 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
3990 && reg_class_subset_p (rclass, FP_REGS))
3992 if (mode == TFmode)
3993 sri->icode = CODE_FOR_aarch64_reload_movtf;
3994 else if (mode == TImode)
3995 sri->icode = CODE_FOR_aarch64_reload_movti;
3996 return NO_REGS;
3999 /* A TFmode or TImode memory access should be handled via FP_REGS
4000 because AArch64 has richer addressing modes for LDR/STR instructions
4001 than LDP/STP instructions. */
4002 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
4003 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4004 return FP_REGS;
4006 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4007 return CORE_REGS;
4009 return NO_REGS;
4012 static bool
4013 aarch64_can_eliminate (const int from, const int to)
4015 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4016 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4018 if (frame_pointer_needed)
4020 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4021 return true;
4022 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4023 return false;
4024 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4025 && !cfun->calls_alloca)
4026 return true;
4027 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4028 return true;
4029 return false;
4031 else
4033 /* If we decided that we didn't need a leaf frame pointer but then used
4034 LR in the function, then we'll want a frame pointer after all, so
4035 prevent this elimination to ensure a frame pointer is used.
4037 NOTE: the original value of flag_omit_frame_pointer gets trashed
4038 IFF flag_omit_leaf_frame_pointer is true, so we check the value
4039 of faked_omit_frame_pointer here (which is true when we always
4040 wish to keep non-leaf frame pointers but only wish to keep leaf frame
4041 pointers when LR is clobbered). */
4042 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4043 && df_regs_ever_live_p (LR_REGNUM)
4044 && faked_omit_frame_pointer)
4045 return false;
4048 return true;
4051 HOST_WIDE_INT
4052 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4054 HOST_WIDE_INT frame_size;
4055 HOST_WIDE_INT offset;
4057 aarch64_layout_frame ();
4058 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4059 + crtl->outgoing_args_size
4060 + cfun->machine->saved_varargs_size);
4062 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4063 offset = frame_size;
4065 if (to == HARD_FRAME_POINTER_REGNUM)
4067 if (from == ARG_POINTER_REGNUM)
4068 return offset - crtl->outgoing_args_size;
4070 if (from == FRAME_POINTER_REGNUM)
4071 return cfun->machine->frame.saved_regs_size + get_frame_size ();
4074 if (to == STACK_POINTER_REGNUM)
4076 if (from == FRAME_POINTER_REGNUM)
4078 HOST_WIDE_INT elim = crtl->outgoing_args_size
4079 + cfun->machine->frame.saved_regs_size
4080 + get_frame_size ()
4081 - cfun->machine->frame.fp_lr_offset;
4082 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4083 return elim;
4087 return offset;
4091 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4092 previous frame. */
4095 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4097 if (count != 0)
4098 return const0_rtx;
4099 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
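/* Emit the trampoline template.  The two PC-relative ldrs below read the
   doublewords placed at offsets 16 and 24 of the trampoline;
   aarch64_trampoline_init fills those slots with the target function
   address and the static chain value respectively.  */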
4103 static void
4104 aarch64_asm_trampoline_template (FILE *f)
4106 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4107 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4108 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4109 assemble_aligned_integer (4, const0_rtx);
4110 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4111 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4114 unsigned
4115 aarch64_trampoline_size (void)
4117 return 32; /* 3 insns + padding + 2 dwords. */
4120 static void
4121 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4123 rtx fnaddr, mem, a_tramp;
4125 /* Don't need to copy the trailing D-words; we fill those in below. */
4126 emit_block_move (m_tramp, assemble_trampoline_template (),
4127 GEN_INT (TRAMPOLINE_SIZE - 16), BLOCK_OP_NORMAL);
4128 mem = adjust_address (m_tramp, DImode, 16);
4129 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4130 emit_move_insn (mem, fnaddr);
4132 mem = adjust_address (m_tramp, DImode, 24);
4133 emit_move_insn (mem, chain_value);
4135 /* XXX We should really define a "clear_cache" pattern and use
4136 gen_clear_cache(). */
4137 a_tramp = XEXP (m_tramp, 0);
4138 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4139 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
4140 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
4143 static unsigned char
4144 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4146 switch (regclass)
4148 case CORE_REGS:
4149 case POINTER_REGS:
4150 case GENERAL_REGS:
4151 case ALL_REGS:
4152 case FP_REGS:
4153 case FP_LO_REGS:
4154 return
4155 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4156 (GET_MODE_SIZE (mode) + 7) / 8;
4157 case STACK_REG:
4158 return 1;
4160 case NO_REGS:
4161 return 0;
4163 default:
4164 break;
4166 gcc_unreachable ();
4169 static reg_class_t
4170 aarch64_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t regclass)
4172 return ((regclass == POINTER_REGS || regclass == STACK_REG)
4173 ? GENERAL_REGS : regclass);
4176 void
4177 aarch64_asm_output_labelref (FILE* f, const char *name)
4179 asm_fprintf (f, "%U%s", name);
4182 static void
4183 aarch64_elf_asm_constructor (rtx symbol, int priority)
4185 if (priority == DEFAULT_INIT_PRIORITY)
4186 default_ctor_section_asm_out_constructor (symbol, priority);
4187 else
4189 section *s;
4190 char buf[18];
4191 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4192 s = get_section (buf, SECTION_WRITE, NULL);
4193 switch_to_section (s);
4194 assemble_align (POINTER_SIZE);
4195 fputs ("\t.dword\t", asm_out_file);
4196 output_addr_const (asm_out_file, symbol);
4197 fputc ('\n', asm_out_file);
4201 static void
4202 aarch64_elf_asm_destructor (rtx symbol, int priority)
4204 if (priority == DEFAULT_INIT_PRIORITY)
4205 default_dtor_section_asm_out_destructor (symbol, priority);
4206 else
4208 section *s;
4209 char buf[18];
4210 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4211 s = get_section (buf, SECTION_WRITE, NULL);
4212 switch_to_section (s);
4213 assemble_align (POINTER_SIZE);
4214 fputs ("\t.dword\t", asm_out_file);
4215 output_addr_const (asm_out_file, symbol);
4216 fputc ('\n', asm_out_file);
4220 const char*
4221 aarch64_output_casesi (rtx *operands)
4223 char buf[100];
4224 char label[100];
4225 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
4226 int index;
4227 static const char *const patterns[4][2] =
4230 "ldrb\t%w3, [%0,%w1,uxtw]",
4231 "add\t%3, %4, %w3, sxtb #2"
4234 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4235 "add\t%3, %4, %w3, sxth #2"
4238 "ldr\t%w3, [%0,%w1,uxtw #2]",
4239 "add\t%3, %4, %w3, sxtw #2"
4241 /* We assume that DImode is only generated when not optimizing and
4242 that we don't really need 64-bit address offsets. That would
4243 imply an object file with 8GB of code in a single function! */
4245 "ldr\t%w3, [%0,%w1,uxtw #2]",
4246 "add\t%3, %4, %w3, sxtw #2"
4250 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4252 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4254 gcc_assert (index >= 0 && index <= 3);
4256 /* Need to implement table size reduction, by changing the code below. */
4257 output_asm_insn (patterns[index][0], operands);
4258 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4259 snprintf (buf, sizeof (buf),
4260 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4261 output_asm_insn (buf, operands);
4262 output_asm_insn (patterns[index][1], operands);
4263 output_asm_insn ("br\t%3", operands);
4264 assemble_label (asm_out_file, label);
4265 return "";
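/* As an illustration (register and label numbers are arbitrary), for a
   halfword dispatch table (index 1 above) the emitted sequence is:

	ldrh	w3, [x0, w1, uxtw #1]	// scaled load of the table entry
	adr	x4, .Lrtx<N>		// base for the computed branch
	add	x3, x4, w3, sxth #2	// entry * 4 added to the base
	br	x3
   .Lrtx<N>:

   where operand 0 holds the table address, operand 1 the index, and
   operands 3 and 4 are scratch registers.  */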
4269 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4270 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4271 operator. */
4274 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4276 if (shift >= 0 && shift <= 3)
4278 int size;
4279 for (size = 8; size <= 32; size *= 2)
4281 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4282 if (mask == bits << shift)
4283 return size;
4286 return 0;
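/* Worked examples (the values are for illustration only):
     aarch64_uxt_size (0, 0xff)    == 8   -- plain UXTB
     aarch64_uxt_size (1, 0x1fe)   == 8   -- UXTB with a left shift of 1
     aarch64_uxt_size (2, 0x3fffc) == 16  -- UXTH with a left shift of 2
     aarch64_uxt_size (1, 0xff)    == 0   -- mask does not line up with the shift  */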
4289 static bool
4290 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4291 const_rtx x ATTRIBUTE_UNUSED)
4293 /* We can't use blocks for constants when we're using a per-function
4294 constant pool. */
4295 return false;
4298 static section *
4299 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4300 rtx x ATTRIBUTE_UNUSED,
4301 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4303 /* Force all constant pool entries into the current function section. */
4304 return function_section (current_function_decl);
4308 /* Costs. */
4310 /* Helper function for rtx cost calculation. Strip a shift expression
4311 from X. Returns the inner operand if successful, or the original
4312 expression on failure. */
4313 static rtx
4314 aarch64_strip_shift (rtx x)
4316 rtx op = x;
4318 if ((GET_CODE (op) == ASHIFT
4319 || GET_CODE (op) == ASHIFTRT
4320 || GET_CODE (op) == LSHIFTRT)
4321 && CONST_INT_P (XEXP (op, 1)))
4322 return XEXP (op, 0);
4324 if (GET_CODE (op) == MULT
4325 && CONST_INT_P (XEXP (op, 1))
4326 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4327 return XEXP (op, 0);
4329 return x;
4332 /* Helper function for rtx cost calculation. Strip a shift or extend
4333 expression from X. Returns the inner operand if successful, or the
4334 original expression on failure. We deal with a number of possible
4335 canonicalization variations here. */
4336 static rtx
4337 aarch64_strip_shift_or_extend (rtx x)
4339 rtx op = x;
4341 /* Zero and sign extraction of a widened value. */
4342 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4343 && XEXP (op, 2) == const0_rtx
4344 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4345 XEXP (op, 1)))
4346 return XEXP (XEXP (op, 0), 0);
4348 /* It can also be represented (for zero-extend) as an AND with an
4349 immediate. */
4350 if (GET_CODE (op) == AND
4351 && GET_CODE (XEXP (op, 0)) == MULT
4352 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4353 && CONST_INT_P (XEXP (op, 1))
4354 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4355 INTVAL (XEXP (op, 1))) != 0)
4356 return XEXP (XEXP (op, 0), 0);
4358 /* Now handle extended register, as this may also have an optional
4359 left shift by 1..4. */
4360 if (GET_CODE (op) == ASHIFT
4361 && CONST_INT_P (XEXP (op, 1))
4362 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4363 op = XEXP (op, 0);
4365 if (GET_CODE (op) == ZERO_EXTEND
4366 || GET_CODE (op) == SIGN_EXTEND)
4367 op = XEXP (op, 0);
4369 if (op != x)
4370 return op;
4372 return aarch64_strip_shift (x);
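/* Illustrative inputs and results (RTL sketches, not taken from any
   particular test case):
     (ashift (reg:DI x1) (const_int 2))			  -> (reg:DI x1)
     (mult (reg:DI x1) (const_int 8))			  -> (reg:DI x1)
     (zero_extend:DI (reg:SI w1))			  -> (reg:SI w1)
     (ashift (sign_extend:DI (reg:SI w1)) (const_int 3)) -> (reg:SI w1)
   Any other form is returned unchanged.  */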
4375 /* Calculate the cost of calculating X, storing it in *COST. Result
4376 is true if the total cost of the operation has now been calculated. */
4377 static bool
4378 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4379 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4381 rtx op0, op1;
4382 const struct cpu_rtx_cost_table *extra_cost
4383 = aarch64_tune_params->insn_extra_cost;
4385 switch (code)
4387 case SET:
4388 op0 = SET_DEST (x);
4389 op1 = SET_SRC (x);
4391 switch (GET_CODE (op0))
4393 case MEM:
4394 if (speed)
4395 *cost += extra_cost->memory_store;
4397 if (op1 != const0_rtx)
4398 *cost += rtx_cost (op1, SET, 1, speed);
4399 return true;
4401 case SUBREG:
4402 if (! REG_P (SUBREG_REG (op0)))
4403 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4404 /* Fall through. */
4405 case REG:
4406 /* Cost is just the cost of the RHS of the set. */
4407 *cost += rtx_cost (op1, SET, 1, true);
4408 return true;
4410 case ZERO_EXTRACT: /* Bit-field insertion. */
4411 case SIGN_EXTRACT:
4412 /* Strip any redundant widening of the RHS to meet the width of
4413 the target. */
4414 if (GET_CODE (op1) == SUBREG)
4415 op1 = SUBREG_REG (op1);
4416 if ((GET_CODE (op1) == ZERO_EXTEND
4417 || GET_CODE (op1) == SIGN_EXTEND)
4418 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4419 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4420 >= INTVAL (XEXP (op0, 1))))
4421 op1 = XEXP (op1, 0);
4422 *cost += rtx_cost (op1, SET, 1, speed);
4423 return true;
4425 default:
4426 break;
4428 return false;
4430 case MEM:
4431 if (speed)
4432 *cost += extra_cost->memory_load;
4434 return true;
4436 case NEG:
4437 op0 = CONST0_RTX (GET_MODE (x));
4438 op1 = XEXP (x, 0);
4439 goto cost_minus;
4441 case COMPARE:
4442 op0 = XEXP (x, 0);
4443 op1 = XEXP (x, 1);
4445 if (op1 == const0_rtx
4446 && GET_CODE (op0) == AND)
4448 x = op0;
4449 goto cost_logic;
4452 /* Comparisons can work if the order is swapped.
4453 Canonicalization puts the more complex operation first, but
4454 we want it in op1. */
4455 if (! (REG_P (op0)
4456 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4458 op0 = XEXP (x, 1);
4459 op1 = XEXP (x, 0);
4461 goto cost_minus;
4463 case MINUS:
4464 op0 = XEXP (x, 0);
4465 op1 = XEXP (x, 1);
4467 cost_minus:
4468 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4469 || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4470 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4472 if (op0 != const0_rtx)
4473 *cost += rtx_cost (op0, MINUS, 0, speed);
4475 if (CONST_INT_P (op1))
4477 if (!aarch64_uimm12_shift (INTVAL (op1)))
4478 *cost += rtx_cost (op1, MINUS, 1, speed);
4480 else
4482 op1 = aarch64_strip_shift_or_extend (op1);
4483 *cost += rtx_cost (op1, MINUS, 1, speed);
4485 return true;
4488 return false;
4490 case PLUS:
4491 op0 = XEXP (x, 0);
4492 op1 = XEXP (x, 1);
4494 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4496 if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4498 *cost += rtx_cost (op0, PLUS, 0, speed);
4500 else
4502 rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4504 if (new_op0 == op0
4505 && GET_CODE (op0) == MULT)
4507 if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4508 && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4509 || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4510 && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4512 *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4513 speed)
4514 + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4515 speed)
4516 + rtx_cost (op1, PLUS, 1, speed));
4517 if (speed)
4518 *cost += extra_cost->int_multiply_extend_add;
4519 return true;
4521 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4522 + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4523 + rtx_cost (op1, PLUS, 1, speed));
4525 if (speed)
4526 *cost += extra_cost->int_multiply_add;
4529 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4530 + rtx_cost (op1, PLUS, 1, speed));
4532 return true;
4535 return false;
4537 case IOR:
4538 case XOR:
4539 case AND:
4540 cost_logic:
4541 op0 = XEXP (x, 0);
4542 op1 = XEXP (x, 1);
4544 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4546 if (CONST_INT_P (op1)
4547 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4549 *cost += rtx_cost (op0, AND, 0, speed);
4551 else
4553 if (GET_CODE (op0) == NOT)
4554 op0 = XEXP (op0, 0);
4555 op0 = aarch64_strip_shift (op0);
4556 *cost += (rtx_cost (op0, AND, 0, speed)
4557 + rtx_cost (op1, AND, 1, speed));
4559 return true;
4561 return false;
4563 case ZERO_EXTEND:
4564 if ((GET_MODE (x) == DImode
4565 && GET_MODE (XEXP (x, 0)) == SImode)
4566 || GET_CODE (XEXP (x, 0)) == MEM)
4568 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4569 return true;
4571 return false;
4573 case SIGN_EXTEND:
4574 if (GET_CODE (XEXP (x, 0)) == MEM)
4576 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4577 return true;
4579 return false;
4581 case ROTATE:
4582 if (!CONST_INT_P (XEXP (x, 1)))
4583 *cost += COSTS_N_INSNS (2);
4584 /* Fall through. */
4585 case ROTATERT:
4586 case LSHIFTRT:
4587 case ASHIFT:
4588 case ASHIFTRT:
4590 /* Shifting by a register often takes an extra cycle. */
4591 if (speed && !CONST_INT_P (XEXP (x, 1)))
4592 *cost += extra_cost->register_shift;
4594 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4595 return true;
4597 case HIGH:
4598 if (!CONSTANT_P (XEXP (x, 0)))
4599 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4600 return true;
4602 case LO_SUM:
4603 if (!CONSTANT_P (XEXP (x, 1)))
4604 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4605 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4606 return true;
4608 case ZERO_EXTRACT:
4609 case SIGN_EXTRACT:
4610 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4611 return true;
4613 case MULT:
4614 op0 = XEXP (x, 0);
4615 op1 = XEXP (x, 1);
4617 *cost = COSTS_N_INSNS (1);
4618 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4620 if (CONST_INT_P (op1)
4621 && exact_log2 (INTVAL (op1)) > 0)
4623 *cost += rtx_cost (op0, ASHIFT, 0, speed);
4624 return true;
4627 if ((GET_CODE (op0) == ZERO_EXTEND
4628 && GET_CODE (op1) == ZERO_EXTEND)
4629 || (GET_CODE (op0) == SIGN_EXTEND
4630 && GET_CODE (op1) == SIGN_EXTEND))
4632 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4633 + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4634 if (speed)
4635 *cost += extra_cost->int_multiply_extend;
4636 return true;
4639 if (speed)
4640 *cost += extra_cost->int_multiply;
4642 else if (speed)
4644 if (GET_MODE (x) == DFmode)
4645 *cost += extra_cost->double_multiply;
4646 else if (GET_MODE (x) == SFmode)
4647 *cost += extra_cost->float_multiply;
4650 return false; /* All arguments need to be in registers. */
4652 case MOD:
4653 case UMOD:
4654 *cost = COSTS_N_INSNS (2);
4655 if (speed)
4657 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4658 *cost += (extra_cost->int_multiply_add
4659 + extra_cost->int_divide);
4660 else if (GET_MODE (x) == DFmode)
4661 *cost += (extra_cost->double_multiply
4662 + extra_cost->double_divide);
4663 else if (GET_MODE (x) == SFmode)
4664 *cost += (extra_cost->float_multiply
4665 + extra_cost->float_divide);
4667 return false; /* All arguments need to be in registers. */
4669 case DIV:
4670 case UDIV:
4671 *cost = COSTS_N_INSNS (1);
4672 if (speed)
4674 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4675 *cost += extra_cost->int_divide;
4676 else if (GET_MODE (x) == DFmode)
4677 *cost += extra_cost->double_divide;
4678 else if (GET_MODE (x) == SFmode)
4679 *cost += extra_cost->float_divide;
4681 return false; /* All arguments need to be in registers. */
4683 default:
4684 break;
4686 return false;
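/* As a worked example (the numeric costs come from the tuning table and are
   not shown here), a SET whose source is

     (plus:DI (mult:DI (sign_extend:DI (reg:SI a))
		       (sign_extend:DI (reg:SI b)))
	      (reg:DI c))

   is recognised in the PLUS case above as a multiply-extend-add (SMADDL):
   its cost is the cost of the three inner registers plus, when optimizing
   for speed, extra_cost->int_multiply_extend_add.  */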
4689 static int
4690 aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4691 enum machine_mode mode ATTRIBUTE_UNUSED,
4692 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4694 enum rtx_code c = GET_CODE (x);
4695 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4697 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4698 return addr_cost->pre_modify;
4700 if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4701 return addr_cost->post_modify;
4703 if (c == PLUS)
4705 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4706 return addr_cost->imm_offset;
4707 else if (GET_CODE (XEXP (x, 0)) == MULT
4708 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4709 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4710 return addr_cost->register_extend;
4712 return addr_cost->register_offset;
4714 else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4715 return addr_cost->imm_offset;
4717 return 0;
4720 static int
4721 aarch64_register_move_cost (enum machine_mode mode,
4722 reg_class_t from, reg_class_t to)
4724 const struct cpu_regmove_cost *regmove_cost
4725 = aarch64_tune_params->regmove_cost;
4727 if (from == GENERAL_REGS && to == GENERAL_REGS)
4728 return regmove_cost->GP2GP;
4729 else if (from == GENERAL_REGS)
4730 return regmove_cost->GP2FP;
4731 else if (to == GENERAL_REGS)
4732 return regmove_cost->FP2GP;
4734 /* When AdvSIMD instructions are disabled it is not possible to move
4735 a 128-bit value directly between Q registers. This is handled in
4736 secondary reload. A general register is used as a scratch to move
4737 the upper DI value and the lower DI value is moved directly,
4738 hence the cost is the sum of three moves. */
4740 if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
4741 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4743 return regmove_cost->FP2FP;
4746 static int
4747 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4748 reg_class_t rclass ATTRIBUTE_UNUSED,
4749 bool in ATTRIBUTE_UNUSED)
4751 return aarch64_tune_params->memmov_cost;
4754 /* Vectorizer cost model target hooks. */
4756 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4757 static int
4758 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4759 tree vectype,
4760 int misalign ATTRIBUTE_UNUSED)
4762 unsigned elements;
4764 switch (type_of_cost)
4766 case scalar_stmt:
4767 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
4769 case scalar_load:
4770 return aarch64_tune_params->vec_costs->scalar_load_cost;
4772 case scalar_store:
4773 return aarch64_tune_params->vec_costs->scalar_store_cost;
4775 case vector_stmt:
4776 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4778 case vector_load:
4779 return aarch64_tune_params->vec_costs->vec_align_load_cost;
4781 case vector_store:
4782 return aarch64_tune_params->vec_costs->vec_store_cost;
4784 case vec_to_scalar:
4785 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
4787 case scalar_to_vec:
4788 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
4790 case unaligned_load:
4791 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
4793 case unaligned_store:
4794 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
4796 case cond_branch_taken:
4797 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
4799 case cond_branch_not_taken:
4800 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
4802 case vec_perm:
4803 case vec_promote_demote:
4804 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4806 case vec_construct:
4807 elements = TYPE_VECTOR_SUBPARTS (vectype);
4808 return elements / 2 + 1;
4810 default:
4811 gcc_unreachable ();
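/* For example, constructing a V4SF vector (4 sub-parts) is costed above as
   4 / 2 + 1 == 3, and a V16QI vector as 16 / 2 + 1 == 9; every other answer
   comes straight from the tuning structure.  */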
4815 /* Implement targetm.vectorize.add_stmt_cost. */
4816 static unsigned
4817 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
4818 struct _stmt_vec_info *stmt_info, int misalign,
4819 enum vect_cost_model_location where)
4821 unsigned *cost = (unsigned *) data;
4822 unsigned retval = 0;
4824 if (flag_vect_cost_model)
4826 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4827 int stmt_cost =
4828 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
4830 /* Statements in an inner loop relative to the loop being
4831 vectorized are weighted more heavily. The value here is
4832 a function (linear for now) of the loop nest level. */
4833 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4835 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4836 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
4837 unsigned nest_level = loop_depth (loop);
4839 count *= nest_level;
4842 retval = (unsigned) (count * stmt_cost);
4843 cost[where] += retval;
4846 return retval;
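/* Illustration of the weighting above: a statement occurring COUNT times
   with per-copy cost C normally contributes COUNT * C to cost[WHERE].  If
   it lies in an inner loop relative to the loop being vectorized, COUNT is
   first multiplied by that loop's depth, so a statement in a depth-2 inner
   loop adds 2 * COUNT * C to cost[vect_body].  */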
4849 static void initialize_aarch64_code_model (void);
4851 /* Parse the architecture extension string. */
4853 static void
4854 aarch64_parse_extension (char *str)
4856 /* The extension string is parsed left to right. */
4857 const struct aarch64_option_extension *opt = NULL;
4859 /* Flag to say whether we are adding or removing an extension. */
4860 int adding_ext = -1;
4862 while (str != NULL && *str != 0)
4864 char *ext;
4865 size_t len;
4867 str++;
4868 ext = strchr (str, '+');
4870 if (ext != NULL)
4871 len = ext - str;
4872 else
4873 len = strlen (str);
4875 if (len >= 2 && strncmp (str, "no", 2) == 0)
4877 adding_ext = 0;
4878 len -= 2;
4879 str += 2;
4881 else if (len > 0)
4882 adding_ext = 1;
4884 if (len == 0)
4886 error ("missing feature modifier after %qs", "+no");
4887 return;
4890 /* Scan over the extensions table trying to find an exact match. */
4891 for (opt = all_extensions; opt->name != NULL; opt++)
4893 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
4895 /* Add or remove the extension. */
4896 if (adding_ext)
4897 aarch64_isa_flags |= opt->flags_on;
4898 else
4899 aarch64_isa_flags &= ~(opt->flags_off);
4900 break;
4904 if (opt->name == NULL)
4906 /* Extension not found in list. */
4907 error ("unknown feature modifier %qs", str);
4908 return;
4911 str = ext;
4914 return;
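/* As an example (using the "crypto" and "fp" modifiers from
   aarch64-option-extensions.def), the tail of -march=armv8-a+crypto+nofp
   is processed in two passes through the loop above: "+crypto" ORs that
   entry's flags_on into aarch64_isa_flags, then "+nofp" clears the "fp"
   entry's flags_off bits from it.  */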
4917 /* Parse the ARCH string. */
4919 static void
4920 aarch64_parse_arch (void)
4922 char *ext;
4923 const struct processor *arch;
4924 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
4925 size_t len;
4927 strcpy (str, aarch64_arch_string);
4929 ext = strchr (str, '+');
4931 if (ext != NULL)
4932 len = ext - str;
4933 else
4934 len = strlen (str);
4936 if (len == 0)
4938 error ("missing arch name in -march=%qs", str);
4939 return;
4942 /* Loop through the list of supported ARCHs to find a match. */
4943 for (arch = all_architectures; arch->name != NULL; arch++)
4945 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
4947 selected_arch = arch;
4948 aarch64_isa_flags = selected_arch->flags;
4949 selected_cpu = &all_cores[selected_arch->core];
4951 if (ext != NULL)
4953 /* ARCH string contains at least one extension. */
4954 aarch64_parse_extension (ext);
4957 return;
4961 /* ARCH name not found in list. */
4962 error ("unknown value %qs for -march", str);
4963 return;
4966 /* Parse the CPU string. */
4968 static void
4969 aarch64_parse_cpu (void)
4971 char *ext;
4972 const struct processor *cpu;
4973 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
4974 size_t len;
4976 strcpy (str, aarch64_cpu_string);
4978 ext = strchr (str, '+');
4980 if (ext != NULL)
4981 len = ext - str;
4982 else
4983 len = strlen (str);
4985 if (len == 0)
4987 error ("missing cpu name in -mcpu=%qs", str);
4988 return;
4991 /* Loop through the list of supported CPUs to find a match. */
4992 for (cpu = all_cores; cpu->name != NULL; cpu++)
4994 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
4996 selected_cpu = cpu;
4997 aarch64_isa_flags = selected_cpu->flags;
4999 if (ext != NULL)
5001 /* CPU string contains at least one extension. */
5002 aarch64_parse_extension (ext);
5005 return;
5009 /* CPU name not found in list. */
5010 error ("unknown value %qs for -mcpu", str);
5011 return;
5014 /* Parse the TUNE string. */
5016 static void
5017 aarch64_parse_tune (void)
5019 const struct processor *cpu;
5020 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
5021 strcpy (str, aarch64_tune_string);
5023 /* Loop through the list of supported CPUs to find a match. */
5024 for (cpu = all_cores; cpu->name != NULL; cpu++)
5026 if (strcmp (cpu->name, str) == 0)
5028 selected_tune = cpu;
5029 return;
5033 /* CPU name not found in list. */
5034 error ("unknown value %qs for -mtune", str);
5035 return;
5039 /* Implement TARGET_OPTION_OVERRIDE. */
5041 static void
5042 aarch64_override_options (void)
5044 /* -march wins over -mcpu, so when -march is given the CPU is taken from the
5045 selected architecture and any -mcpu setting is ignored; otherwise -march
5046 remains undefined. -mtune can be used with either -march or -mcpu. */
5048 if (aarch64_arch_string)
5050 aarch64_parse_arch ();
5051 aarch64_cpu_string = NULL;
5054 if (aarch64_cpu_string)
5056 aarch64_parse_cpu ();
5057 selected_arch = NULL;
5060 if (aarch64_tune_string)
5062 aarch64_parse_tune ();
5065 initialize_aarch64_code_model ();
5067 aarch64_build_bitmask_table ();
5069 /* This target defaults to strict volatile bitfields. */
5070 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
5071 flag_strict_volatile_bitfields = 1;
5073 /* If the user did not specify a processor, choose the default
5074 one for them. This will be the CPU set during configuration using
5075 --with-cpu, otherwise it is "generic". */
5076 if (!selected_cpu)
5078 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
5079 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
5082 gcc_assert (selected_cpu);
5084 /* The selected cpu may be an architecture, so look up tuning by core ID. */
5085 if (!selected_tune)
5086 selected_tune = &all_cores[selected_cpu->core];
5088 aarch64_tune_flags = selected_tune->flags;
5089 aarch64_tune = selected_tune->core;
5090 aarch64_tune_params = selected_tune->tune;
5092 aarch64_override_options_after_change ();
5095 /* Implement targetm.override_options_after_change. */
5097 static void
5098 aarch64_override_options_after_change (void)
5100 faked_omit_frame_pointer = false;
5102 /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
5103 that aarch64_frame_pointer_required will be called. We need to remember
5104 whether flag_omit_frame_pointer was turned on normally or just faked. */
5106 if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
5108 flag_omit_frame_pointer = true;
5109 faked_omit_frame_pointer = true;
5113 static struct machine_function *
5114 aarch64_init_machine_status (void)
5116 struct machine_function *machine;
5117 machine = ggc_alloc_cleared_machine_function ();
5118 return machine;
5121 void
5122 aarch64_init_expanders (void)
5124 init_machine_status = aarch64_init_machine_status;
5127 /* Select the code model to use, taking flag_pic into account. */
5128 static void
5129 initialize_aarch64_code_model (void)
5131 if (flag_pic)
5133 switch (aarch64_cmodel_var)
5135 case AARCH64_CMODEL_TINY:
5136 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
5137 break;
5138 case AARCH64_CMODEL_SMALL:
5139 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
5140 break;
5141 case AARCH64_CMODEL_LARGE:
5142 sorry ("code model %qs with -f%s", "large",
5143 flag_pic > 1 ? "PIC" : "pic");
5144 default:
5145 gcc_unreachable ();
5148 else
5149 aarch64_cmodel = aarch64_cmodel_var;
5152 /* Return true if SYMBOL_REF X binds locally. */
5154 static bool
5155 aarch64_symbol_binds_local_p (const_rtx x)
5157 return (SYMBOL_REF_DECL (x)
5158 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
5159 : SYMBOL_REF_LOCAL_P (x));
5162 /* Return true if SYMBOL_REF X is thread-local. */
5163 static bool
5164 aarch64_tls_symbol_p (rtx x)
5166 if (! TARGET_HAVE_TLS)
5167 return false;
5169 if (GET_CODE (x) != SYMBOL_REF)
5170 return false;
5172 return SYMBOL_REF_TLS_MODEL (x) != 0;
5175 /* Classify a TLS symbol into one of the TLS kinds. */
5176 enum aarch64_symbol_type
5177 aarch64_classify_tls_symbol (rtx x)
5179 enum tls_model tls_kind = tls_symbolic_operand_type (x);
5181 switch (tls_kind)
5183 case TLS_MODEL_GLOBAL_DYNAMIC:
5184 case TLS_MODEL_LOCAL_DYNAMIC:
5185 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
5187 case TLS_MODEL_INITIAL_EXEC:
5188 return SYMBOL_SMALL_GOTTPREL;
5190 case TLS_MODEL_LOCAL_EXEC:
5191 return SYMBOL_SMALL_TPREL;
5193 case TLS_MODEL_EMULATED:
5194 case TLS_MODEL_NONE:
5195 return SYMBOL_FORCE_TO_MEM;
5197 default:
5198 gcc_unreachable ();
5202 /* Return the method that should be used to access SYMBOL_REF or
5203 LABEL_REF X in context CONTEXT. */
5205 enum aarch64_symbol_type
5206 aarch64_classify_symbol (rtx x,
5207 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
5209 if (GET_CODE (x) == LABEL_REF)
5211 switch (aarch64_cmodel)
5213 case AARCH64_CMODEL_LARGE:
5214 return SYMBOL_FORCE_TO_MEM;
5216 case AARCH64_CMODEL_TINY_PIC:
5217 case AARCH64_CMODEL_TINY:
5218 return SYMBOL_TINY_ABSOLUTE;
5220 case AARCH64_CMODEL_SMALL_PIC:
5221 case AARCH64_CMODEL_SMALL:
5222 return SYMBOL_SMALL_ABSOLUTE;
5224 default:
5225 gcc_unreachable ();
5229 if (GET_CODE (x) == SYMBOL_REF)
5231 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
5232 || CONSTANT_POOL_ADDRESS_P (x))
5233 return SYMBOL_FORCE_TO_MEM;
5235 if (aarch64_tls_symbol_p (x))
5236 return aarch64_classify_tls_symbol (x);
5238 switch (aarch64_cmodel)
5240 case AARCH64_CMODEL_TINY:
5241 if (SYMBOL_REF_WEAK (x))
5242 return SYMBOL_FORCE_TO_MEM;
5243 return SYMBOL_TINY_ABSOLUTE;
5245 case AARCH64_CMODEL_SMALL:
5246 if (SYMBOL_REF_WEAK (x))
5247 return SYMBOL_FORCE_TO_MEM;
5248 return SYMBOL_SMALL_ABSOLUTE;
5250 case AARCH64_CMODEL_TINY_PIC:
5251 if (!aarch64_symbol_binds_local_p (x))
5252 return SYMBOL_TINY_GOT;
5253 return SYMBOL_TINY_ABSOLUTE;
5255 case AARCH64_CMODEL_SMALL_PIC:
5256 if (!aarch64_symbol_binds_local_p (x))
5257 return SYMBOL_SMALL_GOT;
5258 return SYMBOL_SMALL_ABSOLUTE;
5260 default:
5261 gcc_unreachable ();
5265 /* By default push everything into the constant pool. */
5266 return SYMBOL_FORCE_TO_MEM;
5269 bool
5270 aarch64_constant_address_p (rtx x)
5272 return (CONSTANT_P (x) && memory_address_p (DImode, x));
5275 bool
5276 aarch64_legitimate_pic_operand_p (rtx x)
5278 if (GET_CODE (x) == SYMBOL_REF
5279 || (GET_CODE (x) == CONST
5280 && GET_CODE (XEXP (x, 0)) == PLUS
5281 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5282 return false;
5284 return true;
5287 /* Return true if X holds either a quarter-precision or
5288 floating-point +0.0 constant. */
5289 static bool
5290 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5292 if (!CONST_DOUBLE_P (x))
5293 return false;
5295 /* TODO: We could handle moving 0.0 to a TFmode register,
5296 but first we would like to refactor the movtf_aarch64
5297 pattern to be more amenable to splitting moves properly and
5298 to gating correctly on TARGET_SIMD. For now, reject all
5299 constants that are not destined for SFmode or DFmode registers. */
5300 if (!(mode == SFmode || mode == DFmode))
5301 return false;
5303 if (aarch64_float_const_zero_rtx_p (x))
5304 return true;
5305 return aarch64_float_const_representable_p (x);
5308 static bool
5309 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5311 /* Do not allow vector struct mode constants. We could support
5312 0 and -1 easily, but they need support in aarch64-simd.md. */
5313 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5314 return false;
5316 /* This could probably go away because
5317 we now decompose CONST_INTs according to expand_mov_immediate. */
5318 if ((GET_CODE (x) == CONST_VECTOR
5319 && aarch64_simd_valid_immediate (x, mode, false, NULL))
5320 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5321 return !targetm.cannot_force_const_mem (mode, x);
5323 if (GET_CODE (x) == HIGH
5324 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5325 return true;
5327 return aarch64_constant_address_p (x);
5331 aarch64_load_tp (rtx target)
5333 if (!target
5334 || GET_MODE (target) != Pmode
5335 || !register_operand (target, Pmode))
5336 target = gen_reg_rtx (Pmode);
5338 /* Can return in any reg. */
5339 emit_insn (gen_aarch64_load_tp_hard (target));
5340 return target;
5343 /* On AAPCS systems, this is the "struct __va_list". */
5344 static GTY(()) tree va_list_type;
5346 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5347 Return the type to use as __builtin_va_list.
5349 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5351 struct __va_list
5353 void *__stack;
5354 void *__gr_top;
5355 void *__vr_top;
5356 int __gr_offs;
5357 int __vr_offs;
5358 }; */
5360 static tree
5361 aarch64_build_builtin_va_list (void)
5363 tree va_list_name;
5364 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5366 /* Create the type. */
5367 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5368 /* Give it the required name. */
5369 va_list_name = build_decl (BUILTINS_LOCATION,
5370 TYPE_DECL,
5371 get_identifier ("__va_list"),
5372 va_list_type);
5373 DECL_ARTIFICIAL (va_list_name) = 1;
5374 TYPE_NAME (va_list_type) = va_list_name;
5375 TYPE_STUB_DECL (va_list_type) = va_list_name;
5377 /* Create the fields. */
5378 f_stack = build_decl (BUILTINS_LOCATION,
5379 FIELD_DECL, get_identifier ("__stack"),
5380 ptr_type_node);
5381 f_grtop = build_decl (BUILTINS_LOCATION,
5382 FIELD_DECL, get_identifier ("__gr_top"),
5383 ptr_type_node);
5384 f_vrtop = build_decl (BUILTINS_LOCATION,
5385 FIELD_DECL, get_identifier ("__vr_top"),
5386 ptr_type_node);
5387 f_groff = build_decl (BUILTINS_LOCATION,
5388 FIELD_DECL, get_identifier ("__gr_offs"),
5389 integer_type_node);
5390 f_vroff = build_decl (BUILTINS_LOCATION,
5391 FIELD_DECL, get_identifier ("__vr_offs"),
5392 integer_type_node);
5394 DECL_ARTIFICIAL (f_stack) = 1;
5395 DECL_ARTIFICIAL (f_grtop) = 1;
5396 DECL_ARTIFICIAL (f_vrtop) = 1;
5397 DECL_ARTIFICIAL (f_groff) = 1;
5398 DECL_ARTIFICIAL (f_vroff) = 1;
5400 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5401 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5402 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5403 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5404 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5406 TYPE_FIELDS (va_list_type) = f_stack;
5407 DECL_CHAIN (f_stack) = f_grtop;
5408 DECL_CHAIN (f_grtop) = f_vrtop;
5409 DECL_CHAIN (f_vrtop) = f_groff;
5410 DECL_CHAIN (f_groff) = f_vroff;
5412 /* Compute its layout. */
5413 layout_type (va_list_type);
5415 return va_list_type;
5418 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5419 static void
5420 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5422 const CUMULATIVE_ARGS *cum;
5423 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5424 tree stack, grtop, vrtop, groff, vroff;
5425 tree t;
5426 int gr_save_area_size;
5427 int vr_save_area_size;
5428 int vr_offset;
5430 cum = &crtl->args.info;
5431 gr_save_area_size
5432 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5433 vr_save_area_size
5434 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5436 if (TARGET_GENERAL_REGS_ONLY)
5438 if (cum->aapcs_nvrn > 0)
5439 sorry ("%qs and floating point or vector arguments",
5440 "-mgeneral-regs-only");
5441 vr_save_area_size = 0;
5444 f_stack = TYPE_FIELDS (va_list_type_node);
5445 f_grtop = DECL_CHAIN (f_stack);
5446 f_vrtop = DECL_CHAIN (f_grtop);
5447 f_groff = DECL_CHAIN (f_vrtop);
5448 f_vroff = DECL_CHAIN (f_groff);
5450 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5451 NULL_TREE);
5452 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5453 NULL_TREE);
5454 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5455 NULL_TREE);
5456 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5457 NULL_TREE);
5458 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5459 NULL_TREE);
5461 /* Emit code to initialize STACK, which points to the next varargs stack
5462 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
5463 by named arguments. STACK is 8-byte aligned. */
5464 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5465 if (cum->aapcs_stack_size > 0)
5466 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5467 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5468 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5470 /* Emit code to initialize GRTOP, the top of the GR save area.
5471 virtual_incoming_args_rtx should have been 16 byte aligned. */
5472 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5473 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5474 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5476 /* Emit code to initialize VRTOP, the top of the VR save area.
5477 This address is gr_save_area_bytes below GRTOP, rounded
5478 down to the next 16-byte boundary. */
5479 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5480 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5481 STACK_BOUNDARY / BITS_PER_UNIT);
5483 if (vr_offset)
5484 t = fold_build_pointer_plus_hwi (t, -vr_offset);
5485 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5486 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5488 /* Emit code to initialize GROFF, the offset from GRTOP of the
5489 next GPR argument. */
5490 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5491 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5492 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5494 /* Likewise emit code to initialize VROFF, the offset from VRTOP
5495 of the next VR argument. */
5496 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5497 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5498 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
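/* Worked example for the code above, assuming the usual AAPCS64 values
   (NUM_ARG_REGS == 8, NUM_FP_ARG_REGS == 8, UNITS_PER_WORD == 8 and
   UNITS_PER_VREG == 16): for  void f (int n, ...)  one core register is
   taken by the named argument, so gr_save_area_size == 7 * 8 == 56 and
   vr_save_area_size == 8 * 16 == 128.  The va_list fields end up as
   __gr_top == the incoming-arguments pointer, __vr_top == __gr_top - 64
   (56 rounded up to 16), __gr_offs == -56 and __vr_offs == -128.  */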
5501 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
5503 static tree
5504 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5505 gimple_seq *post_p ATTRIBUTE_UNUSED)
5507 tree addr;
5508 bool indirect_p;
5509 bool is_ha; /* is HFA or HVA. */
5510 bool dw_align; /* double-word align. */
5511 enum machine_mode ag_mode = VOIDmode;
5512 int nregs;
5513 enum machine_mode mode;
5515 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5516 tree stack, f_top, f_off, off, arg, roundup, on_stack;
5517 HOST_WIDE_INT size, rsize, adjust, align;
5518 tree t, u, cond1, cond2;
5520 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5521 if (indirect_p)
5522 type = build_pointer_type (type);
5524 mode = TYPE_MODE (type);
5526 f_stack = TYPE_FIELDS (va_list_type_node);
5527 f_grtop = DECL_CHAIN (f_stack);
5528 f_vrtop = DECL_CHAIN (f_grtop);
5529 f_groff = DECL_CHAIN (f_vrtop);
5530 f_vroff = DECL_CHAIN (f_groff);
5532 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5533 f_stack, NULL_TREE);
5534 size = int_size_in_bytes (type);
5535 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5537 dw_align = false;
5538 adjust = 0;
5539 if (aarch64_vfp_is_call_or_return_candidate (mode,
5540 type,
5541 &ag_mode,
5542 &nregs,
5543 &is_ha))
5545 /* TYPE passed in fp/simd registers. */
5546 if (TARGET_GENERAL_REGS_ONLY)
5547 sorry ("%qs and floating point or vector arguments",
5548 "-mgeneral-regs-only");
5550 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5551 unshare_expr (valist), f_vrtop, NULL_TREE);
5552 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5553 unshare_expr (valist), f_vroff, NULL_TREE);
5555 rsize = nregs * UNITS_PER_VREG;
5557 if (is_ha)
5559 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5560 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5562 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5563 && size < UNITS_PER_VREG)
5565 adjust = UNITS_PER_VREG - size;
5568 else
5570 /* TYPE passed in general registers. */
5571 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5572 unshare_expr (valist), f_grtop, NULL_TREE);
5573 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5574 unshare_expr (valist), f_groff, NULL_TREE);
5575 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5576 nregs = rsize / UNITS_PER_WORD;
5578 if (align > 8)
5579 dw_align = true;
5581 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5582 && size < UNITS_PER_WORD)
5584 adjust = UNITS_PER_WORD - size;
5588 /* Get a local temporary for the field value. */
5589 off = get_initialized_tmp_var (f_off, pre_p, NULL);
5591 /* Emit code to branch if off >= 0. */
5592 t = build2 (GE_EXPR, boolean_type_node, off,
5593 build_int_cst (TREE_TYPE (off), 0));
5594 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5596 if (dw_align)
5598 /* Emit: offs = (offs + 15) & -16. */
5599 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5600 build_int_cst (TREE_TYPE (off), 15));
5601 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5602 build_int_cst (TREE_TYPE (off), -16));
5603 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5605 else
5606 roundup = NULL;
5608 /* Update ap.__[g|v]r_offs */
5609 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5610 build_int_cst (TREE_TYPE (off), rsize));
5611 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5613 /* String up. */
5614 if (roundup)
5615 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5617 /* [cond2] if (ap.__[g|v]r_offs > 0) */
5618 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5619 build_int_cst (TREE_TYPE (f_off), 0));
5620 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5622 /* String up: make sure the assignment happens before the use. */
5623 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5624 COND_EXPR_ELSE (cond1) = t;
5626 /* Prepare the trees handling the argument that is passed on the stack;
5627 the top-level node will be stored in ON_STACK. */
5628 arg = get_initialized_tmp_var (stack, pre_p, NULL);
5629 if (align > 8)
5631 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
5632 t = fold_convert (intDI_type_node, arg);
5633 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5634 build_int_cst (TREE_TYPE (t), 15));
5635 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5636 build_int_cst (TREE_TYPE (t), -16));
5637 t = fold_convert (TREE_TYPE (arg), t);
5638 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5640 else
5641 roundup = NULL;
5642 /* Advance ap.__stack */
5643 t = fold_convert (intDI_type_node, arg);
5644 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5645 build_int_cst (TREE_TYPE (t), size + 7));
5646 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5647 build_int_cst (TREE_TYPE (t), -8));
5648 t = fold_convert (TREE_TYPE (arg), t);
5649 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5650 /* String up roundup and advance. */
5651 if (roundup)
5652 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5653 /* String up with arg */
5654 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5655 /* Big-endianness related address adjustment. */
5656 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5657 && size < UNITS_PER_WORD)
5659 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5660 size_int (UNITS_PER_WORD - size));
5661 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5664 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5665 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5667 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
5668 t = off;
5669 if (adjust)
5670 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5671 build_int_cst (TREE_TYPE (off), adjust));
5673 t = fold_convert (sizetype, t);
5674 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5676 if (is_ha)
5678 /* type ha; // treat as "struct {ftype field[n];}"
5679 ... [computing offs]
5680 for (i = 0; i < nregs; ++i, offs += 16)
5681 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5682 return ha; */
5683 int i;
5684 tree tmp_ha, field_t, field_ptr_t;
5686 /* Declare a local variable. */
5687 tmp_ha = create_tmp_var_raw (type, "ha");
5688 gimple_add_tmp_var (tmp_ha);
5690 /* Establish the base type. */
5691 switch (ag_mode)
5693 case SFmode:
5694 field_t = float_type_node;
5695 field_ptr_t = float_ptr_type_node;
5696 break;
5697 case DFmode:
5698 field_t = double_type_node;
5699 field_ptr_t = double_ptr_type_node;
5700 break;
5701 case TFmode:
5702 field_t = long_double_type_node;
5703 field_ptr_t = long_double_ptr_type_node;
5704 break;
5705 /* The half-precision and quad-precision types are not fully supported yet.
5706 Enable the following code once that support is complete; the correct
5707 type node for __fp16 * still needs to be found. */
5708 #if 0
5709 case HFmode:
5710 field_t = float_type_node;
5711 field_ptr_t = float_ptr_type_node;
5712 break;
5713 #endif
5714 case V2SImode:
5715 case V4SImode:
5717 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5718 field_t = build_vector_type_for_mode (innertype, ag_mode);
5719 field_ptr_t = build_pointer_type (field_t);
5721 break;
5722 default:
5723 gcc_assert (0);
5726 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area) */
5727 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5728 addr = t;
5729 t = fold_convert (field_ptr_t, addr);
5730 t = build2 (MODIFY_EXPR, field_t,
5731 build1 (INDIRECT_REF, field_t, tmp_ha),
5732 build1 (INDIRECT_REF, field_t, t));
5734 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
5735 for (i = 1; i < nregs; ++i)
5737 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5738 u = fold_convert (field_ptr_t, addr);
5739 u = build2 (MODIFY_EXPR, field_t,
5740 build2 (MEM_REF, field_t, tmp_ha,
5741 build_int_cst (field_ptr_t,
5742 (i *
5743 int_size_in_bytes (field_t)))),
5744 build1 (INDIRECT_REF, field_t, u));
5745 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5748 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5749 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5752 COND_EXPR_ELSE (cond2) = t;
5753 addr = fold_convert (build_pointer_type (type), cond1);
5754 addr = build_va_arg_indirect_ref (addr);
5756 if (indirect_p)
5757 addr = build_va_arg_indirect_ref (addr);
5759 return addr;
5762 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
5764 static void
5765 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5766 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5767 int no_rtl)
5769 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5770 CUMULATIVE_ARGS local_cum;
5771 int gr_saved, vr_saved;
5773 /* The caller has advanced CUM up to, but not beyond, the last named
5774 argument. Advance a local copy of CUM past the last "real" named
5775 argument, to find out how many registers are left over. */
5776 local_cum = *cum;
5777 aarch64_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, true);
5779 /* Find out how many registers we need to save. */
5780 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5781 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
5783 if (TARGET_GENERAL_REGS_ONLY)
5785 if (local_cum.aapcs_nvrn > 0)
5786 sorry ("%qs and floating point or vector arguments",
5787 "-mgeneral-regs-only");
5788 vr_saved = 0;
5791 if (!no_rtl)
5793 if (gr_saved > 0)
5795 rtx ptr, mem;
5797 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
5798 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5799 - gr_saved * UNITS_PER_WORD);
5800 mem = gen_frame_mem (BLKmode, ptr);
5801 set_mem_alias_set (mem, get_varargs_alias_set ());
5803 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5804 mem, gr_saved);
5806 if (vr_saved > 0)
5808 /* We can't use move_block_from_reg, because it will use
5809 the wrong mode, storing D regs only. */
5810 enum machine_mode mode = TImode;
5811 int off, i;
5813 /* Set OFF to the offset from virtual_incoming_args_rtx of
5814 the first vector register. The VR save area lies below
5815 the GR one, and is aligned to 16 bytes. */
5816 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5817 STACK_BOUNDARY / BITS_PER_UNIT);
5818 off -= vr_saved * UNITS_PER_VREG;
5820 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
5822 rtx ptr, mem;
5824 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
5825 mem = gen_frame_mem (mode, ptr);
5826 set_mem_alias_set (mem, get_varargs_alias_set ());
5827 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
5828 off += UNITS_PER_VREG;
5833 /* We don't save the size into *PRETEND_SIZE because we want to avoid
5834 any complication of having crtl->args.pretend_args_size changed. */
5835 cfun->machine->saved_varargs_size
5836 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5837 STACK_BOUNDARY / BITS_PER_UNIT)
5838 + vr_saved * UNITS_PER_VREG);
5841 static void
5842 aarch64_conditional_register_usage (void)
5844 int i;
5845 if (!TARGET_FLOAT)
5847 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
5849 fixed_regs[i] = 1;
5850 call_used_regs[i] = 1;
5855 /* Walk down the type tree of TYPE counting consecutive base elements.
5856 If *MODEP is VOIDmode, then set it to the first valid floating point
5857 type. If a non-floating point type is found, or if a floating point
5858 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5859 otherwise return the count in the sub-tree. */
5860 static int
5861 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5863 enum machine_mode mode;
5864 HOST_WIDE_INT size;
5866 switch (TREE_CODE (type))
5868 case REAL_TYPE:
5869 mode = TYPE_MODE (type);
5870 if (mode != DFmode && mode != SFmode && mode != TFmode)
5871 return -1;
5873 if (*modep == VOIDmode)
5874 *modep = mode;
5876 if (*modep == mode)
5877 return 1;
5879 break;
5881 case COMPLEX_TYPE:
5882 mode = TYPE_MODE (TREE_TYPE (type));
5883 if (mode != DFmode && mode != SFmode && mode != TFmode)
5884 return -1;
5886 if (*modep == VOIDmode)
5887 *modep = mode;
5889 if (*modep == mode)
5890 return 2;
5892 break;
5894 case VECTOR_TYPE:
5895 /* Use V2SImode and V4SImode as representatives of all 64-bit
5896 and 128-bit vector types. */
5897 size = int_size_in_bytes (type);
5898 switch (size)
5900 case 8:
5901 mode = V2SImode;
5902 break;
5903 case 16:
5904 mode = V4SImode;
5905 break;
5906 default:
5907 return -1;
5910 if (*modep == VOIDmode)
5911 *modep = mode;
5913 /* Vector modes are considered to be opaque: two vectors are
5914 equivalent for the purposes of being homogeneous aggregates
5915 if they are the same size. */
5916 if (*modep == mode)
5917 return 1;
5919 break;
5921 case ARRAY_TYPE:
5923 int count;
5924 tree index = TYPE_DOMAIN (type);
5926 /* Can't handle incomplete types. */
5927 if (!COMPLETE_TYPE_P (type))
5928 return -1;
5930 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5931 if (count == -1
5932 || !index
5933 || !TYPE_MAX_VALUE (index)
5934 || !host_integerp (TYPE_MAX_VALUE (index), 1)
5935 || !TYPE_MIN_VALUE (index)
5936 || !host_integerp (TYPE_MIN_VALUE (index), 1)
5937 || count < 0)
5938 return -1;
5940 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
5941 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
5943 /* There must be no padding. */
5944 if (!host_integerp (TYPE_SIZE (type), 1)
5945 || (tree_low_cst (TYPE_SIZE (type), 1)
5946 != count * GET_MODE_BITSIZE (*modep)))
5947 return -1;
5949 return count;
5952 case RECORD_TYPE:
5954 int count = 0;
5955 int sub_count;
5956 tree field;
5958 /* Can't handle incomplete types. */
5959 if (!COMPLETE_TYPE_P (type))
5960 return -1;
5962 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5964 if (TREE_CODE (field) != FIELD_DECL)
5965 continue;
5967 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5968 if (sub_count < 0)
5969 return -1;
5970 count += sub_count;
5973 /* There must be no padding. */
5974 if (!host_integerp (TYPE_SIZE (type), 1)
5975 || (tree_low_cst (TYPE_SIZE (type), 1)
5976 != count * GET_MODE_BITSIZE (*modep)))
5977 return -1;
5979 return count;
5982 case UNION_TYPE:
5983 case QUAL_UNION_TYPE:
5985 /* These aren't very interesting except in a degenerate case. */
5986 int count = 0;
5987 int sub_count;
5988 tree field;
5990 /* Can't handle incomplete types. */
5991 if (!COMPLETE_TYPE_P (type))
5992 return -1;
5994 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5996 if (TREE_CODE (field) != FIELD_DECL)
5997 continue;
5999 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6000 if (sub_count < 0)
6001 return -1;
6002 count = count > sub_count ? count : sub_count;
6005 /* There must be no padding. */
6006 if (!host_integerp (TYPE_SIZE (type), 1)
6007 || (tree_low_cst (TYPE_SIZE (type), 1)
6008 != count * GET_MODE_BITSIZE (*modep)))
6009 return -1;
6011 return count;
6014 default:
6015 break;
6018 return -1;
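/* Examples of the walk above (types invented for illustration):
     struct { double x, y, z; }    -> 3 with *modep == DFmode (an HFA)
     float v[4]                    -> 4 with *modep == SFmode
     _Complex double               -> 2 with *modep == DFmode
     struct { float f; double d; } -> -1 (mismatched element modes)  */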
6021 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
6022 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
6023 array types. The C99 floating-point complex types are also considered
6024 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
6025 types, which are GCC extensions and out of the scope of AAPCS64, are
6026 treated as composite types here as well.
6028 Note that MODE itself is not sufficient in determining whether a type
6029 is such a composite type or not. This is because
6030 stor-layout.c:compute_record_mode may have already changed the MODE
6031 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
6032 structure with only one field may have its MODE set to the mode of the
6033 field. Also an integer mode whose size matches the size of the
6034 RECORD_TYPE type may be used to substitute the original mode
6035 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
6036 solely relied on. */
6038 static bool
6039 aarch64_composite_type_p (const_tree type,
6040 enum machine_mode mode)
6042 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
6043 return true;
6045 if (mode == BLKmode
6046 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6047 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
6048 return true;
6050 return false;
6053 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
6054 type as described in AAPCS64 \S 4.1.2.
6056 See the comment above aarch64_composite_type_p for the notes on MODE. */
6058 static bool
6059 aarch64_short_vector_p (const_tree type,
6060 enum machine_mode mode)
6062 HOST_WIDE_INT size = -1;
6064 if (type && TREE_CODE (type) == VECTOR_TYPE)
6065 size = int_size_in_bytes (type);
6066 else if (!aarch64_composite_type_p (type, mode)
6067 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6068 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
6069 size = GET_MODE_SIZE (mode);
6071 return size == 8 || size == 16;
6074 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
6075 shall be passed or returned in simd/fp register(s) (providing these
6076 parameter passing registers are available).
6078 Upon successful return, *COUNT returns the number of needed registers,
6079 *BASE_MODE returns the mode of the individual register and when IS_HA
6080 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
6081 floating-point aggregate or a homogeneous short-vector aggregate. */
6083 static bool
6084 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
6085 const_tree type,
6086 enum machine_mode *base_mode,
6087 int *count,
6088 bool *is_ha)
6090 enum machine_mode new_mode = VOIDmode;
6091 bool composite_p = aarch64_composite_type_p (type, mode);
6093 if (is_ha != NULL) *is_ha = false;
6095 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
6096 || aarch64_short_vector_p (type, mode))
6098 *count = 1;
6099 new_mode = mode;
6101 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6103 if (is_ha != NULL) *is_ha = true;
6104 *count = 2;
6105 new_mode = GET_MODE_INNER (mode);
6107 else if (type && composite_p)
6109 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6111 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
6113 if (is_ha != NULL) *is_ha = true;
6114 *count = ag_count;
6116 else
6117 return false;
6119 else
6120 return false;
6122 *base_mode = new_mode;
6123 return true;
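/* For instance, a plain double gives *count == 1 and *base_mode == DFmode;
   a _Complex float gives *count == 2, *base_mode == SFmode and *is_ha set;
   a structure of four floats gives *count == 4 (within the HA_MAX_NUM_FLDS
   limit), while a structure of five floats exceeds the limit and the
   function returns false.  */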
6126 /* Implement TARGET_STRUCT_VALUE_RTX. */
6128 static rtx
6129 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
6130 int incoming ATTRIBUTE_UNUSED)
6132 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
6135 /* Implements target hook vector_mode_supported_p. */
6136 static bool
6137 aarch64_vector_mode_supported_p (enum machine_mode mode)
6139 if (TARGET_SIMD
6140 && (mode == V4SImode || mode == V8HImode
6141 || mode == V16QImode || mode == V2DImode
6142 || mode == V2SImode || mode == V4HImode
6143 || mode == V8QImode || mode == V2SFmode
6144 || mode == V4SFmode || mode == V2DFmode))
6145 return true;
6147 return false;
6150 /* Return appropriate SIMD container
6151 for MODE within a vector of WIDTH bits. */
6152 static enum machine_mode
6153 aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
6155 gcc_assert (width == 64 || width == 128);
6156 if (TARGET_SIMD)
6158 if (width == 128)
6159 switch (mode)
6161 case DFmode:
6162 return V2DFmode;
6163 case SFmode:
6164 return V4SFmode;
6165 case SImode:
6166 return V4SImode;
6167 case HImode:
6168 return V8HImode;
6169 case QImode:
6170 return V16QImode;
6171 case DImode:
6172 return V2DImode;
6173 default:
6174 break;
6176 else
6177 switch (mode)
6179 case SFmode:
6180 return V2SFmode;
6181 case SImode:
6182 return V2SImode;
6183 case HImode:
6184 return V4HImode;
6185 case QImode:
6186 return V8QImode;
6187 default:
6188 break;
6191 return word_mode;
6194 /* Return 128-bit container as the preferred SIMD mode for MODE. */
6195 static enum machine_mode
6196 aarch64_preferred_simd_mode (enum machine_mode mode)
6198 return aarch64_simd_container_mode (mode, 128);
6201 /* Return the bitmask of possible vector sizes for the vectorizer
6202 to iterate over. */
6203 static unsigned int
6204 aarch64_autovectorize_vector_sizes (void)
6206 return (16 | 8);
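/* The value above is a bitmask of candidate vector sizes in bytes:
   16 | 8 tells the vectorizer to try 128-bit vectors first and to fall
   back to 64-bit vectors.  */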
6209 /* A table to help perform AArch64-specific name mangling for AdvSIMD
6210 vector types in order to conform to the AAPCS64 (see "Procedure
6211 Call Standard for the ARM 64-bit Architecture", Appendix A). To
6212 qualify for emission with the mangled names defined in that document,
6213 a vector type must not only be of the correct mode but also be
6214 composed of AdvSIMD vector element types (e.g.
6215 __builtin_aarch64_simd_qi); these types are registered by
6216 aarch64_init_simd_builtins (). In other words, vector types defined
6217 in other ways e.g. via vector_size attribute will get default
6218 mangled names. */
6219 typedef struct
6221 enum machine_mode mode;
6222 const char *element_type_name;
6223 const char *mangled_name;
6224 } aarch64_simd_mangle_map_entry;
6226 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
6227 /* 64-bit containerized types. */
6228 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
6229 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
6230 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
6231 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
6232 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
6233 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
6234 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
6235 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
6236 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6237 /* 128-bit containerized types. */
6238 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
6239 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
6240 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
6241 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
6242 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
6243 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
6244 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
6245 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
6246 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
6247 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6248 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6249 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6250 { VOIDmode, NULL, NULL }
6253 /* Implement TARGET_MANGLE_TYPE. */
6255 static const char *
6256 aarch64_mangle_type (const_tree type)
6258 /* The AArch64 ABI documents say that "__va_list" has to be
6259 mangled as if it is in the "std" namespace.  */
6260 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6261 return "St9__va_list";
6263 /* Check the mode of the vector type, and the name of the vector
6264 element type, against the table. */
6265 if (TREE_CODE (type) == VECTOR_TYPE)
6267 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6269 while (pos->mode != VOIDmode)
6271 tree elt_type = TREE_TYPE (type);
6273 if (pos->mode == TYPE_MODE (type)
6274 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6275 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6276 pos->element_type_name))
6277 return pos->mangled_name;
6279 pos++;
6283 /* Use the default mangling. */
6284 return NULL;
6287 /* Return the equivalent letter for size. */
6288 static char
6289 sizetochar (int size)
6291 switch (size)
6293 case 64: return 'd';
6294 case 32: return 's';
6295 case 16: return 'h';
6296 case 8 : return 'b';
6297 default: gcc_unreachable ();
6301 /* Return true iff X is a uniform vector of floating-point
6302 constants, and the constant can be represented in
6303 quarter-precision form.  Note that, as aarch64_float_const_representable_p
6304 rejects both +0.0 and -0.0, this function also rejects +0.0 and -0.0.  */
6305 static bool
6306 aarch64_vect_float_const_representable_p (rtx x)
6308 int i = 0;
6309 REAL_VALUE_TYPE r0, ri;
6310 rtx x0, xi;
6312 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6313 return false;
6315 x0 = CONST_VECTOR_ELT (x, 0);
6316 if (!CONST_DOUBLE_P (x0))
6317 return false;
6319 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
6321 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6323 xi = CONST_VECTOR_ELT (x, i);
6324 if (!CONST_DOUBLE_P (xi))
6325 return false;
6327 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6328 if (!REAL_VALUES_EQUAL (r0, ri))
6329 return false;
6332 return aarch64_float_const_representable_p (x0);
6335 /* Return true if OP is a valid AdvSIMD immediate for MODE, false otherwise; if INFO is nonnull, describe how the immediate can be generated.  */
6336 bool
6337 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
6338 struct simd_immediate_info *info)
6340 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6341 matches = 1; \
6342 for (i = 0; i < idx; i += (STRIDE)) \
6343 if (!(TEST)) \
6344 matches = 0; \
6345 if (matches) \
6347 immtype = (CLASS); \
6348 elsize = (ELSIZE); \
6349 eshift = (SHIFT); \
6350 emvn = (NEG); \
6351 break; \
6354 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6355 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6356 unsigned char bytes[16];
6357 int immtype = -1, matches;
6358 unsigned int invmask = inverse ? 0xff : 0;
6359 int eshift, emvn;
6361 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6363 if (! (aarch64_simd_imm_zero_p (op, mode)
6364 || aarch64_vect_float_const_representable_p (op)))
6365 return false;
6367 if (info)
6369 info->value = CONST_VECTOR_ELT (op, 0);
6370 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
6371 info->mvn = false;
6372 info->shift = 0;
6375 return true;
6378 /* Splat vector constant out into a byte vector. */
6379 for (i = 0; i < n_elts; i++)
6381 rtx el = CONST_VECTOR_ELT (op, i);
6382 unsigned HOST_WIDE_INT elpart;
6383 unsigned int part, parts;
6385 if (GET_CODE (el) == CONST_INT)
6387 elpart = INTVAL (el);
6388 parts = 1;
6390 else if (GET_CODE (el) == CONST_DOUBLE)
6392 elpart = CONST_DOUBLE_LOW (el);
6393 parts = 2;
6395 else
6396 gcc_unreachable ();
6398 for (part = 0; part < parts; part++)
6400 unsigned int byte;
6401 for (byte = 0; byte < innersize; byte++)
6403 bytes[idx++] = (elpart & 0xff) ^ invmask;
6404 elpart >>= BITS_PER_UNIT;
6406 if (GET_CODE (el) == CONST_DOUBLE)
6407 elpart = CONST_DOUBLE_HIGH (el);
6411 /* Sanity check. */
6412 gcc_assert (idx == GET_MODE_SIZE (mode));
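/* Worked example (illustrative, not from the original sources): a V4SImode
   constant whose four elements are all 0x0000ab00 splats to the byte vector
   { 0x00, 0xab, 0x00, 0x00, ... } (least significant byte first).  It is
   matched by the second (4, 32, ...) check below with SHIFT 8 (immtype 1),
   so INFO ends up describing the value 0xab shifted left by 8 in each
   32-bit lane, which a MOVI with an LSL 8 modifier can materialise.  */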
6416 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6417 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6419 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6420 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6422 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6423 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6425 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6426 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
6428 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
6430 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
6432 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6433 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6435 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6436 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6438 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6439 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6441 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6442 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
6444 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
6446 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
6448 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6449 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6451 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6452 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6454 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6455 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6457 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6458 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6460 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
6462 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6463 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
6465 while (0);
6467 if (immtype == -1)
6468 return false;
6470 if (info)
6472 info->element_width = elsize;
6473 info->mvn = emvn != 0;
6474 info->shift = eshift;
6476 unsigned HOST_WIDE_INT imm = 0;
6478 if (immtype >= 12 && immtype <= 15)
6479 info->msl = true;
6481 /* Un-invert bytes of recognized vector, if necessary. */
6482 if (invmask != 0)
6483 for (i = 0; i < idx; i++)
6484 bytes[i] ^= invmask;
6486 if (immtype == 17)
6488 /* FIXME: Broken on 32-bit H_W_I hosts. */
6489 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6491 for (i = 0; i < 8; i++)
6492 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6493 << (i * BITS_PER_UNIT);
6496 info->value = GEN_INT (imm);
6498 else
6500 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6501 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6503 /* Construct 'abcdefgh' because the assembler cannot handle
6504 generic constants. */
6505 if (info->mvn)
6506 imm = ~imm;
6507 imm = (imm >> info->shift) & 0xff;
6508 info->value = GEN_INT (imm);
6512 return true;
6513 #undef CHECK
6516 static bool
6517 aarch64_const_vec_all_same_int_p (rtx x,
6518 HOST_WIDE_INT minval,
6519 HOST_WIDE_INT maxval)
6521 HOST_WIDE_INT firstval;
6522 int count, i;
6524 if (GET_CODE (x) != CONST_VECTOR
6525 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
6526 return false;
6528 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6529 if (firstval < minval || firstval > maxval)
6530 return false;
6532 count = CONST_VECTOR_NUNITS (x);
6533 for (i = 1; i < count; i++)
6534 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6535 return false;
6537 return true;
6540 /* Check if immediate shift constants are within range.  */
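/* For example (illustrative): with 32-bit elements (e.g. V4SImode), a
   left-shift immediate must be a vector of identical values in [0, 31],
   while a right-shift immediate must lie in [1, 32].  */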
6541 bool
6542 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6544 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6545 if (left)
6546 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6547 else
6548 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
6551 /* Return true if X is a uniform vector where all elements
6552 are either the floating-point constant 0.0 or the
6553 integer constant 0. */
6554 bool
6555 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6557 return x == CONST0_RTX (mode);
6560 bool
6561 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6563 HOST_WIDE_INT imm = INTVAL (x);
6564 int i;
6566 for (i = 0; i < 8; i++)
6568 unsigned int byte = imm & 0xff;
6569 if (byte != 0xff && byte != 0)
6570 return false;
6571 imm >>= 8;
6574 return true;
6577 bool
6578 aarch64_mov_operand_p (rtx x,
6579 enum aarch64_symbol_context context,
6580 enum machine_mode mode)
6582 if (GET_CODE (x) == HIGH
6583 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6584 return true;
6586 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
6587 return true;
6589 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
6590 return true;
6592 return aarch64_classify_symbolic_expression (x, context)
6593 == SYMBOL_TINY_ABSOLUTE;
6596 /* Return a const_int vector of VAL. */
6598 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6600 int nunits = GET_MODE_NUNITS (mode);
6601 rtvec v = rtvec_alloc (nunits);
6602 int i;
6604 for (i=0; i < nunits; i++)
6605 RTVEC_ELT (v, i) = GEN_INT (val);
6607 return gen_rtx_CONST_VECTOR (mode, v);
6610 /* Check OP is a legal scalar immediate for the MOVI instruction. */
6612 bool
6613 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
6615 enum machine_mode vmode;
6617 gcc_assert (!VECTOR_MODE_P (mode));
6618 vmode = aarch64_preferred_simd_mode (mode);
6619 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
6620 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
6623 /* Construct and return a PARALLEL RTX vector. */
6625 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6627 int nunits = GET_MODE_NUNITS (mode);
6628 rtvec v = rtvec_alloc (nunits / 2);
6629 int base = high ? nunits / 2 : 0;
6630 rtx t1;
6631 int i;
6633 for (i=0; i < nunits / 2; i++)
6634 RTVEC_ELT (v, i) = GEN_INT (base + i);
6636 t1 = gen_rtx_PARALLEL (mode, v);
6637 return t1;
6640 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
6641 HIGH (exclusive). */
6642 void
6643 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6645 HOST_WIDE_INT lane;
6646 gcc_assert (GET_CODE (operand) == CONST_INT);
6647 lane = INTVAL (operand);
6649 if (lane < low || lane >= high)
6650 error ("lane out of range");
6653 void
6654 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6656 gcc_assert (GET_CODE (operand) == CONST_INT);
6657 HOST_WIDE_INT lane = INTVAL (operand);
6659 if (lane < low || lane >= high)
6660 error ("constant out of range");
6663 /* Emit code to reinterpret one AdvSIMD type as another,
6664 without altering bits. */
6665 void
6666 aarch64_simd_reinterpret (rtx dest, rtx src)
6668 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6671 /* Emit code to place an AdvSIMD pair result in memory locations (with equal
6672 registers). */
6673 void
6674 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6675 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6676 rtx op1)
6678 rtx mem = gen_rtx_MEM (mode, destaddr);
6679 rtx tmp1 = gen_reg_rtx (mode);
6680 rtx tmp2 = gen_reg_rtx (mode);
6682 emit_insn (intfn (tmp1, op1, tmp2));
6684 emit_move_insn (mem, tmp1);
6685 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6686 emit_move_insn (mem, tmp2);
6689 /* Return TRUE if OP is a valid vector addressing mode. */
6690 bool
6691 aarch64_simd_mem_operand_p (rtx op)
6693 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6694 || GET_CODE (XEXP (op, 0)) == REG);
6697 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
6698 not to early-clobber SRC registers in the process.
6700 We assume that the operands described by SRC and DEST represent a
6701 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
6702 number of components into which the copy has been decomposed. */
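/* Example (illustrative, register numbers hypothetical): when copying a
   two-register value from {V1, V2} into {V2, V3}, the destination overlaps
   the source and has the higher register number, so the moves are emitted
   in reverse order (V3 <- V2 first, then V2 <- V1) to avoid clobbering V2
   before it has been read.  */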
6703 void
6704 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6705 rtx *src, unsigned int count)
6707 unsigned int i;
6709 if (!reg_overlap_mentioned_p (operands[0], operands[1])
6710 || REGNO (operands[0]) < REGNO (operands[1]))
6712 for (i = 0; i < count; i++)
6714 operands[2 * i] = dest[i];
6715 operands[2 * i + 1] = src[i];
6718 else
6720 for (i = 0; i < count; i++)
6722 operands[2 * i] = dest[count - i - 1];
6723 operands[2 * i + 1] = src[count - i - 1];
6728 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6729 one of VSTRUCT modes: OI, CI or XI. */
6731 aarch64_simd_attr_length_move (rtx insn)
6733 enum machine_mode mode;
6735 extract_insn_cached (insn);
6737 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6739 mode = GET_MODE (recog_data.operand[0]);
6740 switch (mode)
6742 case OImode:
6743 return 8;
6744 case CImode:
6745 return 12;
6746 case XImode:
6747 return 16;
6748 default:
6749 gcc_unreachable ();
6752 return 4;
6755 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
6756 alignment of a vector to 128 bits. */
6757 static HOST_WIDE_INT
6758 aarch64_simd_vector_alignment (const_tree type)
6760 HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
6761 return MIN (align, 128);
6764 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
6765 static bool
6766 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
6768 if (is_packed)
6769 return false;
6771 /* We guarantee alignment for vectors up to 128-bits. */
6772 if (tree_int_cst_compare (TYPE_SIZE (type),
6773 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
6774 return false;
6776 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
6777 return true;
6780 /* If VALS is a vector constant that can be loaded into a register
6781 using DUP, generate instructions to do so and return an RTX to
6782 assign to the register. Otherwise return NULL_RTX. */
6783 static rtx
6784 aarch64_simd_dup_constant (rtx vals)
6786 enum machine_mode mode = GET_MODE (vals);
6787 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6788 int n_elts = GET_MODE_NUNITS (mode);
6789 bool all_same = true;
6790 rtx x;
6791 int i;
6793 if (GET_CODE (vals) != CONST_VECTOR)
6794 return NULL_RTX;
6796 for (i = 1; i < n_elts; ++i)
6798 x = CONST_VECTOR_ELT (vals, i);
6799 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
6800 all_same = false;
6803 if (!all_same)
6804 return NULL_RTX;
6806 /* We can load this constant by using DUP and a constant in a
6807 single general-purpose register.  This will be cheaper than a vector
6808 load. */
6809 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
6810 return gen_rtx_VEC_DUPLICATE (mode, x);
6814 /* Generate code to load VALS, which is a PARALLEL containing only
6815 constants (for vec_init) or CONST_VECTOR, efficiently into a
6816 register. Returns an RTX to copy into the register, or NULL_RTX
6817 for a PARALLEL that can not be converted into a CONST_VECTOR. */
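/* Sketch of the decision order below (illustrative examples, not from the
   original sources): a vector such as {3, 3, 3, 3} that is a valid MOVI/MVNI
   immediate is returned as a CONST_VECTOR; an all-equal vector whose value
   is not encodable that way (say four copies of 0x12345678) falls back to
   DUP from a general register; anything else that is still constant (e.g.
   {1, 2, 3, 4}) is loaded from the constant pool.  */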
6818 static rtx
6819 aarch64_simd_make_constant (rtx vals)
6821 enum machine_mode mode = GET_MODE (vals);
6822 rtx const_dup;
6823 rtx const_vec = NULL_RTX;
6824 int n_elts = GET_MODE_NUNITS (mode);
6825 int n_const = 0;
6826 int i;
6828 if (GET_CODE (vals) == CONST_VECTOR)
6829 const_vec = vals;
6830 else if (GET_CODE (vals) == PARALLEL)
6832 /* A CONST_VECTOR must contain only CONST_INTs and
6833 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
6834 Only store valid constants in a CONST_VECTOR. */
6835 for (i = 0; i < n_elts; ++i)
6837 rtx x = XVECEXP (vals, 0, i);
6838 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
6839 n_const++;
6841 if (n_const == n_elts)
6842 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6844 else
6845 gcc_unreachable ();
6847 if (const_vec != NULL_RTX
6848 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
6849 /* Load using MOVI/MVNI. */
6850 return const_vec;
6851 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
6852 /* Loaded using DUP. */
6853 return const_dup;
6854 else if (const_vec != NULL_RTX)
6855 /* Load from constant pool. We can not take advantage of single-cycle
6856 LD1 because we need a PC-relative addressing mode. */
6857 return const_vec;
6858 else
6859 /* A PARALLEL containing something not valid inside CONST_VECTOR.
6860 We can not construct an initializer. */
6861 return NULL_RTX;
6864 void
6865 aarch64_expand_vector_init (rtx target, rtx vals)
6867 enum machine_mode mode = GET_MODE (target);
6868 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6869 int n_elts = GET_MODE_NUNITS (mode);
6870 int n_var = 0, one_var = -1;
6871 bool all_same = true;
6872 rtx x, mem;
6873 int i;
6875 x = XVECEXP (vals, 0, 0);
6876 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6877 n_var = 1, one_var = 0;
6879 for (i = 1; i < n_elts; ++i)
6881 x = XVECEXP (vals, 0, i);
6882 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6883 ++n_var, one_var = i;
6885 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6886 all_same = false;
6889 if (n_var == 0)
6891 rtx constant = aarch64_simd_make_constant (vals);
6892 if (constant != NULL_RTX)
6894 emit_move_insn (target, constant);
6895 return;
6899 /* Splat a single non-constant element if we can. */
6900 if (all_same)
6902 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
6903 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
6904 return;
6907 /* One field is non-constant. Load constant then overwrite varying
6908 field. This is more efficient than using the stack. */
6909 if (n_var == 1)
6911 rtx copy = copy_rtx (vals);
6912 rtx index = GEN_INT (one_var);
6913 enum insn_code icode;
6915 /* Load constant part of vector, substitute neighboring value for
6916 varying element. */
6917 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
6918 aarch64_expand_vector_init (target, copy);
6920 /* Insert variable. */
6921 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
6922 icode = optab_handler (vec_set_optab, mode);
6923 gcc_assert (icode != CODE_FOR_nothing);
6924 emit_insn (GEN_FCN (icode) (target, x, index));
6925 return;
6928 /* Construct the vector in memory one field at a time
6929 and load the whole vector. */
6930 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6931 for (i = 0; i < n_elts; i++)
6932 emit_move_insn (adjust_address_nv (mem, inner_mode,
6933 i * GET_MODE_SIZE (inner_mode)),
6934 XVECEXP (vals, 0, i));
6935 emit_move_insn (target, mem);
6939 static unsigned HOST_WIDE_INT
6940 aarch64_shift_truncation_mask (enum machine_mode mode)
6942 return
6943 (aarch64_vector_mode_supported_p (mode)
6944 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
6947 #ifndef TLS_SECTION_ASM_FLAG
6948 #define TLS_SECTION_ASM_FLAG 'T'
6949 #endif
6951 void
6952 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
6953 tree decl ATTRIBUTE_UNUSED)
6955 char flagchars[10], *f = flagchars;
6957 /* If we have already declared this section, we can use an
6958 abbreviated form to switch back to it -- unless this section is
6959 part of a COMDAT group, in which case GAS requires the full
6960 declaration every time. */
6961 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6962 && (flags & SECTION_DECLARED))
6964 fprintf (asm_out_file, "\t.section\t%s\n", name);
6965 return;
6968 if (!(flags & SECTION_DEBUG))
6969 *f++ = 'a';
6970 if (flags & SECTION_WRITE)
6971 *f++ = 'w';
6972 if (flags & SECTION_CODE)
6973 *f++ = 'x';
6974 if (flags & SECTION_SMALL)
6975 *f++ = 's';
6976 if (flags & SECTION_MERGE)
6977 *f++ = 'M';
6978 if (flags & SECTION_STRINGS)
6979 *f++ = 'S';
6980 if (flags & SECTION_TLS)
6981 *f++ = TLS_SECTION_ASM_FLAG;
6982 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6983 *f++ = 'G';
6984 *f = '\0';
6986 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
6988 if (!(flags & SECTION_NOTYPE))
6990 const char *type;
6991 const char *format;
6993 if (flags & SECTION_BSS)
6994 type = "nobits";
6995 else
6996 type = "progbits";
6998 #ifdef TYPE_OPERAND_FMT
6999 format = "," TYPE_OPERAND_FMT;
7000 #else
7001 format = ",@%s";
7002 #endif
7004 fprintf (asm_out_file, format, type);
7006 if (flags & SECTION_ENTSIZE)
7007 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
7008 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7010 if (TREE_CODE (decl) == IDENTIFIER_NODE)
7011 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
7012 else
7013 fprintf (asm_out_file, ",%s,comdat",
7014 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
7018 putc ('\n', asm_out_file);
7021 /* Select a format to encode pointers in exception handling data. */
7023 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
7025 int type;
7026 switch (aarch64_cmodel)
7028 case AARCH64_CMODEL_TINY:
7029 case AARCH64_CMODEL_TINY_PIC:
7030 case AARCH64_CMODEL_SMALL:
7031 case AARCH64_CMODEL_SMALL_PIC:
7032 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
7033 for everything. */
7034 type = DW_EH_PE_sdata4;
7035 break;
7036 default:
7037 /* No assumptions here. 8-byte relocs required. */
7038 type = DW_EH_PE_sdata8;
7039 break;
7041 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
7044 /* Emit load exclusive. */
7046 static void
7047 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
7048 rtx mem, rtx model_rtx)
7050 rtx (*gen) (rtx, rtx, rtx);
7052 switch (mode)
7054 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
7055 case HImode: gen = gen_aarch64_load_exclusivehi; break;
7056 case SImode: gen = gen_aarch64_load_exclusivesi; break;
7057 case DImode: gen = gen_aarch64_load_exclusivedi; break;
7058 default:
7059 gcc_unreachable ();
7062 emit_insn (gen (rval, mem, model_rtx));
7065 /* Emit store exclusive. */
7067 static void
7068 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
7069 rtx rval, rtx mem, rtx model_rtx)
7071 rtx (*gen) (rtx, rtx, rtx, rtx);
7073 switch (mode)
7075 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
7076 case HImode: gen = gen_aarch64_store_exclusivehi; break;
7077 case SImode: gen = gen_aarch64_store_exclusivesi; break;
7078 case DImode: gen = gen_aarch64_store_exclusivedi; break;
7079 default:
7080 gcc_unreachable ();
7083 emit_insn (gen (bval, rval, mem, model_rtx));
7086 /* Emit jump pattern INSN and mark it as very unlikely to be taken.  */
7088 static void
7089 aarch64_emit_unlikely_jump (rtx insn)
7091 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
7093 insn = emit_jump_insn (insn);
7094 add_reg_note (insn, REG_BR_PROB, very_unlikely);
7097 /* Expand a compare and swap pattern. */
7099 void
7100 aarch64_expand_compare_and_swap (rtx operands[])
7102 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
7103 enum machine_mode mode, cmp_mode;
7104 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
7106 bval = operands[0];
7107 rval = operands[1];
7108 mem = operands[2];
7109 oldval = operands[3];
7110 newval = operands[4];
7111 is_weak = operands[5];
7112 mod_s = operands[6];
7113 mod_f = operands[7];
7114 mode = GET_MODE (mem);
7115 cmp_mode = mode;
7117 /* Normally the succ memory model must be stronger than fail, but in the
7118 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
7119 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
7121 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
7122 && INTVAL (mod_s) == MEMMODEL_RELEASE)
7123 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
7125 switch (mode)
7127 case QImode:
7128 case HImode:
7129 /* For short modes, we're going to perform the comparison in SImode,
7130 so do the zero-extension now. */
7131 cmp_mode = SImode;
7132 rval = gen_reg_rtx (SImode);
7133 oldval = convert_modes (SImode, mode, oldval, true);
7134 /* Fall through. */
7136 case SImode:
7137 case DImode:
7138 /* Force the value into a register if needed. */
7139 if (!aarch64_plus_operand (oldval, mode))
7140 oldval = force_reg (cmp_mode, oldval);
7141 break;
7143 default:
7144 gcc_unreachable ();
7147 switch (mode)
7149 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
7150 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
7151 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
7152 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
7153 default:
7154 gcc_unreachable ();
7157 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
7159 if (mode == QImode || mode == HImode)
7160 emit_move_insn (operands[1], gen_lowpart (mode, rval));
7162 x = gen_rtx_REG (CCmode, CC_REGNUM);
7163 x = gen_rtx_EQ (SImode, x, const0_rtx);
7164 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
7167 /* Split a compare and swap pattern. */
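/* The split sequence emitted below is, roughly (illustrative sketch only):

     label1:                              ; strong variant only
       load-exclusive    rval, [mem]
       compare           rval, oldval
       branch-if-ne      label2           ; marked very unlikely
       store-exclusive   scratch, newval, [mem]
       branch-if-nonzero scratch, label1  ; strong variant: retry on failure
       ;; weak variant: instead just compare SCRATCH against zero
     label2:                                                               */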
7169 void
7170 aarch64_split_compare_and_swap (rtx operands[])
7172 rtx rval, mem, oldval, newval, scratch;
7173 enum machine_mode mode;
7174 bool is_weak;
7175 rtx label1, label2, x, cond;
7177 rval = operands[0];
7178 mem = operands[1];
7179 oldval = operands[2];
7180 newval = operands[3];
7181 is_weak = (operands[4] != const0_rtx);
7182 scratch = operands[7];
7183 mode = GET_MODE (mem);
7185 label1 = NULL_RTX;
7186 if (!is_weak)
7188 label1 = gen_label_rtx ();
7189 emit_label (label1);
7191 label2 = gen_label_rtx ();
7193 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
7195 cond = aarch64_gen_compare_reg (NE, rval, oldval);
7196 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7197 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7198 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
7199 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7201 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
7203 if (!is_weak)
7205 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
7206 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7207 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
7208 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7210 else
7212 cond = gen_rtx_REG (CCmode, CC_REGNUM);
7213 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7214 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7217 emit_label (label2);
7220 /* Split an atomic operation. */
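/* For example (illustrative), an atomic add splits into a loop of the form:
     retry:
       load-exclusive    old, [mem]
       new = old + value
       store-exclusive   status, new, [mem]
       branch-if-nonzero status, retry    ; marked very unlikely
   with OLD_OUT/NEW_OUT providing the fetch-and-op or op-and-fetch result.  */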
7222 void
7223 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
7224 rtx value, rtx model_rtx, rtx cond)
7226 enum machine_mode mode = GET_MODE (mem);
7227 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7228 rtx label, x;
7230 label = gen_label_rtx ();
7231 emit_label (label);
7233 if (new_out)
7234 new_out = gen_lowpart (wmode, new_out);
7235 if (old_out)
7236 old_out = gen_lowpart (wmode, old_out);
7237 else
7238 old_out = new_out;
7239 value = simplify_gen_subreg (wmode, value, mode, 0);
7241 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7243 switch (code)
7245 case SET:
7246 new_out = value;
7247 break;
7249 case NOT:
7250 x = gen_rtx_AND (wmode, old_out, value);
7251 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7252 x = gen_rtx_NOT (wmode, new_out);
7253 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7254 break;
7256 case MINUS:
7257 if (CONST_INT_P (value))
7259 value = GEN_INT (-INTVAL (value));
7260 code = PLUS;
7262 /* Fall through. */
7264 default:
7265 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
7266 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7267 break;
7270 aarch64_emit_store_exclusive (mode, cond, mem,
7271 gen_lowpart (mode, new_out), model_rtx);
7273 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7274 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7275 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
7276 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7279 static void
7280 aarch64_print_extension (void)
7282 const struct aarch64_option_extension *opt = NULL;
7284 for (opt = all_extensions; opt->name != NULL; opt++)
7285 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
7286 asm_fprintf (asm_out_file, "+%s", opt->name);
7288 asm_fprintf (asm_out_file, "\n");
7291 static void
7292 aarch64_start_file (void)
7294 if (selected_arch)
7296 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
7297 aarch64_print_extension ();
7299 else if (selected_cpu)
7301 asm_fprintf (asm_out_file, "\t.cpu %s", selected_cpu->name);
7302 aarch64_print_extension ();
7304 default_file_start();
7307 /* Target hook for c_mode_for_suffix. */
7308 static enum machine_mode
7309 aarch64_c_mode_for_suffix (char suffix)
7311 if (suffix == 'q')
7312 return TFmode;
7314 return VOIDmode;
7317 /* We can only represent floating point constants which will fit in
7318 "quarter-precision" values. These values are characterised by
7319 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given by:
7322 (-1)^s * (n/16) * 2^r
7324 Where:
7325 's' is the sign bit.
7326 'n' is an integer in the range 16 <= n <= 31.
7327 'r' is an integer in the range -3 <= r <= 4. */
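/* Illustrative examples of representable values (an added note, not from
   the original sources):
     1.0   = (-1)^0 * (16/16) * 2^0
     -2.5  = (-1)^1 * (20/16) * 2^1
     0.125 = (-1)^0 * (16/16) * 2^-3   (smallest magnitude)
     31.0  = (-1)^0 * (31/16) * 2^4    (largest magnitude)  */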
7329 /* Return true iff X can be represented by a quarter-precision
7330 floating point immediate operand.  Note, we cannot represent 0.0.  */
7331 bool
7332 aarch64_float_const_representable_p (rtx x)
7334 /* This represents our current view of how many bits
7335 make up the mantissa. */
7336 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7337 int exponent;
7338 unsigned HOST_WIDE_INT mantissa, mask;
7339 HOST_WIDE_INT m1, m2;
7340 REAL_VALUE_TYPE r, m;
7342 if (!CONST_DOUBLE_P (x))
7343 return false;
7345 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7347 /* We cannot represent infinities, NaNs or +/-zero. We won't
7348 know if we have +zero until we analyse the mantissa, but we
7349 can reject the other invalid values. */
7350 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
7351 || REAL_VALUE_MINUS_ZERO (r))
7352 return false;
7354 /* Extract exponent. */
7355 r = real_value_abs (&r);
7356 exponent = REAL_EXP (&r);
7358 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7359 highest (sign) bit, with a fixed binary point at bit point_pos.
7360 m1 holds the low part of the mantissa, m2 the high part.
7361 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
7362 bits for the mantissa, this can fail (low bits will be lost). */
7363 real_ldexp (&m, &r, point_pos - exponent);
7364 REAL_VALUE_TO_INT (&m1, &m2, m);
7366 /* If the low part of the mantissa has bits set we cannot represent
7367 the value. */
7368 if (m1 != 0)
7369 return false;
7370 /* We have rejected the lower HOST_WIDE_INT, so update our
7371 understanding of how many bits lie in the mantissa and
7372 look only at the high HOST_WIDE_INT. */
7373 mantissa = m2;
7374 point_pos -= HOST_BITS_PER_WIDE_INT;
7376 /* We can only represent values with a mantissa of the form 1.xxxx. */
7377 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7378 if ((mantissa & mask) != 0)
7379 return false;
7381 /* Having filtered unrepresentable values, we may now remove all
7382 but the highest 5 bits. */
7383 mantissa >>= point_pos - 5;
7385 /* We cannot represent the value 0.0, so reject it. This is handled
7386 elsewhere. */
7387 if (mantissa == 0)
7388 return false;
7390 /* Then, as bit 4 is always set, we can mask it off, leaving
7391 the mantissa in the range [0, 15]. */
7392 mantissa &= ~(1 << 4);
7393 gcc_assert (mantissa <= 15);
7395 /* GCC internally does not use IEEE754-like encoding (where normalized
7396 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
7397 Our mantissa values are shifted 4 places to the left relative to
7398 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
7399 by 5 places to correct for GCC's representation. */
7400 exponent = 5 - exponent;
7402 return (exponent >= 0 && exponent <= 7);
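/* Added note (illustrative, not from the original sources): the routine
   below builds the textual MOVI/MVNI/FMOV operand string.  For example, a
   V4SImode vector of four copies of 0x0000ab00 would come back from
   aarch64_simd_valid_immediate with value 0xab and shift 8, producing
   roughly "movi\t%0.4s, 0xab, lsl 8"; a V2DFmode vector of 2.0 would
   instead take the FMOV path and print the decimal form of the constant.  */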
7405 char*
7406 aarch64_output_simd_mov_immediate (rtx const_vector,
7407 enum machine_mode mode,
7408 unsigned width)
7410 bool is_valid;
7411 static char templ[40];
7412 const char *mnemonic;
7413 const char *shift_op;
7414 unsigned int lane_count = 0;
7415 char element_char;
7417 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
7419 /* This will return true to show const_vector is legal for use as either
7420 an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
7421 also update INFO to show how the immediate should be generated. */
7422 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
7423 gcc_assert (is_valid);
7425 element_char = sizetochar (info.element_width);
7426 lane_count = width / info.element_width;
7428 mode = GET_MODE_INNER (mode);
7429 if (mode == SFmode || mode == DFmode)
7431 gcc_assert (info.shift == 0 && ! info.mvn);
7432 if (aarch64_float_const_zero_rtx_p (info.value))
7433 info.value = GEN_INT (0);
7434 else
7436 #define buf_size 20
7437 REAL_VALUE_TYPE r;
7438 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
7439 char float_buf[buf_size] = {'\0'};
7440 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
7441 #undef buf_size
7443 if (lane_count == 1)
7444 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
7445 else
7446 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
7447 lane_count, element_char, float_buf);
7448 return templ;
7452 mnemonic = info.mvn ? "mvni" : "movi";
7453 shift_op = info.msl ? "msl" : "lsl";
7455 if (lane_count == 1)
7456 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
7457 mnemonic, UINTVAL (info.value));
7458 else if (info.shift)
7459 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
7460 ", %s %d", mnemonic, lane_count, element_char,
7461 UINTVAL (info.value), shift_op, info.shift);
7462 else
7463 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
7464 mnemonic, lane_count, element_char, UINTVAL (info.value));
7465 return templ;
7468 char*
7469 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
7470 enum machine_mode mode)
7472 enum machine_mode vmode;
7474 gcc_assert (!VECTOR_MODE_P (mode));
7475 vmode = aarch64_simd_container_mode (mode, 64);
7476 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
7477 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
7480 /* Split operands into moves from op[1] + op[2] into op[0]. */
7482 void
7483 aarch64_split_combinev16qi (rtx operands[3])
7485 unsigned int dest = REGNO (operands[0]);
7486 unsigned int src1 = REGNO (operands[1]);
7487 unsigned int src2 = REGNO (operands[2]);
7488 enum machine_mode halfmode = GET_MODE (operands[1]);
7489 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
7490 rtx destlo, desthi;
7492 gcc_assert (halfmode == V16QImode);
7494 if (src1 == dest && src2 == dest + halfregs)
7496 /* No-op move. Can't split to nothing; emit something. */
7497 emit_note (NOTE_INSN_DELETED);
7498 return;
7501 /* Preserve register attributes for variable tracking. */
7502 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
7503 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
7504 GET_MODE_SIZE (halfmode));
7506 /* Special case of reversed high/low parts. */
7507 if (reg_overlap_mentioned_p (operands[2], destlo)
7508 && reg_overlap_mentioned_p (operands[1], desthi))
7510 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7511 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
7512 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7514 else if (!reg_overlap_mentioned_p (operands[2], destlo))
7516 /* Try to avoid unnecessary moves if part of the result
7517 is in the right place already. */
7518 if (src1 != dest)
7519 emit_move_insn (destlo, operands[1]);
7520 if (src2 != dest + halfregs)
7521 emit_move_insn (desthi, operands[2]);
7523 else
7525 if (src2 != dest + halfregs)
7526 emit_move_insn (desthi, operands[2]);
7527 if (src1 != dest)
7528 emit_move_insn (destlo, operands[1]);
7532 /* vec_perm support. */
7534 #define MAX_VECT_LEN 16
7536 struct expand_vec_perm_d
7538 rtx target, op0, op1;
7539 unsigned char perm[MAX_VECT_LEN];
7540 enum machine_mode vmode;
7541 unsigned char nelt;
7542 bool one_vector_p;
7543 bool testing_p;
7546 /* Generate a variable permutation. */
7548 static void
7549 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
7551 enum machine_mode vmode = GET_MODE (target);
7552 bool one_vector_p = rtx_equal_p (op0, op1);
7554 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
7555 gcc_checking_assert (GET_MODE (op0) == vmode);
7556 gcc_checking_assert (GET_MODE (op1) == vmode);
7557 gcc_checking_assert (GET_MODE (sel) == vmode);
7558 gcc_checking_assert (TARGET_SIMD);
7560 if (one_vector_p)
7562 if (vmode == V8QImode)
7564 /* Expand the argument to a V16QI mode by duplicating it. */
7565 rtx pair = gen_reg_rtx (V16QImode);
7566 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
7567 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7569 else
7571 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
7574 else
7576 rtx pair;
7578 if (vmode == V8QImode)
7580 pair = gen_reg_rtx (V16QImode);
7581 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
7582 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7584 else
7586 pair = gen_reg_rtx (OImode);
7587 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
7588 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
7593 void
7594 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
7596 enum machine_mode vmode = GET_MODE (target);
7597 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
7598 bool one_vector_p = rtx_equal_p (op0, op1);
7599 rtx rmask[MAX_VECT_LEN], mask;
7601 gcc_checking_assert (!BYTES_BIG_ENDIAN);
7603 /* The TBL instruction does not use a modulo index, so we must take care
7604 of that ourselves. */
7605 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
7606 for (i = 0; i < nelt; ++i)
7607 rmask[i] = mask;
7608 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
7609 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
7611 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
7614 /* Recognize patterns suitable for the TRN instructions. */
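/* Added example (little-endian element numbering, as in the checks below):
   for V4SImode, TRN1 realises the permutation {0, 4, 2, 6} and TRN2
   realises {1, 5, 3, 7}.  */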
7615 static bool
7616 aarch64_evpc_trn (struct expand_vec_perm_d *d)
7618 unsigned int i, odd, mask, nelt = d->nelt;
7619 rtx out, in0, in1, x;
7620 rtx (*gen) (rtx, rtx, rtx);
7621 enum machine_mode vmode = d->vmode;
7623 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7624 return false;
7626 /* Note that these are little-endian tests.
7627 We correct for big-endian later. */
7628 if (d->perm[0] == 0)
7629 odd = 0;
7630 else if (d->perm[0] == 1)
7631 odd = 1;
7632 else
7633 return false;
7634 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7636 for (i = 0; i < nelt; i += 2)
7638 if (d->perm[i] != i + odd)
7639 return false;
7640 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
7641 return false;
7644 /* Success! */
7645 if (d->testing_p)
7646 return true;
7648 in0 = d->op0;
7649 in1 = d->op1;
7650 if (BYTES_BIG_ENDIAN)
7652 x = in0, in0 = in1, in1 = x;
7653 odd = !odd;
7655 out = d->target;
7657 if (odd)
7659 switch (vmode)
7661 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
7662 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
7663 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
7664 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
7665 case V4SImode: gen = gen_aarch64_trn2v4si; break;
7666 case V2SImode: gen = gen_aarch64_trn2v2si; break;
7667 case V2DImode: gen = gen_aarch64_trn2v2di; break;
7668 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
7669 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
7670 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
7671 default:
7672 return false;
7675 else
7677 switch (vmode)
7679 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
7680 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
7681 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
7682 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
7683 case V4SImode: gen = gen_aarch64_trn1v4si; break;
7684 case V2SImode: gen = gen_aarch64_trn1v2si; break;
7685 case V2DImode: gen = gen_aarch64_trn1v2di; break;
7686 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
7687 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
7688 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
7689 default:
7690 return false;
7694 emit_insn (gen (out, in0, in1));
7695 return true;
7698 /* Recognize patterns suitable for the UZP instructions. */
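/* Added example (little-endian element numbering, as in the checks below):
   for V4SImode, UZP1 realises the permutation {0, 2, 4, 6} and UZP2
   realises {1, 3, 5, 7}.  */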
7699 static bool
7700 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
7702 unsigned int i, odd, mask, nelt = d->nelt;
7703 rtx out, in0, in1, x;
7704 rtx (*gen) (rtx, rtx, rtx);
7705 enum machine_mode vmode = d->vmode;
7707 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7708 return false;
7710 /* Note that these are little-endian tests.
7711 We correct for big-endian later. */
7712 if (d->perm[0] == 0)
7713 odd = 0;
7714 else if (d->perm[0] == 1)
7715 odd = 1;
7716 else
7717 return false;
7718 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7720 for (i = 0; i < nelt; i++)
7722 unsigned elt = (i * 2 + odd) & mask;
7723 if (d->perm[i] != elt)
7724 return false;
7727 /* Success! */
7728 if (d->testing_p)
7729 return true;
7731 in0 = d->op0;
7732 in1 = d->op1;
7733 if (BYTES_BIG_ENDIAN)
7735 x = in0, in0 = in1, in1 = x;
7736 odd = !odd;
7738 out = d->target;
7740 if (odd)
7742 switch (vmode)
7744 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
7745 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
7746 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
7747 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
7748 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
7749 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
7750 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
7751 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
7752 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
7753 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
7754 default:
7755 return false;
7758 else
7760 switch (vmode)
7762 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
7763 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
7764 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
7765 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
7766 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
7767 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
7768 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
7769 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
7770 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
7771 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
7772 default:
7773 return false;
7777 emit_insn (gen (out, in0, in1));
7778 return true;
7781 /* Recognize patterns suitable for the ZIP instructions. */
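/* Added example (little-endian element numbering, as in the checks below):
   for V4SImode, ZIP1 realises the permutation {0, 4, 1, 5} and ZIP2
   realises {2, 6, 3, 7}.  */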
7782 static bool
7783 aarch64_evpc_zip (struct expand_vec_perm_d *d)
7785 unsigned int i, high, mask, nelt = d->nelt;
7786 rtx out, in0, in1, x;
7787 rtx (*gen) (rtx, rtx, rtx);
7788 enum machine_mode vmode = d->vmode;
7790 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7791 return false;
7793 /* Note that these are little-endian tests.
7794 We correct for big-endian later. */
7795 high = nelt / 2;
7796 if (d->perm[0] == high)
7797 /* Do Nothing. */
7799 else if (d->perm[0] == 0)
7800 high = 0;
7801 else
7802 return false;
7803 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7805 for (i = 0; i < nelt / 2; i++)
7807 unsigned elt = (i + high) & mask;
7808 if (d->perm[i * 2] != elt)
7809 return false;
7810 elt = (elt + nelt) & mask;
7811 if (d->perm[i * 2 + 1] != elt)
7812 return false;
7815 /* Success! */
7816 if (d->testing_p)
7817 return true;
7819 in0 = d->op0;
7820 in1 = d->op1;
7821 if (BYTES_BIG_ENDIAN)
7823 x = in0, in0 = in1, in1 = x;
7824 high = !high;
7826 out = d->target;
7828 if (high)
7830 switch (vmode)
7832 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
7833 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
7834 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
7835 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
7836 case V4SImode: gen = gen_aarch64_zip2v4si; break;
7837 case V2SImode: gen = gen_aarch64_zip2v2si; break;
7838 case V2DImode: gen = gen_aarch64_zip2v2di; break;
7839 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
7840 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
7841 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
7842 default:
7843 return false;
7846 else
7848 switch (vmode)
7850 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
7851 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
7852 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
7853 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
7854 case V4SImode: gen = gen_aarch64_zip1v4si; break;
7855 case V2SImode: gen = gen_aarch64_zip1v2si; break;
7856 case V2DImode: gen = gen_aarch64_zip1v2di; break;
7857 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
7858 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
7859 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
7860 default:
7861 return false;
7865 emit_insn (gen (out, in0, in1));
7866 return true;
7869 static bool
7870 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
7872 rtx rperm[MAX_VECT_LEN], sel;
7873 enum machine_mode vmode = d->vmode;
7874 unsigned int i, nelt = d->nelt;
7876 /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's
7877 numbering of elements for big-endian, we must reverse the order. */
7878 if (BYTES_BIG_ENDIAN)
7879 return false;
7881 if (d->testing_p)
7882 return true;
7884 /* Generic code will try constant permutation twice. Once with the
7885 original mode and again with the elements lowered to QImode.
7886 So wait and don't do the selector expansion ourselves. */
7887 if (vmode != V8QImode && vmode != V16QImode)
7888 return false;
7890 for (i = 0; i < nelt; ++i)
7891 rperm[i] = GEN_INT (d->perm[i]);
7892 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
7893 sel = force_reg (vmode, sel);
7895 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
7896 return true;
7899 static bool
7900 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
7902 /* The pattern matching functions above are written to look for a small
7903 number to begin the sequence (0, 1, N/2). If we begin with an index
7904 from the second operand, we can swap the operands. */
7905 if (d->perm[0] >= d->nelt)
7907 unsigned i, nelt = d->nelt;
7908 rtx x;
7910 for (i = 0; i < nelt; ++i)
7911 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
7913 x = d->op0;
7914 d->op0 = d->op1;
7915 d->op1 = x;
7918 if (TARGET_SIMD)
7920 if (aarch64_evpc_zip (d))
7921 return true;
7922 else if (aarch64_evpc_uzp (d))
7923 return true;
7924 else if (aarch64_evpc_trn (d))
7925 return true;
7926 return aarch64_evpc_tbl (d);
7928 return false;
7931 /* Expand a vec_perm_const pattern. */
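/* Illustrative example of the WHICH classification below: a V4SImode
   selector {4, 5, 6, 7} over two distinct operands refers only to the
   second vector, so WHICH is 2; the indices are folded to {0, 1, 2, 3} and
   OP1 is treated as the single input for the pattern matchers.  */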
7933 bool
7934 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
7936 struct expand_vec_perm_d d;
7937 int i, nelt, which;
7939 d.target = target;
7940 d.op0 = op0;
7941 d.op1 = op1;
7943 d.vmode = GET_MODE (target);
7944 gcc_assert (VECTOR_MODE_P (d.vmode));
7945 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
7946 d.testing_p = false;
7948 for (i = which = 0; i < nelt; ++i)
7950 rtx e = XVECEXP (sel, 0, i);
7951 int ei = INTVAL (e) & (2 * nelt - 1);
7952 which |= (ei < nelt ? 1 : 2);
7953 d.perm[i] = ei;
7956 switch (which)
7958 default:
7959 gcc_unreachable ();
7961 case 3:
7962 d.one_vector_p = false;
7963 if (!rtx_equal_p (op0, op1))
7964 break;
7966 /* The elements of PERM do not suggest that only the first operand
7967 is used, but both operands are identical. Allow easier matching
7968 of the permutation by folding the permutation into the single
7969 input vector. */
7970 /* Fall Through. */
7971 case 2:
7972 for (i = 0; i < nelt; ++i)
7973 d.perm[i] &= nelt - 1;
7974 d.op0 = op1;
7975 d.one_vector_p = true;
7976 break;
7978 case 1:
7979 d.op1 = op0;
7980 d.one_vector_p = true;
7981 break;
7984 return aarch64_expand_vec_perm_const_1 (&d);
7987 static bool
7988 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
7989 const unsigned char *sel)
7991 struct expand_vec_perm_d d;
7992 unsigned int i, nelt, which;
7993 bool ret;
7995 d.vmode = vmode;
7996 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
7997 d.testing_p = true;
7998 memcpy (d.perm, sel, nelt);
8000 /* Calculate whether all elements are in one vector. */
8001 for (i = which = 0; i < nelt; ++i)
8003 unsigned char e = d.perm[i];
8004 gcc_assert (e < 2 * nelt);
8005 which |= (e < nelt ? 1 : 2);
8008 /* If all elements are from the second vector, reindex as if from the
8009 first vector. */
8010 if (which == 2)
8011 for (i = 0; i < nelt; ++i)
8012 d.perm[i] -= nelt;
8014 /* Check whether the mask can be applied to a single vector. */
8015 d.one_vector_p = (which != 3);
8017 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
8018 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
8019 if (!d.one_vector_p)
8020 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
8022 start_sequence ();
8023 ret = aarch64_expand_vec_perm_const_1 (&d);
8024 end_sequence ();
8026 return ret;
8029 #undef TARGET_ADDRESS_COST
8030 #define TARGET_ADDRESS_COST aarch64_address_cost
8032 /* This hook determines whether unnamed bitfields affect the alignment
8033 of the containing structure. The hook returns true if the structure
8034 should inherit the alignment requirements of an unnamed bitfield's
8035 type. */
8036 #undef TARGET_ALIGN_ANON_BITFIELD
8037 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
8039 #undef TARGET_ASM_ALIGNED_DI_OP
8040 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
8042 #undef TARGET_ASM_ALIGNED_HI_OP
8043 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
8045 #undef TARGET_ASM_ALIGNED_SI_OP
8046 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
8048 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
8049 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
8050 hook_bool_const_tree_hwi_hwi_const_tree_true
8052 #undef TARGET_ASM_FILE_START
8053 #define TARGET_ASM_FILE_START aarch64_start_file
8055 #undef TARGET_ASM_OUTPUT_MI_THUNK
8056 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
8058 #undef TARGET_ASM_SELECT_RTX_SECTION
8059 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
8061 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
8062 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
8064 #undef TARGET_BUILD_BUILTIN_VA_LIST
8065 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
8067 #undef TARGET_CALLEE_COPIES
8068 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
8070 #undef TARGET_CAN_ELIMINATE
8071 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
8073 #undef TARGET_CANNOT_FORCE_CONST_MEM
8074 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
8076 #undef TARGET_CONDITIONAL_REGISTER_USAGE
8077 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
8079 /* Only the least significant bit is used for initialization guard
8080 variables. */
8081 #undef TARGET_CXX_GUARD_MASK_BIT
8082 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
8084 #undef TARGET_C_MODE_FOR_SUFFIX
8085 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
8087 #ifdef TARGET_BIG_ENDIAN_DEFAULT
8088 #undef TARGET_DEFAULT_TARGET_FLAGS
8089 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
8090 #endif
8092 #undef TARGET_CLASS_MAX_NREGS
8093 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
8095 #undef TARGET_BUILTIN_DECL
8096 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
8098 #undef TARGET_EXPAND_BUILTIN
8099 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
8101 #undef TARGET_EXPAND_BUILTIN_VA_START
8102 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
8104 #undef TARGET_FOLD_BUILTIN
8105 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
8107 #undef TARGET_FUNCTION_ARG
8108 #define TARGET_FUNCTION_ARG aarch64_function_arg
8110 #undef TARGET_FUNCTION_ARG_ADVANCE
8111 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
8113 #undef TARGET_FUNCTION_ARG_BOUNDARY
8114 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
8116 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
8117 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
8119 #undef TARGET_FUNCTION_VALUE
8120 #define TARGET_FUNCTION_VALUE aarch64_function_value
8122 #undef TARGET_FUNCTION_VALUE_REGNO_P
8123 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
8125 #undef TARGET_FRAME_POINTER_REQUIRED
8126 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
8128 #undef TARGET_GIMPLE_FOLD_BUILTIN
8129 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
8131 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
8132 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
8134 #undef TARGET_INIT_BUILTINS
8135 #define TARGET_INIT_BUILTINS aarch64_init_builtins
8137 #undef TARGET_LEGITIMATE_ADDRESS_P
8138 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
8140 #undef TARGET_LEGITIMATE_CONSTANT_P
8141 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
8143 #undef TARGET_LIBGCC_CMP_RETURN_MODE
8144 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
8146 #undef TARGET_MANGLE_TYPE
8147 #define TARGET_MANGLE_TYPE aarch64_mangle_type
8149 #undef TARGET_MEMORY_MOVE_COST
8150 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
8152 #undef TARGET_MUST_PASS_IN_STACK
8153 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
8155 /* This target hook should return true if accesses to volatile bitfields
8156 should use the narrowest mode possible. It should return false if these
8157 accesses should use the bitfield container type. */
8158 #undef TARGET_NARROW_VOLATILE_BITFIELD
8159 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
8161 #undef TARGET_OPTION_OVERRIDE
8162 #define TARGET_OPTION_OVERRIDE aarch64_override_options
8164 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
8165 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
8166 aarch64_override_options_after_change
8168 #undef TARGET_PASS_BY_REFERENCE
8169 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
8171 #undef TARGET_PREFERRED_RELOAD_CLASS
8172 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
8174 #undef TARGET_SECONDARY_RELOAD
8175 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
8177 #undef TARGET_SHIFT_TRUNCATION_MASK
8178 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
8180 #undef TARGET_SETUP_INCOMING_VARARGS
8181 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
8183 #undef TARGET_STRUCT_VALUE_RTX
8184 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
8186 #undef TARGET_REGISTER_MOVE_COST
8187 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
8189 #undef TARGET_RETURN_IN_MEMORY
8190 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
8192 #undef TARGET_RETURN_IN_MSB
8193 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
8195 #undef TARGET_RTX_COSTS
8196 #define TARGET_RTX_COSTS aarch64_rtx_costs
8198 #undef TARGET_TRAMPOLINE_INIT
8199 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
8201 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
8202 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
8204 #undef TARGET_VECTOR_MODE_SUPPORTED_P
8205 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
8207 #undef TARGET_ARRAY_MODE_SUPPORTED_P
8208 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
8210 #undef TARGET_VECTORIZE_ADD_STMT_COST
8211 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
8213 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
8214 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
8215 aarch64_builtin_vectorization_cost
8217 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
8218 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
8220 #undef TARGET_VECTORIZE_BUILTINS
8221 #define TARGET_VECTORIZE_BUILTINS
8223 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
8224 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
8225 aarch64_builtin_vectorized_function
8227 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
8228 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
8229 aarch64_autovectorize_vector_sizes
8231 /* Section anchor support. */
8233 #undef TARGET_MIN_ANCHOR_OFFSET
8234 #define TARGET_MIN_ANCHOR_OFFSET -256
8236 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
8237 byte offset; we can do much more for larger data types, but have no way
8238 to determine the size of the access. We assume accesses are aligned. */
8239 #undef TARGET_MAX_ANCHOR_OFFSET
8240 #define TARGET_MAX_ANCHOR_OFFSET 4095
8242 #undef TARGET_VECTOR_ALIGNMENT
8243 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
8245 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
8246 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
8247 aarch64_simd_vector_alignment_reachable
8249 /* vec_perm support. */
8251 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
8252 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
8253 aarch64_vectorize_vec_perm_const_ok
8256 #undef TARGET_FIXED_CONDITION_CODE_REGS
8257 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
8259 struct gcc_target targetm = TARGET_INITIALIZER;
8261 #include "gt-aarch64.h"