[AArch64] Use plus_constant.
[official-gcc.git] / gcc / config / aarch64 / aarch64.c
blob c65d8451b1dcedf7bba1856db35c6c4cc7099a5b
1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2013 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "tree.h"
29 #include "regs.h"
30 #include "df.h"
31 #include "hard-reg-set.h"
32 #include "output.h"
33 #include "expr.h"
34 #include "reload.h"
35 #include "toplev.h"
36 #include "target.h"
37 #include "target-def.h"
38 #include "targhooks.h"
39 #include "ggc.h"
40 #include "function.h"
41 #include "tm_p.h"
42 #include "recog.h"
43 #include "langhooks.h"
44 #include "diagnostic-core.h"
45 #include "gimple.h"
46 #include "optabs.h"
47 #include "dwarf2.h"
48 #include "cfgloop.h"
49 #include "tree-vectorizer.h"
51 /* Defined for convenience. */
52 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
54 /* Classifies an address.
56 ADDRESS_REG_IMM
57 A simple base register plus immediate offset.
59 ADDRESS_REG_WB
60 A base register indexed by immediate offset with writeback.
62 ADDRESS_REG_REG
63 A base register indexed by (optionally scaled) register.
65 ADDRESS_REG_UXTW
66 A base register indexed by (optionally scaled) zero-extended register.
68 ADDRESS_REG_SXTW
69 A base register indexed by (optionally scaled) sign-extended register.
71 ADDRESS_LO_SUM
72 A LO_SUM rtx with a base register and "LO12" symbol relocation.
74 ADDRESS_SYMBOLIC:
75 A constant symbolic address, in pc-relative literal pool. */
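/* Illustrative only (not part of the original source): roughly how these
   classes map onto AArch64 assembly addressing forms, using x0/w1 as the
   assumed base and index registers:
     ADDRESS_REG_IMM    ldr x2, [x0, #16]
     ADDRESS_REG_WB     ldr x2, [x0, #16]!   or   ldr x2, [x0], #16
     ADDRESS_REG_REG    ldr x2, [x0, x1, lsl #3]
     ADDRESS_REG_UXTW   ldr x2, [x0, w1, uxtw #3]
     ADDRESS_REG_SXTW   ldr x2, [x0, w1, sxtw #3]
     ADDRESS_LO_SUM     ldr x2, [x0, #:lo12:sym]
     ADDRESS_SYMBOLIC   ldr x2, some_label   (PC-relative literal)  */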
77 enum aarch64_address_type {
78 ADDRESS_REG_IMM,
79 ADDRESS_REG_WB,
80 ADDRESS_REG_REG,
81 ADDRESS_REG_UXTW,
82 ADDRESS_REG_SXTW,
83 ADDRESS_LO_SUM,
84 ADDRESS_SYMBOLIC
87 struct aarch64_address_info {
88 enum aarch64_address_type type;
89 rtx base;
90 rtx offset;
91 int shift;
92 enum aarch64_symbol_type symbol_type;
95 struct simd_immediate_info
97 rtx value;
98 int shift;
99 int element_width;
100 bool mvn;
101 bool msl;
104 /* The current code model. */
105 enum aarch64_code_model aarch64_cmodel;
107 #ifdef HAVE_AS_TLS
108 #undef TARGET_HAVE_TLS
109 #define TARGET_HAVE_TLS 1
110 #endif
112 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
113 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
114 const_tree,
115 enum machine_mode *, int *,
116 bool *);
117 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
118 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
119 static void aarch64_override_options_after_change (void);
120 static bool aarch64_vector_mode_supported_p (enum machine_mode);
121 static unsigned bit_count (unsigned HOST_WIDE_INT);
122 static bool aarch64_const_vec_all_same_int_p (rtx,
123 HOST_WIDE_INT, HOST_WIDE_INT);
125 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
126 const unsigned char *sel);
128 /* The processor for which instructions should be scheduled. */
129 enum aarch64_processor aarch64_tune = generic;
131 /* The current tuning set. */
132 const struct tune_params *aarch64_tune_params;
134 /* Mask to specify which instructions we are allowed to generate. */
135 unsigned long aarch64_isa_flags = 0;
137 /* Mask to specify which instruction scheduling options should be used. */
138 unsigned long aarch64_tune_flags = 0;
140 /* Tuning parameters. */
142 #if HAVE_DESIGNATED_INITIALIZERS
143 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
144 #else
145 #define NAMED_PARAM(NAME, VAL) (VAL)
146 #endif
148 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
149 __extension__
150 #endif
151 static const struct cpu_rtx_cost_table generic_rtx_cost_table =
153 NAMED_PARAM (memory_load, COSTS_N_INSNS (1)),
154 NAMED_PARAM (memory_store, COSTS_N_INSNS (0)),
155 NAMED_PARAM (register_shift, COSTS_N_INSNS (1)),
156 NAMED_PARAM (int_divide, COSTS_N_INSNS (6)),
157 NAMED_PARAM (float_divide, COSTS_N_INSNS (2)),
158 NAMED_PARAM (double_divide, COSTS_N_INSNS (6)),
159 NAMED_PARAM (int_multiply, COSTS_N_INSNS (1)),
160 NAMED_PARAM (int_multiply_extend, COSTS_N_INSNS (1)),
161 NAMED_PARAM (int_multiply_add, COSTS_N_INSNS (1)),
162 NAMED_PARAM (int_multiply_extend_add, COSTS_N_INSNS (1)),
163 NAMED_PARAM (float_multiply, COSTS_N_INSNS (0)),
164 NAMED_PARAM (double_multiply, COSTS_N_INSNS (1))
167 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
168 __extension__
169 #endif
170 static const struct cpu_addrcost_table generic_addrcost_table =
172 NAMED_PARAM (pre_modify, 0),
173 NAMED_PARAM (post_modify, 0),
174 NAMED_PARAM (register_offset, 0),
175 NAMED_PARAM (register_extend, 0),
176 NAMED_PARAM (imm_offset, 0)
179 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
180 __extension__
181 #endif
182 static const struct cpu_regmove_cost generic_regmove_cost =
184 NAMED_PARAM (GP2GP, 1),
185 NAMED_PARAM (GP2FP, 2),
186 NAMED_PARAM (FP2GP, 2),
187 /* We currently do not provide direct support for TFmode Q->Q move.
188 Therefore we need to raise the cost above 2 in order to have
189 reload handle the situation. */
190 NAMED_PARAM (FP2FP, 4)
193 /* Generic costs for vector insn classes. */
194 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
195 __extension__
196 #endif
197 static const struct cpu_vector_cost generic_vector_cost =
199 NAMED_PARAM (scalar_stmt_cost, 1),
200 NAMED_PARAM (scalar_load_cost, 1),
201 NAMED_PARAM (scalar_store_cost, 1),
202 NAMED_PARAM (vec_stmt_cost, 1),
203 NAMED_PARAM (vec_to_scalar_cost, 1),
204 NAMED_PARAM (scalar_to_vec_cost, 1),
205 NAMED_PARAM (vec_align_load_cost, 1),
206 NAMED_PARAM (vec_unalign_load_cost, 1),
207 NAMED_PARAM (vec_unalign_store_cost, 1),
208 NAMED_PARAM (vec_store_cost, 1),
209 NAMED_PARAM (cond_taken_branch_cost, 3),
210 NAMED_PARAM (cond_not_taken_branch_cost, 1)
213 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
214 __extension__
215 #endif
216 static const struct tune_params generic_tunings =
218 &generic_rtx_cost_table,
219 &generic_addrcost_table,
220 &generic_regmove_cost,
221 &generic_vector_cost,
222 NAMED_PARAM (memmov_cost, 4)
225 /* A processor implementing AArch64. */
226 struct processor
228 const char *const name;
229 enum aarch64_processor core;
230 const char *arch;
231 const unsigned long flags;
232 const struct tune_params *const tune;
235 /* Processor cores implementing AArch64. */
236 static const struct processor all_cores[] =
238 #define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
239 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
240 #include "aarch64-cores.def"
241 #undef AARCH64_CORE
242 {"generic", generic, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
243 {NULL, aarch64_none, NULL, 0, NULL}
246 /* Architectures implementing AArch64. */
247 static const struct processor all_architectures[] =
249 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
250 {NAME, CORE, #ARCH, FLAGS, NULL},
251 #include "aarch64-arches.def"
252 #undef AARCH64_ARCH
253 {"generic", generic, "8", AARCH64_FL_FOR_ARCH8, NULL},
254 {NULL, aarch64_none, NULL, 0, NULL}
257 /* Target specification. These are populated as command-line arguments
258 are processed, or NULL if not specified. */
259 static const struct processor *selected_arch;
260 static const struct processor *selected_cpu;
261 static const struct processor *selected_tune;
263 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
265 /* An ISA extension in the co-processor and main instruction set space. */
266 struct aarch64_option_extension
268 const char *const name;
269 const unsigned long flags_on;
270 const unsigned long flags_off;
273 /* ISA extensions in AArch64. */
274 static const struct aarch64_option_extension all_extensions[] =
276 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
277 {NAME, FLAGS_ON, FLAGS_OFF},
278 #include "aarch64-option-extensions.def"
279 #undef AARCH64_OPT_EXTENSION
280 {NULL, 0, 0}
283 /* Used to track the size of an address when generating a pre/post
284 increment address. */
285 static enum machine_mode aarch64_memory_reference_mode;
287 /* Used to force GTY into this file. */
288 static GTY(()) int gty_dummy;
290 /* A table of valid AArch64 "bitmask immediate" values for
291 logical instructions. */
293 #define AARCH64_NUM_BITMASKS 5334
294 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
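/* Illustrative note (not part of the original source): a "bitmask
   immediate" is a rotated run of ones replicated across the register in
   2, 4, 8, 16, 32 or 64-bit chunks.  For example 0x00ff00ff00ff00ff and
   0x5555555555555555 are encodable, while 0x0000000000001234, all-zeros
   and all-ones are not.  */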
296 /* Did we set flag_omit_frame_pointer just so
297 aarch64_frame_pointer_required would be called? */
298 static bool faked_omit_frame_pointer;
300 typedef enum aarch64_cond_code
302 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
303 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
304 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
306 aarch64_cc;
308 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
310 /* The condition codes of the processor, and the inverse function. */
311 static const char * const aarch64_condition_codes[] =
313 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
314 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
317 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
318 unsigned
319 aarch64_dbx_register_number (unsigned regno)
321 if (GP_REGNUM_P (regno))
322 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
323 else if (regno == SP_REGNUM)
324 return AARCH64_DWARF_SP;
325 else if (FP_REGNUM_P (regno))
326 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
328 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
329 equivalent DWARF register. */
330 return DWARF_FRAME_REGISTERS;
333 /* Return TRUE if MODE is any of the large INT modes. */
334 static bool
335 aarch64_vect_struct_mode_p (enum machine_mode mode)
337 return mode == OImode || mode == CImode || mode == XImode;
340 /* Return TRUE if MODE is any of the vector modes. */
341 static bool
342 aarch64_vector_mode_p (enum machine_mode mode)
344 return aarch64_vector_mode_supported_p (mode)
345 || aarch64_vect_struct_mode_p (mode);
348 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
349 static bool
350 aarch64_array_mode_supported_p (enum machine_mode mode,
351 unsigned HOST_WIDE_INT nelems)
353 if (TARGET_SIMD
354 && AARCH64_VALID_SIMD_QREG_MODE (mode)
355 && (nelems >= 2 && nelems <= 4))
356 return true;
358 return false;
361 /* Implement HARD_REGNO_NREGS. */
364 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
366 switch (aarch64_regno_regclass (regno))
368 case FP_REGS:
369 case FP_LO_REGS:
370 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
371 default:
372 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
374 gcc_unreachable ();
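/* Illustrative example (not part of the original source), assuming the
   usual AArch64 values UNITS_PER_WORD == 8 and UNITS_PER_VREG == 16:
   a 16-byte TImode value occupies 2 general registers but only 1
   FP/SIMD register, while a 32-byte OImode value occupies 2 FP/SIMD
   registers.  */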
377 /* Implement HARD_REGNO_MODE_OK. */
380 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
382 if (GET_MODE_CLASS (mode) == MODE_CC)
383 return regno == CC_REGNUM;
385 if (regno == SP_REGNUM)
386 /* The purpose of comparing with ptr_mode is to support the
387 global register variable associated with the stack pointer
388 register via the syntax of asm ("wsp") in ILP32. */
389 return mode == Pmode || mode == ptr_mode;
391 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
392 return mode == Pmode;
394 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
395 return 1;
397 if (FP_REGNUM_P (regno))
399 if (aarch64_vect_struct_mode_p (mode))
400 return
401 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
402 else
403 return 1;
406 return 0;
409 /* Return true if calls to DECL should be treated as
410 long-calls (i.e. called via a register). */
411 static bool
412 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
414 return false;
417 /* Return true if calls to symbol-ref SYM should be treated as
418 long-calls (i.e. called via a register). */
419 bool
420 aarch64_is_long_call_p (rtx sym)
422 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
425 /* Return true if the offsets to a zero/sign-extract operation
426 represent an expression that matches an extend operation. The
427 operands represent the parameters from
429 (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */
430 bool
431 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
432 rtx extract_imm)
434 HOST_WIDE_INT mult_val, extract_val;
436 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
437 return false;
439 mult_val = INTVAL (mult_imm);
440 extract_val = INTVAL (extract_imm);
442 if (extract_val > 8
443 && extract_val < GET_MODE_BITSIZE (mode)
444 && exact_log2 (extract_val & ~7) > 0
445 && (extract_val & 7) <= 4
446 && mult_val == (1 << (extract_val & 7)))
447 return true;
449 return false;
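/* Worked example (illustrative, not part of the original source): in
   DImode, mult_imm == 4 and extract_imm == 34 pass the checks above
   (34 & ~7 == 32, a power of two; 34 & 7 == 2; 1 << 2 == 4), i.e. the
   extract describes the low 32 bits of the register shifted left by 2,
   matching an extend-and-scale index.  */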
452 /* Emit an insn that's a simple single-set. Both the operands must be
453 known to be valid. */
454 inline static rtx
455 emit_set_insn (rtx x, rtx y)
457 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
460 /* X and Y are two things to compare using CODE. Emit the compare insn and
461 return the rtx for register 0 in the proper mode. */
463 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
465 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
466 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
468 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
469 return cc_reg;
472 /* Build the SYMBOL_REF for __tls_get_addr. */
474 static GTY(()) rtx tls_get_addr_libfunc;
477 aarch64_tls_get_addr (void)
479 if (!tls_get_addr_libfunc)
480 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
481 return tls_get_addr_libfunc;
484 /* Return the TLS model to use for ADDR. */
486 static enum tls_model
487 tls_symbolic_operand_type (rtx addr)
489 enum tls_model tls_kind = TLS_MODEL_NONE;
490 rtx sym, addend;
492 if (GET_CODE (addr) == CONST)
494 split_const (addr, &sym, &addend);
495 if (GET_CODE (sym) == SYMBOL_REF)
496 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
498 else if (GET_CODE (addr) == SYMBOL_REF)
499 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
501 return tls_kind;
504 /* We'll allow lo_sum's in our legitimate addresses so that combine
505 will take care of combining addresses where necessary, but for
506 generation purposes, we'll generate the address
507 as:
508 RTL Absolute
509 tmp = hi (symbol_ref); adrp x1, foo
510 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
513 PIC TLS
514 adrp x1, :got:foo adrp tmp, :tlsgd:foo
515 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
516 bl __tls_get_addr
519 Load TLS symbol, depending on TLS mechanism and TLS access model.
521 Global Dynamic - Traditional TLS:
522 adrp tmp, :tlsgd:imm
523 add dest, tmp, #:tlsgd_lo12:imm
524 bl __tls_get_addr
526 Global Dynamic - TLS Descriptors:
527 adrp dest, :tlsdesc:imm
528 ldr tmp, [dest, #:tlsdesc_lo12:imm]
529 add dest, dest, #:tlsdesc_lo12:imm
530 blr tmp
531 mrs tp, tpidr_el0
532 add dest, dest, tp
534 Initial Exec:
535 mrs tp, tpidr_el0
536 adrp tmp, :gottprel:imm
537 ldr dest, [tmp, #:gottprel_lo12:imm]
538 add dest, dest, tp
540 Local Exec:
541 mrs tp, tpidr_el0
542 add t0, tp, #:tprel_hi12:imm
543 add t0, #:tprel_lo12_nc:imm
546 static void
547 aarch64_load_symref_appropriately (rtx dest, rtx imm,
548 enum aarch64_symbol_type type)
550 switch (type)
552 case SYMBOL_SMALL_ABSOLUTE:
554 /* In ILP32, the mode of dest can be either SImode or DImode. */
555 rtx tmp_reg = dest;
556 enum machine_mode mode = GET_MODE (dest);
558 gcc_assert (mode == Pmode || mode == ptr_mode);
560 if (can_create_pseudo_p ())
561 tmp_reg = gen_reg_rtx (mode);
563 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
564 emit_insn (gen_add_losym (dest, tmp_reg, imm));
565 return;
568 case SYMBOL_TINY_ABSOLUTE:
569 emit_insn (gen_rtx_SET (Pmode, dest, imm));
570 return;
572 case SYMBOL_SMALL_GOT:
574 /* In ILP32, the mode of dest can be either SImode or DImode,
575 while the got entry is always of SImode size. The mode of
576 dest depends on how dest is used: if dest is assigned to a
577 pointer (e.g. in memory), it has SImode; it may have
578 DImode if dest is dereferenced to access the memory.
579 This is why we have to handle three different ldr_got_small
580 patterns here (two patterns for ILP32). */
581 rtx tmp_reg = dest;
582 enum machine_mode mode = GET_MODE (dest);
584 if (can_create_pseudo_p ())
585 tmp_reg = gen_reg_rtx (mode);
587 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
588 if (mode == ptr_mode)
590 if (mode == DImode)
591 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
592 else
593 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
595 else
597 gcc_assert (mode == Pmode);
598 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
601 return;
604 case SYMBOL_SMALL_TLSGD:
606 rtx insns;
607 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
609 start_sequence ();
610 emit_call_insn (gen_tlsgd_small (result, imm));
611 insns = get_insns ();
612 end_sequence ();
614 RTL_CONST_CALL_P (insns) = 1;
615 emit_libcall_block (insns, dest, result, imm);
616 return;
619 case SYMBOL_SMALL_TLSDESC:
621 rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
622 rtx tp;
624 emit_insn (gen_tlsdesc_small (imm));
625 tp = aarch64_load_tp (NULL);
626 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
627 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
628 return;
631 case SYMBOL_SMALL_GOTTPREL:
633 rtx tmp_reg = gen_reg_rtx (Pmode);
634 rtx tp = aarch64_load_tp (NULL);
635 emit_insn (gen_tlsie_small (tmp_reg, imm));
636 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
637 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
638 return;
641 case SYMBOL_SMALL_TPREL:
643 rtx tp = aarch64_load_tp (NULL);
644 emit_insn (gen_tlsle_small (dest, tp, imm));
645 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
646 return;
649 case SYMBOL_TINY_GOT:
650 emit_insn (gen_ldr_got_tiny (dest, imm));
651 return;
653 default:
654 gcc_unreachable ();
658 /* Emit a move from SRC to DEST. Assume that the move expanders can
659 handle all moves if !can_create_pseudo_p (). The distinction is
660 important because, unlike emit_move_insn, the move expanders know
661 how to force Pmode objects into the constant pool even when the
662 constant pool address is not itself legitimate. */
663 static rtx
664 aarch64_emit_move (rtx dest, rtx src)
666 return (can_create_pseudo_p ()
667 ? emit_move_insn (dest, src)
668 : emit_move_insn_1 (dest, src));
671 void
672 aarch64_split_128bit_move (rtx dst, rtx src)
674 rtx low_dst;
676 enum machine_mode src_mode = GET_MODE (src);
677 enum machine_mode dst_mode = GET_MODE (dst);
678 int src_regno = REGNO (src);
679 int dst_regno = REGNO (dst);
681 gcc_assert (dst_mode == TImode || dst_mode == TFmode);
683 if (REG_P (dst) && REG_P (src))
685 gcc_assert (src_mode == TImode || src_mode == TFmode);
687 /* Handle r -> w, w -> r. */
688 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
690 switch (src_mode) {
691 case TImode:
692 emit_insn
693 (gen_aarch64_movtilow_di (dst, gen_lowpart (word_mode, src)));
694 emit_insn
695 (gen_aarch64_movtihigh_di (dst, gen_highpart (word_mode, src)));
696 return;
697 case TFmode:
698 emit_insn
699 (gen_aarch64_movtflow_di (dst, gen_lowpart (word_mode, src)));
700 emit_insn
701 (gen_aarch64_movtfhigh_di (dst, gen_highpart (word_mode, src)));
702 return;
703 default:
704 gcc_unreachable ();
707 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
709 switch (src_mode) {
710 case TImode:
711 emit_insn
712 (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst), src));
713 emit_insn
714 (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst), src));
715 return;
716 case TFmode:
717 emit_insn
718 (gen_aarch64_movdi_tflow (gen_lowpart (word_mode, dst), src));
719 emit_insn
720 (gen_aarch64_movdi_tfhigh (gen_highpart (word_mode, dst), src));
721 return;
722 default:
723 gcc_unreachable ();
726 /* Fall through to r -> r cases. */
729 switch (dst_mode) {
730 case TImode:
731 low_dst = gen_lowpart (word_mode, dst);
732 if (REG_P (low_dst)
733 && reg_overlap_mentioned_p (low_dst, src))
735 aarch64_emit_move (gen_highpart (word_mode, dst),
736 gen_highpart_mode (word_mode, TImode, src));
737 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
739 else
741 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
742 aarch64_emit_move (gen_highpart (word_mode, dst),
743 gen_highpart_mode (word_mode, TImode, src));
745 return;
746 case TFmode:
747 emit_move_insn (gen_rtx_REG (DFmode, dst_regno),
748 gen_rtx_REG (DFmode, src_regno));
749 emit_move_insn (gen_rtx_REG (DFmode, dst_regno + 1),
750 gen_rtx_REG (DFmode, src_regno + 1));
751 return;
752 default:
753 gcc_unreachable ();
757 bool
758 aarch64_split_128bit_move_p (rtx dst, rtx src)
760 return (! REG_P (src)
761 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
764 /* Split a complex SIMD combine. */
766 void
767 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
769 enum machine_mode src_mode = GET_MODE (src1);
770 enum machine_mode dst_mode = GET_MODE (dst);
772 gcc_assert (VECTOR_MODE_P (dst_mode));
774 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
776 rtx (*gen) (rtx, rtx, rtx);
778 switch (src_mode)
780 case V8QImode:
781 gen = gen_aarch64_simd_combinev8qi;
782 break;
783 case V4HImode:
784 gen = gen_aarch64_simd_combinev4hi;
785 break;
786 case V2SImode:
787 gen = gen_aarch64_simd_combinev2si;
788 break;
789 case V2SFmode:
790 gen = gen_aarch64_simd_combinev2sf;
791 break;
792 case DImode:
793 gen = gen_aarch64_simd_combinedi;
794 break;
795 case DFmode:
796 gen = gen_aarch64_simd_combinedf;
797 break;
798 default:
799 gcc_unreachable ();
802 emit_insn (gen (dst, src1, src2));
803 return;
807 /* Split a complex SIMD move. */
809 void
810 aarch64_split_simd_move (rtx dst, rtx src)
812 enum machine_mode src_mode = GET_MODE (src);
813 enum machine_mode dst_mode = GET_MODE (dst);
815 gcc_assert (VECTOR_MODE_P (dst_mode));
817 if (REG_P (dst) && REG_P (src))
819 rtx (*gen) (rtx, rtx);
821 gcc_assert (VECTOR_MODE_P (src_mode));
823 switch (src_mode)
825 case V16QImode:
826 gen = gen_aarch64_split_simd_movv16qi;
827 break;
828 case V8HImode:
829 gen = gen_aarch64_split_simd_movv8hi;
830 break;
831 case V4SImode:
832 gen = gen_aarch64_split_simd_movv4si;
833 break;
834 case V2DImode:
835 gen = gen_aarch64_split_simd_movv2di;
836 break;
837 case V4SFmode:
838 gen = gen_aarch64_split_simd_movv4sf;
839 break;
840 case V2DFmode:
841 gen = gen_aarch64_split_simd_movv2df;
842 break;
843 default:
844 gcc_unreachable ();
847 emit_insn (gen (dst, src));
848 return;
852 static rtx
853 aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
855 if (can_create_pseudo_p ())
856 return force_reg (mode, value);
857 else
859 x = aarch64_emit_move (x, value);
860 return x;
865 static rtx
866 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
868 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
870 rtx high;
871 /* Load the full offset into a register. This
872 might be improvable in the future. */
873 high = GEN_INT (offset);
874 offset = 0;
875 high = aarch64_force_temporary (mode, temp, high);
876 reg = aarch64_force_temporary (mode, temp,
877 gen_rtx_PLUS (mode, high, reg));
879 return plus_constant (mode, reg, offset);
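/* Illustrative note (not part of the original source): plus_constant
   simply builds REG plus a folded constant, e.g.
     plus_constant (DImode, reg, 16)  ->  (plus:DI reg (const_int 16))
     plus_constant (DImode, reg, 0)   ->  reg
   which is why the out-of-range path above moves the high part into a
   register first and then clears OFFSET.  */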
882 void
883 aarch64_expand_mov_immediate (rtx dest, rtx imm)
885 enum machine_mode mode = GET_MODE (dest);
886 unsigned HOST_WIDE_INT mask;
887 int i;
888 bool first;
889 unsigned HOST_WIDE_INT val;
890 bool subtargets;
891 rtx subtarget;
892 int one_match, zero_match;
894 gcc_assert (mode == SImode || mode == DImode);
896 /* Check on what type of symbol it is. */
897 if (GET_CODE (imm) == SYMBOL_REF
898 || GET_CODE (imm) == LABEL_REF
899 || GET_CODE (imm) == CONST)
901 rtx mem, base, offset;
902 enum aarch64_symbol_type sty;
904 /* If we have (const (plus symbol offset)), separate out the offset
905 before we start classifying the symbol. */
906 split_const (imm, &base, &offset);
908 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
909 switch (sty)
911 case SYMBOL_FORCE_TO_MEM:
912 if (offset != const0_rtx
913 && targetm.cannot_force_const_mem (mode, imm))
915 gcc_assert(can_create_pseudo_p ());
916 base = aarch64_force_temporary (mode, dest, base);
917 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
918 aarch64_emit_move (dest, base);
919 return;
921 mem = force_const_mem (ptr_mode, imm);
922 gcc_assert (mem);
923 if (mode != ptr_mode)
924 mem = gen_rtx_ZERO_EXTEND (mode, mem);
925 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
926 return;
928 case SYMBOL_SMALL_TLSGD:
929 case SYMBOL_SMALL_TLSDESC:
930 case SYMBOL_SMALL_GOTTPREL:
931 case SYMBOL_SMALL_GOT:
932 case SYMBOL_TINY_GOT:
933 if (offset != const0_rtx)
935 gcc_assert(can_create_pseudo_p ());
936 base = aarch64_force_temporary (mode, dest, base);
937 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
938 aarch64_emit_move (dest, base);
939 return;
941 /* FALLTHRU */
943 case SYMBOL_SMALL_TPREL:
944 case SYMBOL_SMALL_ABSOLUTE:
945 case SYMBOL_TINY_ABSOLUTE:
946 aarch64_load_symref_appropriately (dest, imm, sty);
947 return;
949 default:
950 gcc_unreachable ();
954 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
956 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
957 return;
960 if (!CONST_INT_P (imm))
962 if (GET_CODE (imm) == HIGH)
963 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
964 else
966 rtx mem = force_const_mem (mode, imm);
967 gcc_assert (mem);
968 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
971 return;
974 if (mode == SImode)
976 /* We know we can't do this in 1 insn, and we must be able to do it
977 in two; so don't mess around looking for sequences that don't buy
978 us anything. */
979 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
980 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
981 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
982 return;
985 /* Remaining cases are all for DImode. */
987 val = INTVAL (imm);
988 subtargets = optimize && can_create_pseudo_p ();
990 one_match = 0;
991 zero_match = 0;
992 mask = 0xffff;
994 for (i = 0; i < 64; i += 16, mask <<= 16)
996 if ((val & mask) == 0)
997 zero_match++;
998 else if ((val & mask) == mask)
999 one_match++;
1002 if (one_match == 2)
1004 mask = 0xffff;
1005 for (i = 0; i < 64; i += 16, mask <<= 16)
1007 if ((val & mask) != mask)
1009 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1010 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1011 GEN_INT ((val >> i) & 0xffff)));
1012 return;
1015 gcc_unreachable ();
1018 if (zero_match == 2)
1019 goto simple_sequence;
1021 mask = 0x0ffff0000UL;
1022 for (i = 16; i < 64; i += 16, mask <<= 16)
1024 HOST_WIDE_INT comp = mask & ~(mask - 1);
1026 if (aarch64_uimm12_shift (val - (val & mask)))
1028 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1030 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1031 emit_insn (gen_adddi3 (dest, subtarget,
1032 GEN_INT (val - (val & mask))));
1033 return;
1035 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1037 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1039 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1040 GEN_INT ((val + comp) & mask)));
1041 emit_insn (gen_adddi3 (dest, subtarget,
1042 GEN_INT (val - ((val + comp) & mask))));
1043 return;
1045 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1047 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1049 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1050 GEN_INT ((val - comp) | ~mask)));
1051 emit_insn (gen_adddi3 (dest, subtarget,
1052 GEN_INT (val - ((val - comp) | ~mask))));
1053 return;
1055 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1057 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1059 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1060 GEN_INT (val | ~mask)));
1061 emit_insn (gen_adddi3 (dest, subtarget,
1062 GEN_INT (val - (val | ~mask))));
1063 return;
1067 /* See if we can do it by arithmetically combining two
1068 immediates. */
1069 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1071 int j;
1072 mask = 0xffff;
1074 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1075 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1077 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1078 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1079 GEN_INT (aarch64_bitmasks[i])));
1080 emit_insn (gen_adddi3 (dest, subtarget,
1081 GEN_INT (val - aarch64_bitmasks[i])));
1082 return;
1085 for (j = 0; j < 64; j += 16, mask <<= 16)
1087 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1089 emit_insn (gen_rtx_SET (VOIDmode, dest,
1090 GEN_INT (aarch64_bitmasks[i])));
1091 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1092 GEN_INT ((val >> j) & 0xffff)));
1093 return;
1098 /* See if we can do it by logically combining two immediates. */
1099 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1101 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1103 int j;
1105 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1106 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1108 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1109 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1110 GEN_INT (aarch64_bitmasks[i])));
1111 emit_insn (gen_iordi3 (dest, subtarget,
1112 GEN_INT (aarch64_bitmasks[j])));
1113 return;
1116 else if ((val & aarch64_bitmasks[i]) == val)
1118 int j;
1120 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1121 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1124 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1125 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1126 GEN_INT (aarch64_bitmasks[j])));
1127 emit_insn (gen_anddi3 (dest, subtarget,
1128 GEN_INT (aarch64_bitmasks[i])));
1129 return;
1134 simple_sequence:
1135 first = true;
1136 mask = 0xffff;
1137 for (i = 0; i < 64; i += 16, mask <<= 16)
1139 if ((val & mask) != 0)
1141 if (first)
1143 emit_insn (gen_rtx_SET (VOIDmode, dest,
1144 GEN_INT (val & mask)));
1145 first = false;
1147 else
1148 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1149 GEN_INT ((val >> i) & 0xffff)));
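/* Illustrative example (not part of the original source): for
   val == 0x1234000000005678 the simple sequence above emits roughly
     mov  x0, #0x5678
     movk x0, #0x1234, lsl #48
   i.e. one move for the first non-zero 16-bit chunk and one movk per
   remaining non-zero chunk.  */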
1154 static bool
1155 aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1157 /* Indirect calls are not currently supported. */
1158 if (decl == NULL)
1159 return false;
1161 /* Cannot tail-call to long-calls, since these are outside of the
1162 range of a branch instruction (we could handle this if we added
1163 support for indirect tail-calls). */
1164 if (aarch64_decl_is_long_call_p (decl))
1165 return false;
1167 return true;
1170 /* Implement TARGET_PASS_BY_REFERENCE. */
1172 static bool
1173 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1174 enum machine_mode mode,
1175 const_tree type,
1176 bool named ATTRIBUTE_UNUSED)
1178 HOST_WIDE_INT size;
1179 enum machine_mode dummymode;
1180 int nregs;
1182 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1183 size = (mode == BLKmode && type)
1184 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1186 if (type)
1188 /* Arrays always passed by reference. */
1189 if (TREE_CODE (type) == ARRAY_TYPE)
1190 return true;
1191 /* Other aggregates based on their size. */
1192 if (AGGREGATE_TYPE_P (type))
1193 size = int_size_in_bytes (type);
1196 /* Variable sized arguments are always passed by reference. */
1197 if (size < 0)
1198 return true;
1200 /* Can this be a candidate to be passed in fp/simd register(s)? */
1201 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1202 &dummymode, &nregs,
1203 NULL))
1204 return false;
1206 /* Arguments which are variable sized or larger than 2 registers are
1207 passed by reference unless they are a homogeneous floating-point
1208 aggregate. */
1209 return size > 2 * UNITS_PER_WORD;
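/* Illustrative example (not part of the original source), following the
   AAPCS64 rules above: a struct of four doubles is a homogeneous
   floating-point aggregate and so remains a candidate for FP/SIMD
   registers despite being 32 bytes, whereas a struct of three 64-bit
   integers (24 bytes) exceeds two general registers and is passed by
   reference.  */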
1212 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1213 static bool
1214 aarch64_return_in_msb (const_tree valtype)
1216 enum machine_mode dummy_mode;
1217 int dummy_int;
1219 /* Never happens in little-endian mode. */
1220 if (!BYTES_BIG_ENDIAN)
1221 return false;
1223 /* Only composite types smaller than or equal to 16 bytes can
1224 be potentially returned in registers. */
1225 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1226 || int_size_in_bytes (valtype) <= 0
1227 || int_size_in_bytes (valtype) > 16)
1228 return false;
1230 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1231 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1232 is always passed/returned in the least significant bits of fp/simd
1233 register(s). */
1234 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1235 &dummy_mode, &dummy_int, NULL))
1236 return false;
1238 return true;
1241 /* Implement TARGET_FUNCTION_VALUE.
1242 Define how to find the value returned by a function. */
1244 static rtx
1245 aarch64_function_value (const_tree type, const_tree func,
1246 bool outgoing ATTRIBUTE_UNUSED)
1248 enum machine_mode mode;
1249 int unsignedp;
1250 int count;
1251 enum machine_mode ag_mode;
1253 mode = TYPE_MODE (type);
1254 if (INTEGRAL_TYPE_P (type))
1255 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1257 if (aarch64_return_in_msb (type))
1259 HOST_WIDE_INT size = int_size_in_bytes (type);
1261 if (size % UNITS_PER_WORD != 0)
1263 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1264 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1268 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1269 &ag_mode, &count, NULL))
1271 if (!aarch64_composite_type_p (type, mode))
1273 gcc_assert (count == 1 && mode == ag_mode);
1274 return gen_rtx_REG (mode, V0_REGNUM);
1276 else
1278 int i;
1279 rtx par;
1281 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1282 for (i = 0; i < count; i++)
1284 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1285 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1286 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1287 XVECEXP (par, 0, i) = tmp;
1289 return par;
1292 else
1293 return gen_rtx_REG (mode, R0_REGNUM);
1296 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1297 Return true if REGNO is the number of a hard register in which the values
1298 of called function may come back. */
1300 static bool
1301 aarch64_function_value_regno_p (const unsigned int regno)
1303 /* Maximum of 16 bytes can be returned in the general registers. Examples
1304 of 16-byte return values are: 128-bit integers and 16-byte small
1305 structures (excluding homogeneous floating-point aggregates). */
1306 if (regno == R0_REGNUM || regno == R1_REGNUM)
1307 return true;
1309 /* Up to four fp/simd registers can return a function value, e.g. a
1310 homogeneous floating-point aggregate having four members. */
1311 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1312 return !TARGET_GENERAL_REGS_ONLY;
1314 return false;
1317 /* Implement TARGET_RETURN_IN_MEMORY.
1319 If the type T of the result of a function is such that
1320 void func (T arg)
1321 would require that arg be passed as a value in a register (or set of
1322 registers) according to the parameter passing rules, then the result
1323 is returned in the same registers as would be used for such an
1324 argument. */
1326 static bool
1327 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1329 HOST_WIDE_INT size;
1330 enum machine_mode ag_mode;
1331 int count;
1333 if (!AGGREGATE_TYPE_P (type)
1334 && TREE_CODE (type) != COMPLEX_TYPE
1335 && TREE_CODE (type) != VECTOR_TYPE)
1336 /* Simple scalar types always returned in registers. */
1337 return false;
1339 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1340 type,
1341 &ag_mode,
1342 &count,
1343 NULL))
1344 return false;
1346 /* Types larger than 2 registers returned in memory. */
1347 size = int_size_in_bytes (type);
1348 return (size < 0 || size > 2 * UNITS_PER_WORD);
1351 static bool
1352 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1353 const_tree type, int *nregs)
1355 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1356 return aarch64_vfp_is_call_or_return_candidate (mode,
1357 type,
1358 &pcum->aapcs_vfp_rmode,
1359 nregs,
1360 NULL);
1363 /* Given MODE and TYPE of a function argument, return the alignment in
1364 bits. The idea is to suppress any stronger alignment requested by
1365 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1366 This is a helper function for local use only. */
1368 static unsigned int
1369 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1371 unsigned int alignment;
1373 if (type)
1375 if (!integer_zerop (TYPE_SIZE (type)))
1377 if (TYPE_MODE (type) == mode)
1378 alignment = TYPE_ALIGN (type);
1379 else
1380 alignment = GET_MODE_ALIGNMENT (mode);
1382 else
1383 alignment = 0;
1385 else
1386 alignment = GET_MODE_ALIGNMENT (mode);
1388 return alignment;
1391 /* Layout a function argument according to the AAPCS64 rules. The rule
1392 numbers refer to the rule numbers in the AAPCS64. */
1394 static void
1395 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1396 const_tree type,
1397 bool named ATTRIBUTE_UNUSED)
1399 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1400 int ncrn, nvrn, nregs;
1401 bool allocate_ncrn, allocate_nvrn;
1403 /* We need to do this once per argument. */
1404 if (pcum->aapcs_arg_processed)
1405 return;
1407 pcum->aapcs_arg_processed = true;
1409 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1410 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1411 mode,
1412 type,
1413 &nregs);
1415 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1416 The following code thus handles passing by SIMD/FP registers first. */
1418 nvrn = pcum->aapcs_nvrn;
1420 /* C.1 - C.5 for floating point, homogeneous floating-point aggregates (HFA)
1421 and homogeneous short-vector aggregates (HVA). */
1422 if (allocate_nvrn)
1424 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1426 pcum->aapcs_nextnvrn = nvrn + nregs;
1427 if (!aarch64_composite_type_p (type, mode))
1429 gcc_assert (nregs == 1);
1430 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1432 else
1434 rtx par;
1435 int i;
1436 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1437 for (i = 0; i < nregs; i++)
1439 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1440 V0_REGNUM + nvrn + i);
1441 tmp = gen_rtx_EXPR_LIST
1442 (VOIDmode, tmp,
1443 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1444 XVECEXP (par, 0, i) = tmp;
1446 pcum->aapcs_reg = par;
1448 return;
1450 else
1452 /* C.3 NSRN is set to 8. */
1453 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1454 goto on_stack;
1458 ncrn = pcum->aapcs_ncrn;
1459 nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1460 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1463 /* C.6 - C.9, though the sign and zero extension semantics are
1464 handled elsewhere. This is the case where the argument fits
1465 entirely in general registers. */
1466 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1468 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1470 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1472 /* C.8 if the argument has an alignment of 16 then the NGRN is
1473 rounded up to the next even number. */
1474 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1476 ++ncrn;
1477 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1479 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1480 A reg is still generated for it, but the caller should be smart
1481 enough not to use it. */
1482 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1484 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1486 else
1488 rtx par;
1489 int i;
1491 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1492 for (i = 0; i < nregs; i++)
1494 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1495 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1496 GEN_INT (i * UNITS_PER_WORD));
1497 XVECEXP (par, 0, i) = tmp;
1499 pcum->aapcs_reg = par;
1502 pcum->aapcs_nextncrn = ncrn + nregs;
1503 return;
1506 /* C.11 */
1507 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1509 /* The argument is passed on the stack; record the needed number of words for
1510 this argument (we can re-use NREGS) and align the total size if
1511 necessary. */
1512 on_stack:
1513 pcum->aapcs_stack_words = nregs;
1514 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1515 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1516 16 / UNITS_PER_WORD) + 1;
1517 return;
1520 /* Implement TARGET_FUNCTION_ARG. */
1522 static rtx
1523 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1524 const_tree type, bool named)
1526 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1527 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1529 if (mode == VOIDmode)
1530 return NULL_RTX;
1532 aarch64_layout_arg (pcum_v, mode, type, named);
1533 return pcum->aapcs_reg;
1536 void
1537 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1538 const_tree fntype ATTRIBUTE_UNUSED,
1539 rtx libname ATTRIBUTE_UNUSED,
1540 const_tree fndecl ATTRIBUTE_UNUSED,
1541 unsigned n_named ATTRIBUTE_UNUSED)
1543 pcum->aapcs_ncrn = 0;
1544 pcum->aapcs_nvrn = 0;
1545 pcum->aapcs_nextncrn = 0;
1546 pcum->aapcs_nextnvrn = 0;
1547 pcum->pcs_variant = ARM_PCS_AAPCS64;
1548 pcum->aapcs_reg = NULL_RTX;
1549 pcum->aapcs_arg_processed = false;
1550 pcum->aapcs_stack_words = 0;
1551 pcum->aapcs_stack_size = 0;
1553 return;
1556 static void
1557 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1558 enum machine_mode mode,
1559 const_tree type,
1560 bool named)
1562 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1563 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1565 aarch64_layout_arg (pcum_v, mode, type, named);
1566 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1567 != (pcum->aapcs_stack_words != 0));
1568 pcum->aapcs_arg_processed = false;
1569 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1570 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1571 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1572 pcum->aapcs_stack_words = 0;
1573 pcum->aapcs_reg = NULL_RTX;
1577 bool
1578 aarch64_function_arg_regno_p (unsigned regno)
1580 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1581 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1584 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1585 PARM_BOUNDARY bits of alignment, but will be given anything up
1586 to STACK_BOUNDARY bits if the type requires it. This makes sure
1587 that both before and after the layout of each argument, the Next
1588 Stacked Argument Address (NSAA) will have a minimum alignment of
1589 8 bytes. */
1591 static unsigned int
1592 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1594 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1596 if (alignment < PARM_BOUNDARY)
1597 alignment = PARM_BOUNDARY;
1598 if (alignment > STACK_BOUNDARY)
1599 alignment = STACK_BOUNDARY;
1600 return alignment;
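/* Illustrative example (not part of the original source), assuming the
   usual AArch64 values PARM_BOUNDARY == 64 and STACK_BOUNDARY == 128:
   a char argument is still given a 64-bit boundary, while a type
   declared with 32-byte alignment is clamped down to 128 bits.  */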
1603 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1605 Return true if an argument passed on the stack should be padded upwards,
1606 i.e. if the least-significant byte of the stack slot has useful data.
1608 Small aggregate types are placed in the lowest memory address.
1610 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1612 bool
1613 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1615 /* On little-endian targets, the least significant byte of every stack
1616 argument is passed at the lowest byte address of the stack slot. */
1617 if (!BYTES_BIG_ENDIAN)
1618 return true;
1620 /* Otherwise, integral, floating-point and pointer types are padded downward:
1621 the least significant byte of a stack argument is passed at the highest
1622 byte address of the stack slot. */
1623 if (type
1624 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1625 || POINTER_TYPE_P (type))
1626 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1627 return false;
1629 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1630 return true;
1633 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1635 It specifies padding for the last (may also be the only)
1636 element of a block move between registers and memory. Assuming
1637 the block is in memory, padding upward means that the last
1638 element is padded after its most significant byte, while in
1639 downward padding, the last element is padded at its least
1640 significant byte side.
1642 Small aggregates and small complex types are always padded
1643 upwards.
1645 We don't need to worry about homogeneous floating-point or
1646 short-vector aggregates; their move is not affected by the
1647 padding direction determined here. Regardless of endianness,
1648 each element of such an aggregate is put in the least
1649 significant bits of a fp/simd register.
1651 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1652 register has useful data, and return the opposite if the most
1653 significant byte does. */
1655 bool
1656 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1657 bool first ATTRIBUTE_UNUSED)
1660 /* Small composite types are always padded upward. */
1661 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1663 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1664 : GET_MODE_SIZE (mode));
1665 if (size < 2 * UNITS_PER_WORD)
1666 return true;
1669 /* Otherwise, use the default padding. */
1670 return !BYTES_BIG_ENDIAN;
1673 static enum machine_mode
1674 aarch64_libgcc_cmp_return_mode (void)
1676 return SImode;
1679 static bool
1680 aarch64_frame_pointer_required (void)
1682 /* If the function contains dynamic stack allocations, we need to
1683 use the frame pointer to access the static parts of the frame. */
1684 if (cfun->calls_alloca)
1685 return true;
1687 /* We may have turned flag_omit_frame_pointer on in order to have this
1688 function called; if we did, we also set the 'faked_omit_frame_pointer' flag
1689 and we'll check it here.
1690 If we really did set flag_omit_frame_pointer normally, then we return false
1691 (no frame pointer required) in all cases. */
1693 if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
1694 return false;
1695 else if (flag_omit_leaf_frame_pointer)
1696 return !crtl->is_leaf;
1697 return true;
1700 /* Mark the registers that need to be saved by the callee and calculate
1701 the size of the callee-saved registers area and frame record (both FP
1702 and LR may be omitted). */
1703 static void
1704 aarch64_layout_frame (void)
1706 HOST_WIDE_INT offset = 0;
1707 int regno;
1709 if (reload_completed && cfun->machine->frame.laid_out)
1710 return;
1712 cfun->machine->frame.fp_lr_offset = 0;
1714 /* First mark all the registers that really need to be saved... */
1715 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1716 cfun->machine->frame.reg_offset[regno] = -1;
1718 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1719 cfun->machine->frame.reg_offset[regno] = -1;
1721 /* ... that includes the eh data registers (if needed)... */
1722 if (crtl->calls_eh_return)
1723 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1724 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1726 /* ... and any callee saved register that dataflow says is live. */
1727 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1728 if (df_regs_ever_live_p (regno)
1729 && !call_used_regs[regno])
1730 cfun->machine->frame.reg_offset[regno] = 0;
1732 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1733 if (df_regs_ever_live_p (regno)
1734 && !call_used_regs[regno])
1735 cfun->machine->frame.reg_offset[regno] = 0;
1737 if (frame_pointer_needed)
1739 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1740 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1741 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1744 /* Now assign stack slots for them. */
1745 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1746 if (cfun->machine->frame.reg_offset[regno] != -1)
1748 cfun->machine->frame.reg_offset[regno] = offset;
1749 offset += UNITS_PER_WORD;
1752 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1753 if (cfun->machine->frame.reg_offset[regno] != -1)
1755 cfun->machine->frame.reg_offset[regno] = offset;
1756 offset += UNITS_PER_WORD;
1759 if (frame_pointer_needed)
1761 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1762 offset += UNITS_PER_WORD;
1763 cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1766 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1768 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1769 offset += UNITS_PER_WORD;
1770 cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1773 cfun->machine->frame.padding0 =
1774 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1775 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1777 cfun->machine->frame.saved_regs_size = offset;
1778 cfun->machine->frame.laid_out = true;
1781 /* Make the last instruction frame-related and note that it performs
1782 the operation described by FRAME_PATTERN. */
1784 static void
1785 aarch64_set_frame_expr (rtx frame_pattern)
1787 rtx insn;
1789 insn = get_last_insn ();
1790 RTX_FRAME_RELATED_P (insn) = 1;
1791 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1792 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1793 frame_pattern,
1794 REG_NOTES (insn));
1797 static bool
1798 aarch64_register_saved_on_entry (int regno)
1800 return cfun->machine->frame.reg_offset[regno] != -1;
1804 static void
1805 aarch64_save_or_restore_fprs (int start_offset, int increment,
1806 bool restore, rtx base_rtx)
1809 unsigned regno;
1810 unsigned regno2;
1811 rtx insn;
1812 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1815 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1817 if (aarch64_register_saved_on_entry (regno))
1819 rtx mem;
1820 mem = gen_mem_ref (DFmode,
1821 plus_constant (Pmode,
1822 base_rtx,
1823 start_offset));
1825 for (regno2 = regno + 1;
1826 regno2 <= V31_REGNUM
1827 && !aarch64_register_saved_on_entry (regno2);
1828 regno2++)
1830 /* Empty loop. */
1832 if (regno2 <= V31_REGNUM &&
1833 aarch64_register_saved_on_entry (regno2))
1835 rtx mem2;
1836 /* Next highest register to be saved. */
1837 mem2 = gen_mem_ref (DFmode,
1838 plus_constant
1839 (Pmode,
1840 base_rtx,
1841 start_offset + increment));
1842 if (restore == false)
1844 insn = emit_insn
1845 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1846 mem2, gen_rtx_REG (DFmode, regno2)));
1849 else
1851 insn = emit_insn
1852 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1853 gen_rtx_REG (DFmode, regno2), mem2));
1855 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno));
1856 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno2));
1859 /* The first part of a frame-related parallel insn
1860 is always assumed to be relevant to the frame
1861 calculations; subsequent parts are only
1862 frame-related if explicitly marked. */
1863 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1864 1)) = 1;
1865 regno = regno2;
1866 start_offset += increment * 2;
1868 else
1870 if (restore == false)
1871 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1872 else
1874 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1875 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1877 start_offset += increment;
1879 RTX_FRAME_RELATED_P (insn) = 1;
1886 /* Offset from the stack pointer of where the saves and
1887 restores have to happen. */
1888 static void
1889 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1890 bool restore)
1892 rtx insn;
1893 rtx base_rtx = stack_pointer_rtx;
1894 HOST_WIDE_INT start_offset = offset;
1895 HOST_WIDE_INT increment = UNITS_PER_WORD;
1896 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1897 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1898 unsigned regno;
1899 unsigned regno2;
1901 for (regno = R0_REGNUM; regno <= limit; regno++)
1903 if (aarch64_register_saved_on_entry (regno))
1905 rtx mem;
1906 mem = gen_mem_ref (Pmode,
1907 plus_constant (Pmode,
1908 base_rtx,
1909 start_offset));
1911 for (regno2 = regno + 1;
1912 regno2 <= limit
1913 && !aarch64_register_saved_on_entry (regno2);
1914 regno2++)
1916 /* Empty loop. */
1918 if (regno2 <= limit &&
1919 aarch64_register_saved_on_entry (regno2))
1921 rtx mem2;
1922 /* Next highest register to be saved. */
1923 mem2 = gen_mem_ref (Pmode,
1924 plus_constant
1925 (Pmode,
1926 base_rtx,
1927 start_offset + increment));
1928 if (restore == false)
1930 insn = emit_insn
1931 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1932 mem2, gen_rtx_REG (DImode, regno2)));
1935 else
1937 insn = emit_insn
1938 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
1939 gen_rtx_REG (DImode, regno2), mem2));
1941 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1942 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
1945 /* The first part of a frame-related parallel insn
1946 is always assumed to be relevant to the frame
1947 calculations; subsequent parts are only
1948 frame-related if explicitly marked. */
1949 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1950 1)) = 1;
1951 regno = regno2;
1952 start_offset += increment * 2;
1954 else
1956 if (restore == false)
1957 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
1958 else
1960 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
1961 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1963 start_offset += increment;
1965 RTX_FRAME_RELATED_P (insn) = 1;
1969 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
1973 /* AArch64 stack frames generated by this compiler look like:
1975 +-------------------------------+
1977 | incoming stack arguments |
1979 +-------------------------------+ <-- arg_pointer_rtx
1981 | callee-allocated save area |
1982 | for register varargs |
1984 +-------------------------------+
1986 | local variables |
1988 +-------------------------------+ <-- frame_pointer_rtx
1990 | callee-saved registers |
1992 +-------------------------------+
1993 | LR' |
1994 +-------------------------------+
1995 | FP' |
1996 P +-------------------------------+ <-- hard_frame_pointer_rtx
1997 | dynamic allocation |
1998 +-------------------------------+
2000 | outgoing stack arguments |
2002 +-------------------------------+ <-- stack_pointer_rtx
2004 Dynamic stack allocations such as alloca insert data at point P.
2005 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
2006 hard_frame_pointer_rtx unchanged. */
2008 /* Generate the prologue instructions for entry into a function.
2009 Establish the stack frame by decreasing the stack pointer with a
2010 properly calculated size and, if necessary, create a frame record
2011 filled with the values of LR and previous frame pointer. The
2012 current FP is also set up if it is in use. */
2014 void
2015 aarch64_expand_prologue (void)
2017 /* sub sp, sp, #<frame_size>
2018 stp {fp, lr}, [sp, #<frame_size> - 16]
2019 add fp, sp, #<frame_size> - hardfp_offset
2020 stp {cs_reg}, [fp, #-16] etc.
2022 sub sp, sp, <final_adjustment_if_any>
2024 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
2025 HOST_WIDE_INT frame_size, offset;
2026 HOST_WIDE_INT fp_offset; /* FP offset from SP */
2027 rtx insn;
2029 aarch64_layout_frame ();
2030 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2031 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
2032 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
2033 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2034 + crtl->outgoing_args_size);
2035 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2036 STACK_BOUNDARY / BITS_PER_UNIT);
2038 if (flag_stack_usage_info)
2039 current_function_static_stack_size = frame_size;
2041 fp_offset = (offset
2042 - original_frame_size
2043 - cfun->machine->frame.saved_regs_size);
2045 /* Store pairs and load pairs have an offset range of only -512 to 504. */
2046 if (offset >= 512)
2048 /* When the frame is large, the stack pointer is first decreased to
2049 skip over the callee-allocated save area for register varargs,
2050 the local variable area and/or the callee-saved register area.
2051 This allows the pre-index write-back store pair
2052 instructions to be used to set up the stack frame
2053 efficiently. */
2054 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2055 if (offset >= 512)
2056 offset = cfun->machine->frame.saved_regs_size;
2058 frame_size -= (offset + crtl->outgoing_args_size);
2059 fp_offset = 0;
2061 if (frame_size >= 0x1000000)
2063 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2064 emit_move_insn (op0, GEN_INT (-frame_size));
2065 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2066 aarch64_set_frame_expr (gen_rtx_SET
2067 (Pmode, stack_pointer_rtx,
2068 plus_constant (Pmode,
2069 stack_pointer_rtx,
2070 -frame_size)));
2072 else if (frame_size > 0)
2074 if ((frame_size & 0xfff) != frame_size)
2076 insn = emit_insn (gen_add2_insn
2077 (stack_pointer_rtx,
2078 GEN_INT (-(frame_size
2079 & ~(HOST_WIDE_INT)0xfff))));
2080 RTX_FRAME_RELATED_P (insn) = 1;
2082 if ((frame_size & 0xfff) != 0)
2084 insn = emit_insn (gen_add2_insn
2085 (stack_pointer_rtx,
2086 GEN_INT (-(frame_size
2087 & (HOST_WIDE_INT)0xfff))));
2088 RTX_FRAME_RELATED_P (insn) = 1;
2092 else
2093 frame_size = -1;
2095 if (offset > 0)
2097 /* Save the frame pointer and lr if the frame pointer is needed
2098 first. Make the frame pointer point to the location of the
2099 old frame pointer on the stack. */
2100 if (frame_pointer_needed)
2102 rtx mem_fp, mem_lr;
2104 if (fp_offset)
2106 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2107 GEN_INT (-offset)));
2108 RTX_FRAME_RELATED_P (insn) = 1;
2109 aarch64_set_frame_expr (gen_rtx_SET
2110 (Pmode, stack_pointer_rtx,
2111 gen_rtx_MINUS (Pmode,
2112 stack_pointer_rtx,
2113 GEN_INT (offset))));
2114 mem_fp = gen_frame_mem (DImode,
2115 plus_constant (Pmode,
2116 stack_pointer_rtx,
2117 fp_offset));
2118 mem_lr = gen_frame_mem (DImode,
2119 plus_constant (Pmode,
2120 stack_pointer_rtx,
2121 fp_offset
2122 + UNITS_PER_WORD));
2123 insn = emit_insn (gen_store_pairdi (mem_fp,
2124 hard_frame_pointer_rtx,
2125 mem_lr,
2126 gen_rtx_REG (DImode,
2127 LR_REGNUM)));
2129 else
2131 insn = emit_insn (gen_storewb_pairdi_di
2132 (stack_pointer_rtx, stack_pointer_rtx,
2133 hard_frame_pointer_rtx,
2134 gen_rtx_REG (DImode, LR_REGNUM),
2135 GEN_INT (-offset),
2136 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2137 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2140 /* The first part of a frame-related parallel insn is always
2141 assumed to be relevant to the frame calculations;
2142 subsequent parts are only frame-related if explicitly
2143 marked. */
2144 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2145 RTX_FRAME_RELATED_P (insn) = 1;
2147 /* Set up frame pointer to point to the location of the
2148 previous frame pointer on the stack. */
2149 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2150 stack_pointer_rtx,
2151 GEN_INT (fp_offset)));
2152 aarch64_set_frame_expr (gen_rtx_SET
2153 (Pmode, hard_frame_pointer_rtx,
2154 plus_constant (Pmode,
2155 stack_pointer_rtx,
2156 fp_offset)));
2157 RTX_FRAME_RELATED_P (insn) = 1;
2158 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2159 hard_frame_pointer_rtx));
2161 else
2163 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2164 GEN_INT (-offset)));
2165 RTX_FRAME_RELATED_P (insn) = 1;
2168 aarch64_save_or_restore_callee_save_registers
2169 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2172 /* when offset >= 512,
2173 sub sp, sp, #<outgoing_args_size> */
2174 if (frame_size > -1)
2176 if (crtl->outgoing_args_size > 0)
2178 insn = emit_insn (gen_add2_insn
2179 (stack_pointer_rtx,
2180 GEN_INT (- crtl->outgoing_args_size)));
2181 RTX_FRAME_RELATED_P (insn) = 1;
2186 /* Generate the epilogue instructions for returning from a function. */
2187 void
2188 aarch64_expand_epilogue (bool for_sibcall)
2190 HOST_WIDE_INT original_frame_size, frame_size, offset;
2191 HOST_WIDE_INT fp_offset;
2192 rtx insn;
2193 rtx cfa_reg;
2195 aarch64_layout_frame ();
2196 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2197 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2198 + crtl->outgoing_args_size);
2199 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2200 STACK_BOUNDARY / BITS_PER_UNIT);
2202 fp_offset = (offset
2203 - original_frame_size
2204 - cfun->machine->frame.saved_regs_size);
2206 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2208 /* Store pairs and load pairs have an offset range of only -512 to 504. */
2209 if (offset >= 512)
2211 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2212 if (offset >= 512)
2213 offset = cfun->machine->frame.saved_regs_size;
2215 frame_size -= (offset + crtl->outgoing_args_size);
2216 fp_offset = 0;
2217 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2219 insn = emit_insn (gen_add2_insn
2220 (stack_pointer_rtx,
2221 GEN_INT (crtl->outgoing_args_size)));
2222 RTX_FRAME_RELATED_P (insn) = 1;
2225 else
2226 frame_size = -1;
2228 /* If there were outgoing arguments or we've done dynamic stack
2229 allocation, then restore the stack pointer from the frame
2230 pointer. This is at most one insn and more efficient than using
2231 GCC's internal mechanism. */
2232 if (frame_pointer_needed
2233 && (crtl->outgoing_args_size || cfun->calls_alloca))
2235 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2236 hard_frame_pointer_rtx,
2237 GEN_INT (- fp_offset)));
2238 RTX_FRAME_RELATED_P (insn) = 1;
2239 /* As SP is set to (FP - fp_offset), according to the rules in
2240 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2241 from the value of SP from now on. */
2242 cfa_reg = stack_pointer_rtx;
2245 aarch64_save_or_restore_callee_save_registers
2246 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2248 /* Restore the frame pointer and lr if the frame pointer is needed. */
2249 if (offset > 0)
2251 if (frame_pointer_needed)
2253 rtx mem_fp, mem_lr;
2255 if (fp_offset)
2257 mem_fp = gen_frame_mem (DImode,
2258 plus_constant (Pmode,
2259 stack_pointer_rtx,
2260 fp_offset));
2261 mem_lr = gen_frame_mem (DImode,
2262 plus_constant (Pmode,
2263 stack_pointer_rtx,
2264 fp_offset
2265 + UNITS_PER_WORD));
2266 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2267 mem_fp,
2268 gen_rtx_REG (DImode,
2269 LR_REGNUM),
2270 mem_lr));
2272 else
2274 insn = emit_insn (gen_loadwb_pairdi_di
2275 (stack_pointer_rtx,
2276 stack_pointer_rtx,
2277 hard_frame_pointer_rtx,
2278 gen_rtx_REG (DImode, LR_REGNUM),
2279 GEN_INT (offset),
2280 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2281 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2282 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2283 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2284 plus_constant (Pmode, cfa_reg,
2285 offset))));
2288 /* The first part of a frame-related parallel insn
2289 is always assumed to be relevant to the frame
2290 calculations; subsequent parts are only
2291 frame-related if explicitly marked. */
2292 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2293 RTX_FRAME_RELATED_P (insn) = 1;
2294 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2295 add_reg_note (insn, REG_CFA_RESTORE,
2296 gen_rtx_REG (DImode, LR_REGNUM));
2298 if (fp_offset)
2300 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2301 GEN_INT (offset)));
2302 RTX_FRAME_RELATED_P (insn) = 1;
2305 else
2307 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2308 GEN_INT (offset)));
2309 RTX_FRAME_RELATED_P (insn) = 1;
2313 /* Stack adjustment for exception handler. */
2314 if (crtl->calls_eh_return)
2316 /* We need to unwind the stack by the offset computed by
2317 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2318 based on SP. Ideally we would update the SP and define the
2319 CFA along the lines of:
2321 SP = SP + EH_RETURN_STACKADJ_RTX
2322 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2324 However the dwarf emitter only understands a constant
2325 register offset.
2327 The solution chosen here is to use the otherwise unused IP0
2328 as a temporary register to hold the current SP value. The
2329 CFA is described using IP0; then SP is modified. */
2331 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2333 insn = emit_move_insn (ip0, stack_pointer_rtx);
2334 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2335 RTX_FRAME_RELATED_P (insn) = 1;
2337 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2339 /* Ensure the assignment to IP0 does not get optimized away. */
2340 emit_use (ip0);
2343 if (frame_size > -1)
2345 if (frame_size >= 0x1000000)
2347 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2348 emit_move_insn (op0, GEN_INT (frame_size));
2349 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2350 aarch64_set_frame_expr (gen_rtx_SET
2351 (Pmode, stack_pointer_rtx,
2352 plus_constant (Pmode,
2353 stack_pointer_rtx,
2354 frame_size)));
2356 else if (frame_size > 0)
2358 if ((frame_size & 0xfff) != 0)
2360 insn = emit_insn (gen_add2_insn
2361 (stack_pointer_rtx,
2362 GEN_INT ((frame_size
2363 & (HOST_WIDE_INT) 0xfff))));
2364 RTX_FRAME_RELATED_P (insn) = 1;
2366 if ((frame_size & 0xfff) != frame_size)
2368 insn = emit_insn (gen_add2_insn
2369 (stack_pointer_rtx,
2370 GEN_INT ((frame_size
2371 & ~ (HOST_WIDE_INT) 0xfff))));
2372 RTX_FRAME_RELATED_P (insn) = 1;
2376 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2377 plus_constant (Pmode,
2378 stack_pointer_rtx,
2379 offset)));
2382 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2383 if (!for_sibcall)
2384 emit_jump_insn (ret_rtx);
2387 /* Return the place to copy the exception unwinding return address to.
2388 This will probably be a stack slot, but could (in theory) be the
2389 return register. */
2391 aarch64_final_eh_return_addr (void)
2393 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2394 aarch64_layout_frame ();
2395 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2396 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2397 + crtl->outgoing_args_size);
2398 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2399 STACK_BOUNDARY / BITS_PER_UNIT);
2400 fp_offset = offset
2401 - original_frame_size
2402 - cfun->machine->frame.saved_regs_size;
2404 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2405 return gen_rtx_REG (DImode, LR_REGNUM);
2407 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2408 result in a store to save LR introduced by builtin_eh_return () being
2409 incorrectly deleted because the alias is not detected.
2410 So in the calculation of the address to copy the exception unwinding
2411 return address to, we distinguish two cases.
2412 If FP is needed and fp_offset is 0, then SP = FP and hence
2413 we return an SP-relative location, since all the addresses are SP-relative
2414 in this case. This prevents the store from being optimized away.
2415 If fp_offset is not 0, then the addresses will be FP-relative and
2416 therefore we return an FP-relative location. */
2418 if (frame_pointer_needed)
2420 if (fp_offset)
2421 return gen_frame_mem (DImode,
2422 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2423 else
2424 return gen_frame_mem (DImode,
2425 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2428 /* If FP is not needed, we calculate the location of LR, which would be
2429 at the top of the saved registers block. */
2431 return gen_frame_mem (DImode,
2432 plus_constant (Pmode,
2433 stack_pointer_rtx,
2434 fp_offset
2435 + cfun->machine->frame.saved_regs_size
2436 - 2 * UNITS_PER_WORD));
2439 /* Output code to build up a constant in a register. */
2440 static void
2441 aarch64_build_constant (int regnum, HOST_WIDE_INT val)
2443 if (aarch64_bitmask_imm (val, DImode))
2444 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2445 else
2447 int i;
2448 int ncount = 0;
2449 int zcount = 0;
2450 HOST_WIDE_INT valp = val >> 16;
2451 HOST_WIDE_INT valm;
2452 HOST_WIDE_INT tval;
2454 for (i = 16; i < 64; i += 16)
2456 valm = (valp & 0xffff);
2458 if (valm != 0)
2459 ++ zcount;
2461 if (valm != 0xffff)
2462 ++ ncount;
2464 valp >>= 16;
2467 /* zcount contains the number of additional MOVK instructions
2468 required if the constant is built up with an initial MOVZ instruction,
2469 while ncount is the number of MOVK instructions required if starting
2470 with a MOVN instruction. Choose the sequence that needs the fewer
2471 instructions, preferring MOVZ when the two counts are
2472 equal. */
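/* Illustrative example (not from the original sources): for
   val = 0x0000ffffffff1234 the 16-bit chunks above bit 15 are 0xffff,
   0xffff and 0x0000, giving zcount == 2 and ncount == 1.  The MOVN
   sequence therefore wins: the initial move materialises
   0xffffffffffff1234 (the low 16 bits of VAL with all other bits set,
   a MOVN-encodable value), and a single MOVK of 0x0000 into bits
   48..63 finishes the job -- two instructions instead of the three
   that the MOVZ-based sequence would need.  */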
2473 if (ncount < zcount)
2475 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2476 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2477 tval = 0xffff;
2479 else
2481 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2482 GEN_INT (val & 0xffff));
2483 tval = 0;
2486 val >>= 16;
2488 for (i = 16; i < 64; i += 16)
2490 if ((val & 0xffff) != tval)
2491 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2492 GEN_INT (i), GEN_INT (val & 0xffff)));
2493 val >>= 16;
2498 static void
2499 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2501 HOST_WIDE_INT mdelta = delta;
2502 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2503 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2505 if (mdelta < 0)
2506 mdelta = -mdelta;
2508 if (mdelta >= 4096 * 4096)
2510 aarch64_build_constant (scratchreg, delta);
2511 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2513 else if (mdelta > 0)
2515 if (mdelta >= 4096)
2517 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2518 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2519 if (delta < 0)
2520 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2521 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2522 else
2523 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2524 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2526 if (mdelta % 4096 != 0)
2528 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2529 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2530 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
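/* Illustrative example (not from the original sources): for
   delta == 10000 (0x2710) the code above emits three instructions:
   the scratch register is loaded with 10000 / 4096 == 2, that value
   shifted left by 12 (8192) is added to the destination, and the
   remaining 10000 % 4096 == 1808 is added as a plain immediate.  */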
2535 /* Output code to add DELTA to the first argument, and then jump
2536 to FUNCTION. Used for C++ multiple inheritance. */
2537 static void
2538 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2539 HOST_WIDE_INT delta,
2540 HOST_WIDE_INT vcall_offset,
2541 tree function)
2543 /* The this pointer is always in x0. Note that this differs from
2544 Arm where the this pointer may be bumped to r1 if r0 is required
2545 to return a pointer to an aggregate. On AArch64 a result value
2546 pointer will be in x8. */
2547 int this_regno = R0_REGNUM;
2548 rtx this_rtx, temp0, temp1, addr, insn, funexp;
2550 reload_completed = 1;
2551 emit_note (NOTE_INSN_PROLOGUE_END);
2553 if (vcall_offset == 0)
2554 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2555 else
2557 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2559 this_rtx = gen_rtx_REG (Pmode, this_regno);
2560 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2561 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2563 addr = this_rtx;
2564 if (delta != 0)
2566 if (delta >= -256 && delta < 256)
2567 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2568 plus_constant (Pmode, this_rtx, delta));
2569 else
2570 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2573 if (Pmode == ptr_mode)
2574 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2575 else
2576 aarch64_emit_move (temp0,
2577 gen_rtx_ZERO_EXTEND (Pmode,
2578 gen_rtx_MEM (ptr_mode, addr)));
2580 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2581 addr = plus_constant (Pmode, temp0, vcall_offset);
2582 else
2584 aarch64_build_constant (IP1_REGNUM, vcall_offset);
2585 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2588 if (Pmode == ptr_mode)
2589 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode, addr));
2590 else
2591 aarch64_emit_move (temp1,
2592 gen_rtx_SIGN_EXTEND (Pmode,
2593 gen_rtx_MEM (ptr_mode, addr)));
2595 emit_insn (gen_add2_insn (this_rtx, temp1));
2598 /* Generate a tail call to the target function. */
2599 if (!TREE_USED (function))
2601 assemble_external (function);
2602 TREE_USED (function) = 1;
2604 funexp = XEXP (DECL_RTL (function), 0);
2605 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2606 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2607 SIBLING_CALL_P (insn) = 1;
2609 insn = get_insns ();
2610 shorten_branches (insn);
2611 final_start_function (insn, file, 1);
2612 final (insn, file, 1);
2613 final_end_function ();
2615 /* Stop pretending to be a post-reload pass. */
2616 reload_completed = 0;
2619 static int
2620 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2622 if (GET_CODE (*x) == SYMBOL_REF)
2623 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2625 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2626 TLS offsets, not real symbol references. */
2627 if (GET_CODE (*x) == UNSPEC
2628 && XINT (*x, 1) == UNSPEC_TLS)
2629 return -1;
2631 return 0;
2634 static bool
2635 aarch64_tls_referenced_p (rtx x)
2637 if (!TARGET_HAVE_TLS)
2638 return false;
2640 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2644 static int
2645 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2647 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2648 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2650 if (*imm1 < *imm2)
2651 return -1;
2652 if (*imm1 > *imm2)
2653 return +1;
2654 return 0;
2658 static void
2659 aarch64_build_bitmask_table (void)
2661 unsigned HOST_WIDE_INT mask, imm;
2662 unsigned int log_e, e, s, r;
2663 unsigned int nimms = 0;
2665 for (log_e = 1; log_e <= 6; log_e++)
2667 e = 1 << log_e;
2668 if (e == 64)
2669 mask = ~(HOST_WIDE_INT) 0;
2670 else
2671 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2672 for (s = 1; s < e; s++)
2674 for (r = 0; r < e; r++)
2676 /* set s consecutive bits to 1 (s < 64) */
2677 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2678 /* rotate right by r */
2679 if (r != 0)
2680 imm = ((imm >> r) | (imm << (e - r))) & mask;
2681 /* replicate the constant depending on SIMD size */
2682 switch (log_e) {
2683 case 1: imm |= (imm << 2);
2684 case 2: imm |= (imm << 4);
2685 case 3: imm |= (imm << 8);
2686 case 4: imm |= (imm << 16);
2687 case 5: imm |= (imm << 32);
2688 case 6:
2689 break;
2690 default:
2691 gcc_unreachable ();
2693 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2694 aarch64_bitmasks[nimms++] = imm;
2699 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2700 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2701 aarch64_bitmasks_cmp);
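/* Illustrative examples (not from the original sources) of values the
   table built above contains: 0x5555555555555555 (one set bit per
   2-bit element), 0x00ff00ff00ff00ff (eight set bits per 16-bit
   element) and 0xffffffff00000000 (32 consecutive set bits rotated
   within a single 64-bit element).  aarch64_bitmask_imm below simply
   binary-searches this sorted table.  */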
2705 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2706 a left shift of 0 or 12 bits. */
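/* E.g. (illustrative) 0xabc and 0xabc000 are accepted, while 0xabc00
   is rejected because its set bits straddle the two 12-bit windows.  */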
2707 bool
2708 aarch64_uimm12_shift (HOST_WIDE_INT val)
2710 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2711 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val);
2716 /* Return true if val is an immediate that can be loaded into a
2717 register by a MOVZ instruction. */
2718 static bool
2719 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2721 if (GET_MODE_SIZE (mode) > 4)
2723 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2724 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2725 return 1;
2727 else
2729 /* Ignore sign extension. */
2730 val &= (HOST_WIDE_INT) 0xffffffff;
2732 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2733 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2737 /* Return true if val is a valid bitmask immediate. */
2738 bool
2739 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2741 if (GET_MODE_SIZE (mode) < 8)
2743 /* Replicate bit pattern. */
2744 val &= (HOST_WIDE_INT) 0xffffffff;
2745 val |= val << 32;
2747 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2748 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2752 /* Return true if val is an immediate that can be loaded into a
2753 register in a single instruction. */
2754 bool
2755 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2757 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2758 return 1;
2759 return aarch64_bitmask_imm (val, mode);
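/* Illustrative examples (not from the original sources):
   0x0000000012340000 is a single MOVZ (16 bits at shift 16),
   0xffffffffffff1234 is a single MOVN, and 0x00ff00ff00ff00ff is a
   single bitmask-immediate move; a value such as 0x0000000012345678
   needs more than one instruction and is rejected by aarch64_move_imm
   above.  */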
2762 static bool
2763 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2765 rtx base, offset;
2767 if (GET_CODE (x) == HIGH)
2768 return true;
2770 split_const (x, &base, &offset);
2771 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2773 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2774 != SYMBOL_FORCE_TO_MEM)
2775 return true;
2776 else
2777 /* Avoid generating a 64-bit relocation in ILP32; leave it
2778 to aarch64_expand_mov_immediate to handle it properly. */
2779 return mode != ptr_mode;
2782 return aarch64_tls_referenced_p (x);
2785 /* Return true if register REGNO is a valid index register.
2786 STRICT_P is true if REG_OK_STRICT is in effect. */
2788 bool
2789 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2791 if (!HARD_REGISTER_NUM_P (regno))
2793 if (!strict_p)
2794 return true;
2796 if (!reg_renumber)
2797 return false;
2799 regno = reg_renumber[regno];
2801 return GP_REGNUM_P (regno);
2804 /* Return true if register REGNO is a valid base register.
2805 STRICT_P is true if REG_OK_STRICT is in effect. */
2807 bool
2808 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2810 if (!HARD_REGISTER_NUM_P (regno))
2812 if (!strict_p)
2813 return true;
2815 if (!reg_renumber)
2816 return false;
2818 regno = reg_renumber[regno];
2821 /* The fake registers will be eliminated to either the stack or
2822 hard frame pointer, both of which are usually valid base registers.
2823 Reload deals with the cases where the eliminated form isn't valid. */
2824 return (GP_REGNUM_P (regno)
2825 || regno == SP_REGNUM
2826 || regno == FRAME_POINTER_REGNUM
2827 || regno == ARG_POINTER_REGNUM);
2830 /* Return true if X is a valid base register.
2831 STRICT_P is true if REG_OK_STRICT is in effect. */
2833 static bool
2834 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2836 if (!strict_p && GET_CODE (x) == SUBREG)
2837 x = SUBREG_REG (x);
2839 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2842 /* Return true if address offset is a valid index. If it is, fill in INFO
2843 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
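/* Illustrative mapping (not from the original sources) between the RTL
   shapes matched below and AArch64 addressing syntax, for an 8-byte
   access:

     (plus (reg X0) (reg X1))                              [x0,x1]
     (plus (reg X0) (mult (reg X1) (const_int 8)))         [x0,x1,lsl 3]
     (plus (reg X0)
	   (mult (sign_extend (reg W1)) (const_int 8)))    [x0,w1,sxtw 3]

   The shift must either be zero or match the access size, as checked
   at the end of the function.  */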
2845 static bool
2846 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2847 enum machine_mode mode, bool strict_p)
2849 enum aarch64_address_type type;
2850 rtx index;
2851 int shift;
2853 /* (reg:P) */
2854 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2855 && GET_MODE (x) == Pmode)
2857 type = ADDRESS_REG_REG;
2858 index = x;
2859 shift = 0;
2861 /* (sign_extend:DI (reg:SI)) */
2862 else if ((GET_CODE (x) == SIGN_EXTEND
2863 || GET_CODE (x) == ZERO_EXTEND)
2864 && GET_MODE (x) == DImode
2865 && GET_MODE (XEXP (x, 0)) == SImode)
2867 type = (GET_CODE (x) == SIGN_EXTEND)
2868 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2869 index = XEXP (x, 0);
2870 shift = 0;
2872 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2873 else if (GET_CODE (x) == MULT
2874 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2875 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2876 && GET_MODE (XEXP (x, 0)) == DImode
2877 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2878 && CONST_INT_P (XEXP (x, 1)))
2880 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2881 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2882 index = XEXP (XEXP (x, 0), 0);
2883 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2885 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2886 else if (GET_CODE (x) == ASHIFT
2887 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2888 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2889 && GET_MODE (XEXP (x, 0)) == DImode
2890 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2891 && CONST_INT_P (XEXP (x, 1)))
2893 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2894 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2895 index = XEXP (XEXP (x, 0), 0);
2896 shift = INTVAL (XEXP (x, 1));
2898 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2899 else if ((GET_CODE (x) == SIGN_EXTRACT
2900 || GET_CODE (x) == ZERO_EXTRACT)
2901 && GET_MODE (x) == DImode
2902 && GET_CODE (XEXP (x, 0)) == MULT
2903 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2904 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2906 type = (GET_CODE (x) == SIGN_EXTRACT)
2907 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2908 index = XEXP (XEXP (x, 0), 0);
2909 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2910 if (INTVAL (XEXP (x, 1)) != 32 + shift
2911 || INTVAL (XEXP (x, 2)) != 0)
2912 shift = -1;
2914 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2915 (const_int 0xffffffff<<shift)) */
2916 else if (GET_CODE (x) == AND
2917 && GET_MODE (x) == DImode
2918 && GET_CODE (XEXP (x, 0)) == MULT
2919 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2920 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2921 && CONST_INT_P (XEXP (x, 1)))
2923 type = ADDRESS_REG_UXTW;
2924 index = XEXP (XEXP (x, 0), 0);
2925 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2926 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2927 shift = -1;
2929 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2930 else if ((GET_CODE (x) == SIGN_EXTRACT
2931 || GET_CODE (x) == ZERO_EXTRACT)
2932 && GET_MODE (x) == DImode
2933 && GET_CODE (XEXP (x, 0)) == ASHIFT
2934 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2935 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2937 type = (GET_CODE (x) == SIGN_EXTRACT)
2938 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2939 index = XEXP (XEXP (x, 0), 0);
2940 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2941 if (INTVAL (XEXP (x, 1)) != 32 + shift
2942 || INTVAL (XEXP (x, 2)) != 0)
2943 shift = -1;
2945 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2946 (const_int 0xffffffff<<shift)) */
2947 else if (GET_CODE (x) == AND
2948 && GET_MODE (x) == DImode
2949 && GET_CODE (XEXP (x, 0)) == ASHIFT
2950 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2951 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2952 && CONST_INT_P (XEXP (x, 1)))
2954 type = ADDRESS_REG_UXTW;
2955 index = XEXP (XEXP (x, 0), 0);
2956 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2957 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2958 shift = -1;
2960 /* (mult:P (reg:P) (const_int scale)) */
2961 else if (GET_CODE (x) == MULT
2962 && GET_MODE (x) == Pmode
2963 && GET_MODE (XEXP (x, 0)) == Pmode
2964 && CONST_INT_P (XEXP (x, 1)))
2966 type = ADDRESS_REG_REG;
2967 index = XEXP (x, 0);
2968 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2970 /* (ashift:P (reg:P) (const_int shift)) */
2971 else if (GET_CODE (x) == ASHIFT
2972 && GET_MODE (x) == Pmode
2973 && GET_MODE (XEXP (x, 0)) == Pmode
2974 && CONST_INT_P (XEXP (x, 1)))
2976 type = ADDRESS_REG_REG;
2977 index = XEXP (x, 0);
2978 shift = INTVAL (XEXP (x, 1));
2980 else
2981 return false;
2983 if (GET_CODE (index) == SUBREG)
2984 index = SUBREG_REG (index);
2986 if ((shift == 0 ||
2987 (shift > 0 && shift <= 3
2988 && (1 << shift) == GET_MODE_SIZE (mode)))
2989 && REG_P (index)
2990 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
2992 info->type = type;
2993 info->offset = index;
2994 info->shift = shift;
2995 return true;
2998 return false;
3001 static inline bool
3002 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3004 return (offset >= -64 * GET_MODE_SIZE (mode)
3005 && offset < 64 * GET_MODE_SIZE (mode)
3006 && offset % GET_MODE_SIZE (mode) == 0);
3009 static inline bool
3010 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3011 HOST_WIDE_INT offset)
3013 return offset >= -256 && offset < 256;
3016 static inline bool
3017 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3019 return (offset >= 0
3020 && offset < 4096 * GET_MODE_SIZE (mode)
3021 && offset % GET_MODE_SIZE (mode) == 0);
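/* Illustrative summary (not from the original sources): for an 8-byte
   (DImode) access the three predicates above accept, respectively,
   multiples of 8 in [-512, 504], any offset in [-256, 255], and
   multiples of 8 in [0, 32760].  */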
3024 /* Return true if X is a valid address for machine mode MODE. If it is,
3025 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3026 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3028 static bool
3029 aarch64_classify_address (struct aarch64_address_info *info,
3030 rtx x, enum machine_mode mode,
3031 RTX_CODE outer_code, bool strict_p)
3033 enum rtx_code code = GET_CODE (x);
3034 rtx op0, op1;
3035 bool allow_reg_index_p =
3036 outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
3038 /* Don't support anything other than POST_INC or REG addressing for
3039 AdvSIMD. */
3040 if (aarch64_vector_mode_p (mode)
3041 && (code != POST_INC && code != REG))
3042 return false;
3044 switch (code)
3046 case REG:
3047 case SUBREG:
3048 info->type = ADDRESS_REG_IMM;
3049 info->base = x;
3050 info->offset = const0_rtx;
3051 return aarch64_base_register_rtx_p (x, strict_p);
3053 case PLUS:
3054 op0 = XEXP (x, 0);
3055 op1 = XEXP (x, 1);
3056 if (GET_MODE_SIZE (mode) != 0
3057 && CONST_INT_P (op1)
3058 && aarch64_base_register_rtx_p (op0, strict_p))
3060 HOST_WIDE_INT offset = INTVAL (op1);
3062 info->type = ADDRESS_REG_IMM;
3063 info->base = op0;
3064 info->offset = op1;
3066 /* TImode and TFmode values are allowed in both pairs of X
3067 registers and individual Q registers. The available
3068 address modes are:
3069 X,X: 7-bit signed scaled offset
3070 Q: 9-bit signed offset
3071 We conservatively require an offset representable in either mode. */
3073 if (mode == TImode || mode == TFmode)
3074 return (offset_7bit_signed_scaled_p (mode, offset)
3075 && offset_9bit_signed_unscaled_p (mode, offset));
3077 if (outer_code == PARALLEL)
3078 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3079 && offset_7bit_signed_scaled_p (mode, offset));
3080 else
3081 return (offset_9bit_signed_unscaled_p (mode, offset)
3082 || offset_12bit_unsigned_scaled_p (mode, offset));
3085 if (allow_reg_index_p)
3087 /* Look for base + (scaled/extended) index register. */
3088 if (aarch64_base_register_rtx_p (op0, strict_p)
3089 && aarch64_classify_index (info, op1, mode, strict_p))
3091 info->base = op0;
3092 return true;
3094 if (aarch64_base_register_rtx_p (op1, strict_p)
3095 && aarch64_classify_index (info, op0, mode, strict_p))
3097 info->base = op1;
3098 return true;
3102 return false;
3104 case POST_INC:
3105 case POST_DEC:
3106 case PRE_INC:
3107 case PRE_DEC:
3108 info->type = ADDRESS_REG_WB;
3109 info->base = XEXP (x, 0);
3110 info->offset = NULL_RTX;
3111 return aarch64_base_register_rtx_p (info->base, strict_p);
3113 case POST_MODIFY:
3114 case PRE_MODIFY:
3115 info->type = ADDRESS_REG_WB;
3116 info->base = XEXP (x, 0);
3117 if (GET_CODE (XEXP (x, 1)) == PLUS
3118 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3119 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3120 && aarch64_base_register_rtx_p (info->base, strict_p))
3122 HOST_WIDE_INT offset;
3123 info->offset = XEXP (XEXP (x, 1), 1);
3124 offset = INTVAL (info->offset);
3126 /* TImode and TFmode values are allowed in both pairs of X
3127 registers and individual Q registers. The available
3128 address modes are:
3129 X,X: 7-bit signed scaled offset
3130 Q: 9-bit signed offset
3131 We conservatively require an offset representable in either mode. */
3133 if (mode == TImode || mode == TFmode)
3134 return (offset_7bit_signed_scaled_p (mode, offset)
3135 && offset_9bit_signed_unscaled_p (mode, offset));
3137 if (outer_code == PARALLEL)
3138 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3139 && offset_7bit_signed_scaled_p (mode, offset));
3140 else
3141 return offset_9bit_signed_unscaled_p (mode, offset);
3143 return false;
3145 case CONST:
3146 case SYMBOL_REF:
3147 case LABEL_REF:
3148 /* load literal: pc-relative constant pool entry. Only supported
3149 for SI mode or larger. */
3150 info->type = ADDRESS_SYMBOLIC;
3151 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3153 rtx sym, addend;
3155 split_const (x, &sym, &addend);
3156 return (GET_CODE (sym) == LABEL_REF
3157 || (GET_CODE (sym) == SYMBOL_REF
3158 && CONSTANT_POOL_ADDRESS_P (sym)));
3160 return false;
3162 case LO_SUM:
3163 info->type = ADDRESS_LO_SUM;
3164 info->base = XEXP (x, 0);
3165 info->offset = XEXP (x, 1);
3166 if (allow_reg_index_p
3167 && aarch64_base_register_rtx_p (info->base, strict_p))
3169 rtx sym, offs;
3170 split_const (info->offset, &sym, &offs);
3171 if (GET_CODE (sym) == SYMBOL_REF
3172 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3173 == SYMBOL_SMALL_ABSOLUTE))
3175 /* The symbol and offset must be aligned to the access size. */
3176 unsigned int align;
3177 unsigned int ref_size;
3179 if (CONSTANT_POOL_ADDRESS_P (sym))
3180 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3181 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3183 tree exp = SYMBOL_REF_DECL (sym);
3184 align = TYPE_ALIGN (TREE_TYPE (exp));
3185 align = CONSTANT_ALIGNMENT (exp, align);
3187 else if (SYMBOL_REF_DECL (sym))
3188 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3189 else
3190 align = BITS_PER_UNIT;
3192 ref_size = GET_MODE_SIZE (mode);
3193 if (ref_size == 0)
3194 ref_size = GET_MODE_SIZE (DImode);
3196 return ((INTVAL (offs) & (ref_size - 1)) == 0
3197 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3200 return false;
3202 default:
3203 return false;
3207 bool
3208 aarch64_symbolic_address_p (rtx x)
3210 rtx offset;
3212 split_const (x, &x, &offset);
3213 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3216 /* Classify the base of symbolic expression X, given that X appears in
3217 context CONTEXT. */
3219 enum aarch64_symbol_type
3220 aarch64_classify_symbolic_expression (rtx x,
3221 enum aarch64_symbol_context context)
3223 rtx offset;
3225 split_const (x, &x, &offset);
3226 return aarch64_classify_symbol (x, context);
3230 /* Return TRUE if X is a legitimate address for accessing memory in
3231 mode MODE. */
3232 static bool
3233 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3235 struct aarch64_address_info addr;
3237 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3240 /* Return TRUE if X is a legitimate address for accessing memory in
3241 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3242 pair operation. */
3243 bool
3244 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3245 RTX_CODE outer_code, bool strict_p)
3247 struct aarch64_address_info addr;
3249 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3252 /* Return TRUE if rtx X is immediate constant 0.0 */
3253 bool
3254 aarch64_float_const_zero_rtx_p (rtx x)
3256 REAL_VALUE_TYPE r;
3258 if (GET_MODE (x) == VOIDmode)
3259 return false;
3261 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3262 if (REAL_VALUE_MINUS_ZERO (r))
3263 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3264 return REAL_VALUES_EQUAL (r, dconst0);
3267 /* Return the fixed registers used for condition codes. */
3269 static bool
3270 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3272 *p1 = CC_REGNUM;
3273 *p2 = INVALID_REGNUM;
3274 return true;
3277 enum machine_mode
3278 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3280 /* All floating point compares return CCFP if it is an equality
3281 comparison, and CCFPE otherwise. */
3282 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3284 switch (code)
3286 case EQ:
3287 case NE:
3288 case UNORDERED:
3289 case ORDERED:
3290 case UNLT:
3291 case UNLE:
3292 case UNGT:
3293 case UNGE:
3294 case UNEQ:
3295 case LTGT:
3296 return CCFPmode;
3298 case LT:
3299 case LE:
3300 case GT:
3301 case GE:
3302 return CCFPEmode;
3304 default:
3305 gcc_unreachable ();
3309 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3310 && y == const0_rtx
3311 && (code == EQ || code == NE || code == LT || code == GE)
3312 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3313 || GET_CODE (x) == NEG))
3314 return CC_NZmode;
3316 /* A compare with a shifted or negated operand. Because of canonicalization,
3317 the comparison will have to be swapped when we emit the assembly
3318 code. */
3319 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3320 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3321 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3322 || GET_CODE (x) == LSHIFTRT
3323 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND
3324 || GET_CODE (x) == NEG))
3325 return CC_SWPmode;
3327 /* A compare of a mode narrower than SI mode against zero can be done
3328 by extending the value in the comparison. */
3329 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3330 && y == const0_rtx)
3331 /* Only use sign-extension if we really need it. */
3332 return ((code == GT || code == GE || code == LE || code == LT)
3333 ? CC_SESWPmode : CC_ZESWPmode);
3335 /* For everything else, return CCmode. */
3336 return CCmode;
3339 static unsigned
3340 aarch64_get_condition_code (rtx x)
3342 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3343 enum rtx_code comp_code = GET_CODE (x);
3345 if (GET_MODE_CLASS (mode) != MODE_CC)
3346 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3348 switch (mode)
3350 case CCFPmode:
3351 case CCFPEmode:
3352 switch (comp_code)
3354 case GE: return AARCH64_GE;
3355 case GT: return AARCH64_GT;
3356 case LE: return AARCH64_LS;
3357 case LT: return AARCH64_MI;
3358 case NE: return AARCH64_NE;
3359 case EQ: return AARCH64_EQ;
3360 case ORDERED: return AARCH64_VC;
3361 case UNORDERED: return AARCH64_VS;
3362 case UNLT: return AARCH64_LT;
3363 case UNLE: return AARCH64_LE;
3364 case UNGT: return AARCH64_HI;
3365 case UNGE: return AARCH64_PL;
3366 default: gcc_unreachable ();
3368 break;
3370 case CCmode:
3371 switch (comp_code)
3373 case NE: return AARCH64_NE;
3374 case EQ: return AARCH64_EQ;
3375 case GE: return AARCH64_GE;
3376 case GT: return AARCH64_GT;
3377 case LE: return AARCH64_LE;
3378 case LT: return AARCH64_LT;
3379 case GEU: return AARCH64_CS;
3380 case GTU: return AARCH64_HI;
3381 case LEU: return AARCH64_LS;
3382 case LTU: return AARCH64_CC;
3383 default: gcc_unreachable ();
3385 break;
3387 case CC_SWPmode:
3388 case CC_ZESWPmode:
3389 case CC_SESWPmode:
3390 switch (comp_code)
3392 case NE: return AARCH64_NE;
3393 case EQ: return AARCH64_EQ;
3394 case GE: return AARCH64_LE;
3395 case GT: return AARCH64_LT;
3396 case LE: return AARCH64_GE;
3397 case LT: return AARCH64_GT;
3398 case GEU: return AARCH64_LS;
3399 case GTU: return AARCH64_CC;
3400 case LEU: return AARCH64_CS;
3401 case LTU: return AARCH64_HI;
3402 default: gcc_unreachable ();
3404 break;
3406 case CC_NZmode:
3407 switch (comp_code)
3409 case NE: return AARCH64_NE;
3410 case EQ: return AARCH64_EQ;
3411 case GE: return AARCH64_PL;
3412 case LT: return AARCH64_MI;
3413 default: gcc_unreachable ();
3415 break;
3417 default:
3418 gcc_unreachable ();
3419 break;
3423 static unsigned
3424 bit_count (unsigned HOST_WIDE_INT value)
3426 unsigned count = 0;
3428 while (value)
3430 count++;
3431 value &= value - 1;
3434 return count;
3437 void
3438 aarch64_print_operand (FILE *f, rtx x, char code)
3440 switch (code)
3442 case 'e':
3443 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3445 int n;
3447 if (GET_CODE (x) != CONST_INT
3448 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3450 output_operand_lossage ("invalid operand for '%%%c'", code);
3451 return;
3454 switch (n)
3456 case 3:
3457 fputc ('b', f);
3458 break;
3459 case 4:
3460 fputc ('h', f);
3461 break;
3462 case 5:
3463 fputc ('w', f);
3464 break;
3465 default:
3466 output_operand_lossage ("invalid operand for '%%%c'", code);
3467 return;
3470 break;
3472 case 'p':
3474 int n;
3476 /* Print N such that 2^N == X. */
3477 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3479 output_operand_lossage ("invalid operand for '%%%c'", code);
3480 return;
3483 asm_fprintf (f, "%d", n);
3485 break;
3487 case 'P':
3488 /* Print the number of non-zero bits in X (a const_int). */
3489 if (GET_CODE (x) != CONST_INT)
3491 output_operand_lossage ("invalid operand for '%%%c'", code);
3492 return;
3495 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3496 break;
3498 case 'H':
3499 /* Print the higher numbered register of a pair (TImode) of regs. */
3500 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3502 output_operand_lossage ("invalid operand for '%%%c'", code);
3503 return;
3506 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3507 break;
3509 case 'm':
3510 /* Print a condition (eq, ne, etc). */
3512 /* CONST_TRUE_RTX means always -- that's the default. */
3513 if (x == const_true_rtx)
3514 return;
3516 if (!COMPARISON_P (x))
3518 output_operand_lossage ("invalid operand for '%%%c'", code);
3519 return;
3522 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3523 break;
3525 case 'M':
3526 /* Print the inverse of a condition (eq <-> ne, etc). */
3528 /* CONST_TRUE_RTX means never -- that's the default. */
3529 if (x == const_true_rtx)
3531 fputs ("nv", f);
3532 return;
3535 if (!COMPARISON_P (x))
3537 output_operand_lossage ("invalid operand for '%%%c'", code);
3538 return;
3541 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3542 (aarch64_get_condition_code (x))], f);
3543 break;
3545 case 'b':
3546 case 'h':
3547 case 's':
3548 case 'd':
3549 case 'q':
3550 /* Print a scalar FP/SIMD register name. */
3551 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3553 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3554 return;
3556 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3557 break;
3559 case 'S':
3560 case 'T':
3561 case 'U':
3562 case 'V':
3563 /* Print the first FP/SIMD register name in a list. */
3564 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3566 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3567 return;
3569 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3570 break;
3572 case 'X':
3573 /* Print bottom 16 bits of integer constant in hex. */
3574 if (GET_CODE (x) != CONST_INT)
3576 output_operand_lossage ("invalid operand for '%%%c'", code);
3577 return;
3579 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3580 break;
3582 case 'w':
3583 case 'x':
3584 /* Print a general register name or the zero register (32-bit or
3585 64-bit). */
3586 if (x == const0_rtx
3587 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3589 asm_fprintf (f, "%czr", code);
3590 break;
3593 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3595 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3596 break;
3599 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3601 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3602 break;
3605 /* Fall through */
3607 case 0:
3608 /* Print a normal operand; if it's a general register, then we
3609 assume DImode. */
3610 if (x == NULL)
3612 output_operand_lossage ("missing operand");
3613 return;
3616 switch (GET_CODE (x))
3618 case REG:
3619 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3620 break;
3622 case MEM:
3623 aarch64_memory_reference_mode = GET_MODE (x);
3624 output_address (XEXP (x, 0));
3625 break;
3627 case LABEL_REF:
3628 case SYMBOL_REF:
3629 output_addr_const (asm_out_file, x);
3630 break;
3632 case CONST_INT:
3633 asm_fprintf (f, "%wd", INTVAL (x));
3634 break;
3636 case CONST_VECTOR:
3637 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3639 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3640 HOST_WIDE_INT_MIN,
3641 HOST_WIDE_INT_MAX));
3642 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3644 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3646 fputc ('0', f);
3648 else
3649 gcc_unreachable ();
3650 break;
3652 case CONST_DOUBLE:
3653 /* CONST_DOUBLE can represent a double-width integer.
3654 In this case, the mode of x is VOIDmode. */
3655 if (GET_MODE (x) == VOIDmode)
3656 ; /* Do Nothing. */
3657 else if (aarch64_float_const_zero_rtx_p (x))
3659 fputc ('0', f);
3660 break;
3662 else if (aarch64_float_const_representable_p (x))
3664 #define buf_size 20
3665 char float_buf[buf_size] = {'\0'};
3666 REAL_VALUE_TYPE r;
3667 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3668 real_to_decimal_for_mode (float_buf, &r,
3669 buf_size, buf_size,
3670 1, GET_MODE (x));
3671 asm_fprintf (asm_out_file, "%s", float_buf);
3672 break;
3673 #undef buf_size
3675 output_operand_lossage ("invalid constant");
3676 return;
3677 default:
3678 output_operand_lossage ("invalid operand");
3679 return;
3681 break;
3683 case 'A':
3684 if (GET_CODE (x) == HIGH)
3685 x = XEXP (x, 0);
3687 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3689 case SYMBOL_SMALL_GOT:
3690 asm_fprintf (asm_out_file, ":got:");
3691 break;
3693 case SYMBOL_SMALL_TLSGD:
3694 asm_fprintf (asm_out_file, ":tlsgd:");
3695 break;
3697 case SYMBOL_SMALL_TLSDESC:
3698 asm_fprintf (asm_out_file, ":tlsdesc:");
3699 break;
3701 case SYMBOL_SMALL_GOTTPREL:
3702 asm_fprintf (asm_out_file, ":gottprel:");
3703 break;
3705 case SYMBOL_SMALL_TPREL:
3706 asm_fprintf (asm_out_file, ":tprel:");
3707 break;
3709 case SYMBOL_TINY_GOT:
3710 gcc_unreachable ();
3711 break;
3713 default:
3714 break;
3716 output_addr_const (asm_out_file, x);
3717 break;
3719 case 'L':
3720 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3722 case SYMBOL_SMALL_GOT:
3723 asm_fprintf (asm_out_file, ":lo12:");
3724 break;
3726 case SYMBOL_SMALL_TLSGD:
3727 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3728 break;
3730 case SYMBOL_SMALL_TLSDESC:
3731 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3732 break;
3734 case SYMBOL_SMALL_GOTTPREL:
3735 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3736 break;
3738 case SYMBOL_SMALL_TPREL:
3739 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3740 break;
3742 case SYMBOL_TINY_GOT:
3743 asm_fprintf (asm_out_file, ":got:");
3744 break;
3746 default:
3747 break;
3749 output_addr_const (asm_out_file, x);
3750 break;
3752 case 'G':
3754 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3756 case SYMBOL_SMALL_TPREL:
3757 asm_fprintf (asm_out_file, ":tprel_hi12:");
3758 break;
3759 default:
3760 break;
3762 output_addr_const (asm_out_file, x);
3763 break;
3765 default:
3766 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3767 return;
3771 void
3772 aarch64_print_operand_address (FILE *f, rtx x)
3774 struct aarch64_address_info addr;
3776 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3777 MEM, true))
3778 switch (addr.type)
3780 case ADDRESS_REG_IMM:
3781 if (addr.offset == const0_rtx)
3782 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3783 else
3784 asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
3785 INTVAL (addr.offset));
3786 return;
3788 case ADDRESS_REG_REG:
3789 if (addr.shift == 0)
3790 asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3791 reg_names [REGNO (addr.offset)]);
3792 else
3793 asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3794 reg_names [REGNO (addr.offset)], addr.shift);
3795 return;
3797 case ADDRESS_REG_UXTW:
3798 if (addr.shift == 0)
3799 asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
3800 REGNO (addr.offset) - R0_REGNUM);
3801 else
3802 asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
3803 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3804 return;
3806 case ADDRESS_REG_SXTW:
3807 if (addr.shift == 0)
3808 asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
3809 REGNO (addr.offset) - R0_REGNUM);
3810 else
3811 asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
3812 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3813 return;
3815 case ADDRESS_REG_WB:
3816 switch (GET_CODE (x))
3818 case PRE_INC:
3819 asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
3820 GET_MODE_SIZE (aarch64_memory_reference_mode));
3821 return;
3822 case POST_INC:
3823 asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
3824 GET_MODE_SIZE (aarch64_memory_reference_mode));
3825 return;
3826 case PRE_DEC:
3827 asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
3828 GET_MODE_SIZE (aarch64_memory_reference_mode));
3829 return;
3830 case POST_DEC:
3831 asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
3832 GET_MODE_SIZE (aarch64_memory_reference_mode));
3833 return;
3834 case PRE_MODIFY:
3835 asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
3836 INTVAL (addr.offset));
3837 return;
3838 case POST_MODIFY:
3839 asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
3840 INTVAL (addr.offset));
3841 return;
3842 default:
3843 break;
3845 break;
3847 case ADDRESS_LO_SUM:
3848 asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
3849 output_addr_const (f, addr.offset);
3850 asm_fprintf (f, "]");
3851 return;
3853 case ADDRESS_SYMBOLIC:
3854 break;
3857 output_addr_const (f, x);
3860 void
3861 aarch64_function_profiler (FILE *f ATTRIBUTE_UNUSED,
3862 int labelno ATTRIBUTE_UNUSED)
3864 sorry ("function profiling");
3867 bool
3868 aarch64_label_mentioned_p (rtx x)
3870 const char *fmt;
3871 int i;
3873 if (GET_CODE (x) == LABEL_REF)
3874 return true;
3876 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3877 referencing instruction, but they are constant offsets, not
3878 symbols. */
3879 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3880 return false;
3882 fmt = GET_RTX_FORMAT (GET_CODE (x));
3883 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3885 if (fmt[i] == 'E')
3887 int j;
3889 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3890 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3891 return 1;
3893 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3894 return 1;
3897 return 0;
3900 /* Implement REGNO_REG_CLASS. */
3902 enum reg_class
3903 aarch64_regno_regclass (unsigned regno)
3905 if (GP_REGNUM_P (regno))
3906 return CORE_REGS;
3908 if (regno == SP_REGNUM)
3909 return STACK_REG;
3911 if (regno == FRAME_POINTER_REGNUM
3912 || regno == ARG_POINTER_REGNUM)
3913 return CORE_REGS;
3915 if (FP_REGNUM_P (regno))
3916 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
3918 return NO_REGS;
3921 /* Try a machine-dependent way of reloading an illegitimate address
3922 operand. If we find one, push the reload and return the new rtx. */
3925 aarch64_legitimize_reload_address (rtx *x_p,
3926 enum machine_mode mode,
3927 int opnum, int type,
3928 int ind_levels ATTRIBUTE_UNUSED)
3930 rtx x = *x_p;
3932 /* Do not allow mem (plus (reg, const)) if vector mode. */
3933 if (aarch64_vector_mode_p (mode)
3934 && GET_CODE (x) == PLUS
3935 && REG_P (XEXP (x, 0))
3936 && CONST_INT_P (XEXP (x, 1)))
3938 rtx orig_rtx = x;
3939 x = copy_rtx (x);
3940 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3941 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3942 opnum, (enum reload_type) type);
3943 return x;
3946 /* We must recognize output that we have already generated ourselves. */
3947 if (GET_CODE (x) == PLUS
3948 && GET_CODE (XEXP (x, 0)) == PLUS
3949 && REG_P (XEXP (XEXP (x, 0), 0))
3950 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3951 && CONST_INT_P (XEXP (x, 1)))
3953 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3954 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3955 opnum, (enum reload_type) type);
3956 return x;
3959 /* We wish to handle large displacements off a base register by splitting
3960 the addend across an add and the mem insn. This can cut the number of
3961 extra insns needed from 3 to 1. It is only useful for load/store of a
3962 single register with a 12-bit offset field. */
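/* Illustrative example (not from the original sources): for a DImode
   access at (plus (reg X1) (const_int 0x21008)), low becomes 0x8 and
   high 0x21000, which aarch64_uimm12_shift accepts, so the reload adds
   0x21000 into a base register (one ADD with a shifted 12-bit
   immediate) and the memory access keeps the small offset 8.  */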
3963 if (GET_CODE (x) == PLUS
3964 && REG_P (XEXP (x, 0))
3965 && CONST_INT_P (XEXP (x, 1))
3966 && HARD_REGISTER_P (XEXP (x, 0))
3967 && mode != TImode
3968 && mode != TFmode
3969 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
3971 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
3972 HOST_WIDE_INT low = val & 0xfff;
3973 HOST_WIDE_INT high = val - low;
3974 HOST_WIDE_INT offs;
3975 rtx cst;
3976 enum machine_mode xmode = GET_MODE (x);
3978 /* In ILP32, xmode can be either DImode or SImode. */
3979 gcc_assert (xmode == DImode || xmode == SImode);
3981 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
3982 BLKmode alignment. */
3983 if (GET_MODE_SIZE (mode) == 0)
3984 return NULL_RTX;
3986 offs = low % GET_MODE_SIZE (mode);
3988 /* Align misaligned offset by adjusting high part to compensate. */
3989 if (offs != 0)
3991 if (aarch64_uimm12_shift (high + offs))
3993 /* Align down. */
3994 low = low - offs;
3995 high = high + offs;
3997 else
3999 /* Align up. */
4000 offs = GET_MODE_SIZE (mode) - offs;
4001 low = low + offs;
4002 high = high + (low & 0x1000) - offs;
4003 low &= 0xfff;
4007 /* Check for overflow. */
4008 if (high + low != val)
4009 return NULL_RTX;
4011 cst = GEN_INT (high);
4012 if (!aarch64_uimm12_shift (high))
4013 cst = force_const_mem (xmode, cst);
4015 /* Reload high part into base reg, leaving the low part
4016 in the mem instruction. */
4017 x = plus_constant (xmode,
4018 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4019 low);
4021 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4022 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4023 opnum, (enum reload_type) type);
4024 return x;
4027 return NULL_RTX;
4031 static reg_class_t
4032 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4033 reg_class_t rclass,
4034 enum machine_mode mode,
4035 secondary_reload_info *sri)
4037 /* Address expressions of the form PLUS (SP, large_offset) need two
4038 scratch registers, one for the constant, and one for holding a
4039 copy of SP, since SP cannot be used on the RHS of an add-reg
4040 instruction. */
4041 if (mode == DImode
4042 && GET_CODE (x) == PLUS
4043 && XEXP (x, 0) == stack_pointer_rtx
4044 && CONST_INT_P (XEXP (x, 1))
4045 && !aarch64_uimm12_shift (INTVAL (XEXP (x, 1))))
4047 sri->icode = CODE_FOR_reload_sp_immediate;
4048 return NO_REGS;
4051 /* Without the TARGET_SIMD instructions we cannot move a Q register
4052 to a Q register directly. We need a scratch. */
4053 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4054 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4055 && reg_class_subset_p (rclass, FP_REGS))
4057 if (mode == TFmode)
4058 sri->icode = CODE_FOR_aarch64_reload_movtf;
4059 else if (mode == TImode)
4060 sri->icode = CODE_FOR_aarch64_reload_movti;
4061 return NO_REGS;
4064 /* A TFmode or TImode memory access should be handled via FP_REGS
4065 because AArch64 has richer addressing modes for LDR/STR instructions
4066 than LDP/STP instructions. */
4067 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
4068 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4069 return FP_REGS;
4071 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4072 return CORE_REGS;
4074 return NO_REGS;
4077 static bool
4078 aarch64_can_eliminate (const int from, const int to)
4080 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4081 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4083 if (frame_pointer_needed)
4085 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4086 return true;
4087 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4088 return false;
4089 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4090 && !cfun->calls_alloca)
4091 return true;
4092 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4093 return true;
4094 return false;
4096 else
4098 /* If we decided that we didn't need a leaf frame pointer but then used
4099 LR in the function, then we'll want a frame pointer after all, so
4100 prevent this elimination to ensure a frame pointer is used.
4102 NOTE: the original value of flag_omit_frame_pointer gets trashed
4103 IFF flag_omit_leaf_frame_pointer is true, so we check the value
4104 of faked_omit_frame_pointer here (which is true when we always
4105 wish to keep non-leaf frame pointers but only wish to keep leaf frame
4106 pointers when LR is clobbered). */
4107 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4108 && df_regs_ever_live_p (LR_REGNUM)
4109 && faked_omit_frame_pointer)
4110 return false;
4113 return true;
4116 HOST_WIDE_INT
4117 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4119 HOST_WIDE_INT frame_size;
4120 HOST_WIDE_INT offset;
4122 aarch64_layout_frame ();
4123 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4124 + crtl->outgoing_args_size
4125 + cfun->machine->saved_varargs_size);
4127 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4128 offset = frame_size;
4130 if (to == HARD_FRAME_POINTER_REGNUM)
4132 if (from == ARG_POINTER_REGNUM)
4133 return offset - crtl->outgoing_args_size;
4135 if (from == FRAME_POINTER_REGNUM)
4136 return cfun->machine->frame.saved_regs_size;
4139 if (to == STACK_POINTER_REGNUM)
4141 if (from == FRAME_POINTER_REGNUM)
4143 HOST_WIDE_INT elim = crtl->outgoing_args_size
4144 + cfun->machine->frame.saved_regs_size
4145 - cfun->machine->frame.fp_lr_offset;
4146 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4147 return elim;
4151 return offset;
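/* A hedged worked example of the offsets above: with 16 bytes of locals,
   16 bytes of saved registers (say FP and LR), no outgoing arguments and
   no varargs save area, frame_size is 32; eliminating ARG_POINTER_REGNUM
   into HARD_FRAME_POINTER_REGNUM then yields 32, while eliminating
   FRAME_POINTER_REGNUM into HARD_FRAME_POINTER_REGNUM yields the 16 bytes
   of saved registers. */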
4155 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4156 previous frame. */
4159 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4161 if (count != 0)
4162 return const0_rtx;
4163 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4167 static void
4168 aarch64_asm_trampoline_template (FILE *f)
4170 if (TARGET_ILP32)
4172 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4173 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4175 else
4177 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4178 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4180 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4181 assemble_aligned_integer (4, const0_rtx);
4182 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4183 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4186 static void
4187 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4189 rtx fnaddr, mem, a_tramp;
4190 const int tramp_code_sz = 16;
4192 /* We don't need to copy the trailing D-words; we fill those in below. */
4193 emit_block_move (m_tramp, assemble_trampoline_template (),
4194 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4195 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4196 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4197 if (GET_MODE (fnaddr) != ptr_mode)
4198 fnaddr = convert_memory_address (ptr_mode, fnaddr);
4199 emit_move_insn (mem, fnaddr);
4201 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4202 emit_move_insn (mem, chain_value);
4204 /* XXX We should really define a "clear_cache" pattern and use
4205 gen_clear_cache(). */
4206 a_tramp = XEXP (m_tramp, 0);
4207 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4208 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4209 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4210 ptr_mode);
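/* A hedged sketch of the completed LP64 trampoline image built by the two
   functions above, assuming IP1 is x17 and the static chain register is
   x18; the struct exists only to picture the layout and is not used by the
   port. */
#if 0
struct aarch64_tramp_image_sketch
{
  unsigned int insns[4];  /* ldr x17, .+16; ldr x18, .+20; br x17; pad.  */
  void *fnaddr;           /* Written at offset 16 by aarch64_trampoline_init.  */
  void *static_chain;     /* Written at offset 24, likewise.  */
};
#endif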
4213 static unsigned char
4214 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4216 switch (regclass)
4218 case CORE_REGS:
4219 case POINTER_REGS:
4220 case GENERAL_REGS:
4221 case ALL_REGS:
4222 case FP_REGS:
4223 case FP_LO_REGS:
4224 return
4225 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4226 (GET_MODE_SIZE (mode) + 7) / 8;
4227 case STACK_REG:
4228 return 1;
4230 case NO_REGS:
4231 return 0;
4233 default:
4234 break;
4236 gcc_unreachable ();
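/* For instance, the expression above evaluates to 1 register for DImode
   ((8 + 7) / 8) and for a 16-byte vector mode such as V4SImode
   ((16 + 15) / 16), but to 2 for TImode or TFmode ((16 + 7) / 8),
   assuming those scalar modes are not treated as vector modes by
   aarch64_vector_mode_p. */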
4239 static reg_class_t
4240 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4242 if (regclass == POINTER_REGS || regclass == STACK_REG)
4243 return GENERAL_REGS;
4245 /* If it's an integer immediate that MOVI can't handle, then
4246 FP_REGS is not an option, so we return NO_REGS instead. */
4247 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4248 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4249 return NO_REGS;
4251 return regclass;
4254 void
4255 aarch64_asm_output_labelref (FILE* f, const char *name)
4257 asm_fprintf (f, "%U%s", name);
4260 static void
4261 aarch64_elf_asm_constructor (rtx symbol, int priority)
4263 if (priority == DEFAULT_INIT_PRIORITY)
4264 default_ctor_section_asm_out_constructor (symbol, priority);
4265 else
4267 section *s;
4268 char buf[18];
4269 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4270 s = get_section (buf, SECTION_WRITE, NULL);
4271 switch_to_section (s);
4272 assemble_align (POINTER_SIZE);
4273 assemble_aligned_integer (POINTER_BYTES, symbol);
4277 static void
4278 aarch64_elf_asm_destructor (rtx symbol, int priority)
4280 if (priority == DEFAULT_INIT_PRIORITY)
4281 default_dtor_section_asm_out_destructor (symbol, priority);
4282 else
4284 section *s;
4285 char buf[18];
4286 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4287 s = get_section (buf, SECTION_WRITE, NULL);
4288 switch_to_section (s);
4289 assemble_align (POINTER_SIZE);
4290 assemble_aligned_integer (POINTER_BYTES, symbol);
4294 const char*
4295 aarch64_output_casesi (rtx *operands)
4297 char buf[100];
4298 char label[100];
4299 rtx diff_vec = PATTERN (next_active_insn (operands[2]));
4300 int index;
4301 static const char *const patterns[4][2] =
4304 "ldrb\t%w3, [%0,%w1,uxtw]",
4305 "add\t%3, %4, %w3, sxtb #2"
4308 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4309 "add\t%3, %4, %w3, sxth #2"
4312 "ldr\t%w3, [%0,%w1,uxtw #2]",
4313 "add\t%3, %4, %w3, sxtw #2"
4315 /* We assume that DImode is only generated when not optimizing and
4316 that we don't really need 64-bit address offsets. That would
4317 imply an object file with 8GB of code in a single function! */
4319 "ldr\t%w3, [%0,%w1,uxtw #2]",
4320 "add\t%3, %4, %w3, sxtw #2"
4324 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4326 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4328 gcc_assert (index >= 0 && index <= 3);
4330 /* Need to implement table size reduction by changing the code below. */
4331 output_asm_insn (patterns[index][0], operands);
4332 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4333 snprintf (buf, sizeof (buf),
4334 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4335 output_asm_insn (buf, operands);
4336 output_asm_insn (patterns[index][1], operands);
4337 output_asm_insn ("br\t%3", operands);
4338 assemble_label (asm_out_file, label);
4339 return "";
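/* As a hedged illustration, for a byte-wide ADDR_DIFF_VEC (index 0 above),
   and assuming operand 0 holds the address of the dispatch table, operand 1
   the zero-based case index and operands 3/4 the two scratch registers
   (shown here as x0, w1, x3 and x4 purely for illustration), the emitted
   sequence looks roughly like:

       ldrb    w3, [x0, w1, uxtw]
       adr     x4, .Lrtx<N>
       add     x3, x4, w3, sxtb #2
       br      x3
   .Lrtx<N>:

   i.e. the table entry, scaled by 4 and added to the address of the local
   label, gives the branch target. */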
4343 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4344 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4345 operator. */
4348 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4350 if (shift >= 0 && shift <= 3)
4352 int size;
4353 for (size = 8; size <= 32; size *= 2)
4355 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4356 if (mask == bits << shift)
4357 return size;
4360 return 0;
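/* A minimal, standalone sketch of the check above, using plain host
   integers rather than HOST_WIDE_INT; e.g. it returns 8 for
   (shift 2, mask 0x3fc) and 16 for (shift 1, mask 0x1fffe), matching the
   UXTB and UXTH forms respectively.  The helper name is illustrative
   only. */
#if 0
static int
uxt_size_sketch (int shift, long long mask)
{
  int size;

  if (shift < 0 || shift > 3)
    return 0;

  /* Try the 8-, 16- and 32-bit masks shifted into place.  */
  for (size = 8; size <= 32; size *= 2)
    if (mask == (((1LL << size) - 1) << shift))
      return size;

  return 0;
}
#endif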
4363 static bool
4364 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4365 const_rtx x ATTRIBUTE_UNUSED)
4367 /* We can't use blocks for constants when we're using a per-function
4368 constant pool. */
4369 return false;
4372 static section *
4373 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4374 rtx x ATTRIBUTE_UNUSED,
4375 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4377 /* Force all constant pool entries into the current function section. */
4378 return function_section (current_function_decl);
4382 /* Costs. */
4384 /* Helper function for rtx cost calculation. Strip a shift expression
4385 from X. Returns the inner operand if successful, or the original
4386 expression on failure. */
4387 static rtx
4388 aarch64_strip_shift (rtx x)
4390 rtx op = x;
4392 if ((GET_CODE (op) == ASHIFT
4393 || GET_CODE (op) == ASHIFTRT
4394 || GET_CODE (op) == LSHIFTRT)
4395 && CONST_INT_P (XEXP (op, 1)))
4396 return XEXP (op, 0);
4398 if (GET_CODE (op) == MULT
4399 && CONST_INT_P (XEXP (op, 1))
4400 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4401 return XEXP (op, 0);
4403 return x;
4406 /* Helper function for rtx cost calculation. Strip a shift or extend
4407 expression from X. Returns the inner operand if successful, or the
4408 original expression on failure. We deal with a number of possible
4409 canonicalization variations here. */
4410 static rtx
4411 aarch64_strip_shift_or_extend (rtx x)
4413 rtx op = x;
4415 /* Zero and sign extraction of a widened value. */
4416 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4417 && XEXP (op, 2) == const0_rtx
4418 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4419 XEXP (op, 1)))
4420 return XEXP (XEXP (op, 0), 0);
4422 /* It can also be represented (for zero-extend) as an AND with an
4423 immediate. */
4424 if (GET_CODE (op) == AND
4425 && GET_CODE (XEXP (op, 0)) == MULT
4426 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4427 && CONST_INT_P (XEXP (op, 1))
4428 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4429 INTVAL (XEXP (op, 1))) != 0)
4430 return XEXP (XEXP (op, 0), 0);
4432 /* Now handle the extended-register form, which may also have an optional
4433 left shift by 1..4. */
4434 if (GET_CODE (op) == ASHIFT
4435 && CONST_INT_P (XEXP (op, 1))
4436 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4437 op = XEXP (op, 0);
4439 if (GET_CODE (op) == ZERO_EXTEND
4440 || GET_CODE (op) == SIGN_EXTEND)
4441 op = XEXP (op, 0);
4443 if (op != x)
4444 return op;
4446 return aarch64_strip_shift (x);
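/* For instance, (mult (reg) (const_int 4)) strips to the inner register;
   so does (and (mult (reg) (const_int 4)) (const_int 1020)), since a mask
   of 1020 (0xff << 2) together with the scale of 4 describes a UXTB scaled
   by 4; and (ashift (zero_extend (reg)) (const_int 2)) likewise strips to
   the register. */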
4449 /* Calculate the cost of calculating X, storing it in *COST. Result
4450 is true if the total cost of the operation has now been calculated. */
4451 static bool
4452 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4453 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4455 rtx op0, op1;
4456 const struct cpu_rtx_cost_table *extra_cost
4457 = aarch64_tune_params->insn_extra_cost;
4459 switch (code)
4461 case SET:
4462 op0 = SET_DEST (x);
4463 op1 = SET_SRC (x);
4465 switch (GET_CODE (op0))
4467 case MEM:
4468 if (speed)
4469 *cost += extra_cost->memory_store;
4471 if (op1 != const0_rtx)
4472 *cost += rtx_cost (op1, SET, 1, speed);
4473 return true;
4475 case SUBREG:
4476 if (! REG_P (SUBREG_REG (op0)))
4477 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4478 /* Fall through. */
4479 case REG:
4480 /* Cost is just the cost of the RHS of the set. */
4481 *cost += rtx_cost (op1, SET, 1, true);
4482 return true;
4484 case ZERO_EXTRACT: /* Bit-field insertion. */
4485 case SIGN_EXTRACT:
4486 /* Strip any redundant widening of the RHS to meet the width of
4487 the target. */
4488 if (GET_CODE (op1) == SUBREG)
4489 op1 = SUBREG_REG (op1);
4490 if ((GET_CODE (op1) == ZERO_EXTEND
4491 || GET_CODE (op1) == SIGN_EXTEND)
4492 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4493 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4494 >= INTVAL (XEXP (op0, 1))))
4495 op1 = XEXP (op1, 0);
4496 *cost += rtx_cost (op1, SET, 1, speed);
4497 return true;
4499 default:
4500 break;
4502 return false;
4504 case MEM:
4505 if (speed)
4506 *cost += extra_cost->memory_load;
4508 return true;
4510 case NEG:
4511 op0 = CONST0_RTX (GET_MODE (x));
4512 op1 = XEXP (x, 0);
4513 goto cost_minus;
4515 case COMPARE:
4516 op0 = XEXP (x, 0);
4517 op1 = XEXP (x, 1);
4519 if (op1 == const0_rtx
4520 && GET_CODE (op0) == AND)
4522 x = op0;
4523 goto cost_logic;
4526 /* Comparisons can work if the order is swapped.
4527 Canonicalization puts the more complex operation first, but
4528 we want it in op1. */
4529 if (! (REG_P (op0)
4530 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4532 op0 = XEXP (x, 1);
4533 op1 = XEXP (x, 0);
4535 goto cost_minus;
4537 case MINUS:
4538 op0 = XEXP (x, 0);
4539 op1 = XEXP (x, 1);
4541 cost_minus:
4542 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4543 || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4544 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4546 if (op0 != const0_rtx)
4547 *cost += rtx_cost (op0, MINUS, 0, speed);
4549 if (CONST_INT_P (op1))
4551 if (!aarch64_uimm12_shift (INTVAL (op1)))
4552 *cost += rtx_cost (op1, MINUS, 1, speed);
4554 else
4556 op1 = aarch64_strip_shift_or_extend (op1);
4557 *cost += rtx_cost (op1, MINUS, 1, speed);
4559 return true;
4562 return false;
4564 case PLUS:
4565 op0 = XEXP (x, 0);
4566 op1 = XEXP (x, 1);
4568 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4570 if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4572 *cost += rtx_cost (op0, PLUS, 0, speed);
4574 else
4576 rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4578 if (new_op0 == op0
4579 && GET_CODE (op0) == MULT)
4581 if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4582 && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4583 || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4584 && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4586 *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4587 speed)
4588 + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4589 speed)
4590 + rtx_cost (op1, PLUS, 1, speed));
4591 if (speed)
4592 *cost += extra_cost->int_multiply_extend_add;
4593 return true;
4595 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4596 + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4597 + rtx_cost (op1, PLUS, 1, speed));
4599 if (speed)
4600 *cost += extra_cost->int_multiply_add;
4603 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4604 + rtx_cost (op1, PLUS, 1, speed));
4606 return true;
4609 return false;
4611 case IOR:
4612 case XOR:
4613 case AND:
4614 cost_logic:
4615 op0 = XEXP (x, 0);
4616 op1 = XEXP (x, 1);
4618 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4620 if (CONST_INT_P (op1)
4621 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4623 *cost += rtx_cost (op0, AND, 0, speed);
4625 else
4627 if (GET_CODE (op0) == NOT)
4628 op0 = XEXP (op0, 0);
4629 op0 = aarch64_strip_shift (op0);
4630 *cost += (rtx_cost (op0, AND, 0, speed)
4631 + rtx_cost (op1, AND, 1, speed));
4633 return true;
4635 return false;
4637 case ZERO_EXTEND:
4638 if ((GET_MODE (x) == DImode
4639 && GET_MODE (XEXP (x, 0)) == SImode)
4640 || GET_CODE (XEXP (x, 0)) == MEM)
4642 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4643 return true;
4645 return false;
4647 case SIGN_EXTEND:
4648 if (GET_CODE (XEXP (x, 0)) == MEM)
4650 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4651 return true;
4653 return false;
4655 case ROTATE:
4656 if (!CONST_INT_P (XEXP (x, 1)))
4657 *cost += COSTS_N_INSNS (2);
4658 /* Fall through. */
4659 case ROTATERT:
4660 case LSHIFTRT:
4661 case ASHIFT:
4662 case ASHIFTRT:
4664 /* Shifting by a register often takes an extra cycle. */
4665 if (speed && !CONST_INT_P (XEXP (x, 1)))
4666 *cost += extra_cost->register_shift;
4668 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4669 return true;
4671 case HIGH:
4672 if (!CONSTANT_P (XEXP (x, 0)))
4673 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4674 return true;
4676 case LO_SUM:
4677 if (!CONSTANT_P (XEXP (x, 1)))
4678 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4679 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4680 return true;
4682 case ZERO_EXTRACT:
4683 case SIGN_EXTRACT:
4684 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4685 return true;
4687 case MULT:
4688 op0 = XEXP (x, 0);
4689 op1 = XEXP (x, 1);
4691 *cost = COSTS_N_INSNS (1);
4692 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4694 if (CONST_INT_P (op1)
4695 && exact_log2 (INTVAL (op1)) > 0)
4697 *cost += rtx_cost (op0, ASHIFT, 0, speed);
4698 return true;
4701 if ((GET_CODE (op0) == ZERO_EXTEND
4702 && GET_CODE (op1) == ZERO_EXTEND)
4703 || (GET_CODE (op0) == SIGN_EXTEND
4704 && GET_CODE (op1) == SIGN_EXTEND))
4706 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4707 + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4708 if (speed)
4709 *cost += extra_cost->int_multiply_extend;
4710 return true;
4713 if (speed)
4714 *cost += extra_cost->int_multiply;
4716 else if (speed)
4718 if (GET_MODE (x) == DFmode)
4719 *cost += extra_cost->double_multiply;
4720 else if (GET_MODE (x) == SFmode)
4721 *cost += extra_cost->float_multiply;
4724 return false; /* All arguments need to be in registers. */
4726 case MOD:
4727 case UMOD:
4728 *cost = COSTS_N_INSNS (2);
4729 if (speed)
4731 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4732 *cost += (extra_cost->int_multiply_add
4733 + extra_cost->int_divide);
4734 else if (GET_MODE (x) == DFmode)
4735 *cost += (extra_cost->double_multiply
4736 + extra_cost->double_divide);
4737 else if (GET_MODE (x) == SFmode)
4738 *cost += (extra_cost->float_multiply
4739 + extra_cost->float_divide);
4741 return false; /* All arguments need to be in registers. */
4743 case DIV:
4744 case UDIV:
4745 *cost = COSTS_N_INSNS (1);
4746 if (speed)
4748 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4749 *cost += extra_cost->int_divide;
4750 else if (GET_MODE (x) == DFmode)
4751 *cost += extra_cost->double_divide;
4752 else if (GET_MODE (x) == SFmode)
4753 *cost += extra_cost->float_divide;
4755 return false; /* All arguments need to be in registers. */
4757 default:
4758 break;
4760 return false;
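/* As a concrete illustration of the PLUS handling above: a DImode
   (plus (mult (reg) (reg)) (reg)) is costed as its three register operands
   plus extra_cost->int_multiply_add when optimising for speed, i.e. it is
   treated as a single MADD, whereas (plus (reg) (const_int 12)) costs only
   the register operand, since a 12-bit immediate folds into the add
   itself. */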
4763 static int
4764 aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4765 enum machine_mode mode ATTRIBUTE_UNUSED,
4766 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4768 enum rtx_code c = GET_CODE (x);
4769 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4771 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4772 return addr_cost->pre_modify;
4774 if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4775 return addr_cost->post_modify;
4777 if (c == PLUS)
4779 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4780 return addr_cost->imm_offset;
4781 else if (GET_CODE (XEXP (x, 0)) == MULT
4782 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4783 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4784 return addr_cost->register_extend;
4786 return addr_cost->register_offset;
4788 else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4789 return addr_cost->imm_offset;
4791 return 0;
4794 static int
4795 aarch64_register_move_cost (enum machine_mode mode,
4796 reg_class_t from, reg_class_t to)
4798 const struct cpu_regmove_cost *regmove_cost
4799 = aarch64_tune_params->regmove_cost;
4801 if (from == GENERAL_REGS && to == GENERAL_REGS)
4802 return regmove_cost->GP2GP;
4803 else if (from == GENERAL_REGS)
4804 return regmove_cost->GP2FP;
4805 else if (to == GENERAL_REGS)
4806 return regmove_cost->FP2GP;
4808 /* When AdvSIMD instructions are disabled it is not possible to move
4809 a 128-bit value directly between Q registers. This is handled in
4810 secondary reload. A general register is used as a scratch to move
4811 the upper DI value and the lower DI value is moved directly,
4812 hence the cost is the sum of three moves. */
4814 if (!TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
4815 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4817 return regmove_cost->FP2FP;
4820 static int
4821 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4822 reg_class_t rclass ATTRIBUTE_UNUSED,
4823 bool in ATTRIBUTE_UNUSED)
4825 return aarch64_tune_params->memmov_cost;
4828 /* Vectorizer cost model target hooks. */
4830 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4831 static int
4832 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4833 tree vectype,
4834 int misalign ATTRIBUTE_UNUSED)
4836 unsigned elements;
4838 switch (type_of_cost)
4840 case scalar_stmt:
4841 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
4843 case scalar_load:
4844 return aarch64_tune_params->vec_costs->scalar_load_cost;
4846 case scalar_store:
4847 return aarch64_tune_params->vec_costs->scalar_store_cost;
4849 case vector_stmt:
4850 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4852 case vector_load:
4853 return aarch64_tune_params->vec_costs->vec_align_load_cost;
4855 case vector_store:
4856 return aarch64_tune_params->vec_costs->vec_store_cost;
4858 case vec_to_scalar:
4859 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
4861 case scalar_to_vec:
4862 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
4864 case unaligned_load:
4865 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
4867 case unaligned_store:
4868 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
4870 case cond_branch_taken:
4871 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
4873 case cond_branch_not_taken:
4874 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
4876 case vec_perm:
4877 case vec_promote_demote:
4878 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4880 case vec_construct:
4881 elements = TYPE_VECTOR_SUBPARTS (vectype);
4882 return elements / 2 + 1;
4884 default:
4885 gcc_unreachable ();
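/* For instance, with the formula above a vec_construct of a four-element
   vector is costed at 4 / 2 + 1 = 3 and a two-element vector at
   2 / 2 + 1 = 2. */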
4889 /* Implement targetm.vectorize.add_stmt_cost. */
4890 static unsigned
4891 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
4892 struct _stmt_vec_info *stmt_info, int misalign,
4893 enum vect_cost_model_location where)
4895 unsigned *cost = (unsigned *) data;
4896 unsigned retval = 0;
4898 if (flag_vect_cost_model)
4900 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4901 int stmt_cost =
4902 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
4904 /* Statements in an inner loop relative to the loop being
4905 vectorized are weighted more heavily. The value here is
4906 a function (linear for now) of the loop nest level. */
4907 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4909 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4910 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
4911 unsigned nest_level = loop_depth (loop);
4913 count *= nest_level;
4916 retval = (unsigned) (count * stmt_cost);
4917 cost[where] += retval;
4920 return retval;
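/* For example, a vector statement of cost 1 that occurs COUNT times in a
   loop nested one level inside the loop being vectorized (loop_depth of 2,
   assuming the outer loop is at depth 1) contributes 2 * COUNT to the
   vect_body bucket rather than COUNT. */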
4923 static void initialize_aarch64_code_model (void);
4925 /* Parse the architecture extension string. */
4927 static void
4928 aarch64_parse_extension (char *str)
4930 /* The extension string is parsed left to right. */
4931 const struct aarch64_option_extension *opt = NULL;
4933 /* Flag to say whether we are adding or removing an extension. */
4934 int adding_ext = -1;
4936 while (str != NULL && *str != 0)
4938 char *ext;
4939 size_t len;
4941 str++;
4942 ext = strchr (str, '+');
4944 if (ext != NULL)
4945 len = ext - str;
4946 else
4947 len = strlen (str);
4949 if (len >= 2 && strncmp (str, "no", 2) == 0)
4951 adding_ext = 0;
4952 len -= 2;
4953 str += 2;
4955 else if (len > 0)
4956 adding_ext = 1;
4958 if (len == 0)
4960 error ("missing feature modifier after %qs", "+no");
4961 return;
4964 /* Scan over the extensions table trying to find an exact match. */
4965 for (opt = all_extensions; opt->name != NULL; opt++)
4967 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
4969 /* Add or remove the extension. */
4970 if (adding_ext)
4971 aarch64_isa_flags |= opt->flags_on;
4972 else
4973 aarch64_isa_flags &= ~(opt->flags_off);
4974 break;
4978 if (opt->name == NULL)
4980 /* Extension not found in list. */
4981 error ("unknown feature modifier %qs", str);
4982 return;
4985 str = ext;
4988 return;
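/* A free-standing sketch of the tokenisation above, using plain strings in
   place of the all_extensions table; given "+fp+nosimd" it reports
   ("fp", add) followed by ("simd", remove).  The helper name and the use
   of stdio are purely illustrative. */
#if 0
#include <stdio.h>
#include <string.h>

static void
tokenize_extensions_sketch (const char *str)
{
  while (str != NULL && *str != 0)
    {
      const char *next;
      size_t len;
      int adding = 1;

      str++;                            /* Skip the leading '+'.  */
      next = strchr (str, '+');
      len = next ? (size_t) (next - str) : strlen (str);

      /* A "no" prefix means the extension is being removed.  */
      if (len >= 2 && strncmp (str, "no", 2) == 0)
        {
          adding = 0;
          str += 2;
          len -= 2;
        }

      printf ("%s %.*s\n", adding ? "add" : "remove", (int) len, str);
      str = next;
    }
}
#endif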
4991 /* Parse the ARCH string. */
4993 static void
4994 aarch64_parse_arch (void)
4996 char *ext;
4997 const struct processor *arch;
4998 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
4999 size_t len;
5001 strcpy (str, aarch64_arch_string);
5003 ext = strchr (str, '+');
5005 if (ext != NULL)
5006 len = ext - str;
5007 else
5008 len = strlen (str);
5010 if (len == 0)
5012 error ("missing arch name in -march=%qs", str);
5013 return;
5016 /* Loop through the list of supported ARCHs to find a match. */
5017 for (arch = all_architectures; arch->name != NULL; arch++)
5019 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
5021 selected_arch = arch;
5022 aarch64_isa_flags = selected_arch->flags;
5023 selected_cpu = &all_cores[selected_arch->core];
5025 if (ext != NULL)
5027 /* ARCH string contains at least one extension. */
5028 aarch64_parse_extension (ext);
5031 return;
5035 /* ARCH name not found in list. */
5036 error ("unknown value %qs for -march", str);
5037 return;
5040 /* Parse the CPU string. */
5042 static void
5043 aarch64_parse_cpu (void)
5045 char *ext;
5046 const struct processor *cpu;
5047 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
5048 size_t len;
5050 strcpy (str, aarch64_cpu_string);
5052 ext = strchr (str, '+');
5054 if (ext != NULL)
5055 len = ext - str;
5056 else
5057 len = strlen (str);
5059 if (len == 0)
5061 error ("missing cpu name in -mcpu=%qs", str);
5062 return;
5065 /* Loop through the list of supported CPUs to find a match. */
5066 for (cpu = all_cores; cpu->name != NULL; cpu++)
5068 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
5070 selected_cpu = cpu;
5071 aarch64_isa_flags = selected_cpu->flags;
5073 if (ext != NULL)
5075 /* CPU string contains at least one extension. */
5076 aarch64_parse_extension (ext);
5079 return;
5083 /* CPU name not found in list. */
5084 error ("unknown value %qs for -mcpu", str);
5085 return;
5088 /* Parse the TUNE string. */
5090 static void
5091 aarch64_parse_tune (void)
5093 const struct processor *cpu;
5094 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
5095 strcpy (str, aarch64_tune_string);
5097 /* Loop through the list of supported CPUs to find a match. */
5098 for (cpu = all_cores; cpu->name != NULL; cpu++)
5100 if (strcmp (cpu->name, str) == 0)
5102 selected_tune = cpu;
5103 return;
5107 /* CPU name not found in list. */
5108 error ("unknown value %qs for -mtune", str);
5109 return;
5113 /* Implement TARGET_OPTION_OVERRIDE. */
5115 static void
5116 aarch64_override_options (void)
5118 /* -march wins over -mcpu: when -march is given, -mcpu is ignored and the
5119 CPU is derived from the architecture; otherwise the architecture remains
5120 unselected and is implied by -mcpu. -mtune can be used with either. */
5122 if (aarch64_arch_string)
5124 aarch64_parse_arch ();
5125 aarch64_cpu_string = NULL;
5128 if (aarch64_cpu_string)
5130 aarch64_parse_cpu ();
5131 selected_arch = NULL;
5134 if (aarch64_tune_string)
5136 aarch64_parse_tune ();
5139 initialize_aarch64_code_model ();
5141 aarch64_build_bitmask_table ();
5143 /* This target defaults to strict volatile bitfields. */
5144 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
5145 flag_strict_volatile_bitfields = 1;
5147 /* If the user did not specify a processor, choose the default
5148 one for them. This will be the CPU set during configuration using
5149 --with-cpu, otherwise it is "generic". */
5150 if (!selected_cpu)
5152 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
5153 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
5156 gcc_assert (selected_cpu);
5158 /* The selected cpu may be an architecture, so look up tuning by core ID. */
5159 if (!selected_tune)
5160 selected_tune = &all_cores[selected_cpu->core];
5162 aarch64_tune_flags = selected_tune->flags;
5163 aarch64_tune = selected_tune->core;
5164 aarch64_tune_params = selected_tune->tune;
5166 aarch64_override_options_after_change ();
5169 /* Implement targetm.override_options_after_change. */
5171 static void
5172 aarch64_override_options_after_change (void)
5174 faked_omit_frame_pointer = false;
5176 /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
5177 that aarch64_frame_pointer_required will be called. We need to remember
5178 whether flag_omit_frame_pointer was turned on normally or just faked. */
5180 if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
5182 flag_omit_frame_pointer = true;
5183 faked_omit_frame_pointer = true;
5187 static struct machine_function *
5188 aarch64_init_machine_status (void)
5190 struct machine_function *machine;
5191 machine = ggc_alloc_cleared_machine_function ();
5192 return machine;
5195 void
5196 aarch64_init_expanders (void)
5198 init_machine_status = aarch64_init_machine_status;
5201 /* Select and sanity-check the code model to use, taking flag_pic into account. */
5202 static void
5203 initialize_aarch64_code_model (void)
5205 if (flag_pic)
5207 switch (aarch64_cmodel_var)
5209 case AARCH64_CMODEL_TINY:
5210 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
5211 break;
5212 case AARCH64_CMODEL_SMALL:
5213 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
5214 break;
5215 case AARCH64_CMODEL_LARGE:
5216 sorry ("code model %qs with -f%s", "large",
5217 flag_pic > 1 ? "PIC" : "pic");
5218 default:
5219 gcc_unreachable ();
5222 else
5223 aarch64_cmodel = aarch64_cmodel_var;
5226 /* Return true if SYMBOL_REF X binds locally. */
5228 static bool
5229 aarch64_symbol_binds_local_p (const_rtx x)
5231 return (SYMBOL_REF_DECL (x)
5232 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
5233 : SYMBOL_REF_LOCAL_P (x));
5236 /* Return true if SYMBOL_REF X is thread-local. */
5237 static bool
5238 aarch64_tls_symbol_p (rtx x)
5240 if (! TARGET_HAVE_TLS)
5241 return false;
5243 if (GET_CODE (x) != SYMBOL_REF)
5244 return false;
5246 return SYMBOL_REF_TLS_MODEL (x) != 0;
5249 /* Classify a TLS symbol into one of the TLS kinds. */
5250 enum aarch64_symbol_type
5251 aarch64_classify_tls_symbol (rtx x)
5253 enum tls_model tls_kind = tls_symbolic_operand_type (x);
5255 switch (tls_kind)
5257 case TLS_MODEL_GLOBAL_DYNAMIC:
5258 case TLS_MODEL_LOCAL_DYNAMIC:
5259 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
5261 case TLS_MODEL_INITIAL_EXEC:
5262 return SYMBOL_SMALL_GOTTPREL;
5264 case TLS_MODEL_LOCAL_EXEC:
5265 return SYMBOL_SMALL_TPREL;
5267 case TLS_MODEL_EMULATED:
5268 case TLS_MODEL_NONE:
5269 return SYMBOL_FORCE_TO_MEM;
5271 default:
5272 gcc_unreachable ();
5276 /* Return the method that should be used to access SYMBOL_REF or
5277 LABEL_REF X in context CONTEXT. */
5279 enum aarch64_symbol_type
5280 aarch64_classify_symbol (rtx x,
5281 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
5283 if (GET_CODE (x) == LABEL_REF)
5285 switch (aarch64_cmodel)
5287 case AARCH64_CMODEL_LARGE:
5288 return SYMBOL_FORCE_TO_MEM;
5290 case AARCH64_CMODEL_TINY_PIC:
5291 case AARCH64_CMODEL_TINY:
5292 return SYMBOL_TINY_ABSOLUTE;
5294 case AARCH64_CMODEL_SMALL_PIC:
5295 case AARCH64_CMODEL_SMALL:
5296 return SYMBOL_SMALL_ABSOLUTE;
5298 default:
5299 gcc_unreachable ();
5303 if (GET_CODE (x) == SYMBOL_REF)
5305 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
5306 || CONSTANT_POOL_ADDRESS_P (x))
5307 return SYMBOL_FORCE_TO_MEM;
5309 if (aarch64_tls_symbol_p (x))
5310 return aarch64_classify_tls_symbol (x);
5312 switch (aarch64_cmodel)
5314 case AARCH64_CMODEL_TINY:
5315 if (SYMBOL_REF_WEAK (x))
5316 return SYMBOL_FORCE_TO_MEM;
5317 return SYMBOL_TINY_ABSOLUTE;
5319 case AARCH64_CMODEL_SMALL:
5320 if (SYMBOL_REF_WEAK (x))
5321 return SYMBOL_FORCE_TO_MEM;
5322 return SYMBOL_SMALL_ABSOLUTE;
5324 case AARCH64_CMODEL_TINY_PIC:
5325 if (!aarch64_symbol_binds_local_p (x))
5326 return SYMBOL_TINY_GOT;
5327 return SYMBOL_TINY_ABSOLUTE;
5329 case AARCH64_CMODEL_SMALL_PIC:
5330 if (!aarch64_symbol_binds_local_p (x))
5331 return SYMBOL_SMALL_GOT;
5332 return SYMBOL_SMALL_ABSOLUTE;
5334 default:
5335 gcc_unreachable ();
5339 /* By default push everything into the constant pool. */
5340 return SYMBOL_FORCE_TO_MEM;
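/* For example, under AARCH64_CMODEL_SMALL_PIC a reference to an externally
   visible global is classified as SYMBOL_SMALL_GOT, while a file-local
   static binds locally and resolves to SYMBOL_SMALL_ABSOLUTE; under the
   plain small model a weak symbol is forced into the literal pool, since
   its definition may not satisfy the model's addressing assumptions. */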
5343 bool
5344 aarch64_constant_address_p (rtx x)
5346 return (CONSTANT_P (x) && memory_address_p (DImode, x));
5349 bool
5350 aarch64_legitimate_pic_operand_p (rtx x)
5352 if (GET_CODE (x) == SYMBOL_REF
5353 || (GET_CODE (x) == CONST
5354 && GET_CODE (XEXP (x, 0)) == PLUS
5355 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5356 return false;
5358 return true;
5361 /* Return true if X holds either a quarter-precision or
5362 floating-point +0.0 constant. */
5363 static bool
5364 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5366 if (!CONST_DOUBLE_P (x))
5367 return false;
5369 /* TODO: We could handle moving 0.0 to a TFmode register,
5370 but first we would like to refactor the movtf_aarch64
5371 pattern to be more amenable to splitting moves properly and
5372 to gating correctly on TARGET_SIMD. For now, reject all
5373 constants that are not destined for SFmode or DFmode registers. */
5374 if (!(mode == SFmode || mode == DFmode))
5375 return false;
5377 if (aarch64_float_const_zero_rtx_p (x))
5378 return true;
5379 return aarch64_float_const_representable_p (x);
5382 static bool
5383 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5385 /* Do not allow vector struct mode constants. We could support
5386 0 and -1 easily, but they need support in aarch64-simd.md. */
5387 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5388 return false;
5390 /* This could probably go away because
5391 we now decompose CONST_INTs according to expand_mov_immediate. */
5392 if ((GET_CODE (x) == CONST_VECTOR
5393 && aarch64_simd_valid_immediate (x, mode, false, NULL))
5394 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5395 return !targetm.cannot_force_const_mem (mode, x);
5397 if (GET_CODE (x) == HIGH
5398 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5399 return true;
5401 return aarch64_constant_address_p (x);
5405 aarch64_load_tp (rtx target)
5407 if (!target
5408 || GET_MODE (target) != Pmode
5409 || !register_operand (target, Pmode))
5410 target = gen_reg_rtx (Pmode);
5412 /* Can return in any reg. */
5413 emit_insn (gen_aarch64_load_tp_hard (target));
5414 return target;
5417 /* On AAPCS systems, this is the "struct __va_list". */
5418 static GTY(()) tree va_list_type;
5420 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5421 Return the type to use as __builtin_va_list.
5423 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5425 struct __va_list
5427 void *__stack;
5428 void *__gr_top;
5429 void *__vr_top;
5430 int __gr_offs;
5431 int __vr_offs;
5432 }; */
5434 static tree
5435 aarch64_build_builtin_va_list (void)
5437 tree va_list_name;
5438 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5440 /* Create the type. */
5441 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5442 /* Give it the required name. */
5443 va_list_name = build_decl (BUILTINS_LOCATION,
5444 TYPE_DECL,
5445 get_identifier ("__va_list"),
5446 va_list_type);
5447 DECL_ARTIFICIAL (va_list_name) = 1;
5448 TYPE_NAME (va_list_type) = va_list_name;
5449 TYPE_STUB_DECL (va_list_type) = va_list_name;
5451 /* Create the fields. */
5452 f_stack = build_decl (BUILTINS_LOCATION,
5453 FIELD_DECL, get_identifier ("__stack"),
5454 ptr_type_node);
5455 f_grtop = build_decl (BUILTINS_LOCATION,
5456 FIELD_DECL, get_identifier ("__gr_top"),
5457 ptr_type_node);
5458 f_vrtop = build_decl (BUILTINS_LOCATION,
5459 FIELD_DECL, get_identifier ("__vr_top"),
5460 ptr_type_node);
5461 f_groff = build_decl (BUILTINS_LOCATION,
5462 FIELD_DECL, get_identifier ("__gr_offs"),
5463 integer_type_node);
5464 f_vroff = build_decl (BUILTINS_LOCATION,
5465 FIELD_DECL, get_identifier ("__vr_offs"),
5466 integer_type_node);
5468 DECL_ARTIFICIAL (f_stack) = 1;
5469 DECL_ARTIFICIAL (f_grtop) = 1;
5470 DECL_ARTIFICIAL (f_vrtop) = 1;
5471 DECL_ARTIFICIAL (f_groff) = 1;
5472 DECL_ARTIFICIAL (f_vroff) = 1;
5474 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5475 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5476 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5477 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5478 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5480 TYPE_FIELDS (va_list_type) = f_stack;
5481 DECL_CHAIN (f_stack) = f_grtop;
5482 DECL_CHAIN (f_grtop) = f_vrtop;
5483 DECL_CHAIN (f_vrtop) = f_groff;
5484 DECL_CHAIN (f_groff) = f_vroff;
5486 /* Compute its layout. */
5487 layout_type (va_list_type);
5489 return va_list_type;
5492 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5493 static void
5494 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5496 const CUMULATIVE_ARGS *cum;
5497 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5498 tree stack, grtop, vrtop, groff, vroff;
5499 tree t;
5500 int gr_save_area_size;
5501 int vr_save_area_size;
5502 int vr_offset;
5504 cum = &crtl->args.info;
5505 gr_save_area_size
5506 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5507 vr_save_area_size
5508 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5510 if (TARGET_GENERAL_REGS_ONLY)
5512 if (cum->aapcs_nvrn > 0)
5513 sorry ("%qs and floating point or vector arguments",
5514 "-mgeneral-regs-only");
5515 vr_save_area_size = 0;
5518 f_stack = TYPE_FIELDS (va_list_type_node);
5519 f_grtop = DECL_CHAIN (f_stack);
5520 f_vrtop = DECL_CHAIN (f_grtop);
5521 f_groff = DECL_CHAIN (f_vrtop);
5522 f_vroff = DECL_CHAIN (f_groff);
5524 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5525 NULL_TREE);
5526 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5527 NULL_TREE);
5528 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5529 NULL_TREE);
5530 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5531 NULL_TREE);
5532 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5533 NULL_TREE);
5535 /* Emit code to initialize STACK, which points to the next varargs stack
5536 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
5537 by named arguments. STACK is 8-byte aligned. */
5538 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5539 if (cum->aapcs_stack_size > 0)
5540 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5541 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5542 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5544 /* Emit code to initialize GRTOP, the top of the GR save area.
5545 virtual_incoming_args_rtx should have been 16-byte aligned. */
5546 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5547 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5548 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5550 /* Emit code to initialize VRTOP, the top of the VR save area.
5551 This address is gr_save_area_bytes below GRTOP, rounded
5552 down to the next 16-byte boundary. */
5553 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5554 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5555 STACK_BOUNDARY / BITS_PER_UNIT);
5557 if (vr_offset)
5558 t = fold_build_pointer_plus_hwi (t, -vr_offset);
5559 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5560 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5562 /* Emit code to initialize GROFF, the offset from GRTOP of the
5563 next GPR argument. */
5564 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5565 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5566 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5568 /* Likewise emit code to initialize VROFF, the offset from VRTOP
5569 of the next VR argument. */
5570 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5571 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5572 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
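/* A hedged worked example of the values set up above: for
   "int f (int a, double b, ...)" on LP64, assuming NUM_ARG_REGS and
   NUM_FP_ARG_REGS are both 8, UNITS_PER_WORD is 8 and UNITS_PER_VREG is 16,
   one core and one vector register are consumed by the named arguments, so
   gr_save_area_size is 56 and vr_save_area_size is 112.  va_start therefore
   sets __gr_offs = -56, __vr_offs = -112, __gr_top to the incoming-arguments
   pointer (the save areas lie below it), __vr_top 64 bytes below __gr_top
   (56 rounded up to a 16-byte boundary), and __stack to the first
   stack-passed vararg slot. */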
5575 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
5577 static tree
5578 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5579 gimple_seq *post_p ATTRIBUTE_UNUSED)
5581 tree addr;
5582 bool indirect_p;
5583 bool is_ha; /* is HFA or HVA. */
5584 bool dw_align; /* double-word align. */
5585 enum machine_mode ag_mode = VOIDmode;
5586 int nregs;
5587 enum machine_mode mode;
5589 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5590 tree stack, f_top, f_off, off, arg, roundup, on_stack;
5591 HOST_WIDE_INT size, rsize, adjust, align;
5592 tree t, u, cond1, cond2;
5594 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5595 if (indirect_p)
5596 type = build_pointer_type (type);
5598 mode = TYPE_MODE (type);
5600 f_stack = TYPE_FIELDS (va_list_type_node);
5601 f_grtop = DECL_CHAIN (f_stack);
5602 f_vrtop = DECL_CHAIN (f_grtop);
5603 f_groff = DECL_CHAIN (f_vrtop);
5604 f_vroff = DECL_CHAIN (f_groff);
5606 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5607 f_stack, NULL_TREE);
5608 size = int_size_in_bytes (type);
5609 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5611 dw_align = false;
5612 adjust = 0;
5613 if (aarch64_vfp_is_call_or_return_candidate (mode,
5614 type,
5615 &ag_mode,
5616 &nregs,
5617 &is_ha))
5619 /* TYPE passed in fp/simd registers. */
5620 if (TARGET_GENERAL_REGS_ONLY)
5621 sorry ("%qs and floating point or vector arguments",
5622 "-mgeneral-regs-only");
5624 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5625 unshare_expr (valist), f_vrtop, NULL_TREE);
5626 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5627 unshare_expr (valist), f_vroff, NULL_TREE);
5629 rsize = nregs * UNITS_PER_VREG;
5631 if (is_ha)
5633 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5634 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5636 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5637 && size < UNITS_PER_VREG)
5639 adjust = UNITS_PER_VREG - size;
5642 else
5644 /* TYPE passed in general registers. */
5645 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5646 unshare_expr (valist), f_grtop, NULL_TREE);
5647 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5648 unshare_expr (valist), f_groff, NULL_TREE);
5649 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5650 nregs = rsize / UNITS_PER_WORD;
5652 if (align > 8)
5653 dw_align = true;
5655 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5656 && size < UNITS_PER_WORD)
5658 adjust = UNITS_PER_WORD - size;
5662 /* Get a local temporary for the field value. */
5663 off = get_initialized_tmp_var (f_off, pre_p, NULL);
5665 /* Emit code to branch if off >= 0. */
5666 t = build2 (GE_EXPR, boolean_type_node, off,
5667 build_int_cst (TREE_TYPE (off), 0));
5668 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5670 if (dw_align)
5672 /* Emit: offs = (offs + 15) & -16. */
5673 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5674 build_int_cst (TREE_TYPE (off), 15));
5675 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5676 build_int_cst (TREE_TYPE (off), -16));
5677 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5679 else
5680 roundup = NULL;
5682 /* Update ap.__[g|v]r_offs */
5683 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5684 build_int_cst (TREE_TYPE (off), rsize));
5685 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5687 /* String up. */
5688 if (roundup)
5689 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5691 /* [cond2] if (ap.__[g|v]r_offs > 0) */
5692 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5693 build_int_cst (TREE_TYPE (f_off), 0));
5694 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5696 /* String up: make sure the assignment happens before the use. */
5697 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5698 COND_EXPR_ELSE (cond1) = t;
5700 /* Prepare the trees handling the argument that is passed on the stack;
5701 the top-level node will be stored in ON_STACK. */
5702 arg = get_initialized_tmp_var (stack, pre_p, NULL);
5703 if (align > 8)
5705 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
5706 t = fold_convert (intDI_type_node, arg);
5707 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5708 build_int_cst (TREE_TYPE (t), 15));
5709 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5710 build_int_cst (TREE_TYPE (t), -16));
5711 t = fold_convert (TREE_TYPE (arg), t);
5712 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5714 else
5715 roundup = NULL;
5716 /* Advance ap.__stack */
5717 t = fold_convert (intDI_type_node, arg);
5718 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5719 build_int_cst (TREE_TYPE (t), size + 7));
5720 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5721 build_int_cst (TREE_TYPE (t), -8));
5722 t = fold_convert (TREE_TYPE (arg), t);
5723 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5724 /* String up roundup and advance. */
5725 if (roundup)
5726 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5727 /* String up with arg */
5728 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5729 /* Big-endianness related address adjustment. */
5730 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5731 && size < UNITS_PER_WORD)
5733 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5734 size_int (UNITS_PER_WORD - size));
5735 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5738 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5739 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5741 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
5742 t = off;
5743 if (adjust)
5744 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5745 build_int_cst (TREE_TYPE (off), adjust));
5747 t = fold_convert (sizetype, t);
5748 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5750 if (is_ha)
5752 /* type ha; // treat as "struct {ftype field[n];}"
5753 ... [computing offs]
5754 for (i = 0; i < nregs; ++i, offs += 16)
5755 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5756 return ha; */
5757 int i;
5758 tree tmp_ha, field_t, field_ptr_t;
5760 /* Declare a local variable. */
5761 tmp_ha = create_tmp_var_raw (type, "ha");
5762 gimple_add_tmp_var (tmp_ha);
5764 /* Establish the base type. */
5765 switch (ag_mode)
5767 case SFmode:
5768 field_t = float_type_node;
5769 field_ptr_t = float_ptr_type_node;
5770 break;
5771 case DFmode:
5772 field_t = double_type_node;
5773 field_ptr_t = double_ptr_type_node;
5774 break;
5775 case TFmode:
5776 field_t = long_double_type_node;
5777 field_ptr_t = long_double_ptr_type_node;
5778 break;
5779 /* Half-precision and quad-precision floats are not fully supported yet.
5780 Enable the following code once that support is complete; we still need
5781 to find the correct type node for __fp16 *. */
5782 #if 0
5783 case HFmode:
5784 field_t = float_type_node;
5785 field_ptr_t = float_ptr_type_node;
5786 break;
5787 #endif
5788 case V2SImode:
5789 case V4SImode:
5791 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5792 field_t = build_vector_type_for_mode (innertype, ag_mode);
5793 field_ptr_t = build_pointer_type (field_t);
5795 break;
5796 default:
5797 gcc_assert (0);
5800 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area) */
5801 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5802 addr = t;
5803 t = fold_convert (field_ptr_t, addr);
5804 t = build2 (MODIFY_EXPR, field_t,
5805 build1 (INDIRECT_REF, field_t, tmp_ha),
5806 build1 (INDIRECT_REF, field_t, t));
5808 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
5809 for (i = 1; i < nregs; ++i)
5811 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5812 u = fold_convert (field_ptr_t, addr);
5813 u = build2 (MODIFY_EXPR, field_t,
5814 build2 (MEM_REF, field_t, tmp_ha,
5815 build_int_cst (field_ptr_t,
5816 (i *
5817 int_size_in_bytes (field_t)))),
5818 build1 (INDIRECT_REF, field_t, u));
5819 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5822 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5823 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5826 COND_EXPR_ELSE (cond2) = t;
5827 addr = fold_convert (build_pointer_type (type), cond1);
5828 addr = build_va_arg_indirect_ref (addr);
5830 if (indirect_p)
5831 addr = build_va_arg_indirect_ref (addr);
5833 return addr;
5836 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
5838 static void
5839 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5840 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5841 int no_rtl)
5843 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5844 CUMULATIVE_ARGS local_cum;
5845 int gr_saved, vr_saved;
5847 /* The caller has advanced CUM up to, but not beyond, the last named
5848 argument. Advance a local copy of CUM past the last "real" named
5849 argument, to find out how many registers are left over. */
5850 local_cum = *cum;
5851 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
5853 /* Find out how many registers we need to save. */
5854 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5855 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
5857 if (TARGET_GENERAL_REGS_ONLY)
5859 if (local_cum.aapcs_nvrn > 0)
5860 sorry ("%qs and floating point or vector arguments",
5861 "-mgeneral-regs-only");
5862 vr_saved = 0;
5865 if (!no_rtl)
5867 if (gr_saved > 0)
5869 rtx ptr, mem;
5871 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
5872 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5873 - gr_saved * UNITS_PER_WORD);
5874 mem = gen_frame_mem (BLKmode, ptr);
5875 set_mem_alias_set (mem, get_varargs_alias_set ());
5877 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5878 mem, gr_saved);
5880 if (vr_saved > 0)
5882 /* We can't use move_block_from_reg, because it will use
5883 the wrong mode, storing D regs only. */
5884 enum machine_mode mode = TImode;
5885 int off, i;
5887 /* Set OFF to the offset from virtual_incoming_args_rtx of
5888 the first vector register. The VR save area lies below
5889 the GR one, and is aligned to 16 bytes. */
5890 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5891 STACK_BOUNDARY / BITS_PER_UNIT);
5892 off -= vr_saved * UNITS_PER_VREG;
5894 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
5896 rtx ptr, mem;
5898 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
5899 mem = gen_frame_mem (mode, ptr);
5900 set_mem_alias_set (mem, get_varargs_alias_set ());
5901 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
5902 off += UNITS_PER_VREG;
5907 /* We don't save the size into *PRETEND_SIZE because we want to avoid
5908 any complication of having crtl->args.pretend_args_size changed. */
5909 cfun->machine->saved_varargs_size
5910 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5911 STACK_BOUNDARY / BITS_PER_UNIT)
5912 + vr_saved * UNITS_PER_VREG);
5915 static void
5916 aarch64_conditional_register_usage (void)
5918 int i;
5919 if (!TARGET_FLOAT)
5921 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
5923 fixed_regs[i] = 1;
5924 call_used_regs[i] = 1;
5929 /* Walk down the type tree of TYPE counting consecutive base elements.
5930 If *MODEP is VOIDmode, then set it to the first valid floating point
5931 type. If a non-floating point type is found, or if a floating point
5932 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5933 otherwise return the count in the sub-tree. */
5934 static int
5935 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5937 enum machine_mode mode;
5938 HOST_WIDE_INT size;
5940 switch (TREE_CODE (type))
5942 case REAL_TYPE:
5943 mode = TYPE_MODE (type);
5944 if (mode != DFmode && mode != SFmode && mode != TFmode)
5945 return -1;
5947 if (*modep == VOIDmode)
5948 *modep = mode;
5950 if (*modep == mode)
5951 return 1;
5953 break;
5955 case COMPLEX_TYPE:
5956 mode = TYPE_MODE (TREE_TYPE (type));
5957 if (mode != DFmode && mode != SFmode && mode != TFmode)
5958 return -1;
5960 if (*modep == VOIDmode)
5961 *modep = mode;
5963 if (*modep == mode)
5964 return 2;
5966 break;
5968 case VECTOR_TYPE:
5969 /* Use V2SImode and V4SImode as representatives of all 64-bit
5970 and 128-bit vector types. */
5971 size = int_size_in_bytes (type);
5972 switch (size)
5974 case 8:
5975 mode = V2SImode;
5976 break;
5977 case 16:
5978 mode = V4SImode;
5979 break;
5980 default:
5981 return -1;
5984 if (*modep == VOIDmode)
5985 *modep = mode;
5987 /* Vector modes are considered to be opaque: two vectors are
5988 equivalent for the purposes of being homogeneous aggregates
5989 if they are the same size. */
5990 if (*modep == mode)
5991 return 1;
5993 break;
5995 case ARRAY_TYPE:
5997 int count;
5998 tree index = TYPE_DOMAIN (type);
6000 /* Can't handle incomplete types. */
6001 if (!COMPLETE_TYPE_P (type))
6002 return -1;
6004 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
6005 if (count == -1
6006 || !index
6007 || !TYPE_MAX_VALUE (index)
6008 || !host_integerp (TYPE_MAX_VALUE (index), 1)
6009 || !TYPE_MIN_VALUE (index)
6010 || !host_integerp (TYPE_MIN_VALUE (index), 1)
6011 || count < 0)
6012 return -1;
6014 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
6015 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
6017 /* There must be no padding. */
6018 if (!host_integerp (TYPE_SIZE (type), 1)
6019 || (tree_low_cst (TYPE_SIZE (type), 1)
6020 != count * GET_MODE_BITSIZE (*modep)))
6021 return -1;
6023 return count;
6026 case RECORD_TYPE:
6028 int count = 0;
6029 int sub_count;
6030 tree field;
6032 /* Can't handle incomplete types. */
6033 if (!COMPLETE_TYPE_P (type))
6034 return -1;
6036 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6038 if (TREE_CODE (field) != FIELD_DECL)
6039 continue;
6041 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6042 if (sub_count < 0)
6043 return -1;
6044 count += sub_count;
6047 /* There must be no padding. */
6048 if (!host_integerp (TYPE_SIZE (type), 1)
6049 || (tree_low_cst (TYPE_SIZE (type), 1)
6050 != count * GET_MODE_BITSIZE (*modep)))
6051 return -1;
6053 return count;
6056 case UNION_TYPE:
6057 case QUAL_UNION_TYPE:
6059 /* These aren't very interesting except in a degenerate case. */
6060 int count = 0;
6061 int sub_count;
6062 tree field;
6064 /* Can't handle incomplete types. */
6065 if (!COMPLETE_TYPE_P (type))
6066 return -1;
6068 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6070 if (TREE_CODE (field) != FIELD_DECL)
6071 continue;
6073 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6074 if (sub_count < 0)
6075 return -1;
6076 count = count > sub_count ? count : sub_count;
6079 /* There must be no padding. */
6080 if (!host_integerp (TYPE_SIZE (type), 1)
6081 || (tree_low_cst (TYPE_SIZE (type), 1)
6082 != count * GET_MODE_BITSIZE (*modep)))
6083 return -1;
6085 return count;
6088 default:
6089 break;
6092 return -1;
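/* Hedged illustrations of what the walk above computes for a few simple
   types, assuming the usual LP64 sizes for float and double; the structs
   below exist only for this example. */
#if 0
struct hfa_two_doubles { double x, y; };        /* returns 2, *modep = DFmode  */
struct hfa_four_floats { float v[4]; };         /* returns 4, *modep = SFmode  */
struct not_an_hfa      { float f; double d; };  /* mixed base types: returns -1 */
#endif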
6095 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
6096 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
6097 array types. The C99 floating-point complex types are also considered
6098 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
6099 types, which are GCC extensions and out of the scope of AAPCS64, are
6100 treated as composite types here as well.
6102 Note that MODE itself is not sufficient in determining whether a type
6103 is such a composite type or not. This is because
6104 stor-layout.c:compute_record_mode may have already changed the MODE
6105 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
6106 structure with only one field may have its MODE set to the mode of the
6107 field. Also an integer mode whose size matches the size of the
6108 RECORD_TYPE type may be used to substitute the original mode
6109 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
6110 solely relied on. */
6112 static bool
6113 aarch64_composite_type_p (const_tree type,
6114 enum machine_mode mode)
6116 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
6117 return true;
6119 if (mode == BLKmode
6120 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6121 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
6122 return true;
6124 return false;
6127 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
6128 type as described in AAPCS64 \S 4.1.2.
6130 See the comment above aarch64_composite_type_p for the notes on MODE. */
6132 static bool
6133 aarch64_short_vector_p (const_tree type,
6134 enum machine_mode mode)
6136 HOST_WIDE_INT size = -1;
6138 if (type && TREE_CODE (type) == VECTOR_TYPE)
6139 size = int_size_in_bytes (type);
6140 else if (!aarch64_composite_type_p (type, mode)
6141 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6142 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
6143 size = GET_MODE_SIZE (mode);
6145 return (size == 8 || size == 16) ? true : false;
6148 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
6149 shall be passed or returned in simd/fp register(s) (provided that these
6150 parameter-passing registers are available).
6152 Upon successful return, *COUNT returns the number of needed registers,
6153 *BASE_MODE returns the mode of the individual register and, when IS_HA
6154 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
6155 floating-point aggregate or a homogeneous short-vector aggregate. */
6157 static bool
6158 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
6159 const_tree type,
6160 enum machine_mode *base_mode,
6161 int *count,
6162 bool *is_ha)
6164 enum machine_mode new_mode = VOIDmode;
6165 bool composite_p = aarch64_composite_type_p (type, mode);
6167 if (is_ha != NULL) *is_ha = false;
6169 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
6170 || aarch64_short_vector_p (type, mode))
6172 *count = 1;
6173 new_mode = mode;
6175 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6177 if (is_ha != NULL) *is_ha = true;
6178 *count = 2;
6179 new_mode = GET_MODE_INNER (mode);
6181 else if (type && composite_p)
6183 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6185 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
6187 if (is_ha != NULL) *is_ha = true;
6188 *count = ag_count;
6190 else
6191 return false;
6193 else
6194 return false;
6196 *base_mode = new_mode;
6197 return true;
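/* Putting the two routines together: a _Complex double argument yields
   *count = 2 with *base_mode = DFmode and *is_ha set; a struct of three
   floats yields *count = 3 with *base_mode = SFmode and *is_ha set; a bare
   double yields *count = 1 and leaves *is_ha false. */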
6200 /* Implement TARGET_STRUCT_VALUE_RTX. */
6202 static rtx
6203 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
6204 int incoming ATTRIBUTE_UNUSED)
6206 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
6209 /* Implements target hook vector_mode_supported_p. */
6210 static bool
6211 aarch64_vector_mode_supported_p (enum machine_mode mode)
6213 if (TARGET_SIMD
6214 && (mode == V4SImode || mode == V8HImode
6215 || mode == V16QImode || mode == V2DImode
6216 || mode == V2SImode || mode == V4HImode
6217 || mode == V8QImode || mode == V2SFmode
6218 || mode == V4SFmode || mode == V2DFmode))
6219 return true;
6221 return false;
6224 /* Return appropriate SIMD container
6225 for MODE within a vector of WIDTH bits. */
6226 static enum machine_mode
6227 aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
6229 gcc_assert (width == 64 || width == 128);
6230 if (TARGET_SIMD)
6232 if (width == 128)
6233 switch (mode)
6235 case DFmode:
6236 return V2DFmode;
6237 case SFmode:
6238 return V4SFmode;
6239 case SImode:
6240 return V4SImode;
6241 case HImode:
6242 return V8HImode;
6243 case QImode:
6244 return V16QImode;
6245 case DImode:
6246 return V2DImode;
6247 default:
6248 break;
6250 else
6251 switch (mode)
6253 case SFmode:
6254 return V2SFmode;
6255 case SImode:
6256 return V2SImode;
6257 case HImode:
6258 return V4HImode;
6259 case QImode:
6260 return V8QImode;
6261 default:
6262 break;
6265 return word_mode;
6268 /* Return 128-bit container as the preferred SIMD mode for MODE. */
6269 static enum machine_mode
6270 aarch64_preferred_simd_mode (enum machine_mode mode)
6272 return aarch64_simd_container_mode (mode, 128);
6275 /* Return the bitmask of possible vector sizes for the vectorizer
6276 to iterate over. */
6277 static unsigned int
6278 aarch64_autovectorize_vector_sizes (void)
6280 return (16 | 8);
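/* The returned mask is in bytes: 16 | 8 tells the vectorizer to iterate
   over 128-bit vectors (the preferred mode above) and then 64-bit ones,
   so for SImode elements it will consider V4SImode and then V2SImode.  */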
6283 /* A table to help perform AArch64-specific name mangling for AdvSIMD
6284 vector types in order to conform to the AAPCS64 (see "Procedure
6285 Call Standard for the ARM 64-bit Architecture", Appendix A). To
6286 qualify for emission with the mangled names defined in that document,
6287 a vector type must not only be of the correct mode but also be
6288 composed of AdvSIMD vector element types (e.g.
6289 __builtin_aarch64_simd_qi); these types are registered by
6290 aarch64_init_simd_builtins (). In other words, vector types defined
6291 in other ways e.g. via vector_size attribute will get default
6292 mangled names. */
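/* For example, the first entry below pairs V8QImode with the element type
   __builtin_aarch64_simd_qi and the string "10__Int8x8_t", i.e. the
   Itanium C++ ABI source-name encoding of the vendor type __Int8x8_t
   (a length prefix of 10 followed by the identifier), which is what
   int8x8_t from <arm_neon.h> mangles to.  */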
6293 typedef struct
6295 enum machine_mode mode;
6296 const char *element_type_name;
6297 const char *mangled_name;
6298 } aarch64_simd_mangle_map_entry;
6300 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
6301 /* 64-bit containerized types. */
6302 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
6303 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
6304 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
6305 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
6306 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
6307 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
6308 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
6309 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
6310 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6311 /* 128-bit containerized types. */
6312 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
6313 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
6314 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
6315 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
6316 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
6317 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
6318 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
6319 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
6320 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
6321 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6322 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6323 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6324 { VOIDmode, NULL, NULL }
6327 /* Implement TARGET_MANGLE_TYPE. */
6329 static const char *
6330 aarch64_mangle_type (const_tree type)
6332 /* The AArch64 ABI documents say that "__va_list" has to be
6333 mangled as if it is in the "std" namespace. */
6334 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6335 return "St9__va_list";
6337 /* Check the mode of the vector type, and the name of the vector
6338 element type, against the table. */
6339 if (TREE_CODE (type) == VECTOR_TYPE)
6341 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6343 while (pos->mode != VOIDmode)
6345 tree elt_type = TREE_TYPE (type);
6347 if (pos->mode == TYPE_MODE (type)
6348 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6349 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6350 pos->element_type_name))
6351 return pos->mangled_name;
6353 pos++;
6357 /* Use the default mangling. */
6358 return NULL;
6361 /* Return the equivalent letter for size. */
6362 static char
6363 sizetochar (int size)
6365 switch (size)
6367 case 64: return 'd';
6368 case 32: return 's';
6369 case 16: return 'h';
6370 case 8 : return 'b';
6371 default: gcc_unreachable ();
6375 /* Return true iff x is a uniform vector of floating-point
6376 constants, and the constant can be represented in
6377 quarter-precision form. Note, as aarch64_float_const_representable_p
6378 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
6379 static bool
6380 aarch64_vect_float_const_representable_p (rtx x)
6382 int i = 0;
6383 REAL_VALUE_TYPE r0, ri;
6384 rtx x0, xi;
6386 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6387 return false;
6389 x0 = CONST_VECTOR_ELT (x, 0);
6390 if (!CONST_DOUBLE_P (x0))
6391 return false;
6393 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
6395 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6397 xi = CONST_VECTOR_ELT (x, i);
6398 if (!CONST_DOUBLE_P (xi))
6399 return false;
6401 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6402 if (!REAL_VALUES_EQUAL (r0, ri))
6403 return false;
6406 return aarch64_float_const_representable_p (x0);
6409 /* Return true if OP is a valid AdvSIMD immediate for MODE, false otherwise; if INFO is nonnull, describe in it how the immediate can be generated. */
6410 bool
6411 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
6412 struct simd_immediate_info *info)
6414 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6415 matches = 1; \
6416 for (i = 0; i < idx; i += (STRIDE)) \
6417 if (!(TEST)) \
6418 matches = 0; \
6419 if (matches) \
6421 immtype = (CLASS); \
6422 elsize = (ELSIZE); \
6423 eshift = (SHIFT); \
6424 emvn = (NEG); \
6425 break; \
6428 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6429 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6430 unsigned char bytes[16];
6431 int immtype = -1, matches;
6432 unsigned int invmask = inverse ? 0xff : 0;
6433 int eshift, emvn;
6435 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6437 if (! (aarch64_simd_imm_zero_p (op, mode)
6438 || aarch64_vect_float_const_representable_p (op)))
6439 return false;
6441 if (info)
6443 info->value = CONST_VECTOR_ELT (op, 0);
6444 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
6445 info->mvn = false;
6446 info->shift = 0;
6449 return true;
6452 /* Splat vector constant out into a byte vector. */
6453 for (i = 0; i < n_elts; i++)
6455 rtx el = CONST_VECTOR_ELT (op, i);
6456 unsigned HOST_WIDE_INT elpart;
6457 unsigned int part, parts;
6459 if (GET_CODE (el) == CONST_INT)
6461 elpart = INTVAL (el);
6462 parts = 1;
6464 else if (GET_CODE (el) == CONST_DOUBLE)
6466 elpart = CONST_DOUBLE_LOW (el);
6467 parts = 2;
6469 else
6470 gcc_unreachable ();
6472 for (part = 0; part < parts; part++)
6474 unsigned int byte;
6475 for (byte = 0; byte < innersize; byte++)
6477 bytes[idx++] = (elpart & 0xff) ^ invmask;
6478 elpart >>= BITS_PER_UNIT;
6480 if (GET_CODE (el) == CONST_DOUBLE)
6481 elpart = CONST_DOUBLE_HIGH (el);
6485 /* Sanity check. */
6486 gcc_assert (idx == GET_MODE_SIZE (mode));
6490 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6491 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6493 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6494 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6496 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6497 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6499 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6500 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
6502 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
6504 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
6506 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6507 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6509 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6510 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6512 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6513 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6515 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6516 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
6518 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
6520 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
6522 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6523 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6525 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6526 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6528 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6529 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6531 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6532 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6534 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
6536 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6537 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
6539 while (0);
6541 if (immtype == -1)
6542 return false;
6544 if (info)
6546 info->element_width = elsize;
6547 info->mvn = emvn != 0;
6548 info->shift = eshift;
6550 unsigned HOST_WIDE_INT imm = 0;
6552 if (immtype >= 12 && immtype <= 15)
6553 info->msl = true;
6555 /* Un-invert bytes of recognized vector, if necessary. */
6556 if (invmask != 0)
6557 for (i = 0; i < idx; i++)
6558 bytes[i] ^= invmask;
6560 if (immtype == 17)
6562 /* FIXME: Broken on 32-bit H_W_I hosts. */
6563 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6565 for (i = 0; i < 8; i++)
6566 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6567 << (i * BITS_PER_UNIT);
6570 info->value = GEN_INT (imm);
6572 else
6574 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6575 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6577 /* Construct 'abcdefgh' because the assembler cannot handle
6578 generic constants. */
6579 if (info->mvn)
6580 imm = ~imm;
6581 imm = (imm >> info->shift) & 0xff;
6582 info->value = GEN_INT (imm);
6586 return true;
6587 #undef CHECK
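/* Worked example (illustrative): for a V4SImode constant whose elements are
   all 0x0000ab00, the per-element little-endian bytes are
   { 0x00, 0xab, 0x00, 0x00 }, which matches the second CHECK pattern above
   (class 1, element size 32, shift 8, no negation).  The extracted
   immediate is 0xab, and aarch64_output_simd_mov_immediate below would
   emit something of the form "movi <Vd>.4s, 0xab, lsl 8" for it.  */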
6590 static bool
6591 aarch64_const_vec_all_same_int_p (rtx x,
6592 HOST_WIDE_INT minval,
6593 HOST_WIDE_INT maxval)
6595 HOST_WIDE_INT firstval;
6596 int count, i;
6598 if (GET_CODE (x) != CONST_VECTOR
6599 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
6600 return false;
6602 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6603 if (firstval < minval || firstval > maxval)
6604 return false;
6606 count = CONST_VECTOR_NUNITS (x);
6607 for (i = 1; i < count; i++)
6608 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6609 return false;
6611 return true;
6614 /* Check that immediate shift constants are within range. */
6615 bool
6616 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6618 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6619 if (left)
6620 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6621 else
6622 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
6625 /* Return true if X is a uniform vector where all elements
6626 are either the floating-point constant 0.0 or the
6627 integer constant 0. */
6628 bool
6629 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6631 return x == CONST0_RTX (mode);
6634 bool
6635 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6637 HOST_WIDE_INT imm = INTVAL (x);
6638 int i;
6640 for (i = 0; i < 8; i++)
6642 unsigned int byte = imm & 0xff;
6643 if (byte != 0xff && byte != 0)
6644 return false;
6645 imm >>= 8;
6648 return true;
6651 bool
6652 aarch64_mov_operand_p (rtx x,
6653 enum aarch64_symbol_context context,
6654 enum machine_mode mode)
6656 if (GET_CODE (x) == HIGH
6657 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6658 return true;
6660 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
6661 return true;
6663 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
6664 return true;
6666 return aarch64_classify_symbolic_expression (x, context)
6667 == SYMBOL_TINY_ABSOLUTE;
6670 /* Return a const_int vector of VAL. */
6671 rtx
6672 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6674 int nunits = GET_MODE_NUNITS (mode);
6675 rtvec v = rtvec_alloc (nunits);
6676 int i;
6678 for (i=0; i < nunits; i++)
6679 RTVEC_ELT (v, i) = GEN_INT (val);
6681 return gen_rtx_CONST_VECTOR (mode, v);
6684 /* Check OP is a legal scalar immediate for the MOVI instruction. */
6686 bool
6687 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
6689 enum machine_mode vmode;
6691 gcc_assert (!VECTOR_MODE_P (mode));
6692 vmode = aarch64_preferred_simd_mode (mode);
6693 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
6694 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
6697 /* Construct and return a PARALLEL RTX vector. */
6698 rtx
6699 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6701 int nunits = GET_MODE_NUNITS (mode);
6702 rtvec v = rtvec_alloc (nunits / 2);
6703 int base = high ? nunits / 2 : 0;
6704 rtx t1;
6705 int i;
6707 for (i=0; i < nunits / 2; i++)
6708 RTVEC_ELT (v, i) = GEN_INT (base + i);
6710 t1 = gen_rtx_PARALLEL (mode, v);
6711 return t1;
6714 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
6715 HIGH (exclusive). */
6716 void
6717 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6719 HOST_WIDE_INT lane;
6720 gcc_assert (GET_CODE (operand) == CONST_INT);
6721 lane = INTVAL (operand);
6723 if (lane < low || lane >= high)
6724 error ("lane out of range");
6727 void
6728 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6730 gcc_assert (GET_CODE (operand) == CONST_INT);
6731 HOST_WIDE_INT lane = INTVAL (operand);
6733 if (lane < low || lane >= high)
6734 error ("constant out of range");
6737 /* Emit code to reinterpret one AdvSIMD type as another,
6738 without altering bits. */
6739 void
6740 aarch64_simd_reinterpret (rtx dest, rtx src)
6742 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6745 /* Emit code to place an AdvSIMD pair result in memory locations (with equal
6746 registers). */
6747 void
6748 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6749 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6750 rtx op1)
6752 rtx mem = gen_rtx_MEM (mode, destaddr);
6753 rtx tmp1 = gen_reg_rtx (mode);
6754 rtx tmp2 = gen_reg_rtx (mode);
6756 emit_insn (intfn (tmp1, op1, tmp2));
6758 emit_move_insn (mem, tmp1);
6759 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6760 emit_move_insn (mem, tmp2);
6763 /* Return TRUE if OP is a valid vector addressing mode. */
6764 bool
6765 aarch64_simd_mem_operand_p (rtx op)
6767 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6768 || GET_CODE (XEXP (op, 0)) == REG);
6771 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
6772 not to early-clobber SRC registers in the process.
6774 We assume that the operands described by SRC and DEST represent a
6775 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
6776 number of components into which the copy has been decomposed. */
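/* For example, when a 2-register value is copied from {V1, V2} to {V2, V3},
   the destination overlaps the source and has the higher register number,
   so the moves are emitted in reverse order (V3 <- V2 first, then V2 <- V1)
   to avoid clobbering V2 before it has been read.  */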
6777 void
6778 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6779 rtx *src, unsigned int count)
6781 unsigned int i;
6783 if (!reg_overlap_mentioned_p (operands[0], operands[1])
6784 || REGNO (operands[0]) < REGNO (operands[1]))
6786 for (i = 0; i < count; i++)
6788 operands[2 * i] = dest[i];
6789 operands[2 * i + 1] = src[i];
6792 else
6794 for (i = 0; i < count; i++)
6796 operands[2 * i] = dest[count - i - 1];
6797 operands[2 * i + 1] = src[count - i - 1];
6802 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6803 one of VSTRUCT modes: OI, CI or XI. */
6804 int
6805 aarch64_simd_attr_length_move (rtx insn)
6807 enum machine_mode mode;
6809 extract_insn_cached (insn);
6811 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6813 mode = GET_MODE (recog_data.operand[0]);
6814 switch (mode)
6816 case OImode:
6817 return 8;
6818 case CImode:
6819 return 12;
6820 case XImode:
6821 return 16;
6822 default:
6823 gcc_unreachable ();
6826 return 4;
6829 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
6830 alignment of a vector to 128 bits. */
6831 static HOST_WIDE_INT
6832 aarch64_simd_vector_alignment (const_tree type)
6834 HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
6835 return MIN (align, 128);
6838 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
6839 static bool
6840 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
6842 if (is_packed)
6843 return false;
6845 /* We guarantee alignment for vectors up to 128 bits. */
6846 if (tree_int_cst_compare (TYPE_SIZE (type),
6847 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
6848 return false;
6850 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
6851 return true;
6854 /* If VALS is a vector constant that can be loaded into a register
6855 using DUP, generate instructions to do so and return an RTX to
6856 assign to the register. Otherwise return NULL_RTX. */
6857 static rtx
6858 aarch64_simd_dup_constant (rtx vals)
6860 enum machine_mode mode = GET_MODE (vals);
6861 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6862 int n_elts = GET_MODE_NUNITS (mode);
6863 bool all_same = true;
6864 rtx x;
6865 int i;
6867 if (GET_CODE (vals) != CONST_VECTOR)
6868 return NULL_RTX;
6870 for (i = 1; i < n_elts; ++i)
6872 x = CONST_VECTOR_ELT (vals, i);
6873 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
6874 all_same = false;
6877 if (!all_same)
6878 return NULL_RTX;
6880 /* We can load this constant by using DUP and a constant in a
6881 single ARM register. This will be cheaper than a vector
6882 load. */
6883 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
6884 return gen_rtx_VEC_DUPLICATE (mode, x);
6888 /* Generate code to load VALS, which is a PARALLEL containing only
6889 constants (for vec_init) or CONST_VECTOR, efficiently into a
6890 register. Returns an RTX to copy into the register, or NULL_RTX
6891 for a PARALLEL that can not be converted into a CONST_VECTOR. */
6892 static rtx
6893 aarch64_simd_make_constant (rtx vals)
6895 enum machine_mode mode = GET_MODE (vals);
6896 rtx const_dup;
6897 rtx const_vec = NULL_RTX;
6898 int n_elts = GET_MODE_NUNITS (mode);
6899 int n_const = 0;
6900 int i;
6902 if (GET_CODE (vals) == CONST_VECTOR)
6903 const_vec = vals;
6904 else if (GET_CODE (vals) == PARALLEL)
6906 /* A CONST_VECTOR must contain only CONST_INTs and
6907 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
6908 Only store valid constants in a CONST_VECTOR. */
6909 for (i = 0; i < n_elts; ++i)
6911 rtx x = XVECEXP (vals, 0, i);
6912 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
6913 n_const++;
6915 if (n_const == n_elts)
6916 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6918 else
6919 gcc_unreachable ();
6921 if (const_vec != NULL_RTX
6922 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
6923 /* Load using MOVI/MVNI. */
6924 return const_vec;
6925 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
6926 /* Loaded using DUP. */
6927 return const_dup;
6928 else if (const_vec != NULL_RTX)
6929 /* Load from constant pool. We can not take advantage of single-cycle
6930 LD1 because we need a PC-relative addressing mode. */
6931 return const_vec;
6932 else
6933 /* A PARALLEL containing something not valid inside CONST_VECTOR.
6934 We can not construct an initializer. */
6935 return NULL_RTX;
6938 void
6939 aarch64_expand_vector_init (rtx target, rtx vals)
6941 enum machine_mode mode = GET_MODE (target);
6942 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6943 int n_elts = GET_MODE_NUNITS (mode);
6944 int n_var = 0, one_var = -1;
6945 bool all_same = true;
6946 rtx x, mem;
6947 int i;
6949 x = XVECEXP (vals, 0, 0);
6950 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6951 n_var = 1, one_var = 0;
6953 for (i = 1; i < n_elts; ++i)
6955 x = XVECEXP (vals, 0, i);
6956 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6957 ++n_var, one_var = i;
6959 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6960 all_same = false;
6963 if (n_var == 0)
6965 rtx constant = aarch64_simd_make_constant (vals);
6966 if (constant != NULL_RTX)
6968 emit_move_insn (target, constant);
6969 return;
6973 /* Splat a single non-constant element if we can. */
6974 if (all_same)
6976 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
6977 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
6978 return;
6981 /* One field is non-constant. Load constant then overwrite varying
6982 field. This is more efficient than using the stack. */
6983 if (n_var == 1)
6985 rtx copy = copy_rtx (vals);
6986 rtx index = GEN_INT (one_var);
6987 enum insn_code icode;
6989 /* Load constant part of vector, substitute neighboring value for
6990 varying element. */
6991 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
6992 aarch64_expand_vector_init (target, copy);
6994 /* Insert variable. */
6995 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
6996 icode = optab_handler (vec_set_optab, mode);
6997 gcc_assert (icode != CODE_FOR_nothing);
6998 emit_insn (GEN_FCN (icode) (target, x, index));
6999 return;
7002 /* Construct the vector in memory one field at a time
7003 and load the whole vector. */
7004 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7005 for (i = 0; i < n_elts; i++)
7006 emit_move_insn (adjust_address_nv (mem, inner_mode,
7007 i * GET_MODE_SIZE (inner_mode)),
7008 XVECEXP (vals, 0, i));
7009 emit_move_insn (target, mem);
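/* For instance, initialising a V4SImode vector with { x, 1, 2, 3 } (where x
   is not constant) takes the n_var == 1 path above: the constant vector
   { 1, 1, 2, 3 } is materialised first (the varying lane borrows its
   neighbour's value) and then lane 0 is overwritten with x via the vec_set
   pattern.  */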
7013 static unsigned HOST_WIDE_INT
7014 aarch64_shift_truncation_mask (enum machine_mode mode)
7016 return
7017 (aarch64_vector_mode_supported_p (mode)
7018 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
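/* So for scalar shifts the mask is the natural one (31 for SImode, 63 for
   DImode), letting the middle end drop an explicit "& 31"/"& 63" on shift
   counts, while a mask of 0 for vector and vector-structure modes means no
   such truncation may be assumed for SIMD shifts.  */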
7021 #ifndef TLS_SECTION_ASM_FLAG
7022 #define TLS_SECTION_ASM_FLAG 'T'
7023 #endif
7025 void
7026 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
7027 tree decl ATTRIBUTE_UNUSED)
7029 char flagchars[10], *f = flagchars;
7031 /* If we have already declared this section, we can use an
7032 abbreviated form to switch back to it -- unless this section is
7033 part of a COMDAT group, in which case GAS requires the full
7034 declaration every time. */
7035 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7036 && (flags & SECTION_DECLARED))
7038 fprintf (asm_out_file, "\t.section\t%s\n", name);
7039 return;
7042 if (!(flags & SECTION_DEBUG))
7043 *f++ = 'a';
7044 if (flags & SECTION_WRITE)
7045 *f++ = 'w';
7046 if (flags & SECTION_CODE)
7047 *f++ = 'x';
7048 if (flags & SECTION_SMALL)
7049 *f++ = 's';
7050 if (flags & SECTION_MERGE)
7051 *f++ = 'M';
7052 if (flags & SECTION_STRINGS)
7053 *f++ = 'S';
7054 if (flags & SECTION_TLS)
7055 *f++ = TLS_SECTION_ASM_FLAG;
7056 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7057 *f++ = 'G';
7058 *f = '\0';
7060 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
7062 if (!(flags & SECTION_NOTYPE))
7064 const char *type;
7065 const char *format;
7067 if (flags & SECTION_BSS)
7068 type = "nobits";
7069 else
7070 type = "progbits";
7072 #ifdef TYPE_OPERAND_FMT
7073 format = "," TYPE_OPERAND_FMT;
7074 #else
7075 format = ",@%s";
7076 #endif
7078 fprintf (asm_out_file, format, type);
7080 if (flags & SECTION_ENTSIZE)
7081 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
7082 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7084 if (TREE_CODE (decl) == IDENTIFIER_NODE)
7085 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
7086 else
7087 fprintf (asm_out_file, ",%s,comdat",
7088 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
7092 putc ('\n', asm_out_file);
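/* As a rough illustration of the output (the section and group names here
   are made up): a COMDAT text section for an inline function might be
   emitted as

     .section .text._Z3foov,"axG",@progbits,_Z3foov,comdat

   (allocatable + executable + group flags, progbits type, group name,
   comdat), while a previously declared ordinary section is re-entered
   with just ".section <name>".  */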
7095 /* Select a format to encode pointers in exception handling data. */
7096 int
7097 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
7099 int type;
7100 switch (aarch64_cmodel)
7102 case AARCH64_CMODEL_TINY:
7103 case AARCH64_CMODEL_TINY_PIC:
7104 case AARCH64_CMODEL_SMALL:
7105 case AARCH64_CMODEL_SMALL_PIC:
7106 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
7107 for everything. */
7108 type = DW_EH_PE_sdata4;
7109 break;
7110 default:
7111 /* No assumptions here. 8-byte relocs required. */
7112 type = DW_EH_PE_sdata8;
7113 break;
7115 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
7118 /* Emit load exclusive. */
7120 static void
7121 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
7122 rtx mem, rtx model_rtx)
7124 rtx (*gen) (rtx, rtx, rtx);
7126 switch (mode)
7128 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
7129 case HImode: gen = gen_aarch64_load_exclusivehi; break;
7130 case SImode: gen = gen_aarch64_load_exclusivesi; break;
7131 case DImode: gen = gen_aarch64_load_exclusivedi; break;
7132 default:
7133 gcc_unreachable ();
7136 emit_insn (gen (rval, mem, model_rtx));
7139 /* Emit store exclusive. */
7141 static void
7142 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
7143 rtx rval, rtx mem, rtx model_rtx)
7145 rtx (*gen) (rtx, rtx, rtx, rtx);
7147 switch (mode)
7149 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
7150 case HImode: gen = gen_aarch64_store_exclusivehi; break;
7151 case SImode: gen = gen_aarch64_store_exclusivesi; break;
7152 case DImode: gen = gen_aarch64_store_exclusivedi; break;
7153 default:
7154 gcc_unreachable ();
7157 emit_insn (gen (bval, rval, mem, model_rtx));
7160 /* Mark the previous jump instruction as unlikely. */
7162 static void
7163 aarch64_emit_unlikely_jump (rtx insn)
7165 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
7167 insn = emit_jump_insn (insn);
7168 add_reg_note (insn, REG_BR_PROB, very_unlikely);
7171 /* Expand a compare and swap pattern. */
7173 void
7174 aarch64_expand_compare_and_swap (rtx operands[])
7176 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
7177 enum machine_mode mode, cmp_mode;
7178 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
7180 bval = operands[0];
7181 rval = operands[1];
7182 mem = operands[2];
7183 oldval = operands[3];
7184 newval = operands[4];
7185 is_weak = operands[5];
7186 mod_s = operands[6];
7187 mod_f = operands[7];
7188 mode = GET_MODE (mem);
7189 cmp_mode = mode;
7191 /* Normally the succ memory model must be stronger than fail, but in the
7192 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
7193 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
7195 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
7196 && INTVAL (mod_s) == MEMMODEL_RELEASE)
7197 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
7199 switch (mode)
7201 case QImode:
7202 case HImode:
7203 /* For short modes, we're going to perform the comparison in SImode,
7204 so do the zero-extension now. */
7205 cmp_mode = SImode;
7206 rval = gen_reg_rtx (SImode);
7207 oldval = convert_modes (SImode, mode, oldval, true);
7208 /* Fall through. */
7210 case SImode:
7211 case DImode:
7212 /* Force the value into a register if needed. */
7213 if (!aarch64_plus_operand (oldval, mode))
7214 oldval = force_reg (cmp_mode, oldval);
7215 break;
7217 default:
7218 gcc_unreachable ();
7221 switch (mode)
7223 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
7224 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
7225 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
7226 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
7227 default:
7228 gcc_unreachable ();
7231 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
7233 if (mode == QImode || mode == HImode)
7234 emit_move_insn (operands[1], gen_lowpart (mode, rval));
7236 x = gen_rtx_REG (CCmode, CC_REGNUM);
7237 x = gen_rtx_EQ (SImode, x, const0_rtx);
7238 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
7241 /* Split a compare and swap pattern. */
7243 void
7244 aarch64_split_compare_and_swap (rtx operands[])
7246 rtx rval, mem, oldval, newval, scratch;
7247 enum machine_mode mode;
7248 bool is_weak;
7249 rtx label1, label2, x, cond;
7251 rval = operands[0];
7252 mem = operands[1];
7253 oldval = operands[2];
7254 newval = operands[3];
7255 is_weak = (operands[4] != const0_rtx);
7256 scratch = operands[7];
7257 mode = GET_MODE (mem);
7259 label1 = NULL_RTX;
7260 if (!is_weak)
7262 label1 = gen_label_rtx ();
7263 emit_label (label1);
7265 label2 = gen_label_rtx ();
7267 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
7269 cond = aarch64_gen_compare_reg (NE, rval, oldval);
7270 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7271 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7272 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
7273 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7275 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
7277 if (!is_weak)
7279 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
7280 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7281 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
7282 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7284 else
7286 cond = gen_rtx_REG (CCmode, CC_REGNUM);
7287 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7288 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7291 emit_label (label2);
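/* Sketch of the resulting sequence for a strong compare-and-swap (actual
   mnemonics depend on the memory model; register names are illustrative):

     .Lretry:
       ldxr   x0, [xmem]         ; load-exclusive
       cmp    x0, xold
       b.ne   .Ldone             ; mismatch: fail
       stxr   w1, xnew, [xmem]   ; store-exclusive, w1 = status
       cbnz   w1, .Lretry        ; lost reservation: retry
     .Ldone:

   A weak compare-and-swap omits the retry loop and instead leaves the
   store-exclusive status reflected in the CC register.  */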
7294 /* Split an atomic operation. */
7296 void
7297 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
7298 rtx value, rtx model_rtx, rtx cond)
7300 enum machine_mode mode = GET_MODE (mem);
7301 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7302 rtx label, x;
7304 label = gen_label_rtx ();
7305 emit_label (label);
7307 if (new_out)
7308 new_out = gen_lowpart (wmode, new_out);
7309 if (old_out)
7310 old_out = gen_lowpart (wmode, old_out);
7311 else
7312 old_out = new_out;
7313 value = simplify_gen_subreg (wmode, value, mode, 0);
7315 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7317 switch (code)
7319 case SET:
7320 new_out = value;
7321 break;
7323 case NOT:
7324 x = gen_rtx_AND (wmode, old_out, value);
7325 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7326 x = gen_rtx_NOT (wmode, new_out);
7327 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7328 break;
7330 case MINUS:
7331 if (CONST_INT_P (value))
7333 value = GEN_INT (-INTVAL (value));
7334 code = PLUS;
7336 /* Fall through. */
7338 default:
7339 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
7340 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7341 break;
7344 aarch64_emit_store_exclusive (mode, cond, mem,
7345 gen_lowpart (mode, new_out), model_rtx);
7347 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7348 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7349 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
7350 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7353 static void
7354 aarch64_print_extension (void)
7356 const struct aarch64_option_extension *opt = NULL;
7358 for (opt = all_extensions; opt->name != NULL; opt++)
7359 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
7360 asm_fprintf (asm_out_file, "+%s", opt->name);
7362 asm_fprintf (asm_out_file, "\n");
7365 static void
7366 aarch64_start_file (void)
7368 if (selected_arch)
7370 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
7371 aarch64_print_extension ();
7373 else if (selected_cpu)
7375 asm_fprintf (asm_out_file, "\t.cpu %s", selected_cpu->name);
7376 aarch64_print_extension ();
7378 default_file_start ();
7381 /* Target hook for c_mode_for_suffix. */
7382 static enum machine_mode
7383 aarch64_c_mode_for_suffix (char suffix)
7385 if (suffix == 'q')
7386 return TFmode;
7388 return VOIDmode;
7391 /* We can only represent floating point constants which will fit in
7392 "quarter-precision" values. These values are characterised by
7393 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given by:
7396 (-1)^s * (n/16) * 2^r
7398 Where:
7399 's' is the sign bit.
7400 'n' is an integer in the range 16 <= n <= 31.
7401 'r' is an integer in the range -3 <= r <= 4. */
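/* Worked examples: 0.25 is representable as (-1)^0 * (16/16) * 2^-2,
   and 31.0 as (-1)^0 * (31/16) * 2^4; the representable magnitudes
   therefore range from (16/16) * 2^-3 = 0.125 up to (31/16) * 2^4 = 31.0,
   always with a leading mantissa bit of 1 (so 0.0 is not representable).  */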
7403 /* Return true iff X can be represented by a quarter-precision
7404 floating point immediate operand. Note, we cannot represent 0.0. */
7405 bool
7406 aarch64_float_const_representable_p (rtx x)
7408 /* This represents our current view of how many bits
7409 make up the mantissa. */
7410 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7411 int exponent;
7412 unsigned HOST_WIDE_INT mantissa, mask;
7413 HOST_WIDE_INT m1, m2;
7414 REAL_VALUE_TYPE r, m;
7416 if (!CONST_DOUBLE_P (x))
7417 return false;
7419 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7421 /* We cannot represent infinities, NaNs or +/-zero. We won't
7422 know if we have +zero until we analyse the mantissa, but we
7423 can reject the other invalid values. */
7424 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
7425 || REAL_VALUE_MINUS_ZERO (r))
7426 return false;
7428 /* Extract exponent. */
7429 r = real_value_abs (&r);
7430 exponent = REAL_EXP (&r);
7432 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7433 highest (sign) bit, with a fixed binary point at bit point_pos.
7434 m1 holds the low part of the mantissa, m2 the high part.
7435 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
7436 bits for the mantissa, this can fail (low bits will be lost). */
7437 real_ldexp (&m, &r, point_pos - exponent);
7438 REAL_VALUE_TO_INT (&m1, &m2, m);
7440 /* If the low part of the mantissa has bits set we cannot represent
7441 the value. */
7442 if (m1 != 0)
7443 return false;
7444 /* We have rejected the lower HOST_WIDE_INT, so update our
7445 understanding of how many bits lie in the mantissa and
7446 look only at the high HOST_WIDE_INT. */
7447 mantissa = m2;
7448 point_pos -= HOST_BITS_PER_WIDE_INT;
7450 /* We can only represent values with a mantissa of the form 1.xxxx. */
7451 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7452 if ((mantissa & mask) != 0)
7453 return false;
7455 /* Having filtered unrepresentable values, we may now remove all
7456 but the highest 5 bits. */
7457 mantissa >>= point_pos - 5;
7459 /* We cannot represent the value 0.0, so reject it. This is handled
7460 elsewhere. */
7461 if (mantissa == 0)
7462 return false;
7464 /* Then, as bit 4 is always set, we can mask it off, leaving
7465 the mantissa in the range [0, 15]. */
7466 mantissa &= ~(1 << 4);
7467 gcc_assert (mantissa <= 15);
7469 /* GCC internally does not use IEEE754-like encoding (where normalized
7470 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
7471 Our mantissa values are shifted 4 places to the left relative to
7472 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
7473 by 5 places to correct for GCC's representation. */
7474 exponent = 5 - exponent;
7476 return (exponent >= 0 && exponent <= 7);
7479 char*
7480 aarch64_output_simd_mov_immediate (rtx const_vector,
7481 enum machine_mode mode,
7482 unsigned width)
7484 bool is_valid;
7485 static char templ[40];
7486 const char *mnemonic;
7487 const char *shift_op;
7488 unsigned int lane_count = 0;
7489 char element_char;
7491 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
7493 /* This will return true to show const_vector is legal for use as an
7494 AdvSIMD MOVI (or, implicitly, MVNI) instruction immediate. It will
7495 also update INFO to show how the immediate should be generated. */
7496 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
7497 gcc_assert (is_valid);
7499 element_char = sizetochar (info.element_width);
7500 lane_count = width / info.element_width;
7502 mode = GET_MODE_INNER (mode);
7503 if (mode == SFmode || mode == DFmode)
7505 gcc_assert (info.shift == 0 && ! info.mvn);
7506 if (aarch64_float_const_zero_rtx_p (info.value))
7507 info.value = GEN_INT (0);
7508 else
7510 #define buf_size 20
7511 REAL_VALUE_TYPE r;
7512 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
7513 char float_buf[buf_size] = {'\0'};
7514 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
7515 #undef buf_size
7517 if (lane_count == 1)
7518 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
7519 else
7520 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
7521 lane_count, element_char, float_buf);
7522 return templ;
7526 mnemonic = info.mvn ? "mvni" : "movi";
7527 shift_op = info.msl ? "msl" : "lsl";
7529 if (lane_count == 1)
7530 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
7531 mnemonic, UINTVAL (info.value));
7532 else if (info.shift)
7533 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
7534 ", %s %d", mnemonic, lane_count, element_char,
7535 UINTVAL (info.value), shift_op, info.shift);
7536 else
7537 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
7538 mnemonic, lane_count, element_char, UINTVAL (info.value));
7539 return templ;
7542 char*
7543 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
7544 enum machine_mode mode)
7546 enum machine_mode vmode;
7548 gcc_assert (!VECTOR_MODE_P (mode));
7549 vmode = aarch64_simd_container_mode (mode, 64);
7550 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
7551 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
7554 /* Split operands into moves from op[1] + op[2] into op[0]. */
7556 void
7557 aarch64_split_combinev16qi (rtx operands[3])
7559 unsigned int dest = REGNO (operands[0]);
7560 unsigned int src1 = REGNO (operands[1]);
7561 unsigned int src2 = REGNO (operands[2]);
7562 enum machine_mode halfmode = GET_MODE (operands[1]);
7563 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
7564 rtx destlo, desthi;
7566 gcc_assert (halfmode == V16QImode);
7568 if (src1 == dest && src2 == dest + halfregs)
7570 /* No-op move. Can't split to nothing; emit something. */
7571 emit_note (NOTE_INSN_DELETED);
7572 return;
7575 /* Preserve register attributes for variable tracking. */
7576 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
7577 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
7578 GET_MODE_SIZE (halfmode));
7580 /* Special case of reversed high/low parts. */
7581 if (reg_overlap_mentioned_p (operands[2], destlo)
7582 && reg_overlap_mentioned_p (operands[1], desthi))
7584 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7585 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
7586 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7588 else if (!reg_overlap_mentioned_p (operands[2], destlo))
7590 /* Try to avoid unnecessary moves if part of the result
7591 is in the right place already. */
7592 if (src1 != dest)
7593 emit_move_insn (destlo, operands[1]);
7594 if (src2 != dest + halfregs)
7595 emit_move_insn (desthi, operands[2]);
7597 else
7599 if (src2 != dest + halfregs)
7600 emit_move_insn (desthi, operands[2]);
7601 if (src1 != dest)
7602 emit_move_insn (destlo, operands[1]);
7606 /* vec_perm support. */
7608 #define MAX_VECT_LEN 16
7610 struct expand_vec_perm_d
7612 rtx target, op0, op1;
7613 unsigned char perm[MAX_VECT_LEN];
7614 enum machine_mode vmode;
7615 unsigned char nelt;
7616 bool one_vector_p;
7617 bool testing_p;
7620 /* Generate a variable permutation. */
7622 static void
7623 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
7625 enum machine_mode vmode = GET_MODE (target);
7626 bool one_vector_p = rtx_equal_p (op0, op1);
7628 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
7629 gcc_checking_assert (GET_MODE (op0) == vmode);
7630 gcc_checking_assert (GET_MODE (op1) == vmode);
7631 gcc_checking_assert (GET_MODE (sel) == vmode);
7632 gcc_checking_assert (TARGET_SIMD);
7634 if (one_vector_p)
7636 if (vmode == V8QImode)
7638 /* Expand the argument to a V16QI mode by duplicating it. */
7639 rtx pair = gen_reg_rtx (V16QImode);
7640 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
7641 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7643 else
7645 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
7648 else
7650 rtx pair;
7652 if (vmode == V8QImode)
7654 pair = gen_reg_rtx (V16QImode);
7655 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
7656 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7658 else
7660 pair = gen_reg_rtx (OImode);
7661 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
7662 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
7667 void
7668 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
7670 enum machine_mode vmode = GET_MODE (target);
7671 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
7672 bool one_vector_p = rtx_equal_p (op0, op1);
7673 rtx rmask[MAX_VECT_LEN], mask;
7675 gcc_checking_assert (!BYTES_BIG_ENDIAN);
7677 /* The TBL instruction does not use a modulo index, so we must take care
7678 of that ourselves. */
7679 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
7680 for (i = 0; i < nelt; ++i)
7681 rmask[i] = mask;
7682 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
7683 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
7685 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
7688 /* Recognize patterns suitable for the TRN instructions. */
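/* For example, with V4SImode and two input vectors, the permutation
   { 0, 4, 2, 6 } selects the even lanes of both inputs and is matched as
   TRN1, while { 1, 5, 3, 7 } selects the odd lanes and is matched as TRN2
   (indices here are in GCC's little-endian numbering).  */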
7689 static bool
7690 aarch64_evpc_trn (struct expand_vec_perm_d *d)
7692 unsigned int i, odd, mask, nelt = d->nelt;
7693 rtx out, in0, in1, x;
7694 rtx (*gen) (rtx, rtx, rtx);
7695 enum machine_mode vmode = d->vmode;
7697 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7698 return false;
7700 /* Note that these are little-endian tests.
7701 We correct for big-endian later. */
7702 if (d->perm[0] == 0)
7703 odd = 0;
7704 else if (d->perm[0] == 1)
7705 odd = 1;
7706 else
7707 return false;
7708 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7710 for (i = 0; i < nelt; i += 2)
7712 if (d->perm[i] != i + odd)
7713 return false;
7714 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
7715 return false;
7718 /* Success! */
7719 if (d->testing_p)
7720 return true;
7722 in0 = d->op0;
7723 in1 = d->op1;
7724 if (BYTES_BIG_ENDIAN)
7726 x = in0, in0 = in1, in1 = x;
7727 odd = !odd;
7729 out = d->target;
7731 if (odd)
7733 switch (vmode)
7735 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
7736 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
7737 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
7738 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
7739 case V4SImode: gen = gen_aarch64_trn2v4si; break;
7740 case V2SImode: gen = gen_aarch64_trn2v2si; break;
7741 case V2DImode: gen = gen_aarch64_trn2v2di; break;
7742 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
7743 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
7744 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
7745 default:
7746 return false;
7749 else
7751 switch (vmode)
7753 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
7754 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
7755 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
7756 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
7757 case V4SImode: gen = gen_aarch64_trn1v4si; break;
7758 case V2SImode: gen = gen_aarch64_trn1v2si; break;
7759 case V2DImode: gen = gen_aarch64_trn1v2di; break;
7760 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
7761 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
7762 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
7763 default:
7764 return false;
7768 emit_insn (gen (out, in0, in1));
7769 return true;
7772 /* Recognize patterns suitable for the UZP instructions. */
7773 static bool
7774 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
7776 unsigned int i, odd, mask, nelt = d->nelt;
7777 rtx out, in0, in1, x;
7778 rtx (*gen) (rtx, rtx, rtx);
7779 enum machine_mode vmode = d->vmode;
7781 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7782 return false;
7784 /* Note that these are little-endian tests.
7785 We correct for big-endian later. */
7786 if (d->perm[0] == 0)
7787 odd = 0;
7788 else if (d->perm[0] == 1)
7789 odd = 1;
7790 else
7791 return false;
7792 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7794 for (i = 0; i < nelt; i++)
7796 unsigned elt = (i * 2 + odd) & mask;
7797 if (d->perm[i] != elt)
7798 return false;
7801 /* Success! */
7802 if (d->testing_p)
7803 return true;
7805 in0 = d->op0;
7806 in1 = d->op1;
7807 if (BYTES_BIG_ENDIAN)
7809 x = in0, in0 = in1, in1 = x;
7810 odd = !odd;
7812 out = d->target;
7814 if (odd)
7816 switch (vmode)
7818 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
7819 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
7820 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
7821 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
7822 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
7823 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
7824 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
7825 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
7826 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
7827 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
7828 default:
7829 return false;
7832 else
7834 switch (vmode)
7836 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
7837 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
7838 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
7839 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
7840 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
7841 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
7842 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
7843 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
7844 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
7845 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
7846 default:
7847 return false;
7851 emit_insn (gen (out, in0, in1));
7852 return true;
7855 /* Recognize patterns suitable for the ZIP instructions. */
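/* For example, with V4SImode and two input vectors, { 0, 4, 1, 5 }
   interleaves the low halves of the inputs and is matched as ZIP1, while
   { 2, 6, 3, 7 } interleaves the high halves and is matched as ZIP2
   (again in little-endian lane numbering).  */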
7856 static bool
7857 aarch64_evpc_zip (struct expand_vec_perm_d *d)
7859 unsigned int i, high, mask, nelt = d->nelt;
7860 rtx out, in0, in1, x;
7861 rtx (*gen) (rtx, rtx, rtx);
7862 enum machine_mode vmode = d->vmode;
7864 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7865 return false;
7867 /* Note that these are little-endian tests.
7868 We correct for big-endian later. */
7869 high = nelt / 2;
7870 if (d->perm[0] == high)
7871 /* Do Nothing. */
7873 else if (d->perm[0] == 0)
7874 high = 0;
7875 else
7876 return false;
7877 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7879 for (i = 0; i < nelt / 2; i++)
7881 unsigned elt = (i + high) & mask;
7882 if (d->perm[i * 2] != elt)
7883 return false;
7884 elt = (elt + nelt) & mask;
7885 if (d->perm[i * 2 + 1] != elt)
7886 return false;
7889 /* Success! */
7890 if (d->testing_p)
7891 return true;
7893 in0 = d->op0;
7894 in1 = d->op1;
7895 if (BYTES_BIG_ENDIAN)
7897 x = in0, in0 = in1, in1 = x;
7898 high = !high;
7900 out = d->target;
7902 if (high)
7904 switch (vmode)
7906 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
7907 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
7908 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
7909 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
7910 case V4SImode: gen = gen_aarch64_zip2v4si; break;
7911 case V2SImode: gen = gen_aarch64_zip2v2si; break;
7912 case V2DImode: gen = gen_aarch64_zip2v2di; break;
7913 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
7914 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
7915 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
7916 default:
7917 return false;
7920 else
7922 switch (vmode)
7924 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
7925 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
7926 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
7927 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
7928 case V4SImode: gen = gen_aarch64_zip1v4si; break;
7929 case V2SImode: gen = gen_aarch64_zip1v2si; break;
7930 case V2DImode: gen = gen_aarch64_zip1v2di; break;
7931 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
7932 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
7933 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
7934 default:
7935 return false;
7939 emit_insn (gen (out, in0, in1));
7940 return true;
7943 static bool
7944 aarch64_evpc_dup (struct expand_vec_perm_d *d)
7946 rtx (*gen) (rtx, rtx, rtx);
7947 rtx out = d->target;
7948 rtx in0;
7949 enum machine_mode vmode = d->vmode;
7950 unsigned int i, elt, nelt = d->nelt;
7951 rtx lane;
7953 /* TODO: This may not be big-endian safe. */
7954 if (BYTES_BIG_ENDIAN)
7955 return false;
7957 elt = d->perm[0];
7958 for (i = 1; i < nelt; i++)
7960 if (elt != d->perm[i])
7961 return false;
7964 /* The generic preparation in aarch64_expand_vec_perm_const_1
7965 swaps the operand order and the permute indices if it finds
7966 d->perm[0] to be in the second operand. Thus, we can always
7967 use d->op0 and need not do any extra arithmetic to get the
7968 correct lane number. */
7969 in0 = d->op0;
7970 lane = GEN_INT (elt);
7972 switch (vmode)
7974 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
7975 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
7976 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
7977 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
7978 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
7979 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
7980 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
7981 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
7982 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
7983 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
7984 default:
7985 return false;
7988 emit_insn (gen (out, in0, lane));
7989 return true;
7992 static bool
7993 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
7995 rtx rperm[MAX_VECT_LEN], sel;
7996 enum machine_mode vmode = d->vmode;
7997 unsigned int i, nelt = d->nelt;
7999 /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's
8000 numbering of elements for big-endian, we must reverse the order. */
8001 if (BYTES_BIG_ENDIAN)
8002 return false;
8004 if (d->testing_p)
8005 return true;
8007 /* Generic code will try constant permutation twice: once with the
8008 original mode and again with the elements lowered to QImode.
8009 So wait and don't do the selector expansion ourselves. */
8010 if (vmode != V8QImode && vmode != V16QImode)
8011 return false;
8013 for (i = 0; i < nelt; ++i)
8014 rperm[i] = GEN_INT (d->perm[i]);
8015 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
8016 sel = force_reg (vmode, sel);
8018 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
8019 return true;
8022 static bool
8023 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
8025 /* The pattern matching functions above are written to look for a small
8026 number to begin the sequence (0, 1, N/2). If we begin with an index
8027 from the second operand, we can swap the operands. */
8028 if (d->perm[0] >= d->nelt)
8030 unsigned i, nelt = d->nelt;
8031 rtx x;
8033 for (i = 0; i < nelt; ++i)
8034 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
8036 x = d->op0;
8037 d->op0 = d->op1;
8038 d->op1 = x;
8041 if (TARGET_SIMD)
8043 if (aarch64_evpc_zip (d))
8044 return true;
8045 else if (aarch64_evpc_uzp (d))
8046 return true;
8047 else if (aarch64_evpc_trn (d))
8048 return true;
8049 else if (aarch64_evpc_dup (d))
8050 return true;
8051 return aarch64_evpc_tbl (d);
8053 return false;
8056 /* Expand a vec_perm_const pattern. */
8058 bool
8059 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
8061 struct expand_vec_perm_d d;
8062 int i, nelt, which;
8064 d.target = target;
8065 d.op0 = op0;
8066 d.op1 = op1;
8068 d.vmode = GET_MODE (target);
8069 gcc_assert (VECTOR_MODE_P (d.vmode));
8070 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8071 d.testing_p = false;
8073 for (i = which = 0; i < nelt; ++i)
8075 rtx e = XVECEXP (sel, 0, i);
8076 int ei = INTVAL (e) & (2 * nelt - 1);
8077 which |= (ei < nelt ? 1 : 2);
8078 d.perm[i] = ei;
8081 switch (which)
8083 default:
8084 gcc_unreachable ();
8086 case 3:
8087 d.one_vector_p = false;
8088 if (!rtx_equal_p (op0, op1))
8089 break;
8091 /* The elements of PERM do not suggest that only the first operand
8092 is used, but both operands are identical. Allow easier matching
8093 of the permutation by folding the permutation into the single
8094 input vector. */
8095 /* Fall Through. */
8096 case 2:
8097 for (i = 0; i < nelt; ++i)
8098 d.perm[i] &= nelt - 1;
8099 d.op0 = op1;
8100 d.one_vector_p = true;
8101 break;
8103 case 1:
8104 d.op1 = op0;
8105 d.one_vector_p = true;
8106 break;
8109 return aarch64_expand_vec_perm_const_1 (&d);
8112 static bool
8113 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
8114 const unsigned char *sel)
8116 struct expand_vec_perm_d d;
8117 unsigned int i, nelt, which;
8118 bool ret;
8120 d.vmode = vmode;
8121 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8122 d.testing_p = true;
8123 memcpy (d.perm, sel, nelt);
8125 /* Calculate whether all elements are in one vector. */
8126 for (i = which = 0; i < nelt; ++i)
8128 unsigned char e = d.perm[i];
8129 gcc_assert (e < 2 * nelt);
8130 which |= (e < nelt ? 1 : 2);
8133 /* If all elements are from the second vector, reindex as if from the
8134 first vector. */
8135 if (which == 2)
8136 for (i = 0; i < nelt; ++i)
8137 d.perm[i] -= nelt;
8139 /* Check whether the mask can be applied to a single vector. */
8140 d.one_vector_p = (which != 3);
8142 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
8143 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
8144 if (!d.one_vector_p)
8145 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
8147 start_sequence ();
8148 ret = aarch64_expand_vec_perm_const_1 (&d);
8149 end_sequence ();
8151 return ret;
8154 #undef TARGET_ADDRESS_COST
8155 #define TARGET_ADDRESS_COST aarch64_address_cost
8157 /* This hook determines whether unnamed bitfields affect the alignment
8158 of the containing structure. The hook returns true if the structure
8159 should inherit the alignment requirements of an unnamed bitfield's
8160 type. */
8161 #undef TARGET_ALIGN_ANON_BITFIELD
8162 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
8164 #undef TARGET_ASM_ALIGNED_DI_OP
8165 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
8167 #undef TARGET_ASM_ALIGNED_HI_OP
8168 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
8170 #undef TARGET_ASM_ALIGNED_SI_OP
8171 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
8173 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
8174 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
8175 hook_bool_const_tree_hwi_hwi_const_tree_true
8177 #undef TARGET_ASM_FILE_START
8178 #define TARGET_ASM_FILE_START aarch64_start_file
8180 #undef TARGET_ASM_OUTPUT_MI_THUNK
8181 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
8183 #undef TARGET_ASM_SELECT_RTX_SECTION
8184 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
8186 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
8187 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
8189 #undef TARGET_BUILD_BUILTIN_VA_LIST
8190 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
8192 #undef TARGET_CALLEE_COPIES
8193 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
8195 #undef TARGET_CAN_ELIMINATE
8196 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
8198 #undef TARGET_CANNOT_FORCE_CONST_MEM
8199 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
8201 #undef TARGET_CONDITIONAL_REGISTER_USAGE
8202 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
8204 /* Only the least significant bit is used for initialization guard
8205 variables. */
8206 #undef TARGET_CXX_GUARD_MASK_BIT
8207 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
8209 #undef TARGET_C_MODE_FOR_SUFFIX
8210 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
8212 #ifdef TARGET_BIG_ENDIAN_DEFAULT
8213 #undef TARGET_DEFAULT_TARGET_FLAGS
8214 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
8215 #endif
8217 #undef TARGET_CLASS_MAX_NREGS
8218 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
8220 #undef TARGET_BUILTIN_DECL
8221 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
8223 #undef TARGET_EXPAND_BUILTIN
8224 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
8226 #undef TARGET_EXPAND_BUILTIN_VA_START
8227 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
8229 #undef TARGET_FOLD_BUILTIN
8230 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
8232 #undef TARGET_FUNCTION_ARG
8233 #define TARGET_FUNCTION_ARG aarch64_function_arg
8235 #undef TARGET_FUNCTION_ARG_ADVANCE
8236 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
8238 #undef TARGET_FUNCTION_ARG_BOUNDARY
8239 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
8241 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
8242 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
8244 #undef TARGET_FUNCTION_VALUE
8245 #define TARGET_FUNCTION_VALUE aarch64_function_value
8247 #undef TARGET_FUNCTION_VALUE_REGNO_P
8248 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
8250 #undef TARGET_FRAME_POINTER_REQUIRED
8251 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
8253 #undef TARGET_GIMPLE_FOLD_BUILTIN
8254 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
8256 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
8257 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
8259 #undef TARGET_INIT_BUILTINS
8260 #define TARGET_INIT_BUILTINS aarch64_init_builtins
8262 #undef TARGET_LEGITIMATE_ADDRESS_P
8263 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
8265 #undef TARGET_LEGITIMATE_CONSTANT_P
8266 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
8268 #undef TARGET_LIBGCC_CMP_RETURN_MODE
8269 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
8271 #undef TARGET_MANGLE_TYPE
8272 #define TARGET_MANGLE_TYPE aarch64_mangle_type
8274 #undef TARGET_MEMORY_MOVE_COST
8275 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
8277 #undef TARGET_MUST_PASS_IN_STACK
8278 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
8280 /* This target hook should return true if accesses to volatile bitfields
8281 should use the narrowest mode possible. It should return false if these
8282 accesses should use the bitfield container type. */
8283 #undef TARGET_NARROW_VOLATILE_BITFIELD
8284 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
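/* Editorial illustration, not part of the original file: with the hook
   returning false, a volatile bit-field access uses the declared
   container type rather than the narrowest covering mode.  */
#if 0
struct device_reg
{
  volatile unsigned int ready : 1;
  volatile unsigned int rest  : 31;
};

unsigned int
poll_ready (struct device_reg *r)
{
  /* Expected to be read with a 32-bit (word) load of the whole
     container, not a byte load covering just the low bits.  */
  return r->ready;
}
#endif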
8286 #undef TARGET_OPTION_OVERRIDE
8287 #define TARGET_OPTION_OVERRIDE aarch64_override_options
8289 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
8290 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
8291 aarch64_override_options_after_change
8293 #undef TARGET_PASS_BY_REFERENCE
8294 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
8296 #undef TARGET_PREFERRED_RELOAD_CLASS
8297 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
8299 #undef TARGET_SECONDARY_RELOAD
8300 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
8302 #undef TARGET_SHIFT_TRUNCATION_MASK
8303 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
8305 #undef TARGET_SETUP_INCOMING_VARARGS
8306 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
8308 #undef TARGET_STRUCT_VALUE_RTX
8309 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
8311 #undef TARGET_REGISTER_MOVE_COST
8312 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
8314 #undef TARGET_RETURN_IN_MEMORY
8315 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
8317 #undef TARGET_RETURN_IN_MSB
8318 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
8320 #undef TARGET_RTX_COSTS
8321 #define TARGET_RTX_COSTS aarch64_rtx_costs
8323 #undef TARGET_TRAMPOLINE_INIT
8324 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
8326 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
8327 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
8329 #undef TARGET_VECTOR_MODE_SUPPORTED_P
8330 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
8332 #undef TARGET_ARRAY_MODE_SUPPORTED_P
8333 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
8335 #undef TARGET_VECTORIZE_ADD_STMT_COST
8336 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
8338 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
8339 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
8340 aarch64_builtin_vectorization_cost
8342 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
8343 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
8345 #undef TARGET_VECTORIZE_BUILTINS
8346 #define TARGET_VECTORIZE_BUILTINS
8348 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
8349 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
8350 aarch64_builtin_vectorized_function
8352 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
8353 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
8354 aarch64_autovectorize_vector_sizes
8356 /* Section anchor support. */
8358 #undef TARGET_MIN_ANCHOR_OFFSET
8359 #define TARGET_MIN_ANCHOR_OFFSET -256
8361 /* Limit the maximum anchor offset to 4k-1, since that is the immediate
8362 offset limit for a byte access; we could do much more for larger data types,
8363 but have no way to determine the size of the access.  We assume accesses are aligned. */
8364 #undef TARGET_MAX_ANCHOR_OFFSET
8365 #define TARGET_MAX_ANCHOR_OFFSET 4095
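/* Editorial illustration, not part of the original file: with
   -fsection-anchors, nearby globals share one anchor address and are
   reached as base-plus-offset accesses; the limits above keep those
   offsets within [-256, 4095], i.e. inside the signed 9-bit unscaled
   and unsigned 12-bit byte-offset addressing ranges.  */
#if 0
int a, b, c;

int
sum_abc (void)
{
  /* Roughly:  adrp x0, .LANCHOR0
               add  x0, x0, :lo12:.LANCHOR0
               ldr  w1, [x0, #<offset of a>]   ... and so on.  */
  return a + b + c;
}
#endif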
8367 #undef TARGET_VECTOR_ALIGNMENT
8368 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
8370 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
8371 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
8372 aarch64_simd_vector_alignment_reachable
8374 /* vec_perm support. */
8376 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
8377 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
8378 aarch64_vectorize_vec_perm_const_ok
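/* Editorial illustration, not part of the original file: the kind of
   constant permutation the hook above is asked about.  A constant
   __builtin_shuffle selector like the reversal below is checked against
   the target's fixed permute patterns, with a TBL table lookup as the
   general fallback.  */
#if 0
typedef int v4si __attribute__ ((vector_size (16)));

v4si
reverse_v4si (v4si x)
{
  return __builtin_shuffle (x, (v4si) { 3, 2, 1, 0 });
}
#endif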
8381 #undef TARGET_FIXED_CONDITION_CODE_REGS
8382 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
8384 struct gcc_target targetm = TARGET_INITIALIZER;
8386 #include "gt-aarch64.h"