[AArch64] Remove unnecessary secondary reload for addition to SP.
[official-gcc.git] / gcc / config / aarch64 / aarch64.c
blob f6f587a9122e28cb12693d24856e4b8a854aa73e
1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2013 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "tree.h"
29 #include "regs.h"
30 #include "df.h"
31 #include "hard-reg-set.h"
32 #include "output.h"
33 #include "expr.h"
34 #include "reload.h"
35 #include "toplev.h"
36 #include "target.h"
37 #include "target-def.h"
38 #include "targhooks.h"
39 #include "ggc.h"
40 #include "function.h"
41 #include "tm_p.h"
42 #include "recog.h"
43 #include "langhooks.h"
44 #include "diagnostic-core.h"
45 #include "gimple.h"
46 #include "optabs.h"
47 #include "dwarf2.h"
48 #include "cfgloop.h"
49 #include "tree-vectorizer.h"
51 /* Defined for convenience. */
52 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
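/* Illustrative note (added, not in the original source): this evaluates
   to 8 under LP64 and 4 under ILP32.  */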
54 /* Classifies an address.
56 ADDRESS_REG_IMM
57 A simple base register plus immediate offset.
59 ADDRESS_REG_WB
60 A base register indexed by immediate offset with writeback.
62 ADDRESS_REG_REG
63 A base register indexed by (optionally scaled) register.
65 ADDRESS_REG_UXTW
66 A base register indexed by (optionally scaled) zero-extended register.
68 ADDRESS_REG_SXTW
69 A base register indexed by (optionally scaled) sign-extended register.
71 ADDRESS_LO_SUM
72 A LO_SUM rtx with a base register and "LO12" symbol relocation.
74 ADDRESS_SYMBOLIC:
75 A constant symbolic address, in pc-relative literal pool. */
77 enum aarch64_address_type {
78 ADDRESS_REG_IMM,
79 ADDRESS_REG_WB,
80 ADDRESS_REG_REG,
81 ADDRESS_REG_UXTW,
82 ADDRESS_REG_SXTW,
83 ADDRESS_LO_SUM,
84 ADDRESS_SYMBOLIC
87 struct aarch64_address_info {
88 enum aarch64_address_type type;
89 rtx base;
90 rtx offset;
91 int shift;
92 enum aarch64_symbol_type symbol_type;
95 struct simd_immediate_info
97 rtx value;
98 int shift;
99 int element_width;
100 bool mvn;
101 bool msl;
104 /* The current code model. */
105 enum aarch64_code_model aarch64_cmodel;
107 #ifdef HAVE_AS_TLS
108 #undef TARGET_HAVE_TLS
109 #define TARGET_HAVE_TLS 1
110 #endif
112 static bool aarch64_lra_p (void);
113 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
114 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
115 const_tree,
116 enum machine_mode *, int *,
117 bool *);
118 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
119 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
120 static void aarch64_override_options_after_change (void);
121 static bool aarch64_vector_mode_supported_p (enum machine_mode);
122 static unsigned bit_count (unsigned HOST_WIDE_INT);
123 static bool aarch64_const_vec_all_same_int_p (rtx,
124 HOST_WIDE_INT, HOST_WIDE_INT);
126 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
127 const unsigned char *sel);
129 /* The processor for which instructions should be scheduled. */
130 enum aarch64_processor aarch64_tune = generic;
132 /* The current tuning set. */
133 const struct tune_params *aarch64_tune_params;
135 /* Mask to specify which instructions we are allowed to generate. */
136 unsigned long aarch64_isa_flags = 0;
138 /* Mask to specify which instruction scheduling options should be used. */
139 unsigned long aarch64_tune_flags = 0;
141 /* Tuning parameters. */
143 #if HAVE_DESIGNATED_INITIALIZERS
144 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
145 #else
146 #define NAMED_PARAM(NAME, VAL) (VAL)
147 #endif
149 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
150 __extension__
151 #endif
152 static const struct cpu_rtx_cost_table generic_rtx_cost_table =
154 NAMED_PARAM (memory_load, COSTS_N_INSNS (1)),
155 NAMED_PARAM (memory_store, COSTS_N_INSNS (0)),
156 NAMED_PARAM (register_shift, COSTS_N_INSNS (1)),
157 NAMED_PARAM (int_divide, COSTS_N_INSNS (6)),
158 NAMED_PARAM (float_divide, COSTS_N_INSNS (2)),
159 NAMED_PARAM (double_divide, COSTS_N_INSNS (6)),
160 NAMED_PARAM (int_multiply, COSTS_N_INSNS (1)),
161 NAMED_PARAM (int_multiply_extend, COSTS_N_INSNS (1)),
162 NAMED_PARAM (int_multiply_add, COSTS_N_INSNS (1)),
163 NAMED_PARAM (int_multiply_extend_add, COSTS_N_INSNS (1)),
164 NAMED_PARAM (float_multiply, COSTS_N_INSNS (0)),
165 NAMED_PARAM (double_multiply, COSTS_N_INSNS (1))
168 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
169 __extension__
170 #endif
171 static const struct cpu_addrcost_table generic_addrcost_table =
173 NAMED_PARAM (pre_modify, 0),
174 NAMED_PARAM (post_modify, 0),
175 NAMED_PARAM (register_offset, 0),
176 NAMED_PARAM (register_extend, 0),
177 NAMED_PARAM (imm_offset, 0)
180 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
181 __extension__
182 #endif
183 static const struct cpu_regmove_cost generic_regmove_cost =
185 NAMED_PARAM (GP2GP, 1),
186 NAMED_PARAM (GP2FP, 2),
187 NAMED_PARAM (FP2GP, 2),
188 /* We currently do not provide direct support for TFmode Q->Q move.
189 Therefore we need to raise the cost above 2 in order to have
190 reload handle the situation. */
191 NAMED_PARAM (FP2FP, 4)
194 /* Generic costs for vector insn classes. */
195 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
196 __extension__
197 #endif
198 static const struct cpu_vector_cost generic_vector_cost =
200 NAMED_PARAM (scalar_stmt_cost, 1),
201 NAMED_PARAM (scalar_load_cost, 1),
202 NAMED_PARAM (scalar_store_cost, 1),
203 NAMED_PARAM (vec_stmt_cost, 1),
204 NAMED_PARAM (vec_to_scalar_cost, 1),
205 NAMED_PARAM (scalar_to_vec_cost, 1),
206 NAMED_PARAM (vec_align_load_cost, 1),
207 NAMED_PARAM (vec_unalign_load_cost, 1),
208 NAMED_PARAM (vec_unalign_store_cost, 1),
209 NAMED_PARAM (vec_store_cost, 1),
210 NAMED_PARAM (cond_taken_branch_cost, 3),
211 NAMED_PARAM (cond_not_taken_branch_cost, 1)
214 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
215 __extension__
216 #endif
217 static const struct tune_params generic_tunings =
219 &generic_rtx_cost_table,
220 &generic_addrcost_table,
221 &generic_regmove_cost,
222 &generic_vector_cost,
223 NAMED_PARAM (memmov_cost, 4)
226 /* A processor implementing AArch64. */
227 struct processor
229 const char *const name;
230 enum aarch64_processor core;
231 const char *arch;
232 const unsigned long flags;
233 const struct tune_params *const tune;
236 /* Processor cores implementing AArch64. */
237 static const struct processor all_cores[] =
239 #define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
240 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
241 #include "aarch64-cores.def"
242 #undef AARCH64_CORE
243 {"generic", generic, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
244 {NULL, aarch64_none, NULL, 0, NULL}
247 /* Architectures implementing AArch64. */
248 static const struct processor all_architectures[] =
250 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
251 {NAME, CORE, #ARCH, FLAGS, NULL},
252 #include "aarch64-arches.def"
253 #undef AARCH64_ARCH
254 {"generic", generic, "8", AARCH64_FL_FOR_ARCH8, NULL},
255 {NULL, aarch64_none, NULL, 0, NULL}
258 /* Target specification. These are populated as command-line arguments
259 are processed, or NULL if not specified. */
260 static const struct processor *selected_arch;
261 static const struct processor *selected_cpu;
262 static const struct processor *selected_tune;
264 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
266 /* An ISA extension in the co-processor and main instruction set space. */
267 struct aarch64_option_extension
269 const char *const name;
270 const unsigned long flags_on;
271 const unsigned long flags_off;
274 /* ISA extensions in AArch64. */
275 static const struct aarch64_option_extension all_extensions[] =
277 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
278 {NAME, FLAGS_ON, FLAGS_OFF},
279 #include "aarch64-option-extensions.def"
280 #undef AARCH64_OPT_EXTENSION
281 {NULL, 0, 0}
284 /* Used to track the size of an address when generating a pre/post
285 increment address. */
286 static enum machine_mode aarch64_memory_reference_mode;
288 /* Used to force GTY into this file. */
289 static GTY(()) int gty_dummy;
291 /* A table of valid AArch64 "bitmask immediate" values for
292 logical instructions. */
294 #define AARCH64_NUM_BITMASKS 5334
295 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
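/* Illustrative note (added, not in the original source): a "bitmask
   immediate" is a rotated run of consecutive ones replicated across
   equal-sized elements, e.g. 0x00ff00ff00ff00ff (eight ones in each
   16-bit element) is encodable, while an arbitrary constant such as
   0x12345 generally is not; the aarch64_bitmasks table enumerates all
   such encodable values.  */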
297 /* Did we set flag_omit_frame_pointer just so
298 aarch64_frame_pointer_required would be called? */
299 static bool faked_omit_frame_pointer;
301 typedef enum aarch64_cond_code
303 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
304 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
305 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
307 aarch64_cc;
309 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
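/* Illustrative note (added): the condition codes above are laid out in
   complementary pairs, so XORing the low bit maps each code to its
   logical inverse, e.g. AARCH64_EQ (0) <-> AARCH64_NE (1) and
   AARCH64_GE (10) <-> AARCH64_LT (11).  */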
311 /* The condition codes of the processor, and the inverse function. */
312 static const char * const aarch64_condition_codes[] =
314 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
315 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
318 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
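/* For instance (illustrative note, added; assuming the standard AArch64
   DWARF numbering where R0 maps to 0, SP to 31 and V0 to 64):
   x5 -> 5, sp -> 31, v3 -> 67.  */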
319 unsigned
320 aarch64_dbx_register_number (unsigned regno)
322 if (GP_REGNUM_P (regno))
323 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
324 else if (regno == SP_REGNUM)
325 return AARCH64_DWARF_SP;
326 else if (FP_REGNUM_P (regno))
327 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
329 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
330 equivalent DWARF register. */
331 return DWARF_FRAME_REGISTERS;
334 /* Return TRUE if MODE is any of the large INT modes. */
335 static bool
336 aarch64_vect_struct_mode_p (enum machine_mode mode)
338 return mode == OImode || mode == CImode || mode == XImode;
341 /* Return TRUE if MODE is any of the vector modes. */
342 static bool
343 aarch64_vector_mode_p (enum machine_mode mode)
345 return aarch64_vector_mode_supported_p (mode)
346 || aarch64_vect_struct_mode_p (mode);
349 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
350 static bool
351 aarch64_array_mode_supported_p (enum machine_mode mode,
352 unsigned HOST_WIDE_INT nelems)
354 if (TARGET_SIMD
355 && AARCH64_VALID_SIMD_QREG_MODE (mode)
356 && (nelems >= 2 && nelems <= 4))
357 return true;
359 return false;
362 /* Implement HARD_REGNO_NREGS. */
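/* Illustrative example (added): under this definition a 16-byte TImode
   value occupies two X registers ((16 + 7) / 8) in the general register
   file but only one V register ((16 + 15) / 16) in the FP/SIMD register
   file, assuming UNITS_PER_WORD == 8 and UNITS_PER_VREG == 16.  */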
365 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
367 switch (aarch64_regno_regclass (regno))
369 case FP_REGS:
370 case FP_LO_REGS:
371 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
372 default:
373 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
375 gcc_unreachable ();
378 /* Implement HARD_REGNO_MODE_OK. */
381 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
383 if (GET_MODE_CLASS (mode) == MODE_CC)
384 return regno == CC_REGNUM;
386 if (regno == SP_REGNUM)
387 /* The purpose of comparing with ptr_mode is to support the
388 global register variable associated with the stack pointer
389 register via the syntax of asm ("wsp") in ILP32. */
390 return mode == Pmode || mode == ptr_mode;
392 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
393 return mode == Pmode;
395 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
396 return 1;
398 if (FP_REGNUM_P (regno))
400 if (aarch64_vect_struct_mode_p (mode))
401 return
402 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
403 else
404 return 1;
407 return 0;
410 /* Return true if calls to DECL should be treated as
411 long-calls (i.e. called via a register). */
412 static bool
413 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
415 return false;
418 /* Return true if calls to symbol-ref SYM should be treated as
419 long-calls (i.e. called via a register). */
420 bool
421 aarch64_is_long_call_p (rtx sym)
423 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
426 /* Return true if the offsets to a zero/sign-extract operation
427 represent an expression that matches an extend operation. The
428 operands represent the parameters from
430 (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */
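/* Worked example (added for illustration, not from the original source):
   for DImode, (extract (mult (reg) (const_int 4)) (const_int 34) (const_int 0))
   matches: extract_val = 34 = 32 + 2, so the base width 32 is a power of
   two, the shift 2 is <= 4, and mult_val = 4 = 1 << 2, i.e. a 32-bit
   extend combined with a left shift by 2.  */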
431 bool
432 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
433 rtx extract_imm)
435 HOST_WIDE_INT mult_val, extract_val;
437 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
438 return false;
440 mult_val = INTVAL (mult_imm);
441 extract_val = INTVAL (extract_imm);
443 if (extract_val > 8
444 && extract_val < GET_MODE_BITSIZE (mode)
445 && exact_log2 (extract_val & ~7) > 0
446 && (extract_val & 7) <= 4
447 && mult_val == (1 << (extract_val & 7)))
448 return true;
450 return false;
453 /* Emit an insn that's a simple single-set. Both the operands must be
454 known to be valid. */
455 inline static rtx
456 emit_set_insn (rtx x, rtx y)
458 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
461 /* X and Y are two things to compare using CODE. Emit the compare insn and
462 return the rtx for register 0 in the proper mode. */
464 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
466 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
467 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
469 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
470 return cc_reg;
473 /* Build the SYMBOL_REF for __tls_get_addr. */
475 static GTY(()) rtx tls_get_addr_libfunc;
478 aarch64_tls_get_addr (void)
480 if (!tls_get_addr_libfunc)
481 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
482 return tls_get_addr_libfunc;
485 /* Return the TLS model to use for ADDR. */
487 static enum tls_model
488 tls_symbolic_operand_type (rtx addr)
490 enum tls_model tls_kind = TLS_MODEL_NONE;
491 rtx sym, addend;
493 if (GET_CODE (addr) == CONST)
495 split_const (addr, &sym, &addend);
496 if (GET_CODE (sym) == SYMBOL_REF)
497 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
499 else if (GET_CODE (addr) == SYMBOL_REF)
500 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
502 return tls_kind;
505 /* We'll allow LO_SUMs in our legitimate addresses
506 so that combine can take care of combining addresses where
507 necessary, but for generation purposes, we'll generate the address
508 as:
509 RTL Absolute
510 tmp = hi (symbol_ref); adrp x1, foo
511 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
514 PIC TLS
515 adrp x1, :got:foo adrp tmp, :tlsgd:foo
516 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
517 bl __tls_get_addr
520 Load TLS symbol, depending on TLS mechanism and TLS access model.
522 Global Dynamic - Traditional TLS:
523 adrp tmp, :tlsgd:imm
524 add dest, tmp, #:tlsgd_lo12:imm
525 bl __tls_get_addr
527 Global Dynamic - TLS Descriptors:
528 adrp dest, :tlsdesc:imm
529 ldr tmp, [dest, #:tlsdesc_lo12:imm]
530 add dest, dest, #:tlsdesc_lo12:imm
531 blr tmp
532 mrs tp, tpidr_el0
533 add dest, dest, tp
535 Initial Exec:
536 mrs tp, tpidr_el0
537 adrp tmp, :gottprel:imm
538 ldr dest, [tmp, #:gottprel_lo12:imm]
539 add dest, dest, tp
541 Local Exec:
542 mrs tp, tpidr_el0
543 add t0, tp, #:tprel_hi12:imm
544 add t0, #:tprel_lo12_nc:imm
547 static void
548 aarch64_load_symref_appropriately (rtx dest, rtx imm,
549 enum aarch64_symbol_type type)
551 switch (type)
553 case SYMBOL_SMALL_ABSOLUTE:
555 /* In ILP32, the mode of dest can be either SImode or DImode. */
556 rtx tmp_reg = dest;
557 enum machine_mode mode = GET_MODE (dest);
559 gcc_assert (mode == Pmode || mode == ptr_mode);
561 if (can_create_pseudo_p ())
562 tmp_reg = gen_reg_rtx (mode);
564 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
565 emit_insn (gen_add_losym (dest, tmp_reg, imm));
566 return;
569 case SYMBOL_TINY_ABSOLUTE:
570 emit_insn (gen_rtx_SET (Pmode, dest, imm));
571 return;
573 case SYMBOL_SMALL_GOT:
575 /* In ILP32, the mode of dest can be either SImode or DImode,
576 while the got entry is always of SImode size. The mode of
577 dest depends on how dest is used: if dest is assigned to a
578 pointer (e.g. in the memory), it has SImode; it may have
579 DImode if dest is dereferenced to access the memory.
580 This is why we have to handle three different ldr_got_small
581 patterns here (two patterns for ILP32). */
582 rtx tmp_reg = dest;
583 enum machine_mode mode = GET_MODE (dest);
585 if (can_create_pseudo_p ())
586 tmp_reg = gen_reg_rtx (mode);
588 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
589 if (mode == ptr_mode)
591 if (mode == DImode)
592 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
593 else
594 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
596 else
598 gcc_assert (mode == Pmode);
599 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
602 return;
605 case SYMBOL_SMALL_TLSGD:
607 rtx insns;
608 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
610 start_sequence ();
611 emit_call_insn (gen_tlsgd_small (result, imm));
612 insns = get_insns ();
613 end_sequence ();
615 RTL_CONST_CALL_P (insns) = 1;
616 emit_libcall_block (insns, dest, result, imm);
617 return;
620 case SYMBOL_SMALL_TLSDESC:
622 rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
623 rtx tp;
625 emit_insn (gen_tlsdesc_small (imm));
626 tp = aarch64_load_tp (NULL);
627 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
628 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
629 return;
632 case SYMBOL_SMALL_GOTTPREL:
634 rtx tmp_reg = gen_reg_rtx (Pmode);
635 rtx tp = aarch64_load_tp (NULL);
636 emit_insn (gen_tlsie_small (tmp_reg, imm));
637 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
638 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
639 return;
642 case SYMBOL_SMALL_TPREL:
644 rtx tp = aarch64_load_tp (NULL);
645 emit_insn (gen_tlsle_small (dest, tp, imm));
646 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
647 return;
650 case SYMBOL_TINY_GOT:
651 emit_insn (gen_ldr_got_tiny (dest, imm));
652 return;
654 default:
655 gcc_unreachable ();
659 /* Emit a move from SRC to DEST. Assume that the move expanders can
660 handle all moves if !can_create_pseudo_p (). The distinction is
661 important because, unlike emit_move_insn, the move expanders know
662 how to force Pmode objects into the constant pool even when the
663 constant pool address is not itself legitimate. */
664 static rtx
665 aarch64_emit_move (rtx dest, rtx src)
667 return (can_create_pseudo_p ()
668 ? emit_move_insn (dest, src)
669 : emit_move_insn_1 (dest, src));
672 void
673 aarch64_split_128bit_move (rtx dst, rtx src)
675 rtx low_dst;
677 enum machine_mode src_mode = GET_MODE (src);
678 enum machine_mode dst_mode = GET_MODE (dst);
679 int src_regno = REGNO (src);
680 int dst_regno = REGNO (dst);
682 gcc_assert (dst_mode == TImode || dst_mode == TFmode);
684 if (REG_P (dst) && REG_P (src))
686 gcc_assert (src_mode == TImode || src_mode == TFmode);
688 /* Handle r -> w, w -> r. */
689 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
691 switch (src_mode) {
692 case TImode:
693 emit_insn
694 (gen_aarch64_movtilow_di (dst, gen_lowpart (word_mode, src)));
695 emit_insn
696 (gen_aarch64_movtihigh_di (dst, gen_highpart (word_mode, src)));
697 return;
698 case TFmode:
699 emit_insn
700 (gen_aarch64_movtflow_di (dst, gen_lowpart (word_mode, src)));
701 emit_insn
702 (gen_aarch64_movtfhigh_di (dst, gen_highpart (word_mode, src)));
703 return;
704 default:
705 gcc_unreachable ();
708 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
710 switch (src_mode) {
711 case TImode:
712 emit_insn
713 (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst), src));
714 emit_insn
715 (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst), src));
716 return;
717 case TFmode:
718 emit_insn
719 (gen_aarch64_movdi_tflow (gen_lowpart (word_mode, dst), src));
720 emit_insn
721 (gen_aarch64_movdi_tfhigh (gen_highpart (word_mode, dst), src));
722 return;
723 default:
724 gcc_unreachable ();
727 /* Fall through to r -> r cases. */
730 switch (dst_mode) {
731 case TImode:
732 low_dst = gen_lowpart (word_mode, dst);
733 if (REG_P (low_dst)
734 && reg_overlap_mentioned_p (low_dst, src))
736 aarch64_emit_move (gen_highpart (word_mode, dst),
737 gen_highpart_mode (word_mode, TImode, src));
738 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
740 else
742 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
743 aarch64_emit_move (gen_highpart (word_mode, dst),
744 gen_highpart_mode (word_mode, TImode, src));
746 return;
747 case TFmode:
748 emit_move_insn (gen_rtx_REG (DFmode, dst_regno),
749 gen_rtx_REG (DFmode, src_regno));
750 emit_move_insn (gen_rtx_REG (DFmode, dst_regno + 1),
751 gen_rtx_REG (DFmode, src_regno + 1));
752 return;
753 default:
754 gcc_unreachable ();
758 bool
759 aarch64_split_128bit_move_p (rtx dst, rtx src)
761 return (! REG_P (src)
762 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
765 /* Split a complex SIMD combine. */
767 void
768 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
770 enum machine_mode src_mode = GET_MODE (src1);
771 enum machine_mode dst_mode = GET_MODE (dst);
773 gcc_assert (VECTOR_MODE_P (dst_mode));
775 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
777 rtx (*gen) (rtx, rtx, rtx);
779 switch (src_mode)
781 case V8QImode:
782 gen = gen_aarch64_simd_combinev8qi;
783 break;
784 case V4HImode:
785 gen = gen_aarch64_simd_combinev4hi;
786 break;
787 case V2SImode:
788 gen = gen_aarch64_simd_combinev2si;
789 break;
790 case V2SFmode:
791 gen = gen_aarch64_simd_combinev2sf;
792 break;
793 case DImode:
794 gen = gen_aarch64_simd_combinedi;
795 break;
796 case DFmode:
797 gen = gen_aarch64_simd_combinedf;
798 break;
799 default:
800 gcc_unreachable ();
803 emit_insn (gen (dst, src1, src2));
804 return;
808 /* Split a complex SIMD move. */
810 void
811 aarch64_split_simd_move (rtx dst, rtx src)
813 enum machine_mode src_mode = GET_MODE (src);
814 enum machine_mode dst_mode = GET_MODE (dst);
816 gcc_assert (VECTOR_MODE_P (dst_mode));
818 if (REG_P (dst) && REG_P (src))
820 rtx (*gen) (rtx, rtx);
822 gcc_assert (VECTOR_MODE_P (src_mode));
824 switch (src_mode)
826 case V16QImode:
827 gen = gen_aarch64_split_simd_movv16qi;
828 break;
829 case V8HImode:
830 gen = gen_aarch64_split_simd_movv8hi;
831 break;
832 case V4SImode:
833 gen = gen_aarch64_split_simd_movv4si;
834 break;
835 case V2DImode:
836 gen = gen_aarch64_split_simd_movv2di;
837 break;
838 case V4SFmode:
839 gen = gen_aarch64_split_simd_movv4sf;
840 break;
841 case V2DFmode:
842 gen = gen_aarch64_split_simd_movv2df;
843 break;
844 default:
845 gcc_unreachable ();
848 emit_insn (gen (dst, src));
849 return;
853 static rtx
854 aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
856 if (can_create_pseudo_p ())
857 return force_reg (mode, value);
858 else
860 x = aarch64_emit_move (x, value);
861 return x;
866 static rtx
867 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
869 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
871 rtx high;
872 /* Load the full offset into a register. This
873 might be improvable in the future. */
874 high = GEN_INT (offset);
875 offset = 0;
876 high = aarch64_force_temporary (mode, temp, high);
877 reg = aarch64_force_temporary (mode, temp,
878 gen_rtx_PLUS (mode, high, reg));
880 return plus_constant (mode, reg, offset);
883 void
884 aarch64_expand_mov_immediate (rtx dest, rtx imm)
886 enum machine_mode mode = GET_MODE (dest);
887 unsigned HOST_WIDE_INT mask;
888 int i;
889 bool first;
890 unsigned HOST_WIDE_INT val;
891 bool subtargets;
892 rtx subtarget;
893 int one_match, zero_match;
895 gcc_assert (mode == SImode || mode == DImode);
897 /* Check on what type of symbol it is. */
898 if (GET_CODE (imm) == SYMBOL_REF
899 || GET_CODE (imm) == LABEL_REF
900 || GET_CODE (imm) == CONST)
902 rtx mem, base, offset;
903 enum aarch64_symbol_type sty;
905 /* If we have (const (plus symbol offset)), separate out the offset
906 before we start classifying the symbol. */
907 split_const (imm, &base, &offset);
909 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
910 switch (sty)
912 case SYMBOL_FORCE_TO_MEM:
913 if (offset != const0_rtx
914 && targetm.cannot_force_const_mem (mode, imm))
916 gcc_assert(can_create_pseudo_p ());
917 base = aarch64_force_temporary (mode, dest, base);
918 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
919 aarch64_emit_move (dest, base);
920 return;
922 mem = force_const_mem (ptr_mode, imm);
923 gcc_assert (mem);
924 if (mode != ptr_mode)
925 mem = gen_rtx_ZERO_EXTEND (mode, mem);
926 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
927 return;
929 case SYMBOL_SMALL_TLSGD:
930 case SYMBOL_SMALL_TLSDESC:
931 case SYMBOL_SMALL_GOTTPREL:
932 case SYMBOL_SMALL_GOT:
933 case SYMBOL_TINY_GOT:
934 if (offset != const0_rtx)
936 gcc_assert(can_create_pseudo_p ());
937 base = aarch64_force_temporary (mode, dest, base);
938 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
939 aarch64_emit_move (dest, base);
940 return;
942 /* FALLTHRU */
944 case SYMBOL_SMALL_TPREL:
945 case SYMBOL_SMALL_ABSOLUTE:
946 case SYMBOL_TINY_ABSOLUTE:
947 aarch64_load_symref_appropriately (dest, imm, sty);
948 return;
950 default:
951 gcc_unreachable ();
955 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
957 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
958 return;
961 if (!CONST_INT_P (imm))
963 if (GET_CODE (imm) == HIGH)
964 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
965 else
967 rtx mem = force_const_mem (mode, imm);
968 gcc_assert (mem);
969 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
972 return;
975 if (mode == SImode)
977 /* We know we can't do this in 1 insn, and we must be able to do it
978 in two; so don't mess around looking for sequences that don't buy
979 us anything. */
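/* For instance (illustrative, not from the original source), moving
   0x12345678 into a w register via this path becomes:
     mov  w0, #0x5678
     movk w0, #0x1234, lsl #16
   i.e. a move of the low halfword followed by an insert of the high one.  */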
980 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
981 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
982 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
983 return;
986 /* Remaining cases are all for DImode. */
988 val = INTVAL (imm);
989 subtargets = optimize && can_create_pseudo_p ();
991 one_match = 0;
992 zero_match = 0;
993 mask = 0xffff;
995 for (i = 0; i < 64; i += 16, mask <<= 16)
997 if ((val & mask) == 0)
998 zero_match++;
999 else if ((val & mask) == mask)
1000 one_match++;
1003 if (one_match == 2)
1005 mask = 0xffff;
1006 for (i = 0; i < 64; i += 16, mask <<= 16)
1008 if ((val & mask) != mask)
1010 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1011 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1012 GEN_INT ((val >> i) & 0xffff)));
1013 return;
1016 gcc_unreachable ();
1019 if (zero_match == 2)
1020 goto simple_sequence;
1022 mask = 0x0ffff0000UL;
1023 for (i = 16; i < 64; i += 16, mask <<= 16)
1025 HOST_WIDE_INT comp = mask & ~(mask - 1);
1027 if (aarch64_uimm12_shift (val - (val & mask)))
1029 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1031 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1032 emit_insn (gen_adddi3 (dest, subtarget,
1033 GEN_INT (val - (val & mask))));
1034 return;
1036 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1038 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1040 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1041 GEN_INT ((val + comp) & mask)));
1042 emit_insn (gen_adddi3 (dest, subtarget,
1043 GEN_INT (val - ((val + comp) & mask))));
1044 return;
1046 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1048 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1050 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1051 GEN_INT ((val - comp) | ~mask)));
1052 emit_insn (gen_adddi3 (dest, subtarget,
1053 GEN_INT (val - ((val - comp) | ~mask))));
1054 return;
1056 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1058 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1060 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1061 GEN_INT (val | ~mask)));
1062 emit_insn (gen_adddi3 (dest, subtarget,
1063 GEN_INT (val - (val | ~mask))));
1064 return;
1068 /* See if we can do it by arithmetically combining two
1069 immediates. */
1070 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1072 int j;
1073 mask = 0xffff;
1075 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1076 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1078 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1079 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1080 GEN_INT (aarch64_bitmasks[i])));
1081 emit_insn (gen_adddi3 (dest, subtarget,
1082 GEN_INT (val - aarch64_bitmasks[i])));
1083 return;
1086 for (j = 0; j < 64; j += 16, mask <<= 16)
1088 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1090 emit_insn (gen_rtx_SET (VOIDmode, dest,
1091 GEN_INT (aarch64_bitmasks[i])));
1092 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1093 GEN_INT ((val >> j) & 0xffff)));
1094 return;
1099 /* See if we can do it by logically combining two immediates. */
1100 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1102 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1104 int j;
1106 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1107 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1109 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1110 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1111 GEN_INT (aarch64_bitmasks[i])));
1112 emit_insn (gen_iordi3 (dest, subtarget,
1113 GEN_INT (aarch64_bitmasks[j])));
1114 return;
1117 else if ((val & aarch64_bitmasks[i]) == val)
1119 int j;
1121 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1122 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1125 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1126 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1127 GEN_INT (aarch64_bitmasks[j])));
1128 emit_insn (gen_anddi3 (dest, subtarget,
1129 GEN_INT (aarch64_bitmasks[i])));
1130 return;
1135 simple_sequence:
1136 first = true;
1137 mask = 0xffff;
1138 for (i = 0; i < 64; i += 16, mask <<= 16)
1140 if ((val & mask) != 0)
1142 if (first)
1144 emit_insn (gen_rtx_SET (VOIDmode, dest,
1145 GEN_INT (val & mask)));
1146 first = false;
1148 else
1149 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1150 GEN_INT ((val >> i) & 0xffff)));
1155 static bool
1156 aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1158 /* Indirect calls are not currently supported. */
1159 if (decl == NULL)
1160 return false;
1162 /* Cannot tail-call to long-calls, since these are outside of the
1163 range of a branch instruction (we could handle this if we added
1164 support for indirect tail-calls). */
1165 if (aarch64_decl_is_long_call_p (decl))
1166 return false;
1168 return true;
1171 /* Implement TARGET_PASS_BY_REFERENCE. */
1173 static bool
1174 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1175 enum machine_mode mode,
1176 const_tree type,
1177 bool named ATTRIBUTE_UNUSED)
1179 HOST_WIDE_INT size;
1180 enum machine_mode dummymode;
1181 int nregs;
1183 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1184 size = (mode == BLKmode && type)
1185 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1187 if (type)
1189 /* Arrays always passed by reference. */
1190 if (TREE_CODE (type) == ARRAY_TYPE)
1191 return true;
1192 /* Other aggregates based on their size. */
1193 if (AGGREGATE_TYPE_P (type))
1194 size = int_size_in_bytes (type);
1197 /* Variable sized arguments are always passed by reference. */
1198 if (size < 0)
1199 return true;
1201 /* Can this be a candidate to be passed in fp/simd register(s)? */
1202 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1203 &dummymode, &nregs,
1204 NULL))
1205 return false;
1207 /* Arguments which are variable sized or larger than 2 registers are
1208 passed by reference unless they are a homogeneous floating-point
1209 aggregate. */
1210 return size > 2 * UNITS_PER_WORD;
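/* Example (added): with 8-byte words, a 24-byte plain struct is passed
   by reference here, while a 16-byte one is passed by value; an HFA/HVA
   would already have been accepted for FP/SIMD registers just above.  */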
1213 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1214 static bool
1215 aarch64_return_in_msb (const_tree valtype)
1217 enum machine_mode dummy_mode;
1218 int dummy_int;
1220 /* Never happens in little-endian mode. */
1221 if (!BYTES_BIG_ENDIAN)
1222 return false;
1224 /* Only composite types smaller than or equal to 16 bytes can
1225 be potentially returned in registers. */
1226 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1227 || int_size_in_bytes (valtype) <= 0
1228 || int_size_in_bytes (valtype) > 16)
1229 return false;
1231 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1232 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1233 is always passed/returned in the least significant bits of fp/simd
1234 register(s). */
1235 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1236 &dummy_mode, &dummy_int, NULL))
1237 return false;
1239 return true;
1242 /* Implement TARGET_FUNCTION_VALUE.
1243 Define how to find the value returned by a function. */
1245 static rtx
1246 aarch64_function_value (const_tree type, const_tree func,
1247 bool outgoing ATTRIBUTE_UNUSED)
1249 enum machine_mode mode;
1250 int unsignedp;
1251 int count;
1252 enum machine_mode ag_mode;
1254 mode = TYPE_MODE (type);
1255 if (INTEGRAL_TYPE_P (type))
1256 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1258 if (aarch64_return_in_msb (type))
1260 HOST_WIDE_INT size = int_size_in_bytes (type);
1262 if (size % UNITS_PER_WORD != 0)
1264 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1265 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1269 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1270 &ag_mode, &count, NULL))
1272 if (!aarch64_composite_type_p (type, mode))
1274 gcc_assert (count == 1 && mode == ag_mode);
1275 return gen_rtx_REG (mode, V0_REGNUM);
1277 else
1279 int i;
1280 rtx par;
1282 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1283 for (i = 0; i < count; i++)
1285 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1286 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1287 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1288 XVECEXP (par, 0, i) = tmp;
1290 return par;
1293 else
1294 return gen_rtx_REG (mode, R0_REGNUM);
1297 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1298 Return true if REGNO is the number of a hard register in which the values
1299 of a called function may come back. */
1301 static bool
1302 aarch64_function_value_regno_p (const unsigned int regno)
1304 /* Maximum of 16 bytes can be returned in the general registers. Examples
1305 of 16-byte return values are: 128-bit integers and 16-byte small
1306 structures (excluding homogeneous floating-point aggregates). */
1307 if (regno == R0_REGNUM || regno == R1_REGNUM)
1308 return true;
1310 /* Up to four fp/simd registers can return a function value, e.g. a
1311 homogeneous floating-point aggregate having four members. */
1312 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1313 return !TARGET_GENERAL_REGS_ONLY;
1315 return false;
1318 /* Implement TARGET_RETURN_IN_MEMORY.
1320 If the type T of the result of a function is such that
1321 void func (T arg)
1322 would require that arg be passed as a value in a register (or set of
1323 registers) according to the parameter passing rules, then the result
1324 is returned in the same registers as would be used for such an
1325 argument. */
1327 static bool
1328 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1330 HOST_WIDE_INT size;
1331 enum machine_mode ag_mode;
1332 int count;
1334 if (!AGGREGATE_TYPE_P (type)
1335 && TREE_CODE (type) != COMPLEX_TYPE
1336 && TREE_CODE (type) != VECTOR_TYPE)
1337 /* Simple scalar types always returned in registers. */
1338 return false;
1340 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1341 type,
1342 &ag_mode,
1343 &count,
1344 NULL))
1345 return false;
1347 /* Types larger than 2 registers returned in memory. */
1348 size = int_size_in_bytes (type);
1349 return (size < 0 || size > 2 * UNITS_PER_WORD);
1352 static bool
1353 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1354 const_tree type, int *nregs)
1356 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1357 return aarch64_vfp_is_call_or_return_candidate (mode,
1358 type,
1359 &pcum->aapcs_vfp_rmode,
1360 nregs,
1361 NULL);
1364 /* Given MODE and TYPE of a function argument, return the alignment in
1365 bits. The idea is to suppress any stronger alignment requested by
1366 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1367 This is a helper function for local use only. */
1369 static unsigned int
1370 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1372 unsigned int alignment;
1374 if (type)
1376 if (!integer_zerop (TYPE_SIZE (type)))
1378 if (TYPE_MODE (type) == mode)
1379 alignment = TYPE_ALIGN (type);
1380 else
1381 alignment = GET_MODE_ALIGNMENT (mode);
1383 else
1384 alignment = 0;
1386 else
1387 alignment = GET_MODE_ALIGNMENT (mode);
1389 return alignment;
1392 /* Layout a function argument according to the AAPCS64 rules. The rule
1393 numbers refer to the rule numbers in the AAPCS64. */
1395 static void
1396 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1397 const_tree type,
1398 bool named ATTRIBUTE_UNUSED)
1400 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1401 int ncrn, nvrn, nregs;
1402 bool allocate_ncrn, allocate_nvrn;
1404 /* We need to do this once per argument. */
1405 if (pcum->aapcs_arg_processed)
1406 return;
1408 pcum->aapcs_arg_processed = true;
1410 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1411 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1412 mode,
1413 type,
1414 &nregs);
1416 /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1417 The following code thus handles passing by SIMD/FP registers first. */
1419 nvrn = pcum->aapcs_nvrn;
1421 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
1422 and homogeneous short-vector aggregates (HVA). */
1423 if (allocate_nvrn)
1425 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1427 pcum->aapcs_nextnvrn = nvrn + nregs;
1428 if (!aarch64_composite_type_p (type, mode))
1430 gcc_assert (nregs == 1);
1431 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1433 else
1435 rtx par;
1436 int i;
1437 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1438 for (i = 0; i < nregs; i++)
1440 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1441 V0_REGNUM + nvrn + i);
1442 tmp = gen_rtx_EXPR_LIST
1443 (VOIDmode, tmp,
1444 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1445 XVECEXP (par, 0, i) = tmp;
1447 pcum->aapcs_reg = par;
1449 return;
1451 else
1453 /* C.3 NSRN is set to 8. */
1454 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1455 goto on_stack;
1459 ncrn = pcum->aapcs_ncrn;
1460 nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1461 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1464 /* C6 - C9, though the sign and zero extension semantics are
1465 handled elsewhere. This is the case where the argument fits
1466 entirely in general registers. */
1467 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1469 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1471 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1473 /* C.8 if the argument has an alignment of 16 then the NGRN is
1474 rounded up to the next even number. */
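/* Example (added for illustration): an argument with 16-byte alignment,
   such as a __int128, arriving when NCRN is odd is passed in the next
   even/odd register pair (e.g. x2/x3 rather than x1/x2).  */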
1475 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1477 ++ncrn;
1478 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1480 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1481 A reg is still generated for it, but the caller should be smart
1482 enough not to use it. */
1483 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1485 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1487 else
1489 rtx par;
1490 int i;
1492 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1493 for (i = 0; i < nregs; i++)
1495 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1496 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1497 GEN_INT (i * UNITS_PER_WORD));
1498 XVECEXP (par, 0, i) = tmp;
1500 pcum->aapcs_reg = par;
1503 pcum->aapcs_nextncrn = ncrn + nregs;
1504 return;
1507 /* C.11 */
1508 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1510 /* The argument is passed on stack; record the needed number of words for
1511 this argument (we can re-use NREGS) and align the total size if
1512 necessary. */
1513 on_stack:
1514 pcum->aapcs_stack_words = nregs;
1515 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1516 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1517 16 / UNITS_PER_WORD) + 1;
1518 return;
1521 /* Implement TARGET_FUNCTION_ARG. */
1523 static rtx
1524 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1525 const_tree type, bool named)
1527 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1528 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1530 if (mode == VOIDmode)
1531 return NULL_RTX;
1533 aarch64_layout_arg (pcum_v, mode, type, named);
1534 return pcum->aapcs_reg;
1537 void
1538 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1539 const_tree fntype ATTRIBUTE_UNUSED,
1540 rtx libname ATTRIBUTE_UNUSED,
1541 const_tree fndecl ATTRIBUTE_UNUSED,
1542 unsigned n_named ATTRIBUTE_UNUSED)
1544 pcum->aapcs_ncrn = 0;
1545 pcum->aapcs_nvrn = 0;
1546 pcum->aapcs_nextncrn = 0;
1547 pcum->aapcs_nextnvrn = 0;
1548 pcum->pcs_variant = ARM_PCS_AAPCS64;
1549 pcum->aapcs_reg = NULL_RTX;
1550 pcum->aapcs_arg_processed = false;
1551 pcum->aapcs_stack_words = 0;
1552 pcum->aapcs_stack_size = 0;
1554 return;
1557 static void
1558 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1559 enum machine_mode mode,
1560 const_tree type,
1561 bool named)
1563 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1564 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1566 aarch64_layout_arg (pcum_v, mode, type, named);
1567 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1568 != (pcum->aapcs_stack_words != 0));
1569 pcum->aapcs_arg_processed = false;
1570 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1571 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1572 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1573 pcum->aapcs_stack_words = 0;
1574 pcum->aapcs_reg = NULL_RTX;
1578 bool
1579 aarch64_function_arg_regno_p (unsigned regno)
1581 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1582 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1585 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1586 PARM_BOUNDARY bits of alignment, but will be given anything up
1587 to STACK_BOUNDARY bits if the type requires it. This makes sure
1588 that both before and after the layout of each argument, the Next
1589 Stacked Argument Address (NSAA) will have a minimum alignment of
1590 8 bytes. */
1592 static unsigned int
1593 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1595 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1597 if (alignment < PARM_BOUNDARY)
1598 alignment = PARM_BOUNDARY;
1599 if (alignment > STACK_BOUNDARY)
1600 alignment = STACK_BOUNDARY;
1601 return alignment;
1604 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1606 Return true if an argument passed on the stack should be padded upwards,
1607 i.e. if the least-significant byte of the stack slot has useful data.
1609 Small aggregate types are placed in the lowest memory address.
1611 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1613 bool
1614 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1616 /* On little-endian targets, the least significant byte of every stack
1617 argument is passed at the lowest byte address of the stack slot. */
1618 if (!BYTES_BIG_ENDIAN)
1619 return true;
1621 /* Otherwise, integral, floating-point and pointer types are padded downward:
1622 the least significant byte of a stack argument is passed at the highest
1623 byte address of the stack slot. */
1624 if (type
1625 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1626 || POINTER_TYPE_P (type))
1627 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1628 return false;
1630 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1631 return true;
1634 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1636 It specifies padding for the last (may also be the only)
1637 element of a block move between registers and memory. Assuming
1638 the block is in memory, padding upward means that
1639 the last element is padded after its most significant byte,
1640 while in downward padding, the last element is padded at
1641 its least significant byte side.
1643 Small aggregates and small complex types are always padded
1644 upwards.
1646 We don't need to worry about homogeneous floating-point or
1647 short-vector aggregates; their move is not affected by the
1648 padding direction determined here. Regardless of endianness,
1649 each element of such an aggregate is put in the least
1650 significant bits of a fp/simd register.
1652 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1653 register has useful data, and return the opposite if the most
1654 significant byte does. */
1656 bool
1657 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1658 bool first ATTRIBUTE_UNUSED)
1661 /* Small composite types are always padded upward. */
1662 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1664 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1665 : GET_MODE_SIZE (mode));
1666 if (size < 2 * UNITS_PER_WORD)
1667 return true;
1670 /* Otherwise, use the default padding. */
1671 return !BYTES_BIG_ENDIAN;
1674 static enum machine_mode
1675 aarch64_libgcc_cmp_return_mode (void)
1677 return SImode;
1680 static bool
1681 aarch64_frame_pointer_required (void)
1683 /* If the function contains dynamic stack allocations, we need to
1684 use the frame pointer to access the static parts of the frame. */
1685 if (cfun->calls_alloca)
1686 return true;
1688 /* We may have turned flag_omit_frame_pointer on in order to have this
1689 function called; if we did, we also set the 'faked_omit_frame_pointer' flag
1690 and we'll check it here.
1691 If we really did set flag_omit_frame_pointer normally, then we return false
1692 (no frame pointer required) in all cases. */
1694 if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
1695 return false;
1696 else if (flag_omit_leaf_frame_pointer)
1697 return !crtl->is_leaf;
1698 return true;
1701 /* Mark the registers that need to be saved by the callee and calculate
1702 the size of the callee-saved registers area and frame record (both FP
1703 and LR may be omitted). */
1704 static void
1705 aarch64_layout_frame (void)
1707 HOST_WIDE_INT offset = 0;
1708 int regno;
1710 if (reload_completed && cfun->machine->frame.laid_out)
1711 return;
1713 cfun->machine->frame.fp_lr_offset = 0;
1715 /* First mark all the registers that really need to be saved... */
1716 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1717 cfun->machine->frame.reg_offset[regno] = -1;
1719 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1720 cfun->machine->frame.reg_offset[regno] = -1;
1722 /* ... that includes the eh data registers (if needed)... */
1723 if (crtl->calls_eh_return)
1724 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1725 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1727 /* ... and any callee saved register that dataflow says is live. */
1728 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1729 if (df_regs_ever_live_p (regno)
1730 && !call_used_regs[regno])
1731 cfun->machine->frame.reg_offset[regno] = 0;
1733 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1734 if (df_regs_ever_live_p (regno)
1735 && !call_used_regs[regno])
1736 cfun->machine->frame.reg_offset[regno] = 0;
1738 if (frame_pointer_needed)
1740 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1741 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1742 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1745 /* Now assign stack slots for them. */
1746 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1747 if (cfun->machine->frame.reg_offset[regno] != -1)
1749 cfun->machine->frame.reg_offset[regno] = offset;
1750 offset += UNITS_PER_WORD;
1753 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1754 if (cfun->machine->frame.reg_offset[regno] != -1)
1756 cfun->machine->frame.reg_offset[regno] = offset;
1757 offset += UNITS_PER_WORD;
1760 if (frame_pointer_needed)
1762 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1763 offset += UNITS_PER_WORD;
1764 cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1767 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1769 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1770 offset += UNITS_PER_WORD;
1771 cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1774 cfun->machine->frame.padding0 =
1775 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1776 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1778 cfun->machine->frame.saved_regs_size = offset;
1779 cfun->machine->frame.laid_out = true;
1782 /* Make the last instruction frame-related and note that it performs
1783 the operation described by FRAME_PATTERN. */
1785 static void
1786 aarch64_set_frame_expr (rtx frame_pattern)
1788 rtx insn;
1790 insn = get_last_insn ();
1791 RTX_FRAME_RELATED_P (insn) = 1;
1792 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1793 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1794 frame_pattern,
1795 REG_NOTES (insn));
1798 static bool
1799 aarch64_register_saved_on_entry (int regno)
1801 return cfun->machine->frame.reg_offset[regno] != -1;
1805 static void
1806 aarch64_save_or_restore_fprs (int start_offset, int increment,
1807 bool restore, rtx base_rtx)
1810 unsigned regno;
1811 unsigned regno2;
1812 rtx insn;
1813 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1816 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1818 if (aarch64_register_saved_on_entry (regno))
1820 rtx mem;
1821 mem = gen_mem_ref (DFmode,
1822 plus_constant (Pmode,
1823 base_rtx,
1824 start_offset));
1826 for (regno2 = regno + 1;
1827 regno2 <= V31_REGNUM
1828 && !aarch64_register_saved_on_entry (regno2);
1829 regno2++)
1831 /* Empty loop. */
1833 if (regno2 <= V31_REGNUM &&
1834 aarch64_register_saved_on_entry (regno2))
1836 rtx mem2;
1837 /* Next highest register to be saved. */
1838 mem2 = gen_mem_ref (DFmode,
1839 plus_constant
1840 (Pmode,
1841 base_rtx,
1842 start_offset + increment));
1843 if (restore == false)
1845 insn = emit_insn
1846 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1847 mem2, gen_rtx_REG (DFmode, regno2)));
1850 else
1852 insn = emit_insn
1853 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1854 gen_rtx_REG (DFmode, regno2), mem2));
1856 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno));
1857 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno2));
1860 /* The first part of a frame-related parallel insn
1861 is always assumed to be relevant to the frame
1862 calculations; subsequent parts are only
1863 frame-related if explicitly marked. */
1864 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1865 1)) = 1;
1866 regno = regno2;
1867 start_offset += increment * 2;
1869 else
1871 if (restore == false)
1872 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1873 else
1875 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1876 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1878 start_offset += increment;
1880 RTX_FRAME_RELATED_P (insn) = 1;
1887 /* Offset from the stack pointer of where the saves and
1888 restores have to happen. */
1889 static void
1890 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1891 bool restore)
1893 rtx insn;
1894 rtx base_rtx = stack_pointer_rtx;
1895 HOST_WIDE_INT start_offset = offset;
1896 HOST_WIDE_INT increment = UNITS_PER_WORD;
1897 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1898 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1899 unsigned regno;
1900 unsigned regno2;
1902 for (regno = R0_REGNUM; regno <= limit; regno++)
1904 if (aarch64_register_saved_on_entry (regno))
1906 rtx mem;
1907 mem = gen_mem_ref (Pmode,
1908 plus_constant (Pmode,
1909 base_rtx,
1910 start_offset));
1912 for (regno2 = regno + 1;
1913 regno2 <= limit
1914 && !aarch64_register_saved_on_entry (regno2);
1915 regno2++)
1917 /* Empty loop. */
1919 if (regno2 <= limit &&
1920 aarch64_register_saved_on_entry (regno2))
1922 rtx mem2;
1923 /* Next highest register to be saved. */
1924 mem2 = gen_mem_ref (Pmode,
1925 plus_constant
1926 (Pmode,
1927 base_rtx,
1928 start_offset + increment));
1929 if (restore == false)
1931 insn = emit_insn
1932 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1933 mem2, gen_rtx_REG (DImode, regno2)));
1936 else
1938 insn = emit_insn
1939 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
1940 gen_rtx_REG (DImode, regno2), mem2));
1942 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1943 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
1946 /* The first part of a frame-related parallel insn
1947 is always assumed to be relevant to the frame
1948 calculations; subsequent parts are only
1949 frame-related if explicitly marked. */
1950 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1951 1)) = 1;
1952 regno = regno2;
1953 start_offset += increment * 2;
1955 else
1957 if (restore == false)
1958 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
1959 else
1961 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
1962 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1964 start_offset += increment;
1966 RTX_FRAME_RELATED_P (insn) = 1;
1970 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
1974 /* AArch64 stack frames generated by this compiler look like:
1976 +-------------------------------+
1978 | incoming stack arguments |
1980 +-------------------------------+ <-- arg_pointer_rtx
1982 | callee-allocated save area |
1983 | for register varargs |
1985 +-------------------------------+
1987 | local variables |
1989 +-------------------------------+ <-- frame_pointer_rtx
1991 | callee-saved registers |
1993 +-------------------------------+
1994 | LR' |
1995 +-------------------------------+
1996 | FP' |
1997 P +-------------------------------+ <-- hard_frame_pointer_rtx
1998 | dynamic allocation |
1999 +-------------------------------+
2001 | outgoing stack arguments |
2003 +-------------------------------+ <-- stack_pointer_rtx
2005 Dynamic stack allocations such as alloca insert data at point P.
2006 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
2007 hard_frame_pointer_rtx unchanged. */
2009 /* Generate the prologue instructions for entry into a function.
2010 Establish the stack frame by decreasing the stack pointer with a
2011 properly calculated size and, if necessary, create a frame record
2012 filled with the values of LR and previous frame pointer. The
2013 current FP is also set up if it is in use. */
2015 void
2016 aarch64_expand_prologue (void)
2018 /* sub sp, sp, #<frame_size>
2019 stp {fp, lr}, [sp, #<frame_size> - 16]
2020 add fp, sp, #<frame_size> - hardfp_offset
2021 stp {cs_reg}, [fp, #-16] etc.
2023 sub sp, sp, <final_adjustment_if_any>
2025 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
2026 HOST_WIDE_INT frame_size, offset;
2027 HOST_WIDE_INT fp_offset; /* FP offset from SP */
2028 rtx insn;
2030 aarch64_layout_frame ();
2031 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2032 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
2033 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
2034 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2035 + crtl->outgoing_args_size);
2036 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2037 STACK_BOUNDARY / BITS_PER_UNIT);
2039 if (flag_stack_usage_info)
2040 current_function_static_stack_size = frame_size;
2042 fp_offset = (offset
2043 - original_frame_size
2044 - cfun->machine->frame.saved_regs_size);
2046 /* Store pairs and load pairs have a range of only -512 to 504. */
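/* That limit follows from the LDP/STP addressing mode: a 7-bit signed
   immediate scaled by the access size, so for 8-byte registers the
   reachable offsets run from -64 * 8 = -512 up to 63 * 8 = 504.  */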
2047 if (offset >= 512)
2049 /* When the frame has a large size, an initial decrease is done on
2050 the stack pointer to jump over the callee-allocated save area for
2051 register varargs, the local variable area and/or the callee-saved
2052 register area. This will allow the pre-index write-back
2053 store pair instructions to be used for setting up the stack frame
2054 efficiently. */
2055 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2056 if (offset >= 512)
2057 offset = cfun->machine->frame.saved_regs_size;
2059 frame_size -= (offset + crtl->outgoing_args_size);
2060 fp_offset = 0;
2062 if (frame_size >= 0x1000000)
2064 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2065 emit_move_insn (op0, GEN_INT (-frame_size));
2066 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2067 aarch64_set_frame_expr (gen_rtx_SET
2068 (Pmode, stack_pointer_rtx,
2069 plus_constant (Pmode,
2070 stack_pointer_rtx,
2071 -frame_size)));
2073 else if (frame_size > 0)
2075 if ((frame_size & 0xfff) != frame_size)
2077 insn = emit_insn (gen_add2_insn
2078 (stack_pointer_rtx,
2079 GEN_INT (-(frame_size
2080 & ~(HOST_WIDE_INT)0xfff))));
2081 RTX_FRAME_RELATED_P (insn) = 1;
2083 if ((frame_size & 0xfff) != 0)
2085 insn = emit_insn (gen_add2_insn
2086 (stack_pointer_rtx,
2087 GEN_INT (-(frame_size
2088 & (HOST_WIDE_INT)0xfff))));
2089 RTX_FRAME_RELATED_P (insn) = 1;
2093 else
2094 frame_size = -1;
2096 if (offset > 0)
2098 /* If the frame pointer is needed, save it and LR first. Make the
2099 frame pointer point to the location of the old frame pointer on
2100 the stack. */
2101 if (frame_pointer_needed)
2103 rtx mem_fp, mem_lr;
2105 if (fp_offset)
2107 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2108 GEN_INT (-offset)));
2109 RTX_FRAME_RELATED_P (insn) = 1;
2110 aarch64_set_frame_expr (gen_rtx_SET
2111 (Pmode, stack_pointer_rtx,
2112 gen_rtx_MINUS (Pmode,
2113 stack_pointer_rtx,
2114 GEN_INT (offset))));
2115 mem_fp = gen_frame_mem (DImode,
2116 plus_constant (Pmode,
2117 stack_pointer_rtx,
2118 fp_offset));
2119 mem_lr = gen_frame_mem (DImode,
2120 plus_constant (Pmode,
2121 stack_pointer_rtx,
2122 fp_offset
2123 + UNITS_PER_WORD));
2124 insn = emit_insn (gen_store_pairdi (mem_fp,
2125 hard_frame_pointer_rtx,
2126 mem_lr,
2127 gen_rtx_REG (DImode,
2128 LR_REGNUM)));
2130 else
2132 insn = emit_insn (gen_storewb_pairdi_di
2133 (stack_pointer_rtx, stack_pointer_rtx,
2134 hard_frame_pointer_rtx,
2135 gen_rtx_REG (DImode, LR_REGNUM),
2136 GEN_INT (-offset),
2137 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2138 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2141 /* The first part of a frame-related parallel insn is always
2142 assumed to be relevant to the frame calculations;
2143 subsequent parts are only frame-related if explicitly
2144 marked. */
2145 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2146 RTX_FRAME_RELATED_P (insn) = 1;
2148 /* Set up frame pointer to point to the location of the
2149 previous frame pointer on the stack. */
2150 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2151 stack_pointer_rtx,
2152 GEN_INT (fp_offset)));
2153 aarch64_set_frame_expr (gen_rtx_SET
2154 (Pmode, hard_frame_pointer_rtx,
2155 plus_constant (Pmode,
2156 stack_pointer_rtx,
2157 fp_offset)));
2158 RTX_FRAME_RELATED_P (insn) = 1;
2159 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2160 hard_frame_pointer_rtx));
2162 else
2164 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2165 GEN_INT (-offset)));
2166 RTX_FRAME_RELATED_P (insn) = 1;
2169 aarch64_save_or_restore_callee_save_registers
2170 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2173 /* when offset >= 512,
2174 sub sp, sp, #<outgoing_args_size> */
2175 if (frame_size > -1)
2177 if (crtl->outgoing_args_size > 0)
2179 insn = emit_insn (gen_add2_insn
2180 (stack_pointer_rtx,
2181 GEN_INT (- crtl->outgoing_args_size)));
2182 RTX_FRAME_RELATED_P (insn) = 1;
2187 /* Generate the epilogue instructions for returning from a function. */
2188 void
2189 aarch64_expand_epilogue (bool for_sibcall)
2191 HOST_WIDE_INT original_frame_size, frame_size, offset;
2192 HOST_WIDE_INT fp_offset;
2193 rtx insn;
2194 rtx cfa_reg;
2196 aarch64_layout_frame ();
2197 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2198 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2199 + crtl->outgoing_args_size);
2200 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2201 STACK_BOUNDARY / BITS_PER_UNIT);
2203 fp_offset = (offset
2204 - original_frame_size
2205 - cfun->machine->frame.saved_regs_size);
2207 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2209 /* Store pairs and load pairs have a range of only -512 to 504. */
2210 if (offset >= 512)
2212 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2213 if (offset >= 512)
2214 offset = cfun->machine->frame.saved_regs_size;
2216 frame_size -= (offset + crtl->outgoing_args_size);
2217 fp_offset = 0;
2218 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2220 insn = emit_insn (gen_add2_insn
2221 (stack_pointer_rtx,
2222 GEN_INT (crtl->outgoing_args_size)));
2223 RTX_FRAME_RELATED_P (insn) = 1;
2226 else
2227 frame_size = -1;
2229 /* If there were outgoing arguments or we've done dynamic stack
2230 allocation, then restore the stack pointer from the frame
2231 pointer. This is at most one insn and more efficient than using
2232 GCC's internal mechanism. */
2233 if (frame_pointer_needed
2234 && (crtl->outgoing_args_size || cfun->calls_alloca))
2236 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2237 hard_frame_pointer_rtx,
2238 GEN_INT (- fp_offset)));
2239 RTX_FRAME_RELATED_P (insn) = 1;
2240 /* As SP is set to (FP - fp_offset), according to the rules in
2241 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2242 from the value of SP from now on. */
2243 cfa_reg = stack_pointer_rtx;
2246 aarch64_save_or_restore_callee_save_registers
2247 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2249 /* Restore the frame pointer and lr if the frame pointer is needed. */
2250 if (offset > 0)
2252 if (frame_pointer_needed)
2254 rtx mem_fp, mem_lr;
2256 if (fp_offset)
2258 mem_fp = gen_frame_mem (DImode,
2259 plus_constant (Pmode,
2260 stack_pointer_rtx,
2261 fp_offset));
2262 mem_lr = gen_frame_mem (DImode,
2263 plus_constant (Pmode,
2264 stack_pointer_rtx,
2265 fp_offset
2266 + UNITS_PER_WORD));
2267 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2268 mem_fp,
2269 gen_rtx_REG (DImode,
2270 LR_REGNUM),
2271 mem_lr));
2273 else
2275 insn = emit_insn (gen_loadwb_pairdi_di
2276 (stack_pointer_rtx,
2277 stack_pointer_rtx,
2278 hard_frame_pointer_rtx,
2279 gen_rtx_REG (DImode, LR_REGNUM),
2280 GEN_INT (offset),
2281 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2282 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2283 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2284 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2285 plus_constant (Pmode, cfa_reg,
2286 offset))));
2289 /* The first part of a frame-related parallel insn
2290 is always assumed to be relevant to the frame
2291 calculations; subsequent parts are only
2292 frame-related if explicitly marked. */
2293 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2294 RTX_FRAME_RELATED_P (insn) = 1;
2295 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2296 add_reg_note (insn, REG_CFA_RESTORE,
2297 gen_rtx_REG (DImode, LR_REGNUM));
2299 if (fp_offset)
2301 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2302 GEN_INT (offset)));
2303 RTX_FRAME_RELATED_P (insn) = 1;
2306 else
2308 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2309 GEN_INT (offset)));
2310 RTX_FRAME_RELATED_P (insn) = 1;
2314 /* Stack adjustment for exception handler. */
2315 if (crtl->calls_eh_return)
2317 /* We need to unwind the stack by the offset computed by
2318 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2319 based on SP. Ideally we would update the SP and define the
2320 CFA along the lines of:
2322 SP = SP + EH_RETURN_STACKADJ_RTX
2323 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2325 However the dwarf emitter only understands a constant
2326 register offset.
2328 The solution chosen here is to use the otherwise unused IP0
2329 as a temporary register to hold the current SP value. The
2330 CFA is described using IP0, and then SP is modified. */
2332 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2334 insn = emit_move_insn (ip0, stack_pointer_rtx);
2335 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2336 RTX_FRAME_RELATED_P (insn) = 1;
2338 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2340 /* Ensure the assignment to IP0 does not get optimized away. */
2341 emit_use (ip0);
2344 if (frame_size > -1)
2346 if (frame_size >= 0x1000000)
2348 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2349 emit_move_insn (op0, GEN_INT (frame_size));
2350 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2351 aarch64_set_frame_expr (gen_rtx_SET
2352 (Pmode, stack_pointer_rtx,
2353 plus_constant (Pmode,
2354 stack_pointer_rtx,
2355 frame_size)));
2357 else if (frame_size > 0)
2359 if ((frame_size & 0xfff) != 0)
2361 insn = emit_insn (gen_add2_insn
2362 (stack_pointer_rtx,
2363 GEN_INT ((frame_size
2364 & (HOST_WIDE_INT) 0xfff))));
2365 RTX_FRAME_RELATED_P (insn) = 1;
2367 if ((frame_size & 0xfff) != frame_size)
2369 insn = emit_insn (gen_add2_insn
2370 (stack_pointer_rtx,
2371 GEN_INT ((frame_size
2372 & ~ (HOST_WIDE_INT) 0xfff))));
2373 RTX_FRAME_RELATED_P (insn) = 1;
2377 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2378 plus_constant (Pmode,
2379 stack_pointer_rtx,
2380 offset)));
2383 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2384 if (!for_sibcall)
2385 emit_jump_insn (ret_rtx);
2388 /* Return the place to copy the exception unwinding return address to.
2389 This will probably be a stack slot, but could (in theory) be the
2390 return register. */
2392 aarch64_final_eh_return_addr (void)
2394 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2395 aarch64_layout_frame ();
2396 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2397 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2398 + crtl->outgoing_args_size);
2399 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2400 STACK_BOUNDARY / BITS_PER_UNIT);
2401 fp_offset = offset
2402 - original_frame_size
2403 - cfun->machine->frame.saved_regs_size;
2405 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2406 return gen_rtx_REG (DImode, LR_REGNUM);
2408 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2409 result in a store to save LR introduced by builtin_eh_return () being
2410 incorrectly deleted because the alias is not detected.
2411 So in the calculation of the address to copy the exception unwinding
2412 return address to, we consider two cases.
2413 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2414 we return a SP-relative location since all the addresses are SP-relative
2415 in this case. This prevents the store from being optimized away.
2416 If the fp_offset is not 0, then the addresses will be FP-relative and
2417 therefore we return a FP-relative location. */
2419 if (frame_pointer_needed)
2421 if (fp_offset)
2422 return gen_frame_mem (DImode,
2423 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2424 else
2425 return gen_frame_mem (DImode,
2426 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2429 /* If FP is not needed, we calculate the location of LR, which would be
2430 at the top of the saved registers block. */
2432 return gen_frame_mem (DImode,
2433 plus_constant (Pmode,
2434 stack_pointer_rtx,
2435 fp_offset
2436 + cfun->machine->frame.saved_regs_size
2437 - 2 * UNITS_PER_WORD));
2440 /* Output code to build up a constant in a register. */
2441 static void
2442 aarch64_build_constant (int regnum, HOST_WIDE_INT val)
2444 if (aarch64_bitmask_imm (val, DImode))
2445 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2446 else
2448 int i;
2449 int ncount = 0;
2450 int zcount = 0;
2451 HOST_WIDE_INT valp = val >> 16;
2452 HOST_WIDE_INT valm;
2453 HOST_WIDE_INT tval;
2455 for (i = 16; i < 64; i += 16)
2457 valm = (valp & 0xffff);
2459 if (valm != 0)
2460 ++ zcount;
2462 if (valm != 0xffff)
2463 ++ ncount;
2465 valp >>= 16;
2468 /* zcount contains the number of additional MOVK instructions
2469 required if the constant is built up with an initial MOVZ instruction,
2470 while ncount is the number of MOVK instructions required if starting
2471 with a MOVN instruction. Choose the sequence that needs the
2472 fewest instructions, preferring MOVZ instructions when the two
2473 counts are the same. */
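/* Hand-worked example: for val == 0xffffffff1234ffff the three upper
   16-bit chunks are 0x1234, 0xffff and 0xffff, giving zcount == 3 and
   ncount == 1.  Starting from the all-ones pattern (a single MOVN) only
   the 0x1234 chunk needs a MOVK, i.e. two instructions rather than the
   four needed by MOVZ plus three MOVKs.  */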
2474 if (ncount < zcount)
2476 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2477 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2478 tval = 0xffff;
2480 else
2482 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2483 GEN_INT (val & 0xffff));
2484 tval = 0;
2487 val >>= 16;
2489 for (i = 16; i < 64; i += 16)
2491 if ((val & 0xffff) != tval)
2492 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2493 GEN_INT (i), GEN_INT (val & 0xffff)));
2494 val >>= 16;
2499 static void
2500 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2502 HOST_WIDE_INT mdelta = delta;
2503 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2504 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2506 if (mdelta < 0)
2507 mdelta = -mdelta;
2509 if (mdelta >= 4096 * 4096)
2511 aarch64_build_constant (scratchreg, delta);
2512 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2514 else if (mdelta > 0)
2516 if (mdelta >= 4096)
2518 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2519 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2520 if (delta < 0)
2521 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2522 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2523 else
2524 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2525 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2527 if (mdelta % 4096 != 0)
2529 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2530 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2531 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2536 /* Output code to add DELTA to the first argument, and then jump
2537 to FUNCTION. Used for C++ multiple inheritance. */
2538 static void
2539 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2540 HOST_WIDE_INT delta,
2541 HOST_WIDE_INT vcall_offset,
2542 tree function)
2544 /* The this pointer is always in x0. Note that this differs from
2545 Arm where the this pointer may be bumped to r1 if r0 is required
2546 to return a pointer to an aggregate. On AArch64 a result value
2547 pointer will be in x8. */
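/* In outline, the code below adds DELTA to the this pointer in x0, then,
   when VCALL_OFFSET is non-zero, also adds the adjustment found
   VCALL_OFFSET bytes into the vtable that the this pointer refers to, and
   finally tail-calls FUNCTION.  */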
2548 int this_regno = R0_REGNUM;
2549 rtx this_rtx, temp0, temp1, addr, insn, funexp;
2551 reload_completed = 1;
2552 emit_note (NOTE_INSN_PROLOGUE_END);
2554 if (vcall_offset == 0)
2555 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2556 else
2558 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2560 this_rtx = gen_rtx_REG (Pmode, this_regno);
2561 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2562 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2564 addr = this_rtx;
2565 if (delta != 0)
2567 if (delta >= -256 && delta < 256)
2568 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2569 plus_constant (Pmode, this_rtx, delta));
2570 else
2571 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2574 if (Pmode == ptr_mode)
2575 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2576 else
2577 aarch64_emit_move (temp0,
2578 gen_rtx_ZERO_EXTEND (Pmode,
2579 gen_rtx_MEM (ptr_mode, addr)));
2581 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2582 addr = plus_constant (Pmode, temp0, vcall_offset);
2583 else
2585 aarch64_build_constant (IP1_REGNUM, vcall_offset);
2586 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2589 if (Pmode == ptr_mode)
2590 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode, addr));
2591 else
2592 aarch64_emit_move (temp1,
2593 gen_rtx_SIGN_EXTEND (Pmode,
2594 gen_rtx_MEM (ptr_mode, addr)));
2596 emit_insn (gen_add2_insn (this_rtx, temp1));
2599 /* Generate a tail call to the target function. */
2600 if (!TREE_USED (function))
2602 assemble_external (function);
2603 TREE_USED (function) = 1;
2605 funexp = XEXP (DECL_RTL (function), 0);
2606 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2607 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2608 SIBLING_CALL_P (insn) = 1;
2610 insn = get_insns ();
2611 shorten_branches (insn);
2612 final_start_function (insn, file, 1);
2613 final (insn, file, 1);
2614 final_end_function ();
2616 /* Stop pretending to be a post-reload pass. */
2617 reload_completed = 0;
2620 static int
2621 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2623 if (GET_CODE (*x) == SYMBOL_REF)
2624 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2626 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2627 TLS offsets, not real symbol references. */
2628 if (GET_CODE (*x) == UNSPEC
2629 && XINT (*x, 1) == UNSPEC_TLS)
2630 return -1;
2632 return 0;
2635 static bool
2636 aarch64_tls_referenced_p (rtx x)
2638 if (!TARGET_HAVE_TLS)
2639 return false;
2641 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2645 static int
2646 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2648 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2649 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2651 if (*imm1 < *imm2)
2652 return -1;
2653 if (*imm1 > *imm2)
2654 return +1;
2655 return 0;
2659 static void
2660 aarch64_build_bitmask_table (void)
2662 unsigned HOST_WIDE_INT mask, imm;
2663 unsigned int log_e, e, s, r;
2664 unsigned int nimms = 0;
2666 for (log_e = 1; log_e <= 6; log_e++)
2668 e = 1 << log_e;
2669 if (e == 64)
2670 mask = ~(HOST_WIDE_INT) 0;
2671 else
2672 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2673 for (s = 1; s < e; s++)
2675 for (r = 0; r < e; r++)
2677 /* set s consecutive bits to 1 (s < 64) */
2678 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2679 /* rotate right by r */
2680 if (r != 0)
2681 imm = ((imm >> r) | (imm << (e - r))) & mask;
2682 /* replicate the constant depending on SIMD size */
2683 switch (log_e) {
2684 case 1: imm |= (imm << 2);
2685 case 2: imm |= (imm << 4);
2686 case 3: imm |= (imm << 8);
2687 case 4: imm |= (imm << 16);
2688 case 5: imm |= (imm << 32);
2689 case 6:
2690 break;
2691 default:
2692 gcc_unreachable ();
2694 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2695 aarch64_bitmasks[nimms++] = imm;
2700 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2701 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2702 aarch64_bitmasks_cmp);
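/* As a sanity note on the table size: for each element size e in
   {2, 4, 8, 16, 32, 64} the loops above generate e * (e - 1) rotated runs
   of ones, i.e. 2 + 12 + 56 + 240 + 992 + 4032 = 5334 entries, which is
   the value AARCH64_NUM_BITMASKS is expected to have.  */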
2706 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2707 a left shift of 0 or 12 bits. */
2708 bool
2709 aarch64_uimm12_shift (HOST_WIDE_INT val)
2711 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2712 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
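/* Illustrative values: 0xabc and 0xabc000 both satisfy this test and can be
   used directly as ADD/SUB immediates (the latter with LSL #12), whereas
   0x1001 does not and must be built up or split first.  */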
2717 /* Return true if val is an immediate that can be loaded into a
2718 register by a MOVZ instruction. */
2719 static bool
2720 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2722 if (GET_MODE_SIZE (mode) > 4)
2724 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2725 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2726 return 1;
2728 else
2730 /* Ignore sign extension. */
2731 val &= (HOST_WIDE_INT) 0xffffffff;
2733 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2734 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2738 /* Return true if val is a valid bitmask immediate. */
2739 bool
2740 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2742 if (GET_MODE_SIZE (mode) < 8)
2744 /* Replicate bit pattern. */
2745 val &= (HOST_WIDE_INT) 0xffffffff;
2746 val |= val << 32;
2748 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2749 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2753 /* Return true if val is an immediate that can be loaded into a
2754 register in a single instruction. */
2755 bool
2756 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2758 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2759 return 1;
2760 return aarch64_bitmask_imm (val, mode);
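/* Hand-checked example: 0x5555555555555555 is neither a MOVZ nor a MOVN
   immediate (several 16-bit chunks differ from both 0 and 0xffff), but it
   is a valid bitmask immediate, so it still loads in one instruction via
   the MOV alias of ORR with an immediate.  */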
2763 static bool
2764 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2766 rtx base, offset;
2768 if (GET_CODE (x) == HIGH)
2769 return true;
2771 split_const (x, &base, &offset);
2772 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2774 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2775 != SYMBOL_FORCE_TO_MEM)
2776 return true;
2777 else
2778 /* Avoid generating a 64-bit relocation in ILP32; leave it
2779 for aarch64_expand_mov_immediate to handle properly. */
2780 return mode != ptr_mode;
2783 return aarch64_tls_referenced_p (x);
2786 /* Return true if register REGNO is a valid index register.
2787 STRICT_P is true if REG_OK_STRICT is in effect. */
2789 bool
2790 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2792 if (!HARD_REGISTER_NUM_P (regno))
2794 if (!strict_p)
2795 return true;
2797 if (!reg_renumber)
2798 return false;
2800 regno = reg_renumber[regno];
2802 return GP_REGNUM_P (regno);
2805 /* Return true if register REGNO is a valid base register for mode MODE.
2806 STRICT_P is true if REG_OK_STRICT is in effect. */
2808 bool
2809 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2811 if (!HARD_REGISTER_NUM_P (regno))
2813 if (!strict_p)
2814 return true;
2816 if (!reg_renumber)
2817 return false;
2819 regno = reg_renumber[regno];
2822 /* The fake registers will be eliminated to either the stack or
2823 hard frame pointer, both of which are usually valid base registers.
2824 Reload deals with the cases where the eliminated form isn't valid. */
2825 return (GP_REGNUM_P (regno)
2826 || regno == SP_REGNUM
2827 || regno == FRAME_POINTER_REGNUM
2828 || regno == ARG_POINTER_REGNUM);
2831 /* Return true if X is a valid base register for mode MODE.
2832 STRICT_P is true if REG_OK_STRICT is in effect. */
2834 static bool
2835 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2837 if (!strict_p && GET_CODE (x) == SUBREG)
2838 x = SUBREG_REG (x);
2840 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2843 /* Return true if address offset is a valid index. If it is, fill in INFO
2844 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2846 static bool
2847 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2848 enum machine_mode mode, bool strict_p)
2850 enum aarch64_address_type type;
2851 rtx index;
2852 int shift;
2854 /* (reg:P) */
2855 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2856 && GET_MODE (x) == Pmode)
2858 type = ADDRESS_REG_REG;
2859 index = x;
2860 shift = 0;
2862 /* (sign_extend:DI (reg:SI)) */
2863 else if ((GET_CODE (x) == SIGN_EXTEND
2864 || GET_CODE (x) == ZERO_EXTEND)
2865 && GET_MODE (x) == DImode
2866 && GET_MODE (XEXP (x, 0)) == SImode)
2868 type = (GET_CODE (x) == SIGN_EXTEND)
2869 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2870 index = XEXP (x, 0);
2871 shift = 0;
2873 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2874 else if (GET_CODE (x) == MULT
2875 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2876 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2877 && GET_MODE (XEXP (x, 0)) == DImode
2878 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2879 && CONST_INT_P (XEXP (x, 1)))
2881 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2882 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2883 index = XEXP (XEXP (x, 0), 0);
2884 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2886 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2887 else if (GET_CODE (x) == ASHIFT
2888 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2889 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2890 && GET_MODE (XEXP (x, 0)) == DImode
2891 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2892 && CONST_INT_P (XEXP (x, 1)))
2894 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2895 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2896 index = XEXP (XEXP (x, 0), 0);
2897 shift = INTVAL (XEXP (x, 1));
2899 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2900 else if ((GET_CODE (x) == SIGN_EXTRACT
2901 || GET_CODE (x) == ZERO_EXTRACT)
2902 && GET_MODE (x) == DImode
2903 && GET_CODE (XEXP (x, 0)) == MULT
2904 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2905 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2907 type = (GET_CODE (x) == SIGN_EXTRACT)
2908 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2909 index = XEXP (XEXP (x, 0), 0);
2910 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2911 if (INTVAL (XEXP (x, 1)) != 32 + shift
2912 || INTVAL (XEXP (x, 2)) != 0)
2913 shift = -1;
2915 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2916 (const_int 0xffffffff<<shift)) */
2917 else if (GET_CODE (x) == AND
2918 && GET_MODE (x) == DImode
2919 && GET_CODE (XEXP (x, 0)) == MULT
2920 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2921 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2922 && CONST_INT_P (XEXP (x, 1)))
2924 type = ADDRESS_REG_UXTW;
2925 index = XEXP (XEXP (x, 0), 0);
2926 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2927 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2928 shift = -1;
2930 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2931 else if ((GET_CODE (x) == SIGN_EXTRACT
2932 || GET_CODE (x) == ZERO_EXTRACT)
2933 && GET_MODE (x) == DImode
2934 && GET_CODE (XEXP (x, 0)) == ASHIFT
2935 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2936 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2938 type = (GET_CODE (x) == SIGN_EXTRACT)
2939 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2940 index = XEXP (XEXP (x, 0), 0);
2941 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2942 if (INTVAL (XEXP (x, 1)) != 32 + shift
2943 || INTVAL (XEXP (x, 2)) != 0)
2944 shift = -1;
2946 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2947 (const_int 0xffffffff<<shift)) */
2948 else if (GET_CODE (x) == AND
2949 && GET_MODE (x) == DImode
2950 && GET_CODE (XEXP (x, 0)) == ASHIFT
2951 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2952 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2953 && CONST_INT_P (XEXP (x, 1)))
2955 type = ADDRESS_REG_UXTW;
2956 index = XEXP (XEXP (x, 0), 0);
2957 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2958 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2959 shift = -1;
2961 /* (mult:P (reg:P) (const_int scale)) */
2962 else if (GET_CODE (x) == MULT
2963 && GET_MODE (x) == Pmode
2964 && GET_MODE (XEXP (x, 0)) == Pmode
2965 && CONST_INT_P (XEXP (x, 1)))
2967 type = ADDRESS_REG_REG;
2968 index = XEXP (x, 0);
2969 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2971 /* (ashift:P (reg:P) (const_int shift)) */
2972 else if (GET_CODE (x) == ASHIFT
2973 && GET_MODE (x) == Pmode
2974 && GET_MODE (XEXP (x, 0)) == Pmode
2975 && CONST_INT_P (XEXP (x, 1)))
2977 type = ADDRESS_REG_REG;
2978 index = XEXP (x, 0);
2979 shift = INTVAL (XEXP (x, 1));
2981 else
2982 return false;
2984 if (GET_CODE (index) == SUBREG)
2985 index = SUBREG_REG (index);
2987 if ((shift == 0 ||
2988 (shift > 0 && shift <= 3
2989 && (1 << shift) == GET_MODE_SIZE (mode)))
2990 && REG_P (index)
2991 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
2993 info->type = type;
2994 info->offset = index;
2995 info->shift = shift;
2996 return true;
2999 return false;
3002 static inline bool
3003 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3005 return (offset >= -64 * GET_MODE_SIZE (mode)
3006 && offset < 64 * GET_MODE_SIZE (mode)
3007 && offset % GET_MODE_SIZE (mode) == 0);
3010 static inline bool
3011 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3012 HOST_WIDE_INT offset)
3014 return offset >= -256 && offset < 256;
3017 static inline bool
3018 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3020 return (offset >= 0
3021 && offset < 4096 * GET_MODE_SIZE (mode)
3022 && offset % GET_MODE_SIZE (mode) == 0);
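/* Taken together these three predicates mirror the load/store immediate
   forms: for a DImode access the paired LDP/STP form reaches -512..504 in
   steps of 8, the unscaled LDUR/STUR form reaches -256..255 in steps of 1,
   and the scaled LDR/STR form reaches 0..32760 in steps of 8.  */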
3025 /* Return true if X is a valid address for machine mode MODE. If it is,
3026 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3027 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3029 static bool
3030 aarch64_classify_address (struct aarch64_address_info *info,
3031 rtx x, enum machine_mode mode,
3032 RTX_CODE outer_code, bool strict_p)
3034 enum rtx_code code = GET_CODE (x);
3035 rtx op0, op1;
3036 bool allow_reg_index_p =
3037 outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
3039 /* Don't support anything other than POST_INC or REG addressing for
3040 AdvSIMD. */
3041 if (aarch64_vector_mode_p (mode)
3042 && (code != POST_INC && code != REG))
3043 return false;
3045 switch (code)
3047 case REG:
3048 case SUBREG:
3049 info->type = ADDRESS_REG_IMM;
3050 info->base = x;
3051 info->offset = const0_rtx;
3052 return aarch64_base_register_rtx_p (x, strict_p);
3054 case PLUS:
3055 op0 = XEXP (x, 0);
3056 op1 = XEXP (x, 1);
3057 if (GET_MODE_SIZE (mode) != 0
3058 && CONST_INT_P (op1)
3059 && aarch64_base_register_rtx_p (op0, strict_p))
3061 HOST_WIDE_INT offset = INTVAL (op1);
3063 info->type = ADDRESS_REG_IMM;
3064 info->base = op0;
3065 info->offset = op1;
3067 /* TImode and TFmode values are allowed in both pairs of X
3068 registers and individual Q registers. The available
3069 address modes are:
3070 X,X: 7-bit signed scaled offset
3071 Q: 9-bit signed offset
3072 We conservatively require an offset representable in either mode.
3074 if (mode == TImode || mode == TFmode)
3075 return (offset_7bit_signed_scaled_p (mode, offset)
3076 && offset_9bit_signed_unscaled_p (mode, offset));
3078 if (outer_code == PARALLEL)
3079 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3080 && offset_7bit_signed_scaled_p (mode, offset));
3081 else
3082 return (offset_9bit_signed_unscaled_p (mode, offset)
3083 || offset_12bit_unsigned_scaled_p (mode, offset));
3086 if (allow_reg_index_p)
3088 /* Look for base + (scaled/extended) index register. */
3089 if (aarch64_base_register_rtx_p (op0, strict_p)
3090 && aarch64_classify_index (info, op1, mode, strict_p))
3092 info->base = op0;
3093 return true;
3095 if (aarch64_base_register_rtx_p (op1, strict_p)
3096 && aarch64_classify_index (info, op0, mode, strict_p))
3098 info->base = op1;
3099 return true;
3103 return false;
3105 case POST_INC:
3106 case POST_DEC:
3107 case PRE_INC:
3108 case PRE_DEC:
3109 info->type = ADDRESS_REG_WB;
3110 info->base = XEXP (x, 0);
3111 info->offset = NULL_RTX;
3112 return aarch64_base_register_rtx_p (info->base, strict_p);
3114 case POST_MODIFY:
3115 case PRE_MODIFY:
3116 info->type = ADDRESS_REG_WB;
3117 info->base = XEXP (x, 0);
3118 if (GET_CODE (XEXP (x, 1)) == PLUS
3119 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3120 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3121 && aarch64_base_register_rtx_p (info->base, strict_p))
3123 HOST_WIDE_INT offset;
3124 info->offset = XEXP (XEXP (x, 1), 1);
3125 offset = INTVAL (info->offset);
3127 /* TImode and TFmode values are allowed in both pairs of X
3128 registers and individual Q registers. The available
3129 address modes are:
3130 X,X: 7-bit signed scaled offset
3131 Q: 9-bit signed offset
3132 We conservatively require an offset representable in either mode.
3134 if (mode == TImode || mode == TFmode)
3135 return (offset_7bit_signed_scaled_p (mode, offset)
3136 && offset_9bit_signed_unscaled_p (mode, offset));
3138 if (outer_code == PARALLEL)
3139 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3140 && offset_7bit_signed_scaled_p (mode, offset));
3141 else
3142 return offset_9bit_signed_unscaled_p (mode, offset);
3144 return false;
3146 case CONST:
3147 case SYMBOL_REF:
3148 case LABEL_REF:
3149 /* load literal: pc-relative constant pool entry. Only supported
3150 for SI mode or larger. */
3151 info->type = ADDRESS_SYMBOLIC;
3152 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3154 rtx sym, addend;
3156 split_const (x, &sym, &addend);
3157 return (GET_CODE (sym) == LABEL_REF
3158 || (GET_CODE (sym) == SYMBOL_REF
3159 && CONSTANT_POOL_ADDRESS_P (sym)));
3161 return false;
3163 case LO_SUM:
3164 info->type = ADDRESS_LO_SUM;
3165 info->base = XEXP (x, 0);
3166 info->offset = XEXP (x, 1);
3167 if (allow_reg_index_p
3168 && aarch64_base_register_rtx_p (info->base, strict_p))
3170 rtx sym, offs;
3171 split_const (info->offset, &sym, &offs);
3172 if (GET_CODE (sym) == SYMBOL_REF
3173 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3174 == SYMBOL_SMALL_ABSOLUTE))
3176 /* The symbol and offset must be aligned to the access size. */
3177 unsigned int align;
3178 unsigned int ref_size;
3180 if (CONSTANT_POOL_ADDRESS_P (sym))
3181 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3182 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3184 tree exp = SYMBOL_REF_DECL (sym);
3185 align = TYPE_ALIGN (TREE_TYPE (exp));
3186 align = CONSTANT_ALIGNMENT (exp, align);
3188 else if (SYMBOL_REF_DECL (sym))
3189 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3190 else
3191 align = BITS_PER_UNIT;
3193 ref_size = GET_MODE_SIZE (mode);
3194 if (ref_size == 0)
3195 ref_size = GET_MODE_SIZE (DImode);
3197 return ((INTVAL (offs) & (ref_size - 1)) == 0
3198 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3201 return false;
3203 default:
3204 return false;
3208 bool
3209 aarch64_symbolic_address_p (rtx x)
3211 rtx offset;
3213 split_const (x, &x, &offset);
3214 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3217 /* Classify the base of symbolic expression X, given that X appears in
3218 context CONTEXT. */
3220 enum aarch64_symbol_type
3221 aarch64_classify_symbolic_expression (rtx x,
3222 enum aarch64_symbol_context context)
3224 rtx offset;
3226 split_const (x, &x, &offset);
3227 return aarch64_classify_symbol (x, context);
3231 /* Return TRUE if X is a legitimate address for accessing memory in
3232 mode MODE. */
3233 static bool
3234 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3236 struct aarch64_address_info addr;
3238 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3241 /* Return TRUE if X is a legitimate address for accessing memory in
3242 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3243 pair operation. */
3244 bool
3245 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3246 RTX_CODE outer_code, bool strict_p)
3248 struct aarch64_address_info addr;
3250 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3253 /* Return TRUE if rtx X is immediate constant 0.0 */
3254 bool
3255 aarch64_float_const_zero_rtx_p (rtx x)
3257 REAL_VALUE_TYPE r;
3259 if (GET_MODE (x) == VOIDmode)
3260 return false;
3262 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3263 if (REAL_VALUE_MINUS_ZERO (r))
3264 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3265 return REAL_VALUES_EQUAL (r, dconst0);
3268 /* Return the fixed registers used for condition codes. */
3270 static bool
3271 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3273 *p1 = CC_REGNUM;
3274 *p2 = INVALID_REGNUM;
3275 return true;
3278 enum machine_mode
3279 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3281 /* All floating point compares return CCFP if it is an equality
3282 comparison, and CCFPE otherwise. */
3283 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3285 switch (code)
3287 case EQ:
3288 case NE:
3289 case UNORDERED:
3290 case ORDERED:
3291 case UNLT:
3292 case UNLE:
3293 case UNGT:
3294 case UNGE:
3295 case UNEQ:
3296 case LTGT:
3297 return CCFPmode;
3299 case LT:
3300 case LE:
3301 case GT:
3302 case GE:
3303 return CCFPEmode;
3305 default:
3306 gcc_unreachable ();
3310 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3311 && y == const0_rtx
3312 && (code == EQ || code == NE || code == LT || code == GE)
3313 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3314 || GET_CODE (x) == NEG))
3315 return CC_NZmode;
3317 /* A compare with a shifted, extended or negated operand. Because of canonicalization,
3318 the comparison will have to be swapped when we emit the assembly
3319 code. */
3320 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3321 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3322 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3323 || GET_CODE (x) == LSHIFTRT
3324 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND
3325 || GET_CODE (x) == NEG))
3326 return CC_SWPmode;
3328 /* A compare of a mode narrower than SI mode against zero can be done
3329 by extending the value in the comparison. */
3330 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3331 && y == const0_rtx)
3332 /* Only use sign-extension if we really need it. */
3333 return ((code == GT || code == GE || code == LE || code == LT)
3334 ? CC_SESWPmode : CC_ZESWPmode);
3336 /* For everything else, return CCmode. */
3337 return CCmode;
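/* Illustration of the swapped case above: for a comparison of
   (ashift:DI x 2) with y the instruction emitted is of the form
	cmp	y, x, lsl #2
   so the relation actually tested is reversed, which is why CC_SWPmode
   maps LT to GT (and so on) in aarch64_get_condition_code below.  */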
3340 static unsigned
3341 aarch64_get_condition_code (rtx x)
3343 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3344 enum rtx_code comp_code = GET_CODE (x);
3346 if (GET_MODE_CLASS (mode) != MODE_CC)
3347 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3349 switch (mode)
3351 case CCFPmode:
3352 case CCFPEmode:
3353 switch (comp_code)
3355 case GE: return AARCH64_GE;
3356 case GT: return AARCH64_GT;
3357 case LE: return AARCH64_LS;
3358 case LT: return AARCH64_MI;
3359 case NE: return AARCH64_NE;
3360 case EQ: return AARCH64_EQ;
3361 case ORDERED: return AARCH64_VC;
3362 case UNORDERED: return AARCH64_VS;
3363 case UNLT: return AARCH64_LT;
3364 case UNLE: return AARCH64_LE;
3365 case UNGT: return AARCH64_HI;
3366 case UNGE: return AARCH64_PL;
3367 default: gcc_unreachable ();
3369 break;
3371 case CCmode:
3372 switch (comp_code)
3374 case NE: return AARCH64_NE;
3375 case EQ: return AARCH64_EQ;
3376 case GE: return AARCH64_GE;
3377 case GT: return AARCH64_GT;
3378 case LE: return AARCH64_LE;
3379 case LT: return AARCH64_LT;
3380 case GEU: return AARCH64_CS;
3381 case GTU: return AARCH64_HI;
3382 case LEU: return AARCH64_LS;
3383 case LTU: return AARCH64_CC;
3384 default: gcc_unreachable ();
3386 break;
3388 case CC_SWPmode:
3389 case CC_ZESWPmode:
3390 case CC_SESWPmode:
3391 switch (comp_code)
3393 case NE: return AARCH64_NE;
3394 case EQ: return AARCH64_EQ;
3395 case GE: return AARCH64_LE;
3396 case GT: return AARCH64_LT;
3397 case LE: return AARCH64_GE;
3398 case LT: return AARCH64_GT;
3399 case GEU: return AARCH64_LS;
3400 case GTU: return AARCH64_CC;
3401 case LEU: return AARCH64_CS;
3402 case LTU: return AARCH64_HI;
3403 default: gcc_unreachable ();
3405 break;
3407 case CC_NZmode:
3408 switch (comp_code)
3410 case NE: return AARCH64_NE;
3411 case EQ: return AARCH64_EQ;
3412 case GE: return AARCH64_PL;
3413 case LT: return AARCH64_MI;
3414 default: gcc_unreachable ();
3416 break;
3418 default:
3419 gcc_unreachable ();
3420 break;
3424 static unsigned
3425 bit_count (unsigned HOST_WIDE_INT value)
3427 unsigned count = 0;
3429 while (value)
3431 count++;
3432 value &= value - 1;
3435 return count;
3438 void
3439 aarch64_print_operand (FILE *f, rtx x, char code)
3441 switch (code)
3443 case 'e':
3444 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3446 int n;
3448 if (GET_CODE (x) != CONST_INT
3449 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3451 output_operand_lossage ("invalid operand for '%%%c'", code);
3452 return;
3455 switch (n)
3457 case 3:
3458 fputc ('b', f);
3459 break;
3460 case 4:
3461 fputc ('h', f);
3462 break;
3463 case 5:
3464 fputc ('w', f);
3465 break;
3466 default:
3467 output_operand_lossage ("invalid operand for '%%%c'", code);
3468 return;
3471 break;
3473 case 'p':
3475 int n;
3477 /* Print N such that 2^N == X. */
3478 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3480 output_operand_lossage ("invalid operand for '%%%c'", code);
3481 return;
3484 asm_fprintf (f, "%d", n);
3486 break;
3488 case 'P':
3489 /* Print the number of non-zero bits in X (a const_int). */
3490 if (GET_CODE (x) != CONST_INT)
3492 output_operand_lossage ("invalid operand for '%%%c'", code);
3493 return;
3496 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3497 break;
3499 case 'H':
3500 /* Print the higher numbered register of a pair (TImode) of regs. */
3501 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3503 output_operand_lossage ("invalid operand for '%%%c'", code);
3504 return;
3507 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3508 break;
3510 case 'm':
3511 /* Print a condition (eq, ne, etc). */
3513 /* CONST_TRUE_RTX means always -- that's the default. */
3514 if (x == const_true_rtx)
3515 return;
3517 if (!COMPARISON_P (x))
3519 output_operand_lossage ("invalid operand for '%%%c'", code);
3520 return;
3523 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3524 break;
3526 case 'M':
3527 /* Print the inverse of a condition (eq <-> ne, etc). */
3529 /* CONST_TRUE_RTX means never -- that's the default. */
3530 if (x == const_true_rtx)
3532 fputs ("nv", f);
3533 return;
3536 if (!COMPARISON_P (x))
3538 output_operand_lossage ("invalid operand for '%%%c'", code);
3539 return;
3542 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3543 (aarch64_get_condition_code (x))], f);
3544 break;
3546 case 'b':
3547 case 'h':
3548 case 's':
3549 case 'd':
3550 case 'q':
3551 /* Print a scalar FP/SIMD register name. */
3552 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3554 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3555 return;
3557 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3558 break;
3560 case 'S':
3561 case 'T':
3562 case 'U':
3563 case 'V':
3564 /* Print the first FP/SIMD register name in a list. */
3565 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3567 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3568 return;
3570 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3571 break;
3573 case 'X':
3574 /* Print bottom 16 bits of integer constant in hex. */
3575 if (GET_CODE (x) != CONST_INT)
3577 output_operand_lossage ("invalid operand for '%%%c'", code);
3578 return;
3580 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3581 break;
3583 case 'w':
3584 case 'x':
3585 /* Print a general register name or the zero register (32-bit or
3586 64-bit). */
3587 if (x == const0_rtx
3588 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3590 asm_fprintf (f, "%czr", code);
3591 break;
3594 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3596 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3597 break;
3600 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3602 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3603 break;
3606 /* Fall through */
3608 case 0:
3609 /* Print a normal operand, if it's a general register, then we
3610 assume DImode. */
3611 if (x == NULL)
3613 output_operand_lossage ("missing operand");
3614 return;
3617 switch (GET_CODE (x))
3619 case REG:
3620 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3621 break;
3623 case MEM:
3624 aarch64_memory_reference_mode = GET_MODE (x);
3625 output_address (XEXP (x, 0));
3626 break;
3628 case LABEL_REF:
3629 case SYMBOL_REF:
3630 output_addr_const (asm_out_file, x);
3631 break;
3633 case CONST_INT:
3634 asm_fprintf (f, "%wd", INTVAL (x));
3635 break;
3637 case CONST_VECTOR:
3638 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3640 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3641 HOST_WIDE_INT_MIN,
3642 HOST_WIDE_INT_MAX));
3643 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3645 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3647 fputc ('0', f);
3649 else
3650 gcc_unreachable ();
3651 break;
3653 case CONST_DOUBLE:
3654 /* CONST_DOUBLE can represent a double-width integer.
3655 In this case, the mode of x is VOIDmode. */
3656 if (GET_MODE (x) == VOIDmode)
3657 ; /* Do Nothing. */
3658 else if (aarch64_float_const_zero_rtx_p (x))
3660 fputc ('0', f);
3661 break;
3663 else if (aarch64_float_const_representable_p (x))
3665 #define buf_size 20
3666 char float_buf[buf_size] = {'\0'};
3667 REAL_VALUE_TYPE r;
3668 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3669 real_to_decimal_for_mode (float_buf, &r,
3670 buf_size, buf_size,
3671 1, GET_MODE (x));
3672 asm_fprintf (asm_out_file, "%s", float_buf);
3673 break;
3674 #undef buf_size
3676 output_operand_lossage ("invalid constant");
3677 return;
3678 default:
3679 output_operand_lossage ("invalid operand");
3680 return;
3682 break;
3684 case 'A':
3685 if (GET_CODE (x) == HIGH)
3686 x = XEXP (x, 0);
3688 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3690 case SYMBOL_SMALL_GOT:
3691 asm_fprintf (asm_out_file, ":got:");
3692 break;
3694 case SYMBOL_SMALL_TLSGD:
3695 asm_fprintf (asm_out_file, ":tlsgd:");
3696 break;
3698 case SYMBOL_SMALL_TLSDESC:
3699 asm_fprintf (asm_out_file, ":tlsdesc:");
3700 break;
3702 case SYMBOL_SMALL_GOTTPREL:
3703 asm_fprintf (asm_out_file, ":gottprel:");
3704 break;
3706 case SYMBOL_SMALL_TPREL:
3707 asm_fprintf (asm_out_file, ":tprel:");
3708 break;
3710 case SYMBOL_TINY_GOT:
3711 gcc_unreachable ();
3712 break;
3714 default:
3715 break;
3717 output_addr_const (asm_out_file, x);
3718 break;
3720 case 'L':
3721 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3723 case SYMBOL_SMALL_GOT:
3724 asm_fprintf (asm_out_file, ":lo12:");
3725 break;
3727 case SYMBOL_SMALL_TLSGD:
3728 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3729 break;
3731 case SYMBOL_SMALL_TLSDESC:
3732 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3733 break;
3735 case SYMBOL_SMALL_GOTTPREL:
3736 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3737 break;
3739 case SYMBOL_SMALL_TPREL:
3740 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3741 break;
3743 case SYMBOL_TINY_GOT:
3744 asm_fprintf (asm_out_file, ":got:");
3745 break;
3747 default:
3748 break;
3750 output_addr_const (asm_out_file, x);
3751 break;
3753 case 'G':
3755 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3757 case SYMBOL_SMALL_TPREL:
3758 asm_fprintf (asm_out_file, ":tprel_hi12:");
3759 break;
3760 default:
3761 break;
3763 output_addr_const (asm_out_file, x);
3764 break;
3766 default:
3767 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3768 return;
3772 void
3773 aarch64_print_operand_address (FILE *f, rtx x)
3775 struct aarch64_address_info addr;
3777 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3778 MEM, true))
3779 switch (addr.type)
3781 case ADDRESS_REG_IMM:
3782 if (addr.offset == const0_rtx)
3783 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3784 else
3785 asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
3786 INTVAL (addr.offset));
3787 return;
3789 case ADDRESS_REG_REG:
3790 if (addr.shift == 0)
3791 asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3792 reg_names [REGNO (addr.offset)]);
3793 else
3794 asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3795 reg_names [REGNO (addr.offset)], addr.shift);
3796 return;
3798 case ADDRESS_REG_UXTW:
3799 if (addr.shift == 0)
3800 asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
3801 REGNO (addr.offset) - R0_REGNUM);
3802 else
3803 asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
3804 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3805 return;
3807 case ADDRESS_REG_SXTW:
3808 if (addr.shift == 0)
3809 asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
3810 REGNO (addr.offset) - R0_REGNUM);
3811 else
3812 asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
3813 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3814 return;
3816 case ADDRESS_REG_WB:
3817 switch (GET_CODE (x))
3819 case PRE_INC:
3820 asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
3821 GET_MODE_SIZE (aarch64_memory_reference_mode));
3822 return;
3823 case POST_INC:
3824 asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
3825 GET_MODE_SIZE (aarch64_memory_reference_mode));
3826 return;
3827 case PRE_DEC:
3828 asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
3829 GET_MODE_SIZE (aarch64_memory_reference_mode));
3830 return;
3831 case POST_DEC:
3832 asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
3833 GET_MODE_SIZE (aarch64_memory_reference_mode));
3834 return;
3835 case PRE_MODIFY:
3836 asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
3837 INTVAL (addr.offset));
3838 return;
3839 case POST_MODIFY:
3840 asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
3841 INTVAL (addr.offset));
3842 return;
3843 default:
3844 break;
3846 break;
3848 case ADDRESS_LO_SUM:
3849 asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
3850 output_addr_const (f, addr.offset);
3851 asm_fprintf (f, "]");
3852 return;
3854 case ADDRESS_SYMBOLIC:
3855 break;
3858 output_addr_const (f, x);
3861 bool
3862 aarch64_label_mentioned_p (rtx x)
3864 const char *fmt;
3865 int i;
3867 if (GET_CODE (x) == LABEL_REF)
3868 return true;
3870 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3871 referencing instruction, but they are constant offsets, not
3872 symbols. */
3873 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3874 return false;
3876 fmt = GET_RTX_FORMAT (GET_CODE (x));
3877 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3879 if (fmt[i] == 'E')
3881 int j;
3883 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3884 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3885 return 1;
3887 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3888 return 1;
3891 return 0;
3894 /* Implement REGNO_REG_CLASS. */
3896 enum reg_class
3897 aarch64_regno_regclass (unsigned regno)
3899 if (GP_REGNUM_P (regno))
3900 return CORE_REGS;
3902 if (regno == SP_REGNUM)
3903 return STACK_REG;
3905 if (regno == FRAME_POINTER_REGNUM
3906 || regno == ARG_POINTER_REGNUM)
3907 return CORE_REGS;
3909 if (FP_REGNUM_P (regno))
3910 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
3912 return NO_REGS;
3915 /* Try a machine-dependent way of reloading an illegitimate address
3916 operand. If we find one, push the reload and return the new rtx. */
3919 aarch64_legitimize_reload_address (rtx *x_p,
3920 enum machine_mode mode,
3921 int opnum, int type,
3922 int ind_levels ATTRIBUTE_UNUSED)
3924 rtx x = *x_p;
3926 /* Do not allow mem (plus (reg, const)) if vector mode. */
3927 if (aarch64_vector_mode_p (mode)
3928 && GET_CODE (x) == PLUS
3929 && REG_P (XEXP (x, 0))
3930 && CONST_INT_P (XEXP (x, 1)))
3932 rtx orig_rtx = x;
3933 x = copy_rtx (x);
3934 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3935 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3936 opnum, (enum reload_type) type);
3937 return x;
3940 /* We must recognize output that we have already generated ourselves. */
3941 if (GET_CODE (x) == PLUS
3942 && GET_CODE (XEXP (x, 0)) == PLUS
3943 && REG_P (XEXP (XEXP (x, 0), 0))
3944 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3945 && CONST_INT_P (XEXP (x, 1)))
3947 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3948 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3949 opnum, (enum reload_type) type);
3950 return x;
3953 /* We wish to handle large displacements off a base register by splitting
3954 the addend across an add and the mem insn. This can cut the number of
3955 extra insns needed from 3 to 1. It is only useful for load/store of a
3956 single register with a 12-bit offset field. */
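/* Hand-worked instance of the split below: for a DImode access at
   base + 0x13008 the addend is divided into high = 0x13000 and low = 8;
   the high part is reloaded into a base register with a single ADD
   (#0x13000 is a shifted 12-bit immediate) and the memory operand keeps
   the small offset #8.  */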
3957 if (GET_CODE (x) == PLUS
3958 && REG_P (XEXP (x, 0))
3959 && CONST_INT_P (XEXP (x, 1))
3960 && HARD_REGISTER_P (XEXP (x, 0))
3961 && mode != TImode
3962 && mode != TFmode
3963 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
3965 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
3966 HOST_WIDE_INT low = val & 0xfff;
3967 HOST_WIDE_INT high = val - low;
3968 HOST_WIDE_INT offs;
3969 rtx cst;
3970 enum machine_mode xmode = GET_MODE (x);
3972 /* In ILP32, xmode can be either DImode or SImode. */
3973 gcc_assert (xmode == DImode || xmode == SImode);
3975 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
3976 BLKmode alignment. */
3977 if (GET_MODE_SIZE (mode) == 0)
3978 return NULL_RTX;
3980 offs = low % GET_MODE_SIZE (mode);
3982 /* Align misaligned offset by adjusting high part to compensate. */
3983 if (offs != 0)
3985 if (aarch64_uimm12_shift (high + offs))
3987 /* Align down. */
3988 low = low - offs;
3989 high = high + offs;
3991 else
3993 /* Align up. */
3994 offs = GET_MODE_SIZE (mode) - offs;
3995 low = low + offs;
3996 high = high + (low & 0x1000) - offs;
3997 low &= 0xfff;
4001 /* Check for overflow. */
4002 if (high + low != val)
4003 return NULL_RTX;
4005 cst = GEN_INT (high);
4006 if (!aarch64_uimm12_shift (high))
4007 cst = force_const_mem (xmode, cst);
4009 /* Reload high part into base reg, leaving the low part
4010 in the mem instruction. */
4011 x = gen_rtx_PLUS (xmode,
4012 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4013 GEN_INT (low));
4015 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4016 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4017 opnum, (enum reload_type) type);
4018 return x;
4021 return NULL_RTX;
4025 static reg_class_t
4026 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4027 reg_class_t rclass,
4028 enum machine_mode mode,
4029 secondary_reload_info *sri)
4031 /* Without the TARGET_SIMD instructions we cannot move a Q register
4032 to a Q register directly. We need a scratch. */
4033 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4034 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4035 && reg_class_subset_p (rclass, FP_REGS))
4037 if (mode == TFmode)
4038 sri->icode = CODE_FOR_aarch64_reload_movtf;
4039 else if (mode == TImode)
4040 sri->icode = CODE_FOR_aarch64_reload_movti;
4041 return NO_REGS;
4044 /* A TFmode or TImode memory access should be handled via an FP register
4045 because AArch64 has richer addressing modes for LDR/STR instructions
4046 than LDP/STP instructions. */
4047 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
4048 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4049 return FP_REGS;
4051 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4052 return CORE_REGS;
4054 return NO_REGS;
4057 static bool
4058 aarch64_can_eliminate (const int from, const int to)
4060 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4061 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4063 if (frame_pointer_needed)
4065 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4066 return true;
4067 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4068 return false;
4069 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4070 && !cfun->calls_alloca)
4071 return true;
4072 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4073 return true;
4074 return false;
4076 else
4078 /* If we decided that we didn't need a leaf frame pointer but then used
4079 LR in the function, then we'll want a frame pointer after all, so
4080 prevent this elimination to ensure a frame pointer is used.
4082 NOTE: the original value of flag_omit_frame_pointer gets trashed
4083 IFF flag_omit_leaf_frame_pointer is true, so we check the value
4084 of faked_omit_frame_pointer here (which is true when we always
4085 wish to keep non-leaf frame pointers but only wish to keep leaf frame
4086 pointers when LR is clobbered). */
4087 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4088 && df_regs_ever_live_p (LR_REGNUM)
4089 && faked_omit_frame_pointer)
4090 return false;
4093 return true;
4096 HOST_WIDE_INT
4097 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4099 HOST_WIDE_INT frame_size;
4100 HOST_WIDE_INT offset;
4102 aarch64_layout_frame ();
4103 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4104 + crtl->outgoing_args_size
4105 + cfun->machine->saved_varargs_size);
4107 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4108 offset = frame_size;
4110 if (to == HARD_FRAME_POINTER_REGNUM)
4112 if (from == ARG_POINTER_REGNUM)
4113 return offset - crtl->outgoing_args_size;
4115 if (from == FRAME_POINTER_REGNUM)
4116 return cfun->machine->frame.saved_regs_size;
4119 if (to == STACK_POINTER_REGNUM)
4121 if (from == FRAME_POINTER_REGNUM)
4123 HOST_WIDE_INT elim = crtl->outgoing_args_size
4124 + cfun->machine->frame.saved_regs_size
4125 - cfun->machine->frame.fp_lr_offset;
4126 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4127 return elim;
4131 return offset;
4135 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4136 previous frame. */
4139 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4141 if (count != 0)
4142 return const0_rtx;
4143 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4147 static void
4148 aarch64_asm_trampoline_template (FILE *f)
4150 if (TARGET_ILP32)
4152 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4153 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4155 else
4157 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4158 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4160 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4161 assemble_aligned_integer (4, const0_rtx);
4162 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4163 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
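/* A sketch of the resulting trampoline layout (assuming the conventional
   assignment IP1 = x17 and static chain = x18; the ILP32 variant uses the
   corresponding W registers and 4-byte pointer slots):

	offset  0:	ldr	x17, .+16	// target function address
	offset  4:	ldr	x18, .+20	// static chain value
	offset  8:	br	x17
	offset 12:	.word	0		// pad the code part to 16 bytes
	offset 16:	<function address>
	offset 16 + POINTER_BYTES:	<static chain value>

   aarch64_trampoline_init below copies this template and then fills in
   the last two slots.  */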
4166 static void
4167 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4169 rtx fnaddr, mem, a_tramp;
4170 const int tramp_code_sz = 16;
4172 /* We don't need to copy the trailing D-words; we fill those in below. */
4173 emit_block_move (m_tramp, assemble_trampoline_template (),
4174 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4175 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4176 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4177 if (GET_MODE (fnaddr) != ptr_mode)
4178 fnaddr = convert_memory_address (ptr_mode, fnaddr);
4179 emit_move_insn (mem, fnaddr);
4181 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4182 emit_move_insn (mem, chain_value);
4184 /* XXX We should really define a "clear_cache" pattern and use
4185 gen_clear_cache(). */
4186 a_tramp = XEXP (m_tramp, 0);
4187 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4188 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4189 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4190 ptr_mode);
4193 static unsigned char
4194 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4196 switch (regclass)
4198 case CORE_REGS:
4199 case POINTER_REGS:
4200 case GENERAL_REGS:
4201 case ALL_REGS:
4202 case FP_REGS:
4203 case FP_LO_REGS:
4204 return
4205 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4206 (GET_MODE_SIZE (mode) + 7) / 8;
4207 case STACK_REG:
4208 return 1;
4210 case NO_REGS:
4211 return 0;
4213 default:
4214 break;
4216 gcc_unreachable ();
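/* For example (illustrative): a V4SImode value is a vector mode, so any of
   the classes above reports (16 + 15) / 16 = 1 register for it (a single
   Q register when allocated to FP_REGS), whereas a TImode value needs
   (16 + 7) / 8 = 2 registers, i.e. an X-register pair in CORE_REGS.  */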
4219 static reg_class_t
4220 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4222 if (regclass == POINTER_REGS || regclass == STACK_REG)
4223 return GENERAL_REGS;
4225 /* If it's an integer immediate that MOVI can't handle, then
4226 FP_REGS is not an option, so we return NO_REGS instead. */
4227 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4228 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4229 return NO_REGS;
4231 return regclass;
4234 void
4235 aarch64_asm_output_labelref (FILE* f, const char *name)
4237 asm_fprintf (f, "%U%s", name);
4240 static void
4241 aarch64_elf_asm_constructor (rtx symbol, int priority)
4243 if (priority == DEFAULT_INIT_PRIORITY)
4244 default_ctor_section_asm_out_constructor (symbol, priority);
4245 else
4247 section *s;
4248 char buf[18];
4249 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4250 s = get_section (buf, SECTION_WRITE, NULL);
4251 switch_to_section (s);
4252 assemble_align (POINTER_SIZE);
4253 assemble_aligned_integer (POINTER_BYTES, symbol);
4257 static void
4258 aarch64_elf_asm_destructor (rtx symbol, int priority)
4260 if (priority == DEFAULT_INIT_PRIORITY)
4261 default_dtor_section_asm_out_destructor (symbol, priority);
4262 else
4264 section *s;
4265 char buf[18];
4266 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4267 s = get_section (buf, SECTION_WRITE, NULL);
4268 switch_to_section (s);
4269 assemble_align (POINTER_SIZE);
4270 assemble_aligned_integer (POINTER_BYTES, symbol);
4274 const char*
4275 aarch64_output_casesi (rtx *operands)
4277 char buf[100];
4278 char label[100];
4279 rtx diff_vec = PATTERN (next_active_insn (operands[2]));
4280 int index;
4281 static const char *const patterns[4][2] =
4284 "ldrb\t%w3, [%0,%w1,uxtw]",
4285 "add\t%3, %4, %w3, sxtb #2"
4288 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4289 "add\t%3, %4, %w3, sxth #2"
4292 "ldr\t%w3, [%0,%w1,uxtw #2]",
4293 "add\t%3, %4, %w3, sxtw #2"
4295 /* We assume that DImode is only generated when not optimizing and
4296 that we don't really need 64-bit address offsets. That would
4297 imply an object file with 8GB of code in a single function! */
4299 "ldr\t%w3, [%0,%w1,uxtw #2]",
4300 "add\t%3, %4, %w3, sxtw #2"
4304 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4306 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4308 gcc_assert (index >= 0 && index <= 3);
4310 /* Need to implement table size reduction, by changing the code below. */
4311 output_asm_insn (patterns[index][0], operands);
4312 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4313 snprintf (buf, sizeof (buf),
4314 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4315 output_asm_insn (buf, operands);
4316 output_asm_insn (patterns[index][1], operands);
4317 output_asm_insn ("br\t%3", operands);
4318 assemble_label (asm_out_file, label);
4319 return "";
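/* For a dispatch table with HImode entries the sequence emitted above has
   the form (operand registers and the label name are illustrative):

	ldrh	w3, [x0, w1, uxtw #1]
	adr	x4, .Lrtx<N>
	add	x3, x4, w3, sxth #2
	br	x3
   .Lrtx<N>:

   i.e. the table entry is loaded, scaled by 4 (the size of one branch
   instruction) and added to the address of the label that follows the
   dispatch code.  */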
4323 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4324 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4325 operator. */
4328 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4330 if (shift >= 0 && shift <= 3)
4332 int size;
4333 for (size = 8; size <= 32; size *= 2)
4335 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4336 if (mask == bits << shift)
4337 return size;
4340 return 0;
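/* For example, aarch64_uxt_size (2, 0x3fc) returns 8, because
   0x3fc == 0xff << 2 and so the operand matches a UXTB combined with an
   LSL #2; aarch64_uxt_size (0, 0xffff) returns 16 (UXTH); any mask that
   is not an 8-, 16- or 32-bit field shifted left by 0..3 yields 0.  */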
4343 static bool
4344 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4345 const_rtx x ATTRIBUTE_UNUSED)
4347 /* We can't use blocks for constants when we're using a per-function
4348 constant pool. */
4349 return false;
4352 static section *
4353 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4354 rtx x ATTRIBUTE_UNUSED,
4355 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4357 /* Force all constant pool entries into the current function section. */
4358 return function_section (current_function_decl);
4362 /* Costs. */
4364 /* Helper function for rtx cost calculation. Strip a shift expression
4365 from X. Returns the inner operand if successful, or the original
4366 expression on failure. */
4367 static rtx
4368 aarch64_strip_shift (rtx x)
4370 rtx op = x;
4372 if ((GET_CODE (op) == ASHIFT
4373 || GET_CODE (op) == ASHIFTRT
4374 || GET_CODE (op) == LSHIFTRT)
4375 && CONST_INT_P (XEXP (op, 1)))
4376 return XEXP (op, 0);
4378 if (GET_CODE (op) == MULT
4379 && CONST_INT_P (XEXP (op, 1))
4380 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4381 return XEXP (op, 0);
4383 return x;
4386 /* Helper function for rtx cost calculation. Strip a shift or extend
4387 expression from X. Returns the inner operand if successful, or the
4388 original expression on failure. We deal with a number of possible
4389 canonicalization variations here. */
4390 static rtx
4391 aarch64_strip_shift_or_extend (rtx x)
4393 rtx op = x;
4395 /* Zero and sign extraction of a widened value. */
4396 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4397 && XEXP (op, 2) == const0_rtx
4398 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4399 XEXP (op, 1)))
4400 return XEXP (XEXP (op, 0), 0);
4402 /* It can also be represented (for zero-extend) as an AND with an
4403 immediate. */
4404 if (GET_CODE (op) == AND
4405 && GET_CODE (XEXP (op, 0)) == MULT
4406 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4407 && CONST_INT_P (XEXP (op, 1))
4408 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4409 INTVAL (XEXP (op, 1))) != 0)
4410 return XEXP (XEXP (op, 0), 0);
4412 /* Now handle extended register, as this may also have an optional
4413 left shift by 1..4. */
4414 if (GET_CODE (op) == ASHIFT
4415 && CONST_INT_P (XEXP (op, 1))
4416 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4417 op = XEXP (op, 0);
4419 if (GET_CODE (op) == ZERO_EXTEND
4420 || GET_CODE (op) == SIGN_EXTEND)
4421 op = XEXP (op, 0);
4423 if (op != x)
4424 return op;
4426 return aarch64_strip_shift (x);
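/* For example (illustrative RTL): given
     (ashift (sign_extend:DI (reg:SI x)) (const_int 2))
   the function above returns (reg:SI x), and for
     (mult:DI (reg:DI x) (const_int 8))
   the fallback to aarch64_strip_shift returns (reg:DI x); in both cases
   the caller then costs only the inner register, treating the shift or
   extend as folded into the surrounding arithmetic instruction.  */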
4429 /* Calculate the cost of calculating X, storing it in *COST. Result
4430 is true if the total cost of the operation has now been calculated. */
4431 static bool
4432 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4433 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4435 rtx op0, op1;
4436 const struct cpu_rtx_cost_table *extra_cost
4437 = aarch64_tune_params->insn_extra_cost;
4439 switch (code)
4441 case SET:
4442 op0 = SET_DEST (x);
4443 op1 = SET_SRC (x);
4445 switch (GET_CODE (op0))
4447 case MEM:
4448 if (speed)
4449 *cost += extra_cost->memory_store;
4451 if (op1 != const0_rtx)
4452 *cost += rtx_cost (op1, SET, 1, speed);
4453 return true;
4455 case SUBREG:
4456 if (! REG_P (SUBREG_REG (op0)))
4457 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4458 /* Fall through. */
4459 case REG:
4460 /* Cost is just the cost of the RHS of the set. */
4461 *cost += rtx_cost (op1, SET, 1, true);
4462 return true;
4464 case ZERO_EXTRACT: /* Bit-field insertion. */
4465 case SIGN_EXTRACT:
4466 /* Strip any redundant widening of the RHS to meet the width of
4467 the target. */
4468 if (GET_CODE (op1) == SUBREG)
4469 op1 = SUBREG_REG (op1);
4470 if ((GET_CODE (op1) == ZERO_EXTEND
4471 || GET_CODE (op1) == SIGN_EXTEND)
4472 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4473 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4474 >= INTVAL (XEXP (op0, 1))))
4475 op1 = XEXP (op1, 0);
4476 *cost += rtx_cost (op1, SET, 1, speed);
4477 return true;
4479 default:
4480 break;
4482 return false;
4484 case MEM:
4485 if (speed)
4486 *cost += extra_cost->memory_load;
4488 return true;
4490 case NEG:
4491 op0 = CONST0_RTX (GET_MODE (x));
4492 op1 = XEXP (x, 0);
4493 goto cost_minus;
4495 case COMPARE:
4496 op0 = XEXP (x, 0);
4497 op1 = XEXP (x, 1);
4499 if (op1 == const0_rtx
4500 && GET_CODE (op0) == AND)
4502 x = op0;
4503 goto cost_logic;
4506 /* Comparisons can work if the order is swapped.
4507 Canonicalization puts the more complex operation first, but
4508 we want it in op1. */
4509 if (! (REG_P (op0)
4510 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4512 op0 = XEXP (x, 1);
4513 op1 = XEXP (x, 0);
4515 goto cost_minus;
4517 case MINUS:
4518 op0 = XEXP (x, 0);
4519 op1 = XEXP (x, 1);
4521 cost_minus:
4522 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4523 || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4524 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4526 if (op0 != const0_rtx)
4527 *cost += rtx_cost (op0, MINUS, 0, speed);
4529 if (CONST_INT_P (op1))
4531 if (!aarch64_uimm12_shift (INTVAL (op1)))
4532 *cost += rtx_cost (op1, MINUS, 1, speed);
4534 else
4536 op1 = aarch64_strip_shift_or_extend (op1);
4537 *cost += rtx_cost (op1, MINUS, 1, speed);
4539 return true;
4542 return false;
4544 case PLUS:
4545 op0 = XEXP (x, 0);
4546 op1 = XEXP (x, 1);
4548 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4550 if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4552 *cost += rtx_cost (op0, PLUS, 0, speed);
4554 else
4556 rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4558 if (new_op0 == op0
4559 && GET_CODE (op0) == MULT)
4561 if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4562 && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4563 || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4564 && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4566 *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4567 speed)
4568 + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4569 speed)
4570 + rtx_cost (op1, PLUS, 1, speed));
4571 if (speed)
4572 *cost += extra_cost->int_multiply_extend_add;
4573 return true;
4575 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4576 + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4577 + rtx_cost (op1, PLUS, 1, speed));
4579 if (speed)
4580 *cost += extra_cost->int_multiply_add;
4583 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4584 + rtx_cost (op1, PLUS, 1, speed));
4586 return true;
4589 return false;
4591 case IOR:
4592 case XOR:
4593 case AND:
4594 cost_logic:
4595 op0 = XEXP (x, 0);
4596 op1 = XEXP (x, 1);
4598 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4600 if (CONST_INT_P (op1)
4601 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4603 *cost += rtx_cost (op0, AND, 0, speed);
4605 else
4607 if (GET_CODE (op0) == NOT)
4608 op0 = XEXP (op0, 0);
4609 op0 = aarch64_strip_shift (op0);
4610 *cost += (rtx_cost (op0, AND, 0, speed)
4611 + rtx_cost (op1, AND, 1, speed));
4613 return true;
4615 return false;
4617 case ZERO_EXTEND:
4618 if ((GET_MODE (x) == DImode
4619 && GET_MODE (XEXP (x, 0)) == SImode)
4620 || GET_CODE (XEXP (x, 0)) == MEM)
4622 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4623 return true;
4625 return false;
4627 case SIGN_EXTEND:
4628 if (GET_CODE (XEXP (x, 0)) == MEM)
4630 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4631 return true;
4633 return false;
4635 case ROTATE:
4636 if (!CONST_INT_P (XEXP (x, 1)))
4637 *cost += COSTS_N_INSNS (2);
4638 /* Fall through. */
4639 case ROTATERT:
4640 case LSHIFTRT:
4641 case ASHIFT:
4642 case ASHIFTRT:
4644 /* Shifting by a register often takes an extra cycle. */
4645 if (speed && !CONST_INT_P (XEXP (x, 1)))
4646 *cost += extra_cost->register_shift;
4648 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4649 return true;
4651 case HIGH:
4652 if (!CONSTANT_P (XEXP (x, 0)))
4653 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4654 return true;
4656 case LO_SUM:
4657 if (!CONSTANT_P (XEXP (x, 1)))
4658 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4659 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4660 return true;
4662 case ZERO_EXTRACT:
4663 case SIGN_EXTRACT:
4664 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4665 return true;
4667 case MULT:
4668 op0 = XEXP (x, 0);
4669 op1 = XEXP (x, 1);
4671 *cost = COSTS_N_INSNS (1);
4672 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4674 if (CONST_INT_P (op1)
4675 && exact_log2 (INTVAL (op1)) > 0)
4677 *cost += rtx_cost (op0, ASHIFT, 0, speed);
4678 return true;
4681 if ((GET_CODE (op0) == ZERO_EXTEND
4682 && GET_CODE (op1) == ZERO_EXTEND)
4683 || (GET_CODE (op0) == SIGN_EXTEND
4684 && GET_CODE (op1) == SIGN_EXTEND))
4686 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4687 + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4688 if (speed)
4689 *cost += extra_cost->int_multiply_extend;
4690 return true;
4693 if (speed)
4694 *cost += extra_cost->int_multiply;
4696 else if (speed)
4698 if (GET_MODE (x) == DFmode)
4699 *cost += extra_cost->double_multiply;
4700 else if (GET_MODE (x) == SFmode)
4701 *cost += extra_cost->float_multiply;
4704 return false; /* All arguments need to be in registers. */
4706 case MOD:
4707 case UMOD:
4708 *cost = COSTS_N_INSNS (2);
4709 if (speed)
4711 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4712 *cost += (extra_cost->int_multiply_add
4713 + extra_cost->int_divide);
4714 else if (GET_MODE (x) == DFmode)
4715 *cost += (extra_cost->double_multiply
4716 + extra_cost->double_divide);
4717 else if (GET_MODE (x) == SFmode)
4718 *cost += (extra_cost->float_multiply
4719 + extra_cost->float_divide);
4721 return false; /* All arguments need to be in registers. */
4723 case DIV:
4724 case UDIV:
4725 *cost = COSTS_N_INSNS (1);
4726 if (speed)
4728 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4729 *cost += extra_cost->int_divide;
4730 else if (GET_MODE (x) == DFmode)
4731 *cost += extra_cost->double_divide;
4732 else if (GET_MODE (x) == SFmode)
4733 *cost += extra_cost->float_divide;
4735 return false; /* All arguments need to be in registers. */
4737 default:
4738 break;
4740 return false;
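/* For example, a SET whose source is (plus (mult (reg) (reg)) (reg)) is
   costed above as a single multiply-accumulate: the three register
   operands plus extra_cost->int_multiply_add, rather than as a separate
   multiply followed by an add.  */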
4743 static int
4744 aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4745 enum machine_mode mode ATTRIBUTE_UNUSED,
4746 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4748 enum rtx_code c = GET_CODE (x);
4749 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4751 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4752 return addr_cost->pre_modify;
4754 if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4755 return addr_cost->post_modify;
4757 if (c == PLUS)
4759 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4760 return addr_cost->imm_offset;
4761 else if (GET_CODE (XEXP (x, 0)) == MULT
4762 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4763 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4764 return addr_cost->register_extend;
4766 return addr_cost->register_offset;
4768 else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4769 return addr_cost->imm_offset;
4771 return 0;
4774 static int
4775 aarch64_register_move_cost (enum machine_mode mode,
4776 reg_class_t from, reg_class_t to)
4778 const struct cpu_regmove_cost *regmove_cost
4779 = aarch64_tune_params->regmove_cost;
4781 if (from == GENERAL_REGS && to == GENERAL_REGS)
4782 return regmove_cost->GP2GP;
4783 else if (from == GENERAL_REGS)
4784 return regmove_cost->GP2FP;
4785 else if (to == GENERAL_REGS)
4786 return regmove_cost->FP2GP;
4788 /* When AdvSIMD instructions are disabled it is not possible to move
4789 a 128-bit value directly between Q registers. This is handled in
4790 secondary reload. A general register is used as a scratch to move
4791 the upper DI value and the lower DI value is moved directly,
4792 hence the cost is the sum of three moves. */
4794 if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
4795 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4797 return regmove_cost->FP2FP;
4800 static int
4801 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4802 reg_class_t rclass ATTRIBUTE_UNUSED,
4803 bool in ATTRIBUTE_UNUSED)
4805 return aarch64_tune_params->memmov_cost;
4808 /* Vectorizer cost model target hooks. */
4810 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4811 static int
4812 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4813 tree vectype,
4814 int misalign ATTRIBUTE_UNUSED)
4816 unsigned elements;
4818 switch (type_of_cost)
4820 case scalar_stmt:
4821 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
4823 case scalar_load:
4824 return aarch64_tune_params->vec_costs->scalar_load_cost;
4826 case scalar_store:
4827 return aarch64_tune_params->vec_costs->scalar_store_cost;
4829 case vector_stmt:
4830 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4832 case vector_load:
4833 return aarch64_tune_params->vec_costs->vec_align_load_cost;
4835 case vector_store:
4836 return aarch64_tune_params->vec_costs->vec_store_cost;
4838 case vec_to_scalar:
4839 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
4841 case scalar_to_vec:
4842 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
4844 case unaligned_load:
4845 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
4847 case unaligned_store:
4848 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
4850 case cond_branch_taken:
4851 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
4853 case cond_branch_not_taken:
4854 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
4856 case vec_perm:
4857 case vec_promote_demote:
4858 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4860 case vec_construct:
4861 elements = TYPE_VECTOR_SUBPARTS (vectype);
4862 return elements / 2 + 1;
4864 default:
4865 gcc_unreachable ();
4869 /* Implement targetm.vectorize.add_stmt_cost. */
4870 static unsigned
4871 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
4872 struct _stmt_vec_info *stmt_info, int misalign,
4873 enum vect_cost_model_location where)
4875 unsigned *cost = (unsigned *) data;
4876 unsigned retval = 0;
4878 if (flag_vect_cost_model)
4880 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4881 int stmt_cost =
4882 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
4884 /* Statements in an inner loop relative to the loop being
4885 vectorized are weighted more heavily. The value here is
4886 a function (linear for now) of the loop nest level. */
4887 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4889 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4890 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
4891 unsigned nest_level = loop_depth (loop);
4893 count *= nest_level;
4896 retval = (unsigned) (count * stmt_cost);
4897 cost[where] += retval;
4900 return retval;
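/* For example (illustrative numbers): a vector_stmt with COUNT == 1 that
   belongs to a loop nested one level inside the loop being vectorized
   (loop_depth == 2) is charged 2 * vec_stmt_cost instead of vec_stmt_cost,
   reflecting that inner-loop statements execute more often.  */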
4903 static void initialize_aarch64_code_model (void);
4905 /* Parse the architecture extension string. */
4907 static void
4908 aarch64_parse_extension (char *str)
4910 /* The extension string is parsed left to right. */
4911 const struct aarch64_option_extension *opt = NULL;
4913 /* Flag to say whether we are adding or removing an extension. */
4914 int adding_ext = -1;
4916 while (str != NULL && *str != 0)
4918 char *ext;
4919 size_t len;
4921 str++;
4922 ext = strchr (str, '+');
4924 if (ext != NULL)
4925 len = ext - str;
4926 else
4927 len = strlen (str);
4929 if (len >= 2 && strncmp (str, "no", 2) == 0)
4931 adding_ext = 0;
4932 len -= 2;
4933 str += 2;
4935 else if (len > 0)
4936 adding_ext = 1;
4938 if (len == 0)
4940 error ("missing feature modifier after %qs", "+no");
4941 return;
4944 /* Scan over the extensions table trying to find an exact match. */
4945 for (opt = all_extensions; opt->name != NULL; opt++)
4947 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
4949 /* Add or remove the extension. */
4950 if (adding_ext)
4951 aarch64_isa_flags |= opt->flags_on;
4952 else
4953 aarch64_isa_flags &= ~(opt->flags_off);
4954 break;
4958 if (opt->name == NULL)
4960 /* Extension not found in list. */
4961 error ("unknown feature modifier %qs", str);
4962 return;
4965 str = ext;
4968 return;
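/* For example, for -march=armv8-a+crypto+nofp the substring "+crypto+nofp"
   is handed to this function: the first iteration enables the flags of the
   "crypto" entry in all_extensions, the second strips the leading "no" and
   clears the flags of the "fp" entry.  An unrecognized name such as "+foo"
   is reported with the "unknown feature modifier" error.  (The extension
   names are examples; the exact set is whatever all_extensions provides.)  */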
4971 /* Parse the ARCH string. */
4973 static void
4974 aarch64_parse_arch (void)
4976 char *ext;
4977 const struct processor *arch;
4978 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
4979 size_t len;
4981 strcpy (str, aarch64_arch_string);
4983 ext = strchr (str, '+');
4985 if (ext != NULL)
4986 len = ext - str;
4987 else
4988 len = strlen (str);
4990 if (len == 0)
4992 error ("missing arch name in -march=%qs", str);
4993 return;
4996 /* Loop through the list of supported ARCHs to find a match. */
4997 for (arch = all_architectures; arch->name != NULL; arch++)
4999 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
5001 selected_arch = arch;
5002 aarch64_isa_flags = selected_arch->flags;
5003 selected_cpu = &all_cores[selected_arch->core];
5005 if (ext != NULL)
5007 /* ARCH string contains at least one extension. */
5008 aarch64_parse_extension (ext);
5011 return;
5015 /* ARCH name not found in list. */
5016 error ("unknown value %qs for -march", str);
5017 return;
5020 /* Parse the CPU string. */
5022 static void
5023 aarch64_parse_cpu (void)
5025 char *ext;
5026 const struct processor *cpu;
5027 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
5028 size_t len;
5030 strcpy (str, aarch64_cpu_string);
5032 ext = strchr (str, '+');
5034 if (ext != NULL)
5035 len = ext - str;
5036 else
5037 len = strlen (str);
5039 if (len == 0)
5041 error ("missing cpu name in -mcpu=%qs", str);
5042 return;
5045 /* Loop through the list of supported CPUs to find a match. */
5046 for (cpu = all_cores; cpu->name != NULL; cpu++)
5048 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
5050 selected_cpu = cpu;
5051 aarch64_isa_flags = selected_cpu->flags;
5053 if (ext != NULL)
5055 /* CPU string contains at least one extension. */
5056 aarch64_parse_extension (ext);
5059 return;
5063 /* CPU name not found in list. */
5064 error ("unknown value %qs for -mcpu", str);
5065 return;
5068 /* Parse the TUNE string. */
5070 static void
5071 aarch64_parse_tune (void)
5073 const struct processor *cpu;
5074 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
5075 strcpy (str, aarch64_tune_string);
5077 /* Loop through the list of supported CPUs to find a match. */
5078 for (cpu = all_cores; cpu->name != NULL; cpu++)
5080 if (strcmp (cpu->name, str) == 0)
5082 selected_tune = cpu;
5083 return;
5087 /* CPU name not found in list. */
5088 error ("unknown value %qs for -mtune", str);
5089 return;
5093 /* Implement TARGET_OPTION_OVERRIDE. */
5095 static void
5096 aarch64_override_options (void)
5098 /* -march wins over -mcpu, so when -march is defined the cpu is taken from
5099 the selected architecture and the -mcpu string is ignored; otherwise -march
5100 remains undefined. -mtune can be used with either -march or -mcpu. */
5102 if (aarch64_arch_string)
5104 aarch64_parse_arch ();
5105 aarch64_cpu_string = NULL;
5108 if (aarch64_cpu_string)
5110 aarch64_parse_cpu ();
5111 selected_arch = NULL;
5114 if (aarch64_tune_string)
5116 aarch64_parse_tune ();
5119 initialize_aarch64_code_model ();
5121 aarch64_build_bitmask_table ();
5123 /* This target defaults to strict volatile bitfields. */
5124 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
5125 flag_strict_volatile_bitfields = 1;
5127 /* If the user did not specify a processor, choose the default
5128 one for them. This will be the CPU set during configuration using
5129 --with-cpu, otherwise it is "generic". */
5130 if (!selected_cpu)
5132 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
5133 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
5136 gcc_assert (selected_cpu);
5138 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
5139 if (!selected_tune)
5140 selected_tune = &all_cores[selected_cpu->core];
5142 aarch64_tune_flags = selected_tune->flags;
5143 aarch64_tune = selected_tune->core;
5144 aarch64_tune_params = selected_tune->tune;
5146 aarch64_override_options_after_change ();
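/* For example (illustrative command lines; CPU names depend on all_cores):
   with "-mcpu=cortex-a53 -march=armv8-a+nofp" the -march value wins, the
   -mcpu string is discarded and the ISA flags come from the architecture
   with FP removed; with "-mcpu=cortex-a53 -mtune=cortex-a57" the ISA flags
   come from -mcpu while the tuning tables come from -mtune.  */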
5149 /* Implement targetm.override_options_after_change. */
5151 static void
5152 aarch64_override_options_after_change (void)
5154 faked_omit_frame_pointer = false;
5156 /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
5157 that aarch64_frame_pointer_required will be called. We need to remember
5158 whether flag_omit_frame_pointer was turned on normally or just faked. */
5160 if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
5162 flag_omit_frame_pointer = true;
5163 faked_omit_frame_pointer = true;
5167 static struct machine_function *
5168 aarch64_init_machine_status (void)
5170 struct machine_function *machine;
5171 machine = ggc_alloc_cleared_machine_function ();
5172 return machine;
5175 void
5176 aarch64_init_expanders (void)
5178 init_machine_status = aarch64_init_machine_status;
5181 /* A checking mechanism for the implementation of the various code models. */
5182 static void
5183 initialize_aarch64_code_model (void)
5185 if (flag_pic)
5187 switch (aarch64_cmodel_var)
5189 case AARCH64_CMODEL_TINY:
5190 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
5191 break;
5192 case AARCH64_CMODEL_SMALL:
5193 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
5194 break;
5195 case AARCH64_CMODEL_LARGE:
5196 sorry ("code model %qs with -f%s", "large",
5197 flag_pic > 1 ? "PIC" : "pic");
5198 default:
5199 gcc_unreachable ();
5202 else
5203 aarch64_cmodel = aarch64_cmodel_var;
5206 /* Return true if SYMBOL_REF X binds locally. */
5208 static bool
5209 aarch64_symbol_binds_local_p (const_rtx x)
5211 return (SYMBOL_REF_DECL (x)
5212 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
5213 : SYMBOL_REF_LOCAL_P (x));
5216 /* Return true if SYMBOL_REF X is thread local */
5217 static bool
5218 aarch64_tls_symbol_p (rtx x)
5220 if (! TARGET_HAVE_TLS)
5221 return false;
5223 if (GET_CODE (x) != SYMBOL_REF)
5224 return false;
5226 return SYMBOL_REF_TLS_MODEL (x) != 0;
5229 /* Classify a TLS symbol into one of the TLS kinds. */
5230 enum aarch64_symbol_type
5231 aarch64_classify_tls_symbol (rtx x)
5233 enum tls_model tls_kind = tls_symbolic_operand_type (x);
5235 switch (tls_kind)
5237 case TLS_MODEL_GLOBAL_DYNAMIC:
5238 case TLS_MODEL_LOCAL_DYNAMIC:
5239 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
5241 case TLS_MODEL_INITIAL_EXEC:
5242 return SYMBOL_SMALL_GOTTPREL;
5244 case TLS_MODEL_LOCAL_EXEC:
5245 return SYMBOL_SMALL_TPREL;
5247 case TLS_MODEL_EMULATED:
5248 case TLS_MODEL_NONE:
5249 return SYMBOL_FORCE_TO_MEM;
5251 default:
5252 gcc_unreachable ();
5256 /* Return the method that should be used to access SYMBOL_REF or
5257 LABEL_REF X in context CONTEXT. */
5259 enum aarch64_symbol_type
5260 aarch64_classify_symbol (rtx x,
5261 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
5263 if (GET_CODE (x) == LABEL_REF)
5265 switch (aarch64_cmodel)
5267 case AARCH64_CMODEL_LARGE:
5268 return SYMBOL_FORCE_TO_MEM;
5270 case AARCH64_CMODEL_TINY_PIC:
5271 case AARCH64_CMODEL_TINY:
5272 return SYMBOL_TINY_ABSOLUTE;
5274 case AARCH64_CMODEL_SMALL_PIC:
5275 case AARCH64_CMODEL_SMALL:
5276 return SYMBOL_SMALL_ABSOLUTE;
5278 default:
5279 gcc_unreachable ();
5283 if (GET_CODE (x) == SYMBOL_REF)
5285 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
5286 || CONSTANT_POOL_ADDRESS_P (x))
5287 return SYMBOL_FORCE_TO_MEM;
5289 if (aarch64_tls_symbol_p (x))
5290 return aarch64_classify_tls_symbol (x);
5292 switch (aarch64_cmodel)
5294 case AARCH64_CMODEL_TINY:
5295 if (SYMBOL_REF_WEAK (x))
5296 return SYMBOL_FORCE_TO_MEM;
5297 return SYMBOL_TINY_ABSOLUTE;
5299 case AARCH64_CMODEL_SMALL:
5300 if (SYMBOL_REF_WEAK (x))
5301 return SYMBOL_FORCE_TO_MEM;
5302 return SYMBOL_SMALL_ABSOLUTE;
5304 case AARCH64_CMODEL_TINY_PIC:
5305 if (!aarch64_symbol_binds_local_p (x))
5306 return SYMBOL_TINY_GOT;
5307 return SYMBOL_TINY_ABSOLUTE;
5309 case AARCH64_CMODEL_SMALL_PIC:
5310 if (!aarch64_symbol_binds_local_p (x))
5311 return SYMBOL_SMALL_GOT;
5312 return SYMBOL_SMALL_ABSOLUTE;
5314 default:
5315 gcc_unreachable ();
5319 /* By default push everything into the constant pool. */
5320 return SYMBOL_FORCE_TO_MEM;
5323 bool
5324 aarch64_constant_address_p (rtx x)
5326 return (CONSTANT_P (x) && memory_address_p (DImode, x));
5329 bool
5330 aarch64_legitimate_pic_operand_p (rtx x)
5332 if (GET_CODE (x) == SYMBOL_REF
5333 || (GET_CODE (x) == CONST
5334 && GET_CODE (XEXP (x, 0)) == PLUS
5335 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5336 return false;
5338 return true;
5341 /* Return true if X holds either a quarter-precision or
5342 floating-point +0.0 constant. */
5343 static bool
5344 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5346 if (!CONST_DOUBLE_P (x))
5347 return false;
5349 /* TODO: We could handle moving 0.0 to a TFmode register,
5350 but first we would like to refactor the movtf_aarch64
5351 to be more amicable to split moves properly and
5352 correctly gate on TARGET_SIMD. For now, reject all
5353 constants that are not destined for SFmode or DFmode registers. */
5354 if (!(mode == SFmode || mode == DFmode))
5355 return false;
5357 if (aarch64_float_const_zero_rtx_p (x))
5358 return true;
5359 return aarch64_float_const_representable_p (x);
5362 static bool
5363 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5365 /* Do not allow vector struct mode constants. We could support
5366 0 and -1 easily, but they need support in aarch64-simd.md. */
5367 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5368 return false;
5370 /* This could probably go away because
5371 we now decompose CONST_INTs according to expand_mov_immediate. */
5372 if ((GET_CODE (x) == CONST_VECTOR
5373 && aarch64_simd_valid_immediate (x, mode, false, NULL))
5374 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5375 return !targetm.cannot_force_const_mem (mode, x);
5377 if (GET_CODE (x) == HIGH
5378 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5379 return true;
5381 return aarch64_constant_address_p (x);
5385 aarch64_load_tp (rtx target)
5387 if (!target
5388 || GET_MODE (target) != Pmode
5389 || !register_operand (target, Pmode))
5390 target = gen_reg_rtx (Pmode);
5392 /* Can return in any reg. */
5393 emit_insn (gen_aarch64_load_tp_hard (target));
5394 return target;
5397 /* On AAPCS systems, this is the "struct __va_list". */
5398 static GTY(()) tree va_list_type;
5400 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5401 Return the type to use as __builtin_va_list.
5403 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5405 struct __va_list
5407 void *__stack;
5408 void *__gr_top;
5409 void *__vr_top;
5410 int __gr_offs;
5411 int __vr_offs;
5412 }; */
5414 static tree
5415 aarch64_build_builtin_va_list (void)
5417 tree va_list_name;
5418 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5420 /* Create the type. */
5421 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5422 /* Give it the required name. */
5423 va_list_name = build_decl (BUILTINS_LOCATION,
5424 TYPE_DECL,
5425 get_identifier ("__va_list"),
5426 va_list_type);
5427 DECL_ARTIFICIAL (va_list_name) = 1;
5428 TYPE_NAME (va_list_type) = va_list_name;
5429 TYPE_STUB_DECL (va_list_type) = va_list_name;
5431 /* Create the fields. */
5432 f_stack = build_decl (BUILTINS_LOCATION,
5433 FIELD_DECL, get_identifier ("__stack"),
5434 ptr_type_node);
5435 f_grtop = build_decl (BUILTINS_LOCATION,
5436 FIELD_DECL, get_identifier ("__gr_top"),
5437 ptr_type_node);
5438 f_vrtop = build_decl (BUILTINS_LOCATION,
5439 FIELD_DECL, get_identifier ("__vr_top"),
5440 ptr_type_node);
5441 f_groff = build_decl (BUILTINS_LOCATION,
5442 FIELD_DECL, get_identifier ("__gr_offs"),
5443 integer_type_node);
5444 f_vroff = build_decl (BUILTINS_LOCATION,
5445 FIELD_DECL, get_identifier ("__vr_offs"),
5446 integer_type_node);
5448 DECL_ARTIFICIAL (f_stack) = 1;
5449 DECL_ARTIFICIAL (f_grtop) = 1;
5450 DECL_ARTIFICIAL (f_vrtop) = 1;
5451 DECL_ARTIFICIAL (f_groff) = 1;
5452 DECL_ARTIFICIAL (f_vroff) = 1;
5454 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5455 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5456 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5457 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5458 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5460 TYPE_FIELDS (va_list_type) = f_stack;
5461 DECL_CHAIN (f_stack) = f_grtop;
5462 DECL_CHAIN (f_grtop) = f_vrtop;
5463 DECL_CHAIN (f_vrtop) = f_groff;
5464 DECL_CHAIN (f_groff) = f_vroff;
5466 /* Compute its layout. */
5467 layout_type (va_list_type);
5469 return va_list_type;
5472 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5473 static void
5474 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5476 const CUMULATIVE_ARGS *cum;
5477 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5478 tree stack, grtop, vrtop, groff, vroff;
5479 tree t;
5480 int gr_save_area_size;
5481 int vr_save_area_size;
5482 int vr_offset;
5484 cum = &crtl->args.info;
5485 gr_save_area_size
5486 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5487 vr_save_area_size
5488 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5490 if (TARGET_GENERAL_REGS_ONLY)
5492 if (cum->aapcs_nvrn > 0)
5493 sorry ("%qs and floating point or vector arguments",
5494 "-mgeneral-regs-only");
5495 vr_save_area_size = 0;
5498 f_stack = TYPE_FIELDS (va_list_type_node);
5499 f_grtop = DECL_CHAIN (f_stack);
5500 f_vrtop = DECL_CHAIN (f_grtop);
5501 f_groff = DECL_CHAIN (f_vrtop);
5502 f_vroff = DECL_CHAIN (f_groff);
5504 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5505 NULL_TREE);
5506 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5507 NULL_TREE);
5508 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5509 NULL_TREE);
5510 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5511 NULL_TREE);
5512 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5513 NULL_TREE);
5515 /* Emit code to initialize STACK, which points to the next varargs stack
5516 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
5517 by named arguments. STACK is 8-byte aligned. */
5518 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5519 if (cum->aapcs_stack_size > 0)
5520 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5521 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5522 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5524 /* Emit code to initialize GRTOP, the top of the GR save area.
5525 virtual_incoming_args_rtx should have been 16 byte aligned. */
5526 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5527 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5528 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5530 /* Emit code to initialize VRTOP, the top of the VR save area.
5531 This address is gr_save_area_bytes below GRTOP, rounded
5532 down to the next 16-byte boundary. */
5533 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5534 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5535 STACK_BOUNDARY / BITS_PER_UNIT);
5537 if (vr_offset)
5538 t = fold_build_pointer_plus_hwi (t, -vr_offset);
5539 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5540 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5542 /* Emit code to initialize GROFF, the offset from GRTOP of the
5543 next GPR argument. */
5544 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5545 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5546 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5548 /* Likewise emit code to initialize VROFF, the offset from VRTOP
5549 of the next VR argument. */
5550 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5551 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5552 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
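/* Worked example (illustrative, assuming the usual 8 general and 8 vector
   argument registers): if the named arguments consumed 3 general and 1
   vector register, then gr_save_area_size = (8 - 3) * 8 = 40 bytes and
   vr_save_area_size = (8 - 1) * 16 = 112 bytes, so the code above sets
   __gr_offs = -40 and __vr_offs = -112, with __gr_top and __vr_top
   pointing just past their respective save areas.  */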
5555 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
5557 static tree
5558 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5559 gimple_seq *post_p ATTRIBUTE_UNUSED)
5561 tree addr;
5562 bool indirect_p;
5563 bool is_ha; /* is HFA or HVA. */
5564 bool dw_align; /* double-word align. */
5565 enum machine_mode ag_mode = VOIDmode;
5566 int nregs;
5567 enum machine_mode mode;
5569 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5570 tree stack, f_top, f_off, off, arg, roundup, on_stack;
5571 HOST_WIDE_INT size, rsize, adjust, align;
5572 tree t, u, cond1, cond2;
5574 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5575 if (indirect_p)
5576 type = build_pointer_type (type);
5578 mode = TYPE_MODE (type);
5580 f_stack = TYPE_FIELDS (va_list_type_node);
5581 f_grtop = DECL_CHAIN (f_stack);
5582 f_vrtop = DECL_CHAIN (f_grtop);
5583 f_groff = DECL_CHAIN (f_vrtop);
5584 f_vroff = DECL_CHAIN (f_groff);
5586 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5587 f_stack, NULL_TREE);
5588 size = int_size_in_bytes (type);
5589 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5591 dw_align = false;
5592 adjust = 0;
5593 if (aarch64_vfp_is_call_or_return_candidate (mode,
5594 type,
5595 &ag_mode,
5596 &nregs,
5597 &is_ha))
5599 /* TYPE passed in fp/simd registers. */
5600 if (TARGET_GENERAL_REGS_ONLY)
5601 sorry ("%qs and floating point or vector arguments",
5602 "-mgeneral-regs-only");
5604 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5605 unshare_expr (valist), f_vrtop, NULL_TREE);
5606 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5607 unshare_expr (valist), f_vroff, NULL_TREE);
5609 rsize = nregs * UNITS_PER_VREG;
5611 if (is_ha)
5613 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5614 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5616 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5617 && size < UNITS_PER_VREG)
5619 adjust = UNITS_PER_VREG - size;
5622 else
5624 /* TYPE passed in general registers. */
5625 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5626 unshare_expr (valist), f_grtop, NULL_TREE);
5627 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5628 unshare_expr (valist), f_groff, NULL_TREE);
5629 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5630 nregs = rsize / UNITS_PER_WORD;
5632 if (align > 8)
5633 dw_align = true;
5635 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5636 && size < UNITS_PER_WORD)
5638 adjust = UNITS_PER_WORD - size;
5642 /* Get a local temporary for the field value. */
5643 off = get_initialized_tmp_var (f_off, pre_p, NULL);
5645 /* Emit code to branch if off >= 0. */
5646 t = build2 (GE_EXPR, boolean_type_node, off,
5647 build_int_cst (TREE_TYPE (off), 0));
5648 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5650 if (dw_align)
5652 /* Emit: offs = (offs + 15) & -16. */
5653 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5654 build_int_cst (TREE_TYPE (off), 15));
5655 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5656 build_int_cst (TREE_TYPE (off), -16));
5657 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5659 else
5660 roundup = NULL;
5662 /* Update ap.__[g|v]r_offs */
5663 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5664 build_int_cst (TREE_TYPE (off), rsize));
5665 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5667 /* String up. */
5668 if (roundup)
5669 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5671 /* [cond2] if (ap.__[g|v]r_offs > 0) */
5672 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5673 build_int_cst (TREE_TYPE (f_off), 0));
5674 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5676 /* String up: make sure the assignment happens before the use. */
5677 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5678 COND_EXPR_ELSE (cond1) = t;
5680 /* Prepare the trees handling the argument that is passed on the stack;
5681 the top level node will store in ON_STACK. */
5682 arg = get_initialized_tmp_var (stack, pre_p, NULL);
5683 if (align > 8)
5685 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
5686 t = fold_convert (intDI_type_node, arg);
5687 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5688 build_int_cst (TREE_TYPE (t), 15));
5689 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5690 build_int_cst (TREE_TYPE (t), -16));
5691 t = fold_convert (TREE_TYPE (arg), t);
5692 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5694 else
5695 roundup = NULL;
5696 /* Advance ap.__stack */
5697 t = fold_convert (intDI_type_node, arg);
5698 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5699 build_int_cst (TREE_TYPE (t), size + 7));
5700 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5701 build_int_cst (TREE_TYPE (t), -8));
5702 t = fold_convert (TREE_TYPE (arg), t);
5703 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5704 /* String up roundup and advance. */
5705 if (roundup)
5706 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5707 /* String up with arg */
5708 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5709 /* Big-endianness related address adjustment. */
5710 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5711 && size < UNITS_PER_WORD)
5713 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5714 size_int (UNITS_PER_WORD - size));
5715 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5718 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5719 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5721 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
5722 t = off;
5723 if (adjust)
5724 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5725 build_int_cst (TREE_TYPE (off), adjust));
5727 t = fold_convert (sizetype, t);
5728 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5730 if (is_ha)
5732 /* type ha; // treat as "struct {ftype field[n];}"
5733 ... [computing offs]
5734 for (i = 0; i < nregs; ++i, offs += 16)
5735 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5736 return ha; */
5737 int i;
5738 tree tmp_ha, field_t, field_ptr_t;
5740 /* Declare a local variable. */
5741 tmp_ha = create_tmp_var_raw (type, "ha");
5742 gimple_add_tmp_var (tmp_ha);
5744 /* Establish the base type. */
5745 switch (ag_mode)
5747 case SFmode:
5748 field_t = float_type_node;
5749 field_ptr_t = float_ptr_type_node;
5750 break;
5751 case DFmode:
5752 field_t = double_type_node;
5753 field_ptr_t = double_ptr_type_node;
5754 break;
5755 case TFmode:
5756 field_t = long_double_type_node;
5757 field_ptr_t = long_double_ptr_type_node;
5758 break;
5759 /* Half-precision and quad-precision types are not fully supported yet.
5760 Enable the following code once that support is complete; the correct
5761 type node for __fp16 * still needs to be found. */
5762 #if 0
5763 case HFmode:
5764 field_t = float_type_node;
5765 field_ptr_t = float_ptr_type_node;
5766 break;
5767 #endif
5768 case V2SImode:
5769 case V4SImode:
5771 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5772 field_t = build_vector_type_for_mode (innertype, ag_mode);
5773 field_ptr_t = build_pointer_type (field_t);
5775 break;
5776 default:
5777 gcc_assert (0);
5780 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area) */
5781 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5782 addr = t;
5783 t = fold_convert (field_ptr_t, addr);
5784 t = build2 (MODIFY_EXPR, field_t,
5785 build1 (INDIRECT_REF, field_t, tmp_ha),
5786 build1 (INDIRECT_REF, field_t, t));
5788 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
5789 for (i = 1; i < nregs; ++i)
5791 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5792 u = fold_convert (field_ptr_t, addr);
5793 u = build2 (MODIFY_EXPR, field_t,
5794 build2 (MEM_REF, field_t, tmp_ha,
5795 build_int_cst (field_ptr_t,
5796 (i *
5797 int_size_in_bytes (field_t)))),
5798 build1 (INDIRECT_REF, field_t, u));
5799 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5802 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5803 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5806 COND_EXPR_ELSE (cond2) = t;
5807 addr = fold_convert (build_pointer_type (type), cond1);
5808 addr = build_va_arg_indirect_ref (addr);
5810 if (indirect_p)
5811 addr = build_va_arg_indirect_ref (addr);
5813 return addr;
5816 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
5818 static void
5819 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5820 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5821 int no_rtl)
5823 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5824 CUMULATIVE_ARGS local_cum;
5825 int gr_saved, vr_saved;
5827 /* The caller has advanced CUM up to, but not beyond, the last named
5828 argument. Advance a local copy of CUM past the last "real" named
5829 argument, to find out how many registers are left over. */
5830 local_cum = *cum;
5831 aarch64_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, true);
5833 /* Found out how many registers we need to save. */
5834 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5835 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
5837 if (TARGET_GENERAL_REGS_ONLY)
5839 if (local_cum.aapcs_nvrn > 0)
5840 sorry ("%qs and floating point or vector arguments",
5841 "-mgeneral-regs-only");
5842 vr_saved = 0;
5845 if (!no_rtl)
5847 if (gr_saved > 0)
5849 rtx ptr, mem;
5851 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
5852 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5853 - gr_saved * UNITS_PER_WORD);
5854 mem = gen_frame_mem (BLKmode, ptr);
5855 set_mem_alias_set (mem, get_varargs_alias_set ());
5857 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5858 mem, gr_saved);
5860 if (vr_saved > 0)
5862 /* We can't use move_block_from_reg, because it will use
5863 the wrong mode, storing D regs only. */
5864 enum machine_mode mode = TImode;
5865 int off, i;
5867 /* Set OFF to the offset from virtual_incoming_args_rtx of
5868 the first vector register. The VR save area lies below
5869 the GR one, and is aligned to 16 bytes. */
5870 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5871 STACK_BOUNDARY / BITS_PER_UNIT);
5872 off -= vr_saved * UNITS_PER_VREG;
5874 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
5876 rtx ptr, mem;
5878 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
5879 mem = gen_frame_mem (mode, ptr);
5880 set_mem_alias_set (mem, get_varargs_alias_set ());
5881 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
5882 off += UNITS_PER_VREG;
5887 /* We don't save the size into *PRETEND_SIZE because we want to avoid
5888 any complication of having crtl->args.pretend_args_size changed. */
5889 cfun->machine->saved_varargs_size
5890 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5891 STACK_BOUNDARY / BITS_PER_UNIT)
5892 + vr_saved * UNITS_PER_VREG);
5895 static void
5896 aarch64_conditional_register_usage (void)
5898 int i;
5899 if (!TARGET_FLOAT)
5901 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
5903 fixed_regs[i] = 1;
5904 call_used_regs[i] = 1;
5909 /* Walk down the type tree of TYPE counting consecutive base elements.
5910 If *MODEP is VOIDmode, then set it to the first valid floating point
5911 type. If a non-floating point type is found, or if a floating point
5912 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5913 otherwise return the count in the sub-tree. */
5914 static int
5915 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5917 enum machine_mode mode;
5918 HOST_WIDE_INT size;
5920 switch (TREE_CODE (type))
5922 case REAL_TYPE:
5923 mode = TYPE_MODE (type);
5924 if (mode != DFmode && mode != SFmode && mode != TFmode)
5925 return -1;
5927 if (*modep == VOIDmode)
5928 *modep = mode;
5930 if (*modep == mode)
5931 return 1;
5933 break;
5935 case COMPLEX_TYPE:
5936 mode = TYPE_MODE (TREE_TYPE (type));
5937 if (mode != DFmode && mode != SFmode && mode != TFmode)
5938 return -1;
5940 if (*modep == VOIDmode)
5941 *modep = mode;
5943 if (*modep == mode)
5944 return 2;
5946 break;
5948 case VECTOR_TYPE:
5949 /* Use V2SImode and V4SImode as representatives of all 64-bit
5950 and 128-bit vector types. */
5951 size = int_size_in_bytes (type);
5952 switch (size)
5954 case 8:
5955 mode = V2SImode;
5956 break;
5957 case 16:
5958 mode = V4SImode;
5959 break;
5960 default:
5961 return -1;
5964 if (*modep == VOIDmode)
5965 *modep = mode;
5967 /* Vector modes are considered to be opaque: two vectors are
5968 equivalent for the purposes of being homogeneous aggregates
5969 if they are the same size. */
5970 if (*modep == mode)
5971 return 1;
5973 break;
5975 case ARRAY_TYPE:
5977 int count;
5978 tree index = TYPE_DOMAIN (type);
5980 /* Can't handle incomplete types. */
5981 if (!COMPLETE_TYPE_P (type))
5982 return -1;
5984 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5985 if (count == -1
5986 || !index
5987 || !TYPE_MAX_VALUE (index)
5988 || !host_integerp (TYPE_MAX_VALUE (index), 1)
5989 || !TYPE_MIN_VALUE (index)
5990 || !host_integerp (TYPE_MIN_VALUE (index), 1)
5991 || count < 0)
5992 return -1;
5994 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
5995 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
5997 /* There must be no padding. */
5998 if (!host_integerp (TYPE_SIZE (type), 1)
5999 || (tree_low_cst (TYPE_SIZE (type), 1)
6000 != count * GET_MODE_BITSIZE (*modep)))
6001 return -1;
6003 return count;
6006 case RECORD_TYPE:
6008 int count = 0;
6009 int sub_count;
6010 tree field;
6012 /* Can't handle incomplete types. */
6013 if (!COMPLETE_TYPE_P (type))
6014 return -1;
6016 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6018 if (TREE_CODE (field) != FIELD_DECL)
6019 continue;
6021 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6022 if (sub_count < 0)
6023 return -1;
6024 count += sub_count;
6027 /* There must be no padding. */
6028 if (!host_integerp (TYPE_SIZE (type), 1)
6029 || (tree_low_cst (TYPE_SIZE (type), 1)
6030 != count * GET_MODE_BITSIZE (*modep)))
6031 return -1;
6033 return count;
6036 case UNION_TYPE:
6037 case QUAL_UNION_TYPE:
6039 /* These aren't very interesting except in a degenerate case. */
6040 int count = 0;
6041 int sub_count;
6042 tree field;
6044 /* Can't handle incomplete types. */
6045 if (!COMPLETE_TYPE_P (type))
6046 return -1;
6048 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6050 if (TREE_CODE (field) != FIELD_DECL)
6051 continue;
6053 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6054 if (sub_count < 0)
6055 return -1;
6056 count = count > sub_count ? count : sub_count;
6059 /* There must be no padding. */
6060 if (!host_integerp (TYPE_SIZE (type), 1)
6061 || (tree_low_cst (TYPE_SIZE (type), 1)
6062 != count * GET_MODE_BITSIZE (*modep)))
6063 return -1;
6065 return count;
6068 default:
6069 break;
6072 return -1;
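/* For example, for "struct { double a[3]; }" the recursion descends
   through the RECORD_TYPE and ARRAY_TYPE to the REAL_TYPE element, sets
   *MODEP to DFmode and returns 3, so the struct is a candidate
   homogeneous floating-point aggregate; for "struct { float f; double d; }"
   the second field's mode does not match *MODEP and the result is -1.  */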
6075 /* Return true if we use LRA instead of reload pass. */
6076 static bool
6077 aarch64_lra_p (void)
6079 return aarch64_lra_flag;
6082 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
6083 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
6084 array types. The C99 floating-point complex types are also considered
6085 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
6086 types, which are GCC extensions and out of the scope of AAPCS64, are
6087 treated as composite types here as well.
6089 Note that MODE itself is not sufficient in determining whether a type
6090 is such a composite type or not. This is because
6091 stor-layout.c:compute_record_mode may have already changed the MODE
6092 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
6093 structure with only one field may have its MODE set to the mode of the
6094 field. Also an integer mode whose size matches the size of the
6095 RECORD_TYPE type may be used to substitute the original mode
6096 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
6097 solely relied on. */
6099 static bool
6100 aarch64_composite_type_p (const_tree type,
6101 enum machine_mode mode)
6103 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
6104 return true;
6106 if (mode == BLKmode
6107 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6108 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
6109 return true;
6111 return false;
6114 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
6115 type as described in AAPCS64 \S 4.1.2.
6117 See the comment above aarch64_composite_type_p for the notes on MODE. */
6119 static bool
6120 aarch64_short_vector_p (const_tree type,
6121 enum machine_mode mode)
6123 HOST_WIDE_INT size = -1;
6125 if (type && TREE_CODE (type) == VECTOR_TYPE)
6126 size = int_size_in_bytes (type);
6127 else if (!aarch64_composite_type_p (type, mode)
6128 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6129 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
6130 size = GET_MODE_SIZE (mode);
6132 return size == 8 || size == 16;
6135 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
6136 shall be passed or returned in simd/fp register(s) (providing these
6137 parameter passing registers are available).
6139 Upon successful return, *COUNT returns the number of needed registers,
6140 *BASE_MODE returns the mode of the individual register and when IS_HA
6141 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
6142 floating-point aggregate or a homogeneous short-vector aggregate. */
6144 static bool
6145 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
6146 const_tree type,
6147 enum machine_mode *base_mode,
6148 int *count,
6149 bool *is_ha)
6151 enum machine_mode new_mode = VOIDmode;
6152 bool composite_p = aarch64_composite_type_p (type, mode);
6154 if (is_ha != NULL) *is_ha = false;
6156 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
6157 || aarch64_short_vector_p (type, mode))
6159 *count = 1;
6160 new_mode = mode;
6162 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6164 if (is_ha != NULL) *is_ha = true;
6165 *count = 2;
6166 new_mode = GET_MODE_INNER (mode);
6168 else if (type && composite_p)
6170 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6172 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
6174 if (is_ha != NULL) *is_ha = true;
6175 *count = ag_count;
6177 else
6178 return false;
6180 else
6181 return false;
6183 *base_mode = new_mode;
6184 return true;
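/* For example, a _Complex double argument is reported with *COUNT == 2,
   *BASE_MODE == DFmode and *IS_HA set, so it is passed in two consecutive
   vector registers; a plain double gives *COUNT == 1 with *IS_HA clear; a
   structure of five doubles exceeds HA_MAX_NUM_FLDS (four under AAPCS64)
   and is rejected.  */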
6187 /* Implement TARGET_STRUCT_VALUE_RTX. */
6189 static rtx
6190 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
6191 int incoming ATTRIBUTE_UNUSED)
6193 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
6196 /* Implements target hook vector_mode_supported_p. */
6197 static bool
6198 aarch64_vector_mode_supported_p (enum machine_mode mode)
6200 if (TARGET_SIMD
6201 && (mode == V4SImode || mode == V8HImode
6202 || mode == V16QImode || mode == V2DImode
6203 || mode == V2SImode || mode == V4HImode
6204 || mode == V8QImode || mode == V2SFmode
6205 || mode == V4SFmode || mode == V2DFmode))
6206 return true;
6208 return false;
6211 /* Return appropriate SIMD container
6212 for MODE within a vector of WIDTH bits. */
6213 static enum machine_mode
6214 aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
6216 gcc_assert (width == 64 || width == 128);
6217 if (TARGET_SIMD)
6219 if (width == 128)
6220 switch (mode)
6222 case DFmode:
6223 return V2DFmode;
6224 case SFmode:
6225 return V4SFmode;
6226 case SImode:
6227 return V4SImode;
6228 case HImode:
6229 return V8HImode;
6230 case QImode:
6231 return V16QImode;
6232 case DImode:
6233 return V2DImode;
6234 default:
6235 break;
6237 else
6238 switch (mode)
6240 case SFmode:
6241 return V2SFmode;
6242 case SImode:
6243 return V2SImode;
6244 case HImode:
6245 return V4HImode;
6246 case QImode:
6247 return V8QImode;
6248 default:
6249 break;
6252 return word_mode;
6255 /* Return 128-bit container as the preferred SIMD mode for MODE. */
6256 static enum machine_mode
6257 aarch64_preferred_simd_mode (enum machine_mode mode)
6259 return aarch64_simd_container_mode (mode, 128);
6262 /* Return the bitmask of possible vector sizes for the vectorizer
6263 to iterate over. */
6264 static unsigned int
6265 aarch64_autovectorize_vector_sizes (void)
6267 return (16 | 8);
6270 /* A table to help perform AArch64-specific name mangling for AdvSIMD
6271 vector types in order to conform to the AAPCS64 (see "Procedure
6272 Call Standard for the ARM 64-bit Architecture", Appendix A). To
6273 qualify for emission with the mangled names defined in that document,
6274 a vector type must not only be of the correct mode but also be
6275 composed of AdvSIMD vector element types (e.g.
6276    __builtin_aarch64_simd_qi); these types are registered by
6277    aarch64_init_simd_builtins ().  In other words, vector types defined
6278    in other ways, e.g. via the vector_size attribute, will get default
6279 mangled names. */
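/* Illustrative example (an assumption, not taken from the original source):
   given the table below, a C++ function void f (int8x8_t) declared via
   <arm_neon.h> would be expected to mangle as _Z1f10__Int8x8_t, because the
   V8QImode/__builtin_aarch64_simd_qi entry maps to "10__Int8x8_t".  */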
6280 typedef struct
6282 enum machine_mode mode;
6283 const char *element_type_name;
6284 const char *mangled_name;
6285 } aarch64_simd_mangle_map_entry;
6287 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
6288 /* 64-bit containerized types. */
6289 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
6290 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
6291 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
6292 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
6293 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
6294 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
6295 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
6296 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
6297 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6298 /* 128-bit containerized types. */
6299 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
6300 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
6301 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
6302 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
6303 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
6304 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
6305 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
6306 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
6307 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
6308 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6309 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6310 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6311 { VOIDmode, NULL, NULL }
6314 /* Implement TARGET_MANGLE_TYPE. */
6316 static const char *
6317 aarch64_mangle_type (const_tree type)
6319 /* The AArch64 ABI documents say that "__va_list" has to be
6320      mangled as if it is in the "std" namespace.  */
6321 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6322 return "St9__va_list";
6324 /* Check the mode of the vector type, and the name of the vector
6325 element type, against the table. */
6326 if (TREE_CODE (type) == VECTOR_TYPE)
6328 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6330 while (pos->mode != VOIDmode)
6332 tree elt_type = TREE_TYPE (type);
6334 if (pos->mode == TYPE_MODE (type)
6335 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6336 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6337 pos->element_type_name))
6338 return pos->mangled_name;
6340 pos++;
6344 /* Use the default mangling. */
6345 return NULL;
6348 /* Return the equivalent letter for size. */
6349 static char
6350 sizetochar (int size)
6352 switch (size)
6354 case 64: return 'd';
6355 case 32: return 's';
6356 case 16: return 'h';
6357 case 8 : return 'b';
6358 default: gcc_unreachable ();
6362 /* Return true iff x is a uniform vector of floating-point
6363 constants, and the constant can be represented in
6364    quarter-precision form.  Note, as aarch64_float_const_representable_p
6365 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
6366 static bool
6367 aarch64_vect_float_const_representable_p (rtx x)
6369 int i = 0;
6370 REAL_VALUE_TYPE r0, ri;
6371 rtx x0, xi;
6373 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6374 return false;
6376 x0 = CONST_VECTOR_ELT (x, 0);
6377 if (!CONST_DOUBLE_P (x0))
6378 return false;
6380 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
6382 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6384 xi = CONST_VECTOR_ELT (x, i);
6385 if (!CONST_DOUBLE_P (xi))
6386 return false;
6388 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6389 if (!REAL_VALUES_EQUAL (r0, ri))
6390 return false;
6393 return aarch64_float_const_representable_p (x0);
6396 /* Return true if OP is a valid AdvSIMD immediate for MODE, and false otherwise.  */
6397 bool
6398 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
6399 struct simd_immediate_info *info)
6401 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6402 matches = 1; \
6403 for (i = 0; i < idx; i += (STRIDE)) \
6404 if (!(TEST)) \
6405 matches = 0; \
6406 if (matches) \
6408 immtype = (CLASS); \
6409 elsize = (ELSIZE); \
6410 eshift = (SHIFT); \
6411 emvn = (NEG); \
6412 break; \
6415 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6416 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6417 unsigned char bytes[16];
6418 int immtype = -1, matches;
6419 unsigned int invmask = inverse ? 0xff : 0;
6420 int eshift, emvn;
6422 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6424 if (! (aarch64_simd_imm_zero_p (op, mode)
6425 || aarch64_vect_float_const_representable_p (op)))
6426 return false;
6428 if (info)
6430 info->value = CONST_VECTOR_ELT (op, 0);
6431 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
6432 info->mvn = false;
6433 info->shift = 0;
6436 return true;
6439 /* Splat vector constant out into a byte vector. */
6440 for (i = 0; i < n_elts; i++)
6442 rtx el = CONST_VECTOR_ELT (op, i);
6443 unsigned HOST_WIDE_INT elpart;
6444 unsigned int part, parts;
6446 if (GET_CODE (el) == CONST_INT)
6448 elpart = INTVAL (el);
6449 parts = 1;
6451 else if (GET_CODE (el) == CONST_DOUBLE)
6453 elpart = CONST_DOUBLE_LOW (el);
6454 parts = 2;
6456 else
6457 gcc_unreachable ();
6459 for (part = 0; part < parts; part++)
6461 unsigned int byte;
6462 for (byte = 0; byte < innersize; byte++)
6464 bytes[idx++] = (elpart & 0xff) ^ invmask;
6465 elpart >>= BITS_PER_UNIT;
6467 if (GET_CODE (el) == CONST_DOUBLE)
6468 elpart = CONST_DOUBLE_HIGH (el);
6472 /* Sanity check. */
6473 gcc_assert (idx == GET_MODE_SIZE (mode));
6477 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6478 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6480 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6481 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6483 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6484 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6486 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6487 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
6489 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
6491 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
6493 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6494 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6496 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6497 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6499 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6500 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6502 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6503 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
6505 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
6507 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
6509 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6510 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6512 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6513 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6515 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6516 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6518 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6519 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6521 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
6523 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6524 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
6526 while (0);
6528 if (immtype == -1)
6529 return false;
6531 if (info)
6533 info->element_width = elsize;
6534 info->mvn = emvn != 0;
6535 info->shift = eshift;
6537 unsigned HOST_WIDE_INT imm = 0;
6539 if (immtype >= 12 && immtype <= 15)
6540 info->msl = true;
6542 /* Un-invert bytes of recognized vector, if necessary. */
6543 if (invmask != 0)
6544 for (i = 0; i < idx; i++)
6545 bytes[i] ^= invmask;
6547 if (immtype == 17)
6549 /* FIXME: Broken on 32-bit H_W_I hosts. */
6550 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6552 for (i = 0; i < 8; i++)
6553 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6554 << (i * BITS_PER_UNIT);
6557 info->value = GEN_INT (imm);
6559 else
6561 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6562 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6564 /* Construct 'abcdefgh' because the assembler cannot handle
6565 generic constants. */
6566 if (info->mvn)
6567 imm = ~imm;
6568 imm = (imm >> info->shift) & 0xff;
6569 info->value = GEN_INT (imm);
6573 return true;
6574 #undef CHECK
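/* Worked example (illustrative, not part of the original source): for a
   V16QImode constant whose bytes are all 0x2a, the byte-splat case above
   (CHECK (1, 8, 16, ...)) matches, so INFO would describe element_width 8,
   shift 0, mvn false and value 0x2a, which
   aarch64_output_simd_mov_immediate below renders roughly as
   "movi v0.16b, 0x2a" after operand substitution.  */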
6577 static bool
6578 aarch64_const_vec_all_same_int_p (rtx x,
6579 HOST_WIDE_INT minval,
6580 HOST_WIDE_INT maxval)
6582 HOST_WIDE_INT firstval;
6583 int count, i;
6585 if (GET_CODE (x) != CONST_VECTOR
6586 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
6587 return false;
6589 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6590 if (firstval < minval || firstval > maxval)
6591 return false;
6593 count = CONST_VECTOR_NUNITS (x);
6594 for (i = 1; i < count; i++)
6595 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6596 return false;
6598 return true;
6601 /* Check whether immediate shift constants are within range.  */
6602 bool
6603 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6605 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6606 if (left)
6607 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6608 else
6609 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
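/* For example (illustrative): with MODE == V8HImode the element width is 16,
   so immediate left shifts are accepted in the range 0..15 and immediate
   right shifts in the range 1..16, matching the AdvSIMD SHL and SSHR/USHR
   immediate encodings.  */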
6612 /* Return true if X is a uniform vector where all elements
6613 are either the floating-point constant 0.0 or the
6614 integer constant 0. */
6615 bool
6616 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6618 return x == CONST0_RTX (mode);
6621 bool
6622 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6624 HOST_WIDE_INT imm = INTVAL (x);
6625 int i;
6627 for (i = 0; i < 8; i++)
6629 unsigned int byte = imm & 0xff;
6630 if (byte != 0xff && byte != 0)
6631 return false;
6632 imm >>= 8;
6635 return true;
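/* Illustrative examples (not from the original source): 0x00ff00ff00ff00ff
   is accepted because every byte is either 0x00 or 0xff, whereas
   0x0102030405060708 is rejected.  */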
6638 bool
6639 aarch64_mov_operand_p (rtx x,
6640 enum aarch64_symbol_context context,
6641 enum machine_mode mode)
6643 if (GET_CODE (x) == HIGH
6644 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6645 return true;
6647 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
6648 return true;
6650 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
6651 return true;
6653 return aarch64_classify_symbolic_expression (x, context)
6654 == SYMBOL_TINY_ABSOLUTE;
6657 /* Return a CONST_VECTOR of MODE with every element set to VAL.  */
6659 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6661 int nunits = GET_MODE_NUNITS (mode);
6662 rtvec v = rtvec_alloc (nunits);
6663 int i;
6665 for (i=0; i < nunits; i++)
6666 RTVEC_ELT (v, i) = GEN_INT (val);
6668 return gen_rtx_CONST_VECTOR (mode, v);
6671 /* Check OP is a legal scalar immediate for the MOVI instruction. */
6673 bool
6674 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
6676 enum machine_mode vmode;
6678 gcc_assert (!VECTOR_MODE_P (mode));
6679 vmode = aarch64_preferred_simd_mode (mode);
6680 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
6681 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
6684 /* Construct and return a PARALLEL RTX vector. */
6686 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6688 int nunits = GET_MODE_NUNITS (mode);
6689 rtvec v = rtvec_alloc (nunits / 2);
6690 int base = high ? nunits / 2 : 0;
6691 rtx t1;
6692 int i;
6694 for (i=0; i < nunits / 2; i++)
6695 RTVEC_ELT (v, i) = GEN_INT (base + i);
6697 t1 = gen_rtx_PARALLEL (mode, v);
6698 return t1;
6701 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
6702 HIGH (exclusive). */
6703 void
6704 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6706 HOST_WIDE_INT lane;
6707 gcc_assert (GET_CODE (operand) == CONST_INT);
6708 lane = INTVAL (operand);
6710 if (lane < low || lane >= high)
6711 error ("lane out of range");
6714 void
6715 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6717 gcc_assert (GET_CODE (operand) == CONST_INT);
6718 HOST_WIDE_INT lane = INTVAL (operand);
6720 if (lane < low || lane >= high)
6721 error ("constant out of range");
6724 /* Emit code to reinterpret one AdvSIMD type as another,
6725 without altering bits. */
6726 void
6727 aarch64_simd_reinterpret (rtx dest, rtx src)
6729 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6732 /* Emit code to place an AdvSIMD pair result in memory locations (with equal
6733 registers). */
6734 void
6735 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6736 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6737 rtx op1)
6739 rtx mem = gen_rtx_MEM (mode, destaddr);
6740 rtx tmp1 = gen_reg_rtx (mode);
6741 rtx tmp2 = gen_reg_rtx (mode);
6743 emit_insn (intfn (tmp1, op1, tmp2));
6745 emit_move_insn (mem, tmp1);
6746 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6747 emit_move_insn (mem, tmp2);
6750 /* Return TRUE if OP is a valid vector addressing mode. */
6751 bool
6752 aarch64_simd_mem_operand_p (rtx op)
6754 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6755 || GET_CODE (XEXP (op, 0)) == REG);
6758 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
6759 not to early-clobber SRC registers in the process.
6761 We assume that the operands described by SRC and DEST represent a
6762 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
6763 number of components into which the copy has been decomposed. */
6764 void
6765 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6766 rtx *src, unsigned int count)
6768 unsigned int i;
6770 if (!reg_overlap_mentioned_p (operands[0], operands[1])
6771 || REGNO (operands[0]) < REGNO (operands[1]))
6773 for (i = 0; i < count; i++)
6775 operands[2 * i] = dest[i];
6776 operands[2 * i + 1] = src[i];
6779 else
6781 for (i = 0; i < count; i++)
6783 operands[2 * i] = dest[count - i - 1];
6784 operands[2 * i + 1] = src[count - i - 1];
6789 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6790    one of the VSTRUCT modes: OI, CI or XI.  */
6792 aarch64_simd_attr_length_move (rtx insn)
6794 enum machine_mode mode;
6796 extract_insn_cached (insn);
6798 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6800 mode = GET_MODE (recog_data.operand[0]);
6801 switch (mode)
6803 case OImode:
6804 return 8;
6805 case CImode:
6806 return 12;
6807 case XImode:
6808 return 16;
6809 default:
6810 gcc_unreachable ();
6813 return 4;
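/* The lengths above reflect that a register-to-register VSTRUCT move is
   split into one 4-byte instruction per 128-bit component: two for OImode,
   three for CImode and four for XImode; the default length of 4 corresponds
   to a move involving memory being a single ld1/st1 multiple-structure
   instruction (a descriptive note, inferred from the returned lengths).  */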
6816 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
6817 alignment of a vector to 128 bits. */
6818 static HOST_WIDE_INT
6819 aarch64_simd_vector_alignment (const_tree type)
6821 HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
6822 return MIN (align, 128);
6825 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
6826 static bool
6827 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
6829 if (is_packed)
6830 return false;
6832   /* We guarantee alignment for vectors up to 128 bits.  */
6833 if (tree_int_cst_compare (TYPE_SIZE (type),
6834 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
6835 return false;
6837 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
6838 return true;
6841 /* If VALS is a vector constant that can be loaded into a register
6842 using DUP, generate instructions to do so and return an RTX to
6843 assign to the register. Otherwise return NULL_RTX. */
6844 static rtx
6845 aarch64_simd_dup_constant (rtx vals)
6847 enum machine_mode mode = GET_MODE (vals);
6848 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6849 int n_elts = GET_MODE_NUNITS (mode);
6850 bool all_same = true;
6851 rtx x;
6852 int i;
6854 if (GET_CODE (vals) != CONST_VECTOR)
6855 return NULL_RTX;
6857 for (i = 1; i < n_elts; ++i)
6859 x = CONST_VECTOR_ELT (vals, i);
6860 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
6861 all_same = false;
6864 if (!all_same)
6865 return NULL_RTX;
6867 /* We can load this constant by using DUP and a constant in a
6868 single ARM register. This will be cheaper than a vector
6869 load. */
6870 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
6871 return gen_rtx_VEC_DUPLICATE (mode, x);
6875 /* Generate code to load VALS, which is a PARALLEL containing only
6876 constants (for vec_init) or CONST_VECTOR, efficiently into a
6877 register. Returns an RTX to copy into the register, or NULL_RTX
6878    for a PARALLEL that cannot be converted into a CONST_VECTOR.  */
6879 static rtx
6880 aarch64_simd_make_constant (rtx vals)
6882 enum machine_mode mode = GET_MODE (vals);
6883 rtx const_dup;
6884 rtx const_vec = NULL_RTX;
6885 int n_elts = GET_MODE_NUNITS (mode);
6886 int n_const = 0;
6887 int i;
6889 if (GET_CODE (vals) == CONST_VECTOR)
6890 const_vec = vals;
6891 else if (GET_CODE (vals) == PARALLEL)
6893 /* A CONST_VECTOR must contain only CONST_INTs and
6894 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
6895 Only store valid constants in a CONST_VECTOR. */
6896 for (i = 0; i < n_elts; ++i)
6898 rtx x = XVECEXP (vals, 0, i);
6899 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
6900 n_const++;
6902 if (n_const == n_elts)
6903 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6905 else
6906 gcc_unreachable ();
6908 if (const_vec != NULL_RTX
6909 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
6910 /* Load using MOVI/MVNI. */
6911 return const_vec;
6912 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
6913 /* Loaded using DUP. */
6914 return const_dup;
6915 else if (const_vec != NULL_RTX)
6916     /* Load from constant pool.  We cannot take advantage of single-cycle
6917 LD1 because we need a PC-relative addressing mode. */
6918 return const_vec;
6919 else
6920 /* A PARALLEL containing something not valid inside CONST_VECTOR.
6921        We cannot construct an initializer.  */
6922 return NULL_RTX;
6925 void
6926 aarch64_expand_vector_init (rtx target, rtx vals)
6928 enum machine_mode mode = GET_MODE (target);
6929 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6930 int n_elts = GET_MODE_NUNITS (mode);
6931 int n_var = 0, one_var = -1;
6932 bool all_same = true;
6933 rtx x, mem;
6934 int i;
6936 x = XVECEXP (vals, 0, 0);
6937 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6938 n_var = 1, one_var = 0;
6940 for (i = 1; i < n_elts; ++i)
6942 x = XVECEXP (vals, 0, i);
6943 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6944 ++n_var, one_var = i;
6946 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6947 all_same = false;
6950 if (n_var == 0)
6952 rtx constant = aarch64_simd_make_constant (vals);
6953 if (constant != NULL_RTX)
6955 emit_move_insn (target, constant);
6956 return;
6960 /* Splat a single non-constant element if we can. */
6961 if (all_same)
6963 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
6964 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
6965 return;
6968 /* One field is non-constant. Load constant then overwrite varying
6969 field. This is more efficient than using the stack. */
6970 if (n_var == 1)
6972 rtx copy = copy_rtx (vals);
6973 rtx index = GEN_INT (one_var);
6974 enum insn_code icode;
6976 /* Load constant part of vector, substitute neighboring value for
6977 varying element. */
6978 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
6979 aarch64_expand_vector_init (target, copy);
6981 /* Insert variable. */
6982 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
6983 icode = optab_handler (vec_set_optab, mode);
6984 gcc_assert (icode != CODE_FOR_nothing);
6985 emit_insn (GEN_FCN (icode) (target, x, index));
6986 return;
6989 /* Construct the vector in memory one field at a time
6990 and load the whole vector. */
6991 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6992 for (i = 0; i < n_elts; i++)
6993 emit_move_insn (adjust_address_nv (mem, inner_mode,
6994 i * GET_MODE_SIZE (inner_mode)),
6995 XVECEXP (vals, 0, i));
6996 emit_move_insn (target, mem);
7000 static unsigned HOST_WIDE_INT
7001 aarch64_shift_truncation_mask (enum machine_mode mode)
7003 return
7004 (aarch64_vector_mode_supported_p (mode)
7005 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
7008 #ifndef TLS_SECTION_ASM_FLAG
7009 #define TLS_SECTION_ASM_FLAG 'T'
7010 #endif
7012 void
7013 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
7014 tree decl ATTRIBUTE_UNUSED)
7016 char flagchars[10], *f = flagchars;
7018 /* If we have already declared this section, we can use an
7019 abbreviated form to switch back to it -- unless this section is
7020      part of a COMDAT group, in which case GAS requires the full
7021 declaration every time. */
7022 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7023 && (flags & SECTION_DECLARED))
7025 fprintf (asm_out_file, "\t.section\t%s\n", name);
7026 return;
7029 if (!(flags & SECTION_DEBUG))
7030 *f++ = 'a';
7031 if (flags & SECTION_WRITE)
7032 *f++ = 'w';
7033 if (flags & SECTION_CODE)
7034 *f++ = 'x';
7035 if (flags & SECTION_SMALL)
7036 *f++ = 's';
7037 if (flags & SECTION_MERGE)
7038 *f++ = 'M';
7039 if (flags & SECTION_STRINGS)
7040 *f++ = 'S';
7041 if (flags & SECTION_TLS)
7042 *f++ = TLS_SECTION_ASM_FLAG;
7043 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7044 *f++ = 'G';
7045 *f = '\0';
7047 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
7049 if (!(flags & SECTION_NOTYPE))
7051 const char *type;
7052 const char *format;
7054 if (flags & SECTION_BSS)
7055 type = "nobits";
7056 else
7057 type = "progbits";
7059 #ifdef TYPE_OPERAND_FMT
7060 format = "," TYPE_OPERAND_FMT;
7061 #else
7062 format = ",@%s";
7063 #endif
7065 fprintf (asm_out_file, format, type);
7067 if (flags & SECTION_ENTSIZE)
7068 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
7069 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7071 if (TREE_CODE (decl) == IDENTIFIER_NODE)
7072 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
7073 else
7074 fprintf (asm_out_file, ",%s,comdat",
7075 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
7079 putc ('\n', asm_out_file);
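/* For illustration (a sketch, not from the original source): for a COMDAT
   text section this routine would be expected to emit a directive along the
   lines of
     .section .text._Z3foov,"axG",@progbits,_Z3foov,comdat
   i.e. flag characters built from FLAGS, an @progbits/@nobits type, and the
   trailing group name when SECTION_LINKONCE is set.  */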
7082 /* Select a format to encode pointers in exception handling data. */
7084 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
7086 int type;
7087 switch (aarch64_cmodel)
7089 case AARCH64_CMODEL_TINY:
7090 case AARCH64_CMODEL_TINY_PIC:
7091 case AARCH64_CMODEL_SMALL:
7092 case AARCH64_CMODEL_SMALL_PIC:
7093       /* text+got+data < 4GB.  4-byte signed relocs are sufficient
7094 for everything. */
7095 type = DW_EH_PE_sdata4;
7096 break;
7097 default:
7098 /* No assumptions here. 8-byte relocs required. */
7099 type = DW_EH_PE_sdata8;
7100 break;
7102 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
7105 /* Emit load exclusive. */
7107 static void
7108 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
7109 rtx mem, rtx model_rtx)
7111 rtx (*gen) (rtx, rtx, rtx);
7113 switch (mode)
7115 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
7116 case HImode: gen = gen_aarch64_load_exclusivehi; break;
7117 case SImode: gen = gen_aarch64_load_exclusivesi; break;
7118 case DImode: gen = gen_aarch64_load_exclusivedi; break;
7119 default:
7120 gcc_unreachable ();
7123 emit_insn (gen (rval, mem, model_rtx));
7126 /* Emit store exclusive. */
7128 static void
7129 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
7130 rtx rval, rtx mem, rtx model_rtx)
7132 rtx (*gen) (rtx, rtx, rtx, rtx);
7134 switch (mode)
7136 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
7137 case HImode: gen = gen_aarch64_store_exclusivehi; break;
7138 case SImode: gen = gen_aarch64_store_exclusivesi; break;
7139 case DImode: gen = gen_aarch64_store_exclusivedi; break;
7140 default:
7141 gcc_unreachable ();
7144 emit_insn (gen (bval, rval, mem, model_rtx));
7147 /* Emit INSN as a jump and mark it as unlikely to be taken.  */
7149 static void
7150 aarch64_emit_unlikely_jump (rtx insn)
7152 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
7154 insn = emit_jump_insn (insn);
7155 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
7158 /* Expand a compare and swap pattern. */
7160 void
7161 aarch64_expand_compare_and_swap (rtx operands[])
7163 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
7164 enum machine_mode mode, cmp_mode;
7165 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
7167 bval = operands[0];
7168 rval = operands[1];
7169 mem = operands[2];
7170 oldval = operands[3];
7171 newval = operands[4];
7172 is_weak = operands[5];
7173 mod_s = operands[6];
7174 mod_f = operands[7];
7175 mode = GET_MODE (mem);
7176 cmp_mode = mode;
7178 /* Normally the succ memory model must be stronger than fail, but in the
7179 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
7180 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
7182 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
7183 && INTVAL (mod_s) == MEMMODEL_RELEASE)
7184 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
7186 switch (mode)
7188 case QImode:
7189 case HImode:
7190 /* For short modes, we're going to perform the comparison in SImode,
7191 so do the zero-extension now. */
7192 cmp_mode = SImode;
7193 rval = gen_reg_rtx (SImode);
7194 oldval = convert_modes (SImode, mode, oldval, true);
7195 /* Fall through. */
7197 case SImode:
7198 case DImode:
7199 /* Force the value into a register if needed. */
7200 if (!aarch64_plus_operand (oldval, mode))
7201 oldval = force_reg (cmp_mode, oldval);
7202 break;
7204 default:
7205 gcc_unreachable ();
7208 switch (mode)
7210 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
7211 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
7212 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
7213 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
7214 default:
7215 gcc_unreachable ();
7218 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
7220 if (mode == QImode || mode == HImode)
7221 emit_move_insn (operands[1], gen_lowpart (mode, rval));
7223 x = gen_rtx_REG (CCmode, CC_REGNUM);
7224 x = gen_rtx_EQ (SImode, x, const0_rtx);
7225 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
7228 /* Split a compare and swap pattern. */
7230 void
7231 aarch64_split_compare_and_swap (rtx operands[])
7233 rtx rval, mem, oldval, newval, scratch;
7234 enum machine_mode mode;
7235 bool is_weak;
7236 rtx label1, label2, x, cond;
7238 rval = operands[0];
7239 mem = operands[1];
7240 oldval = operands[2];
7241 newval = operands[3];
7242 is_weak = (operands[4] != const0_rtx);
7243 scratch = operands[7];
7244 mode = GET_MODE (mem);
7246 label1 = NULL_RTX;
7247 if (!is_weak)
7249 label1 = gen_label_rtx ();
7250 emit_label (label1);
7252 label2 = gen_label_rtx ();
7254 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
7256 cond = aarch64_gen_compare_reg (NE, rval, oldval);
7257 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7258 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7259 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
7260 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7262 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
7264 if (!is_weak)
7266 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
7267 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7268 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
7269 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7271 else
7273 cond = gen_rtx_REG (CCmode, CC_REGNUM);
7274 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7275 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7278 emit_label (label2);
7281 /* Split an atomic operation. */
7283 void
7284 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
7285 rtx value, rtx model_rtx, rtx cond)
7287 enum machine_mode mode = GET_MODE (mem);
7288 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7289 rtx label, x;
7291 label = gen_label_rtx ();
7292 emit_label (label);
7294 if (new_out)
7295 new_out = gen_lowpart (wmode, new_out);
7296 if (old_out)
7297 old_out = gen_lowpart (wmode, old_out);
7298 else
7299 old_out = new_out;
7300 value = simplify_gen_subreg (wmode, value, mode, 0);
7302 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7304 switch (code)
7306 case SET:
7307 new_out = value;
7308 break;
7310 case NOT:
7311 x = gen_rtx_AND (wmode, old_out, value);
7312 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7313 x = gen_rtx_NOT (wmode, new_out);
7314 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7315 break;
7317 case MINUS:
7318 if (CONST_INT_P (value))
7320 value = GEN_INT (-INTVAL (value));
7321 code = PLUS;
7323 /* Fall through. */
7325 default:
7326 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
7327 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7328 break;
7331 aarch64_emit_store_exclusive (mode, cond, mem,
7332 gen_lowpart (mode, new_out), model_rtx);
7334 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7335 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7336 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
7337 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7340 static void
7341 aarch64_print_extension (void)
7343 const struct aarch64_option_extension *opt = NULL;
7345 for (opt = all_extensions; opt->name != NULL; opt++)
7346 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
7347 asm_fprintf (asm_out_file, "+%s", opt->name);
7349 asm_fprintf (asm_out_file, "\n");
7352 static void
7353 aarch64_start_file (void)
7355 if (selected_arch)
7357 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
7358 aarch64_print_extension ();
7360 else if (selected_cpu)
7362 asm_fprintf (asm_out_file, "\t.cpu %s", selected_cpu->name);
7363 aarch64_print_extension ();
7365    default_file_start ();
7368 /* Target hook for c_mode_for_suffix. */
7369 static enum machine_mode
7370 aarch64_c_mode_for_suffix (char suffix)
7372 if (suffix == 'q')
7373 return TFmode;
7375 return VOIDmode;
7378 /* We can only represent floating point constants which will fit in
7379 "quarter-precision" values. These values are characterised by
7380    a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given by
7383 (-1)^s * (n/16) * 2^r
7385 Where:
7386 's' is the sign bit.
7387 'n' is an integer in the range 16 <= n <= 31.
7388 'r' is an integer in the range -3 <= r <= 4. */
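/* Worked examples (illustrative): 0.25 = (16/16) * 2^-2 (s=0, n=16, r=-2) and
   1.9375 = (31/16) * 2^0 are representable; the representable magnitudes
   therefore run from (16/16) * 2^-3 = 0.125 up to (31/16) * 2^4 = 31.0,
   while a value such as 0.1 is not exactly of this form and is rejected.  */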
7390 /* Return true iff X can be represented by a quarter-precision
7391    floating point immediate operand.  Note, we cannot represent 0.0.  */
7392 bool
7393 aarch64_float_const_representable_p (rtx x)
7395 /* This represents our current view of how many bits
7396 make up the mantissa. */
7397 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7398 int exponent;
7399 unsigned HOST_WIDE_INT mantissa, mask;
7400 HOST_WIDE_INT m1, m2;
7401 REAL_VALUE_TYPE r, m;
7403 if (!CONST_DOUBLE_P (x))
7404 return false;
7406 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7408 /* We cannot represent infinities, NaNs or +/-zero. We won't
7409 know if we have +zero until we analyse the mantissa, but we
7410 can reject the other invalid values. */
7411 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
7412 || REAL_VALUE_MINUS_ZERO (r))
7413 return false;
7415 /* Extract exponent. */
7416 r = real_value_abs (&r);
7417 exponent = REAL_EXP (&r);
7419 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7420 highest (sign) bit, with a fixed binary point at bit point_pos.
7421 m1 holds the low part of the mantissa, m2 the high part.
7422 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
7423 bits for the mantissa, this can fail (low bits will be lost). */
7424 real_ldexp (&m, &r, point_pos - exponent);
7425 REAL_VALUE_TO_INT (&m1, &m2, m);
7427 /* If the low part of the mantissa has bits set we cannot represent
7428 the value. */
7429 if (m1 != 0)
7430 return false;
7431 /* We have rejected the lower HOST_WIDE_INT, so update our
7432 understanding of how many bits lie in the mantissa and
7433 look only at the high HOST_WIDE_INT. */
7434 mantissa = m2;
7435 point_pos -= HOST_BITS_PER_WIDE_INT;
7437 /* We can only represent values with a mantissa of the form 1.xxxx. */
7438 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7439 if ((mantissa & mask) != 0)
7440 return false;
7442 /* Having filtered unrepresentable values, we may now remove all
7443 but the highest 5 bits. */
7444 mantissa >>= point_pos - 5;
7446 /* We cannot represent the value 0.0, so reject it. This is handled
7447 elsewhere. */
7448 if (mantissa == 0)
7449 return false;
7451 /* Then, as bit 4 is always set, we can mask it off, leaving
7452 the mantissa in the range [0, 15]. */
7453 mantissa &= ~(1 << 4);
7454 gcc_assert (mantissa <= 15);
7456 /* GCC internally does not use IEEE754-like encoding (where normalized
7457 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
7458 Our mantissa values are shifted 4 places to the left relative to
7459 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
7460 by 5 places to correct for GCC's representation. */
7461 exponent = 5 - exponent;
7463 return (exponent >= 0 && exponent <= 7);
7466 char*
7467 aarch64_output_simd_mov_immediate (rtx const_vector,
7468 enum machine_mode mode,
7469 unsigned width)
7471 bool is_valid;
7472 static char templ[40];
7473 const char *mnemonic;
7474 const char *shift_op;
7475 unsigned int lane_count = 0;
7476 char element_char;
7478 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
7480   /* This will return true to show const_vector is legal for use as an
7481      AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
7482 also update INFO to show how the immediate should be generated. */
7483 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
7484 gcc_assert (is_valid);
7486 element_char = sizetochar (info.element_width);
7487 lane_count = width / info.element_width;
7489 mode = GET_MODE_INNER (mode);
7490 if (mode == SFmode || mode == DFmode)
7492 gcc_assert (info.shift == 0 && ! info.mvn);
7493 if (aarch64_float_const_zero_rtx_p (info.value))
7494 info.value = GEN_INT (0);
7495 else
7497 #define buf_size 20
7498 REAL_VALUE_TYPE r;
7499 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
7500 char float_buf[buf_size] = {'\0'};
7501 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
7502 #undef buf_size
7504 if (lane_count == 1)
7505 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
7506 else
7507 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
7508 lane_count, element_char, float_buf);
7509 return templ;
7513 mnemonic = info.mvn ? "mvni" : "movi";
7514 shift_op = info.msl ? "msl" : "lsl";
7516 if (lane_count == 1)
7517 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
7518 mnemonic, UINTVAL (info.value));
7519 else if (info.shift)
7520 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
7521 ", %s %d", mnemonic, lane_count, element_char,
7522 UINTVAL (info.value), shift_op, info.shift);
7523 else
7524 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
7525 mnemonic, lane_count, element_char, UINTVAL (info.value));
7526 return templ;
7529 char*
7530 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
7531 enum machine_mode mode)
7533 enum machine_mode vmode;
7535 gcc_assert (!VECTOR_MODE_P (mode));
7536 vmode = aarch64_simd_container_mode (mode, 64);
7537 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
7538 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
7541 /* Split operands into moves from op[1] + op[2] into op[0]. */
7543 void
7544 aarch64_split_combinev16qi (rtx operands[3])
7546 unsigned int dest = REGNO (operands[0]);
7547 unsigned int src1 = REGNO (operands[1]);
7548 unsigned int src2 = REGNO (operands[2]);
7549 enum machine_mode halfmode = GET_MODE (operands[1]);
7550 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
7551 rtx destlo, desthi;
7553 gcc_assert (halfmode == V16QImode);
7555 if (src1 == dest && src2 == dest + halfregs)
7557 /* No-op move. Can't split to nothing; emit something. */
7558 emit_note (NOTE_INSN_DELETED);
7559 return;
7562 /* Preserve register attributes for variable tracking. */
7563 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
7564 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
7565 GET_MODE_SIZE (halfmode));
7567 /* Special case of reversed high/low parts. */
7568 if (reg_overlap_mentioned_p (operands[2], destlo)
7569 && reg_overlap_mentioned_p (operands[1], desthi))
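      /* The three EORs below swap the two halves in place without needing a
	 scratch register: x ^= y; y ^= x; x ^= y.  */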
7571 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7572 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
7573 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7575 else if (!reg_overlap_mentioned_p (operands[2], destlo))
7577 /* Try to avoid unnecessary moves if part of the result
7578 is in the right place already. */
7579 if (src1 != dest)
7580 emit_move_insn (destlo, operands[1]);
7581 if (src2 != dest + halfregs)
7582 emit_move_insn (desthi, operands[2]);
7584 else
7586 if (src2 != dest + halfregs)
7587 emit_move_insn (desthi, operands[2]);
7588 if (src1 != dest)
7589 emit_move_insn (destlo, operands[1]);
7593 /* vec_perm support. */
7595 #define MAX_VECT_LEN 16
7597 struct expand_vec_perm_d
7599 rtx target, op0, op1;
7600 unsigned char perm[MAX_VECT_LEN];
7601 enum machine_mode vmode;
7602 unsigned char nelt;
7603 bool one_vector_p;
7604 bool testing_p;
7607 /* Generate a variable permutation. */
7609 static void
7610 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
7612 enum machine_mode vmode = GET_MODE (target);
7613 bool one_vector_p = rtx_equal_p (op0, op1);
7615 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
7616 gcc_checking_assert (GET_MODE (op0) == vmode);
7617 gcc_checking_assert (GET_MODE (op1) == vmode);
7618 gcc_checking_assert (GET_MODE (sel) == vmode);
7619 gcc_checking_assert (TARGET_SIMD);
7621 if (one_vector_p)
7623 if (vmode == V8QImode)
7625 /* Expand the argument to a V16QI mode by duplicating it. */
7626 rtx pair = gen_reg_rtx (V16QImode);
7627 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
7628 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7630 else
7632 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
7635 else
7637 rtx pair;
7639 if (vmode == V8QImode)
7641 pair = gen_reg_rtx (V16QImode);
7642 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
7643 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7645 else
7647 pair = gen_reg_rtx (OImode);
7648 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
7649 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
7654 void
7655 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
7657 enum machine_mode vmode = GET_MODE (target);
7658 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
7659 bool one_vector_p = rtx_equal_p (op0, op1);
7660 rtx rmask[MAX_VECT_LEN], mask;
7662 gcc_checking_assert (!BYTES_BIG_ENDIAN);
7664 /* The TBL instruction does not use a modulo index, so we must take care
7665 of that ourselves. */
7666 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
7667 for (i = 0; i < nelt; ++i)
7668 rmask[i] = mask;
7669 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
7670 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
7672 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
7675 /* Recognize patterns suitable for the TRN instructions. */
7676 static bool
7677 aarch64_evpc_trn (struct expand_vec_perm_d *d)
7679 unsigned int i, odd, mask, nelt = d->nelt;
7680 rtx out, in0, in1, x;
7681 rtx (*gen) (rtx, rtx, rtx);
7682 enum machine_mode vmode = d->vmode;
7684 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7685 return false;
7687 /* Note that these are little-endian tests.
7688 We correct for big-endian later. */
7689 if (d->perm[0] == 0)
7690 odd = 0;
7691 else if (d->perm[0] == 1)
7692 odd = 1;
7693 else
7694 return false;
7695 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7697 for (i = 0; i < nelt; i += 2)
7699 if (d->perm[i] != i + odd)
7700 return false;
7701 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
7702 return false;
7705 /* Success! */
7706 if (d->testing_p)
7707 return true;
7709 in0 = d->op0;
7710 in1 = d->op1;
7711 if (BYTES_BIG_ENDIAN)
7713 x = in0, in0 = in1, in1 = x;
7714 odd = !odd;
7716 out = d->target;
7718 if (odd)
7720 switch (vmode)
7722 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
7723 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
7724 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
7725 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
7726 case V4SImode: gen = gen_aarch64_trn2v4si; break;
7727 case V2SImode: gen = gen_aarch64_trn2v2si; break;
7728 case V2DImode: gen = gen_aarch64_trn2v2di; break;
7729 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
7730 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
7731 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
7732 default:
7733 return false;
7736 else
7738 switch (vmode)
7740 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
7741 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
7742 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
7743 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
7744 case V4SImode: gen = gen_aarch64_trn1v4si; break;
7745 case V2SImode: gen = gen_aarch64_trn1v2si; break;
7746 case V2DImode: gen = gen_aarch64_trn1v2di; break;
7747 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
7748 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
7749 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
7750 default:
7751 return false;
7755 emit_insn (gen (out, in0, in1));
7756 return true;
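/* Illustrative example (not from the original source): for V4SImode the
   selector {0, 4, 2, 6} is matched above as TRN1 and {1, 5, 3, 7} as TRN2
   (little-endian element numbering, with op0:op1 concatenated).  */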
7759 /* Recognize patterns suitable for the UZP instructions. */
7760 static bool
7761 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
7763 unsigned int i, odd, mask, nelt = d->nelt;
7764 rtx out, in0, in1, x;
7765 rtx (*gen) (rtx, rtx, rtx);
7766 enum machine_mode vmode = d->vmode;
7768 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7769 return false;
7771 /* Note that these are little-endian tests.
7772 We correct for big-endian later. */
7773 if (d->perm[0] == 0)
7774 odd = 0;
7775 else if (d->perm[0] == 1)
7776 odd = 1;
7777 else
7778 return false;
7779 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7781 for (i = 0; i < nelt; i++)
7783 unsigned elt = (i * 2 + odd) & mask;
7784 if (d->perm[i] != elt)
7785 return false;
7788 /* Success! */
7789 if (d->testing_p)
7790 return true;
7792 in0 = d->op0;
7793 in1 = d->op1;
7794 if (BYTES_BIG_ENDIAN)
7796 x = in0, in0 = in1, in1 = x;
7797 odd = !odd;
7799 out = d->target;
7801 if (odd)
7803 switch (vmode)
7805 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
7806 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
7807 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
7808 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
7809 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
7810 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
7811 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
7812 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
7813 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
7814 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
7815 default:
7816 return false;
7819 else
7821 switch (vmode)
7823 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
7824 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
7825 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
7826 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
7827 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
7828 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
7829 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
7830 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
7831 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
7832 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
7833 default:
7834 return false;
7838 emit_insn (gen (out, in0, in1));
7839 return true;
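/* Likewise, for V4SImode the selector {0, 2, 4, 6} is matched above as UZP1
   and {1, 3, 5, 7} as UZP2 (illustrative example).  */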
7842 /* Recognize patterns suitable for the ZIP instructions. */
7843 static bool
7844 aarch64_evpc_zip (struct expand_vec_perm_d *d)
7846 unsigned int i, high, mask, nelt = d->nelt;
7847 rtx out, in0, in1, x;
7848 rtx (*gen) (rtx, rtx, rtx);
7849 enum machine_mode vmode = d->vmode;
7851 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7852 return false;
7854 /* Note that these are little-endian tests.
7855 We correct for big-endian later. */
7856 high = nelt / 2;
7857 if (d->perm[0] == high)
7858 /* Do Nothing. */
7860 else if (d->perm[0] == 0)
7861 high = 0;
7862 else
7863 return false;
7864 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7866 for (i = 0; i < nelt / 2; i++)
7868 unsigned elt = (i + high) & mask;
7869 if (d->perm[i * 2] != elt)
7870 return false;
7871 elt = (elt + nelt) & mask;
7872 if (d->perm[i * 2 + 1] != elt)
7873 return false;
7876 /* Success! */
7877 if (d->testing_p)
7878 return true;
7880 in0 = d->op0;
7881 in1 = d->op1;
7882 if (BYTES_BIG_ENDIAN)
7884 x = in0, in0 = in1, in1 = x;
7885 high = !high;
7887 out = d->target;
7889 if (high)
7891 switch (vmode)
7893 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
7894 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
7895 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
7896 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
7897 case V4SImode: gen = gen_aarch64_zip2v4si; break;
7898 case V2SImode: gen = gen_aarch64_zip2v2si; break;
7899 case V2DImode: gen = gen_aarch64_zip2v2di; break;
7900 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
7901 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
7902 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
7903 default:
7904 return false;
7907 else
7909 switch (vmode)
7911 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
7912 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
7913 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
7914 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
7915 case V4SImode: gen = gen_aarch64_zip1v4si; break;
7916 case V2SImode: gen = gen_aarch64_zip1v2si; break;
7917 case V2DImode: gen = gen_aarch64_zip1v2di; break;
7918 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
7919 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
7920 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
7921 default:
7922 return false;
7926 emit_insn (gen (out, in0, in1));
7927 return true;
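/* Likewise, for V4SImode the selector {0, 4, 1, 5} is matched above as ZIP1
   and {2, 6, 3, 7} as ZIP2 (illustrative example).  */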
7930 static bool
7931 aarch64_evpc_dup (struct expand_vec_perm_d *d)
7933 rtx (*gen) (rtx, rtx, rtx);
7934 rtx out = d->target;
7935 rtx in0;
7936 enum machine_mode vmode = d->vmode;
7937 unsigned int i, elt, nelt = d->nelt;
7938 rtx lane;
7940 /* TODO: This may not be big-endian safe. */
7941 if (BYTES_BIG_ENDIAN)
7942 return false;
7944 elt = d->perm[0];
7945 for (i = 1; i < nelt; i++)
7947 if (elt != d->perm[i])
7948 return false;
7951 /* The generic preparation in aarch64_expand_vec_perm_const_1
7952 swaps the operand order and the permute indices if it finds
7953 d->perm[0] to be in the second operand. Thus, we can always
7954 use d->op0 and need not do any extra arithmetic to get the
7955 correct lane number. */
7956 in0 = d->op0;
7957 lane = GEN_INT (elt);
7959 switch (vmode)
7961 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
7962 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
7963 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
7964 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
7965 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
7966 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
7967 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
7968 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
7969 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
7970 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
7971 default:
7972 return false;
7975 emit_insn (gen (out, in0, lane));
7976 return true;
7979 static bool
7980 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
7982 rtx rperm[MAX_VECT_LEN], sel;
7983 enum machine_mode vmode = d->vmode;
7984 unsigned int i, nelt = d->nelt;
7986 /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's
7987 numbering of elements for big-endian, we must reverse the order. */
7988 if (BYTES_BIG_ENDIAN)
7989 return false;
7991 if (d->testing_p)
7992 return true;
7994   /* Generic code will try constant permutation twice: once with the
7995      original mode and again with the elements lowered to QImode.
7996 So wait and don't do the selector expansion ourselves. */
7997 if (vmode != V8QImode && vmode != V16QImode)
7998 return false;
8000 for (i = 0; i < nelt; ++i)
8001 rperm[i] = GEN_INT (d->perm[i]);
8002 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
8003 sel = force_reg (vmode, sel);
8005 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
8006 return true;
8009 static bool
8010 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
8012 /* The pattern matching functions above are written to look for a small
8013 number to begin the sequence (0, 1, N/2). If we begin with an index
8014 from the second operand, we can swap the operands. */
8015 if (d->perm[0] >= d->nelt)
8017 unsigned i, nelt = d->nelt;
8018 rtx x;
8020 for (i = 0; i < nelt; ++i)
8021 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
8023 x = d->op0;
8024 d->op0 = d->op1;
8025 d->op1 = x;
8028 if (TARGET_SIMD)
8030 if (aarch64_evpc_zip (d))
8031 return true;
8032 else if (aarch64_evpc_uzp (d))
8033 return true;
8034 else if (aarch64_evpc_trn (d))
8035 return true;
8036 else if (aarch64_evpc_dup (d))
8037 return true;
8038 return aarch64_evpc_tbl (d);
8040 return false;
8043 /* Expand a vec_perm_const pattern. */
8045 bool
8046 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
8048 struct expand_vec_perm_d d;
8049 int i, nelt, which;
8051 d.target = target;
8052 d.op0 = op0;
8053 d.op1 = op1;
8055 d.vmode = GET_MODE (target);
8056 gcc_assert (VECTOR_MODE_P (d.vmode));
8057 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8058 d.testing_p = false;
8060 for (i = which = 0; i < nelt; ++i)
8062 rtx e = XVECEXP (sel, 0, i);
8063 int ei = INTVAL (e) & (2 * nelt - 1);
8064 which |= (ei < nelt ? 1 : 2);
8065 d.perm[i] = ei;
8068 switch (which)
8070 default:
8071 gcc_unreachable ();
8073 case 3:
8074 d.one_vector_p = false;
8075 if (!rtx_equal_p (op0, op1))
8076 break;
8078 /* The elements of PERM do not suggest that only the first operand
8079 is used, but both operands are identical. Allow easier matching
8080 of the permutation by folding the permutation into the single
8081 input vector. */
8082 /* Fall Through. */
8083 case 2:
8084 for (i = 0; i < nelt; ++i)
8085 d.perm[i] &= nelt - 1;
8086 d.op0 = op1;
8087 d.one_vector_p = true;
8088 break;
8090 case 1:
8091 d.op1 = op0;
8092 d.one_vector_p = true;
8093 break;
8096 return aarch64_expand_vec_perm_const_1 (&d);
8099 static bool
8100 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
8101 const unsigned char *sel)
8103 struct expand_vec_perm_d d;
8104 unsigned int i, nelt, which;
8105 bool ret;
8107 d.vmode = vmode;
8108 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8109 d.testing_p = true;
8110 memcpy (d.perm, sel, nelt);
8112 /* Calculate whether all elements are in one vector. */
8113 for (i = which = 0; i < nelt; ++i)
8115 unsigned char e = d.perm[i];
8116 gcc_assert (e < 2 * nelt);
8117 which |= (e < nelt ? 1 : 2);
8120 /* If all elements are from the second vector, reindex as if from the
8121 first vector. */
8122 if (which == 2)
8123 for (i = 0; i < nelt; ++i)
8124 d.perm[i] -= nelt;
8126 /* Check whether the mask can be applied to a single vector. */
8127 d.one_vector_p = (which != 3);
8129 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
8130 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
8131 if (!d.one_vector_p)
8132 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
8134 start_sequence ();
8135 ret = aarch64_expand_vec_perm_const_1 (&d);
8136 end_sequence ();
8138 return ret;
8141 #undef TARGET_ADDRESS_COST
8142 #define TARGET_ADDRESS_COST aarch64_address_cost
8144 /* This hook determines whether unnamed bitfields affect the alignment
8145 of the containing structure. The hook returns true if the structure
8146 should inherit the alignment requirements of an unnamed bitfield's
8147 type. */
8148 #undef TARGET_ALIGN_ANON_BITFIELD
8149 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
8151 #undef TARGET_ASM_ALIGNED_DI_OP
8152 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
8154 #undef TARGET_ASM_ALIGNED_HI_OP
8155 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
8157 #undef TARGET_ASM_ALIGNED_SI_OP
8158 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
8160 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
8161 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
8162 hook_bool_const_tree_hwi_hwi_const_tree_true
8164 #undef TARGET_ASM_FILE_START
8165 #define TARGET_ASM_FILE_START aarch64_start_file
8167 #undef TARGET_ASM_OUTPUT_MI_THUNK
8168 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
8170 #undef TARGET_ASM_SELECT_RTX_SECTION
8171 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
8173 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
8174 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
8176 #undef TARGET_BUILD_BUILTIN_VA_LIST
8177 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
8179 #undef TARGET_CALLEE_COPIES
8180 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
8182 #undef TARGET_CAN_ELIMINATE
8183 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
8185 #undef TARGET_CANNOT_FORCE_CONST_MEM
8186 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
8188 #undef TARGET_CONDITIONAL_REGISTER_USAGE
8189 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
8191 /* Only the least significant bit is used for initialization guard
8192 variables. */
8193 #undef TARGET_CXX_GUARD_MASK_BIT
8194 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix

#ifdef TARGET_BIG_ENDIAN_DEFAULT
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
#endif

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL aarch64_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN aarch64_fold_builtin

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG aarch64_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance

#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE aarch64_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required

#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS aarch64_init_builtins

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode

#undef TARGET_LRA_P
#define TARGET_LRA_P aarch64_lra_p

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE aarch64_mangle_type

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

/* This target hook should return true if accesses to volatile bitfields
   should use the narrowest mode possible.  It should return false if these
   accesses should use the bitfield container type.  */
#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
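
/* Illustrative example only; the tag below is hypothetical and is not
   used anywhere in the port.  With the hook above returning false, an
   access to 'flags' is performed in the declared 'int' container mode
   (a 32-bit access) rather than in the narrowest mode that covers the
   bitfield.  */
struct aarch64_volatile_bitfield_example
{
  volatile int flags : 8;
};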

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE aarch64_override_options

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
  aarch64_override_options_after_change

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB aarch64_return_in_msb

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS aarch64_rtx_costs

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init

#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p

#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  aarch64_builtin_vectorization_cost

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  aarch64_builtin_vectorized_function

#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
  aarch64_autovectorize_vector_sizes

/* Section anchor support.  */

#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -256

/* Limit the maximum anchor offset to 4k-1, since that's the limit for a
   byte offset; we can do much more for larger data types, but have no way
   to determine the size of the access.  We assume accesses are aligned.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095
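
/* Illustrative sketch only; the label and offset below are hypothetical.
   With the limits above, an object placed near a section anchor is
   addressed as, e.g.,

     adrp x0, .LANCHOR0
     add  x0, x0, :lo12:.LANCHOR0
     ldr  w1, [x0, 2040]

   where the byte offset from the anchor must lie in [-256, 4095]: -256
   is the lower bound of the signed 9-bit unscaled addressing form, and
   4095 the upper bound of the 12-bit immediate form for single-byte
   accesses.  */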

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  aarch64_simd_vector_alignment_reachable

/* vec_perm support.  */

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
  aarch64_vectorize_vec_perm_const_ok

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-aarch64.h"