[AArch64] Make vabs<q>_f<32, 64> a tree/gimple intrinsic.
[official-gcc.git] / gcc/config/aarch64/aarch64.c
blob d2a495dbc626bd3118c52980544157b4c8dfd5b4
1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2013 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "tree.h"
29 #include "regs.h"
30 #include "df.h"
31 #include "hard-reg-set.h"
32 #include "output.h"
33 #include "expr.h"
34 #include "reload.h"
35 #include "toplev.h"
36 #include "target.h"
37 #include "target-def.h"
38 #include "targhooks.h"
39 #include "ggc.h"
40 #include "function.h"
41 #include "tm_p.h"
42 #include "recog.h"
43 #include "langhooks.h"
44 #include "diagnostic-core.h"
45 #include "gimple.h"
46 #include "optabs.h"
47 #include "dwarf2.h"
49 /* Classifies an address.
51 ADDRESS_REG_IMM
52 A simple base register plus immediate offset.
54 ADDRESS_REG_WB
55 A base register indexed by immediate offset with writeback.
57 ADDRESS_REG_REG
58 A base register indexed by (optionally scaled) register.
60 ADDRESS_REG_UXTW
61 A base register indexed by (optionally scaled) zero-extended register.
63 ADDRESS_REG_SXTW
64 A base register indexed by (optionally scaled) sign-extended register.
66 ADDRESS_LO_SUM
67 A LO_SUM rtx with a base register and "LO12" symbol relocation.
 69 ADDRESS_SYMBOLIC
70 A constant symbolic address, in pc-relative literal pool. */
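/* For illustration only (typical assembly forms for each class; these
   examples are not from the original source and are not exhaustive):
     ADDRESS_REG_IMM    ldr  x0, [x1, #16]
     ADDRESS_REG_WB     ldr  x0, [x1, #16]!    or    ldr x0, [x1], #16
     ADDRESS_REG_REG    ldr  x0, [x1, x2, lsl #3]
     ADDRESS_REG_UXTW   ldr  x0, [x1, w2, uxtw #3]
     ADDRESS_REG_SXTW   ldr  x0, [x1, w2, sxtw #3]
     ADDRESS_LO_SUM     ldr  x0, [x1, #:lo12:foo]
     ADDRESS_SYMBOLIC   ldr  x0, .LCn   (pc-relative literal load)  */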
72 enum aarch64_address_type {
73 ADDRESS_REG_IMM,
74 ADDRESS_REG_WB,
75 ADDRESS_REG_REG,
76 ADDRESS_REG_UXTW,
77 ADDRESS_REG_SXTW,
78 ADDRESS_LO_SUM,
79 ADDRESS_SYMBOLIC
82 struct aarch64_address_info {
83 enum aarch64_address_type type;
84 rtx base;
85 rtx offset;
86 int shift;
87 enum aarch64_symbol_type symbol_type;
90 /* The current code model. */
91 enum aarch64_code_model aarch64_cmodel;
93 #ifdef HAVE_AS_TLS
94 #undef TARGET_HAVE_TLS
95 #define TARGET_HAVE_TLS 1
96 #endif
98 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
99 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
100 const_tree,
101 enum machine_mode *, int *,
102 bool *);
103 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
104 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
105 static void aarch64_override_options_after_change (void);
106 static int aarch64_simd_valid_immediate (rtx, enum machine_mode, int, rtx *,
107 int *, unsigned char *, int *, int *);
108 static bool aarch64_vector_mode_supported_p (enum machine_mode);
109 static unsigned bit_count (unsigned HOST_WIDE_INT);
110 static bool aarch64_const_vec_all_same_int_p (rtx,
111 HOST_WIDE_INT, HOST_WIDE_INT);
113 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
114 const unsigned char *sel);
116 /* The processor for which instructions should be scheduled. */
117 enum aarch64_processor aarch64_tune = generic;
119 /* The current tuning set. */
120 const struct tune_params *aarch64_tune_params;
122 /* Mask to specify which instructions we are allowed to generate. */
123 unsigned long aarch64_isa_flags = 0;
125 /* Mask to specify which instruction scheduling options should be used. */
126 unsigned long aarch64_tune_flags = 0;
128 /* Tuning parameters. */
130 #if HAVE_DESIGNATED_INITIALIZERS
131 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
132 #else
133 #define NAMED_PARAM(NAME, VAL) (VAL)
134 #endif
136 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
137 __extension__
138 #endif
139 static const struct cpu_rtx_cost_table generic_rtx_cost_table =
141 NAMED_PARAM (memory_load, COSTS_N_INSNS (1)),
142 NAMED_PARAM (memory_store, COSTS_N_INSNS (0)),
143 NAMED_PARAM (register_shift, COSTS_N_INSNS (1)),
144 NAMED_PARAM (int_divide, COSTS_N_INSNS (6)),
145 NAMED_PARAM (float_divide, COSTS_N_INSNS (2)),
146 NAMED_PARAM (double_divide, COSTS_N_INSNS (6)),
147 NAMED_PARAM (int_multiply, COSTS_N_INSNS (1)),
148 NAMED_PARAM (int_multiply_extend, COSTS_N_INSNS (1)),
149 NAMED_PARAM (int_multiply_add, COSTS_N_INSNS (1)),
150 NAMED_PARAM (int_multiply_extend_add, COSTS_N_INSNS (1)),
151 NAMED_PARAM (float_multiply, COSTS_N_INSNS (0)),
152 NAMED_PARAM (double_multiply, COSTS_N_INSNS (1))
155 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
156 __extension__
157 #endif
158 static const struct cpu_addrcost_table generic_addrcost_table =
160 NAMED_PARAM (pre_modify, 0),
161 NAMED_PARAM (post_modify, 0),
162 NAMED_PARAM (register_offset, 0),
163 NAMED_PARAM (register_extend, 0),
164 NAMED_PARAM (imm_offset, 0)
167 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
168 __extension__
169 #endif
170 static const struct cpu_regmove_cost generic_regmove_cost =
172 NAMED_PARAM (GP2GP, 1),
173 NAMED_PARAM (GP2FP, 2),
174 NAMED_PARAM (FP2GP, 2),
175 /* We currently do not provide direct support for TFmode Q->Q move.
176 Therefore we need to raise the cost above 2 in order to have
177 reload handle the situation. */
178 NAMED_PARAM (FP2FP, 4)
181 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
182 __extension__
183 #endif
184 static const struct tune_params generic_tunings =
186 &generic_rtx_cost_table,
187 &generic_addrcost_table,
188 &generic_regmove_cost,
189 NAMED_PARAM (memmov_cost, 4)
192 /* A processor implementing AArch64. */
193 struct processor
195 const char *const name;
196 enum aarch64_processor core;
197 const char *arch;
198 const unsigned long flags;
199 const struct tune_params *const tune;
202 /* Processor cores implementing AArch64. */
203 static const struct processor all_cores[] =
205 #define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
206 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
207 #include "aarch64-cores.def"
208 #undef AARCH64_CORE
209 {"generic", generic, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
210 {NULL, aarch64_none, NULL, 0, NULL}
213 /* Architectures implementing AArch64. */
214 static const struct processor all_architectures[] =
216 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
217 {NAME, CORE, #ARCH, FLAGS, NULL},
218 #include "aarch64-arches.def"
219 #undef AARCH64_ARCH
220 {"generic", generic, "8", AARCH64_FL_FOR_ARCH8, NULL},
221 {NULL, aarch64_none, NULL, 0, NULL}
224 /* Target specification. These are populated as commandline arguments
225 are processed, or NULL if not specified. */
226 static const struct processor *selected_arch;
227 static const struct processor *selected_cpu;
228 static const struct processor *selected_tune;
230 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
232 /* An ISA extension in the co-processor and main instruction set space. */
233 struct aarch64_option_extension
235 const char *const name;
236 const unsigned long flags_on;
237 const unsigned long flags_off;
240 /* ISA extensions in AArch64. */
241 static const struct aarch64_option_extension all_extensions[] =
243 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
244 {NAME, FLAGS_ON, FLAGS_OFF},
245 #include "aarch64-option-extensions.def"
246 #undef AARCH64_OPT_EXTENSION
247 {NULL, 0, 0}
250 /* Used to track the size of an address when generating a pre/post
251 increment address. */
252 static enum machine_mode aarch64_memory_reference_mode;
254 /* Used to force GTY into this file. */
255 static GTY(()) int gty_dummy;
257 /* A table of valid AArch64 "bitmask immediate" values for
258 logical instructions. */
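/* Illustrative note: a "bitmask immediate" is the class of constants
   encodable in the immediate field of AND/ORR/EOR: a contiguous run of
   set bits, rotated and then replicated across the register in 2, 4, 8,
   16, 32 or 64-bit elements.  For example (values chosen for
   illustration):
     0x5555555555555555   one set bit replicated in 2-bit elements
     0x00ff00ff00ff00ff   8 set bits replicated in 16-bit elements
   The table below caches every such 64-bit value.  */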
260 #define AARCH64_NUM_BITMASKS 5334
261 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
263 /* Did we set flag_omit_frame_pointer just so
264 aarch64_frame_pointer_required would be called? */
265 static bool faked_omit_frame_pointer;
267 typedef enum aarch64_cond_code
269 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
270 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
271 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
273 aarch64_cc;
275 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
277 /* The condition codes of the processor, and the inverse function. */
278 static const char * const aarch64_condition_codes[] =
280 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
281 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
284 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
285 unsigned
286 aarch64_dbx_register_number (unsigned regno)
288 if (GP_REGNUM_P (regno))
289 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
290 else if (regno == SP_REGNUM)
291 return AARCH64_DWARF_SP;
292 else if (FP_REGNUM_P (regno))
293 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
295 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
296 equivalent DWARF register. */
297 return DWARF_FRAME_REGISTERS;
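/* Illustrative mapping (following the AArch64 DWARF register numbering
   used by the AARCH64_DWARF_* macros): x0-x30 map to 0-30, sp to 31 and
   v0-v31 to 64-95; anything else (e.g. the condition flags register)
   has no DWARF equivalent and falls through to the
   DWARF_FRAME_REGISTERS return above.  */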
300 /* Return TRUE if MODE is any of the large INT modes. */
301 static bool
302 aarch64_vect_struct_mode_p (enum machine_mode mode)
304 return mode == OImode || mode == CImode || mode == XImode;
307 /* Return TRUE if MODE is any of the vector modes. */
308 static bool
309 aarch64_vector_mode_p (enum machine_mode mode)
311 return aarch64_vector_mode_supported_p (mode)
312 || aarch64_vect_struct_mode_p (mode);
315 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
316 static bool
317 aarch64_array_mode_supported_p (enum machine_mode mode,
318 unsigned HOST_WIDE_INT nelems)
320 if (TARGET_SIMD
321 && AARCH64_VALID_SIMD_QREG_MODE (mode)
322 && (nelems >= 2 && nelems <= 4))
323 return true;
325 return false;
328 /* Implement HARD_REGNO_NREGS. */
331 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
333 switch (aarch64_regno_regclass (regno))
335 case FP_REGS:
336 case FP_LO_REGS:
337 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
338 default:
339 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
341 gcc_unreachable ();
344 /* Implement HARD_REGNO_MODE_OK. */
347 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
349 if (GET_MODE_CLASS (mode) == MODE_CC)
350 return regno == CC_REGNUM;
352 if (regno == SP_REGNUM || regno == FRAME_POINTER_REGNUM
353 || regno == ARG_POINTER_REGNUM)
354 return mode == Pmode;
356 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
357 return 1;
359 if (FP_REGNUM_P (regno))
361 if (aarch64_vect_struct_mode_p (mode))
362 return
363 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
364 else
365 return 1;
368 return 0;
371 /* Return true if calls to DECL should be treated as
372 long-calls (ie called via a register). */
373 static bool
374 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
376 return false;
379 /* Return true if calls to symbol-ref SYM should be treated as
380 long-calls (ie called via a register). */
381 bool
382 aarch64_is_long_call_p (rtx sym)
384 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
387 /* Return true if the offsets to a zero/sign-extract operation
388 represent an expression that matches an extend operation. The
 389 operands represent the parameters from
391 (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */
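/* A worked example of the test below (values chosen for illustration):
   in DImode, mult_imm == 4 and extract_imm == 34 give
     34 & ~7 == 32 (a power of two), 34 & 7 == 2, and 4 == 1 << 2,
   so the form is accepted; it corresponds to a 32-bit value extended
   and shifted left by 2, i.e. roughly an extended-register operand
   such as "add x0, x1, w2, uxtw #2".  */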
392 bool
393 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
394 rtx extract_imm)
396 HOST_WIDE_INT mult_val, extract_val;
398 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
399 return false;
401 mult_val = INTVAL (mult_imm);
402 extract_val = INTVAL (extract_imm);
404 if (extract_val > 8
405 && extract_val < GET_MODE_BITSIZE (mode)
406 && exact_log2 (extract_val & ~7) > 0
407 && (extract_val & 7) <= 4
408 && mult_val == (1 << (extract_val & 7)))
409 return true;
411 return false;
414 /* Emit an insn that's a simple single-set. Both the operands must be
415 known to be valid. */
416 inline static rtx
417 emit_set_insn (rtx x, rtx y)
419 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
422 /* X and Y are two things to compare using CODE. Emit the compare insn and
423 return the rtx for register 0 in the proper mode. */
425 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
427 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
428 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
430 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
431 return cc_reg;
434 /* Build the SYMBOL_REF for __tls_get_addr. */
436 static GTY(()) rtx tls_get_addr_libfunc;
439 aarch64_tls_get_addr (void)
441 if (!tls_get_addr_libfunc)
442 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
443 return tls_get_addr_libfunc;
446 /* Return the TLS model to use for ADDR. */
448 static enum tls_model
449 tls_symbolic_operand_type (rtx addr)
451 enum tls_model tls_kind = TLS_MODEL_NONE;
452 rtx sym, addend;
454 if (GET_CODE (addr) == CONST)
456 split_const (addr, &sym, &addend);
457 if (GET_CODE (sym) == SYMBOL_REF)
458 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
460 else if (GET_CODE (addr) == SYMBOL_REF)
461 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
463 return tls_kind;
 466 /* We allow LO_SUMs in our legitimate addresses so that combine
 467 can take care of merging addresses where necessary, but for
 468 code generation purposes we generate the address as:
470 RTL Absolute
471 tmp = hi (symbol_ref); adrp x1, foo
472 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
475 PIC TLS
476 adrp x1, :got:foo adrp tmp, :tlsgd:foo
477 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
478 bl __tls_get_addr
481 Load TLS symbol, depending on TLS mechanism and TLS access model.
483 Global Dynamic - Traditional TLS:
484 adrp tmp, :tlsgd:imm
485 add dest, tmp, #:tlsgd_lo12:imm
486 bl __tls_get_addr
488 Global Dynamic - TLS Descriptors:
489 adrp dest, :tlsdesc:imm
490 ldr tmp, [dest, #:tlsdesc_lo12:imm]
491 add dest, dest, #:tlsdesc_lo12:imm
492 blr tmp
493 mrs tp, tpidr_el0
494 add dest, dest, tp
496 Initial Exec:
497 mrs tp, tpidr_el0
498 adrp tmp, :gottprel:imm
499 ldr dest, [tmp, #:gottprel_lo12:imm]
500 add dest, dest, tp
502 Local Exec:
503 mrs tp, tpidr_el0
504 add t0, tp, #:tprel_hi12:imm
505 add t0, #:tprel_lo12_nc:imm
508 static void
509 aarch64_load_symref_appropriately (rtx dest, rtx imm,
510 enum aarch64_symbol_type type)
512 switch (type)
514 case SYMBOL_SMALL_ABSOLUTE:
516 rtx tmp_reg = dest;
517 if (can_create_pseudo_p ())
519 tmp_reg = gen_reg_rtx (Pmode);
522 emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
523 emit_insn (gen_add_losym (dest, tmp_reg, imm));
524 return;
527 case SYMBOL_SMALL_GOT:
529 rtx tmp_reg = dest;
530 if (can_create_pseudo_p ())
532 tmp_reg = gen_reg_rtx (Pmode);
534 emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
535 emit_insn (gen_ldr_got_small (dest, tmp_reg, imm));
536 return;
539 case SYMBOL_SMALL_TLSGD:
541 rtx insns;
542 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
544 start_sequence ();
545 emit_call_insn (gen_tlsgd_small (result, imm));
546 insns = get_insns ();
547 end_sequence ();
549 RTL_CONST_CALL_P (insns) = 1;
550 emit_libcall_block (insns, dest, result, imm);
551 return;
554 case SYMBOL_SMALL_TLSDESC:
556 rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
557 rtx tp;
559 emit_insn (gen_tlsdesc_small (imm));
560 tp = aarch64_load_tp (NULL);
561 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
562 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
563 return;
566 case SYMBOL_SMALL_GOTTPREL:
568 rtx tmp_reg = gen_reg_rtx (Pmode);
569 rtx tp = aarch64_load_tp (NULL);
570 emit_insn (gen_tlsie_small (tmp_reg, imm));
571 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
572 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
573 return;
576 case SYMBOL_SMALL_TPREL:
578 rtx tp = aarch64_load_tp (NULL);
579 emit_insn (gen_tlsle_small (dest, tp, imm));
580 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
581 return;
584 default:
585 gcc_unreachable ();
589 /* Emit a move from SRC to DEST. Assume that the move expanders can
590 handle all moves if !can_create_pseudo_p (). The distinction is
591 important because, unlike emit_move_insn, the move expanders know
592 how to force Pmode objects into the constant pool even when the
593 constant pool address is not itself legitimate. */
594 static rtx
595 aarch64_emit_move (rtx dest, rtx src)
597 return (can_create_pseudo_p ()
598 ? emit_move_insn (dest, src)
599 : emit_move_insn_1 (dest, src));
602 void
603 aarch64_split_128bit_move (rtx dst, rtx src)
605 rtx low_dst;
607 gcc_assert (GET_MODE (dst) == TImode);
609 if (REG_P (dst) && REG_P (src))
611 int src_regno = REGNO (src);
612 int dst_regno = REGNO (dst);
614 gcc_assert (GET_MODE (src) == TImode);
616 /* Handle r -> w, w -> r. */
617 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
619 emit_insn (gen_aarch64_movtilow_di (dst,
620 gen_lowpart (word_mode, src)));
621 emit_insn (gen_aarch64_movtihigh_di (dst,
622 gen_highpart (word_mode, src)));
623 return;
625 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
627 emit_insn (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst),
628 src));
629 emit_insn (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst),
630 src));
631 return;
633 /* Fall through to r -> r cases. */
636 low_dst = gen_lowpart (word_mode, dst);
637 if (REG_P (low_dst)
638 && reg_overlap_mentioned_p (low_dst, src))
640 aarch64_emit_move (gen_highpart (word_mode, dst),
641 gen_highpart_mode (word_mode, TImode, src));
642 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
644 else
646 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
647 aarch64_emit_move (gen_highpart (word_mode, dst),
648 gen_highpart_mode (word_mode, TImode, src));
652 bool
653 aarch64_split_128bit_move_p (rtx dst, rtx src)
655 return (! REG_P (src)
656 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
659 static rtx
660 aarch64_force_temporary (rtx x, rtx value)
662 if (can_create_pseudo_p ())
663 return force_reg (Pmode, value);
664 else
666 x = aarch64_emit_move (x, value);
667 return x;
672 static rtx
673 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
675 if (!aarch64_plus_immediate (GEN_INT (offset), DImode))
677 rtx high;
678 /* Load the full offset into a register. This
679 might be improvable in the future. */
680 high = GEN_INT (offset);
681 offset = 0;
682 high = aarch64_force_temporary (temp, high);
683 reg = aarch64_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg));
685 return plus_constant (mode, reg, offset);
688 void
689 aarch64_expand_mov_immediate (rtx dest, rtx imm)
691 enum machine_mode mode = GET_MODE (dest);
692 unsigned HOST_WIDE_INT mask;
693 int i;
694 bool first;
695 unsigned HOST_WIDE_INT val;
696 bool subtargets;
697 rtx subtarget;
698 int one_match, zero_match;
700 gcc_assert (mode == SImode || mode == DImode);
 702 /* Check what kind of symbol it is. */
703 if (GET_CODE (imm) == SYMBOL_REF
704 || GET_CODE (imm) == LABEL_REF
705 || GET_CODE (imm) == CONST)
707 rtx mem, base, offset;
708 enum aarch64_symbol_type sty;
710 /* If we have (const (plus symbol offset)), separate out the offset
711 before we start classifying the symbol. */
712 split_const (imm, &base, &offset);
714 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
715 switch (sty)
717 case SYMBOL_FORCE_TO_MEM:
718 if (offset != const0_rtx
719 && targetm.cannot_force_const_mem (mode, imm))
721 gcc_assert(can_create_pseudo_p ());
722 base = aarch64_force_temporary (dest, base);
723 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
724 aarch64_emit_move (dest, base);
725 return;
727 mem = force_const_mem (mode, imm);
728 gcc_assert (mem);
729 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
730 return;
732 case SYMBOL_SMALL_TLSGD:
733 case SYMBOL_SMALL_TLSDESC:
734 case SYMBOL_SMALL_GOTTPREL:
735 case SYMBOL_SMALL_GOT:
736 if (offset != const0_rtx)
738 gcc_assert(can_create_pseudo_p ());
739 base = aarch64_force_temporary (dest, base);
740 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
741 aarch64_emit_move (dest, base);
742 return;
744 /* FALLTHRU */
746 case SYMBOL_SMALL_TPREL:
747 case SYMBOL_SMALL_ABSOLUTE:
748 aarch64_load_symref_appropriately (dest, imm, sty);
749 return;
751 default:
752 gcc_unreachable ();
756 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
758 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
759 return;
762 if (!CONST_INT_P (imm))
764 if (GET_CODE (imm) == HIGH)
765 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
766 else
768 rtx mem = force_const_mem (mode, imm);
769 gcc_assert (mem);
770 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
773 return;
776 if (mode == SImode)
778 /* We know we can't do this in 1 insn, and we must be able to do it
779 in two; so don't mess around looking for sequences that don't buy
780 us anything. */
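/* For example (illustrative constant), 0x12345678 is emitted as the
   16-bit move below followed by an insertion into bits [31:16],
   i.e. roughly:
       mov  w0, #0x5678
       movk w0, #0x1234, lsl #16  */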
781 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
782 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
783 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
784 return;
787 /* Remaining cases are all for DImode. */
789 val = INTVAL (imm);
790 subtargets = optimize && can_create_pseudo_p ();
792 one_match = 0;
793 zero_match = 0;
794 mask = 0xffff;
796 for (i = 0; i < 64; i += 16, mask <<= 16)
798 if ((val & mask) == 0)
799 zero_match++;
800 else if ((val & mask) == mask)
801 one_match++;
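/* Example of the chunk counting above (illustrative value): for
   val == 0xffffffff00001234 the four 16-bit chunks are 0x1234, 0x0000,
   0xffff and 0xffff, giving zero_match == 1 and one_match == 2, so the
   one_match == 2 path below moves 0xffffffff0000ffff (a MOVN-encodable
   value) and then inserts 0x1234 into the low chunk with a MOVK.  */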
804 if (one_match == 2)
806 mask = 0xffff;
807 for (i = 0; i < 64; i += 16, mask <<= 16)
809 if ((val & mask) != mask)
811 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
812 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
813 GEN_INT ((val >> i) & 0xffff)));
814 return;
817 gcc_unreachable ();
820 if (zero_match == 2)
821 goto simple_sequence;
823 mask = 0x0ffff0000UL;
824 for (i = 16; i < 64; i += 16, mask <<= 16)
826 HOST_WIDE_INT comp = mask & ~(mask - 1);
828 if (aarch64_uimm12_shift (val - (val & mask)))
830 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
832 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
833 emit_insn (gen_adddi3 (dest, subtarget,
834 GEN_INT (val - (val & mask))));
835 return;
837 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
839 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
841 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
842 GEN_INT ((val + comp) & mask)));
843 emit_insn (gen_adddi3 (dest, subtarget,
844 GEN_INT (val - ((val + comp) & mask))));
845 return;
847 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
849 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
851 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
852 GEN_INT ((val - comp) | ~mask)));
853 emit_insn (gen_adddi3 (dest, subtarget,
854 GEN_INT (val - ((val - comp) | ~mask))));
855 return;
857 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
859 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
861 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
862 GEN_INT (val | ~mask)));
863 emit_insn (gen_adddi3 (dest, subtarget,
864 GEN_INT (val - (val | ~mask))));
865 return;
869 /* See if we can do it by arithmetically combining two
870 immediates. */
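/* Example (illustrative): 0x5555555555555556 is 0x5555555555555555 (a
   valid bitmask immediate) plus 1 (a uimm12), so it can be built as a
   logical-immediate MOV followed by an ADD #1.  */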
871 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
873 int j;
874 mask = 0xffff;
876 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
877 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
879 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
880 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
881 GEN_INT (aarch64_bitmasks[i])));
882 emit_insn (gen_adddi3 (dest, subtarget,
883 GEN_INT (val - aarch64_bitmasks[i])));
884 return;
887 for (j = 0; j < 64; j += 16, mask <<= 16)
889 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
891 emit_insn (gen_rtx_SET (VOIDmode, dest,
892 GEN_INT (aarch64_bitmasks[i])));
893 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
894 GEN_INT ((val >> j) & 0xffff)));
895 return;
900 /* See if we can do it by logically combining two immediates. */
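/* Example (illustrative): 0xf0fff0fff0fff0ff is the inclusive OR of the
   two bitmask immediates 0x00ff00ff00ff00ff and 0xf0f0f0f0f0f0f0f0, so
   it can be built as a logical-immediate MOV followed by an ORR with an
   immediate.  */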
901 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
903 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
905 int j;
907 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
908 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
910 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
911 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
912 GEN_INT (aarch64_bitmasks[i])));
913 emit_insn (gen_iordi3 (dest, subtarget,
914 GEN_INT (aarch64_bitmasks[j])));
915 return;
918 else if ((val & aarch64_bitmasks[i]) == val)
920 int j;
922 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
923 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
926 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
927 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
928 GEN_INT (aarch64_bitmasks[j])));
929 emit_insn (gen_anddi3 (dest, subtarget,
930 GEN_INT (aarch64_bitmasks[i])));
931 return;
936 simple_sequence:
937 first = true;
938 mask = 0xffff;
939 for (i = 0; i < 64; i += 16, mask <<= 16)
941 if ((val & mask) != 0)
943 if (first)
945 emit_insn (gen_rtx_SET (VOIDmode, dest,
946 GEN_INT (val & mask)));
947 first = false;
949 else
950 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
951 GEN_INT ((val >> i) & 0xffff)));
956 static bool
957 aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
959 /* Indirect calls are not currently supported. */
960 if (decl == NULL)
961 return false;
 963 /* Cannot tail-call to long calls, since these are outside of the
 964 range of a branch instruction (we could handle this if we added
 965 support for indirect tail-calls). */
966 if (aarch64_decl_is_long_call_p (decl))
967 return false;
969 return true;
972 /* Implement TARGET_PASS_BY_REFERENCE. */
974 static bool
975 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
976 enum machine_mode mode,
977 const_tree type,
978 bool named ATTRIBUTE_UNUSED)
980 HOST_WIDE_INT size;
981 enum machine_mode dummymode;
982 int nregs;
984 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
985 size = (mode == BLKmode && type)
986 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
988 if (type)
 990 /* Arrays are always passed by reference. */
991 if (TREE_CODE (type) == ARRAY_TYPE)
992 return true;
993 /* Other aggregates based on their size. */
994 if (AGGREGATE_TYPE_P (type))
995 size = int_size_in_bytes (type);
 998 /* Variable-sized arguments are always passed by reference. */
999 if (size < 0)
1000 return true;
1002 /* Can this be a candidate to be passed in fp/simd register(s)? */
1003 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1004 &dummymode, &nregs,
1005 NULL))
1006 return false;
1008 /* Arguments which are variable sized or larger than 2 registers are
 1009 passed by reference unless they are a homogeneous floating-point
1010 aggregate. */
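/* For example (assuming the usual 8-byte UNITS_PER_WORD): a plain
   struct of three 64-bit integers (24 bytes) is passed by reference,
   whereas a struct of four doubles is an HFA, is caught by the check
   above, and is passed in SIMD/FP registers despite being 32 bytes.  */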
1011 return size > 2 * UNITS_PER_WORD;
1014 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1015 static bool
1016 aarch64_return_in_msb (const_tree valtype)
1018 enum machine_mode dummy_mode;
1019 int dummy_int;
1021 /* Never happens in little-endian mode. */
1022 if (!BYTES_BIG_ENDIAN)
1023 return false;
1025 /* Only composite types smaller than or equal to 16 bytes can
1026 be potentially returned in registers. */
1027 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1028 || int_size_in_bytes (valtype) <= 0
1029 || int_size_in_bytes (valtype) > 16)
1030 return false;
1032 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1033 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1034 is always passed/returned in the least significant bits of fp/simd
1035 register(s). */
1036 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1037 &dummy_mode, &dummy_int, NULL))
1038 return false;
1040 return true;
1043 /* Implement TARGET_FUNCTION_VALUE.
1044 Define how to find the value returned by a function. */
1046 static rtx
1047 aarch64_function_value (const_tree type, const_tree func,
1048 bool outgoing ATTRIBUTE_UNUSED)
1050 enum machine_mode mode;
1051 int unsignedp;
1052 int count;
1053 enum machine_mode ag_mode;
1055 mode = TYPE_MODE (type);
1056 if (INTEGRAL_TYPE_P (type))
1057 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1059 if (aarch64_return_in_msb (type))
1061 HOST_WIDE_INT size = int_size_in_bytes (type);
1063 if (size % UNITS_PER_WORD != 0)
1065 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1066 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1070 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1071 &ag_mode, &count, NULL))
1073 if (!aarch64_composite_type_p (type, mode))
1075 gcc_assert (count == 1 && mode == ag_mode);
1076 return gen_rtx_REG (mode, V0_REGNUM);
1078 else
1080 int i;
1081 rtx par;
1083 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1084 for (i = 0; i < count; i++)
1086 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1087 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1088 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1089 XVECEXP (par, 0, i) = tmp;
1091 return par;
1094 else
1095 return gen_rtx_REG (mode, R0_REGNUM);
1098 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1099 Return true if REGNO is the number of a hard register in which the values
 1100 of a called function may come back. */
1102 static bool
1103 aarch64_function_value_regno_p (const unsigned int regno)
1105 /* Maximum of 16 bytes can be returned in the general registers. Examples
1106 of 16-byte return values are: 128-bit integers and 16-byte small
1107 structures (excluding homogeneous floating-point aggregates). */
1108 if (regno == R0_REGNUM || regno == R1_REGNUM)
1109 return true;
1111 /* Up to four fp/simd registers can return a function value, e.g. a
1112 homogeneous floating-point aggregate having four members. */
1113 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1114 return !TARGET_GENERAL_REGS_ONLY;
1116 return false;
1119 /* Implement TARGET_RETURN_IN_MEMORY.
1121 If the type T of the result of a function is such that
1122 void func (T arg)
1123 would require that arg be passed as a value in a register (or set of
1124 registers) according to the parameter passing rules, then the result
1125 is returned in the same registers as would be used for such an
1126 argument. */
1128 static bool
1129 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1131 HOST_WIDE_INT size;
1132 enum machine_mode ag_mode;
1133 int count;
1135 if (!AGGREGATE_TYPE_P (type)
1136 && TREE_CODE (type) != COMPLEX_TYPE
1137 && TREE_CODE (type) != VECTOR_TYPE)
 1138 /* Simple scalar types are always returned in registers. */
1139 return false;
1141 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1142 type,
1143 &ag_mode,
1144 &count,
1145 NULL))
1146 return false;
 1148 /* Types larger than 2 registers are returned in memory. */
1149 size = int_size_in_bytes (type);
1150 return (size < 0 || size > 2 * UNITS_PER_WORD);
1153 static bool
1154 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1155 const_tree type, int *nregs)
1157 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1158 return aarch64_vfp_is_call_or_return_candidate (mode,
1159 type,
1160 &pcum->aapcs_vfp_rmode,
1161 nregs,
1162 NULL);
1165 /* Given MODE and TYPE of a function argument, return the alignment in
1166 bits. The idea is to suppress any stronger alignment requested by
1167 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1168 This is a helper function for local use only. */
1170 static unsigned int
1171 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1173 unsigned int alignment;
1175 if (type)
1177 if (!integer_zerop (TYPE_SIZE (type)))
1179 if (TYPE_MODE (type) == mode)
1180 alignment = TYPE_ALIGN (type);
1181 else
1182 alignment = GET_MODE_ALIGNMENT (mode);
1184 else
1185 alignment = 0;
1187 else
1188 alignment = GET_MODE_ALIGNMENT (mode);
1190 return alignment;
1193 /* Layout a function argument according to the AAPCS64 rules. The rule
1194 numbers refer to the rule numbers in the AAPCS64. */
1196 static void
1197 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1198 const_tree type,
1199 bool named ATTRIBUTE_UNUSED)
1201 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1202 int ncrn, nvrn, nregs;
1203 bool allocate_ncrn, allocate_nvrn;
1205 /* We need to do this once per argument. */
1206 if (pcum->aapcs_arg_processed)
1207 return;
1209 pcum->aapcs_arg_processed = true;
1211 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1212 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1213 mode,
1214 type,
1215 &nregs);
 1217 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1218 The following code thus handles passing by SIMD/FP registers first. */
1220 nvrn = pcum->aapcs_nvrn;
 1222 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
 1223 and homogeneous short-vector aggregates (HVA). */
1224 if (allocate_nvrn)
1226 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1228 pcum->aapcs_nextnvrn = nvrn + nregs;
1229 if (!aarch64_composite_type_p (type, mode))
1231 gcc_assert (nregs == 1);
1232 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1234 else
1236 rtx par;
1237 int i;
1238 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1239 for (i = 0; i < nregs; i++)
1241 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1242 V0_REGNUM + nvrn + i);
1243 tmp = gen_rtx_EXPR_LIST
1244 (VOIDmode, tmp,
1245 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1246 XVECEXP (par, 0, i) = tmp;
1248 pcum->aapcs_reg = par;
1250 return;
1252 else
1254 /* C.3 NSRN is set to 8. */
1255 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1256 goto on_stack;
1260 ncrn = pcum->aapcs_ncrn;
1261 nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1262 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
 1265 /* C6 - C9, though the sign- and zero-extension semantics are
 1266 handled elsewhere. This is the case where the argument fits
 1267 entirely in general registers. */
1268 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1270 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1272 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1274 /* C.8 if the argument has an alignment of 16 then the NGRN is
1275 rounded up to the next even number. */
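/* For example, an __int128 argument (two registers, 16-byte alignment)
   arriving when NGRN is odd, say 1, is allocated to x2/x3 and x1 is
   left unused, as rule C.8 requires.  */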
1276 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1278 ++ncrn;
1279 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1281 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1282 A reg is still generated for it, but the caller should be smart
1283 enough not to use it. */
1284 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1286 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1288 else
1290 rtx par;
1291 int i;
1293 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1294 for (i = 0; i < nregs; i++)
1296 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1297 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1298 GEN_INT (i * UNITS_PER_WORD));
1299 XVECEXP (par, 0, i) = tmp;
1301 pcum->aapcs_reg = par;
1304 pcum->aapcs_nextncrn = ncrn + nregs;
1305 return;
1308 /* C.11 */
1309 pcum->aapcs_nextncrn = NUM_ARG_REGS;
 1311 /* The argument is passed on the stack; record the needed number of words for
1312 this argument (we can re-use NREGS) and align the total size if
1313 necessary. */
1314 on_stack:
1315 pcum->aapcs_stack_words = nregs;
1316 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1317 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1318 16 / UNITS_PER_WORD) + 1;
1319 return;
1322 /* Implement TARGET_FUNCTION_ARG. */
1324 static rtx
1325 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1326 const_tree type, bool named)
1328 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1329 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1331 if (mode == VOIDmode)
1332 return NULL_RTX;
1334 aarch64_layout_arg (pcum_v, mode, type, named);
1335 return pcum->aapcs_reg;
1338 void
1339 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1340 const_tree fntype ATTRIBUTE_UNUSED,
1341 rtx libname ATTRIBUTE_UNUSED,
1342 const_tree fndecl ATTRIBUTE_UNUSED,
1343 unsigned n_named ATTRIBUTE_UNUSED)
1345 pcum->aapcs_ncrn = 0;
1346 pcum->aapcs_nvrn = 0;
1347 pcum->aapcs_nextncrn = 0;
1348 pcum->aapcs_nextnvrn = 0;
1349 pcum->pcs_variant = ARM_PCS_AAPCS64;
1350 pcum->aapcs_reg = NULL_RTX;
1351 pcum->aapcs_arg_processed = false;
1352 pcum->aapcs_stack_words = 0;
1353 pcum->aapcs_stack_size = 0;
1355 return;
1358 static void
1359 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1360 enum machine_mode mode,
1361 const_tree type,
1362 bool named)
1364 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1365 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1367 aarch64_layout_arg (pcum_v, mode, type, named);
1368 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1369 != (pcum->aapcs_stack_words != 0));
1370 pcum->aapcs_arg_processed = false;
1371 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1372 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1373 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1374 pcum->aapcs_stack_words = 0;
1375 pcum->aapcs_reg = NULL_RTX;
1379 bool
1380 aarch64_function_arg_regno_p (unsigned regno)
1382 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1383 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1386 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1387 PARM_BOUNDARY bits of alignment, but will be given anything up
1388 to STACK_BOUNDARY bits if the type requires it. This makes sure
1389 that both before and after the layout of each argument, the Next
1390 Stacked Argument Address (NSAA) will have a minimum alignment of
1391 8 bytes. */
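/* Illustration (assuming the usual AArch64 values PARM_BOUNDARY == 64
   and STACK_BOUNDARY == 128): a char argument is still given 64 bits of
   stack alignment, while a 16-byte-aligned vector type gets 128 bits,
   and any stronger user-requested alignment is clamped to 128 bits.  */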
1393 static unsigned int
1394 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1396 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1398 if (alignment < PARM_BOUNDARY)
1399 alignment = PARM_BOUNDARY;
1400 if (alignment > STACK_BOUNDARY)
1401 alignment = STACK_BOUNDARY;
1402 return alignment;
1405 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1407 Return true if an argument passed on the stack should be padded upwards,
1408 i.e. if the least-significant byte of the stack slot has useful data.
1410 Small aggregate types are placed in the lowest memory address.
1412 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
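/* For example, on a big-endian target a 32-bit int passed on the stack
   occupies the upper (highest-addressed) four bytes of its 8-byte slot,
   i.e. it is padded downward, while a 12-byte structure starts at the
   lowest byte address of its slot, i.e. it is padded upward.  */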
1414 bool
1415 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1417 /* On little-endian targets, the least significant byte of every stack
1418 argument is passed at the lowest byte address of the stack slot. */
1419 if (!BYTES_BIG_ENDIAN)
1420 return true;
1422 /* Otherwise, integral types and floating point types are padded downward:
1423 the least significant byte of a stack argument is passed at the highest
1424 byte address of the stack slot. */
1425 if (type
1426 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type))
1427 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1428 return false;
1430 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1431 return true;
1434 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
 1436 It specifies padding for the last (and possibly the only)
 1437 element of a block move between registers and memory. If
 1438 the block is viewed as being in memory, padding upward means that
 1439 the last element is padded after its most significant byte,
 1440 while with downward padding the last element is padded on
 1441 its least significant byte side.
1443 Small aggregates and small complex types are always padded
1444 upwards.
1446 We don't need to worry about homogeneous floating-point or
1447 short-vector aggregates; their move is not affected by the
1448 padding direction determined here. Regardless of endianness,
1449 each element of such an aggregate is put in the least
1450 significant bits of a fp/simd register.
1452 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1453 register has useful data, and return the opposite if the most
1454 significant byte does. */
1456 bool
1457 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1458 bool first ATTRIBUTE_UNUSED)
1461 /* Small composite types are always padded upward. */
1462 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1464 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1465 : GET_MODE_SIZE (mode));
1466 if (size < 2 * UNITS_PER_WORD)
1467 return true;
1470 /* Otherwise, use the default padding. */
1471 return !BYTES_BIG_ENDIAN;
1474 static enum machine_mode
1475 aarch64_libgcc_cmp_return_mode (void)
1477 return SImode;
1480 static bool
1481 aarch64_frame_pointer_required (void)
1483 /* If the function contains dynamic stack allocations, we need to
1484 use the frame pointer to access the static parts of the frame. */
1485 if (cfun->calls_alloca)
1486 return true;
1488 /* We may have turned flag_omit_frame_pointer on in order to have this
1489 function called; if we did, we also set the 'faked_omit_frame_pointer' flag
1490 and we'll check it here.
1491 If we really did set flag_omit_frame_pointer normally, then we return false
1492 (no frame pointer required) in all cases. */
1494 if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
1495 return false;
1496 else if (flag_omit_leaf_frame_pointer)
1497 return !crtl->is_leaf;
1498 return true;
1501 /* Mark the registers that need to be saved by the callee and calculate
1502 the size of the callee-saved registers area and frame record (both FP
1503 and LR may be omitted). */
1504 static void
1505 aarch64_layout_frame (void)
1507 HOST_WIDE_INT offset = 0;
1508 int regno;
1510 if (reload_completed && cfun->machine->frame.laid_out)
1511 return;
1513 cfun->machine->frame.fp_lr_offset = 0;
1515 /* First mark all the registers that really need to be saved... */
1516 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1517 cfun->machine->frame.reg_offset[regno] = -1;
1519 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1520 cfun->machine->frame.reg_offset[regno] = -1;
1522 /* ... that includes the eh data registers (if needed)... */
1523 if (crtl->calls_eh_return)
1524 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1525 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1527 /* ... and any callee saved register that dataflow says is live. */
1528 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1529 if (df_regs_ever_live_p (regno)
1530 && !call_used_regs[regno])
1531 cfun->machine->frame.reg_offset[regno] = 0;
1533 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1534 if (df_regs_ever_live_p (regno)
1535 && !call_used_regs[regno])
1536 cfun->machine->frame.reg_offset[regno] = 0;
1538 if (frame_pointer_needed)
1540 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1541 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1542 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1545 /* Now assign stack slots for them. */
1546 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1547 if (cfun->machine->frame.reg_offset[regno] != -1)
1549 cfun->machine->frame.reg_offset[regno] = offset;
1550 offset += UNITS_PER_WORD;
1553 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1554 if (cfun->machine->frame.reg_offset[regno] != -1)
1556 cfun->machine->frame.reg_offset[regno] = offset;
1557 offset += UNITS_PER_WORD;
1560 if (frame_pointer_needed)
1562 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1563 offset += UNITS_PER_WORD;
1564 cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1567 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1569 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1570 offset += UNITS_PER_WORD;
1571 cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1574 cfun->machine->frame.padding0 =
1575 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1576 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1578 cfun->machine->frame.saved_regs_size = offset;
1579 cfun->machine->frame.laid_out = true;
1582 /* Make the last instruction frame-related and note that it performs
1583 the operation described by FRAME_PATTERN. */
1585 static void
1586 aarch64_set_frame_expr (rtx frame_pattern)
1588 rtx insn;
1590 insn = get_last_insn ();
1591 RTX_FRAME_RELATED_P (insn) = 1;
1592 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1593 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1594 frame_pattern,
1595 REG_NOTES (insn));
1598 static bool
1599 aarch64_register_saved_on_entry (int regno)
1601 return cfun->machine->frame.reg_offset[regno] != -1;
1605 static void
1606 aarch64_save_or_restore_fprs (int start_offset, int increment,
1607 bool restore, rtx base_rtx)
1610 unsigned regno;
1611 unsigned regno2;
1612 rtx insn;
1613 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1616 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1618 if (aarch64_register_saved_on_entry (regno))
1620 rtx mem;
1621 mem = gen_mem_ref (DFmode,
1622 plus_constant (Pmode,
1623 base_rtx,
1624 start_offset));
1626 for (regno2 = regno + 1;
1627 regno2 <= V31_REGNUM
1628 && !aarch64_register_saved_on_entry (regno2);
1629 regno2++)
1631 /* Empty loop. */
1633 if (regno2 <= V31_REGNUM &&
1634 aarch64_register_saved_on_entry (regno2))
1636 rtx mem2;
1637 /* Next highest register to be saved. */
1638 mem2 = gen_mem_ref (DFmode,
1639 plus_constant
1640 (Pmode,
1641 base_rtx,
1642 start_offset + increment));
1643 if (restore == false)
1645 insn = emit_insn
1646 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1647 mem2, gen_rtx_REG (DFmode, regno2)));
1650 else
1652 insn = emit_insn
1653 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1654 gen_rtx_REG (DFmode, regno2), mem2));
1656 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno));
1657 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno2));
1660 /* The first part of a frame-related parallel insn
1661 is always assumed to be relevant to the frame
 1662 calculations; subsequent parts are only
1663 frame-related if explicitly marked. */
1664 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1665 1)) = 1;
1666 regno = regno2;
1667 start_offset += increment * 2;
1669 else
1671 if (restore == false)
1672 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1673 else
1675 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1676 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1678 start_offset += increment;
1680 RTX_FRAME_RELATED_P (insn) = 1;
 1687 /* Offset from the stack pointer at which the saves and
 1688 restores have to happen. */
1689 static void
1690 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1691 bool restore)
1693 rtx insn;
1694 rtx base_rtx = stack_pointer_rtx;
1695 HOST_WIDE_INT start_offset = offset;
1696 HOST_WIDE_INT increment = UNITS_PER_WORD;
1697 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1698 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1699 unsigned regno;
1700 unsigned regno2;
1702 for (regno = R0_REGNUM; regno <= limit; regno++)
1704 if (aarch64_register_saved_on_entry (regno))
1706 rtx mem;
1707 mem = gen_mem_ref (Pmode,
1708 plus_constant (Pmode,
1709 base_rtx,
1710 start_offset));
1712 for (regno2 = regno + 1;
1713 regno2 <= limit
1714 && !aarch64_register_saved_on_entry (regno2);
1715 regno2++)
1717 /* Empty loop. */
1719 if (regno2 <= limit &&
1720 aarch64_register_saved_on_entry (regno2))
1722 rtx mem2;
1723 /* Next highest register to be saved. */
1724 mem2 = gen_mem_ref (Pmode,
1725 plus_constant
1726 (Pmode,
1727 base_rtx,
1728 start_offset + increment));
1729 if (restore == false)
1731 insn = emit_insn
1732 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1733 mem2, gen_rtx_REG (DImode, regno2)));
1736 else
1738 insn = emit_insn
1739 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
1740 gen_rtx_REG (DImode, regno2), mem2));
1742 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1743 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
1746 /* The first part of a frame-related parallel insn
1747 is always assumed to be relevant to the frame
 1748 calculations; subsequent parts are only
1749 frame-related if explicitly marked. */
1750 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1751 1)) = 1;
1752 regno = regno2;
1753 start_offset += increment * 2;
1755 else
1757 if (restore == false)
1758 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
1759 else
1761 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
1762 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1764 start_offset += increment;
1766 RTX_FRAME_RELATED_P (insn) = 1;
1770 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
1774 /* AArch64 stack frames generated by this compiler look like:
1776 +-------------------------------+
1778 | incoming stack arguments |
1780 +-------------------------------+ <-- arg_pointer_rtx
1782 | callee-allocated save area |
1783 | for register varargs |
1785 +-------------------------------+
1787 | local variables |
1789 +-------------------------------+ <-- frame_pointer_rtx
1791 | callee-saved registers |
1793 +-------------------------------+
1794 | LR' |
1795 +-------------------------------+
1796 | FP' |
1797 P +-------------------------------+ <-- hard_frame_pointer_rtx
1798 | dynamic allocation |
1799 +-------------------------------+
1801 | outgoing stack arguments |
1803 +-------------------------------+ <-- stack_pointer_rtx
1805 Dynamic stack allocations such as alloca insert data at point P.
1806 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
1807 hard_frame_pointer_rtx unchanged. */
1809 /* Generate the prologue instructions for entry into a function.
1810 Establish the stack frame by decreasing the stack pointer with a
1811 properly calculated size and, if necessary, create a frame record
1812 filled with the values of LR and previous frame pointer. The
 1813 current FP is also set up if it is in use. */
1815 void
1816 aarch64_expand_prologue (void)
1818 /* sub sp, sp, #<frame_size>
1819 stp {fp, lr}, [sp, #<frame_size> - 16]
1820 add fp, sp, #<frame_size> - hardfp_offset
1821 stp {cs_reg}, [fp, #-16] etc.
1823 sub sp, sp, <final_adjustment_if_any>
1825 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
1826 HOST_WIDE_INT frame_size, offset;
1827 HOST_WIDE_INT fp_offset; /* FP offset from SP */
1828 rtx insn;
1830 aarch64_layout_frame ();
1831 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
1832 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
1833 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
1834 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
1835 + crtl->outgoing_args_size);
1836 offset = frame_size = AARCH64_ROUND_UP (frame_size,
1837 STACK_BOUNDARY / BITS_PER_UNIT);
1839 if (flag_stack_usage_info)
1840 current_function_static_stack_size = frame_size;
1842 fp_offset = (offset
1843 - original_frame_size
1844 - cfun->machine->frame.saved_regs_size);
 1846 /* Store-pair and load-pair instructions have an offset range of only -512 to 504 (a signed 7-bit immediate scaled by the access size). */
1847 if (offset >= 512)
1849 /* When the frame has a large size, an initial decrease is done on
1850 the stack pointer to jump over the callee-allocated save area for
1851 register varargs, the local variable area and/or the callee-saved
1852 register area. This will allow the pre-index write-back
1853 store pair instructions to be used for setting up the stack frame
1854 efficiently. */
1855 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
1856 if (offset >= 512)
1857 offset = cfun->machine->frame.saved_regs_size;
1859 frame_size -= (offset + crtl->outgoing_args_size);
1860 fp_offset = 0;
1862 if (frame_size >= 0x1000000)
1864 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
1865 emit_move_insn (op0, GEN_INT (-frame_size));
1866 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
1867 aarch64_set_frame_expr (gen_rtx_SET
1868 (Pmode, stack_pointer_rtx,
1869 gen_rtx_PLUS (Pmode,
1870 stack_pointer_rtx,
1871 GEN_INT (-frame_size))));
1873 else if (frame_size > 0)
1875 if ((frame_size & 0xfff) != frame_size)
1877 insn = emit_insn (gen_add2_insn
1878 (stack_pointer_rtx,
1879 GEN_INT (-(frame_size
1880 & ~(HOST_WIDE_INT)0xfff))));
1881 RTX_FRAME_RELATED_P (insn) = 1;
1883 if ((frame_size & 0xfff) != 0)
1885 insn = emit_insn (gen_add2_insn
1886 (stack_pointer_rtx,
1887 GEN_INT (-(frame_size
1888 & (HOST_WIDE_INT)0xfff))));
1889 RTX_FRAME_RELATED_P (insn) = 1;
1893 else
1894 frame_size = -1;
1896 if (offset > 0)
1898 /* Save the frame pointer and lr if the frame pointer is needed
1899 first. Make the frame pointer point to the location of the
1900 old frame pointer on the stack. */
1901 if (frame_pointer_needed)
1903 rtx mem_fp, mem_lr;
1905 if (fp_offset)
1907 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
1908 GEN_INT (-offset)));
1909 RTX_FRAME_RELATED_P (insn) = 1;
1910 aarch64_set_frame_expr (gen_rtx_SET
1911 (Pmode, stack_pointer_rtx,
1912 gen_rtx_MINUS (Pmode,
1913 stack_pointer_rtx,
1914 GEN_INT (offset))));
1915 mem_fp = gen_frame_mem (DImode,
1916 plus_constant (Pmode,
1917 stack_pointer_rtx,
1918 fp_offset));
1919 mem_lr = gen_frame_mem (DImode,
1920 plus_constant (Pmode,
1921 stack_pointer_rtx,
1922 fp_offset
1923 + UNITS_PER_WORD));
1924 insn = emit_insn (gen_store_pairdi (mem_fp,
1925 hard_frame_pointer_rtx,
1926 mem_lr,
1927 gen_rtx_REG (DImode,
1928 LR_REGNUM)));
1930 else
1932 insn = emit_insn (gen_storewb_pairdi_di
1933 (stack_pointer_rtx, stack_pointer_rtx,
1934 hard_frame_pointer_rtx,
1935 gen_rtx_REG (DImode, LR_REGNUM),
1936 GEN_INT (-offset),
1937 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
1938 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
1941 /* The first part of a frame-related parallel insn is always
1942 assumed to be relevant to the frame calculations;
 1943 subsequent parts are only frame-related if explicitly
1944 marked. */
1945 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1946 RTX_FRAME_RELATED_P (insn) = 1;
1948 /* Set up frame pointer to point to the location of the
1949 previous frame pointer on the stack. */
1950 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
1951 stack_pointer_rtx,
1952 GEN_INT (fp_offset)));
1953 aarch64_set_frame_expr (gen_rtx_SET
1954 (Pmode, hard_frame_pointer_rtx,
1955 gen_rtx_PLUS (Pmode,
1956 stack_pointer_rtx,
1957 GEN_INT (fp_offset))));
1958 RTX_FRAME_RELATED_P (insn) = 1;
1959 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
1960 hard_frame_pointer_rtx));
1962 else
1964 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
1965 GEN_INT (-offset)));
1966 RTX_FRAME_RELATED_P (insn) = 1;
1969 aarch64_save_or_restore_callee_save_registers
1970 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
1973 /* when offset >= 512,
1974 sub sp, sp, #<outgoing_args_size> */
1975 if (frame_size > -1)
1977 if (crtl->outgoing_args_size > 0)
1979 insn = emit_insn (gen_add2_insn
1980 (stack_pointer_rtx,
1981 GEN_INT (- crtl->outgoing_args_size)));
1982 RTX_FRAME_RELATED_P (insn) = 1;
1987 /* Generate the epilogue instructions for returning from a function. */
1988 void
1989 aarch64_expand_epilogue (bool for_sibcall)
1991 HOST_WIDE_INT original_frame_size, frame_size, offset;
1992 HOST_WIDE_INT fp_offset;
1993 rtx insn;
1994 rtx cfa_reg;
1996 aarch64_layout_frame ();
1997 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
1998 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
1999 + crtl->outgoing_args_size);
2000 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2001 STACK_BOUNDARY / BITS_PER_UNIT);
2003 fp_offset = (offset
2004 - original_frame_size
2005 - cfun->machine->frame.saved_regs_size);
2007 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
 2009 /* Store-pair and load-pair instructions have an offset range of only -512 to 504 (a signed 7-bit immediate scaled by the access size). */
2010 if (offset >= 512)
2012 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2013 if (offset >= 512)
2014 offset = cfun->machine->frame.saved_regs_size;
2016 frame_size -= (offset + crtl->outgoing_args_size);
2017 fp_offset = 0;
2018 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2020 insn = emit_insn (gen_add2_insn
2021 (stack_pointer_rtx,
2022 GEN_INT (crtl->outgoing_args_size)));
2023 RTX_FRAME_RELATED_P (insn) = 1;
2026 else
2027 frame_size = -1;
2029 /* If there were outgoing arguments or we've done dynamic stack
2030 allocation, then restore the stack pointer from the frame
2031 pointer. This is at most one insn and more efficient than using
2032 GCC's internal mechanism. */
2033 if (frame_pointer_needed
2034 && (crtl->outgoing_args_size || cfun->calls_alloca))
2036 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2037 hard_frame_pointer_rtx,
2038 GEN_INT (- fp_offset)));
2039 RTX_FRAME_RELATED_P (insn) = 1;
2040 /* As SP is set to (FP - fp_offset), according to the rules in
2041 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2042 from the value of SP from now on. */
2043 cfa_reg = stack_pointer_rtx;
2046 aarch64_save_or_restore_callee_save_registers
2047 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2049 /* Restore the frame pointer and lr if the frame pointer is needed. */
2050 if (offset > 0)
2052 if (frame_pointer_needed)
2054 rtx mem_fp, mem_lr;
2056 if (fp_offset)
2058 mem_fp = gen_frame_mem (DImode,
2059 plus_constant (Pmode,
2060 stack_pointer_rtx,
2061 fp_offset));
2062 mem_lr = gen_frame_mem (DImode,
2063 plus_constant (Pmode,
2064 stack_pointer_rtx,
2065 fp_offset
2066 + UNITS_PER_WORD));
2067 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2068 mem_fp,
2069 gen_rtx_REG (DImode,
2070 LR_REGNUM),
2071 mem_lr));
2073 else
2075 insn = emit_insn (gen_loadwb_pairdi_di
2076 (stack_pointer_rtx,
2077 stack_pointer_rtx,
2078 hard_frame_pointer_rtx,
2079 gen_rtx_REG (DImode, LR_REGNUM),
2080 GEN_INT (offset),
2081 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2082 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2083 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2084 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2085 plus_constant (Pmode, cfa_reg,
2086 offset))));
2089 /* The first part of a frame-related parallel insn
2090 is always assumed to be relevant to the frame
2091 calculations; subsequent parts are only
2092 frame-related if explicitly marked. */
2093 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2094 RTX_FRAME_RELATED_P (insn) = 1;
2095 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2096 add_reg_note (insn, REG_CFA_RESTORE,
2097 gen_rtx_REG (DImode, LR_REGNUM));
2099 if (fp_offset)
2101 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2102 GEN_INT (offset)));
2103 RTX_FRAME_RELATED_P (insn) = 1;
2106 else
2108 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2109 GEN_INT (offset)));
2110 RTX_FRAME_RELATED_P (insn) = 1;
2114 /* Stack adjustment for exception handler. */
2115 if (crtl->calls_eh_return)
2117 /* We need to unwind the stack by the offset computed by
2118 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2119 based on SP. Ideally we would update the SP and define the
2120 CFA along the lines of:
2122 SP = SP + EH_RETURN_STACKADJ_RTX
2123 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2125 However the dwarf emitter only understands a constant
2126 register offset.
2128 The solution chosen here is to use the otherwise unused IP0
2129 as a temporary register to hold the current SP value.  The
2130 CFA is described using IP0, then SP is modified. */
2132 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2134 insn = emit_move_insn (ip0, stack_pointer_rtx);
2135 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2136 RTX_FRAME_RELATED_P (insn) = 1;
2138 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2140 /* Ensure the assignment to IP0 does not get optimized away. */
2141 emit_use (ip0);
2144 if (frame_size > -1)
2146 if (frame_size >= 0x1000000)
2148 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2149 emit_move_insn (op0, GEN_INT (frame_size));
2150 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2151 aarch64_set_frame_expr (gen_rtx_SET
2152 (Pmode, stack_pointer_rtx,
2153 gen_rtx_PLUS (Pmode,
2154 stack_pointer_rtx,
2155 GEN_INT (frame_size))));
2157 else if (frame_size > 0)
2159 if ((frame_size & 0xfff) != 0)
2161 insn = emit_insn (gen_add2_insn
2162 (stack_pointer_rtx,
2163 GEN_INT ((frame_size
2164 & (HOST_WIDE_INT) 0xfff))));
2165 RTX_FRAME_RELATED_P (insn) = 1;
2167 if ((frame_size & 0xfff) != frame_size)
2169 insn = emit_insn (gen_add2_insn
2170 (stack_pointer_rtx,
2171 GEN_INT ((frame_size
2172 & ~ (HOST_WIDE_INT) 0xfff))));
2173 RTX_FRAME_RELATED_P (insn) = 1;
2177 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2178 gen_rtx_PLUS (Pmode,
2179 stack_pointer_rtx,
2180 GEN_INT (offset))));
2183 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2184 if (!for_sibcall)
2185 emit_jump_insn (ret_rtx);
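/* The matching small-frame epilogue emitted above is roughly the mirror
   image of the prologue (illustrative only):

	sub	sp, x29, #<fp_offset>		// only when SP was changed by
						// alloca or outgoing arguments
	... loads of the remaining callee-saved registers ...
	ldp	x29, x30, [sp, #<fp_offset>]
	add	sp, sp, #<offset>
	ret
*/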
2188 /* Return the place to copy the exception unwinding return address to.
2189 This will probably be a stack slot, but could (in theory) be the
2190 return register. */
2192 aarch64_final_eh_return_addr (void)
2194 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2195 aarch64_layout_frame ();
2196 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2197 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2198 + crtl->outgoing_args_size);
2199 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2200 STACK_BOUNDARY / BITS_PER_UNIT);
2201 fp_offset = offset
2202 - original_frame_size
2203 - cfun->machine->frame.saved_regs_size;
2205 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2206 return gen_rtx_REG (DImode, LR_REGNUM);
2208 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2209 result in a store to save LR introduced by builtin_eh_return () being
2210 incorrectly deleted because the alias is not detected.
2211 So in the calculation of the address to copy the exception unwinding
2212 return address to, we distinguish two cases.
2213 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2214 we return a SP-relative location since all the addresses are SP-relative
2215 in this case. This prevents the store from being optimized away.
2216 If the fp_offset is not 0, then the addresses will be FP-relative and
2217 therefore we return a FP-relative location. */
2219 if (frame_pointer_needed)
2221 if (fp_offset)
2222 return gen_frame_mem (DImode,
2223 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2224 else
2225 return gen_frame_mem (DImode,
2226 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2229 /* If FP is not needed, we calculate the location of LR, which would be
2230 at the top of the saved registers block. */
2232 return gen_frame_mem (DImode,
2233 plus_constant (Pmode,
2234 stack_pointer_rtx,
2235 fp_offset
2236 + cfun->machine->frame.saved_regs_size
2237 - 2 * UNITS_PER_WORD));
2240 /* Output code to build up a constant in a register. */
2241 static void
2242 aarch64_build_constant (int regnum, HOST_WIDE_INT val)
2244 if (aarch64_bitmask_imm (val, DImode))
2245 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2246 else
2248 int i;
2249 int ncount = 0;
2250 int zcount = 0;
2251 HOST_WIDE_INT valp = val >> 16;
2252 HOST_WIDE_INT valm;
2253 HOST_WIDE_INT tval;
2255 for (i = 16; i < 64; i += 16)
2257 valm = (valp & 0xffff);
2259 if (valm != 0)
2260 ++ zcount;
2262 if (valm != 0xffff)
2263 ++ ncount;
2265 valp >>= 16;
2268 /* zcount contains the number of additional MOVK instructions
2269 required if the constant is built up with an initial MOVZ instruction,
2270 while ncount is the number of MOVK instructions required if starting
2271 with a MOVN instruction. Choose the sequence that needs the
2272 fewer instructions, preferring MOVZ instructions when the two
2273 counts are equal. */
2274 if (ncount < zcount)
2276 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2277 GEN_INT ((~val) & 0xffff));
2278 tval = 0xffff;
2280 else
2282 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2283 GEN_INT (val & 0xffff));
2284 tval = 0;
2287 val >>= 16;
2289 for (i = 16; i < 64; i += 16)
2291 if ((val & 0xffff) != tval)
2292 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2293 GEN_INT (i), GEN_INT (val & 0xffff)));
2294 val >>= 16;
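/* To illustrate the MOVZ/MOVN selection above: 0x1234000000005678 is not a
   bitmask immediate and has two non-zero 16-bit chunks, so it is built with
   a MOVZ-based sequence such as (register choice is arbitrary):

	mov	x0, #0x5678
	movk	x0, #0x1234, lsl #48
*/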
2299 static void
2300 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2302 HOST_WIDE_INT mdelta = delta;
2303 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2304 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2306 if (mdelta < 0)
2307 mdelta = -mdelta;
2309 if (mdelta >= 4096 * 4096)
2311 aarch64_build_constant (scratchreg, delta);
2312 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2314 else if (mdelta > 0)
2316 if (mdelta >= 4096)
2318 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2319 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2320 if (delta < 0)
2321 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2322 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2323 else
2324 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2325 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2327 if (mdelta % 4096 != 0)
2329 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2330 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2331 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
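/* For example, DELTA == 0x3456 is split by the code above into a multiple
   of 4096 plus a remainder, giving roughly (register numbers are
   illustrative; the scratch register is the SCRATCHREG argument):

	mov	x16, #3
	add	x0, x0, x16, lsl #12
	add	x0, x0, #0x456
*/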
2336 /* Output code to add DELTA to the first argument, and then jump
2337 to FUNCTION. Used for C++ multiple inheritance. */
2338 static void
2339 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2340 HOST_WIDE_INT delta,
2341 HOST_WIDE_INT vcall_offset,
2342 tree function)
2344 /* The this pointer is always in x0. Note that this differs from
2345 Arm, where the this pointer may be bumped to r1 if r0 is required
2346 to return a pointer to an aggregate. On AArch64 a result value
2347 pointer will be in x8. */
2348 int this_regno = R0_REGNUM;
2349 rtx this_rtx, temp0, temp1, addr, insn, funexp;
2351 reload_completed = 1;
2352 emit_note (NOTE_INSN_PROLOGUE_END);
2354 if (vcall_offset == 0)
2355 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2356 else
2358 gcc_assert ((vcall_offset & 0x7) == 0);
2360 this_rtx = gen_rtx_REG (Pmode, this_regno);
2361 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2362 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2364 addr = this_rtx;
2365 if (delta != 0)
2367 if (delta >= -256 && delta < 256)
2368 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2369 plus_constant (Pmode, this_rtx, delta));
2370 else
2371 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2374 aarch64_emit_move (temp0, gen_rtx_MEM (Pmode, addr));
2376 if (vcall_offset >= -256 && vcall_offset < 32768)
2377 addr = plus_constant (Pmode, temp0, vcall_offset);
2378 else
2380 aarch64_build_constant (IP1_REGNUM, vcall_offset);
2381 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2384 aarch64_emit_move (temp1, gen_rtx_MEM (Pmode, addr));
2385 emit_insn (gen_add2_insn (this_rtx, temp1));
2388 /* Generate a tail call to the target function. */
2389 if (!TREE_USED (function))
2391 assemble_external (function);
2392 TREE_USED (function) = 1;
2394 funexp = XEXP (DECL_RTL (function), 0);
2395 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2396 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2397 SIBLING_CALL_P (insn) = 1;
2399 insn = get_insns ();
2400 shorten_branches (insn);
2401 final_start_function (insn, file, 1);
2402 final (insn, file, 1);
2403 final_end_function ();
2405 /* Stop pretending to be a post-reload pass. */
2406 reload_completed = 0;
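/* In the common case of VCALL_OFFSET == 0 and a small DELTA, the thunk
   emitted above reduces to something like (illustrative):

	add	x0, x0, #<delta>
	b	<function>
*/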
2409 static int
2410 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2412 if (GET_CODE (*x) == SYMBOL_REF)
2413 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2415 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2416 TLS offsets, not real symbol references. */
2417 if (GET_CODE (*x) == UNSPEC
2418 && XINT (*x, 1) == UNSPEC_TLS)
2419 return -1;
2421 return 0;
2424 static bool
2425 aarch64_tls_referenced_p (rtx x)
2427 if (!TARGET_HAVE_TLS)
2428 return false;
2430 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2434 static int
2435 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2437 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2438 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2440 if (*imm1 < *imm2)
2441 return -1;
2442 if (*imm1 > *imm2)
2443 return +1;
2444 return 0;
2448 static void
2449 aarch64_build_bitmask_table (void)
2451 unsigned HOST_WIDE_INT mask, imm;
2452 unsigned int log_e, e, s, r;
2453 unsigned int nimms = 0;
2455 for (log_e = 1; log_e <= 6; log_e++)
2457 e = 1 << log_e;
2458 if (e == 64)
2459 mask = ~(HOST_WIDE_INT) 0;
2460 else
2461 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2462 for (s = 1; s < e; s++)
2464 for (r = 0; r < e; r++)
2466 /* Set s consecutive bits to 1 (s < 64).  */
2467 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2468 /* Rotate right by r.  */
2469 if (r != 0)
2470 imm = ((imm >> r) | (imm << (e - r))) & mask;
2471 /* Replicate the constant to 64 bits; the switch cases deliberately fall through.  */
2472 switch (log_e) {
2473 case 1: imm |= (imm << 2);
2474 case 2: imm |= (imm << 4);
2475 case 3: imm |= (imm << 8);
2476 case 4: imm |= (imm << 16);
2477 case 5: imm |= (imm << 32);
2478 case 6:
2479 break;
2480 default:
2481 gcc_unreachable ();
2483 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2484 aarch64_bitmasks[nimms++] = imm;
2489 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2490 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2491 aarch64_bitmasks_cmp);
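/* The table built above enumerates every value encodable as an AArch64
   logical immediate: S consecutive ones (1 <= S < E) rotated by R within an
   element of E bits and replicated across the register, which works out to
   2 + 12 + 56 + 240 + 992 + 4032 = 5334 entries.  For example,
   0x00ff00ff00ff00ff (eight ones replicated at a 16-bit element size) is in
   the table, while an arbitrary value such as 0x1234567812345678 is not.  */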
2495 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2496 a left shift of 0 or 12 bits. */
2497 bool
2498 aarch64_uimm12_shift (HOST_WIDE_INT val)
2500 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2501 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2506 /* Return true if val is an immediate that can be loaded into a
2507 register by a MOVZ instruction. */
2508 static bool
2509 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2511 if (GET_MODE_SIZE (mode) > 4)
2513 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2514 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2515 return 1;
2517 else
2519 /* Ignore sign extension. */
2520 val &= (HOST_WIDE_INT) 0xffffffff;
2522 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2523 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2527 /* Return true if val is a valid bitmask immediate. */
2528 bool
2529 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2531 if (GET_MODE_SIZE (mode) < 8)
2533 /* Replicate bit pattern. */
2534 val &= (HOST_WIDE_INT) 0xffffffff;
2535 val |= val << 32;
2537 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2538 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2542 /* Return true if val is an immediate that can be loaded into a
2543 register in a single instruction. */
2544 bool
2545 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2547 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2548 return 1;
2549 return aarch64_bitmask_imm (val, mode);
2552 static bool
2553 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2555 rtx base, offset;
2556 if (GET_CODE (x) == HIGH)
2557 return true;
2559 split_const (x, &base, &offset);
2560 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2561 return (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR) != SYMBOL_FORCE_TO_MEM);
2563 return aarch64_tls_referenced_p (x);
2566 /* Return true if register REGNO is a valid index register.
2567 STRICT_P is true if REG_OK_STRICT is in effect. */
2569 bool
2570 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2572 if (!HARD_REGISTER_NUM_P (regno))
2574 if (!strict_p)
2575 return true;
2577 if (!reg_renumber)
2578 return false;
2580 regno = reg_renumber[regno];
2582 return GP_REGNUM_P (regno);
2585 /* Return true if register REGNO is a valid base register.
2586 STRICT_P is true if REG_OK_STRICT is in effect. */
2588 bool
2589 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2591 if (!HARD_REGISTER_NUM_P (regno))
2593 if (!strict_p)
2594 return true;
2596 if (!reg_renumber)
2597 return false;
2599 regno = reg_renumber[regno];
2602 /* The fake registers will be eliminated to either the stack or
2603 hard frame pointer, both of which are usually valid base registers.
2604 Reload deals with the cases where the eliminated form isn't valid. */
2605 return (GP_REGNUM_P (regno)
2606 || regno == SP_REGNUM
2607 || regno == FRAME_POINTER_REGNUM
2608 || regno == ARG_POINTER_REGNUM);
2611 /* Return true if X is a valid base register.
2612 STRICT_P is true if REG_OK_STRICT is in effect. */
2614 static bool
2615 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2617 if (!strict_p && GET_CODE (x) == SUBREG)
2618 x = SUBREG_REG (x);
2620 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2623 /* Return true if address offset is a valid index. If it is, fill in INFO
2624 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2626 static bool
2627 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2628 enum machine_mode mode, bool strict_p)
2630 enum aarch64_address_type type;
2631 rtx index;
2632 int shift;
2634 /* (reg:P) */
2635 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2636 && GET_MODE (x) == Pmode)
2638 type = ADDRESS_REG_REG;
2639 index = x;
2640 shift = 0;
2642 /* (sign_extend:DI (reg:SI)) */
2643 else if ((GET_CODE (x) == SIGN_EXTEND
2644 || GET_CODE (x) == ZERO_EXTEND)
2645 && GET_MODE (x) == DImode
2646 && GET_MODE (XEXP (x, 0)) == SImode)
2648 type = (GET_CODE (x) == SIGN_EXTEND)
2649 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2650 index = XEXP (x, 0);
2651 shift = 0;
2653 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2654 else if (GET_CODE (x) == MULT
2655 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2656 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2657 && GET_MODE (XEXP (x, 0)) == DImode
2658 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2659 && CONST_INT_P (XEXP (x, 1)))
2661 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2662 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2663 index = XEXP (XEXP (x, 0), 0);
2664 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2666 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2667 else if (GET_CODE (x) == ASHIFT
2668 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2669 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2670 && GET_MODE (XEXP (x, 0)) == DImode
2671 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2672 && CONST_INT_P (XEXP (x, 1)))
2674 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2675 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2676 index = XEXP (XEXP (x, 0), 0);
2677 shift = INTVAL (XEXP (x, 1));
2679 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2680 else if ((GET_CODE (x) == SIGN_EXTRACT
2681 || GET_CODE (x) == ZERO_EXTRACT)
2682 && GET_MODE (x) == DImode
2683 && GET_CODE (XEXP (x, 0)) == MULT
2684 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2685 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2687 type = (GET_CODE (x) == SIGN_EXTRACT)
2688 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2689 index = XEXP (XEXP (x, 0), 0);
2690 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2691 if (INTVAL (XEXP (x, 1)) != 32 + shift
2692 || INTVAL (XEXP (x, 2)) != 0)
2693 shift = -1;
2695 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2696 (const_int 0xffffffff<<shift)) */
2697 else if (GET_CODE (x) == AND
2698 && GET_MODE (x) == DImode
2699 && GET_CODE (XEXP (x, 0)) == MULT
2700 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2701 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2702 && CONST_INT_P (XEXP (x, 1)))
2704 type = ADDRESS_REG_UXTW;
2705 index = XEXP (XEXP (x, 0), 0);
2706 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2707 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2708 shift = -1;
2710 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2711 else if ((GET_CODE (x) == SIGN_EXTRACT
2712 || GET_CODE (x) == ZERO_EXTRACT)
2713 && GET_MODE (x) == DImode
2714 && GET_CODE (XEXP (x, 0)) == ASHIFT
2715 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2716 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2718 type = (GET_CODE (x) == SIGN_EXTRACT)
2719 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2720 index = XEXP (XEXP (x, 0), 0);
2721 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2722 if (INTVAL (XEXP (x, 1)) != 32 + shift
2723 || INTVAL (XEXP (x, 2)) != 0)
2724 shift = -1;
2726 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2727 (const_int 0xffffffff<<shift)) */
2728 else if (GET_CODE (x) == AND
2729 && GET_MODE (x) == DImode
2730 && GET_CODE (XEXP (x, 0)) == ASHIFT
2731 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2732 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2733 && CONST_INT_P (XEXP (x, 1)))
2735 type = ADDRESS_REG_UXTW;
2736 index = XEXP (XEXP (x, 0), 0);
2737 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2738 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2739 shift = -1;
2741 /* (mult:P (reg:P) (const_int scale)) */
2742 else if (GET_CODE (x) == MULT
2743 && GET_MODE (x) == Pmode
2744 && GET_MODE (XEXP (x, 0)) == Pmode
2745 && CONST_INT_P (XEXP (x, 1)))
2747 type = ADDRESS_REG_REG;
2748 index = XEXP (x, 0);
2749 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2751 /* (ashift:P (reg:P) (const_int shift)) */
2752 else if (GET_CODE (x) == ASHIFT
2753 && GET_MODE (x) == Pmode
2754 && GET_MODE (XEXP (x, 0)) == Pmode
2755 && CONST_INT_P (XEXP (x, 1)))
2757 type = ADDRESS_REG_REG;
2758 index = XEXP (x, 0);
2759 shift = INTVAL (XEXP (x, 1));
2761 else
2762 return false;
2764 if (GET_CODE (index) == SUBREG)
2765 index = SUBREG_REG (index);
2767 if ((shift == 0 ||
2768 (shift > 0 && shift <= 3
2769 && (1 << shift) == GET_MODE_SIZE (mode)))
2770 && REG_P (index)
2771 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
2773 info->type = type;
2774 info->offset = index;
2775 info->shift = shift;
2776 return true;
2779 return false;
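/* As an example of the classification above, an index expression of the form
   (mult:DI (sign_extend:DI (reg:SI)) (const_int 4)) used with an SImode
   access is accepted as ADDRESS_REG_SXTW with shift 2, i.e. the
   "[<base>, w<n>, sxtw 2]" addressing form.  */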
2782 static inline bool
2783 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2785 return (offset >= -64 * GET_MODE_SIZE (mode)
2786 && offset < 64 * GET_MODE_SIZE (mode)
2787 && offset % GET_MODE_SIZE (mode) == 0);
2790 static inline bool
2791 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
2792 HOST_WIDE_INT offset)
2794 return offset >= -256 && offset < 256;
2797 static inline bool
2798 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2800 return (offset >= 0
2801 && offset < 4096 * GET_MODE_SIZE (mode)
2802 && offset % GET_MODE_SIZE (mode) == 0);
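/* For a DImode (8-byte) access the three helpers above correspond to:
     7-bit signed scaled     -512 .. 504 in steps of 8 (LDP/STP),
     9-bit signed unscaled   -256 .. 255 (LDUR/STUR),
     12-bit unsigned scaled  0 .. 32760 in steps of 8 (LDR/STR).  */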
2805 /* Return true if X is a valid address for machine mode MODE. If it is,
2806 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
2807 effect. OUTER_CODE is PARALLEL for a load/store pair. */
2809 static bool
2810 aarch64_classify_address (struct aarch64_address_info *info,
2811 rtx x, enum machine_mode mode,
2812 RTX_CODE outer_code, bool strict_p)
2814 enum rtx_code code = GET_CODE (x);
2815 rtx op0, op1;
2816 bool allow_reg_index_p =
2817 outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
2819 /* Don't support anything other than POST_INC or REG addressing for
2820 AdvSIMD. */
2821 if (aarch64_vector_mode_p (mode)
2822 && (code != POST_INC && code != REG))
2823 return false;
2825 switch (code)
2827 case REG:
2828 case SUBREG:
2829 info->type = ADDRESS_REG_IMM;
2830 info->base = x;
2831 info->offset = const0_rtx;
2832 return aarch64_base_register_rtx_p (x, strict_p);
2834 case PLUS:
2835 op0 = XEXP (x, 0);
2836 op1 = XEXP (x, 1);
2837 if (GET_MODE_SIZE (mode) != 0
2838 && CONST_INT_P (op1)
2839 && aarch64_base_register_rtx_p (op0, strict_p))
2841 HOST_WIDE_INT offset = INTVAL (op1);
2843 info->type = ADDRESS_REG_IMM;
2844 info->base = op0;
2845 info->offset = op1;
2847 /* TImode and TFmode values are allowed in both pairs of X
2848 registers and individual Q registers. The available
2849 address modes are:
2850 X,X: 7-bit signed scaled offset
2851 Q: 9-bit signed offset
2852 We conservatively require an offset representable in both forms. */
2854 if (mode == TImode || mode == TFmode)
2855 return (offset_7bit_signed_scaled_p (mode, offset)
2856 && offset_9bit_signed_unscaled_p (mode, offset));
2858 if (outer_code == PARALLEL)
2859 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
2860 && offset_7bit_signed_scaled_p (mode, offset));
2861 else
2862 return (offset_9bit_signed_unscaled_p (mode, offset)
2863 || offset_12bit_unsigned_scaled_p (mode, offset));
2866 if (allow_reg_index_p)
2868 /* Look for base + (scaled/extended) index register. */
2869 if (aarch64_base_register_rtx_p (op0, strict_p)
2870 && aarch64_classify_index (info, op1, mode, strict_p))
2872 info->base = op0;
2873 return true;
2875 if (aarch64_base_register_rtx_p (op1, strict_p)
2876 && aarch64_classify_index (info, op0, mode, strict_p))
2878 info->base = op1;
2879 return true;
2883 return false;
2885 case POST_INC:
2886 case POST_DEC:
2887 case PRE_INC:
2888 case PRE_DEC:
2889 info->type = ADDRESS_REG_WB;
2890 info->base = XEXP (x, 0);
2891 info->offset = NULL_RTX;
2892 return aarch64_base_register_rtx_p (info->base, strict_p);
2894 case POST_MODIFY:
2895 case PRE_MODIFY:
2896 info->type = ADDRESS_REG_WB;
2897 info->base = XEXP (x, 0);
2898 if (GET_CODE (XEXP (x, 1)) == PLUS
2899 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
2900 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
2901 && aarch64_base_register_rtx_p (info->base, strict_p))
2903 HOST_WIDE_INT offset;
2904 info->offset = XEXP (XEXP (x, 1), 1);
2905 offset = INTVAL (info->offset);
2907 /* TImode and TFmode values are allowed in both pairs of X
2908 registers and individual Q registers. The available
2909 address modes are:
2910 X,X: 7-bit signed scaled offset
2911 Q: 9-bit signed offset
2912 We conservatively require an offset representable in both forms. */
2914 if (mode == TImode || mode == TFmode)
2915 return (offset_7bit_signed_scaled_p (mode, offset)
2916 && offset_9bit_signed_unscaled_p (mode, offset));
2918 if (outer_code == PARALLEL)
2919 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
2920 && offset_7bit_signed_scaled_p (mode, offset));
2921 else
2922 return offset_9bit_signed_unscaled_p (mode, offset);
2924 return false;
2926 case CONST:
2927 case SYMBOL_REF:
2928 case LABEL_REF:
2929 /* Load literal: pc-relative constant pool entry. Only supported
2930 for SI mode or larger. */
2931 info->type = ADDRESS_SYMBOLIC;
2932 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
2934 rtx sym, addend;
2936 split_const (x, &sym, &addend);
2937 return (GET_CODE (sym) == LABEL_REF
2938 || (GET_CODE (sym) == SYMBOL_REF
2939 && CONSTANT_POOL_ADDRESS_P (sym)));
2941 return false;
2943 case LO_SUM:
2944 info->type = ADDRESS_LO_SUM;
2945 info->base = XEXP (x, 0);
2946 info->offset = XEXP (x, 1);
2947 if (allow_reg_index_p
2948 && aarch64_base_register_rtx_p (info->base, strict_p))
2950 rtx sym, offs;
2951 split_const (info->offset, &sym, &offs);
2952 if (GET_CODE (sym) == SYMBOL_REF
2953 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
2954 == SYMBOL_SMALL_ABSOLUTE))
2956 /* The symbol and offset must be aligned to the access size. */
2957 unsigned int align;
2958 unsigned int ref_size;
2960 if (CONSTANT_POOL_ADDRESS_P (sym))
2961 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
2962 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
2964 tree exp = SYMBOL_REF_DECL (sym);
2965 align = TYPE_ALIGN (TREE_TYPE (exp));
2966 align = CONSTANT_ALIGNMENT (exp, align);
2968 else if (SYMBOL_REF_DECL (sym))
2969 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
2970 else
2971 align = BITS_PER_UNIT;
2973 ref_size = GET_MODE_SIZE (mode);
2974 if (ref_size == 0)
2975 ref_size = GET_MODE_SIZE (DImode);
2977 return ((INTVAL (offs) & (ref_size - 1)) == 0
2978 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
2981 return false;
2983 default:
2984 return false;
2988 bool
2989 aarch64_symbolic_address_p (rtx x)
2991 rtx offset;
2993 split_const (x, &x, &offset);
2994 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
2997 /* Classify the base of symbolic expression X, given that X appears in
2998 context CONTEXT. */
2999 static enum aarch64_symbol_type
3000 aarch64_classify_symbolic_expression (rtx x, enum aarch64_symbol_context context)
3002 rtx offset;
3003 split_const (x, &x, &offset);
3004 return aarch64_classify_symbol (x, context);
3008 /* Return TRUE if X is a legitimate address for accessing memory in
3009 mode MODE. */
3010 static bool
3011 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3013 struct aarch64_address_info addr;
3015 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3018 /* Return TRUE if X is a legitimate address for accessing memory in
3019 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3020 pair operation. */
3021 bool
3022 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3023 RTX_CODE outer_code, bool strict_p)
3025 struct aarch64_address_info addr;
3027 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3030 /* Return TRUE if rtx X is immediate constant 0.0 */
3031 bool
3032 aarch64_float_const_zero_rtx_p (rtx x)
3034 REAL_VALUE_TYPE r;
3036 if (GET_MODE (x) == VOIDmode)
3037 return false;
3039 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3040 if (REAL_VALUE_MINUS_ZERO (r))
3041 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3042 return REAL_VALUES_EQUAL (r, dconst0);
3045 /* Return the fixed registers used for condition codes. */
3047 static bool
3048 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3050 *p1 = CC_REGNUM;
3051 *p2 = INVALID_REGNUM;
3052 return true;
3055 enum machine_mode
3056 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3058 /* All floating point compares return CCFP if it is an equality
3059 comparison, and CCFPE otherwise. */
3060 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3062 switch (code)
3064 case EQ:
3065 case NE:
3066 case UNORDERED:
3067 case ORDERED:
3068 case UNLT:
3069 case UNLE:
3070 case UNGT:
3071 case UNGE:
3072 case UNEQ:
3073 case LTGT:
3074 return CCFPmode;
3076 case LT:
3077 case LE:
3078 case GT:
3079 case GE:
3080 return CCFPEmode;
3082 default:
3083 gcc_unreachable ();
3087 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3088 && y == const0_rtx
3089 && (code == EQ || code == NE || code == LT || code == GE)
3090 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3091 || GET_CODE (x) == NEG))
3092 return CC_NZmode;
3094 /* A compare with a shifted operand. Because of canonicalization,
3095 the comparison will have to be swapped when we emit the assembly
3096 code. */
3097 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3098 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3099 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3100 || GET_CODE (x) == LSHIFTRT
3101 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3102 return CC_SWPmode;
3104 /* A compare of a mode narrower than SI mode against zero can be done
3105 by extending the value in the comparison. */
3106 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3107 && y == const0_rtx)
3108 /* Only use sign-extension if we really need it. */
3109 return ((code == GT || code == GE || code == LE || code == LT)
3110 ? CC_SESWPmode : CC_ZESWPmode);
3112 /* For everything else, return CCmode. */
3113 return CCmode;
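/* For instance, (compare (ashift (reg:DI x1) (const_int 3)) (reg:DI x0))
   selects CC_SWPmode: only the second source operand of the emitted
   "cmp x0, x1, lsl 3" can be shifted, so the condition is swapped again in
   aarch64_get_condition_code below.  */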
3116 static unsigned
3117 aarch64_get_condition_code (rtx x)
3119 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3120 enum rtx_code comp_code = GET_CODE (x);
3122 if (GET_MODE_CLASS (mode) != MODE_CC)
3123 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3125 switch (mode)
3127 case CCFPmode:
3128 case CCFPEmode:
3129 switch (comp_code)
3131 case GE: return AARCH64_GE;
3132 case GT: return AARCH64_GT;
3133 case LE: return AARCH64_LS;
3134 case LT: return AARCH64_MI;
3135 case NE: return AARCH64_NE;
3136 case EQ: return AARCH64_EQ;
3137 case ORDERED: return AARCH64_VC;
3138 case UNORDERED: return AARCH64_VS;
3139 case UNLT: return AARCH64_LT;
3140 case UNLE: return AARCH64_LE;
3141 case UNGT: return AARCH64_HI;
3142 case UNGE: return AARCH64_PL;
3143 default: gcc_unreachable ();
3145 break;
3147 case CCmode:
3148 switch (comp_code)
3150 case NE: return AARCH64_NE;
3151 case EQ: return AARCH64_EQ;
3152 case GE: return AARCH64_GE;
3153 case GT: return AARCH64_GT;
3154 case LE: return AARCH64_LE;
3155 case LT: return AARCH64_LT;
3156 case GEU: return AARCH64_CS;
3157 case GTU: return AARCH64_HI;
3158 case LEU: return AARCH64_LS;
3159 case LTU: return AARCH64_CC;
3160 default: gcc_unreachable ();
3162 break;
3164 case CC_SWPmode:
3165 case CC_ZESWPmode:
3166 case CC_SESWPmode:
3167 switch (comp_code)
3169 case NE: return AARCH64_NE;
3170 case EQ: return AARCH64_EQ;
3171 case GE: return AARCH64_LE;
3172 case GT: return AARCH64_LT;
3173 case LE: return AARCH64_GE;
3174 case LT: return AARCH64_GT;
3175 case GEU: return AARCH64_LS;
3176 case GTU: return AARCH64_CC;
3177 case LEU: return AARCH64_CS;
3178 case LTU: return AARCH64_HI;
3179 default: gcc_unreachable ();
3181 break;
3183 case CC_NZmode:
3184 switch (comp_code)
3186 case NE: return AARCH64_NE;
3187 case EQ: return AARCH64_EQ;
3188 case GE: return AARCH64_PL;
3189 case LT: return AARCH64_MI;
3190 default: gcc_unreachable ();
3192 break;
3194 default:
3195 gcc_unreachable ();
3196 break;
3200 static unsigned
3201 bit_count (unsigned HOST_WIDE_INT value)
3203 unsigned count = 0;
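  /* Clear the least significant set bit on each iteration
     (value &= value - 1), so the loop runs once per set bit.  */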
3205 while (value)
3207 count++;
3208 value &= value - 1;
3211 return count;
3214 void
3215 aarch64_print_operand (FILE *f, rtx x, char code)
3217 switch (code)
3219 case 'e':
3220 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3222 int n;
3224 if (GET_CODE (x) != CONST_INT
3225 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3227 output_operand_lossage ("invalid operand for '%%%c'", code);
3228 return;
3231 switch (n)
3233 case 3:
3234 fputc ('b', f);
3235 break;
3236 case 4:
3237 fputc ('h', f);
3238 break;
3239 case 5:
3240 fputc ('w', f);
3241 break;
3242 default:
3243 output_operand_lossage ("invalid operand for '%%%c'", code);
3244 return;
3247 break;
3249 case 'p':
3251 int n;
3253 /* Print N such that 2^N == X. */
3254 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3256 output_operand_lossage ("invalid operand for '%%%c'", code);
3257 return;
3260 asm_fprintf (f, "%d", n);
3262 break;
3264 case 'P':
3265 /* Print the number of non-zero bits in X (a const_int). */
3266 if (GET_CODE (x) != CONST_INT)
3268 output_operand_lossage ("invalid operand for '%%%c'", code);
3269 return;
3272 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3273 break;
3275 case 'H':
3276 /* Print the higher numbered register of a pair (TImode) of regs. */
3277 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3279 output_operand_lossage ("invalid operand for '%%%c'", code);
3280 return;
3283 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3284 break;
3286 case 'Q':
3287 /* Print the least significant register of a pair (TImode) of regs. */
3288 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3290 output_operand_lossage ("invalid operand for '%%%c'", code);
3291 return;
3293 asm_fprintf (f, "%s", reg_names [REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0)]);
3294 break;
3296 case 'R':
3297 /* Print the most significant register of a pair (TImode) of regs. */
3298 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3300 output_operand_lossage ("invalid operand for '%%%c'", code);
3301 return;
3303 asm_fprintf (f, "%s", reg_names [REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1)]);
3304 break;
3306 case 'm':
3307 /* Print a condition (eq, ne, etc). */
3309 /* CONST_TRUE_RTX means always -- that's the default. */
3310 if (x == const_true_rtx)
3311 return;
3313 if (!COMPARISON_P (x))
3315 output_operand_lossage ("invalid operand for '%%%c'", code);
3316 return;
3319 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3320 break;
3322 case 'M':
3323 /* Print the inverse of a condition (eq <-> ne, etc). */
3325 /* CONST_TRUE_RTX means never -- that's the default. */
3326 if (x == const_true_rtx)
3328 fputs ("nv", f);
3329 return;
3332 if (!COMPARISON_P (x))
3334 output_operand_lossage ("invalid operand for '%%%c'", code);
3335 return;
3338 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3339 (aarch64_get_condition_code (x))], f);
3340 break;
3342 case 'b':
3343 case 'h':
3344 case 's':
3345 case 'd':
3346 case 'q':
3347 /* Print a scalar FP/SIMD register name. */
3348 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3350 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3351 return;
3353 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3354 break;
3356 case 'S':
3357 case 'T':
3358 case 'U':
3359 case 'V':
3360 /* Print the first FP/SIMD register name in a list. */
3361 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3363 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3364 return;
3366 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3367 break;
3369 case 'X':
3370 /* Print integer constant in hex. */
3371 if (GET_CODE (x) != CONST_INT)
3373 output_operand_lossage ("invalid operand for '%%%c'", code);
3374 return;
3376 asm_fprintf (f, "0x%wx", UINTVAL (x));
3377 break;
3379 case 'w':
3380 case 'x':
3381 /* Print a general register name or the zero register (32-bit or
3382 64-bit). */
3383 if (x == const0_rtx
3384 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3386 asm_fprintf (f, "%czr", code);
3387 break;
3390 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3392 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3393 break;
3396 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3398 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3399 break;
3402 /* Fall through */
3404 case 0:
3405 /* Print a normal operand.  If it's a general register, then we
3406 assume DImode. */
3407 if (x == NULL)
3409 output_operand_lossage ("missing operand");
3410 return;
3413 switch (GET_CODE (x))
3415 case REG:
3416 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3417 break;
3419 case MEM:
3420 aarch64_memory_reference_mode = GET_MODE (x);
3421 output_address (XEXP (x, 0));
3422 break;
3424 case LABEL_REF:
3425 case SYMBOL_REF:
3426 output_addr_const (asm_out_file, x);
3427 break;
3429 case CONST_INT:
3430 asm_fprintf (f, "%wd", INTVAL (x));
3431 break;
3433 case CONST_VECTOR:
3434 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3436 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3437 HOST_WIDE_INT_MIN,
3438 HOST_WIDE_INT_MAX));
3439 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3441 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3443 fputc ('0', f);
3445 else
3446 gcc_unreachable ();
3447 break;
3449 case CONST_DOUBLE:
3450 /* CONST_DOUBLE can represent a double-width integer.
3451 In this case, the mode of x is VOIDmode. */
3452 if (GET_MODE (x) == VOIDmode)
3453 ; /* Do Nothing. */
3454 else if (aarch64_float_const_zero_rtx_p (x))
3456 fputc ('0', f);
3457 break;
3459 else if (aarch64_float_const_representable_p (x))
3461 #define buf_size 20
3462 char float_buf[buf_size] = {'\0'};
3463 REAL_VALUE_TYPE r;
3464 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3465 real_to_decimal_for_mode (float_buf, &r,
3466 buf_size, buf_size,
3467 1, GET_MODE (x));
3468 asm_fprintf (asm_out_file, "%s", float_buf);
3469 break;
3470 #undef buf_size
3472 output_operand_lossage ("invalid constant");
3473 return;
3474 default:
3475 output_operand_lossage ("invalid operand");
3476 return;
3478 break;
3480 case 'A':
3481 if (GET_CODE (x) == HIGH)
3482 x = XEXP (x, 0);
3484 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3486 case SYMBOL_SMALL_GOT:
3487 asm_fprintf (asm_out_file, ":got:");
3488 break;
3490 case SYMBOL_SMALL_TLSGD:
3491 asm_fprintf (asm_out_file, ":tlsgd:");
3492 break;
3494 case SYMBOL_SMALL_TLSDESC:
3495 asm_fprintf (asm_out_file, ":tlsdesc:");
3496 break;
3498 case SYMBOL_SMALL_GOTTPREL:
3499 asm_fprintf (asm_out_file, ":gottprel:");
3500 break;
3502 case SYMBOL_SMALL_TPREL:
3503 asm_fprintf (asm_out_file, ":tprel:");
3504 break;
3506 default:
3507 break;
3509 output_addr_const (asm_out_file, x);
3510 break;
3512 case 'L':
3513 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3515 case SYMBOL_SMALL_GOT:
3516 asm_fprintf (asm_out_file, ":lo12:");
3517 break;
3519 case SYMBOL_SMALL_TLSGD:
3520 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3521 break;
3523 case SYMBOL_SMALL_TLSDESC:
3524 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3525 break;
3527 case SYMBOL_SMALL_GOTTPREL:
3528 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3529 break;
3531 case SYMBOL_SMALL_TPREL:
3532 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3533 break;
3535 default:
3536 break;
3538 output_addr_const (asm_out_file, x);
3539 break;
3541 case 'G':
3543 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3545 case SYMBOL_SMALL_TPREL:
3546 asm_fprintf (asm_out_file, ":tprel_hi12:");
3547 break;
3548 default:
3549 break;
3551 output_addr_const (asm_out_file, x);
3552 break;
3554 default:
3555 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3556 return;
3560 void
3561 aarch64_print_operand_address (FILE *f, rtx x)
3563 struct aarch64_address_info addr;
3565 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3566 MEM, true))
3567 switch (addr.type)
3569 case ADDRESS_REG_IMM:
3570 if (addr.offset == const0_rtx)
3571 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3572 else
3573 asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
3574 INTVAL (addr.offset));
3575 return;
3577 case ADDRESS_REG_REG:
3578 if (addr.shift == 0)
3579 asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3580 reg_names [REGNO (addr.offset)]);
3581 else
3582 asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3583 reg_names [REGNO (addr.offset)], addr.shift);
3584 return;
3586 case ADDRESS_REG_UXTW:
3587 if (addr.shift == 0)
3588 asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
3589 REGNO (addr.offset) - R0_REGNUM);
3590 else
3591 asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
3592 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3593 return;
3595 case ADDRESS_REG_SXTW:
3596 if (addr.shift == 0)
3597 asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
3598 REGNO (addr.offset) - R0_REGNUM);
3599 else
3600 asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
3601 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3602 return;
3604 case ADDRESS_REG_WB:
3605 switch (GET_CODE (x))
3607 case PRE_INC:
3608 asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
3609 GET_MODE_SIZE (aarch64_memory_reference_mode));
3610 return;
3611 case POST_INC:
3612 asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
3613 GET_MODE_SIZE (aarch64_memory_reference_mode));
3614 return;
3615 case PRE_DEC:
3616 asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
3617 GET_MODE_SIZE (aarch64_memory_reference_mode));
3618 return;
3619 case POST_DEC:
3620 asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
3621 GET_MODE_SIZE (aarch64_memory_reference_mode));
3622 return;
3623 case PRE_MODIFY:
3624 asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
3625 INTVAL (addr.offset));
3626 return;
3627 case POST_MODIFY:
3628 asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
3629 INTVAL (addr.offset));
3630 return;
3631 default:
3632 break;
3634 break;
3636 case ADDRESS_LO_SUM:
3637 asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
3638 output_addr_const (f, addr.offset);
3639 asm_fprintf (f, "]");
3640 return;
3642 case ADDRESS_SYMBOLIC:
3643 break;
3646 output_addr_const (f, x);
3649 void
3650 aarch64_function_profiler (FILE *f ATTRIBUTE_UNUSED,
3651 int labelno ATTRIBUTE_UNUSED)
3653 sorry ("function profiling");
3656 bool
3657 aarch64_label_mentioned_p (rtx x)
3659 const char *fmt;
3660 int i;
3662 if (GET_CODE (x) == LABEL_REF)
3663 return true;
3665 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3666 referencing instruction, but they are constant offsets, not
3667 symbols. */
3668 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3669 return false;
3671 fmt = GET_RTX_FORMAT (GET_CODE (x));
3672 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3674 if (fmt[i] == 'E')
3676 int j;
3678 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3679 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3680 return 1;
3682 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3683 return 1;
3686 return 0;
3689 /* Implement REGNO_REG_CLASS. */
3691 enum reg_class
3692 aarch64_regno_regclass (unsigned regno)
3694 if (GP_REGNUM_P (regno))
3695 return CORE_REGS;
3697 if (regno == SP_REGNUM)
3698 return STACK_REG;
3700 if (regno == FRAME_POINTER_REGNUM
3701 || regno == ARG_POINTER_REGNUM)
3702 return CORE_REGS;
3704 if (FP_REGNUM_P (regno))
3705 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
3707 return NO_REGS;
3710 /* Try a machine-dependent way of reloading an illegitimate address
3711 operand. If we find one, push the reload and return the new rtx. */
3714 aarch64_legitimize_reload_address (rtx *x_p,
3715 enum machine_mode mode,
3716 int opnum, int type,
3717 int ind_levels ATTRIBUTE_UNUSED)
3719 rtx x = *x_p;
3721 /* Do not allow mem (plus (reg, const)) if vector mode. */
3722 if (aarch64_vector_mode_p (mode)
3723 && GET_CODE (x) == PLUS
3724 && REG_P (XEXP (x, 0))
3725 && CONST_INT_P (XEXP (x, 1)))
3727 rtx orig_rtx = x;
3728 x = copy_rtx (x);
3729 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3730 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3731 opnum, (enum reload_type) type);
3732 return x;
3735 /* We must recognize output that we have already generated ourselves. */
3736 if (GET_CODE (x) == PLUS
3737 && GET_CODE (XEXP (x, 0)) == PLUS
3738 && REG_P (XEXP (XEXP (x, 0), 0))
3739 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3740 && CONST_INT_P (XEXP (x, 1)))
3742 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3743 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3744 opnum, (enum reload_type) type);
3745 return x;
3748 /* We wish to handle large displacements off a base register by splitting
3749 the addend across an add and the mem insn. This can cut the number of
3750 extra insns needed from 3 to 1. It is only useful for load/store of a
3751 single register with a 12-bit offset field. */
3752 if (GET_CODE (x) == PLUS
3753 && REG_P (XEXP (x, 0))
3754 && CONST_INT_P (XEXP (x, 1))
3755 && HARD_REGISTER_P (XEXP (x, 0))
3756 && mode != TImode
3757 && mode != TFmode
3758 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
3760 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
3761 HOST_WIDE_INT low = val & 0xfff;
3762 HOST_WIDE_INT high = val - low;
3763 HOST_WIDE_INT offs;
3764 rtx cst;
3766 /* Punt on BLKmode (zero-size) offsets: we cannot ascertain BLKmode
3767 alignment, so leave such addresses to the generic reload code. */
3768 if (GET_MODE_SIZE (mode) == 0)
3769 return NULL_RTX;
3771 offs = low % GET_MODE_SIZE (mode);
3773 /* Align misaligned offset by adjusting high part to compensate. */
3774 if (offs != 0)
3776 if (aarch64_uimm12_shift (high + offs))
3778 /* Align down. */
3779 low = low - offs;
3780 high = high + offs;
3782 else
3784 /* Align up. */
3785 offs = GET_MODE_SIZE (mode) - offs;
3786 low = low + offs;
3787 high = high + (low & 0x1000) - offs;
3788 low &= 0xfff;
3792 /* Check for overflow. */
3793 if (high + low != val)
3794 return NULL_RTX;
3796 cst = GEN_INT (high);
3797 if (!aarch64_uimm12_shift (high))
3798 cst = force_const_mem (Pmode, cst);
3800 /* Reload high part into base reg, leaving the low part
3801 in the mem instruction. */
3802 x = gen_rtx_PLUS (Pmode,
3803 gen_rtx_PLUS (Pmode, XEXP (x, 0), cst),
3804 GEN_INT (low));
3806 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3807 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
3808 opnum, (enum reload_type) type);
3809 return x;
3812 return NULL_RTX;
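/* To illustrate the splitting above, an SImode access at
   (plus (reg) (const_int 0x13008)) reloads the high part 0x13000 into the
   base register and keeps the low part in the memory reference, giving
   roughly (register numbers are illustrative):

	add	x16, x1, #0x13, lsl #12
	ldr	w0, [x16, #8]
*/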
3816 static reg_class_t
3817 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
3818 reg_class_t rclass,
3819 enum machine_mode mode,
3820 secondary_reload_info *sri)
3822 /* Address expressions of the form PLUS (SP, large_offset) need two
3823 scratch registers, one for the constant, and one for holding a
3824 copy of SP, since SP cannot be used on the RHS of an add-reg
3825 instruction. */
3826 if (mode == DImode
3827 && GET_CODE (x) == PLUS
3828 && XEXP (x, 0) == stack_pointer_rtx
3829 && CONST_INT_P (XEXP (x, 1))
3830 && !aarch64_uimm12_shift (INTVAL (XEXP (x, 1))))
3832 sri->icode = CODE_FOR_reload_sp_immediate;
3833 return NO_REGS;
3836 /* Without the TARGET_SIMD instructions we cannot move a Q register
3837 to a Q register directly. We need a scratch. */
3838 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
3839 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
3840 && reg_class_subset_p (rclass, FP_REGS))
3842 if (mode == TFmode)
3843 sri->icode = CODE_FOR_aarch64_reload_movtf;
3844 else if (mode == TImode)
3845 sri->icode = CODE_FOR_aarch64_reload_movti;
3846 return NO_REGS;
3849 /* A TFmode or TImode memory access should be handled via FP_REGS
3850 because AArch64 has richer addressing modes for LDR/STR instructions
3851 than LDP/STP instructions. */
3852 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
3853 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
3854 return FP_REGS;
3856 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
3857 return CORE_REGS;
3859 return NO_REGS;
3862 static bool
3863 aarch64_can_eliminate (const int from, const int to)
3865 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
3866 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
3868 if (frame_pointer_needed)
3870 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3871 return true;
3872 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3873 return false;
3874 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
3875 && !cfun->calls_alloca)
3876 return true;
3877 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3878 return true;
3879 return false;
3881 else
3883 /* If we decided that we didn't need a leaf frame pointer but then used
3884 LR in the function, then we'll want a frame pointer after all, so
3885 prevent this elimination to ensure a frame pointer is used.
3887 NOTE: the original value of flag_omit_frame_pointer gets trashed
3888 IFF flag_omit_leaf_frame_pointer is true, so we check the value
3889 of faked_omit_frame_pointer here (which is true when we always
3890 wish to keep non-leaf frame pointers but only wish to keep leaf frame
3891 pointers when LR is clobbered). */
3892 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
3893 && df_regs_ever_live_p (LR_REGNUM)
3894 && faked_omit_frame_pointer)
3895 return false;
3898 return true;
3901 HOST_WIDE_INT
3902 aarch64_initial_elimination_offset (unsigned from, unsigned to)
3904 HOST_WIDE_INT frame_size;
3905 HOST_WIDE_INT offset;
3907 aarch64_layout_frame ();
3908 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
3909 + crtl->outgoing_args_size
3910 + cfun->machine->saved_varargs_size);
3912 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
3913 offset = frame_size;
3915 if (to == HARD_FRAME_POINTER_REGNUM)
3917 if (from == ARG_POINTER_REGNUM)
3918 return offset - crtl->outgoing_args_size;
3920 if (from == FRAME_POINTER_REGNUM)
3921 return cfun->machine->frame.saved_regs_size;
3924 if (to == STACK_POINTER_REGNUM)
3926 if (from == FRAME_POINTER_REGNUM)
3928 HOST_WIDE_INT elim = crtl->outgoing_args_size
3929 + cfun->machine->frame.saved_regs_size
3930 - cfun->machine->frame.fp_lr_offset;
3931 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
3932 return elim;
3936 return offset;
3940 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
3941 previous frame. */
3944 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
3946 if (count != 0)
3947 return const0_rtx;
3948 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
3952 static void
3953 aarch64_asm_trampoline_template (FILE *f)
3955 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
3956 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
3957 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
3958 assemble_aligned_integer (4, const0_rtx);
3959 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3960 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3963 unsigned
3964 aarch64_trampoline_size (void)
3966 return 32; /* 3 insns + padding + 2 dwords. */
3969 static void
3970 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3972 rtx fnaddr, mem, a_tramp;
3974 /* Don't need to copy the trailing D-words; we fill those in below. */
3975 emit_block_move (m_tramp, assemble_trampoline_template (),
3976 GEN_INT (TRAMPOLINE_SIZE - 16), BLOCK_OP_NORMAL);
3977 mem = adjust_address (m_tramp, DImode, 16);
3978 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3979 emit_move_insn (mem, fnaddr);
3981 mem = adjust_address (m_tramp, DImode, 24);
3982 emit_move_insn (mem, chain_value);
3984 /* XXX We should really define a "clear_cache" pattern and use
3985 gen_clear_cache(). */
3986 a_tramp = XEXP (m_tramp, 0);
3987 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3988 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3989 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
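/* The 32-byte trampoline assembled by the two functions above is laid out
   as follows (offsets in bytes; the register names depend on IP1_REGNUM and
   STATIC_CHAIN_REGNUM):

	 0:	ldr	<ip1>, .+16		// load the target address
	 4:	ldr	<chain>, .+20		// load the static chain value
	 8:	br	<ip1>
	12:	.word	0			// padding
	16:	.dword	<function address>	// filled in by aarch64_trampoline_init
	24:	.dword	<static chain value>
*/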
3992 static unsigned char
3993 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
3995 switch (regclass)
3997 case CORE_REGS:
3998 case POINTER_REGS:
3999 case GENERAL_REGS:
4000 case ALL_REGS:
4001 case FP_REGS:
4002 case FP_LO_REGS:
4003 return
4004 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4005 (GET_MODE_SIZE (mode) + 7) / 8;
4006 case STACK_REG:
4007 return 1;
4009 case NO_REGS:
4010 return 0;
4012 default:
4013 break;
4015 gcc_unreachable ();
4018 static reg_class_t
4019 aarch64_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t regclass)
4021 return ((regclass == POINTER_REGS || regclass == STACK_REG)
4022 ? GENERAL_REGS : regclass);
4025 void
4026 aarch64_asm_output_labelref (FILE* f, const char *name)
4028 asm_fprintf (f, "%U%s", name);
4031 static void
4032 aarch64_elf_asm_constructor (rtx symbol, int priority)
4034 if (priority == DEFAULT_INIT_PRIORITY)
4035 default_ctor_section_asm_out_constructor (symbol, priority);
4036 else
4038 section *s;
4039 char buf[18];
4040 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4041 s = get_section (buf, SECTION_WRITE, NULL);
4042 switch_to_section (s);
4043 assemble_align (POINTER_SIZE);
4044 fputs ("\t.dword\t", asm_out_file);
4045 output_addr_const (asm_out_file, symbol);
4046 fputc ('\n', asm_out_file);
4050 static void
4051 aarch64_elf_asm_destructor (rtx symbol, int priority)
4053 if (priority == DEFAULT_INIT_PRIORITY)
4054 default_dtor_section_asm_out_destructor (symbol, priority);
4055 else
4057 section *s;
4058 char buf[18];
4059 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4060 s = get_section (buf, SECTION_WRITE, NULL);
4061 switch_to_section (s);
4062 assemble_align (POINTER_SIZE);
4063 fputs ("\t.dword\t", asm_out_file);
4064 output_addr_const (asm_out_file, symbol);
4065 fputc ('\n', asm_out_file);
4069 const char*
4070 aarch64_output_casesi (rtx *operands)
4072 char buf[100];
4073 char label[100];
4074 rtx diff_vec = PATTERN (next_active_insn (operands[2]));
4075 int index;
4076 static const char *const patterns[4][2] =
4079 "ldrb\t%w3, [%0,%w1,uxtw]",
4080 "add\t%3, %4, %w3, sxtb #2"
4083 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4084 "add\t%3, %4, %w3, sxth #2"
4087 "ldr\t%w3, [%0,%w1,uxtw #2]",
4088 "add\t%3, %4, %w3, sxtw #2"
4090 /* We assume that DImode is only generated when not optimizing and
4091 that we don't really need 64-bit address offsets. That would
4092 imply an object file with 8GB of code in a single function! */
4094 "ldr\t%w3, [%0,%w1,uxtw #2]",
4095 "add\t%3, %4, %w3, sxtw #2"
4099 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4101 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4103 gcc_assert (index >= 0 && index <= 3);
4105 /* Need to implement table size reduction, by changing the code below. */
4106 output_asm_insn (patterns[index][0], operands);
4107 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4108 snprintf (buf, sizeof (buf),
4109 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4110 output_asm_insn (buf, operands);
4111 output_asm_insn (patterns[index][1], operands);
4112 output_asm_insn ("br\t%3", operands);
4113 assemble_label (asm_out_file, label);
4114 return "";
4118 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4119 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4120 operator. */
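/* For illustration: with a shift of 0, a mask of 0xff yields 8 (UXTB);
   with a shift of 2, a mask of 0x3fc (0xff << 2) also yields 8; with a
   shift of 1, a mask of 0x1fffe (0xffff << 1) yields 16 (UXTH).  Any
   mask that is not a contiguous 8-, 16- or 32-bit field starting at
   SHIFT gives 0.  */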
4123 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4125 if (shift >= 0 && shift <= 3)
4127 int size;
4128 for (size = 8; size <= 32; size *= 2)
4130 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4131 if (mask == bits << shift)
4132 return size;
4135 return 0;
4138 static bool
4139 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4140 const_rtx x ATTRIBUTE_UNUSED)
4142 /* We can't use blocks for constants when we're using a per-function
4143 constant pool. */
4144 return false;
4147 static section *
4148 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4149 rtx x ATTRIBUTE_UNUSED,
4150 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4152 /* Force all constant pool entries into the current function section. */
4153 return function_section (current_function_decl);
4157 /* Costs. */
4159 /* Helper function for rtx cost calculation. Strip a shift expression
4160 from X. Returns the inner operand if successful, or the original
4161 expression on failure. */
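/* For example, (ashift (reg) (const_int 3)) and (mult (reg) (const_int 8))
   both strip down to (reg); a MULT by a constant that is not a power of
   two is left untouched.  */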
4162 static rtx
4163 aarch64_strip_shift (rtx x)
4165 rtx op = x;
4167 if ((GET_CODE (op) == ASHIFT
4168 || GET_CODE (op) == ASHIFTRT
4169 || GET_CODE (op) == LSHIFTRT)
4170 && CONST_INT_P (XEXP (op, 1)))
4171 return XEXP (op, 0);
4173 if (GET_CODE (op) == MULT
4174 && CONST_INT_P (XEXP (op, 1))
4175 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4176 return XEXP (op, 0);
4178 return x;
4181 /* Helper function for rtx cost calculation. Strip a shift or extend
4182 expression from X. Returns the inner operand if successful, or the
4183 original expression on failure. We deal with a number of possible
4184 canonicalization variations here. */
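/* For example, (ashift (sign_extend (reg)) (const_int 2)), as generated
   for an SXTW #2 extended-register operand, strips down to (reg); the
   AND-of-MULT form that canonicalization uses for zero-extends is
   handled the same way.  */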
4185 static rtx
4186 aarch64_strip_shift_or_extend (rtx x)
4188 rtx op = x;
4190 /* Zero and sign extraction of a widened value. */
4191 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4192 && XEXP (op, 2) == const0_rtx
4193 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4194 XEXP (op, 1)))
4195 return XEXP (XEXP (op, 0), 0);
4197 /* It can also be represented (for zero-extend) as an AND with an
4198 immediate. */
4199 if (GET_CODE (op) == AND
4200 && GET_CODE (XEXP (op, 0)) == MULT
4201 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4202 && CONST_INT_P (XEXP (op, 1))
4203 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4204 INTVAL (XEXP (op, 1))) != 0)
4205 return XEXP (XEXP (op, 0), 0);
4207 /* Now handle extended register, as this may also have an optional
4208 left shift by 1..4. */
4209 if (GET_CODE (op) == ASHIFT
4210 && CONST_INT_P (XEXP (op, 1))
4211 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4212 op = XEXP (op, 0);
4214 if (GET_CODE (op) == ZERO_EXTEND
4215 || GET_CODE (op) == SIGN_EXTEND)
4216 op = XEXP (op, 0);
4218 if (op != x)
4219 return op;
4221 return aarch64_strip_shift (x);
4224 /* Calculate the cost of calculating X, storing it in *COST. Result
4225 is true if the total cost of the operation has now been calculated. */
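/* A false return tells the generic rtx_cost machinery to recurse into
   the operands of X itself; the "all arguments need to be in registers"
   cases below rely on this and only add the per-insn extra cost.  */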
4226 static bool
4227 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4228 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4230 rtx op0, op1;
4231 const struct cpu_rtx_cost_table *extra_cost
4232 = aarch64_tune_params->insn_extra_cost;
4234 switch (code)
4236 case SET:
4237 op0 = SET_DEST (x);
4238 op1 = SET_SRC (x);
4240 switch (GET_CODE (op0))
4242 case MEM:
4243 if (speed)
4244 *cost += extra_cost->memory_store;
4246 if (op1 != const0_rtx)
4247 *cost += rtx_cost (op1, SET, 1, speed);
4248 return true;
4250 case SUBREG:
4251 if (! REG_P (SUBREG_REG (op0)))
4252 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4253 /* Fall through. */
4254 case REG:
4255 /* Cost is just the cost of the RHS of the set. */
4256 *cost += rtx_cost (op1, SET, 1, true);
4257 return true;
4259 case ZERO_EXTRACT: /* Bit-field insertion. */
4260 case SIGN_EXTRACT:
4261 /* Strip any redundant widening of the RHS to meet the width of
4262 the target. */
4263 if (GET_CODE (op1) == SUBREG)
4264 op1 = SUBREG_REG (op1);
4265 if ((GET_CODE (op1) == ZERO_EXTEND
4266 || GET_CODE (op1) == SIGN_EXTEND)
4267 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4268 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4269 >= INTVAL (XEXP (op0, 1))))
4270 op1 = XEXP (op1, 0);
4271 *cost += rtx_cost (op1, SET, 1, speed);
4272 return true;
4274 default:
4275 break;
4277 return false;
4279 case MEM:
4280 if (speed)
4281 *cost += extra_cost->memory_load;
4283 return true;
4285 case NEG:
4286 op0 = CONST0_RTX (GET_MODE (x));
4287 op1 = XEXP (x, 0);
4288 goto cost_minus;
4290 case COMPARE:
4291 op0 = XEXP (x, 0);
4292 op1 = XEXP (x, 1);
4294 if (op1 == const0_rtx
4295 && GET_CODE (op0) == AND)
4297 x = op0;
4298 goto cost_logic;
4301 /* Comparisons can work if the order is swapped.
4302 Canonicalization puts the more complex operation first, but
4303 we want it in op1. */
4304 if (! (REG_P (op0)
4305 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4307 op0 = XEXP (x, 1);
4308 op1 = XEXP (x, 0);
4310 goto cost_minus;
4312 case MINUS:
4313 op0 = XEXP (x, 0);
4314 op1 = XEXP (x, 1);
4316 cost_minus:
4317 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4318 || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4319 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4321 if (op0 != const0_rtx)
4322 *cost += rtx_cost (op0, MINUS, 0, speed);
4324 if (CONST_INT_P (op1))
4326 if (!aarch64_uimm12_shift (INTVAL (op1)))
4327 *cost += rtx_cost (op1, MINUS, 1, speed);
4329 else
4331 op1 = aarch64_strip_shift_or_extend (op1);
4332 *cost += rtx_cost (op1, MINUS, 1, speed);
4334 return true;
4337 return false;
4339 case PLUS:
4340 op0 = XEXP (x, 0);
4341 op1 = XEXP (x, 1);
4343 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4345 if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4347 *cost += rtx_cost (op0, PLUS, 0, speed);
4349 else
4351 rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4353 if (new_op0 == op0
4354 && GET_CODE (op0) == MULT)
4356 if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4357 && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4358 || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4359 && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4361 *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4362 speed)
4363 + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4364 speed)
4365 + rtx_cost (op1, PLUS, 1, speed));
4366 if (speed)
4367 *cost += extra_cost->int_multiply_extend_add;
4368 return true;
4370 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4371 + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4372 + rtx_cost (op1, PLUS, 1, speed));
4374 if (speed)
4375 *cost += extra_cost->int_multiply_add;
4378 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4379 + rtx_cost (op1, PLUS, 1, speed));
4381 return true;
4384 return false;
4386 case IOR:
4387 case XOR:
4388 case AND:
4389 cost_logic:
4390 op0 = XEXP (x, 0);
4391 op1 = XEXP (x, 1);
4393 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4395 if (CONST_INT_P (op1)
4396 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4398 *cost += rtx_cost (op0, AND, 0, speed);
4400 else
4402 if (GET_CODE (op0) == NOT)
4403 op0 = XEXP (op0, 0);
4404 op0 = aarch64_strip_shift (op0);
4405 *cost += (rtx_cost (op0, AND, 0, speed)
4406 + rtx_cost (op1, AND, 1, speed));
4408 return true;
4410 return false;
4412 case ZERO_EXTEND:
4413 if ((GET_MODE (x) == DImode
4414 && GET_MODE (XEXP (x, 0)) == SImode)
4415 || GET_CODE (XEXP (x, 0)) == MEM)
4417 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4418 return true;
4420 return false;
4422 case SIGN_EXTEND:
4423 if (GET_CODE (XEXP (x, 0)) == MEM)
4425 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4426 return true;
4428 return false;
4430 case ROTATE:
4431 if (!CONST_INT_P (XEXP (x, 1)))
4432 *cost += COSTS_N_INSNS (2);
4433 /* Fall through. */
4434 case ROTATERT:
4435 case LSHIFTRT:
4436 case ASHIFT:
4437 case ASHIFTRT:
4439 /* Shifting by a register often takes an extra cycle. */
4440 if (speed && !CONST_INT_P (XEXP (x, 1)))
4441 *cost += extra_cost->register_shift;
4443 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4444 return true;
4446 case HIGH:
4447 if (!CONSTANT_P (XEXP (x, 0)))
4448 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4449 return true;
4451 case LO_SUM:
4452 if (!CONSTANT_P (XEXP (x, 1)))
4453 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4454 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4455 return true;
4457 case ZERO_EXTRACT:
4458 case SIGN_EXTRACT:
4459 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4460 return true;
4462 case MULT:
4463 op0 = XEXP (x, 0);
4464 op1 = XEXP (x, 1);
4466 *cost = COSTS_N_INSNS (1);
4467 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4469 if (CONST_INT_P (op1)
4470 && exact_log2 (INTVAL (op1)) > 0)
4472 *cost += rtx_cost (op0, ASHIFT, 0, speed);
4473 return true;
4476 if ((GET_CODE (op0) == ZERO_EXTEND
4477 && GET_CODE (op1) == ZERO_EXTEND)
4478 || (GET_CODE (op0) == SIGN_EXTEND
4479 && GET_CODE (op1) == SIGN_EXTEND))
4481 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4482 + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4483 if (speed)
4484 *cost += extra_cost->int_multiply_extend;
4485 return true;
4488 if (speed)
4489 *cost += extra_cost->int_multiply;
4491 else if (speed)
4493 if (GET_MODE (x) == DFmode)
4494 *cost += extra_cost->double_multiply;
4495 else if (GET_MODE (x) == SFmode)
4496 *cost += extra_cost->float_multiply;
4499 return false; /* All arguments need to be in registers. */
4501 case MOD:
4502 case UMOD:
4503 *cost = COSTS_N_INSNS (2);
4504 if (speed)
4506 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4507 *cost += (extra_cost->int_multiply_add
4508 + extra_cost->int_divide);
4509 else if (GET_MODE (x) == DFmode)
4510 *cost += (extra_cost->double_multiply
4511 + extra_cost->double_divide);
4512 else if (GET_MODE (x) == SFmode)
4513 *cost += (extra_cost->float_multiply
4514 + extra_cost->float_divide);
4516 return false; /* All arguments need to be in registers. */
4518 case DIV:
4519 case UDIV:
4520 *cost = COSTS_N_INSNS (1);
4521 if (speed)
4523 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4524 *cost += extra_cost->int_divide;
4525 else if (GET_MODE (x) == DFmode)
4526 *cost += extra_cost->double_divide;
4527 else if (GET_MODE (x) == SFmode)
4528 *cost += extra_cost->float_divide;
4530 return false; /* All arguments need to be in registers. */
4532 default:
4533 break;
4535 return false;
4538 static int
4539 aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4540 enum machine_mode mode ATTRIBUTE_UNUSED,
4541 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4543 enum rtx_code c = GET_CODE (x);
4544 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4546 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4547 return addr_cost->pre_modify;
4549 if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4550 return addr_cost->post_modify;
4552 if (c == PLUS)
4554 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4555 return addr_cost->imm_offset;
4556 else if (GET_CODE (XEXP (x, 0)) == MULT
4557 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4558 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4559 return addr_cost->register_extend;
4561 return addr_cost->register_offset;
4563 else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4564 return addr_cost->imm_offset;
4566 return 0;
4569 static int
4570 aarch64_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4571 reg_class_t from, reg_class_t to)
4573 const struct cpu_regmove_cost *regmove_cost
4574 = aarch64_tune_params->regmove_cost;
4576 if (from == GENERAL_REGS && to == GENERAL_REGS)
4577 return regmove_cost->GP2GP;
4578 else if (from == GENERAL_REGS)
4579 return regmove_cost->GP2FP;
4580 else if (to == GENERAL_REGS)
4581 return regmove_cost->FP2GP;
4583 /* When AdvSIMD instructions are disabled it is not possible to move
4584 a 128-bit value directly between Q registers. This is handled in
4585 secondary reload. A general register is used as a scratch to move
4586 the upper DI value and the lower DI value is moved directly,
4587 hence the cost is the sum of three moves. */
4589 if (! TARGET_SIMD && GET_MODE_SIZE (from) == 128 && GET_MODE_SIZE (to) == 128)
4590 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4592 return regmove_cost->FP2FP;
4595 static int
4596 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4597 reg_class_t rclass ATTRIBUTE_UNUSED,
4598 bool in ATTRIBUTE_UNUSED)
4600 return aarch64_tune_params->memmov_cost;
4603 static void initialize_aarch64_code_model (void);
4605 /* Parse the architecture extension string. */
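/* For example, given "+fp+nocrypto" (assuming the usual extension names),
   the loop below first matches "fp" and ORs in its flags_on bits, then
   matches "crypto" with the "no" prefix stripped and clears its
   flags_off bits.  A bare "+no" is rejected with "missing feature
   modifier".  */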
4607 static void
4608 aarch64_parse_extension (char *str)
4610 /* The extension string is parsed left to right. */
4611 const struct aarch64_option_extension *opt = NULL;
4613 /* Flag to say whether we are adding or removing an extension. */
4614 int adding_ext = -1;
4616 while (str != NULL && *str != 0)
4618 char *ext;
4619 size_t len;
4621 str++;
4622 ext = strchr (str, '+');
4624 if (ext != NULL)
4625 len = ext - str;
4626 else
4627 len = strlen (str);
4629 if (len >= 2 && strncmp (str, "no", 2) == 0)
4631 adding_ext = 0;
4632 len -= 2;
4633 str += 2;
4635 else if (len > 0)
4636 adding_ext = 1;
4638 if (len == 0)
4640 error ("missing feature modifier after %qs", "+no");
4641 return;
4644 /* Scan over the extensions table trying to find an exact match. */
4645 for (opt = all_extensions; opt->name != NULL; opt++)
4647 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
4649 /* Add or remove the extension. */
4650 if (adding_ext)
4651 aarch64_isa_flags |= opt->flags_on;
4652 else
4653 aarch64_isa_flags &= ~(opt->flags_off);
4654 break;
4658 if (opt->name == NULL)
4660 /* Extension not found in list. */
4661 error ("unknown feature modifier %qs", str);
4662 return;
4665 str = ext;
4668 return;
4671 /* Parse the ARCH string. */
4673 static void
4674 aarch64_parse_arch (void)
4676 char *ext;
4677 const struct processor *arch;
4678 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
4679 size_t len;
4681 strcpy (str, aarch64_arch_string);
4683 ext = strchr (str, '+');
4685 if (ext != NULL)
4686 len = ext - str;
4687 else
4688 len = strlen (str);
4690 if (len == 0)
4692 error ("missing arch name in -march=%qs", str);
4693 return;
4696 /* Loop through the list of supported ARCHs to find a match. */
4697 for (arch = all_architectures; arch->name != NULL; arch++)
4699 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
4701 selected_arch = arch;
4702 aarch64_isa_flags = selected_arch->flags;
4703 selected_cpu = &all_cores[selected_arch->core];
4705 if (ext != NULL)
4707 /* ARCH string contains at least one extension. */
4708 aarch64_parse_extension (ext);
4711 return;
4715 /* ARCH name not found in list. */
4716 error ("unknown value %qs for -march", str);
4717 return;
4720 /* Parse the CPU string. */
4722 static void
4723 aarch64_parse_cpu (void)
4725 char *ext;
4726 const struct processor *cpu;
4727 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
4728 size_t len;
4730 strcpy (str, aarch64_cpu_string);
4732 ext = strchr (str, '+');
4734 if (ext != NULL)
4735 len = ext - str;
4736 else
4737 len = strlen (str);
4739 if (len == 0)
4741 error ("missing cpu name in -mcpu=%qs", str);
4742 return;
4745 /* Loop through the list of supported CPUs to find a match. */
4746 for (cpu = all_cores; cpu->name != NULL; cpu++)
4748 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
4750 selected_cpu = cpu;
4751 aarch64_isa_flags = selected_cpu->flags;
4753 if (ext != NULL)
4755 /* CPU string contains at least one extension. */
4756 aarch64_parse_extension (ext);
4759 return;
4763 /* CPU name not found in list. */
4764 error ("unknown value %qs for -mcpu", str);
4765 return;
4768 /* Parse the TUNE string. */
4770 static void
4771 aarch64_parse_tune (void)
4773 const struct processor *cpu;
4774 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
4775 strcpy (str, aarch64_tune_string);
4777 /* Loop through the list of supported CPUs to find a match. */
4778 for (cpu = all_cores; cpu->name != NULL; cpu++)
4780 if (strcmp (cpu->name, str) == 0)
4782 selected_tune = cpu;
4783 return;
4787 /* CPU name not found in list. */
4788 error ("unknown value %qs for -mtune", str);
4789 return;
4793 /* Implement TARGET_OPTION_OVERRIDE. */
4795 static void
4796 aarch64_override_options (void)
4798 /* -march wins over -mcpu, so when -march is defined, -mcpu takes the
4799 same value; otherwise -march remains undefined. -mtune can be used
4800 with either -march or -mcpu. */
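/* For example, with "-march=armv8-a+nofp -mcpu=cortex-a53" (assuming
   that core is listed in all_cores), the -mcpu value is discarded when
   aarch64_cpu_string is cleared below, the core is taken from the
   architecture table instead, and only an explicit -mtune would change
   the tuning target.  */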
4802 if (aarch64_arch_string)
4804 aarch64_parse_arch ();
4805 aarch64_cpu_string = NULL;
4808 if (aarch64_cpu_string)
4810 aarch64_parse_cpu ();
4811 selected_arch = NULL;
4814 if (aarch64_tune_string)
4816 aarch64_parse_tune ();
4819 initialize_aarch64_code_model ();
4821 aarch64_build_bitmask_table ();
4823 /* This target defaults to strict volatile bitfields. */
4824 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
4825 flag_strict_volatile_bitfields = 1;
4827 /* If the user did not specify a processor, choose the default
4828 one for them. This will be the CPU set during configuration using
4829 --with-cpu, otherwise it is "generic". */
4830 if (!selected_cpu)
4832 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
4833 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
4836 gcc_assert (selected_cpu);
4838 /* The selected cpu may be an architecture, so look up tuning by core ID. */
4839 if (!selected_tune)
4840 selected_tune = &all_cores[selected_cpu->core];
4842 aarch64_tune_flags = selected_tune->flags;
4843 aarch64_tune = selected_tune->core;
4844 aarch64_tune_params = selected_tune->tune;
4846 aarch64_override_options_after_change ();
4849 /* Implement targetm.override_options_after_change. */
4851 static void
4852 aarch64_override_options_after_change (void)
4854 faked_omit_frame_pointer = false;
4856 /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
4857 that aarch64_frame_pointer_required will be called. We need to remember
4858 whether flag_omit_frame_pointer was turned on normally or just faked. */
4860 if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
4862 flag_omit_frame_pointer = true;
4863 faked_omit_frame_pointer = true;
4867 static struct machine_function *
4868 aarch64_init_machine_status (void)
4870 struct machine_function *machine;
4871 machine = ggc_alloc_cleared_machine_function ();
4872 return machine;
4875 void
4876 aarch64_init_expanders (void)
4878 init_machine_status = aarch64_init_machine_status;
4881 /* Select the code model: switch to the PIC variant of the chosen model when -fpic/-fPIC is in effect, and reject unsupported combinations. */
4882 static void
4883 initialize_aarch64_code_model (void)
4885 if (flag_pic)
4887 switch (aarch64_cmodel_var)
4889 case AARCH64_CMODEL_TINY:
4890 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
4891 break;
4892 case AARCH64_CMODEL_SMALL:
4893 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
4894 break;
4895 case AARCH64_CMODEL_LARGE:
4896 sorry ("code model %qs with -f%s", "large",
4897 flag_pic > 1 ? "PIC" : "pic");
4898 default:
4899 gcc_unreachable ();
4902 else
4903 aarch64_cmodel = aarch64_cmodel_var;
4906 /* Return true if SYMBOL_REF X binds locally. */
4908 static bool
4909 aarch64_symbol_binds_local_p (const_rtx x)
4911 return (SYMBOL_REF_DECL (x)
4912 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
4913 : SYMBOL_REF_LOCAL_P (x));
4916 /* Return true if SYMBOL_REF X is thread local. */
4917 static bool
4918 aarch64_tls_symbol_p (rtx x)
4920 if (! TARGET_HAVE_TLS)
4921 return false;
4923 if (GET_CODE (x) != SYMBOL_REF)
4924 return false;
4926 return SYMBOL_REF_TLS_MODEL (x) != 0;
4929 /* Classify a TLS symbol into one of the TLS kinds. */
4930 enum aarch64_symbol_type
4931 aarch64_classify_tls_symbol (rtx x)
4933 enum tls_model tls_kind = tls_symbolic_operand_type (x);
4935 switch (tls_kind)
4937 case TLS_MODEL_GLOBAL_DYNAMIC:
4938 case TLS_MODEL_LOCAL_DYNAMIC:
4939 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
4941 case TLS_MODEL_INITIAL_EXEC:
4942 return SYMBOL_SMALL_GOTTPREL;
4944 case TLS_MODEL_LOCAL_EXEC:
4945 return SYMBOL_SMALL_TPREL;
4947 case TLS_MODEL_EMULATED:
4948 case TLS_MODEL_NONE:
4949 return SYMBOL_FORCE_TO_MEM;
4951 default:
4952 gcc_unreachable ();
4956 /* Return the method that should be used to access SYMBOL_REF or
4957 LABEL_REF X in context CONTEXT. */
4958 enum aarch64_symbol_type
4959 aarch64_classify_symbol (rtx x,
4960 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
4962 if (GET_CODE (x) == LABEL_REF)
4964 switch (aarch64_cmodel)
4966 case AARCH64_CMODEL_LARGE:
4967 return SYMBOL_FORCE_TO_MEM;
4969 case AARCH64_CMODEL_TINY_PIC:
4970 case AARCH64_CMODEL_TINY:
4971 case AARCH64_CMODEL_SMALL_PIC:
4972 case AARCH64_CMODEL_SMALL:
4973 return SYMBOL_SMALL_ABSOLUTE;
4975 default:
4976 gcc_unreachable ();
4980 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4982 switch (aarch64_cmodel)
4984 case AARCH64_CMODEL_LARGE:
4985 return SYMBOL_FORCE_TO_MEM;
4987 case AARCH64_CMODEL_TINY:
4988 case AARCH64_CMODEL_SMALL:
4990 /* This is needed to get DFmode and TImode constants loaded from
4991 the constant pool. It is necessary to dump TImode values into
4992 the constant pool because we don't handle TImode constant loads
4993 properly yet. */
4994 if (CONSTANT_POOL_ADDRESS_P (x))
4995 return SYMBOL_FORCE_TO_MEM;
4997 if (aarch64_tls_symbol_p (x))
4998 return aarch64_classify_tls_symbol (x);
5000 if (SYMBOL_REF_WEAK (x))
5001 return SYMBOL_FORCE_TO_MEM;
5003 return SYMBOL_SMALL_ABSOLUTE;
5005 case AARCH64_CMODEL_TINY_PIC:
5006 case AARCH64_CMODEL_SMALL_PIC:
5008 if (CONSTANT_POOL_ADDRESS_P (x))
5009 return SYMBOL_FORCE_TO_MEM;
5011 if (aarch64_tls_symbol_p (x))
5012 return aarch64_classify_tls_symbol (x);
5014 if (!aarch64_symbol_binds_local_p (x))
5015 return SYMBOL_SMALL_GOT;
5017 return SYMBOL_SMALL_ABSOLUTE;
5019 default:
5020 gcc_unreachable ();
5022 /* By default push everything into the constant pool. */
5023 return SYMBOL_FORCE_TO_MEM;
5026 /* Return true if X is a symbolic constant that can be used in context
5027 CONTEXT. If it is, store the type of the symbol in *SYMBOL_TYPE. */
5029 bool
5030 aarch64_symbolic_constant_p (rtx x, enum aarch64_symbol_context context,
5031 enum aarch64_symbol_type *symbol_type)
5033 rtx offset;
5034 split_const (x, &x, &offset);
5035 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)
5036 *symbol_type = aarch64_classify_symbol (x, context);
5037 else
5038 return false;
5040 /* No checking of offset at this point. */
5041 return true;
5044 bool
5045 aarch64_constant_address_p (rtx x)
5047 return (CONSTANT_P (x) && memory_address_p (DImode, x));
5050 bool
5051 aarch64_legitimate_pic_operand_p (rtx x)
5053 if (GET_CODE (x) == SYMBOL_REF
5054 || (GET_CODE (x) == CONST
5055 && GET_CODE (XEXP (x, 0)) == PLUS
5056 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5057 return false;
5059 return true;
5062 /* Return true if X holds either a quarter-precision or
5063 floating-point +0.0 constant. */
5064 static bool
5065 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5067 if (!CONST_DOUBLE_P (x))
5068 return false;
5070 /* TODO: We could handle moving 0.0 to a TFmode register,
5071 but first we would like to refactor the movtf_aarch64
5072 pattern to be more amenable to splitting moves properly and
5073 to gating correctly on TARGET_SIMD. For now, reject all
5074 constants which are not destined for SFmode or DFmode registers. */
5075 if (!(mode == SFmode || mode == DFmode))
5076 return false;
5078 if (aarch64_float_const_zero_rtx_p (x))
5079 return true;
5080 return aarch64_float_const_representable_p (x);
5083 static bool
5084 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5086 /* Do not allow vector struct mode constants. We could support
5087 0 and -1 easily, but they need support in aarch64-simd.md. */
5088 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5089 return false;
5091 /* This could probably go away because
5092 we now decompose CONST_INTs according to expand_mov_immediate. */
5093 if ((GET_CODE (x) == CONST_VECTOR
5094 && aarch64_simd_valid_immediate (x, mode, false,
5095 NULL, NULL, NULL, NULL, NULL) != -1)
5096 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5097 return !targetm.cannot_force_const_mem (mode, x);
5099 if (GET_CODE (x) == HIGH
5100 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5101 return true;
5103 return aarch64_constant_address_p (x);
5107 aarch64_load_tp (rtx target)
5109 if (!target
5110 || GET_MODE (target) != Pmode
5111 || !register_operand (target, Pmode))
5112 target = gen_reg_rtx (Pmode);
5114 /* Can return in any reg. */
5115 emit_insn (gen_aarch64_load_tp_hard (target));
5116 return target;
5119 /* On AAPCS systems, this is the "struct __va_list". */
5120 static GTY(()) tree va_list_type;
5122 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5123 Return the type to use as __builtin_va_list.
5125 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5127 struct __va_list
5129 void *__stack;
5130 void *__gr_top;
5131 void *__vr_top;
5132 int __gr_offs;
5133 int __vr_offs;
5134 }; */
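/* For illustration, assuming the usual NUM_ARG_REGS == 8,
   NUM_FP_ARG_REGS == 8, UNITS_PER_WORD == 8 and UNITS_PER_VREG == 16:
   for "void f (int a, double b, ...)" one GPR and one VR are consumed
   by the named arguments, so va_start sets __gr_offs to -(8 - 1) * 8
   == -56 and __vr_offs to -(8 - 1) * 16 == -112, with __gr_top and
   __vr_top pointing just past the respective register save areas.  */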
5136 static tree
5137 aarch64_build_builtin_va_list (void)
5139 tree va_list_name;
5140 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5142 /* Create the type. */
5143 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5144 /* Give it the required name. */
5145 va_list_name = build_decl (BUILTINS_LOCATION,
5146 TYPE_DECL,
5147 get_identifier ("__va_list"),
5148 va_list_type);
5149 DECL_ARTIFICIAL (va_list_name) = 1;
5150 TYPE_NAME (va_list_type) = va_list_name;
5151 TYPE_STUB_DECL (va_list_type) = va_list_name;
5153 /* Create the fields. */
5154 f_stack = build_decl (BUILTINS_LOCATION,
5155 FIELD_DECL, get_identifier ("__stack"),
5156 ptr_type_node);
5157 f_grtop = build_decl (BUILTINS_LOCATION,
5158 FIELD_DECL, get_identifier ("__gr_top"),
5159 ptr_type_node);
5160 f_vrtop = build_decl (BUILTINS_LOCATION,
5161 FIELD_DECL, get_identifier ("__vr_top"),
5162 ptr_type_node);
5163 f_groff = build_decl (BUILTINS_LOCATION,
5164 FIELD_DECL, get_identifier ("__gr_offs"),
5165 integer_type_node);
5166 f_vroff = build_decl (BUILTINS_LOCATION,
5167 FIELD_DECL, get_identifier ("__vr_offs"),
5168 integer_type_node);
5170 DECL_ARTIFICIAL (f_stack) = 1;
5171 DECL_ARTIFICIAL (f_grtop) = 1;
5172 DECL_ARTIFICIAL (f_vrtop) = 1;
5173 DECL_ARTIFICIAL (f_groff) = 1;
5174 DECL_ARTIFICIAL (f_vroff) = 1;
5176 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5177 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5178 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5179 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5180 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5182 TYPE_FIELDS (va_list_type) = f_stack;
5183 DECL_CHAIN (f_stack) = f_grtop;
5184 DECL_CHAIN (f_grtop) = f_vrtop;
5185 DECL_CHAIN (f_vrtop) = f_groff;
5186 DECL_CHAIN (f_groff) = f_vroff;
5188 /* Compute its layout. */
5189 layout_type (va_list_type);
5191 return va_list_type;
5194 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5195 static void
5196 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5198 const CUMULATIVE_ARGS *cum;
5199 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5200 tree stack, grtop, vrtop, groff, vroff;
5201 tree t;
5202 int gr_save_area_size;
5203 int vr_save_area_size;
5204 int vr_offset;
5206 cum = &crtl->args.info;
5207 gr_save_area_size
5208 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5209 vr_save_area_size
5210 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5212 if (TARGET_GENERAL_REGS_ONLY)
5214 if (cum->aapcs_nvrn > 0)
5215 sorry ("%qs and floating point or vector arguments",
5216 "-mgeneral-regs-only");
5217 vr_save_area_size = 0;
5220 f_stack = TYPE_FIELDS (va_list_type_node);
5221 f_grtop = DECL_CHAIN (f_stack);
5222 f_vrtop = DECL_CHAIN (f_grtop);
5223 f_groff = DECL_CHAIN (f_vrtop);
5224 f_vroff = DECL_CHAIN (f_groff);
5226 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5227 NULL_TREE);
5228 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5229 NULL_TREE);
5230 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5231 NULL_TREE);
5232 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5233 NULL_TREE);
5234 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5235 NULL_TREE);
5237 /* Emit code to initialize STACK, which points to the next varargs stack
5238 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
5239 by named arguments. STACK is 8-byte aligned. */
5240 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5241 if (cum->aapcs_stack_size > 0)
5242 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5243 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5244 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5246 /* Emit code to initialize GRTOP, the top of the GR save area.
5247 virtual_incoming_args_rtx should have been 16 byte aligned. */
5248 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5249 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5250 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5252 /* Emit code to initialize VRTOP, the top of the VR save area.
5253 This address is gr_save_area_bytes below GRTOP, rounded
5254 down to the next 16-byte boundary. */
5255 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5256 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5257 STACK_BOUNDARY / BITS_PER_UNIT);
5259 if (vr_offset)
5260 t = fold_build_pointer_plus_hwi (t, -vr_offset);
5261 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5262 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5264 /* Emit code to initialize GROFF, the offset from GRTOP of the
5265 next GPR argument. */
5266 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5267 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5268 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5270 /* Likewise emit code to initialize VROFF, the offset from VRTOP
5271 of the next VR argument. */
5272 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5273 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5274 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5277 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
5279 static tree
5280 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5281 gimple_seq *post_p ATTRIBUTE_UNUSED)
5283 tree addr;
5284 bool indirect_p;
5285 bool is_ha; /* is HFA or HVA. */
5286 bool dw_align; /* double-word align. */
5287 enum machine_mode ag_mode = VOIDmode;
5288 int nregs;
5289 enum machine_mode mode;
5291 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5292 tree stack, f_top, f_off, off, arg, roundup, on_stack;
5293 HOST_WIDE_INT size, rsize, adjust, align;
5294 tree t, u, cond1, cond2;
5296 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5297 if (indirect_p)
5298 type = build_pointer_type (type);
5300 mode = TYPE_MODE (type);
5302 f_stack = TYPE_FIELDS (va_list_type_node);
5303 f_grtop = DECL_CHAIN (f_stack);
5304 f_vrtop = DECL_CHAIN (f_grtop);
5305 f_groff = DECL_CHAIN (f_vrtop);
5306 f_vroff = DECL_CHAIN (f_groff);
5308 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5309 f_stack, NULL_TREE);
5310 size = int_size_in_bytes (type);
5311 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5313 dw_align = false;
5314 adjust = 0;
5315 if (aarch64_vfp_is_call_or_return_candidate (mode,
5316 type,
5317 &ag_mode,
5318 &nregs,
5319 &is_ha))
5321 /* TYPE passed in fp/simd registers. */
5322 if (TARGET_GENERAL_REGS_ONLY)
5323 sorry ("%qs and floating point or vector arguments",
5324 "-mgeneral-regs-only");
5326 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5327 unshare_expr (valist), f_vrtop, NULL_TREE);
5328 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5329 unshare_expr (valist), f_vroff, NULL_TREE);
5331 rsize = nregs * UNITS_PER_VREG;
5333 if (is_ha)
5335 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5336 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5338 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5339 && size < UNITS_PER_VREG)
5341 adjust = UNITS_PER_VREG - size;
5344 else
5346 /* TYPE passed in general registers. */
5347 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5348 unshare_expr (valist), f_grtop, NULL_TREE);
5349 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5350 unshare_expr (valist), f_groff, NULL_TREE);
5351 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5352 nregs = rsize / UNITS_PER_WORD;
5354 if (align > 8)
5355 dw_align = true;
5357 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5358 && size < UNITS_PER_WORD)
5360 adjust = UNITS_PER_WORD - size;
5364 /* Get a local temporary for the field value. */
5365 off = get_initialized_tmp_var (f_off, pre_p, NULL);
5367 /* Emit code to branch if off >= 0. */
5368 t = build2 (GE_EXPR, boolean_type_node, off,
5369 build_int_cst (TREE_TYPE (off), 0));
5370 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5372 if (dw_align)
5374 /* Emit: offs = (offs + 15) & -16. */
5375 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5376 build_int_cst (TREE_TYPE (off), 15));
5377 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5378 build_int_cst (TREE_TYPE (off), -16));
5379 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5381 else
5382 roundup = NULL;
5384 /* Update ap.__[g|v]r_offs */
5385 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5386 build_int_cst (TREE_TYPE (off), rsize));
5387 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5389 /* String up. */
5390 if (roundup)
5391 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5393 /* [cond2] if (ap.__[g|v]r_offs > 0) */
5394 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5395 build_int_cst (TREE_TYPE (f_off), 0));
5396 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5398 /* String up: make sure the assignment happens before the use. */
5399 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5400 COND_EXPR_ELSE (cond1) = t;
5402 /* Prepare the trees handling the argument that is passed on the stack;
5403 the top level node will store in ON_STACK. */
5404 arg = get_initialized_tmp_var (stack, pre_p, NULL);
5405 if (align > 8)
5407 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
5408 t = fold_convert (intDI_type_node, arg);
5409 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5410 build_int_cst (TREE_TYPE (t), 15));
5411 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5412 build_int_cst (TREE_TYPE (t), -16));
5413 t = fold_convert (TREE_TYPE (arg), t);
5414 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5416 else
5417 roundup = NULL;
5418 /* Advance ap.__stack */
5419 t = fold_convert (intDI_type_node, arg);
5420 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5421 build_int_cst (TREE_TYPE (t), size + 7));
5422 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5423 build_int_cst (TREE_TYPE (t), -8));
5424 t = fold_convert (TREE_TYPE (arg), t);
5425 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5426 /* String up roundup and advance. */
5427 if (roundup)
5428 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5429 /* String up with arg */
5430 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5431 /* Big-endianness related address adjustment. */
5432 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5433 && size < UNITS_PER_WORD)
5435 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5436 size_int (UNITS_PER_WORD - size));
5437 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5440 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5441 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5443 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
5444 t = off;
5445 if (adjust)
5446 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5447 build_int_cst (TREE_TYPE (off), adjust));
5449 t = fold_convert (sizetype, t);
5450 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5452 if (is_ha)
5454 /* type ha; // treat as "struct {ftype field[n];}"
5455 ... [computing offs]
5456 for (i = 0; i <nregs; ++i, offs += 16)
5457 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5458 return ha; */
5459 int i;
5460 tree tmp_ha, field_t, field_ptr_t;
5462 /* Declare a local variable. */
5463 tmp_ha = create_tmp_var_raw (type, "ha");
5464 gimple_add_tmp_var (tmp_ha);
5466 /* Establish the base type. */
5467 switch (ag_mode)
5469 case SFmode:
5470 field_t = float_type_node;
5471 field_ptr_t = float_ptr_type_node;
5472 break;
5473 case DFmode:
5474 field_t = double_type_node;
5475 field_ptr_t = double_ptr_type_node;
5476 break;
5477 case TFmode:
5478 field_t = long_double_type_node;
5479 field_ptr_t = long_double_ptr_type_node;
5480 break;
5481 /* Half precision and quad precision are not fully supported yet.
5482 Enable the following code once that support is complete; we still
5483 need to find the correct type node for __fp16 *. */
5484 #if 0
5485 case HFmode:
5486 field_t = float_type_node;
5487 field_ptr_t = float_ptr_type_node;
5488 break;
5489 #endif
5490 case V2SImode:
5491 case V4SImode:
5493 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5494 field_t = build_vector_type_for_mode (innertype, ag_mode);
5495 field_ptr_t = build_pointer_type (field_t);
5497 break;
5498 default:
5499 gcc_assert (0);
5502 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area) */
5503 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5504 addr = t;
5505 t = fold_convert (field_ptr_t, addr);
5506 t = build2 (MODIFY_EXPR, field_t,
5507 build1 (INDIRECT_REF, field_t, tmp_ha),
5508 build1 (INDIRECT_REF, field_t, t));
5510 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
5511 for (i = 1; i < nregs; ++i)
5513 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5514 u = fold_convert (field_ptr_t, addr);
5515 u = build2 (MODIFY_EXPR, field_t,
5516 build2 (MEM_REF, field_t, tmp_ha,
5517 build_int_cst (field_ptr_t,
5518 (i *
5519 int_size_in_bytes (field_t)))),
5520 build1 (INDIRECT_REF, field_t, u));
5521 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5524 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5525 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5528 COND_EXPR_ELSE (cond2) = t;
5529 addr = fold_convert (build_pointer_type (type), cond1);
5530 addr = build_va_arg_indirect_ref (addr);
5532 if (indirect_p)
5533 addr = build_va_arg_indirect_ref (addr);
5535 return addr;
5538 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
5540 static void
5541 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5542 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5543 int no_rtl)
5545 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5546 CUMULATIVE_ARGS local_cum;
5547 int gr_saved, vr_saved;
5549 /* The caller has advanced CUM up to, but not beyond, the last named
5550 argument. Advance a local copy of CUM past the last "real" named
5551 argument, to find out how many registers are left over. */
5552 local_cum = *cum;
5553 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
5555 /* Find out how many registers we need to save. */
5556 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5557 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
5559 if (TARGET_GENERAL_REGS_ONLY)
5561 if (local_cum.aapcs_nvrn > 0)
5562 sorry ("%qs and floating point or vector arguments",
5563 "-mgeneral-regs-only");
5564 vr_saved = 0;
5567 if (!no_rtl)
5569 if (gr_saved > 0)
5571 rtx ptr, mem;
5573 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
5574 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5575 - gr_saved * UNITS_PER_WORD);
5576 mem = gen_frame_mem (BLKmode, ptr);
5577 set_mem_alias_set (mem, get_varargs_alias_set ());
5579 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5580 mem, gr_saved);
5582 if (vr_saved > 0)
5584 /* We can't use move_block_from_reg, because it will use
5585 the wrong mode, storing D regs only. */
5586 enum machine_mode mode = TImode;
5587 int off, i;
5589 /* Set OFF to the offset from virtual_incoming_args_rtx of
5590 the first vector register. The VR save area lies below
5591 the GR one, and is aligned to 16 bytes. */
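/* For example, with gr_saved == 3 and vr_saved == 2 (and the usual
   16-byte stack boundary), OFF becomes
   -AARCH64_ROUND_UP (24, 16) - 2 * 16 == -32 - 32 == -64.  */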
5592 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5593 STACK_BOUNDARY / BITS_PER_UNIT);
5594 off -= vr_saved * UNITS_PER_VREG;
5596 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
5598 rtx ptr, mem;
5600 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
5601 mem = gen_frame_mem (mode, ptr);
5602 set_mem_alias_set (mem, get_varargs_alias_set ());
5603 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
5604 off += UNITS_PER_VREG;
5609 /* We don't save the size into *PRETEND_SIZE because we want to avoid
5610 any complication of having crtl->args.pretend_args_size changed. */
5611 cfun->machine->saved_varargs_size
5612 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5613 STACK_BOUNDARY / BITS_PER_UNIT)
5614 + vr_saved * UNITS_PER_VREG);
5617 static void
5618 aarch64_conditional_register_usage (void)
5620 int i;
5621 if (!TARGET_FLOAT)
5623 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
5625 fixed_regs[i] = 1;
5626 call_used_regs[i] = 1;
5631 /* Walk down the type tree of TYPE counting consecutive base elements.
5632 If *MODEP is VOIDmode, then set it to the first valid floating point
5633 type. If a non-floating point type is found, or if a floating point
5634 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5635 otherwise return the count in the sub-tree. */
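/* For example, "struct { double d[2]; }" returns 2 with *MODEP set to
   DFmode, while "struct { double d; float f; }" returns -1 because the
   second field does not match the DFmode already recorded in *MODEP.  */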
5636 static int
5637 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5639 enum machine_mode mode;
5640 HOST_WIDE_INT size;
5642 switch (TREE_CODE (type))
5644 case REAL_TYPE:
5645 mode = TYPE_MODE (type);
5646 if (mode != DFmode && mode != SFmode && mode != TFmode)
5647 return -1;
5649 if (*modep == VOIDmode)
5650 *modep = mode;
5652 if (*modep == mode)
5653 return 1;
5655 break;
5657 case COMPLEX_TYPE:
5658 mode = TYPE_MODE (TREE_TYPE (type));
5659 if (mode != DFmode && mode != SFmode && mode != TFmode)
5660 return -1;
5662 if (*modep == VOIDmode)
5663 *modep = mode;
5665 if (*modep == mode)
5666 return 2;
5668 break;
5670 case VECTOR_TYPE:
5671 /* Use V2SImode and V4SImode as representatives of all 64-bit
5672 and 128-bit vector types. */
5673 size = int_size_in_bytes (type);
5674 switch (size)
5676 case 8:
5677 mode = V2SImode;
5678 break;
5679 case 16:
5680 mode = V4SImode;
5681 break;
5682 default:
5683 return -1;
5686 if (*modep == VOIDmode)
5687 *modep = mode;
5689 /* Vector modes are considered to be opaque: two vectors are
5690 equivalent for the purposes of being homogeneous aggregates
5691 if they are the same size. */
5692 if (*modep == mode)
5693 return 1;
5695 break;
5697 case ARRAY_TYPE:
5699 int count;
5700 tree index = TYPE_DOMAIN (type);
5702 /* Can't handle incomplete types. */
5703 if (!COMPLETE_TYPE_P (type))
5704 return -1;
5706 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5707 if (count == -1
5708 || !index
5709 || !TYPE_MAX_VALUE (index)
5710 || !host_integerp (TYPE_MAX_VALUE (index), 1)
5711 || !TYPE_MIN_VALUE (index)
5712 || !host_integerp (TYPE_MIN_VALUE (index), 1)
5713 || count < 0)
5714 return -1;
5716 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
5717 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
5719 /* There must be no padding. */
5720 if (!host_integerp (TYPE_SIZE (type), 1)
5721 || (tree_low_cst (TYPE_SIZE (type), 1)
5722 != count * GET_MODE_BITSIZE (*modep)))
5723 return -1;
5725 return count;
5728 case RECORD_TYPE:
5730 int count = 0;
5731 int sub_count;
5732 tree field;
5734 /* Can't handle incomplete types. */
5735 if (!COMPLETE_TYPE_P (type))
5736 return -1;
5738 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5740 if (TREE_CODE (field) != FIELD_DECL)
5741 continue;
5743 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5744 if (sub_count < 0)
5745 return -1;
5746 count += sub_count;
5749 /* There must be no padding. */
5750 if (!host_integerp (TYPE_SIZE (type), 1)
5751 || (tree_low_cst (TYPE_SIZE (type), 1)
5752 != count * GET_MODE_BITSIZE (*modep)))
5753 return -1;
5755 return count;
5758 case UNION_TYPE:
5759 case QUAL_UNION_TYPE:
5761 /* These aren't very interesting except in a degenerate case. */
5762 int count = 0;
5763 int sub_count;
5764 tree field;
5766 /* Can't handle incomplete types. */
5767 if (!COMPLETE_TYPE_P (type))
5768 return -1;
5770 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5772 if (TREE_CODE (field) != FIELD_DECL)
5773 continue;
5775 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5776 if (sub_count < 0)
5777 return -1;
5778 count = count > sub_count ? count : sub_count;
5781 /* There must be no padding. */
5782 if (!host_integerp (TYPE_SIZE (type), 1)
5783 || (tree_low_cst (TYPE_SIZE (type), 1)
5784 != count * GET_MODE_BITSIZE (*modep)))
5785 return -1;
5787 return count;
5790 default:
5791 break;
5794 return -1;
5797 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
5798 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
5799 array types. The C99 floating-point complex types are also considered
5800 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
5801 types, which are GCC extensions and out of the scope of AAPCS64, are
5802 treated as composite types here as well.
5804 Note that MODE itself is not sufficient in determining whether a type
5805 is such a composite type or not. This is because
5806 stor-layout.c:compute_record_mode may have already changed the MODE
5807 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
5808 structure with only one field may have its MODE set to the mode of the
5809 field. Also an integer mode whose size matches the size of the
5810 RECORD_TYPE type may be used to substitute the original mode
5811 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
5812 solely relied on. */
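/* For example, "struct { float x; }" may be given SFmode by
   compute_record_mode, yet it is still a composite type for AAPCS64
   purposes; a plain "float" in SFmode is not.  */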
5814 static bool
5815 aarch64_composite_type_p (const_tree type,
5816 enum machine_mode mode)
5818 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
5819 return true;
5821 if (mode == BLKmode
5822 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
5823 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
5824 return true;
5826 return false;
5829 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
5830 type as described in AAPCS64 \S 4.1.2.
5832 See the comment above aarch64_composite_type_p for the notes on MODE. */
5834 static bool
5835 aarch64_short_vector_p (const_tree type,
5836 enum machine_mode mode)
5838 HOST_WIDE_INT size = -1;
5840 if (type && TREE_CODE (type) == VECTOR_TYPE)
5841 size = int_size_in_bytes (type);
5842 else if (!aarch64_composite_type_p (type, mode)
5843 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5844 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
5845 size = GET_MODE_SIZE (mode);
5847 return (size == 8 || size == 16) ? true : false;
5850 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
5851 shall be passed or returned in simd/fp register(s) (providing these
5852 parameter passing registers are available).
5854 Upon successful return, *COUNT returns the number of needed registers,
5855 *BASE_MODE returns the mode of the individual register and when IS_HA
5856 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
5857 floating-point aggregate or a homogeneous short-vector aggregate. */
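/* For example, a plain "double" gives *COUNT == 1 and *BASE_MODE ==
   DFmode; "struct { double a, b; }" gives *COUNT == 2, *BASE_MODE ==
   DFmode and *IS_HA == true; "_Complex double" likewise gives
   *COUNT == 2 with *IS_HA == true.  */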
5859 static bool
5860 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
5861 const_tree type,
5862 enum machine_mode *base_mode,
5863 int *count,
5864 bool *is_ha)
5866 enum machine_mode new_mode = VOIDmode;
5867 bool composite_p = aarch64_composite_type_p (type, mode);
5869 if (is_ha != NULL) *is_ha = false;
5871 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
5872 || aarch64_short_vector_p (type, mode))
5874 *count = 1;
5875 new_mode = mode;
5877 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5879 if (is_ha != NULL) *is_ha = true;
5880 *count = 2;
5881 new_mode = GET_MODE_INNER (mode);
5883 else if (type && composite_p)
5885 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5887 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
5889 if (is_ha != NULL) *is_ha = true;
5890 *count = ag_count;
5892 else
5893 return false;
5895 else
5896 return false;
5898 *base_mode = new_mode;
5899 return true;
5902 /* Implement TARGET_STRUCT_VALUE_RTX. */
5904 static rtx
5905 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
5906 int incoming ATTRIBUTE_UNUSED)
5908 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
5911 /* Implements target hook vector_mode_supported_p. */
5912 static bool
5913 aarch64_vector_mode_supported_p (enum machine_mode mode)
5915 if (TARGET_SIMD
5916 && (mode == V4SImode || mode == V8HImode
5917 || mode == V16QImode || mode == V2DImode
5918 || mode == V2SImode || mode == V4HImode
5919 || mode == V8QImode || mode == V2SFmode
5920 || mode == V4SFmode || mode == V2DFmode))
5921 return true;
5923 return false;
5926 /* Return quad mode as the preferred SIMD mode. */
5927 static enum machine_mode
5928 aarch64_preferred_simd_mode (enum machine_mode mode)
5930 if (TARGET_SIMD)
5931 switch (mode)
5933 case DFmode:
5934 return V2DFmode;
5935 case SFmode:
5936 return V4SFmode;
5937 case SImode:
5938 return V4SImode;
5939 case HImode:
5940 return V8HImode;
5941 case QImode:
5942 return V16QImode;
5943 case DImode:
5944 return V2DImode;
5945 break;
5947 default:;
5949 return word_mode;
5952 /* Return the bitmask of possible vector sizes for the vectorizer
5953 to iterate over. */
5954 static unsigned int
5955 aarch64_autovectorize_vector_sizes (void)
5957 return (16 | 8);
5960 /* A table to help perform AArch64-specific name mangling for AdvSIMD
5961 vector types in order to conform to the AAPCS64 (see "Procedure
5962 Call Standard for the ARM 64-bit Architecture", Appendix A). To
5963 qualify for emission with the mangled names defined in that document,
5964 a vector type must not only be of the correct mode but also be
5965 composed of AdvSIMD vector element types (e.g.
5966 __builtin_aarch64_simd_qi); these types are registered by
5967 aarch64_init_simd_builtins (). In other words, vector types defined
5968 in other ways, e.g. via the vector_size attribute, will get default
5969 mangled names. */
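/* For example, int8x8_t from <arm_neon.h> is built on
   __builtin_aarch64_simd_qi in V8QImode, so by the table below a C++
   declaration such as "void f (int8x8_t)" mangles as
   _Z1f10__Int8x8_t rather than with the default vector_size
   mangling.  */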
5970 typedef struct
5972 enum machine_mode mode;
5973 const char *element_type_name;
5974 const char *mangled_name;
5975 } aarch64_simd_mangle_map_entry;
5977 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
5978 /* 64-bit containerized types. */
5979 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
5980 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
5981 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
5982 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
5983 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
5984 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
5985 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
5986 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
5987 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
5988 /* 128-bit containerized types. */
5989 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
5990 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
5991 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
5992 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
5993 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
5994 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
5995 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
5996 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
5997 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
5998 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
5999 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6000 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6001 { VOIDmode, NULL, NULL }
6004 /* Implement TARGET_MANGLE_TYPE. */
6006 static const char *
6007 aarch64_mangle_type (const_tree type)
6009 /* The AArch64 ABI documents say that "__va_list" has to be
6010 mangled as if it is in the "std" namespace. */
6011 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6012 return "St9__va_list";
6014 /* Check the mode of the vector type, and the name of the vector
6015 element type, against the table. */
6016 if (TREE_CODE (type) == VECTOR_TYPE)
6018 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6020 while (pos->mode != VOIDmode)
6022 tree elt_type = TREE_TYPE (type);
6024 if (pos->mode == TYPE_MODE (type)
6025 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6026 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6027 pos->element_type_name))
6028 return pos->mangled_name;
6030 pos++;
6034 /* Use the default mangling. */
6035 return NULL;
6038 /* Return the equivalent letter for size. */
6039 static unsigned char
6040 sizetochar (int size)
6042 switch (size)
6044 case 64: return 'd';
6045 case 32: return 's';
6046 case 16: return 'h';
6047 case 8 : return 'b';
6048 default: gcc_unreachable ();
6052 /* Return true iff X is a uniform vector of floating-point
6053 constants, and the constant can be represented in
6054 quarter-precision form. Note that, as aarch64_float_const_representable_p
6055 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
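/* For example, a V2SF vector of { 1.0, 1.0 } is accepted (1.0 is a
   valid quarter-precision immediate), { 1.0, 2.0 } is rejected for not
   being uniform, and { 0.0, 0.0 } is rejected because of the +0.0/-0.0
   restriction noted above.  */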
6056 static bool
6057 aarch64_vect_float_const_representable_p (rtx x)
6059 int i = 0;
6060 REAL_VALUE_TYPE r0, ri;
6061 rtx x0, xi;
6063 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6064 return false;
6066 x0 = CONST_VECTOR_ELT (x, 0);
6067 if (!CONST_DOUBLE_P (x0))
6068 return false;
6070 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
6072 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6074 xi = CONST_VECTOR_ELT (x, i);
6075 if (!CONST_DOUBLE_P (xi))
6076 return false;
6078 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6079 if (!REAL_VALUES_EQUAL (r0, ri))
6080 return false;
6083 return aarch64_float_const_representable_p (x0);
6086 /* TODO: This function returns values similar to those
6087 returned by neon_valid_immediate in gcc/config/arm/arm.c
6088 but the API here is different enough that these magic numbers
6089 are not used. It should be sufficient to return true or false. */
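/* In the current scheme, -1 means "not a valid immediate", 19 is used
   for an all-zero floating-point vector, 18 for any other valid
   floating-point constant, and 0..17 identify the integer encodings
   matched by the CHECK cases below.  */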
6090 static int
6091 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse,
6092 rtx *modconst, int *elementwidth,
6093 unsigned char *elementchar,
6094 int *mvn, int *shift)
6096 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6097 matches = 1; \
6098 for (i = 0; i < idx; i += (STRIDE)) \
6099 if (!(TEST)) \
6100 matches = 0; \
6101 if (matches) \
6103 immtype = (CLASS); \
6104 elsize = (ELSIZE); \
6105 elchar = sizetochar (elsize); \
6106 eshift = (SHIFT); \
6107 emvn = (NEG); \
6108 break; \
6111 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6112 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6113 unsigned char bytes[16];
6114 unsigned char elchar = 0;
6115 int immtype = -1, matches;
6116 unsigned int invmask = inverse ? 0xff : 0;
6117 int eshift, emvn;
6119 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6121 bool simd_imm_zero = aarch64_simd_imm_zero_p (op, mode);
6122 int elem_width = GET_MODE_BITSIZE (GET_MODE (CONST_VECTOR_ELT (op, 0)));
6124 if (!(simd_imm_zero
6125 || aarch64_vect_float_const_representable_p (op)))
6126 return -1;
6128 if (modconst)
6129 *modconst = CONST_VECTOR_ELT (op, 0);
6131 if (elementwidth)
6132 *elementwidth = elem_width;
6134 if (elementchar)
6135 *elementchar = sizetochar (elem_width);
6137 if (shift)
6138 *shift = 0;
6140 if (simd_imm_zero)
6141 return 19;
6142 else
6143 return 18;
6146 /* Splat vector constant out into a byte vector. */
6147 for (i = 0; i < n_elts; i++)
6149 rtx el = CONST_VECTOR_ELT (op, i);
6150 unsigned HOST_WIDE_INT elpart;
6151 unsigned int part, parts;
6153 if (GET_CODE (el) == CONST_INT)
6155 elpart = INTVAL (el);
6156 parts = 1;
6158 else if (GET_CODE (el) == CONST_DOUBLE)
6160 elpart = CONST_DOUBLE_LOW (el);
6161 parts = 2;
6163 else
6164 gcc_unreachable ();
6166 for (part = 0; part < parts; part++)
6168 unsigned int byte;
6169 for (byte = 0; byte < innersize; byte++)
6171 bytes[idx++] = (elpart & 0xff) ^ invmask;
6172 elpart >>= BITS_PER_UNIT;
6174 if (GET_CODE (el) == CONST_DOUBLE)
6175 elpart = CONST_DOUBLE_HIGH (el);
6179 /* Sanity check. */
6180 gcc_assert (idx == GET_MODE_SIZE (mode));
6184 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6185 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6187 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6188 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6190 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6191 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6193 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6194 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
6196 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
6198 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
6200 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6201 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6203 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6204 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6206 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6207 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6209 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6210 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
6212 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
6214 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
6216 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6217 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6219 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6220 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6222 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6223 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 0, 0);
6225 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6226 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 0, 1);
6228 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
6230 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6231 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
6233 while (0);
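/* As a worked example of the classification above: a V4HI constant with
   every element equal to 0x00ff splats to the little-endian byte pattern
   { 0xff, 0x00, 0xff, 0x00, ... }, which fails the 32-bit tests (types
   0-3) but matches type 4 (bytes[i] == bytes[0] and bytes[i + 1] == 0),
   so it is classified with elsize 16, shift 0 and no MVN -- i.e. a
   16-bit MOVI of 0xff.  */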
6235 /* TODO: Currently the assembler cannot handle types 12 to 15, and
6236 there is no way to specify cmode through the compiler.
6237 Disable them until there is support in the assembler. */
6238 if (immtype == -1
6239 || (immtype >= 12 && immtype <= 15)
6240 || immtype == 18)
6241 return -1;
6244 if (elementwidth)
6245 *elementwidth = elsize;
6247 if (elementchar)
6248 *elementchar = elchar;
6250 if (mvn)
6251 *mvn = emvn;
6253 if (shift)
6254 *shift = eshift;
6256 if (modconst)
6258 unsigned HOST_WIDE_INT imm = 0;
6260 /* Un-invert bytes of recognized vector, if necessary. */
6261 if (invmask != 0)
6262 for (i = 0; i < idx; i++)
6263 bytes[i] ^= invmask;
6265 if (immtype == 17)
6267 /* FIXME: Broken on 32-bit H_W_I hosts. */
6268 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6270 for (i = 0; i < 8; i++)
6271 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6272 << (i * BITS_PER_UNIT);
6274 *modconst = GEN_INT (imm);
6276 else
6278 unsigned HOST_WIDE_INT imm = 0;
6280 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6281 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6283 /* Construct 'abcdefgh' because the assembler cannot handle
6284 generic constants. */
6285 gcc_assert (shift != NULL && mvn != NULL);
6286 if (*mvn)
6287 imm = ~imm;
6288 imm = (imm >> *shift) & 0xff;
6289 *modconst = GEN_INT (imm);
6293 return immtype;
6294 #undef CHECK
6297 /* Return TRUE if rtx X is legal for use as either an AdvSIMD MOVI instruction
6298 (or, implicitly, MVNI) immediate. Write back width per element
6299 to *ELEMENTWIDTH, and a modified constant (whatever should be output
6300 for a MOVI instruction) in *MODCONST. */
6302 aarch64_simd_immediate_valid_for_move (rtx op, enum machine_mode mode,
6303 rtx *modconst, int *elementwidth,
6304 unsigned char *elementchar,
6305 int *mvn, int *shift)
6307 rtx tmpconst;
6308 int tmpwidth;
6309 unsigned char tmpwidthc;
6310 int tmpmvn = 0, tmpshift = 0;
6311 int retval = aarch64_simd_valid_immediate (op, mode, 0, &tmpconst,
6312 &tmpwidth, &tmpwidthc,
6313 &tmpmvn, &tmpshift);
6315 if (retval == -1)
6316 return 0;
6318 if (modconst)
6319 *modconst = tmpconst;
6321 if (elementwidth)
6322 *elementwidth = tmpwidth;
6324 if (elementchar)
6325 *elementchar = tmpwidthc;
6327 if (mvn)
6328 *mvn = tmpmvn;
6330 if (shift)
6331 *shift = tmpshift;
6333 return 1;
6336 static bool
6337 aarch64_const_vec_all_same_int_p (rtx x,
6338 HOST_WIDE_INT minval,
6339 HOST_WIDE_INT maxval)
6341 HOST_WIDE_INT firstval;
6342 int count, i;
6344 if (GET_CODE (x) != CONST_VECTOR
6345 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
6346 return false;
6348 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6349 if (firstval < minval || firstval > maxval)
6350 return false;
6352 count = CONST_VECTOR_NUNITS (x);
6353 for (i = 1; i < count; i++)
6354 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6355 return false;
6357 return true;
6360 /* Check that immediate shift constants are within range. */
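/* For example, for a V4SI shift the permitted immediate range is 0-31
   for a left shift and 1-32 for a right shift, matching the AdvSIMD
   SHL and SSHR/USHR immediate encodings.  */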
6361 bool
6362 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6364 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6365 if (left)
6366 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6367 else
6368 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
6371 /* Return true if X is a uniform vector where all elements
6372 are either the floating-point constant 0.0 or the
6373 integer constant 0. */
6374 bool
6375 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6377 return x == CONST0_RTX (mode);
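/* Return true if X is a valid scalar immediate in the 64-bit byte-mask
   form accepted by MOVI, i.e. every byte of the value is either 0x00 or
   0xff (for instance 0x00ff00ffff0000ff qualifies, 0x0102030405060708
   does not); the loop below simply inspects each of the eight bytes.  */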
6380 bool
6381 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6383 HOST_WIDE_INT imm = INTVAL (x);
6384 int i;
6386 for (i = 0; i < 8; i++)
6388 unsigned int byte = imm & 0xff;
6389 if (byte != 0xff && byte != 0)
6390 return false;
6391 imm >>= 8;
6394 return true;
6397 /* Return a const_int vector of VAL. */
6399 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6401 int nunits = GET_MODE_NUNITS (mode);
6402 rtvec v = rtvec_alloc (nunits);
6403 int i;
6405 for (i=0; i < nunits; i++)
6406 RTVEC_ELT (v, i) = GEN_INT (val);
6408 return gen_rtx_CONST_VECTOR (mode, v);
6411 /* Check OP is a legal scalar immediate for the MOVI instruction. */
6413 bool
6414 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
6416 enum machine_mode vmode;
6418 gcc_assert (!VECTOR_MODE_P (mode));
6419 vmode = aarch64_preferred_simd_mode (mode);
6420 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
6421 int retval = aarch64_simd_immediate_valid_for_move (op_v, vmode, 0,
6422 NULL, NULL, NULL, NULL);
6423 return retval;
6426 /* Construct and return a PARALLEL RTX vector. */
6428 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6430 int nunits = GET_MODE_NUNITS (mode);
6431 rtvec v = rtvec_alloc (nunits / 2);
6432 int base = high ? nunits / 2 : 0;
6433 rtx t1;
6434 int i;
6436 for (i=0; i < nunits / 2; i++)
6437 RTVEC_ELT (v, i) = GEN_INT (base + i);
6439 t1 = gen_rtx_PARALLEL (mode, v);
6440 return t1;
6443 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
6444 HIGH (exclusive). */
6445 void
6446 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6448 HOST_WIDE_INT lane;
6449 gcc_assert (GET_CODE (operand) == CONST_INT);
6450 lane = INTVAL (operand);
6452 if (lane < low || lane >= high)
6453 error ("lane out of range");
6456 void
6457 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6459 gcc_assert (GET_CODE (operand) == CONST_INT);
6460 HOST_WIDE_INT lane = INTVAL (operand);
6462 if (lane < low || lane >= high)
6463 error ("constant out of range");
6466 /* Emit code to reinterpret one AdvSIMD type as another,
6467 without altering bits. */
6468 void
6469 aarch64_simd_reinterpret (rtx dest, rtx src)
6471 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6474 /* Emit code to place a AdvSIMD pair result in memory locations (with equal
6475 registers). */
6476 void
6477 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6478 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6479 rtx op1)
6481 rtx mem = gen_rtx_MEM (mode, destaddr);
6482 rtx tmp1 = gen_reg_rtx (mode);
6483 rtx tmp2 = gen_reg_rtx (mode);
6485 emit_insn (intfn (tmp1, op1, tmp2));
6487 emit_move_insn (mem, tmp1);
6488 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6489 emit_move_insn (mem, tmp2);
6492 /* Return TRUE if OP is a valid vector addressing mode. */
6493 bool
6494 aarch64_simd_mem_operand_p (rtx op)
6496 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6497 || GET_CODE (XEXP (op, 0)) == REG);
6500 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
6501 not to early-clobber SRC registers in the process.
6503 We assume that the operands described by SRC and DEST represent a
6504 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
6505 number of components into which the copy has been decomposed. */
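/* For instance, copying the register pair { V0, V1 } into { V1, V2 }
   must be done high-to-low (V2 <- V1, then V1 <- V0); a low-to-high copy
   would overwrite V1 before it is read.  The REGNO comparison below
   selects the reversed order exactly in that overlapping case.  */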
6506 void
6507 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6508 rtx *src, unsigned int count)
6510 unsigned int i;
6512 if (!reg_overlap_mentioned_p (operands[0], operands[1])
6513 || REGNO (operands[0]) < REGNO (operands[1]))
6515 for (i = 0; i < count; i++)
6517 operands[2 * i] = dest[i];
6518 operands[2 * i + 1] = src[i];
6521 else
6523 for (i = 0; i < count; i++)
6525 operands[2 * i] = dest[count - i - 1];
6526 operands[2 * i + 1] = src[count - i - 1];
6531 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6532 one of VSTRUCT modes: OI, CI or XI. */
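/* These structure modes occupy two (OI), three (CI) or four (XI) vector
   registers; a register-to-register copy splits into one 4-byte move per
   register, giving the 8/12/16-byte lengths returned below, while a copy
   involving memory is a single load/store structure instruction, hence 4.  */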
6534 aarch64_simd_attr_length_move (rtx insn)
6536 enum machine_mode mode;
6538 extract_insn_cached (insn);
6540 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6542 mode = GET_MODE (recog_data.operand[0]);
6543 switch (mode)
6545 case OImode:
6546 return 8;
6547 case CImode:
6548 return 12;
6549 case XImode:
6550 return 16;
6551 default:
6552 gcc_unreachable ();
6555 return 4;
6558 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
6559 alignment of a vector to 128 bits. */
6560 static HOST_WIDE_INT
6561 aarch64_simd_vector_alignment (const_tree type)
6563 HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
6564 return MIN (align, 128);
6567 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
6568 static bool
6569 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
6571 if (is_packed)
6572 return false;
6574 /* We guarantee alignment for vectors up to 128 bits. */
6575 if (tree_int_cst_compare (TYPE_SIZE (type),
6576 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
6577 return false;
6579 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
6580 return true;
6583 /* If VALS is a vector constant that can be loaded into a register
6584 using DUP, generate instructions to do so and return an RTX to
6585 assign to the register. Otherwise return NULL_RTX. */
6586 static rtx
6587 aarch64_simd_dup_constant (rtx vals)
6589 enum machine_mode mode = GET_MODE (vals);
6590 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6591 int n_elts = GET_MODE_NUNITS (mode);
6592 bool all_same = true;
6593 rtx x;
6594 int i;
6596 if (GET_CODE (vals) != CONST_VECTOR)
6597 return NULL_RTX;
6599 for (i = 1; i < n_elts; ++i)
6601 x = CONST_VECTOR_ELT (vals, i);
6602 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
6603 all_same = false;
6606 if (!all_same)
6607 return NULL_RTX;
6609 /* We can load this constant by using DUP and a constant in a
6610 single ARM register. This will be cheaper than a vector
6611 load. */
6612 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
6613 return gen_rtx_VEC_DUPLICATE (mode, x);
6617 /* Generate code to load VALS, which is a PARALLEL containing only
6618 constants (for vec_init) or CONST_VECTOR, efficiently into a
6619 register. Returns an RTX to copy into the register, or NULL_RTX
6620 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
6621 static rtx
6622 aarch64_simd_make_constant (rtx vals)
6624 enum machine_mode mode = GET_MODE (vals);
6625 rtx const_dup;
6626 rtx const_vec = NULL_RTX;
6627 int n_elts = GET_MODE_NUNITS (mode);
6628 int n_const = 0;
6629 int i;
6631 if (GET_CODE (vals) == CONST_VECTOR)
6632 const_vec = vals;
6633 else if (GET_CODE (vals) == PARALLEL)
6635 /* A CONST_VECTOR must contain only CONST_INTs and
6636 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
6637 Only store valid constants in a CONST_VECTOR. */
6638 for (i = 0; i < n_elts; ++i)
6640 rtx x = XVECEXP (vals, 0, i);
6641 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
6642 n_const++;
6644 if (n_const == n_elts)
6645 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6647 else
6648 gcc_unreachable ();
6650 if (const_vec != NULL_RTX
6651 && aarch64_simd_immediate_valid_for_move (const_vec, mode, NULL, NULL,
6652 NULL, NULL, NULL))
6653 /* Load using MOVI/MVNI. */
6654 return const_vec;
6655 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
6656 /* Loaded using DUP. */
6657 return const_dup;
6658 else if (const_vec != NULL_RTX)
6659 /* Load from constant pool. We cannot take advantage of single-cycle
6660 LD1 because we need a PC-relative addressing mode. */
6661 return const_vec;
6662 else
6663 /* A PARALLEL containing something not valid inside CONST_VECTOR.
6664 We cannot construct an initializer. */
6665 return NULL_RTX;
6668 void
6669 aarch64_expand_vector_init (rtx target, rtx vals)
6671 enum machine_mode mode = GET_MODE (target);
6672 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6673 int n_elts = GET_MODE_NUNITS (mode);
6674 int n_var = 0, one_var = -1;
6675 bool all_same = true;
6676 rtx x, mem;
6677 int i;
6679 x = XVECEXP (vals, 0, 0);
6680 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6681 n_var = 1, one_var = 0;
6683 for (i = 1; i < n_elts; ++i)
6685 x = XVECEXP (vals, 0, i);
6686 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6687 ++n_var, one_var = i;
6689 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6690 all_same = false;
6693 if (n_var == 0)
6695 rtx constant = aarch64_simd_make_constant (vals);
6696 if (constant != NULL_RTX)
6698 emit_move_insn (target, constant);
6699 return;
6703 /* Splat a single non-constant element if we can. */
6704 if (all_same)
6706 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
6707 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
6708 return;
6711 /* One field is non-constant. Load constant then overwrite varying
6712 field. This is more efficient than using the stack. */
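/* For example, initialising a V4SI with { 1, 2, x, 4 } first materialises
   the constant vector { 1, 2, 4, 4 } (the varying element is replaced by
   its neighbour) and then inserts x into lane 2 with a single vec_set.  */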
6713 if (n_var == 1)
6715 rtx copy = copy_rtx (vals);
6716 rtx index = GEN_INT (one_var);
6717 enum insn_code icode;
6719 /* Load constant part of vector, substitute neighboring value for
6720 varying element. */
6721 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
6722 aarch64_expand_vector_init (target, copy);
6724 /* Insert variable. */
6725 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
6726 icode = optab_handler (vec_set_optab, mode);
6727 gcc_assert (icode != CODE_FOR_nothing);
6728 emit_insn (GEN_FCN (icode) (target, x, index));
6729 return;
6732 /* Construct the vector in memory one field at a time
6733 and load the whole vector. */
6734 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6735 for (i = 0; i < n_elts; i++)
6736 emit_move_insn (adjust_address_nv (mem, inner_mode,
6737 i * GET_MODE_SIZE (inner_mode)),
6738 XVECEXP (vals, 0, i));
6739 emit_move_insn (target, mem);
6743 static unsigned HOST_WIDE_INT
6744 aarch64_shift_truncation_mask (enum machine_mode mode)
6746 return
6747 (aarch64_vector_mode_supported_p (mode)
6748 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
6751 #ifndef TLS_SECTION_ASM_FLAG
6752 #define TLS_SECTION_ASM_FLAG 'T'
6753 #endif
6755 void
6756 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
6757 tree decl ATTRIBUTE_UNUSED)
6759 char flagchars[10], *f = flagchars;
6761 /* If we have already declared this section, we can use an
6762 abbreviated form to switch back to it -- unless this section is
6763 part of a COMDAT group, in which case GAS requires the full
6764 declaration every time. */
6765 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6766 && (flags & SECTION_DECLARED))
6768 fprintf (asm_out_file, "\t.section\t%s\n", name);
6769 return;
6772 if (!(flags & SECTION_DEBUG))
6773 *f++ = 'a';
6774 if (flags & SECTION_WRITE)
6775 *f++ = 'w';
6776 if (flags & SECTION_CODE)
6777 *f++ = 'x';
6778 if (flags & SECTION_SMALL)
6779 *f++ = 's';
6780 if (flags & SECTION_MERGE)
6781 *f++ = 'M';
6782 if (flags & SECTION_STRINGS)
6783 *f++ = 'S';
6784 if (flags & SECTION_TLS)
6785 *f++ = TLS_SECTION_ASM_FLAG;
6786 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6787 *f++ = 'G';
6788 *f = '\0';
6790 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
6792 if (!(flags & SECTION_NOTYPE))
6794 const char *type;
6795 const char *format;
6797 if (flags & SECTION_BSS)
6798 type = "nobits";
6799 else
6800 type = "progbits";
6802 #ifdef TYPE_OPERAND_FMT
6803 format = "," TYPE_OPERAND_FMT;
6804 #else
6805 format = ",@%s";
6806 #endif
6808 fprintf (asm_out_file, format, type);
6810 if (flags & SECTION_ENTSIZE)
6811 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
6812 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6814 if (TREE_CODE (decl) == IDENTIFIER_NODE)
6815 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
6816 else
6817 fprintf (asm_out_file, ",%s,comdat",
6818 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
6822 putc ('\n', asm_out_file);
6825 /* Select a format to encode pointers in exception handling data. */
6827 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
6829 int type;
6830 switch (aarch64_cmodel)
6832 case AARCH64_CMODEL_TINY:
6833 case AARCH64_CMODEL_TINY_PIC:
6834 case AARCH64_CMODEL_SMALL:
6835 case AARCH64_CMODEL_SMALL_PIC:
6836 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
6837 for everything. */
6838 type = DW_EH_PE_sdata4;
6839 break;
6840 default:
6841 /* No assumptions here. 8-byte relocs required. */
6842 type = DW_EH_PE_sdata8;
6843 break;
6845 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
6848 /* Emit load exclusive. */
6850 static void
6851 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
6852 rtx mem, rtx model_rtx)
6854 rtx (*gen) (rtx, rtx, rtx);
6856 switch (mode)
6858 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
6859 case HImode: gen = gen_aarch64_load_exclusivehi; break;
6860 case SImode: gen = gen_aarch64_load_exclusivesi; break;
6861 case DImode: gen = gen_aarch64_load_exclusivedi; break;
6862 default:
6863 gcc_unreachable ();
6866 emit_insn (gen (rval, mem, model_rtx));
6869 /* Emit store exclusive. */
6871 static void
6872 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
6873 rtx rval, rtx mem, rtx model_rtx)
6875 rtx (*gen) (rtx, rtx, rtx, rtx);
6877 switch (mode)
6879 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
6880 case HImode: gen = gen_aarch64_store_exclusivehi; break;
6881 case SImode: gen = gen_aarch64_store_exclusivesi; break;
6882 case DImode: gen = gen_aarch64_store_exclusivedi; break;
6883 default:
6884 gcc_unreachable ();
6887 emit_insn (gen (bval, rval, mem, model_rtx));
6890 /* Mark the previous jump instruction as unlikely. */
6892 static void
6893 aarch64_emit_unlikely_jump (rtx insn)
6895 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
6897 insn = emit_jump_insn (insn);
6898 add_reg_note (insn, REG_BR_PROB, very_unlikely);
6901 /* Expand a compare and swap pattern. */
6903 void
6904 aarch64_expand_compare_and_swap (rtx operands[])
6906 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
6907 enum machine_mode mode, cmp_mode;
6908 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
6910 bval = operands[0];
6911 rval = operands[1];
6912 mem = operands[2];
6913 oldval = operands[3];
6914 newval = operands[4];
6915 is_weak = operands[5];
6916 mod_s = operands[6];
6917 mod_f = operands[7];
6918 mode = GET_MODE (mem);
6919 cmp_mode = mode;
6921 /* Normally the succ memory model must be stronger than fail, but in the
6922 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
6923 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
6925 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
6926 && INTVAL (mod_s) == MEMMODEL_RELEASE)
6927 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
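/* This situation can arise from a call such as
   __atomic_compare_exchange_n (ptr, &expected, desired, 0,
                                __ATOMIC_RELEASE, __ATOMIC_ACQUIRE);
   the failure path must still acquire, so the success ordering is
   strengthened to ACQ_REL rather than weakening the failure ordering.  */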
6929 switch (mode)
6931 case QImode:
6932 case HImode:
6933 /* For short modes, we're going to perform the comparison in SImode,
6934 so do the zero-extension now. */
6935 cmp_mode = SImode;
6936 rval = gen_reg_rtx (SImode);
6937 oldval = convert_modes (SImode, mode, oldval, true);
6938 /* Fall through. */
6940 case SImode:
6941 case DImode:
6942 /* Force the value into a register if needed. */
6943 if (!aarch64_plus_operand (oldval, mode))
6944 oldval = force_reg (cmp_mode, oldval);
6945 break;
6947 default:
6948 gcc_unreachable ();
6951 switch (mode)
6953 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
6954 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
6955 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
6956 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
6957 default:
6958 gcc_unreachable ();
6961 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
6963 if (mode == QImode || mode == HImode)
6964 emit_move_insn (operands[1], gen_lowpart (mode, rval));
6966 x = gen_rtx_REG (CCmode, CC_REGNUM);
6967 x = gen_rtx_EQ (SImode, x, const0_rtx);
6968 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
6971 /* Split a compare and swap pattern. */
6973 void
6974 aarch64_split_compare_and_swap (rtx operands[])
6976 rtx rval, mem, oldval, newval, scratch;
6977 enum machine_mode mode;
6978 bool is_weak;
6979 rtx label1, label2, x, cond;
6981 rval = operands[0];
6982 mem = operands[1];
6983 oldval = operands[2];
6984 newval = operands[3];
6985 is_weak = (operands[4] != const0_rtx);
6986 scratch = operands[7];
6987 mode = GET_MODE (mem);
6989 label1 = NULL_RTX;
6990 if (!is_weak)
6992 label1 = gen_label_rtx ();
6993 emit_label (label1);
6995 label2 = gen_label_rtx ();
6997 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
6999 cond = aarch64_gen_compare_reg (NE, rval, oldval);
7000 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7001 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7002 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
7003 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7005 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
7007 if (!is_weak)
7009 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
7010 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7011 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
7012 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7014 else
7016 cond = gen_rtx_REG (CCmode, CC_REGNUM);
7017 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7018 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7021 emit_label (label2);
7024 /* Split an atomic operation. */
7026 void
7027 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
7028 rtx value, rtx model_rtx, rtx cond)
7030 enum machine_mode mode = GET_MODE (mem);
7031 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7032 rtx label, x;
7034 label = gen_label_rtx ();
7035 emit_label (label);
7037 if (new_out)
7038 new_out = gen_lowpart (wmode, new_out);
7039 if (old_out)
7040 old_out = gen_lowpart (wmode, old_out);
7041 else
7042 old_out = new_out;
7043 value = simplify_gen_subreg (wmode, value, mode, 0);
7045 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7047 switch (code)
7049 case SET:
7050 new_out = value;
7051 break;
7053 case NOT:
7054 x = gen_rtx_AND (wmode, old_out, value);
7055 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7056 x = gen_rtx_NOT (wmode, new_out);
7057 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7058 break;
7060 case MINUS:
7061 if (CONST_INT_P (value))
7063 value = GEN_INT (-INTVAL (value));
7064 code = PLUS;
7066 /* Fall through. */
7068 default:
7069 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
7070 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7071 break;
7074 aarch64_emit_store_exclusive (mode, cond, mem,
7075 gen_lowpart (mode, new_out), model_rtx);
7077 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7078 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7079 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
7080 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7083 static void
7084 aarch64_print_extension (void)
7086 const struct aarch64_option_extension *opt = NULL;
7088 for (opt = all_extensions; opt->name != NULL; opt++)
7089 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
7090 asm_fprintf (asm_out_file, "+%s", opt->name);
7092 asm_fprintf (asm_out_file, "\n");
7095 static void
7096 aarch64_start_file (void)
7098 if (selected_arch)
7100 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
7101 aarch64_print_extension ();
7103 else if (selected_cpu)
7105 asm_fprintf (asm_out_file, "\t.cpu %s", selected_cpu->name);
7106 aarch64_print_extension ();
7108 default_file_start();
7111 /* Target hook for c_mode_for_suffix. */
7112 static enum machine_mode
7113 aarch64_c_mode_for_suffix (char suffix)
7115 if (suffix == 'q')
7116 return TFmode;
7118 return VOIDmode;
7121 /* We can only represent floating point constants which will fit in
7122 "quarter-precision" values. These values are characterised by
7123 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given by:
7126 (-1)^s * (n/16) * 2^r
7128 Where:
7129 's' is the sign bit.
7130 'n' is an integer in the range 16 <= n <= 31.
7131 'r' is an integer in the range -3 <= r <= 4. */
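/* A few illustrative values: 0.125 = (16/16) * 2^-3 is the smallest
   positive encodable value and 31.0 = (31/16) * 2^4 the largest;
   0.5 = (16/16) * 2^-1 and 2.5 = (20/16) * 2^1 are also representable,
   while 1.0/3.0 and 0.0 are not.  */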
7133 /* Return true iff X can be represented as a quarter-precision
7134 floating point immediate operand. Note, we cannot represent 0.0. */
7135 bool
7136 aarch64_float_const_representable_p (rtx x)
7138 /* This represents our current view of how many bits
7139 make up the mantissa. */
7140 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7141 int exponent;
7142 unsigned HOST_WIDE_INT mantissa, mask;
7143 HOST_WIDE_INT m1, m2;
7144 REAL_VALUE_TYPE r, m;
7146 if (!CONST_DOUBLE_P (x))
7147 return false;
7149 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7151 /* We cannot represent infinities, NaNs or +/-zero. We won't
7152 know if we have +zero until we analyse the mantissa, but we
7153 can reject the other invalid values. */
7154 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
7155 || REAL_VALUE_MINUS_ZERO (r))
7156 return false;
7158 /* Extract exponent. */
7159 r = real_value_abs (&r);
7160 exponent = REAL_EXP (&r);
7162 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7163 highest (sign) bit, with a fixed binary point at bit point_pos.
7164 m1 holds the low part of the mantissa, m2 the high part.
7165 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
7166 bits for the mantissa, this can fail (low bits will be lost). */
7167 real_ldexp (&m, &r, point_pos - exponent);
7168 REAL_VALUE_TO_INT (&m1, &m2, m);
7170 /* If the low part of the mantissa has bits set we cannot represent
7171 the value. */
7172 if (m1 != 0)
7173 return false;
7174 /* We have rejected the lower HOST_WIDE_INT, so update our
7175 understanding of how many bits lie in the mantissa and
7176 look only at the high HOST_WIDE_INT. */
7177 mantissa = m2;
7178 point_pos -= HOST_BITS_PER_WIDE_INT;
7180 /* We can only represent values with a mantissa of the form 1.xxxx. */
7181 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7182 if ((mantissa & mask) != 0)
7183 return false;
7185 /* Having filtered unrepresentable values, we may now remove all
7186 but the highest 5 bits. */
7187 mantissa >>= point_pos - 5;
7189 /* We cannot represent the value 0.0, so reject it. This is handled
7190 elsewhere. */
7191 if (mantissa == 0)
7192 return false;
7194 /* Then, as bit 4 is always set, we can mask it off, leaving
7195 the mantissa in the range [0, 15]. */
7196 mantissa &= ~(1 << 4);
7197 gcc_assert (mantissa <= 15);
7199 /* GCC internally does not use IEEE754-like encoding (where normalized
7200 significands are in the range [1, 2)). GCC uses [0.5, 1) (see real.c).
7201 Our mantissa values are shifted 4 places to the left relative to
7202 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
7203 by 5 places to correct for GCC's representation. */
7204 exponent = 5 - exponent;
7206 return (exponent >= 0 && exponent <= 7);
7209 char*
7210 aarch64_output_simd_mov_immediate (rtx *const_vector,
7211 enum machine_mode mode,
7212 unsigned width)
7214 int is_valid;
7215 unsigned char widthc;
7216 int lane_width_bits;
7217 static char templ[40];
7218 int shift = 0, mvn = 0;
7219 const char *mnemonic;
7220 unsigned int lane_count = 0;
7222 is_valid =
7223 aarch64_simd_immediate_valid_for_move (*const_vector, mode,
7224 const_vector, &lane_width_bits,
7225 &widthc, &mvn, &shift);
7226 gcc_assert (is_valid);
7228 mode = GET_MODE_INNER (mode);
7229 if (mode == SFmode || mode == DFmode)
7231 bool zero_p =
7232 aarch64_float_const_zero_rtx_p (*const_vector);
7233 gcc_assert (shift == 0);
7234 mnemonic = zero_p ? "movi" : "fmov";
7236 else
7237 mnemonic = mvn ? "mvni" : "movi";
7239 gcc_assert (lane_width_bits != 0);
7240 lane_count = width / lane_width_bits;
7242 if (lane_count == 1)
7243 snprintf (templ, sizeof (templ), "%s\t%%d0, %%1", mnemonic);
7244 else if (shift)
7245 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1, lsl %d",
7246 mnemonic, lane_count, widthc, shift);
7247 else
7248 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1",
7249 mnemonic, lane_count, widthc);
7250 return templ;
7253 /* Split operands into moves from op[1] + op[2] into op[0]. */
7255 void
7256 aarch64_split_combinev16qi (rtx operands[3])
7258 unsigned int dest = REGNO (operands[0]);
7259 unsigned int src1 = REGNO (operands[1]);
7260 unsigned int src2 = REGNO (operands[2]);
7261 enum machine_mode halfmode = GET_MODE (operands[1]);
7262 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
7263 rtx destlo, desthi;
7265 gcc_assert (halfmode == V16QImode);
7267 if (src1 == dest && src2 == dest + halfregs)
7269 /* No-op move. Can't split to nothing; emit something. */
7270 emit_note (NOTE_INSN_DELETED);
7271 return;
7274 /* Preserve register attributes for variable tracking. */
7275 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
7276 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
7277 GET_MODE_SIZE (halfmode));
7279 /* Special case of reversed high/low parts. */
7280 if (reg_overlap_mentioned_p (operands[2], destlo)
7281 && reg_overlap_mentioned_p (operands[1], desthi))
7283 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7284 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
7285 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7287 else if (!reg_overlap_mentioned_p (operands[2], destlo))
7289 /* Try to avoid unnecessary moves if part of the result
7290 is in the right place already. */
7291 if (src1 != dest)
7292 emit_move_insn (destlo, operands[1]);
7293 if (src2 != dest + halfregs)
7294 emit_move_insn (desthi, operands[2]);
7296 else
7298 if (src2 != dest + halfregs)
7299 emit_move_insn (desthi, operands[2]);
7300 if (src1 != dest)
7301 emit_move_insn (destlo, operands[1]);
7305 /* vec_perm support. */
7307 #define MAX_VECT_LEN 16
7309 struct expand_vec_perm_d
7311 rtx target, op0, op1;
7312 unsigned char perm[MAX_VECT_LEN];
7313 enum machine_mode vmode;
7314 unsigned char nelt;
7315 bool one_vector_p;
7316 bool testing_p;
7319 /* Generate a variable permutation. */
7321 static void
7322 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
7324 enum machine_mode vmode = GET_MODE (target);
7325 bool one_vector_p = rtx_equal_p (op0, op1);
7327 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
7328 gcc_checking_assert (GET_MODE (op0) == vmode);
7329 gcc_checking_assert (GET_MODE (op1) == vmode);
7330 gcc_checking_assert (GET_MODE (sel) == vmode);
7331 gcc_checking_assert (TARGET_SIMD);
7333 if (one_vector_p)
7335 if (vmode == V8QImode)
7337 /* Expand the argument to a V16QI mode by duplicating it. */
7338 rtx pair = gen_reg_rtx (V16QImode);
7339 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
7340 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7342 else
7344 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
7347 else
7349 rtx pair;
7351 if (vmode == V8QImode)
7353 pair = gen_reg_rtx (V16QImode);
7354 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
7355 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7357 else
7359 pair = gen_reg_rtx (OImode);
7360 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
7361 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
7366 void
7367 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
7369 enum machine_mode vmode = GET_MODE (target);
7370 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
7371 bool one_vector_p = rtx_equal_p (op0, op1);
7372 rtx rmask[MAX_VECT_LEN], mask;
7374 gcc_checking_assert (!BYTES_BIG_ENDIAN);
7376 /* The TBL instruction does not use a modulo index, so we must take care
7377 of that ourselves. */
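/* E.g. for a single-vector V16QI permute an index of 17 must select lane
   17 & 15 = 1, whereas an unmasked TBL on a 16-byte table would return
   zero for any out-of-range index, so the selector is ANDed with the
   mask first.  */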
7378 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
7379 for (i = 0; i < nelt; ++i)
7380 rmask[i] = mask;
7381 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
7382 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
7384 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
7387 /* Recognize patterns suitable for the TRN instructions. */
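/* For V4SI inputs {a0,a1,a2,a3} and {b0,b1,b2,b3}, TRN1 corresponds to
   the permutation {0,4,2,6} (result {a0,b0,a2,b2}) and TRN2 to
   {1,5,3,7} (result {a1,b1,a3,b3}); the tests below match exactly these
   index patterns.  */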
7388 static bool
7389 aarch64_evpc_trn (struct expand_vec_perm_d *d)
7391 unsigned int i, odd, mask, nelt = d->nelt;
7392 rtx out, in0, in1, x;
7393 rtx (*gen) (rtx, rtx, rtx);
7394 enum machine_mode vmode = d->vmode;
7396 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7397 return false;
7399 /* Note that these are little-endian tests.
7400 We correct for big-endian later. */
7401 if (d->perm[0] == 0)
7402 odd = 0;
7403 else if (d->perm[0] == 1)
7404 odd = 1;
7405 else
7406 return false;
7407 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7409 for (i = 0; i < nelt; i += 2)
7411 if (d->perm[i] != i + odd)
7412 return false;
7413 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
7414 return false;
7417 /* Success! */
7418 if (d->testing_p)
7419 return true;
7421 in0 = d->op0;
7422 in1 = d->op1;
7423 if (BYTES_BIG_ENDIAN)
7425 x = in0, in0 = in1, in1 = x;
7426 odd = !odd;
7428 out = d->target;
7430 if (odd)
7432 switch (vmode)
7434 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
7435 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
7436 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
7437 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
7438 case V4SImode: gen = gen_aarch64_trn2v4si; break;
7439 case V2SImode: gen = gen_aarch64_trn2v2si; break;
7440 case V2DImode: gen = gen_aarch64_trn2v2di; break;
7441 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
7442 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
7443 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
7444 default:
7445 return false;
7448 else
7450 switch (vmode)
7452 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
7453 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
7454 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
7455 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
7456 case V4SImode: gen = gen_aarch64_trn1v4si; break;
7457 case V2SImode: gen = gen_aarch64_trn1v2si; break;
7458 case V2DImode: gen = gen_aarch64_trn1v2di; break;
7459 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
7460 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
7461 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
7462 default:
7463 return false;
7467 emit_insn (gen (out, in0, in1));
7468 return true;
7471 /* Recognize patterns suitable for the UZP instructions. */
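/* For V4SI inputs {a0,a1,a2,a3} and {b0,b1,b2,b3}, UZP1 corresponds to
   the permutation {0,2,4,6} (result {a0,a2,b0,b2}) and UZP2 to
   {1,3,5,7} (result {a1,a3,b1,b3}).  */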
7472 static bool
7473 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
7475 unsigned int i, odd, mask, nelt = d->nelt;
7476 rtx out, in0, in1, x;
7477 rtx (*gen) (rtx, rtx, rtx);
7478 enum machine_mode vmode = d->vmode;
7480 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7481 return false;
7483 /* Note that these are little-endian tests.
7484 We correct for big-endian later. */
7485 if (d->perm[0] == 0)
7486 odd = 0;
7487 else if (d->perm[0] == 1)
7488 odd = 1;
7489 else
7490 return false;
7491 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7493 for (i = 0; i < nelt; i++)
7495 unsigned elt = (i * 2 + odd) & mask;
7496 if (d->perm[i] != elt)
7497 return false;
7500 /* Success! */
7501 if (d->testing_p)
7502 return true;
7504 in0 = d->op0;
7505 in1 = d->op1;
7506 if (BYTES_BIG_ENDIAN)
7508 x = in0, in0 = in1, in1 = x;
7509 odd = !odd;
7511 out = d->target;
7513 if (odd)
7515 switch (vmode)
7517 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
7518 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
7519 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
7520 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
7521 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
7522 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
7523 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
7524 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
7525 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
7526 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
7527 default:
7528 return false;
7531 else
7533 switch (vmode)
7535 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
7536 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
7537 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
7538 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
7539 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
7540 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
7541 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
7542 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
7543 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
7544 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
7545 default:
7546 return false;
7550 emit_insn (gen (out, in0, in1));
7551 return true;
7554 /* Recognize patterns suitable for the ZIP instructions. */
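/* For V4SI inputs {a0,a1,a2,a3} and {b0,b1,b2,b3}, ZIP1 corresponds to
   the permutation {0,4,1,5} (result {a0,b0,a1,b1}) and ZIP2 to
   {2,6,3,7} (result {a2,b2,a3,b3}).  */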
7555 static bool
7556 aarch64_evpc_zip (struct expand_vec_perm_d *d)
7558 unsigned int i, high, mask, nelt = d->nelt;
7559 rtx out, in0, in1, x;
7560 rtx (*gen) (rtx, rtx, rtx);
7561 enum machine_mode vmode = d->vmode;
7563 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7564 return false;
7566 /* Note that these are little-endian tests.
7567 We correct for big-endian later. */
7568 high = nelt / 2;
7569 if (d->perm[0] == high)
7570 /* Do Nothing. */
7572 else if (d->perm[0] == 0)
7573 high = 0;
7574 else
7575 return false;
7576 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7578 for (i = 0; i < nelt / 2; i++)
7580 unsigned elt = (i + high) & mask;
7581 if (d->perm[i * 2] != elt)
7582 return false;
7583 elt = (elt + nelt) & mask;
7584 if (d->perm[i * 2 + 1] != elt)
7585 return false;
7588 /* Success! */
7589 if (d->testing_p)
7590 return true;
7592 in0 = d->op0;
7593 in1 = d->op1;
7594 if (BYTES_BIG_ENDIAN)
7596 x = in0, in0 = in1, in1 = x;
7597 high = !high;
7599 out = d->target;
7601 if (high)
7603 switch (vmode)
7605 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
7606 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
7607 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
7608 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
7609 case V4SImode: gen = gen_aarch64_zip2v4si; break;
7610 case V2SImode: gen = gen_aarch64_zip2v2si; break;
7611 case V2DImode: gen = gen_aarch64_zip2v2di; break;
7612 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
7613 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
7614 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
7615 default:
7616 return false;
7619 else
7621 switch (vmode)
7623 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
7624 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
7625 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
7626 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
7627 case V4SImode: gen = gen_aarch64_zip1v4si; break;
7628 case V2SImode: gen = gen_aarch64_zip1v2si; break;
7629 case V2DImode: gen = gen_aarch64_zip1v2di; break;
7630 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
7631 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
7632 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
7633 default:
7634 return false;
7638 emit_insn (gen (out, in0, in1));
7639 return true;
7642 static bool
7643 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
7645 rtx rperm[MAX_VECT_LEN], sel;
7646 enum machine_mode vmode = d->vmode;
7647 unsigned int i, nelt = d->nelt;
7649 /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's
7650 numbering of elements for big-endian, we must reverse the order. */
7651 if (BYTES_BIG_ENDIAN)
7652 return false;
7654 if (d->testing_p)
7655 return true;
7657 /* Generic code will try constant permutation twice: once with the
7658 original mode and again with the elements lowered to QImode.
7659 So wait and don't do the selector expansion ourselves. */
7660 if (vmode != V8QImode && vmode != V16QImode)
7661 return false;
7663 for (i = 0; i < nelt; ++i)
7664 rperm[i] = GEN_INT (d->perm[i]);
7665 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
7666 sel = force_reg (vmode, sel);
7668 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
7669 return true;
7672 static bool
7673 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
7675 /* The pattern matching functions above are written to look for a small
7676 number to begin the sequence (0, 1, N/2). If we begin with an index
7677 from the second operand, we can swap the operands. */
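/* For instance, a V2DI permutation {2,3} selects only from the second
   operand; the remapping below turns it into {0,1} applied to the
   swapped operands, so the sequence again begins with a small index as
   the matchers expect.  */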
7678 if (d->perm[0] >= d->nelt)
7680 unsigned i, nelt = d->nelt;
7681 rtx x;
7683 for (i = 0; i < nelt; ++i)
7684 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
7686 x = d->op0;
7687 d->op0 = d->op1;
7688 d->op1 = x;
7691 if (TARGET_SIMD)
7693 if (aarch64_evpc_zip (d))
7694 return true;
7695 else if (aarch64_evpc_uzp (d))
7696 return true;
7697 else if (aarch64_evpc_trn (d))
7698 return true;
7699 return aarch64_evpc_tbl (d);
7701 return false;
7704 /* Expand a vec_perm_const pattern. */
7706 bool
7707 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
7709 struct expand_vec_perm_d d;
7710 int i, nelt, which;
7712 d.target = target;
7713 d.op0 = op0;
7714 d.op1 = op1;
7716 d.vmode = GET_MODE (target);
7717 gcc_assert (VECTOR_MODE_P (d.vmode));
7718 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
7719 d.testing_p = false;
7721 for (i = which = 0; i < nelt; ++i)
7723 rtx e = XVECEXP (sel, 0, i);
7724 int ei = INTVAL (e) & (2 * nelt - 1);
7725 which |= (ei < nelt ? 1 : 2);
7726 d.perm[i] = ei;
7729 switch (which)
7731 default:
7732 gcc_unreachable ();
7734 case 3:
7735 d.one_vector_p = false;
7736 if (!rtx_equal_p (op0, op1))
7737 break;
7739 /* The elements of PERM do not suggest that only the first operand
7740 is used, but both operands are identical. Allow easier matching
7741 of the permutation by folding the permutation into the single
7742 input vector. */
7743 /* Fall Through. */
7744 case 2:
7745 for (i = 0; i < nelt; ++i)
7746 d.perm[i] &= nelt - 1;
7747 d.op0 = op1;
7748 d.one_vector_p = true;
7749 break;
7751 case 1:
7752 d.op1 = op0;
7753 d.one_vector_p = true;
7754 break;
7757 return aarch64_expand_vec_perm_const_1 (&d);
7760 static bool
7761 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
7762 const unsigned char *sel)
7764 struct expand_vec_perm_d d;
7765 unsigned int i, nelt, which;
7766 bool ret;
7768 d.vmode = vmode;
7769 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
7770 d.testing_p = true;
7771 memcpy (d.perm, sel, nelt);
7773 /* Calculate whether all elements are in one vector. */
7774 for (i = which = 0; i < nelt; ++i)
7776 unsigned char e = d.perm[i];
7777 gcc_assert (e < 2 * nelt);
7778 which |= (e < nelt ? 1 : 2);
7781 /* If all elements are from the second vector, reindex as if from the
7782 first vector. */
7783 if (which == 2)
7784 for (i = 0; i < nelt; ++i)
7785 d.perm[i] -= nelt;
7787 /* Check whether the mask can be applied to a single vector. */
7788 d.one_vector_p = (which != 3);
7790 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
7791 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
7792 if (!d.one_vector_p)
7793 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
7795 start_sequence ();
7796 ret = aarch64_expand_vec_perm_const_1 (&d);
7797 end_sequence ();
7799 return ret;
7802 #undef TARGET_ADDRESS_COST
7803 #define TARGET_ADDRESS_COST aarch64_address_cost
7805 /* This hook determines whether unnamed bitfields affect the alignment
7806 of the containing structure. The hook returns true if the structure
7807 should inherit the alignment requirements of an unnamed bitfield's
7808 type. */
7809 #undef TARGET_ALIGN_ANON_BITFIELD
7810 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
7812 #undef TARGET_ASM_ALIGNED_DI_OP
7813 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
7815 #undef TARGET_ASM_ALIGNED_HI_OP
7816 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
7818 #undef TARGET_ASM_ALIGNED_SI_OP
7819 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
7821 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7822 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
7823 hook_bool_const_tree_hwi_hwi_const_tree_true
7825 #undef TARGET_ASM_FILE_START
7826 #define TARGET_ASM_FILE_START aarch64_start_file
7828 #undef TARGET_ASM_OUTPUT_MI_THUNK
7829 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
7831 #undef TARGET_ASM_SELECT_RTX_SECTION
7832 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
7834 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
7835 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
7837 #undef TARGET_BUILD_BUILTIN_VA_LIST
7838 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
7840 #undef TARGET_CALLEE_COPIES
7841 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
7843 #undef TARGET_CAN_ELIMINATE
7844 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
7846 #undef TARGET_CANNOT_FORCE_CONST_MEM
7847 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
7849 #undef TARGET_CONDITIONAL_REGISTER_USAGE
7850 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
7852 /* Only the least significant bit is used for initialization guard
7853 variables. */
7854 #undef TARGET_CXX_GUARD_MASK_BIT
7855 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
7857 #undef TARGET_C_MODE_FOR_SUFFIX
7858 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
7860 #ifdef TARGET_BIG_ENDIAN_DEFAULT
7861 #undef TARGET_DEFAULT_TARGET_FLAGS
7862 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
7863 #endif
7865 #undef TARGET_CLASS_MAX_NREGS
7866 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
7868 #undef TARGET_BUILTIN_DECL
7869 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
7871 #undef TARGET_EXPAND_BUILTIN
7872 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
7874 #undef TARGET_EXPAND_BUILTIN_VA_START
7875 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
7877 #undef TARGET_FOLD_BUILTIN
7878 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
7880 #undef TARGET_FUNCTION_ARG
7881 #define TARGET_FUNCTION_ARG aarch64_function_arg
7883 #undef TARGET_FUNCTION_ARG_ADVANCE
7884 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
7886 #undef TARGET_FUNCTION_ARG_BOUNDARY
7887 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
7889 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
7890 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
7892 #undef TARGET_FUNCTION_VALUE
7893 #define TARGET_FUNCTION_VALUE aarch64_function_value
7895 #undef TARGET_FUNCTION_VALUE_REGNO_P
7896 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
7898 #undef TARGET_FRAME_POINTER_REQUIRED
7899 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
7901 #undef TARGET_GIMPLE_FOLD_BUILTIN
7902 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
7904 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
7905 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
7907 #undef TARGET_INIT_BUILTINS
7908 #define TARGET_INIT_BUILTINS aarch64_init_builtins
7910 #undef TARGET_LEGITIMATE_ADDRESS_P
7911 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
7913 #undef TARGET_LEGITIMATE_CONSTANT_P
7914 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
7916 #undef TARGET_LIBGCC_CMP_RETURN_MODE
7917 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
7919 #undef TARGET_MANGLE_TYPE
7920 #define TARGET_MANGLE_TYPE aarch64_mangle_type
7922 #undef TARGET_MEMORY_MOVE_COST
7923 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
7925 #undef TARGET_MUST_PASS_IN_STACK
7926 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7928 /* This target hook should return true if accesses to volatile bitfields
7929 should use the narrowest mode possible. It should return false if these
7930 accesses should use the bitfield container type. */
7931 #undef TARGET_NARROW_VOLATILE_BITFIELD
7932 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
7934 #undef TARGET_OPTION_OVERRIDE
7935 #define TARGET_OPTION_OVERRIDE aarch64_override_options
7937 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
7938 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
7939 aarch64_override_options_after_change
7941 #undef TARGET_PASS_BY_REFERENCE
7942 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
7944 #undef TARGET_PREFERRED_RELOAD_CLASS
7945 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
7947 #undef TARGET_SECONDARY_RELOAD
7948 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
7950 #undef TARGET_SHIFT_TRUNCATION_MASK
7951 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
7953 #undef TARGET_SETUP_INCOMING_VARARGS
7954 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
7956 #undef TARGET_STRUCT_VALUE_RTX
7957 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
7959 #undef TARGET_REGISTER_MOVE_COST
7960 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
7962 #undef TARGET_RETURN_IN_MEMORY
7963 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
7965 #undef TARGET_RETURN_IN_MSB
7966 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
7968 #undef TARGET_RTX_COSTS
7969 #define TARGET_RTX_COSTS aarch64_rtx_costs
7971 #undef TARGET_TRAMPOLINE_INIT
7972 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
7974 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
7975 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
7977 #undef TARGET_VECTOR_MODE_SUPPORTED_P
7978 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
7980 #undef TARGET_ARRAY_MODE_SUPPORTED_P
7981 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
7983 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
7984 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
7986 #undef TARGET_VECTORIZE_BUILTINS
7987 #define TARGET_VECTORIZE_BUILTINS
7989 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
7990 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
7991 aarch64_builtin_vectorized_function
7993 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
7994 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
7995 aarch64_autovectorize_vector_sizes
7997 /* Section anchor support. */
7999 #undef TARGET_MIN_ANCHOR_OFFSET
8000 #define TARGET_MIN_ANCHOR_OFFSET -256
8002 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
8003 byte offset; we can do much more for larger data types, but have no way
8004 to determine the size of the access. We assume accesses are aligned. */
8005 #undef TARGET_MAX_ANCHOR_OFFSET
8006 #define TARGET_MAX_ANCHOR_OFFSET 4095
8008 #undef TARGET_VECTOR_ALIGNMENT
8009 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
8011 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
8012 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
8013 aarch64_simd_vector_alignment_reachable
8015 /* vec_perm support. */
8017 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
8018 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
8019 aarch64_vectorize_vec_perm_const_ok
8022 #undef TARGET_FIXED_CONDITION_CODE_REGS
8023 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
8025 struct gcc_target targetm = TARGET_INITIALIZER;
8027 #include "gt-aarch64.h"