gcc-4_8-branch/gcc/config/aarch64/aarch64.c
1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2013 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "tree.h"
29 #include "regs.h"
30 #include "df.h"
31 #include "hard-reg-set.h"
32 #include "output.h"
33 #include "expr.h"
34 #include "reload.h"
35 #include "toplev.h"
36 #include "target.h"
37 #include "target-def.h"
38 #include "targhooks.h"
39 #include "ggc.h"
40 #include "function.h"
41 #include "tm_p.h"
42 #include "recog.h"
43 #include "langhooks.h"
44 #include "diagnostic-core.h"
45 #include "gimple.h"
46 #include "optabs.h"
47 #include "dwarf2.h"
49 /* Classifies an address.
51 ADDRESS_REG_IMM
52 A simple base register plus immediate offset.
54 ADDRESS_REG_WB
55 A base register indexed by immediate offset with writeback.
57 ADDRESS_REG_REG
58 A base register indexed by (optionally scaled) register.
60 ADDRESS_REG_UXTW
61 A base register indexed by (optionally scaled) zero-extended register.
63 ADDRESS_REG_SXTW
64 A base register indexed by (optionally scaled) sign-extended register.
66 ADDRESS_LO_SUM
67 A LO_SUM rtx with a base register and "LO12" symbol relocation.
69 ADDRESS_SYMBOLIC:
70 A constant symbolic address, in pc-relative literal pool. */
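/* For illustration, typical instruction forms for each class (assuming a
   64-bit load; the exact form depends on the access mode):

     ADDRESS_REG_IMM    ldr x0, [x1, #16]
     ADDRESS_REG_WB     ldr x0, [x1, #16]!    or    ldr x0, [x1], #16
     ADDRESS_REG_REG    ldr x0, [x1, x2, lsl #3]
     ADDRESS_REG_UXTW   ldr x0, [x1, w2, uxtw #3]
     ADDRESS_REG_SXTW   ldr x0, [x1, w2, sxtw #3]
     ADDRESS_LO_SUM     ldr x0, [x1, #:lo12:foo]
     ADDRESS_SYMBOLIC   ldr x0, .LC0   (a pc-relative literal load)  */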
72 enum aarch64_address_type {
73 ADDRESS_REG_IMM,
74 ADDRESS_REG_WB,
75 ADDRESS_REG_REG,
76 ADDRESS_REG_UXTW,
77 ADDRESS_REG_SXTW,
78 ADDRESS_LO_SUM,
79 ADDRESS_SYMBOLIC
82 struct aarch64_address_info {
83 enum aarch64_address_type type;
84 rtx base;
85 rtx offset;
86 int shift;
87 enum aarch64_symbol_type symbol_type;
90 struct simd_immediate_info
92 rtx value;
93 int shift;
94 int element_width;
95 bool mvn;
98 /* The current code model. */
99 enum aarch64_code_model aarch64_cmodel;
101 #ifdef HAVE_AS_TLS
102 #undef TARGET_HAVE_TLS
103 #define TARGET_HAVE_TLS 1
104 #endif
106 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
107 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
108 const_tree,
109 enum machine_mode *, int *,
110 bool *);
111 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
112 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
113 static void aarch64_override_options_after_change (void);
114 static bool aarch64_vector_mode_supported_p (enum machine_mode);
115 static unsigned bit_count (unsigned HOST_WIDE_INT);
116 static bool aarch64_const_vec_all_same_int_p (rtx,
117 HOST_WIDE_INT, HOST_WIDE_INT);
119 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
120 const unsigned char *sel);
122 /* The processor for which instructions should be scheduled. */
123 enum aarch64_processor aarch64_tune = generic;
125 /* The current tuning set. */
126 const struct tune_params *aarch64_tune_params;
128 /* Mask to specify which instructions we are allowed to generate. */
129 unsigned long aarch64_isa_flags = 0;
131 /* Mask to specify which instruction scheduling options should be used. */
132 unsigned long aarch64_tune_flags = 0;
134 /* Tuning parameters. */
136 #if HAVE_DESIGNATED_INITIALIZERS
137 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
138 #else
139 #define NAMED_PARAM(NAME, VAL) (VAL)
140 #endif
142 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
143 __extension__
144 #endif
145 static const struct cpu_rtx_cost_table generic_rtx_cost_table =
147 NAMED_PARAM (memory_load, COSTS_N_INSNS (1)),
148 NAMED_PARAM (memory_store, COSTS_N_INSNS (0)),
149 NAMED_PARAM (register_shift, COSTS_N_INSNS (1)),
150 NAMED_PARAM (int_divide, COSTS_N_INSNS (6)),
151 NAMED_PARAM (float_divide, COSTS_N_INSNS (2)),
152 NAMED_PARAM (double_divide, COSTS_N_INSNS (6)),
153 NAMED_PARAM (int_multiply, COSTS_N_INSNS (1)),
154 NAMED_PARAM (int_multiply_extend, COSTS_N_INSNS (1)),
155 NAMED_PARAM (int_multiply_add, COSTS_N_INSNS (1)),
156 NAMED_PARAM (int_multiply_extend_add, COSTS_N_INSNS (1)),
157 NAMED_PARAM (float_multiply, COSTS_N_INSNS (0)),
158 NAMED_PARAM (double_multiply, COSTS_N_INSNS (1))
161 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
162 __extension__
163 #endif
164 static const struct cpu_addrcost_table generic_addrcost_table =
166 NAMED_PARAM (pre_modify, 0),
167 NAMED_PARAM (post_modify, 0),
168 NAMED_PARAM (register_offset, 0),
169 NAMED_PARAM (register_extend, 0),
170 NAMED_PARAM (imm_offset, 0)
173 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
174 __extension__
175 #endif
176 static const struct cpu_regmove_cost generic_regmove_cost =
178 NAMED_PARAM (GP2GP, 1),
179 NAMED_PARAM (GP2FP, 2),
180 NAMED_PARAM (FP2GP, 2),
181 /* We currently do not provide direct support for TFmode Q->Q move.
182 Therefore we need to raise the cost above 2 in order to have
183 reload handle the situation. */
184 NAMED_PARAM (FP2FP, 4)
187 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
188 __extension__
189 #endif
190 static const struct tune_params generic_tunings =
192 &generic_rtx_cost_table,
193 &generic_addrcost_table,
194 &generic_regmove_cost,
195 NAMED_PARAM (memmov_cost, 4)
198 /* A processor implementing AArch64. */
199 struct processor
201 const char *const name;
202 enum aarch64_processor core;
203 const char *arch;
204 const unsigned long flags;
205 const struct tune_params *const tune;
208 /* Processor cores implementing AArch64. */
209 static const struct processor all_cores[] =
211 #define AARCH64_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
212 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
213 #include "aarch64-cores.def"
214 #undef AARCH64_CORE
215 {"generic", generic, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
216 {NULL, aarch64_none, NULL, 0, NULL}
219 /* Architectures implementing AArch64. */
220 static const struct processor all_architectures[] =
222 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
223 {NAME, CORE, #ARCH, FLAGS, NULL},
224 #include "aarch64-arches.def"
225 #undef AARCH64_ARCH
226 {"generic", generic, "8", AARCH64_FL_FOR_ARCH8, NULL},
227 {NULL, aarch64_none, NULL, 0, NULL}
230 /* Target specification. These are populated as command-line arguments
231 are processed, or NULL if not specified. */
232 static const struct processor *selected_arch;
233 static const struct processor *selected_cpu;
234 static const struct processor *selected_tune;
236 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
238 /* An ISA extension in the co-processor and main instruction set space. */
239 struct aarch64_option_extension
241 const char *const name;
242 const unsigned long flags_on;
243 const unsigned long flags_off;
246 /* ISA extensions in AArch64. */
247 static const struct aarch64_option_extension all_extensions[] =
249 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
250 {NAME, FLAGS_ON, FLAGS_OFF},
251 #include "aarch64-option-extensions.def"
252 #undef AARCH64_OPT_EXTENSION
253 {NULL, 0, 0}
256 /* Used to track the size of an address when generating a pre/post
257 increment address. */
258 static enum machine_mode aarch64_memory_reference_mode;
260 /* Used to force GTY into this file. */
261 static GTY(()) int gty_dummy;
263 /* A table of valid AArch64 "bitmask immediate" values for
264 logical instructions. */
266 #define AARCH64_NUM_BITMASKS 5334
267 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
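/* For example, 0x00ff00ff00ff00ff (a 16-bit element with its low eight bits
   set, replicated across the register) is a valid bitmask immediate and can
   be used directly by AND/ORR/EOR, whereas 0x0000000012345678 is not and
   has to be materialised some other way.  The 5334 entries cover every
   distinct value encodable in the N:immr:imms fields.  */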
269 /* Did we set flag_omit_frame_pointer just so
270 aarch64_frame_pointer_required would be called? */
271 static bool faked_omit_frame_pointer;
273 typedef enum aarch64_cond_code
275 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
276 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
277 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
279 aarch64_cc;
281 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
283 /* The condition codes of the processor, and the inverse function. */
284 static const char * const aarch64_condition_codes[] =
286 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
287 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
290 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
291 unsigned
292 aarch64_dbx_register_number (unsigned regno)
294 if (GP_REGNUM_P (regno))
295 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
296 else if (regno == SP_REGNUM)
297 return AARCH64_DWARF_SP;
298 else if (FP_REGNUM_P (regno))
299 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
301 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
302 equivalent DWARF register. */
303 return DWARF_FRAME_REGISTERS;
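/* For example, x5 maps to DWARF register 5, sp to 31 and v3 to 67
   (AARCH64_DWARF_V0 being 64), following the AArch64 DWARF register
   numbering.  */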
306 /* Return TRUE if MODE is any of the large INT modes. */
307 static bool
308 aarch64_vect_struct_mode_p (enum machine_mode mode)
310 return mode == OImode || mode == CImode || mode == XImode;
313 /* Return TRUE if MODE is any of the vector modes. */
314 static bool
315 aarch64_vector_mode_p (enum machine_mode mode)
317 return aarch64_vector_mode_supported_p (mode)
318 || aarch64_vect_struct_mode_p (mode);
321 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
322 static bool
323 aarch64_array_mode_supported_p (enum machine_mode mode,
324 unsigned HOST_WIDE_INT nelems)
326 if (TARGET_SIMD
327 && AARCH64_VALID_SIMD_QREG_MODE (mode)
328 && (nelems >= 2 && nelems <= 4))
329 return true;
331 return false;
334 /* Implement HARD_REGNO_NREGS. */
337 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
339 switch (aarch64_regno_regclass (regno))
341 case FP_REGS:
342 case FP_LO_REGS:
343 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
344 default:
345 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
347 gcc_unreachable ();
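/* For example, with UNITS_PER_WORD == 8 and UNITS_PER_VREG == 16, TImode
   (16 bytes) needs two X registers but only one V register, while OImode
   (32 bytes) needs two V registers.  */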
350 /* Implement HARD_REGNO_MODE_OK. */
353 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
355 if (GET_MODE_CLASS (mode) == MODE_CC)
356 return regno == CC_REGNUM;
358 if (regno == SP_REGNUM || regno == FRAME_POINTER_REGNUM
359 || regno == ARG_POINTER_REGNUM)
360 return mode == Pmode;
362 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
363 return 1;
365 if (FP_REGNUM_P (regno))
367 if (aarch64_vect_struct_mode_p (mode))
368 return
369 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
370 else
371 return 1;
374 return 0;
377 /* Return true if calls to DECL should be treated as
378 long-calls (ie called via a register). */
379 static bool
380 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
382 return false;
385 /* Return true if calls to symbol-ref SYM should be treated as
386 long-calls (ie called via a register). */
387 bool
388 aarch64_is_long_call_p (rtx sym)
390 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
393 /* Return true if the offsets to a zero/sign-extract operation
394 represent an expression that matches an extend operation. The
395 operands represent the parameters from
397 (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */
398 bool
399 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
400 rtx extract_imm)
402 HOST_WIDE_INT mult_val, extract_val;
404 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
405 return false;
407 mult_val = INTVAL (mult_imm);
408 extract_val = INTVAL (extract_imm);
410 if (extract_val > 8
411 && extract_val < GET_MODE_BITSIZE (mode)
412 && exact_log2 (extract_val & ~7) > 0
413 && (extract_val & 7) <= 4
414 && mult_val == (1 << (extract_val & 7)))
415 return true;
417 return false;
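/* For example, in DImode a MULT_IMM of 4 and an EXTRACT_IMM of 34 satisfy
   the test: 34 & ~7 == 32 is a power of two, 34 & 7 == 2 and 4 == 1 << 2,
   i.e. the extract describes a 32-bit value sign- or zero-extended and
   shifted left by 2 (an "sxtw #2" / "uxtw #2" style operand).  */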
420 /* Emit an insn that's a simple single-set. Both the operands must be
421 known to be valid. */
422 inline static rtx
423 emit_set_insn (rtx x, rtx y)
425 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
428 /* X and Y are two things to compare using CODE. Emit the compare insn and
429 return the rtx for the CC register in the proper mode. */
431 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
433 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
434 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
436 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
437 return cc_reg;
440 /* Build the SYMBOL_REF for __tls_get_addr. */
442 static GTY(()) rtx tls_get_addr_libfunc;
445 aarch64_tls_get_addr (void)
447 if (!tls_get_addr_libfunc)
448 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
449 return tls_get_addr_libfunc;
452 /* Return the TLS model to use for ADDR. */
454 static enum tls_model
455 tls_symbolic_operand_type (rtx addr)
457 enum tls_model tls_kind = TLS_MODEL_NONE;
458 rtx sym, addend;
460 if (GET_CODE (addr) == CONST)
462 split_const (addr, &sym, &addend);
463 if (GET_CODE (sym) == SYMBOL_REF)
464 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
466 else if (GET_CODE (addr) == SYMBOL_REF)
467 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
469 return tls_kind;
472 /* We'll allow LO_SUMs in our legitimate addresses so that combine can
473 take care of combining addresses where necessary, but for generation
474 purposes we'll generate the address as:
476 RTL Absolute
477 tmp = hi (symbol_ref); adrp x1, foo
478 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
481 PIC TLS
482 adrp x1, :got:foo adrp tmp, :tlsgd:foo
483 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
484 bl __tls_get_addr
487 Load TLS symbol, depending on TLS mechanism and TLS access model.
489 Global Dynamic - Traditional TLS:
490 adrp tmp, :tlsgd:imm
491 add dest, tmp, #:tlsgd_lo12:imm
492 bl __tls_get_addr
494 Global Dynamic - TLS Descriptors:
495 adrp dest, :tlsdesc:imm
496 ldr tmp, [dest, #:tlsdesc_lo12:imm]
497 add dest, dest, #:tlsdesc_lo12:imm
498 blr tmp
499 mrs tp, tpidr_el0
500 add dest, dest, tp
502 Initial Exec:
503 mrs tp, tpidr_el0
504 adrp tmp, :gottprel:imm
505 ldr dest, [tmp, #:gottprel_lo12:imm]
506 add dest, dest, tp
508 Local Exec:
509 mrs tp, tpidr_el0
510 add t0, tp, #:tprel_hi12:imm
511 add t0, #:tprel_lo12_nc:imm
514 static void
515 aarch64_load_symref_appropriately (rtx dest, rtx imm,
516 enum aarch64_symbol_type type)
518 switch (type)
520 case SYMBOL_SMALL_ABSOLUTE:
522 rtx tmp_reg = dest;
523 if (can_create_pseudo_p ())
525 tmp_reg = gen_reg_rtx (Pmode);
528 emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
529 emit_insn (gen_add_losym (dest, tmp_reg, imm));
530 return;
533 case SYMBOL_TINY_ABSOLUTE:
534 emit_insn (gen_rtx_SET (Pmode, dest, imm));
535 return;
537 case SYMBOL_SMALL_GOT:
539 rtx tmp_reg = dest;
540 if (can_create_pseudo_p ())
541 tmp_reg = gen_reg_rtx (Pmode);
542 emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
543 emit_insn (gen_ldr_got_small (dest, tmp_reg, imm));
544 return;
547 case SYMBOL_SMALL_TLSGD:
549 rtx insns;
550 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
552 start_sequence ();
553 emit_call_insn (gen_tlsgd_small (result, imm));
554 insns = get_insns ();
555 end_sequence ();
557 RTL_CONST_CALL_P (insns) = 1;
558 emit_libcall_block (insns, dest, result, imm);
559 return;
562 case SYMBOL_SMALL_TLSDESC:
564 rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
565 rtx tp;
567 emit_insn (gen_tlsdesc_small (imm));
568 tp = aarch64_load_tp (NULL);
569 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
570 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
571 return;
574 case SYMBOL_SMALL_GOTTPREL:
576 rtx tmp_reg = gen_reg_rtx (Pmode);
577 rtx tp = aarch64_load_tp (NULL);
578 emit_insn (gen_tlsie_small (tmp_reg, imm));
579 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
580 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
581 return;
584 case SYMBOL_SMALL_TPREL:
586 rtx tp = aarch64_load_tp (NULL);
587 emit_insn (gen_tlsle_small (dest, tp, imm));
588 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
589 return;
592 default:
593 gcc_unreachable ();
597 /* Emit a move from SRC to DEST. Assume that the move expanders can
598 handle all moves if !can_create_pseudo_p (). The distinction is
599 important because, unlike emit_move_insn, the move expanders know
600 how to force Pmode objects into the constant pool even when the
601 constant pool address is not itself legitimate. */
602 static rtx
603 aarch64_emit_move (rtx dest, rtx src)
605 return (can_create_pseudo_p ()
606 ? emit_move_insn (dest, src)
607 : emit_move_insn_1 (dest, src));
610 void
611 aarch64_split_128bit_move (rtx dst, rtx src)
613 rtx low_dst;
615 gcc_assert (GET_MODE (dst) == TImode);
617 if (REG_P (dst) && REG_P (src))
619 int src_regno = REGNO (src);
620 int dst_regno = REGNO (dst);
622 gcc_assert (GET_MODE (src) == TImode);
624 /* Handle r -> w, w -> r. */
625 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
627 emit_insn (gen_aarch64_movtilow_di (dst,
628 gen_lowpart (word_mode, src)));
629 emit_insn (gen_aarch64_movtihigh_di (dst,
630 gen_highpart (word_mode, src)));
631 return;
633 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
635 emit_insn (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst),
636 src));
637 emit_insn (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst),
638 src));
639 return;
641 /* Fall through to r -> r cases. */
644 low_dst = gen_lowpart (word_mode, dst);
645 if (REG_P (low_dst)
646 && reg_overlap_mentioned_p (low_dst, src))
648 aarch64_emit_move (gen_highpart (word_mode, dst),
649 gen_highpart_mode (word_mode, TImode, src));
650 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
652 else
654 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
655 aarch64_emit_move (gen_highpart (word_mode, dst),
656 gen_highpart_mode (word_mode, TImode, src));
660 bool
661 aarch64_split_128bit_move_p (rtx dst, rtx src)
663 return (! REG_P (src)
664 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
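/* For example, a TImode copy between two Q registers (q1 to q0) is left
   alone, while a copy involving X registers or memory is split into two
   DImode moves of the low and high halves.  */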
667 /* Split a complex SIMD move. */
669 void
670 aarch64_split_simd_move (rtx dst, rtx src)
672 enum machine_mode src_mode = GET_MODE (src);
673 enum machine_mode dst_mode = GET_MODE (dst);
675 gcc_assert (VECTOR_MODE_P (dst_mode));
677 if (REG_P (dst) && REG_P (src))
679 gcc_assert (VECTOR_MODE_P (src_mode));
681 switch (src_mode)
683 case V16QImode:
684 emit_insn (gen_aarch64_simd_movv16qi (dst, src));
685 break;
686 case V8HImode:
687 emit_insn (gen_aarch64_simd_movv8hi (dst, src));
688 break;
689 case V4SImode:
690 emit_insn (gen_aarch64_simd_movv4si (dst, src));
691 break;
692 case V2DImode:
693 emit_insn (gen_aarch64_simd_movv2di (dst, src));
694 break;
695 case V4SFmode:
696 emit_insn (gen_aarch64_simd_movv4sf (dst, src));
697 break;
698 case V2DFmode:
699 emit_insn (gen_aarch64_simd_movv2df (dst, src));
700 break;
701 default:
702 gcc_unreachable ();
704 return;
708 static rtx
709 aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
711 if (can_create_pseudo_p ())
712 return force_reg (mode, value);
713 else
715 x = aarch64_emit_move (x, value);
716 return x;
721 static rtx
722 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
724 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
726 rtx high;
727 /* Load the full offset into a register. This
728 might be improvable in the future. */
729 high = GEN_INT (offset);
730 offset = 0;
731 high = aarch64_force_temporary (mode, temp, high);
732 reg = aarch64_force_temporary (mode, temp,
733 gen_rtx_PLUS (mode, high, reg));
735 return plus_constant (mode, reg, offset);
738 void
739 aarch64_expand_mov_immediate (rtx dest, rtx imm)
741 enum machine_mode mode = GET_MODE (dest);
742 unsigned HOST_WIDE_INT mask;
743 int i;
744 bool first;
745 unsigned HOST_WIDE_INT val;
746 bool subtargets;
747 rtx subtarget;
748 int one_match, zero_match;
750 gcc_assert (mode == SImode || mode == DImode);
752 /* Check on what type of symbol it is. */
753 if (GET_CODE (imm) == SYMBOL_REF
754 || GET_CODE (imm) == LABEL_REF
755 || GET_CODE (imm) == CONST)
757 rtx mem, base, offset;
758 enum aarch64_symbol_type sty;
760 /* If we have (const (plus symbol offset)), separate out the offset
761 before we start classifying the symbol. */
762 split_const (imm, &base, &offset);
764 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
765 switch (sty)
767 case SYMBOL_FORCE_TO_MEM:
768 if (offset != const0_rtx
769 && targetm.cannot_force_const_mem (mode, imm))
771 gcc_assert(can_create_pseudo_p ());
772 base = aarch64_force_temporary (mode, dest, base);
773 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
774 aarch64_emit_move (dest, base);
775 return;
777 mem = force_const_mem (mode, imm);
778 gcc_assert (mem);
779 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
780 return;
782 case SYMBOL_SMALL_TLSGD:
783 case SYMBOL_SMALL_TLSDESC:
784 case SYMBOL_SMALL_GOTTPREL:
785 case SYMBOL_SMALL_GOT:
786 if (offset != const0_rtx)
788 gcc_assert(can_create_pseudo_p ());
789 base = aarch64_force_temporary (mode, dest, base);
790 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
791 aarch64_emit_move (dest, base);
792 return;
794 /* FALLTHRU */
796 case SYMBOL_SMALL_TPREL:
797 case SYMBOL_SMALL_ABSOLUTE:
798 case SYMBOL_TINY_ABSOLUTE:
799 aarch64_load_symref_appropriately (dest, imm, sty);
800 return;
802 default:
803 gcc_unreachable ();
807 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
809 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
810 return;
813 if (!CONST_INT_P (imm))
815 if (GET_CODE (imm) == HIGH)
816 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
817 else
819 rtx mem = force_const_mem (mode, imm);
820 gcc_assert (mem);
821 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
824 return;
827 if (mode == SImode)
829 /* We know we can't do this in 1 insn, and we must be able to do it
830 in two; so don't mess around looking for sequences that don't buy
831 us anything. */
832 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
833 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
834 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
835 return;
838 /* Remaining cases are all for DImode. */
840 val = INTVAL (imm);
841 subtargets = optimize && can_create_pseudo_p ();
843 one_match = 0;
844 zero_match = 0;
845 mask = 0xffff;
847 for (i = 0; i < 64; i += 16, mask <<= 16)
849 if ((val & mask) == 0)
850 zero_match++;
851 else if ((val & mask) == mask)
852 one_match++;
855 if (one_match == 2)
857 mask = 0xffff;
858 for (i = 0; i < 64; i += 16, mask <<= 16)
860 if ((val & mask) != mask)
862 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
863 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
864 GEN_INT ((val >> i) & 0xffff)));
865 return;
868 gcc_unreachable ();
871 if (zero_match == 2)
872 goto simple_sequence;
874 mask = 0x0ffff0000UL;
875 for (i = 16; i < 64; i += 16, mask <<= 16)
877 HOST_WIDE_INT comp = mask & ~(mask - 1);
879 if (aarch64_uimm12_shift (val - (val & mask)))
881 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
883 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
884 emit_insn (gen_adddi3 (dest, subtarget,
885 GEN_INT (val - (val & mask))));
886 return;
888 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
890 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
892 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
893 GEN_INT ((val + comp) & mask)));
894 emit_insn (gen_adddi3 (dest, subtarget,
895 GEN_INT (val - ((val + comp) & mask))));
896 return;
898 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
900 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
902 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
903 GEN_INT ((val - comp) | ~mask)));
904 emit_insn (gen_adddi3 (dest, subtarget,
905 GEN_INT (val - ((val - comp) | ~mask))));
906 return;
908 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
910 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
912 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
913 GEN_INT (val | ~mask)));
914 emit_insn (gen_adddi3 (dest, subtarget,
915 GEN_INT (val - (val | ~mask))));
916 return;
920 /* See if we can do it by arithmetically combining two
921 immediates. */
922 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
924 int j;
925 mask = 0xffff;
927 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
928 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
930 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
931 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
932 GEN_INT (aarch64_bitmasks[i])));
933 emit_insn (gen_adddi3 (dest, subtarget,
934 GEN_INT (val - aarch64_bitmasks[i])));
935 return;
938 for (j = 0; j < 64; j += 16, mask <<= 16)
940 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
942 emit_insn (gen_rtx_SET (VOIDmode, dest,
943 GEN_INT (aarch64_bitmasks[i])));
944 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
945 GEN_INT ((val >> j) & 0xffff)));
946 return;
951 /* See if we can do it by logically combining two immediates. */
952 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
954 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
956 int j;
958 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
959 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
961 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
962 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
963 GEN_INT (aarch64_bitmasks[i])));
964 emit_insn (gen_iordi3 (dest, subtarget,
965 GEN_INT (aarch64_bitmasks[j])));
966 return;
969 else if ((val & aarch64_bitmasks[i]) == val)
971 int j;
973 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
974 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
977 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
978 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
979 GEN_INT (aarch64_bitmasks[j])));
980 emit_insn (gen_anddi3 (dest, subtarget,
981 GEN_INT (aarch64_bitmasks[i])));
982 return;
987 simple_sequence:
988 first = true;
989 mask = 0xffff;
990 for (i = 0; i < 64; i += 16, mask <<= 16)
992 if ((val & mask) != 0)
994 if (first)
996 emit_insn (gen_rtx_SET (VOIDmode, dest,
997 GEN_INT (val & mask)));
998 first = false;
1000 else
1001 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1002 GEN_INT ((val >> i) & 0xffff)));
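/* For example, the DImode constant 0x1234000000005678 has two zero 16-bit
   chunks and falls through to the simple sequence, giving roughly

       mov  x0, #0x5678
       movk x0, #0x1234, lsl #48

   i.e. one MOV for the first non-zero chunk and one MOVK per further
   non-zero chunk (illustrative; other values may hit one of the shorter
   special-case sequences above).  */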
1007 static bool
1008 aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1010 /* Indirect calls are not currently supported. */
1011 if (decl == NULL)
1012 return false;
1014 /* Cannot tail-call to long-calls, since these are outside of the
1015 range of a branch instruction (we could handle this if we added
1016 support for indirect tail-calls). */
1017 if (aarch64_decl_is_long_call_p (decl))
1018 return false;
1020 return true;
1023 /* Implement TARGET_PASS_BY_REFERENCE. */
1025 static bool
1026 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1027 enum machine_mode mode,
1028 const_tree type,
1029 bool named ATTRIBUTE_UNUSED)
1031 HOST_WIDE_INT size;
1032 enum machine_mode dummymode;
1033 int nregs;
1035 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1036 size = (mode == BLKmode && type)
1037 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1039 if (type)
1041 /* Arrays always passed by reference. */
1042 if (TREE_CODE (type) == ARRAY_TYPE)
1043 return true;
1044 /* Other aggregates based on their size. */
1045 if (AGGREGATE_TYPE_P (type))
1046 size = int_size_in_bytes (type);
1049 /* Variable-sized arguments are always passed by reference. */
1050 if (size < 0)
1051 return true;
1053 /* Can this be a candidate to be passed in fp/simd register(s)? */
1054 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1055 &dummymode, &nregs,
1056 NULL))
1057 return false;
1059 /* Arguments which are variable sized or larger than 2 registers are
1060 passed by reference unless they are a homogeneous floating-point
1061 aggregate. */
1062 return size > 2 * UNITS_PER_WORD;
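/* For example, struct { double a, b, c, d; } is an HFA and is passed by
   value, normally in four D registers, while struct { long a, b, c; }
   (24 bytes, more than two X registers) is passed by reference.  */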
1065 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1066 static bool
1067 aarch64_return_in_msb (const_tree valtype)
1069 enum machine_mode dummy_mode;
1070 int dummy_int;
1072 /* Never happens in little-endian mode. */
1073 if (!BYTES_BIG_ENDIAN)
1074 return false;
1076 /* Only composite types smaller than or equal to 16 bytes can
1077 be potentially returned in registers. */
1078 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1079 || int_size_in_bytes (valtype) <= 0
1080 || int_size_in_bytes (valtype) > 16)
1081 return false;
1083 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1084 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1085 is always passed/returned in the least significant bits of fp/simd
1086 register(s). */
1087 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1088 &dummy_mode, &dummy_int, NULL))
1089 return false;
1091 return true;
1094 /* Implement TARGET_FUNCTION_VALUE.
1095 Define how to find the value returned by a function. */
1097 static rtx
1098 aarch64_function_value (const_tree type, const_tree func,
1099 bool outgoing ATTRIBUTE_UNUSED)
1101 enum machine_mode mode;
1102 int unsignedp;
1103 int count;
1104 enum machine_mode ag_mode;
1106 mode = TYPE_MODE (type);
1107 if (INTEGRAL_TYPE_P (type))
1108 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1110 if (aarch64_return_in_msb (type))
1112 HOST_WIDE_INT size = int_size_in_bytes (type);
1114 if (size % UNITS_PER_WORD != 0)
1116 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1117 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1121 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1122 &ag_mode, &count, NULL))
1124 if (!aarch64_composite_type_p (type, mode))
1126 gcc_assert (count == 1 && mode == ag_mode);
1127 return gen_rtx_REG (mode, V0_REGNUM);
1129 else
1131 int i;
1132 rtx par;
1134 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1135 for (i = 0; i < count; i++)
1137 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1138 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1139 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1140 XVECEXP (par, 0, i) = tmp;
1142 return par;
1145 else
1146 return gen_rtx_REG (mode, R0_REGNUM);
1149 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1150 Return true if REGNO is the number of a hard register in which the values
1151 of called function may come back. */
1153 static bool
1154 aarch64_function_value_regno_p (const unsigned int regno)
1156 /* Maximum of 16 bytes can be returned in the general registers. Examples
1157 of 16-byte return values are: 128-bit integers and 16-byte small
1158 structures (excluding homogeneous floating-point aggregates). */
1159 if (regno == R0_REGNUM || regno == R1_REGNUM)
1160 return true;
1162 /* Up to four fp/simd registers can return a function value, e.g. a
1163 homogeneous floating-point aggregate having four members. */
1164 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1165 return !TARGET_GENERAL_REGS_ONLY;
1167 return false;
1170 /* Implement TARGET_RETURN_IN_MEMORY.
1172 If the type T of the result of a function is such that
1173 void func (T arg)
1174 would require that arg be passed as a value in a register (or set of
1175 registers) according to the parameter passing rules, then the result
1176 is returned in the same registers as would be used for such an
1177 argument. */
1179 static bool
1180 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1182 HOST_WIDE_INT size;
1183 enum machine_mode ag_mode;
1184 int count;
1186 if (!AGGREGATE_TYPE_P (type)
1187 && TREE_CODE (type) != COMPLEX_TYPE
1188 && TREE_CODE (type) != VECTOR_TYPE)
1189 /* Simple scalar types always returned in registers. */
1190 return false;
1192 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1193 type,
1194 &ag_mode,
1195 &count,
1196 NULL))
1197 return false;
1199 /* Types larger than 2 registers returned in memory. */
1200 size = int_size_in_bytes (type);
1201 return (size < 0 || size > 2 * UNITS_PER_WORD);
1204 static bool
1205 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1206 const_tree type, int *nregs)
1208 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1209 return aarch64_vfp_is_call_or_return_candidate (mode,
1210 type,
1211 &pcum->aapcs_vfp_rmode,
1212 nregs,
1213 NULL);
1216 /* Given MODE and TYPE of a function argument, return the alignment in
1217 bits. The idea is to suppress any stronger alignment requested by
1218 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1219 This is a helper function for local use only. */
1221 static unsigned int
1222 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1224 unsigned int alignment;
1226 if (type)
1228 if (!integer_zerop (TYPE_SIZE (type)))
1230 if (TYPE_MODE (type) == mode)
1231 alignment = TYPE_ALIGN (type);
1232 else
1233 alignment = GET_MODE_ALIGNMENT (mode);
1235 else
1236 alignment = 0;
1238 else
1239 alignment = GET_MODE_ALIGNMENT (mode);
1241 return alignment;
1244 /* Layout a function argument according to the AAPCS64 rules. The rule
1245 numbers refer to the rule numbers in the AAPCS64. */
1247 static void
1248 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1249 const_tree type,
1250 bool named ATTRIBUTE_UNUSED)
1252 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1253 int ncrn, nvrn, nregs;
1254 bool allocate_ncrn, allocate_nvrn;
1256 /* We need to do this once per argument. */
1257 if (pcum->aapcs_arg_processed)
1258 return;
1260 pcum->aapcs_arg_processed = true;
1262 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1263 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1264 mode,
1265 type,
1266 &nregs);
1268 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1269 The following code thus handles passing by SIMD/FP registers first. */
1271 nvrn = pcum->aapcs_nvrn;
1273 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
1274 and homogeneous short-vector aggregates (HVA). */
1275 if (allocate_nvrn)
1277 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1279 pcum->aapcs_nextnvrn = nvrn + nregs;
1280 if (!aarch64_composite_type_p (type, mode))
1282 gcc_assert (nregs == 1);
1283 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1285 else
1287 rtx par;
1288 int i;
1289 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1290 for (i = 0; i < nregs; i++)
1292 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1293 V0_REGNUM + nvrn + i);
1294 tmp = gen_rtx_EXPR_LIST
1295 (VOIDmode, tmp,
1296 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1297 XVECEXP (par, 0, i) = tmp;
1299 pcum->aapcs_reg = par;
1301 return;
1303 else
1305 /* C.3 NSRN is set to 8. */
1306 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1307 goto on_stack;
1311 ncrn = pcum->aapcs_ncrn;
1312 nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1313 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1316 /* C6 - C9, though the sign and zero extension semantics are
1317 handled elsewhere. This is the case where the argument fits
1318 entirely in general registers. */
1319 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1321 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1323 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1325 /* C.8 if the argument has an alignment of 16 then the NGRN is
1326 rounded up to the next even number. */
1327 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1329 ++ncrn;
1330 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1332 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1333 A reg is still generated for it, but the caller should be smart
1334 enough not to use it. */
1335 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1337 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1339 else
1341 rtx par;
1342 int i;
1344 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1345 for (i = 0; i < nregs; i++)
1347 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1348 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1349 GEN_INT (i * UNITS_PER_WORD));
1350 XVECEXP (par, 0, i) = tmp;
1352 pcum->aapcs_reg = par;
1355 pcum->aapcs_nextncrn = ncrn + nregs;
1356 return;
1359 /* C.11 */
1360 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1362 /* The argument is passed on the stack; record the needed number of words for
1363 this argument (we can re-use NREGS) and align the total size if
1364 necessary. */
1365 on_stack:
1366 pcum->aapcs_stack_words = nregs;
1367 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1368 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1369 16 / UNITS_PER_WORD) + 1;
1370 return;
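/* For example (rule C.8), for f (int a, __int128 b) the first argument is
   passed in w0; b requires 16-byte alignment, so the NGRN is rounded up
   from 1 to 2 and b is passed in x2/x3, leaving x1 unused.  */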
1373 /* Implement TARGET_FUNCTION_ARG. */
1375 static rtx
1376 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1377 const_tree type, bool named)
1379 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1380 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1382 if (mode == VOIDmode)
1383 return NULL_RTX;
1385 aarch64_layout_arg (pcum_v, mode, type, named);
1386 return pcum->aapcs_reg;
1389 void
1390 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1391 const_tree fntype ATTRIBUTE_UNUSED,
1392 rtx libname ATTRIBUTE_UNUSED,
1393 const_tree fndecl ATTRIBUTE_UNUSED,
1394 unsigned n_named ATTRIBUTE_UNUSED)
1396 pcum->aapcs_ncrn = 0;
1397 pcum->aapcs_nvrn = 0;
1398 pcum->aapcs_nextncrn = 0;
1399 pcum->aapcs_nextnvrn = 0;
1400 pcum->pcs_variant = ARM_PCS_AAPCS64;
1401 pcum->aapcs_reg = NULL_RTX;
1402 pcum->aapcs_arg_processed = false;
1403 pcum->aapcs_stack_words = 0;
1404 pcum->aapcs_stack_size = 0;
1406 return;
1409 static void
1410 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1411 enum machine_mode mode,
1412 const_tree type,
1413 bool named)
1415 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1416 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1418 aarch64_layout_arg (pcum_v, mode, type, named);
1419 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1420 != (pcum->aapcs_stack_words != 0));
1421 pcum->aapcs_arg_processed = false;
1422 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1423 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1424 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1425 pcum->aapcs_stack_words = 0;
1426 pcum->aapcs_reg = NULL_RTX;
1430 bool
1431 aarch64_function_arg_regno_p (unsigned regno)
1433 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1434 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1437 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1438 PARM_BOUNDARY bits of alignment, but will be given anything up
1439 to STACK_BOUNDARY bits if the type requires it. This makes sure
1440 that both before and after the layout of each argument, the Next
1441 Stacked Argument Address (NSAA) will have a minimum alignment of
1442 8 bytes. */
1444 static unsigned int
1445 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1447 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1449 if (alignment < PARM_BOUNDARY)
1450 alignment = PARM_BOUNDARY;
1451 if (alignment > STACK_BOUNDARY)
1452 alignment = STACK_BOUNDARY;
1453 return alignment;
1456 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1458 Return true if an argument passed on the stack should be padded upwards,
1459 i.e. if the least-significant byte of the stack slot has useful data.
1461 Small aggregate types are placed in the lowest memory address.
1463 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1465 bool
1466 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1468 /* On little-endian targets, the least significant byte of every stack
1469 argument is passed at the lowest byte address of the stack slot. */
1470 if (!BYTES_BIG_ENDIAN)
1471 return true;
1473 /* Otherwise, integral types and floating point types are padded downward:
1474 the least significant byte of a stack argument is passed at the highest
1475 byte address of the stack slot. */
1476 if (type
1477 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type))
1478 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1479 return false;
1481 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1482 return true;
1485 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1487 It specifies padding for the last (may also be the only)
1488 element of a block move between registers and memory. Assuming
1489 the block is in memory, padding upward means that the last element
1490 is padded after its most significant byte, while in downward
1491 padding the last element is padded at its least significant
1492 byte side.
1494 Small aggregates and small complex types are always padded
1495 upwards.
1497 We don't need to worry about homogeneous floating-point or
1498 short-vector aggregates; their move is not affected by the
1499 padding direction determined here. Regardless of endianness,
1500 each element of such an aggregate is put in the least
1501 significant bits of a fp/simd register.
1503 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1504 register has useful data, and return the opposite if the most
1505 significant byte does. */
1507 bool
1508 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1509 bool first ATTRIBUTE_UNUSED)
1512 /* Small composite types are always padded upward. */
1513 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1515 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1516 : GET_MODE_SIZE (mode));
1517 if (size < 2 * UNITS_PER_WORD)
1518 return true;
1521 /* Otherwise, use the default padding. */
1522 return !BYTES_BIG_ENDIAN;
1525 static enum machine_mode
1526 aarch64_libgcc_cmp_return_mode (void)
1528 return SImode;
1531 static bool
1532 aarch64_frame_pointer_required (void)
1534 /* If the function contains dynamic stack allocations, we need to
1535 use the frame pointer to access the static parts of the frame. */
1536 if (cfun->calls_alloca)
1537 return true;
1539 /* We may have turned flag_omit_frame_pointer on in order to have this
1540 function called; if we did, we also set the 'faked_omit_frame_pointer' flag
1541 and we'll check it here.
1542 If we really did set flag_omit_frame_pointer normally, then we return false
1543 (no frame pointer required) in all cases. */
1545 if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
1546 return false;
1547 else if (flag_omit_leaf_frame_pointer)
1548 return !crtl->is_leaf;
1549 return true;
1552 /* Mark the registers that need to be saved by the callee and calculate
1553 the size of the callee-saved registers area and frame record (both FP
1554 and LR may be omitted). */
1555 static void
1556 aarch64_layout_frame (void)
1558 HOST_WIDE_INT offset = 0;
1559 int regno;
1561 if (reload_completed && cfun->machine->frame.laid_out)
1562 return;
1564 cfun->machine->frame.fp_lr_offset = 0;
1566 /* First mark all the registers that really need to be saved... */
1567 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1568 cfun->machine->frame.reg_offset[regno] = -1;
1570 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1571 cfun->machine->frame.reg_offset[regno] = -1;
1573 /* ... that includes the eh data registers (if needed)... */
1574 if (crtl->calls_eh_return)
1575 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1576 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1578 /* ... and any callee saved register that dataflow says is live. */
1579 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1580 if (df_regs_ever_live_p (regno)
1581 && !call_used_regs[regno])
1582 cfun->machine->frame.reg_offset[regno] = 0;
1584 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1585 if (df_regs_ever_live_p (regno)
1586 && !call_used_regs[regno])
1587 cfun->machine->frame.reg_offset[regno] = 0;
1589 if (frame_pointer_needed)
1591 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1592 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1593 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1596 /* Now assign stack slots for them. */
1597 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1598 if (cfun->machine->frame.reg_offset[regno] != -1)
1600 cfun->machine->frame.reg_offset[regno] = offset;
1601 offset += UNITS_PER_WORD;
1604 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1605 if (cfun->machine->frame.reg_offset[regno] != -1)
1607 cfun->machine->frame.reg_offset[regno] = offset;
1608 offset += UNITS_PER_WORD;
1611 if (frame_pointer_needed)
1613 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1614 offset += UNITS_PER_WORD;
1615 cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1618 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1620 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1621 offset += UNITS_PER_WORD;
1622 cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1625 cfun->machine->frame.padding0 =
1626 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1627 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1629 cfun->machine->frame.saved_regs_size = offset;
1630 cfun->machine->frame.laid_out = true;
1633 /* Make the last instruction frame-related and note that it performs
1634 the operation described by FRAME_PATTERN. */
1636 static void
1637 aarch64_set_frame_expr (rtx frame_pattern)
1639 rtx insn;
1641 insn = get_last_insn ();
1642 RTX_FRAME_RELATED_P (insn) = 1;
1643 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1644 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1645 frame_pattern,
1646 REG_NOTES (insn));
1649 static bool
1650 aarch64_register_saved_on_entry (int regno)
1652 return cfun->machine->frame.reg_offset[regno] != -1;
1656 static void
1657 aarch64_save_or_restore_fprs (int start_offset, int increment,
1658 bool restore, rtx base_rtx)
1661 unsigned regno;
1662 unsigned regno2;
1663 rtx insn;
1664 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1667 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1669 if (aarch64_register_saved_on_entry (regno))
1671 rtx mem;
1672 mem = gen_mem_ref (DFmode,
1673 plus_constant (Pmode,
1674 base_rtx,
1675 start_offset));
1677 for (regno2 = regno + 1;
1678 regno2 <= V31_REGNUM
1679 && !aarch64_register_saved_on_entry (regno2);
1680 regno2++)
1682 /* Empty loop. */
1684 if (regno2 <= V31_REGNUM &&
1685 aarch64_register_saved_on_entry (regno2))
1687 rtx mem2;
1688 /* Next highest register to be saved. */
1689 mem2 = gen_mem_ref (DFmode,
1690 plus_constant
1691 (Pmode,
1692 base_rtx,
1693 start_offset + increment));
1694 if (restore == false)
1696 insn = emit_insn
1697 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1698 mem2, gen_rtx_REG (DFmode, regno2)));
1701 else
1703 insn = emit_insn
1704 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1705 gen_rtx_REG (DFmode, regno2), mem2));
1707 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno));
1708 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DFmode, regno2));
1711 /* The first part of a frame-related parallel insn
1712 is always assumed to be relevant to the frame
1713 calculations; subsequent parts are only
1714 frame-related if explicitly marked. */
1715 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1716 1)) = 1;
1717 regno = regno2;
1718 start_offset += increment * 2;
1720 else
1722 if (restore == false)
1723 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1724 else
1726 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1727 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1729 start_offset += increment;
1731 RTX_FRAME_RELATED_P (insn) = 1;
1738 /* Offset from the stack pointer of where the saves and
1739 restores have to happen. */
1740 static void
1741 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1742 bool restore)
1744 rtx insn;
1745 rtx base_rtx = stack_pointer_rtx;
1746 HOST_WIDE_INT start_offset = offset;
1747 HOST_WIDE_INT increment = UNITS_PER_WORD;
1748 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1749 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1750 unsigned regno;
1751 unsigned regno2;
1753 for (regno = R0_REGNUM; regno <= limit; regno++)
1755 if (aarch64_register_saved_on_entry (regno))
1757 rtx mem;
1758 mem = gen_mem_ref (Pmode,
1759 plus_constant (Pmode,
1760 base_rtx,
1761 start_offset));
1763 for (regno2 = regno + 1;
1764 regno2 <= limit
1765 && !aarch64_register_saved_on_entry (regno2);
1766 regno2++)
1768 /* Empty loop. */
1770 if (regno2 <= limit &&
1771 aarch64_register_saved_on_entry (regno2))
1773 rtx mem2;
1774 /* Next highest register to be saved. */
1775 mem2 = gen_mem_ref (Pmode,
1776 plus_constant
1777 (Pmode,
1778 base_rtx,
1779 start_offset + increment));
1780 if (restore == false)
1782 insn = emit_insn
1783 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1784 mem2, gen_rtx_REG (DImode, regno2)));
1787 else
1789 insn = emit_insn
1790 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
1791 gen_rtx_REG (DImode, regno2), mem2));
1793 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1794 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
1797 /* The first part of a frame-related parallel insn
1798 is always assumed to be relevant to the frame
1799 calculations; subsequent parts are only
1800 frame-related if explicitly marked. */
1801 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1802 1)) = 1;
1803 regno = regno2;
1804 start_offset += increment * 2;
1806 else
1808 if (restore == false)
1809 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
1810 else
1812 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
1813 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1815 start_offset += increment;
1817 RTX_FRAME_RELATED_P (insn) = 1;
1821 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
1825 /* AArch64 stack frames generated by this compiler look like:
1827 +-------------------------------+
1829 | incoming stack arguments |
1831 +-------------------------------+ <-- arg_pointer_rtx
1833 | callee-allocated save area |
1834 | for register varargs |
1836 +-------------------------------+
1838 | local variables |
1840 +-------------------------------+ <-- frame_pointer_rtx
1842 | callee-saved registers |
1844 +-------------------------------+
1845 | LR' |
1846 +-------------------------------+
1847 | FP' |
1848 P +-------------------------------+ <-- hard_frame_pointer_rtx
1849 | dynamic allocation |
1850 +-------------------------------+
1852 | outgoing stack arguments |
1854 +-------------------------------+ <-- stack_pointer_rtx
1856 Dynamic stack allocations such as alloca insert data at point P.
1857 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
1858 hard_frame_pointer_rtx unchanged. */
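/* For example, a function needing a frame record and 16 bytes of locals,
   with no outgoing arguments or dynamic allocation, typically gets a
   prologue along the lines of

       stp  x29, x30, [sp, #-32]!
       add  x29, sp, 0

   with the locals sitting between the frame record and the incoming stack
   pointer (a sketch; the exact sequence depends on the frame size, see
   aarch64_expand_prologue below).  */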
1860 /* Generate the prologue instructions for entry into a function.
1861 Establish the stack frame by decreasing the stack pointer with a
1862 properly calculated size and, if necessary, create a frame record
1863 filled with the values of LR and previous frame pointer. The
1864 current FP is also set up if it is in use. */
1866 void
1867 aarch64_expand_prologue (void)
1869 /* sub sp, sp, #<frame_size>
1870 stp {fp, lr}, [sp, #<frame_size> - 16]
1871 add fp, sp, #<frame_size> - hardfp_offset
1872 stp {cs_reg}, [fp, #-16] etc.
1874 sub sp, sp, <final_adjustment_if_any>
1876 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
1877 HOST_WIDE_INT frame_size, offset;
1878 HOST_WIDE_INT fp_offset; /* FP offset from SP */
1879 rtx insn;
1881 aarch64_layout_frame ();
1882 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
1883 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
1884 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
1885 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
1886 + crtl->outgoing_args_size);
1887 offset = frame_size = AARCH64_ROUND_UP (frame_size,
1888 STACK_BOUNDARY / BITS_PER_UNIT);
1890 if (flag_stack_usage_info)
1891 current_function_static_stack_size = frame_size;
1893 fp_offset = (offset
1894 - original_frame_size
1895 - cfun->machine->frame.saved_regs_size);
1897 /* Store pairs and load pairs have an offset range of only -512 to 504. */
1898 if (offset >= 512)
1900 /* When the frame has a large size, an initial decrease is done on
1901 the stack pointer to jump over the callee-allocated save area for
1902 register varargs, the local variable area and/or the callee-saved
1903 register area. This will allow the pre-index write-back
1904 store pair instructions to be used for setting up the stack frame
1905 efficiently. */
1906 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
1907 if (offset >= 512)
1908 offset = cfun->machine->frame.saved_regs_size;
1910 frame_size -= (offset + crtl->outgoing_args_size);
1911 fp_offset = 0;
1913 if (frame_size >= 0x1000000)
1915 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
1916 emit_move_insn (op0, GEN_INT (-frame_size));
1917 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
1918 aarch64_set_frame_expr (gen_rtx_SET
1919 (Pmode, stack_pointer_rtx,
1920 gen_rtx_PLUS (Pmode,
1921 stack_pointer_rtx,
1922 GEN_INT (-frame_size))));
1924 else if (frame_size > 0)
1926 if ((frame_size & 0xfff) != frame_size)
1928 insn = emit_insn (gen_add2_insn
1929 (stack_pointer_rtx,
1930 GEN_INT (-(frame_size
1931 & ~(HOST_WIDE_INT)0xfff))));
1932 RTX_FRAME_RELATED_P (insn) = 1;
1934 if ((frame_size & 0xfff) != 0)
1936 insn = emit_insn (gen_add2_insn
1937 (stack_pointer_rtx,
1938 GEN_INT (-(frame_size
1939 & (HOST_WIDE_INT)0xfff))));
1940 RTX_FRAME_RELATED_P (insn) = 1;
1944 else
1945 frame_size = -1;
1947 if (offset > 0)
1949 /* Save the frame pointer and lr if the frame pointer is needed
1950 first. Make the frame pointer point to the location of the
1951 old frame pointer on the stack. */
1952 if (frame_pointer_needed)
1954 rtx mem_fp, mem_lr;
1956 if (fp_offset)
1958 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
1959 GEN_INT (-offset)));
1960 RTX_FRAME_RELATED_P (insn) = 1;
1961 aarch64_set_frame_expr (gen_rtx_SET
1962 (Pmode, stack_pointer_rtx,
1963 gen_rtx_MINUS (Pmode,
1964 stack_pointer_rtx,
1965 GEN_INT (offset))));
1966 mem_fp = gen_frame_mem (DImode,
1967 plus_constant (Pmode,
1968 stack_pointer_rtx,
1969 fp_offset));
1970 mem_lr = gen_frame_mem (DImode,
1971 plus_constant (Pmode,
1972 stack_pointer_rtx,
1973 fp_offset
1974 + UNITS_PER_WORD));
1975 insn = emit_insn (gen_store_pairdi (mem_fp,
1976 hard_frame_pointer_rtx,
1977 mem_lr,
1978 gen_rtx_REG (DImode,
1979 LR_REGNUM)));
1981 else
1983 insn = emit_insn (gen_storewb_pairdi_di
1984 (stack_pointer_rtx, stack_pointer_rtx,
1985 hard_frame_pointer_rtx,
1986 gen_rtx_REG (DImode, LR_REGNUM),
1987 GEN_INT (-offset),
1988 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
1989 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
1992 /* The first part of a frame-related parallel insn is always
1993 assumed to be relevant to the frame calculations;
1994 subsequent parts are only frame-related if explicitly
1995 marked. */
1996 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1997 RTX_FRAME_RELATED_P (insn) = 1;
1999 /* Set up frame pointer to point to the location of the
2000 previous frame pointer on the stack. */
2001 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2002 stack_pointer_rtx,
2003 GEN_INT (fp_offset)));
2004 aarch64_set_frame_expr (gen_rtx_SET
2005 (Pmode, hard_frame_pointer_rtx,
2006 gen_rtx_PLUS (Pmode,
2007 stack_pointer_rtx,
2008 GEN_INT (fp_offset))));
2009 RTX_FRAME_RELATED_P (insn) = 1;
2010 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2011 hard_frame_pointer_rtx));
2013 else
2015 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2016 GEN_INT (-offset)));
2017 RTX_FRAME_RELATED_P (insn) = 1;
2020 aarch64_save_or_restore_callee_save_registers
2021 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2024 /* when offset >= 512,
2025 sub sp, sp, #<outgoing_args_size> */
2026 if (frame_size > -1)
2028 if (crtl->outgoing_args_size > 0)
2030 insn = emit_insn (gen_add2_insn
2031 (stack_pointer_rtx,
2032 GEN_INT (- crtl->outgoing_args_size)));
2033 RTX_FRAME_RELATED_P (insn) = 1;
2038 /* Generate the epilogue instructions for returning from a function. */
2039 void
2040 aarch64_expand_epilogue (bool for_sibcall)
2042 HOST_WIDE_INT original_frame_size, frame_size, offset;
2043 HOST_WIDE_INT fp_offset;
2044 rtx insn;
2045 rtx cfa_reg;
2047 aarch64_layout_frame ();
2048 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2049 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2050 + crtl->outgoing_args_size);
2051 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2052 STACK_BOUNDARY / BITS_PER_UNIT);
2054 fp_offset = (offset
2055 - original_frame_size
2056 - cfun->machine->frame.saved_regs_size);
2058 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2060 /* Store pairs and load pairs have an offset range of only -512 to 504. */
2061 if (offset >= 512)
2063 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2064 if (offset >= 512)
2065 offset = cfun->machine->frame.saved_regs_size;
2067 frame_size -= (offset + crtl->outgoing_args_size);
2068 fp_offset = 0;
2069 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2071 insn = emit_insn (gen_add2_insn
2072 (stack_pointer_rtx,
2073 GEN_INT (crtl->outgoing_args_size)));
2074 RTX_FRAME_RELATED_P (insn) = 1;
2077 else
2078 frame_size = -1;
2080 /* If there were outgoing arguments or we've done dynamic stack
2081 allocation, then restore the stack pointer from the frame
2082 pointer. This is at most one insn and more efficient than using
2083 GCC's internal mechanism. */
2084 if (frame_pointer_needed
2085 && (crtl->outgoing_args_size || cfun->calls_alloca))
2087 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2088 hard_frame_pointer_rtx,
2089 GEN_INT (- fp_offset)));
2090 RTX_FRAME_RELATED_P (insn) = 1;
2091 /* As SP is set to (FP - fp_offset), according to the rules in
2092 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2093 from the value of SP from now on. */
2094 cfa_reg = stack_pointer_rtx;
2097 aarch64_save_or_restore_callee_save_registers
2098 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2100 /* Restore the frame pointer and lr if the frame pointer is needed. */
2101 if (offset > 0)
2103 if (frame_pointer_needed)
2105 rtx mem_fp, mem_lr;
2107 if (fp_offset)
2109 mem_fp = gen_frame_mem (DImode,
2110 plus_constant (Pmode,
2111 stack_pointer_rtx,
2112 fp_offset));
2113 mem_lr = gen_frame_mem (DImode,
2114 plus_constant (Pmode,
2115 stack_pointer_rtx,
2116 fp_offset
2117 + UNITS_PER_WORD));
2118 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2119 mem_fp,
2120 gen_rtx_REG (DImode,
2121 LR_REGNUM),
2122 mem_lr));
2124 else
2126 insn = emit_insn (gen_loadwb_pairdi_di
2127 (stack_pointer_rtx,
2128 stack_pointer_rtx,
2129 hard_frame_pointer_rtx,
2130 gen_rtx_REG (DImode, LR_REGNUM),
2131 GEN_INT (offset),
2132 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2133 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2134 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2135 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2136 plus_constant (Pmode, cfa_reg,
2137 offset))));
2140 /* The first part of a frame-related parallel insn
2141 is always assumed to be relevant to the frame
2142 calculations; subsequent parts are only
2143 frame-related if explicitly marked. */
2144 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2145 RTX_FRAME_RELATED_P (insn) = 1;
2146 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2147 add_reg_note (insn, REG_CFA_RESTORE,
2148 gen_rtx_REG (DImode, LR_REGNUM));
2150 if (fp_offset)
2152 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2153 GEN_INT (offset)));
2154 RTX_FRAME_RELATED_P (insn) = 1;
2157 else
2159 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2160 GEN_INT (offset)));
2161 RTX_FRAME_RELATED_P (insn) = 1;
2165 /* Stack adjustment for exception handler. */
2166 if (crtl->calls_eh_return)
2168 /* We need to unwind the stack by the offset computed by
2169 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2170 based on SP. Ideally we would update the SP and define the
2171 CFA along the lines of:
2173 SP = SP + EH_RETURN_STACKADJ_RTX
2174 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2176 However, the dwarf emitter only understands a constant
2177 register offset.
2179 The solution chosen here is to use the otherwise unused IP0
2180 as a temporary register to hold the current SP value. The
2181 CFA is described using IP0, and then SP is modified. */
2183 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2185 insn = emit_move_insn (ip0, stack_pointer_rtx);
2186 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2187 RTX_FRAME_RELATED_P (insn) = 1;
2189 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2191 /* Ensure the assignment to IP0 does not get optimized away. */
2192 emit_use (ip0);
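/* Sketch of the sequence produced above (illustrative only; xADJ
   stands for whatever register EH_RETURN_STACKADJ_RTX names):

       mov  x16, sp          // IP0 := SP, CFA now described via IP0
       add  sp, sp, xADJ     // apply the EH stack adjustment
                             // IP0 kept live so the CFA note stays valid  */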
2195 if (frame_size > -1)
2197 if (frame_size >= 0x1000000)
2199 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2200 emit_move_insn (op0, GEN_INT (frame_size));
2201 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2202 aarch64_set_frame_expr (gen_rtx_SET
2203 (Pmode, stack_pointer_rtx,
2204 gen_rtx_PLUS (Pmode,
2205 stack_pointer_rtx,
2206 GEN_INT (frame_size))));
2208 else if (frame_size > 0)
2210 if ((frame_size & 0xfff) != 0)
2212 insn = emit_insn (gen_add2_insn
2213 (stack_pointer_rtx,
2214 GEN_INT ((frame_size
2215 & (HOST_WIDE_INT) 0xfff))));
2216 RTX_FRAME_RELATED_P (insn) = 1;
2218 if ((frame_size & 0xfff) != frame_size)
2220 insn = emit_insn (gen_add2_insn
2221 (stack_pointer_rtx,
2222 GEN_INT ((frame_size
2223 & ~ (HOST_WIDE_INT) 0xfff))));
2224 RTX_FRAME_RELATED_P (insn) = 1;
2228 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2229 gen_rtx_PLUS (Pmode,
2230 stack_pointer_rtx,
2231 GEN_INT (offset))));
2234 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2235 if (!for_sibcall)
2236 emit_jump_insn (ret_rtx);
2239 /* Return the place to copy the exception unwinding return address to.
2240 This will probably be a stack slot, but could (in theory) be the
2241 return register. */
2243 aarch64_final_eh_return_addr (void)
2245 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2246 aarch64_layout_frame ();
2247 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2248 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2249 + crtl->outgoing_args_size);
2250 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2251 STACK_BOUNDARY / BITS_PER_UNIT);
2252 fp_offset = offset
2253 - original_frame_size
2254 - cfun->machine->frame.saved_regs_size;
2256 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2257 return gen_rtx_REG (DImode, LR_REGNUM);
2259 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2260 result in a store to save LR introduced by builtin_eh_return () being
2261 incorrectly deleted because the alias is not detected.
2262 So in the calculation of the address to copy the exception unwinding
2263 return address to, we note two cases.
2264 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2265 we return an SP-relative location since all the addresses are SP-relative
2266 in this case. This prevents the store from being optimized away.
2267 If the fp_offset is not 0, then the addresses will be FP-relative and
2268 therefore we return an FP-relative location. */
2270 if (frame_pointer_needed)
2272 if (fp_offset)
2273 return gen_frame_mem (DImode,
2274 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2275 else
2276 return gen_frame_mem (DImode,
2277 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2280 /* If FP is not needed, we calculate the location of LR, which would be
2281 at the top of the saved registers block. */
2283 return gen_frame_mem (DImode,
2284 plus_constant (Pmode,
2285 stack_pointer_rtx,
2286 fp_offset
2287 + cfun->machine->frame.saved_regs_size
2288 - 2 * UNITS_PER_WORD));
2291 /* Output code to build up a constant in a register. */
2292 static void
2293 aarch64_build_constant (int regnum, HOST_WIDE_INT val)
2295 if (aarch64_bitmask_imm (val, DImode))
2296 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2297 else
2299 int i;
2300 int ncount = 0;
2301 int zcount = 0;
2302 HOST_WIDE_INT valp = val >> 16;
2303 HOST_WIDE_INT valm;
2304 HOST_WIDE_INT tval;
2306 for (i = 16; i < 64; i += 16)
2308 valm = (valp & 0xffff);
2310 if (valm != 0)
2311 ++ zcount;
2313 if (valm != 0xffff)
2314 ++ ncount;
2316 valp >>= 16;
2319 /* zcount contains the number of additional MOVK instructions
2320 required if the constant is built up with an initial MOVZ instruction,
2321 while ncount is the number of MOVK instructions required if starting
2322 with a MOVN instruction. Choose the sequence that yields the fewest
2323 instructions, preferring MOVZ instructions when the two counts
2324 are equal. */
2325 if (ncount < zcount)
2327 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2328 GEN_INT ((~val) & 0xffff));
2329 tval = 0xffff;
2331 else
2333 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2334 GEN_INT (val & 0xffff));
2335 tval = 0;
2338 val >>= 16;
2340 for (i = 16; i < 64; i += 16)
2342 if ((val & 0xffff) != tval)
2343 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2344 GEN_INT (i), GEN_INT (val & 0xffff)));
2345 val >>= 16;
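/* Worked example for the code above (illustrative, not verified
   compiler output): for val == 0x123400005678 the 16-bit chunks above
   bit 15 are 0x0000, 0x1234 and 0x0000, giving zcount == 1 and
   ncount == 3, so the MOVZ sequence is chosen:

       mov   xN, #0x5678
       movk  xN, #0x1234, lsl #32                                      */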
2350 static void
2351 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2353 HOST_WIDE_INT mdelta = delta;
2354 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2355 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2357 if (mdelta < 0)
2358 mdelta = -mdelta;
2360 if (mdelta >= 4096 * 4096)
2362 aarch64_build_constant (scratchreg, delta);
2363 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2365 else if (mdelta > 0)
2367 if (mdelta >= 4096)
2369 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2370 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2371 if (delta < 0)
2372 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2373 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2374 else
2375 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2376 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2378 if (mdelta % 4096 != 0)
2380 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2381 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2382 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
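/* Illustrative expansion (register numbers invented): with regnum x0,
   scratchreg x17 and delta == 5000 the code above emits roughly:

       mov  x17, #1                // 5000 / 4096
       add  x0, x0, x17, lsl #12   // add the 4096-sized part
       add  x0, x0, #904           // 5000 % 4096                      */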
2387 /* Output code to add DELTA to the first argument, and then jump
2388 to FUNCTION. Used for C++ multiple inheritance. */
2389 static void
2390 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2391 HOST_WIDE_INT delta,
2392 HOST_WIDE_INT vcall_offset,
2393 tree function)
2395 /* The this pointer is always in x0. Note that this differs from
2396 Arm where the this pointer may be bumped to r1 if r0 is required
2397 to return a pointer to an aggregate. On AArch64 a result value
2398 pointer will be in x8. */
2399 int this_regno = R0_REGNUM;
2400 rtx this_rtx, temp0, temp1, addr, insn, funexp;
2402 reload_completed = 1;
2403 emit_note (NOTE_INSN_PROLOGUE_END);
2405 if (vcall_offset == 0)
2406 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2407 else
2409 gcc_assert ((vcall_offset & 0x7) == 0);
2411 this_rtx = gen_rtx_REG (Pmode, this_regno);
2412 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2413 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2415 addr = this_rtx;
2416 if (delta != 0)
2418 if (delta >= -256 && delta < 256)
2419 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2420 plus_constant (Pmode, this_rtx, delta));
2421 else
2422 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2425 aarch64_emit_move (temp0, gen_rtx_MEM (Pmode, addr));
2427 if (vcall_offset >= -256 && vcall_offset < 32768)
2428 addr = plus_constant (Pmode, temp0, vcall_offset);
2429 else
2431 aarch64_build_constant (IP1_REGNUM, vcall_offset);
2432 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2435 aarch64_emit_move (temp1, gen_rtx_MEM (Pmode, addr));
2436 emit_insn (gen_add2_insn (this_rtx, temp1));
2439 /* Generate a tail call to the target function. */
2440 if (!TREE_USED (function))
2442 assemble_external (function);
2443 TREE_USED (function) = 1;
2445 funexp = XEXP (DECL_RTL (function), 0);
2446 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2447 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2448 SIBLING_CALL_P (insn) = 1;
2450 insn = get_insns ();
2451 shorten_branches (insn);
2452 final_start_function (insn, file, 1);
2453 final (insn, file, 1);
2454 final_end_function ();
2456 /* Stop pretending to be a post-reload pass. */
2457 reload_completed = 0;
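/* For a simple thunk with vcall_offset == 0 and a small delta, the
   function above produces output along the lines of (illustrative):

       add  x0, x0, #16       // bump the this pointer by delta
       b    <target>          // sibling call, no frame is created     */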
2460 static int
2461 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2463 if (GET_CODE (*x) == SYMBOL_REF)
2464 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2466 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2467 TLS offsets, not real symbol references. */
2468 if (GET_CODE (*x) == UNSPEC
2469 && XINT (*x, 1) == UNSPEC_TLS)
2470 return -1;
2472 return 0;
2475 static bool
2476 aarch64_tls_referenced_p (rtx x)
2478 if (!TARGET_HAVE_TLS)
2479 return false;
2481 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2485 static int
2486 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2488 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2489 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2491 if (*imm1 < *imm2)
2492 return -1;
2493 if (*imm1 > *imm2)
2494 return +1;
2495 return 0;
2499 static void
2500 aarch64_build_bitmask_table (void)
2502 unsigned HOST_WIDE_INT mask, imm;
2503 unsigned int log_e, e, s, r;
2504 unsigned int nimms = 0;
2506 for (log_e = 1; log_e <= 6; log_e++)
2508 e = 1 << log_e;
2509 if (e == 64)
2510 mask = ~(HOST_WIDE_INT) 0;
2511 else
2512 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2513 for (s = 1; s < e; s++)
2515 for (r = 0; r < e; r++)
2517 /* Set S consecutive bits to 1 (S < 64). */
2518 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2519 /* Rotate right by R. */
2520 if (r != 0)
2521 imm = ((imm >> r) | (imm << (e - r))) & mask;
2522 /* Replicate the element pattern to fill 64 bits. */
2523 switch (log_e) {
2524 case 1: imm |= (imm << 2);
2525 case 2: imm |= (imm << 4);
2526 case 3: imm |= (imm << 8);
2527 case 4: imm |= (imm << 16);
2528 case 5: imm |= (imm << 32);
2529 case 6:
2530 break;
2531 default:
2532 gcc_unreachable ();
2534 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2535 aarch64_bitmasks[nimms++] = imm;
2540 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2541 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2542 aarch64_bitmasks_cmp);
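/* Example table entry (illustrative): with element size e == 8,
   s == 3 set bits and rotation r == 1, the element value is
   ror (0b00000111, 1) == 0x83, which the replication switch above
   expands to the 64-bit entry 0x8383838383838383.  */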
2546 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2547 a left shift of 0 or 12 bits. */
2548 bool
2549 aarch64_uimm12_shift (HOST_WIDE_INT val)
2551 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2552 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
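/* Examples (illustrative): 0xabc and 0xabc000 are accepted (a 12-bit
   immediate, optionally shifted left by 12), while 0xabc1 and
   0x1abc000 are rejected.  */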
2557 /* Return true if val is an immediate that can be loaded into a
2558 register by a MOVZ instruction. */
2559 static bool
2560 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2562 if (GET_MODE_SIZE (mode) > 4)
2564 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2565 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2566 return 1;
2568 else
2570 /* Ignore sign extension. */
2571 val &= (HOST_WIDE_INT) 0xffffffff;
2573 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2574 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
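/* Illustrative values: for DImode, 0x12340000 is accepted (a single
   16-bit chunk at bit 16, i.e. roughly "movz xN, #0x1234, lsl #16"),
   whereas 0x12340001 spans two chunks and is rejected.  */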
2578 /* Return true if val is a valid bitmask immediate. */
2579 bool
2580 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2582 if (GET_MODE_SIZE (mode) < 8)
2584 /* Replicate bit pattern. */
2585 val &= (HOST_WIDE_INT) 0xffffffff;
2586 val |= val << 32;
2588 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2589 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2593 /* Return true if val is an immediate that can be loaded into a
2594 register in a single instruction. */
2595 bool
2596 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2598 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2599 return 1;
2600 return aarch64_bitmask_imm (val, mode);
2603 static bool
2604 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2606 rtx base, offset;
2607 if (GET_CODE (x) == HIGH)
2608 return true;
2610 split_const (x, &base, &offset);
2611 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2612 return (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR) != SYMBOL_FORCE_TO_MEM);
2614 return aarch64_tls_referenced_p (x);
2617 /* Return true if register REGNO is a valid index register.
2618 STRICT_P is true if REG_OK_STRICT is in effect. */
2620 bool
2621 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2623 if (!HARD_REGISTER_NUM_P (regno))
2625 if (!strict_p)
2626 return true;
2628 if (!reg_renumber)
2629 return false;
2631 regno = reg_renumber[regno];
2633 return GP_REGNUM_P (regno);
2636 /* Return true if register REGNO is a valid base register for mode MODE.
2637 STRICT_P is true if REG_OK_STRICT is in effect. */
2639 bool
2640 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2642 if (!HARD_REGISTER_NUM_P (regno))
2644 if (!strict_p)
2645 return true;
2647 if (!reg_renumber)
2648 return false;
2650 regno = reg_renumber[regno];
2653 /* The fake registers will be eliminated to either the stack or
2654 hard frame pointer, both of which are usually valid base registers.
2655 Reload deals with the cases where the eliminated form isn't valid. */
2656 return (GP_REGNUM_P (regno)
2657 || regno == SP_REGNUM
2658 || regno == FRAME_POINTER_REGNUM
2659 || regno == ARG_POINTER_REGNUM);
2662 /* Return true if X is a valid base register for mode MODE.
2663 STRICT_P is true if REG_OK_STRICT is in effect. */
2665 static bool
2666 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2668 if (!strict_p && GET_CODE (x) == SUBREG)
2669 x = SUBREG_REG (x);
2671 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2674 /* Return true if address offset is a valid index. If it is, fill in INFO
2675 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2677 static bool
2678 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2679 enum machine_mode mode, bool strict_p)
2681 enum aarch64_address_type type;
2682 rtx index;
2683 int shift;
2685 /* (reg:P) */
2686 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2687 && GET_MODE (x) == Pmode)
2689 type = ADDRESS_REG_REG;
2690 index = x;
2691 shift = 0;
2693 /* (sign_extend:DI (reg:SI)) */
2694 else if ((GET_CODE (x) == SIGN_EXTEND
2695 || GET_CODE (x) == ZERO_EXTEND)
2696 && GET_MODE (x) == DImode
2697 && GET_MODE (XEXP (x, 0)) == SImode)
2699 type = (GET_CODE (x) == SIGN_EXTEND)
2700 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2701 index = XEXP (x, 0);
2702 shift = 0;
2704 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2705 else if (GET_CODE (x) == MULT
2706 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2707 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2708 && GET_MODE (XEXP (x, 0)) == DImode
2709 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2710 && CONST_INT_P (XEXP (x, 1)))
2712 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2713 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2714 index = XEXP (XEXP (x, 0), 0);
2715 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2717 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2718 else if (GET_CODE (x) == ASHIFT
2719 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2720 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2721 && GET_MODE (XEXP (x, 0)) == DImode
2722 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2723 && CONST_INT_P (XEXP (x, 1)))
2725 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2726 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2727 index = XEXP (XEXP (x, 0), 0);
2728 shift = INTVAL (XEXP (x, 1));
2730 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2731 else if ((GET_CODE (x) == SIGN_EXTRACT
2732 || GET_CODE (x) == ZERO_EXTRACT)
2733 && GET_MODE (x) == DImode
2734 && GET_CODE (XEXP (x, 0)) == MULT
2735 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2736 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2738 type = (GET_CODE (x) == SIGN_EXTRACT)
2739 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2740 index = XEXP (XEXP (x, 0), 0);
2741 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2742 if (INTVAL (XEXP (x, 1)) != 32 + shift
2743 || INTVAL (XEXP (x, 2)) != 0)
2744 shift = -1;
2746 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2747 (const_int 0xffffffff<<shift)) */
2748 else if (GET_CODE (x) == AND
2749 && GET_MODE (x) == DImode
2750 && GET_CODE (XEXP (x, 0)) == MULT
2751 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2752 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2753 && CONST_INT_P (XEXP (x, 1)))
2755 type = ADDRESS_REG_UXTW;
2756 index = XEXP (XEXP (x, 0), 0);
2757 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2758 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2759 shift = -1;
2761 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2762 else if ((GET_CODE (x) == SIGN_EXTRACT
2763 || GET_CODE (x) == ZERO_EXTRACT)
2764 && GET_MODE (x) == DImode
2765 && GET_CODE (XEXP (x, 0)) == ASHIFT
2766 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2767 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2769 type = (GET_CODE (x) == SIGN_EXTRACT)
2770 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2771 index = XEXP (XEXP (x, 0), 0);
2772 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2773 if (INTVAL (XEXP (x, 1)) != 32 + shift
2774 || INTVAL (XEXP (x, 2)) != 0)
2775 shift = -1;
2777 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2778 (const_int 0xffffffff<<shift)) */
2779 else if (GET_CODE (x) == AND
2780 && GET_MODE (x) == DImode
2781 && GET_CODE (XEXP (x, 0)) == ASHIFT
2782 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2783 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2784 && CONST_INT_P (XEXP (x, 1)))
2786 type = ADDRESS_REG_UXTW;
2787 index = XEXP (XEXP (x, 0), 0);
2788 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2789 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2790 shift = -1;
2792 /* (mult:P (reg:P) (const_int scale)) */
2793 else if (GET_CODE (x) == MULT
2794 && GET_MODE (x) == Pmode
2795 && GET_MODE (XEXP (x, 0)) == Pmode
2796 && CONST_INT_P (XEXP (x, 1)))
2798 type = ADDRESS_REG_REG;
2799 index = XEXP (x, 0);
2800 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2802 /* (ashift:P (reg:P) (const_int shift)) */
2803 else if (GET_CODE (x) == ASHIFT
2804 && GET_MODE (x) == Pmode
2805 && GET_MODE (XEXP (x, 0)) == Pmode
2806 && CONST_INT_P (XEXP (x, 1)))
2808 type = ADDRESS_REG_REG;
2809 index = XEXP (x, 0);
2810 shift = INTVAL (XEXP (x, 1));
2812 else
2813 return false;
2815 if (GET_CODE (index) == SUBREG)
2816 index = SUBREG_REG (index);
2818 if ((shift == 0 ||
2819 (shift > 0 && shift <= 3
2820 && (1 << shift) == GET_MODE_SIZE (mode)))
2821 && REG_P (index)
2822 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
2824 info->type = type;
2825 info->offset = index;
2826 info->shift = shift;
2827 return true;
2830 return false;
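/* Example (illustrative): for an SImode access, the index expression
   (mult:DI (sign_extend:DI (reg:SI w1)) (const_int 4)) is classified
   as ADDRESS_REG_SXTW with shift == 2, corresponding to the
   "[xN, w1, sxtw #2]" addressing form.  */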
2833 static inline bool
2834 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2836 return (offset >= -64 * GET_MODE_SIZE (mode)
2837 && offset < 64 * GET_MODE_SIZE (mode)
2838 && offset % GET_MODE_SIZE (mode) == 0);
2841 static inline bool
2842 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
2843 HOST_WIDE_INT offset)
2845 return offset >= -256 && offset < 256;
2848 static inline bool
2849 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
2851 return (offset >= 0
2852 && offset < 4096 * GET_MODE_SIZE (mode)
2853 && offset % GET_MODE_SIZE (mode) == 0);
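/* For DImode (8-byte) accesses the three helpers above accept,
   respectively (illustrative summary):
     7-bit signed scaled:    -512 .. 504 in steps of 8 (load/store pair),
     9-bit signed unscaled:  -256 .. 255,
     12-bit unsigned scaled:  0 .. 32760 in steps of 8.  */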
2856 /* Return true if X is a valid address for machine mode MODE. If it is,
2857 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
2858 effect. OUTER_CODE is PARALLEL for a load/store pair. */
2860 static bool
2861 aarch64_classify_address (struct aarch64_address_info *info,
2862 rtx x, enum machine_mode mode,
2863 RTX_CODE outer_code, bool strict_p)
2865 enum rtx_code code = GET_CODE (x);
2866 rtx op0, op1;
2867 bool allow_reg_index_p =
2868 outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
2870 /* Don't support anything other than POST_INC or REG addressing for
2871 AdvSIMD. */
2872 if (aarch64_vector_mode_p (mode)
2873 && (code != POST_INC && code != REG))
2874 return false;
2876 switch (code)
2878 case REG:
2879 case SUBREG:
2880 info->type = ADDRESS_REG_IMM;
2881 info->base = x;
2882 info->offset = const0_rtx;
2883 return aarch64_base_register_rtx_p (x, strict_p);
2885 case PLUS:
2886 op0 = XEXP (x, 0);
2887 op1 = XEXP (x, 1);
2888 if (GET_MODE_SIZE (mode) != 0
2889 && CONST_INT_P (op1)
2890 && aarch64_base_register_rtx_p (op0, strict_p))
2892 HOST_WIDE_INT offset = INTVAL (op1);
2894 info->type = ADDRESS_REG_IMM;
2895 info->base = op0;
2896 info->offset = op1;
2898 /* TImode and TFmode values are allowed in both pairs of X
2899 registers and individual Q registers. The available
2900 address modes are:
2901 X,X: 7-bit signed scaled offset
2902 Q: 9-bit signed offset
2903 We conservatively require an offset representable in either mode. */
2905 if (mode == TImode || mode == TFmode)
2906 return (offset_7bit_signed_scaled_p (mode, offset)
2907 && offset_9bit_signed_unscaled_p (mode, offset));
2909 if (outer_code == PARALLEL)
2910 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
2911 && offset_7bit_signed_scaled_p (mode, offset));
2912 else
2913 return (offset_9bit_signed_unscaled_p (mode, offset)
2914 || offset_12bit_unsigned_scaled_p (mode, offset));
2917 if (allow_reg_index_p)
2919 /* Look for base + (scaled/extended) index register. */
2920 if (aarch64_base_register_rtx_p (op0, strict_p)
2921 && aarch64_classify_index (info, op1, mode, strict_p))
2923 info->base = op0;
2924 return true;
2926 if (aarch64_base_register_rtx_p (op1, strict_p)
2927 && aarch64_classify_index (info, op0, mode, strict_p))
2929 info->base = op1;
2930 return true;
2934 return false;
2936 case POST_INC:
2937 case POST_DEC:
2938 case PRE_INC:
2939 case PRE_DEC:
2940 info->type = ADDRESS_REG_WB;
2941 info->base = XEXP (x, 0);
2942 info->offset = NULL_RTX;
2943 return aarch64_base_register_rtx_p (info->base, strict_p);
2945 case POST_MODIFY:
2946 case PRE_MODIFY:
2947 info->type = ADDRESS_REG_WB;
2948 info->base = XEXP (x, 0);
2949 if (GET_CODE (XEXP (x, 1)) == PLUS
2950 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
2951 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
2952 && aarch64_base_register_rtx_p (info->base, strict_p))
2954 HOST_WIDE_INT offset;
2955 info->offset = XEXP (XEXP (x, 1), 1);
2956 offset = INTVAL (info->offset);
2958 /* TImode and TFmode values are allowed in both pairs of X
2959 registers and individual Q registers. The available
2960 address modes are:
2961 X,X: 7-bit signed scaled offset
2962 Q: 9-bit signed offset
2963 We conservatively require an offset representable in either mode. */
2965 if (mode == TImode || mode == TFmode)
2966 return (offset_7bit_signed_scaled_p (mode, offset)
2967 && offset_9bit_signed_unscaled_p (mode, offset));
2969 if (outer_code == PARALLEL)
2970 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
2971 && offset_7bit_signed_scaled_p (mode, offset));
2972 else
2973 return offset_9bit_signed_unscaled_p (mode, offset);
2975 return false;
2977 case CONST:
2978 case SYMBOL_REF:
2979 case LABEL_REF:
2980 /* Load literal: pc-relative constant pool entry. Only supported
2981 for SI mode or larger. */
2982 info->type = ADDRESS_SYMBOLIC;
2983 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
2985 rtx sym, addend;
2987 split_const (x, &sym, &addend);
2988 return (GET_CODE (sym) == LABEL_REF
2989 || (GET_CODE (sym) == SYMBOL_REF
2990 && CONSTANT_POOL_ADDRESS_P (sym)));
2992 return false;
2994 case LO_SUM:
2995 info->type = ADDRESS_LO_SUM;
2996 info->base = XEXP (x, 0);
2997 info->offset = XEXP (x, 1);
2998 if (allow_reg_index_p
2999 && aarch64_base_register_rtx_p (info->base, strict_p))
3001 rtx sym, offs;
3002 split_const (info->offset, &sym, &offs);
3003 if (GET_CODE (sym) == SYMBOL_REF
3004 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3005 == SYMBOL_SMALL_ABSOLUTE))
3007 /* The symbol and offset must be aligned to the access size. */
3008 unsigned int align;
3009 unsigned int ref_size;
3011 if (CONSTANT_POOL_ADDRESS_P (sym))
3012 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3013 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3015 tree exp = SYMBOL_REF_DECL (sym);
3016 align = TYPE_ALIGN (TREE_TYPE (exp));
3017 align = CONSTANT_ALIGNMENT (exp, align);
3019 else if (SYMBOL_REF_DECL (sym))
3020 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3021 else
3022 align = BITS_PER_UNIT;
3024 ref_size = GET_MODE_SIZE (mode);
3025 if (ref_size == 0)
3026 ref_size = GET_MODE_SIZE (DImode);
3028 return ((INTVAL (offs) & (ref_size - 1)) == 0
3029 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3032 return false;
3034 default:
3035 return false;
3039 bool
3040 aarch64_symbolic_address_p (rtx x)
3042 rtx offset;
3044 split_const (x, &x, &offset);
3045 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3048 /* Classify the base of symbolic expression X, given that X appears in
3049 context CONTEXT. */
3051 enum aarch64_symbol_type
3052 aarch64_classify_symbolic_expression (rtx x,
3053 enum aarch64_symbol_context context)
3055 rtx offset;
3057 split_const (x, &x, &offset);
3058 return aarch64_classify_symbol (x, context);
3062 /* Return TRUE if X is a legitimate address for accessing memory in
3063 mode MODE. */
3064 static bool
3065 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3067 struct aarch64_address_info addr;
3069 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3072 /* Return TRUE if X is a legitimate address for accessing memory in
3073 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3074 pair operation. */
3075 bool
3076 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3077 RTX_CODE outer_code, bool strict_p)
3079 struct aarch64_address_info addr;
3081 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3084 /* Return TRUE if rtx X is the immediate constant 0.0. */
3085 bool
3086 aarch64_float_const_zero_rtx_p (rtx x)
3088 REAL_VALUE_TYPE r;
3090 if (GET_MODE (x) == VOIDmode)
3091 return false;
3093 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3094 if (REAL_VALUE_MINUS_ZERO (r))
3095 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3096 return REAL_VALUES_EQUAL (r, dconst0);
3099 /* Return the fixed registers used for condition codes. */
3101 static bool
3102 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3104 *p1 = CC_REGNUM;
3105 *p2 = INVALID_REGNUM;
3106 return true;
3109 enum machine_mode
3110 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3112 /* All floating point compares return CCFP if it is an equality
3113 comparison, and CCFPE otherwise. */
3114 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3116 switch (code)
3118 case EQ:
3119 case NE:
3120 case UNORDERED:
3121 case ORDERED:
3122 case UNLT:
3123 case UNLE:
3124 case UNGT:
3125 case UNGE:
3126 case UNEQ:
3127 case LTGT:
3128 return CCFPmode;
3130 case LT:
3131 case LE:
3132 case GT:
3133 case GE:
3134 return CCFPEmode;
3136 default:
3137 gcc_unreachable ();
3141 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3142 && y == const0_rtx
3143 && (code == EQ || code == NE || code == LT || code == GE)
3144 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3145 || GET_CODE (x) == NEG))
3146 return CC_NZmode;
3148 /* A compare with a shifted operand. Because of canonicalization,
3149 the comparison will have to be swapped when we emit the assembly
3150 code. */
3151 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3152 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3153 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3154 || GET_CODE (x) == LSHIFTRT
3155 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3156 return CC_SWPmode;
3158 /* A compare of a mode narrower than SI mode against zero can be done
3159 by extending the value in the comparison. */
3160 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3161 && y == const0_rtx)
3162 /* Only use sign-extension if we really need it. */
3163 return ((code == GT || code == GE || code == LE || code == LT)
3164 ? CC_SESWPmode : CC_ZESWPmode);
3166 /* For everything else, return CCmode. */
3167 return CCmode;
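/* Example (illustrative): a comparison such as
   (compare (ashift:DI (reg x1) (const_int 2)) (reg x0)) selects
   CC_SWPmode, since the instruction eventually emitted is of the form
   "cmp x0, x1, lsl #2" with the operands swapped, so the condition
   must be swapped to match.  */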
3170 static unsigned
3171 aarch64_get_condition_code (rtx x)
3173 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3174 enum rtx_code comp_code = GET_CODE (x);
3176 if (GET_MODE_CLASS (mode) != MODE_CC)
3177 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3179 switch (mode)
3181 case CCFPmode:
3182 case CCFPEmode:
3183 switch (comp_code)
3185 case GE: return AARCH64_GE;
3186 case GT: return AARCH64_GT;
3187 case LE: return AARCH64_LS;
3188 case LT: return AARCH64_MI;
3189 case NE: return AARCH64_NE;
3190 case EQ: return AARCH64_EQ;
3191 case ORDERED: return AARCH64_VC;
3192 case UNORDERED: return AARCH64_VS;
3193 case UNLT: return AARCH64_LT;
3194 case UNLE: return AARCH64_LE;
3195 case UNGT: return AARCH64_HI;
3196 case UNGE: return AARCH64_PL;
3197 default: gcc_unreachable ();
3199 break;
3201 case CCmode:
3202 switch (comp_code)
3204 case NE: return AARCH64_NE;
3205 case EQ: return AARCH64_EQ;
3206 case GE: return AARCH64_GE;
3207 case GT: return AARCH64_GT;
3208 case LE: return AARCH64_LE;
3209 case LT: return AARCH64_LT;
3210 case GEU: return AARCH64_CS;
3211 case GTU: return AARCH64_HI;
3212 case LEU: return AARCH64_LS;
3213 case LTU: return AARCH64_CC;
3214 default: gcc_unreachable ();
3216 break;
3218 case CC_SWPmode:
3219 case CC_ZESWPmode:
3220 case CC_SESWPmode:
3221 switch (comp_code)
3223 case NE: return AARCH64_NE;
3224 case EQ: return AARCH64_EQ;
3225 case GE: return AARCH64_LE;
3226 case GT: return AARCH64_LT;
3227 case LE: return AARCH64_GE;
3228 case LT: return AARCH64_GT;
3229 case GEU: return AARCH64_LS;
3230 case GTU: return AARCH64_CC;
3231 case LEU: return AARCH64_CS;
3232 case LTU: return AARCH64_HI;
3233 default: gcc_unreachable ();
3235 break;
3237 case CC_NZmode:
3238 switch (comp_code)
3240 case NE: return AARCH64_NE;
3241 case EQ: return AARCH64_EQ;
3242 case GE: return AARCH64_PL;
3243 case LT: return AARCH64_MI;
3244 default: gcc_unreachable ();
3246 break;
3248 default:
3249 gcc_unreachable ();
3250 break;
3254 static unsigned
3255 bit_count (unsigned HOST_WIDE_INT value)
3257 unsigned count = 0;
3259 while (value)
3261 count++;
3262 value &= value - 1;
3265 return count;
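/* The loop above clears the lowest set bit on each iteration
   (Kernighan's method); for example bit_count (0xf0) == 4.  */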
3268 void
3269 aarch64_print_operand (FILE *f, rtx x, char code)
3271 switch (code)
3273 case 'e':
3274 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3276 int n;
3278 if (GET_CODE (x) != CONST_INT
3279 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3281 output_operand_lossage ("invalid operand for '%%%c'", code);
3282 return;
3285 switch (n)
3287 case 3:
3288 fputc ('b', f);
3289 break;
3290 case 4:
3291 fputc ('h', f);
3292 break;
3293 case 5:
3294 fputc ('w', f);
3295 break;
3296 default:
3297 output_operand_lossage ("invalid operand for '%%%c'", code);
3298 return;
3301 break;
3303 case 'p':
3305 int n;
3307 /* Print N such that 2^N == X. */
3308 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3310 output_operand_lossage ("invalid operand for '%%%c'", code);
3311 return;
3314 asm_fprintf (f, "%d", n);
3316 break;
3318 case 'P':
3319 /* Print the number of non-zero bits in X (a const_int). */
3320 if (GET_CODE (x) != CONST_INT)
3322 output_operand_lossage ("invalid operand for '%%%c'", code);
3323 return;
3326 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3327 break;
3329 case 'H':
3330 /* Print the higher numbered register of a pair (TImode) of regs. */
3331 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3333 output_operand_lossage ("invalid operand for '%%%c'", code);
3334 return;
3337 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3338 break;
3340 case 'Q':
3341 /* Print the least significant register of a pair (TImode) of regs. */
3342 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3344 output_operand_lossage ("invalid operand for '%%%c'", code);
3345 return;
3347 asm_fprintf (f, "%s", reg_names [REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0)]);
3348 break;
3350 case 'R':
3351 /* Print the most significant register of a pair (TImode) of regs. */
3352 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3354 output_operand_lossage ("invalid operand for '%%%c'", code);
3355 return;
3357 asm_fprintf (f, "%s", reg_names [REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1)]);
3358 break;
3360 case 'm':
3361 /* Print a condition (eq, ne, etc). */
3363 /* CONST_TRUE_RTX means always -- that's the default. */
3364 if (x == const_true_rtx)
3365 return;
3367 if (!COMPARISON_P (x))
3369 output_operand_lossage ("invalid operand for '%%%c'", code);
3370 return;
3373 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3374 break;
3376 case 'M':
3377 /* Print the inverse of a condition (eq <-> ne, etc). */
3379 /* CONST_TRUE_RTX means never -- that's the default. */
3380 if (x == const_true_rtx)
3382 fputs ("nv", f);
3383 return;
3386 if (!COMPARISON_P (x))
3388 output_operand_lossage ("invalid operand for '%%%c'", code);
3389 return;
3392 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3393 (aarch64_get_condition_code (x))], f);
3394 break;
3396 case 'b':
3397 case 'h':
3398 case 's':
3399 case 'd':
3400 case 'q':
3401 /* Print a scalar FP/SIMD register name. */
3402 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3404 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3405 return;
3407 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3408 break;
3410 case 'S':
3411 case 'T':
3412 case 'U':
3413 case 'V':
3414 /* Print the first FP/SIMD register name in a list. */
3415 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3417 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3418 return;
3420 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3421 break;
3423 case 'X':
3424 /* Print bottom 16 bits of integer constant in hex. */
3425 if (GET_CODE (x) != CONST_INT)
3427 output_operand_lossage ("invalid operand for '%%%c'", code);
3428 return;
3430 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3431 break;
3433 case 'w':
3434 case 'x':
3435 /* Print a general register name or the zero register (32-bit or
3436 64-bit). */
3437 if (x == const0_rtx
3438 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3440 asm_fprintf (f, "%czr", code);
3441 break;
3444 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3446 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3447 break;
3450 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3452 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3453 break;
3456 /* Fall through */
3458 case 0:
3459 /* Print a normal operand; if it's a general register, then we
3460 assume DImode. */
3461 if (x == NULL)
3463 output_operand_lossage ("missing operand");
3464 return;
3467 switch (GET_CODE (x))
3469 case REG:
3470 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3471 break;
3473 case MEM:
3474 aarch64_memory_reference_mode = GET_MODE (x);
3475 output_address (XEXP (x, 0));
3476 break;
3478 case LABEL_REF:
3479 case SYMBOL_REF:
3480 output_addr_const (asm_out_file, x);
3481 break;
3483 case CONST_INT:
3484 asm_fprintf (f, "%wd", INTVAL (x));
3485 break;
3487 case CONST_VECTOR:
3488 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3490 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3491 HOST_WIDE_INT_MIN,
3492 HOST_WIDE_INT_MAX));
3493 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3495 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3497 fputc ('0', f);
3499 else
3500 gcc_unreachable ();
3501 break;
3503 case CONST_DOUBLE:
3504 /* CONST_DOUBLE can represent a double-width integer.
3505 In this case, the mode of x is VOIDmode. */
3506 if (GET_MODE (x) == VOIDmode)
3507 ; /* Do Nothing. */
3508 else if (aarch64_float_const_zero_rtx_p (x))
3510 fputc ('0', f);
3511 break;
3513 else if (aarch64_float_const_representable_p (x))
3515 #define buf_size 20
3516 char float_buf[buf_size] = {'\0'};
3517 REAL_VALUE_TYPE r;
3518 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3519 real_to_decimal_for_mode (float_buf, &r,
3520 buf_size, buf_size,
3521 1, GET_MODE (x));
3522 asm_fprintf (asm_out_file, "%s", float_buf);
3523 break;
3524 #undef buf_size
3526 output_operand_lossage ("invalid constant");
3527 return;
3528 default:
3529 output_operand_lossage ("invalid operand");
3530 return;
3532 break;
3534 case 'A':
3535 if (GET_CODE (x) == HIGH)
3536 x = XEXP (x, 0);
3538 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3540 case SYMBOL_SMALL_GOT:
3541 asm_fprintf (asm_out_file, ":got:");
3542 break;
3544 case SYMBOL_SMALL_TLSGD:
3545 asm_fprintf (asm_out_file, ":tlsgd:");
3546 break;
3548 case SYMBOL_SMALL_TLSDESC:
3549 asm_fprintf (asm_out_file, ":tlsdesc:");
3550 break;
3552 case SYMBOL_SMALL_GOTTPREL:
3553 asm_fprintf (asm_out_file, ":gottprel:");
3554 break;
3556 case SYMBOL_SMALL_TPREL:
3557 asm_fprintf (asm_out_file, ":tprel:");
3558 break;
3560 default:
3561 break;
3563 output_addr_const (asm_out_file, x);
3564 break;
3566 case 'L':
3567 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3569 case SYMBOL_SMALL_GOT:
3570 asm_fprintf (asm_out_file, ":lo12:");
3571 break;
3573 case SYMBOL_SMALL_TLSGD:
3574 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3575 break;
3577 case SYMBOL_SMALL_TLSDESC:
3578 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3579 break;
3581 case SYMBOL_SMALL_GOTTPREL:
3582 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3583 break;
3585 case SYMBOL_SMALL_TPREL:
3586 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3587 break;
3589 default:
3590 break;
3592 output_addr_const (asm_out_file, x);
3593 break;
3595 case 'G':
3597 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3599 case SYMBOL_SMALL_TPREL:
3600 asm_fprintf (asm_out_file, ":tprel_hi12:");
3601 break;
3602 default:
3603 break;
3605 output_addr_const (asm_out_file, x);
3606 break;
3608 default:
3609 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3610 return;
3614 void
3615 aarch64_print_operand_address (FILE *f, rtx x)
3617 struct aarch64_address_info addr;
3619 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3620 MEM, true))
3621 switch (addr.type)
3623 case ADDRESS_REG_IMM:
3624 if (addr.offset == const0_rtx)
3625 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3626 else
3627 asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
3628 INTVAL (addr.offset));
3629 return;
3631 case ADDRESS_REG_REG:
3632 if (addr.shift == 0)
3633 asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3634 reg_names [REGNO (addr.offset)]);
3635 else
3636 asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3637 reg_names [REGNO (addr.offset)], addr.shift);
3638 return;
3640 case ADDRESS_REG_UXTW:
3641 if (addr.shift == 0)
3642 asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
3643 REGNO (addr.offset) - R0_REGNUM);
3644 else
3645 asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
3646 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3647 return;
3649 case ADDRESS_REG_SXTW:
3650 if (addr.shift == 0)
3651 asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
3652 REGNO (addr.offset) - R0_REGNUM);
3653 else
3654 asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
3655 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3656 return;
3658 case ADDRESS_REG_WB:
3659 switch (GET_CODE (x))
3661 case PRE_INC:
3662 asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
3663 GET_MODE_SIZE (aarch64_memory_reference_mode));
3664 return;
3665 case POST_INC:
3666 asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
3667 GET_MODE_SIZE (aarch64_memory_reference_mode));
3668 return;
3669 case PRE_DEC:
3670 asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
3671 GET_MODE_SIZE (aarch64_memory_reference_mode));
3672 return;
3673 case POST_DEC:
3674 asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
3675 GET_MODE_SIZE (aarch64_memory_reference_mode));
3676 return;
3677 case PRE_MODIFY:
3678 asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
3679 INTVAL (addr.offset));
3680 return;
3681 case POST_MODIFY:
3682 asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
3683 INTVAL (addr.offset));
3684 return;
3685 default:
3686 break;
3688 break;
3690 case ADDRESS_LO_SUM:
3691 asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
3692 output_addr_const (f, addr.offset);
3693 asm_fprintf (f, "]");
3694 return;
3696 case ADDRESS_SYMBOLIC:
3697 break;
3700 output_addr_const (f, x);
3703 void
3704 aarch64_function_profiler (FILE *f ATTRIBUTE_UNUSED,
3705 int labelno ATTRIBUTE_UNUSED)
3707 sorry ("function profiling");
3710 bool
3711 aarch64_label_mentioned_p (rtx x)
3713 const char *fmt;
3714 int i;
3716 if (GET_CODE (x) == LABEL_REF)
3717 return true;
3719 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3720 referencing instruction, but they are constant offsets, not
3721 symbols. */
3722 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3723 return false;
3725 fmt = GET_RTX_FORMAT (GET_CODE (x));
3726 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3728 if (fmt[i] == 'E')
3730 int j;
3732 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3733 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3734 return 1;
3736 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3737 return 1;
3740 return 0;
3743 /* Implement REGNO_REG_CLASS. */
3745 enum reg_class
3746 aarch64_regno_regclass (unsigned regno)
3748 if (GP_REGNUM_P (regno))
3749 return CORE_REGS;
3751 if (regno == SP_REGNUM)
3752 return STACK_REG;
3754 if (regno == FRAME_POINTER_REGNUM
3755 || regno == ARG_POINTER_REGNUM)
3756 return CORE_REGS;
3758 if (FP_REGNUM_P (regno))
3759 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
3761 return NO_REGS;
3764 /* Try a machine-dependent way of reloading an illegitimate address
3765 operand. If we find one, push the reload and return the new rtx. */
3768 aarch64_legitimize_reload_address (rtx *x_p,
3769 enum machine_mode mode,
3770 int opnum, int type,
3771 int ind_levels ATTRIBUTE_UNUSED)
3773 rtx x = *x_p;
3775 /* Do not allow mem (plus (reg, const)) if vector mode. */
3776 if (aarch64_vector_mode_p (mode)
3777 && GET_CODE (x) == PLUS
3778 && REG_P (XEXP (x, 0))
3779 && CONST_INT_P (XEXP (x, 1)))
3781 rtx orig_rtx = x;
3782 x = copy_rtx (x);
3783 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3784 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3785 opnum, (enum reload_type) type);
3786 return x;
3789 /* We must recognize output that we have already generated ourselves. */
3790 if (GET_CODE (x) == PLUS
3791 && GET_CODE (XEXP (x, 0)) == PLUS
3792 && REG_P (XEXP (XEXP (x, 0), 0))
3793 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3794 && CONST_INT_P (XEXP (x, 1)))
3796 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3797 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3798 opnum, (enum reload_type) type);
3799 return x;
3802 /* We wish to handle large displacements off a base register by splitting
3803 the addend across an add and the mem insn. This can cut the number of
3804 extra insns needed from 3 to 1. It is only useful for load/store of a
3805 single register with a 12-bit offset field. */
3806 if (GET_CODE (x) == PLUS
3807 && REG_P (XEXP (x, 0))
3808 && CONST_INT_P (XEXP (x, 1))
3809 && HARD_REGISTER_P (XEXP (x, 0))
3810 && mode != TImode
3811 && mode != TFmode
3812 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
3814 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
3815 HOST_WIDE_INT low = val & 0xfff;
3816 HOST_WIDE_INT high = val - low;
3817 HOST_WIDE_INT offs;
3818 rtx cst;
3820 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
3821 BLKmode alignment. */
3822 if (GET_MODE_SIZE (mode) == 0)
3823 return NULL_RTX;
3825 offs = low % GET_MODE_SIZE (mode);
3827 /* Align misaligned offset by adjusting high part to compensate. */
3828 if (offs != 0)
3830 if (aarch64_uimm12_shift (high + offs))
3832 /* Align down. */
3833 low = low - offs;
3834 high = high + offs;
3836 else
3838 /* Align up. */
3839 offs = GET_MODE_SIZE (mode) - offs;
3840 low = low + offs;
3841 high = high + (low & 0x1000) - offs;
3842 low &= 0xfff;
3846 /* Check for overflow. */
3847 if (high + low != val)
3848 return NULL_RTX;
3850 cst = GEN_INT (high);
3851 if (!aarch64_uimm12_shift (high))
3852 cst = force_const_mem (Pmode, cst);
3854 /* Reload high part into base reg, leaving the low part
3855 in the mem instruction. */
3856 x = gen_rtx_PLUS (Pmode,
3857 gen_rtx_PLUS (Pmode, XEXP (x, 0), cst),
3858 GEN_INT (low));
3860 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
3861 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
3862 opnum, (enum reload_type) type);
3863 return x;
3866 return NULL_RTX;
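/* Illustrative splitting (invented registers, not actual compiler
   output): an SImode access at base + 0x3458 is reloaded as roughly

       add  xT, xBASE, #0x3000     // high part, a valid uimm12 << 12
       ldr  wD, [xT, #0x458]       // low part stays in the mem insn

   where xT is the reload scratch register.  */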
3870 static reg_class_t
3871 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
3872 reg_class_t rclass,
3873 enum machine_mode mode,
3874 secondary_reload_info *sri)
3876 /* Address expressions of the form PLUS (SP, large_offset) need two
3877 scratch registers, one for the constant, and one for holding a
3878 copy of SP, since SP cannot be used on the RHS of an add-reg
3879 instruction. */
3880 if (mode == DImode
3881 && GET_CODE (x) == PLUS
3882 && XEXP (x, 0) == stack_pointer_rtx
3883 && CONST_INT_P (XEXP (x, 1))
3884 && !aarch64_uimm12_shift (INTVAL (XEXP (x, 1))))
3886 sri->icode = CODE_FOR_reload_sp_immediate;
3887 return NO_REGS;
3890 /* Without the TARGET_SIMD instructions we cannot move a Q register
3891 to a Q register directly. We need a scratch. */
3892 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
3893 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
3894 && reg_class_subset_p (rclass, FP_REGS))
3896 if (mode == TFmode)
3897 sri->icode = CODE_FOR_aarch64_reload_movtf;
3898 else if (mode == TImode)
3899 sri->icode = CODE_FOR_aarch64_reload_movti;
3900 return NO_REGS;
3903 /* A TFmode or TImode memory access should be handled via the FP_REGS
3904 class, because AArch64 has richer addressing modes for LDR/STR
3905 instructions than for LDP/STP instructions. */
3906 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
3907 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
3908 return FP_REGS;
3910 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P (x))
3911 return CORE_REGS;
3913 return NO_REGS;
3916 static bool
3917 aarch64_can_eliminate (const int from, const int to)
3919 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
3920 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
3922 if (frame_pointer_needed)
3924 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3925 return true;
3926 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3927 return false;
3928 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
3929 && !cfun->calls_alloca)
3930 return true;
3931 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3932 return true;
3933 return false;
3935 else
3937 /* If we decided that we didn't need a leaf frame pointer but then used
3938 LR in the function, then we'll want a frame pointer after all, so
3939 prevent this elimination to ensure a frame pointer is used.
3941 NOTE: the original value of flag_omit_frame_pointer gets trashed
3942 IFF flag_omit_leaf_frame_pointer is true, so we check the value
3943 of faked_omit_frame_pointer here (which is true when we always
3944 wish to keep non-leaf frame pointers but only wish to keep leaf frame
3945 pointers when LR is clobbered). */
3946 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
3947 && df_regs_ever_live_p (LR_REGNUM)
3948 && faked_omit_frame_pointer)
3949 return false;
3952 return true;
3955 HOST_WIDE_INT
3956 aarch64_initial_elimination_offset (unsigned from, unsigned to)
3958 HOST_WIDE_INT frame_size;
3959 HOST_WIDE_INT offset;
3961 aarch64_layout_frame ();
3962 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
3963 + crtl->outgoing_args_size
3964 + cfun->machine->saved_varargs_size);
3966 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
3967 offset = frame_size;
3969 if (to == HARD_FRAME_POINTER_REGNUM)
3971 if (from == ARG_POINTER_REGNUM)
3972 return offset - crtl->outgoing_args_size;
3974 if (from == FRAME_POINTER_REGNUM)
3975 return cfun->machine->frame.saved_regs_size;
3978 if (to == STACK_POINTER_REGNUM)
3980 if (from == FRAME_POINTER_REGNUM)
3982 HOST_WIDE_INT elim = crtl->outgoing_args_size
3983 + cfun->machine->frame.saved_regs_size
3984 - cfun->machine->frame.fp_lr_offset;
3985 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
3986 return elim;
3990 return offset;
3994 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
3995 previous frame. */
3998 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4000 if (count != 0)
4001 return const0_rtx;
4002 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4006 static void
4007 aarch64_asm_trampoline_template (FILE *f)
4009 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4010 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4011 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4012 assemble_aligned_integer (4, const0_rtx);
4013 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4014 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
4017 unsigned
4018 aarch64_trampoline_size (void)
4020 return 32; /* 3 insns + padding + 2 dwords. */
4023 static void
4024 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4026 rtx fnaddr, mem, a_tramp;
4028 /* Don't need to copy the trailing D-words; we fill those in below. */
4029 emit_block_move (m_tramp, assemble_trampoline_template (),
4030 GEN_INT (TRAMPOLINE_SIZE - 16), BLOCK_OP_NORMAL);
4031 mem = adjust_address (m_tramp, DImode, 16);
4032 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4033 emit_move_insn (mem, fnaddr);
4035 mem = adjust_address (m_tramp, DImode, 24);
4036 emit_move_insn (mem, chain_value);
4038 /* XXX We should really define a "clear_cache" pattern and use
4039 gen_clear_cache(). */
4040 a_tramp = XEXP (m_tramp, 0);
4041 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4042 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
4043 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
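/* Resulting 32-byte trampoline layout (sketch):
     bytes  0-11: the three template instructions above
     bytes 12-15: padding word
     bytes 16-23: target function address (stored by the code above)
     bytes 24-31: static chain value (stored by the code above)
   The first LDR loads from byte 16 and the second from byte 24.  */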
4046 static unsigned char
4047 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4049 switch (regclass)
4051 case CORE_REGS:
4052 case POINTER_REGS:
4053 case GENERAL_REGS:
4054 case ALL_REGS:
4055 case FP_REGS:
4056 case FP_LO_REGS:
4057 return
4058 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4059 (GET_MODE_SIZE (mode) + 7) / 8;
4060 case STACK_REG:
4061 return 1;
4063 case NO_REGS:
4064 return 0;
4066 default:
4067 break;
4069 gcc_unreachable ();
4072 static reg_class_t
4073 aarch64_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t regclass)
4075 return ((regclass == POINTER_REGS || regclass == STACK_REG)
4076 ? GENERAL_REGS : regclass);
4079 void
4080 aarch64_asm_output_labelref (FILE* f, const char *name)
4082 asm_fprintf (f, "%U%s", name);
4085 static void
4086 aarch64_elf_asm_constructor (rtx symbol, int priority)
4088 if (priority == DEFAULT_INIT_PRIORITY)
4089 default_ctor_section_asm_out_constructor (symbol, priority);
4090 else
4092 section *s;
4093 char buf[18];
4094 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4095 s = get_section (buf, SECTION_WRITE, NULL);
4096 switch_to_section (s);
4097 assemble_align (POINTER_SIZE);
4098 fputs ("\t.dword\t", asm_out_file);
4099 output_addr_const (asm_out_file, symbol);
4100 fputc ('\n', asm_out_file);
4104 static void
4105 aarch64_elf_asm_destructor (rtx symbol, int priority)
4107 if (priority == DEFAULT_INIT_PRIORITY)
4108 default_dtor_section_asm_out_destructor (symbol, priority);
4109 else
4111 section *s;
4112 char buf[18];
4113 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4114 s = get_section (buf, SECTION_WRITE, NULL);
4115 switch_to_section (s);
4116 assemble_align (POINTER_SIZE);
4117 fputs ("\t.dword\t", asm_out_file);
4118 output_addr_const (asm_out_file, symbol);
4119 fputc ('\n', asm_out_file);
4123 const char*
4124 aarch64_output_casesi (rtx *operands)
4126 char buf[100];
4127 char label[100];
4128 rtx diff_vec = PATTERN (next_real_insn (operands[2]));
4129 int index;
4130 static const char *const patterns[4][2] =
4133 "ldrb\t%w3, [%0,%w1,uxtw]",
4134 "add\t%3, %4, %w3, sxtb #2"
4137 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4138 "add\t%3, %4, %w3, sxth #2"
4141 "ldr\t%w3, [%0,%w1,uxtw #2]",
4142 "add\t%3, %4, %w3, sxtw #2"
4144 /* We assume that DImode is only generated when not optimizing and
4145 that we don't really need 64-bit address offsets. That would
4146 imply an object file with 8GB of code in a single function! */
4148 "ldr\t%w3, [%0,%w1,uxtw #2]",
4149 "add\t%3, %4, %w3, sxtw #2"
4153 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4155 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4157 gcc_assert (index >= 0 && index <= 3);
4159 /* Need to implement table size reduction by changing the code below. */
4160 output_asm_insn (patterns[index][0], operands);
4161 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4162 snprintf (buf, sizeof (buf),
4163 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4164 output_asm_insn (buf, operands);
4165 output_asm_insn (patterns[index][1], operands);
4166 output_asm_insn ("br\t%3", operands);
4167 assemble_label (asm_out_file, label);
4168 return "";
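/* Editorial example, not part of the original source: for a HImode dispatch
   table (index == 1) the code above emits a sequence along the lines of

       ldrh  w3, [x0, w1, uxtw #1]
       adr   x4, .Lrtx<N>
       add   x3, x4, w3, sxth #2
       br    x3
     .Lrtx<N>:

   i.e. the table entry is loaded, sign-extended, scaled by 4 and added to
   the address of the label emitted just after the branch.  */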
4172 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4173 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4174 operator. */
4177 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4179 if (shift >= 0 && shift <= 3)
4181 int size;
4182 for (size = 8; size <= 32; size *= 2)
4184 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4185 if (mask == bits << shift)
4186 return size;
4189 return 0;
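/* Editorial example, not part of the original source: with SHIFT == 1 and
   MASK == 0x1fe (i.e. 0xff << 1) the loop above matches at size 8, so the
   operand is usable as a UXTB combined with a left shift of 1.  Any mask
   that is not a byte, half-word or word field shifted left by 0..3 makes
   the function return 0.  */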
4192 static bool
4193 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4194 const_rtx x ATTRIBUTE_UNUSED)
4196 /* We can't use blocks for constants when we're using a per-function
4197 constant pool. */
4198 return false;
4201 static section *
4202 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4203 rtx x ATTRIBUTE_UNUSED,
4204 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4206 /* Force all constant pool entries into the current function section. */
4207 return function_section (current_function_decl);
4211 /* Costs. */
4213 /* Helper function for rtx cost calculation. Strip a shift expression
4214 from X. Returns the inner operand if successful, or the original
4215 expression on failure. */
4216 static rtx
4217 aarch64_strip_shift (rtx x)
4219 rtx op = x;
4221 if ((GET_CODE (op) == ASHIFT
4222 || GET_CODE (op) == ASHIFTRT
4223 || GET_CODE (op) == LSHIFTRT)
4224 && CONST_INT_P (XEXP (op, 1)))
4225 return XEXP (op, 0);
4227 if (GET_CODE (op) == MULT
4228 && CONST_INT_P (XEXP (op, 1))
4229 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4230 return XEXP (op, 0);
4232 return x;
4235 /* Helper function for rtx cost calculation. Strip a shift or extend
4236 expression from X. Returns the inner operand if successful, or the
4237 original expression on failure. We deal with a number of possible
4238 canonicalization variations here. */
4239 static rtx
4240 aarch64_strip_shift_or_extend (rtx x)
4242 rtx op = x;
4244 /* Zero and sign extraction of a widened value. */
4245 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4246 && XEXP (op, 2) == const0_rtx
4247 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4248 XEXP (op, 1)))
4249 return XEXP (XEXP (op, 0), 0);
4251 /* It can also be represented (for zero-extend) as an AND with an
4252 immediate. */
4253 if (GET_CODE (op) == AND
4254 && GET_CODE (XEXP (op, 0)) == MULT
4255 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4256 && CONST_INT_P (XEXP (op, 1))
4257 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4258 INTVAL (XEXP (op, 1))) != 0)
4259 return XEXP (XEXP (op, 0), 0);
4261 /* Now handle extended register, as this may also have an optional
4262 left shift by 1..4. */
4263 if (GET_CODE (op) == ASHIFT
4264 && CONST_INT_P (XEXP (op, 1))
4265 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4266 op = XEXP (op, 0);
4268 if (GET_CODE (op) == ZERO_EXTEND
4269 || GET_CODE (op) == SIGN_EXTEND)
4270 op = XEXP (op, 0);
4272 if (op != x)
4273 return op;
4275 return aarch64_strip_shift (x);
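/* Editorial note, not part of the original source: examples of what the two
   stripping helpers above accept, in terms of the RTL they are handed:

     (ashift (reg X) (const_int 3))                  -> (reg X)
     (mult (reg X) (const_int 8))                    -> (reg X)  ; power-of-2 scale
     (zero_extend (reg X))                           -> (reg X)
     (ashift (sign_extend (reg X)) (const_int 2))    -> (reg X)

   Anything that does not look like a shift, scale or extend is returned
   unchanged.  */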
4278 /* Calculate the cost of calculating X, storing it in *COST. Result
4279 is true if the total cost of the operation has now been calculated. */
4280 static bool
4281 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4282 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4284 rtx op0, op1;
4285 const struct cpu_rtx_cost_table *extra_cost
4286 = aarch64_tune_params->insn_extra_cost;
4288 switch (code)
4290 case SET:
4291 op0 = SET_DEST (x);
4292 op1 = SET_SRC (x);
4294 switch (GET_CODE (op0))
4296 case MEM:
4297 if (speed)
4298 *cost += extra_cost->memory_store;
4300 if (op1 != const0_rtx)
4301 *cost += rtx_cost (op1, SET, 1, speed);
4302 return true;
4304 case SUBREG:
4305 if (! REG_P (SUBREG_REG (op0)))
4306 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4307 /* Fall through. */
4308 case REG:
4309 /* Cost is just the cost of the RHS of the set. */
4310 *cost += rtx_cost (op1, SET, 1, true);
4311 return true;
4313 case ZERO_EXTRACT: /* Bit-field insertion. */
4314 case SIGN_EXTRACT:
4315 /* Strip any redundant widening of the RHS to meet the width of
4316 the target. */
4317 if (GET_CODE (op1) == SUBREG)
4318 op1 = SUBREG_REG (op1);
4319 if ((GET_CODE (op1) == ZERO_EXTEND
4320 || GET_CODE (op1) == SIGN_EXTEND)
4321 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4322 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4323 >= INTVAL (XEXP (op0, 1))))
4324 op1 = XEXP (op1, 0);
4325 *cost += rtx_cost (op1, SET, 1, speed);
4326 return true;
4328 default:
4329 break;
4331 return false;
4333 case MEM:
4334 if (speed)
4335 *cost += extra_cost->memory_load;
4337 return true;
4339 case NEG:
4340 op0 = CONST0_RTX (GET_MODE (x));
4341 op1 = XEXP (x, 0);
4342 goto cost_minus;
4344 case COMPARE:
4345 op0 = XEXP (x, 0);
4346 op1 = XEXP (x, 1);
4348 if (op1 == const0_rtx
4349 && GET_CODE (op0) == AND)
4351 x = op0;
4352 goto cost_logic;
4355 /* Comparisons can work if the order is swapped.
4356 Canonicalization puts the more complex operation first, but
4357 we want it in op1. */
4358 if (! (REG_P (op0)
4359 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4361 op0 = XEXP (x, 1);
4362 op1 = XEXP (x, 0);
4364 goto cost_minus;
4366 case MINUS:
4367 op0 = XEXP (x, 0);
4368 op1 = XEXP (x, 1);
4370 cost_minus:
4371 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4372 || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4373 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4375 if (op0 != const0_rtx)
4376 *cost += rtx_cost (op0, MINUS, 0, speed);
4378 if (CONST_INT_P (op1))
4380 if (!aarch64_uimm12_shift (INTVAL (op1)))
4381 *cost += rtx_cost (op1, MINUS, 1, speed);
4383 else
4385 op1 = aarch64_strip_shift_or_extend (op1);
4386 *cost += rtx_cost (op1, MINUS, 1, speed);
4388 return true;
4391 return false;
4393 case PLUS:
4394 op0 = XEXP (x, 0);
4395 op1 = XEXP (x, 1);
4397 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4399 if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4401 *cost += rtx_cost (op0, PLUS, 0, speed);
4403 else
4405 rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4407 if (new_op0 == op0
4408 && GET_CODE (op0) == MULT)
4410 if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4411 && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4412 || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4413 && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4415 *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4416 speed)
4417 + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4418 speed)
4419 + rtx_cost (op1, PLUS, 1, speed));
4420 if (speed)
4421 *cost += extra_cost->int_multiply_extend_add;
4422 return true;
4424 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4425 + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4426 + rtx_cost (op1, PLUS, 1, speed));
4428 if (speed)
4429 *cost += extra_cost->int_multiply_add;
4432 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4433 + rtx_cost (op1, PLUS, 1, speed));
4435 return true;
4438 return false;
4440 case IOR:
4441 case XOR:
4442 case AND:
4443 cost_logic:
4444 op0 = XEXP (x, 0);
4445 op1 = XEXP (x, 1);
4447 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4449 if (CONST_INT_P (op1)
4450 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4452 *cost += rtx_cost (op0, AND, 0, speed);
4454 else
4456 if (GET_CODE (op0) == NOT)
4457 op0 = XEXP (op0, 0);
4458 op0 = aarch64_strip_shift (op0);
4459 *cost += (rtx_cost (op0, AND, 0, speed)
4460 + rtx_cost (op1, AND, 1, speed));
4462 return true;
4464 return false;
4466 case ZERO_EXTEND:
4467 if ((GET_MODE (x) == DImode
4468 && GET_MODE (XEXP (x, 0)) == SImode)
4469 || GET_CODE (XEXP (x, 0)) == MEM)
4471 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4472 return true;
4474 return false;
4476 case SIGN_EXTEND:
4477 if (GET_CODE (XEXP (x, 0)) == MEM)
4479 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4480 return true;
4482 return false;
4484 case ROTATE:
4485 if (!CONST_INT_P (XEXP (x, 1)))
4486 *cost += COSTS_N_INSNS (2);
4487 /* Fall through. */
4488 case ROTATERT:
4489 case LSHIFTRT:
4490 case ASHIFT:
4491 case ASHIFTRT:
4493 /* Shifting by a register often takes an extra cycle. */
4494 if (speed && !CONST_INT_P (XEXP (x, 1)))
4495 *cost += extra_cost->register_shift;
4497 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4498 return true;
4500 case HIGH:
4501 if (!CONSTANT_P (XEXP (x, 0)))
4502 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4503 return true;
4505 case LO_SUM:
4506 if (!CONSTANT_P (XEXP (x, 1)))
4507 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4508 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4509 return true;
4511 case ZERO_EXTRACT:
4512 case SIGN_EXTRACT:
4513 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4514 return true;
4516 case MULT:
4517 op0 = XEXP (x, 0);
4518 op1 = XEXP (x, 1);
4520 *cost = COSTS_N_INSNS (1);
4521 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4523 if (CONST_INT_P (op1)
4524 && exact_log2 (INTVAL (op1)) > 0)
4526 *cost += rtx_cost (op0, ASHIFT, 0, speed);
4527 return true;
4530 if ((GET_CODE (op0) == ZERO_EXTEND
4531 && GET_CODE (op1) == ZERO_EXTEND)
4532 || (GET_CODE (op0) == SIGN_EXTEND
4533 && GET_CODE (op1) == SIGN_EXTEND))
4535 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4536 + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4537 if (speed)
4538 *cost += extra_cost->int_multiply_extend;
4539 return true;
4542 if (speed)
4543 *cost += extra_cost->int_multiply;
4545 else if (speed)
4547 if (GET_MODE (x) == DFmode)
4548 *cost += extra_cost->double_multiply;
4549 else if (GET_MODE (x) == SFmode)
4550 *cost += extra_cost->float_multiply;
4553 return false; /* All arguments need to be in registers. */
4555 case MOD:
4556 case UMOD:
4557 *cost = COSTS_N_INSNS (2);
4558 if (speed)
4560 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4561 *cost += (extra_cost->int_multiply_add
4562 + extra_cost->int_divide);
4563 else if (GET_MODE (x) == DFmode)
4564 *cost += (extra_cost->double_multiply
4565 + extra_cost->double_divide);
4566 else if (GET_MODE (x) == SFmode)
4567 *cost += (extra_cost->float_multiply
4568 + extra_cost->float_divide);
4570 return false; /* All arguments need to be in registers. */
4572 case DIV:
4573 case UDIV:
4574 *cost = COSTS_N_INSNS (1);
4575 if (speed)
4577 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4578 *cost += extra_cost->int_divide;
4579 else if (GET_MODE (x) == DFmode)
4580 *cost += extra_cost->double_divide;
4581 else if (GET_MODE (x) == SFmode)
4582 *cost += extra_cost->float_divide;
4584 return false; /* All arguments need to be in registers. */
4586 default:
4587 break;
4589 return false;
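/* Editorial note, not part of the original source: in the hook above,
   writing into *COST and returning true tells the generic rtx_cost
   machinery that the expression has been costed completely, while
   returning false makes it recurse into the sub-expressions and add
   their default costs.  The extra_cost table contributes the per-CPU
   adjustments only when costing for speed.  */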
4592 static int
4593 aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4594 enum machine_mode mode ATTRIBUTE_UNUSED,
4595 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4597 enum rtx_code c = GET_CODE (x);
4598 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4600 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4601 return addr_cost->pre_modify;
4603 if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4604 return addr_cost->post_modify;
4606 if (c == PLUS)
4608 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4609 return addr_cost->imm_offset;
4610 else if (GET_CODE (XEXP (x, 0)) == MULT
4611 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4612 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4613 return addr_cost->register_extend;
4615 return addr_cost->register_offset;
4617 else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4618 return addr_cost->imm_offset;
4620 return 0;
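/* Editorial note, not part of the original source: the buckets above map
   roughly onto the AArch64 addressing forms: writeback addresses such as
   [Xn, #imm]! and [Xn], #imm use pre_modify/post_modify, [Xn, #imm] uses
   imm_offset, a scaled or extended index such as [Xn, Xm, lsl #3] shows
   up as a PLUS of a MULT or extend and uses register_extend, and a plain
   [Xn, Xm] uses register_offset.  Symbolic addresses are costed like an
   immediate offset; anything else costs 0.  */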
4623 static int
4624 aarch64_register_move_cost (enum machine_mode mode,
4625 reg_class_t from, reg_class_t to)
4627 const struct cpu_regmove_cost *regmove_cost
4628 = aarch64_tune_params->regmove_cost;
4630 if (from == GENERAL_REGS && to == GENERAL_REGS)
4631 return regmove_cost->GP2GP;
4632 else if (from == GENERAL_REGS)
4633 return regmove_cost->GP2FP;
4634 else if (to == GENERAL_REGS)
4635 return regmove_cost->FP2GP;
4637 /* When AdvSIMD instructions are disabled it is not possible to move
4638 a 128-bit value directly between Q registers. This is handled in
4639 secondary reload. A general register is used as a scratch to move
4640 the upper DI value and the lower DI value is moved directly,
4641 hence the cost is the sum of three moves. */
4643 if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
4644 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4646 return regmove_cost->FP2FP;
4649 static int
4650 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4651 reg_class_t rclass ATTRIBUTE_UNUSED,
4652 bool in ATTRIBUTE_UNUSED)
4654 return aarch64_tune_params->memmov_cost;
4657 static void initialize_aarch64_code_model (void);
4659 /* Parse the architecture extension string. */
4661 static void
4662 aarch64_parse_extension (char *str)
4664 /* The extension string is parsed left to right. */
4665 const struct aarch64_option_extension *opt = NULL;
4667 /* Flag to say whether we are adding or removing an extension. */
4668 int adding_ext = -1;
4670 while (str != NULL && *str != 0)
4672 char *ext;
4673 size_t len;
4675 str++;
4676 ext = strchr (str, '+');
4678 if (ext != NULL)
4679 len = ext - str;
4680 else
4681 len = strlen (str);
4683 if (len >= 2 && strncmp (str, "no", 2) == 0)
4685 adding_ext = 0;
4686 len -= 2;
4687 str += 2;
4689 else if (len > 0)
4690 adding_ext = 1;
4692 if (len == 0)
4694 error ("missing feature modifier after %qs", "+no");
4695 return;
4698 /* Scan over the extensions table trying to find an exact match. */
4699 for (opt = all_extensions; opt->name != NULL; opt++)
4701 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
4703 /* Add or remove the extension. */
4704 if (adding_ext)
4705 aarch64_isa_flags |= opt->flags_on;
4706 else
4707 aarch64_isa_flags &= ~(opt->flags_off);
4708 break;
4712 if (opt->name == NULL)
4714 /* Extension not found in list. */
4715 error ("unknown feature modifier %qs", str);
4716 return;
4719 str = ext;
4722 return;
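/* Editorial example, not part of the original source, assuming "fp" and
   "simd" are entries in all_extensions: given the string "+fp+nosimd",
   the loop above first sets the flags for "fp" and then, because of the
   "no" prefix, clears the flags for "simd".  A bare trailing "+no" is
   rejected with the "missing feature modifier" error.  */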
4725 /* Parse the ARCH string. */
4727 static void
4728 aarch64_parse_arch (void)
4730 char *ext;
4731 const struct processor *arch;
4732 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
4733 size_t len;
4735 strcpy (str, aarch64_arch_string);
4737 ext = strchr (str, '+');
4739 if (ext != NULL)
4740 len = ext - str;
4741 else
4742 len = strlen (str);
4744 if (len == 0)
4746 error ("missing arch name in -march=%qs", str);
4747 return;
4750 /* Loop through the list of supported ARCHs to find a match. */
4751 for (arch = all_architectures; arch->name != NULL; arch++)
4753 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
4755 selected_arch = arch;
4756 aarch64_isa_flags = selected_arch->flags;
4757 selected_cpu = &all_cores[selected_arch->core];
4759 if (ext != NULL)
4761 /* ARCH string contains at least one extension. */
4762 aarch64_parse_extension (ext);
4765 return;
4769 /* ARCH name not found in list. */
4770 error ("unknown value %qs for -march", str);
4771 return;
4774 /* Parse the CPU string. */
4776 static void
4777 aarch64_parse_cpu (void)
4779 char *ext;
4780 const struct processor *cpu;
4781 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
4782 size_t len;
4784 strcpy (str, aarch64_cpu_string);
4786 ext = strchr (str, '+');
4788 if (ext != NULL)
4789 len = ext - str;
4790 else
4791 len = strlen (str);
4793 if (len == 0)
4795 error ("missing cpu name in -mcpu=%qs", str);
4796 return;
4799 /* Loop through the list of supported CPUs to find a match. */
4800 for (cpu = all_cores; cpu->name != NULL; cpu++)
4802 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
4804 selected_cpu = cpu;
4805 aarch64_isa_flags = selected_cpu->flags;
4807 if (ext != NULL)
4809 /* CPU string contains at least one extension. */
4810 aarch64_parse_extension (ext);
4813 return;
4817 /* CPU name not found in list. */
4818 error ("unknown value %qs for -mcpu", str);
4819 return;
4822 /* Parse the TUNE string. */
4824 static void
4825 aarch64_parse_tune (void)
4827 const struct processor *cpu;
4828 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
4829 strcpy (str, aarch64_tune_string);
4831 /* Loop through the list of supported CPUs to find a match. */
4832 for (cpu = all_cores; cpu->name != NULL; cpu++)
4834 if (strcmp (cpu->name, str) == 0)
4836 selected_tune = cpu;
4837 return;
4841 /* CPU name not found in list. */
4842 error ("unknown value %qs for -mtune", str);
4843 return;
4847 /* Implement TARGET_OPTION_OVERRIDE. */
4849 static void
4850 aarch64_override_options (void)
4852 /* march wins over mcpu, so when march is defined, mcpu takes the same value;
4853 otherwise march remains undefined.  mtune can be used with either march or
4854 mcpu. */
4856 if (aarch64_arch_string)
4858 aarch64_parse_arch ();
4859 aarch64_cpu_string = NULL;
4862 if (aarch64_cpu_string)
4864 aarch64_parse_cpu ();
4865 selected_arch = NULL;
4868 if (aarch64_tune_string)
4870 aarch64_parse_tune ();
4873 initialize_aarch64_code_model ();
4875 aarch64_build_bitmask_table ();
4877 /* This target defaults to strict volatile bitfields. */
4878 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
4879 flag_strict_volatile_bitfields = 1;
4881 /* If the user did not specify a processor, choose the default
4882 one for them. This will be the CPU set during configuration using
4883 --with-cpu; otherwise it is "generic". */
4884 if (!selected_cpu)
4886 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
4887 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
4890 gcc_assert (selected_cpu);
4892 /* The selected cpu may be an architecture, so look up tuning by core ID. */
4893 if (!selected_tune)
4894 selected_tune = &all_cores[selected_cpu->core];
4896 aarch64_tune_flags = selected_tune->flags;
4897 aarch64_tune = selected_tune->core;
4898 aarch64_tune_params = selected_tune->tune;
4900 aarch64_override_options_after_change ();
4903 /* Implement targetm.override_options_after_change. */
4905 static void
4906 aarch64_override_options_after_change (void)
4908 faked_omit_frame_pointer = false;
4910 /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
4911 that aarch64_frame_pointer_required will be called. We need to remember
4912 whether flag_omit_frame_pointer was turned on normally or just faked. */
4914 if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
4916 flag_omit_frame_pointer = true;
4917 faked_omit_frame_pointer = true;
4921 static struct machine_function *
4922 aarch64_init_machine_status (void)
4924 struct machine_function *machine;
4925 machine = ggc_alloc_cleared_machine_function ();
4926 return machine;
4929 void
4930 aarch64_init_expanders (void)
4932 init_machine_status = aarch64_init_machine_status;
4935 /* A checking mechanism for the implementation of the various code models. */
4936 static void
4937 initialize_aarch64_code_model (void)
4939 if (flag_pic)
4941 switch (aarch64_cmodel_var)
4943 case AARCH64_CMODEL_TINY:
4944 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
4945 break;
4946 case AARCH64_CMODEL_SMALL:
4947 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
4948 break;
4949 case AARCH64_CMODEL_LARGE:
4950 sorry ("code model %qs with -f%s", "large",
4951 flag_pic > 1 ? "PIC" : "pic");
4952 default:
4953 gcc_unreachable ();
4956 else
4957 aarch64_cmodel = aarch64_cmodel_var;
4960 /* Return true if SYMBOL_REF X binds locally. */
4962 static bool
4963 aarch64_symbol_binds_local_p (const_rtx x)
4965 return (SYMBOL_REF_DECL (x)
4966 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
4967 : SYMBOL_REF_LOCAL_P (x));
4970 /* Return true if SYMBOL_REF X is thread-local. */
4971 static bool
4972 aarch64_tls_symbol_p (rtx x)
4974 if (! TARGET_HAVE_TLS)
4975 return false;
4977 if (GET_CODE (x) != SYMBOL_REF)
4978 return false;
4980 return SYMBOL_REF_TLS_MODEL (x) != 0;
4983 /* Classify a TLS symbol into one of the TLS kinds. */
4984 enum aarch64_symbol_type
4985 aarch64_classify_tls_symbol (rtx x)
4987 enum tls_model tls_kind = tls_symbolic_operand_type (x);
4989 switch (tls_kind)
4991 case TLS_MODEL_GLOBAL_DYNAMIC:
4992 case TLS_MODEL_LOCAL_DYNAMIC:
4993 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
4995 case TLS_MODEL_INITIAL_EXEC:
4996 return SYMBOL_SMALL_GOTTPREL;
4998 case TLS_MODEL_LOCAL_EXEC:
4999 return SYMBOL_SMALL_TPREL;
5001 case TLS_MODEL_EMULATED:
5002 case TLS_MODEL_NONE:
5003 return SYMBOL_FORCE_TO_MEM;
5005 default:
5006 gcc_unreachable ();
5010 /* Return the method that should be used to access SYMBOL_REF or
5011 LABEL_REF X in context CONTEXT. */
5013 enum aarch64_symbol_type
5014 aarch64_classify_symbol (rtx x,
5015 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
5017 if (GET_CODE (x) == LABEL_REF)
5019 switch (aarch64_cmodel)
5021 case AARCH64_CMODEL_LARGE:
5022 return SYMBOL_FORCE_TO_MEM;
5024 case AARCH64_CMODEL_TINY_PIC:
5025 case AARCH64_CMODEL_TINY:
5026 return SYMBOL_TINY_ABSOLUTE;
5028 case AARCH64_CMODEL_SMALL_PIC:
5029 case AARCH64_CMODEL_SMALL:
5030 return SYMBOL_SMALL_ABSOLUTE;
5032 default:
5033 gcc_unreachable ();
5037 if (GET_CODE (x) == SYMBOL_REF)
5039 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
5040 || CONSTANT_POOL_ADDRESS_P (x))
5041 return SYMBOL_FORCE_TO_MEM;
5043 if (aarch64_tls_symbol_p (x))
5044 return aarch64_classify_tls_symbol (x);
5046 switch (aarch64_cmodel)
5048 case AARCH64_CMODEL_TINY:
5049 if (SYMBOL_REF_WEAK (x))
5050 return SYMBOL_FORCE_TO_MEM;
5051 return SYMBOL_TINY_ABSOLUTE;
5053 case AARCH64_CMODEL_SMALL:
5054 if (SYMBOL_REF_WEAK (x))
5055 return SYMBOL_FORCE_TO_MEM;
5056 return SYMBOL_SMALL_ABSOLUTE;
5058 case AARCH64_CMODEL_TINY_PIC:
5059 if (!aarch64_symbol_binds_local_p (x))
5060 return SYMBOL_SMALL_GOT;
5061 return SYMBOL_TINY_ABSOLUTE;
5063 case AARCH64_CMODEL_SMALL_PIC:
5064 if (!aarch64_symbol_binds_local_p (x))
5065 return SYMBOL_SMALL_GOT;
5066 return SYMBOL_SMALL_ABSOLUTE;
5068 default:
5069 gcc_unreachable ();
5073 /* By default push everything into the constant pool. */
5074 return SYMBOL_FORCE_TO_MEM;
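/* Editorial note, not part of the original source: for example, under
   -mcmodel=small a weak symbol or a constant-pool address is forced into
   the literal pool (SYMBOL_FORCE_TO_MEM), an ordinary definition is
   addressed directly via ADRP/ADD (SYMBOL_SMALL_ABSOLUTE), and under
   -fpic a symbol that may be preempted is accessed through the GOT
   (SYMBOL_SMALL_GOT).  */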
5077 bool
5078 aarch64_constant_address_p (rtx x)
5080 return (CONSTANT_P (x) && memory_address_p (DImode, x));
5083 bool
5084 aarch64_legitimate_pic_operand_p (rtx x)
5086 if (GET_CODE (x) == SYMBOL_REF
5087 || (GET_CODE (x) == CONST
5088 && GET_CODE (XEXP (x, 0)) == PLUS
5089 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5090 return false;
5092 return true;
5095 /* Return true if X holds either a quarter-precision or
5096 floating-point +0.0 constant. */
5097 static bool
5098 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5100 if (!CONST_DOUBLE_P (x))
5101 return false;
5103 /* TODO: We could handle moving 0.0 to a TFmode register,
5104 but first we would like to refactor movtf_aarch64 to be
5105 more amenable to splitting moves properly and to gating
5106 correctly on TARGET_SIMD.  For now, reject all constants
5107 that are not destined for SFmode or DFmode registers. */
5108 if (!(mode == SFmode || mode == DFmode))
5109 return false;
5111 if (aarch64_float_const_zero_rtx_p (x))
5112 return true;
5113 return aarch64_float_const_representable_p (x);
5116 static bool
5117 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5119 /* Do not allow vector struct mode constants. We could support
5120 0 and -1 easily, but they need support in aarch64-simd.md. */
5121 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5122 return false;
5124 /* This could probably go away because
5125 we now decompose CONST_INTs according to expand_mov_immediate. */
5126 if ((GET_CODE (x) == CONST_VECTOR
5127 && aarch64_simd_valid_immediate (x, mode, false, NULL))
5128 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5129 return !targetm.cannot_force_const_mem (mode, x);
5131 if (GET_CODE (x) == HIGH
5132 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5133 return true;
5135 return aarch64_constant_address_p (x);
5139 aarch64_load_tp (rtx target)
5141 if (!target
5142 || GET_MODE (target) != Pmode
5143 || !register_operand (target, Pmode))
5144 target = gen_reg_rtx (Pmode);
5146 /* Can return in any reg. */
5147 emit_insn (gen_aarch64_load_tp_hard (target));
5148 return target;
5151 /* On AAPCS systems, this is the "struct __va_list". */
5152 static GTY(()) tree va_list_type;
5154 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5155 Return the type to use as __builtin_va_list.
5157 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5159 struct __va_list
5161 void *__stack;
5162 void *__gr_top;
5163 void *__vr_top;
5164 int __gr_offs;
5165 int __vr_offs;
5166 }; */
5168 static tree
5169 aarch64_build_builtin_va_list (void)
5171 tree va_list_name;
5172 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5174 /* Create the type. */
5175 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5176 /* Give it the required name. */
5177 va_list_name = build_decl (BUILTINS_LOCATION,
5178 TYPE_DECL,
5179 get_identifier ("__va_list"),
5180 va_list_type);
5181 DECL_ARTIFICIAL (va_list_name) = 1;
5182 TYPE_NAME (va_list_type) = va_list_name;
5183 TYPE_STUB_DECL (va_list_type) = va_list_name;
5185 /* Create the fields. */
5186 f_stack = build_decl (BUILTINS_LOCATION,
5187 FIELD_DECL, get_identifier ("__stack"),
5188 ptr_type_node);
5189 f_grtop = build_decl (BUILTINS_LOCATION,
5190 FIELD_DECL, get_identifier ("__gr_top"),
5191 ptr_type_node);
5192 f_vrtop = build_decl (BUILTINS_LOCATION,
5193 FIELD_DECL, get_identifier ("__vr_top"),
5194 ptr_type_node);
5195 f_groff = build_decl (BUILTINS_LOCATION,
5196 FIELD_DECL, get_identifier ("__gr_offs"),
5197 integer_type_node);
5198 f_vroff = build_decl (BUILTINS_LOCATION,
5199 FIELD_DECL, get_identifier ("__vr_offs"),
5200 integer_type_node);
5202 DECL_ARTIFICIAL (f_stack) = 1;
5203 DECL_ARTIFICIAL (f_grtop) = 1;
5204 DECL_ARTIFICIAL (f_vrtop) = 1;
5205 DECL_ARTIFICIAL (f_groff) = 1;
5206 DECL_ARTIFICIAL (f_vroff) = 1;
5208 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5209 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5210 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5211 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5212 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5214 TYPE_FIELDS (va_list_type) = f_stack;
5215 DECL_CHAIN (f_stack) = f_grtop;
5216 DECL_CHAIN (f_grtop) = f_vrtop;
5217 DECL_CHAIN (f_vrtop) = f_groff;
5218 DECL_CHAIN (f_groff) = f_vroff;
5220 /* Compute its layout. */
5221 layout_type (va_list_type);
5223 return va_list_type;
5226 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5227 static void
5228 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5230 const CUMULATIVE_ARGS *cum;
5231 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5232 tree stack, grtop, vrtop, groff, vroff;
5233 tree t;
5234 int gr_save_area_size;
5235 int vr_save_area_size;
5236 int vr_offset;
5238 cum = &crtl->args.info;
5239 gr_save_area_size
5240 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5241 vr_save_area_size
5242 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5244 if (TARGET_GENERAL_REGS_ONLY)
5246 if (cum->aapcs_nvrn > 0)
5247 sorry ("%qs and floating point or vector arguments",
5248 "-mgeneral-regs-only");
5249 vr_save_area_size = 0;
5252 f_stack = TYPE_FIELDS (va_list_type_node);
5253 f_grtop = DECL_CHAIN (f_stack);
5254 f_vrtop = DECL_CHAIN (f_grtop);
5255 f_groff = DECL_CHAIN (f_vrtop);
5256 f_vroff = DECL_CHAIN (f_groff);
5258 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5259 NULL_TREE);
5260 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5261 NULL_TREE);
5262 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5263 NULL_TREE);
5264 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5265 NULL_TREE);
5266 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5267 NULL_TREE);
5269 /* Emit code to initialize STACK, which points to the next varargs stack
5270 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
5271 by named arguments. STACK is 8-byte aligned. */
5272 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5273 if (cum->aapcs_stack_size > 0)
5274 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5275 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5276 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5278 /* Emit code to initialize GRTOP, the top of the GR save area.
5279 virtual_incoming_args_rtx should have been 16-byte aligned. */
5280 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5281 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5282 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5284 /* Emit code to initialize VRTOP, the top of the VR save area.
5285 This address is gr_save_area_bytes below GRTOP, rounded
5286 down to the next 16-byte boundary. */
5287 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5288 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5289 STACK_BOUNDARY / BITS_PER_UNIT);
5291 if (vr_offset)
5292 t = fold_build_pointer_plus_hwi (t, -vr_offset);
5293 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5294 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5296 /* Emit code to initialize GROFF, the offset from GRTOP of the
5297 next GPR argument. */
5298 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5299 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5300 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5302 /* Likewise emit code to initialize VROFF, the offset from VRTOP
5303 of the next VR argument. */
5304 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5305 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5306 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
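/* Editorial example, not part of the original source: if the named
   arguments of a varargs function consumed three of the eight general
   argument registers and one of the eight vector argument registers, the
   code above leaves __gr_offs == -40 (five remaining x-registers, 8 bytes
   each) and __vr_offs == -112 (seven remaining q-registers, 16 bytes
   each), with __gr_top and __vr_top pointing just past the respective
   save areas.  */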
5309 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
5311 static tree
5312 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5313 gimple_seq *post_p ATTRIBUTE_UNUSED)
5315 tree addr;
5316 bool indirect_p;
5317 bool is_ha; /* is HFA or HVA. */
5318 bool dw_align; /* double-word align. */
5319 enum machine_mode ag_mode = VOIDmode;
5320 int nregs;
5321 enum machine_mode mode;
5323 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5324 tree stack, f_top, f_off, off, arg, roundup, on_stack;
5325 HOST_WIDE_INT size, rsize, adjust, align;
5326 tree t, u, cond1, cond2;
5328 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5329 if (indirect_p)
5330 type = build_pointer_type (type);
5332 mode = TYPE_MODE (type);
5334 f_stack = TYPE_FIELDS (va_list_type_node);
5335 f_grtop = DECL_CHAIN (f_stack);
5336 f_vrtop = DECL_CHAIN (f_grtop);
5337 f_groff = DECL_CHAIN (f_vrtop);
5338 f_vroff = DECL_CHAIN (f_groff);
5340 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5341 f_stack, NULL_TREE);
5342 size = int_size_in_bytes (type);
5343 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5345 dw_align = false;
5346 adjust = 0;
5347 if (aarch64_vfp_is_call_or_return_candidate (mode,
5348 type,
5349 &ag_mode,
5350 &nregs,
5351 &is_ha))
5353 /* TYPE passed in fp/simd registers. */
5354 if (TARGET_GENERAL_REGS_ONLY)
5355 sorry ("%qs and floating point or vector arguments",
5356 "-mgeneral-regs-only");
5358 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5359 unshare_expr (valist), f_vrtop, NULL_TREE);
5360 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5361 unshare_expr (valist), f_vroff, NULL_TREE);
5363 rsize = nregs * UNITS_PER_VREG;
5365 if (is_ha)
5367 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5368 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5370 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5371 && size < UNITS_PER_VREG)
5373 adjust = UNITS_PER_VREG - size;
5376 else
5378 /* TYPE passed in general registers. */
5379 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5380 unshare_expr (valist), f_grtop, NULL_TREE);
5381 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5382 unshare_expr (valist), f_groff, NULL_TREE);
5383 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5384 nregs = rsize / UNITS_PER_WORD;
5386 if (align > 8)
5387 dw_align = true;
5389 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5390 && size < UNITS_PER_WORD)
5392 adjust = UNITS_PER_WORD - size;
5396 /* Get a local temporary for the field value. */
5397 off = get_initialized_tmp_var (f_off, pre_p, NULL);
5399 /* Emit code to branch if off >= 0. */
5400 t = build2 (GE_EXPR, boolean_type_node, off,
5401 build_int_cst (TREE_TYPE (off), 0));
5402 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5404 if (dw_align)
5406 /* Emit: offs = (offs + 15) & -16. */
5407 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5408 build_int_cst (TREE_TYPE (off), 15));
5409 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5410 build_int_cst (TREE_TYPE (off), -16));
5411 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5413 else
5414 roundup = NULL;
5416 /* Update ap.__[g|v]r_offs */
5417 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5418 build_int_cst (TREE_TYPE (off), rsize));
5419 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5421 /* String up. */
5422 if (roundup)
5423 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5425 /* [cond2] if (ap.__[g|v]r_offs > 0) */
5426 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5427 build_int_cst (TREE_TYPE (f_off), 0));
5428 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5430 /* String up: make sure the assignment happens before the use. */
5431 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5432 COND_EXPR_ELSE (cond1) = t;
5434 /* Prepare the trees handling the argument that is passed on the stack;
5435 the top-level node will be stored in ON_STACK. */
5436 arg = get_initialized_tmp_var (stack, pre_p, NULL);
5437 if (align > 8)
5439 /* if (alignof(type) > 8) arg = (arg + 15) & -16; */
5440 t = fold_convert (intDI_type_node, arg);
5441 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5442 build_int_cst (TREE_TYPE (t), 15));
5443 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5444 build_int_cst (TREE_TYPE (t), -16));
5445 t = fold_convert (TREE_TYPE (arg), t);
5446 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5448 else
5449 roundup = NULL;
5450 /* Advance ap.__stack */
5451 t = fold_convert (intDI_type_node, arg);
5452 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5453 build_int_cst (TREE_TYPE (t), size + 7));
5454 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5455 build_int_cst (TREE_TYPE (t), -8));
5456 t = fold_convert (TREE_TYPE (arg), t);
5457 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5458 /* String up roundup and advance. */
5459 if (roundup)
5460 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5461 /* String up with arg */
5462 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5463 /* Big-endianness related address adjustment. */
5464 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5465 && size < UNITS_PER_WORD)
5467 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5468 size_int (UNITS_PER_WORD - size));
5469 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5472 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5473 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5475 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
5476 t = off;
5477 if (adjust)
5478 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5479 build_int_cst (TREE_TYPE (off), adjust));
5481 t = fold_convert (sizetype, t);
5482 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5484 if (is_ha)
5486 /* type ha; // treat as "struct {ftype field[n];}"
5487 ... [computing offs]
5488 for (i = 0; i < nregs; ++i, offs += 16)
5489 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5490 return ha; */
5491 int i;
5492 tree tmp_ha, field_t, field_ptr_t;
5494 /* Declare a local variable. */
5495 tmp_ha = create_tmp_var_raw (type, "ha");
5496 gimple_add_tmp_var (tmp_ha);
5498 /* Establish the base type. */
5499 switch (ag_mode)
5501 case SFmode:
5502 field_t = float_type_node;
5503 field_ptr_t = float_ptr_type_node;
5504 break;
5505 case DFmode:
5506 field_t = double_type_node;
5507 field_ptr_t = double_ptr_type_node;
5508 break;
5509 case TFmode:
5510 field_t = long_double_type_node;
5511 field_ptr_t = long_double_ptr_type_node;
5512 break;
5513 /* Half-precision and quad-precision types are not fully supported yet.
5514 Enable the following code once that support is complete; the correct
5515 type node for __fp16 * still needs to be found. */
5516 #if 0
5517 case HFmode:
5518 field_t = float_type_node;
5519 field_ptr_t = float_ptr_type_node;
5520 break;
5521 #endif
5522 case V2SImode:
5523 case V4SImode:
5525 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5526 field_t = build_vector_type_for_mode (innertype, ag_mode);
5527 field_ptr_t = build_pointer_type (field_t);
5529 break;
5530 default:
5531 gcc_assert (0);
5534 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area) */
5535 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5536 addr = t;
5537 t = fold_convert (field_ptr_t, addr);
5538 t = build2 (MODIFY_EXPR, field_t,
5539 build1 (INDIRECT_REF, field_t, tmp_ha),
5540 build1 (INDIRECT_REF, field_t, t));
5542 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
5543 for (i = 1; i < nregs; ++i)
5545 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5546 u = fold_convert (field_ptr_t, addr);
5547 u = build2 (MODIFY_EXPR, field_t,
5548 build2 (MEM_REF, field_t, tmp_ha,
5549 build_int_cst (field_ptr_t,
5550 (i *
5551 int_size_in_bytes (field_t)))),
5552 build1 (INDIRECT_REF, field_t, u));
5553 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5556 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5557 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5560 COND_EXPR_ELSE (cond2) = t;
5561 addr = fold_convert (build_pointer_type (type), cond1);
5562 addr = build_va_arg_indirect_ref (addr);
5564 if (indirect_p)
5565 addr = build_va_arg_indirect_ref (addr);
5567 return addr;
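/* Editorial sketch, not part of the original source, of the tree built
   above, written as pseudo C for the general-register case:

     off = ap.__gr_offs;
     if (off >= 0)
       goto on_stack;                    // register save area already used up
     ap.__gr_offs = off + rsize;         // after any 16-byte round-up of off
     if (ap.__gr_offs > 0)
       goto on_stack;                    // this argument did not fit
     addr = ap.__gr_top + off;           // argument lives in the save area
     ...
   on_stack:
     addr = ap.__stack;                  // aligned first if required
     ap.__stack += round_up (size, 8);

   The vector-register path is the same with __vr_top and __vr_offs, and
   homogeneous aggregates are additionally copied field by field into a
   local temporary.  */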
5570 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
5572 static void
5573 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5574 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5575 int no_rtl)
5577 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5578 CUMULATIVE_ARGS local_cum;
5579 int gr_saved, vr_saved;
5581 /* The caller has advanced CUM up to, but not beyond, the last named
5582 argument. Advance a local copy of CUM past the last "real" named
5583 argument, to find out how many registers are left over. */
5584 local_cum = *cum;
5585 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
5587 /* Find out how many registers we need to save. */
5588 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5589 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
5591 if (TARGET_GENERAL_REGS_ONLY)
5593 if (local_cum.aapcs_nvrn > 0)
5594 sorry ("%qs and floating point or vector arguments",
5595 "-mgeneral-regs-only");
5596 vr_saved = 0;
5599 if (!no_rtl)
5601 if (gr_saved > 0)
5603 rtx ptr, mem;
5605 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
5606 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5607 - gr_saved * UNITS_PER_WORD);
5608 mem = gen_frame_mem (BLKmode, ptr);
5609 set_mem_alias_set (mem, get_varargs_alias_set ());
5611 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5612 mem, gr_saved);
5614 if (vr_saved > 0)
5616 /* We can't use move_block_from_reg, because it will use
5617 the wrong mode, storing D regs only. */
5618 enum machine_mode mode = TImode;
5619 int off, i;
5621 /* Set OFF to the offset from virtual_incoming_args_rtx of
5622 the first vector register. The VR save area lies below
5623 the GR one, and is aligned to 16 bytes. */
5624 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5625 STACK_BOUNDARY / BITS_PER_UNIT);
5626 off -= vr_saved * UNITS_PER_VREG;
5628 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
5630 rtx ptr, mem;
5632 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
5633 mem = gen_frame_mem (mode, ptr);
5634 set_mem_alias_set (mem, get_varargs_alias_set ());
5635 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
5636 off += UNITS_PER_VREG;
5641 /* We don't save the size into *PRETEND_SIZE because we want to avoid
5642 any complication of having crtl->args.pretend_args_size changed. */
5643 cfun->machine->saved_varargs_size
5644 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5645 STACK_BOUNDARY / BITS_PER_UNIT)
5646 + vr_saved * UNITS_PER_VREG);
5649 static void
5650 aarch64_conditional_register_usage (void)
5652 int i;
5653 if (!TARGET_FLOAT)
5655 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
5657 fixed_regs[i] = 1;
5658 call_used_regs[i] = 1;
5663 /* Walk down the type tree of TYPE counting consecutive base elements.
5664 If *MODEP is VOIDmode, then set it to the first valid floating point
5665 type. If a non-floating point type is found, or if a floating point
5666 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5667 otherwise return the count in the sub-tree. */
5668 static int
5669 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
5671 enum machine_mode mode;
5672 HOST_WIDE_INT size;
5674 switch (TREE_CODE (type))
5676 case REAL_TYPE:
5677 mode = TYPE_MODE (type);
5678 if (mode != DFmode && mode != SFmode && mode != TFmode)
5679 return -1;
5681 if (*modep == VOIDmode)
5682 *modep = mode;
5684 if (*modep == mode)
5685 return 1;
5687 break;
5689 case COMPLEX_TYPE:
5690 mode = TYPE_MODE (TREE_TYPE (type));
5691 if (mode != DFmode && mode != SFmode && mode != TFmode)
5692 return -1;
5694 if (*modep == VOIDmode)
5695 *modep = mode;
5697 if (*modep == mode)
5698 return 2;
5700 break;
5702 case VECTOR_TYPE:
5703 /* Use V2SImode and V4SImode as representatives of all 64-bit
5704 and 128-bit vector types. */
5705 size = int_size_in_bytes (type);
5706 switch (size)
5708 case 8:
5709 mode = V2SImode;
5710 break;
5711 case 16:
5712 mode = V4SImode;
5713 break;
5714 default:
5715 return -1;
5718 if (*modep == VOIDmode)
5719 *modep = mode;
5721 /* Vector modes are considered to be opaque: two vectors are
5722 equivalent for the purposes of being homogeneous aggregates
5723 if they are the same size. */
5724 if (*modep == mode)
5725 return 1;
5727 break;
5729 case ARRAY_TYPE:
5731 int count;
5732 tree index = TYPE_DOMAIN (type);
5734 /* Can't handle incomplete types. */
5735 if (!COMPLETE_TYPE_P (type))
5736 return -1;
5738 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5739 if (count == -1
5740 || !index
5741 || !TYPE_MAX_VALUE (index)
5742 || !host_integerp (TYPE_MAX_VALUE (index), 1)
5743 || !TYPE_MIN_VALUE (index)
5744 || !host_integerp (TYPE_MIN_VALUE (index), 1)
5745 || count < 0)
5746 return -1;
5748 count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
5749 - tree_low_cst (TYPE_MIN_VALUE (index), 1));
5751 /* There must be no padding. */
5752 if (!host_integerp (TYPE_SIZE (type), 1)
5753 || (tree_low_cst (TYPE_SIZE (type), 1)
5754 != count * GET_MODE_BITSIZE (*modep)))
5755 return -1;
5757 return count;
5760 case RECORD_TYPE:
5762 int count = 0;
5763 int sub_count;
5764 tree field;
5766 /* Can't handle incomplete types. */
5767 if (!COMPLETE_TYPE_P (type))
5768 return -1;
5770 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5772 if (TREE_CODE (field) != FIELD_DECL)
5773 continue;
5775 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5776 if (sub_count < 0)
5777 return -1;
5778 count += sub_count;
5781 /* There must be no padding. */
5782 if (!host_integerp (TYPE_SIZE (type), 1)
5783 || (tree_low_cst (TYPE_SIZE (type), 1)
5784 != count * GET_MODE_BITSIZE (*modep)))
5785 return -1;
5787 return count;
5790 case UNION_TYPE:
5791 case QUAL_UNION_TYPE:
5793 /* These aren't very interesting except in a degenerate case. */
5794 int count = 0;
5795 int sub_count;
5796 tree field;
5798 /* Can't handle incomplete types. */
5799 if (!COMPLETE_TYPE_P (type))
5800 return -1;
5802 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5804 if (TREE_CODE (field) != FIELD_DECL)
5805 continue;
5807 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5808 if (sub_count < 0)
5809 return -1;
5810 count = count > sub_count ? count : sub_count;
5813 /* There must be no padding. */
5814 if (!host_integerp (TYPE_SIZE (type), 1)
5815 || (tree_low_cst (TYPE_SIZE (type), 1)
5816 != count * GET_MODE_BITSIZE (*modep)))
5817 return -1;
5819 return count;
5822 default:
5823 break;
5826 return -1;
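/* Editorial examples, not part of the original source, for the walk above:

     struct { double x, y, z; }     -> 3, *MODEP == DFmode
     _Complex double                -> 2, *MODEP == DFmode
     float[4]                       -> 4, *MODEP == SFmode
     struct { float f; double d; }  -> -1 (mixed base types)

   i.e. the function counts how many identical floating-point or vector
   base elements make up TYPE, and returns -1 if TYPE is not homogeneous.  */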
5829 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
5830 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
5831 array types. The C99 floating-point complex types are also considered
5832 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
5833 types, which are GCC extensions and out of the scope of AAPCS64, are
5834 treated as composite types here as well.
5836 Note that MODE itself is not sufficient in determining whether a type
5837 is such a composite type or not. This is because
5838 stor-layout.c:compute_record_mode may have already changed the MODE
5839 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
5840 structure with only one field may have its MODE set to the mode of the
5841 field. Also an integer mode whose size matches the size of the
5842 RECORD_TYPE type may be used to substitute the original mode
5843 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
5844 solely relied on. */
5846 static bool
5847 aarch64_composite_type_p (const_tree type,
5848 enum machine_mode mode)
5850 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
5851 return true;
5853 if (mode == BLKmode
5854 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
5855 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
5856 return true;
5858 return false;
5861 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
5862 type as described in AAPCS64 \S 4.1.2.
5864 See the comment above aarch64_composite_type_p for the notes on MODE. */
5866 static bool
5867 aarch64_short_vector_p (const_tree type,
5868 enum machine_mode mode)
5870 HOST_WIDE_INT size = -1;
5872 if (type && TREE_CODE (type) == VECTOR_TYPE)
5873 size = int_size_in_bytes (type);
5874 else if (!aarch64_composite_type_p (type, mode)
5875 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5876 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
5877 size = GET_MODE_SIZE (mode);
5879 return size == 8 || size == 16;
5882 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
5883 shall be passed or returned in simd/fp register(s) (providing these
5884 parameter passing registers are available).
5886 Upon successful return, *COUNT returns the number of needed registers,
5887 *BASE_MODE returns the mode of the individual register and, when IS_HA
5888 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
5889 floating-point aggregate or a homogeneous short-vector aggregate. */
5891 static bool
5892 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
5893 const_tree type,
5894 enum machine_mode *base_mode,
5895 int *count,
5896 bool *is_ha)
5898 enum machine_mode new_mode = VOIDmode;
5899 bool composite_p = aarch64_composite_type_p (type, mode);
5901 if (is_ha != NULL) *is_ha = false;
5903 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
5904 || aarch64_short_vector_p (type, mode))
5906 *count = 1;
5907 new_mode = mode;
5909 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5911 if (is_ha != NULL) *is_ha = true;
5912 *count = 2;
5913 new_mode = GET_MODE_INNER (mode);
5915 else if (type && composite_p)
5917 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5919 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
5921 if (is_ha != NULL) *is_ha = true;
5922 *count = ag_count;
5924 else
5925 return false;
5927 else
5928 return false;
5930 *base_mode = new_mode;
5931 return true;
5934 /* Implement TARGET_STRUCT_VALUE_RTX. */
5936 static rtx
5937 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
5938 int incoming ATTRIBUTE_UNUSED)
5940 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
5943 /* Implements target hook vector_mode_supported_p. */
5944 static bool
5945 aarch64_vector_mode_supported_p (enum machine_mode mode)
5947 if (TARGET_SIMD
5948 && (mode == V4SImode || mode == V8HImode
5949 || mode == V16QImode || mode == V2DImode
5950 || mode == V2SImode || mode == V4HImode
5951 || mode == V8QImode || mode == V2SFmode
5952 || mode == V4SFmode || mode == V2DFmode))
5953 return true;
5955 return false;
5958 /* Return appropriate SIMD container
5959 for MODE within a vector of WIDTH bits. */
5960 static enum machine_mode
5961 aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
5963 gcc_assert (width == 64 || width == 128);
5964 if (TARGET_SIMD)
5966 if (width == 128)
5967 switch (mode)
5969 case DFmode:
5970 return V2DFmode;
5971 case SFmode:
5972 return V4SFmode;
5973 case SImode:
5974 return V4SImode;
5975 case HImode:
5976 return V8HImode;
5977 case QImode:
5978 return V16QImode;
5979 case DImode:
5980 return V2DImode;
5981 default:
5982 break;
5984 else
5985 switch (mode)
5987 case SFmode:
5988 return V2SFmode;
5989 case SImode:
5990 return V2SImode;
5991 case HImode:
5992 return V4HImode;
5993 case QImode:
5994 return V8QImode;
5995 default:
5996 break;
5999 return word_mode;
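/* Editorial examples, not part of the original source: with SIMD enabled,
   SImode in a 128-bit container becomes V4SImode and in a 64-bit container
   V2SImode; DImode only has a 128-bit container (V2DImode), so asking for
   a 64-bit DImode container falls back to word_mode, as does every request
   when !TARGET_SIMD.  */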
6002 /* Return 128-bit container as the preferred SIMD mode for MODE. */
6003 static enum machine_mode
6004 aarch64_preferred_simd_mode (enum machine_mode mode)
6006 return aarch64_simd_container_mode (mode, 128);
6009 /* Return the bitmask of possible vector sizes for the vectorizer
6010 to iterate over. */
6011 static unsigned int
6012 aarch64_autovectorize_vector_sizes (void)
6014 return (16 | 8);
6017 /* A table to help perform AArch64-specific name mangling for AdvSIMD
6018 vector types in order to conform to the AAPCS64 (see "Procedure
6019 Call Standard for the ARM 64-bit Architecture", Appendix A). To
6020 qualify for emission with the mangled names defined in that document,
6021 a vector type must not only be of the correct mode but also be
6022 composed of AdvSIMD vector element types (e.g.
6023 __builtin_aarch64_simd_qi); these types are registered by
6024 aarch64_init_simd_builtins (). In other words, vector types defined
6025 in other ways e.g. via vector_size attribute will get default
6026 mangled names. */
6027 typedef struct
6029 enum machine_mode mode;
6030 const char *element_type_name;
6031 const char *mangled_name;
6032 } aarch64_simd_mangle_map_entry;
6034 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
6035 /* 64-bit containerized types. */
6036 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
6037 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
6038 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
6039 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
6040 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
6041 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
6042 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
6043 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
6044 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6045 /* 128-bit containerized types. */
6046 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
6047 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
6048 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
6049 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
6050 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
6051 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
6052 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
6053 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
6054 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
6055 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6056 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6057 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6058 { VOIDmode, NULL, NULL }
6061 /* Implement TARGET_MANGLE_TYPE. */
6063 static const char *
6064 aarch64_mangle_type (const_tree type)
6066 /* The AArch64 ABI documents say that "__va_list" has to be
6067 mangled as if it is in the "std" namespace. */
6068 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6069 return "St9__va_list";
6071 /* Check the mode of the vector type, and the name of the vector
6072 element type, against the table. */
6073 if (TREE_CODE (type) == VECTOR_TYPE)
6075 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6077 while (pos->mode != VOIDmode)
6079 tree elt_type = TREE_TYPE (type);
6081 if (pos->mode == TYPE_MODE (type)
6082 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6083 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6084 pos->element_type_name))
6085 return pos->mangled_name;
6087 pos++;
6091 /* Use the default mangling. */
6092 return NULL;
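/* Editorial example, not part of the original source: assuming arm_neon.h
   defines int32x4_t in terms of the __builtin_aarch64_simd_si element type,
   its mode is V4SImode and the table above maps it to the mangled name
   "11__Int32x4_t", whereas a user vector declared with
   __attribute__ ((vector_size (16))) keeps the default mangling.  */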
6095 /* Return the equivalent letter for size. */
6096 static char
6097 sizetochar (int size)
6099 switch (size)
6101 case 64: return 'd';
6102 case 32: return 's';
6103 case 16: return 'h';
6104 case 8 : return 'b';
6105 default: gcc_unreachable ();
6109 /* Return true iff x is a uniform vector of floating-point
6110 constants, and the constant can be represented in
6111 quarter-precision form.  Note that, as aarch64_float_const_representable_p
6112 rejects both +0.0 and -0.0, this function rejects them as well. */
6113 static bool
6114 aarch64_vect_float_const_representable_p (rtx x)
6116 int i = 0;
6117 REAL_VALUE_TYPE r0, ri;
6118 rtx x0, xi;
6120 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6121 return false;
6123 x0 = CONST_VECTOR_ELT (x, 0);
6124 if (!CONST_DOUBLE_P (x0))
6125 return false;
6127 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
6129 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6131 xi = CONST_VECTOR_ELT (x, i);
6132 if (!CONST_DOUBLE_P (xi))
6133 return false;
6135 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6136 if (!REAL_VALUES_EQUAL (r0, ri))
6137 return false;
6140 return aarch64_float_const_representable_p (x0);
6143 /* Return true for valid and false for invalid. */
6144 bool
6145 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
6146 struct simd_immediate_info *info)
6148 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6149 matches = 1; \
6150 for (i = 0; i < idx; i += (STRIDE)) \
6151 if (!(TEST)) \
6152 matches = 0; \
6153 if (matches) \
6155 immtype = (CLASS); \
6156 elsize = (ELSIZE); \
6157 eshift = (SHIFT); \
6158 emvn = (NEG); \
6159 break; \
6162 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6163 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6164 unsigned char bytes[16];
6165 int immtype = -1, matches;
6166 unsigned int invmask = inverse ? 0xff : 0;
6167 int eshift, emvn;
6169 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6171 if (! (aarch64_simd_imm_zero_p (op, mode)
6172 || aarch64_vect_float_const_representable_p (op)))
6173 return false;
6175 if (info)
6177 info->value = CONST_VECTOR_ELT (op, 0);
6178 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
6179 info->mvn = false;
6180 info->shift = 0;
6183 return true;
6186 /* Splat vector constant out into a byte vector. */
6187 for (i = 0; i < n_elts; i++)
6189 rtx el = CONST_VECTOR_ELT (op, i);
6190 unsigned HOST_WIDE_INT elpart;
6191 unsigned int part, parts;
6193 if (GET_CODE (el) == CONST_INT)
6195 elpart = INTVAL (el);
6196 parts = 1;
6198 else if (GET_CODE (el) == CONST_DOUBLE)
6200 elpart = CONST_DOUBLE_LOW (el);
6201 parts = 2;
6203 else
6204 gcc_unreachable ();
6206 for (part = 0; part < parts; part++)
6208 unsigned int byte;
6209 for (byte = 0; byte < innersize; byte++)
6211 bytes[idx++] = (elpart & 0xff) ^ invmask;
6212 elpart >>= BITS_PER_UNIT;
6214 if (GET_CODE (el) == CONST_DOUBLE)
6215 elpart = CONST_DOUBLE_HIGH (el);
6219 /* Sanity check. */
6220 gcc_assert (idx == GET_MODE_SIZE (mode));
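 /* For example, a V4SImode constant with every element equal to 0x00ff0000
    splats to the repeating byte pattern { 0x00, 0x00, 0xff, 0x00 } and is
    matched by the immtype 2 CHECK below (elsize 32, shift 16, mvn 0); the
    code further down then reduces it to the single byte 0xff, which is
    emitted as "movi ..., 0xff, lsl 16".  */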
6224 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6225 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6227 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6228 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6230 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6231 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6233 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6234 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
6236 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
6238 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
6240 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6241 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6243 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6244 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6246 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6247 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6249 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6250 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
6252 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
6254 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
6256 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6257 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6259 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6260 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6262 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6263 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 0, 0);
6265 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6266 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 0, 1);
6268 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
6270 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6271 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
6273 while (0);
6275 /* TODO: Currently the assembler cannot handle types 12 to 15.
6276 And there is no way to specify cmode through the compiler.
6277 Disable them till there is support in the assembler. */
6278 if (immtype == -1
6279 || (immtype >= 12 && immtype <= 15)
6280 || immtype == 18)
6281 return false;
6283 if (info)
6285 info->element_width = elsize;
6286 info->mvn = emvn != 0;
6287 info->shift = eshift;
6289 unsigned HOST_WIDE_INT imm = 0;
6291 /* Un-invert bytes of recognized vector, if necessary. */
6292 if (invmask != 0)
6293 for (i = 0; i < idx; i++)
6294 bytes[i] ^= invmask;
6296 if (immtype == 17)
6298 /* FIXME: Broken on 32-bit H_W_I hosts. */
6299 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6301 for (i = 0; i < 8; i++)
6302 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6303 << (i * BITS_PER_UNIT);
6306 info->value = GEN_INT (imm);
6308 else
6310 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6311 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6313 /* Construct 'abcdefgh' because the assembler cannot handle
6314 generic constants. */
6315 if (info->mvn)
6316 imm = ~imm;
6317 imm = (imm >> info->shift) & 0xff;
6318 info->value = GEN_INT (imm);
6322 return true;
6323 #undef CHECK
6326 static bool
6327 aarch64_const_vec_all_same_int_p (rtx x,
6328 HOST_WIDE_INT minval,
6329 HOST_WIDE_INT maxval)
6331 HOST_WIDE_INT firstval;
6332 int count, i;
6334 if (GET_CODE (x) != CONST_VECTOR
6335 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
6336 return false;
6338 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6339 if (firstval < minval || firstval > maxval)
6340 return false;
6342 count = CONST_VECTOR_NUNITS (x);
6343 for (i = 1; i < count; i++)
6344 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6345 return false;
6347 return true;
6350 /* Check if immediate shift constants are within range. */
6351 bool
6352 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6354 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6355 if (left)
6356 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6357 else
6358 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
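 /* For example, with V4SImode (32-bit lanes) an immediate left shift must be
    a uniform value in the range [0, 31], while an immediate right shift must
    lie in [1, 32].  */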
6361 /* Return true if X is a uniform vector where all elements
6362 are either the floating-point constant 0.0 or the
6363 integer constant 0. */
6364 bool
6365 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6367 return x == CONST0_RTX (mode);
6370 bool
6371 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6373 HOST_WIDE_INT imm = INTVAL (x);
6374 int i;
6376 for (i = 0; i < 8; i++)
6378 unsigned int byte = imm & 0xff;
6379 if (byte != 0xff && byte != 0)
6380 return false;
6381 imm >>= 8;
6384 return true;
6387 bool
6388 aarch64_mov_operand_p (rtx x,
6389 enum aarch64_symbol_context context,
6390 enum machine_mode mode)
6392 if (GET_CODE (x) == HIGH
6393 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6394 return true;
6396 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
6397 return true;
6399 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
6400 return true;
6402 return aarch64_classify_symbolic_expression (x, context)
6403 == SYMBOL_TINY_ABSOLUTE;
6406 /* Return a const_int vector of VAL. */
6408 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6410 int nunits = GET_MODE_NUNITS (mode);
6411 rtvec v = rtvec_alloc (nunits);
6412 int i;
6414 for (i = 0; i < nunits; i++)
6415 RTVEC_ELT (v, i) = GEN_INT (val);
6417 return gen_rtx_CONST_VECTOR (mode, v);
6420 /* Check OP is a legal scalar immediate for the MOVI instruction. */
6422 bool
6423 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
6425 enum machine_mode vmode;
6427 gcc_assert (!VECTOR_MODE_P (mode));
6428 vmode = aarch64_preferred_simd_mode (mode);
6429 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
6430 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
6433 /* Construct and return a PARALLEL RTX vector with the lane numbers of the high or low half of MODE. */
6435 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6437 int nunits = GET_MODE_NUNITS (mode);
6438 rtvec v = rtvec_alloc (nunits / 2);
6439 int base = high ? nunits / 2 : 0;
6440 rtx t1;
6441 int i;
6443 for (i = 0; i < nunits / 2; i++)
6444 RTVEC_ELT (v, i) = GEN_INT (base + i);
6446 t1 = gen_rtx_PARALLEL (mode, v);
6447 return t1;
6450 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
6451 HIGH (exclusive). */
6452 void
6453 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6455 HOST_WIDE_INT lane;
6456 gcc_assert (GET_CODE (operand) == CONST_INT);
6457 lane = INTVAL (operand);
6459 if (lane < low || lane >= high)
6460 error ("lane out of range");
6463 void
6464 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6466 gcc_assert (GET_CODE (operand) == CONST_INT);
6467 HOST_WIDE_INT lane = INTVAL (operand);
6469 if (lane < low || lane >= high)
6470 error ("constant out of range");
6473 /* Emit code to reinterpret one AdvSIMD type as another,
6474 without altering bits. */
6475 void
6476 aarch64_simd_reinterpret (rtx dest, rtx src)
6478 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6481 /* Emit code to place an AdvSIMD pair result in memory locations (with equal
6482 registers). */
6483 void
6484 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6485 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6486 rtx op1)
6488 rtx mem = gen_rtx_MEM (mode, destaddr);
6489 rtx tmp1 = gen_reg_rtx (mode);
6490 rtx tmp2 = gen_reg_rtx (mode);
6492 emit_insn (intfn (tmp1, op1, tmp2));
6494 emit_move_insn (mem, tmp1);
6495 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6496 emit_move_insn (mem, tmp2);
6499 /* Return TRUE if OP is a valid vector addressing mode. */
6500 bool
6501 aarch64_simd_mem_operand_p (rtx op)
6503 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6504 || GET_CODE (XEXP (op, 0)) == REG);
6507 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
6508 not to early-clobber SRC registers in the process.
6510 We assume that the operands described by SRC and DEST represent a
6511 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
6512 number of components into which the copy has been decomposed. */
6513 void
6514 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6515 rtx *src, unsigned int count)
6517 unsigned int i;
6519 if (!reg_overlap_mentioned_p (operands[0], operands[1])
6520 || REGNO (operands[0]) < REGNO (operands[1]))
6522 for (i = 0; i < count; i++)
6524 operands[2 * i] = dest[i];
6525 operands[2 * i + 1] = src[i];
6528 else
6530 for (i = 0; i < count; i++)
6532 operands[2 * i] = dest[count - i - 1];
6533 operands[2 * i + 1] = src[count - i - 1];
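 /* For instance, copying a two-component value from {V0, V1} into {V1, V2}
    overlaps and the destination register number is the higher one, so the
    components are emitted highest-first (V2 <- V1, then V1 <- V0) to avoid
    clobbering V1 before it has been read.  */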
6538 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6539 one of VSTRUCT modes: OI, CI or XI. */
6541 aarch64_simd_attr_length_move (rtx insn)
6543 enum machine_mode mode;
6545 extract_insn_cached (insn);
6547 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6549 mode = GET_MODE (recog_data.operand[0]);
6550 switch (mode)
6552 case OImode:
6553 return 8;
6554 case CImode:
6555 return 12;
6556 case XImode:
6557 return 16;
6558 default:
6559 gcc_unreachable ();
6562 return 4;
6565 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
6566 alignment of a vector to 128 bits. */
6567 static HOST_WIDE_INT
6568 aarch64_simd_vector_alignment (const_tree type)
6570 HOST_WIDE_INT align = tree_low_cst (TYPE_SIZE (type), 0);
6571 return MIN (align, 128);
6574 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
6575 static bool
6576 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
6578 if (is_packed)
6579 return false;
6581 /* We guarantee alignment for vectors up to 128-bits. */
6582 if (tree_int_cst_compare (TYPE_SIZE (type),
6583 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
6584 return false;
6586 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
6587 return true;
6590 /* If VALS is a vector constant that can be loaded into a register
6591 using DUP, generate instructions to do so and return an RTX to
6592 assign to the register. Otherwise return NULL_RTX. */
6593 static rtx
6594 aarch64_simd_dup_constant (rtx vals)
6596 enum machine_mode mode = GET_MODE (vals);
6597 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6598 int n_elts = GET_MODE_NUNITS (mode);
6599 bool all_same = true;
6600 rtx x;
6601 int i;
6603 if (GET_CODE (vals) != CONST_VECTOR)
6604 return NULL_RTX;
6606 for (i = 1; i < n_elts; ++i)
6608 x = CONST_VECTOR_ELT (vals, i);
6609 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
6610 all_same = false;
6613 if (!all_same)
6614 return NULL_RTX;
6616 /* We can load this constant by using DUP and a constant in a
6617 single general-purpose register. This will be cheaper than a vector
6618 load. */
6619 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
6620 return gen_rtx_VEC_DUPLICATE (mode, x);
6624 /* Generate code to load VALS, which is a PARALLEL containing only
6625 constants (for vec_init) or CONST_VECTOR, efficiently into a
6626 register. Returns an RTX to copy into the register, or NULL_RTX
6627 for a PARALLEL that can not be converted into a CONST_VECTOR. */
6628 static rtx
6629 aarch64_simd_make_constant (rtx vals)
6631 enum machine_mode mode = GET_MODE (vals);
6632 rtx const_dup;
6633 rtx const_vec = NULL_RTX;
6634 int n_elts = GET_MODE_NUNITS (mode);
6635 int n_const = 0;
6636 int i;
6638 if (GET_CODE (vals) == CONST_VECTOR)
6639 const_vec = vals;
6640 else if (GET_CODE (vals) == PARALLEL)
6642 /* A CONST_VECTOR must contain only CONST_INTs and
6643 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
6644 Only store valid constants in a CONST_VECTOR. */
6645 for (i = 0; i < n_elts; ++i)
6647 rtx x = XVECEXP (vals, 0, i);
6648 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
6649 n_const++;
6651 if (n_const == n_elts)
6652 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6654 else
6655 gcc_unreachable ();
6657 if (const_vec != NULL_RTX
6658 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
6659 /* Load using MOVI/MVNI. */
6660 return const_vec;
6661 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
6662 /* Loaded using DUP. */
6663 return const_dup;
6664 else if (const_vec != NULL_RTX)
6665 /* Load from constant pool. We can not take advantage of single-cycle
6666 LD1 because we need a PC-relative addressing mode. */
6667 return const_vec;
6668 else
6669 /* A PARALLEL containing something not valid inside CONST_VECTOR.
6670 We can not construct an initializer. */
6671 return NULL_RTX;
6674 void
6675 aarch64_expand_vector_init (rtx target, rtx vals)
6677 enum machine_mode mode = GET_MODE (target);
6678 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6679 int n_elts = GET_MODE_NUNITS (mode);
6680 int n_var = 0, one_var = -1;
6681 bool all_same = true;
6682 rtx x, mem;
6683 int i;
6685 x = XVECEXP (vals, 0, 0);
6686 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6687 n_var = 1, one_var = 0;
6689 for (i = 1; i < n_elts; ++i)
6691 x = XVECEXP (vals, 0, i);
6692 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
6693 ++n_var, one_var = i;
6695 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6696 all_same = false;
6699 if (n_var == 0)
6701 rtx constant = aarch64_simd_make_constant (vals);
6702 if (constant != NULL_RTX)
6704 emit_move_insn (target, constant);
6705 return;
6709 /* Splat a single non-constant element if we can. */
6710 if (all_same)
6712 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
6713 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
6714 return;
6717 /* One field is non-constant. Load constant then overwrite varying
6718 field. This is more efficient than using the stack. */
6719 if (n_var == 1)
6721 rtx copy = copy_rtx (vals);
6722 rtx index = GEN_INT (one_var);
6723 enum insn_code icode;
6725 /* Load constant part of vector, substitute neighboring value for
6726 varying element. */
6727 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
6728 aarch64_expand_vector_init (target, copy);
6730 /* Insert variable. */
6731 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
6732 icode = optab_handler (vec_set_optab, mode);
6733 gcc_assert (icode != CODE_FOR_nothing);
6734 emit_insn (GEN_FCN (icode) (target, x, index));
6735 return;
6738 /* Construct the vector in memory one field at a time
6739 and load the whole vector. */
6740 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6741 for (i = 0; i < n_elts; i++)
6742 emit_move_insn (adjust_address_nv (mem, inner_mode,
6743 i * GET_MODE_SIZE (inner_mode)),
6744 XVECEXP (vals, 0, i));
6745 emit_move_insn (target, mem);
6749 static unsigned HOST_WIDE_INT
6750 aarch64_shift_truncation_mask (enum machine_mode mode)
6752 return
6753 (aarch64_vector_mode_supported_p (mode)
6754 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
6757 #ifndef TLS_SECTION_ASM_FLAG
6758 #define TLS_SECTION_ASM_FLAG 'T'
6759 #endif
6761 void
6762 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
6763 tree decl ATTRIBUTE_UNUSED)
6765 char flagchars[10], *f = flagchars;
6767 /* If we have already declared this section, we can use an
6768 abbreviated form to switch back to it -- unless this section is
6769 part of a COMDAT group, in which case GAS requires the full
6770 declaration every time. */
6771 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6772 && (flags & SECTION_DECLARED))
6774 fprintf (asm_out_file, "\t.section\t%s\n", name);
6775 return;
6778 if (!(flags & SECTION_DEBUG))
6779 *f++ = 'a';
6780 if (flags & SECTION_WRITE)
6781 *f++ = 'w';
6782 if (flags & SECTION_CODE)
6783 *f++ = 'x';
6784 if (flags & SECTION_SMALL)
6785 *f++ = 's';
6786 if (flags & SECTION_MERGE)
6787 *f++ = 'M';
6788 if (flags & SECTION_STRINGS)
6789 *f++ = 'S';
6790 if (flags & SECTION_TLS)
6791 *f++ = TLS_SECTION_ASM_FLAG;
6792 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6793 *f++ = 'G';
6794 *f = '\0';
6796 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
6798 if (!(flags & SECTION_NOTYPE))
6800 const char *type;
6801 const char *format;
6803 if (flags & SECTION_BSS)
6804 type = "nobits";
6805 else
6806 type = "progbits";
6808 #ifdef TYPE_OPERAND_FMT
6809 format = "," TYPE_OPERAND_FMT;
6810 #else
6811 format = ",@%s";
6812 #endif
6814 fprintf (asm_out_file, format, type);
6816 if (flags & SECTION_ENTSIZE)
6817 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
6818 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
6820 if (TREE_CODE (decl) == IDENTIFIER_NODE)
6821 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
6822 else
6823 fprintf (asm_out_file, ",%s,comdat",
6824 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
6828 putc ('\n', asm_out_file);
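 /* For instance, for a COMDAT text section this might emit something like

	.section	.text._Z3foov,"axG",@progbits,_Z3foov,comdat

    where "_Z3foov" stands in for the actual COMDAT group name.  */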
6831 /* Select a format to encode pointers in exception handling data. */
6833 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
6835 int type;
6836 switch (aarch64_cmodel)
6838 case AARCH64_CMODEL_TINY:
6839 case AARCH64_CMODEL_TINY_PIC:
6840 case AARCH64_CMODEL_SMALL:
6841 case AARCH64_CMODEL_SMALL_PIC:
6842 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
6843 for everything. */
6844 type = DW_EH_PE_sdata4;
6845 break;
6846 default:
6847 /* No assumptions here. 8-byte relocs required. */
6848 type = DW_EH_PE_sdata8;
6849 break;
6851 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
6854 /* Emit load exclusive. */
6856 static void
6857 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
6858 rtx mem, rtx model_rtx)
6860 rtx (*gen) (rtx, rtx, rtx);
6862 switch (mode)
6864 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
6865 case HImode: gen = gen_aarch64_load_exclusivehi; break;
6866 case SImode: gen = gen_aarch64_load_exclusivesi; break;
6867 case DImode: gen = gen_aarch64_load_exclusivedi; break;
6868 default:
6869 gcc_unreachable ();
6872 emit_insn (gen (rval, mem, model_rtx));
6875 /* Emit store exclusive. */
6877 static void
6878 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
6879 rtx rval, rtx mem, rtx model_rtx)
6881 rtx (*gen) (rtx, rtx, rtx, rtx);
6883 switch (mode)
6885 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
6886 case HImode: gen = gen_aarch64_store_exclusivehi; break;
6887 case SImode: gen = gen_aarch64_store_exclusivesi; break;
6888 case DImode: gen = gen_aarch64_store_exclusivedi; break;
6889 default:
6890 gcc_unreachable ();
6893 emit_insn (gen (bval, rval, mem, model_rtx));
6896 /* Emit INSN as a jump and mark it as very unlikely to be taken. */
6898 static void
6899 aarch64_emit_unlikely_jump (rtx insn)
6901 rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
6903 insn = emit_jump_insn (insn);
6904 add_reg_note (insn, REG_BR_PROB, very_unlikely);
6907 /* Expand a compare and swap pattern. */
6909 void
6910 aarch64_expand_compare_and_swap (rtx operands[])
6912 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
6913 enum machine_mode mode, cmp_mode;
6914 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
6916 bval = operands[0];
6917 rval = operands[1];
6918 mem = operands[2];
6919 oldval = operands[3];
6920 newval = operands[4];
6921 is_weak = operands[5];
6922 mod_s = operands[6];
6923 mod_f = operands[7];
6924 mode = GET_MODE (mem);
6925 cmp_mode = mode;
6927 /* Normally the succ memory model must be stronger than fail, but in the
6928 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
6929 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
6931 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
6932 && INTVAL (mod_s) == MEMMODEL_RELEASE)
6933 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
6935 switch (mode)
6937 case QImode:
6938 case HImode:
6939 /* For short modes, we're going to perform the comparison in SImode,
6940 so do the zero-extension now. */
6941 cmp_mode = SImode;
6942 rval = gen_reg_rtx (SImode);
6943 oldval = convert_modes (SImode, mode, oldval, true);
6944 /* Fall through. */
6946 case SImode:
6947 case DImode:
6948 /* Force the value into a register if needed. */
6949 if (!aarch64_plus_operand (oldval, mode))
6950 oldval = force_reg (cmp_mode, oldval);
6951 break;
6953 default:
6954 gcc_unreachable ();
6957 switch (mode)
6959 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
6960 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
6961 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
6962 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
6963 default:
6964 gcc_unreachable ();
6967 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
6969 if (mode == QImode || mode == HImode)
6970 emit_move_insn (operands[1], gen_lowpart (mode, rval));
6972 x = gen_rtx_REG (CCmode, CC_REGNUM);
6973 x = gen_rtx_EQ (SImode, x, const0_rtx);
6974 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
6977 /* Split a compare and swap pattern. */
6979 void
6980 aarch64_split_compare_and_swap (rtx operands[])
6982 rtx rval, mem, oldval, newval, scratch;
6983 enum machine_mode mode;
6984 bool is_weak;
6985 rtx label1, label2, x, cond;
6987 rval = operands[0];
6988 mem = operands[1];
6989 oldval = operands[2];
6990 newval = operands[3];
6991 is_weak = (operands[4] != const0_rtx);
6992 scratch = operands[7];
6993 mode = GET_MODE (mem);
6995 label1 = NULL_RTX;
6996 if (!is_weak)
6998 label1 = gen_label_rtx ();
6999 emit_label (label1);
7001 label2 = gen_label_rtx ();
7003 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
7005 cond = aarch64_gen_compare_reg (NE, rval, oldval);
7006 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7007 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7008 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
7009 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7011 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
7013 if (!is_weak)
7015 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
7016 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7017 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
7018 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7020 else
7022 cond = gen_rtx_REG (CCmode, CC_REGNUM);
7023 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7024 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7027 emit_label (label2);
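 /* The resulting sequence for a strong DImode compare-and-swap is roughly
    (illustrative only; the exact acquire/release variants of the exclusive
    accesses depend on the memory model operands):

	.Lretry:
	     ldaxr	x0, [mem]
	     cmp	x0, oldval
	     b.ne	.Ldone
	     stlxr	w_tmp, newval, [mem]
	     cbnz	w_tmp, .Lretry
	.Ldone:  */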
7030 /* Split an atomic operation. */
7032 void
7033 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
7034 rtx value, rtx model_rtx, rtx cond)
7036 enum machine_mode mode = GET_MODE (mem);
7037 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7038 rtx label, x;
7040 label = gen_label_rtx ();
7041 emit_label (label);
7043 if (new_out)
7044 new_out = gen_lowpart (wmode, new_out);
7045 if (old_out)
7046 old_out = gen_lowpart (wmode, old_out);
7047 else
7048 old_out = new_out;
7049 value = simplify_gen_subreg (wmode, value, mode, 0);
7051 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7053 switch (code)
7055 case SET:
7056 new_out = value;
7057 break;
7059 case NOT:
7060 x = gen_rtx_AND (wmode, old_out, value);
7061 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7062 x = gen_rtx_NOT (wmode, new_out);
7063 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7064 break;
7066 case MINUS:
7067 if (CONST_INT_P (value))
7069 value = GEN_INT (-INTVAL (value));
7070 code = PLUS;
7072 /* Fall through. */
7074 default:
7075 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
7076 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7077 break;
7080 aarch64_emit_store_exclusive (mode, cond, mem,
7081 gen_lowpart (mode, new_out), model_rtx);
7083 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7084 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7085 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
7086 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
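 /* For example, splitting an SImode atomic add gives a loop of roughly this
    shape (illustrative only; the exclusive-access variants depend on
    MODEL_RTX):

	.Lretry:
	     ldxr	w_old, [mem]
	     add	w_new, w_old, value
	     stxr	w_cond, w_new, [mem]
	     cbnz	w_cond, .Lretry  */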
7089 static void
7090 aarch64_print_extension (void)
7092 const struct aarch64_option_extension *opt = NULL;
7094 for (opt = all_extensions; opt->name != NULL; opt++)
7095 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
7096 asm_fprintf (asm_out_file, "+%s", opt->name);
7098 asm_fprintf (asm_out_file, "\n");
7101 static void
7102 aarch64_start_file (void)
7104 if (selected_arch)
7106 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
7107 aarch64_print_extension ();
7109 else if (selected_cpu)
7111 asm_fprintf (asm_out_file, "\t.cpu %s", selected_cpu->name);
7112 aarch64_print_extension ();
7114 default_file_start ();
7117 /* Target hook for c_mode_for_suffix. */
7118 static enum machine_mode
7119 aarch64_c_mode_for_suffix (char suffix)
7121 if (suffix == 'q')
7122 return TFmode;
7124 return VOIDmode;
7127 /* We can only represent floating point constants which will fit in
7128 "quarter-precision" values. These values are characterised by
7129 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given by
7132 (-1)^s * (n/16) * 2^r
7134 Where:
7135 's' is the sign bit.
7136 'n' is an integer in the range 16 <= n <= 31.
7137 'r' is an integer in the range -3 <= r <= 4. */
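/* For example, 1.0 = (16/16) * 2^0, 0.125 = (16/16) * 2^-3 (the smallest
   positive representable value) and 31.0 = (31/16) * 2^4 (the largest), so
   constants such as 1.5 and 2.0 can be used directly as FMOV immediates,
   while 0.1 and 32.0 cannot.  */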
7139 /* Return true iff X can be represented by a quarter-precision
7140 floating point immediate operand. Note, we cannot represent 0.0. */
7141 bool
7142 aarch64_float_const_representable_p (rtx x)
7144 /* This represents our current view of how many bits
7145 make up the mantissa. */
7146 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7147 int exponent;
7148 unsigned HOST_WIDE_INT mantissa, mask;
7149 HOST_WIDE_INT m1, m2;
7150 REAL_VALUE_TYPE r, m;
7152 if (!CONST_DOUBLE_P (x))
7153 return false;
7155 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7157 /* We cannot represent infinities, NaNs or +/-zero. We won't
7158 know if we have +zero until we analyse the mantissa, but we
7159 can reject the other invalid values. */
7160 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
7161 || REAL_VALUE_MINUS_ZERO (r))
7162 return false;
7164 /* Extract exponent. */
7165 r = real_value_abs (&r);
7166 exponent = REAL_EXP (&r);
7168 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7169 highest (sign) bit, with a fixed binary point at bit point_pos.
7170 m1 holds the low part of the mantissa, m2 the high part.
7171 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
7172 bits for the mantissa, this can fail (low bits will be lost). */
7173 real_ldexp (&m, &r, point_pos - exponent);
7174 REAL_VALUE_TO_INT (&m1, &m2, m);
7176 /* If the low part of the mantissa has bits set we cannot represent
7177 the value. */
7178 if (m1 != 0)
7179 return false;
7180 /* We have rejected the lower HOST_WIDE_INT, so update our
7181 understanding of how many bits lie in the mantissa and
7182 look only at the high HOST_WIDE_INT. */
7183 mantissa = m2;
7184 point_pos -= HOST_BITS_PER_WIDE_INT;
7186 /* We can only represent values with a mantissa of the form 1.xxxx. */
7187 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7188 if ((mantissa & mask) != 0)
7189 return false;
7191 /* Having filtered unrepresentable values, we may now remove all
7192 but the highest 5 bits. */
7193 mantissa >>= point_pos - 5;
7195 /* We cannot represent the value 0.0, so reject it. This is handled
7196 elsewhere. */
7197 if (mantissa == 0)
7198 return false;
7200 /* Then, as bit 4 is always set, we can mask it off, leaving
7201 the mantissa in the range [0, 15]. */
7202 mantissa &= ~(1 << 4);
7203 gcc_assert (mantissa <= 15);
7205 /* GCC internally does not use IEEE754-like encoding (where normalized
7206 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
7207 Our mantissa values are shifted 4 places to the left relative to
7208 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
7209 by 5 places to correct for GCC's representation. */
7210 exponent = 5 - exponent;
7212 return (exponent >= 0 && exponent <= 7);
7215 char*
7216 aarch64_output_simd_mov_immediate (rtx const_vector,
7217 enum machine_mode mode,
7218 unsigned width)
7220 bool is_valid;
7221 static char templ[40];
7222 const char *mnemonic;
7223 unsigned int lane_count = 0;
7224 char element_char;
7226 struct simd_immediate_info info;
7228 /* This will return true to show const_vector is legal for use as an
7229 immediate for the AdvSIMD MOVI instruction (or, implicitly, MVNI). It will
7230 also update INFO to show how the immediate should be generated. */
7231 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
7232 gcc_assert (is_valid);
7234 element_char = sizetochar (info.element_width);
7235 lane_count = width / info.element_width;
7237 mode = GET_MODE_INNER (mode);
7238 if (mode == SFmode || mode == DFmode)
7240 gcc_assert (info.shift == 0 && ! info.mvn);
7241 if (aarch64_float_const_zero_rtx_p (info.value))
7242 info.value = GEN_INT (0);
7243 else
7245 #define buf_size 20
7246 REAL_VALUE_TYPE r;
7247 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
7248 char float_buf[buf_size] = {'\0'};
7249 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
7250 #undef buf_size
7252 if (lane_count == 1)
7253 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
7254 else
7255 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
7256 lane_count, element_char, float_buf);
7257 return templ;
7261 mnemonic = info.mvn ? "mvni" : "movi";
7263 if (lane_count == 1)
7264 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
7265 mnemonic, UINTVAL (info.value));
7266 else if (info.shift)
7267 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
7268 ", lsl %d", mnemonic, lane_count, element_char,
7269 UINTVAL (info.value), info.shift);
7270 else
7271 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
7272 mnemonic, lane_count, element_char, UINTVAL (info.value));
7273 return templ;
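 /* For instance, a V4SImode constant with every element 0x00ff0000 (value
    0xff, shift 16, four 32-bit lanes) yields the template
    "movi\t%0.4s, 0xff, lsl 16".  */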
7276 char*
7277 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
7278 enum machine_mode mode)
7280 enum machine_mode vmode;
7282 gcc_assert (!VECTOR_MODE_P (mode));
7283 vmode = aarch64_simd_container_mode (mode, 64);
7284 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
7285 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
7288 /* Split operands into moves from op[1] + op[2] into op[0]. */
7290 void
7291 aarch64_split_combinev16qi (rtx operands[3])
7293 unsigned int dest = REGNO (operands[0]);
7294 unsigned int src1 = REGNO (operands[1]);
7295 unsigned int src2 = REGNO (operands[2]);
7296 enum machine_mode halfmode = GET_MODE (operands[1]);
7297 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
7298 rtx destlo, desthi;
7300 gcc_assert (halfmode == V16QImode);
7302 if (src1 == dest && src2 == dest + halfregs)
7304 /* No-op move. Can't split to nothing; emit something. */
7305 emit_note (NOTE_INSN_DELETED);
7306 return;
7309 /* Preserve register attributes for variable tracking. */
7310 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
7311 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
7312 GET_MODE_SIZE (halfmode));
7314 /* Special case of reversed high/low parts. */
7315 if (reg_overlap_mentioned_p (operands[2], destlo)
7316 && reg_overlap_mentioned_p (operands[1], desthi))
7318 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7319 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
7320 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7322 else if (!reg_overlap_mentioned_p (operands[2], destlo))
7324 /* Try to avoid unnecessary moves if part of the result
7325 is in the right place already. */
7326 if (src1 != dest)
7327 emit_move_insn (destlo, operands[1]);
7328 if (src2 != dest + halfregs)
7329 emit_move_insn (desthi, operands[2]);
7331 else
7333 if (src2 != dest + halfregs)
7334 emit_move_insn (desthi, operands[2]);
7335 if (src1 != dest)
7336 emit_move_insn (destlo, operands[1]);
7340 /* vec_perm support. */
7342 #define MAX_VECT_LEN 16
7344 struct expand_vec_perm_d
7346 rtx target, op0, op1;
7347 unsigned char perm[MAX_VECT_LEN];
7348 enum machine_mode vmode;
7349 unsigned char nelt;
7350 bool one_vector_p;
7351 bool testing_p;
7354 /* Generate a variable permutation. */
7356 static void
7357 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
7359 enum machine_mode vmode = GET_MODE (target);
7360 bool one_vector_p = rtx_equal_p (op0, op1);
7362 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
7363 gcc_checking_assert (GET_MODE (op0) == vmode);
7364 gcc_checking_assert (GET_MODE (op1) == vmode);
7365 gcc_checking_assert (GET_MODE (sel) == vmode);
7366 gcc_checking_assert (TARGET_SIMD);
7368 if (one_vector_p)
7370 if (vmode == V8QImode)
7372 /* Expand the argument to a V16QI mode by duplicating it. */
7373 rtx pair = gen_reg_rtx (V16QImode);
7374 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
7375 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7377 else
7379 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
7382 else
7384 rtx pair;
7386 if (vmode == V8QImode)
7388 pair = gen_reg_rtx (V16QImode);
7389 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
7390 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7392 else
7394 pair = gen_reg_rtx (OImode);
7395 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
7396 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
7401 void
7402 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
7404 enum machine_mode vmode = GET_MODE (target);
7405 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
7406 bool one_vector_p = rtx_equal_p (op0, op1);
7407 rtx rmask[MAX_VECT_LEN], mask;
7409 gcc_checking_assert (!BYTES_BIG_ENDIAN);
7411 /* The TBL instruction does not use a modulo index, so we must take care
7412 of that ourselves. */
7413 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
7414 for (i = 0; i < nelt; ++i)
7415 rmask[i] = mask;
7416 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
7417 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
7419 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
7422 /* Recognize patterns suitable for the TRN instructions. */
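/* For example, on V4SImode the permutation {0, 4, 2, 6} selects TRN1 and
   {1, 5, 3, 7} selects TRN2: even or odd lanes of the two inputs are
   interleaved pairwise.  */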
7423 static bool
7424 aarch64_evpc_trn (struct expand_vec_perm_d *d)
7426 unsigned int i, odd, mask, nelt = d->nelt;
7427 rtx out, in0, in1, x;
7428 rtx (*gen) (rtx, rtx, rtx);
7429 enum machine_mode vmode = d->vmode;
7431 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7432 return false;
7434 /* Note that these are little-endian tests.
7435 We correct for big-endian later. */
7436 if (d->perm[0] == 0)
7437 odd = 0;
7438 else if (d->perm[0] == 1)
7439 odd = 1;
7440 else
7441 return false;
7442 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7444 for (i = 0; i < nelt; i += 2)
7446 if (d->perm[i] != i + odd)
7447 return false;
7448 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
7449 return false;
7452 /* Success! */
7453 if (d->testing_p)
7454 return true;
7456 in0 = d->op0;
7457 in1 = d->op1;
7458 if (BYTES_BIG_ENDIAN)
7460 x = in0, in0 = in1, in1 = x;
7461 odd = !odd;
7463 out = d->target;
7465 if (odd)
7467 switch (vmode)
7469 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
7470 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
7471 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
7472 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
7473 case V4SImode: gen = gen_aarch64_trn2v4si; break;
7474 case V2SImode: gen = gen_aarch64_trn2v2si; break;
7475 case V2DImode: gen = gen_aarch64_trn2v2di; break;
7476 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
7477 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
7478 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
7479 default:
7480 return false;
7483 else
7485 switch (vmode)
7487 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
7488 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
7489 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
7490 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
7491 case V4SImode: gen = gen_aarch64_trn1v4si; break;
7492 case V2SImode: gen = gen_aarch64_trn1v2si; break;
7493 case V2DImode: gen = gen_aarch64_trn1v2di; break;
7494 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
7495 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
7496 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
7497 default:
7498 return false;
7502 emit_insn (gen (out, in0, in1));
7503 return true;
7506 /* Recognize patterns suitable for the UZP instructions. */
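/* For example, on V4SImode the permutation {0, 2, 4, 6} selects UZP1 and
   {1, 3, 5, 7} selects UZP2: the even- or odd-numbered lanes of the
   concatenated inputs.  */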
7507 static bool
7508 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
7510 unsigned int i, odd, mask, nelt = d->nelt;
7511 rtx out, in0, in1, x;
7512 rtx (*gen) (rtx, rtx, rtx);
7513 enum machine_mode vmode = d->vmode;
7515 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7516 return false;
7518 /* Note that these are little-endian tests.
7519 We correct for big-endian later. */
7520 if (d->perm[0] == 0)
7521 odd = 0;
7522 else if (d->perm[0] == 1)
7523 odd = 1;
7524 else
7525 return false;
7526 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7528 for (i = 0; i < nelt; i++)
7530 unsigned elt = (i * 2 + odd) & mask;
7531 if (d->perm[i] != elt)
7532 return false;
7535 /* Success! */
7536 if (d->testing_p)
7537 return true;
7539 in0 = d->op0;
7540 in1 = d->op1;
7541 if (BYTES_BIG_ENDIAN)
7543 x = in0, in0 = in1, in1 = x;
7544 odd = !odd;
7546 out = d->target;
7548 if (odd)
7550 switch (vmode)
7552 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
7553 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
7554 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
7555 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
7556 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
7557 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
7558 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
7559 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
7560 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
7561 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
7562 default:
7563 return false;
7566 else
7568 switch (vmode)
7570 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
7571 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
7572 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
7573 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
7574 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
7575 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
7576 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
7577 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
7578 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
7579 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
7580 default:
7581 return false;
7585 emit_insn (gen (out, in0, in1));
7586 return true;
7589 /* Recognize patterns suitable for the ZIP instructions. */
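/* For example, on V4SImode the permutation {0, 4, 1, 5} selects ZIP1 and
   {2, 6, 3, 7} selects ZIP2: the low or high halves of the two inputs are
   interleaved.  */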
7590 static bool
7591 aarch64_evpc_zip (struct expand_vec_perm_d *d)
7593 unsigned int i, high, mask, nelt = d->nelt;
7594 rtx out, in0, in1, x;
7595 rtx (*gen) (rtx, rtx, rtx);
7596 enum machine_mode vmode = d->vmode;
7598 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7599 return false;
7601 /* Note that these are little-endian tests.
7602 We correct for big-endian later. */
7603 high = nelt / 2;
7604 if (d->perm[0] == high)
7605 /* Do Nothing. */
7607 else if (d->perm[0] == 0)
7608 high = 0;
7609 else
7610 return false;
7611 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7613 for (i = 0; i < nelt / 2; i++)
7615 unsigned elt = (i + high) & mask;
7616 if (d->perm[i * 2] != elt)
7617 return false;
7618 elt = (elt + nelt) & mask;
7619 if (d->perm[i * 2 + 1] != elt)
7620 return false;
7623 /* Success! */
7624 if (d->testing_p)
7625 return true;
7627 in0 = d->op0;
7628 in1 = d->op1;
7629 if (BYTES_BIG_ENDIAN)
7631 x = in0, in0 = in1, in1 = x;
7632 high = !high;
7634 out = d->target;
7636 if (high)
7638 switch (vmode)
7640 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
7641 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
7642 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
7643 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
7644 case V4SImode: gen = gen_aarch64_zip2v4si; break;
7645 case V2SImode: gen = gen_aarch64_zip2v2si; break;
7646 case V2DImode: gen = gen_aarch64_zip2v2di; break;
7647 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
7648 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
7649 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
7650 default:
7651 return false;
7654 else
7656 switch (vmode)
7658 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
7659 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
7660 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
7661 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
7662 case V4SImode: gen = gen_aarch64_zip1v4si; break;
7663 case V2SImode: gen = gen_aarch64_zip1v2si; break;
7664 case V2DImode: gen = gen_aarch64_zip1v2di; break;
7665 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
7666 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
7667 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
7668 default:
7669 return false;
7673 emit_insn (gen (out, in0, in1));
7674 return true;
7677 static bool
7678 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
7680 rtx rperm[MAX_VECT_LEN], sel;
7681 enum machine_mode vmode = d->vmode;
7682 unsigned int i, nelt = d->nelt;
7684 /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's
7685 numbering of elements for big-endian, we must reverse the order. */
7686 if (BYTES_BIG_ENDIAN)
7687 return false;
7689 if (d->testing_p)
7690 return true;
7692 /* Generic code will try constant permutation twice. Once with the
7693 original mode and again with the elements lowered to QImode.
7694 So wait and don't do the selector expansion ourselves. */
7695 if (vmode != V8QImode && vmode != V16QImode)
7696 return false;
7698 for (i = 0; i < nelt; ++i)
7699 rperm[i] = GEN_INT (d->perm[i]);
7700 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
7701 sel = force_reg (vmode, sel);
7703 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
7704 return true;
7707 static bool
7708 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
7710 /* The pattern matching functions above are written to look for a small
7711 number to begin the sequence (0, 1, N/2). If we begin with an index
7712 from the second operand, we can swap the operands. */
7713 if (d->perm[0] >= d->nelt)
7715 unsigned i, nelt = d->nelt;
7716 rtx x;
7718 for (i = 0; i < nelt; ++i)
7719 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
7721 x = d->op0;
7722 d->op0 = d->op1;
7723 d->op1 = x;
7726 if (TARGET_SIMD)
7728 if (aarch64_evpc_zip (d))
7729 return true;
7730 else if (aarch64_evpc_uzp (d))
7731 return true;
7732 else if (aarch64_evpc_trn (d))
7733 return true;
7734 return aarch64_evpc_tbl (d);
7736 return false;
7739 /* Expand a vec_perm_const pattern. */
7741 bool
7742 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
7744 struct expand_vec_perm_d d;
7745 int i, nelt, which;
7747 d.target = target;
7748 d.op0 = op0;
7749 d.op1 = op1;
7751 d.vmode = GET_MODE (target);
7752 gcc_assert (VECTOR_MODE_P (d.vmode));
7753 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
7754 d.testing_p = false;
7756 for (i = which = 0; i < nelt; ++i)
7758 rtx e = XVECEXP (sel, 0, i);
7759 int ei = INTVAL (e) & (2 * nelt - 1);
7760 which |= (ei < nelt ? 1 : 2);
7761 d.perm[i] = ei;
7764 switch (which)
7766 default:
7767 gcc_unreachable ();
7769 case 3:
7770 d.one_vector_p = false;
7771 if (!rtx_equal_p (op0, op1))
7772 break;
7774 /* The elements of PERM do not suggest that only the first operand
7775 is used, but both operands are identical. Allow easier matching
7776 of the permutation by folding the permutation into the single
7777 input vector. */
7778 /* Fall Through. */
7779 case 2:
7780 for (i = 0; i < nelt; ++i)
7781 d.perm[i] &= nelt - 1;
7782 d.op0 = op1;
7783 d.one_vector_p = true;
7784 break;
7786 case 1:
7787 d.op1 = op0;
7788 d.one_vector_p = true;
7789 break;
7792 return aarch64_expand_vec_perm_const_1 (&d);
7795 static bool
7796 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
7797 const unsigned char *sel)
7799 struct expand_vec_perm_d d;
7800 unsigned int i, nelt, which;
7801 bool ret;
7803 d.vmode = vmode;
7804 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
7805 d.testing_p = true;
7806 memcpy (d.perm, sel, nelt);
7808 /* Calculate whether all elements are in one vector. */
7809 for (i = which = 0; i < nelt; ++i)
7811 unsigned char e = d.perm[i];
7812 gcc_assert (e < 2 * nelt);
7813 which |= (e < nelt ? 1 : 2);
7816 /* If all elements are from the second vector, reindex as if from the
7817 first vector. */
7818 if (which == 2)
7819 for (i = 0; i < nelt; ++i)
7820 d.perm[i] -= nelt;
7822 /* Check whether the mask can be applied to a single vector. */
7823 d.one_vector_p = (which != 3);
7825 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
7826 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
7827 if (!d.one_vector_p)
7828 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
7830 start_sequence ();
7831 ret = aarch64_expand_vec_perm_const_1 (&d);
7832 end_sequence ();
7834 return ret;
7837 #undef TARGET_ADDRESS_COST
7838 #define TARGET_ADDRESS_COST aarch64_address_cost
7840 /* This hook determines whether unnamed bitfields affect the alignment
7841 of the containing structure. The hook returns true if the structure
7842 should inherit the alignment requirements of an unnamed bitfield's
7843 type. */
7844 #undef TARGET_ALIGN_ANON_BITFIELD
7845 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
7847 #undef TARGET_ASM_ALIGNED_DI_OP
7848 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
7850 #undef TARGET_ASM_ALIGNED_HI_OP
7851 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
7853 #undef TARGET_ASM_ALIGNED_SI_OP
7854 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
7856 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7857 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
7858 hook_bool_const_tree_hwi_hwi_const_tree_true
7860 #undef TARGET_ASM_FILE_START
7861 #define TARGET_ASM_FILE_START aarch64_start_file
7863 #undef TARGET_ASM_OUTPUT_MI_THUNK
7864 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
7866 #undef TARGET_ASM_SELECT_RTX_SECTION
7867 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
7869 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
7870 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
7872 #undef TARGET_BUILD_BUILTIN_VA_LIST
7873 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
7875 #undef TARGET_CALLEE_COPIES
7876 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
7878 #undef TARGET_CAN_ELIMINATE
7879 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
7881 #undef TARGET_CANNOT_FORCE_CONST_MEM
7882 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
7884 #undef TARGET_CONDITIONAL_REGISTER_USAGE
7885 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
7887 /* Only the least significant bit is used for initialization guard
7888 variables. */
7889 #undef TARGET_CXX_GUARD_MASK_BIT
7890 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
7892 #undef TARGET_C_MODE_FOR_SUFFIX
7893 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
7895 #ifdef TARGET_BIG_ENDIAN_DEFAULT
7896 #undef TARGET_DEFAULT_TARGET_FLAGS
7897 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
7898 #endif
7900 #undef TARGET_CLASS_MAX_NREGS
7901 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
7903 #undef TARGET_BUILTIN_DECL
7904 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
7906 #undef TARGET_EXPAND_BUILTIN
7907 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
7909 #undef TARGET_EXPAND_BUILTIN_VA_START
7910 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
7912 #undef TARGET_FOLD_BUILTIN
7913 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
7915 #undef TARGET_FUNCTION_ARG
7916 #define TARGET_FUNCTION_ARG aarch64_function_arg
7918 #undef TARGET_FUNCTION_ARG_ADVANCE
7919 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
7921 #undef TARGET_FUNCTION_ARG_BOUNDARY
7922 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
7924 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
7925 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
7927 #undef TARGET_FUNCTION_VALUE
7928 #define TARGET_FUNCTION_VALUE aarch64_function_value
7930 #undef TARGET_FUNCTION_VALUE_REGNO_P
7931 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
7933 #undef TARGET_FRAME_POINTER_REQUIRED
7934 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
7936 #undef TARGET_GIMPLE_FOLD_BUILTIN
7937 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
7939 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
7940 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
7942 #undef TARGET_INIT_BUILTINS
7943 #define TARGET_INIT_BUILTINS aarch64_init_builtins
7945 #undef TARGET_LEGITIMATE_ADDRESS_P
7946 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
7948 #undef TARGET_LEGITIMATE_CONSTANT_P
7949 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
7951 #undef TARGET_LIBGCC_CMP_RETURN_MODE
7952 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
7954 #undef TARGET_MANGLE_TYPE
7955 #define TARGET_MANGLE_TYPE aarch64_mangle_type
7957 #undef TARGET_MEMORY_MOVE_COST
7958 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
7960 #undef TARGET_MUST_PASS_IN_STACK
7961 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7963 /* This target hook should return true if accesses to volatile bitfields
7964 should use the narrowest mode possible. It should return false if these
7965 accesses should use the bitfield container type. */
7966 #undef TARGET_NARROW_VOLATILE_BITFIELD
7967 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
7969 #undef TARGET_OPTION_OVERRIDE
7970 #define TARGET_OPTION_OVERRIDE aarch64_override_options
7972 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
7973 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
7974 aarch64_override_options_after_change
7976 #undef TARGET_PASS_BY_REFERENCE
7977 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
7979 #undef TARGET_PREFERRED_RELOAD_CLASS
7980 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
7982 #undef TARGET_SECONDARY_RELOAD
7983 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
7985 #undef TARGET_SHIFT_TRUNCATION_MASK
7986 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
7988 #undef TARGET_SETUP_INCOMING_VARARGS
7989 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
7991 #undef TARGET_STRUCT_VALUE_RTX
7992 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
7994 #undef TARGET_REGISTER_MOVE_COST
7995 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
7997 #undef TARGET_RETURN_IN_MEMORY
7998 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
8000 #undef TARGET_RETURN_IN_MSB
8001 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
8003 #undef TARGET_RTX_COSTS
8004 #define TARGET_RTX_COSTS aarch64_rtx_costs
8006 #undef TARGET_TRAMPOLINE_INIT
8007 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
8009 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
8010 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
8012 #undef TARGET_VECTOR_MODE_SUPPORTED_P
8013 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
8015 #undef TARGET_ARRAY_MODE_SUPPORTED_P
8016 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
8018 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
8019 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
8021 #undef TARGET_VECTORIZE_BUILTINS
8022 #define TARGET_VECTORIZE_BUILTINS
8024 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
8025 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
8026 aarch64_builtin_vectorized_function
8028 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
8029 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
8030 aarch64_autovectorize_vector_sizes
8032 /* Section anchor support. */
8034 #undef TARGET_MIN_ANCHOR_OFFSET
8035 #define TARGET_MIN_ANCHOR_OFFSET -256
8037 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
8038 byte offset; we can do much more for larger data types, but have no way
8039 to determine the size of the access. We assume accesses are aligned. */
8040 #undef TARGET_MAX_ANCHOR_OFFSET
8041 #define TARGET_MAX_ANCHOR_OFFSET 4095
8043 #undef TARGET_VECTOR_ALIGNMENT
8044 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
8046 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
8047 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
8048 aarch64_simd_vector_alignment_reachable
8050 /* vec_perm support. */
8052 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
8053 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
8054 aarch64_vectorize_vec_perm_const_ok
8057 #undef TARGET_FIXED_CONDITION_CODE_REGS
8058 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
8060 struct gcc_target targetm = TARGET_INITIALIZER;
8062 #include "gt-aarch64.h"