PR target/9744
[official-gcc.git] / gcc / config / aarch64 / aarch64.c
blob 3b1f6b510491e0eba79f0b87fc62f2105da6c355
1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2014 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "tree.h"
29 #include "stringpool.h"
30 #include "stor-layout.h"
31 #include "calls.h"
32 #include "varasm.h"
33 #include "regs.h"
34 #include "df.h"
35 #include "hard-reg-set.h"
36 #include "output.h"
37 #include "expr.h"
38 #include "reload.h"
39 #include "toplev.h"
40 #include "target.h"
41 #include "target-def.h"
42 #include "targhooks.h"
43 #include "ggc.h"
44 #include "function.h"
45 #include "tm_p.h"
46 #include "recog.h"
47 #include "langhooks.h"
48 #include "diagnostic-core.h"
49 #include "pointer-set.h"
50 #include "hash-table.h"
51 #include "vec.h"
52 #include "basic-block.h"
53 #include "tree-ssa-alias.h"
54 #include "internal-fn.h"
55 #include "gimple-fold.h"
56 #include "tree-eh.h"
57 #include "gimple-expr.h"
58 #include "is-a.h"
59 #include "gimple.h"
60 #include "gimplify.h"
61 #include "optabs.h"
62 #include "dwarf2.h"
63 #include "cfgloop.h"
64 #include "tree-vectorizer.h"
65 #include "config/arm/aarch-cost-tables.h"
67 /* Defined for convenience. */
68 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
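/* A quick worked example of the macro above, assuming the standard ABI
   settings for this port: under LP64, POINTER_SIZE is 64 and BITS_PER_UNIT
   is 8, so POINTER_BYTES evaluates to 8; under ILP32 it would be 4.  */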
70 /* Classifies an address.
72 ADDRESS_REG_IMM
73 A simple base register plus immediate offset.
75 ADDRESS_REG_WB
76 A base register indexed by immediate offset with writeback.
78 ADDRESS_REG_REG
79 A base register indexed by (optionally scaled) register.
81 ADDRESS_REG_UXTW
82 A base register indexed by (optionally scaled) zero-extended register.
84 ADDRESS_REG_SXTW
85 A base register indexed by (optionally scaled) sign-extended register.
87 ADDRESS_LO_SUM
88 A LO_SUM rtx with a base register and "LO12" symbol relocation.
90 ADDRESS_SYMBOLIC
91 A constant symbolic address, in pc-relative literal pool. */
93 enum aarch64_address_type {
94 ADDRESS_REG_IMM,
95 ADDRESS_REG_WB,
96 ADDRESS_REG_REG,
97 ADDRESS_REG_UXTW,
98 ADDRESS_REG_SXTW,
99 ADDRESS_LO_SUM,
100 ADDRESS_SYMBOLIC
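/* Illustrative mapping from these classes to assembler syntax (examples
   only; the exact forms accepted depend on the mode and target options):
     ADDRESS_REG_IMM    [x0, #16]
     ADDRESS_REG_WB     [x0, #16]!   or   [x0], #16
     ADDRESS_REG_REG    [x0, x1, lsl #3]
     ADDRESS_REG_UXTW   [x0, w1, uxtw #2]
     ADDRESS_REG_SXTW   [x0, w1, sxtw #2]
     ADDRESS_LO_SUM     [x0, #:lo12:sym]
     ADDRESS_SYMBOLIC   a PC-relative literal load, e.g. ldr x0, .LC0  */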
103 struct aarch64_address_info {
104 enum aarch64_address_type type;
105 rtx base;
106 rtx offset;
107 int shift;
108 enum aarch64_symbol_type symbol_type;
111 struct simd_immediate_info
113 rtx value;
114 int shift;
115 int element_width;
116 bool mvn;
117 bool msl;
120 /* The current code model. */
121 enum aarch64_code_model aarch64_cmodel;
123 #ifdef HAVE_AS_TLS
124 #undef TARGET_HAVE_TLS
125 #define TARGET_HAVE_TLS 1
126 #endif
128 static bool aarch64_lra_p (void);
129 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
130 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
131 const_tree,
132 enum machine_mode *, int *,
133 bool *);
134 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
135 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
136 static void aarch64_override_options_after_change (void);
137 static bool aarch64_vector_mode_supported_p (enum machine_mode);
138 static unsigned bit_count (unsigned HOST_WIDE_INT);
139 static bool aarch64_const_vec_all_same_int_p (rtx,
140 HOST_WIDE_INT, HOST_WIDE_INT);
142 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
143 const unsigned char *sel);
145 /* The processor for which instructions should be scheduled. */
146 enum aarch64_processor aarch64_tune = cortexa53;
148 /* The current tuning set. */
149 const struct tune_params *aarch64_tune_params;
151 /* Mask to specify which instructions we are allowed to generate. */
152 unsigned long aarch64_isa_flags = 0;
154 /* Mask to specify which instruction scheduling options should be used. */
155 unsigned long aarch64_tune_flags = 0;
157 /* Tuning parameters. */
159 #if HAVE_DESIGNATED_INITIALIZERS
160 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
161 #else
162 #define NAMED_PARAM(NAME, VAL) (VAL)
163 #endif
165 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
166 __extension__
167 #endif
169 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
170 __extension__
171 #endif
172 static const struct cpu_addrcost_table generic_addrcost_table =
174 NAMED_PARAM (pre_modify, 0),
175 NAMED_PARAM (post_modify, 0),
176 NAMED_PARAM (register_offset, 0),
177 NAMED_PARAM (register_extend, 0),
178 NAMED_PARAM (imm_offset, 0)
181 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
182 __extension__
183 #endif
184 static const struct cpu_regmove_cost generic_regmove_cost =
186 NAMED_PARAM (GP2GP, 1),
187 NAMED_PARAM (GP2FP, 2),
188 NAMED_PARAM (FP2GP, 2),
189 /* We currently do not provide direct support for TFmode Q->Q move.
190 Therefore we need to raise the cost above 2 in order to have
191 reload handle the situation. */
192 NAMED_PARAM (FP2FP, 4)
195 /* Generic costs for vector insn classes. */
196 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
197 __extension__
198 #endif
199 static const struct cpu_vector_cost generic_vector_cost =
201 NAMED_PARAM (scalar_stmt_cost, 1),
202 NAMED_PARAM (scalar_load_cost, 1),
203 NAMED_PARAM (scalar_store_cost, 1),
204 NAMED_PARAM (vec_stmt_cost, 1),
205 NAMED_PARAM (vec_to_scalar_cost, 1),
206 NAMED_PARAM (scalar_to_vec_cost, 1),
207 NAMED_PARAM (vec_align_load_cost, 1),
208 NAMED_PARAM (vec_unalign_load_cost, 1),
209 NAMED_PARAM (vec_unalign_store_cost, 1),
210 NAMED_PARAM (vec_store_cost, 1),
211 NAMED_PARAM (cond_taken_branch_cost, 3),
212 NAMED_PARAM (cond_not_taken_branch_cost, 1)
215 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
216 __extension__
217 #endif
218 static const struct tune_params generic_tunings =
220 &generic_extra_costs,
221 &generic_addrcost_table,
222 &generic_regmove_cost,
223 &generic_vector_cost,
224 NAMED_PARAM (memmov_cost, 4)
227 static const struct tune_params cortexa53_tunings =
229 &cortexa53_extra_costs,
230 &generic_addrcost_table,
231 &generic_regmove_cost,
232 &generic_vector_cost,
233 NAMED_PARAM (memmov_cost, 4)
236 /* A processor implementing AArch64. */
237 struct processor
239 const char *const name;
240 enum aarch64_processor core;
241 const char *arch;
242 const unsigned long flags;
243 const struct tune_params *const tune;
246 /* Processor cores implementing AArch64. */
247 static const struct processor all_cores[] =
249 #define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
250 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
251 #include "aarch64-cores.def"
252 #undef AARCH64_CORE
253 {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
254 {NULL, aarch64_none, NULL, 0, NULL}
257 /* Architectures implementing AArch64. */
258 static const struct processor all_architectures[] =
260 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
261 {NAME, CORE, #ARCH, FLAGS, NULL},
262 #include "aarch64-arches.def"
263 #undef AARCH64_ARCH
264 {NULL, aarch64_none, NULL, 0, NULL}
267 /* Target specification. These are populated as command-line arguments
268 are processed, or NULL if not specified. */
269 static const struct processor *selected_arch;
270 static const struct processor *selected_cpu;
271 static const struct processor *selected_tune;
273 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
275 /* An ISA extension in the co-processor and main instruction set space. */
276 struct aarch64_option_extension
278 const char *const name;
279 const unsigned long flags_on;
280 const unsigned long flags_off;
283 /* ISA extensions in AArch64. */
284 static const struct aarch64_option_extension all_extensions[] =
286 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
287 {NAME, FLAGS_ON, FLAGS_OFF},
288 #include "aarch64-option-extensions.def"
289 #undef AARCH64_OPT_EXTENSION
290 {NULL, 0, 0}
293 /* Used to track the size of an address when generating a pre/post
294 increment address. */
295 static enum machine_mode aarch64_memory_reference_mode;
297 /* Used to force GTY into this file. */
298 static GTY(()) int gty_dummy;
300 /* A table of valid AArch64 "bitmask immediate" values for
301 logical instructions. */
303 #define AARCH64_NUM_BITMASKS 5334
304 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
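/* For reference, a "bitmask immediate" is built by replicating a 2-, 4-,
   8-, 16-, 32- or 64-bit element across the register, where each element
   is a rotated run of contiguous ones.  As an illustration,
   0x00ff00ff00ff00ff is encodable (a 16-bit element containing a run of
   eight ones), whereas 0x0000000000001234 is not, since its set bits do
   not form a single contiguous run under rotation.  */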
306 /* Did we set flag_omit_frame_pointer just so
307 aarch64_frame_pointer_required would be called? */
308 static bool faked_omit_frame_pointer;
310 typedef enum aarch64_cond_code
312 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
313 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
314 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
316 aarch64_cc;
318 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
320 /* The condition codes of the processor, and the inverse function. */
321 static const char * const aarch64_condition_codes[] =
323 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
324 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
327 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
328 unsigned
329 aarch64_dbx_register_number (unsigned regno)
331 if (GP_REGNUM_P (regno))
332 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
333 else if (regno == SP_REGNUM)
334 return AARCH64_DWARF_SP;
335 else if (FP_REGNUM_P (regno))
336 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
338 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
339 equivalent DWARF register. */
340 return DWARF_FRAME_REGISTERS;
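/* Illustrative results of the mapping above, assuming the usual
   AARCH64_DWARF_R0/SP/V0 values of 0, 31 and 64: x0-x30 map to DWARF
   numbers 0-30, the stack pointer maps to 31, and v0-v31 map to 64-95;
   anything else (e.g. the condition flags) yields DWARF_FRAME_REGISTERS,
   meaning "no DWARF equivalent".  */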
343 /* Return TRUE if MODE is any of the large INT modes. */
344 static bool
345 aarch64_vect_struct_mode_p (enum machine_mode mode)
347 return mode == OImode || mode == CImode || mode == XImode;
350 /* Return TRUE if MODE is any of the vector modes. */
351 static bool
352 aarch64_vector_mode_p (enum machine_mode mode)
354 return aarch64_vector_mode_supported_p (mode)
355 || aarch64_vect_struct_mode_p (mode);
358 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
359 static bool
360 aarch64_array_mode_supported_p (enum machine_mode mode,
361 unsigned HOST_WIDE_INT nelems)
363 if (TARGET_SIMD
364 && AARCH64_VALID_SIMD_QREG_MODE (mode)
365 && (nelems >= 2 && nelems <= 4))
366 return true;
368 return false;
371 /* Implement HARD_REGNO_NREGS. */
374 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
376 switch (aarch64_regno_regclass (regno))
378 case FP_REGS:
379 case FP_LO_REGS:
380 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
381 default:
382 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
384 gcc_unreachable ();
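/* Worked example for the function above, assuming UNITS_PER_WORD == 8 and
   UNITS_PER_VREG == 16 on this target: a 16-byte V4SImode value occupies
   one FP/SIMD register but two general registers, while an 8-byte DImode
   value occupies a single register of either kind.  */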
387 /* Implement HARD_REGNO_MODE_OK. */
390 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
392 if (GET_MODE_CLASS (mode) == MODE_CC)
393 return regno == CC_REGNUM;
395 if (regno == SP_REGNUM)
396 /* The purpose of comparing with ptr_mode is to support the
397 global register variable associated with the stack pointer
398 register via the syntax of asm ("wsp") in ILP32. */
399 return mode == Pmode || mode == ptr_mode;
401 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
402 return mode == Pmode;
404 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
405 return 1;
407 if (FP_REGNUM_P (regno))
409 if (aarch64_vect_struct_mode_p (mode))
410 return
411 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
412 else
413 return 1;
416 return 0;
419 /* Return true if calls to DECL should be treated as
420 long-calls (i.e. called via a register). */
421 static bool
422 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
424 return false;
427 /* Return true if calls to symbol-ref SYM should be treated as
428 long-calls (i.e. called via a register). */
429 bool
430 aarch64_is_long_call_p (rtx sym)
432 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
435 /* Return true if the offsets to a zero/sign-extract operation
436 represent an expression that matches an extend operation. The
437 operands represent the parameters from
439 (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */
440 bool
441 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
442 rtx extract_imm)
444 HOST_WIDE_INT mult_val, extract_val;
446 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
447 return false;
449 mult_val = INTVAL (mult_imm);
450 extract_val = INTVAL (extract_imm);
452 if (extract_val > 8
453 && extract_val < GET_MODE_BITSIZE (mode)
454 && exact_log2 (extract_val & ~7) > 0
455 && (extract_val & 7) <= 4
456 && mult_val == (1 << (extract_val & 7)))
457 return true;
459 return false;
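/* A worked example of the test above (illustrative only): in DImode, with
   extract_imm == 34 and mult_imm == 4, we have 34 & ~7 == 32 (a power of
   two), 34 & 7 == 2 <= 4, and 4 == 1 << 2, so the RTL
     (extract (mult (reg) 4) 34 0)
   is accepted: it is a 32-bit extend of REG shifted left by two.  */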
462 /* Emit an insn that's a simple single-set. Both the operands must be
463 known to be valid. */
464 inline static rtx
465 emit_set_insn (rtx x, rtx y)
467 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
470 /* X and Y are two things to compare using CODE. Emit the compare insn and
471 return the rtx for register 0 in the proper mode. */
473 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
475 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
476 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
478 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
479 return cc_reg;
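/* A minimal usage sketch for the helper above (hypothetical caller, not
   part of this file): an expander wanting a signed >= comparison could do

     rtx cc = aarch64_gen_compare_reg (GE, op0, op1);
     rtx cond = gen_rtx_GE (VOIDmode, cc, const0_rtx);

   and then use COND inside a conditional-branch or conditional-set
   pattern.  */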
482 /* Build the SYMBOL_REF for __tls_get_addr. */
484 static GTY(()) rtx tls_get_addr_libfunc;
487 aarch64_tls_get_addr (void)
489 if (!tls_get_addr_libfunc)
490 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
491 return tls_get_addr_libfunc;
494 /* Return the TLS model to use for ADDR. */
496 static enum tls_model
497 tls_symbolic_operand_type (rtx addr)
499 enum tls_model tls_kind = TLS_MODEL_NONE;
500 rtx sym, addend;
502 if (GET_CODE (addr) == CONST)
504 split_const (addr, &sym, &addend);
505 if (GET_CODE (sym) == SYMBOL_REF)
506 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
508 else if (GET_CODE (addr) == SYMBOL_REF)
509 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
511 return tls_kind;
514 /* We'll allow LO_SUMs in our legitimate addresses
515 so that combine can take care of combining addresses where
516 necessary, but for generation purposes we'll generate the address
517 as:
518 RTL Absolute
519 tmp = hi (symbol_ref); adrp x1, foo
520 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
523 PIC TLS
524 adrp x1, :got:foo adrp tmp, :tlsgd:foo
525 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
526 bl __tls_get_addr
529 Load TLS symbol, depending on TLS mechanism and TLS access model.
531 Global Dynamic - Traditional TLS:
532 adrp tmp, :tlsgd:imm
533 add dest, tmp, #:tlsgd_lo12:imm
534 bl __tls_get_addr
536 Global Dynamic - TLS Descriptors:
537 adrp dest, :tlsdesc:imm
538 ldr tmp, [dest, #:tlsdesc_lo12:imm]
539 add dest, dest, #:tlsdesc_lo12:imm
540 blr tmp
541 mrs tp, tpidr_el0
542 add dest, dest, tp
544 Initial Exec:
545 mrs tp, tpidr_el0
546 adrp tmp, :gottprel:imm
547 ldr dest, [tmp, #:gottprel_lo12:imm]
548 add dest, dest, tp
550 Local Exec:
551 mrs tp, tpidr_el0
552 add t0, tp, #:tprel_hi12:imm
553 add t0, #:tprel_lo12_nc:imm
556 static void
557 aarch64_load_symref_appropriately (rtx dest, rtx imm,
558 enum aarch64_symbol_type type)
560 switch (type)
562 case SYMBOL_SMALL_ABSOLUTE:
564 /* In ILP32, the mode of dest can be either SImode or DImode. */
565 rtx tmp_reg = dest;
566 enum machine_mode mode = GET_MODE (dest);
568 gcc_assert (mode == Pmode || mode == ptr_mode);
570 if (can_create_pseudo_p ())
571 tmp_reg = gen_reg_rtx (mode);
573 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
574 emit_insn (gen_add_losym (dest, tmp_reg, imm));
575 return;
578 case SYMBOL_TINY_ABSOLUTE:
579 emit_insn (gen_rtx_SET (Pmode, dest, imm));
580 return;
582 case SYMBOL_SMALL_GOT:
584 /* In ILP32, the mode of dest can be either SImode or DImode,
585 while the got entry is always of SImode size. The mode of
586 dest depends on how dest is used: if dest is assigned to a
587 pointer (e.g. in memory), it has SImode; it may have
588 DImode if dest is dereferenced to access the memory.
589 This is why we have to handle three different ldr_got_small
590 patterns here (two patterns for ILP32). */
591 rtx tmp_reg = dest;
592 enum machine_mode mode = GET_MODE (dest);
594 if (can_create_pseudo_p ())
595 tmp_reg = gen_reg_rtx (mode);
597 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
598 if (mode == ptr_mode)
600 if (mode == DImode)
601 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
602 else
603 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
605 else
607 gcc_assert (mode == Pmode);
608 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
611 return;
614 case SYMBOL_SMALL_TLSGD:
616 rtx insns;
617 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
619 start_sequence ();
620 emit_call_insn (gen_tlsgd_small (result, imm));
621 insns = get_insns ();
622 end_sequence ();
624 RTL_CONST_CALL_P (insns) = 1;
625 emit_libcall_block (insns, dest, result, imm);
626 return;
629 case SYMBOL_SMALL_TLSDESC:
631 rtx x0 = gen_rtx_REG (Pmode, R0_REGNUM);
632 rtx tp;
634 emit_insn (gen_tlsdesc_small (imm));
635 tp = aarch64_load_tp (NULL);
636 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, x0)));
637 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
638 return;
641 case SYMBOL_SMALL_GOTTPREL:
643 rtx tmp_reg = gen_reg_rtx (Pmode);
644 rtx tp = aarch64_load_tp (NULL);
645 emit_insn (gen_tlsie_small (tmp_reg, imm));
646 emit_insn (gen_rtx_SET (Pmode, dest, gen_rtx_PLUS (Pmode, tp, tmp_reg)));
647 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
648 return;
651 case SYMBOL_SMALL_TPREL:
653 rtx tp = aarch64_load_tp (NULL);
654 emit_insn (gen_tlsle_small (dest, tp, imm));
655 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
656 return;
659 case SYMBOL_TINY_GOT:
660 emit_insn (gen_ldr_got_tiny (dest, imm));
661 return;
663 default:
664 gcc_unreachable ();
668 /* Emit a move from SRC to DEST. Assume that the move expanders can
669 handle all moves if !can_create_pseudo_p (). The distinction is
670 important because, unlike emit_move_insn, the move expanders know
671 how to force Pmode objects into the constant pool even when the
672 constant pool address is not itself legitimate. */
673 static rtx
674 aarch64_emit_move (rtx dest, rtx src)
676 return (can_create_pseudo_p ()
677 ? emit_move_insn (dest, src)
678 : emit_move_insn_1 (dest, src));
681 void
682 aarch64_split_128bit_move (rtx dst, rtx src)
684 rtx low_dst;
686 enum machine_mode src_mode = GET_MODE (src);
687 enum machine_mode dst_mode = GET_MODE (dst);
688 int src_regno = REGNO (src);
689 int dst_regno = REGNO (dst);
691 gcc_assert (dst_mode == TImode || dst_mode == TFmode);
693 if (REG_P (dst) && REG_P (src))
695 gcc_assert (src_mode == TImode || src_mode == TFmode);
697 /* Handle r -> w, w -> r. */
698 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
700 switch (src_mode) {
701 case TImode:
702 emit_insn
703 (gen_aarch64_movtilow_di (dst, gen_lowpart (word_mode, src)));
704 emit_insn
705 (gen_aarch64_movtihigh_di (dst, gen_highpart (word_mode, src)));
706 return;
707 case TFmode:
708 emit_insn
709 (gen_aarch64_movtflow_di (dst, gen_lowpart (word_mode, src)));
710 emit_insn
711 (gen_aarch64_movtfhigh_di (dst, gen_highpart (word_mode, src)));
712 return;
713 default:
714 gcc_unreachable ();
717 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
719 switch (src_mode) {
720 case TImode:
721 emit_insn
722 (gen_aarch64_movdi_tilow (gen_lowpart (word_mode, dst), src));
723 emit_insn
724 (gen_aarch64_movdi_tihigh (gen_highpart (word_mode, dst), src));
725 return;
726 case TFmode:
727 emit_insn
728 (gen_aarch64_movdi_tflow (gen_lowpart (word_mode, dst), src));
729 emit_insn
730 (gen_aarch64_movdi_tfhigh (gen_highpart (word_mode, dst), src));
731 return;
732 default:
733 gcc_unreachable ();
736 /* Fall through to r -> r cases. */
739 switch (dst_mode) {
740 case TImode:
741 low_dst = gen_lowpart (word_mode, dst);
742 if (REG_P (low_dst)
743 && reg_overlap_mentioned_p (low_dst, src))
745 aarch64_emit_move (gen_highpart (word_mode, dst),
746 gen_highpart_mode (word_mode, TImode, src));
747 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
749 else
751 aarch64_emit_move (low_dst, gen_lowpart (word_mode, src));
752 aarch64_emit_move (gen_highpart (word_mode, dst),
753 gen_highpart_mode (word_mode, TImode, src));
755 return;
756 case TFmode:
757 emit_move_insn (gen_rtx_REG (DFmode, dst_regno),
758 gen_rtx_REG (DFmode, src_regno));
759 emit_move_insn (gen_rtx_REG (DFmode, dst_regno + 1),
760 gen_rtx_REG (DFmode, src_regno + 1));
761 return;
762 default:
763 gcc_unreachable ();
767 bool
768 aarch64_split_128bit_move_p (rtx dst, rtx src)
770 return (! REG_P (src)
771 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
774 /* Split a complex SIMD combine. */
776 void
777 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
779 enum machine_mode src_mode = GET_MODE (src1);
780 enum machine_mode dst_mode = GET_MODE (dst);
782 gcc_assert (VECTOR_MODE_P (dst_mode));
784 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
786 rtx (*gen) (rtx, rtx, rtx);
788 switch (src_mode)
790 case V8QImode:
791 gen = gen_aarch64_simd_combinev8qi;
792 break;
793 case V4HImode:
794 gen = gen_aarch64_simd_combinev4hi;
795 break;
796 case V2SImode:
797 gen = gen_aarch64_simd_combinev2si;
798 break;
799 case V2SFmode:
800 gen = gen_aarch64_simd_combinev2sf;
801 break;
802 case DImode:
803 gen = gen_aarch64_simd_combinedi;
804 break;
805 case DFmode:
806 gen = gen_aarch64_simd_combinedf;
807 break;
808 default:
809 gcc_unreachable ();
812 emit_insn (gen (dst, src1, src2));
813 return;
817 /* Split a complex SIMD move. */
819 void
820 aarch64_split_simd_move (rtx dst, rtx src)
822 enum machine_mode src_mode = GET_MODE (src);
823 enum machine_mode dst_mode = GET_MODE (dst);
825 gcc_assert (VECTOR_MODE_P (dst_mode));
827 if (REG_P (dst) && REG_P (src))
829 rtx (*gen) (rtx, rtx);
831 gcc_assert (VECTOR_MODE_P (src_mode));
833 switch (src_mode)
835 case V16QImode:
836 gen = gen_aarch64_split_simd_movv16qi;
837 break;
838 case V8HImode:
839 gen = gen_aarch64_split_simd_movv8hi;
840 break;
841 case V4SImode:
842 gen = gen_aarch64_split_simd_movv4si;
843 break;
844 case V2DImode:
845 gen = gen_aarch64_split_simd_movv2di;
846 break;
847 case V4SFmode:
848 gen = gen_aarch64_split_simd_movv4sf;
849 break;
850 case V2DFmode:
851 gen = gen_aarch64_split_simd_movv2df;
852 break;
853 default:
854 gcc_unreachable ();
857 emit_insn (gen (dst, src));
858 return;
862 static rtx
863 aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
865 if (can_create_pseudo_p ())
866 return force_reg (mode, value);
867 else
869 x = aarch64_emit_move (x, value);
870 return x;
875 static rtx
876 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
878 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
880 rtx high;
881 /* Load the full offset into a register. This
882 might be improvable in the future. */
883 high = GEN_INT (offset);
884 offset = 0;
885 high = aarch64_force_temporary (mode, temp, high);
886 reg = aarch64_force_temporary (mode, temp,
887 gen_rtx_PLUS (mode, high, reg));
889 return plus_constant (mode, reg, offset);
892 void
893 aarch64_expand_mov_immediate (rtx dest, rtx imm)
895 enum machine_mode mode = GET_MODE (dest);
896 unsigned HOST_WIDE_INT mask;
897 int i;
898 bool first;
899 unsigned HOST_WIDE_INT val;
900 bool subtargets;
901 rtx subtarget;
902 int one_match, zero_match;
904 gcc_assert (mode == SImode || mode == DImode);
906 /* Check what type of symbol it is. */
907 if (GET_CODE (imm) == SYMBOL_REF
908 || GET_CODE (imm) == LABEL_REF
909 || GET_CODE (imm) == CONST)
911 rtx mem, base, offset;
912 enum aarch64_symbol_type sty;
914 /* If we have (const (plus symbol offset)), separate out the offset
915 before we start classifying the symbol. */
916 split_const (imm, &base, &offset);
918 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
919 switch (sty)
921 case SYMBOL_FORCE_TO_MEM:
922 if (offset != const0_rtx
923 && targetm.cannot_force_const_mem (mode, imm))
925 gcc_assert(can_create_pseudo_p ());
926 base = aarch64_force_temporary (mode, dest, base);
927 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
928 aarch64_emit_move (dest, base);
929 return;
931 mem = force_const_mem (ptr_mode, imm);
932 gcc_assert (mem);
933 if (mode != ptr_mode)
934 mem = gen_rtx_ZERO_EXTEND (mode, mem);
935 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
936 return;
938 case SYMBOL_SMALL_TLSGD:
939 case SYMBOL_SMALL_TLSDESC:
940 case SYMBOL_SMALL_GOTTPREL:
941 case SYMBOL_SMALL_GOT:
942 case SYMBOL_TINY_GOT:
943 if (offset != const0_rtx)
945 gcc_assert(can_create_pseudo_p ());
946 base = aarch64_force_temporary (mode, dest, base);
947 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
948 aarch64_emit_move (dest, base);
949 return;
951 /* FALLTHRU */
953 case SYMBOL_SMALL_TPREL:
954 case SYMBOL_SMALL_ABSOLUTE:
955 case SYMBOL_TINY_ABSOLUTE:
956 aarch64_load_symref_appropriately (dest, imm, sty);
957 return;
959 default:
960 gcc_unreachable ();
964 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
966 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
967 return;
970 if (!CONST_INT_P (imm))
972 if (GET_CODE (imm) == HIGH)
973 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
974 else
976 rtx mem = force_const_mem (mode, imm);
977 gcc_assert (mem);
978 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
981 return;
984 if (mode == SImode)
986 /* We know we can't do this in 1 insn, and we must be able to do it
987 in two; so don't mess around looking for sequences that don't buy
988 us anything. */
989 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
990 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
991 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
992 return;
995 /* Remaining cases are all for DImode. */
997 val = INTVAL (imm);
998 subtargets = optimize && can_create_pseudo_p ();
1000 one_match = 0;
1001 zero_match = 0;
1002 mask = 0xffff;
1004 for (i = 0; i < 64; i += 16, mask <<= 16)
1006 if ((val & mask) == 0)
1007 zero_match++;
1008 else if ((val & mask) == mask)
1009 one_match++;
1012 if (one_match == 2)
1014 mask = 0xffff;
1015 for (i = 0; i < 64; i += 16, mask <<= 16)
1017 if ((val & mask) != mask)
1019 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1020 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1021 GEN_INT ((val >> i) & 0xffff)));
1022 return;
1025 gcc_unreachable ();
1028 if (zero_match == 2)
1029 goto simple_sequence;
1031 mask = 0x0ffff0000UL;
1032 for (i = 16; i < 64; i += 16, mask <<= 16)
1034 HOST_WIDE_INT comp = mask & ~(mask - 1);
1036 if (aarch64_uimm12_shift (val - (val & mask)))
1038 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1040 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1041 emit_insn (gen_adddi3 (dest, subtarget,
1042 GEN_INT (val - (val & mask))));
1043 return;
1045 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1047 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1049 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1050 GEN_INT ((val + comp) & mask)));
1051 emit_insn (gen_adddi3 (dest, subtarget,
1052 GEN_INT (val - ((val + comp) & mask))));
1053 return;
1055 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1057 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1059 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1060 GEN_INT ((val - comp) | ~mask)));
1061 emit_insn (gen_adddi3 (dest, subtarget,
1062 GEN_INT (val - ((val - comp) | ~mask))));
1063 return;
1065 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1067 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1069 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1070 GEN_INT (val | ~mask)));
1071 emit_insn (gen_adddi3 (dest, subtarget,
1072 GEN_INT (val - (val | ~mask))));
1073 return;
1077 /* See if we can do it by arithmetically combining two
1078 immediates. */
1079 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1081 int j;
1082 mask = 0xffff;
1084 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1085 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1087 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1088 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1089 GEN_INT (aarch64_bitmasks[i])));
1090 emit_insn (gen_adddi3 (dest, subtarget,
1091 GEN_INT (val - aarch64_bitmasks[i])));
1092 return;
1095 for (j = 0; j < 64; j += 16, mask <<= 16)
1097 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1099 emit_insn (gen_rtx_SET (VOIDmode, dest,
1100 GEN_INT (aarch64_bitmasks[i])));
1101 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1102 GEN_INT ((val >> j) & 0xffff)));
1103 return;
1108 /* See if we can do it by logically combining two immediates. */
1109 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1111 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1113 int j;
1115 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1116 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1118 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1119 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1120 GEN_INT (aarch64_bitmasks[i])));
1121 emit_insn (gen_iordi3 (dest, subtarget,
1122 GEN_INT (aarch64_bitmasks[j])));
1123 return;
1126 else if ((val & aarch64_bitmasks[i]) == val)
1128 int j;
1130 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1131 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1134 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1135 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1136 GEN_INT (aarch64_bitmasks[j])));
1137 emit_insn (gen_anddi3 (dest, subtarget,
1138 GEN_INT (aarch64_bitmasks[i])));
1139 return;
1144 simple_sequence:
1145 first = true;
1146 mask = 0xffff;
1147 for (i = 0; i < 64; i += 16, mask <<= 16)
1149 if ((val & mask) != 0)
1151 if (first)
1153 emit_insn (gen_rtx_SET (VOIDmode, dest,
1154 GEN_INT (val & mask)));
1155 first = false;
1157 else
1158 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1159 GEN_INT ((val >> i) & 0xffff)));
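/* A worked example of the simple_sequence fall-back above (illustrative):
   for val == 0x1234000000005678 in DImode, two of the four 16-bit chunks
   are zero, so control reaches simple_sequence directly.  The only
   non-zero chunks are 0x5678 at bit 0 and 0x1234 at bit 48, so the code
   emits one plain SET of 0x5678 (a MOVZ) followed by one insv_immdi of
   0x1234 at position 48 (a MOVK), i.e. two instructions in total.  */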
1164 static bool
1165 aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1167 /* Indirect calls are not currently supported. */
1168 if (decl == NULL)
1169 return false;
1171 /* Cannot tail-call to long-calls, since these are outside of the
1172 range of a branch instruction (we could handle this if we added
1173 support for indirect tail-calls). */
1174 if (aarch64_decl_is_long_call_p (decl))
1175 return false;
1177 return true;
1180 /* Implement TARGET_PASS_BY_REFERENCE. */
1182 static bool
1183 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1184 enum machine_mode mode,
1185 const_tree type,
1186 bool named ATTRIBUTE_UNUSED)
1188 HOST_WIDE_INT size;
1189 enum machine_mode dummymode;
1190 int nregs;
1192 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1193 size = (mode == BLKmode && type)
1194 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1196 if (type)
1198 /* Arrays are always passed by reference. */
1199 if (TREE_CODE (type) == ARRAY_TYPE)
1200 return true;
1201 /* Other aggregates based on their size. */
1202 if (AGGREGATE_TYPE_P (type))
1203 size = int_size_in_bytes (type);
1206 /* Variable-sized arguments are always passed by reference. */
1207 if (size < 0)
1208 return true;
1210 /* Can this be a candidate to be passed in fp/simd register(s)? */
1211 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1212 &dummymode, &nregs,
1213 NULL))
1214 return false;
1216 /* Arguments which are variable sized or larger than 2 registers are
1217 passed by reference unless they are a homogeneous floating-point
1218 aggregate. */
1219 return size > 2 * UNITS_PER_WORD;
1222 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1223 static bool
1224 aarch64_return_in_msb (const_tree valtype)
1226 enum machine_mode dummy_mode;
1227 int dummy_int;
1229 /* Never happens in little-endian mode. */
1230 if (!BYTES_BIG_ENDIAN)
1231 return false;
1233 /* Only composite types smaller than or equal to 16 bytes can
1234 be potentially returned in registers. */
1235 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1236 || int_size_in_bytes (valtype) <= 0
1237 || int_size_in_bytes (valtype) > 16)
1238 return false;
1240 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1241 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1242 is always passed/returned in the least significant bits of fp/simd
1243 register(s). */
1244 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1245 &dummy_mode, &dummy_int, NULL))
1246 return false;
1248 return true;
1251 /* Implement TARGET_FUNCTION_VALUE.
1252 Define how to find the value returned by a function. */
1254 static rtx
1255 aarch64_function_value (const_tree type, const_tree func,
1256 bool outgoing ATTRIBUTE_UNUSED)
1258 enum machine_mode mode;
1259 int unsignedp;
1260 int count;
1261 enum machine_mode ag_mode;
1263 mode = TYPE_MODE (type);
1264 if (INTEGRAL_TYPE_P (type))
1265 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1267 if (aarch64_return_in_msb (type))
1269 HOST_WIDE_INT size = int_size_in_bytes (type);
1271 if (size % UNITS_PER_WORD != 0)
1273 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1274 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1278 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1279 &ag_mode, &count, NULL))
1281 if (!aarch64_composite_type_p (type, mode))
1283 gcc_assert (count == 1 && mode == ag_mode);
1284 return gen_rtx_REG (mode, V0_REGNUM);
1286 else
1288 int i;
1289 rtx par;
1291 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1292 for (i = 0; i < count; i++)
1294 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1295 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1296 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1297 XVECEXP (par, 0, i) = tmp;
1299 return par;
1302 else
1303 return gen_rtx_REG (mode, R0_REGNUM);
1306 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1307 Return true if REGNO is the number of a hard register in which the values
1308 of a called function may come back. */
1310 static bool
1311 aarch64_function_value_regno_p (const unsigned int regno)
1313 /* A maximum of 16 bytes can be returned in the general registers. Examples
1314 of 16-byte return values are: 128-bit integers and 16-byte small
1315 structures (excluding homogeneous floating-point aggregates). */
1316 if (regno == R0_REGNUM || regno == R1_REGNUM)
1317 return true;
1319 /* Up to four fp/simd registers can return a function value, e.g. a
1320 homogeneous floating-point aggregate having four members. */
1321 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1322 return !TARGET_GENERAL_REGS_ONLY;
1324 return false;
1327 /* Implement TARGET_RETURN_IN_MEMORY.
1329 If the type T of the result of a function is such that
1330 void func (T arg)
1331 would require that arg be passed as a value in a register (or set of
1332 registers) according to the parameter passing rules, then the result
1333 is returned in the same registers as would be used for such an
1334 argument. */
1336 static bool
1337 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1339 HOST_WIDE_INT size;
1340 enum machine_mode ag_mode;
1341 int count;
1343 if (!AGGREGATE_TYPE_P (type)
1344 && TREE_CODE (type) != COMPLEX_TYPE
1345 && TREE_CODE (type) != VECTOR_TYPE)
1346 /* Simple scalar types are always returned in registers. */
1347 return false;
1349 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1350 type,
1351 &ag_mode,
1352 &count,
1353 NULL))
1354 return false;
1356 /* Types larger than 2 registers are returned in memory. */
1357 size = int_size_in_bytes (type);
1358 return (size < 0 || size > 2 * UNITS_PER_WORD);
1361 static bool
1362 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1363 const_tree type, int *nregs)
1365 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1366 return aarch64_vfp_is_call_or_return_candidate (mode,
1367 type,
1368 &pcum->aapcs_vfp_rmode,
1369 nregs,
1370 NULL);
1373 /* Given MODE and TYPE of a function argument, return the alignment in
1374 bits. The idea is to suppress any stronger alignment requested by
1375 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1376 This is a helper function for local use only. */
1378 static unsigned int
1379 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1381 unsigned int alignment;
1383 if (type)
1385 if (!integer_zerop (TYPE_SIZE (type)))
1387 if (TYPE_MODE (type) == mode)
1388 alignment = TYPE_ALIGN (type);
1389 else
1390 alignment = GET_MODE_ALIGNMENT (mode);
1392 else
1393 alignment = 0;
1395 else
1396 alignment = GET_MODE_ALIGNMENT (mode);
1398 return alignment;
1401 /* Layout a function argument according to the AAPCS64 rules. The rule
1402 numbers refer to the rule numbers in the AAPCS64. */
1404 static void
1405 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1406 const_tree type,
1407 bool named ATTRIBUTE_UNUSED)
1409 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1410 int ncrn, nvrn, nregs;
1411 bool allocate_ncrn, allocate_nvrn;
1413 /* We need to do this once per argument. */
1414 if (pcum->aapcs_arg_processed)
1415 return;
1417 pcum->aapcs_arg_processed = true;
1419 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1420 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1421 mode,
1422 type,
1423 &nregs);
1425 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1426 The following code thus handles passing by SIMD/FP registers first. */
1428 nvrn = pcum->aapcs_nvrn;
1430 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
1431 and homogeneous short-vector aggregates (HVA). */
1432 if (allocate_nvrn)
1434 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1436 pcum->aapcs_nextnvrn = nvrn + nregs;
1437 if (!aarch64_composite_type_p (type, mode))
1439 gcc_assert (nregs == 1);
1440 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1442 else
1444 rtx par;
1445 int i;
1446 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1447 for (i = 0; i < nregs; i++)
1449 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1450 V0_REGNUM + nvrn + i);
1451 tmp = gen_rtx_EXPR_LIST
1452 (VOIDmode, tmp,
1453 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1454 XVECEXP (par, 0, i) = tmp;
1456 pcum->aapcs_reg = par;
1458 return;
1460 else
1462 /* C.3 NSRN is set to 8. */
1463 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1464 goto on_stack;
1468 ncrn = pcum->aapcs_ncrn;
1469 nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1470 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1473 /* C6 - C9, though the sign and zero extension semantics are
1474 handled elsewhere. This is the case where the argument fits
1475 entirely in general registers. */
1476 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1478 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1480 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1482 /* C.8 if the argument has an alignment of 16 then the NGRN is
1483 rounded up to the next even number. */
1484 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1486 ++ncrn;
1487 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1489 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1490 A reg is still generated for it, but the caller should be smart
1491 enough not to use it. */
1492 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1494 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1496 else
1498 rtx par;
1499 int i;
1501 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1502 for (i = 0; i < nregs; i++)
1504 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1505 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1506 GEN_INT (i * UNITS_PER_WORD));
1507 XVECEXP (par, 0, i) = tmp;
1509 pcum->aapcs_reg = par;
1512 pcum->aapcs_nextncrn = ncrn + nregs;
1513 return;
1516 /* C.11 */
1517 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1519 /* The argument is passed on stack; record the needed number of words for
1520 this argument (we can re-use NREGS) and align the total size if
1521 necessary. */
1522 on_stack:
1523 pcum->aapcs_stack_words = nregs;
1524 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1525 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1526 16 / UNITS_PER_WORD) + 1;
1527 return;
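/* An illustrative consequence of the layout rules above: a structure of
   four doubles is a homogeneous floating-point aggregate, so while at
   least four of v0-v7 remain free it is passed in four consecutive
   FP/SIMD registers, expressed as a PARALLEL of four DFmode EXPR_LIST
   entries; once fewer than four of those registers remain, rule C.3 sets
   the NSRN to 8 and the whole argument goes on the stack.  */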
1530 /* Implement TARGET_FUNCTION_ARG. */
1532 static rtx
1533 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1534 const_tree type, bool named)
1536 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1537 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1539 if (mode == VOIDmode)
1540 return NULL_RTX;
1542 aarch64_layout_arg (pcum_v, mode, type, named);
1543 return pcum->aapcs_reg;
1546 void
1547 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1548 const_tree fntype ATTRIBUTE_UNUSED,
1549 rtx libname ATTRIBUTE_UNUSED,
1550 const_tree fndecl ATTRIBUTE_UNUSED,
1551 unsigned n_named ATTRIBUTE_UNUSED)
1553 pcum->aapcs_ncrn = 0;
1554 pcum->aapcs_nvrn = 0;
1555 pcum->aapcs_nextncrn = 0;
1556 pcum->aapcs_nextnvrn = 0;
1557 pcum->pcs_variant = ARM_PCS_AAPCS64;
1558 pcum->aapcs_reg = NULL_RTX;
1559 pcum->aapcs_arg_processed = false;
1560 pcum->aapcs_stack_words = 0;
1561 pcum->aapcs_stack_size = 0;
1563 return;
1566 static void
1567 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1568 enum machine_mode mode,
1569 const_tree type,
1570 bool named)
1572 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1573 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1575 aarch64_layout_arg (pcum_v, mode, type, named);
1576 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1577 != (pcum->aapcs_stack_words != 0));
1578 pcum->aapcs_arg_processed = false;
1579 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1580 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1581 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1582 pcum->aapcs_stack_words = 0;
1583 pcum->aapcs_reg = NULL_RTX;
1587 bool
1588 aarch64_function_arg_regno_p (unsigned regno)
1590 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1591 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1594 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1595 PARM_BOUNDARY bits of alignment, but will be given anything up
1596 to STACK_BOUNDARY bits if the type requires it. This makes sure
1597 that both before and after the layout of each argument, the Next
1598 Stacked Argument Address (NSAA) will have a minimum alignment of
1599 8 bytes. */
1601 static unsigned int
1602 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1604 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1606 if (alignment < PARM_BOUNDARY)
1607 alignment = PARM_BOUNDARY;
1608 if (alignment > STACK_BOUNDARY)
1609 alignment = STACK_BOUNDARY;
1610 return alignment;
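/* Worked example for the clamp above, assuming the usual definitions of
   PARM_BOUNDARY (64) and STACK_BOUNDARY (128) for this port: a type with
   8-bit alignment is promoted to 64 bits, a 128-bit aligned type stays at
   128 bits, and a request for 256-bit alignment is capped at 128 bits.  */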
1613 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1615 Return true if an argument passed on the stack should be padded upwards,
1616 i.e. if the least-significant byte of the stack slot has useful data.
1618 Small aggregate types are placed in the lowest memory address.
1620 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1622 bool
1623 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1625 /* On little-endian targets, the least significant byte of every stack
1626 argument is passed at the lowest byte address of the stack slot. */
1627 if (!BYTES_BIG_ENDIAN)
1628 return true;
1630 /* Otherwise, integral, floating-point and pointer types are padded downward:
1631 the least significant byte of a stack argument is passed at the highest
1632 byte address of the stack slot. */
1633 if (type
1634 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1635 || POINTER_TYPE_P (type))
1636 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1637 return false;
1639 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1640 return true;
1643 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1645 It specifies padding for the last (and possibly the only)
1646 element of a block move between registers and memory.
1647 Assuming the block is in memory, padding upward means that
1648 the last element is padded after its most significant byte,
1649 while in downward padding the last element is padded at
1650 its least significant byte side.
1652 Small aggregates and small complex types are always padded
1653 upwards.
1655 We don't need to worry about homogeneous floating-point or
1656 short-vector aggregates; their move is not affected by the
1657 padding direction determined here. Regardless of endianness,
1658 each element of such an aggregate is put in the least
1659 significant bits of a fp/simd register.
1661 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1662 register has useful data, and return the opposite if the most
1663 significant byte does. */
1665 bool
1666 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1667 bool first ATTRIBUTE_UNUSED)
1670 /* Small composite types are always padded upward. */
1671 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1673 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1674 : GET_MODE_SIZE (mode));
1675 if (size < 2 * UNITS_PER_WORD)
1676 return true;
1679 /* Otherwise, use the default padding. */
1680 return !BYTES_BIG_ENDIAN;
1683 static enum machine_mode
1684 aarch64_libgcc_cmp_return_mode (void)
1686 return SImode;
1689 static bool
1690 aarch64_frame_pointer_required (void)
1692 /* If the function contains dynamic stack allocations, we need to
1693 use the frame pointer to access the static parts of the frame. */
1694 if (cfun->calls_alloca)
1695 return true;
1697 /* We may have turned flag_omit_frame_pointer on in order to have this
1698 function called; if we did, we also set the 'faked_omit_frame_pointer' flag
1699 and we'll check it here.
1700 If flag_omit_frame_pointer was set normally (not faked), then we return
1701 false (no frame pointer required) in all cases. */
1703 if (flag_omit_frame_pointer && !faked_omit_frame_pointer)
1704 return false;
1705 else if (flag_omit_leaf_frame_pointer)
1706 return !crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM);
1707 return true;
1710 /* Mark the registers that need to be saved by the callee and calculate
1711 the size of the callee-saved registers area and frame record (both FP
1712 and LR may be omitted). */
1713 static void
1714 aarch64_layout_frame (void)
1716 HOST_WIDE_INT offset = 0;
1717 int regno;
1719 if (reload_completed && cfun->machine->frame.laid_out)
1720 return;
1722 cfun->machine->frame.fp_lr_offset = 0;
1724 /* First mark all the registers that really need to be saved... */
1725 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1726 cfun->machine->frame.reg_offset[regno] = -1;
1728 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1729 cfun->machine->frame.reg_offset[regno] = -1;
1731 /* ... that includes the eh data registers (if needed)... */
1732 if (crtl->calls_eh_return)
1733 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1734 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1736 /* ... and any callee-saved register that dataflow says is live. */
1737 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1738 if (df_regs_ever_live_p (regno)
1739 && !call_used_regs[regno])
1740 cfun->machine->frame.reg_offset[regno] = 0;
1742 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1743 if (df_regs_ever_live_p (regno)
1744 && !call_used_regs[regno])
1745 cfun->machine->frame.reg_offset[regno] = 0;
1747 if (frame_pointer_needed)
1749 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1750 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1751 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1754 /* Now assign stack slots for them. */
1755 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1756 if (cfun->machine->frame.reg_offset[regno] != -1)
1758 cfun->machine->frame.reg_offset[regno] = offset;
1759 offset += UNITS_PER_WORD;
1762 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1763 if (cfun->machine->frame.reg_offset[regno] != -1)
1765 cfun->machine->frame.reg_offset[regno] = offset;
1766 offset += UNITS_PER_WORD;
1769 if (frame_pointer_needed)
1771 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1772 offset += UNITS_PER_WORD;
1773 cfun->machine->frame.fp_lr_offset = UNITS_PER_WORD;
1776 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1778 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1779 offset += UNITS_PER_WORD;
1780 cfun->machine->frame.fp_lr_offset += UNITS_PER_WORD;
1783 cfun->machine->frame.padding0 =
1784 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1785 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1787 cfun->machine->frame.saved_regs_size = offset;
1788 cfun->machine->frame.laid_out = true;
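/* A small illustration of the layout above: for a function that needs a
   frame pointer and additionally saves x19 and d8, the slots are assigned
   in ascending order x19, d8, x29 (FP), x30 (LR), each UNITS_PER_WORD
   apart, and the total is then rounded up to STACK_BOUNDARY.  */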
1791 /* Make the last instruction frame-related and note that it performs
1792 the operation described by FRAME_PATTERN. */
1794 static void
1795 aarch64_set_frame_expr (rtx frame_pattern)
1797 rtx insn;
1799 insn = get_last_insn ();
1800 RTX_FRAME_RELATED_P (insn) = 1;
1801 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1802 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1803 frame_pattern,
1804 REG_NOTES (insn));
1807 static bool
1808 aarch64_register_saved_on_entry (int regno)
1810 return cfun->machine->frame.reg_offset[regno] != -1;
1814 static void
1815 aarch64_save_or_restore_fprs (int start_offset, int increment,
1816 bool restore, rtx base_rtx)
1819 unsigned regno;
1820 unsigned regno2;
1821 rtx insn;
1822 rtx (*gen_mem_ref)(enum machine_mode, rtx)
1823 = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1826 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1828 if (aarch64_register_saved_on_entry (regno))
1830 rtx mem;
1831 mem = gen_mem_ref (DFmode,
1832 plus_constant (Pmode,
1833 base_rtx,
1834 start_offset));
1836 for (regno2 = regno + 1;
1837 regno2 <= V31_REGNUM
1838 && !aarch64_register_saved_on_entry (regno2);
1839 regno2++)
1841 /* Empty loop. */
1843 if (regno2 <= V31_REGNUM &&
1844 aarch64_register_saved_on_entry (regno2))
1846 rtx mem2;
1847 /* Next highest register to be saved. */
1848 mem2 = gen_mem_ref (DFmode,
1849 plus_constant
1850 (Pmode,
1851 base_rtx,
1852 start_offset + increment));
1853 if (restore == false)
1855 insn = emit_insn
1856 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1857 mem2, gen_rtx_REG (DFmode, regno2)));
1860 else
1862 insn = emit_insn
1863 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1864 gen_rtx_REG (DFmode, regno2), mem2));
1866 add_reg_note (insn, REG_CFA_RESTORE,
1867 gen_rtx_REG (DFmode, regno));
1868 add_reg_note (insn, REG_CFA_RESTORE,
1869 gen_rtx_REG (DFmode, regno2));
1872 /* The first part of a frame-related parallel insn
1873 is always assumed to be relevant to the frame
1874 calculations; subsequent parts are only
1875 frame-related if explicitly marked. */
1876 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1877 regno = regno2;
1878 start_offset += increment * 2;
1880 else
1882 if (restore == false)
1883 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1884 else
1886 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1887 add_reg_note (insn, REG_CFA_RESTORE,
1888 gen_rtx_REG (DImode, regno));
1890 start_offset += increment;
1892 RTX_FRAME_RELATED_P (insn) = 1;
1899 /* Offset from the stack pointer of where the saves and
1900 restores have to happen. */
1901 static void
1902 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1903 bool restore)
1905 rtx insn;
1906 rtx base_rtx = stack_pointer_rtx;
1907 HOST_WIDE_INT start_offset = offset;
1908 HOST_WIDE_INT increment = UNITS_PER_WORD;
1909 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1910 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
1911 unsigned regno;
1912 unsigned regno2;
1914 for (regno = R0_REGNUM; regno <= limit; regno++)
1916 if (aarch64_register_saved_on_entry (regno))
1918 rtx mem;
1919 mem = gen_mem_ref (Pmode,
1920 plus_constant (Pmode,
1921 base_rtx,
1922 start_offset));
1924 for (regno2 = regno + 1;
1925 regno2 <= limit
1926 && !aarch64_register_saved_on_entry (regno2);
1927 regno2++)
1929 /* Empty loop. */
1931 if (regno2 <= limit &&
1932 aarch64_register_saved_on_entry (regno2))
1934 rtx mem2;
1935 /* Next highest register to be saved. */
1936 mem2 = gen_mem_ref (Pmode,
1937 plus_constant
1938 (Pmode,
1939 base_rtx,
1940 start_offset + increment));
1941 if (restore == false)
1943 insn = emit_insn
1944 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
1945 mem2, gen_rtx_REG (DImode, regno2)));
1948 else
1950 insn = emit_insn
1951 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
1952 gen_rtx_REG (DImode, regno2), mem2));
1954 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1955 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
1958 /* The first part of a frame-related parallel insn
1959 is always assumed to be relevant to the frame
1960 calculations; subsequent parts are only
1961 frame-related if explicitly marked. */
1962 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
1963 1)) = 1;
1964 regno = regno2;
1965 start_offset += increment * 2;
1967 else
1969 if (restore == false)
1970 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
1971 else
1973 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
1974 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
1976 start_offset += increment;
1978 RTX_FRAME_RELATED_P (insn) = 1;
1982 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
1986 /* AArch64 stack frames generated by this compiler look like:
1988 +-------------------------------+
1990 | incoming stack arguments |
1992 +-------------------------------+ <-- arg_pointer_rtx
1994 | callee-allocated save area |
1995 | for register varargs |
1997 +-------------------------------+
1999 | local variables |
2001 +-------------------------------+ <-- frame_pointer_rtx
2003 | callee-saved registers |
2005 +-------------------------------+
2006 | LR' |
2007 +-------------------------------+
2008 | FP' |
2009 P +-------------------------------+ <-- hard_frame_pointer_rtx
2010 | dynamic allocation |
2011 +-------------------------------+
2013 | outgoing stack arguments |
2015 +-------------------------------+ <-- stack_pointer_rtx
2017 Dynamic stack allocations such as alloca insert data at point P.
2018 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
2019 hard_frame_pointer_rtx unchanged. */
2021 /* Generate the prologue instructions for entry into a function.
2022 Establish the stack frame by decreasing the stack pointer with a
2023 properly calculated size and, if necessary, create a frame record
2024 filled with the values of LR and previous frame pointer. The
2025 current FP is also set up if it is in use. */
2027 void
2028 aarch64_expand_prologue (void)
2030 /* sub sp, sp, #<frame_size>
2031 stp {fp, lr}, [sp, #<frame_size> - 16]
2032 add fp, sp, #<frame_size> - hardfp_offset
2033 stp {cs_reg}, [fp, #-16] etc.
2035 sub sp, sp, <final_adjustment_if_any>
2037 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
2038 HOST_WIDE_INT frame_size, offset;
2039 HOST_WIDE_INT fp_offset; /* FP offset from SP */
2040 rtx insn;
2042 aarch64_layout_frame ();
2043 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2044 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
2045 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
2046 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2047 + crtl->outgoing_args_size);
2048 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2049 STACK_BOUNDARY / BITS_PER_UNIT);
2051 if (flag_stack_usage_info)
2052 current_function_static_stack_size = frame_size;
2054 fp_offset = (offset
2055 - original_frame_size
2056 - cfun->machine->frame.saved_regs_size);
2058 /* Store pairs and load pairs have a range of only -512 to 504. */
2059 if (offset >= 512)
2061 /* When the frame has a large size, an initial decrease is done on
2062 the stack pointer to jump over the callee-allocated save area for
2063 register varargs, the local variable area and/or the callee-saved
2064 register area. This will allow the pre-index write-back
2065 store pair instructions to be used for setting up the stack frame
2066 efficiently. */
2067 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2068 if (offset >= 512)
2069 offset = cfun->machine->frame.saved_regs_size;
2071 frame_size -= (offset + crtl->outgoing_args_size);
2072 fp_offset = 0;
2074 if (frame_size >= 0x1000000)
2076 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2077 emit_move_insn (op0, GEN_INT (-frame_size));
2078 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2079 aarch64_set_frame_expr (gen_rtx_SET
2080 (Pmode, stack_pointer_rtx,
2081 plus_constant (Pmode,
2082 stack_pointer_rtx,
2083 -frame_size)));
2085 else if (frame_size > 0)
2087 if ((frame_size & 0xfff) != frame_size)
2089 insn = emit_insn (gen_add2_insn
2090 (stack_pointer_rtx,
2091 GEN_INT (-(frame_size
2092 & ~(HOST_WIDE_INT)0xfff))));
2093 RTX_FRAME_RELATED_P (insn) = 1;
2095 if ((frame_size & 0xfff) != 0)
2097 insn = emit_insn (gen_add2_insn
2098 (stack_pointer_rtx,
2099 GEN_INT (-(frame_size
2100 & (HOST_WIDE_INT)0xfff))));
2101 RTX_FRAME_RELATED_P (insn) = 1;
2105 else
2106 frame_size = -1;
2108 if (offset > 0)
2110 /* Save the frame pointer and lr if the frame pointer is needed
2111 first. Make the frame pointer point to the location of the
2112 old frame pointer on the stack. */
2113 if (frame_pointer_needed)
2115 rtx mem_fp, mem_lr;
2117 if (fp_offset)
2119 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2120 GEN_INT (-offset)));
2121 RTX_FRAME_RELATED_P (insn) = 1;
2122 aarch64_set_frame_expr (gen_rtx_SET
2123 (Pmode, stack_pointer_rtx,
2124 gen_rtx_MINUS (Pmode,
2125 stack_pointer_rtx,
2126 GEN_INT (offset))));
2127 mem_fp = gen_frame_mem (DImode,
2128 plus_constant (Pmode,
2129 stack_pointer_rtx,
2130 fp_offset));
2131 mem_lr = gen_frame_mem (DImode,
2132 plus_constant (Pmode,
2133 stack_pointer_rtx,
2134 fp_offset
2135 + UNITS_PER_WORD));
2136 insn = emit_insn (gen_store_pairdi (mem_fp,
2137 hard_frame_pointer_rtx,
2138 mem_lr,
2139 gen_rtx_REG (DImode,
2140 LR_REGNUM)));
2142 else
2144 insn = emit_insn (gen_storewb_pairdi_di
2145 (stack_pointer_rtx, stack_pointer_rtx,
2146 hard_frame_pointer_rtx,
2147 gen_rtx_REG (DImode, LR_REGNUM),
2148 GEN_INT (-offset),
2149 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2150 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2153 /* The first part of a frame-related parallel insn is always
2154 assumed to be relevant to the frame calculations;
2155 subsequent parts are only frame-related if explicitly
2156 marked. */
2157 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2158 RTX_FRAME_RELATED_P (insn) = 1;
2160 /* Set up frame pointer to point to the location of the
2161 previous frame pointer on the stack. */
2162 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2163 stack_pointer_rtx,
2164 GEN_INT (fp_offset)));
2165 aarch64_set_frame_expr (gen_rtx_SET
2166 (Pmode, hard_frame_pointer_rtx,
2167 plus_constant (Pmode,
2168 stack_pointer_rtx,
2169 fp_offset)));
2170 RTX_FRAME_RELATED_P (insn) = 1;
2171 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2172 hard_frame_pointer_rtx));
2174 else
2176 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2177 GEN_INT (-offset)));
2178 RTX_FRAME_RELATED_P (insn) = 1;
2181 aarch64_save_or_restore_callee_save_registers
2182 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2185 /* when offset >= 512,
2186 sub sp, sp, #<outgoing_args_size> */
2187 if (frame_size > -1)
2189 if (crtl->outgoing_args_size > 0)
2191 insn = emit_insn (gen_add2_insn
2192 (stack_pointer_rtx,
2193 GEN_INT (- crtl->outgoing_args_size)));
2194 RTX_FRAME_RELATED_P (insn) = 1;
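/* Illustrative sketch (not from the original source): for a small frame
   whose rounded size is, say, 32 bytes and whose FP/LR record sits at the
   bottom of the frame (fp_offset == 0), the writeback path above emits
   roughly:

       stp  x29, x30, [sp, #-32]!     // store pair with pre-index writeback
       add  x29, sp, #0               // set up the frame pointer
       ...                            // save remaining callee-saved registers

   whereas frames of 512 bytes or more take the initial "sub sp, sp, #N"
   adjustment first so the subsequent store-pair offsets stay in range.  */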
2199 /* Generate the epilogue instructions for returning from a function. */
2200 void
2201 aarch64_expand_epilogue (bool for_sibcall)
2203 HOST_WIDE_INT original_frame_size, frame_size, offset;
2204 HOST_WIDE_INT fp_offset;
2205 rtx insn;
2206 rtx cfa_reg;
2208 aarch64_layout_frame ();
2209 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2210 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2211 + crtl->outgoing_args_size);
2212 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2213 STACK_BOUNDARY / BITS_PER_UNIT);
2215 fp_offset = (offset
2216 - original_frame_size
2217 - cfun->machine->frame.saved_regs_size);
2219 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2221 /* Store pairs and load pairs have an offset range of only -512 to 504.  */
2222 if (offset >= 512)
2224 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2225 if (offset >= 512)
2226 offset = cfun->machine->frame.saved_regs_size;
2228 frame_size -= (offset + crtl->outgoing_args_size);
2229 fp_offset = 0;
2230 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2232 insn = emit_insn (gen_add2_insn
2233 (stack_pointer_rtx,
2234 GEN_INT (crtl->outgoing_args_size)));
2235 RTX_FRAME_RELATED_P (insn) = 1;
2238 else
2239 frame_size = -1;
2241 /* If there were outgoing arguments or we've done dynamic stack
2242 allocation, then restore the stack pointer from the frame
2243 pointer. This is at most one insn and more efficient than using
2244 GCC's internal mechanism. */
2245 if (frame_pointer_needed
2246 && (crtl->outgoing_args_size || cfun->calls_alloca))
2248 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2249 hard_frame_pointer_rtx,
2250 GEN_INT (- fp_offset)));
2251 RTX_FRAME_RELATED_P (insn) = 1;
2252 /* As SP is set to (FP - fp_offset), according to the rules in
2253 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2254 from the value of SP from now on. */
2255 cfa_reg = stack_pointer_rtx;
2258 aarch64_save_or_restore_callee_save_registers
2259 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2261 /* Restore the frame pointer and lr if the frame pointer is needed. */
2262 if (offset > 0)
2264 if (frame_pointer_needed)
2266 rtx mem_fp, mem_lr;
2268 if (fp_offset)
2270 mem_fp = gen_frame_mem (DImode,
2271 plus_constant (Pmode,
2272 stack_pointer_rtx,
2273 fp_offset));
2274 mem_lr = gen_frame_mem (DImode,
2275 plus_constant (Pmode,
2276 stack_pointer_rtx,
2277 fp_offset
2278 + UNITS_PER_WORD));
2279 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2280 mem_fp,
2281 gen_rtx_REG (DImode,
2282 LR_REGNUM),
2283 mem_lr));
2285 else
2287 insn = emit_insn (gen_loadwb_pairdi_di
2288 (stack_pointer_rtx,
2289 stack_pointer_rtx,
2290 hard_frame_pointer_rtx,
2291 gen_rtx_REG (DImode, LR_REGNUM),
2292 GEN_INT (offset),
2293 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2294 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2295 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2296 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2297 plus_constant (Pmode, cfa_reg,
2298 offset))));
2301 /* The first part of a frame-related parallel insn
2302 is always assumed to be relevant to the frame
2303 calculations; subsequent parts are only
2304 frame-related if explicitly marked. */
2305 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2306 RTX_FRAME_RELATED_P (insn) = 1;
2307 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2308 add_reg_note (insn, REG_CFA_RESTORE,
2309 gen_rtx_REG (DImode, LR_REGNUM));
2311 if (fp_offset)
2313 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2314 GEN_INT (offset)));
2315 RTX_FRAME_RELATED_P (insn) = 1;
2318 else
2320 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2321 GEN_INT (offset)));
2322 RTX_FRAME_RELATED_P (insn) = 1;
2326 /* Stack adjustment for exception handler. */
2327 if (crtl->calls_eh_return)
2329 /* We need to unwind the stack by the offset computed by
2330 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2331 based on SP. Ideally we would update the SP and define the
2332 CFA along the lines of:
2334 SP = SP + EH_RETURN_STACKADJ_RTX
2335 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2337 However the dwarf emitter only understands a constant
2338 register offset.
2340 The solution chosen here is to use the otherwise unused IP0
2341 as a temporary register to hold the current SP value. The
2342 CFA is described using IP0 then SP is modified. */
2344 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2346 insn = emit_move_insn (ip0, stack_pointer_rtx);
2347 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2348 RTX_FRAME_RELATED_P (insn) = 1;
2350 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2352 /* Ensure the assignment to IP0 does not get optimized away. */
2353 emit_use (ip0);
2356 if (frame_size > -1)
2358 if (frame_size >= 0x1000000)
2360 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2361 emit_move_insn (op0, GEN_INT (frame_size));
2362 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2363 aarch64_set_frame_expr (gen_rtx_SET
2364 (Pmode, stack_pointer_rtx,
2365 plus_constant (Pmode,
2366 stack_pointer_rtx,
2367 frame_size)));
2369 else if (frame_size > 0)
2371 if ((frame_size & 0xfff) != 0)
2373 insn = emit_insn (gen_add2_insn
2374 (stack_pointer_rtx,
2375 GEN_INT ((frame_size
2376 & (HOST_WIDE_INT) 0xfff))));
2377 RTX_FRAME_RELATED_P (insn) = 1;
2379 if ((frame_size & 0xfff) != frame_size)
2381 insn = emit_insn (gen_add2_insn
2382 (stack_pointer_rtx,
2383 GEN_INT ((frame_size
2384 & ~ (HOST_WIDE_INT) 0xfff))));
2385 RTX_FRAME_RELATED_P (insn) = 1;
2389 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2390 plus_constant (Pmode,
2391 stack_pointer_rtx,
2392 offset)));
2395 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2396 if (!for_sibcall)
2397 emit_jump_insn (ret_rtx);
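/* Illustrative sketch (not from the original source): the matching
   epilogue for the small-frame case sketched above would look roughly
   like:

       ...                            // restore other callee-saved registers
       ldp  x29, x30, [sp], #32       // load pair with post-index writeback
       ret

   with the REG_CFA notes emitted above keeping the unwinder's view of
   the CFA consistent while sp is being restored.  */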
2400 /* Return the place to copy the exception unwinding return address to.
2401 This will probably be a stack slot, but could (in theory) be the
2402 return register.  */
2404 aarch64_final_eh_return_addr (void)
2406 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2407 aarch64_layout_frame ();
2408 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2409 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2410 + crtl->outgoing_args_size);
2411 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2412 STACK_BOUNDARY / BITS_PER_UNIT);
2413 fp_offset = offset
2414 - original_frame_size
2415 - cfun->machine->frame.saved_regs_size;
2417 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2418 return gen_rtx_REG (DImode, LR_REGNUM);
2420 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2421 result in a store to save LR introduced by builtin_eh_return () being
2422 incorrectly deleted because the alias is not detected.
2423 So in the calculation of the address to copy the exception unwinding
2424 return address to, we note 2 cases.
2425 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2426 we return a SP-relative location since all the addresses are SP-relative
2427 in this case. This prevents the store from being optimized away.
2428 If the fp_offset is not 0, then the addresses will be FP-relative and
2429 therefore we return a FP-relative location. */
2431 if (frame_pointer_needed)
2433 if (fp_offset)
2434 return gen_frame_mem (DImode,
2435 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2436 else
2437 return gen_frame_mem (DImode,
2438 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2441 /* If FP is not needed, we calculate the location of LR, which would be
2442 at the top of the saved registers block. */
2444 return gen_frame_mem (DImode,
2445 plus_constant (Pmode,
2446 stack_pointer_rtx,
2447 fp_offset
2448 + cfun->machine->frame.saved_regs_size
2449 - 2 * UNITS_PER_WORD));
2452 /* Output code to build up a constant in a register. */
2453 static void
2454 aarch64_build_constant (int regnum, HOST_WIDE_INT val)
2456 if (aarch64_bitmask_imm (val, DImode))
2457 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2458 else
2460 int i;
2461 int ncount = 0;
2462 int zcount = 0;
2463 HOST_WIDE_INT valp = val >> 16;
2464 HOST_WIDE_INT valm;
2465 HOST_WIDE_INT tval;
2467 for (i = 16; i < 64; i += 16)
2469 valm = (valp & 0xffff);
2471 if (valm != 0)
2472 ++ zcount;
2474 if (valm != 0xffff)
2475 ++ ncount;
2477 valp >>= 16;
2480 /* zcount contains the number of additional MOVK instructions
2481 required if the constant is built up with an initial MOVZ instruction,
2482 while ncount is the number of MOVK instructions required if starting
2483 with a MOVN instruction. Choose the sequence that yields the fewest
2484 instructions, preferring MOVZ instructions when they are both
2485 the same. */
2486 if (ncount < zcount)
2488 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2489 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2490 tval = 0xffff;
2492 else
2494 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2495 GEN_INT (val & 0xffff));
2496 tval = 0;
2499 val >>= 16;
2501 for (i = 16; i < 64; i += 16)
2503 if ((val & 0xffff) != tval)
2504 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2505 GEN_INT (i), GEN_INT (val & 0xffff)));
2506 val >>= 16;
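/* Worked example (illustrative, not from the original source): building
   val == 0x0000beef00001234 starts from the low 16 bits with a MOVZ and
   then patches only the 16-bit chunks that differ from the implicit
   zero fill, e.g. with regnum == 0:

       movz x0, #0x1234
       movk x0, #0xbeef, lsl #32

   A mostly-ones value such as 0xffffffffffff1234 would instead be
   reached via the MOVN path, patching with MOVK only where a chunk
   differs from 0xffff (none at all in that particular case).  */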
2511 static void
2512 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2514 HOST_WIDE_INT mdelta = delta;
2515 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2516 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2518 if (mdelta < 0)
2519 mdelta = -mdelta;
2521 if (mdelta >= 4096 * 4096)
2523 aarch64_build_constant (scratchreg, delta);
2524 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2526 else if (mdelta > 0)
2528 if (mdelta >= 4096)
2530 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2531 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2532 if (delta < 0)
2533 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2534 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2535 else
2536 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2537 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2539 if (mdelta % 4096 != 0)
2541 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2542 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2543 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
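/* Worked example (illustrative, not from the original source): for
   delta == 10000 the code above splits the addition into a 12-bit
   shifted part and a 12-bit remainder, roughly:

       mov  x<scratchreg>, #2                              // 10000 / 4096
       add  x<regnum>, x<regnum>, x<scratchreg>, lsl #12   // += 8192
       add  x<regnum>, x<regnum>, #1808                    // 10000 % 4096

   while a delta of 4096 * 4096 or more falls back to materialising the
   full constant with aarch64_build_constant and a register add.  */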
2548 /* Output code to add DELTA to the first argument, and then jump
2549 to FUNCTION. Used for C++ multiple inheritance. */
2550 static void
2551 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2552 HOST_WIDE_INT delta,
2553 HOST_WIDE_INT vcall_offset,
2554 tree function)
2556 /* The this pointer is always in x0. Note that this differs from
2557 Arm where the this pointer may be bumped to r1 if r0 is required
2558 to return a pointer to an aggregate. On AArch64 a result value
2559 pointer will be in x8. */
2560 int this_regno = R0_REGNUM;
2561 rtx this_rtx, temp0, temp1, addr, insn, funexp;
2563 reload_completed = 1;
2564 emit_note (NOTE_INSN_PROLOGUE_END);
2566 if (vcall_offset == 0)
2567 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2568 else
2570 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2572 this_rtx = gen_rtx_REG (Pmode, this_regno);
2573 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2574 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2576 addr = this_rtx;
2577 if (delta != 0)
2579 if (delta >= -256 && delta < 256)
2580 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2581 plus_constant (Pmode, this_rtx, delta));
2582 else
2583 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2586 if (Pmode == ptr_mode)
2587 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2588 else
2589 aarch64_emit_move (temp0,
2590 gen_rtx_ZERO_EXTEND (Pmode,
2591 gen_rtx_MEM (ptr_mode, addr)));
2593 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2594 addr = plus_constant (Pmode, temp0, vcall_offset);
2595 else
2597 aarch64_build_constant (IP1_REGNUM, vcall_offset);
2598 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2601 if (Pmode == ptr_mode)
2602 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2603 else
2604 aarch64_emit_move (temp1,
2605 gen_rtx_SIGN_EXTEND (Pmode,
2606 gen_rtx_MEM (ptr_mode, addr)));
2608 emit_insn (gen_add2_insn (this_rtx, temp1));
2611 /* Generate a tail call to the target function. */
2612 if (!TREE_USED (function))
2614 assemble_external (function);
2615 TREE_USED (function) = 1;
2617 funexp = XEXP (DECL_RTL (function), 0);
2618 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2619 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2620 SIBLING_CALL_P (insn) = 1;
2622 insn = get_insns ();
2623 shorten_branches (insn);
2624 final_start_function (insn, file, 1);
2625 final (insn, file, 1);
2626 final_end_function ();
2628 /* Stop pretending to be a post-reload pass. */
2629 reload_completed = 0;
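/* Illustrative sketch (not from the original source): for a C++
   this-adjusting thunk with a small DELTA and a non-zero VCALL_OFFSET,
   the code above emits approximately:

       ldr  x16, [x0, #delta]!        // this += delta; x16 = *this
       ldr  x17, [x16, #vcall_offset] // fetch the vcall adjustment
       add  x0, x0, x17               // apply it to this
       b    <function>                // tail call

   using IP0/IP1 (x16/x17) as the scratch registers named in the code.  */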
2632 static int
2633 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2635 if (GET_CODE (*x) == SYMBOL_REF)
2636 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2638 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2639 TLS offsets, not real symbol references. */
2640 if (GET_CODE (*x) == UNSPEC
2641 && XINT (*x, 1) == UNSPEC_TLS)
2642 return -1;
2644 return 0;
2647 static bool
2648 aarch64_tls_referenced_p (rtx x)
2650 if (!TARGET_HAVE_TLS)
2651 return false;
2653 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2657 static int
2658 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2660 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2661 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2663 if (*imm1 < *imm2)
2664 return -1;
2665 if (*imm1 > *imm2)
2666 return +1;
2667 return 0;
2671 static void
2672 aarch64_build_bitmask_table (void)
2674 unsigned HOST_WIDE_INT mask, imm;
2675 unsigned int log_e, e, s, r;
2676 unsigned int nimms = 0;
2678 for (log_e = 1; log_e <= 6; log_e++)
2680 e = 1 << log_e;
2681 if (e == 64)
2682 mask = ~(HOST_WIDE_INT) 0;
2683 else
2684 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2685 for (s = 1; s < e; s++)
2687 for (r = 0; r < e; r++)
2689 /* set s consecutive bits to 1 (s < 64) */
2690 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2691 /* rotate right by r */
2692 if (r != 0)
2693 imm = ((imm >> r) | (imm << (e - r))) & mask;
2694 /* replicate the constant depending on SIMD size */
2695 switch (log_e) {
2696 case 1: imm |= (imm << 2);
2697 case 2: imm |= (imm << 4);
2698 case 3: imm |= (imm << 8);
2699 case 4: imm |= (imm << 16);
2700 case 5: imm |= (imm << 32);
2701 case 6:
2702 break;
2703 default:
2704 gcc_unreachable ();
2706 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2707 aarch64_bitmasks[nimms++] = imm;
2712 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2713 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2714 aarch64_bitmasks_cmp);
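/* Worked example (illustrative, not from the original source): with
   element size e == 8, run length s == 3 and rotation r == 1 the loops
   above produce

       imm = 0b00000111                 three consecutive ones
       imm = 0b10000011                 rotated right by one within e bits
       imm = 0x8383838383838383         replicated out to 64 bits

   which is exactly the kind of pattern the logical-immediate encoding
   used by AND/ORR/EOR (and the bitmask MOV alias) can represent.  */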
2718 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2719 a left shift of 0 or 12 bits. */
2720 bool
2721 aarch64_uimm12_shift (HOST_WIDE_INT val)
2723 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2724 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val);
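/* For instance (illustrative, not from the original source), 0xabc and
   0xabc000 both satisfy aarch64_uimm12_shift (they fit in 12 bits at
   shift 0 and shift 12 respectively), whereas 0x1001 does not, because
   its set bits straddle the two 12-bit windows.  */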
2729 /* Return true if val is an immediate that can be loaded into a
2730 register by a MOVZ instruction. */
2731 static bool
2732 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2734 if (GET_MODE_SIZE (mode) > 4)
2736 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2737 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2738 return 1;
2740 else
2742 /* Ignore sign extension. */
2743 val &= (HOST_WIDE_INT) 0xffffffff;
2745 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2746 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
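/* For instance (illustrative, not from the original source), in DImode
   0x12340000 is accepted (it is 0x1234 placed at bit 16, i.e.
   "movz xN, #0x1234, lsl #16"), while 0x12345678 is rejected because it
   needs two non-zero 16-bit chunks and therefore more than one MOVZ/MOVK
   instruction.  */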
2750 /* Return true if val is a valid bitmask immediate. */
2751 bool
2752 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2754 if (GET_MODE_SIZE (mode) < 8)
2756 /* Replicate bit pattern. */
2757 val &= (HOST_WIDE_INT) 0xffffffff;
2758 val |= val << 32;
2760 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2761 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2765 /* Return true if val is an immediate that can be loaded into a
2766 register in a single instruction. */
2767 bool
2768 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2770 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2771 return 1;
2772 return aarch64_bitmask_imm (val, mode);
2775 static bool
2776 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2778 rtx base, offset;
2780 if (GET_CODE (x) == HIGH)
2781 return true;
2783 split_const (x, &base, &offset);
2784 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2786 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2787 != SYMBOL_FORCE_TO_MEM)
2788 return true;
2789 else
2790 /* Avoid generating a 64-bit relocation in ILP32; leave
2791 to aarch64_expand_mov_immediate to handle it properly. */
2792 return mode != ptr_mode;
2795 return aarch64_tls_referenced_p (x);
2798 /* Return true if register REGNO is a valid index register.
2799 STRICT_P is true if REG_OK_STRICT is in effect. */
2801 bool
2802 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2804 if (!HARD_REGISTER_NUM_P (regno))
2806 if (!strict_p)
2807 return true;
2809 if (!reg_renumber)
2810 return false;
2812 regno = reg_renumber[regno];
2814 return GP_REGNUM_P (regno);
2817 /* Return true if register REGNO is a valid base register.
2818 STRICT_P is true if REG_OK_STRICT is in effect. */
2820 bool
2821 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2823 if (!HARD_REGISTER_NUM_P (regno))
2825 if (!strict_p)
2826 return true;
2828 if (!reg_renumber)
2829 return false;
2831 regno = reg_renumber[regno];
2834 /* The fake registers will be eliminated to either the stack or
2835 hard frame pointer, both of which are usually valid base registers.
2836 Reload deals with the cases where the eliminated form isn't valid. */
2837 return (GP_REGNUM_P (regno)
2838 || regno == SP_REGNUM
2839 || regno == FRAME_POINTER_REGNUM
2840 || regno == ARG_POINTER_REGNUM);
2843 /* Return true if X is a valid base register.
2844 STRICT_P is true if REG_OK_STRICT is in effect. */
2846 static bool
2847 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2849 if (!strict_p && GET_CODE (x) == SUBREG)
2850 x = SUBREG_REG (x);
2852 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2855 /* Return true if address offset is a valid index. If it is, fill in INFO
2856 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2858 static bool
2859 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2860 enum machine_mode mode, bool strict_p)
2862 enum aarch64_address_type type;
2863 rtx index;
2864 int shift;
2866 /* (reg:P) */
2867 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2868 && GET_MODE (x) == Pmode)
2870 type = ADDRESS_REG_REG;
2871 index = x;
2872 shift = 0;
2874 /* (sign_extend:DI (reg:SI)) */
2875 else if ((GET_CODE (x) == SIGN_EXTEND
2876 || GET_CODE (x) == ZERO_EXTEND)
2877 && GET_MODE (x) == DImode
2878 && GET_MODE (XEXP (x, 0)) == SImode)
2880 type = (GET_CODE (x) == SIGN_EXTEND)
2881 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2882 index = XEXP (x, 0);
2883 shift = 0;
2885 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
2886 else if (GET_CODE (x) == MULT
2887 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2888 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2889 && GET_MODE (XEXP (x, 0)) == DImode
2890 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2891 && CONST_INT_P (XEXP (x, 1)))
2893 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2894 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2895 index = XEXP (XEXP (x, 0), 0);
2896 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2898 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
2899 else if (GET_CODE (x) == ASHIFT
2900 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
2901 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
2902 && GET_MODE (XEXP (x, 0)) == DImode
2903 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
2904 && CONST_INT_P (XEXP (x, 1)))
2906 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
2907 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2908 index = XEXP (XEXP (x, 0), 0);
2909 shift = INTVAL (XEXP (x, 1));
2911 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
2912 else if ((GET_CODE (x) == SIGN_EXTRACT
2913 || GET_CODE (x) == ZERO_EXTRACT)
2914 && GET_MODE (x) == DImode
2915 && GET_CODE (XEXP (x, 0)) == MULT
2916 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2917 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2919 type = (GET_CODE (x) == SIGN_EXTRACT)
2920 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2921 index = XEXP (XEXP (x, 0), 0);
2922 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2923 if (INTVAL (XEXP (x, 1)) != 32 + shift
2924 || INTVAL (XEXP (x, 2)) != 0)
2925 shift = -1;
2927 /* (and:DI (mult:DI (reg:DI) (const_int scale))
2928 (const_int 0xffffffff<<shift)) */
2929 else if (GET_CODE (x) == AND
2930 && GET_MODE (x) == DImode
2931 && GET_CODE (XEXP (x, 0)) == MULT
2932 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2933 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2934 && CONST_INT_P (XEXP (x, 1)))
2936 type = ADDRESS_REG_UXTW;
2937 index = XEXP (XEXP (x, 0), 0);
2938 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
2939 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2940 shift = -1;
2942 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
2943 else if ((GET_CODE (x) == SIGN_EXTRACT
2944 || GET_CODE (x) == ZERO_EXTRACT)
2945 && GET_MODE (x) == DImode
2946 && GET_CODE (XEXP (x, 0)) == ASHIFT
2947 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2948 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
2950 type = (GET_CODE (x) == SIGN_EXTRACT)
2951 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
2952 index = XEXP (XEXP (x, 0), 0);
2953 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2954 if (INTVAL (XEXP (x, 1)) != 32 + shift
2955 || INTVAL (XEXP (x, 2)) != 0)
2956 shift = -1;
2958 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
2959 (const_int 0xffffffff<<shift)) */
2960 else if (GET_CODE (x) == AND
2961 && GET_MODE (x) == DImode
2962 && GET_CODE (XEXP (x, 0)) == ASHIFT
2963 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
2964 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
2965 && CONST_INT_P (XEXP (x, 1)))
2967 type = ADDRESS_REG_UXTW;
2968 index = XEXP (XEXP (x, 0), 0);
2969 shift = INTVAL (XEXP (XEXP (x, 0), 1));
2970 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
2971 shift = -1;
2973 /* (mult:P (reg:P) (const_int scale)) */
2974 else if (GET_CODE (x) == MULT
2975 && GET_MODE (x) == Pmode
2976 && GET_MODE (XEXP (x, 0)) == Pmode
2977 && CONST_INT_P (XEXP (x, 1)))
2979 type = ADDRESS_REG_REG;
2980 index = XEXP (x, 0);
2981 shift = exact_log2 (INTVAL (XEXP (x, 1)));
2983 /* (ashift:P (reg:P) (const_int shift)) */
2984 else if (GET_CODE (x) == ASHIFT
2985 && GET_MODE (x) == Pmode
2986 && GET_MODE (XEXP (x, 0)) == Pmode
2987 && CONST_INT_P (XEXP (x, 1)))
2989 type = ADDRESS_REG_REG;
2990 index = XEXP (x, 0);
2991 shift = INTVAL (XEXP (x, 1));
2993 else
2994 return false;
2996 if (GET_CODE (index) == SUBREG)
2997 index = SUBREG_REG (index);
2999 if ((shift == 0 ||
3000 (shift > 0 && shift <= 3
3001 && (1 << shift) == GET_MODE_SIZE (mode)))
3002 && REG_P (index)
3003 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3005 info->type = type;
3006 info->offset = index;
3007 info->shift = shift;
3008 return true;
3011 return false;
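/* Illustrative example (not from the original source): for a DImode
   access, an index expression such as

       (mult:DI (reg:DI x2) (const_int 8))

   classifies as ADDRESS_REG_REG with shift == 3, matching the
   "[x1, x2, lsl 3]" addressing form, while

       (mult:DI (sign_extend:DI (reg:SI w2)) (const_int 8))

   classifies as ADDRESS_REG_SXTW ("[x1, w2, sxtw 3]").  */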
3014 static inline bool
3015 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3017 return (offset >= -64 * GET_MODE_SIZE (mode)
3018 && offset < 64 * GET_MODE_SIZE (mode)
3019 && offset % GET_MODE_SIZE (mode) == 0);
3022 static inline bool
3023 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3024 HOST_WIDE_INT offset)
3026 return offset >= -256 && offset < 256;
3029 static inline bool
3030 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3032 return (offset >= 0
3033 && offset < 4096 * GET_MODE_SIZE (mode)
3034 && offset % GET_MODE_SIZE (mode) == 0);
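/* Worked example (illustrative, not from the original source): for
   DImode (8-byte) accesses the three predicates above accept,
   respectively, multiples of 8 in [-512, 504] (the load/store-pair
   range), any offset in [-256, 255] (the unscaled LDUR/STUR forms), and
   multiples of 8 in [0, 32760] (the scaled 12-bit LDR/STR immediate).  */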
3037 /* Return true if X is a valid address for machine mode MODE. If it is,
3038 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3039 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3041 static bool
3042 aarch64_classify_address (struct aarch64_address_info *info,
3043 rtx x, enum machine_mode mode,
3044 RTX_CODE outer_code, bool strict_p)
3046 enum rtx_code code = GET_CODE (x);
3047 rtx op0, op1;
3048 bool allow_reg_index_p =
3049 outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
3051 /* Don't support anything other than POST_INC or REG addressing for
3052 AdvSIMD. */
3053 if (aarch64_vector_mode_p (mode)
3054 && (code != POST_INC && code != REG))
3055 return false;
3057 switch (code)
3059 case REG:
3060 case SUBREG:
3061 info->type = ADDRESS_REG_IMM;
3062 info->base = x;
3063 info->offset = const0_rtx;
3064 return aarch64_base_register_rtx_p (x, strict_p);
3066 case PLUS:
3067 op0 = XEXP (x, 0);
3068 op1 = XEXP (x, 1);
3069 if (GET_MODE_SIZE (mode) != 0
3070 && CONST_INT_P (op1)
3071 && aarch64_base_register_rtx_p (op0, strict_p))
3073 HOST_WIDE_INT offset = INTVAL (op1);
3075 info->type = ADDRESS_REG_IMM;
3076 info->base = op0;
3077 info->offset = op1;
3079 /* TImode and TFmode values are allowed in both pairs of X
3080 registers and individual Q registers. The available
3081 address modes are:
3082 X,X: 7-bit signed scaled offset
3083 Q: 9-bit signed offset
3084 We conservatively require an offset representable in either mode.  */
3086 if (mode == TImode || mode == TFmode)
3087 return (offset_7bit_signed_scaled_p (mode, offset)
3088 && offset_9bit_signed_unscaled_p (mode, offset));
3090 if (outer_code == PARALLEL)
3091 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3092 && offset_7bit_signed_scaled_p (mode, offset));
3093 else
3094 return (offset_9bit_signed_unscaled_p (mode, offset)
3095 || offset_12bit_unsigned_scaled_p (mode, offset));
3098 if (allow_reg_index_p)
3100 /* Look for base + (scaled/extended) index register. */
3101 if (aarch64_base_register_rtx_p (op0, strict_p)
3102 && aarch64_classify_index (info, op1, mode, strict_p))
3104 info->base = op0;
3105 return true;
3107 if (aarch64_base_register_rtx_p (op1, strict_p)
3108 && aarch64_classify_index (info, op0, mode, strict_p))
3110 info->base = op1;
3111 return true;
3115 return false;
3117 case POST_INC:
3118 case POST_DEC:
3119 case PRE_INC:
3120 case PRE_DEC:
3121 info->type = ADDRESS_REG_WB;
3122 info->base = XEXP (x, 0);
3123 info->offset = NULL_RTX;
3124 return aarch64_base_register_rtx_p (info->base, strict_p);
3126 case POST_MODIFY:
3127 case PRE_MODIFY:
3128 info->type = ADDRESS_REG_WB;
3129 info->base = XEXP (x, 0);
3130 if (GET_CODE (XEXP (x, 1)) == PLUS
3131 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3132 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3133 && aarch64_base_register_rtx_p (info->base, strict_p))
3135 HOST_WIDE_INT offset;
3136 info->offset = XEXP (XEXP (x, 1), 1);
3137 offset = INTVAL (info->offset);
3139 /* TImode and TFmode values are allowed in both pairs of X
3140 registers and individual Q registers. The available
3141 address modes are:
3142 X,X: 7-bit signed scaled offset
3143 Q: 9-bit signed offset
3144 We conservatively require an offset representable in either mode.  */
3146 if (mode == TImode || mode == TFmode)
3147 return (offset_7bit_signed_scaled_p (mode, offset)
3148 && offset_9bit_signed_unscaled_p (mode, offset));
3150 if (outer_code == PARALLEL)
3151 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3152 && offset_7bit_signed_scaled_p (mode, offset));
3153 else
3154 return offset_9bit_signed_unscaled_p (mode, offset);
3156 return false;
3158 case CONST:
3159 case SYMBOL_REF:
3160 case LABEL_REF:
3161 /* load literal: pc-relative constant pool entry. Only supported
3162 for SI mode or larger. */
3163 info->type = ADDRESS_SYMBOLIC;
3164 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3166 rtx sym, addend;
3168 split_const (x, &sym, &addend);
3169 return (GET_CODE (sym) == LABEL_REF
3170 || (GET_CODE (sym) == SYMBOL_REF
3171 && CONSTANT_POOL_ADDRESS_P (sym)));
3173 return false;
3175 case LO_SUM:
3176 info->type = ADDRESS_LO_SUM;
3177 info->base = XEXP (x, 0);
3178 info->offset = XEXP (x, 1);
3179 if (allow_reg_index_p
3180 && aarch64_base_register_rtx_p (info->base, strict_p))
3182 rtx sym, offs;
3183 split_const (info->offset, &sym, &offs);
3184 if (GET_CODE (sym) == SYMBOL_REF
3185 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3186 == SYMBOL_SMALL_ABSOLUTE))
3188 /* The symbol and offset must be aligned to the access size. */
3189 unsigned int align;
3190 unsigned int ref_size;
3192 if (CONSTANT_POOL_ADDRESS_P (sym))
3193 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3194 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3196 tree exp = SYMBOL_REF_DECL (sym);
3197 align = TYPE_ALIGN (TREE_TYPE (exp));
3198 align = CONSTANT_ALIGNMENT (exp, align);
3200 else if (SYMBOL_REF_DECL (sym))
3201 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3202 else
3203 align = BITS_PER_UNIT;
3205 ref_size = GET_MODE_SIZE (mode);
3206 if (ref_size == 0)
3207 ref_size = GET_MODE_SIZE (DImode);
3209 return ((INTVAL (offs) & (ref_size - 1)) == 0
3210 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3213 return false;
3215 default:
3216 return false;
3220 bool
3221 aarch64_symbolic_address_p (rtx x)
3223 rtx offset;
3225 split_const (x, &x, &offset);
3226 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3229 /* Classify the base of symbolic expression X, given that X appears in
3230 context CONTEXT. */
3232 enum aarch64_symbol_type
3233 aarch64_classify_symbolic_expression (rtx x,
3234 enum aarch64_symbol_context context)
3236 rtx offset;
3238 split_const (x, &x, &offset);
3239 return aarch64_classify_symbol (x, context);
3243 /* Return TRUE if X is a legitimate address for accessing memory in
3244 mode MODE. */
3245 static bool
3246 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3248 struct aarch64_address_info addr;
3250 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3253 /* Return TRUE if X is a legitimate address for accessing memory in
3254 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3255 pair operation. */
3256 bool
3257 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3258 RTX_CODE outer_code, bool strict_p)
3260 struct aarch64_address_info addr;
3262 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3265 /* Return TRUE if rtx X is immediate constant 0.0.  */
3266 bool
3267 aarch64_float_const_zero_rtx_p (rtx x)
3269 REAL_VALUE_TYPE r;
3271 if (GET_MODE (x) == VOIDmode)
3272 return false;
3274 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3275 if (REAL_VALUE_MINUS_ZERO (r))
3276 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3277 return REAL_VALUES_EQUAL (r, dconst0);
3280 /* Return the fixed registers used for condition codes. */
3282 static bool
3283 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3285 *p1 = CC_REGNUM;
3286 *p2 = INVALID_REGNUM;
3287 return true;
3290 enum machine_mode
3291 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3293 /* All floating point compares return CCFP if it is an equality
3294 comparison, and CCFPE otherwise. */
3295 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3297 switch (code)
3299 case EQ:
3300 case NE:
3301 case UNORDERED:
3302 case ORDERED:
3303 case UNLT:
3304 case UNLE:
3305 case UNGT:
3306 case UNGE:
3307 case UNEQ:
3308 case LTGT:
3309 return CCFPmode;
3311 case LT:
3312 case LE:
3313 case GT:
3314 case GE:
3315 return CCFPEmode;
3317 default:
3318 gcc_unreachable ();
3322 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3323 && y == const0_rtx
3324 && (code == EQ || code == NE || code == LT || code == GE)
3325 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3326 || GET_CODE (x) == NEG))
3327 return CC_NZmode;
3329 /* A compare with a shifted operand. Because of canonicalization,
3330 the comparison will have to be swapped when we emit the assembly
3331 code. */
3332 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3333 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3334 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3335 || GET_CODE (x) == LSHIFTRT
3336 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3337 return CC_SWPmode;
3339 /* Similarly for a negated operand, but we can only do this for
3340 equalities. */
3341 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3342 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3343 && (code == EQ || code == NE)
3344 && GET_CODE (x) == NEG)
3345 return CC_Zmode;
3347 /* A compare of a mode narrower than SI mode against zero can be done
3348 by extending the value in the comparison. */
3349 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3350 && y == const0_rtx)
3351 /* Only use sign-extension if we really need it. */
3352 return ((code == GT || code == GE || code == LE || code == LT)
3353 ? CC_SESWPmode : CC_ZESWPmode);
3355 /* For everything else, return CCmode. */
3356 return CCmode;
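/* Illustrative example (not from the original source): comparing
   (ashift:DI (reg x0) (const_int 2)) against a register y selects
   CC_SWPmode, because canonical RTL keeps the shifted operand on the
   left while the hardware comparison must be emitted the other way
   round (roughly "cmp y, x0, lsl 2"); aarch64_get_condition_code then
   swaps GT/LT, GE/LE and the unsigned pairs accordingly.  */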
3359 static unsigned
3360 aarch64_get_condition_code (rtx x)
3362 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3363 enum rtx_code comp_code = GET_CODE (x);
3365 if (GET_MODE_CLASS (mode) != MODE_CC)
3366 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3368 switch (mode)
3370 case CCFPmode:
3371 case CCFPEmode:
3372 switch (comp_code)
3374 case GE: return AARCH64_GE;
3375 case GT: return AARCH64_GT;
3376 case LE: return AARCH64_LS;
3377 case LT: return AARCH64_MI;
3378 case NE: return AARCH64_NE;
3379 case EQ: return AARCH64_EQ;
3380 case ORDERED: return AARCH64_VC;
3381 case UNORDERED: return AARCH64_VS;
3382 case UNLT: return AARCH64_LT;
3383 case UNLE: return AARCH64_LE;
3384 case UNGT: return AARCH64_HI;
3385 case UNGE: return AARCH64_PL;
3386 default: gcc_unreachable ();
3388 break;
3390 case CCmode:
3391 switch (comp_code)
3393 case NE: return AARCH64_NE;
3394 case EQ: return AARCH64_EQ;
3395 case GE: return AARCH64_GE;
3396 case GT: return AARCH64_GT;
3397 case LE: return AARCH64_LE;
3398 case LT: return AARCH64_LT;
3399 case GEU: return AARCH64_CS;
3400 case GTU: return AARCH64_HI;
3401 case LEU: return AARCH64_LS;
3402 case LTU: return AARCH64_CC;
3403 default: gcc_unreachable ();
3405 break;
3407 case CC_SWPmode:
3408 case CC_ZESWPmode:
3409 case CC_SESWPmode:
3410 switch (comp_code)
3412 case NE: return AARCH64_NE;
3413 case EQ: return AARCH64_EQ;
3414 case GE: return AARCH64_LE;
3415 case GT: return AARCH64_LT;
3416 case LE: return AARCH64_GE;
3417 case LT: return AARCH64_GT;
3418 case GEU: return AARCH64_LS;
3419 case GTU: return AARCH64_CC;
3420 case LEU: return AARCH64_CS;
3421 case LTU: return AARCH64_HI;
3422 default: gcc_unreachable ();
3424 break;
3426 case CC_NZmode:
3427 switch (comp_code)
3429 case NE: return AARCH64_NE;
3430 case EQ: return AARCH64_EQ;
3431 case GE: return AARCH64_PL;
3432 case LT: return AARCH64_MI;
3433 default: gcc_unreachable ();
3435 break;
3437 case CC_Zmode:
3438 switch (comp_code)
3440 case NE: return AARCH64_NE;
3441 case EQ: return AARCH64_EQ;
3442 default: gcc_unreachable ();
3444 break;
3446 default:
3447 gcc_unreachable ();
3448 break;
3452 static unsigned
3453 bit_count (unsigned HOST_WIDE_INT value)
3455 unsigned count = 0;
3457 while (value)
3459 count++;
3460 value &= value - 1;
3463 return count;
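/* Illustrative example (not from the original source): the loop above
   clears the lowest set bit on each iteration (value &= value - 1), so
   for value == 0b101100 it runs three times, 0b101100 -> 0b101000 ->
   0b100000 -> 0, and returns 3.  */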
3466 void
3467 aarch64_print_operand (FILE *f, rtx x, char code)
3469 switch (code)
3471 /* An integer or symbol address without a preceding # sign. */
3472 case 'c':
3473 switch (GET_CODE (x))
3475 case CONST_INT:
3476 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3477 break;
3479 case SYMBOL_REF:
3480 output_addr_const (f, x);
3481 break;
3483 case CONST:
3484 if (GET_CODE (XEXP (x, 0)) == PLUS
3485 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3487 output_addr_const (f, x);
3488 break;
3490 /* Fall through. */
3492 default:
3493 output_operand_lossage ("Unsupported operand for code '%c'", code);
3495 break;
3497 case 'e':
3498 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3500 int n;
3502 if (GET_CODE (x) != CONST_INT
3503 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3505 output_operand_lossage ("invalid operand for '%%%c'", code);
3506 return;
3509 switch (n)
3511 case 3:
3512 fputc ('b', f);
3513 break;
3514 case 4:
3515 fputc ('h', f);
3516 break;
3517 case 5:
3518 fputc ('w', f);
3519 break;
3520 default:
3521 output_operand_lossage ("invalid operand for '%%%c'", code);
3522 return;
3525 break;
3527 case 'p':
3529 int n;
3531 /* Print N such that 2^N == X. */
3532 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3534 output_operand_lossage ("invalid operand for '%%%c'", code);
3535 return;
3538 asm_fprintf (f, "%d", n);
3540 break;
3542 case 'P':
3543 /* Print the number of non-zero bits in X (a const_int). */
3544 if (GET_CODE (x) != CONST_INT)
3546 output_operand_lossage ("invalid operand for '%%%c'", code);
3547 return;
3550 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3551 break;
3553 case 'H':
3554 /* Print the higher numbered register of a pair (TImode) of regs. */
3555 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3557 output_operand_lossage ("invalid operand for '%%%c'", code);
3558 return;
3561 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3562 break;
3564 case 'm':
3565 /* Print a condition (eq, ne, etc). */
3567 /* CONST_TRUE_RTX means always -- that's the default. */
3568 if (x == const_true_rtx)
3569 return;
3571 if (!COMPARISON_P (x))
3573 output_operand_lossage ("invalid operand for '%%%c'", code);
3574 return;
3577 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3578 break;
3580 case 'M':
3581 /* Print the inverse of a condition (eq <-> ne, etc). */
3583 /* CONST_TRUE_RTX means never -- that's the default. */
3584 if (x == const_true_rtx)
3586 fputs ("nv", f);
3587 return;
3590 if (!COMPARISON_P (x))
3592 output_operand_lossage ("invalid operand for '%%%c'", code);
3593 return;
3596 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3597 (aarch64_get_condition_code (x))], f);
3598 break;
3600 case 'b':
3601 case 'h':
3602 case 's':
3603 case 'd':
3604 case 'q':
3605 /* Print a scalar FP/SIMD register name. */
3606 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3608 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3609 return;
3611 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3612 break;
3614 case 'S':
3615 case 'T':
3616 case 'U':
3617 case 'V':
3618 /* Print the first FP/SIMD register name in a list. */
3619 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3621 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3622 return;
3624 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3625 break;
3627 case 'X':
3628 /* Print bottom 16 bits of integer constant in hex. */
3629 if (GET_CODE (x) != CONST_INT)
3631 output_operand_lossage ("invalid operand for '%%%c'", code);
3632 return;
3634 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3635 break;
3637 case 'w':
3638 case 'x':
3639 /* Print a general register name or the zero register (32-bit or
3640 64-bit). */
3641 if (x == const0_rtx
3642 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3644 asm_fprintf (f, "%czr", code);
3645 break;
3648 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3650 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3651 break;
3654 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3656 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3657 break;
3660 /* Fall through */
3662 case 0:
3663 /* Print a normal operand, if it's a general register, then we
3664 assume DImode. */
3665 if (x == NULL)
3667 output_operand_lossage ("missing operand");
3668 return;
3671 switch (GET_CODE (x))
3673 case REG:
3674 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3675 break;
3677 case MEM:
3678 aarch64_memory_reference_mode = GET_MODE (x);
3679 output_address (XEXP (x, 0));
3680 break;
3682 case LABEL_REF:
3683 case SYMBOL_REF:
3684 output_addr_const (asm_out_file, x);
3685 break;
3687 case CONST_INT:
3688 asm_fprintf (f, "%wd", INTVAL (x));
3689 break;
3691 case CONST_VECTOR:
3692 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3694 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3695 HOST_WIDE_INT_MIN,
3696 HOST_WIDE_INT_MAX));
3697 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3699 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3701 fputc ('0', f);
3703 else
3704 gcc_unreachable ();
3705 break;
3707 case CONST_DOUBLE:
3708 /* CONST_DOUBLE can represent a double-width integer.
3709 In this case, the mode of x is VOIDmode. */
3710 if (GET_MODE (x) == VOIDmode)
3711 ; /* Do Nothing. */
3712 else if (aarch64_float_const_zero_rtx_p (x))
3714 fputc ('0', f);
3715 break;
3717 else if (aarch64_float_const_representable_p (x))
3719 #define buf_size 20
3720 char float_buf[buf_size] = {'\0'};
3721 REAL_VALUE_TYPE r;
3722 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3723 real_to_decimal_for_mode (float_buf, &r,
3724 buf_size, buf_size,
3725 1, GET_MODE (x));
3726 asm_fprintf (asm_out_file, "%s", float_buf);
3727 break;
3728 #undef buf_size
3730 output_operand_lossage ("invalid constant");
3731 return;
3732 default:
3733 output_operand_lossage ("invalid operand");
3734 return;
3736 break;
3738 case 'A':
3739 if (GET_CODE (x) == HIGH)
3740 x = XEXP (x, 0);
3742 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3744 case SYMBOL_SMALL_GOT:
3745 asm_fprintf (asm_out_file, ":got:");
3746 break;
3748 case SYMBOL_SMALL_TLSGD:
3749 asm_fprintf (asm_out_file, ":tlsgd:");
3750 break;
3752 case SYMBOL_SMALL_TLSDESC:
3753 asm_fprintf (asm_out_file, ":tlsdesc:");
3754 break;
3756 case SYMBOL_SMALL_GOTTPREL:
3757 asm_fprintf (asm_out_file, ":gottprel:");
3758 break;
3760 case SYMBOL_SMALL_TPREL:
3761 asm_fprintf (asm_out_file, ":tprel:");
3762 break;
3764 case SYMBOL_TINY_GOT:
3765 gcc_unreachable ();
3766 break;
3768 default:
3769 break;
3771 output_addr_const (asm_out_file, x);
3772 break;
3774 case 'L':
3775 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3777 case SYMBOL_SMALL_GOT:
3778 asm_fprintf (asm_out_file, ":lo12:");
3779 break;
3781 case SYMBOL_SMALL_TLSGD:
3782 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3783 break;
3785 case SYMBOL_SMALL_TLSDESC:
3786 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3787 break;
3789 case SYMBOL_SMALL_GOTTPREL:
3790 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3791 break;
3793 case SYMBOL_SMALL_TPREL:
3794 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3795 break;
3797 case SYMBOL_TINY_GOT:
3798 asm_fprintf (asm_out_file, ":got:");
3799 break;
3801 default:
3802 break;
3804 output_addr_const (asm_out_file, x);
3805 break;
3807 case 'G':
3809 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3811 case SYMBOL_SMALL_TPREL:
3812 asm_fprintf (asm_out_file, ":tprel_hi12:");
3813 break;
3814 default:
3815 break;
3817 output_addr_const (asm_out_file, x);
3818 break;
3820 default:
3821 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3822 return;
3826 void
3827 aarch64_print_operand_address (FILE *f, rtx x)
3829 struct aarch64_address_info addr;
3831 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3832 MEM, true))
3833 switch (addr.type)
3835 case ADDRESS_REG_IMM:
3836 if (addr.offset == const0_rtx)
3837 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3838 else
3839 asm_fprintf (f, "[%s,%wd]", reg_names [REGNO (addr.base)],
3840 INTVAL (addr.offset));
3841 return;
3843 case ADDRESS_REG_REG:
3844 if (addr.shift == 0)
3845 asm_fprintf (f, "[%s,%s]", reg_names [REGNO (addr.base)],
3846 reg_names [REGNO (addr.offset)]);
3847 else
3848 asm_fprintf (f, "[%s,%s,lsl %u]", reg_names [REGNO (addr.base)],
3849 reg_names [REGNO (addr.offset)], addr.shift);
3850 return;
3852 case ADDRESS_REG_UXTW:
3853 if (addr.shift == 0)
3854 asm_fprintf (f, "[%s,w%d,uxtw]", reg_names [REGNO (addr.base)],
3855 REGNO (addr.offset) - R0_REGNUM);
3856 else
3857 asm_fprintf (f, "[%s,w%d,uxtw %u]", reg_names [REGNO (addr.base)],
3858 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3859 return;
3861 case ADDRESS_REG_SXTW:
3862 if (addr.shift == 0)
3863 asm_fprintf (f, "[%s,w%d,sxtw]", reg_names [REGNO (addr.base)],
3864 REGNO (addr.offset) - R0_REGNUM);
3865 else
3866 asm_fprintf (f, "[%s,w%d,sxtw %u]", reg_names [REGNO (addr.base)],
3867 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3868 return;
3870 case ADDRESS_REG_WB:
3871 switch (GET_CODE (x))
3873 case PRE_INC:
3874 asm_fprintf (f, "[%s,%d]!", reg_names [REGNO (addr.base)],
3875 GET_MODE_SIZE (aarch64_memory_reference_mode));
3876 return;
3877 case POST_INC:
3878 asm_fprintf (f, "[%s],%d", reg_names [REGNO (addr.base)],
3879 GET_MODE_SIZE (aarch64_memory_reference_mode));
3880 return;
3881 case PRE_DEC:
3882 asm_fprintf (f, "[%s,-%d]!", reg_names [REGNO (addr.base)],
3883 GET_MODE_SIZE (aarch64_memory_reference_mode));
3884 return;
3885 case POST_DEC:
3886 asm_fprintf (f, "[%s],-%d", reg_names [REGNO (addr.base)],
3887 GET_MODE_SIZE (aarch64_memory_reference_mode));
3888 return;
3889 case PRE_MODIFY:
3890 asm_fprintf (f, "[%s,%wd]!", reg_names [REGNO (addr.base)],
3891 INTVAL (addr.offset));
3892 return;
3893 case POST_MODIFY:
3894 asm_fprintf (f, "[%s],%wd", reg_names [REGNO (addr.base)],
3895 INTVAL (addr.offset));
3896 return;
3897 default:
3898 break;
3900 break;
3902 case ADDRESS_LO_SUM:
3903 asm_fprintf (f, "[%s,#:lo12:", reg_names [REGNO (addr.base)]);
3904 output_addr_const (f, addr.offset);
3905 asm_fprintf (f, "]");
3906 return;
3908 case ADDRESS_SYMBOLIC:
3909 break;
3912 output_addr_const (f, x);
3915 bool
3916 aarch64_label_mentioned_p (rtx x)
3918 const char *fmt;
3919 int i;
3921 if (GET_CODE (x) == LABEL_REF)
3922 return true;
3924 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
3925 referencing instruction, but they are constant offsets, not
3926 symbols. */
3927 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
3928 return false;
3930 fmt = GET_RTX_FORMAT (GET_CODE (x));
3931 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3933 if (fmt[i] == 'E')
3935 int j;
3937 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3938 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
3939 return 1;
3941 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
3942 return 1;
3945 return 0;
3948 /* Implement REGNO_REG_CLASS. */
3950 enum reg_class
3951 aarch64_regno_regclass (unsigned regno)
3953 if (GP_REGNUM_P (regno))
3954 return CORE_REGS;
3956 if (regno == SP_REGNUM)
3957 return STACK_REG;
3959 if (regno == FRAME_POINTER_REGNUM
3960 || regno == ARG_POINTER_REGNUM)
3961 return POINTER_REGS;
3963 if (FP_REGNUM_P (regno))
3964 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
3966 return NO_REGS;
3969 /* Try a machine-dependent way of reloading an illegitimate address
3970 operand. If we find one, push the reload and return the new rtx. */
3973 aarch64_legitimize_reload_address (rtx *x_p,
3974 enum machine_mode mode,
3975 int opnum, int type,
3976 int ind_levels ATTRIBUTE_UNUSED)
3978 rtx x = *x_p;
3980 /* Do not allow mem (plus (reg, const)) if vector mode. */
3981 if (aarch64_vector_mode_p (mode)
3982 && GET_CODE (x) == PLUS
3983 && REG_P (XEXP (x, 0))
3984 && CONST_INT_P (XEXP (x, 1)))
3986 rtx orig_rtx = x;
3987 x = copy_rtx (x);
3988 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
3989 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
3990 opnum, (enum reload_type) type);
3991 return x;
3994 /* We must recognize output that we have already generated ourselves. */
3995 if (GET_CODE (x) == PLUS
3996 && GET_CODE (XEXP (x, 0)) == PLUS
3997 && REG_P (XEXP (XEXP (x, 0), 0))
3998 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3999 && CONST_INT_P (XEXP (x, 1)))
4001 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4002 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4003 opnum, (enum reload_type) type);
4004 return x;
4007 /* We wish to handle large displacements off a base register by splitting
4008 the addend across an add and the mem insn. This can cut the number of
4009 extra insns needed from 3 to 1. It is only useful for load/store of a
4010 single register with a 12-bit offset field.  */
4011 if (GET_CODE (x) == PLUS
4012 && REG_P (XEXP (x, 0))
4013 && CONST_INT_P (XEXP (x, 1))
4014 && HARD_REGISTER_P (XEXP (x, 0))
4015 && mode != TImode
4016 && mode != TFmode
4017 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4019 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4020 HOST_WIDE_INT low = val & 0xfff;
4021 HOST_WIDE_INT high = val - low;
4022 HOST_WIDE_INT offs;
4023 rtx cst;
4024 enum machine_mode xmode = GET_MODE (x);
4026 /* In ILP32, xmode can be either DImode or SImode. */
4027 gcc_assert (xmode == DImode || xmode == SImode);
4029 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4030 BLKmode alignment. */
4031 if (GET_MODE_SIZE (mode) == 0)
4032 return NULL_RTX;
4034 offs = low % GET_MODE_SIZE (mode);
4036 /* Align misaligned offset by adjusting high part to compensate. */
4037 if (offs != 0)
4039 if (aarch64_uimm12_shift (high + offs))
4041 /* Align down. */
4042 low = low - offs;
4043 high = high + offs;
4045 else
4047 /* Align up. */
4048 offs = GET_MODE_SIZE (mode) - offs;
4049 low = low + offs;
4050 high = high + (low & 0x1000) - offs;
4051 low &= 0xfff;
4055 /* Check for overflow. */
4056 if (high + low != val)
4057 return NULL_RTX;
4059 cst = GEN_INT (high);
4060 if (!aarch64_uimm12_shift (high))
4061 cst = force_const_mem (xmode, cst);
4063 /* Reload high part into base reg, leaving the low part
4064 in the mem instruction.
4065 Note that replacing this gen_rtx_PLUS with plus_constant is
4066 wrong in this case because we rely on the
4067 (plus (plus reg c1) c2) structure being preserved so that
4068 XEXP (*p, 0) in push_reload below uses the correct term. */
4069 x = gen_rtx_PLUS (xmode,
4070 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4071 GEN_INT (low));
4073 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4074 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4075 opnum, (enum reload_type) type);
4076 return x;
4079 return NULL_RTX;
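/* Worked example (illustrative, not from the original source): a DImode
   access at base + 0x10ab8 splits into high == 0x10000 (a uimm12 value
   shifted by 12) and low == 0xab8 (a multiple of 8), so the reloaded
   address becomes roughly

       add  xT, xBASE, #0x10, lsl #12
       ldr  x0, [xT, #0xab8]

   instead of materialising the full constant separately first.  xT and
   xBASE are placeholder register names.  */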
4083 static reg_class_t
4084 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4085 reg_class_t rclass,
4086 enum machine_mode mode,
4087 secondary_reload_info *sri)
4089 /* Without the TARGET_SIMD instructions we cannot move a Q register
4090 to a Q register directly. We need a scratch. */
4091 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4092 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4093 && reg_class_subset_p (rclass, FP_REGS))
4095 if (mode == TFmode)
4096 sri->icode = CODE_FOR_aarch64_reload_movtf;
4097 else if (mode == TImode)
4098 sri->icode = CODE_FOR_aarch64_reload_movti;
4099 return NO_REGS;
4102 /* A TFmode or TImode memory access should be handled via the FP_REGS class
4103 because AArch64 has richer addressing modes for LDR/STR instructions
4104 than LDP/STP instructions. */
4105 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
4106 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4107 return FP_REGS;
4109 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4110 return CORE_REGS;
4112 return NO_REGS;
4115 static bool
4116 aarch64_can_eliminate (const int from, const int to)
4118 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4119 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4121 if (frame_pointer_needed)
4123 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4124 return true;
4125 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4126 return false;
4127 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4128 && !cfun->calls_alloca)
4129 return true;
4130 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4131 return true;
4132 return false;
4134 else
4136 /* If we decided that we didn't need a leaf frame pointer but then used
4137 LR in the function, then we'll want a frame pointer after all, so
4138 prevent this elimination to ensure a frame pointer is used.
4140 NOTE: the original value of flag_omit_frame_pointer gets trashed
4141 IFF flag_omit_leaf_frame_pointer is true, so we check the value
4142 of faked_omit_frame_pointer here (which is true when we always
4143 wish to keep non-leaf frame pointers but only wish to keep leaf frame
4144 pointers when LR is clobbered). */
4145 if (to == STACK_POINTER_REGNUM
4146 && df_regs_ever_live_p (LR_REGNUM)
4147 && faked_omit_frame_pointer)
4148 return false;
4151 return true;
4154 HOST_WIDE_INT
4155 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4157 HOST_WIDE_INT frame_size;
4158 HOST_WIDE_INT offset;
4160 aarch64_layout_frame ();
4161 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4162 + crtl->outgoing_args_size
4163 + cfun->machine->saved_varargs_size);
4165 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4166 offset = frame_size;
4168 if (to == HARD_FRAME_POINTER_REGNUM)
4170 if (from == ARG_POINTER_REGNUM)
4171 return offset - crtl->outgoing_args_size;
4173 if (from == FRAME_POINTER_REGNUM)
4174 return cfun->machine->frame.saved_regs_size + get_frame_size ();
4177 if (to == STACK_POINTER_REGNUM)
4179 if (from == FRAME_POINTER_REGNUM)
4181 HOST_WIDE_INT elim = crtl->outgoing_args_size
4182 + cfun->machine->frame.saved_regs_size
4183 + get_frame_size ()
4184 - cfun->machine->frame.fp_lr_offset;
4185 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4186 return elim;
4190 return offset;
4194 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4195 previous frame. */
4198 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4200 if (count != 0)
4201 return const0_rtx;
4202 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4206 static void
4207 aarch64_asm_trampoline_template (FILE *f)
4209 if (TARGET_ILP32)
4211 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4212 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4214 else
4216 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4217 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4219 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4220 assemble_aligned_integer (4, const0_rtx);
4221 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4222 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
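/* Illustrative layout note (added comment, not part of the original source),
   assuming the LP64 ABI where POINTER_BYTES == 8:

     bytes  0..15  trampoline code emitted above
     bytes 16..23  target function address, filled in by
		   aarch64_trampoline_init below
     bytes 24..31  static chain value, likewise filled in below.  */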
4225 static void
4226 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4228 rtx fnaddr, mem, a_tramp;
4229 const int tramp_code_sz = 16;
4231   /* We don't need to copy the trailing D-words; we fill those in below.  */
4232 emit_block_move (m_tramp, assemble_trampoline_template (),
4233 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4234 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4235 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4236 if (GET_MODE (fnaddr) != ptr_mode)
4237 fnaddr = convert_memory_address (ptr_mode, fnaddr);
4238 emit_move_insn (mem, fnaddr);
4240 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4241 emit_move_insn (mem, chain_value);
4243 /* XXX We should really define a "clear_cache" pattern and use
4244 gen_clear_cache(). */
4245 a_tramp = XEXP (m_tramp, 0);
4246 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4247 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4248 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4249 ptr_mode);
4252 static unsigned char
4253 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4255 switch (regclass)
4257 case CORE_REGS:
4258 case POINTER_REGS:
4259 case GENERAL_REGS:
4260 case ALL_REGS:
4261 case FP_REGS:
4262 case FP_LO_REGS:
4263 return
4264 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4265 (GET_MODE_SIZE (mode) + 7) / 8;
4266 case STACK_REG:
4267 return 1;
4269 case NO_REGS:
4270 return 0;
4272 default:
4273 break;
4275 gcc_unreachable ();
4278 static reg_class_t
4279 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4281 if (regclass == POINTER_REGS)
4282 return GENERAL_REGS;
4284 if (regclass == STACK_REG)
4286 if (REG_P(x)
4287 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4288 return regclass;
4290 return NO_REGS;
4293 /* If it's an integer immediate that MOVI can't handle, then
4294 FP_REGS is not an option, so we return NO_REGS instead. */
4295 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4296 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4297 return NO_REGS;
4299   /* Register elimination can result in a request for
4300      SP+constant->FP_REGS.  We cannot support such operations, which
4301      use SP as the source and an FP_REG as the destination, so reject
4302      them right now.  */
4303 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4305 rtx lhs = XEXP (x, 0);
4307 /* Look through a possible SUBREG introduced by ILP32. */
4308 if (GET_CODE (lhs) == SUBREG)
4309 lhs = SUBREG_REG (lhs);
4311 gcc_assert (REG_P (lhs));
4312 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4313 POINTER_REGS));
4314 return NO_REGS;
4317 return regclass;
4320 void
4321 aarch64_asm_output_labelref (FILE* f, const char *name)
4323 asm_fprintf (f, "%U%s", name);
4326 static void
4327 aarch64_elf_asm_constructor (rtx symbol, int priority)
4329 if (priority == DEFAULT_INIT_PRIORITY)
4330 default_ctor_section_asm_out_constructor (symbol, priority);
4331 else
4333 section *s;
4334 char buf[18];
4335 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4336 s = get_section (buf, SECTION_WRITE, NULL);
4337 switch_to_section (s);
4338 assemble_align (POINTER_SIZE);
4339 assemble_aligned_integer (POINTER_BYTES, symbol);
4343 static void
4344 aarch64_elf_asm_destructor (rtx symbol, int priority)
4346 if (priority == DEFAULT_INIT_PRIORITY)
4347 default_dtor_section_asm_out_destructor (symbol, priority);
4348 else
4350 section *s;
4351 char buf[18];
4352 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4353 s = get_section (buf, SECTION_WRITE, NULL);
4354 switch_to_section (s);
4355 assemble_align (POINTER_SIZE);
4356 assemble_aligned_integer (POINTER_BYTES, symbol);
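/* Illustrative note (added comment, not part of the original source): for a
   byte-sized dispatch table, aarch64_output_casesi below emits a sequence
   along the lines of

     ldrb	w3, [x0, w1, uxtw]
     adr	x4, .Lrtx<N>
     add	x3, x4, w3, sxtb #2
     br	x3
   .Lrtx<N>:

   (with x0/w1/x3/x4 standing in for operands 0, 1, 3 and 4): the table
   entry is loaded, scaled by 4 and added to the address of the label that
   follows the branch.  */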
4360 const char*
4361 aarch64_output_casesi (rtx *operands)
4363 char buf[100];
4364 char label[100];
4365 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
4366 int index;
4367 static const char *const patterns[4][2] =
4370 "ldrb\t%w3, [%0,%w1,uxtw]",
4371 "add\t%3, %4, %w3, sxtb #2"
4374 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4375 "add\t%3, %4, %w3, sxth #2"
4378 "ldr\t%w3, [%0,%w1,uxtw #2]",
4379 "add\t%3, %4, %w3, sxtw #2"
4381 /* We assume that DImode is only generated when not optimizing and
4382 that we don't really need 64-bit address offsets. That would
4383 imply an object file with 8GB of code in a single function! */
4385 "ldr\t%w3, [%0,%w1,uxtw #2]",
4386 "add\t%3, %4, %w3, sxtw #2"
4390 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4392 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4394 gcc_assert (index >= 0 && index <= 3);
4396   /* Need to implement table size reduction, by changing the code below.  */
4397 output_asm_insn (patterns[index][0], operands);
4398 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4399 snprintf (buf, sizeof (buf),
4400 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4401 output_asm_insn (buf, operands);
4402 output_asm_insn (patterns[index][1], operands);
4403 output_asm_insn ("br\t%3", operands);
4404 assemble_label (asm_out_file, label);
4405 return "";
4409 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4410 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4411 operator. */
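/* Example (added for illustration, not in the original source):
   aarch64_uxt_size (1, 0x1fe) returns 8, since 0x1fe == 0xff << 1 and a
   UXTB therefore covers the mask; aarch64_uxt_size (0, 0xffff) returns 16
   (UXTH); a mask that is not a contiguous 8/16/32-bit field shifted left
   by 0..3 yields 0.  */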
4414 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4416 if (shift >= 0 && shift <= 3)
4418 int size;
4419 for (size = 8; size <= 32; size *= 2)
4421 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4422 if (mask == bits << shift)
4423 return size;
4426 return 0;
4429 static bool
4430 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4431 const_rtx x ATTRIBUTE_UNUSED)
4433 /* We can't use blocks for constants when we're using a per-function
4434 constant pool. */
4435 return false;
4438 static section *
4439 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4440 rtx x ATTRIBUTE_UNUSED,
4441 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4443 /* Force all constant pool entries into the current function section. */
4444 return function_section (current_function_decl);
4448 /* Costs. */
4450 /* Helper function for rtx cost calculation. Strip a shift expression
4451 from X. Returns the inner operand if successful, or the original
4452 expression on failure. */
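/* For example (illustrative, not in the original source):
     (ashift (reg x1) (const_int 3))  ->  (reg x1)
     (mult (reg x1) (const_int 8))    ->  (reg x1)   [power-of-two multiply]
   while (mult (reg x1) (const_int 6)) is returned unchanged.  */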
4453 static rtx
4454 aarch64_strip_shift (rtx x)
4456 rtx op = x;
4458 if ((GET_CODE (op) == ASHIFT
4459 || GET_CODE (op) == ASHIFTRT
4460 || GET_CODE (op) == LSHIFTRT)
4461 && CONST_INT_P (XEXP (op, 1)))
4462 return XEXP (op, 0);
4464 if (GET_CODE (op) == MULT
4465 && CONST_INT_P (XEXP (op, 1))
4466 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4467 return XEXP (op, 0);
4469 return x;
4472 /* Helper function for rtx cost calculation. Strip a shift or extend
4473 expression from X. Returns the inner operand if successful, or the
4474 original expression on failure. We deal with a number of possible
4475 canonicalization variations here. */
4476 static rtx
4477 aarch64_strip_shift_or_extend (rtx x)
4479 rtx op = x;
4481 /* Zero and sign extraction of a widened value. */
4482 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4483 && XEXP (op, 2) == const0_rtx
4484 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4485 XEXP (op, 1)))
4486 return XEXP (XEXP (op, 0), 0);
4488 /* It can also be represented (for zero-extend) as an AND with an
4489 immediate. */
4490 if (GET_CODE (op) == AND
4491 && GET_CODE (XEXP (op, 0)) == MULT
4492 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4493 && CONST_INT_P (XEXP (op, 1))
4494 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4495 INTVAL (XEXP (op, 1))) != 0)
4496 return XEXP (XEXP (op, 0), 0);
4498 /* Now handle extended register, as this may also have an optional
4499 left shift by 1..4. */
4500 if (GET_CODE (op) == ASHIFT
4501 && CONST_INT_P (XEXP (op, 1))
4502 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4503 op = XEXP (op, 0);
4505 if (GET_CODE (op) == ZERO_EXTEND
4506 || GET_CODE (op) == SIGN_EXTEND)
4507 op = XEXP (op, 0);
4509 if (op != x)
4510 return op;
4512 return aarch64_strip_shift (x);
4515 /* Calculate the cost of calculating X, storing it in *COST. Result
4516 is true if the total cost of the operation has now been calculated. */
4517 static bool
4518 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4519 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4521 rtx op0, op1;
4522 const struct cpu_cost_table *extra_cost
4523 = aarch64_tune_params->insn_extra_cost;
4525 switch (code)
4527 case SET:
4528 op0 = SET_DEST (x);
4529 op1 = SET_SRC (x);
4531 switch (GET_CODE (op0))
4533 case MEM:
4534 if (speed)
4535 *cost += extra_cost->ldst.store;
4537 if (op1 != const0_rtx)
4538 *cost += rtx_cost (op1, SET, 1, speed);
4539 return true;
4541 case SUBREG:
4542 if (! REG_P (SUBREG_REG (op0)))
4543 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4544 /* Fall through. */
4545 case REG:
4546 /* Cost is just the cost of the RHS of the set. */
4547 *cost += rtx_cost (op1, SET, 1, true);
4548 return true;
4550 case ZERO_EXTRACT: /* Bit-field insertion. */
4551 case SIGN_EXTRACT:
4552 /* Strip any redundant widening of the RHS to meet the width of
4553 the target. */
4554 if (GET_CODE (op1) == SUBREG)
4555 op1 = SUBREG_REG (op1);
4556 if ((GET_CODE (op1) == ZERO_EXTEND
4557 || GET_CODE (op1) == SIGN_EXTEND)
4558 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4559 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4560 >= INTVAL (XEXP (op0, 1))))
4561 op1 = XEXP (op1, 0);
4562 *cost += rtx_cost (op1, SET, 1, speed);
4563 return true;
4565 default:
4566 break;
4568 return false;
4570 case MEM:
4571 if (speed)
4572 *cost += extra_cost->ldst.load;
4574 return true;
4576 case NEG:
4577 op0 = CONST0_RTX (GET_MODE (x));
4578 op1 = XEXP (x, 0);
4579 goto cost_minus;
4581 case COMPARE:
4582 op0 = XEXP (x, 0);
4583 op1 = XEXP (x, 1);
4585 if (op1 == const0_rtx
4586 && GET_CODE (op0) == AND)
4588 x = op0;
4589 goto cost_logic;
4592 /* Comparisons can work if the order is swapped.
4593 Canonicalization puts the more complex operation first, but
4594 we want it in op1. */
4595 if (! (REG_P (op0)
4596 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
4598 op0 = XEXP (x, 1);
4599 op1 = XEXP (x, 0);
4601 goto cost_minus;
4603 case MINUS:
4604 op0 = XEXP (x, 0);
4605 op1 = XEXP (x, 1);
4607 cost_minus:
4608 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
4609 || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
4610 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
4612 if (op0 != const0_rtx)
4613 *cost += rtx_cost (op0, MINUS, 0, speed);
4615 if (CONST_INT_P (op1))
4617 if (!aarch64_uimm12_shift (INTVAL (op1)))
4618 *cost += rtx_cost (op1, MINUS, 1, speed);
4620 else
4622 op1 = aarch64_strip_shift_or_extend (op1);
4623 *cost += rtx_cost (op1, MINUS, 1, speed);
4625 return true;
4628 return false;
4630 case PLUS:
4631 op0 = XEXP (x, 0);
4632 op1 = XEXP (x, 1);
4634 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4636 if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
4638 *cost += rtx_cost (op0, PLUS, 0, speed);
4640 else
4642 rtx new_op0 = aarch64_strip_shift_or_extend (op0);
4644 if (new_op0 == op0
4645 && GET_CODE (op0) == MULT)
4647 if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
4648 && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
4649 || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
4650 && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
4652 *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
4653 speed)
4654 + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
4655 speed)
4656 + rtx_cost (op1, PLUS, 1, speed));
4657 if (speed)
4658 *cost +=
4659 extra_cost->mult[GET_MODE (x) == DImode].extend_add;
4660 return true;
4663 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4664 + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
4665 + rtx_cost (op1, PLUS, 1, speed));
4667 if (speed)
4668 *cost += extra_cost->mult[GET_MODE (x) == DImode].add;
4670 return true;
4673 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
4674 + rtx_cost (op1, PLUS, 1, speed));
4676 return true;
4679 return false;
4681 case IOR:
4682 case XOR:
4683 case AND:
4684 cost_logic:
4685 op0 = XEXP (x, 0);
4686 op1 = XEXP (x, 1);
4688 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4690 if (CONST_INT_P (op1)
4691 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
4693 *cost += rtx_cost (op0, AND, 0, speed);
4695 else
4697 if (GET_CODE (op0) == NOT)
4698 op0 = XEXP (op0, 0);
4699 op0 = aarch64_strip_shift (op0);
4700 *cost += (rtx_cost (op0, AND, 0, speed)
4701 + rtx_cost (op1, AND, 1, speed));
4703 return true;
4705 return false;
4707 case ZERO_EXTEND:
4708 if ((GET_MODE (x) == DImode
4709 && GET_MODE (XEXP (x, 0)) == SImode)
4710 || GET_CODE (XEXP (x, 0)) == MEM)
4712 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
4713 return true;
4715 return false;
4717 case SIGN_EXTEND:
4718 if (GET_CODE (XEXP (x, 0)) == MEM)
4720 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
4721 return true;
4723 return false;
4725 case ROTATE:
4726 if (!CONST_INT_P (XEXP (x, 1)))
4727 *cost += COSTS_N_INSNS (2);
4728 /* Fall through. */
4729 case ROTATERT:
4730 case LSHIFTRT:
4731 case ASHIFT:
4732 case ASHIFTRT:
4734 /* Shifting by a register often takes an extra cycle. */
4735 if (speed && !CONST_INT_P (XEXP (x, 1)))
4736 *cost += extra_cost->alu.arith_shift_reg;
4738 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
4739 return true;
4741 case HIGH:
4742 if (!CONSTANT_P (XEXP (x, 0)))
4743 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
4744 return true;
4746 case LO_SUM:
4747 if (!CONSTANT_P (XEXP (x, 1)))
4748 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
4749 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
4750 return true;
4752 case ZERO_EXTRACT:
4753 case SIGN_EXTRACT:
4754 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
4755 return true;
4757 case MULT:
4758 op0 = XEXP (x, 0);
4759 op1 = XEXP (x, 1);
4761 *cost = COSTS_N_INSNS (1);
4762 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4764 if (CONST_INT_P (op1)
4765 && exact_log2 (INTVAL (op1)) > 0)
4767 *cost += rtx_cost (op0, ASHIFT, 0, speed);
4768 return true;
4771 if ((GET_CODE (op0) == ZERO_EXTEND
4772 && GET_CODE (op1) == ZERO_EXTEND)
4773 || (GET_CODE (op0) == SIGN_EXTEND
4774 && GET_CODE (op1) == SIGN_EXTEND))
4776 *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4777 + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
4778 if (speed)
4779 *cost += extra_cost->mult[GET_MODE (x) == DImode].extend;
4780 return true;
4783 if (speed)
4784 *cost += extra_cost->mult[GET_MODE (x) == DImode].simple;
4786 else if (speed)
4788 if (GET_MODE (x) == DFmode)
4789 *cost += extra_cost->fp[1].mult;
4790 else if (GET_MODE (x) == SFmode)
4791 *cost += extra_cost->fp[0].mult;
4794 return false; /* All arguments need to be in registers. */
4796 case MOD:
4797 case UMOD:
4798 *cost = COSTS_N_INSNS (2);
4799 if (speed)
4801 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4802 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
4803 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
4804 else if (GET_MODE (x) == DFmode)
4805 *cost += (extra_cost->fp[1].mult
4806 + extra_cost->fp[1].div);
4807 else if (GET_MODE (x) == SFmode)
4808 *cost += (extra_cost->fp[0].mult
4809 + extra_cost->fp[0].div);
4811 return false; /* All arguments need to be in registers. */
4813 case DIV:
4814 case UDIV:
4815 *cost = COSTS_N_INSNS (1);
4816 if (speed)
4818 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4819 *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv;
4820 else if (GET_MODE (x) == DFmode)
4821 *cost += extra_cost->fp[1].div;
4822 else if (GET_MODE (x) == SFmode)
4823 *cost += extra_cost->fp[0].div;
4825 return false; /* All arguments need to be in registers. */
4827 default:
4828 break;
4830 return false;
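/* Illustrative summary (added comment, not in the original source) of how
   aarch64_address_cost below classifies addresses via the tuning table's
   addr_cost fields:

     [Xn, #imm]                 -> imm_offset
     [Xn, Xm]                   -> register_offset
     [Xn, Wm, sxtw] / scaled    -> register_extend
     pre/post increment forms   -> pre_modify / post_modify
     bare symbol or label       -> imm_offset  */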
4833 static int
4834 aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
4835 enum machine_mode mode ATTRIBUTE_UNUSED,
4836 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
4838 enum rtx_code c = GET_CODE (x);
4839 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4841 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4842 return addr_cost->pre_modify;
4844 if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4845 return addr_cost->post_modify;
4847 if (c == PLUS)
4849 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4850 return addr_cost->imm_offset;
4851 else if (GET_CODE (XEXP (x, 0)) == MULT
4852 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4853 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
4854 return addr_cost->register_extend;
4856 return addr_cost->register_offset;
4858 else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4859 return addr_cost->imm_offset;
4861 return 0;
4864 static int
4865 aarch64_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4866 reg_class_t from, reg_class_t to)
4868 const struct cpu_regmove_cost *regmove_cost
4869 = aarch64_tune_params->regmove_cost;
4871 if (from == GENERAL_REGS && to == GENERAL_REGS)
4872 return regmove_cost->GP2GP;
4873 else if (from == GENERAL_REGS)
4874 return regmove_cost->GP2FP;
4875 else if (to == GENERAL_REGS)
4876 return regmove_cost->FP2GP;
4878 /* When AdvSIMD instructions are disabled it is not possible to move
4879 a 128-bit value directly between Q registers. This is handled in
4880 secondary reload. A general register is used as a scratch to move
4881 the upper DI value and the lower DI value is moved directly,
4882 hence the cost is the sum of three moves. */
4884 if (! TARGET_SIMD && GET_MODE_SIZE (from) == 128 && GET_MODE_SIZE (to) == 128)
4885 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
4887 return regmove_cost->FP2FP;
4890 static int
4891 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
4892 reg_class_t rclass ATTRIBUTE_UNUSED,
4893 bool in ATTRIBUTE_UNUSED)
4895 return aarch64_tune_params->memmov_cost;
4898 /* Vectorizer cost model target hooks. */
4900 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4901 static int
4902 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4903 tree vectype,
4904 int misalign ATTRIBUTE_UNUSED)
4906 unsigned elements;
4908 switch (type_of_cost)
4910 case scalar_stmt:
4911 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
4913 case scalar_load:
4914 return aarch64_tune_params->vec_costs->scalar_load_cost;
4916 case scalar_store:
4917 return aarch64_tune_params->vec_costs->scalar_store_cost;
4919 case vector_stmt:
4920 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4922 case vector_load:
4923 return aarch64_tune_params->vec_costs->vec_align_load_cost;
4925 case vector_store:
4926 return aarch64_tune_params->vec_costs->vec_store_cost;
4928 case vec_to_scalar:
4929 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
4931 case scalar_to_vec:
4932 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
4934 case unaligned_load:
4935 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
4937 case unaligned_store:
4938 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
4940 case cond_branch_taken:
4941 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
4943 case cond_branch_not_taken:
4944 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
4946 case vec_perm:
4947 case vec_promote_demote:
4948 return aarch64_tune_params->vec_costs->vec_stmt_cost;
4950 case vec_construct:
4951 elements = TYPE_VECTOR_SUBPARTS (vectype);
4952 return elements / 2 + 1;
4954 default:
4955 gcc_unreachable ();
4959 /* Implement targetm.vectorize.add_stmt_cost. */
4960 static unsigned
4961 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
4962 struct _stmt_vec_info *stmt_info, int misalign,
4963 enum vect_cost_model_location where)
4965 unsigned *cost = (unsigned *) data;
4966 unsigned retval = 0;
4968 if (flag_vect_cost_model)
4970 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4971 int stmt_cost =
4972 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
4974 /* Statements in an inner loop relative to the loop being
4975 vectorized are weighted more heavily. The value here is
4976 a function (linear for now) of the loop nest level. */
4977 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4979 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
4980 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
4981 unsigned nest_level = loop_depth (loop);
4983 count *= nest_level;
4986 retval = (unsigned) (count * stmt_cost);
4987 cost[where] += retval;
4990 return retval;
4993 static void initialize_aarch64_code_model (void);
4995 /* Parse the architecture extension string. */
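/* Illustrative example (added comment, not in the original source): given
   something like -march=armv8-a+crc+nocrypto, the string passed here is
   "+crc+nocrypto"; "+crc" sets that extension's flags_on bits in
   aarch64_isa_flags and "+nocrypto" clears its flags_off bits.  The exact
   extension names come from the all_extensions table.  */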
4997 static void
4998 aarch64_parse_extension (char *str)
5000 /* The extension string is parsed left to right. */
5001 const struct aarch64_option_extension *opt = NULL;
5003 /* Flag to say whether we are adding or removing an extension. */
5004 int adding_ext = -1;
5006 while (str != NULL && *str != 0)
5008 char *ext;
5009 size_t len;
5011 str++;
5012 ext = strchr (str, '+');
5014 if (ext != NULL)
5015 len = ext - str;
5016 else
5017 len = strlen (str);
5019 if (len >= 2 && strncmp (str, "no", 2) == 0)
5021 adding_ext = 0;
5022 len -= 2;
5023 str += 2;
5025 else if (len > 0)
5026 adding_ext = 1;
5028 if (len == 0)
5030 error ("missing feature modifier after %qs", "+no");
5031 return;
5034 /* Scan over the extensions table trying to find an exact match. */
5035 for (opt = all_extensions; opt->name != NULL; opt++)
5037 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
5039 /* Add or remove the extension. */
5040 if (adding_ext)
5041 aarch64_isa_flags |= opt->flags_on;
5042 else
5043 aarch64_isa_flags &= ~(opt->flags_off);
5044 break;
5048 if (opt->name == NULL)
5050 /* Extension not found in list. */
5051 error ("unknown feature modifier %qs", str);
5052 return;
5055 str = ext;
5058 return;
5061 /* Parse the ARCH string. */
5063 static void
5064 aarch64_parse_arch (void)
5066 char *ext;
5067 const struct processor *arch;
5068 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
5069 size_t len;
5071 strcpy (str, aarch64_arch_string);
5073 ext = strchr (str, '+');
5075 if (ext != NULL)
5076 len = ext - str;
5077 else
5078 len = strlen (str);
5080 if (len == 0)
5082 error ("missing arch name in -march=%qs", str);
5083 return;
5086 /* Loop through the list of supported ARCHs to find a match. */
5087 for (arch = all_architectures; arch->name != NULL; arch++)
5089 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
5091 selected_arch = arch;
5092 aarch64_isa_flags = selected_arch->flags;
5093 selected_cpu = &all_cores[selected_arch->core];
5095 if (ext != NULL)
5097 /* ARCH string contains at least one extension. */
5098 aarch64_parse_extension (ext);
5101 return;
5105 /* ARCH name not found in list. */
5106 error ("unknown value %qs for -march", str);
5107 return;
5110 /* Parse the CPU string. */
5112 static void
5113 aarch64_parse_cpu (void)
5115 char *ext;
5116 const struct processor *cpu;
5117 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
5118 size_t len;
5120 strcpy (str, aarch64_cpu_string);
5122 ext = strchr (str, '+');
5124 if (ext != NULL)
5125 len = ext - str;
5126 else
5127 len = strlen (str);
5129 if (len == 0)
5131 error ("missing cpu name in -mcpu=%qs", str);
5132 return;
5135 /* Loop through the list of supported CPUs to find a match. */
5136 for (cpu = all_cores; cpu->name != NULL; cpu++)
5138 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
5140 selected_cpu = cpu;
5141 selected_tune = cpu;
5142 aarch64_isa_flags = selected_cpu->flags;
5144 if (ext != NULL)
5146 /* CPU string contains at least one extension. */
5147 aarch64_parse_extension (ext);
5150 return;
5154 /* CPU name not found in list. */
5155 error ("unknown value %qs for -mcpu", str);
5156 return;
5159 /* Parse the TUNE string. */
5161 static void
5162 aarch64_parse_tune (void)
5164 const struct processor *cpu;
5165 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
5166 strcpy (str, aarch64_tune_string);
5168 /* Loop through the list of supported CPUs to find a match. */
5169 for (cpu = all_cores; cpu->name != NULL; cpu++)
5171 if (strcmp (cpu->name, str) == 0)
5173 selected_tune = cpu;
5174 return;
5178 /* CPU name not found in list. */
5179 error ("unknown value %qs for -mtune", str);
5180 return;
5184 /* Implement TARGET_OPTION_OVERRIDE. */
5186 static void
5187 aarch64_override_options (void)
5189 /* march wins over mcpu, so when march is defined, mcpu takes the same value,
5190 otherwise march remains undefined. mtune can be used with either march or
5191 mcpu. */
5193 if (aarch64_arch_string)
5195 aarch64_parse_arch ();
5196 aarch64_cpu_string = NULL;
5199 if (aarch64_cpu_string)
5201 aarch64_parse_cpu ();
5202 selected_arch = NULL;
5205 if (aarch64_tune_string)
5207 aarch64_parse_tune ();
5210 #ifndef HAVE_AS_MABI_OPTION
5211 /* The compiler may have been configured with 2.23.* binutils, which does
5212 not have support for ILP32. */
5213 if (TARGET_ILP32)
5214 error ("Assembler does not support -mabi=ilp32");
5215 #endif
5217 initialize_aarch64_code_model ();
5219 aarch64_build_bitmask_table ();
5221 /* This target defaults to strict volatile bitfields. */
5222 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
5223 flag_strict_volatile_bitfields = 1;
5225 /* If the user did not specify a processor, choose the default
5226 one for them. This will be the CPU set during configuration using
5227      --with-cpu, otherwise it is "cortex-a53".  */
5228 if (!selected_cpu)
5230 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
5231 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
5234 gcc_assert (selected_cpu);
5236   /* The selected cpu may be an architecture, so look up tuning by core ID.  */
5237 if (!selected_tune)
5238 selected_tune = &all_cores[selected_cpu->core];
5240 aarch64_tune_flags = selected_tune->flags;
5241 aarch64_tune = selected_tune->core;
5242 aarch64_tune_params = selected_tune->tune;
5244 aarch64_override_options_after_change ();
5247 /* Implement targetm.override_options_after_change. */
5249 static void
5250 aarch64_override_options_after_change (void)
5252 faked_omit_frame_pointer = false;
5254 /* To omit leaf frame pointers, we need to turn flag_omit_frame_pointer on so
5255 that aarch64_frame_pointer_required will be called. We need to remember
5256 whether flag_omit_frame_pointer was turned on normally or just faked. */
5258 if (flag_omit_leaf_frame_pointer && !flag_omit_frame_pointer)
5260 flag_omit_frame_pointer = true;
5261 faked_omit_frame_pointer = true;
5265 static struct machine_function *
5266 aarch64_init_machine_status (void)
5268 struct machine_function *machine;
5269 machine = ggc_alloc_cleared_machine_function ();
5270 return machine;
5273 void
5274 aarch64_init_expanders (void)
5276 init_machine_status = aarch64_init_machine_status;
5279 /* A checking mechanism for the implementation of the various code models. */
5280 static void
5281 initialize_aarch64_code_model (void)
5283 if (flag_pic)
5285 switch (aarch64_cmodel_var)
5287 case AARCH64_CMODEL_TINY:
5288 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
5289 break;
5290 case AARCH64_CMODEL_SMALL:
5291 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
5292 break;
5293 case AARCH64_CMODEL_LARGE:
5294 sorry ("code model %qs with -f%s", "large",
5295 flag_pic > 1 ? "PIC" : "pic");
5296 default:
5297 gcc_unreachable ();
5300 else
5301 aarch64_cmodel = aarch64_cmodel_var;
5304 /* Return true if SYMBOL_REF X binds locally. */
5306 static bool
5307 aarch64_symbol_binds_local_p (const_rtx x)
5309 return (SYMBOL_REF_DECL (x)
5310 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
5311 : SYMBOL_REF_LOCAL_P (x));
5314 /* Return true if SYMBOL_REF X is thread-local.  */
5315 static bool
5316 aarch64_tls_symbol_p (rtx x)
5318 if (! TARGET_HAVE_TLS)
5319 return false;
5321 if (GET_CODE (x) != SYMBOL_REF)
5322 return false;
5324 return SYMBOL_REF_TLS_MODEL (x) != 0;
5327 /* Classify a TLS symbol into one of the TLS kinds. */
5328 enum aarch64_symbol_type
5329 aarch64_classify_tls_symbol (rtx x)
5331 enum tls_model tls_kind = tls_symbolic_operand_type (x);
5333 switch (tls_kind)
5335 case TLS_MODEL_GLOBAL_DYNAMIC:
5336 case TLS_MODEL_LOCAL_DYNAMIC:
5337 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
5339 case TLS_MODEL_INITIAL_EXEC:
5340 return SYMBOL_SMALL_GOTTPREL;
5342 case TLS_MODEL_LOCAL_EXEC:
5343 return SYMBOL_SMALL_TPREL;
5345 case TLS_MODEL_EMULATED:
5346 case TLS_MODEL_NONE:
5347 return SYMBOL_FORCE_TO_MEM;
5349 default:
5350 gcc_unreachable ();
5354 /* Return the method that should be used to access SYMBOL_REF or
5355 LABEL_REF X in context CONTEXT. */
5357 enum aarch64_symbol_type
5358 aarch64_classify_symbol (rtx x,
5359 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
5361 if (GET_CODE (x) == LABEL_REF)
5363 switch (aarch64_cmodel)
5365 case AARCH64_CMODEL_LARGE:
5366 return SYMBOL_FORCE_TO_MEM;
5368 case AARCH64_CMODEL_TINY_PIC:
5369 case AARCH64_CMODEL_TINY:
5370 return SYMBOL_TINY_ABSOLUTE;
5372 case AARCH64_CMODEL_SMALL_PIC:
5373 case AARCH64_CMODEL_SMALL:
5374 return SYMBOL_SMALL_ABSOLUTE;
5376 default:
5377 gcc_unreachable ();
5381 if (GET_CODE (x) == SYMBOL_REF)
5383 if (aarch64_cmodel == AARCH64_CMODEL_LARGE
5384 || CONSTANT_POOL_ADDRESS_P (x))
5385 return SYMBOL_FORCE_TO_MEM;
5387 if (aarch64_tls_symbol_p (x))
5388 return aarch64_classify_tls_symbol (x);
5390 switch (aarch64_cmodel)
5392 case AARCH64_CMODEL_TINY:
5393 if (SYMBOL_REF_WEAK (x))
5394 return SYMBOL_FORCE_TO_MEM;
5395 return SYMBOL_TINY_ABSOLUTE;
5397 case AARCH64_CMODEL_SMALL:
5398 if (SYMBOL_REF_WEAK (x))
5399 return SYMBOL_FORCE_TO_MEM;
5400 return SYMBOL_SMALL_ABSOLUTE;
5402 case AARCH64_CMODEL_TINY_PIC:
5403 if (!aarch64_symbol_binds_local_p (x))
5404 return SYMBOL_TINY_GOT;
5405 return SYMBOL_TINY_ABSOLUTE;
5407 case AARCH64_CMODEL_SMALL_PIC:
5408 if (!aarch64_symbol_binds_local_p (x))
5409 return SYMBOL_SMALL_GOT;
5410 return SYMBOL_SMALL_ABSOLUTE;
5412 default:
5413 gcc_unreachable ();
5417 /* By default push everything into the constant pool. */
5418 return SYMBOL_FORCE_TO_MEM;
5421 bool
5422 aarch64_constant_address_p (rtx x)
5424 return (CONSTANT_P (x) && memory_address_p (DImode, x));
5427 bool
5428 aarch64_legitimate_pic_operand_p (rtx x)
5430 if (GET_CODE (x) == SYMBOL_REF
5431 || (GET_CODE (x) == CONST
5432 && GET_CODE (XEXP (x, 0)) == PLUS
5433 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5434 return false;
5436 return true;
5439 /* Return true if X holds either a quarter-precision or
5440 floating-point +0.0 constant. */
5441 static bool
5442 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5444 if (!CONST_DOUBLE_P (x))
5445 return false;
5447   /* TODO: We could handle moving 0.0 to a TFmode register,
5448      but first we would like to refactor the movtf_aarch64
5449      pattern to be more amenable to splitting moves properly and
5450      to gating correctly on TARGET_SIMD.  For now, reject all
5451      constants that are not destined for SFmode or DFmode registers.  */
5452 if (!(mode == SFmode || mode == DFmode))
5453 return false;
5455 if (aarch64_float_const_zero_rtx_p (x))
5456 return true;
5457 return aarch64_float_const_representable_p (x);
5460 static bool
5461 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5463 /* Do not allow vector struct mode constants. We could support
5464 0 and -1 easily, but they need support in aarch64-simd.md. */
5465 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5466 return false;
5468 /* This could probably go away because
5469 we now decompose CONST_INTs according to expand_mov_immediate. */
5470 if ((GET_CODE (x) == CONST_VECTOR
5471 && aarch64_simd_valid_immediate (x, mode, false, NULL))
5472 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5473 return !targetm.cannot_force_const_mem (mode, x);
5475 if (GET_CODE (x) == HIGH
5476 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5477 return true;
5479 return aarch64_constant_address_p (x);
5483 aarch64_load_tp (rtx target)
5485 if (!target
5486 || GET_MODE (target) != Pmode
5487 || !register_operand (target, Pmode))
5488 target = gen_reg_rtx (Pmode);
5490 /* Can return in any reg. */
5491 emit_insn (gen_aarch64_load_tp_hard (target));
5492 return target;
5495 /* On AAPCS systems, this is the "struct __va_list". */
5496 static GTY(()) tree va_list_type;
5498 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5499 Return the type to use as __builtin_va_list.
5501 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5503 struct __va_list
5505 void *__stack;
5506 void *__gr_top;
5507 void *__vr_top;
5508 int __gr_offs;
5509 int __vr_offs;
5510 }; */
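/* Worked example (added for illustration, not in the original source),
   assuming the LP64 ABI: in a variadic function whose named arguments use
   two of the eight general argument registers and none of the eight vector
   registers, va_start below leaves __gr_offs == -(8 - 2) * 8 == -48 and
   __vr_offs == -8 * 16 == -128, i.e. the (negative) offsets count upwards
   towards __gr_top / __vr_top as argument registers are consumed.  */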
5512 static tree
5513 aarch64_build_builtin_va_list (void)
5515 tree va_list_name;
5516 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5518 /* Create the type. */
5519 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5520 /* Give it the required name. */
5521 va_list_name = build_decl (BUILTINS_LOCATION,
5522 TYPE_DECL,
5523 get_identifier ("__va_list"),
5524 va_list_type);
5525 DECL_ARTIFICIAL (va_list_name) = 1;
5526 TYPE_NAME (va_list_type) = va_list_name;
5527 TYPE_STUB_DECL (va_list_type) = va_list_name;
5529 /* Create the fields. */
5530 f_stack = build_decl (BUILTINS_LOCATION,
5531 FIELD_DECL, get_identifier ("__stack"),
5532 ptr_type_node);
5533 f_grtop = build_decl (BUILTINS_LOCATION,
5534 FIELD_DECL, get_identifier ("__gr_top"),
5535 ptr_type_node);
5536 f_vrtop = build_decl (BUILTINS_LOCATION,
5537 FIELD_DECL, get_identifier ("__vr_top"),
5538 ptr_type_node);
5539 f_groff = build_decl (BUILTINS_LOCATION,
5540 FIELD_DECL, get_identifier ("__gr_offs"),
5541 integer_type_node);
5542 f_vroff = build_decl (BUILTINS_LOCATION,
5543 FIELD_DECL, get_identifier ("__vr_offs"),
5544 integer_type_node);
5546 DECL_ARTIFICIAL (f_stack) = 1;
5547 DECL_ARTIFICIAL (f_grtop) = 1;
5548 DECL_ARTIFICIAL (f_vrtop) = 1;
5549 DECL_ARTIFICIAL (f_groff) = 1;
5550 DECL_ARTIFICIAL (f_vroff) = 1;
5552 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
5553 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
5554 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
5555 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
5556 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
5558 TYPE_FIELDS (va_list_type) = f_stack;
5559 DECL_CHAIN (f_stack) = f_grtop;
5560 DECL_CHAIN (f_grtop) = f_vrtop;
5561 DECL_CHAIN (f_vrtop) = f_groff;
5562 DECL_CHAIN (f_groff) = f_vroff;
5564 /* Compute its layout. */
5565 layout_type (va_list_type);
5567 return va_list_type;
5570 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
5571 static void
5572 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
5574 const CUMULATIVE_ARGS *cum;
5575 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5576 tree stack, grtop, vrtop, groff, vroff;
5577 tree t;
5578 int gr_save_area_size;
5579 int vr_save_area_size;
5580 int vr_offset;
5582 cum = &crtl->args.info;
5583 gr_save_area_size
5584 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
5585 vr_save_area_size
5586 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
5588 if (TARGET_GENERAL_REGS_ONLY)
5590 if (cum->aapcs_nvrn > 0)
5591 sorry ("%qs and floating point or vector arguments",
5592 "-mgeneral-regs-only");
5593 vr_save_area_size = 0;
5596 f_stack = TYPE_FIELDS (va_list_type_node);
5597 f_grtop = DECL_CHAIN (f_stack);
5598 f_vrtop = DECL_CHAIN (f_grtop);
5599 f_groff = DECL_CHAIN (f_vrtop);
5600 f_vroff = DECL_CHAIN (f_groff);
5602 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
5603 NULL_TREE);
5604 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
5605 NULL_TREE);
5606 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
5607 NULL_TREE);
5608 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
5609 NULL_TREE);
5610 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
5611 NULL_TREE);
5613 /* Emit code to initialize STACK, which points to the next varargs stack
5614 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
5615 by named arguments. STACK is 8-byte aligned. */
5616 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
5617 if (cum->aapcs_stack_size > 0)
5618 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
5619 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
5620 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5622 /* Emit code to initialize GRTOP, the top of the GR save area.
5623 virtual_incoming_args_rtx should have been 16 byte aligned. */
5624 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
5625 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
5626 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5628 /* Emit code to initialize VRTOP, the top of the VR save area.
5629 This address is gr_save_area_bytes below GRTOP, rounded
5630 down to the next 16-byte boundary. */
5631 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
5632 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
5633 STACK_BOUNDARY / BITS_PER_UNIT);
5635 if (vr_offset)
5636 t = fold_build_pointer_plus_hwi (t, -vr_offset);
5637 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
5638 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5640 /* Emit code to initialize GROFF, the offset from GRTOP of the
5641 next GPR argument. */
5642 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
5643 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
5644 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5646   /* Likewise emit code to initialize VROFF, the offset from VRTOP
5647 of the next VR argument. */
5648 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
5649 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
5650 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5653 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
5655 static tree
5656 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
5657 gimple_seq *post_p ATTRIBUTE_UNUSED)
5659 tree addr;
5660 bool indirect_p;
5661 bool is_ha; /* is HFA or HVA. */
5662 bool dw_align; /* double-word align. */
5663 enum machine_mode ag_mode = VOIDmode;
5664 int nregs;
5665 enum machine_mode mode;
5667 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5668 tree stack, f_top, f_off, off, arg, roundup, on_stack;
5669 HOST_WIDE_INT size, rsize, adjust, align;
5670 tree t, u, cond1, cond2;
5672 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5673 if (indirect_p)
5674 type = build_pointer_type (type);
5676 mode = TYPE_MODE (type);
5678 f_stack = TYPE_FIELDS (va_list_type_node);
5679 f_grtop = DECL_CHAIN (f_stack);
5680 f_vrtop = DECL_CHAIN (f_grtop);
5681 f_groff = DECL_CHAIN (f_vrtop);
5682 f_vroff = DECL_CHAIN (f_groff);
5684 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
5685 f_stack, NULL_TREE);
5686 size = int_size_in_bytes (type);
5687 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
5689 dw_align = false;
5690 adjust = 0;
5691 if (aarch64_vfp_is_call_or_return_candidate (mode,
5692 type,
5693 &ag_mode,
5694 &nregs,
5695 &is_ha))
5697 /* TYPE passed in fp/simd registers. */
5698 if (TARGET_GENERAL_REGS_ONLY)
5699 sorry ("%qs and floating point or vector arguments",
5700 "-mgeneral-regs-only");
5702 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
5703 unshare_expr (valist), f_vrtop, NULL_TREE);
5704 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
5705 unshare_expr (valist), f_vroff, NULL_TREE);
5707 rsize = nregs * UNITS_PER_VREG;
5709 if (is_ha)
5711 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
5712 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
5714 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
5715 && size < UNITS_PER_VREG)
5717 adjust = UNITS_PER_VREG - size;
5720 else
5722 /* TYPE passed in general registers. */
5723 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
5724 unshare_expr (valist), f_grtop, NULL_TREE);
5725 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
5726 unshare_expr (valist), f_groff, NULL_TREE);
5727 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
5728 nregs = rsize / UNITS_PER_WORD;
5730 if (align > 8)
5731 dw_align = true;
5733 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5734 && size < UNITS_PER_WORD)
5736 adjust = UNITS_PER_WORD - size;
5740 /* Get a local temporary for the field value. */
5741 off = get_initialized_tmp_var (f_off, pre_p, NULL);
5743 /* Emit code to branch if off >= 0. */
5744 t = build2 (GE_EXPR, boolean_type_node, off,
5745 build_int_cst (TREE_TYPE (off), 0));
5746 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
5748 if (dw_align)
5750 /* Emit: offs = (offs + 15) & -16. */
5751 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5752 build_int_cst (TREE_TYPE (off), 15));
5753 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
5754 build_int_cst (TREE_TYPE (off), -16));
5755 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
5757 else
5758 roundup = NULL;
5760 /* Update ap.__[g|v]r_offs */
5761 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
5762 build_int_cst (TREE_TYPE (off), rsize));
5763 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
5765 /* String up. */
5766 if (roundup)
5767 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5769 /* [cond2] if (ap.__[g|v]r_offs > 0) */
5770 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
5771 build_int_cst (TREE_TYPE (f_off), 0));
5772 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
5774 /* String up: make sure the assignment happens before the use. */
5775 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
5776 COND_EXPR_ELSE (cond1) = t;
5778 /* Prepare the trees handling the argument that is passed on the stack;
5779 the top level node will store in ON_STACK. */
5780 arg = get_initialized_tmp_var (stack, pre_p, NULL);
5781 if (align > 8)
5783 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
5784 t = fold_convert (intDI_type_node, arg);
5785 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5786 build_int_cst (TREE_TYPE (t), 15));
5787 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5788 build_int_cst (TREE_TYPE (t), -16));
5789 t = fold_convert (TREE_TYPE (arg), t);
5790 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
5792 else
5793 roundup = NULL;
5794 /* Advance ap.__stack */
5795 t = fold_convert (intDI_type_node, arg);
5796 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5797 build_int_cst (TREE_TYPE (t), size + 7));
5798 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5799 build_int_cst (TREE_TYPE (t), -8));
5800 t = fold_convert (TREE_TYPE (arg), t);
5801 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
5802 /* String up roundup and advance. */
5803 if (roundup)
5804 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
5805 /* String up with arg */
5806 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
5807 /* Big-endianness related address adjustment. */
5808 if (BLOCK_REG_PADDING (mode, type, 1) == downward
5809 && size < UNITS_PER_WORD)
5811 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
5812 size_int (UNITS_PER_WORD - size));
5813 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
5816 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
5817 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
5819 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
5820 t = off;
5821 if (adjust)
5822 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
5823 build_int_cst (TREE_TYPE (off), adjust));
5825 t = fold_convert (sizetype, t);
5826 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
5828 if (is_ha)
5830 /* type ha; // treat as "struct {ftype field[n];}"
5831 ... [computing offs]
5832 	 for (i = 0; i < nregs; ++i, offs += 16)
5833 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
5834 return ha; */
5835 int i;
5836 tree tmp_ha, field_t, field_ptr_t;
5838 /* Declare a local variable. */
5839 tmp_ha = create_tmp_var_raw (type, "ha");
5840 gimple_add_tmp_var (tmp_ha);
5842 /* Establish the base type. */
5843 switch (ag_mode)
5845 case SFmode:
5846 field_t = float_type_node;
5847 field_ptr_t = float_ptr_type_node;
5848 break;
5849 case DFmode:
5850 field_t = double_type_node;
5851 field_ptr_t = double_ptr_type_node;
5852 break;
5853 case TFmode:
5854 field_t = long_double_type_node;
5855 field_ptr_t = long_double_ptr_type_node;
5856 break;
5857 	/* Half-precision and quad-precision floating point are not fully
5858 	   supported yet.  Enable the following code once that support is
5859 	   complete; we still need to find the correct type node for __fp16 *.  */
5860 #if 0
5861 case HFmode:
5862 field_t = float_type_node;
5863 field_ptr_t = float_ptr_type_node;
5864 break;
5865 #endif
5866 case V2SImode:
5867 case V4SImode:
5869 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
5870 field_t = build_vector_type_for_mode (innertype, ag_mode);
5871 field_ptr_t = build_pointer_type (field_t);
5873 break;
5874 default:
5875 gcc_assert (0);
5878       /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area)  */
5879 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
5880 addr = t;
5881 t = fold_convert (field_ptr_t, addr);
5882 t = build2 (MODIFY_EXPR, field_t,
5883 build1 (INDIRECT_REF, field_t, tmp_ha),
5884 build1 (INDIRECT_REF, field_t, t));
5886 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
5887 for (i = 1; i < nregs; ++i)
5889 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
5890 u = fold_convert (field_ptr_t, addr);
5891 u = build2 (MODIFY_EXPR, field_t,
5892 build2 (MEM_REF, field_t, tmp_ha,
5893 build_int_cst (field_ptr_t,
5894 (i *
5895 int_size_in_bytes (field_t)))),
5896 build1 (INDIRECT_REF, field_t, u));
5897 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
5900 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
5901 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
5904 COND_EXPR_ELSE (cond2) = t;
5905 addr = fold_convert (build_pointer_type (type), cond1);
5906 addr = build_va_arg_indirect_ref (addr);
5908 if (indirect_p)
5909 addr = build_va_arg_indirect_ref (addr);
5911 return addr;
5914 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
5916 static void
5917 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
5918 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5919 int no_rtl)
5921 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
5922 CUMULATIVE_ARGS local_cum;
5923 int gr_saved, vr_saved;
5925 /* The caller has advanced CUM up to, but not beyond, the last named
5926 argument. Advance a local copy of CUM past the last "real" named
5927 argument, to find out how many registers are left over. */
5928 local_cum = *cum;
5929 aarch64_function_arg_advance (pack_cumulative_args(&local_cum), mode, type, true);
5931   /* Find out how many registers we need to save.  */
5932 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
5933 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
5935 if (TARGET_GENERAL_REGS_ONLY)
5937 if (local_cum.aapcs_nvrn > 0)
5938 sorry ("%qs and floating point or vector arguments",
5939 "-mgeneral-regs-only");
5940 vr_saved = 0;
5943 if (!no_rtl)
5945 if (gr_saved > 0)
5947 rtx ptr, mem;
5949 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
5950 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
5951 - gr_saved * UNITS_PER_WORD);
5952 mem = gen_frame_mem (BLKmode, ptr);
5953 set_mem_alias_set (mem, get_varargs_alias_set ());
5955 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
5956 mem, gr_saved);
5958 if (vr_saved > 0)
5960 /* We can't use move_block_from_reg, because it will use
5961 the wrong mode, storing D regs only. */
5962 enum machine_mode mode = TImode;
5963 int off, i;
5965 /* Set OFF to the offset from virtual_incoming_args_rtx of
5966 the first vector register. The VR save area lies below
5967 the GR one, and is aligned to 16 bytes. */
5968 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5969 STACK_BOUNDARY / BITS_PER_UNIT);
5970 off -= vr_saved * UNITS_PER_VREG;
5972 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
5974 rtx ptr, mem;
5976 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
5977 mem = gen_frame_mem (mode, ptr);
5978 set_mem_alias_set (mem, get_varargs_alias_set ());
5979 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
5980 off += UNITS_PER_VREG;
5985 /* We don't save the size into *PRETEND_SIZE because we want to avoid
5986 any complication of having crtl->args.pretend_args_size changed. */
5987 cfun->machine->saved_varargs_size
5988 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
5989 STACK_BOUNDARY / BITS_PER_UNIT)
5990 + vr_saved * UNITS_PER_VREG);
5993 static void
5994 aarch64_conditional_register_usage (void)
5996 int i;
5997 if (!TARGET_FLOAT)
5999 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
6001 fixed_regs[i] = 1;
6002 call_used_regs[i] = 1;
6007 /* Walk down the type tree of TYPE counting consecutive base elements.
6008 If *MODEP is VOIDmode, then set it to the first valid floating point
6009 type. If a non-floating point type is found, or if a floating point
6010 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6011 otherwise return the count in the sub-tree. */
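/* For example (illustrative, not in the original source):
     struct { double x; double y; }    -> returns 2, *modep == DFmode
     struct { float v[3]; }            -> returns 3, *modep == SFmode
     struct { double d; float f; }     -> returns -1 (mixed base types)
   matching the AAPCS64 notion of a homogeneous floating-point aggregate.  */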
6012 static int
6013 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
6015 enum machine_mode mode;
6016 HOST_WIDE_INT size;
6018 switch (TREE_CODE (type))
6020 case REAL_TYPE:
6021 mode = TYPE_MODE (type);
6022 if (mode != DFmode && mode != SFmode && mode != TFmode)
6023 return -1;
6025 if (*modep == VOIDmode)
6026 *modep = mode;
6028 if (*modep == mode)
6029 return 1;
6031 break;
6033 case COMPLEX_TYPE:
6034 mode = TYPE_MODE (TREE_TYPE (type));
6035 if (mode != DFmode && mode != SFmode && mode != TFmode)
6036 return -1;
6038 if (*modep == VOIDmode)
6039 *modep = mode;
6041 if (*modep == mode)
6042 return 2;
6044 break;
6046 case VECTOR_TYPE:
6047 /* Use V2SImode and V4SImode as representatives of all 64-bit
6048 and 128-bit vector types. */
6049 size = int_size_in_bytes (type);
6050 switch (size)
6052 case 8:
6053 mode = V2SImode;
6054 break;
6055 case 16:
6056 mode = V4SImode;
6057 break;
6058 default:
6059 return -1;
6062 if (*modep == VOIDmode)
6063 *modep = mode;
6065 /* Vector modes are considered to be opaque: two vectors are
6066 equivalent for the purposes of being homogeneous aggregates
6067 if they are the same size. */
6068 if (*modep == mode)
6069 return 1;
6071 break;
6073 case ARRAY_TYPE:
6075 int count;
6076 tree index = TYPE_DOMAIN (type);
6078 /* Can't handle incomplete types. */
6079 if (!COMPLETE_TYPE_P (type))
6080 return -1;
6082 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
6083 if (count == -1
6084 || !index
6085 || !TYPE_MAX_VALUE (index)
6086 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6087 || !TYPE_MIN_VALUE (index)
6088 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6089 || count < 0)
6090 return -1;
6092 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6093 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6095 /* There must be no padding. */
6096 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6097 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6098 != count * GET_MODE_BITSIZE (*modep)))
6099 return -1;
6101 return count;
6104 case RECORD_TYPE:
6106 int count = 0;
6107 int sub_count;
6108 tree field;
6110 /* Can't handle incomplete types. */
6111 if (!COMPLETE_TYPE_P (type))
6112 return -1;
6114 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6116 if (TREE_CODE (field) != FIELD_DECL)
6117 continue;
6119 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6120 if (sub_count < 0)
6121 return -1;
6122 count += sub_count;
6125 /* There must be no padding. */
6126 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6127 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6128 != count * GET_MODE_BITSIZE (*modep)))
6129 return -1;
6131 return count;
6134 case UNION_TYPE:
6135 case QUAL_UNION_TYPE:
6137 /* These aren't very interesting except in a degenerate case. */
6138 int count = 0;
6139 int sub_count;
6140 tree field;
6142 /* Can't handle incomplete types. */
6143 if (!COMPLETE_TYPE_P (type))
6144 return -1;
6146 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6148 if (TREE_CODE (field) != FIELD_DECL)
6149 continue;
6151 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6152 if (sub_count < 0)
6153 return -1;
6154 count = count > sub_count ? count : sub_count;
6157 /* There must be no padding. */
6158 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
6159 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
6160 != count * GET_MODE_BITSIZE (*modep)))
6161 return -1;
6163 return count;
6166 default:
6167 break;
6170 return -1;
6173 /* Return true if we use LRA instead of reload pass. */
6174 static bool
6175 aarch64_lra_p (void)
6177 return aarch64_lra_flag;
6180 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
6181 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
6182 array types. The C99 floating-point complex types are also considered
6183 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
6184 types, which are GCC extensions and out of the scope of AAPCS64, are
6185 treated as composite types here as well.
6187 Note that MODE itself is not sufficient in determining whether a type
6188 is such a composite type or not. This is because
6189 stor-layout.c:compute_record_mode may have already changed the MODE
6190 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
6191 structure with only one field may have its MODE set to the mode of the
6192 field. Also an integer mode whose size matches the size of the
6193 RECORD_TYPE type may be used to substitute the original mode
6194 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
6195 solely relied on. */
6197 static bool
6198 aarch64_composite_type_p (const_tree type,
6199 enum machine_mode mode)
6201 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
6202 return true;
6204 if (mode == BLKmode
6205 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6206 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
6207 return true;
6209 return false;
6212 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
6213 type as described in AAPCS64 \S 4.1.2.
6215 See the comment above aarch64_composite_type_p for the notes on MODE. */
6217 static bool
6218 aarch64_short_vector_p (const_tree type,
6219 enum machine_mode mode)
6221 HOST_WIDE_INT size = -1;
6223 if (type && TREE_CODE (type) == VECTOR_TYPE)
6224 size = int_size_in_bytes (type);
6225 else if (!aarch64_composite_type_p (type, mode)
6226 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6227 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
6228 size = GET_MODE_SIZE (mode);
6230   return (size == 8 || size == 16);
6233 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
6234 shall be passed or returned in simd/fp register(s) (providing these
6235 parameter passing registers are available).
6237 Upon successful return, *COUNT returns the number of needed registers,
6238    *BASE_MODE returns the mode of the individual register and, when IS_HA
6239    is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
6240 floating-point aggregate or a homogeneous short-vector aggregate. */
6242 static bool
6243 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
6244 const_tree type,
6245 enum machine_mode *base_mode,
6246 int *count,
6247 bool *is_ha)
6249 enum machine_mode new_mode = VOIDmode;
6250 bool composite_p = aarch64_composite_type_p (type, mode);
6252 if (is_ha != NULL) *is_ha = false;
6254 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
6255 || aarch64_short_vector_p (type, mode))
6257 *count = 1;
6258 new_mode = mode;
6260 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6262 if (is_ha != NULL) *is_ha = true;
6263 *count = 2;
6264 new_mode = GET_MODE_INNER (mode);
6266 else if (type && composite_p)
6268 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6270 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
6272 if (is_ha != NULL) *is_ha = true;
6273 *count = ag_count;
6275 else
6276 return false;
6278 else
6279 return false;
6281 *base_mode = new_mode;
6282 return true;
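/* Worked example: for an argument of type struct { double x; double y; },
   aapcs_vfp_sub_candidate returns 2 with the mode set to DFmode, so this
   function reports *COUNT == 2, *BASE_MODE == DFmode and *IS_HA == true;
   with enough free registers such an argument is then passed in two D
   registers.  */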
6285 /* Implement TARGET_STRUCT_VALUE_RTX. */
6287 static rtx
6288 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
6289 int incoming ATTRIBUTE_UNUSED)
6291 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
6294 /* Implements target hook vector_mode_supported_p. */
6295 static bool
6296 aarch64_vector_mode_supported_p (enum machine_mode mode)
6298 if (TARGET_SIMD
6299 && (mode == V4SImode || mode == V8HImode
6300 || mode == V16QImode || mode == V2DImode
6301 || mode == V2SImode || mode == V4HImode
6302 || mode == V8QImode || mode == V2SFmode
6303 || mode == V4SFmode || mode == V2DFmode))
6304 return true;
6306 return false;
6309 /* Return appropriate SIMD container
6310 for MODE within a vector of WIDTH bits. */
6311 static enum machine_mode
6312 aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
6314 gcc_assert (width == 64 || width == 128);
6315 if (TARGET_SIMD)
6317 if (width == 128)
6318 switch (mode)
6320 case DFmode:
6321 return V2DFmode;
6322 case SFmode:
6323 return V4SFmode;
6324 case SImode:
6325 return V4SImode;
6326 case HImode:
6327 return V8HImode;
6328 case QImode:
6329 return V16QImode;
6330 case DImode:
6331 return V2DImode;
6332 default:
6333 break;
6335 else
6336 switch (mode)
6338 case SFmode:
6339 return V2SFmode;
6340 case SImode:
6341 return V2SImode;
6342 case HImode:
6343 return V4HImode;
6344 case QImode:
6345 return V8QImode;
6346 default:
6347 break;
6350 return word_mode;
6353 /* Return 128-bit container as the preferred SIMD mode for MODE. */
6354 static enum machine_mode
6355 aarch64_preferred_simd_mode (enum machine_mode mode)
6357 return aarch64_simd_container_mode (mode, 128);
6360 /* Return the bitmask of possible vector sizes for the vectorizer
6361 to iterate over. */
6362 static unsigned int
6363 aarch64_autovectorize_vector_sizes (void)
6365 return (16 | 8);
6368 /* A table to help perform AArch64-specific name mangling for AdvSIMD
6369 vector types in order to conform to the AAPCS64 (see "Procedure
6370 Call Standard for the ARM 64-bit Architecture", Appendix A). To
6371 qualify for emission with the mangled names defined in that document,
6372 a vector type must not only be of the correct mode but also be
6373 composed of AdvSIMD vector element types (e.g.
6374    __builtin_aarch64_simd_qi); these types are registered by
6375 aarch64_init_simd_builtins (). In other words, vector types defined
6376 in other ways e.g. via vector_size attribute will get default
6377 mangled names. */
6378 typedef struct
6380 enum machine_mode mode;
6381 const char *element_type_name;
6382 const char *mangled_name;
6383 } aarch64_simd_mangle_map_entry;
6385 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
6386 /* 64-bit containerized types. */
6387 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
6388 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
6389 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
6390 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
6391 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
6392 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
6393 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
6394 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
6395 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6396 /* 128-bit containerized types. */
6397 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
6398 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
6399 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
6400 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
6401 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
6402 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
6403 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
6404 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
6405 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
6406 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6407 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6408 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6409 { V2DImode, "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
6410 { VOIDmode, NULL, NULL }
6413 /* Implement TARGET_MANGLE_TYPE. */
6415 static const char *
6416 aarch64_mangle_type (const_tree type)
6418 /* The AArch64 ABI documents say that "__va_list" has to be
6419     mangled as if it is in the "std" namespace.  */
6420 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6421 return "St9__va_list";
6423 /* Check the mode of the vector type, and the name of the vector
6424 element type, against the table. */
6425 if (TREE_CODE (type) == VECTOR_TYPE)
6427 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6429 while (pos->mode != VOIDmode)
6431 tree elt_type = TREE_TYPE (type);
6433 if (pos->mode == TYPE_MODE (type)
6434 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6435 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6436 pos->element_type_name))
6437 return pos->mangled_name;
6439 pos++;
6443 /* Use the default mangling. */
6444 return NULL;
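/* For example, with the table above a C++ function declared as
   void f (int32x4_t) has its parameter encoded via the "11__Int32x4_t"
   entry, giving a mangled symbol along the lines of _Z1f11__Int32x4_t,
   whereas a vector type created only via the vector_size attribute falls
   through to the default mangling.  */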
6447 /* Return the equivalent letter for size. */
6448 static char
6449 sizetochar (int size)
6451 switch (size)
6453 case 64: return 'd';
6454 case 32: return 's';
6455 case 16: return 'h';
6456 case 8 : return 'b';
6457 default: gcc_unreachable ();
6461 /* Return true iff x is a uniform vector of floating-point
6462 constants, and the constant can be represented in
6463    quarter-precision form.  Note that, as aarch64_float_const_representable_p
6464    rejects both +0.0 and -0.0, this function rejects them as well.  */
6465 static bool
6466 aarch64_vect_float_const_representable_p (rtx x)
6468 int i = 0;
6469 REAL_VALUE_TYPE r0, ri;
6470 rtx x0, xi;
6472 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6473 return false;
6475 x0 = CONST_VECTOR_ELT (x, 0);
6476 if (!CONST_DOUBLE_P (x0))
6477 return false;
6479 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
6481 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6483 xi = CONST_VECTOR_ELT (x, i);
6484 if (!CONST_DOUBLE_P (xi))
6485 return false;
6487 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6488 if (!REAL_VALUES_EQUAL (r0, ri))
6489 return false;
6492 return aarch64_float_const_representable_p (x0);
6495 /* Return true for valid and false for invalid. */
6496 bool
6497 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
6498 struct simd_immediate_info *info)
6500 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6501 matches = 1; \
6502 for (i = 0; i < idx; i += (STRIDE)) \
6503 if (!(TEST)) \
6504 matches = 0; \
6505 if (matches) \
6507 immtype = (CLASS); \
6508 elsize = (ELSIZE); \
6509 eshift = (SHIFT); \
6510 emvn = (NEG); \
6511 break; \
6514 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6515 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6516 unsigned char bytes[16];
6517 int immtype = -1, matches;
6518 unsigned int invmask = inverse ? 0xff : 0;
6519 int eshift, emvn;
6521 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6523 if (! (aarch64_simd_imm_zero_p (op, mode)
6524 || aarch64_vect_float_const_representable_p (op)))
6525 return false;
6527 if (info)
6529 info->value = CONST_VECTOR_ELT (op, 0);
6530 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
6531 info->mvn = false;
6532 info->shift = 0;
6535 return true;
6538 /* Splat vector constant out into a byte vector. */
6539 for (i = 0; i < n_elts; i++)
6541 rtx el = CONST_VECTOR_ELT (op, i);
6542 unsigned HOST_WIDE_INT elpart;
6543 unsigned int part, parts;
6545 if (GET_CODE (el) == CONST_INT)
6547 elpart = INTVAL (el);
6548 parts = 1;
6550 else if (GET_CODE (el) == CONST_DOUBLE)
6552 elpart = CONST_DOUBLE_LOW (el);
6553 parts = 2;
6555 else
6556 gcc_unreachable ();
6558 for (part = 0; part < parts; part++)
6560 unsigned int byte;
6561 for (byte = 0; byte < innersize; byte++)
6563 bytes[idx++] = (elpart & 0xff) ^ invmask;
6564 elpart >>= BITS_PER_UNIT;
6566 if (GET_CODE (el) == CONST_DOUBLE)
6567 elpart = CONST_DOUBLE_HIGH (el);
6571 /* Sanity check. */
6572 gcc_assert (idx == GET_MODE_SIZE (mode));
6576 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
6577 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
6579 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6580 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6582 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
6583 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6585 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
6586 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
6588 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
6590 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
6592 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
6593 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
6595 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6596 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6598 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
6599 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6601 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
6602 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
6604 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
6606 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
6608 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
6609 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
6611 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
6612 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
6614 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
6615 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
6617 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
6618 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
6620 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
6622 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
6623 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
6625 while (0);
6627 if (immtype == -1)
6628 return false;
6630 if (info)
6632 info->element_width = elsize;
6633 info->mvn = emvn != 0;
6634 info->shift = eshift;
6636 unsigned HOST_WIDE_INT imm = 0;
6638 if (immtype >= 12 && immtype <= 15)
6639 info->msl = true;
6641 /* Un-invert bytes of recognized vector, if necessary. */
6642 if (invmask != 0)
6643 for (i = 0; i < idx; i++)
6644 bytes[i] ^= invmask;
6646 if (immtype == 17)
6648 /* FIXME: Broken on 32-bit H_W_I hosts. */
6649 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
6651 for (i = 0; i < 8; i++)
6652 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
6653 << (i * BITS_PER_UNIT);
6656 info->value = GEN_INT (imm);
6658 else
6660 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
6661 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
6663 /* Construct 'abcdefgh' because the assembler cannot handle
6664 generic constants. */
6665 if (info->mvn)
6666 imm = ~imm;
6667 imm = (imm >> info->shift) & 0xff;
6668 info->value = GEN_INT (imm);
6672 return true;
6673 #undef CHECK
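/* Example: a V4SImode vector whose four elements are all 0x4500 splats to
   the byte pattern { 00 45 00 00, ... }, which matches the ELSIZE == 32,
   SHIFT == 8 CHECK above.  INFO is then filled in with element_width 32,
   shift 8, mvn false and value 0x45, which
   aarch64_output_simd_mov_immediate renders roughly as
   "movi v0.4s, 0x45, lsl 8".  */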
6676 static bool
6677 aarch64_const_vec_all_same_int_p (rtx x,
6678 HOST_WIDE_INT minval,
6679 HOST_WIDE_INT maxval)
6681 HOST_WIDE_INT firstval;
6682 int count, i;
6684 if (GET_CODE (x) != CONST_VECTOR
6685 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
6686 return false;
6688 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
6689 if (firstval < minval || firstval > maxval)
6690 return false;
6692 count = CONST_VECTOR_NUNITS (x);
6693 for (i = 1; i < count; i++)
6694 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
6695 return false;
6697 return true;
6700 /* Check whether immediate shift constants are within range.  */
6701 bool
6702 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
6704 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
6705 if (left)
6706 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
6707 else
6708 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
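/* For instance, with V4SImode the element width is 32 bits, so an immediate
   left-shift count must be a splat of a value in [0, 31] while an immediate
   right-shift count must lie in [1, 32], matching the instruction
   encodings.  */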
6711 /* Return true if X is a uniform vector where all elements
6712 are either the floating-point constant 0.0 or the
6713 integer constant 0. */
6714 bool
6715 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
6717 return x == CONST0_RTX (mode);
6720 bool
6721 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
6723 HOST_WIDE_INT imm = INTVAL (x);
6724 int i;
6726 for (i = 0; i < 8; i++)
6728 unsigned int byte = imm & 0xff;
6729 if (byte != 0xff && byte != 0)
6730 return false;
6731 imm >>= 8;
6734 return true;
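/* Example: the loop above accepts a constant such as 0xff00ffff00000000
   (every byte is either 0x00 or 0xff) and rejects one such as 0x0102,
   mirroring the 64-bit byte-mask form of immediate that MOVI can
   materialise.  */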
6737 bool
6738 aarch64_mov_operand_p (rtx x,
6739 enum aarch64_symbol_context context,
6740 enum machine_mode mode)
6742 if (GET_CODE (x) == HIGH
6743 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6744 return true;
6746 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
6747 return true;
6749 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
6750 return true;
6752 return aarch64_classify_symbolic_expression (x, context)
6753 == SYMBOL_TINY_ABSOLUTE;
6756 /* Return a const_int vector of VAL. */
6757 rtx
6758 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
6760 int nunits = GET_MODE_NUNITS (mode);
6761 rtvec v = rtvec_alloc (nunits);
6762 int i;
6764 for (i=0; i < nunits; i++)
6765 RTVEC_ELT (v, i) = GEN_INT (val);
6767 return gen_rtx_CONST_VECTOR (mode, v);
6770 /* Check OP is a legal scalar immediate for the MOVI instruction. */
6772 bool
6773 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
6775 enum machine_mode vmode;
6777 gcc_assert (!VECTOR_MODE_P (mode));
6778 vmode = aarch64_preferred_simd_mode (mode);
6779 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
6780 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
6783 /* Construct and return a PARALLEL RTX vector. */
6784 rtx
6785 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
6787 int nunits = GET_MODE_NUNITS (mode);
6788 rtvec v = rtvec_alloc (nunits / 2);
6789 int base = high ? nunits / 2 : 0;
6790 rtx t1;
6791 int i;
6793 for (i=0; i < nunits / 2; i++)
6794 RTVEC_ELT (v, i) = GEN_INT (base + i);
6796 t1 = gen_rtx_PARALLEL (mode, v);
6797 return t1;
6800 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
6801 HIGH (exclusive). */
6802 void
6803 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6805 HOST_WIDE_INT lane;
6806 gcc_assert (GET_CODE (operand) == CONST_INT);
6807 lane = INTVAL (operand);
6809 if (lane < low || lane >= high)
6810 error ("lane out of range");
6813 void
6814 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
6816 gcc_assert (GET_CODE (operand) == CONST_INT);
6817 HOST_WIDE_INT lane = INTVAL (operand);
6819 if (lane < low || lane >= high)
6820 error ("constant out of range");
6823 /* Emit code to reinterpret one AdvSIMD type as another,
6824 without altering bits. */
6825 void
6826 aarch64_simd_reinterpret (rtx dest, rtx src)
6828 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
6831 /* Emit code to place an AdvSIMD pair result in memory locations (with equal
6832 registers). */
6833 void
6834 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
6835 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
6836 rtx op1)
6838 rtx mem = gen_rtx_MEM (mode, destaddr);
6839 rtx tmp1 = gen_reg_rtx (mode);
6840 rtx tmp2 = gen_reg_rtx (mode);
6842 emit_insn (intfn (tmp1, op1, tmp2));
6844 emit_move_insn (mem, tmp1);
6845 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
6846 emit_move_insn (mem, tmp2);
6849 /* Return TRUE if OP is a valid vector addressing mode. */
6850 bool
6851 aarch64_simd_mem_operand_p (rtx op)
6853 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
6854 || GET_CODE (XEXP (op, 0)) == REG);
6857 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
6858 not to early-clobber SRC registers in the process.
6860 We assume that the operands described by SRC and DEST represent a
6861 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
6862 number of components into which the copy has been decomposed. */
6863 void
6864 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
6865 rtx *src, unsigned int count)
6867 unsigned int i;
6869 if (!reg_overlap_mentioned_p (operands[0], operands[1])
6870 || REGNO (operands[0]) < REGNO (operands[1]))
6872 for (i = 0; i < count; i++)
6874 operands[2 * i] = dest[i];
6875 operands[2 * i + 1] = src[i];
6878 else
6880 for (i = 0; i < count; i++)
6882 operands[2 * i] = dest[count - i - 1];
6883 operands[2 * i + 1] = src[count - i - 1];
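/* For instance, when a two-component value is copied from the register pair
   (v1, v2) to (v2, v3), the destination overlaps the source and has the
   higher register number, so the second branch above emits the components in
   reverse order (v3 <- v2 before v2 <- v1) and v2 is read before it is
   overwritten.  */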
6888 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
6889 one of VSTRUCT modes: OI, CI or XI. */
6890 int
6891 aarch64_simd_attr_length_move (rtx insn)
6893 enum machine_mode mode;
6895 extract_insn_cached (insn);
6897 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
6899 mode = GET_MODE (recog_data.operand[0]);
6900 switch (mode)
6902 case OImode:
6903 return 8;
6904 case CImode:
6905 return 12;
6906 case XImode:
6907 return 16;
6908 default:
6909 gcc_unreachable ();
6912 return 4;
6915 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
6916 alignment of a vector to 128 bits. */
6917 static HOST_WIDE_INT
6918 aarch64_simd_vector_alignment (const_tree type)
6920 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
6921 return MIN (align, 128);
6924 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
6925 static bool
6926 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
6928 if (is_packed)
6929 return false;
6931 /* We guarantee alignment for vectors up to 128-bits. */
6932 if (tree_int_cst_compare (TYPE_SIZE (type),
6933 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
6934 return false;
6936 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
6937 return true;
6940 /* If VALS is a vector constant that can be loaded into a register
6941 using DUP, generate instructions to do so and return an RTX to
6942 assign to the register. Otherwise return NULL_RTX. */
6943 static rtx
6944 aarch64_simd_dup_constant (rtx vals)
6946 enum machine_mode mode = GET_MODE (vals);
6947 enum machine_mode inner_mode = GET_MODE_INNER (mode);
6948 int n_elts = GET_MODE_NUNITS (mode);
6949 bool all_same = true;
6950 rtx x;
6951 int i;
6953 if (GET_CODE (vals) != CONST_VECTOR)
6954 return NULL_RTX;
6956 for (i = 1; i < n_elts; ++i)
6958 x = CONST_VECTOR_ELT (vals, i);
6959 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
6960 all_same = false;
6963 if (!all_same)
6964 return NULL_RTX;
6966 /* We can load this constant by using DUP and a constant in a
6967 single ARM register. This will be cheaper than a vector
6968 load. */
6969 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
6970 return gen_rtx_VEC_DUPLICATE (mode, x);
6974 /* Generate code to load VALS, which is a PARALLEL containing only
6975 constants (for vec_init) or CONST_VECTOR, efficiently into a
6976 register. Returns an RTX to copy into the register, or NULL_RTX
6977 for a PARALLEL that can not be converted into a CONST_VECTOR. */
6978 static rtx
6979 aarch64_simd_make_constant (rtx vals)
6981 enum machine_mode mode = GET_MODE (vals);
6982 rtx const_dup;
6983 rtx const_vec = NULL_RTX;
6984 int n_elts = GET_MODE_NUNITS (mode);
6985 int n_const = 0;
6986 int i;
6988 if (GET_CODE (vals) == CONST_VECTOR)
6989 const_vec = vals;
6990 else if (GET_CODE (vals) == PARALLEL)
6992 /* A CONST_VECTOR must contain only CONST_INTs and
6993 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
6994 Only store valid constants in a CONST_VECTOR. */
6995 for (i = 0; i < n_elts; ++i)
6997 rtx x = XVECEXP (vals, 0, i);
6998 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
6999 n_const++;
7001 if (n_const == n_elts)
7002 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
7004 else
7005 gcc_unreachable ();
7007 if (const_vec != NULL_RTX
7008 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
7009 /* Load using MOVI/MVNI. */
7010 return const_vec;
7011 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
7012 /* Loaded using DUP. */
7013 return const_dup;
7014 else if (const_vec != NULL_RTX)
7015 /* Load from constant pool. We can not take advantage of single-cycle
7016 LD1 because we need a PC-relative addressing mode. */
7017 return const_vec;
7018 else
7019 /* A PARALLEL containing something not valid inside CONST_VECTOR.
7020 We can not construct an initializer. */
7021 return NULL_RTX;
7024 void
7025 aarch64_expand_vector_init (rtx target, rtx vals)
7027 enum machine_mode mode = GET_MODE (target);
7028 enum machine_mode inner_mode = GET_MODE_INNER (mode);
7029 int n_elts = GET_MODE_NUNITS (mode);
7030 int n_var = 0, one_var = -1;
7031 bool all_same = true;
7032 rtx x, mem;
7033 int i;
7035 x = XVECEXP (vals, 0, 0);
7036 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7037 n_var = 1, one_var = 0;
7039 for (i = 1; i < n_elts; ++i)
7041 x = XVECEXP (vals, 0, i);
7042 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7043 ++n_var, one_var = i;
7045 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
7046 all_same = false;
7049 if (n_var == 0)
7051 rtx constant = aarch64_simd_make_constant (vals);
7052 if (constant != NULL_RTX)
7054 emit_move_insn (target, constant);
7055 return;
7059 /* Splat a single non-constant element if we can. */
7060 if (all_same)
7062 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
7063 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
7064 return;
7067 /* One field is non-constant. Load constant then overwrite varying
7068 field. This is more efficient than using the stack. */
7069 if (n_var == 1)
7071 rtx copy = copy_rtx (vals);
7072 rtx index = GEN_INT (one_var);
7073 enum insn_code icode;
7075 /* Load constant part of vector, substitute neighboring value for
7076 varying element. */
7077 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
7078 aarch64_expand_vector_init (target, copy);
7080 /* Insert variable. */
7081 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
7082 icode = optab_handler (vec_set_optab, mode);
7083 gcc_assert (icode != CODE_FOR_nothing);
7084 emit_insn (GEN_FCN (icode) (target, x, index));
7085 return;
7088 /* Construct the vector in memory one field at a time
7089 and load the whole vector. */
7090 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7091 for (i = 0; i < n_elts; i++)
7092 emit_move_insn (adjust_address_nv (mem, inner_mode,
7093 i * GET_MODE_SIZE (inner_mode)),
7094 XVECEXP (vals, 0, i));
7095 emit_move_insn (target, mem);
7099 static unsigned HOST_WIDE_INT
7100 aarch64_shift_truncation_mask (enum machine_mode mode)
7102 return
7103 (aarch64_vector_mode_supported_p (mode)
7104 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
7107 #ifndef TLS_SECTION_ASM_FLAG
7108 #define TLS_SECTION_ASM_FLAG 'T'
7109 #endif
7111 void
7112 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
7113 tree decl ATTRIBUTE_UNUSED)
7115 char flagchars[10], *f = flagchars;
7117 /* If we have already declared this section, we can use an
7118 abbreviated form to switch back to it -- unless this section is
7119 part of a COMDAT groups, in which case GAS requires the full
7120 declaration every time. */
7121 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7122 && (flags & SECTION_DECLARED))
7124 fprintf (asm_out_file, "\t.section\t%s\n", name);
7125 return;
7128 if (!(flags & SECTION_DEBUG))
7129 *f++ = 'a';
7130 if (flags & SECTION_WRITE)
7131 *f++ = 'w';
7132 if (flags & SECTION_CODE)
7133 *f++ = 'x';
7134 if (flags & SECTION_SMALL)
7135 *f++ = 's';
7136 if (flags & SECTION_MERGE)
7137 *f++ = 'M';
7138 if (flags & SECTION_STRINGS)
7139 *f++ = 'S';
7140 if (flags & SECTION_TLS)
7141 *f++ = TLS_SECTION_ASM_FLAG;
7142 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7143 *f++ = 'G';
7144 *f = '\0';
7146 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
7148 if (!(flags & SECTION_NOTYPE))
7150 const char *type;
7151 const char *format;
7153 if (flags & SECTION_BSS)
7154 type = "nobits";
7155 else
7156 type = "progbits";
7158 #ifdef TYPE_OPERAND_FMT
7159 format = "," TYPE_OPERAND_FMT;
7160 #else
7161 format = ",@%s";
7162 #endif
7164 fprintf (asm_out_file, format, type);
7166 if (flags & SECTION_ENTSIZE)
7167 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
7168 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7170 if (TREE_CODE (decl) == IDENTIFIER_NODE)
7171 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
7172 else
7173 fprintf (asm_out_file, ",%s,comdat",
7174 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
7178 putc ('\n', asm_out_file);
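/* Example output: for a thread-local BSS section the flags computed above
   yield something like

        .section        .tbss,"awT",@nobits

   assuming the default ",@%s" TYPE_OPERAND_FMT and no COMDAT group.  */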
7181 /* Select a format to encode pointers in exception handling data. */
7182 int
7183 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
7185 int type;
7186 switch (aarch64_cmodel)
7188 case AARCH64_CMODEL_TINY:
7189 case AARCH64_CMODEL_TINY_PIC:
7190 case AARCH64_CMODEL_SMALL:
7191 case AARCH64_CMODEL_SMALL_PIC:
7192 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
7193 for everything. */
7194 type = DW_EH_PE_sdata4;
7195 break;
7196 default:
7197 /* No assumptions here. 8-byte relocs required. */
7198 type = DW_EH_PE_sdata8;
7199 break;
7201 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
7204 /* Emit load exclusive. */
7206 static void
7207 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
7208 rtx mem, rtx model_rtx)
7210 rtx (*gen) (rtx, rtx, rtx);
7212 switch (mode)
7214 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
7215 case HImode: gen = gen_aarch64_load_exclusivehi; break;
7216 case SImode: gen = gen_aarch64_load_exclusivesi; break;
7217 case DImode: gen = gen_aarch64_load_exclusivedi; break;
7218 default:
7219 gcc_unreachable ();
7222 emit_insn (gen (rval, mem, model_rtx));
7225 /* Emit store exclusive. */
7227 static void
7228 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
7229 rtx rval, rtx mem, rtx model_rtx)
7231 rtx (*gen) (rtx, rtx, rtx, rtx);
7233 switch (mode)
7235 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
7236 case HImode: gen = gen_aarch64_store_exclusivehi; break;
7237 case SImode: gen = gen_aarch64_store_exclusivesi; break;
7238 case DImode: gen = gen_aarch64_store_exclusivedi; break;
7239 default:
7240 gcc_unreachable ();
7243 emit_insn (gen (bval, rval, mem, model_rtx));
7246 /* Mark the previous jump instruction as unlikely. */
7248 static void
7249 aarch64_emit_unlikely_jump (rtx insn)
7251 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
7253 insn = emit_jump_insn (insn);
7254 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
7257 /* Expand a compare and swap pattern. */
7259 void
7260 aarch64_expand_compare_and_swap (rtx operands[])
7262 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
7263 enum machine_mode mode, cmp_mode;
7264 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
7266 bval = operands[0];
7267 rval = operands[1];
7268 mem = operands[2];
7269 oldval = operands[3];
7270 newval = operands[4];
7271 is_weak = operands[5];
7272 mod_s = operands[6];
7273 mod_f = operands[7];
7274 mode = GET_MODE (mem);
7275 cmp_mode = mode;
7277 /* Normally the succ memory model must be stronger than fail, but in the
7278 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
7279 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
7281 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
7282 && INTVAL (mod_s) == MEMMODEL_RELEASE)
7283 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
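  /* This corresponds to, e.g., a call such as
     __atomic_compare_exchange_n (p, &expected, desired, 0,
                                  __ATOMIC_RELEASE, __ATOMIC_ACQUIRE),
     where using the success order alone would drop the acquire semantics
     needed on the failure path.  */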
7285 switch (mode)
7287 case QImode:
7288 case HImode:
7289 /* For short modes, we're going to perform the comparison in SImode,
7290 so do the zero-extension now. */
7291 cmp_mode = SImode;
7292 rval = gen_reg_rtx (SImode);
7293 oldval = convert_modes (SImode, mode, oldval, true);
7294 /* Fall through. */
7296 case SImode:
7297 case DImode:
7298 /* Force the value into a register if needed. */
7299 if (!aarch64_plus_operand (oldval, mode))
7300 oldval = force_reg (cmp_mode, oldval);
7301 break;
7303 default:
7304 gcc_unreachable ();
7307 switch (mode)
7309 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
7310 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
7311 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
7312 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
7313 default:
7314 gcc_unreachable ();
7317 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
7319 if (mode == QImode || mode == HImode)
7320 emit_move_insn (operands[1], gen_lowpart (mode, rval));
7322 x = gen_rtx_REG (CCmode, CC_REGNUM);
7323 x = gen_rtx_EQ (SImode, x, const0_rtx);
7324 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
7327 /* Split a compare and swap pattern. */
7329 void
7330 aarch64_split_compare_and_swap (rtx operands[])
7332 rtx rval, mem, oldval, newval, scratch;
7333 enum machine_mode mode;
7334 bool is_weak;
7335 rtx label1, label2, x, cond;
7337 rval = operands[0];
7338 mem = operands[1];
7339 oldval = operands[2];
7340 newval = operands[3];
7341 is_weak = (operands[4] != const0_rtx);
7342 scratch = operands[7];
7343 mode = GET_MODE (mem);
7345 label1 = NULL_RTX;
7346 if (!is_weak)
7348 label1 = gen_label_rtx ();
7349 emit_label (label1);
7351 label2 = gen_label_rtx ();
7353 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
7355 cond = aarch64_gen_compare_reg (NE, rval, oldval);
7356 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7357 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7358 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
7359 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7361 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
7363 if (!is_weak)
7365 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
7366 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7367 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
7368 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7370 else
7372 cond = gen_rtx_REG (CCmode, CC_REGNUM);
7373 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7374 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7377 emit_label (label2);
7380 /* Split an atomic operation. */
7382 void
7383 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
7384 rtx value, rtx model_rtx, rtx cond)
7386 enum machine_mode mode = GET_MODE (mem);
7387 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7388 rtx label, x;
7390 label = gen_label_rtx ();
7391 emit_label (label);
7393 if (new_out)
7394 new_out = gen_lowpart (wmode, new_out);
7395 if (old_out)
7396 old_out = gen_lowpart (wmode, old_out);
7397 else
7398 old_out = new_out;
7399 value = simplify_gen_subreg (wmode, value, mode, 0);
7401 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7403 switch (code)
7405 case SET:
7406 new_out = value;
7407 break;
7409 case NOT:
7410 x = gen_rtx_AND (wmode, old_out, value);
7411 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7412 x = gen_rtx_NOT (wmode, new_out);
7413 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7414 break;
7416 case MINUS:
7417 if (CONST_INT_P (value))
7419 value = GEN_INT (-INTVAL (value));
7420 code = PLUS;
7422 /* Fall through. */
7424 default:
7425 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
7426 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7427 break;
7430 aarch64_emit_store_exclusive (mode, cond, mem,
7431 gen_lowpart (mode, new_out), model_rtx);
7433 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7434 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7435 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
7436 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7439 static void
7440 aarch64_print_extension (void)
7442 const struct aarch64_option_extension *opt = NULL;
7444 for (opt = all_extensions; opt->name != NULL; opt++)
7445 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
7446 asm_fprintf (asm_out_file, "+%s", opt->name);
7448 asm_fprintf (asm_out_file, "\n");
7451 static void
7452 aarch64_start_file (void)
7454 if (selected_arch)
7456 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
7457 aarch64_print_extension ();
7459 else if (selected_cpu)
7461 const char *truncated_name
7462 = aarch64_rewrite_selected_cpu (selected_cpu->name);
7463 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
7464 aarch64_print_extension ();
7466 default_file_start();
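/* Example output: this emits a directive of the form ".arch <name>" or
   ".cpu <name>" followed by "+<ext>" for each enabled extension, e.g.
   something like

        .arch armv8-a+crypto

   (the exact extension list depends on the selected cpu/arch and flags),
   before the usual file-start boilerplate.  */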
7469 /* Target hook for c_mode_for_suffix. */
7470 static enum machine_mode
7471 aarch64_c_mode_for_suffix (char suffix)
7473 if (suffix == 'q')
7474 return TFmode;
7476 return VOIDmode;
7479 /* We can only represent floating point constants which will fit in
7480 "quarter-precision" values. These values are characterised by
7481    a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given by:
7484 (-1)^s * (n/16) * 2^r
7486 Where:
7487 's' is the sign bit.
7488 'n' is an integer in the range 16 <= n <= 31.
7489 'r' is an integer in the range -3 <= r <= 4. */
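/* Worked example: 1.5 = (-1)^0 * (24/16) * 2^0, i.e. s = 0, n = 24, r = 0,
   so it is representable.  The representable magnitudes therefore range from
   16/16 * 2^-3 = 0.125 up to 31/16 * 2^4 = 31.0, and a value such as 0.1 has
   no such encoding.  */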
7491 /* Return true iff X can be represented by a quarter-precision
7492    floating point immediate operand.  Note, we cannot represent 0.0.  */
7493 bool
7494 aarch64_float_const_representable_p (rtx x)
7496 /* This represents our current view of how many bits
7497 make up the mantissa. */
7498 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7499 int exponent;
7500 unsigned HOST_WIDE_INT mantissa, mask;
7501 HOST_WIDE_INT m1, m2;
7502 REAL_VALUE_TYPE r, m;
7504 if (!CONST_DOUBLE_P (x))
7505 return false;
7507 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7509 /* We cannot represent infinities, NaNs or +/-zero. We won't
7510 know if we have +zero until we analyse the mantissa, but we
7511 can reject the other invalid values. */
7512 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
7513 || REAL_VALUE_MINUS_ZERO (r))
7514 return false;
7516 /* Extract exponent. */
7517 r = real_value_abs (&r);
7518 exponent = REAL_EXP (&r);
7520 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7521 highest (sign) bit, with a fixed binary point at bit point_pos.
7522 m1 holds the low part of the mantissa, m2 the high part.
7523 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
7524 bits for the mantissa, this can fail (low bits will be lost). */
7525 real_ldexp (&m, &r, point_pos - exponent);
7526 REAL_VALUE_TO_INT (&m1, &m2, m);
7528 /* If the low part of the mantissa has bits set we cannot represent
7529 the value. */
7530 if (m1 != 0)
7531 return false;
7532 /* We have rejected the lower HOST_WIDE_INT, so update our
7533 understanding of how many bits lie in the mantissa and
7534 look only at the high HOST_WIDE_INT. */
7535 mantissa = m2;
7536 point_pos -= HOST_BITS_PER_WIDE_INT;
7538 /* We can only represent values with a mantissa of the form 1.xxxx. */
7539 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
7540 if ((mantissa & mask) != 0)
7541 return false;
7543 /* Having filtered unrepresentable values, we may now remove all
7544 but the highest 5 bits. */
7545 mantissa >>= point_pos - 5;
7547 /* We cannot represent the value 0.0, so reject it. This is handled
7548 elsewhere. */
7549 if (mantissa == 0)
7550 return false;
7552 /* Then, as bit 4 is always set, we can mask it off, leaving
7553 the mantissa in the range [0, 15]. */
7554 mantissa &= ~(1 << 4);
7555 gcc_assert (mantissa <= 15);
7557 /* GCC internally does not use IEEE754-like encoding (where normalized
7558    significands are in the range [1, 2)).  GCC uses [0.5, 1) (see real.c).
7559 Our mantissa values are shifted 4 places to the left relative to
7560 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
7561 by 5 places to correct for GCC's representation. */
7562 exponent = 5 - exponent;
7564 return (exponent >= 0 && exponent <= 7);
7567 char*
7568 aarch64_output_simd_mov_immediate (rtx const_vector,
7569 enum machine_mode mode,
7570 unsigned width)
7572 bool is_valid;
7573 static char templ[40];
7574 const char *mnemonic;
7575 const char *shift_op;
7576 unsigned int lane_count = 0;
7577 char element_char;
7579 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
7581   /* This will return true to show const_vector is legal for use as
7582      an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
7583 also update INFO to show how the immediate should be generated. */
7584 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
7585 gcc_assert (is_valid);
7587 element_char = sizetochar (info.element_width);
7588 lane_count = width / info.element_width;
7590 mode = GET_MODE_INNER (mode);
7591 if (mode == SFmode || mode == DFmode)
7593 gcc_assert (info.shift == 0 && ! info.mvn);
7594 if (aarch64_float_const_zero_rtx_p (info.value))
7595 info.value = GEN_INT (0);
7596 else
7598 #define buf_size 20
7599 REAL_VALUE_TYPE r;
7600 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
7601 char float_buf[buf_size] = {'\0'};
7602 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
7603 #undef buf_size
7605 if (lane_count == 1)
7606 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
7607 else
7608 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
7609 lane_count, element_char, float_buf);
7610 return templ;
7614 mnemonic = info.mvn ? "mvni" : "movi";
7615 shift_op = info.msl ? "msl" : "lsl";
7617 if (lane_count == 1)
7618 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
7619 mnemonic, UINTVAL (info.value));
7620 else if (info.shift)
7621 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
7622 ", %s %d", mnemonic, lane_count, element_char,
7623 UINTVAL (info.value), shift_op, info.shift);
7624 else
7625 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
7626 mnemonic, lane_count, element_char, UINTVAL (info.value));
7627 return templ;
7630 char*
7631 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
7632 enum machine_mode mode)
7634 enum machine_mode vmode;
7636 gcc_assert (!VECTOR_MODE_P (mode));
7637 vmode = aarch64_simd_container_mode (mode, 64);
7638 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
7639 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
7642 /* Split operands into moves from op[1] + op[2] into op[0]. */
7644 void
7645 aarch64_split_combinev16qi (rtx operands[3])
7647 unsigned int dest = REGNO (operands[0]);
7648 unsigned int src1 = REGNO (operands[1]);
7649 unsigned int src2 = REGNO (operands[2]);
7650 enum machine_mode halfmode = GET_MODE (operands[1]);
7651 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
7652 rtx destlo, desthi;
7654 gcc_assert (halfmode == V16QImode);
7656 if (src1 == dest && src2 == dest + halfregs)
7658 /* No-op move. Can't split to nothing; emit something. */
7659 emit_note (NOTE_INSN_DELETED);
7660 return;
7663 /* Preserve register attributes for variable tracking. */
7664 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
7665 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
7666 GET_MODE_SIZE (halfmode));
7668 /* Special case of reversed high/low parts. */
7669 if (reg_overlap_mentioned_p (operands[2], destlo)
7670 && reg_overlap_mentioned_p (operands[1], desthi))
7672 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7673 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
7674 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
7676 else if (!reg_overlap_mentioned_p (operands[2], destlo))
7678 /* Try to avoid unnecessary moves if part of the result
7679 is in the right place already. */
7680 if (src1 != dest)
7681 emit_move_insn (destlo, operands[1]);
7682 if (src2 != dest + halfregs)
7683 emit_move_insn (desthi, operands[2]);
7685 else
7687 if (src2 != dest + halfregs)
7688 emit_move_insn (desthi, operands[2]);
7689 if (src1 != dest)
7690 emit_move_insn (destlo, operands[1]);
7694 /* vec_perm support. */
7696 #define MAX_VECT_LEN 16
7698 struct expand_vec_perm_d
7700 rtx target, op0, op1;
7701 unsigned char perm[MAX_VECT_LEN];
7702 enum machine_mode vmode;
7703 unsigned char nelt;
7704 bool one_vector_p;
7705 bool testing_p;
7708 /* Generate a variable permutation. */
7710 static void
7711 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
7713 enum machine_mode vmode = GET_MODE (target);
7714 bool one_vector_p = rtx_equal_p (op0, op1);
7716 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
7717 gcc_checking_assert (GET_MODE (op0) == vmode);
7718 gcc_checking_assert (GET_MODE (op1) == vmode);
7719 gcc_checking_assert (GET_MODE (sel) == vmode);
7720 gcc_checking_assert (TARGET_SIMD);
7722 if (one_vector_p)
7724 if (vmode == V8QImode)
7726 /* Expand the argument to a V16QI mode by duplicating it. */
7727 rtx pair = gen_reg_rtx (V16QImode);
7728 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
7729 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7731 else
7733 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
7736 else
7738 rtx pair;
7740 if (vmode == V8QImode)
7742 pair = gen_reg_rtx (V16QImode);
7743 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
7744 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
7746 else
7748 pair = gen_reg_rtx (OImode);
7749 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
7750 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
7755 void
7756 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
7758 enum machine_mode vmode = GET_MODE (target);
7759 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
7760 bool one_vector_p = rtx_equal_p (op0, op1);
7761 rtx rmask[MAX_VECT_LEN], mask;
7763 gcc_checking_assert (!BYTES_BIG_ENDIAN);
7765 /* The TBL instruction does not use a modulo index, so we must take care
7766 of that ourselves. */
7767 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
7768 for (i = 0; i < nelt; ++i)
7769 rmask[i] = mask;
7770 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
7771 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
7773 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
7776 /* Recognize patterns suitable for the TRN instructions. */
7777 static bool
7778 aarch64_evpc_trn (struct expand_vec_perm_d *d)
7780 unsigned int i, odd, mask, nelt = d->nelt;
7781 rtx out, in0, in1, x;
7782 rtx (*gen) (rtx, rtx, rtx);
7783 enum machine_mode vmode = d->vmode;
7785 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7786 return false;
7788 /* Note that these are little-endian tests.
7789 We correct for big-endian later. */
7790 if (d->perm[0] == 0)
7791 odd = 0;
7792 else if (d->perm[0] == 1)
7793 odd = 1;
7794 else
7795 return false;
7796 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7798 for (i = 0; i < nelt; i += 2)
7800 if (d->perm[i] != i + odd)
7801 return false;
7802 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
7803 return false;
7806 /* Success! */
7807 if (d->testing_p)
7808 return true;
7810 in0 = d->op0;
7811 in1 = d->op1;
7812 if (BYTES_BIG_ENDIAN)
7814 x = in0, in0 = in1, in1 = x;
7815 odd = !odd;
7817 out = d->target;
7819 if (odd)
7821 switch (vmode)
7823 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
7824 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
7825 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
7826 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
7827 case V4SImode: gen = gen_aarch64_trn2v4si; break;
7828 case V2SImode: gen = gen_aarch64_trn2v2si; break;
7829 case V2DImode: gen = gen_aarch64_trn2v2di; break;
7830 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
7831 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
7832 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
7833 default:
7834 return false;
7837 else
7839 switch (vmode)
7841 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
7842 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
7843 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
7844 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
7845 case V4SImode: gen = gen_aarch64_trn1v4si; break;
7846 case V2SImode: gen = gen_aarch64_trn1v2si; break;
7847 case V2DImode: gen = gen_aarch64_trn1v2di; break;
7848 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
7849 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
7850 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
7851 default:
7852 return false;
7856 emit_insn (gen (out, in0, in1));
7857 return true;
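/* For instance, with V4SImode and the little-endian numbering tested above,
   TRN1 is selected for the permutation { 0, 4, 2, 6 } and TRN2 for
   { 1, 5, 3, 7 }.  */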
7860 /* Recognize patterns suitable for the UZP instructions. */
7861 static bool
7862 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
7864 unsigned int i, odd, mask, nelt = d->nelt;
7865 rtx out, in0, in1, x;
7866 rtx (*gen) (rtx, rtx, rtx);
7867 enum machine_mode vmode = d->vmode;
7869 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7870 return false;
7872 /* Note that these are little-endian tests.
7873 We correct for big-endian later. */
7874 if (d->perm[0] == 0)
7875 odd = 0;
7876 else if (d->perm[0] == 1)
7877 odd = 1;
7878 else
7879 return false;
7880 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7882 for (i = 0; i < nelt; i++)
7884 unsigned elt = (i * 2 + odd) & mask;
7885 if (d->perm[i] != elt)
7886 return false;
7889 /* Success! */
7890 if (d->testing_p)
7891 return true;
7893 in0 = d->op0;
7894 in1 = d->op1;
7895 if (BYTES_BIG_ENDIAN)
7897 x = in0, in0 = in1, in1 = x;
7898 odd = !odd;
7900 out = d->target;
7902 if (odd)
7904 switch (vmode)
7906 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
7907 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
7908 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
7909 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
7910 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
7911 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
7912 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
7913 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
7914 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
7915 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
7916 default:
7917 return false;
7920 else
7922 switch (vmode)
7924 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
7925 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
7926 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
7927 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
7928 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
7929 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
7930 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
7931 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
7932 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
7933 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
7934 default:
7935 return false;
7939 emit_insn (gen (out, in0, in1));
7940 return true;
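/* For instance, with V4SImode and the little-endian numbering tested above,
   UZP1 matches the permutation { 0, 2, 4, 6 } and UZP2 matches
   { 1, 3, 5, 7 }.  */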
7943 /* Recognize patterns suitable for the ZIP instructions. */
7944 static bool
7945 aarch64_evpc_zip (struct expand_vec_perm_d *d)
7947 unsigned int i, high, mask, nelt = d->nelt;
7948 rtx out, in0, in1, x;
7949 rtx (*gen) (rtx, rtx, rtx);
7950 enum machine_mode vmode = d->vmode;
7952 if (GET_MODE_UNIT_SIZE (vmode) > 8)
7953 return false;
7955 /* Note that these are little-endian tests.
7956 We correct for big-endian later. */
7957 high = nelt / 2;
7958 if (d->perm[0] == high)
7959 /* Do Nothing. */
7961 else if (d->perm[0] == 0)
7962 high = 0;
7963 else
7964 return false;
7965 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
7967 for (i = 0; i < nelt / 2; i++)
7969 unsigned elt = (i + high) & mask;
7970 if (d->perm[i * 2] != elt)
7971 return false;
7972 elt = (elt + nelt) & mask;
7973 if (d->perm[i * 2 + 1] != elt)
7974 return false;
7977 /* Success! */
7978 if (d->testing_p)
7979 return true;
7981 in0 = d->op0;
7982 in1 = d->op1;
7983 if (BYTES_BIG_ENDIAN)
7985 x = in0, in0 = in1, in1 = x;
7986 high = !high;
7988 out = d->target;
7990 if (high)
7992 switch (vmode)
7994 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
7995 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
7996 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
7997 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
7998 case V4SImode: gen = gen_aarch64_zip2v4si; break;
7999 case V2SImode: gen = gen_aarch64_zip2v2si; break;
8000 case V2DImode: gen = gen_aarch64_zip2v2di; break;
8001 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
8002 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
8003 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
8004 default:
8005 return false;
8008 else
8010 switch (vmode)
8012 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
8013 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
8014 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
8015 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
8016 case V4SImode: gen = gen_aarch64_zip1v4si; break;
8017 case V2SImode: gen = gen_aarch64_zip1v2si; break;
8018 case V2DImode: gen = gen_aarch64_zip1v2di; break;
8019 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
8020 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
8021 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
8022 default:
8023 return false;
8027 emit_insn (gen (out, in0, in1));
8028 return true;
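/* For instance, with V4SImode and the little-endian numbering tested above,
   ZIP1 corresponds to the permutation { 0, 4, 1, 5 } and ZIP2 to
   { 2, 6, 3, 7 }.  */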
8031 static bool
8032 aarch64_evpc_dup (struct expand_vec_perm_d *d)
8034 rtx (*gen) (rtx, rtx, rtx);
8035 rtx out = d->target;
8036 rtx in0;
8037 enum machine_mode vmode = d->vmode;
8038 unsigned int i, elt, nelt = d->nelt;
8039 rtx lane;
8041 /* TODO: This may not be big-endian safe. */
8042 if (BYTES_BIG_ENDIAN)
8043 return false;
8045 elt = d->perm[0];
8046 for (i = 1; i < nelt; i++)
8048 if (elt != d->perm[i])
8049 return false;
8052 /* The generic preparation in aarch64_expand_vec_perm_const_1
8053 swaps the operand order and the permute indices if it finds
8054 d->perm[0] to be in the second operand. Thus, we can always
8055 use d->op0 and need not do any extra arithmetic to get the
8056 correct lane number. */
8057 in0 = d->op0;
8058 lane = GEN_INT (elt);
8060 switch (vmode)
8062 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
8063 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
8064 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
8065 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
8066 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
8067 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
8068 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
8069 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
8070 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
8071 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
8072 default:
8073 return false;
8076 emit_insn (gen (out, in0, lane));
8077 return true;
8080 static bool
8081 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
8083 rtx rperm[MAX_VECT_LEN], sel;
8084 enum machine_mode vmode = d->vmode;
8085 unsigned int i, nelt = d->nelt;
8087 /* TODO: ARM's TBL indexing is little-endian. In order to handle GCC's
8088 numbering of elements for big-endian, we must reverse the order. */
8089 if (BYTES_BIG_ENDIAN)
8090 return false;
8092 if (d->testing_p)
8093 return true;
8095 /* Generic code will try constant permutation twice. Once with the
8096 original mode and again with the elements lowered to QImode.
8097 So wait and don't do the selector expansion ourselves. */
8098 if (vmode != V8QImode && vmode != V16QImode)
8099 return false;
8101 for (i = 0; i < nelt; ++i)
8102 rperm[i] = GEN_INT (d->perm[i]);
8103 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
8104 sel = force_reg (vmode, sel);
8106 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
8107 return true;
8110 static bool
8111 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
8113 /* The pattern matching functions above are written to look for a small
8114 number to begin the sequence (0, 1, N/2). If we begin with an index
8115 from the second operand, we can swap the operands. */
8116 if (d->perm[0] >= d->nelt)
8118 unsigned i, nelt = d->nelt;
8119 rtx x;
8121 for (i = 0; i < nelt; ++i)
8122 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
8124 x = d->op0;
8125 d->op0 = d->op1;
8126 d->op1 = x;
8129 if (TARGET_SIMD)
8131 if (aarch64_evpc_zip (d))
8132 return true;
8133 else if (aarch64_evpc_uzp (d))
8134 return true;
8135 else if (aarch64_evpc_trn (d))
8136 return true;
8137 else if (aarch64_evpc_dup (d))
8138 return true;
8139 return aarch64_evpc_tbl (d);
8141 return false;
8144 /* Expand a vec_perm_const pattern. */
8146 bool
8147 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
8149 struct expand_vec_perm_d d;
8150 int i, nelt, which;
8152 d.target = target;
8153 d.op0 = op0;
8154 d.op1 = op1;
8156 d.vmode = GET_MODE (target);
8157 gcc_assert (VECTOR_MODE_P (d.vmode));
8158 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8159 d.testing_p = false;
8161 for (i = which = 0; i < nelt; ++i)
8163 rtx e = XVECEXP (sel, 0, i);
8164 int ei = INTVAL (e) & (2 * nelt - 1);
8165 which |= (ei < nelt ? 1 : 2);
8166 d.perm[i] = ei;
8169 switch (which)
8171 default:
8172 gcc_unreachable ();
8174 case 3:
8175 d.one_vector_p = false;
8176 if (!rtx_equal_p (op0, op1))
8177 break;
8179 /* The elements of PERM do not suggest that only the first operand
8180 is used, but both operands are identical. Allow easier matching
8181 of the permutation by folding the permutation into the single
8182 input vector. */
8183 /* Fall Through. */
8184 case 2:
8185 for (i = 0; i < nelt; ++i)
8186 d.perm[i] &= nelt - 1;
8187 d.op0 = op1;
8188 d.one_vector_p = true;
8189 break;
8191 case 1:
8192 d.op1 = op0;
8193 d.one_vector_p = true;
8194 break;
8197 return aarch64_expand_vec_perm_const_1 (&d);
8200 static bool
8201 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
8202 const unsigned char *sel)
8204 struct expand_vec_perm_d d;
8205 unsigned int i, nelt, which;
8206 bool ret;
8208 d.vmode = vmode;
8209 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8210 d.testing_p = true;
8211 memcpy (d.perm, sel, nelt);
8213 /* Calculate whether all elements are in one vector. */
8214 for (i = which = 0; i < nelt; ++i)
8216 unsigned char e = d.perm[i];
8217 gcc_assert (e < 2 * nelt);
8218 which |= (e < nelt ? 1 : 2);
8221 /* If all elements are from the second vector, reindex as if from the
8222 first vector. */
8223 if (which == 2)
8224 for (i = 0; i < nelt; ++i)
8225 d.perm[i] -= nelt;
8227 /* Check whether the mask can be applied to a single vector. */
8228 d.one_vector_p = (which != 3);
8230 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
8231 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
8232 if (!d.one_vector_p)
8233 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
8235 start_sequence ();
8236 ret = aarch64_expand_vec_perm_const_1 (&d);
8237 end_sequence ();
8239 return ret;
8242 #undef TARGET_ADDRESS_COST
8243 #define TARGET_ADDRESS_COST aarch64_address_cost
8245 /* This hook determines whether unnamed bitfields affect the alignment
8246 of the containing structure. The hook returns true if the structure
8247 should inherit the alignment requirements of an unnamed bitfield's
8248 type. */
8249 #undef TARGET_ALIGN_ANON_BITFIELD
8250 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
8252 #undef TARGET_ASM_ALIGNED_DI_OP
8253 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
8255 #undef TARGET_ASM_ALIGNED_HI_OP
8256 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
8258 #undef TARGET_ASM_ALIGNED_SI_OP
8259 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
8261 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
8262 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
8263 hook_bool_const_tree_hwi_hwi_const_tree_true
8265 #undef TARGET_ASM_FILE_START
8266 #define TARGET_ASM_FILE_START aarch64_start_file
8268 #undef TARGET_ASM_OUTPUT_MI_THUNK
8269 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
8271 #undef TARGET_ASM_SELECT_RTX_SECTION
8272 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
8274 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
8275 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
8277 #undef TARGET_BUILD_BUILTIN_VA_LIST
8278 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
8280 #undef TARGET_CALLEE_COPIES
8281 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
8283 #undef TARGET_CAN_ELIMINATE
8284 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
8286 #undef TARGET_CANNOT_FORCE_CONST_MEM
8287 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
8289 #undef TARGET_CONDITIONAL_REGISTER_USAGE
8290 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
8292 /* Only the least significant bit is used for initialization guard
8293 variables. */
8294 #undef TARGET_CXX_GUARD_MASK_BIT
8295 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
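
/* The guard-mask hook above tells the C++ front end that only bit 0 of a
   static-initialization guard variable needs to be examined to decide
   whether the object has already been constructed.  The sketch below is
   hand-written illustrative C, not the code GCC emits; the guard variable
   and helper names are invented.  */

#include <stdint.h>
#include <stdio.h>

static uint64_t guard_for_static_object;  /* Stand-in for a guard word.  */

static int
needs_construction (void)
{
  /* With the hook returning true, only the least significant bit is
     tested, rather than the whole first byte.  */
  return (guard_for_static_object & 1) == 0;
}

int
main (void)
{
  if (needs_construction ())
    {
      /* ... run the constructor, then mark the object as initialised.  */
      guard_for_static_object |= 1;
    }
  printf ("constructed: %s\n", needs_construction () ? "no" : "yes");
  return 0;
}
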
8297 #undef TARGET_C_MODE_FOR_SUFFIX
8298 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
8300 #ifdef TARGET_BIG_ENDIAN_DEFAULT
8301 #undef TARGET_DEFAULT_TARGET_FLAGS
8302 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
8303 #endif
8305 #undef TARGET_CLASS_MAX_NREGS
8306 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
8308 #undef TARGET_BUILTIN_DECL
8309 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
8311 #undef TARGET_EXPAND_BUILTIN
8312 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
8314 #undef TARGET_EXPAND_BUILTIN_VA_START
8315 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
8317 #undef TARGET_FOLD_BUILTIN
8318 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
8320 #undef TARGET_FUNCTION_ARG
8321 #define TARGET_FUNCTION_ARG aarch64_function_arg
8323 #undef TARGET_FUNCTION_ARG_ADVANCE
8324 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
8326 #undef TARGET_FUNCTION_ARG_BOUNDARY
8327 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
8329 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
8330 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
8332 #undef TARGET_FUNCTION_VALUE
8333 #define TARGET_FUNCTION_VALUE aarch64_function_value
8335 #undef TARGET_FUNCTION_VALUE_REGNO_P
8336 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
8338 #undef TARGET_FRAME_POINTER_REQUIRED
8339 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
8341 #undef TARGET_GIMPLE_FOLD_BUILTIN
8342 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
8344 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
8345 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
8347 #undef TARGET_INIT_BUILTINS
8348 #define TARGET_INIT_BUILTINS aarch64_init_builtins
8350 #undef TARGET_LEGITIMATE_ADDRESS_P
8351 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
8353 #undef TARGET_LEGITIMATE_CONSTANT_P
8354 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
8356 #undef TARGET_LIBGCC_CMP_RETURN_MODE
8357 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
8359 #undef TARGET_LRA_P
8360 #define TARGET_LRA_P aarch64_lra_p
8362 #undef TARGET_MANGLE_TYPE
8363 #define TARGET_MANGLE_TYPE aarch64_mangle_type
8365 #undef TARGET_MEMORY_MOVE_COST
8366 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
8368 #undef TARGET_MUST_PASS_IN_STACK
8369 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
8371 /* This target hook should return true if accesses to volatile bitfields
8372 should use the narrowest mode possible. It should return false if these
8373 accesses should use the bitfield container type. */
8374 #undef TARGET_NARROW_VOLATILE_BITFIELD
8375 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
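
/* With the hook above returning false, a volatile bit-field is expected to
   be accessed through its container type rather than through the narrowest
   machine mode covering the bits.  The snippet below only shows the
   source-level situation the hook governs; the access width is a code
   generation decision, so the comments state the intent rather than
   demonstrate it.  The register layout is invented for the example.  */

#include <stdint.h>

struct device_regs
{
  volatile uint32_t flag : 1;
  volatile uint32_t mode : 3;
  volatile uint32_t      : 28;  /* Pad out the 32-bit container.  */
};

static struct device_regs regs; /* Stand-in for a memory-mapped register.  */

int
main (void)
{
  regs.mode = 5;                /* Intended as a 32-bit read-modify-write.  */
  return regs.flag;             /* Intended as a 32-bit load.  */
}
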
8377 #undef TARGET_OPTION_OVERRIDE
8378 #define TARGET_OPTION_OVERRIDE aarch64_override_options
8380 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
8381 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
8382 aarch64_override_options_after_change
8384 #undef TARGET_PASS_BY_REFERENCE
8385 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
8387 #undef TARGET_PREFERRED_RELOAD_CLASS
8388 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
8390 #undef TARGET_SECONDARY_RELOAD
8391 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
8393 #undef TARGET_SHIFT_TRUNCATION_MASK
8394 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
8396 #undef TARGET_SETUP_INCOMING_VARARGS
8397 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
8399 #undef TARGET_STRUCT_VALUE_RTX
8400 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
8402 #undef TARGET_REGISTER_MOVE_COST
8403 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
8405 #undef TARGET_RETURN_IN_MEMORY
8406 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
8408 #undef TARGET_RETURN_IN_MSB
8409 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
8411 #undef TARGET_RTX_COSTS
8412 #define TARGET_RTX_COSTS aarch64_rtx_costs
8414 #undef TARGET_TRAMPOLINE_INIT
8415 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
8417 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
8418 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
8420 #undef TARGET_VECTOR_MODE_SUPPORTED_P
8421 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
8423 #undef TARGET_ARRAY_MODE_SUPPORTED_P
8424 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
8426 #undef TARGET_VECTORIZE_ADD_STMT_COST
8427 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
8429 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
8430 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
8431 aarch64_builtin_vectorization_cost
8433 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
8434 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
8436 #undef TARGET_VECTORIZE_BUILTINS
8437 #define TARGET_VECTORIZE_BUILTINS
8439 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
8440 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
8441 aarch64_builtin_vectorized_function
8443 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
8444 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
8445 aarch64_autovectorize_vector_sizes
8447 /* Section anchor support. */
8449 #undef TARGET_MIN_ANCHOR_OFFSET
8450 #define TARGET_MIN_ANCHOR_OFFSET -256
8452 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
8453 byte offset; we can do much more for larger data types, but have no way
8454 to determine the size of the access. We assume accesses are aligned. */
8455 #undef TARGET_MAX_ANCHOR_OFFSET
8456 #define TARGET_MAX_ANCHOR_OFFSET 4095
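
/* Section anchors let several small globals be reached from one anchor
   symbol plus a constant offset, so the window advertised above must fit
   what a load/store immediate can encode: 4095 is the unsigned byte-offset
   limit stated in the comment, and -256 presumably corresponds to the
   signed unscaled-offset range.  The standalone sketch below shows the
   kind of data layout that benefits; the variable names and sizes are
   invented, and whether the objects are actually anchored depends on
   optimisation options such as -fsection-anchors.  */

#include <stdio.h>

static unsigned char table_a[64];
static unsigned char table_b[64];
static int counter;

static int
sum_of_globals (void)
{
  /* All three objects are small enough to sit inside one 4 KiB window,
     so a single anchor could serve every access here.  */
  return table_a[0] + table_b[0] + counter;
}

int
main (void)
{
  counter = 41;
  table_a[0] = 1;
  printf ("%d\n", sum_of_globals ());  /* Prints 42.  */
  return 0;
}
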
8458 #undef TARGET_VECTOR_ALIGNMENT
8459 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
8461 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
8462 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
8463 aarch64_simd_vector_alignment_reachable
8465 /* vec_perm support. */
8467 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
8468 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
8469 aarch64_vectorize_vec_perm_const_ok
8472 #undef TARGET_FIXED_CONDITION_CODE_REGS
8473 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
8475 struct gcc_target targetm = TARGET_INITIALIZER;
8477 #include "gt-aarch64.h"