[AArch64 costs 7/18] Improve SET cost.
[official-gcc.git] / gcc / config / aarch64 / aarch64.c
blob 92fbd4d0f1c89293deb2ee8cb31c9481e41467f0
1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2014 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "tree.h"
29 #include "stringpool.h"
30 #include "stor-layout.h"
31 #include "calls.h"
32 #include "varasm.h"
33 #include "regs.h"
34 #include "df.h"
35 #include "hard-reg-set.h"
36 #include "output.h"
37 #include "expr.h"
38 #include "reload.h"
39 #include "toplev.h"
40 #include "target.h"
41 #include "target-def.h"
42 #include "targhooks.h"
43 #include "ggc.h"
44 #include "function.h"
45 #include "tm_p.h"
46 #include "recog.h"
47 #include "langhooks.h"
48 #include "diagnostic-core.h"
49 #include "pointer-set.h"
50 #include "hash-table.h"
51 #include "vec.h"
52 #include "basic-block.h"
53 #include "tree-ssa-alias.h"
54 #include "internal-fn.h"
55 #include "gimple-fold.h"
56 #include "tree-eh.h"
57 #include "gimple-expr.h"
58 #include "is-a.h"
59 #include "gimple.h"
60 #include "gimplify.h"
61 #include "optabs.h"
62 #include "dwarf2.h"
63 #include "cfgloop.h"
64 #include "tree-vectorizer.h"
65 #include "config/arm/aarch-cost-tables.h"
66 #include "dumpfile.h"
68 /* Defined for convenience. */
69 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
71 /* Classifies an address.
73 ADDRESS_REG_IMM
74 A simple base register plus immediate offset.
76 ADDRESS_REG_WB
77 A base register indexed by immediate offset with writeback.
79 ADDRESS_REG_REG
80 A base register indexed by (optionally scaled) register.
82 ADDRESS_REG_UXTW
83 A base register indexed by (optionally scaled) zero-extended register.
85 ADDRESS_REG_SXTW
86 A base register indexed by (optionally scaled) sign-extended register.
88 ADDRESS_LO_SUM
89 A LO_SUM rtx with a base register and "LO12" symbol relocation.
91 ADDRESS_SYMBOLIC:
92 A constant symbolic address, in pc-relative literal pool. */
94 enum aarch64_address_type {
95 ADDRESS_REG_IMM,
96 ADDRESS_REG_WB,
97 ADDRESS_REG_REG,
98 ADDRESS_REG_UXTW,
99 ADDRESS_REG_SXTW,
100 ADDRESS_LO_SUM,
101 ADDRESS_SYMBOLIC
104 struct aarch64_address_info {
105 enum aarch64_address_type type;
106 rtx base;
107 rtx offset;
108 int shift;
109 enum aarch64_symbol_type symbol_type;
112 struct simd_immediate_info
114 rtx value;
115 int shift;
116 int element_width;
117 bool mvn;
118 bool msl;
121 /* The current code model. */
122 enum aarch64_code_model aarch64_cmodel;
124 #ifdef HAVE_AS_TLS
125 #undef TARGET_HAVE_TLS
126 #define TARGET_HAVE_TLS 1
127 #endif
129 static bool aarch64_lra_p (void);
130 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
131 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
132 const_tree,
133 enum machine_mode *, int *,
134 bool *);
135 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
136 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
137 static void aarch64_override_options_after_change (void);
138 static bool aarch64_vector_mode_supported_p (enum machine_mode);
139 static unsigned bit_count (unsigned HOST_WIDE_INT);
140 static bool aarch64_const_vec_all_same_int_p (rtx,
141 HOST_WIDE_INT, HOST_WIDE_INT);
143 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
144 const unsigned char *sel);
146 /* The processor for which instructions should be scheduled. */
147 enum aarch64_processor aarch64_tune = cortexa53;
149 /* The current tuning set. */
150 const struct tune_params *aarch64_tune_params;
152 /* Mask to specify which instructions we are allowed to generate. */
153 unsigned long aarch64_isa_flags = 0;
155 /* Mask to specify which instruction scheduling options should be used. */
156 unsigned long aarch64_tune_flags = 0;
158 /* Tuning parameters. */
160 #if HAVE_DESIGNATED_INITIALIZERS
161 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
162 #else
163 #define NAMED_PARAM(NAME, VAL) (VAL)
164 #endif
166 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
167 __extension__
168 #endif
170 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
171 __extension__
172 #endif
173 static const struct cpu_addrcost_table generic_addrcost_table =
175 #if HAVE_DESIGNATED_INITIALIZERS
176 .addr_scale_costs =
177 #endif
179 NAMED_PARAM (qi, 0),
180 NAMED_PARAM (hi, 0),
181 NAMED_PARAM (si, 0),
182 NAMED_PARAM (ti, 0),
184 NAMED_PARAM (pre_modify, 0),
185 NAMED_PARAM (post_modify, 0),
186 NAMED_PARAM (register_offset, 0),
187 NAMED_PARAM (register_extend, 0),
188 NAMED_PARAM (imm_offset, 0)
191 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
192 __extension__
193 #endif
194 static const struct cpu_addrcost_table cortexa57_addrcost_table =
196 #if HAVE_DESIGNATED_INITIALIZERS
197 .addr_scale_costs =
198 #endif
200 NAMED_PARAM (qi, 0),
201 NAMED_PARAM (hi, 1),
202 NAMED_PARAM (si, 0),
203 NAMED_PARAM (ti, 1),
205 NAMED_PARAM (pre_modify, 0),
206 NAMED_PARAM (post_modify, 0),
207 NAMED_PARAM (register_offset, 0),
208 NAMED_PARAM (register_extend, 0),
209 NAMED_PARAM (imm_offset, 0),
212 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
213 __extension__
214 #endif
215 static const struct cpu_regmove_cost generic_regmove_cost =
217 NAMED_PARAM (GP2GP, 1),
218 NAMED_PARAM (GP2FP, 2),
219 NAMED_PARAM (FP2GP, 2),
220 /* We currently do not provide direct support for TFmode Q->Q move.
221 Therefore we need to raise the cost above 2 in order to have
222 reload handle the situation. */
223 NAMED_PARAM (FP2FP, 4)
226 /* Generic costs for vector insn classes. */
227 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
228 __extension__
229 #endif
230 static const struct cpu_vector_cost generic_vector_cost =
232 NAMED_PARAM (scalar_stmt_cost, 1),
233 NAMED_PARAM (scalar_load_cost, 1),
234 NAMED_PARAM (scalar_store_cost, 1),
235 NAMED_PARAM (vec_stmt_cost, 1),
236 NAMED_PARAM (vec_to_scalar_cost, 1),
237 NAMED_PARAM (scalar_to_vec_cost, 1),
238 NAMED_PARAM (vec_align_load_cost, 1),
239 NAMED_PARAM (vec_unalign_load_cost, 1),
240 NAMED_PARAM (vec_unalign_store_cost, 1),
241 NAMED_PARAM (vec_store_cost, 1),
242 NAMED_PARAM (cond_taken_branch_cost, 3),
243 NAMED_PARAM (cond_not_taken_branch_cost, 1)
246 /* Generic costs for vector insn classes. */
247 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
248 __extension__
249 #endif
250 static const struct cpu_vector_cost cortexa57_vector_cost =
252 NAMED_PARAM (scalar_stmt_cost, 1),
253 NAMED_PARAM (scalar_load_cost, 4),
254 NAMED_PARAM (scalar_store_cost, 1),
255 NAMED_PARAM (vec_stmt_cost, 3),
256 NAMED_PARAM (vec_to_scalar_cost, 8),
257 NAMED_PARAM (scalar_to_vec_cost, 8),
258 NAMED_PARAM (vec_align_load_cost, 5),
259 NAMED_PARAM (vec_unalign_load_cost, 5),
260 NAMED_PARAM (vec_unalign_store_cost, 1),
261 NAMED_PARAM (vec_store_cost, 1),
262 NAMED_PARAM (cond_taken_branch_cost, 1),
263 NAMED_PARAM (cond_not_taken_branch_cost, 1)
266 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
267 __extension__
268 #endif
269 static const struct tune_params generic_tunings =
271 &cortexa57_extra_costs,
272 &generic_addrcost_table,
273 &generic_regmove_cost,
274 &generic_vector_cost,
275 NAMED_PARAM (memmov_cost, 4),
276 NAMED_PARAM (issue_rate, 2)
279 static const struct tune_params cortexa53_tunings =
281 &cortexa53_extra_costs,
282 &generic_addrcost_table,
283 &generic_regmove_cost,
284 &generic_vector_cost,
285 NAMED_PARAM (memmov_cost, 4),
286 NAMED_PARAM (issue_rate, 2)
289 static const struct tune_params cortexa57_tunings =
291 &cortexa57_extra_costs,
292 &cortexa57_addrcost_table,
293 &generic_regmove_cost,
294 &cortexa57_vector_cost,
295 NAMED_PARAM (memmov_cost, 4),
296 NAMED_PARAM (issue_rate, 3)
299 /* A processor implementing AArch64. */
300 struct processor
302 const char *const name;
303 enum aarch64_processor core;
304 const char *arch;
305 const unsigned long flags;
306 const struct tune_params *const tune;
309 /* Processor cores implementing AArch64. */
310 static const struct processor all_cores[] =
312 #define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
313 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
314 #include "aarch64-cores.def"
315 #undef AARCH64_CORE
316 {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
317 {NULL, aarch64_none, NULL, 0, NULL}
320 /* Architectures implementing AArch64. */
321 static const struct processor all_architectures[] =
323 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
324 {NAME, CORE, #ARCH, FLAGS, NULL},
325 #include "aarch64-arches.def"
326 #undef AARCH64_ARCH
327 {NULL, aarch64_none, NULL, 0, NULL}
330 /* Target specification. These are populated as commandline arguments
331 are processed, or NULL if not specified. */
332 static const struct processor *selected_arch;
333 static const struct processor *selected_cpu;
334 static const struct processor *selected_tune;
336 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
338 /* An ISA extension in the co-processor and main instruction set space. */
339 struct aarch64_option_extension
341 const char *const name;
342 const unsigned long flags_on;
343 const unsigned long flags_off;
346 /* ISA extensions in AArch64. */
347 static const struct aarch64_option_extension all_extensions[] =
349 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
350 {NAME, FLAGS_ON, FLAGS_OFF},
351 #include "aarch64-option-extensions.def"
352 #undef AARCH64_OPT_EXTENSION
353 {NULL, 0, 0}
356 /* Used to track the size of an address when generating a pre/post
357 increment address. */
358 static enum machine_mode aarch64_memory_reference_mode;
360 /* Used to force GTY into this file. */
361 static GTY(()) int gty_dummy;
363 /* A table of valid AArch64 "bitmask immediate" values for
364 logical instructions. */
366 #define AARCH64_NUM_BITMASKS 5334
367 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
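/* Illustrative note (not part of the original source): an AArch64 "bitmask
   immediate" is a contiguous run of set bits, rotated by any amount and
   replicated across 2-, 4-, 8-, 16-, 32- or 64-bit elements.  For example,
   0x00ff00ff00ff00ff (eight set bits replicated in 16-bit elements) is
   encodable, whereas 0x1234 is not; AARCH64_NUM_BITMASKS is the number of
   distinct 64-bit values of this form that the table holds.  */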
369 typedef enum aarch64_cond_code
371 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
372 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
373 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
375 aarch64_cc;
377 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
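/* For illustration: the enumeration above pairs each condition with its
   inverse in adjacent even/odd slots (EQ/NE, CS/CC, MI/PL, ...), so the
   inverse of any code is obtained by flipping the low bit, e.g.
   AARCH64_INVERSE_CONDITION_CODE (AARCH64_GE) == AARCH64_LT.  */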
379 /* The condition codes of the processor, and the inverse function. */
380 static const char * const aarch64_condition_codes[] =
382 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
383 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
386 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
387 unsigned
388 aarch64_dbx_register_number (unsigned regno)
390 if (GP_REGNUM_P (regno))
391 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
392 else if (regno == SP_REGNUM)
393 return AARCH64_DWARF_SP;
394 else if (FP_REGNUM_P (regno))
395 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
397 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
398 equivalent DWARF register. */
399 return DWARF_FRAME_REGISTERS;
402 /* Return TRUE if MODE is any of the large INT modes. */
403 static bool
404 aarch64_vect_struct_mode_p (enum machine_mode mode)
406 return mode == OImode || mode == CImode || mode == XImode;
409 /* Return TRUE if MODE is any of the vector modes. */
410 static bool
411 aarch64_vector_mode_p (enum machine_mode mode)
413 return aarch64_vector_mode_supported_p (mode)
414 || aarch64_vect_struct_mode_p (mode);
417 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
418 static bool
419 aarch64_array_mode_supported_p (enum machine_mode mode,
420 unsigned HOST_WIDE_INT nelems)
422 if (TARGET_SIMD
423 && AARCH64_VALID_SIMD_QREG_MODE (mode)
424 && (nelems >= 2 && nelems <= 4))
425 return true;
427 return false;
430 /* Implement HARD_REGNO_NREGS. */
433 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
435 switch (aarch64_regno_regclass (regno))
437 case FP_REGS:
438 case FP_LO_REGS:
439 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
440 default:
441 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
443 gcc_unreachable ();
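/* For example (illustrative): with UNITS_PER_WORD == 8 and
   UNITS_PER_VREG == 16, a TImode value (16 bytes) occupies two general
   registers but only one FP/SIMD register, while an OImode value
   (32 bytes) occupies two FP/SIMD registers.  */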
446 /* Implement HARD_REGNO_MODE_OK. */
449 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
451 if (GET_MODE_CLASS (mode) == MODE_CC)
452 return regno == CC_REGNUM;
454 if (regno == SP_REGNUM)
455 /* The purpose of comparing with ptr_mode is to support the
456 global register variable associated with the stack pointer
457 register via the syntax of asm ("wsp") in ILP32. */
458 return mode == Pmode || mode == ptr_mode;
460 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
461 return mode == Pmode;
463 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
464 return 1;
466 if (FP_REGNUM_P (regno))
468 if (aarch64_vect_struct_mode_p (mode))
469 return
470 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
471 else
472 return 1;
475 return 0;
478 /* Implement HARD_REGNO_CALLER_SAVE_MODE. */
479 enum machine_mode
480 aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
481 enum machine_mode mode)
483 /* Handle modes that fit within single registers. */
484 if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
486 if (GET_MODE_SIZE (mode) >= 4)
487 return mode;
488 else
489 return SImode;
491 /* Fall back to generic for multi-reg and very large modes. */
492 else
493 return choose_hard_reg_mode (regno, nregs, false);
496 /* Return true if calls to DECL should be treated as
497 long-calls (i.e. called via a register). */
498 static bool
499 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
501 return false;
504 /* Return true if calls to symbol-ref SYM should be treated as
505 long-calls (i.e. called via a register). */
506 bool
507 aarch64_is_long_call_p (rtx sym)
509 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
512 /* Return true if the offsets to a zero/sign-extract operation
513 represent an expression that matches an extend operation. The
514 operands represent the parameters from
516 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
517 bool
518 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
519 rtx extract_imm)
521 HOST_WIDE_INT mult_val, extract_val;
523 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
524 return false;
526 mult_val = INTVAL (mult_imm);
527 extract_val = INTVAL (extract_imm);
529 if (extract_val > 8
530 && extract_val < GET_MODE_BITSIZE (mode)
531 && exact_log2 (extract_val & ~7) > 0
532 && (extract_val & 7) <= 4
533 && mult_val == (1 << (extract_val & 7)))
534 return true;
536 return false;
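/* Worked example for the check above (illustrative): with MODE == DImode,
   EXTRACT_IMM == 34 and MULT_IMM == 4 we have (34 & ~7) == 32, a power of
   two, (34 & 7) == 2 and 4 == (1 << 2).  Extracting the low 34 bits of a
   value multiplied by 4 is therefore equivalent to a 32-bit extend
   followed by a left shift of 2, i.e. the form of an extended-register
   operand.  */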
539 /* Emit an insn that's a simple single-set. Both the operands must be
540 known to be valid. */
541 inline static rtx
542 emit_set_insn (rtx x, rtx y)
544 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
547 /* X and Y are two things to compare using CODE. Emit the compare insn and
548 return the rtx for register 0 in the proper mode. */
550 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
552 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
553 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
555 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
556 return cc_reg;
559 /* Build the SYMBOL_REF for __tls_get_addr. */
561 static GTY(()) rtx tls_get_addr_libfunc;
564 aarch64_tls_get_addr (void)
566 if (!tls_get_addr_libfunc)
567 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
568 return tls_get_addr_libfunc;
571 /* Return the TLS model to use for ADDR. */
573 static enum tls_model
574 tls_symbolic_operand_type (rtx addr)
576 enum tls_model tls_kind = TLS_MODEL_NONE;
577 rtx sym, addend;
579 if (GET_CODE (addr) == CONST)
581 split_const (addr, &sym, &addend);
582 if (GET_CODE (sym) == SYMBOL_REF)
583 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
585 else if (GET_CODE (addr) == SYMBOL_REF)
586 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
588 return tls_kind;
591 /* We'll allow LO_SUMs in our legitimate addresses so that combine
592 can take care of combining addresses where necessary; for code
593 generation purposes, however, we'll generate the address
594 as:
595 RTL Absolute
596 tmp = hi (symbol_ref); adrp x1, foo
597 dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo
600 PIC TLS
601 adrp x1, :got:foo adrp tmp, :tlsgd:foo
602 ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
603 bl __tls_get_addr
606 Load TLS symbol, depending on TLS mechanism and TLS access model.
608 Global Dynamic - Traditional TLS:
609 adrp tmp, :tlsgd:imm
610 add dest, tmp, #:tlsgd_lo12:imm
611 bl __tls_get_addr
613 Global Dynamic - TLS Descriptors:
614 adrp dest, :tlsdesc:imm
615 ldr tmp, [dest, #:tlsdesc_lo12:imm]
616 add dest, dest, #:tlsdesc_lo12:imm
617 blr tmp
618 mrs tp, tpidr_el0
619 add dest, dest, tp
621 Initial Exec:
622 mrs tp, tpidr_el0
623 adrp tmp, :gottprel:imm
624 ldr dest, [tmp, #:gottprel_lo12:imm]
625 add dest, dest, tp
627 Local Exec:
628 mrs tp, tpidr_el0
629 add t0, tp, #:tprel_hi12:imm
630 add t0, #:tprel_lo12_nc:imm
633 static void
634 aarch64_load_symref_appropriately (rtx dest, rtx imm,
635 enum aarch64_symbol_type type)
637 switch (type)
639 case SYMBOL_SMALL_ABSOLUTE:
641 /* In ILP32, the mode of dest can be either SImode or DImode. */
642 rtx tmp_reg = dest;
643 enum machine_mode mode = GET_MODE (dest);
645 gcc_assert (mode == Pmode || mode == ptr_mode);
647 if (can_create_pseudo_p ())
648 tmp_reg = gen_reg_rtx (mode);
650 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
651 emit_insn (gen_add_losym (dest, tmp_reg, imm));
652 return;
655 case SYMBOL_TINY_ABSOLUTE:
656 emit_insn (gen_rtx_SET (Pmode, dest, imm));
657 return;
659 case SYMBOL_SMALL_GOT:
661 /* In ILP32, the mode of dest can be either SImode or DImode,
662 while the got entry is always of SImode size. The mode of
663 dest depends on how dest is used: if dest is assigned to a
664 pointer (e.g. in the memory), it has SImode; it may have
665 DImode if dest is dereferenced to access the memory.
666 This is why we have to handle three different ldr_got_small
667 patterns here (two patterns for ILP32). */
668 rtx tmp_reg = dest;
669 enum machine_mode mode = GET_MODE (dest);
671 if (can_create_pseudo_p ())
672 tmp_reg = gen_reg_rtx (mode);
674 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
675 if (mode == ptr_mode)
677 if (mode == DImode)
678 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
679 else
680 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
682 else
684 gcc_assert (mode == Pmode);
685 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
688 return;
691 case SYMBOL_SMALL_TLSGD:
693 rtx insns;
694 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
696 start_sequence ();
697 emit_call_insn (gen_tlsgd_small (result, imm));
698 insns = get_insns ();
699 end_sequence ();
701 RTL_CONST_CALL_P (insns) = 1;
702 emit_libcall_block (insns, dest, result, imm);
703 return;
706 case SYMBOL_SMALL_TLSDESC:
708 enum machine_mode mode = GET_MODE (dest);
709 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
710 rtx tp;
712 gcc_assert (mode == Pmode || mode == ptr_mode);
714 /* In ILP32, the got entry is always of SImode size. Unlike
715 small GOT, the dest is fixed at reg 0. */
716 if (TARGET_ILP32)
717 emit_insn (gen_tlsdesc_small_si (imm));
718 else
719 emit_insn (gen_tlsdesc_small_di (imm));
720 tp = aarch64_load_tp (NULL);
722 if (mode != Pmode)
723 tp = gen_lowpart (mode, tp);
725 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
726 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
727 return;
730 case SYMBOL_SMALL_GOTTPREL:
732 /* In ILP32, the mode of dest can be either SImode or DImode,
733 while the got entry is always of SImode size. The mode of
734 dest depends on how dest is used: if dest is assigned to a
735 pointer (e.g. in the memory), it has SImode; it may have
736 DImode if dest is dereferenced to access the memory.
737 This is why we have to handle three different tlsie_small
738 patterns here (two patterns for ILP32). */
739 enum machine_mode mode = GET_MODE (dest);
740 rtx tmp_reg = gen_reg_rtx (mode);
741 rtx tp = aarch64_load_tp (NULL);
743 if (mode == ptr_mode)
745 if (mode == DImode)
746 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
747 else
749 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
750 tp = gen_lowpart (mode, tp);
753 else
755 gcc_assert (mode == Pmode);
756 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
759 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
760 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
761 return;
764 case SYMBOL_SMALL_TPREL:
766 rtx tp = aarch64_load_tp (NULL);
767 emit_insn (gen_tlsle_small (dest, tp, imm));
768 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
769 return;
772 case SYMBOL_TINY_GOT:
773 emit_insn (gen_ldr_got_tiny (dest, imm));
774 return;
776 default:
777 gcc_unreachable ();
781 /* Emit a move from SRC to DEST. Assume that the move expanders can
782 handle all moves if !can_create_pseudo_p (). The distinction is
783 important because, unlike emit_move_insn, the move expanders know
784 how to force Pmode objects into the constant pool even when the
785 constant pool address is not itself legitimate. */
786 static rtx
787 aarch64_emit_move (rtx dest, rtx src)
789 return (can_create_pseudo_p ()
790 ? emit_move_insn (dest, src)
791 : emit_move_insn_1 (dest, src));
794 /* Split a 128-bit move operation into two 64-bit move operations,
795 taking care to handle partial overlap of register to register
796 copies. Special cases are needed when moving between GP regs and
797 FP regs. SRC can be a register, constant or memory; DST a register
798 or memory. If either operand is memory it must not have any side
799 effects. */
800 void
801 aarch64_split_128bit_move (rtx dst, rtx src)
803 rtx dst_lo, dst_hi;
804 rtx src_lo, src_hi;
806 enum machine_mode mode = GET_MODE (dst);
808 gcc_assert (mode == TImode || mode == TFmode);
809 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
810 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
812 if (REG_P (dst) && REG_P (src))
814 int src_regno = REGNO (src);
815 int dst_regno = REGNO (dst);
817 /* Handle FP <-> GP regs. */
818 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
820 src_lo = gen_lowpart (word_mode, src);
821 src_hi = gen_highpart (word_mode, src);
823 if (mode == TImode)
825 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
826 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
828 else
830 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
831 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
833 return;
835 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
837 dst_lo = gen_lowpart (word_mode, dst);
838 dst_hi = gen_highpart (word_mode, dst);
840 if (mode == TImode)
842 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
843 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
845 else
847 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
848 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
850 return;
854 dst_lo = gen_lowpart (word_mode, dst);
855 dst_hi = gen_highpart (word_mode, dst);
856 src_lo = gen_lowpart (word_mode, src);
857 src_hi = gen_highpart_mode (word_mode, mode, src);
859 /* At most one pairing may overlap. */
860 if (reg_overlap_mentioned_p (dst_lo, src_hi))
862 aarch64_emit_move (dst_hi, src_hi);
863 aarch64_emit_move (dst_lo, src_lo);
865 else
867 aarch64_emit_move (dst_lo, src_lo);
868 aarch64_emit_move (dst_hi, src_hi);
872 bool
873 aarch64_split_128bit_move_p (rtx dst, rtx src)
875 return (! REG_P (src)
876 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
879 /* Split a complex SIMD combine. */
881 void
882 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
884 enum machine_mode src_mode = GET_MODE (src1);
885 enum machine_mode dst_mode = GET_MODE (dst);
887 gcc_assert (VECTOR_MODE_P (dst_mode));
889 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
891 rtx (*gen) (rtx, rtx, rtx);
893 switch (src_mode)
895 case V8QImode:
896 gen = gen_aarch64_simd_combinev8qi;
897 break;
898 case V4HImode:
899 gen = gen_aarch64_simd_combinev4hi;
900 break;
901 case V2SImode:
902 gen = gen_aarch64_simd_combinev2si;
903 break;
904 case V2SFmode:
905 gen = gen_aarch64_simd_combinev2sf;
906 break;
907 case DImode:
908 gen = gen_aarch64_simd_combinedi;
909 break;
910 case DFmode:
911 gen = gen_aarch64_simd_combinedf;
912 break;
913 default:
914 gcc_unreachable ();
917 emit_insn (gen (dst, src1, src2));
918 return;
922 /* Split a complex SIMD move. */
924 void
925 aarch64_split_simd_move (rtx dst, rtx src)
927 enum machine_mode src_mode = GET_MODE (src);
928 enum machine_mode dst_mode = GET_MODE (dst);
930 gcc_assert (VECTOR_MODE_P (dst_mode));
932 if (REG_P (dst) && REG_P (src))
934 rtx (*gen) (rtx, rtx);
936 gcc_assert (VECTOR_MODE_P (src_mode));
938 switch (src_mode)
940 case V16QImode:
941 gen = gen_aarch64_split_simd_movv16qi;
942 break;
943 case V8HImode:
944 gen = gen_aarch64_split_simd_movv8hi;
945 break;
946 case V4SImode:
947 gen = gen_aarch64_split_simd_movv4si;
948 break;
949 case V2DImode:
950 gen = gen_aarch64_split_simd_movv2di;
951 break;
952 case V4SFmode:
953 gen = gen_aarch64_split_simd_movv4sf;
954 break;
955 case V2DFmode:
956 gen = gen_aarch64_split_simd_movv2df;
957 break;
958 default:
959 gcc_unreachable ();
962 emit_insn (gen (dst, src));
963 return;
967 static rtx
968 aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
970 if (can_create_pseudo_p ())
971 return force_reg (mode, value);
972 else
974 x = aarch64_emit_move (x, value);
975 return x;
980 static rtx
981 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
983 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
985 rtx high;
986 /* Load the full offset into a register. This
987 might be improvable in the future. */
988 high = GEN_INT (offset);
989 offset = 0;
990 high = aarch64_force_temporary (mode, temp, high);
991 reg = aarch64_force_temporary (mode, temp,
992 gen_rtx_PLUS (mode, high, reg));
994 return plus_constant (mode, reg, offset);
997 void
998 aarch64_expand_mov_immediate (rtx dest, rtx imm)
1000 enum machine_mode mode = GET_MODE (dest);
1001 unsigned HOST_WIDE_INT mask;
1002 int i;
1003 bool first;
1004 unsigned HOST_WIDE_INT val;
1005 bool subtargets;
1006 rtx subtarget;
1007 int one_match, zero_match;
1009 gcc_assert (mode == SImode || mode == DImode);
1011 /* Check on what type of symbol it is. */
1012 if (GET_CODE (imm) == SYMBOL_REF
1013 || GET_CODE (imm) == LABEL_REF
1014 || GET_CODE (imm) == CONST)
1016 rtx mem, base, offset;
1017 enum aarch64_symbol_type sty;
1019 /* If we have (const (plus symbol offset)), separate out the offset
1020 before we start classifying the symbol. */
1021 split_const (imm, &base, &offset);
1023 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
1024 switch (sty)
1026 case SYMBOL_FORCE_TO_MEM:
1027 if (offset != const0_rtx
1028 && targetm.cannot_force_const_mem (mode, imm))
1030 gcc_assert (can_create_pseudo_p ());
1031 base = aarch64_force_temporary (mode, dest, base);
1032 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1033 aarch64_emit_move (dest, base);
1034 return;
1036 mem = force_const_mem (ptr_mode, imm);
1037 gcc_assert (mem);
1038 if (mode != ptr_mode)
1039 mem = gen_rtx_ZERO_EXTEND (mode, mem);
1040 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1041 return;
1043 case SYMBOL_SMALL_TLSGD:
1044 case SYMBOL_SMALL_TLSDESC:
1045 case SYMBOL_SMALL_GOTTPREL:
1046 case SYMBOL_SMALL_GOT:
1047 case SYMBOL_TINY_GOT:
1048 if (offset != const0_rtx)
1050 gcc_assert(can_create_pseudo_p ());
1051 base = aarch64_force_temporary (mode, dest, base);
1052 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1053 aarch64_emit_move (dest, base);
1054 return;
1056 /* FALLTHRU */
1058 case SYMBOL_SMALL_TPREL:
1059 case SYMBOL_SMALL_ABSOLUTE:
1060 case SYMBOL_TINY_ABSOLUTE:
1061 aarch64_load_symref_appropriately (dest, imm, sty);
1062 return;
1064 default:
1065 gcc_unreachable ();
1069 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
1071 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1072 return;
1075 if (!CONST_INT_P (imm))
1077 if (GET_CODE (imm) == HIGH)
1078 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1079 else
1081 rtx mem = force_const_mem (mode, imm);
1082 gcc_assert (mem);
1083 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1086 return;
1089 if (mode == SImode)
1091 /* We know we can't do this in 1 insn, and we must be able to do it
1092 in two; so don't mess around looking for sequences that don't buy
1093 us anything. */
1094 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
1095 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1096 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
1097 return;
1100 /* Remaining cases are all for DImode. */
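/* The DImode strategy below, roughly (illustrative summary): split VAL
   into four 16-bit chunks and count how many are all-zeros or all-ones.
   Two all-ones chunks are handled with MOVN+MOVK, two all-zero chunks
   fall through to the MOVZ+MOVK "simple_sequence", and otherwise we try
   pairs such as MOV+ADD with a shifted 12-bit immediate, a bitmask
   immediate plus ADD or MOVK, or two bitmask immediates combined with
   ORR/AND, before falling back to up to four MOVZ/MOVK instructions.
   For example, 0x0000ffff12340000 has two zero chunks and becomes
   roughly:
       movz dest, #0x1234, lsl #16
       movk dest, #0xffff, lsl #32  */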
1102 val = INTVAL (imm);
1103 subtargets = optimize && can_create_pseudo_p ();
1105 one_match = 0;
1106 zero_match = 0;
1107 mask = 0xffff;
1109 for (i = 0; i < 64; i += 16, mask <<= 16)
1111 if ((val & mask) == 0)
1112 zero_match++;
1113 else if ((val & mask) == mask)
1114 one_match++;
1117 if (one_match == 2)
1119 mask = 0xffff;
1120 for (i = 0; i < 64; i += 16, mask <<= 16)
1122 if ((val & mask) != mask)
1124 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1125 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1126 GEN_INT ((val >> i) & 0xffff)));
1127 return;
1130 gcc_unreachable ();
1133 if (zero_match == 2)
1134 goto simple_sequence;
1136 mask = 0x0ffff0000UL;
1137 for (i = 16; i < 64; i += 16, mask <<= 16)
1139 HOST_WIDE_INT comp = mask & ~(mask - 1);
1141 if (aarch64_uimm12_shift (val - (val & mask)))
1143 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1145 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1146 emit_insn (gen_adddi3 (dest, subtarget,
1147 GEN_INT (val - (val & mask))));
1148 return;
1150 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1152 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1154 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1155 GEN_INT ((val + comp) & mask)));
1156 emit_insn (gen_adddi3 (dest, subtarget,
1157 GEN_INT (val - ((val + comp) & mask))));
1158 return;
1160 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1162 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1164 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1165 GEN_INT ((val - comp) | ~mask)));
1166 emit_insn (gen_adddi3 (dest, subtarget,
1167 GEN_INT (val - ((val - comp) | ~mask))));
1168 return;
1170 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1172 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1174 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1175 GEN_INT (val | ~mask)));
1176 emit_insn (gen_adddi3 (dest, subtarget,
1177 GEN_INT (val - (val | ~mask))));
1178 return;
1182 /* See if we can do it by arithmetically combining two
1183 immediates. */
1184 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1186 int j;
1187 mask = 0xffff;
1189 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1190 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1192 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1193 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1194 GEN_INT (aarch64_bitmasks[i])));
1195 emit_insn (gen_adddi3 (dest, subtarget,
1196 GEN_INT (val - aarch64_bitmasks[i])));
1197 return;
1200 for (j = 0; j < 64; j += 16, mask <<= 16)
1202 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1204 emit_insn (gen_rtx_SET (VOIDmode, dest,
1205 GEN_INT (aarch64_bitmasks[i])));
1206 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1207 GEN_INT ((val >> j) & 0xffff)));
1208 return;
1213 /* See if we can do it by logically combining two immediates. */
1214 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1216 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1218 int j;
1220 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1221 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1223 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1224 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1225 GEN_INT (aarch64_bitmasks[i])));
1226 emit_insn (gen_iordi3 (dest, subtarget,
1227 GEN_INT (aarch64_bitmasks[j])));
1228 return;
1231 else if ((val & aarch64_bitmasks[i]) == val)
1233 int j;
1235 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1236 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1239 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1240 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1241 GEN_INT (aarch64_bitmasks[j])));
1242 emit_insn (gen_anddi3 (dest, subtarget,
1243 GEN_INT (aarch64_bitmasks[i])));
1244 return;
1249 simple_sequence:
1250 first = true;
1251 mask = 0xffff;
1252 for (i = 0; i < 64; i += 16, mask <<= 16)
1254 if ((val & mask) != 0)
1256 if (first)
1258 emit_insn (gen_rtx_SET (VOIDmode, dest,
1259 GEN_INT (val & mask)));
1260 first = false;
1262 else
1263 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1264 GEN_INT ((val >> i) & 0xffff)));
1269 static bool
1270 aarch64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
1272 /* Indirect calls are not currently supported. */
1273 if (decl == NULL)
1274 return false;
1276 /* Cannot tail-call to long-calls, since these are outside of the
1277 range of a branch instruction (we could handle this if we added
1278 support for indirect tail-calls). */
1279 if (aarch64_decl_is_long_call_p (decl))
1280 return false;
1282 return true;
1285 /* Implement TARGET_PASS_BY_REFERENCE. */
1287 static bool
1288 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1289 enum machine_mode mode,
1290 const_tree type,
1291 bool named ATTRIBUTE_UNUSED)
1293 HOST_WIDE_INT size;
1294 enum machine_mode dummymode;
1295 int nregs;
1297 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1298 size = (mode == BLKmode && type)
1299 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1301 /* Aggregates are passed by reference based on their size. */
1302 if (type && AGGREGATE_TYPE_P (type))
1304 size = int_size_in_bytes (type);
1307 /* Variable-sized arguments are always passed by reference. */
1308 if (size < 0)
1309 return true;
1311 /* Can this be a candidate to be passed in fp/simd register(s)? */
1312 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1313 &dummymode, &nregs,
1314 NULL))
1315 return false;
1317 /* Arguments which are variable sized or larger than 2 registers are
1318 passed by reference unless they are a homogeneous floating-point
1319 aggregate. */
1320 return size > 2 * UNITS_PER_WORD;
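/* For example (illustrative): a structure of three doubles is a
   homogeneous floating-point aggregate, so the candidate check above
   returns false here and the argument is passed in SIMD/FP registers;
   a structure of three 64-bit integers (24 bytes > 2 * UNITS_PER_WORD)
   is instead passed by reference.  */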
1323 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1324 static bool
1325 aarch64_return_in_msb (const_tree valtype)
1327 enum machine_mode dummy_mode;
1328 int dummy_int;
1330 /* Never happens in little-endian mode. */
1331 if (!BYTES_BIG_ENDIAN)
1332 return false;
1334 /* Only composite types smaller than or equal to 16 bytes can
1335 be potentially returned in registers. */
1336 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1337 || int_size_in_bytes (valtype) <= 0
1338 || int_size_in_bytes (valtype) > 16)
1339 return false;
1341 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1342 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1343 is always passed/returned in the least significant bits of fp/simd
1344 register(s). */
1345 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1346 &dummy_mode, &dummy_int, NULL))
1347 return false;
1349 return true;
1352 /* Implement TARGET_FUNCTION_VALUE.
1353 Define how to find the value returned by a function. */
1355 static rtx
1356 aarch64_function_value (const_tree type, const_tree func,
1357 bool outgoing ATTRIBUTE_UNUSED)
1359 enum machine_mode mode;
1360 int unsignedp;
1361 int count;
1362 enum machine_mode ag_mode;
1364 mode = TYPE_MODE (type);
1365 if (INTEGRAL_TYPE_P (type))
1366 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1368 if (aarch64_return_in_msb (type))
1370 HOST_WIDE_INT size = int_size_in_bytes (type);
1372 if (size % UNITS_PER_WORD != 0)
1374 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1375 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1379 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1380 &ag_mode, &count, NULL))
1382 if (!aarch64_composite_type_p (type, mode))
1384 gcc_assert (count == 1 && mode == ag_mode);
1385 return gen_rtx_REG (mode, V0_REGNUM);
1387 else
1389 int i;
1390 rtx par;
1392 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1393 for (i = 0; i < count; i++)
1395 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1396 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1397 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1398 XVECEXP (par, 0, i) = tmp;
1400 return par;
1403 else
1404 return gen_rtx_REG (mode, R0_REGNUM);
1407 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1408 Return true if REGNO is the number of a hard register in which the values
1409 of called function may come back. */
1411 static bool
1412 aarch64_function_value_regno_p (const unsigned int regno)
1414 /* Maximum of 16 bytes can be returned in the general registers. Examples
1415 of 16-byte return values are: 128-bit integers and 16-byte small
1416 structures (excluding homogeneous floating-point aggregates). */
1417 if (regno == R0_REGNUM || regno == R1_REGNUM)
1418 return true;
1420 /* Up to four fp/simd registers can return a function value, e.g. a
1421 homogeneous floating-point aggregate having four members. */
1422 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1423 return !TARGET_GENERAL_REGS_ONLY;
1425 return false;
1428 /* Implement TARGET_RETURN_IN_MEMORY.
1430 If the type T of the result of a function is such that
1431 void func (T arg)
1432 would require that arg be passed as a value in a register (or set of
1433 registers) according to the parameter passing rules, then the result
1434 is returned in the same registers as would be used for such an
1435 argument. */
1437 static bool
1438 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1440 HOST_WIDE_INT size;
1441 enum machine_mode ag_mode;
1442 int count;
1444 if (!AGGREGATE_TYPE_P (type)
1445 && TREE_CODE (type) != COMPLEX_TYPE
1446 && TREE_CODE (type) != VECTOR_TYPE)
1447 /* Simple scalar types always returned in registers. */
1448 return false;
1450 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1451 type,
1452 &ag_mode,
1453 &count,
1454 NULL))
1455 return false;
1457 /* Types larger than 2 registers returned in memory. */
1458 size = int_size_in_bytes (type);
1459 return (size < 0 || size > 2 * UNITS_PER_WORD);
1462 static bool
1463 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1464 const_tree type, int *nregs)
1466 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1467 return aarch64_vfp_is_call_or_return_candidate (mode,
1468 type,
1469 &pcum->aapcs_vfp_rmode,
1470 nregs,
1471 NULL);
1474 /* Given MODE and TYPE of a function argument, return the alignment in
1475 bits. The idea is to suppress any stronger alignment requested by
1476 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1477 This is a helper function for local use only. */
1479 static unsigned int
1480 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1482 unsigned int alignment;
1484 if (type)
1486 if (!integer_zerop (TYPE_SIZE (type)))
1488 if (TYPE_MODE (type) == mode)
1489 alignment = TYPE_ALIGN (type);
1490 else
1491 alignment = GET_MODE_ALIGNMENT (mode);
1493 else
1494 alignment = 0;
1496 else
1497 alignment = GET_MODE_ALIGNMENT (mode);
1499 return alignment;
1502 /* Layout a function argument according to the AAPCS64 rules. The rule
1503 numbers refer to the rule numbers in the AAPCS64. */
1505 static void
1506 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1507 const_tree type,
1508 bool named ATTRIBUTE_UNUSED)
1510 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1511 int ncrn, nvrn, nregs;
1512 bool allocate_ncrn, allocate_nvrn;
1514 /* We need to do this once per argument. */
1515 if (pcum->aapcs_arg_processed)
1516 return;
1518 pcum->aapcs_arg_processed = true;
1520 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1521 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1522 mode,
1523 type,
1524 &nregs);
1526 /* allocate_ncrn may be false-positive, but allocate_nvrn is quite reliable.
1527 The following code thus handles passing by SIMD/FP registers first. */
1529 nvrn = pcum->aapcs_nvrn;
1531 /* C.1 - C.5 for floating point, homogeneous floating-point aggregates (HFA)
1532 and homogeneous short-vector aggregates (HVA). */
1533 if (allocate_nvrn)
1535 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1537 pcum->aapcs_nextnvrn = nvrn + nregs;
1538 if (!aarch64_composite_type_p (type, mode))
1540 gcc_assert (nregs == 1);
1541 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1543 else
1545 rtx par;
1546 int i;
1547 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1548 for (i = 0; i < nregs; i++)
1550 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1551 V0_REGNUM + nvrn + i);
1552 tmp = gen_rtx_EXPR_LIST
1553 (VOIDmode, tmp,
1554 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1555 XVECEXP (par, 0, i) = tmp;
1557 pcum->aapcs_reg = par;
1559 return;
1561 else
1563 /* C.3 NSRN is set to 8. */
1564 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1565 goto on_stack;
1569 ncrn = pcum->aapcs_ncrn;
1570 nregs = ((type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode))
1571 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1574 /* C.6 - C.9, though the sign and zero extension semantics are
1575 handled elsewhere. This is the case where the argument fits
1576 entirely in general registers. */
1577 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1579 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1581 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1583 /* C.8 if the argument has an alignment of 16 then the NGRN is
1584 rounded up to the next even number. */
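/* For example (illustrative): a __int128 argument arriving when NGRN is
   1 (x0 already used) is 16-byte aligned and needs two registers, so
   NGRN is bumped to 2 and the value goes in the even/odd pair x2/x3,
   leaving x1 unused.  */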
1585 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1587 ++ncrn;
1588 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1590 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1591 A reg is still generated for it, but the caller should be smart
1592 enough not to use it. */
1593 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1595 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1597 else
1599 rtx par;
1600 int i;
1602 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1603 for (i = 0; i < nregs; i++)
1605 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1606 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1607 GEN_INT (i * UNITS_PER_WORD));
1608 XVECEXP (par, 0, i) = tmp;
1610 pcum->aapcs_reg = par;
1613 pcum->aapcs_nextncrn = ncrn + nregs;
1614 return;
1617 /* C.11 */
1618 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1620 /* The argument is passed on stack; record the needed number of words for
1621 this argument (we can re-use NREGS) and align the total size if
1622 necessary. */
1623 on_stack:
1624 pcum->aapcs_stack_words = nregs;
1625 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1626 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1627 16 / UNITS_PER_WORD) + 1;
1628 return;
1631 /* Implement TARGET_FUNCTION_ARG. */
1633 static rtx
1634 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1635 const_tree type, bool named)
1637 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1638 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1640 if (mode == VOIDmode)
1641 return NULL_RTX;
1643 aarch64_layout_arg (pcum_v, mode, type, named);
1644 return pcum->aapcs_reg;
1647 void
1648 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1649 const_tree fntype ATTRIBUTE_UNUSED,
1650 rtx libname ATTRIBUTE_UNUSED,
1651 const_tree fndecl ATTRIBUTE_UNUSED,
1652 unsigned n_named ATTRIBUTE_UNUSED)
1654 pcum->aapcs_ncrn = 0;
1655 pcum->aapcs_nvrn = 0;
1656 pcum->aapcs_nextncrn = 0;
1657 pcum->aapcs_nextnvrn = 0;
1658 pcum->pcs_variant = ARM_PCS_AAPCS64;
1659 pcum->aapcs_reg = NULL_RTX;
1660 pcum->aapcs_arg_processed = false;
1661 pcum->aapcs_stack_words = 0;
1662 pcum->aapcs_stack_size = 0;
1664 return;
1667 static void
1668 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1669 enum machine_mode mode,
1670 const_tree type,
1671 bool named)
1673 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1674 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1676 aarch64_layout_arg (pcum_v, mode, type, named);
1677 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1678 != (pcum->aapcs_stack_words != 0));
1679 pcum->aapcs_arg_processed = false;
1680 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1681 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1682 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1683 pcum->aapcs_stack_words = 0;
1684 pcum->aapcs_reg = NULL_RTX;
1688 bool
1689 aarch64_function_arg_regno_p (unsigned regno)
1691 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1692 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1695 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1696 PARM_BOUNDARY bits of alignment, but will be given anything up
1697 to STACK_BOUNDARY bits if the type requires it. This makes sure
1698 that both before and after the layout of each argument, the Next
1699 Stacked Argument Address (NSAA) will have a minimum alignment of
1700 8 bytes. */
1702 static unsigned int
1703 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1705 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1707 if (alignment < PARM_BOUNDARY)
1708 alignment = PARM_BOUNDARY;
1709 if (alignment > STACK_BOUNDARY)
1710 alignment = STACK_BOUNDARY;
1711 return alignment;
1714 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1716 Return true if an argument passed on the stack should be padded upwards,
1717 i.e. if the least-significant byte of the stack slot has useful data.
1719 Small aggregate types are placed in the lowest memory address.
1721 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1723 bool
1724 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1726 /* On little-endian targets, the least significant byte of every stack
1727 argument is passed at the lowest byte address of the stack slot. */
1728 if (!BYTES_BIG_ENDIAN)
1729 return true;
1731 /* Otherwise, integral, floating-point and pointer types are padded downward:
1732 the least significant byte of a stack argument is passed at the highest
1733 byte address of the stack slot. */
1734 if (type
1735 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1736 || POINTER_TYPE_P (type))
1737 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1738 return false;
1740 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1741 return true;
1744 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1746 It specifies the padding for the last (and possibly the only)
1747 element of a block move between registers and memory. Assuming
1748 the block is in memory, padding upward means that the last
1749 element is padded after its most significant byte, while with
1750 downward padding the last element is padded at its least
1751 significant byte.
1753 Small aggregates and small complex types are always padded
1754 upwards.
1756 We don't need to worry about homogeneous floating-point or
1757 short-vector aggregates; their move is not affected by the
1758 padding direction determined here. Regardless of endianness,
1759 each element of such an aggregate is put in the least
1760 significant bits of a fp/simd register.
1762 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1763 register has useful data, and return the opposite if the most
1764 significant byte does. */
1766 bool
1767 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1768 bool first ATTRIBUTE_UNUSED)
1771 /* Small composite types are always padded upward. */
1772 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1774 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1775 : GET_MODE_SIZE (mode));
1776 if (size < 2 * UNITS_PER_WORD)
1777 return true;
1780 /* Otherwise, use the default padding. */
1781 return !BYTES_BIG_ENDIAN;
1784 static enum machine_mode
1785 aarch64_libgcc_cmp_return_mode (void)
1787 return SImode;
1790 static bool
1791 aarch64_frame_pointer_required (void)
1793 /* If the function contains dynamic stack allocations, we need to
1794 use the frame pointer to access the static parts of the frame. */
1795 if (cfun->calls_alloca)
1796 return true;
1798 /* In aarch64_override_options_after_change
1799 flag_omit_leaf_frame_pointer turns off the frame pointer by
1800 default. Turn it back on now if we've not got a leaf
1801 function. */
1802 if (flag_omit_leaf_frame_pointer
1803 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
1804 return true;
1806 return false;
1809 /* Mark the registers that need to be saved by the callee and calculate
1810 the size of the callee-saved registers area and frame record (both FP
1811 and LR may be omitted). */
1812 static void
1813 aarch64_layout_frame (void)
1815 HOST_WIDE_INT offset = 0;
1816 int regno;
1818 if (reload_completed && cfun->machine->frame.laid_out)
1819 return;
1821 /* First mark all the registers that really need to be saved... */
1822 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1823 cfun->machine->frame.reg_offset[regno] = -1;
1825 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1826 cfun->machine->frame.reg_offset[regno] = -1;
1828 /* ... that includes the eh data registers (if needed)... */
1829 if (crtl->calls_eh_return)
1830 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1831 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] = 0;
1833 /* ... and any callee saved register that dataflow says is live. */
1834 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1835 if (df_regs_ever_live_p (regno)
1836 && !call_used_regs[regno])
1837 cfun->machine->frame.reg_offset[regno] = 0;
1839 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1840 if (df_regs_ever_live_p (regno)
1841 && !call_used_regs[regno])
1842 cfun->machine->frame.reg_offset[regno] = 0;
1844 if (frame_pointer_needed)
1846 cfun->machine->frame.reg_offset[R30_REGNUM] = 0;
1847 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1848 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1851 /* Now assign stack slots for them. */
1852 for (regno = R0_REGNUM; regno <= R28_REGNUM; regno++)
1853 if (cfun->machine->frame.reg_offset[regno] != -1)
1855 cfun->machine->frame.reg_offset[regno] = offset;
1856 offset += UNITS_PER_WORD;
1859 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1860 if (cfun->machine->frame.reg_offset[regno] != -1)
1862 cfun->machine->frame.reg_offset[regno] = offset;
1863 offset += UNITS_PER_WORD;
1866 if (frame_pointer_needed)
1868 cfun->machine->frame.reg_offset[R29_REGNUM] = offset;
1869 offset += UNITS_PER_WORD;
1872 if (cfun->machine->frame.reg_offset[R30_REGNUM] != -1)
1874 cfun->machine->frame.reg_offset[R30_REGNUM] = offset;
1875 offset += UNITS_PER_WORD;
1878 cfun->machine->frame.padding0 =
1879 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1880 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1882 cfun->machine->frame.saved_regs_size = offset;
1883 cfun->machine->frame.laid_out = true;
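/* Worked example of the layout above (illustrative): a function that
   needs the frame pointer and saves x19, x20 and d8 assigns
   reg_offset[x19] = 0, reg_offset[x20] = 8, reg_offset[d8] = 16,
   reg_offset[x29] = 24 and reg_offset[x30] = 32, giving offset == 40;
   rounding to STACK_BOUNDARY (16 bytes) yields padding0 == 8 and
   saved_regs_size == 48.  */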
1886 /* Make the last instruction frame-related and note that it performs
1887 the operation described by FRAME_PATTERN. */
1889 static void
1890 aarch64_set_frame_expr (rtx frame_pattern)
1892 rtx insn;
1894 insn = get_last_insn ();
1895 RTX_FRAME_RELATED_P (insn) = 1;
1896 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1897 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1898 frame_pattern,
1899 REG_NOTES (insn));
1902 static bool
1903 aarch64_register_saved_on_entry (int regno)
1905 return cfun->machine->frame.reg_offset[regno] != -1;
1909 static void
1910 aarch64_save_or_restore_fprs (int start_offset, int increment,
1911 bool restore, rtx base_rtx)
1914 unsigned regno;
1915 unsigned regno2;
1916 rtx insn;
1917 rtx (*gen_mem_ref)(enum machine_mode, rtx)
1918 = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
1921 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1923 if (aarch64_register_saved_on_entry (regno))
1925 rtx mem;
1926 mem = gen_mem_ref (DFmode,
1927 plus_constant (Pmode,
1928 base_rtx,
1929 start_offset));
1931 for (regno2 = regno + 1;
1932 regno2 <= V31_REGNUM
1933 && !aarch64_register_saved_on_entry (regno2);
1934 regno2++)
1936 /* Empty loop. */
1938 if (regno2 <= V31_REGNUM &&
1939 aarch64_register_saved_on_entry (regno2))
1941 rtx mem2;
1942 /* Next highest register to be saved. */
1943 mem2 = gen_mem_ref (DFmode,
1944 plus_constant
1945 (Pmode,
1946 base_rtx,
1947 start_offset + increment));
1948 if (restore == false)
1950 insn = emit_insn
1951 ( gen_store_pairdf (mem, gen_rtx_REG (DFmode, regno),
1952 mem2, gen_rtx_REG (DFmode, regno2)));
1955 else
1957 insn = emit_insn
1958 ( gen_load_pairdf (gen_rtx_REG (DFmode, regno), mem,
1959 gen_rtx_REG (DFmode, regno2), mem2));
1961 add_reg_note (insn, REG_CFA_RESTORE,
1962 gen_rtx_REG (DFmode, regno));
1963 add_reg_note (insn, REG_CFA_RESTORE,
1964 gen_rtx_REG (DFmode, regno2));
1967 /* The first part of a frame-related parallel insn
1968 is always assumed to be relevant to the frame
1969 calculations; subsequent parts are only
1970 frame-related if explicitly marked. */
1971 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1972 regno = regno2;
1973 start_offset += increment * 2;
1975 else
1977 if (restore == false)
1978 insn = emit_move_insn (mem, gen_rtx_REG (DFmode, regno));
1979 else
1981 insn = emit_move_insn (gen_rtx_REG (DFmode, regno), mem);
1982 add_reg_note (insn, REG_CFA_RESTORE,
1983 gen_rtx_REG (DImode, regno));
1985 start_offset += increment;
1987 RTX_FRAME_RELATED_P (insn) = 1;
1994 /* Offset from the stack pointer at which the saves and
1995 restores have to happen. */
1996 static void
1997 aarch64_save_or_restore_callee_save_registers (HOST_WIDE_INT offset,
1998 bool restore)
2000 rtx insn;
2001 rtx base_rtx = stack_pointer_rtx;
2002 HOST_WIDE_INT start_offset = offset;
2003 HOST_WIDE_INT increment = UNITS_PER_WORD;
2004 rtx (*gen_mem_ref)(enum machine_mode, rtx) = (frame_pointer_needed)? gen_frame_mem : gen_rtx_MEM;
2005 unsigned limit = (frame_pointer_needed)? R28_REGNUM: R30_REGNUM;
2006 unsigned regno;
2007 unsigned regno2;
2009 for (regno = R0_REGNUM; regno <= limit; regno++)
2011 if (aarch64_register_saved_on_entry (regno))
2013 rtx mem;
2014 mem = gen_mem_ref (Pmode,
2015 plus_constant (Pmode,
2016 base_rtx,
2017 start_offset));
2019 for (regno2 = regno + 1;
2020 regno2 <= limit
2021 && !aarch64_register_saved_on_entry (regno2);
2022 regno2++)
2024 /* Empty loop. */
2026 if (regno2 <= limit &&
2027 aarch64_register_saved_on_entry (regno2))
2029 rtx mem2;
2030 /* Next highest register to be saved. */
2031 mem2 = gen_mem_ref (Pmode,
2032 plus_constant
2033 (Pmode,
2034 base_rtx,
2035 start_offset + increment));
2036 if (restore == false)
2038 insn = emit_insn
2039 ( gen_store_pairdi (mem, gen_rtx_REG (DImode, regno),
2040 mem2, gen_rtx_REG (DImode, regno2)));
2043 else
2045 insn = emit_insn
2046 ( gen_load_pairdi (gen_rtx_REG (DImode, regno), mem,
2047 gen_rtx_REG (DImode, regno2), mem2));
2049 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
2050 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno2));
2053 /* The first part of a frame-related parallel insn
2054 is always assumed to be relevant to the frame
2055 calculations; subsequent parts are only
2056 frame-related if explicitly marked. */
2057 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0,
2058 1)) = 1;
2059 regno = regno2;
2060 start_offset += increment * 2;
2062 else
2064 if (restore == false)
2065 insn = emit_move_insn (mem, gen_rtx_REG (DImode, regno));
2066 else
2068 insn = emit_move_insn (gen_rtx_REG (DImode, regno), mem);
2069 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, regno));
2071 start_offset += increment;
2073 RTX_FRAME_RELATED_P (insn) = 1;
2077 aarch64_save_or_restore_fprs (start_offset, increment, restore, base_rtx);
2081 /* AArch64 stack frames generated by this compiler look like:
2083 +-------------------------------+
2085 | incoming stack arguments |
2087 +-------------------------------+ <-- arg_pointer_rtx
2089 | callee-allocated save area |
2090 | for register varargs |
2092 +-------------------------------+ <-- frame_pointer_rtx
2094 | local variables |
2096 +-------------------------------+
2097 | padding0 | \
2098 +-------------------------------+ |
2099 | | |
2100 | | |
2101 | callee-saved registers | | frame.saved_regs_size
2102 | | |
2103 +-------------------------------+ |
2104 | LR' | |
2105 +-------------------------------+ |
2106 | FP' | /
2107 P +-------------------------------+ <-- hard_frame_pointer_rtx
2108 | dynamic allocation |
2109 +-------------------------------+
2111 | outgoing stack arguments |
2113 +-------------------------------+ <-- stack_pointer_rtx
2115 Dynamic stack allocations such as alloca insert data at point P.
2116 They decrease stack_pointer_rtx but leave frame_pointer_rtx and
2117 hard_frame_pointer_rtx unchanged. */
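/* Illustrative worked example (hypothetical sizes, not taken from any
   particular test case): for a function that needs a frame pointer and
   has 24 bytes of local variables, 16 bytes of saved registers (just
   FP' and LR') and no outgoing arguments, the prologue below computes

     frame_size = 24 + 16 + 0 = 40, rounded up to 48
     fp_offset  = 48 - 24 - 16 = 8

   so the stack pointer is dropped by 48, FP' and LR' are stored at
   [sp, #8] and [sp, #16], and the new frame pointer is set to sp + 8,
   i.e. it points at the saved FP' slot shown in the diagram above.  */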
2119 /* Generate the prologue instructions for entry into a function.
2120 Establish the stack frame by decreasing the stack pointer with a
2121 properly calculated size and, if necessary, create a frame record
2122 filled with the values of LR and previous frame pointer. The
2123 current FP is also set up if it is in use. */
2125 void
2126 aarch64_expand_prologue (void)
2128 /* sub sp, sp, #<frame_size>
2129 stp {fp, lr}, [sp, #<frame_size> - 16]
2130 add fp, sp, #<frame_size> - hardfp_offset
2131 stp {cs_reg}, [fp, #-16] etc.
2133 sub sp, sp, <final_adjustment_if_any>
2135 HOST_WIDE_INT original_frame_size; /* local variables + vararg save */
2136 HOST_WIDE_INT frame_size, offset;
2137 HOST_WIDE_INT fp_offset; /* FP offset from SP */
2138 rtx insn;
2140 aarch64_layout_frame ();
2141 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2142 gcc_assert ((!cfun->machine->saved_varargs_size || cfun->stdarg)
2143 && (cfun->stdarg || !cfun->machine->saved_varargs_size));
2144 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2145 + crtl->outgoing_args_size);
2146 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2147 STACK_BOUNDARY / BITS_PER_UNIT);
2149 if (flag_stack_usage_info)
2150 current_function_static_stack_size = frame_size;
2152 fp_offset = (offset
2153 - original_frame_size
2154 - cfun->machine->frame.saved_regs_size);
2156 /* Store pairs and load pairs have a range of only -512 to 504. */
2157 if (offset >= 512)
2159 /* When the frame is large, the stack pointer is decreased first
2160 to step over the callee-allocated save area for register varargs,
2161 the local variable area and/or the callee-saved register area.
2162 This allows the pre-index write-back store pair instructions to
2163 be used to set up the stack frame
2164 efficiently. */
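/* Worked example of this path (hypothetical sizes): with 1000 bytes of
   locals, saved_regs_size == 96 and 32 bytes of outgoing arguments,
   frame_size rounds up to 1136; both 1136 and 1000 + 96 exceed 512,
   so offset becomes 96, frame_size becomes 1136 - (96 + 32) = 1008
   and fp_offset becomes 0.  The code below then emits roughly

     sub  sp, sp, #1008
     stp  x29, x30, [sp, #-96]!   // write-back store pair
     add  x29, sp, #0
     ...                          // remaining callee saves
     sub  sp, sp, #32             // outgoing argument area

   for a total adjustment of 1136 bytes.  */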
2165 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2166 if (offset >= 512)
2167 offset = cfun->machine->frame.saved_regs_size;
2169 frame_size -= (offset + crtl->outgoing_args_size);
2170 fp_offset = 0;
2172 if (frame_size >= 0x1000000)
2174 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2175 emit_move_insn (op0, GEN_INT (-frame_size));
2176 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2177 aarch64_set_frame_expr (gen_rtx_SET
2178 (Pmode, stack_pointer_rtx,
2179 plus_constant (Pmode,
2180 stack_pointer_rtx,
2181 -frame_size)));
2183 else if (frame_size > 0)
2185 if ((frame_size & 0xfff) != frame_size)
2187 insn = emit_insn (gen_add2_insn
2188 (stack_pointer_rtx,
2189 GEN_INT (-(frame_size
2190 & ~(HOST_WIDE_INT)0xfff))));
2191 RTX_FRAME_RELATED_P (insn) = 1;
2193 if ((frame_size & 0xfff) != 0)
2195 insn = emit_insn (gen_add2_insn
2196 (stack_pointer_rtx,
2197 GEN_INT (-(frame_size
2198 & (HOST_WIDE_INT)0xfff))));
2199 RTX_FRAME_RELATED_P (insn) = 1;
2203 else
2204 frame_size = -1;
2206 if (offset > 0)
2208 /* Save the frame pointer and lr if the frame pointer is needed
2209 first. Make the frame pointer point to the location of the
2210 old frame pointer on the stack. */
2211 if (frame_pointer_needed)
2213 rtx mem_fp, mem_lr;
2215 if (fp_offset)
2217 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2218 GEN_INT (-offset)));
2219 RTX_FRAME_RELATED_P (insn) = 1;
2220 aarch64_set_frame_expr (gen_rtx_SET
2221 (Pmode, stack_pointer_rtx,
2222 gen_rtx_MINUS (Pmode,
2223 stack_pointer_rtx,
2224 GEN_INT (offset))));
2225 mem_fp = gen_frame_mem (DImode,
2226 plus_constant (Pmode,
2227 stack_pointer_rtx,
2228 fp_offset));
2229 mem_lr = gen_frame_mem (DImode,
2230 plus_constant (Pmode,
2231 stack_pointer_rtx,
2232 fp_offset
2233 + UNITS_PER_WORD));
2234 insn = emit_insn (gen_store_pairdi (mem_fp,
2235 hard_frame_pointer_rtx,
2236 mem_lr,
2237 gen_rtx_REG (DImode,
2238 LR_REGNUM)));
2240 else
2242 insn = emit_insn (gen_storewb_pairdi_di
2243 (stack_pointer_rtx, stack_pointer_rtx,
2244 hard_frame_pointer_rtx,
2245 gen_rtx_REG (DImode, LR_REGNUM),
2246 GEN_INT (-offset),
2247 GEN_INT (GET_MODE_SIZE (DImode) - offset)));
2248 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2251 /* The first part of a frame-related parallel insn is always
2252 assumed to be relevant to the frame calculations;
2253 subsequent parts are only frame-related if explicitly
2254 marked. */
2255 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2256 RTX_FRAME_RELATED_P (insn) = 1;
2258 /* Set up frame pointer to point to the location of the
2259 previous frame pointer on the stack. */
2260 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2261 stack_pointer_rtx,
2262 GEN_INT (fp_offset)));
2263 aarch64_set_frame_expr (gen_rtx_SET
2264 (Pmode, hard_frame_pointer_rtx,
2265 plus_constant (Pmode,
2266 stack_pointer_rtx,
2267 fp_offset)));
2268 RTX_FRAME_RELATED_P (insn) = 1;
2269 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2270 hard_frame_pointer_rtx));
2272 else
2274 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2275 GEN_INT (-offset)));
2276 RTX_FRAME_RELATED_P (insn) = 1;
2279 aarch64_save_or_restore_callee_save_registers
2280 (fp_offset + cfun->machine->frame.hardfp_offset, 0);
2283 /* When offset >= 512,
2284 sub sp, sp, #<outgoing_args_size> */
2285 if (frame_size > -1)
2287 if (crtl->outgoing_args_size > 0)
2289 insn = emit_insn (gen_add2_insn
2290 (stack_pointer_rtx,
2291 GEN_INT (- crtl->outgoing_args_size)));
2292 RTX_FRAME_RELATED_P (insn) = 1;
2297 /* Generate the epilogue instructions for returning from a function. */
2298 void
2299 aarch64_expand_epilogue (bool for_sibcall)
2301 HOST_WIDE_INT original_frame_size, frame_size, offset;
2302 HOST_WIDE_INT fp_offset;
2303 rtx insn;
2304 rtx cfa_reg;
2306 aarch64_layout_frame ();
2307 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2308 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2309 + crtl->outgoing_args_size);
2310 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2311 STACK_BOUNDARY / BITS_PER_UNIT);
2313 fp_offset = (offset
2314 - original_frame_size
2315 - cfun->machine->frame.saved_regs_size);
2317 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2319 /* Store pairs and load pairs have a range of only -512 to 504. */
2320 if (offset >= 512)
2322 offset = original_frame_size + cfun->machine->frame.saved_regs_size;
2323 if (offset >= 512)
2324 offset = cfun->machine->frame.saved_regs_size;
2326 frame_size -= (offset + crtl->outgoing_args_size);
2327 fp_offset = 0;
2328 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2330 insn = emit_insn (gen_add2_insn
2331 (stack_pointer_rtx,
2332 GEN_INT (crtl->outgoing_args_size)));
2333 RTX_FRAME_RELATED_P (insn) = 1;
2336 else
2337 frame_size = -1;
2339 /* If there were outgoing arguments or we've done dynamic stack
2340 allocation, then restore the stack pointer from the frame
2341 pointer. This is at most one insn and more efficient than using
2342 GCC's internal mechanism. */
2343 if (frame_pointer_needed
2344 && (crtl->outgoing_args_size || cfun->calls_alloca))
2346 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2347 hard_frame_pointer_rtx,
2348 GEN_INT (- fp_offset)));
2349 RTX_FRAME_RELATED_P (insn) = 1;
2350 /* As SP is set to (FP - fp_offset), according to the rules in
2351 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2352 from the value of SP from now on. */
2353 cfa_reg = stack_pointer_rtx;
2356 aarch64_save_or_restore_callee_save_registers
2357 (fp_offset + cfun->machine->frame.hardfp_offset, 1);
2359 /* Restore the frame pointer and lr if the frame pointer is needed. */
2360 if (offset > 0)
2362 if (frame_pointer_needed)
2364 rtx mem_fp, mem_lr;
2366 if (fp_offset)
2368 mem_fp = gen_frame_mem (DImode,
2369 plus_constant (Pmode,
2370 stack_pointer_rtx,
2371 fp_offset));
2372 mem_lr = gen_frame_mem (DImode,
2373 plus_constant (Pmode,
2374 stack_pointer_rtx,
2375 fp_offset
2376 + UNITS_PER_WORD));
2377 insn = emit_insn (gen_load_pairdi (hard_frame_pointer_rtx,
2378 mem_fp,
2379 gen_rtx_REG (DImode,
2380 LR_REGNUM),
2381 mem_lr));
2383 else
2385 insn = emit_insn (gen_loadwb_pairdi_di
2386 (stack_pointer_rtx,
2387 stack_pointer_rtx,
2388 hard_frame_pointer_rtx,
2389 gen_rtx_REG (DImode, LR_REGNUM),
2390 GEN_INT (offset),
2391 GEN_INT (GET_MODE_SIZE (DImode) + offset)));
2392 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2393 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2394 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2395 plus_constant (Pmode, cfa_reg,
2396 offset))));
2399 /* The first part of a frame-related parallel insn
2400 is always assumed to be relevant to the frame
2401 calculations; subsequent parts are only
2402 frame-related if explicitly marked. */
2403 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2404 RTX_FRAME_RELATED_P (insn) = 1;
2405 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
2406 add_reg_note (insn, REG_CFA_RESTORE,
2407 gen_rtx_REG (DImode, LR_REGNUM));
2409 if (fp_offset)
2411 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2412 GEN_INT (offset)));
2413 RTX_FRAME_RELATED_P (insn) = 1;
2416 else
2418 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2419 GEN_INT (offset)));
2420 RTX_FRAME_RELATED_P (insn) = 1;
2424 /* Stack adjustment for exception handler. */
2425 if (crtl->calls_eh_return)
2427 /* We need to unwind the stack by the offset computed by
2428 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2429 based on SP. Ideally we would update the SP and define the
2430 CFA along the lines of:
2432 SP = SP + EH_RETURN_STACKADJ_RTX
2433 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2435 However the dwarf emitter only understands a constant
2436 register offset.
2438 The solution chosen here is to use the otherwise unused IP0
2439 as a temporary register to hold the current SP value. The
2440 CFA is described using IP0 then SP is modified. */
2442 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2444 insn = emit_move_insn (ip0, stack_pointer_rtx);
2445 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2446 RTX_FRAME_RELATED_P (insn) = 1;
2448 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2450 /* Ensure the assignment to IP0 does not get optimized away. */
2451 emit_use (ip0);
2454 if (frame_size > -1)
2456 if (frame_size >= 0x1000000)
2458 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2459 emit_move_insn (op0, GEN_INT (frame_size));
2460 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2461 aarch64_set_frame_expr (gen_rtx_SET
2462 (Pmode, stack_pointer_rtx,
2463 plus_constant (Pmode,
2464 stack_pointer_rtx,
2465 frame_size)));
2467 else if (frame_size > 0)
2469 if ((frame_size & 0xfff) != 0)
2471 insn = emit_insn (gen_add2_insn
2472 (stack_pointer_rtx,
2473 GEN_INT ((frame_size
2474 & (HOST_WIDE_INT) 0xfff))));
2475 RTX_FRAME_RELATED_P (insn) = 1;
2477 if ((frame_size & 0xfff) != frame_size)
2479 insn = emit_insn (gen_add2_insn
2480 (stack_pointer_rtx,
2481 GEN_INT ((frame_size
2482 & ~ (HOST_WIDE_INT) 0xfff))));
2483 RTX_FRAME_RELATED_P (insn) = 1;
2487 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2488 plus_constant (Pmode,
2489 stack_pointer_rtx,
2490 offset)));
2493 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2494 if (!for_sibcall)
2495 emit_jump_insn (ret_rtx);
2498 /* Return the place to copy the exception unwinding return address to.
2499 This will probably be a stack slot, but could (in theory) be the
2500 return register. */
2502 aarch64_final_eh_return_addr (void)
2504 HOST_WIDE_INT original_frame_size, frame_size, offset, fp_offset;
2505 aarch64_layout_frame ();
2506 original_frame_size = get_frame_size () + cfun->machine->saved_varargs_size;
2507 frame_size = (original_frame_size + cfun->machine->frame.saved_regs_size
2508 + crtl->outgoing_args_size);
2509 offset = frame_size = AARCH64_ROUND_UP (frame_size,
2510 STACK_BOUNDARY / BITS_PER_UNIT);
2511 fp_offset = offset
2512 - original_frame_size
2513 - cfun->machine->frame.saved_regs_size;
2515 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2516 return gen_rtx_REG (DImode, LR_REGNUM);
2518 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2519 result in a store to save LR introduced by builtin_eh_return () being
2520 incorrectly deleted because the alias is not detected.
2521 So in the calculation of the address to copy the exception unwinding
2522 return address to, we distinguish two cases.
2523 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2524 we return a SP-relative location since all the addresses are SP-relative
2525 in this case. This prevents the store from being optimized away.
2526 If the fp_offset is not 0, then the addresses will be FP-relative and
2527 therefore we return a FP-relative location. */
2529 if (frame_pointer_needed)
2531 if (fp_offset)
2532 return gen_frame_mem (DImode,
2533 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2534 else
2535 return gen_frame_mem (DImode,
2536 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2539 /* If FP is not needed, we calculate the location of LR, which would be
2540 at the top of the saved registers block. */
2542 return gen_frame_mem (DImode,
2543 plus_constant (Pmode,
2544 stack_pointer_rtx,
2545 fp_offset
2546 + cfun->machine->frame.saved_regs_size
2547 - 2 * UNITS_PER_WORD));
2550 /* Possibly output code to build up a constant in a register. For
2551 the benefit of the costs infrastructure, returns the number of
2552 instructions which would be emitted. GENERATE inhibits or
2553 enables code generation. */
2555 static int
2556 aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
2558 int insns = 0;
2560 if (aarch64_bitmask_imm (val, DImode))
2562 if (generate)
2563 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2564 insns = 1;
2566 else
2568 int i;
2569 int ncount = 0;
2570 int zcount = 0;
2571 HOST_WIDE_INT valp = val >> 16;
2572 HOST_WIDE_INT valm;
2573 HOST_WIDE_INT tval;
2575 for (i = 16; i < 64; i += 16)
2577 valm = (valp & 0xffff);
2579 if (valm != 0)
2580 ++ zcount;
2582 if (valm != 0xffff)
2583 ++ ncount;
2585 valp >>= 16;
2588 /* zcount contains the number of additional MOVK instructions
2589 required if the constant is built up with an initial MOVZ instruction,
2590 while ncount is the number of MOVK instructions required if starting
2591 with a MOVN instruction. Choose the sequence that yields the fewer
2592 instructions, preferring MOVZ instructions when the two counts are
2593 the same. */
2594 if (ncount < zcount)
2596 if (generate)
2597 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2598 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2599 tval = 0xffff;
2600 insns++;
2602 else
2604 if (generate)
2605 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2606 GEN_INT (val & 0xffff));
2607 tval = 0;
2608 insns++;
2611 val >>= 16;
2613 for (i = 16; i < 64; i += 16)
2615 if ((val & 0xffff) != tval)
2617 if (generate)
2618 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2619 GEN_INT (i),
2620 GEN_INT (val & 0xffff)));
2621 insns++;
2623 val >>= 16;
2626 return insns;
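/* Worked examples for aarch64_build_constant (values chosen purely for
   illustration): 0xffffffffffff1234 has all three upper 16-bit chunks
   equal to 0xffff, so ncount (0) is less than zcount (3) and a single
   MOVN-style move (roughly "movn x<regnum>, #0xedcb") suffices, giving
   a return value of 1.  0x0000456700001234 has zcount == 1, so the
   MOVZ path is taken and two instructions are needed, roughly

     movz x<regnum>, #0x1234
     movk x<regnum>, #0x4567, lsl #32

   giving a return value of 2.  */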
2629 static void
2630 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2632 HOST_WIDE_INT mdelta = delta;
2633 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2634 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2636 if (mdelta < 0)
2637 mdelta = -mdelta;
2639 if (mdelta >= 4096 * 4096)
2641 (void) aarch64_build_constant (scratchreg, delta, true);
2642 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2644 else if (mdelta > 0)
2646 if (mdelta >= 4096)
2648 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2649 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2650 if (delta < 0)
2651 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2652 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2653 else
2654 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2655 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2657 if (mdelta % 4096 != 0)
2659 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2660 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2661 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
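/* Worked example for aarch64_add_constant (hypothetical delta): for
   DELTA == 5000, mdelta >= 4096, so the scratch register is loaded
   with 5000 / 4096 == 1 and added in shifted left by 12 (i.e. +4096),
   then the remainder 5000 % 4096 == 904 is added directly, roughly

     mov  x<scratchreg>, #1
     add  x<regnum>, x<regnum>, x<scratchreg>, lsl #12
     add  x<regnum>, x<regnum>, #904

   For a negative delta the shifted term is subtracted and the sign of
   the remainder is flipped instead.  */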
2666 /* Output code to add DELTA to the first argument, and then jump
2667 to FUNCTION. Used for C++ multiple inheritance. */
2668 static void
2669 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2670 HOST_WIDE_INT delta,
2671 HOST_WIDE_INT vcall_offset,
2672 tree function)
2674 /* The this pointer is always in x0. Note that this differs from
2675 Arm where the this pointer may be bumped to r1 if r0 is required
2676 to return a pointer to an aggregate. On AArch64 a result value
2677 pointer will be in x8. */
2678 int this_regno = R0_REGNUM;
2679 rtx this_rtx, temp0, temp1, addr, insn, funexp;
2681 reload_completed = 1;
2682 emit_note (NOTE_INSN_PROLOGUE_END);
2684 if (vcall_offset == 0)
2685 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2686 else
2688 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2690 this_rtx = gen_rtx_REG (Pmode, this_regno);
2691 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2692 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2694 addr = this_rtx;
2695 if (delta != 0)
2697 if (delta >= -256 && delta < 256)
2698 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2699 plus_constant (Pmode, this_rtx, delta));
2700 else
2701 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2704 if (Pmode == ptr_mode)
2705 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2706 else
2707 aarch64_emit_move (temp0,
2708 gen_rtx_ZERO_EXTEND (Pmode,
2709 gen_rtx_MEM (ptr_mode, addr)));
2711 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2712 addr = plus_constant (Pmode, temp0, vcall_offset);
2713 else
2715 (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
2716 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2719 if (Pmode == ptr_mode)
2720 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2721 else
2722 aarch64_emit_move (temp1,
2723 gen_rtx_SIGN_EXTEND (Pmode,
2724 gen_rtx_MEM (ptr_mode, addr)));
2726 emit_insn (gen_add2_insn (this_rtx, temp1));
2729 /* Generate a tail call to the target function. */
2730 if (!TREE_USED (function))
2732 assemble_external (function);
2733 TREE_USED (function) = 1;
2735 funexp = XEXP (DECL_RTL (function), 0);
2736 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2737 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2738 SIBLING_CALL_P (insn) = 1;
2740 insn = get_insns ();
2741 shorten_branches (insn);
2742 final_start_function (insn, file, 1);
2743 final (insn, file, 1);
2744 final_end_function ();
2746 /* Stop pretending to be a post-reload pass. */
2747 reload_completed = 0;
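/* Illustrative example (hypothetical thunk): for DELTA == 16 and
   VCALL_OFFSET == 0 the code above reduces to a single immediate
   adjustment of the this pointer followed by a tail call, roughly

     add  x0, x0, #16
     b    <function>  */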
2750 static int
2751 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2753 if (GET_CODE (*x) == SYMBOL_REF)
2754 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2756 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2757 TLS offsets, not real symbol references. */
2758 if (GET_CODE (*x) == UNSPEC
2759 && XINT (*x, 1) == UNSPEC_TLS)
2760 return -1;
2762 return 0;
2765 static bool
2766 aarch64_tls_referenced_p (rtx x)
2768 if (!TARGET_HAVE_TLS)
2769 return false;
2771 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2775 static int
2776 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2778 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2779 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2781 if (*imm1 < *imm2)
2782 return -1;
2783 if (*imm1 > *imm2)
2784 return +1;
2785 return 0;
2789 static void
2790 aarch64_build_bitmask_table (void)
2792 unsigned HOST_WIDE_INT mask, imm;
2793 unsigned int log_e, e, s, r;
2794 unsigned int nimms = 0;
2796 for (log_e = 1; log_e <= 6; log_e++)
2798 e = 1 << log_e;
2799 if (e == 64)
2800 mask = ~(HOST_WIDE_INT) 0;
2801 else
2802 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2803 for (s = 1; s < e; s++)
2805 for (r = 0; r < e; r++)
2807 /* set s consecutive bits to 1 (s < 64) */
2808 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2809 /* rotate right by r */
2810 if (r != 0)
2811 imm = ((imm >> r) | (imm << (e - r))) & mask;
2812 /* replicate the constant depending on SIMD size */
2813 switch (log_e) {
2814 case 1: imm |= (imm << 2);
2815 case 2: imm |= (imm << 4);
2816 case 3: imm |= (imm << 8);
2817 case 4: imm |= (imm << 16);
2818 case 5: imm |= (imm << 32);
2819 case 6:
2820 break;
2821 default:
2822 gcc_unreachable ();
2824 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2825 aarch64_bitmasks[nimms++] = imm;
2830 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2831 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2832 aarch64_bitmasks_cmp);
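/* Worked example of the (element size, set bits, rotation) encoding
   built above: with e == 8, s == 3 and r == 1 the inner loop forms
   0b00000111, rotates it right by one bit within the element to get
   0x83, and the switch fall-through replicates it up to
   0x8383838383838383.  Each element size contributes e * (e - 1)
   entries, i.e. 2 + 12 + 56 + 240 + 992 + 4032 == 5334 in total,
   which is what the assert against AARCH64_NUM_BITMASKS checks.  */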
2836 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2837 a left shift of 0 or 12 bits. */
2838 bool
2839 aarch64_uimm12_shift (HOST_WIDE_INT val)
2841 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2842 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2847 /* Return true if val is an immediate that can be loaded into a
2848 register by a MOVZ instruction. */
2849 static bool
2850 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2852 if (GET_MODE_SIZE (mode) > 4)
2854 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2855 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2856 return 1;
2858 else
2860 /* Ignore sign extension. */
2861 val &= (HOST_WIDE_INT) 0xffffffff;
2863 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2864 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2868 /* Return true if val is a valid bitmask immediate. */
2869 bool
2870 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2872 if (GET_MODE_SIZE (mode) < 8)
2874 /* Replicate bit pattern. */
2875 val &= (HOST_WIDE_INT) 0xffffffff;
2876 val |= val << 32;
2878 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2879 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2883 /* Return true if val is an immediate that can be loaded into a
2884 register in a single instruction. */
2885 bool
2886 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2888 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2889 return 1;
2890 return aarch64_bitmask_imm (val, mode);
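/* Illustrative examples: 0xffff0000ffffffff is not itself a MOVZ-style
   immediate, but its complement 0x0000ffff00000000 is, so it is
   accepted here via the MOVN check; 0x5555555555555555 fails both the
   MOVZ and MOVN checks but is a replicated bitmask pattern, so it is
   accepted via aarch64_bitmask_imm (a single logical-immediate move).  */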
2893 static bool
2894 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2896 rtx base, offset;
2898 if (GET_CODE (x) == HIGH)
2899 return true;
2901 split_const (x, &base, &offset);
2902 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2904 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2905 != SYMBOL_FORCE_TO_MEM)
2906 return true;
2907 else
2908 /* Avoid generating a 64-bit relocation in ILP32; leave it
2909 to aarch64_expand_mov_immediate to handle it properly. */
2910 return mode != ptr_mode;
2913 return aarch64_tls_referenced_p (x);
2916 /* Return true if register REGNO is a valid index register.
2917 STRICT_P is true if REG_OK_STRICT is in effect. */
2919 bool
2920 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2922 if (!HARD_REGISTER_NUM_P (regno))
2924 if (!strict_p)
2925 return true;
2927 if (!reg_renumber)
2928 return false;
2930 regno = reg_renumber[regno];
2932 return GP_REGNUM_P (regno);
2935 /* Return true if register REGNO is a valid base register for mode MODE.
2936 STRICT_P is true if REG_OK_STRICT is in effect. */
2938 bool
2939 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
2941 if (!HARD_REGISTER_NUM_P (regno))
2943 if (!strict_p)
2944 return true;
2946 if (!reg_renumber)
2947 return false;
2949 regno = reg_renumber[regno];
2952 /* The fake registers will be eliminated to either the stack or
2953 hard frame pointer, both of which are usually valid base registers.
2954 Reload deals with the cases where the eliminated form isn't valid. */
2955 return (GP_REGNUM_P (regno)
2956 || regno == SP_REGNUM
2957 || regno == FRAME_POINTER_REGNUM
2958 || regno == ARG_POINTER_REGNUM);
2961 /* Return true if X is a valid base register for mode MODE.
2962 STRICT_P is true if REG_OK_STRICT is in effect. */
2964 static bool
2965 aarch64_base_register_rtx_p (rtx x, bool strict_p)
2967 if (!strict_p && GET_CODE (x) == SUBREG)
2968 x = SUBREG_REG (x);
2970 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
2973 /* Return true if address offset is a valid index. If it is, fill in INFO
2974 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
2976 static bool
2977 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
2978 enum machine_mode mode, bool strict_p)
2980 enum aarch64_address_type type;
2981 rtx index;
2982 int shift;
2984 /* (reg:P) */
2985 if ((REG_P (x) || GET_CODE (x) == SUBREG)
2986 && GET_MODE (x) == Pmode)
2988 type = ADDRESS_REG_REG;
2989 index = x;
2990 shift = 0;
2992 /* (sign_extend:DI (reg:SI)) */
2993 else if ((GET_CODE (x) == SIGN_EXTEND
2994 || GET_CODE (x) == ZERO_EXTEND)
2995 && GET_MODE (x) == DImode
2996 && GET_MODE (XEXP (x, 0)) == SImode)
2998 type = (GET_CODE (x) == SIGN_EXTEND)
2999 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3000 index = XEXP (x, 0);
3001 shift = 0;
3003 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3004 else if (GET_CODE (x) == MULT
3005 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3006 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3007 && GET_MODE (XEXP (x, 0)) == DImode
3008 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3009 && CONST_INT_P (XEXP (x, 1)))
3011 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3012 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3013 index = XEXP (XEXP (x, 0), 0);
3014 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3016 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3017 else if (GET_CODE (x) == ASHIFT
3018 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3019 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3020 && GET_MODE (XEXP (x, 0)) == DImode
3021 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3022 && CONST_INT_P (XEXP (x, 1)))
3024 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3025 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3026 index = XEXP (XEXP (x, 0), 0);
3027 shift = INTVAL (XEXP (x, 1));
3029 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3030 else if ((GET_CODE (x) == SIGN_EXTRACT
3031 || GET_CODE (x) == ZERO_EXTRACT)
3032 && GET_MODE (x) == DImode
3033 && GET_CODE (XEXP (x, 0)) == MULT
3034 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3035 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3037 type = (GET_CODE (x) == SIGN_EXTRACT)
3038 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3039 index = XEXP (XEXP (x, 0), 0);
3040 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3041 if (INTVAL (XEXP (x, 1)) != 32 + shift
3042 || INTVAL (XEXP (x, 2)) != 0)
3043 shift = -1;
3045 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3046 (const_int 0xffffffff<<shift)) */
3047 else if (GET_CODE (x) == AND
3048 && GET_MODE (x) == DImode
3049 && GET_CODE (XEXP (x, 0)) == MULT
3050 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3051 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3052 && CONST_INT_P (XEXP (x, 1)))
3054 type = ADDRESS_REG_UXTW;
3055 index = XEXP (XEXP (x, 0), 0);
3056 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3057 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3058 shift = -1;
3060 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3061 else if ((GET_CODE (x) == SIGN_EXTRACT
3062 || GET_CODE (x) == ZERO_EXTRACT)
3063 && GET_MODE (x) == DImode
3064 && GET_CODE (XEXP (x, 0)) == ASHIFT
3065 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3066 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3068 type = (GET_CODE (x) == SIGN_EXTRACT)
3069 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3070 index = XEXP (XEXP (x, 0), 0);
3071 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3072 if (INTVAL (XEXP (x, 1)) != 32 + shift
3073 || INTVAL (XEXP (x, 2)) != 0)
3074 shift = -1;
3076 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3077 (const_int 0xffffffff<<shift)) */
3078 else if (GET_CODE (x) == AND
3079 && GET_MODE (x) == DImode
3080 && GET_CODE (XEXP (x, 0)) == ASHIFT
3081 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3082 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3083 && CONST_INT_P (XEXP (x, 1)))
3085 type = ADDRESS_REG_UXTW;
3086 index = XEXP (XEXP (x, 0), 0);
3087 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3088 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3089 shift = -1;
3091 /* (mult:P (reg:P) (const_int scale)) */
3092 else if (GET_CODE (x) == MULT
3093 && GET_MODE (x) == Pmode
3094 && GET_MODE (XEXP (x, 0)) == Pmode
3095 && CONST_INT_P (XEXP (x, 1)))
3097 type = ADDRESS_REG_REG;
3098 index = XEXP (x, 0);
3099 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3101 /* (ashift:P (reg:P) (const_int shift)) */
3102 else if (GET_CODE (x) == ASHIFT
3103 && GET_MODE (x) == Pmode
3104 && GET_MODE (XEXP (x, 0)) == Pmode
3105 && CONST_INT_P (XEXP (x, 1)))
3107 type = ADDRESS_REG_REG;
3108 index = XEXP (x, 0);
3109 shift = INTVAL (XEXP (x, 1));
3111 else
3112 return false;
3114 if (GET_CODE (index) == SUBREG)
3115 index = SUBREG_REG (index);
3117 if ((shift == 0 ||
3118 (shift > 0 && shift <= 3
3119 && (1 << shift) == GET_MODE_SIZE (mode)))
3120 && REG_P (index)
3121 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3123 info->type = type;
3124 info->offset = index;
3125 info->shift = shift;
3126 return true;
3129 return false;
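/* Illustrative example: for a DImode access whose address is
   (plus (reg x1) (mult (reg x2) (const_int 8))), the MULT term is
   handled above with shift == exact_log2 (8) == 3, which matches the
   access size, so the index is accepted as ADDRESS_REG_REG and is
   later printed as [x1, x2, lsl 3].  */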
3132 static inline bool
3133 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3135 return (offset >= -64 * GET_MODE_SIZE (mode)
3136 && offset < 64 * GET_MODE_SIZE (mode)
3137 && offset % GET_MODE_SIZE (mode) == 0);
3140 static inline bool
3141 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3142 HOST_WIDE_INT offset)
3144 return offset >= -256 && offset < 256;
3147 static inline bool
3148 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3150 return (offset >= 0
3151 && offset < 4096 * GET_MODE_SIZE (mode)
3152 && offset % GET_MODE_SIZE (mode) == 0);
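/* For example, for a DImode (8-byte) access these three predicates
   accept offsets in [-512, 504] in multiples of 8 (the load/store pair
   range noted earlier), [-256, 255] with no alignment restriction, and
   [0, 32760] in multiples of 8 respectively.  */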
3155 /* Return true if X is a valid address for machine mode MODE. If it is,
3156 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3157 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3159 static bool
3160 aarch64_classify_address (struct aarch64_address_info *info,
3161 rtx x, enum machine_mode mode,
3162 RTX_CODE outer_code, bool strict_p)
3164 enum rtx_code code = GET_CODE (x);
3165 rtx op0, op1;
3166 bool allow_reg_index_p =
3167 outer_code != PARALLEL && GET_MODE_SIZE(mode) != 16;
3169 /* Don't support anything other than POST_INC or REG addressing for
3170 AdvSIMD. */
3171 if (aarch64_vector_mode_p (mode)
3172 && (code != POST_INC && code != REG))
3173 return false;
3175 switch (code)
3177 case REG:
3178 case SUBREG:
3179 info->type = ADDRESS_REG_IMM;
3180 info->base = x;
3181 info->offset = const0_rtx;
3182 return aarch64_base_register_rtx_p (x, strict_p);
3184 case PLUS:
3185 op0 = XEXP (x, 0);
3186 op1 = XEXP (x, 1);
3187 if (GET_MODE_SIZE (mode) != 0
3188 && CONST_INT_P (op1)
3189 && aarch64_base_register_rtx_p (op0, strict_p))
3191 HOST_WIDE_INT offset = INTVAL (op1);
3193 info->type = ADDRESS_REG_IMM;
3194 info->base = op0;
3195 info->offset = op1;
3197 /* TImode and TFmode values are allowed in both pairs of X
3198 registers and individual Q registers. The available
3199 address modes are:
3200 X,X: 7-bit signed scaled offset
3201 Q: 9-bit signed offset
3202 We conservatively require an offset representable in either mode.
3204 if (mode == TImode || mode == TFmode)
3205 return (offset_7bit_signed_scaled_p (mode, offset)
3206 && offset_9bit_signed_unscaled_p (mode, offset));
3208 if (outer_code == PARALLEL)
3209 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3210 && offset_7bit_signed_scaled_p (mode, offset));
3211 else
3212 return (offset_9bit_signed_unscaled_p (mode, offset)
3213 || offset_12bit_unsigned_scaled_p (mode, offset));
3216 if (allow_reg_index_p)
3218 /* Look for base + (scaled/extended) index register. */
3219 if (aarch64_base_register_rtx_p (op0, strict_p)
3220 && aarch64_classify_index (info, op1, mode, strict_p))
3222 info->base = op0;
3223 return true;
3225 if (aarch64_base_register_rtx_p (op1, strict_p)
3226 && aarch64_classify_index (info, op0, mode, strict_p))
3228 info->base = op1;
3229 return true;
3233 return false;
3235 case POST_INC:
3236 case POST_DEC:
3237 case PRE_INC:
3238 case PRE_DEC:
3239 info->type = ADDRESS_REG_WB;
3240 info->base = XEXP (x, 0);
3241 info->offset = NULL_RTX;
3242 return aarch64_base_register_rtx_p (info->base, strict_p);
3244 case POST_MODIFY:
3245 case PRE_MODIFY:
3246 info->type = ADDRESS_REG_WB;
3247 info->base = XEXP (x, 0);
3248 if (GET_CODE (XEXP (x, 1)) == PLUS
3249 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3250 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3251 && aarch64_base_register_rtx_p (info->base, strict_p))
3253 HOST_WIDE_INT offset;
3254 info->offset = XEXP (XEXP (x, 1), 1);
3255 offset = INTVAL (info->offset);
3257 /* TImode and TFmode values are allowed in both pairs of X
3258 registers and individual Q registers. The available
3259 address modes are:
3260 X,X: 7-bit signed scaled offset
3261 Q: 9-bit signed offset
3262 We conservatively require an offset representable in either mode.
3264 if (mode == TImode || mode == TFmode)
3265 return (offset_7bit_signed_scaled_p (mode, offset)
3266 && offset_9bit_signed_unscaled_p (mode, offset));
3268 if (outer_code == PARALLEL)
3269 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3270 && offset_7bit_signed_scaled_p (mode, offset));
3271 else
3272 return offset_9bit_signed_unscaled_p (mode, offset);
3274 return false;
3276 case CONST:
3277 case SYMBOL_REF:
3278 case LABEL_REF:
3279 /* load literal: pc-relative constant pool entry. Only supported
3280 for SI mode or larger. */
3281 info->type = ADDRESS_SYMBOLIC;
3282 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3284 rtx sym, addend;
3286 split_const (x, &sym, &addend);
3287 return (GET_CODE (sym) == LABEL_REF
3288 || (GET_CODE (sym) == SYMBOL_REF
3289 && CONSTANT_POOL_ADDRESS_P (sym)));
3291 return false;
3293 case LO_SUM:
3294 info->type = ADDRESS_LO_SUM;
3295 info->base = XEXP (x, 0);
3296 info->offset = XEXP (x, 1);
3297 if (allow_reg_index_p
3298 && aarch64_base_register_rtx_p (info->base, strict_p))
3300 rtx sym, offs;
3301 split_const (info->offset, &sym, &offs);
3302 if (GET_CODE (sym) == SYMBOL_REF
3303 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3304 == SYMBOL_SMALL_ABSOLUTE))
3306 /* The symbol and offset must be aligned to the access size. */
3307 unsigned int align;
3308 unsigned int ref_size;
3310 if (CONSTANT_POOL_ADDRESS_P (sym))
3311 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3312 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3314 tree exp = SYMBOL_REF_DECL (sym);
3315 align = TYPE_ALIGN (TREE_TYPE (exp));
3316 align = CONSTANT_ALIGNMENT (exp, align);
3318 else if (SYMBOL_REF_DECL (sym))
3319 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3320 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3321 && SYMBOL_REF_BLOCK (sym) != NULL)
3322 align = SYMBOL_REF_BLOCK (sym)->alignment;
3323 else
3324 align = BITS_PER_UNIT;
3326 ref_size = GET_MODE_SIZE (mode);
3327 if (ref_size == 0)
3328 ref_size = GET_MODE_SIZE (DImode);
3330 return ((INTVAL (offs) & (ref_size - 1)) == 0
3331 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3334 return false;
3336 default:
3337 return false;
3341 bool
3342 aarch64_symbolic_address_p (rtx x)
3344 rtx offset;
3346 split_const (x, &x, &offset);
3347 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3350 /* Classify the base of symbolic expression X, given that X appears in
3351 context CONTEXT. */
3353 enum aarch64_symbol_type
3354 aarch64_classify_symbolic_expression (rtx x,
3355 enum aarch64_symbol_context context)
3357 rtx offset;
3359 split_const (x, &x, &offset);
3360 return aarch64_classify_symbol (x, context);
3364 /* Return TRUE if X is a legitimate address for accessing memory in
3365 mode MODE. */
3366 static bool
3367 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3369 struct aarch64_address_info addr;
3371 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3374 /* Return TRUE if X is a legitimate address for accessing memory in
3375 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3376 pair operation. */
3377 bool
3378 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3379 RTX_CODE outer_code, bool strict_p)
3381 struct aarch64_address_info addr;
3383 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3386 /* Return TRUE if rtx X is immediate constant 0.0 */
3387 bool
3388 aarch64_float_const_zero_rtx_p (rtx x)
3390 REAL_VALUE_TYPE r;
3392 if (GET_MODE (x) == VOIDmode)
3393 return false;
3395 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3396 if (REAL_VALUE_MINUS_ZERO (r))
3397 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3398 return REAL_VALUES_EQUAL (r, dconst0);
3401 /* Return the fixed registers used for condition codes. */
3403 static bool
3404 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3406 *p1 = CC_REGNUM;
3407 *p2 = INVALID_REGNUM;
3408 return true;
3411 enum machine_mode
3412 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3414 /* All floating point compares return CCFP if it is an equality
3415 comparison, and CCFPE otherwise. */
3416 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3418 switch (code)
3420 case EQ:
3421 case NE:
3422 case UNORDERED:
3423 case ORDERED:
3424 case UNLT:
3425 case UNLE:
3426 case UNGT:
3427 case UNGE:
3428 case UNEQ:
3429 case LTGT:
3430 return CCFPmode;
3432 case LT:
3433 case LE:
3434 case GT:
3435 case GE:
3436 return CCFPEmode;
3438 default:
3439 gcc_unreachable ();
3443 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3444 && y == const0_rtx
3445 && (code == EQ || code == NE || code == LT || code == GE)
3446 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3447 || GET_CODE (x) == NEG))
3448 return CC_NZmode;
3450 /* A compare with a shifted operand. Because of canonicalization,
3451 the comparison will have to be swapped when we emit the assembly
3452 code. */
3453 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3454 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3455 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3456 || GET_CODE (x) == LSHIFTRT
3457 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3458 return CC_SWPmode;
3460 /* Similarly for a negated operand, but we can only do this for
3461 equalities. */
3462 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3463 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3464 && (code == EQ || code == NE)
3465 && GET_CODE (x) == NEG)
3466 return CC_Zmode;
3468 /* A compare of a mode narrower than SI mode against zero can be done
3469 by extending the value in the comparison. */
3470 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3471 && y == const0_rtx)
3472 /* Only use sign-extension if we really need it. */
3473 return ((code == GT || code == GE || code == LE || code == LT)
3474 ? CC_SESWPmode : CC_ZESWPmode);
3476 /* For everything else, return CCmode. */
3477 return CCmode;
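/* Illustrative examples: comparing (plus x y) against zero for EQ
   selects CC_NZmode; an ordered floating-point LT selects CCFPEmode
   while the unordered UNLT selects CCFPmode; and a QImode comparison
   against zero for GT selects CC_SESWPmode so that sign extension can
   be folded into the comparison.  */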
3480 static unsigned
3481 aarch64_get_condition_code (rtx x)
3483 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3484 enum rtx_code comp_code = GET_CODE (x);
3486 if (GET_MODE_CLASS (mode) != MODE_CC)
3487 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3489 switch (mode)
3491 case CCFPmode:
3492 case CCFPEmode:
3493 switch (comp_code)
3495 case GE: return AARCH64_GE;
3496 case GT: return AARCH64_GT;
3497 case LE: return AARCH64_LS;
3498 case LT: return AARCH64_MI;
3499 case NE: return AARCH64_NE;
3500 case EQ: return AARCH64_EQ;
3501 case ORDERED: return AARCH64_VC;
3502 case UNORDERED: return AARCH64_VS;
3503 case UNLT: return AARCH64_LT;
3504 case UNLE: return AARCH64_LE;
3505 case UNGT: return AARCH64_HI;
3506 case UNGE: return AARCH64_PL;
3507 default: gcc_unreachable ();
3509 break;
3511 case CCmode:
3512 switch (comp_code)
3514 case NE: return AARCH64_NE;
3515 case EQ: return AARCH64_EQ;
3516 case GE: return AARCH64_GE;
3517 case GT: return AARCH64_GT;
3518 case LE: return AARCH64_LE;
3519 case LT: return AARCH64_LT;
3520 case GEU: return AARCH64_CS;
3521 case GTU: return AARCH64_HI;
3522 case LEU: return AARCH64_LS;
3523 case LTU: return AARCH64_CC;
3524 default: gcc_unreachable ();
3526 break;
3528 case CC_SWPmode:
3529 case CC_ZESWPmode:
3530 case CC_SESWPmode:
3531 switch (comp_code)
3533 case NE: return AARCH64_NE;
3534 case EQ: return AARCH64_EQ;
3535 case GE: return AARCH64_LE;
3536 case GT: return AARCH64_LT;
3537 case LE: return AARCH64_GE;
3538 case LT: return AARCH64_GT;
3539 case GEU: return AARCH64_LS;
3540 case GTU: return AARCH64_CC;
3541 case LEU: return AARCH64_CS;
3542 case LTU: return AARCH64_HI;
3543 default: gcc_unreachable ();
3545 break;
3547 case CC_NZmode:
3548 switch (comp_code)
3550 case NE: return AARCH64_NE;
3551 case EQ: return AARCH64_EQ;
3552 case GE: return AARCH64_PL;
3553 case LT: return AARCH64_MI;
3554 default: gcc_unreachable ();
3556 break;
3558 case CC_Zmode:
3559 switch (comp_code)
3561 case NE: return AARCH64_NE;
3562 case EQ: return AARCH64_EQ;
3563 default: gcc_unreachable ();
3565 break;
3567 default:
3568 gcc_unreachable ();
3569 break;
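/* Return the number of bits set in VALUE.  Kernighan's method: each
   iteration of the loop clears the lowest set bit, so e.g. 0x29
   (binary 101001) takes three iterations and yields 3.  */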
3573 static unsigned
3574 bit_count (unsigned HOST_WIDE_INT value)
3576 unsigned count = 0;
3578 while (value)
3580 count++;
3581 value &= value - 1;
3584 return count;
3587 void
3588 aarch64_print_operand (FILE *f, rtx x, char code)
3590 switch (code)
3592 /* An integer or symbol address without a preceding # sign. */
3593 case 'c':
3594 switch (GET_CODE (x))
3596 case CONST_INT:
3597 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3598 break;
3600 case SYMBOL_REF:
3601 output_addr_const (f, x);
3602 break;
3604 case CONST:
3605 if (GET_CODE (XEXP (x, 0)) == PLUS
3606 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3608 output_addr_const (f, x);
3609 break;
3611 /* Fall through. */
3613 default:
3614 output_operand_lossage ("Unsupported operand for code '%c'", code);
3616 break;
3618 case 'e':
3619 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3621 int n;
3623 if (GET_CODE (x) != CONST_INT
3624 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3626 output_operand_lossage ("invalid operand for '%%%c'", code);
3627 return;
3630 switch (n)
3632 case 3:
3633 fputc ('b', f);
3634 break;
3635 case 4:
3636 fputc ('h', f);
3637 break;
3638 case 5:
3639 fputc ('w', f);
3640 break;
3641 default:
3642 output_operand_lossage ("invalid operand for '%%%c'", code);
3643 return;
3646 break;
3648 case 'p':
3650 int n;
3652 /* Print N such that 2^N == X. */
3653 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3655 output_operand_lossage ("invalid operand for '%%%c'", code);
3656 return;
3659 asm_fprintf (f, "%d", n);
3661 break;
3663 case 'P':
3664 /* Print the number of non-zero bits in X (a const_int). */
3665 if (GET_CODE (x) != CONST_INT)
3667 output_operand_lossage ("invalid operand for '%%%c'", code);
3668 return;
3671 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3672 break;
3674 case 'H':
3675 /* Print the higher numbered register of a pair (TImode) of regs. */
3676 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3678 output_operand_lossage ("invalid operand for '%%%c'", code);
3679 return;
3682 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3683 break;
3685 case 'm':
3686 /* Print a condition (eq, ne, etc). */
3688 /* CONST_TRUE_RTX means always -- that's the default. */
3689 if (x == const_true_rtx)
3690 return;
3692 if (!COMPARISON_P (x))
3694 output_operand_lossage ("invalid operand for '%%%c'", code);
3695 return;
3698 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3699 break;
3701 case 'M':
3702 /* Print the inverse of a condition (eq <-> ne, etc). */
3704 /* CONST_TRUE_RTX means never -- that's the default. */
3705 if (x == const_true_rtx)
3707 fputs ("nv", f);
3708 return;
3711 if (!COMPARISON_P (x))
3713 output_operand_lossage ("invalid operand for '%%%c'", code);
3714 return;
3717 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3718 (aarch64_get_condition_code (x))], f);
3719 break;
3721 case 'b':
3722 case 'h':
3723 case 's':
3724 case 'd':
3725 case 'q':
3726 /* Print a scalar FP/SIMD register name. */
3727 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3729 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3730 return;
3732 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3733 break;
3735 case 'S':
3736 case 'T':
3737 case 'U':
3738 case 'V':
3739 /* Print the first FP/SIMD register name in a list. */
3740 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3742 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3743 return;
3745 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3746 break;
3748 case 'X':
3749 /* Print bottom 16 bits of integer constant in hex. */
3750 if (GET_CODE (x) != CONST_INT)
3752 output_operand_lossage ("invalid operand for '%%%c'", code);
3753 return;
3755 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3756 break;
3758 case 'w':
3759 case 'x':
3760 /* Print a general register name or the zero register (32-bit or
3761 64-bit). */
3762 if (x == const0_rtx
3763 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3765 asm_fprintf (f, "%czr", code);
3766 break;
3769 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3771 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3772 break;
3775 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3777 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3778 break;
3781 /* Fall through */
3783 case 0:
3784 /* Print a normal operand. If it's a general register, then we
3785 assume DImode. */
3786 if (x == NULL)
3788 output_operand_lossage ("missing operand");
3789 return;
3792 switch (GET_CODE (x))
3794 case REG:
3795 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3796 break;
3798 case MEM:
3799 aarch64_memory_reference_mode = GET_MODE (x);
3800 output_address (XEXP (x, 0));
3801 break;
3803 case LABEL_REF:
3804 case SYMBOL_REF:
3805 output_addr_const (asm_out_file, x);
3806 break;
3808 case CONST_INT:
3809 asm_fprintf (f, "%wd", INTVAL (x));
3810 break;
3812 case CONST_VECTOR:
3813 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3815 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3816 HOST_WIDE_INT_MIN,
3817 HOST_WIDE_INT_MAX));
3818 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3820 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3822 fputc ('0', f);
3824 else
3825 gcc_unreachable ();
3826 break;
3828 case CONST_DOUBLE:
3829 /* CONST_DOUBLE can represent a double-width integer.
3830 In this case, the mode of x is VOIDmode. */
3831 if (GET_MODE (x) == VOIDmode)
3832 ; /* Do Nothing. */
3833 else if (aarch64_float_const_zero_rtx_p (x))
3835 fputc ('0', f);
3836 break;
3838 else if (aarch64_float_const_representable_p (x))
3840 #define buf_size 20
3841 char float_buf[buf_size] = {'\0'};
3842 REAL_VALUE_TYPE r;
3843 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3844 real_to_decimal_for_mode (float_buf, &r,
3845 buf_size, buf_size,
3846 1, GET_MODE (x));
3847 asm_fprintf (asm_out_file, "%s", float_buf);
3848 break;
3849 #undef buf_size
3851 output_operand_lossage ("invalid constant");
3852 return;
3853 default:
3854 output_operand_lossage ("invalid operand");
3855 return;
3857 break;
3859 case 'A':
3860 if (GET_CODE (x) == HIGH)
3861 x = XEXP (x, 0);
3863 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3865 case SYMBOL_SMALL_GOT:
3866 asm_fprintf (asm_out_file, ":got:");
3867 break;
3869 case SYMBOL_SMALL_TLSGD:
3870 asm_fprintf (asm_out_file, ":tlsgd:");
3871 break;
3873 case SYMBOL_SMALL_TLSDESC:
3874 asm_fprintf (asm_out_file, ":tlsdesc:");
3875 break;
3877 case SYMBOL_SMALL_GOTTPREL:
3878 asm_fprintf (asm_out_file, ":gottprel:");
3879 break;
3881 case SYMBOL_SMALL_TPREL:
3882 asm_fprintf (asm_out_file, ":tprel:");
3883 break;
3885 case SYMBOL_TINY_GOT:
3886 gcc_unreachable ();
3887 break;
3889 default:
3890 break;
3892 output_addr_const (asm_out_file, x);
3893 break;
3895 case 'L':
3896 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3898 case SYMBOL_SMALL_GOT:
3899 asm_fprintf (asm_out_file, ":lo12:");
3900 break;
3902 case SYMBOL_SMALL_TLSGD:
3903 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3904 break;
3906 case SYMBOL_SMALL_TLSDESC:
3907 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3908 break;
3910 case SYMBOL_SMALL_GOTTPREL:
3911 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3912 break;
3914 case SYMBOL_SMALL_TPREL:
3915 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3916 break;
3918 case SYMBOL_TINY_GOT:
3919 asm_fprintf (asm_out_file, ":got:");
3920 break;
3922 default:
3923 break;
3925 output_addr_const (asm_out_file, x);
3926 break;
3928 case 'G':
3930 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3932 case SYMBOL_SMALL_TPREL:
3933 asm_fprintf (asm_out_file, ":tprel_hi12:");
3934 break;
3935 default:
3936 break;
3938 output_addr_const (asm_out_file, x);
3939 break;
3941 default:
3942 output_operand_lossage ("invalid operand prefix '%%%c'", code);
3943 return;
3947 void
3948 aarch64_print_operand_address (FILE *f, rtx x)
3950 struct aarch64_address_info addr;
3952 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
3953 MEM, true))
3954 switch (addr.type)
3956 case ADDRESS_REG_IMM:
3957 if (addr.offset == const0_rtx)
3958 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
3959 else
3960 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
3961 INTVAL (addr.offset));
3962 return;
3964 case ADDRESS_REG_REG:
3965 if (addr.shift == 0)
3966 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
3967 reg_names [REGNO (addr.offset)]);
3968 else
3969 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
3970 reg_names [REGNO (addr.offset)], addr.shift);
3971 return;
3973 case ADDRESS_REG_UXTW:
3974 if (addr.shift == 0)
3975 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
3976 REGNO (addr.offset) - R0_REGNUM);
3977 else
3978 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
3979 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3980 return;
3982 case ADDRESS_REG_SXTW:
3983 if (addr.shift == 0)
3984 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
3985 REGNO (addr.offset) - R0_REGNUM);
3986 else
3987 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
3988 REGNO (addr.offset) - R0_REGNUM, addr.shift);
3989 return;
3991 case ADDRESS_REG_WB:
3992 switch (GET_CODE (x))
3994 case PRE_INC:
3995 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
3996 GET_MODE_SIZE (aarch64_memory_reference_mode));
3997 return;
3998 case POST_INC:
3999 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
4000 GET_MODE_SIZE (aarch64_memory_reference_mode));
4001 return;
4002 case PRE_DEC:
4003 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
4004 GET_MODE_SIZE (aarch64_memory_reference_mode));
4005 return;
4006 case POST_DEC:
4007 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
4008 GET_MODE_SIZE (aarch64_memory_reference_mode));
4009 return;
4010 case PRE_MODIFY:
4011 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
4012 INTVAL (addr.offset));
4013 return;
4014 case POST_MODIFY:
4015 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
4016 INTVAL (addr.offset));
4017 return;
4018 default:
4019 break;
4021 break;
4023 case ADDRESS_LO_SUM:
4024 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
4025 output_addr_const (f, addr.offset);
4026 asm_fprintf (f, "]");
4027 return;
4029 case ADDRESS_SYMBOLIC:
4030 break;
4033 output_addr_const (f, x);
4036 bool
4037 aarch64_label_mentioned_p (rtx x)
4039 const char *fmt;
4040 int i;
4042 if (GET_CODE (x) == LABEL_REF)
4043 return true;
4045 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4046 referencing instruction, but they are constant offsets, not
4047 symbols. */
4048 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4049 return false;
4051 fmt = GET_RTX_FORMAT (GET_CODE (x));
4052 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4054 if (fmt[i] == 'E')
4056 int j;
4058 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4059 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4060 return 1;
4062 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4063 return 1;
4066 return 0;
4069 /* Implement REGNO_REG_CLASS. */
4071 enum reg_class
4072 aarch64_regno_regclass (unsigned regno)
4074 if (GP_REGNUM_P (regno))
4075 return CORE_REGS;
4077 if (regno == SP_REGNUM)
4078 return STACK_REG;
4080 if (regno == FRAME_POINTER_REGNUM
4081 || regno == ARG_POINTER_REGNUM)
4082 return POINTER_REGS;
4084 if (FP_REGNUM_P (regno))
4085 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
4087 return NO_REGS;
4090 /* Try a machine-dependent way of reloading an illegitimate address
4091 operand. If we find one, push the reload and return the new rtx. */
4094 aarch64_legitimize_reload_address (rtx *x_p,
4095 enum machine_mode mode,
4096 int opnum, int type,
4097 int ind_levels ATTRIBUTE_UNUSED)
4099 rtx x = *x_p;
4101 /* Do not allow mem (plus (reg, const)) if vector mode. */
4102 if (aarch64_vector_mode_p (mode)
4103 && GET_CODE (x) == PLUS
4104 && REG_P (XEXP (x, 0))
4105 && CONST_INT_P (XEXP (x, 1)))
4107 rtx orig_rtx = x;
4108 x = copy_rtx (x);
4109 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4110 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4111 opnum, (enum reload_type) type);
4112 return x;
4115 /* We must recognize output that we have already generated ourselves. */
4116 if (GET_CODE (x) == PLUS
4117 && GET_CODE (XEXP (x, 0)) == PLUS
4118 && REG_P (XEXP (XEXP (x, 0), 0))
4119 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4120 && CONST_INT_P (XEXP (x, 1)))
4122 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4123 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4124 opnum, (enum reload_type) type);
4125 return x;
4128 /* We wish to handle large displacements off a base register by splitting
4129 the addend across an add and the mem insn. This can cut the number of
4130 extra insns needed from 3 to 1. It is only useful for load/store of a
4131 single register with a 12-bit offset field.
4132 if (GET_CODE (x) == PLUS
4133 && REG_P (XEXP (x, 0))
4134 && CONST_INT_P (XEXP (x, 1))
4135 && HARD_REGISTER_P (XEXP (x, 0))
4136 && mode != TImode
4137 && mode != TFmode
4138 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4140 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4141 HOST_WIDE_INT low = val & 0xfff;
4142 HOST_WIDE_INT high = val - low;
4143 HOST_WIDE_INT offs;
4144 rtx cst;
4145 enum machine_mode xmode = GET_MODE (x);
4147 /* In ILP32, xmode can be either DImode or SImode. */
4148 gcc_assert (xmode == DImode || xmode == SImode);
4150 /* Reload non-zero BLKmode offsets. This is because we cannot ascertain
4151 BLKmode alignment. */
4152 if (GET_MODE_SIZE (mode) == 0)
4153 return NULL_RTX;
4155 offs = low % GET_MODE_SIZE (mode);
4157 /* Align misaligned offset by adjusting high part to compensate. */
4158 if (offs != 0)
4160 if (aarch64_uimm12_shift (high + offs))
4162 /* Align down. */
4163 low = low - offs;
4164 high = high + offs;
4166 else
4168 /* Align up. */
4169 offs = GET_MODE_SIZE (mode) - offs;
4170 low = low + offs;
4171 high = high + (low & 0x1000) - offs;
4172 low &= 0xfff;
4176 /* Check for overflow. */
4177 if (high + low != val)
4178 return NULL_RTX;
4180 cst = GEN_INT (high);
4181 if (!aarch64_uimm12_shift (high))
4182 cst = force_const_mem (xmode, cst);
4184 /* Reload high part into base reg, leaving the low part
4185 in the mem instruction.
4186 Note that replacing this gen_rtx_PLUS with plus_constant is
4187 wrong in this case because we rely on the
4188 (plus (plus reg c1) c2) structure being preserved so that
4189 XEXP (*p, 0) in push_reload below uses the correct term. */
4190 x = gen_rtx_PLUS (xmode,
4191 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4192 GEN_INT (low));
4194 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4195 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4196 opnum, (enum reload_type) type);
4197 return x;
4200 return NULL_RTX;
4204 static reg_class_t
4205 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4206 reg_class_t rclass,
4207 enum machine_mode mode,
4208 secondary_reload_info *sri)
4210 /* Without the TARGET_SIMD instructions we cannot move a Q register
4211 to a Q register directly. We need a scratch. */
4212 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4213 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4214 && reg_class_subset_p (rclass, FP_REGS))
4216 if (mode == TFmode)
4217 sri->icode = CODE_FOR_aarch64_reload_movtf;
4218 else if (mode == TImode)
4219 sri->icode = CODE_FOR_aarch64_reload_movti;
4220 return NO_REGS;
4223 /* A TFmode or TImode memory access should be handled via FP_REGS,
4224 because AArch64 has richer addressing modes for LDR/STR instructions
4225 than for LDP/STP instructions. */
4226 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
4227 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4228 return FP_REGS;
4230 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4231 return CORE_REGS;
4233 return NO_REGS;
4236 static bool
4237 aarch64_can_eliminate (const int from, const int to)
4239 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4240 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4242 if (frame_pointer_needed)
4244 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4245 return true;
4246 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4247 return false;
4248 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4249 && !cfun->calls_alloca)
4250 return true;
4251 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4252 return true;
4254 return false;
4257 return true;
4260 HOST_WIDE_INT
4261 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4263 HOST_WIDE_INT frame_size;
4264 HOST_WIDE_INT offset;
4266 aarch64_layout_frame ();
4267 frame_size = (get_frame_size () + cfun->machine->frame.saved_regs_size
4268 + crtl->outgoing_args_size
4269 + cfun->machine->saved_varargs_size);
4271 frame_size = AARCH64_ROUND_UP (frame_size, STACK_BOUNDARY / BITS_PER_UNIT);
4272 offset = frame_size;
4274 if (to == HARD_FRAME_POINTER_REGNUM)
4276 if (from == ARG_POINTER_REGNUM)
4277 return offset - crtl->outgoing_args_size;
4279 if (from == FRAME_POINTER_REGNUM)
4280 return cfun->machine->frame.saved_regs_size + get_frame_size ();
4283 if (to == STACK_POINTER_REGNUM)
4285 if (from == FRAME_POINTER_REGNUM)
4287 HOST_WIDE_INT elim = crtl->outgoing_args_size
4288 + cfun->machine->frame.saved_regs_size
4289 + get_frame_size ();
4290 elim = AARCH64_ROUND_UP (elim, STACK_BOUNDARY / BITS_PER_UNIT);
4291 return elim;
4295 return offset;
4299 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4300 previous frame. */
4303 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4305 if (count != 0)
4306 return const0_rtx;
4307 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4311 static void
4312 aarch64_asm_trampoline_template (FILE *f)
4314 if (TARGET_ILP32)
4316 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4317 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4319 else
4321 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4322 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4324 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4325 assemble_aligned_integer (4, const0_rtx);
4326 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4327 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
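/* Schematically, with the default register assignments (x17 for IP1_REGNUM,
   x18 for STATIC_CHAIN_REGNUM), the LP64 trampoline emitted above is:

	ldr	x17, .+16	// target address, from bytes 16..23
	ldr	x18, .+20	// static chain value, from bytes 24..31
	br	x17
	.word	0		// pads the code out to 16 bytes
	.dword	0		// overwritten by aarch64_trampoline_init
	.dword	0		// overwritten by aarch64_trampoline_init  */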
4330 static void
4331 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4333 rtx fnaddr, mem, a_tramp;
4334 const int tramp_code_sz = 16;
4336 /* We don't need to copy the trailing D-words; we fill those in below. */
4337 emit_block_move (m_tramp, assemble_trampoline_template (),
4338 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4339 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4340 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4341 if (GET_MODE (fnaddr) != ptr_mode)
4342 fnaddr = convert_memory_address (ptr_mode, fnaddr);
4343 emit_move_insn (mem, fnaddr);
4345 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4346 emit_move_insn (mem, chain_value);
4348 /* XXX We should really define a "clear_cache" pattern and use
4349 gen_clear_cache(). */
4350 a_tramp = XEXP (m_tramp, 0);
4351 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4352 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4353 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4354 ptr_mode);
4357 static unsigned char
4358 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
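/* For example, TImode in CORE_REGS needs (16 + 7) / 8 == 2 registers,
   while V4SImode in FP_REGS is a vector mode (when SIMD is enabled) and
   fits in a single 128-bit register: (16 + 15) / 16 == 1.  */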
4360 switch (regclass)
4362 case CORE_REGS:
4363 case POINTER_REGS:
4364 case GENERAL_REGS:
4365 case ALL_REGS:
4366 case FP_REGS:
4367 case FP_LO_REGS:
4368 return
4369 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4370 (GET_MODE_SIZE (mode) + 7) / 8;
4371 case STACK_REG:
4372 return 1;
4374 case NO_REGS:
4375 return 0;
4377 default:
4378 break;
4380 gcc_unreachable ();
4383 static reg_class_t
4384 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4386 if (regclass == POINTER_REGS)
4387 return GENERAL_REGS;
4389 if (regclass == STACK_REG)
4391 if (REG_P(x)
4392 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4393 return regclass;
4395 return NO_REGS;
4398 /* If it's an integer immediate that MOVI can't handle, then
4399 FP_REGS is not an option, so we return NO_REGS instead. */
4400 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4401 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4402 return NO_REGS;
4404 /* Register elimination can result in a request for
4405 SP+constant->FP_REGS. We cannot support such operations, which
4406 use SP as the source and an FP_REG as the destination, so reject
4407 them outright. */
4408 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4410 rtx lhs = XEXP (x, 0);
4412 /* Look through a possible SUBREG introduced by ILP32. */
4413 if (GET_CODE (lhs) == SUBREG)
4414 lhs = SUBREG_REG (lhs);
4416 gcc_assert (REG_P (lhs));
4417 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4418 POINTER_REGS));
4419 return NO_REGS;
4422 return regclass;
4425 void
4426 aarch64_asm_output_labelref (FILE* f, const char *name)
4428 asm_fprintf (f, "%U%s", name);
4431 static void
4432 aarch64_elf_asm_constructor (rtx symbol, int priority)
4434 if (priority == DEFAULT_INIT_PRIORITY)
4435 default_ctor_section_asm_out_constructor (symbol, priority);
4436 else
4438 section *s;
4439 char buf[18];
4440 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4441 s = get_section (buf, SECTION_WRITE, NULL);
4442 switch_to_section (s);
4443 assemble_align (POINTER_SIZE);
4444 assemble_aligned_integer (POINTER_BYTES, symbol);
4448 static void
4449 aarch64_elf_asm_destructor (rtx symbol, int priority)
4451 if (priority == DEFAULT_INIT_PRIORITY)
4452 default_dtor_section_asm_out_destructor (symbol, priority);
4453 else
4455 section *s;
4456 char buf[18];
4457 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4458 s = get_section (buf, SECTION_WRITE, NULL);
4459 switch_to_section (s);
4460 assemble_align (POINTER_SIZE);
4461 assemble_aligned_integer (POINTER_BYTES, symbol);
4465 const char*
4466 aarch64_output_casesi (rtx *operands)
4468 char buf[100];
4469 char label[100];
4470 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
4471 int index;
4472 static const char *const patterns[4][2] =
4475 "ldrb\t%w3, [%0,%w1,uxtw]",
4476 "add\t%3, %4, %w3, sxtb #2"
4479 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4480 "add\t%3, %4, %w3, sxth #2"
4483 "ldr\t%w3, [%0,%w1,uxtw #2]",
4484 "add\t%3, %4, %w3, sxtw #2"
4486 /* We assume that DImode is only generated when not optimizing and
4487 that we don't really need 64-bit address offsets. That would
4488 imply an object file with 8GB of code in a single function! */
4490 "ldr\t%w3, [%0,%w1,uxtw #2]",
4491 "add\t%3, %4, %w3, sxtw #2"
4495 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4497 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4499 gcc_assert (index >= 0 && index <= 3);
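/* The sequence emitted below is roughly (shown for a 2-byte dispatch
   table; register numbers simply mirror the operand numbers):

	ldrh	w3, [x0, w1, uxtw #1]
	adr	x4, .Lrtx<N>
	add	x3, x4, w3, sxth #2
	br	x3
   .Lrtx<N>:

   where operand 0 addresses the table, operand 1 is the index and
   operands 3 and 4 are scratch registers.  */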
4501 /* Need to implement table size reduction by changing the code below. */
4502 output_asm_insn (patterns[index][0], operands);
4503 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4504 snprintf (buf, sizeof (buf),
4505 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4506 output_asm_insn (buf, operands);
4507 output_asm_insn (patterns[index][1], operands);
4508 output_asm_insn ("br\t%3", operands);
4509 assemble_label (asm_out_file, label);
4510 return "";
4514 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4515 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4516 operator. */
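/* For example, aarch64_uxt_size (1, 0x1fe) is 8, since 0xff << 1 == 0x1fe
   (a UXTB operand scaled by 2), and aarch64_uxt_size (0, 0xffff) is 16
   (UXTH).  A shift/mask pair that matches no such pattern yields 0.  */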
4519 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4521 if (shift >= 0 && shift <= 3)
4523 int size;
4524 for (size = 8; size <= 32; size *= 2)
4526 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4527 if (mask == bits << shift)
4528 return size;
4531 return 0;
4534 static bool
4535 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4536 const_rtx x ATTRIBUTE_UNUSED)
4538 /* We can't use blocks for constants when we're using a per-function
4539 constant pool. */
4540 return false;
4543 static section *
4544 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4545 rtx x ATTRIBUTE_UNUSED,
4546 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4548 /* Force all constant pool entries into the current function section. */
4549 return function_section (current_function_decl);
4553 /* Costs. */
4555 /* Helper function for rtx cost calculation. Strip a shift expression
4556 from X. Returns the inner operand if successful, or the original
4557 expression on failure. */
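/* For example, (ashift (reg) (const_int 3)) and (mult (reg) (const_int 8))
   both strip to (reg), whereas (ashift (reg) (reg)) is returned unchanged
   because the shift amount is not constant.  */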
4558 static rtx
4559 aarch64_strip_shift (rtx x)
4561 rtx op = x;
4563 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
4564 we can convert both to ROR during final output. */
4565 if ((GET_CODE (op) == ASHIFT
4566 || GET_CODE (op) == ASHIFTRT
4567 || GET_CODE (op) == LSHIFTRT
4568 || GET_CODE (op) == ROTATERT
4569 || GET_CODE (op) == ROTATE)
4570 && CONST_INT_P (XEXP (op, 1)))
4571 return XEXP (op, 0);
4573 if (GET_CODE (op) == MULT
4574 && CONST_INT_P (XEXP (op, 1))
4575 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4576 return XEXP (op, 0);
4578 return x;
4581 /* Helper function for rtx cost calculation. Strip an extend
4582 expression from X. Returns the inner operand if successful, or the
4583 original expression on failure. We deal with a number of possible
4584 canonicalization variations here. */
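/* For example, (zero_extend:DI (reg:SI)) strips to (reg:SI), as does
   (ashift (sign_extend:DI (reg:SI)) (const_int 2)).  The AND form
   (and:DI (mult:DI (reg:DI) (const_int 4)) (const_int 0x3fc)) strips to
   (reg:DI), since the mask is 0xff scaled by 4.  */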
4585 static rtx
4586 aarch64_strip_extend (rtx x)
4588 rtx op = x;
4590 /* Zero and sign extraction of a widened value. */
4591 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4592 && XEXP (op, 2) == const0_rtx
4593 && GET_CODE (XEXP (op, 0)) == MULT
4594 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4595 XEXP (op, 1)))
4596 return XEXP (XEXP (op, 0), 0);
4598 /* It can also be represented (for zero-extend) as an AND with an
4599 immediate. */
4600 if (GET_CODE (op) == AND
4601 && GET_CODE (XEXP (op, 0)) == MULT
4602 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4603 && CONST_INT_P (XEXP (op, 1))
4604 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4605 INTVAL (XEXP (op, 1))) != 0)
4606 return XEXP (XEXP (op, 0), 0);
4608 /* Now handle extended register, as this may also have an optional
4609 left shift by 1..4. */
4610 if (GET_CODE (op) == ASHIFT
4611 && CONST_INT_P (XEXP (op, 1))
4612 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4613 op = XEXP (op, 0);
4615 if (GET_CODE (op) == ZERO_EXTEND
4616 || GET_CODE (op) == SIGN_EXTEND)
4617 op = XEXP (op, 0);
4619 if (op != x)
4620 return op;
4622 return x;
4625 /* Helper function for rtx cost calculation. Calculate the cost of
4626 a MULT, which may be part of a multiply-accumulate rtx. Return
4627 the calculated cost of the expression, recursing manually in to
4628 operands where needed. */
4630 static int
4631 aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
4633 rtx op0, op1;
4634 const struct cpu_cost_table *extra_cost
4635 = aarch64_tune_params->insn_extra_cost;
4636 int cost = 0;
4637 bool maybe_fma = (outer == PLUS || outer == MINUS);
4638 enum machine_mode mode = GET_MODE (x);
4640 gcc_checking_assert (code == MULT);
4642 op0 = XEXP (x, 0);
4643 op1 = XEXP (x, 1);
4645 if (VECTOR_MODE_P (mode))
4646 mode = GET_MODE_INNER (mode);
4648 /* Integer multiply/fma. */
4649 if (GET_MODE_CLASS (mode) == MODE_INT)
4651 /* The multiply will be canonicalized as a shift, so cost it as such. */
4652 if (CONST_INT_P (op1)
4653 && exact_log2 (INTVAL (op1)) > 0)
4655 if (speed)
4657 if (maybe_fma)
4658 /* ADD (shifted register). */
4659 cost += extra_cost->alu.arith_shift;
4660 else
4661 /* LSL (immediate). */
4662 cost += extra_cost->alu.shift;
4665 cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
4667 return cost;
4670 /* Integer multiplies or FMAs have zero/sign extending variants. */
4671 if ((GET_CODE (op0) == ZERO_EXTEND
4672 && GET_CODE (op1) == ZERO_EXTEND)
4673 || (GET_CODE (op0) == SIGN_EXTEND
4674 && GET_CODE (op1) == SIGN_EXTEND))
4676 cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4677 + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
4679 if (speed)
4681 if (maybe_fma)
4682 /* MADD/SMADDL/UMADDL. */
4683 cost += extra_cost->mult[0].extend_add;
4684 else
4685 /* MUL/SMULL/UMULL. */
4686 cost += extra_cost->mult[0].extend;
4689 return cost;
4692 /* This is either an integer multiply or an FMA. In both cases
4693 we want to recurse and cost the operands. */
4694 cost += rtx_cost (op0, MULT, 0, speed)
4695 + rtx_cost (op1, MULT, 1, speed);
4697 if (speed)
4699 if (maybe_fma)
4700 /* MADD. */
4701 cost += extra_cost->mult[mode == DImode].add;
4702 else
4703 /* MUL. */
4704 cost += extra_cost->mult[mode == DImode].simple;
4707 return cost;
4709 else
4711 if (speed)
4713 /* Floating-point FMA can also support negations of the
4714 operands. */
4715 if (GET_CODE (op0) == NEG)
4717 maybe_fma = true;
4718 op0 = XEXP (op0, 0);
4720 if (GET_CODE (op1) == NEG)
4722 maybe_fma = true;
4723 op1 = XEXP (op1, 0);
4726 if (maybe_fma)
4727 /* FMADD/FNMADD/FNMSUB/FMSUB. */
4728 cost += extra_cost->fp[mode == DFmode].fma;
4729 else
4730 /* FMUL. */
4731 cost += extra_cost->fp[mode == DFmode].mult;
4734 cost += rtx_cost (op0, MULT, 0, speed)
4735 + rtx_cost (op1, MULT, 1, speed);
4736 return cost;
4740 static int
4741 aarch64_address_cost (rtx x,
4742 enum machine_mode mode,
4743 addr_space_t as ATTRIBUTE_UNUSED,
4744 bool speed)
4746 enum rtx_code c = GET_CODE (x);
4747 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4748 struct aarch64_address_info info;
4749 int cost = 0;
4750 info.shift = 0;
4752 if (!aarch64_classify_address (&info, x, mode, c, false))
4754 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
4756 /* This is a CONST or SYMBOL ref which will be split
4757 in a different way depending on the code model in use.
4758 Cost it through the generic infrastructure. */
4759 int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
4760 /* Divide through by the cost of one instruction to
4761 bring it to the same units as the address costs. */
4762 cost_symbol_ref /= COSTS_N_INSNS (1);
4763 /* The cost is then the cost of preparing the address,
4764 followed by an immediate (possibly 0) offset. */
4765 return cost_symbol_ref + addr_cost->imm_offset;
4767 else
4769 /* This is most likely a jump table from a case
4770 statement. */
4771 return addr_cost->register_offset;
4775 switch (info.type)
4777 case ADDRESS_LO_SUM:
4778 case ADDRESS_SYMBOLIC:
4779 case ADDRESS_REG_IMM:
4780 cost += addr_cost->imm_offset;
4781 break;
4783 case ADDRESS_REG_WB:
4784 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4785 cost += addr_cost->pre_modify;
4786 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4787 cost += addr_cost->post_modify;
4788 else
4789 gcc_unreachable ();
4791 break;
4793 case ADDRESS_REG_REG:
4794 cost += addr_cost->register_offset;
4795 break;
4797 case ADDRESS_REG_UXTW:
4798 case ADDRESS_REG_SXTW:
4799 cost += addr_cost->register_extend;
4800 break;
4802 default:
4803 gcc_unreachable ();
4807 if (info.shift > 0)
4809 /* For the sake of calculating the cost of the shifted register
4810 component, we can treat same sized modes in the same way. */
4811 switch (GET_MODE_BITSIZE (mode))
4813 case 16:
4814 cost += addr_cost->addr_scale_costs.hi;
4815 break;
4817 case 32:
4818 cost += addr_cost->addr_scale_costs.si;
4819 break;
4821 case 64:
4822 cost += addr_cost->addr_scale_costs.di;
4823 break;
4825 /* We can't tell, or this is a 128-bit vector. */
4826 default:
4827 cost += addr_cost->addr_scale_costs.ti;
4828 break;
4832 return cost;
4835 /* Calculate the cost of calculating X, storing it in *COST. Result
4836 is true if the total cost of the operation has now been calculated. */
4837 static bool
4838 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
4839 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
4841 rtx op0, op1;
4842 const struct cpu_cost_table *extra_cost
4843 = aarch64_tune_params->insn_extra_cost;
4844 enum machine_mode mode = GET_MODE (x);
4846 /* By default, assume that everything has equivalent cost to the
4847 cheapest instruction. Any additional costs are applied as a delta
4848 above this default. */
4849 *cost = COSTS_N_INSNS (1);
4851 /* TODO: The cost infrastructure currently does not handle
4852 vector operations. Assume that all vector operations
4853 are equally expensive. */
4854 if (VECTOR_MODE_P (mode))
4856 if (speed)
4857 *cost += extra_cost->vect.alu;
4858 return true;
4861 switch (code)
4863 case SET:
4864 /* The cost depends entirely on the operands to SET. */
4865 *cost = 0;
4866 op0 = SET_DEST (x);
4867 op1 = SET_SRC (x);
4869 switch (GET_CODE (op0))
4871 case MEM:
4872 if (speed)
4873 *cost += extra_cost->ldst.store;
4875 *cost += rtx_cost (op1, SET, 1, speed);
4876 return true;
4878 case SUBREG:
4879 if (! REG_P (SUBREG_REG (op0)))
4880 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
4882 /* Fall through. */
4883 case REG:
4884 /* const0_rtx is in general free, but we will use an
4885 instruction to set a register to 0. */
4886 if (REG_P (op1) || op1 == const0_rtx)
4888 /* The cost is 1 per register copied. */
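/* For example, a TImode register-to-register copy occupies two
   X-registers and so costs COSTS_N_INSNS (2), whereas an SImode or
   DImode copy costs COSTS_N_INSNS (1).  */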
4889 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
4890 / UNITS_PER_WORD;
4891 *cost = COSTS_N_INSNS (n_minus_1 + 1);
4893 else
4894 /* Cost is just the cost of the RHS of the set. */
4895 *cost += rtx_cost (op1, SET, 1, speed);
4896 return true;
4898 case ZERO_EXTRACT:
4899 case SIGN_EXTRACT:
4900 /* Bit-field insertion. Strip any redundant widening of
4901 the RHS to meet the width of the target. */
4902 if (GET_CODE (op1) == SUBREG)
4903 op1 = SUBREG_REG (op1);
4904 if ((GET_CODE (op1) == ZERO_EXTEND
4905 || GET_CODE (op1) == SIGN_EXTEND)
4906 && GET_CODE (XEXP (op0, 1)) == CONST_INT
4907 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
4908 >= INTVAL (XEXP (op0, 1))))
4909 op1 = XEXP (op1, 0);
4911 if (CONST_INT_P (op1))
4913 /* MOV immediate is assumed to always be cheap. */
4914 *cost = COSTS_N_INSNS (1);
4916 else
4918 /* BFM. */
4919 if (speed)
4920 *cost += extra_cost->alu.bfi;
4921 *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
4924 return true;
4926 default:
4927 /* We can't make sense of this; assume the default cost. */
4928 *cost = COSTS_N_INSNS (1);
4929 break;
4931 return false;
4933 case CONST_INT:
4934 /* If an instruction can incorporate a constant within the
4935 instruction, the instruction's expression avoids calling
4936 rtx_cost() on the constant. If rtx_cost() is called on a
4937 constant, then it is usually because the constant must be
4938 moved into a register by one or more instructions.
4940 The exception is constant 0, which can be expressed
4941 as XZR/WZR and is therefore free. The one case where this does
4942 not hold is (set (reg) (const0_rtx)), where we must cost
4943 the move. However, we can catch that when we cost the SET, so
4944 we don't need to consider that here. */
4945 if (x == const0_rtx)
4946 *cost = 0;
4947 else
4949 /* To a first approximation, the cost of building any other
4950 constant is proportional to the number of instructions
4951 required to build that constant. This is true whether we
4952 are compiling for SPEED or otherwise. */
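/* For instance, a constant such as 0x12345678 typically needs a
   MOVZ/MOVK pair and is therefore costed as COSTS_N_INSNS (2), while
   a single-MOV immediate costs COSTS_N_INSNS (1).  */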
4953 *cost = COSTS_N_INSNS (aarch64_build_constant (0,
4954 INTVAL (x),
4955 false));
4957 return true;
4959 case CONST_DOUBLE:
4960 if (speed)
4962 /* mov[df,sf]_aarch64. */
4963 if (aarch64_float_const_representable_p (x))
4964 /* FMOV (scalar immediate). */
4965 *cost += extra_cost->fp[mode == DFmode].fpconst;
4966 else if (!aarch64_float_const_zero_rtx_p (x))
4968 /* This will be a load from memory. */
4969 if (mode == DFmode)
4970 *cost += extra_cost->ldst.loadd;
4971 else
4972 *cost += extra_cost->ldst.loadf;
4974 else
4975 /* Otherwise this is +0.0. We get this using MOVI d0, #0
4976 or MOV v0.s[0], wzr - neither of which is modeled by the
4977 cost tables. Just use the default cost. */
4982 return true;
4984 case MEM:
4985 if (speed)
4986 *cost += extra_cost->ldst.load;
4988 return true;
4990 case NEG:
4991 op0 = XEXP (x, 0);
4993 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
4995 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
4996 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
4998 /* CSETM. */
4999 *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
5000 return true;
5003 /* Cost this as SUB wzr, X. */
5004 op0 = CONST0_RTX (GET_MODE (x));
5005 op1 = XEXP (x, 0);
5006 goto cost_minus;
5009 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
5011 /* Support (neg(fma...)) as a single instruction only if
5012 sign of zeros is unimportant. This matches the decision
5013 making in aarch64.md. */
5014 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
5016 /* FNMADD. */
5017 *cost = rtx_cost (op0, NEG, 0, speed);
5018 return true;
5020 if (speed)
5021 /* FNEG. */
5022 *cost += extra_cost->fp[mode == DFmode].neg;
5023 return false;
5026 return false;
5028 case COMPARE:
5029 op0 = XEXP (x, 0);
5030 op1 = XEXP (x, 1);
5032 if (op1 == const0_rtx
5033 && GET_CODE (op0) == AND)
5035 x = op0;
5036 goto cost_logic;
5039 /* Comparisons can work if the order is swapped.
5040 Canonicalization puts the more complex operation first, but
5041 we want it in op1. */
5042 if (! (REG_P (op0)
5043 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
5045 op0 = XEXP (x, 1);
5046 op1 = XEXP (x, 0);
5048 goto cost_minus;
5050 case MINUS:
5052 op0 = XEXP (x, 0);
5053 op1 = XEXP (x, 1);
5055 cost_minus:
5056 /* Detect valid immediates. */
5057 if ((GET_MODE_CLASS (mode) == MODE_INT
5058 || (GET_MODE_CLASS (mode) == MODE_CC
5059 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
5060 && CONST_INT_P (op1)
5061 && aarch64_uimm12_shift (INTVAL (op1)))
5063 *cost += rtx_cost (op0, MINUS, 0, speed);
5065 if (speed)
5066 /* SUB(S) (immediate). */
5067 *cost += extra_cost->alu.arith;
5068 return true;
5072 rtx new_op1 = aarch64_strip_extend (op1);
5074 /* Cost this as an FMA-alike operation. */
5075 if ((GET_CODE (new_op1) == MULT
5076 || GET_CODE (new_op1) == ASHIFT)
5077 && code != COMPARE)
5079 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
5080 (enum rtx_code) code,
5081 speed);
5082 *cost += rtx_cost (op0, MINUS, 0, speed);
5083 return true;
5086 *cost += rtx_cost (new_op1, MINUS, 1, speed);
5088 if (speed)
5090 if (GET_MODE_CLASS (mode) == MODE_INT)
5091 /* SUB(S). */
5092 *cost += extra_cost->alu.arith;
5093 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5094 /* FSUB. */
5095 *cost += extra_cost->fp[mode == DFmode].addsub;
5097 return true;
5100 case PLUS:
5102 rtx new_op0;
5104 op0 = XEXP (x, 0);
5105 op1 = XEXP (x, 1);
5107 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5108 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5110 /* CSINC. */
5111 *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
5112 *cost += rtx_cost (op1, PLUS, 1, speed);
5113 return true;
5116 if (GET_MODE_CLASS (mode) == MODE_INT
5117 && CONST_INT_P (op1)
5118 && aarch64_uimm12_shift (INTVAL (op1)))
5120 *cost += rtx_cost (op0, PLUS, 0, speed);
5122 if (speed)
5123 /* ADD (immediate). */
5124 *cost += extra_cost->alu.arith;
5125 return true;
5128 /* Strip any extend, leave shifts behind as we will
5129 cost them through mult_cost. */
5130 new_op0 = aarch64_strip_extend (op0);
5132 if (GET_CODE (new_op0) == MULT
5133 || GET_CODE (new_op0) == ASHIFT)
5135 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
5136 speed);
5137 *cost += rtx_cost (op1, PLUS, 1, speed);
5138 return true;
5141 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
5142 + rtx_cost (op1, PLUS, 1, speed));
5144 if (speed)
5146 if (GET_MODE_CLASS (mode) == MODE_INT)
5147 /* ADD. */
5148 *cost += extra_cost->alu.arith;
5149 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5150 /* FADD. */
5151 *cost += extra_cost->fp[mode == DFmode].addsub;
5153 return true;
5156 case BSWAP:
5157 *cost = COSTS_N_INSNS (1);
5159 if (speed)
5160 *cost += extra_cost->alu.rev;
5162 return false;
5164 case IOR:
5165 if (aarch_rev16_p (x))
5167 *cost = COSTS_N_INSNS (1);
5169 if (speed)
5170 *cost += extra_cost->alu.rev;
5172 return true;
5174 /* Fall through. */
5175 case XOR:
5176 case AND:
5177 cost_logic:
5178 op0 = XEXP (x, 0);
5179 op1 = XEXP (x, 1);
5181 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5183 if (CONST_INT_P (op1)
5184 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
5186 *cost += rtx_cost (op0, AND, 0, speed);
5188 else
5190 if (GET_CODE (op0) == NOT)
5191 op0 = XEXP (op0, 0);
5192 op0 = aarch64_strip_shift (op0);
5193 *cost += (rtx_cost (op0, AND, 0, speed)
5194 + rtx_cost (op1, AND, 1, speed));
5196 return true;
5198 return false;
5200 case ZERO_EXTEND:
5201 if ((GET_MODE (x) == DImode
5202 && GET_MODE (XEXP (x, 0)) == SImode)
5203 || GET_CODE (XEXP (x, 0)) == MEM)
5205 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
5206 return true;
5208 return false;
5210 case SIGN_EXTEND:
5211 if (GET_CODE (XEXP (x, 0)) == MEM)
5213 *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
5214 return true;
5216 return false;
5218 case ROTATE:
5219 if (!CONST_INT_P (XEXP (x, 1)))
5220 *cost += COSTS_N_INSNS (2);
5221 /* Fall through. */
5222 case ROTATERT:
5223 case LSHIFTRT:
5224 case ASHIFT:
5225 case ASHIFTRT:
5227 /* Shifting by a register often takes an extra cycle. */
5228 if (speed && !CONST_INT_P (XEXP (x, 1)))
5229 *cost += extra_cost->alu.arith_shift_reg;
5231 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
5232 return true;
5234 case HIGH:
5235 if (!CONSTANT_P (XEXP (x, 0)))
5236 *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
5237 return true;
5239 case LO_SUM:
5240 if (!CONSTANT_P (XEXP (x, 1)))
5241 *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
5242 *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
5243 return true;
5245 case ZERO_EXTRACT:
5246 case SIGN_EXTRACT:
5247 *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
5248 return true;
5250 case MULT:
5251 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
5252 /* aarch64_rtx_mult_cost always handles recursion to its
5253 operands. */
5254 return true;
5256 case MOD:
5257 case UMOD:
5258 *cost = COSTS_N_INSNS (2);
5259 if (speed)
5261 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5262 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
5263 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
5264 else if (GET_MODE (x) == DFmode)
5265 *cost += (extra_cost->fp[1].mult
5266 + extra_cost->fp[1].div);
5267 else if (GET_MODE (x) == SFmode)
5268 *cost += (extra_cost->fp[0].mult
5269 + extra_cost->fp[0].div);
5271 return false; /* All arguments need to be in registers. */
5273 case DIV:
5274 case UDIV:
5275 *cost = COSTS_N_INSNS (1);
5276 if (speed)
5278 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5279 *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv;
5280 else if (GET_MODE (x) == DFmode)
5281 *cost += extra_cost->fp[1].div;
5282 else if (GET_MODE (x) == SFmode)
5283 *cost += extra_cost->fp[0].div;
5285 return false; /* All arguments need to be in registers. */
5287 default:
5288 break;
5290 return false;
5293 /* Wrapper around aarch64_rtx_costs; dumps the partial or total cost
5294 calculated for X. This cost is stored in *COST. Returns true
5295 if the total cost of X was calculated. */
5296 static bool
5297 aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
5298 int param, int *cost, bool speed)
5300 bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
5302 if (dump_file && (dump_flags & TDF_DETAILS))
5304 print_rtl_single (dump_file, x);
5305 fprintf (dump_file, "\n%s cost: %d (%s)\n",
5306 speed ? "Hot" : "Cold",
5307 *cost, result ? "final" : "partial");
5310 return result;
5313 static int
5314 aarch64_register_move_cost (enum machine_mode mode,
5315 reg_class_t from_i, reg_class_t to_i)
5317 enum reg_class from = (enum reg_class) from_i;
5318 enum reg_class to = (enum reg_class) to_i;
5319 const struct cpu_regmove_cost *regmove_cost
5320 = aarch64_tune_params->regmove_cost;
5322 /* Moving between a GPR and the stack register costs the same as GP2GP. */
5323 if ((from == GENERAL_REGS && to == STACK_REG)
5324 || (to == GENERAL_REGS && from == STACK_REG))
5325 return regmove_cost->GP2GP;
5327 /* To/From the stack register, we move via the gprs. */
5328 if (to == STACK_REG || from == STACK_REG)
5329 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
5330 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
5332 if (from == GENERAL_REGS && to == GENERAL_REGS)
5333 return regmove_cost->GP2GP;
5334 else if (from == GENERAL_REGS)
5335 return regmove_cost->GP2FP;
5336 else if (to == GENERAL_REGS)
5337 return regmove_cost->FP2GP;
5339 /* When AdvSIMD instructions are disabled it is not possible to move
5340 a 128-bit value directly between Q registers. This is handled in
5341 secondary reload. A general register is used as a scratch to move
5342 the upper DI value and the lower DI value is moved directly,
5343 hence the cost is the sum of three moves (GET_MODE_SIZE is in bytes). */
5344 if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
5345 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
5347 return regmove_cost->FP2FP;
5350 static int
5351 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
5352 reg_class_t rclass ATTRIBUTE_UNUSED,
5353 bool in ATTRIBUTE_UNUSED)
5355 return aarch64_tune_params->memmov_cost;
5358 /* Return the number of instructions that can be issued per cycle. */
5359 static int
5360 aarch64_sched_issue_rate (void)
5362 return aarch64_tune_params->issue_rate;
5365 /* Vectorizer cost model target hooks. */
5367 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5368 static int
5369 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5370 tree vectype,
5371 int misalign ATTRIBUTE_UNUSED)
5373 unsigned elements;
5375 switch (type_of_cost)
5377 case scalar_stmt:
5378 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
5380 case scalar_load:
5381 return aarch64_tune_params->vec_costs->scalar_load_cost;
5383 case scalar_store:
5384 return aarch64_tune_params->vec_costs->scalar_store_cost;
5386 case vector_stmt:
5387 return aarch64_tune_params->vec_costs->vec_stmt_cost;
5389 case vector_load:
5390 return aarch64_tune_params->vec_costs->vec_align_load_cost;
5392 case vector_store:
5393 return aarch64_tune_params->vec_costs->vec_store_cost;
5395 case vec_to_scalar:
5396 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
5398 case scalar_to_vec:
5399 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
5401 case unaligned_load:
5402 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
5404 case unaligned_store:
5405 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
5407 case cond_branch_taken:
5408 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
5410 case cond_branch_not_taken:
5411 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
5413 case vec_perm:
5414 case vec_promote_demote:
5415 return aarch64_tune_params->vec_costs->vec_stmt_cost;
5417 case vec_construct:
5418 elements = TYPE_VECTOR_SUBPARTS (vectype);
5419 return elements / 2 + 1;
5421 default:
5422 gcc_unreachable ();
5426 /* Implement targetm.vectorize.add_stmt_cost. */
5427 static unsigned
5428 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
5429 struct _stmt_vec_info *stmt_info, int misalign,
5430 enum vect_cost_model_location where)
5432 unsigned *cost = (unsigned *) data;
5433 unsigned retval = 0;
5435 if (flag_vect_cost_model)
5437 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
5438 int stmt_cost =
5439 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
5441 /* Statements in an inner loop relative to the loop being
5442 vectorized are weighted more heavily. The value here is
5443 a function (linear for now) of the loop nest level. */
5444 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
5446 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
5447 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
5448 unsigned nest_level = loop_depth (loop);
5450 count *= nest_level;
5453 retval = (unsigned) (count * stmt_cost);
5454 cost[where] += retval;
5457 return retval;
5460 static void initialize_aarch64_code_model (void);
5462 /* Parse the architecture extension string. */
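/* The string starts at the first '+' of the option value; for example,
   with -march=armv8-a+crypto+nofp this function receives "+crypto+nofp",
   turning on the crypto feature flags and then clearing the flags in the
   "fp" entry's flags_off set.  */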
5464 static void
5465 aarch64_parse_extension (char *str)
5467 /* The extension string is parsed left to right. */
5468 const struct aarch64_option_extension *opt = NULL;
5470 /* Flag to say whether we are adding or removing an extension. */
5471 int adding_ext = -1;
5473 while (str != NULL && *str != 0)
5475 char *ext;
5476 size_t len;
5478 str++;
5479 ext = strchr (str, '+');
5481 if (ext != NULL)
5482 len = ext - str;
5483 else
5484 len = strlen (str);
5486 if (len >= 2 && strncmp (str, "no", 2) == 0)
5488 adding_ext = 0;
5489 len -= 2;
5490 str += 2;
5492 else if (len > 0)
5493 adding_ext = 1;
5495 if (len == 0)
5497 error ("missing feature modifier after %qs", "+no");
5498 return;
5501 /* Scan over the extensions table trying to find an exact match. */
5502 for (opt = all_extensions; opt->name != NULL; opt++)
5504 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
5506 /* Add or remove the extension. */
5507 if (adding_ext)
5508 aarch64_isa_flags |= opt->flags_on;
5509 else
5510 aarch64_isa_flags &= ~(opt->flags_off);
5511 break;
5515 if (opt->name == NULL)
5517 /* Extension not found in list. */
5518 error ("unknown feature modifier %qs", str);
5519 return;
5522 str = ext;
5525 return;
5528 /* Parse the ARCH string. */
5530 static void
5531 aarch64_parse_arch (void)
5533 char *ext;
5534 const struct processor *arch;
5535 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
5536 size_t len;
5538 strcpy (str, aarch64_arch_string);
5540 ext = strchr (str, '+');
5542 if (ext != NULL)
5543 len = ext - str;
5544 else
5545 len = strlen (str);
5547 if (len == 0)
5549 error ("missing arch name in -march=%qs", str);
5550 return;
5553 /* Loop through the list of supported ARCHs to find a match. */
5554 for (arch = all_architectures; arch->name != NULL; arch++)
5556 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
5558 selected_arch = arch;
5559 aarch64_isa_flags = selected_arch->flags;
5561 if (!selected_cpu)
5562 selected_cpu = &all_cores[selected_arch->core];
5564 if (ext != NULL)
5566 /* ARCH string contains at least one extension. */
5567 aarch64_parse_extension (ext);
5570 if (strcmp (selected_arch->arch, selected_cpu->arch))
5572 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
5573 selected_cpu->name, selected_arch->name);
5576 return;
5580 /* ARCH name not found in list. */
5581 error ("unknown value %qs for -march", str);
5582 return;
5585 /* Parse the CPU string. */
5587 static void
5588 aarch64_parse_cpu (void)
5590 char *ext;
5591 const struct processor *cpu;
5592 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
5593 size_t len;
5595 strcpy (str, aarch64_cpu_string);
5597 ext = strchr (str, '+');
5599 if (ext != NULL)
5600 len = ext - str;
5601 else
5602 len = strlen (str);
5604 if (len == 0)
5606 error ("missing cpu name in -mcpu=%qs", str);
5607 return;
5610 /* Loop through the list of supported CPUs to find a match. */
5611 for (cpu = all_cores; cpu->name != NULL; cpu++)
5613 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
5615 selected_cpu = cpu;
5616 selected_tune = cpu;
5617 aarch64_isa_flags = selected_cpu->flags;
5619 if (ext != NULL)
5621 /* CPU string contains at least one extension. */
5622 aarch64_parse_extension (ext);
5625 return;
5629 /* CPU name not found in list. */
5630 error ("unknown value %qs for -mcpu", str);
5631 return;
5634 /* Parse the TUNE string. */
5636 static void
5637 aarch64_parse_tune (void)
5639 const struct processor *cpu;
5640 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
5641 strcpy (str, aarch64_tune_string);
5643 /* Loop through the list of supported CPUs to find a match. */
5644 for (cpu = all_cores; cpu->name != NULL; cpu++)
5646 if (strcmp (cpu->name, str) == 0)
5648 selected_tune = cpu;
5649 return;
5653 /* CPU name not found in list. */
5654 error ("unknown value %qs for -mtune", str);
5655 return;
5659 /* Implement TARGET_OPTION_OVERRIDE. */
5661 static void
5662 aarch64_override_options (void)
5664 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
5665 If either of -march or -mtune is given, they override their
5666 respective component of -mcpu.
5668 So, first parse AARCH64_CPU_STRING, then the others. Be careful
5669 with -march: if -mcpu is not present on the command line, -march
5670 must set a sensible default CPU. */
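/* For example, -mcpu=cortex-a57 -mtune=cortex-a53 selects the architecture
   and ISA flags of cortex-a57 but the tuning tables of cortex-a53.  */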
5671 if (aarch64_cpu_string)
5673 aarch64_parse_cpu ();
5676 if (aarch64_arch_string)
5678 aarch64_parse_arch ();
5681 if (aarch64_tune_string)
5683 aarch64_parse_tune ();
5686 #ifndef HAVE_AS_MABI_OPTION
5687 /* The compiler may have been configured with 2.23.* binutils, which does
5688 not have support for ILP32. */
5689 if (TARGET_ILP32)
5690 error ("Assembler does not support -mabi=ilp32");
5691 #endif
5693 initialize_aarch64_code_model ();
5695 aarch64_build_bitmask_table ();
5697 /* This target defaults to strict volatile bitfields. */
5698 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
5699 flag_strict_volatile_bitfields = 1;
5701 /* If the user did not specify a processor, choose the default
5702 one for them. This will be the CPU set during configuration using
5703 --with-cpu, otherwise it is "generic". */
5704 if (!selected_cpu)
5706 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
5707 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
5710 gcc_assert (selected_cpu);
5712 /* The selected cpu may be an architecture, so look up tuning by core ID. */
5713 if (!selected_tune)
5714 selected_tune = &all_cores[selected_cpu->core];
5716 aarch64_tune_flags = selected_tune->flags;
5717 aarch64_tune = selected_tune->core;
5718 aarch64_tune_params = selected_tune->tune;
5720 aarch64_override_options_after_change ();
5723 /* Implement targetm.override_options_after_change. */
5725 static void
5726 aarch64_override_options_after_change (void)
5728 if (flag_omit_frame_pointer)
5729 flag_omit_leaf_frame_pointer = false;
5730 else if (flag_omit_leaf_frame_pointer)
5731 flag_omit_frame_pointer = true;
5734 static struct machine_function *
5735 aarch64_init_machine_status (void)
5737 struct machine_function *machine;
5738 machine = ggc_alloc_cleared_machine_function ();
5739 return machine;
5742 void
5743 aarch64_init_expanders (void)
5745 init_machine_status = aarch64_init_machine_status;
5748 /* A checking mechanism for the implementation of the various code models. */
5749 static void
5750 initialize_aarch64_code_model (void)
5752 if (flag_pic)
5754 switch (aarch64_cmodel_var)
5756 case AARCH64_CMODEL_TINY:
5757 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
5758 break;
5759 case AARCH64_CMODEL_SMALL:
5760 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
5761 break;
5762 case AARCH64_CMODEL_LARGE:
5763 sorry ("code model %qs with -f%s", "large",
5764 flag_pic > 1 ? "PIC" : "pic");
5765 default:
5766 gcc_unreachable ();
5769 else
5770 aarch64_cmodel = aarch64_cmodel_var;
5773 /* Return true if SYMBOL_REF X binds locally. */
5775 static bool
5776 aarch64_symbol_binds_local_p (const_rtx x)
5778 return (SYMBOL_REF_DECL (x)
5779 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
5780 : SYMBOL_REF_LOCAL_P (x));
5783 /* Return true if SYMBOL_REF X is thread-local. */
5784 static bool
5785 aarch64_tls_symbol_p (rtx x)
5787 if (! TARGET_HAVE_TLS)
5788 return false;
5790 if (GET_CODE (x) != SYMBOL_REF)
5791 return false;
5793 return SYMBOL_REF_TLS_MODEL (x) != 0;
5796 /* Classify a TLS symbol into one of the TLS kinds. */
5797 enum aarch64_symbol_type
5798 aarch64_classify_tls_symbol (rtx x)
5800 enum tls_model tls_kind = tls_symbolic_operand_type (x);
5802 switch (tls_kind)
5804 case TLS_MODEL_GLOBAL_DYNAMIC:
5805 case TLS_MODEL_LOCAL_DYNAMIC:
5806 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
5808 case TLS_MODEL_INITIAL_EXEC:
5809 return SYMBOL_SMALL_GOTTPREL;
5811 case TLS_MODEL_LOCAL_EXEC:
5812 return SYMBOL_SMALL_TPREL;
5814 case TLS_MODEL_EMULATED:
5815 case TLS_MODEL_NONE:
5816 return SYMBOL_FORCE_TO_MEM;
5818 default:
5819 gcc_unreachable ();
5823 /* Return the method that should be used to access SYMBOL_REF or
5824 LABEL_REF X in context CONTEXT. */
5826 enum aarch64_symbol_type
5827 aarch64_classify_symbol (rtx x,
5828 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
5830 if (GET_CODE (x) == LABEL_REF)
5832 switch (aarch64_cmodel)
5834 case AARCH64_CMODEL_LARGE:
5835 return SYMBOL_FORCE_TO_MEM;
5837 case AARCH64_CMODEL_TINY_PIC:
5838 case AARCH64_CMODEL_TINY:
5839 return SYMBOL_TINY_ABSOLUTE;
5841 case AARCH64_CMODEL_SMALL_PIC:
5842 case AARCH64_CMODEL_SMALL:
5843 return SYMBOL_SMALL_ABSOLUTE;
5845 default:
5846 gcc_unreachable ();
5850 if (GET_CODE (x) == SYMBOL_REF)
5852 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
5853 return SYMBOL_FORCE_TO_MEM;
5855 if (aarch64_tls_symbol_p (x))
5856 return aarch64_classify_tls_symbol (x);
5858 switch (aarch64_cmodel)
5860 case AARCH64_CMODEL_TINY:
5861 if (SYMBOL_REF_WEAK (x))
5862 return SYMBOL_FORCE_TO_MEM;
5863 return SYMBOL_TINY_ABSOLUTE;
5865 case AARCH64_CMODEL_SMALL:
5866 if (SYMBOL_REF_WEAK (x))
5867 return SYMBOL_FORCE_TO_MEM;
5868 return SYMBOL_SMALL_ABSOLUTE;
5870 case AARCH64_CMODEL_TINY_PIC:
5871 if (!aarch64_symbol_binds_local_p (x))
5872 return SYMBOL_TINY_GOT;
5873 return SYMBOL_TINY_ABSOLUTE;
5875 case AARCH64_CMODEL_SMALL_PIC:
5876 if (!aarch64_symbol_binds_local_p (x))
5877 return SYMBOL_SMALL_GOT;
5878 return SYMBOL_SMALL_ABSOLUTE;
5880 default:
5881 gcc_unreachable ();
5885 /* By default push everything into the constant pool. */
5886 return SYMBOL_FORCE_TO_MEM;
5889 bool
5890 aarch64_constant_address_p (rtx x)
5892 return (CONSTANT_P (x) && memory_address_p (DImode, x));
5895 bool
5896 aarch64_legitimate_pic_operand_p (rtx x)
5898 if (GET_CODE (x) == SYMBOL_REF
5899 || (GET_CODE (x) == CONST
5900 && GET_CODE (XEXP (x, 0)) == PLUS
5901 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
5902 return false;
5904 return true;
5907 /* Return true if X holds either a quarter-precision constant or a
5908 floating-point +0.0 constant. */
5909 static bool
5910 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
5912 if (!CONST_DOUBLE_P (x))
5913 return false;
5915 /* TODO: We could handle moving 0.0 to a TFmode register,
5916 but first we would like to refactor movtf_aarch64
5917 to be more amenable to splitting moves properly and to
5918 gating correctly on TARGET_SIMD. For now, reject all
5919 constants that are not destined for SFmode or DFmode registers. */
5920 if (!(mode == SFmode || mode == DFmode))
5921 return false;
5923 if (aarch64_float_const_zero_rtx_p (x))
5924 return true;
5925 return aarch64_float_const_representable_p (x);
5928 static bool
5929 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
5931 /* Do not allow vector struct mode constants. We could support
5932 0 and -1 easily, but they need support in aarch64-simd.md. */
5933 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
5934 return false;
5936 /* This could probably go away because
5937 we now decompose CONST_INTs according to expand_mov_immediate. */
5938 if ((GET_CODE (x) == CONST_VECTOR
5939 && aarch64_simd_valid_immediate (x, mode, false, NULL))
5940 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
5941 return !targetm.cannot_force_const_mem (mode, x);
5943 if (GET_CODE (x) == HIGH
5944 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
5945 return true;
5947 return aarch64_constant_address_p (x);
5951 aarch64_load_tp (rtx target)
5953 if (!target
5954 || GET_MODE (target) != Pmode
5955 || !register_operand (target, Pmode))
5956 target = gen_reg_rtx (Pmode);
5958 /* Can return in any reg. */
5959 emit_insn (gen_aarch64_load_tp_hard (target));
5960 return target;
5963 /* On AAPCS systems, this is the "struct __va_list". */
5964 static GTY(()) tree va_list_type;
5966 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
5967 Return the type to use as __builtin_va_list.
5969 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
5971 struct __va_list
5973 void *__stack;
5974 void *__gr_top;
5975 void *__vr_top;
5976 int __gr_offs;
5977 int __vr_offs;
5978 }; */
5980 static tree
5981 aarch64_build_builtin_va_list (void)
5983 tree va_list_name;
5984 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
5986 /* Create the type. */
5987 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
5988 /* Give it the required name. */
5989 va_list_name = build_decl (BUILTINS_LOCATION,
5990 TYPE_DECL,
5991 get_identifier ("__va_list"),
5992 va_list_type);
5993 DECL_ARTIFICIAL (va_list_name) = 1;
5994 TYPE_NAME (va_list_type) = va_list_name;
5995 TYPE_STUB_DECL (va_list_type) = va_list_name;
5997 /* Create the fields. */
5998 f_stack = build_decl (BUILTINS_LOCATION,
5999 FIELD_DECL, get_identifier ("__stack"),
6000 ptr_type_node);
6001 f_grtop = build_decl (BUILTINS_LOCATION,
6002 FIELD_DECL, get_identifier ("__gr_top"),
6003 ptr_type_node);
6004 f_vrtop = build_decl (BUILTINS_LOCATION,
6005 FIELD_DECL, get_identifier ("__vr_top"),
6006 ptr_type_node);
6007 f_groff = build_decl (BUILTINS_LOCATION,
6008 FIELD_DECL, get_identifier ("__gr_offs"),
6009 integer_type_node);
6010 f_vroff = build_decl (BUILTINS_LOCATION,
6011 FIELD_DECL, get_identifier ("__vr_offs"),
6012 integer_type_node);
6014 DECL_ARTIFICIAL (f_stack) = 1;
6015 DECL_ARTIFICIAL (f_grtop) = 1;
6016 DECL_ARTIFICIAL (f_vrtop) = 1;
6017 DECL_ARTIFICIAL (f_groff) = 1;
6018 DECL_ARTIFICIAL (f_vroff) = 1;
6020 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
6021 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
6022 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
6023 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
6024 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
6026 TYPE_FIELDS (va_list_type) = f_stack;
6027 DECL_CHAIN (f_stack) = f_grtop;
6028 DECL_CHAIN (f_grtop) = f_vrtop;
6029 DECL_CHAIN (f_vrtop) = f_groff;
6030 DECL_CHAIN (f_groff) = f_vroff;
6032 /* Compute its layout. */
6033 layout_type (va_list_type);
6035 return va_list_type;
6038 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
6039 static void
6040 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6042 const CUMULATIVE_ARGS *cum;
6043 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6044 tree stack, grtop, vrtop, groff, vroff;
6045 tree t;
6046 int gr_save_area_size;
6047 int vr_save_area_size;
6048 int vr_offset;
6050 cum = &crtl->args.info;
6051 gr_save_area_size
6052 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
6053 vr_save_area_size
6054 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
6056 if (TARGET_GENERAL_REGS_ONLY)
6058 if (cum->aapcs_nvrn > 0)
6059 sorry ("%qs and floating point or vector arguments",
6060 "-mgeneral-regs-only");
6061 vr_save_area_size = 0;
6064 f_stack = TYPE_FIELDS (va_list_type_node);
6065 f_grtop = DECL_CHAIN (f_stack);
6066 f_vrtop = DECL_CHAIN (f_grtop);
6067 f_groff = DECL_CHAIN (f_vrtop);
6068 f_vroff = DECL_CHAIN (f_groff);
6070 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
6071 NULL_TREE);
6072 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
6073 NULL_TREE);
6074 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
6075 NULL_TREE);
6076 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
6077 NULL_TREE);
6078 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
6079 NULL_TREE);
6081 /* Emit code to initialize STACK, which points to the next varargs stack
6082 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
6083 by named arguments. STACK is 8-byte aligned. */
6084 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
6085 if (cum->aapcs_stack_size > 0)
6086 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
6087 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
6088 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6090 /* Emit code to initialize GRTOP, the top of the GR save area.
6091 virtual_incoming_args_rtx should have been 16 byte aligned. */
6092 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
6093 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
6094 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6096 /* Emit code to initialize VRTOP, the top of the VR save area.
6097 This address is gr_save_area_bytes below GRTOP, rounded
6098 down to the next 16-byte boundary. */
6099 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
6100 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
6101 STACK_BOUNDARY / BITS_PER_UNIT);
6103 if (vr_offset)
6104 t = fold_build_pointer_plus_hwi (t, -vr_offset);
6105 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
6106 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6108 /* Emit code to initialize GROFF, the offset from GRTOP of the
6109 next GPR argument. */
6110 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
6111 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
6112 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6114 /* Likewise emit code to initialize VROFF, the offset from VRTOP
6115 of the next VR argument. */
6116 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
6117 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
6118 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6121 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
6123 static tree
6124 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6125 gimple_seq *post_p ATTRIBUTE_UNUSED)
6127 tree addr;
6128 bool indirect_p;
6129 bool is_ha; /* is HFA or HVA. */
6130 bool dw_align; /* double-word align. */
6131 enum machine_mode ag_mode = VOIDmode;
6132 int nregs;
6133 enum machine_mode mode;
6135 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6136 tree stack, f_top, f_off, off, arg, roundup, on_stack;
6137 HOST_WIDE_INT size, rsize, adjust, align;
6138 tree t, u, cond1, cond2;
6140 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6141 if (indirect_p)
6142 type = build_pointer_type (type);
6144 mode = TYPE_MODE (type);
6146 f_stack = TYPE_FIELDS (va_list_type_node);
6147 f_grtop = DECL_CHAIN (f_stack);
6148 f_vrtop = DECL_CHAIN (f_grtop);
6149 f_groff = DECL_CHAIN (f_vrtop);
6150 f_vroff = DECL_CHAIN (f_groff);
6152 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
6153 f_stack, NULL_TREE);
6154 size = int_size_in_bytes (type);
6155 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
6157 dw_align = false;
6158 adjust = 0;
6159 if (aarch64_vfp_is_call_or_return_candidate (mode,
6160 type,
6161 &ag_mode,
6162 &nregs,
6163 &is_ha))
6165 /* TYPE passed in fp/simd registers. */
6166 if (TARGET_GENERAL_REGS_ONLY)
6167 sorry ("%qs and floating point or vector arguments",
6168 "-mgeneral-regs-only");
6170 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
6171 unshare_expr (valist), f_vrtop, NULL_TREE);
6172 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
6173 unshare_expr (valist), f_vroff, NULL_TREE);
6175 rsize = nregs * UNITS_PER_VREG;
6177 if (is_ha)
6179 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
6180 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
6182 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
6183 && size < UNITS_PER_VREG)
6185 adjust = UNITS_PER_VREG - size;
6188 else
6190 /* TYPE passed in general registers. */
6191 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
6192 unshare_expr (valist), f_grtop, NULL_TREE);
6193 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
6194 unshare_expr (valist), f_groff, NULL_TREE);
6195 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6196 nregs = rsize / UNITS_PER_WORD;
6198 if (align > 8)
6199 dw_align = true;
6201 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6202 && size < UNITS_PER_WORD)
6204 adjust = UNITS_PER_WORD - size;
6208 /* Get a local temporary for the field value. */
6209 off = get_initialized_tmp_var (f_off, pre_p, NULL);
6211 /* Emit code to branch if off >= 0. */
6212 t = build2 (GE_EXPR, boolean_type_node, off,
6213 build_int_cst (TREE_TYPE (off), 0));
6214 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
6216 if (dw_align)
6218 /* Emit: offs = (offs + 15) & -16. */
6219 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6220 build_int_cst (TREE_TYPE (off), 15));
6221 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
6222 build_int_cst (TREE_TYPE (off), -16));
6223 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
6225 else
6226 roundup = NULL;
6228 /* Update ap.__[g|v]r_offs */
6229 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6230 build_int_cst (TREE_TYPE (off), rsize));
6231 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
6233 /* String up. */
6234 if (roundup)
6235 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6237 /* [cond2] if (ap.__[g|v]r_offs > 0) */
6238 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
6239 build_int_cst (TREE_TYPE (f_off), 0));
6240 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
6242 /* String up: make sure the assignment happens before the use. */
6243 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
6244 COND_EXPR_ELSE (cond1) = t;
6246 /* Prepare the trees handling the argument that is passed on the stack;
6247 the top-level node will be stored in ON_STACK. */
6248 arg = get_initialized_tmp_var (stack, pre_p, NULL);
6249 if (align > 8)
6251 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
6252 t = fold_convert (intDI_type_node, arg);
6253 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6254 build_int_cst (TREE_TYPE (t), 15));
6255 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6256 build_int_cst (TREE_TYPE (t), -16));
6257 t = fold_convert (TREE_TYPE (arg), t);
6258 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
6260 else
6261 roundup = NULL;
6262 /* Advance ap.__stack */
6263 t = fold_convert (intDI_type_node, arg);
6264 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6265 build_int_cst (TREE_TYPE (t), size + 7));
6266 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6267 build_int_cst (TREE_TYPE (t), -8));
6268 t = fold_convert (TREE_TYPE (arg), t);
6269 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
6270 /* String up roundup and advance. */
6271 if (roundup)
6272 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6273 /* String up with arg */
6274 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
6275 /* Big-endianness related address adjustment. */
6276 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6277 && size < UNITS_PER_WORD)
6279 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
6280 size_int (UNITS_PER_WORD - size));
6281 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
6284 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
6285 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
6287 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
6288 t = off;
6289 if (adjust)
6290 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
6291 build_int_cst (TREE_TYPE (off), adjust));
6293 t = fold_convert (sizetype, t);
6294 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
6296 if (is_ha)
6298 /* type ha; // treat as "struct {ftype field[n];}"
6299 ... [computing offs]
6300 for (i = 0; i < nregs; ++i, offs += 16)
6301 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
6302 return ha; */
6303 int i;
6304 tree tmp_ha, field_t, field_ptr_t;
6306 /* Declare a local variable. */
6307 tmp_ha = create_tmp_var_raw (type, "ha");
6308 gimple_add_tmp_var (tmp_ha);
6310 /* Establish the base type. */
6311 switch (ag_mode)
6313 case SFmode:
6314 field_t = float_type_node;
6315 field_ptr_t = float_ptr_type_node;
6316 break;
6317 case DFmode:
6318 field_t = double_type_node;
6319 field_ptr_t = double_ptr_type_node;
6320 break;
6321 case TFmode:
6322 field_t = long_double_type_node;
6323 field_ptr_t = long_double_ptr_type_node;
6324 break;
6325 /* Half-precision and quad-precision floats are not fully supported yet.  Enable
6326 the following code once that support is complete; we still need to find the
6327 correct type node for __fp16 *. */
6328 #if 0
6329 case HFmode:
6330 field_t = float_type_node;
6331 field_ptr_t = float_ptr_type_node;
6332 break;
6333 #endif
6334 case V2SImode:
6335 case V4SImode:
6337 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
6338 field_t = build_vector_type_for_mode (innertype, ag_mode);
6339 field_ptr_t = build_pointer_type (field_t);
6341 break;
6342 default:
6343 gcc_assert (0);
6346 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area)  */
6347 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
6348 addr = t;
6349 t = fold_convert (field_ptr_t, addr);
6350 t = build2 (MODIFY_EXPR, field_t,
6351 build1 (INDIRECT_REF, field_t, tmp_ha),
6352 build1 (INDIRECT_REF, field_t, t));
6354 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
6355 for (i = 1; i < nregs; ++i)
6357 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
6358 u = fold_convert (field_ptr_t, addr);
6359 u = build2 (MODIFY_EXPR, field_t,
6360 build2 (MEM_REF, field_t, tmp_ha,
6361 build_int_cst (field_ptr_t,
6362 (i *
6363 int_size_in_bytes (field_t)))),
6364 build1 (INDIRECT_REF, field_t, u));
6365 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
6368 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
6369 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
6372 COND_EXPR_ELSE (cond2) = t;
6373 addr = fold_convert (build_pointer_type (type), cond1);
6374 addr = build_va_arg_indirect_ref (addr);
6376 if (indirect_p)
6377 addr = build_va_arg_indirect_ref (addr);
6379 return addr;
6382 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
6384 static void
6385 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
6386 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6387 int no_rtl)
6389 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
6390 CUMULATIVE_ARGS local_cum;
6391 int gr_saved, vr_saved;
6393 /* The caller has advanced CUM up to, but not beyond, the last named
6394 argument. Advance a local copy of CUM past the last "real" named
6395 argument, to find out how many registers are left over. */
6396 local_cum = *cum;
6397 aarch64_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, true);
6399 /* Find out how many registers we need to save. */
6400 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
6401 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
6403 if (TARGET_GENERAL_REGS_ONLY)
6405 if (local_cum.aapcs_nvrn > 0)
6406 sorry ("%qs and floating point or vector arguments",
6407 "-mgeneral-regs-only");
6408 vr_saved = 0;
6411 if (!no_rtl)
6413 if (gr_saved > 0)
6415 rtx ptr, mem;
6417 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
6418 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
6419 - gr_saved * UNITS_PER_WORD);
6420 mem = gen_frame_mem (BLKmode, ptr);
6421 set_mem_alias_set (mem, get_varargs_alias_set ());
6423 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
6424 mem, gr_saved);
6426 if (vr_saved > 0)
6428 /* We can't use move_block_from_reg, because it will use
6429 the wrong mode, storing D regs only. */
6430 enum machine_mode mode = TImode;
6431 int off, i;
6433 /* Set OFF to the offset from virtual_incoming_args_rtx of
6434 the first vector register. The VR save area lies below
6435 the GR one, and is aligned to 16 bytes. */
6436 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
6437 STACK_BOUNDARY / BITS_PER_UNIT);
6438 off -= vr_saved * UNITS_PER_VREG;
6440 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
6442 rtx ptr, mem;
6444 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
6445 mem = gen_frame_mem (mode, ptr);
6446 set_mem_alias_set (mem, get_varargs_alias_set ());
6447 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
6448 off += UNITS_PER_VREG;
6453 /* We don't save the size into *PRETEND_SIZE because we want to avoid
6454 any complication of having crtl->args.pretend_args_size changed. */
6455 cfun->machine->saved_varargs_size
6456 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
6457 STACK_BOUNDARY / BITS_PER_UNIT)
6458 + vr_saved * UNITS_PER_VREG);
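/* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  If the target has no
   floating-point/SIMD support, mark the V0-V31 registers as fixed and
   call-used so the register allocator never touches them.  */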
6461 static void
6462 aarch64_conditional_register_usage (void)
6464 int i;
6465 if (!TARGET_FLOAT)
6467 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
6469 fixed_regs[i] = 1;
6470 call_used_regs[i] = 1;
6475 /* Walk down the type tree of TYPE counting consecutive base elements.
6476 If *MODEP is VOIDmode, then set it to the first valid floating point
6477 type. If a non-floating point type is found, or if a floating point
6478 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
6479 otherwise return the count in the sub-tree. */
6480 static int
6481 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
6483 enum machine_mode mode;
6484 HOST_WIDE_INT size;
6486 switch (TREE_CODE (type))
6488 case REAL_TYPE:
6489 mode = TYPE_MODE (type);
6490 if (mode != DFmode && mode != SFmode && mode != TFmode)
6491 return -1;
6493 if (*modep == VOIDmode)
6494 *modep = mode;
6496 if (*modep == mode)
6497 return 1;
6499 break;
6501 case COMPLEX_TYPE:
6502 mode = TYPE_MODE (TREE_TYPE (type));
6503 if (mode != DFmode && mode != SFmode && mode != TFmode)
6504 return -1;
6506 if (*modep == VOIDmode)
6507 *modep = mode;
6509 if (*modep == mode)
6510 return 2;
6512 break;
6514 case VECTOR_TYPE:
6515 /* Use V2SImode and V4SImode as representatives of all 64-bit
6516 and 128-bit vector types. */
6517 size = int_size_in_bytes (type);
6518 switch (size)
6520 case 8:
6521 mode = V2SImode;
6522 break;
6523 case 16:
6524 mode = V4SImode;
6525 break;
6526 default:
6527 return -1;
6530 if (*modep == VOIDmode)
6531 *modep = mode;
6533 /* Vector modes are considered to be opaque: two vectors are
6534 equivalent for the purposes of being homogeneous aggregates
6535 if they are the same size. */
6536 if (*modep == mode)
6537 return 1;
6539 break;
6541 case ARRAY_TYPE:
6543 int count;
6544 tree index = TYPE_DOMAIN (type);
6546 /* Can't handle incomplete types nor sizes that are not
6547 fixed. */
6548 if (!COMPLETE_TYPE_P (type)
6549 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6550 return -1;
6552 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
6553 if (count == -1
6554 || !index
6555 || !TYPE_MAX_VALUE (index)
6556 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
6557 || !TYPE_MIN_VALUE (index)
6558 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
6559 || count < 0)
6560 return -1;
6562 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
6563 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
6565 /* There must be no padding. */
6566 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
6567 return -1;
6569 return count;
6572 case RECORD_TYPE:
6574 int count = 0;
6575 int sub_count;
6576 tree field;
6578 /* Can't handle incomplete types nor sizes that are not
6579 fixed. */
6580 if (!COMPLETE_TYPE_P (type)
6581 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6582 return -1;
6584 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6586 if (TREE_CODE (field) != FIELD_DECL)
6587 continue;
6589 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6590 if (sub_count < 0)
6591 return -1;
6592 count += sub_count;
6595 /* There must be no padding. */
6596 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
6597 return -1;
6599 return count;
6602 case UNION_TYPE:
6603 case QUAL_UNION_TYPE:
6605 /* These aren't very interesting except in a degenerate case. */
6606 int count = 0;
6607 int sub_count;
6608 tree field;
6610 /* Can't handle incomplete types nor sizes that are not
6611 fixed. */
6612 if (!COMPLETE_TYPE_P (type)
6613 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6614 return -1;
6616 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6618 if (TREE_CODE (field) != FIELD_DECL)
6619 continue;
6621 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6622 if (sub_count < 0)
6623 return -1;
6624 count = count > sub_count ? count : sub_count;
6627 /* There must be no padding. */
6628 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
6629 return -1;
6631 return count;
6634 default:
6635 break;
6638 return -1;
6641 /* Return true if we use LRA instead of reload pass. */
6642 static bool
6643 aarch64_lra_p (void)
6645 return aarch64_lra_flag;
6648 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
6649 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
6650 array types. The C99 floating-point complex types are also considered
6651 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
6652 types, which are GCC extensions and out of the scope of AAPCS64, are
6653 treated as composite types here as well.
6655 Note that MODE itself is not sufficient in determining whether a type
6656 is such a composite type or not. This is because
6657 stor-layout.c:compute_record_mode may have already changed the MODE
6658 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
6659 structure with only one field may have its MODE set to the mode of the
6660 field. Also an integer mode whose size matches the size of the
6661 RECORD_TYPE type may be used to substitute the original mode
6662 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
6663 solely relied on. */
6665 static bool
6666 aarch64_composite_type_p (const_tree type,
6667 enum machine_mode mode)
6669 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
6670 return true;
6672 if (mode == BLKmode
6673 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
6674 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
6675 return true;
6677 return false;
6680 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
6681 type as described in AAPCS64 \S 4.1.2.
6683 See the comment above aarch64_composite_type_p for the notes on MODE. */
6685 static bool
6686 aarch64_short_vector_p (const_tree type,
6687 enum machine_mode mode)
6689 HOST_WIDE_INT size = -1;
6691 if (type && TREE_CODE (type) == VECTOR_TYPE)
6692 size = int_size_in_bytes (type);
6693 else if (!aarch64_composite_type_p (type, mode)
6694 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6695 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
6696 size = GET_MODE_SIZE (mode);
6698 return (size == 8 || size == 16);
6701 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
6702 shall be passed or returned in simd/fp register(s) (providing these
6703 parameter passing registers are available).
6705 Upon successful return, *COUNT returns the number of needed registers,
6706 *BASE_MODE returns the mode of the individual register and when IS_HA
6707 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
6708 floating-point aggregate or a homogeneous short-vector aggregate. */
6710 static bool
6711 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
6712 const_tree type,
6713 enum machine_mode *base_mode,
6714 int *count,
6715 bool *is_ha)
6717 enum machine_mode new_mode = VOIDmode;
6718 bool composite_p = aarch64_composite_type_p (type, mode);
6720 if (is_ha != NULL) *is_ha = false;
6722 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
6723 || aarch64_short_vector_p (type, mode))
6725 *count = 1;
6726 new_mode = mode;
6728 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6730 if (is_ha != NULL) *is_ha = true;
6731 *count = 2;
6732 new_mode = GET_MODE_INNER (mode);
6734 else if (type && composite_p)
6736 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6738 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
6740 if (is_ha != NULL) *is_ha = true;
6741 *count = ag_count;
6743 else
6744 return false;
6746 else
6747 return false;
6749 *base_mode = new_mode;
6750 return true;
6753 /* Implement TARGET_STRUCT_VALUE_RTX. */
6755 static rtx
6756 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
6757 int incoming ATTRIBUTE_UNUSED)
6759 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
6762 /* Implements target hook vector_mode_supported_p. */
6763 static bool
6764 aarch64_vector_mode_supported_p (enum machine_mode mode)
6766 if (TARGET_SIMD
6767 && (mode == V4SImode || mode == V8HImode
6768 || mode == V16QImode || mode == V2DImode
6769 || mode == V2SImode || mode == V4HImode
6770 || mode == V8QImode || mode == V2SFmode
6771 || mode == V4SFmode || mode == V2DFmode))
6772 return true;
6774 return false;
6777 /* Return appropriate SIMD container
6778 for MODE within a vector of WIDTH bits. */
6779 static enum machine_mode
6780 aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
6782 gcc_assert (width == 64 || width == 128);
6783 if (TARGET_SIMD)
6785 if (width == 128)
6786 switch (mode)
6788 case DFmode:
6789 return V2DFmode;
6790 case SFmode:
6791 return V4SFmode;
6792 case SImode:
6793 return V4SImode;
6794 case HImode:
6795 return V8HImode;
6796 case QImode:
6797 return V16QImode;
6798 case DImode:
6799 return V2DImode;
6800 default:
6801 break;
6803 else
6804 switch (mode)
6806 case SFmode:
6807 return V2SFmode;
6808 case SImode:
6809 return V2SImode;
6810 case HImode:
6811 return V4HImode;
6812 case QImode:
6813 return V8QImode;
6814 default:
6815 break;
6818 return word_mode;
6821 /* Return 128-bit container as the preferred SIMD mode for MODE. */
6822 static enum machine_mode
6823 aarch64_preferred_simd_mode (enum machine_mode mode)
6825 return aarch64_simd_container_mode (mode, 128);
6828 /* Return the bitmask of possible vector sizes for the vectorizer
6829 to iterate over. */
6830 static unsigned int
6831 aarch64_autovectorize_vector_sizes (void)
6833 return (16 | 8);
6836 /* A table to help perform AArch64-specific name mangling for AdvSIMD
6837 vector types in order to conform to the AAPCS64 (see "Procedure
6838 Call Standard for the ARM 64-bit Architecture", Appendix A). To
6839 qualify for emission with the mangled names defined in that document,
6840 a vector type must not only be of the correct mode but also be
6841 composed of AdvSIMD vector element types (e.g.
6842 __builtin_aarch64_simd_qi); these types are registered by
6843 aarch64_init_simd_builtins (). In other words, vector types defined
6844 in other ways e.g. via vector_size attribute will get default
6845 mangled names. */
6846 typedef struct
6848 enum machine_mode mode;
6849 const char *element_type_name;
6850 const char *mangled_name;
6851 } aarch64_simd_mangle_map_entry;
6853 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
6854 /* 64-bit containerized types. */
6855 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
6856 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
6857 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
6858 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
6859 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
6860 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
6861 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
6862 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
6863 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
6864 /* 128-bit containerized types. */
6865 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
6866 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
6867 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
6868 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
6869 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
6870 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
6871 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
6872 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
6873 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
6874 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
6875 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
6876 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
6877 { V2DImode, "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
6878 { VOIDmode, NULL, NULL }
6881 /* Implement TARGET_MANGLE_TYPE. */
6883 static const char *
6884 aarch64_mangle_type (const_tree type)
6886 /* The AArch64 ABI documents say that "__va_list" has to be
6887 mangled as if it is in the "std" namespace. */
6888 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
6889 return "St9__va_list";
6891 /* Check the mode of the vector type, and the name of the vector
6892 element type, against the table. */
6893 if (TREE_CODE (type) == VECTOR_TYPE)
6895 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
6897 while (pos->mode != VOIDmode)
6899 tree elt_type = TREE_TYPE (type);
6901 if (pos->mode == TYPE_MODE (type)
6902 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
6903 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
6904 pos->element_type_name))
6905 return pos->mangled_name;
6907 pos++;
6911 /* Use the default mangling. */
6912 return NULL;
6915 /* Return the equivalent letter for size. */
6916 static char
6917 sizetochar (int size)
6919 switch (size)
6921 case 64: return 'd';
6922 case 32: return 's';
6923 case 16: return 'h';
6924 case 8 : return 'b';
6925 default: gcc_unreachable ();
6929 /* Return true iff x is a uniform vector of floating-point
6930 constants, and the constant can be represented in
6931 quarter-precision form. Note, as aarch64_float_const_representable_p
6932 rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0. */
6933 static bool
6934 aarch64_vect_float_const_representable_p (rtx x)
6936 int i = 0;
6937 REAL_VALUE_TYPE r0, ri;
6938 rtx x0, xi;
6940 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
6941 return false;
6943 x0 = CONST_VECTOR_ELT (x, 0);
6944 if (!CONST_DOUBLE_P (x0))
6945 return false;
6947 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
6949 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
6951 xi = CONST_VECTOR_ELT (x, i);
6952 if (!CONST_DOUBLE_P (xi))
6953 return false;
6955 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
6956 if (!REAL_VALUES_EQUAL (r0, ri))
6957 return false;
6960 return aarch64_float_const_representable_p (x0);
6963 /* Return true if OP is a valid SIMD immediate for MODE, false otherwise.  If INFO is nonnull, fill it in with the details needed to generate the immediate. */
6964 bool
6965 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
6966 struct simd_immediate_info *info)
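/* Helper for the search below: test every group of STRIDE bytes of the
   splatted constant against TEST; if they all match, record the immediate
   class CLASS, element size ELSIZE, shift amount SHIFT and MVN flag NEG,
   then stop searching.  */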
6968 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
6969 matches = 1; \
6970 for (i = 0; i < idx; i += (STRIDE)) \
6971 if (!(TEST)) \
6972 matches = 0; \
6973 if (matches) \
6975 immtype = (CLASS); \
6976 elsize = (ELSIZE); \
6977 eshift = (SHIFT); \
6978 emvn = (NEG); \
6979 break; \
6982 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
6983 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
6984 unsigned char bytes[16];
6985 int immtype = -1, matches;
6986 unsigned int invmask = inverse ? 0xff : 0;
6987 int eshift, emvn;
6989 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6991 if (! (aarch64_simd_imm_zero_p (op, mode)
6992 || aarch64_vect_float_const_representable_p (op)))
6993 return false;
6995 if (info)
6997 info->value = CONST_VECTOR_ELT (op, 0);
6998 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
6999 info->mvn = false;
7000 info->shift = 0;
7003 return true;
7006 /* Splat vector constant out into a byte vector. */
7007 for (i = 0; i < n_elts; i++)
7009 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
7010 it must be laid out in the vector register in reverse order. */
7011 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
7012 unsigned HOST_WIDE_INT elpart;
7013 unsigned int part, parts;
7015 if (GET_CODE (el) == CONST_INT)
7017 elpart = INTVAL (el);
7018 parts = 1;
7020 else if (GET_CODE (el) == CONST_DOUBLE)
7022 elpart = CONST_DOUBLE_LOW (el);
7023 parts = 2;
7025 else
7026 gcc_unreachable ();
7028 for (part = 0; part < parts; part++)
7030 unsigned int byte;
7031 for (byte = 0; byte < innersize; byte++)
7033 bytes[idx++] = (elpart & 0xff) ^ invmask;
7034 elpart >>= BITS_PER_UNIT;
7036 if (GET_CODE (el) == CONST_DOUBLE)
7037 elpart = CONST_DOUBLE_HIGH (el);
7041 /* Sanity check. */
7042 gcc_assert (idx == GET_MODE_SIZE (mode));
7046 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
7047 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
7049 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7050 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7052 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
7053 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7055 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
7056 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
7058 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
7060 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
7062 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
7063 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
7065 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7066 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7068 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
7069 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7071 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
7072 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
7074 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
7076 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
7078 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7079 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7081 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7082 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7084 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
7085 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7087 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
7088 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7090 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
7092 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
7093 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
7095 while (0);
7097 if (immtype == -1)
7098 return false;
7100 if (info)
7102 info->element_width = elsize;
7103 info->mvn = emvn != 0;
7104 info->shift = eshift;
7106 unsigned HOST_WIDE_INT imm = 0;
7108 if (immtype >= 12 && immtype <= 15)
7109 info->msl = true;
7111 /* Un-invert bytes of recognized vector, if necessary. */
7112 if (invmask != 0)
7113 for (i = 0; i < idx; i++)
7114 bytes[i] ^= invmask;
7116 if (immtype == 17)
7118 /* FIXME: Broken on 32-bit H_W_I hosts. */
7119 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
7121 for (i = 0; i < 8; i++)
7122 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
7123 << (i * BITS_PER_UNIT);
7126 info->value = GEN_INT (imm);
7128 else
7130 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
7131 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
7133 /* Construct 'abcdefgh' because the assembler cannot handle
7134 generic constants. */
7135 if (info->mvn)
7136 imm = ~imm;
7137 imm = (imm >> info->shift) & 0xff;
7138 info->value = GEN_INT (imm);
7142 return true;
7143 #undef CHECK
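/* Return true if X is a CONST_VECTOR of integers whose elements are all
   equal and lie in the range [MINVAL, MAXVAL].  */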
7146 static bool
7147 aarch64_const_vec_all_same_int_p (rtx x,
7148 HOST_WIDE_INT minval,
7149 HOST_WIDE_INT maxval)
7151 HOST_WIDE_INT firstval;
7152 int count, i;
7154 if (GET_CODE (x) != CONST_VECTOR
7155 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
7156 return false;
7158 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
7159 if (firstval < minval || firstval > maxval)
7160 return false;
7162 count = CONST_VECTOR_NUNITS (x);
7163 for (i = 1; i < count; i++)
7164 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
7165 return false;
7167 return true;
7170 /* Check if immediate shift constants are within range. */
7171 bool
7172 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
7174 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
7175 if (left)
7176 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
7177 else
7178 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
7181 /* Return true if X is a uniform vector where all elements
7182 are either the floating-point constant 0.0 or the
7183 integer constant 0. */
7184 bool
7185 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
7187 return x == CONST0_RTX (mode);
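/* Return true if every byte of the 64-bit immediate X is either 0x00 or
   0xff, i.e. the form of scalar immediate accepted by MOVI with a 64-bit
   element size.  */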
7190 bool
7191 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
7193 HOST_WIDE_INT imm = INTVAL (x);
7194 int i;
7196 for (i = 0; i < 8; i++)
7198 unsigned int byte = imm & 0xff;
7199 if (byte != 0xff && byte != 0)
7200 return false;
7201 imm >>= 8;
7204 return true;
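/* Return true if X is a legitimate MOV operand for MODE: a HIGH of a valid
   symbol reference, an integer that aarch64_move_imm accepts, a constant
   symbolic address in DImode, or a symbol classified as SYMBOL_TINY_ABSOLUTE
   in CONTEXT.  */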
7207 bool
7208 aarch64_mov_operand_p (rtx x,
7209 enum aarch64_symbol_context context,
7210 enum machine_mode mode)
7212 if (GET_CODE (x) == HIGH
7213 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
7214 return true;
7216 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
7217 return true;
7219 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
7220 return true;
7222 return aarch64_classify_symbolic_expression (x, context)
7223 == SYMBOL_TINY_ABSOLUTE;
7226 /* Return a const_int vector of VAL. */
7227 rtx
7228 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
7230 int nunits = GET_MODE_NUNITS (mode);
7231 rtvec v = rtvec_alloc (nunits);
7232 int i;
7234 for (i = 0; i < nunits; i++)
7235 RTVEC_ELT (v, i) = GEN_INT (val);
7237 return gen_rtx_CONST_VECTOR (mode, v);
7240 /* Check OP is a legal scalar immediate for the MOVI instruction. */
7242 bool
7243 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
7245 enum machine_mode vmode;
7247 gcc_assert (!VECTOR_MODE_P (mode));
7248 vmode = aarch64_preferred_simd_mode (mode);
7249 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
7250 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
7253 /* Construct and return a PARALLEL RTX vector. */
7254 rtx
7255 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
7257 int nunits = GET_MODE_NUNITS (mode);
7258 rtvec v = rtvec_alloc (nunits / 2);
7259 int base = high ? nunits / 2 : 0;
7260 rtx t1;
7261 int i;
7263 for (i = 0; i < nunits / 2; i++)
7264 RTVEC_ELT (v, i) = GEN_INT (base + i);
7266 t1 = gen_rtx_PARALLEL (mode, v);
7267 return t1;
7270 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
7271 HIGH (exclusive). */
7272 void
7273 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7275 HOST_WIDE_INT lane;
7276 gcc_assert (GET_CODE (operand) == CONST_INT);
7277 lane = INTVAL (operand);
7279 if (lane < low || lane >= high)
7280 error ("lane out of range");
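/* Check that OPERAND is a CONST_INT lying between LOW (inclusive) and HIGH
   (exclusive); report an error if it is out of range.  */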
7283 void
7284 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7286 gcc_assert (GET_CODE (operand) == CONST_INT);
7287 HOST_WIDE_INT lane = INTVAL (operand);
7289 if (lane < low || lane >= high)
7290 error ("constant out of range");
7293 /* Emit code to reinterpret one AdvSIMD type as another,
7294 without altering bits. */
7295 void
7296 aarch64_simd_reinterpret (rtx dest, rtx src)
7298 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
7301 /* Emit code to place an AdvSIMD pair result in memory locations (with equal
7302 registers). */
7303 void
7304 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
7305 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
7306 rtx op1)
7308 rtx mem = gen_rtx_MEM (mode, destaddr);
7309 rtx tmp1 = gen_reg_rtx (mode);
7310 rtx tmp2 = gen_reg_rtx (mode);
7312 emit_insn (intfn (tmp1, op1, tmp2));
7314 emit_move_insn (mem, tmp1);
7315 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
7316 emit_move_insn (mem, tmp2);
7319 /* Return TRUE if OP is a valid vector addressing mode. */
7320 bool
7321 aarch64_simd_mem_operand_p (rtx op)
7323 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
7324 || GET_CODE (XEXP (op, 0)) == REG);
7327 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
7328 not to early-clobber SRC registers in the process.
7330 We assume that the operands described by SRC and DEST represent a
7331 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
7332 number of components into which the copy has been decomposed. */
7333 void
7334 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
7335 rtx *src, unsigned int count)
7337 unsigned int i;
7339 if (!reg_overlap_mentioned_p (operands[0], operands[1])
7340 || REGNO (operands[0]) < REGNO (operands[1]))
7342 for (i = 0; i < count; i++)
7344 operands[2 * i] = dest[i];
7345 operands[2 * i + 1] = src[i];
7348 else
7350 for (i = 0; i < count; i++)
7352 operands[2 * i] = dest[count - i - 1];
7353 operands[2 * i + 1] = src[count - i - 1];
7358 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
7359 one of VSTRUCT modes: OI, CI or XI. */
7360 int
7361 aarch64_simd_attr_length_move (rtx insn)
7363 enum machine_mode mode;
7365 extract_insn_cached (insn);
7367 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
7369 mode = GET_MODE (recog_data.operand[0]);
7370 switch (mode)
7372 case OImode:
7373 return 8;
7374 case CImode:
7375 return 12;
7376 case XImode:
7377 return 16;
7378 default:
7379 gcc_unreachable ();
7382 return 4;
7385 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
7386 alignment of a vector to 128 bits. */
7387 static HOST_WIDE_INT
7388 aarch64_simd_vector_alignment (const_tree type)
7390 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
7391 return MIN (align, 128);
7394 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
7395 static bool
7396 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
7398 if (is_packed)
7399 return false;
7401 /* We guarantee alignment for vectors up to 128 bits. */
7402 if (tree_int_cst_compare (TYPE_SIZE (type),
7403 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
7404 return false;
7406 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
7407 return true;
7410 /* If VALS is a vector constant that can be loaded into a register
7411 using DUP, generate instructions to do so and return an RTX to
7412 assign to the register. Otherwise return NULL_RTX. */
7413 static rtx
7414 aarch64_simd_dup_constant (rtx vals)
7416 enum machine_mode mode = GET_MODE (vals);
7417 enum machine_mode inner_mode = GET_MODE_INNER (mode);
7418 int n_elts = GET_MODE_NUNITS (mode);
7419 bool all_same = true;
7420 rtx x;
7421 int i;
7423 if (GET_CODE (vals) != CONST_VECTOR)
7424 return NULL_RTX;
7426 for (i = 1; i < n_elts; ++i)
7428 x = CONST_VECTOR_ELT (vals, i);
7429 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
7430 all_same = false;
7433 if (!all_same)
7434 return NULL_RTX;
7436 /* We can load this constant by using DUP and a constant in a
7437 single ARM register. This will be cheaper than a vector
7438 load. */
7439 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
7440 return gen_rtx_VEC_DUPLICATE (mode, x);
7444 /* Generate code to load VALS, which is a PARALLEL containing only
7445 constants (for vec_init) or CONST_VECTOR, efficiently into a
7446 register. Returns an RTX to copy into the register, or NULL_RTX
7447 for a PARALLEL that can not be converted into a CONST_VECTOR. */
7448 static rtx
7449 aarch64_simd_make_constant (rtx vals)
7451 enum machine_mode mode = GET_MODE (vals);
7452 rtx const_dup;
7453 rtx const_vec = NULL_RTX;
7454 int n_elts = GET_MODE_NUNITS (mode);
7455 int n_const = 0;
7456 int i;
7458 if (GET_CODE (vals) == CONST_VECTOR)
7459 const_vec = vals;
7460 else if (GET_CODE (vals) == PARALLEL)
7462 /* A CONST_VECTOR must contain only CONST_INTs and
7463 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
7464 Only store valid constants in a CONST_VECTOR. */
7465 for (i = 0; i < n_elts; ++i)
7467 rtx x = XVECEXP (vals, 0, i);
7468 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
7469 n_const++;
7471 if (n_const == n_elts)
7472 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
7474 else
7475 gcc_unreachable ();
7477 if (const_vec != NULL_RTX
7478 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
7479 /* Load using MOVI/MVNI. */
7480 return const_vec;
7481 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
7482 /* Loaded using DUP. */
7483 return const_dup;
7484 else if (const_vec != NULL_RTX)
7485 /* Load from constant pool. We can not take advantage of single-cycle
7486 LD1 because we need a PC-relative addressing mode. */
7487 return const_vec;
7488 else
7489 /* A PARALLEL containing something not valid inside CONST_VECTOR.
7490 We can not construct an initializer. */
7491 return NULL_RTX;
7494 void
7495 aarch64_expand_vector_init (rtx target, rtx vals)
7497 enum machine_mode mode = GET_MODE (target);
7498 enum machine_mode inner_mode = GET_MODE_INNER (mode);
7499 int n_elts = GET_MODE_NUNITS (mode);
7500 int n_var = 0, one_var = -1;
7501 bool all_same = true;
7502 rtx x, mem;
7503 int i;
7505 x = XVECEXP (vals, 0, 0);
7506 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7507 n_var = 1, one_var = 0;
7509 for (i = 1; i < n_elts; ++i)
7511 x = XVECEXP (vals, 0, i);
7512 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
7513 ++n_var, one_var = i;
7515 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
7516 all_same = false;
7519 if (n_var == 0)
7521 rtx constant = aarch64_simd_make_constant (vals);
7522 if (constant != NULL_RTX)
7524 emit_move_insn (target, constant);
7525 return;
7529 /* Splat a single non-constant element if we can. */
7530 if (all_same)
7532 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
7533 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
7534 return;
7537 /* One field is non-constant. Load constant then overwrite varying
7538 field. This is more efficient than using the stack. */
7539 if (n_var == 1)
7541 rtx copy = copy_rtx (vals);
7542 rtx index = GEN_INT (one_var);
7543 enum insn_code icode;
7545 /* Load constant part of vector, substitute neighboring value for
7546 varying element. */
7547 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
7548 aarch64_expand_vector_init (target, copy);
7550 /* Insert variable. */
7551 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
7552 icode = optab_handler (vec_set_optab, mode);
7553 gcc_assert (icode != CODE_FOR_nothing);
7554 emit_insn (GEN_FCN (icode) (target, x, index));
7555 return;
7558 /* Construct the vector in memory one field at a time
7559 and load the whole vector. */
7560 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7561 for (i = 0; i < n_elts; i++)
7562 emit_move_insn (adjust_address_nv (mem, inner_mode,
7563 i * GET_MODE_SIZE (inner_mode)),
7564 XVECEXP (vals, 0, i));
7565 emit_move_insn (target, mem);
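/* Implement TARGET_SHIFT_TRUNCATION_MASK.  Scalar shift counts are
   truncated modulo the width of the mode, but vector (and vector structure)
   shift counts are not, so return 0 for those modes.  */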
7569 static unsigned HOST_WIDE_INT
7570 aarch64_shift_truncation_mask (enum machine_mode mode)
7572 return
7573 (aarch64_vector_mode_supported_p (mode)
7574 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
7577 #ifndef TLS_SECTION_ASM_FLAG
7578 #define TLS_SECTION_ASM_FLAG 'T'
7579 #endif
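/* Output the assembly directive that switches to section NAME with the
   given FLAGS, building the ELF flag string, section type and, where
   necessary, the COMDAT group operand.  */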
7581 void
7582 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
7583 tree decl ATTRIBUTE_UNUSED)
7585 char flagchars[10], *f = flagchars;
7587 /* If we have already declared this section, we can use an
7588 abbreviated form to switch back to it -- unless this section is
7589 part of a COMDAT group, in which case GAS requires the full
7590 declaration every time. */
7591 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7592 && (flags & SECTION_DECLARED))
7594 fprintf (asm_out_file, "\t.section\t%s\n", name);
7595 return;
7598 if (!(flags & SECTION_DEBUG))
7599 *f++ = 'a';
7600 if (flags & SECTION_WRITE)
7601 *f++ = 'w';
7602 if (flags & SECTION_CODE)
7603 *f++ = 'x';
7604 if (flags & SECTION_SMALL)
7605 *f++ = 's';
7606 if (flags & SECTION_MERGE)
7607 *f++ = 'M';
7608 if (flags & SECTION_STRINGS)
7609 *f++ = 'S';
7610 if (flags & SECTION_TLS)
7611 *f++ = TLS_SECTION_ASM_FLAG;
7612 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7613 *f++ = 'G';
7614 *f = '\0';
7616 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
7618 if (!(flags & SECTION_NOTYPE))
7620 const char *type;
7621 const char *format;
7623 if (flags & SECTION_BSS)
7624 type = "nobits";
7625 else
7626 type = "progbits";
7628 #ifdef TYPE_OPERAND_FMT
7629 format = "," TYPE_OPERAND_FMT;
7630 #else
7631 format = ",@%s";
7632 #endif
7634 fprintf (asm_out_file, format, type);
7636 if (flags & SECTION_ENTSIZE)
7637 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
7638 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
7640 if (TREE_CODE (decl) == IDENTIFIER_NODE)
7641 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
7642 else
7643 fprintf (asm_out_file, ",%s,comdat",
7644 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
7648 putc ('\n', asm_out_file);
7651 /* Select a format to encode pointers in exception handling data. */
7652 int
7653 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
7655 int type;
7656 switch (aarch64_cmodel)
7658 case AARCH64_CMODEL_TINY:
7659 case AARCH64_CMODEL_TINY_PIC:
7660 case AARCH64_CMODEL_SMALL:
7661 case AARCH64_CMODEL_SMALL_PIC:
7662 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
7663 for everything. */
7664 type = DW_EH_PE_sdata4;
7665 break;
7666 default:
7667 /* No assumptions here. 8-byte relocs required. */
7668 type = DW_EH_PE_sdata8;
7669 break;
7671 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
7674 /* Emit load exclusive. */
7676 static void
7677 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
7678 rtx mem, rtx model_rtx)
7680 rtx (*gen) (rtx, rtx, rtx);
7682 switch (mode)
7684 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
7685 case HImode: gen = gen_aarch64_load_exclusivehi; break;
7686 case SImode: gen = gen_aarch64_load_exclusivesi; break;
7687 case DImode: gen = gen_aarch64_load_exclusivedi; break;
7688 default:
7689 gcc_unreachable ();
7692 emit_insn (gen (rval, mem, model_rtx));
7695 /* Emit store exclusive. */
7697 static void
7698 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
7699 rtx rval, rtx mem, rtx model_rtx)
7701 rtx (*gen) (rtx, rtx, rtx, rtx);
7703 switch (mode)
7705 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
7706 case HImode: gen = gen_aarch64_store_exclusivehi; break;
7707 case SImode: gen = gen_aarch64_store_exclusivesi; break;
7708 case DImode: gen = gen_aarch64_store_exclusivedi; break;
7709 default:
7710 gcc_unreachable ();
7713 emit_insn (gen (bval, rval, mem, model_rtx));
7716 /* Mark the previous jump instruction as unlikely. */
7718 static void
7719 aarch64_emit_unlikely_jump (rtx insn)
7721 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
7723 insn = emit_jump_insn (insn);
7724 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
7727 /* Expand a compare and swap pattern. */
7729 void
7730 aarch64_expand_compare_and_swap (rtx operands[])
7732 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
7733 enum machine_mode mode, cmp_mode;
7734 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
7736 bval = operands[0];
7737 rval = operands[1];
7738 mem = operands[2];
7739 oldval = operands[3];
7740 newval = operands[4];
7741 is_weak = operands[5];
7742 mod_s = operands[6];
7743 mod_f = operands[7];
7744 mode = GET_MODE (mem);
7745 cmp_mode = mode;
7747 /* Normally the succ memory model must be stronger than fail, but in the
7748 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
7749 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
7751 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
7752 && INTVAL (mod_s) == MEMMODEL_RELEASE)
7753 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
7755 switch (mode)
7757 case QImode:
7758 case HImode:
7759 /* For short modes, we're going to perform the comparison in SImode,
7760 so do the zero-extension now. */
7761 cmp_mode = SImode;
7762 rval = gen_reg_rtx (SImode);
7763 oldval = convert_modes (SImode, mode, oldval, true);
7764 /* Fall through. */
7766 case SImode:
7767 case DImode:
7768 /* Force the value into a register if needed. */
7769 if (!aarch64_plus_operand (oldval, mode))
7770 oldval = force_reg (cmp_mode, oldval);
7771 break;
7773 default:
7774 gcc_unreachable ();
7777 switch (mode)
7779 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
7780 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
7781 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
7782 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
7783 default:
7784 gcc_unreachable ();
7787 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
7789 if (mode == QImode || mode == HImode)
7790 emit_move_insn (operands[1], gen_lowpart (mode, rval));
7792 x = gen_rtx_REG (CCmode, CC_REGNUM);
7793 x = gen_rtx_EQ (SImode, x, const0_rtx);
7794 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
7797 /* Split a compare and swap pattern. */
7799 void
7800 aarch64_split_compare_and_swap (rtx operands[])
7802 rtx rval, mem, oldval, newval, scratch;
7803 enum machine_mode mode;
7804 bool is_weak;
7805 rtx label1, label2, x, cond;
7807 rval = operands[0];
7808 mem = operands[1];
7809 oldval = operands[2];
7810 newval = operands[3];
7811 is_weak = (operands[4] != const0_rtx);
7812 scratch = operands[7];
7813 mode = GET_MODE (mem);
7815 label1 = NULL_RTX;
7816 if (!is_weak)
7818 label1 = gen_label_rtx ();
7819 emit_label (label1);
7821 label2 = gen_label_rtx ();
7823 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
7825 cond = aarch64_gen_compare_reg (NE, rval, oldval);
7826 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7827 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7828 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
7829 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7831 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
7833 if (!is_weak)
7835 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
7836 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7837 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
7838 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
7840 else
7842 cond = gen_rtx_REG (CCmode, CC_REGNUM);
7843 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
7844 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
7847 emit_label (label2);
7850 /* Split an atomic operation. */
7852 void
7853 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
7854 rtx value, rtx model_rtx, rtx cond)
7856 enum machine_mode mode = GET_MODE (mem);
7857 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
7858 rtx label, x;
7860 label = gen_label_rtx ();
7861 emit_label (label);
7863 if (new_out)
7864 new_out = gen_lowpart (wmode, new_out);
7865 if (old_out)
7866 old_out = gen_lowpart (wmode, old_out);
7867 else
7868 old_out = new_out;
7869 value = simplify_gen_subreg (wmode, value, mode, 0);
7871 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
7873 switch (code)
7875 case SET:
7876 new_out = value;
7877 break;
7879 case NOT:
7880 x = gen_rtx_AND (wmode, old_out, value);
7881 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7882 x = gen_rtx_NOT (wmode, new_out);
7883 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7884 break;
7886 case MINUS:
7887 if (CONST_INT_P (value))
7889 value = GEN_INT (-INTVAL (value));
7890 code = PLUS;
7892 /* Fall through. */
7894 default:
7895 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
7896 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
7897 break;
7900 aarch64_emit_store_exclusive (mode, cond, mem,
7901 gen_lowpart (mode, new_out), model_rtx);
7903 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
7904 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
7905 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
7906 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
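/* Print "+<extension>" for every architecture extension whose feature flags
   are all enabled in aarch64_isa_flags, followed by a newline.  */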
7909 static void
7910 aarch64_print_extension (void)
7912 const struct aarch64_option_extension *opt = NULL;
7914 for (opt = all_extensions; opt->name != NULL; opt++)
7915 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
7916 asm_fprintf (asm_out_file, "+%s", opt->name);
7918 asm_fprintf (asm_out_file, "\n");
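/* Implement TARGET_ASM_FILE_START.  Emit a .arch or .cpu directive (with
   any extensions) describing the selected target, then perform the default
   file-start processing.  */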
7921 static void
7922 aarch64_start_file (void)
7924 if (selected_arch)
7926 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
7927 aarch64_print_extension ();
7929 else if (selected_cpu)
7931 const char *truncated_name
7932 = aarch64_rewrite_selected_cpu (selected_cpu->name);
7933 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
7934 aarch64_print_extension ();
7936 default_file_start ();
7939 /* Target hook for c_mode_for_suffix. */
7940 static enum machine_mode
7941 aarch64_c_mode_for_suffix (char suffix)
7943 if (suffix == 'q')
7944 return TFmode;
7946 return VOIDmode;
7949 /* We can only represent floating point constants which will fit in
7950 "quarter-precision" values. These values are characterised by
7951 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given by:
7954 (-1)^s * (n/16) * 2^r
7956 Where:
7957 's' is the sign bit.
7958 'n' is an integer in the range 16 <= n <= 31.
7959 'r' is an integer in the range -3 <= r <= 4. */
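/* For illustration: 1.0 is encoded with s = 0, n = 16, r = 0, and 0.25 with
   s = 0, n = 16, r = -2; both satisfy the ranges above, whereas e.g. 0.1
   has no such representation.  */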
7961 /* Return true iff X can be represented by a quarter-precision
7962 floating point immediate operand. Note, we cannot represent 0.0. */
7963 bool
7964 aarch64_float_const_representable_p (rtx x)
7966 /* This represents our current view of how many bits
7967 make up the mantissa. */
7968 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
7969 int exponent;
7970 unsigned HOST_WIDE_INT mantissa, mask;
7971 REAL_VALUE_TYPE r, m;
7972 bool fail;
7974 if (!CONST_DOUBLE_P (x))
7975 return false;
7977 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7979 /* We cannot represent infinities, NaNs or +/-zero. We won't
7980 know if we have +zero until we analyse the mantissa, but we
7981 can reject the other invalid values. */
7982 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
7983 || REAL_VALUE_MINUS_ZERO (r))
7984 return false;
7986 /* Extract exponent. */
7987 r = real_value_abs (&r);
7988 exponent = REAL_EXP (&r);
7990 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7991 highest (sign) bit, with a fixed binary point at bit point_pos.
7992 The low element of W holds the low part of the mantissa, the high element the high part.
7993 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
7994 bits for the mantissa, this can fail (low bits will be lost). */
7995 real_ldexp (&m, &r, point_pos - exponent);
7996 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
7998 /* If the low part of the mantissa has bits set we cannot represent
7999 the value. */
8000 if (w.elt (0) != 0)
8001 return false;
8002 /* We have rejected the lower HOST_WIDE_INT, so update our
8003 understanding of how many bits lie in the mantissa and
8004 look only at the high HOST_WIDE_INT. */
8005 mantissa = w.elt (1);
8006 point_pos -= HOST_BITS_PER_WIDE_INT;
8008 /* We can only represent values with a mantissa of the form 1.xxxx. */
8009 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8010 if ((mantissa & mask) != 0)
8011 return false;
8013 /* Having filtered unrepresentable values, we may now remove all
8014 but the highest 5 bits. */
8015 mantissa >>= point_pos - 5;
8017 /* We cannot represent the value 0.0, so reject it. This is handled
8018 elsewhere. */
8019 if (mantissa == 0)
8020 return false;
8022 /* Then, as bit 4 is always set, we can mask it off, leaving
8023 the mantissa in the range [0, 15]. */
8024 mantissa &= ~(1 << 4);
8025 gcc_assert (mantissa <= 15);
8027 /* GCC internally does not use IEEE754-like encoding (where normalized
8028 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
8029 Our mantissa values are shifted 4 places to the left relative to
8030 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
8031 by 5 places to correct for GCC's representation. */
8032 exponent = 5 - exponent;
8034 return (exponent >= 0 && exponent <= 7);
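/* Return the asm template for moving the SIMD immediate CONST_VECTOR of
   MODE into a WIDTH-bit vector register, using FMOV for floating-point
   immediates and MOVI/MVNI (optionally shifted) otherwise.  */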
8037 char*
8038 aarch64_output_simd_mov_immediate (rtx const_vector,
8039 enum machine_mode mode,
8040 unsigned width)
8042 bool is_valid;
8043 static char templ[40];
8044 const char *mnemonic;
8045 const char *shift_op;
8046 unsigned int lane_count = 0;
8047 char element_char;
8049 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
8051 /* This will return true to show const_vector is legal for use as either
8052 an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
8053 also update INFO to show how the immediate should be generated. */
8054 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
8055 gcc_assert (is_valid);
8057 element_char = sizetochar (info.element_width);
8058 lane_count = width / info.element_width;
8060 mode = GET_MODE_INNER (mode);
8061 if (mode == SFmode || mode == DFmode)
8063 gcc_assert (info.shift == 0 && ! info.mvn);
8064 if (aarch64_float_const_zero_rtx_p (info.value))
8065 info.value = GEN_INT (0);
8066 else
8068 #define buf_size 20
8069 REAL_VALUE_TYPE r;
8070 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
8071 char float_buf[buf_size] = {'\0'};
8072 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
8073 #undef buf_size
8075 if (lane_count == 1)
8076 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
8077 else
8078 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
8079 lane_count, element_char, float_buf);
8080 return templ;
8084 mnemonic = info.mvn ? "mvni" : "movi";
8085 shift_op = info.msl ? "msl" : "lsl";
8087 if (lane_count == 1)
8088 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
8089 mnemonic, UINTVAL (info.value));
8090 else if (info.shift)
8091 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
8092 ", %s %d", mnemonic, lane_count, element_char,
8093 UINTVAL (info.value), shift_op, info.shift);
8094 else
8095 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
8096 mnemonic, lane_count, element_char, UINTVAL (info.value));
8097 return templ;
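/* As above, but for a scalar IMMEDIATE of MODE: duplicate it into a 64-bit
   vector and output the corresponding vector move.  */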
8100 char*
8101 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
8102 enum machine_mode mode)
8104 enum machine_mode vmode;
8106 gcc_assert (!VECTOR_MODE_P (mode));
8107 vmode = aarch64_simd_container_mode (mode, 64);
8108 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
8109 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
8112 /* Split operands into moves from op[1] + op[2] into op[0]. */
8114 void
8115 aarch64_split_combinev16qi (rtx operands[3])
8117 unsigned int dest = REGNO (operands[0]);
8118 unsigned int src1 = REGNO (operands[1]);
8119 unsigned int src2 = REGNO (operands[2]);
8120 enum machine_mode halfmode = GET_MODE (operands[1]);
8121 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
8122 rtx destlo, desthi;
8124 gcc_assert (halfmode == V16QImode);
8126 if (src1 == dest && src2 == dest + halfregs)
8128 /* No-op move. Can't split to nothing; emit something. */
8129 emit_note (NOTE_INSN_DELETED);
8130 return;
8133 /* Preserve register attributes for variable tracking. */
8134 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
8135 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
8136 GET_MODE_SIZE (halfmode));
8138 /* Special case of reversed high/low parts. */
8139 if (reg_overlap_mentioned_p (operands[2], destlo)
8140 && reg_overlap_mentioned_p (operands[1], desthi))
8142 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8143 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
8144 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8146 else if (!reg_overlap_mentioned_p (operands[2], destlo))
8148 /* Try to avoid unnecessary moves if part of the result
8149 is in the right place already. */
8150 if (src1 != dest)
8151 emit_move_insn (destlo, operands[1]);
8152 if (src2 != dest + halfregs)
8153 emit_move_insn (desthi, operands[2]);
8155 else
8157 if (src2 != dest + halfregs)
8158 emit_move_insn (desthi, operands[2]);
8159 if (src1 != dest)
8160 emit_move_insn (destlo, operands[1]);
8164 /* vec_perm support. */
8166 #define MAX_VECT_LEN 16
8168 struct expand_vec_perm_d
8170 rtx target, op0, op1;
8171 unsigned char perm[MAX_VECT_LEN];
8172 enum machine_mode vmode;
8173 unsigned char nelt;
8174 bool one_vector_p;
8175 bool testing_p;
8178 /* Generate a variable permutation. */
8180 static void
8181 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
8183 enum machine_mode vmode = GET_MODE (target);
8184 bool one_vector_p = rtx_equal_p (op0, op1);
8186 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
8187 gcc_checking_assert (GET_MODE (op0) == vmode);
8188 gcc_checking_assert (GET_MODE (op1) == vmode);
8189 gcc_checking_assert (GET_MODE (sel) == vmode);
8190 gcc_checking_assert (TARGET_SIMD);
8192 if (one_vector_p)
8194 if (vmode == V8QImode)
8196 /* Expand the argument to a V16QI mode by duplicating it. */
8197 rtx pair = gen_reg_rtx (V16QImode);
8198 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
8199 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8201 else
8203 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
8206 else
8208 rtx pair;
8210 if (vmode == V8QImode)
8212 pair = gen_reg_rtx (V16QImode);
8213 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
8214 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8216 else
8218 pair = gen_reg_rtx (OImode);
8219 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
8220 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
8225 void
8226 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
8228 enum machine_mode vmode = GET_MODE (target);
8229 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
8230 bool one_vector_p = rtx_equal_p (op0, op1);
8231 rtx rmask[MAX_VECT_LEN], mask;
8233 gcc_checking_assert (!BYTES_BIG_ENDIAN);
8235 /* The TBL instruction does not use a modulo index, so we must take care
8236 of that ourselves. */
8237 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
8238 for (i = 0; i < nelt; ++i)
8239 rmask[i] = mask;
8240 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
8241 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
8243 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
8246 /* Recognize patterns suitable for the TRN instructions. */
8247 static bool
8248 aarch64_evpc_trn (struct expand_vec_perm_d *d)
8250 unsigned int i, odd, mask, nelt = d->nelt;
8251 rtx out, in0, in1, x;
8252 rtx (*gen) (rtx, rtx, rtx);
8253 enum machine_mode vmode = d->vmode;
8255 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8256 return false;
8258 /* Note that these are little-endian tests.
8259 We correct for big-endian later. */
8260 if (d->perm[0] == 0)
8261 odd = 0;
8262 else if (d->perm[0] == 1)
8263 odd = 1;
8264 else
8265 return false;
8266 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8268 for (i = 0; i < nelt; i += 2)
8270 if (d->perm[i] != i + odd)
8271 return false;
8272 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
8273 return false;
8276 /* Success! */
8277 if (d->testing_p)
8278 return true;
8280 in0 = d->op0;
8281 in1 = d->op1;
8282 if (BYTES_BIG_ENDIAN)
8284 x = in0, in0 = in1, in1 = x;
8285 odd = !odd;
8287 out = d->target;
8289 if (odd)
8291 switch (vmode)
8293 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
8294 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
8295 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
8296 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
8297 case V4SImode: gen = gen_aarch64_trn2v4si; break;
8298 case V2SImode: gen = gen_aarch64_trn2v2si; break;
8299 case V2DImode: gen = gen_aarch64_trn2v2di; break;
8300 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
8301 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
8302 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
8303 default:
8304 return false;
8307 else
8309 switch (vmode)
8311 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
8312 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
8313 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
8314 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
8315 case V4SImode: gen = gen_aarch64_trn1v4si; break;
8316 case V2SImode: gen = gen_aarch64_trn1v2si; break;
8317 case V2DImode: gen = gen_aarch64_trn1v2di; break;
8318 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
8319 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
8320 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
8321 default:
8322 return false;
8326 emit_insn (gen (out, in0, in1));
8327 return true;
8330 /* Recognize patterns suitable for the UZP instructions. */
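/* For V4SImode, for instance, UZP1 selects the even-numbered elements
{ 0, 2, 4, 6 } of the concatenated inputs and UZP2 selects the odd-numbered
elements { 1, 3, 5, 7 }. */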
8331 static bool
8332 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
8334 unsigned int i, odd, mask, nelt = d->nelt;
8335 rtx out, in0, in1, x;
8336 rtx (*gen) (rtx, rtx, rtx);
8337 enum machine_mode vmode = d->vmode;
8339 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8340 return false;
8342 /* Note that these are little-endian tests.
8343 We correct for big-endian later. */
8344 if (d->perm[0] == 0)
8345 odd = 0;
8346 else if (d->perm[0] == 1)
8347 odd = 1;
8348 else
8349 return false;
8350 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8352 for (i = 0; i < nelt; i++)
8354 unsigned elt = (i * 2 + odd) & mask;
8355 if (d->perm[i] != elt)
8356 return false;
8359 /* Success! */
8360 if (d->testing_p)
8361 return true;
8363 in0 = d->op0;
8364 in1 = d->op1;
8365 if (BYTES_BIG_ENDIAN)
8367 x = in0, in0 = in1, in1 = x;
8368 odd = !odd;
8370 out = d->target;
8372 if (odd)
8374 switch (vmode)
8376 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
8377 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
8378 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
8379 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
8380 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
8381 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
8382 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
8383 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
8384 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
8385 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
8386 default:
8387 return false;
8390 else
8392 switch (vmode)
8394 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
8395 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
8396 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
8397 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
8398 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
8399 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
8400 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
8401 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
8402 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
8403 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
8404 default:
8405 return false;
8409 emit_insn (gen (out, in0, in1));
8410 return true;
8413 /* Recognize patterns suitable for the ZIP instructions. */
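/* For V4SImode, for instance, ZIP1 interleaves the low halves of the two
inputs, selecting { 0, 4, 1, 5 }, and ZIP2 interleaves the high halves,
selecting { 2, 6, 3, 7 }. */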
8414 static bool
8415 aarch64_evpc_zip (struct expand_vec_perm_d *d)
8417 unsigned int i, high, mask, nelt = d->nelt;
8418 rtx out, in0, in1, x;
8419 rtx (*gen) (rtx, rtx, rtx);
8420 enum machine_mode vmode = d->vmode;
8422 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8423 return false;
8425 /* Note that these are little-endian tests.
8426 We correct for big-endian later. */
8427 high = nelt / 2;
8428 if (d->perm[0] == high)
8429 /* Do nothing. */
8431 else if (d->perm[0] == 0)
8432 high = 0;
8433 else
8434 return false;
8435 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8437 for (i = 0; i < nelt / 2; i++)
8439 unsigned elt = (i + high) & mask;
8440 if (d->perm[i * 2] != elt)
8441 return false;
8442 elt = (elt + nelt) & mask;
8443 if (d->perm[i * 2 + 1] != elt)
8444 return false;
8447 /* Success! */
8448 if (d->testing_p)
8449 return true;
8451 in0 = d->op0;
8452 in1 = d->op1;
8453 if (BYTES_BIG_ENDIAN)
8455 x = in0, in0 = in1, in1 = x;
8456 high = !high;
8458 out = d->target;
8460 if (high)
8462 switch (vmode)
8464 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
8465 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
8466 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
8467 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
8468 case V4SImode: gen = gen_aarch64_zip2v4si; break;
8469 case V2SImode: gen = gen_aarch64_zip2v2si; break;
8470 case V2DImode: gen = gen_aarch64_zip2v2di; break;
8471 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
8472 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
8473 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
8474 default:
8475 return false;
8478 else
8480 switch (vmode)
8482 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
8483 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
8484 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
8485 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
8486 case V4SImode: gen = gen_aarch64_zip1v4si; break;
8487 case V2SImode: gen = gen_aarch64_zip1v2si; break;
8488 case V2DImode: gen = gen_aarch64_zip1v2di; break;
8489 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
8490 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
8491 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
8492 default:
8493 return false;
8497 emit_insn (gen (out, in0, in1));
8498 return true;
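/* Recognize permutations that broadcast a single element, e.g.
{ 2, 2, 2, 2 } for V4SImode, and expand them with a DUP-by-lane
instruction. */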
8501 static bool
8502 aarch64_evpc_dup (struct expand_vec_perm_d *d)
8504 rtx (*gen) (rtx, rtx, rtx);
8505 rtx out = d->target;
8506 rtx in0;
8507 enum machine_mode vmode = d->vmode;
8508 unsigned int i, elt, nelt = d->nelt;
8509 rtx lane;
8511 /* TODO: This may not be big-endian safe. */
8512 if (BYTES_BIG_ENDIAN)
8513 return false;
8515 elt = d->perm[0];
8516 for (i = 1; i < nelt; i++)
8518 if (elt != d->perm[i])
8519 return false;
8522 /* The generic preparation in aarch64_expand_vec_perm_const_1
8523 swaps the operand order and the permute indices if it finds
8524 d->perm[0] to be in the second operand. Thus, we can always
8525 use d->op0 and need not do any extra arithmetic to get the
8526 correct lane number. */
8527 in0 = d->op0;
8528 lane = GEN_INT (elt);
8530 switch (vmode)
8532 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
8533 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
8534 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
8535 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
8536 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
8537 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
8538 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
8539 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
8540 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
8541 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
8542 default:
8543 return false;
8546 emit_insn (gen (out, in0, lane));
8547 return true;
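/* Fall back to a full table lookup: materialize the permutation indices
(adjusted for big-endian) as a constant vector and let
aarch64_expand_vec_perm_1 emit the TBL sequence. */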
8550 static bool
8551 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
8553 rtx rperm[MAX_VECT_LEN], sel;
8554 enum machine_mode vmode = d->vmode;
8555 unsigned int i, nelt = d->nelt;
8557 if (d->testing_p)
8558 return true;
8560 /* Generic code will try constant permutation twice: once with the
8561 original mode and again with the elements lowered to QImode. So
8562 wait, and don't do the selector expansion ourselves. */
8563 if (vmode != V8QImode && vmode != V16QImode)
8564 return false;
8566 for (i = 0; i < nelt; ++i)
8568 int nunits = GET_MODE_NUNITS (vmode);
8570 /* With big-endian and two input vectors we end up with a mixed-endian
8571 mode on NEON. Reverse the index within each word but not the word
8572 itself. */
8573 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
8574 : d->perm[i]);
8576 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
8577 sel = force_reg (vmode, sel);
8579 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
8580 return true;
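/* Try to expand the constant permutation described by D: first with the
single-instruction recognizers (ZIP, UZP, TRN, DUP) and finally with a
generic TBL sequence. Return true on success. */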
8583 static bool
8584 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
8586 /* The pattern-matching functions above are written to look for a small
8587 number (0, 1 or N/2) at the start of the sequence. If we begin with an
8588 index from the second operand instead, we can swap the operands. */
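/* For V4SImode, for example, { 5, 1, 7, 3 } starts in the second operand;
XOR-ing every index with nelt gives { 1, 5, 3, 7 } on the swapped operands,
which the TRN2 recognizer then matches. */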
8589 if (d->perm[0] >= d->nelt)
8591 unsigned i, nelt = d->nelt;
8592 rtx x;
8594 gcc_assert (nelt == (nelt & -nelt));
8595 for (i = 0; i < nelt; ++i)
8596 d->perm[i] ^= nelt; /* Keep the same index, but in the other vector. */
8598 x = d->op0;
8599 d->op0 = d->op1;
8600 d->op1 = x;
8603 if (TARGET_SIMD)
8605 if (aarch64_evpc_zip (d))
8606 return true;
8607 else if (aarch64_evpc_uzp (d))
8608 return true;
8609 else if (aarch64_evpc_trn (d))
8610 return true;
8611 else if (aarch64_evpc_dup (d))
8612 return true;
8613 return aarch64_evpc_tbl (d);
8615 return false;
8618 /* Expand a vec_perm_const pattern. */
8620 bool
8621 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
8623 struct expand_vec_perm_d d;
8624 int i, nelt, which;
8626 d.target = target;
8627 d.op0 = op0;
8628 d.op1 = op1;
8630 d.vmode = GET_MODE (target);
8631 gcc_assert (VECTOR_MODE_P (d.vmode));
8632 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8633 d.testing_p = false;
8635 for (i = which = 0; i < nelt; ++i)
8637 rtx e = XVECEXP (sel, 0, i);
8638 int ei = INTVAL (e) & (2 * nelt - 1);
8639 which |= (ei < nelt ? 1 : 2);
8640 d.perm[i] = ei;
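/* WHICH is now a bitmask: bit 0 is set if any selector element refers to
OP0 and bit 1 if any refers to OP1. Fold single-input permutations
(including the case where both operands are the same register) onto a
single vector so the recognizers have less to match. */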
8643 switch (which)
8645 default:
8646 gcc_unreachable ();
8648 case 3:
8649 d.one_vector_p = false;
8650 if (!rtx_equal_p (op0, op1))
8651 break;
8653 /* The elements of PERM do not suggest that only the first operand
8654 is used, but both operands are identical. Allow easier matching
8655 of the permutation by folding the permutation into the single
8656 input vector. */
8657 /* Fall through. */
8658 case 2:
8659 for (i = 0; i < nelt; ++i)
8660 d.perm[i] &= nelt - 1;
8661 d.op0 = op1;
8662 d.one_vector_p = true;
8663 break;
8665 case 1:
8666 d.op1 = op0;
8667 d.one_vector_p = true;
8668 break;
8671 return aarch64_expand_vec_perm_const_1 (&d);
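/* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. Check whether the constant
selector SEL is expandable for VMODE by running the expander on scratch
registers inside a discarded insn sequence. */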
8674 static bool
8675 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
8676 const unsigned char *sel)
8678 struct expand_vec_perm_d d;
8679 unsigned int i, nelt, which;
8680 bool ret;
8682 d.vmode = vmode;
8683 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
8684 d.testing_p = true;
8685 memcpy (d.perm, sel, nelt);
8687 /* Calculate whether all elements are in one vector. */
8688 for (i = which = 0; i < nelt; ++i)
8690 unsigned char e = d.perm[i];
8691 gcc_assert (e < 2 * nelt);
8692 which |= (e < nelt ? 1 : 2);
8695 /* If all elements are from the second vector, reindex as if from the
8696 first vector. */
8697 if (which == 2)
8698 for (i = 0; i < nelt; ++i)
8699 d.perm[i] -= nelt;
8701 /* Check whether the mask can be applied to a single vector. */
8702 d.one_vector_p = (which != 3);
8704 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
8705 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
8706 if (!d.one_vector_p)
8707 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
8709 start_sequence ();
8710 ret = aarch64_expand_vec_perm_const_1 (&d);
8711 end_sequence ();
8713 return ret;
8716 /* Implement target hook CANNOT_CHANGE_MODE_CLASS. */
8717 bool
8718 aarch64_cannot_change_mode_class (enum machine_mode from,
8719 enum machine_mode to,
8720 enum reg_class rclass)
8722 /* Full-reg subregs are allowed on general regs, or on any class when
8723 the modes are the same size. */
8724 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
8725 || !reg_classes_intersect_p (FP_REGS, rclass))
8726 return false;
8728 /* Limited combinations of subregs are safe on FPREGs. In particular,
8729 1. Vector mode to scalar mode, where one unit of the vector is accessed.
8730 2. Scalar to scalar, for integer modes or same-size float modes.
8731 3. Vector to vector modes.
8732 4. On little-endian only, vector-structure to vector modes. */
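/* For example, under rule 1 a DImode subreg of a V2DImode value held in an
FP register accesses exactly one vector element and is therefore allowed. */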
8733 if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
8735 if (aarch64_vector_mode_supported_p (from)
8736 && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
8737 return false;
8739 if (GET_MODE_NUNITS (from) == 1
8740 && GET_MODE_NUNITS (to) == 1
8741 && (GET_MODE_CLASS (from) == MODE_INT
8742 || from == to))
8743 return false;
8745 if (aarch64_vector_mode_supported_p (from)
8746 && aarch64_vector_mode_supported_p (to))
8747 return false;
8749 /* Within a vector structure straddling multiple vector registers
8750 we are in a mixed-endian representation. As such, we can't
8751 easily change modes for BYTES_BIG_ENDIAN. Otherwise, we can
8752 switch between vectors and vector structures cheaply. */
8753 if (!BYTES_BIG_ENDIAN)
8754 if ((aarch64_vector_mode_supported_p (from)
8755 && aarch64_vect_struct_mode_p (to))
8756 || (aarch64_vector_mode_supported_p (to)
8757 && aarch64_vect_struct_mode_p (from)))
8758 return false;
8761 return true;
8764 /* Implement MODES_TIEABLE_P. */
8766 bool
8767 aarch64_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
8769 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
8770 return true;
8772 /* We specifically want to allow elements of "structure" modes to
8773 be tieable to the structure. This more general condition allows
8774 other rarer situations too. */
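/* For example, this allows a V4SImode vector to be tied to the OImode
structure that contains it. */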
8775 if (TARGET_SIMD
8776 && aarch64_vector_mode_p (mode1)
8777 && aarch64_vector_mode_p (mode2))
8778 return true;
8780 return false;
8783 #undef TARGET_ADDRESS_COST
8784 #define TARGET_ADDRESS_COST aarch64_address_cost
8786 /* This hook determines whether unnamed bitfields affect the alignment
8787 of the containing structure. The hook returns true if the structure
8788 should inherit the alignment requirements of an unnamed bitfield's
8789 type. */
8790 #undef TARGET_ALIGN_ANON_BITFIELD
8791 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
8793 #undef TARGET_ASM_ALIGNED_DI_OP
8794 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
8796 #undef TARGET_ASM_ALIGNED_HI_OP
8797 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
8799 #undef TARGET_ASM_ALIGNED_SI_OP
8800 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
8802 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
8803 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
8804 hook_bool_const_tree_hwi_hwi_const_tree_true
8806 #undef TARGET_ASM_FILE_START
8807 #define TARGET_ASM_FILE_START aarch64_start_file
8809 #undef TARGET_ASM_OUTPUT_MI_THUNK
8810 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
8812 #undef TARGET_ASM_SELECT_RTX_SECTION
8813 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
8815 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
8816 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
8818 #undef TARGET_BUILD_BUILTIN_VA_LIST
8819 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
8821 #undef TARGET_CALLEE_COPIES
8822 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
8824 #undef TARGET_CAN_ELIMINATE
8825 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
8827 #undef TARGET_CANNOT_FORCE_CONST_MEM
8828 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
8830 #undef TARGET_CONDITIONAL_REGISTER_USAGE
8831 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
8833 /* Only the least significant bit is used for initialization guard
8834 variables. */
8835 #undef TARGET_CXX_GUARD_MASK_BIT
8836 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
8838 #undef TARGET_C_MODE_FOR_SUFFIX
8839 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
8841 #ifdef TARGET_BIG_ENDIAN_DEFAULT
8842 #undef TARGET_DEFAULT_TARGET_FLAGS
8843 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
8844 #endif
8846 #undef TARGET_CLASS_MAX_NREGS
8847 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
8849 #undef TARGET_BUILTIN_DECL
8850 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
8852 #undef TARGET_EXPAND_BUILTIN
8853 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
8855 #undef TARGET_EXPAND_BUILTIN_VA_START
8856 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
8858 #undef TARGET_FOLD_BUILTIN
8859 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
8861 #undef TARGET_FUNCTION_ARG
8862 #define TARGET_FUNCTION_ARG aarch64_function_arg
8864 #undef TARGET_FUNCTION_ARG_ADVANCE
8865 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
8867 #undef TARGET_FUNCTION_ARG_BOUNDARY
8868 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
8870 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
8871 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
8873 #undef TARGET_FUNCTION_VALUE
8874 #define TARGET_FUNCTION_VALUE aarch64_function_value
8876 #undef TARGET_FUNCTION_VALUE_REGNO_P
8877 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
8879 #undef TARGET_FRAME_POINTER_REQUIRED
8880 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
8882 #undef TARGET_GIMPLE_FOLD_BUILTIN
8883 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
8885 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
8886 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
8888 #undef TARGET_INIT_BUILTINS
8889 #define TARGET_INIT_BUILTINS aarch64_init_builtins
8891 #undef TARGET_LEGITIMATE_ADDRESS_P
8892 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
8894 #undef TARGET_LEGITIMATE_CONSTANT_P
8895 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
8897 #undef TARGET_LIBGCC_CMP_RETURN_MODE
8898 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
8900 #undef TARGET_LRA_P
8901 #define TARGET_LRA_P aarch64_lra_p
8903 #undef TARGET_MANGLE_TYPE
8904 #define TARGET_MANGLE_TYPE aarch64_mangle_type
8906 #undef TARGET_MEMORY_MOVE_COST
8907 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
8909 #undef TARGET_MUST_PASS_IN_STACK
8910 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
8912 /* This target hook should return true if accesses to volatile bitfields
8913 should use the narrowest mode possible. It should return false if these
8914 accesses should use the bitfield container type. */
8915 #undef TARGET_NARROW_VOLATILE_BITFIELD
8916 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
8918 #undef TARGET_OPTION_OVERRIDE
8919 #define TARGET_OPTION_OVERRIDE aarch64_override_options
8921 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
8922 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
8923 aarch64_override_options_after_change
8925 #undef TARGET_PASS_BY_REFERENCE
8926 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
8928 #undef TARGET_PREFERRED_RELOAD_CLASS
8929 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
8931 #undef TARGET_SECONDARY_RELOAD
8932 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
8934 #undef TARGET_SHIFT_TRUNCATION_MASK
8935 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
8937 #undef TARGET_SETUP_INCOMING_VARARGS
8938 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
8940 #undef TARGET_STRUCT_VALUE_RTX
8941 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
8943 #undef TARGET_REGISTER_MOVE_COST
8944 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
8946 #undef TARGET_RETURN_IN_MEMORY
8947 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
8949 #undef TARGET_RETURN_IN_MSB
8950 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
8952 #undef TARGET_RTX_COSTS
8953 #define TARGET_RTX_COSTS aarch64_rtx_costs
8955 #undef TARGET_SCHED_ISSUE_RATE
8956 #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
8958 #undef TARGET_TRAMPOLINE_INIT
8959 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
8961 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
8962 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
8964 #undef TARGET_VECTOR_MODE_SUPPORTED_P
8965 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
8967 #undef TARGET_ARRAY_MODE_SUPPORTED_P
8968 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
8970 #undef TARGET_VECTORIZE_ADD_STMT_COST
8971 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
8973 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
8974 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
8975 aarch64_builtin_vectorization_cost
8977 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
8978 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
8980 #undef TARGET_VECTORIZE_BUILTINS
8981 #define TARGET_VECTORIZE_BUILTINS
8983 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
8984 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
8985 aarch64_builtin_vectorized_function
8987 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
8988 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
8989 aarch64_autovectorize_vector_sizes
8991 /* Section anchor support. */
8993 #undef TARGET_MIN_ANCHOR_OFFSET
8994 #define TARGET_MIN_ANCHOR_OFFSET -256
8996 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
8997 byte offset; we can do much more for larger data types, but have no way
8998 to determine the size of the access. We assume accesses are aligned. */
8999 #undef TARGET_MAX_ANCHOR_OFFSET
9000 #define TARGET_MAX_ANCHOR_OFFSET 4095
9002 #undef TARGET_VECTOR_ALIGNMENT
9003 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
9005 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
9006 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
9007 aarch64_simd_vector_alignment_reachable
9009 /* vec_perm support. */
9011 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
9012 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
9013 aarch64_vectorize_vec_perm_const_ok
9016 #undef TARGET_FIXED_CONDITION_CODE_REGS
9017 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
9019 #undef TARGET_FLAGS_REGNUM
9020 #define TARGET_FLAGS_REGNUM CC_REGNUM
9022 struct gcc_target targetm = TARGET_INITIALIZER;
9024 #include "gt-aarch64.h"