1 /* Machine description for AArch64 architecture.
2 Copyright (C) 2009-2014 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "insn-codes.h"
26 #include "rtl.h"
27 #include "insn-attr.h"
28 #include "tree.h"
29 #include "stringpool.h"
30 #include "stor-layout.h"
31 #include "calls.h"
32 #include "varasm.h"
33 #include "regs.h"
34 #include "df.h"
35 #include "hard-reg-set.h"
36 #include "output.h"
37 #include "expr.h"
38 #include "reload.h"
39 #include "toplev.h"
40 #include "target.h"
41 #include "target-def.h"
42 #include "targhooks.h"
43 #include "ggc.h"
44 #include "function.h"
45 #include "tm_p.h"
46 #include "recog.h"
47 #include "langhooks.h"
48 #include "diagnostic-core.h"
49 #include "pointer-set.h"
50 #include "hash-table.h"
51 #include "vec.h"
52 #include "basic-block.h"
53 #include "tree-ssa-alias.h"
54 #include "internal-fn.h"
55 #include "gimple-fold.h"
56 #include "tree-eh.h"
57 #include "gimple-expr.h"
58 #include "is-a.h"
59 #include "gimple.h"
60 #include "gimplify.h"
61 #include "optabs.h"
62 #include "dwarf2.h"
63 #include "cfgloop.h"
64 #include "tree-vectorizer.h"
65 #include "config/arm/aarch-cost-tables.h"
66 #include "dumpfile.h"
68 /* Defined for convenience. */
69 #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
71 /* Classifies an address.
73 ADDRESS_REG_IMM
74 A simple base register plus immediate offset.
76 ADDRESS_REG_WB
77 A base register indexed by immediate offset with writeback.
79 ADDRESS_REG_REG
80 A base register indexed by (optionally scaled) register.
82 ADDRESS_REG_UXTW
83 A base register indexed by (optionally scaled) zero-extended register.
85 ADDRESS_REG_SXTW
86 A base register indexed by (optionally scaled) sign-extended register.
88 ADDRESS_LO_SUM
89 A LO_SUM rtx with a base register and "LO12" symbol relocation.
91 ADDRESS_SYMBOLIC
92 A constant symbolic address, in pc-relative literal pool. */
94 enum aarch64_address_type {
95 ADDRESS_REG_IMM,
96 ADDRESS_REG_WB,
97 ADDRESS_REG_REG,
98 ADDRESS_REG_UXTW,
99 ADDRESS_REG_SXTW,
100 ADDRESS_LO_SUM,
101 ADDRESS_SYMBOLIC
104 struct aarch64_address_info {
105 enum aarch64_address_type type;
106 rtx base;
107 rtx offset;
108 int shift;
109 enum aarch64_symbol_type symbol_type;
112 struct simd_immediate_info
114 rtx value;
115 int shift;
116 int element_width;
117 bool mvn;
118 bool msl;
121 /* The current code model. */
122 enum aarch64_code_model aarch64_cmodel;
124 #ifdef HAVE_AS_TLS
125 #undef TARGET_HAVE_TLS
126 #define TARGET_HAVE_TLS 1
127 #endif
129 static bool aarch64_lra_p (void);
130 static bool aarch64_composite_type_p (const_tree, enum machine_mode);
131 static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
132 const_tree,
133 enum machine_mode *, int *,
134 bool *);
135 static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
136 static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
137 static void aarch64_override_options_after_change (void);
138 static bool aarch64_vector_mode_supported_p (enum machine_mode);
139 static unsigned bit_count (unsigned HOST_WIDE_INT);
140 static bool aarch64_const_vec_all_same_int_p (rtx,
141 HOST_WIDE_INT, HOST_WIDE_INT);
143 static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
144 const unsigned char *sel);
145 static int aarch64_address_cost (rtx, enum machine_mode, addr_space_t, bool);
147 /* The processor for which instructions should be scheduled. */
148 enum aarch64_processor aarch64_tune = cortexa53;
150 /* The current tuning set. */
151 const struct tune_params *aarch64_tune_params;
153 /* Mask to specify which instructions we are allowed to generate. */
154 unsigned long aarch64_isa_flags = 0;
156 /* Mask to specify which instruction scheduling options should be used. */
157 unsigned long aarch64_tune_flags = 0;
159 /* Tuning parameters. */
161 #if HAVE_DESIGNATED_INITIALIZERS
162 #define NAMED_PARAM(NAME, VAL) .NAME = (VAL)
163 #else
164 #define NAMED_PARAM(NAME, VAL) (VAL)
165 #endif
167 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
168 __extension__
169 #endif
171 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
172 __extension__
173 #endif
174 static const struct cpu_addrcost_table generic_addrcost_table =
176 #if HAVE_DESIGNATED_INITIALIZERS
177 .addr_scale_costs =
178 #endif
180 NAMED_PARAM (qi, 0),
181 NAMED_PARAM (hi, 0),
182 NAMED_PARAM (si, 0),
183 NAMED_PARAM (ti, 0),
185 NAMED_PARAM (pre_modify, 0),
186 NAMED_PARAM (post_modify, 0),
187 NAMED_PARAM (register_offset, 0),
188 NAMED_PARAM (register_extend, 0),
189 NAMED_PARAM (imm_offset, 0)
192 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
193 __extension__
194 #endif
195 static const struct cpu_addrcost_table cortexa57_addrcost_table =
197 #if HAVE_DESIGNATED_INITIALIZERS
198 .addr_scale_costs =
199 #endif
201 NAMED_PARAM (qi, 0),
202 NAMED_PARAM (hi, 1),
203 NAMED_PARAM (si, 0),
204 NAMED_PARAM (ti, 1),
206 NAMED_PARAM (pre_modify, 0),
207 NAMED_PARAM (post_modify, 0),
208 NAMED_PARAM (register_offset, 0),
209 NAMED_PARAM (register_extend, 0),
210 NAMED_PARAM (imm_offset, 0),
213 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
214 __extension__
215 #endif
216 static const struct cpu_regmove_cost generic_regmove_cost =
218 NAMED_PARAM (GP2GP, 1),
219 NAMED_PARAM (GP2FP, 2),
220 NAMED_PARAM (FP2GP, 2),
221 /* We currently do not provide direct support for TFmode Q->Q move.
222 Therefore we need to raise the cost above 2 in order to have
223 reload handle the situation. */
224 NAMED_PARAM (FP2FP, 4)
227 /* Generic costs for vector insn classes. */
228 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
229 __extension__
230 #endif
231 static const struct cpu_vector_cost generic_vector_cost =
233 NAMED_PARAM (scalar_stmt_cost, 1),
234 NAMED_PARAM (scalar_load_cost, 1),
235 NAMED_PARAM (scalar_store_cost, 1),
236 NAMED_PARAM (vec_stmt_cost, 1),
237 NAMED_PARAM (vec_to_scalar_cost, 1),
238 NAMED_PARAM (scalar_to_vec_cost, 1),
239 NAMED_PARAM (vec_align_load_cost, 1),
240 NAMED_PARAM (vec_unalign_load_cost, 1),
241 NAMED_PARAM (vec_unalign_store_cost, 1),
242 NAMED_PARAM (vec_store_cost, 1),
243 NAMED_PARAM (cond_taken_branch_cost, 3),
244 NAMED_PARAM (cond_not_taken_branch_cost, 1)
247 /* Generic costs for vector insn classes. */
248 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
249 __extension__
250 #endif
251 static const struct cpu_vector_cost cortexa57_vector_cost =
253 NAMED_PARAM (scalar_stmt_cost, 1),
254 NAMED_PARAM (scalar_load_cost, 4),
255 NAMED_PARAM (scalar_store_cost, 1),
256 NAMED_PARAM (vec_stmt_cost, 3),
257 NAMED_PARAM (vec_to_scalar_cost, 8),
258 NAMED_PARAM (scalar_to_vec_cost, 8),
259 NAMED_PARAM (vec_align_load_cost, 5),
260 NAMED_PARAM (vec_unalign_load_cost, 5),
261 NAMED_PARAM (vec_unalign_store_cost, 1),
262 NAMED_PARAM (vec_store_cost, 1),
263 NAMED_PARAM (cond_taken_branch_cost, 1),
264 NAMED_PARAM (cond_not_taken_branch_cost, 1)
267 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
268 __extension__
269 #endif
270 static const struct tune_params generic_tunings =
272 &cortexa57_extra_costs,
273 &generic_addrcost_table,
274 &generic_regmove_cost,
275 &generic_vector_cost,
276 NAMED_PARAM (memmov_cost, 4),
277 NAMED_PARAM (issue_rate, 2)
280 static const struct tune_params cortexa53_tunings =
282 &cortexa53_extra_costs,
283 &generic_addrcost_table,
284 &generic_regmove_cost,
285 &generic_vector_cost,
286 NAMED_PARAM (memmov_cost, 4),
287 NAMED_PARAM (issue_rate, 2)
290 static const struct tune_params cortexa57_tunings =
292 &cortexa57_extra_costs,
293 &cortexa57_addrcost_table,
294 &generic_regmove_cost,
295 &cortexa57_vector_cost,
296 NAMED_PARAM (memmov_cost, 4),
297 NAMED_PARAM (issue_rate, 3)
300 /* A processor implementing AArch64. */
301 struct processor
303 const char *const name;
304 enum aarch64_processor core;
305 const char *arch;
306 const unsigned long flags;
307 const struct tune_params *const tune;
310 /* Processor cores implementing AArch64. */
311 static const struct processor all_cores[] =
313 #define AARCH64_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
314 {NAME, IDENT, #ARCH, FLAGS | AARCH64_FL_FOR_ARCH##ARCH, &COSTS##_tunings},
315 #include "aarch64-cores.def"
316 #undef AARCH64_CORE
317 {"generic", cortexa53, "8", AARCH64_FL_FPSIMD | AARCH64_FL_FOR_ARCH8, &generic_tunings},
318 {NULL, aarch64_none, NULL, 0, NULL}
321 /* Architectures implementing AArch64. */
322 static const struct processor all_architectures[] =
324 #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
325 {NAME, CORE, #ARCH, FLAGS, NULL},
326 #include "aarch64-arches.def"
327 #undef AARCH64_ARCH
328 {NULL, aarch64_none, NULL, 0, NULL}
331 /* Target specification.  These are populated as command-line arguments
332 are processed, or NULL if not specified. */
333 static const struct processor *selected_arch;
334 static const struct processor *selected_cpu;
335 static const struct processor *selected_tune;
337 #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
339 /* An ISA extension in the co-processor and main instruction set space. */
340 struct aarch64_option_extension
342 const char *const name;
343 const unsigned long flags_on;
344 const unsigned long flags_off;
347 /* ISA extensions in AArch64. */
348 static const struct aarch64_option_extension all_extensions[] =
350 #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
351 {NAME, FLAGS_ON, FLAGS_OFF},
352 #include "aarch64-option-extensions.def"
353 #undef AARCH64_OPT_EXTENSION
354 {NULL, 0, 0}
357 /* Used to track the size of an address when generating a pre/post
358 increment address. */
359 static enum machine_mode aarch64_memory_reference_mode;
361 /* Used to force GTY into this file. */
362 static GTY(()) int gty_dummy;
364 /* A table of valid AArch64 "bitmask immediate" values for
365 logical instructions. */
367 #define AARCH64_NUM_BITMASKS 5334
368 static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
370 typedef enum aarch64_cond_code
372 AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
373 AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
374 AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
376 aarch64_cc;
378 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
380 /* The condition codes of the processor, and the inverse function. */
381 static const char * const aarch64_condition_codes[] =
383 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
384 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
387 /* Provide a mapping from gcc register numbers to dwarf register numbers. */
388 unsigned
389 aarch64_dbx_register_number (unsigned regno)
391 if (GP_REGNUM_P (regno))
392 return AARCH64_DWARF_R0 + regno - R0_REGNUM;
393 else if (regno == SP_REGNUM)
394 return AARCH64_DWARF_SP;
395 else if (FP_REGNUM_P (regno))
396 return AARCH64_DWARF_V0 + regno - V0_REGNUM;
398 /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
399 equivalent DWARF register. */
400 return DWARF_FRAME_REGISTERS;
403 /* Return TRUE if MODE is any of the large INT modes. */
404 static bool
405 aarch64_vect_struct_mode_p (enum machine_mode mode)
407 return mode == OImode || mode == CImode || mode == XImode;
410 /* Return TRUE if MODE is any of the vector modes. */
411 static bool
412 aarch64_vector_mode_p (enum machine_mode mode)
414 return aarch64_vector_mode_supported_p (mode)
415 || aarch64_vect_struct_mode_p (mode);
418 /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
419 static bool
420 aarch64_array_mode_supported_p (enum machine_mode mode,
421 unsigned HOST_WIDE_INT nelems)
423 if (TARGET_SIMD
424 && AARCH64_VALID_SIMD_QREG_MODE (mode)
425 && (nelems >= 2 && nelems <= 4))
426 return true;
428 return false;
431 /* Implement HARD_REGNO_NREGS. */
434 aarch64_hard_regno_nregs (unsigned regno, enum machine_mode mode)
436 switch (aarch64_regno_regclass (regno))
438 case FP_REGS:
439 case FP_LO_REGS:
440 return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
441 default:
442 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
444 gcc_unreachable ();
447 /* Implement HARD_REGNO_MODE_OK. */
450 aarch64_hard_regno_mode_ok (unsigned regno, enum machine_mode mode)
452 if (GET_MODE_CLASS (mode) == MODE_CC)
453 return regno == CC_REGNUM;
455 if (regno == SP_REGNUM)
456 /* The purpose of comparing with ptr_mode is to support the
457 global register variable associated with the stack pointer
458 register via the syntax of asm ("wsp") in ILP32. */
459 return mode == Pmode || mode == ptr_mode;
461 if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
462 return mode == Pmode;
464 if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
465 return 1;
467 if (FP_REGNUM_P (regno))
469 if (aarch64_vect_struct_mode_p (mode))
470 return
471 (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
472 else
473 return 1;
476 return 0;
479 /* Implement HARD_REGNO_CALLER_SAVE_MODE. */
480 enum machine_mode
481 aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
482 enum machine_mode mode)
484 /* Handle modes that fit within single registers. */
485 if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
487 if (GET_MODE_SIZE (mode) >= 4)
488 return mode;
489 else
490 return SImode;
492 /* Fall back to generic for multi-reg and very large modes. */
493 else
494 return choose_hard_reg_mode (regno, nregs, false);
497 /* Return true if calls to DECL should be treated as
498 long-calls (i.e. called via a register).  */
499 static bool
500 aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
502 return false;
505 /* Return true if calls to symbol-ref SYM should be treated as
506 long-calls (i.e. called via a register).  */
507 bool
508 aarch64_is_long_call_p (rtx sym)
510 return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
513 /* Return true if the offsets to a zero/sign-extract operation
514 represent an expression that matches an extend operation. The
515 operands represent the parameters from
517 (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
518 bool
519 aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
520 rtx extract_imm)
522 HOST_WIDE_INT mult_val, extract_val;
524 if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
525 return false;
527 mult_val = INTVAL (mult_imm);
528 extract_val = INTVAL (extract_imm);
530 if (extract_val > 8
531 && extract_val < GET_MODE_BITSIZE (mode)
532 && exact_log2 (extract_val & ~7) > 0
533 && (extract_val & 7) <= 4
534 && mult_val == (1 << (extract_val & 7)))
535 return true;
537 return false;
540 /* Emit an insn that's a simple single-set. Both the operands must be
541 known to be valid. */
542 inline static rtx
543 emit_set_insn (rtx x, rtx y)
545 return emit_insn (gen_rtx_SET (VOIDmode, x, y));
548 /* X and Y are two things to compare using CODE. Emit the compare insn and
549 return the rtx for register 0 in the proper mode. */
551 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
553 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
554 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
556 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
557 return cc_reg;
560 /* Build the SYMBOL_REF for __tls_get_addr. */
562 static GTY(()) rtx tls_get_addr_libfunc;
565 aarch64_tls_get_addr (void)
567 if (!tls_get_addr_libfunc)
568 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
569 return tls_get_addr_libfunc;
572 /* Return the TLS model to use for ADDR. */
574 static enum tls_model
575 tls_symbolic_operand_type (rtx addr)
577 enum tls_model tls_kind = TLS_MODEL_NONE;
578 rtx sym, addend;
580 if (GET_CODE (addr) == CONST)
582 split_const (addr, &sym, &addend);
583 if (GET_CODE (sym) == SYMBOL_REF)
584 tls_kind = SYMBOL_REF_TLS_MODEL (sym);
586 else if (GET_CODE (addr) == SYMBOL_REF)
587 tls_kind = SYMBOL_REF_TLS_MODEL (addr);
589 return tls_kind;
592 /* We'll allow lo_sums in our legitimate addresses so that combine
593    can take care of combining addresses where necessary, but for
594    generation purposes we'll generate the address
595    as:
596    RTL                               Absolute
597    tmp = hi (symbol_ref);            adrp  x1, foo
598    dest = lo_sum (tmp, symbol_ref);  add   dest, x1, :lo_12:foo
601    PIC                               TLS
602    adrp x1, :got:foo                 adrp tmp, :tlsgd:foo
603    ldr  x1, [:got_lo12:foo]          add  dest, tmp, :tlsgd_lo12:foo
604                                      bl   __tls_get_addr
607 Load TLS symbol, depending on TLS mechanism and TLS access model.
609 Global Dynamic - Traditional TLS:
610 adrp tmp, :tlsgd:imm
611 add dest, tmp, #:tlsgd_lo12:imm
612 bl __tls_get_addr
614 Global Dynamic - TLS Descriptors:
615 adrp dest, :tlsdesc:imm
616 ldr tmp, [dest, #:tlsdesc_lo12:imm]
617 add dest, dest, #:tlsdesc_lo12:imm
618 blr tmp
619 mrs tp, tpidr_el0
620 add dest, dest, tp
622 Initial Exec:
623 mrs tp, tpidr_el0
624 adrp tmp, :gottprel:imm
625 ldr dest, [tmp, #:gottprel_lo12:imm]
626 add dest, dest, tp
628 Local Exec:
629 mrs tp, tpidr_el0
630 add t0, tp, #:tprel_hi12:imm
631 add t0, #:tprel_lo12_nc:imm
634 static void
635 aarch64_load_symref_appropriately (rtx dest, rtx imm,
636 enum aarch64_symbol_type type)
638 switch (type)
640 case SYMBOL_SMALL_ABSOLUTE:
642 /* In ILP32, the mode of dest can be either SImode or DImode. */
643 rtx tmp_reg = dest;
644 enum machine_mode mode = GET_MODE (dest);
646 gcc_assert (mode == Pmode || mode == ptr_mode);
648 if (can_create_pseudo_p ())
649 tmp_reg = gen_reg_rtx (mode);
651 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
652 emit_insn (gen_add_losym (dest, tmp_reg, imm));
653 return;
656 case SYMBOL_TINY_ABSOLUTE:
657 emit_insn (gen_rtx_SET (Pmode, dest, imm));
658 return;
660 case SYMBOL_SMALL_GOT:
662 /* In ILP32, the mode of dest can be either SImode or DImode,
663 while the got entry is always of SImode size. The mode of
664 dest depends on how dest is used: if dest is assigned to a
665 pointer (e.g. in the memory), it has SImode; it may have
666 DImode if dest is dereferenced to access the memory.
667 This is why we have to handle three different ldr_got_small
668 patterns here (two patterns for ILP32). */
669 rtx tmp_reg = dest;
670 enum machine_mode mode = GET_MODE (dest);
672 if (can_create_pseudo_p ())
673 tmp_reg = gen_reg_rtx (mode);
675 emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
676 if (mode == ptr_mode)
678 if (mode == DImode)
679 emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
680 else
681 emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
683 else
685 gcc_assert (mode == Pmode);
686 emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
689 return;
692 case SYMBOL_SMALL_TLSGD:
694 rtx insns;
695 rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
697 start_sequence ();
698 emit_call_insn (gen_tlsgd_small (result, imm));
699 insns = get_insns ();
700 end_sequence ();
702 RTL_CONST_CALL_P (insns) = 1;
703 emit_libcall_block (insns, dest, result, imm);
704 return;
707 case SYMBOL_SMALL_TLSDESC:
709 enum machine_mode mode = GET_MODE (dest);
710 rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
711 rtx tp;
713 gcc_assert (mode == Pmode || mode == ptr_mode);
715 /* In ILP32, the got entry is always of SImode size. Unlike
716 small GOT, the dest is fixed at reg 0. */
717 if (TARGET_ILP32)
718 emit_insn (gen_tlsdesc_small_si (imm));
719 else
720 emit_insn (gen_tlsdesc_small_di (imm));
721 tp = aarch64_load_tp (NULL);
723 if (mode != Pmode)
724 tp = gen_lowpart (mode, tp);
726 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
727 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
728 return;
731 case SYMBOL_SMALL_GOTTPREL:
733 /* In ILP32, the mode of dest can be either SImode or DImode,
734 while the got entry is always of SImode size. The mode of
735 dest depends on how dest is used: if dest is assigned to a
736 pointer (e.g. in the memory), it has SImode; it may have
737 DImode if dest is dereferenced to access the memory.
738 This is why we have to handle three different tlsie_small
739 patterns here (two patterns for ILP32). */
740 enum machine_mode mode = GET_MODE (dest);
741 rtx tmp_reg = gen_reg_rtx (mode);
742 rtx tp = aarch64_load_tp (NULL);
744 if (mode == ptr_mode)
746 if (mode == DImode)
747 emit_insn (gen_tlsie_small_di (tmp_reg, imm));
748 else
750 emit_insn (gen_tlsie_small_si (tmp_reg, imm));
751 tp = gen_lowpart (mode, tp);
754 else
756 gcc_assert (mode == Pmode);
757 emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
760 emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
761 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
762 return;
765 case SYMBOL_SMALL_TPREL:
767 rtx tp = aarch64_load_tp (NULL);
768 emit_insn (gen_tlsle_small (dest, tp, imm));
769 set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
770 return;
773 case SYMBOL_TINY_GOT:
774 emit_insn (gen_ldr_got_tiny (dest, imm));
775 return;
777 default:
778 gcc_unreachable ();
782 /* Emit a move from SRC to DEST. Assume that the move expanders can
783 handle all moves if !can_create_pseudo_p (). The distinction is
784 important because, unlike emit_move_insn, the move expanders know
785 how to force Pmode objects into the constant pool even when the
786 constant pool address is not itself legitimate. */
787 static rtx
788 aarch64_emit_move (rtx dest, rtx src)
790 return (can_create_pseudo_p ()
791 ? emit_move_insn (dest, src)
792 : emit_move_insn_1 (dest, src));
795 /* Split a 128-bit move operation into two 64-bit move operations,
796 taking care to handle partial overlap of register to register
797 copies. Special cases are needed when moving between GP regs and
798 FP regs. SRC can be a register, constant or memory; DST a register
799 or memory. If either operand is memory it must not have any side
800 effects. */
801 void
802 aarch64_split_128bit_move (rtx dst, rtx src)
804 rtx dst_lo, dst_hi;
805 rtx src_lo, src_hi;
807 enum machine_mode mode = GET_MODE (dst);
809 gcc_assert (mode == TImode || mode == TFmode);
810 gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
811 gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
813 if (REG_P (dst) && REG_P (src))
815 int src_regno = REGNO (src);
816 int dst_regno = REGNO (dst);
818 /* Handle FP <-> GP regs. */
819 if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
821 src_lo = gen_lowpart (word_mode, src);
822 src_hi = gen_highpart (word_mode, src);
824 if (mode == TImode)
826 emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
827 emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
829 else
831 emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
832 emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
834 return;
836 else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
838 dst_lo = gen_lowpart (word_mode, dst);
839 dst_hi = gen_highpart (word_mode, dst);
841 if (mode == TImode)
843 emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
844 emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
846 else
848 emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
849 emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
851 return;
855 dst_lo = gen_lowpart (word_mode, dst);
856 dst_hi = gen_highpart (word_mode, dst);
857 src_lo = gen_lowpart (word_mode, src);
858 src_hi = gen_highpart_mode (word_mode, mode, src);
860 /* At most one pairing may overlap. */
861 if (reg_overlap_mentioned_p (dst_lo, src_hi))
863 aarch64_emit_move (dst_hi, src_hi);
864 aarch64_emit_move (dst_lo, src_lo);
866 else
868 aarch64_emit_move (dst_lo, src_lo);
869 aarch64_emit_move (dst_hi, src_hi);
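/* Return true if a 128-bit move from SRC to DST needs to be split into
   two 64-bit moves, i.e. unless it is a plain FP-to-FP register copy.  */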
873 bool
874 aarch64_split_128bit_move_p (rtx dst, rtx src)
876 return (! REG_P (src)
877 || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
880 /* Split a complex SIMD combine. */
882 void
883 aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
885 enum machine_mode src_mode = GET_MODE (src1);
886 enum machine_mode dst_mode = GET_MODE (dst);
888 gcc_assert (VECTOR_MODE_P (dst_mode));
890 if (REG_P (dst) && REG_P (src1) && REG_P (src2))
892 rtx (*gen) (rtx, rtx, rtx);
894 switch (src_mode)
896 case V8QImode:
897 gen = gen_aarch64_simd_combinev8qi;
898 break;
899 case V4HImode:
900 gen = gen_aarch64_simd_combinev4hi;
901 break;
902 case V2SImode:
903 gen = gen_aarch64_simd_combinev2si;
904 break;
905 case V2SFmode:
906 gen = gen_aarch64_simd_combinev2sf;
907 break;
908 case DImode:
909 gen = gen_aarch64_simd_combinedi;
910 break;
911 case DFmode:
912 gen = gen_aarch64_simd_combinedf;
913 break;
914 default:
915 gcc_unreachable ();
918 emit_insn (gen (dst, src1, src2));
919 return;
923 /* Split a complex SIMD move. */
925 void
926 aarch64_split_simd_move (rtx dst, rtx src)
928 enum machine_mode src_mode = GET_MODE (src);
929 enum machine_mode dst_mode = GET_MODE (dst);
931 gcc_assert (VECTOR_MODE_P (dst_mode));
933 if (REG_P (dst) && REG_P (src))
935 rtx (*gen) (rtx, rtx);
937 gcc_assert (VECTOR_MODE_P (src_mode));
939 switch (src_mode)
941 case V16QImode:
942 gen = gen_aarch64_split_simd_movv16qi;
943 break;
944 case V8HImode:
945 gen = gen_aarch64_split_simd_movv8hi;
946 break;
947 case V4SImode:
948 gen = gen_aarch64_split_simd_movv4si;
949 break;
950 case V2DImode:
951 gen = gen_aarch64_split_simd_movv2di;
952 break;
953 case V4SFmode:
954 gen = gen_aarch64_split_simd_movv4sf;
955 break;
956 case V2DFmode:
957 gen = gen_aarch64_split_simd_movv2df;
958 break;
959 default:
960 gcc_unreachable ();
963 emit_insn (gen (dst, src));
964 return;
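/* Force VALUE into a register of MODE: use a fresh pseudo when one can
   be created, otherwise reuse X as a scratch register and return it.  */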
968 static rtx
969 aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
971 if (can_create_pseudo_p ())
972 return force_reg (mode, value);
973 else
975 x = aarch64_emit_move (x, value);
976 return x;
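/* Return a legitimate expression for REG plus OFFSET in MODE, first
   materialising the offset in TEMP when it is not a valid "add"
   immediate.  */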
981 static rtx
982 aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
984 if (!aarch64_plus_immediate (GEN_INT (offset), mode))
986 rtx high;
987 /* Load the full offset into a register. This
988 might be improvable in the future. */
989 high = GEN_INT (offset);
990 offset = 0;
991 high = aarch64_force_temporary (mode, temp, high);
992 reg = aarch64_force_temporary (mode, temp,
993 gen_rtx_PLUS (mode, high, reg));
995 return plus_constant (mode, reg, offset);
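/* Expand a move of immediate IMM into register DEST, using symbol loads
   for symbolic constants and MOV/MOVK sequences, arithmetic or logical
   combinations of bitmask immediates for integer constants.  */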
998 void
999 aarch64_expand_mov_immediate (rtx dest, rtx imm)
1001 enum machine_mode mode = GET_MODE (dest);
1002 unsigned HOST_WIDE_INT mask;
1003 int i;
1004 bool first;
1005 unsigned HOST_WIDE_INT val;
1006 bool subtargets;
1007 rtx subtarget;
1008 int one_match, zero_match;
1010 gcc_assert (mode == SImode || mode == DImode);
1012 /* Check what type of symbol it is.  */
1013 if (GET_CODE (imm) == SYMBOL_REF
1014 || GET_CODE (imm) == LABEL_REF
1015 || GET_CODE (imm) == CONST)
1017 rtx mem, base, offset;
1018 enum aarch64_symbol_type sty;
1020 /* If we have (const (plus symbol offset)), separate out the offset
1021 before we start classifying the symbol. */
1022 split_const (imm, &base, &offset);
1024 sty = aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR);
1025 switch (sty)
1027 case SYMBOL_FORCE_TO_MEM:
1028 if (offset != const0_rtx
1029 && targetm.cannot_force_const_mem (mode, imm))
1031 gcc_assert (can_create_pseudo_p ());
1032 base = aarch64_force_temporary (mode, dest, base);
1033 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1034 aarch64_emit_move (dest, base);
1035 return;
1037 mem = force_const_mem (ptr_mode, imm);
1038 gcc_assert (mem);
1039 if (mode != ptr_mode)
1040 mem = gen_rtx_ZERO_EXTEND (mode, mem);
1041 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1042 return;
1044 case SYMBOL_SMALL_TLSGD:
1045 case SYMBOL_SMALL_TLSDESC:
1046 case SYMBOL_SMALL_GOTTPREL:
1047 case SYMBOL_SMALL_GOT:
1048 case SYMBOL_TINY_GOT:
1049 if (offset != const0_rtx)
1051 gcc_assert(can_create_pseudo_p ());
1052 base = aarch64_force_temporary (mode, dest, base);
1053 base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
1054 aarch64_emit_move (dest, base);
1055 return;
1057 /* FALLTHRU */
1059 case SYMBOL_SMALL_TPREL:
1060 case SYMBOL_SMALL_ABSOLUTE:
1061 case SYMBOL_TINY_ABSOLUTE:
1062 aarch64_load_symref_appropriately (dest, imm, sty);
1063 return;
1065 default:
1066 gcc_unreachable ();
1070 if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
1072 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1073 return;
1076 if (!CONST_INT_P (imm))
1078 if (GET_CODE (imm) == HIGH)
1079 emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
1080 else
1082 rtx mem = force_const_mem (mode, imm);
1083 gcc_assert (mem);
1084 emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
1087 return;
1090 if (mode == SImode)
1092 /* We know we can't do this in 1 insn, and we must be able to do it
1093 in two; so don't mess around looking for sequences that don't buy
1094 us anything. */
1095 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (INTVAL (imm) & 0xffff)));
1096 emit_insn (gen_insv_immsi (dest, GEN_INT (16),
1097 GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
1098 return;
1101 /* Remaining cases are all for DImode. */
1103 val = INTVAL (imm);
1104 subtargets = optimize && can_create_pseudo_p ();
1106 one_match = 0;
1107 zero_match = 0;
1108 mask = 0xffff;
1110 for (i = 0; i < 64; i += 16, mask <<= 16)
1112 if ((val & mask) == 0)
1113 zero_match++;
1114 else if ((val & mask) == mask)
1115 one_match++;
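/* Two 16-bit quarters of the value are 0xffff: force one of the
   remaining quarters to all-ones so the constant can be built with a
   single MOVN-class move, then restore that quarter with one MOVK.  */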
1118 if (one_match == 2)
1120 mask = 0xffff;
1121 for (i = 0; i < 64; i += 16, mask <<= 16)
1123 if ((val & mask) != mask)
1125 emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
1126 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1127 GEN_INT ((val >> i) & 0xffff)));
1128 return;
1131 gcc_unreachable ();
1134 if (zero_match == 2)
1135 goto simple_sequence;
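/* Try building the constant from a single MOVZ/MOVN-encodable value
   plus or minus one (possibly shifted) 12-bit add immediate.  */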
1137 mask = 0x0ffff0000UL;
1138 for (i = 16; i < 64; i += 16, mask <<= 16)
1140 HOST_WIDE_INT comp = mask & ~(mask - 1);
1142 if (aarch64_uimm12_shift (val - (val & mask)))
1144 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1146 emit_insn (gen_rtx_SET (VOIDmode, subtarget, GEN_INT (val & mask)));
1147 emit_insn (gen_adddi3 (dest, subtarget,
1148 GEN_INT (val - (val & mask))));
1149 return;
1151 else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
1153 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1155 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1156 GEN_INT ((val + comp) & mask)));
1157 emit_insn (gen_adddi3 (dest, subtarget,
1158 GEN_INT (val - ((val + comp) & mask))));
1159 return;
1161 else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
1163 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1165 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1166 GEN_INT ((val - comp) | ~mask)));
1167 emit_insn (gen_adddi3 (dest, subtarget,
1168 GEN_INT (val - ((val - comp) | ~mask))));
1169 return;
1171 else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
1173 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1175 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1176 GEN_INT (val | ~mask)));
1177 emit_insn (gen_adddi3 (dest, subtarget,
1178 GEN_INT (val - (val | ~mask))));
1179 return;
1183 /* See if we can do it by arithmetically combining two
1184 immediates. */
1185 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1187 int j;
1188 mask = 0xffff;
1190 if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
1191 || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
1193 subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
1194 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1195 GEN_INT (aarch64_bitmasks[i])));
1196 emit_insn (gen_adddi3 (dest, subtarget,
1197 GEN_INT (val - aarch64_bitmasks[i])));
1198 return;
1201 for (j = 0; j < 64; j += 16, mask <<= 16)
1203 if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
1205 emit_insn (gen_rtx_SET (VOIDmode, dest,
1206 GEN_INT (aarch64_bitmasks[i])));
1207 emit_insn (gen_insv_immdi (dest, GEN_INT (j),
1208 GEN_INT ((val >> j) & 0xffff)));
1209 return;
1214 /* See if we can do it by logically combining two immediates. */
1215 for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
1217 if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
1219 int j;
1221 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1222 if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
1224 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1225 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1226 GEN_INT (aarch64_bitmasks[i])));
1227 emit_insn (gen_iordi3 (dest, subtarget,
1228 GEN_INT (aarch64_bitmasks[j])));
1229 return;
1232 else if ((val & aarch64_bitmasks[i]) == val)
1234 int j;
1236 for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
1237 if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
1240 subtarget = subtargets ? gen_reg_rtx (mode) : dest;
1241 emit_insn (gen_rtx_SET (VOIDmode, subtarget,
1242 GEN_INT (aarch64_bitmasks[j])));
1243 emit_insn (gen_anddi3 (dest, subtarget,
1244 GEN_INT (aarch64_bitmasks[i])));
1245 return;
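/* Fall back to a MOVZ of the first non-zero 16-bit quarter followed by
   a MOVK for each remaining non-zero quarter.  */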
1250 simple_sequence:
1251 first = true;
1252 mask = 0xffff;
1253 for (i = 0; i < 64; i += 16, mask <<= 16)
1255 if ((val & mask) != 0)
1257 if (first)
1259 emit_insn (gen_rtx_SET (VOIDmode, dest,
1260 GEN_INT (val & mask)));
1261 first = false;
1263 else
1264 emit_insn (gen_insv_immdi (dest, GEN_INT (i),
1265 GEN_INT ((val >> i) & 0xffff)));
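/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL.  */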
1270 static bool
1271 aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
1272 tree exp ATTRIBUTE_UNUSED)
1274 /* Currently, always true. */
1275 return true;
1278 /* Implement TARGET_PASS_BY_REFERENCE. */
1280 static bool
1281 aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
1282 enum machine_mode mode,
1283 const_tree type,
1284 bool named ATTRIBUTE_UNUSED)
1286 HOST_WIDE_INT size;
1287 enum machine_mode dummymode;
1288 int nregs;
1290 /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
1291 size = (mode == BLKmode && type)
1292 ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1294 /* Aggregates are passed by reference based on their size. */
1295 if (type && AGGREGATE_TYPE_P (type))
1297 size = int_size_in_bytes (type);
1300 /* Variable sized arguments are always passed by reference.  */
1301 if (size < 0)
1302 return true;
1304 /* Can this be a candidate to be passed in fp/simd register(s)? */
1305 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1306 &dummymode, &nregs,
1307 NULL))
1308 return false;
1310 /* Arguments which are variable sized or larger than 2 registers are
1311 passed by reference unless they are a homogeneous floating-point
1312 aggregate. */
1313 return size > 2 * UNITS_PER_WORD;
1316 /* Return TRUE if VALTYPE is padded to its least significant bits. */
1317 static bool
1318 aarch64_return_in_msb (const_tree valtype)
1320 enum machine_mode dummy_mode;
1321 int dummy_int;
1323 /* Never happens in little-endian mode. */
1324 if (!BYTES_BIG_ENDIAN)
1325 return false;
1327 /* Only composite types smaller than or equal to 16 bytes can
1328 be potentially returned in registers. */
1329 if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
1330 || int_size_in_bytes (valtype) <= 0
1331 || int_size_in_bytes (valtype) > 16)
1332 return false;
1334 /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
1335 or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
1336 is always passed/returned in the least significant bits of fp/simd
1337 register(s). */
1338 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
1339 &dummy_mode, &dummy_int, NULL))
1340 return false;
1342 return true;
1345 /* Implement TARGET_FUNCTION_VALUE.
1346 Define how to find the value returned by a function. */
1348 static rtx
1349 aarch64_function_value (const_tree type, const_tree func,
1350 bool outgoing ATTRIBUTE_UNUSED)
1352 enum machine_mode mode;
1353 int unsignedp;
1354 int count;
1355 enum machine_mode ag_mode;
1357 mode = TYPE_MODE (type);
1358 if (INTEGRAL_TYPE_P (type))
1359 mode = promote_function_mode (type, mode, &unsignedp, func, 1);
1361 if (aarch64_return_in_msb (type))
1363 HOST_WIDE_INT size = int_size_in_bytes (type);
1365 if (size % UNITS_PER_WORD != 0)
1367 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
1368 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
1372 if (aarch64_vfp_is_call_or_return_candidate (mode, type,
1373 &ag_mode, &count, NULL))
1375 if (!aarch64_composite_type_p (type, mode))
1377 gcc_assert (count == 1 && mode == ag_mode);
1378 return gen_rtx_REG (mode, V0_REGNUM);
1380 else
1382 int i;
1383 rtx par;
1385 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
1386 for (i = 0; i < count; i++)
1388 rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
1389 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1390 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
1391 XVECEXP (par, 0, i) = tmp;
1393 return par;
1396 else
1397 return gen_rtx_REG (mode, R0_REGNUM);
1400 /* Implements TARGET_FUNCTION_VALUE_REGNO_P.
1401 Return true if REGNO is the number of a hard register in which the values
1402 of a called function may come back.  */
1404 static bool
1405 aarch64_function_value_regno_p (const unsigned int regno)
1407 /* Maximum of 16 bytes can be returned in the general registers. Examples
1408 of 16-byte return values are: 128-bit integers and 16-byte small
1409 structures (excluding homogeneous floating-point aggregates). */
1410 if (regno == R0_REGNUM || regno == R1_REGNUM)
1411 return true;
1413 /* Up to four fp/simd registers can return a function value, e.g. a
1414 homogeneous floating-point aggregate having four members. */
1415 if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
1416 return !TARGET_GENERAL_REGS_ONLY;
1418 return false;
1421 /* Implement TARGET_RETURN_IN_MEMORY.
1423 If the type T of the result of a function is such that
1424 void func (T arg)
1425 would require that arg be passed as a value in a register (or set of
1426 registers) according to the parameter passing rules, then the result
1427 is returned in the same registers as would be used for such an
1428 argument. */
1430 static bool
1431 aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
1433 HOST_WIDE_INT size;
1434 enum machine_mode ag_mode;
1435 int count;
1437 if (!AGGREGATE_TYPE_P (type)
1438 && TREE_CODE (type) != COMPLEX_TYPE
1439 && TREE_CODE (type) != VECTOR_TYPE)
1440 /* Simple scalar types are always returned in registers.  */
1441 return false;
1443 if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
1444 type,
1445 &ag_mode,
1446 &count,
1447 NULL))
1448 return false;
1450 /* Types larger than 2 registers are returned in memory.  */
1451 size = int_size_in_bytes (type);
1452 return (size < 0 || size > 2 * UNITS_PER_WORD);
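/* Return true if an argument of MODE and TYPE is a candidate for being
   passed in SIMD/FP registers, setting *NREGS to the number of
   registers required and recording the element mode in PCUM.  */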
1455 static bool
1456 aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, enum machine_mode mode,
1457 const_tree type, int *nregs)
1459 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1460 return aarch64_vfp_is_call_or_return_candidate (mode,
1461 type,
1462 &pcum->aapcs_vfp_rmode,
1463 nregs,
1464 NULL);
1467 /* Given MODE and TYPE of a function argument, return the alignment in
1468 bits. The idea is to suppress any stronger alignment requested by
1469 the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
1470 This is a helper function for local use only. */
1472 static unsigned int
1473 aarch64_function_arg_alignment (enum machine_mode mode, const_tree type)
1475 unsigned int alignment;
1477 if (type)
1479 if (!integer_zerop (TYPE_SIZE (type)))
1481 if (TYPE_MODE (type) == mode)
1482 alignment = TYPE_ALIGN (type);
1483 else
1484 alignment = GET_MODE_ALIGNMENT (mode);
1486 else
1487 alignment = 0;
1489 else
1490 alignment = GET_MODE_ALIGNMENT (mode);
1492 return alignment;
1495 /* Layout a function argument according to the AAPCS64 rules. The rule
1496 numbers refer to the rule numbers in the AAPCS64. */
1498 static void
1499 aarch64_layout_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1500 const_tree type,
1501 bool named ATTRIBUTE_UNUSED)
1503 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1504 int ncrn, nvrn, nregs;
1505 bool allocate_ncrn, allocate_nvrn;
1506 HOST_WIDE_INT size;
1508 /* We need to do this once per argument. */
1509 if (pcum->aapcs_arg_processed)
1510 return;
1512 pcum->aapcs_arg_processed = true;
1514 /* Size in bytes, rounded to the nearest multiple of 8 bytes. */
1515 size
1516 = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
1517 UNITS_PER_WORD);
1519 allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
1520 allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
1521 mode,
1522 type,
1523 &nregs);
1525 /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
1526 The following code thus handles passing by SIMD/FP registers first. */
1528 nvrn = pcum->aapcs_nvrn;
1530 /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
1531 and homogeneous short-vector aggregates (HVA).  */
1532 if (allocate_nvrn)
1534 if (nvrn + nregs <= NUM_FP_ARG_REGS)
1536 pcum->aapcs_nextnvrn = nvrn + nregs;
1537 if (!aarch64_composite_type_p (type, mode))
1539 gcc_assert (nregs == 1);
1540 pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
1542 else
1544 rtx par;
1545 int i;
1546 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1547 for (i = 0; i < nregs; i++)
1549 rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
1550 V0_REGNUM + nvrn + i);
1551 tmp = gen_rtx_EXPR_LIST
1552 (VOIDmode, tmp,
1553 GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
1554 XVECEXP (par, 0, i) = tmp;
1556 pcum->aapcs_reg = par;
1558 return;
1560 else
1562 /* C.3 NSRN is set to 8. */
1563 pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
1564 goto on_stack;
1568 ncrn = pcum->aapcs_ncrn;
1569 nregs = size / UNITS_PER_WORD;
1571 /* C6 - C9, though the sign and zero extension semantics are
1572 handled elsewhere.  This is the case where the argument fits
1573 entirely in general registers.  */
1574 if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
1576 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1578 gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
1580 /* C.8 if the argument has an alignment of 16 then the NGRN is
1581 rounded up to the next even number. */
1582 if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
1584 ++ncrn;
1585 gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
1587 /* NREGS can be 0 when e.g. an empty structure is to be passed.
1588 A reg is still generated for it, but the caller should be smart
1589 enough not to use it. */
1590 if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
1592 pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
1594 else
1596 rtx par;
1597 int i;
1599 par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
1600 for (i = 0; i < nregs; i++)
1602 rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
1603 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
1604 GEN_INT (i * UNITS_PER_WORD));
1605 XVECEXP (par, 0, i) = tmp;
1607 pcum->aapcs_reg = par;
1610 pcum->aapcs_nextncrn = ncrn + nregs;
1611 return;
1614 /* C.11 */
1615 pcum->aapcs_nextncrn = NUM_ARG_REGS;
1617 /* The argument is passed on the stack; record the needed number of words for
1618 this argument and align the total size if necessary. */
1619 on_stack:
1620 pcum->aapcs_stack_words = size / UNITS_PER_WORD;
1621 if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
1622 pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
1623 16 / UNITS_PER_WORD);
1624 return;
1627 /* Implement TARGET_FUNCTION_ARG. */
1629 static rtx
1630 aarch64_function_arg (cumulative_args_t pcum_v, enum machine_mode mode,
1631 const_tree type, bool named)
1633 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1634 gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
1636 if (mode == VOIDmode)
1637 return NULL_RTX;
1639 aarch64_layout_arg (pcum_v, mode, type, named);
1640 return pcum->aapcs_reg;
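/* Initialise the cumulative argument state in PCUM before scanning the
   arguments of a call.  */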
1643 void
1644 aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
1645 const_tree fntype ATTRIBUTE_UNUSED,
1646 rtx libname ATTRIBUTE_UNUSED,
1647 const_tree fndecl ATTRIBUTE_UNUSED,
1648 unsigned n_named ATTRIBUTE_UNUSED)
1650 pcum->aapcs_ncrn = 0;
1651 pcum->aapcs_nvrn = 0;
1652 pcum->aapcs_nextncrn = 0;
1653 pcum->aapcs_nextnvrn = 0;
1654 pcum->pcs_variant = ARM_PCS_AAPCS64;
1655 pcum->aapcs_reg = NULL_RTX;
1656 pcum->aapcs_arg_processed = false;
1657 pcum->aapcs_stack_words = 0;
1658 pcum->aapcs_stack_size = 0;
1660 return;
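/* Implement TARGET_FUNCTION_ARG_ADVANCE.  */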
1663 static void
1664 aarch64_function_arg_advance (cumulative_args_t pcum_v,
1665 enum machine_mode mode,
1666 const_tree type,
1667 bool named)
1669 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
1670 if (pcum->pcs_variant == ARM_PCS_AAPCS64)
1672 aarch64_layout_arg (pcum_v, mode, type, named);
1673 gcc_assert ((pcum->aapcs_reg != NULL_RTX)
1674 != (pcum->aapcs_stack_words != 0));
1675 pcum->aapcs_arg_processed = false;
1676 pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
1677 pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
1678 pcum->aapcs_stack_size += pcum->aapcs_stack_words;
1679 pcum->aapcs_stack_words = 0;
1680 pcum->aapcs_reg = NULL_RTX;
1684 bool
1685 aarch64_function_arg_regno_p (unsigned regno)
1687 return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
1688 || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
1691 /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
1692 PARM_BOUNDARY bits of alignment, but will be given anything up
1693 to STACK_BOUNDARY bits if the type requires it. This makes sure
1694 that both before and after the layout of each argument, the Next
1695 Stacked Argument Address (NSAA) will have a minimum alignment of
1696 8 bytes. */
1698 static unsigned int
1699 aarch64_function_arg_boundary (enum machine_mode mode, const_tree type)
1701 unsigned int alignment = aarch64_function_arg_alignment (mode, type);
1703 if (alignment < PARM_BOUNDARY)
1704 alignment = PARM_BOUNDARY;
1705 if (alignment > STACK_BOUNDARY)
1706 alignment = STACK_BOUNDARY;
1707 return alignment;
1710 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
1712 Return true if an argument passed on the stack should be padded upwards,
1713 i.e. if the least-significant byte of the stack slot has useful data.
1715 Small aggregate types are placed in the lowest memory address.
1717 The related parameter passing rules are B.4, C.3, C.5 and C.14. */
1719 bool
1720 aarch64_pad_arg_upward (enum machine_mode mode, const_tree type)
1722 /* On little-endian targets, the least significant byte of every stack
1723 argument is passed at the lowest byte address of the stack slot. */
1724 if (!BYTES_BIG_ENDIAN)
1725 return true;
1727 /* Otherwise, integral, floating-point and pointer types are padded downward:
1728 the least significant byte of a stack argument is passed at the highest
1729 byte address of the stack slot. */
1730 if (type
1731 ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
1732 || POINTER_TYPE_P (type))
1733 : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
1734 return false;
1736 /* Everything else padded upward, i.e. data in first byte of stack slot. */
1737 return true;
1740 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
1742 It specifies padding for the last (may also be the only)
1743 element of a block move between registers and memory. If
1744 assuming the block is in memory, padding upward means that
1745 the last element is padded after its most significant byte,
1746 while in downward padding, the last element is padded at
1747 its least significant byte side.
1749 Small aggregates and small complex types are always padded
1750 upwards.
1752 We don't need to worry about homogeneous floating-point or
1753 short-vector aggregates; their move is not affected by the
1754 padding direction determined here. Regardless of endianness,
1755 each element of such an aggregate is put in the least
1756 significant bits of a fp/simd register.
1758 Return !BYTES_BIG_ENDIAN if the least significant byte of the
1759 register has useful data, and return the opposite if the most
1760 significant byte does. */
1762 bool
1763 aarch64_pad_reg_upward (enum machine_mode mode, const_tree type,
1764 bool first ATTRIBUTE_UNUSED)
1767 /* Small composite types are always padded upward. */
1768 if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
1770 HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
1771 : GET_MODE_SIZE (mode));
1772 if (size < 2 * UNITS_PER_WORD)
1773 return true;
1776 /* Otherwise, use the default padding. */
1777 return !BYTES_BIG_ENDIAN;
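/* Implement TARGET_LIBGCC_CMP_RETURN_MODE.  */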
1780 static enum machine_mode
1781 aarch64_libgcc_cmp_return_mode (void)
1783 return SImode;
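/* Implement TARGET_FRAME_POINTER_REQUIRED.  */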
1786 static bool
1787 aarch64_frame_pointer_required (void)
1789 /* If the function contains dynamic stack allocations, we need to
1790 use the frame pointer to access the static parts of the frame. */
1791 if (cfun->calls_alloca)
1792 return true;
1794 /* In aarch64_override_options_after_change
1795 flag_omit_leaf_frame_pointer turns off the frame pointer by
1796 default. Turn it back on now if we've not got a leaf
1797 function. */
1798 if (flag_omit_leaf_frame_pointer
1799 && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
1800 return true;
1802 return false;
1805 /* Mark the registers that need to be saved by the callee and calculate
1806 the size of the callee-saved registers area and frame record (both FP
1807 and LR may be omitted). */
1808 static void
1809 aarch64_layout_frame (void)
1811 HOST_WIDE_INT offset = 0;
1812 int regno;
1814 if (reload_completed && cfun->machine->frame.laid_out)
1815 return;
1817 #define SLOT_NOT_REQUIRED (-2)
1818 #define SLOT_REQUIRED (-1)
1820 cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER;
1821 cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER;
1823 /* First mark all the registers that really need to be saved... */
1824 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1825 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
1827 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1828 cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
1830 /* ... that includes the eh data registers (if needed)... */
1831 if (crtl->calls_eh_return)
1832 for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
1833 cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
1834 = SLOT_REQUIRED;
1836 /* ... and any callee saved register that dataflow says is live. */
1837 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1838 if (df_regs_ever_live_p (regno)
1839 && !call_used_regs[regno])
1840 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
1842 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1843 if (df_regs_ever_live_p (regno)
1844 && !call_used_regs[regno])
1845 cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
1847 if (frame_pointer_needed)
1849 /* FP and LR are placed in the linkage record. */
1850 cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
1851 cfun->machine->frame.wb_candidate1 = R29_REGNUM;
1852 cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
1853 cfun->machine->frame.wb_candidate2 = R30_REGNUM;
1854 cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
1855 offset += 2 * UNITS_PER_WORD;
1858 /* Now assign stack slots for them. */
1859 for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
1860 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
1862 cfun->machine->frame.reg_offset[regno] = offset;
1863 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
1864 cfun->machine->frame.wb_candidate1 = regno;
1865 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER)
1866 cfun->machine->frame.wb_candidate2 = regno;
1867 offset += UNITS_PER_WORD;
1870 for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
1871 if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
1873 cfun->machine->frame.reg_offset[regno] = offset;
1874 if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
1875 cfun->machine->frame.wb_candidate1 = regno;
1876 else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER
1877 && cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
1878 cfun->machine->frame.wb_candidate2 = regno;
1879 offset += UNITS_PER_WORD;
1882 cfun->machine->frame.padding0 =
1883 (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
1884 offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
1886 cfun->machine->frame.saved_regs_size = offset;
1888 cfun->machine->frame.hard_fp_offset
1889 = AARCH64_ROUND_UP (cfun->machine->frame.saved_varargs_size
1890 + get_frame_size ()
1891 + cfun->machine->frame.saved_regs_size,
1892 STACK_BOUNDARY / BITS_PER_UNIT);
1894 cfun->machine->frame.frame_size
1895 = AARCH64_ROUND_UP (cfun->machine->frame.hard_fp_offset
1896 + crtl->outgoing_args_size,
1897 STACK_BOUNDARY / BITS_PER_UNIT);
1899 cfun->machine->frame.laid_out = true;
1902 /* Make the last instruction frame-related and note that it performs
1903 the operation described by FRAME_PATTERN. */
1905 static void
1906 aarch64_set_frame_expr (rtx frame_pattern)
1908 rtx insn;
1910 insn = get_last_insn ();
1911 RTX_FRAME_RELATED_P (insn) = 1;
1912 RTX_FRAME_RELATED_P (frame_pattern) = 1;
1913 REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1914 frame_pattern,
1915 REG_NOTES (insn));
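/* Return true if REGNO has been allocated a callee-save slot by
   aarch64_layout_frame and so must be saved on entry.  */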
1918 static bool
1919 aarch64_register_saved_on_entry (int regno)
1921 return cfun->machine->frame.reg_offset[regno] >= 0;
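/* Starting at REGNO, return the first register up to LIMIT that needs
   to be saved on entry to the current function.  */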
1924 static unsigned
1925 aarch64_next_callee_save (unsigned regno, unsigned limit)
1927 while (regno <= limit && !aarch64_register_saved_on_entry (regno))
1928 regno ++;
1929 return regno;
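/* Push register REGNO of MODE onto the stack, pre-decrementing the
   stack pointer by ADJUSTMENT bytes, and mark the insn frame-related.  */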
1932 static void
1933 aarch64_pushwb_single_reg (enum machine_mode mode, unsigned regno,
1934 HOST_WIDE_INT adjustment)
1936 rtx base_rtx = stack_pointer_rtx;
1937 rtx insn, reg, mem;
1939 reg = gen_rtx_REG (mode, regno);
1940 mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
1941 plus_constant (Pmode, base_rtx, -adjustment));
1942 mem = gen_rtx_MEM (mode, mem);
1944 insn = emit_move_insn (mem, reg);
1945 RTX_FRAME_RELATED_P (insn) = 1;
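/* Pop register REGNO of MODE from the stack, post-incrementing the
   stack pointer by ADJUSTMENT bytes, and attach a CFA restore note.  */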
1948 static void
1949 aarch64_popwb_single_reg (enum machine_mode mode, unsigned regno,
1950 HOST_WIDE_INT adjustment)
1952 rtx base_rtx = stack_pointer_rtx;
1953 rtx insn, reg, mem;
1955 reg = gen_rtx_REG (mode, regno);
1956 mem = gen_rtx_POST_MODIFY (Pmode, base_rtx,
1957 plus_constant (Pmode, base_rtx, adjustment));
1958 mem = gen_rtx_MEM (mode, mem);
1960 insn = emit_move_insn (reg, mem);
1961 add_reg_note (insn, REG_CFA_RESTORE, reg);
1962 RTX_FRAME_RELATED_P (insn) = 1;
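/* Generate a store-pair-with-writeback pattern that stores REG and REG2
   at BASE, pre-decrementing BASE by ADJUSTMENT bytes.  */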
1965 static rtx
1966 aarch64_gen_storewb_pair (enum machine_mode mode, rtx base, rtx reg, rtx reg2,
1967 HOST_WIDE_INT adjustment)
1969 switch (mode)
1971 case DImode:
1972 return gen_storewb_pairdi_di (base, base, reg, reg2,
1973 GEN_INT (-adjustment),
1974 GEN_INT (UNITS_PER_WORD - adjustment));
1975 case DFmode:
1976 return gen_storewb_pairdf_di (base, base, reg, reg2,
1977 GEN_INT (-adjustment),
1978 GEN_INT (UNITS_PER_WORD - adjustment));
1979 default:
1980 gcc_unreachable ();
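/* Push the register pair REGNO1/REGNO2 of MODE onto the stack,
   pre-decrementing the stack pointer by ADJUSTMENT bytes and marking
   the constituent sets frame-related.  */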
1984 static void
1985 aarch64_pushwb_pair_reg (enum machine_mode mode, unsigned regno1,
1986 unsigned regno2, HOST_WIDE_INT adjustment)
1988 rtx insn;
1989 rtx reg1 = gen_rtx_REG (mode, regno1);
1990 rtx reg2 = gen_rtx_REG (mode, regno2);
1992 insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
1993 reg2, adjustment));
1994 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
1996 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
1997 RTX_FRAME_RELATED_P (insn) = 1;
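/* Generate a load-pair-with-writeback pattern that loads REG and REG2
   from BASE, post-incrementing BASE by ADJUSTMENT bytes.  */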
2000 static rtx
2001 aarch64_gen_loadwb_pair (enum machine_mode mode, rtx base, rtx reg, rtx reg2,
2002 HOST_WIDE_INT adjustment)
2004 switch (mode)
2006 case DImode:
2007 return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
2008 GEN_INT (adjustment + UNITS_PER_WORD));
2009 case DFmode:
2010 return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
2011 GEN_INT (adjustment + UNITS_PER_WORD));
2012 default:
2013 gcc_unreachable ();
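/* Pop the register pair REGNO1/REGNO2 of MODE from the stack,
   post-incrementing the stack pointer by ADJUSTMENT bytes; if CFA is
   given, record the CFA adjustment and register restores for the
   unwinder.  */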
2017 static void
2018 aarch64_popwb_pair_reg (enum machine_mode mode, unsigned regno1,
2019 unsigned regno2, HOST_WIDE_INT adjustment, rtx cfa)
2021 rtx insn;
2022 rtx reg1 = gen_rtx_REG (mode, regno1);
2023 rtx reg2 = gen_rtx_REG (mode, regno2);
2025 insn = emit_insn (aarch64_gen_loadwb_pair (mode, stack_pointer_rtx, reg1,
2026 reg2, adjustment));
2027 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
2028 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2029 RTX_FRAME_RELATED_P (insn) = 1;
2031 if (cfa)
2032 add_reg_note (insn, REG_CFA_ADJUST_CFA,
2033 (gen_rtx_SET (Pmode, stack_pointer_rtx,
2034 plus_constant (Pmode, cfa, adjustment))));
2036 add_reg_note (insn, REG_CFA_RESTORE, reg1);
2037 add_reg_note (insn, REG_CFA_RESTORE, reg2);
2040 static rtx
2041 aarch64_gen_store_pair (enum machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
2042 rtx reg2)
2044 switch (mode)
2046 case DImode:
2047 return gen_store_pairdi (mem1, reg1, mem2, reg2);
2049 case DFmode:
2050 return gen_store_pairdf (mem1, reg1, mem2, reg2);
2052 default:
2053 gcc_unreachable ();
2057 static rtx
2058 aarch64_gen_load_pair (enum machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
2059 rtx mem2)
2061 switch (mode)
2063 case DImode:
2064 return gen_load_pairdi (reg1, mem1, reg2, mem2);
2066 case DFmode:
2067 return gen_load_pairdf (reg1, mem1, reg2, mem2);
2069 default:
2070 gcc_unreachable ();
2075 static void
2076 aarch64_save_callee_saves (enum machine_mode mode, HOST_WIDE_INT start_offset,
2077 unsigned start, unsigned limit, bool skip_wb)
2079 rtx insn;
2080 rtx (*gen_mem_ref) (enum machine_mode, rtx) = (frame_pointer_needed
2081 ? gen_frame_mem : gen_rtx_MEM);
2082 unsigned regno;
2083 unsigned regno2;
2085 for (regno = aarch64_next_callee_save (start, limit);
2086 regno <= limit;
2087 regno = aarch64_next_callee_save (regno + 1, limit))
2089 rtx reg, mem;
2090 HOST_WIDE_INT offset;
2092 if (skip_wb
2093 && (regno == cfun->machine->frame.wb_candidate1
2094 || regno == cfun->machine->frame.wb_candidate2))
2095 continue;
2097 reg = gen_rtx_REG (mode, regno);
2098 offset = start_offset + cfun->machine->frame.reg_offset[regno];
2099 mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2100 offset));
2102 regno2 = aarch64_next_callee_save (regno + 1, limit);
2104 if (regno2 <= limit
2105 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2106 == cfun->machine->frame.reg_offset[regno2]))
2109 rtx reg2 = gen_rtx_REG (mode, regno2);
2110 rtx mem2;
2112 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2113 mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
2114 offset));
2115 insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
2116 reg2));
2118 /* The first part of a frame-related parallel insn is
2119 always assumed to be relevant to the frame
2120 calculations; subsequent parts are only
2121 frame-related if explicitly marked. */
2122 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2123 regno = regno2;
2125 else
2126 insn = emit_move_insn (mem, reg);
2128 RTX_FRAME_RELATED_P (insn) = 1;
2132 static void
2133 aarch64_restore_callee_saves (enum machine_mode mode,
2134 HOST_WIDE_INT start_offset, unsigned start,
2135 unsigned limit, bool skip_wb)
2137 rtx insn;
2138 rtx base_rtx = stack_pointer_rtx;
2139 rtx (*gen_mem_ref) (enum machine_mode, rtx) = (frame_pointer_needed
2140 ? gen_frame_mem : gen_rtx_MEM);
2141 unsigned regno;
2142 unsigned regno2;
2143 HOST_WIDE_INT offset;
2145 for (regno = aarch64_next_callee_save (start, limit);
2146 regno <= limit;
2147 regno = aarch64_next_callee_save (regno + 1, limit))
2149 rtx reg, mem;
2151 if (skip_wb
2152 && (regno == cfun->machine->frame.wb_candidate1
2153 || regno == cfun->machine->frame.wb_candidate2))
2154 continue;
2156 reg = gen_rtx_REG (mode, regno);
2157 offset = start_offset + cfun->machine->frame.reg_offset[regno];
2158 mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2160 regno2 = aarch64_next_callee_save (regno + 1, limit);
2162 if (regno2 <= limit
2163 && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
2164 == cfun->machine->frame.reg_offset[regno2]))
2166 rtx reg2 = gen_rtx_REG (mode, regno2);
2167 rtx mem2;
2169 offset = start_offset + cfun->machine->frame.reg_offset[regno2];
2170 mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
2171 insn = emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2,
2172 mem2));
2173 add_reg_note (insn, REG_CFA_RESTORE, reg);
2174 add_reg_note (insn, REG_CFA_RESTORE, reg2);
2176 /* The first part of a frame-related parallel insn is
2177 always assumed to be relevant to the frame
2178 calculations; subsequent parts are only
2179 frame-related if explicitly marked. */
2180 RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
2181 regno = regno2;
2183 else
2185 insn = emit_move_insn (reg, mem);
2186 add_reg_note (insn, REG_CFA_RESTORE, reg);
2189 RTX_FRAME_RELATED_P (insn) = 1;
2193 /* AArch64 stack frames generated by this compiler look like:
2195 +-------------------------------+
2197 | incoming stack arguments |
2199 +-------------------------------+
2200 | | <-- incoming stack pointer (aligned)
2201 | callee-allocated save area |
2202 | for register varargs |
2204 +-------------------------------+
2205 | local variables | <-- frame_pointer_rtx
2207 +-------------------------------+
2208 | padding0 | \
2209 +-------------------------------+ |
2210 | callee-saved registers | | frame.saved_regs_size
2211 +-------------------------------+ |
2212 | LR' | |
2213 +-------------------------------+ |
2214 | FP' | / <- hard_frame_pointer_rtx (aligned)
2215 +-------------------------------+
2216 | dynamic allocation |
2217 +-------------------------------+
2218 | padding |
2219 +-------------------------------+
2220 | outgoing stack arguments | <-- arg_pointer
2222 +-------------------------------+
2223 | | <-- stack_pointer_rtx (aligned)
2225 Dynamic stack allocations via alloca() decrease stack_pointer_rtx
2226 but leave frame_pointer_rtx and hard_frame_pointer_rtx
2227 unchanged. */
2229 /* Generate the prologue instructions for entry into a function.
2230 Establish the stack frame by decreasing the stack pointer with a
2231 properly calculated size and, if necessary, create a frame record
2232 filled with the values of LR and previous frame pointer. The
2233 current FP is also set up if it is in use. */
2235 void
2236 aarch64_expand_prologue (void)
2238 /* sub sp, sp, #<frame_size>
2239 stp {fp, lr}, [sp, #<frame_size> - 16]
2240 add fp, sp, #<frame_size> - hardfp_offset
2241 stp {cs_reg}, [fp, #-16] etc.
2243 sub sp, sp, <final_adjustment_if_any>
2244 */
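/* Illustrative sketch (not part of the original source): for a small frame
   where OFFSET < 512, a frame pointer is needed and fp_offset is zero, the
   code below typically emits something like

       stp  x29, x30, [sp, #-<frame_size>]!
       add  x29, sp, #0
       stp  x19, x20, [sp, #16]        // remaining callee-saves, if any

   i.e. the whole allocation is folded into the write-back STP and no final
   SP adjustment is required.  The exact registers depend on the callee-save
   layout computed by aarch64_layout_frame.  */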
2245 HOST_WIDE_INT frame_size, offset;
2246 HOST_WIDE_INT fp_offset; /* Offset from hard FP to SP. */
2247 rtx insn;
2249 aarch64_layout_frame ();
2251 if (flag_stack_usage_info)
2252 current_function_static_stack_size = cfun->machine->frame.frame_size;
2254 frame_size = cfun->machine->frame.frame_size;
2255 offset = cfun->machine->frame.frame_size;
2257 fp_offset = cfun->machine->frame.frame_size
2258 - cfun->machine->frame.hard_fp_offset;
2260 /* Store pairs and load pairs have an offset range of only -512 to 504. */
2261 if (offset >= 512)
2263 /* When the frame has a large size, an initial decrease is done on
2264 the stack pointer to jump over the callee-allocated save area for
2265 register varargs, the local variable area and/or the callee-saved
2266 register area. This will allow the pre-index write-back
2267 store pair instructions to be used for setting up the stack frame
2268 efficiently. */
2269 offset = cfun->machine->frame.hard_fp_offset;
2270 if (offset >= 512)
2271 offset = cfun->machine->frame.saved_regs_size;
2273 frame_size -= (offset + crtl->outgoing_args_size);
2274 fp_offset = 0;
2276 if (frame_size >= 0x1000000)
2278 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2279 emit_move_insn (op0, GEN_INT (-frame_size));
2280 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2281 aarch64_set_frame_expr (gen_rtx_SET
2282 (Pmode, stack_pointer_rtx,
2283 plus_constant (Pmode,
2284 stack_pointer_rtx,
2285 -frame_size)));
2287 else if (frame_size > 0)
2289 if ((frame_size & 0xfff) != frame_size)
2291 insn = emit_insn (gen_add2_insn
2292 (stack_pointer_rtx,
2293 GEN_INT (-(frame_size
2294 & ~(HOST_WIDE_INT)0xfff))));
2295 RTX_FRAME_RELATED_P (insn) = 1;
2297 if ((frame_size & 0xfff) != 0)
2299 insn = emit_insn (gen_add2_insn
2300 (stack_pointer_rtx,
2301 GEN_INT (-(frame_size
2302 & (HOST_WIDE_INT)0xfff))));
2303 RTX_FRAME_RELATED_P (insn) = 1;
2307 else
2308 frame_size = -1;
2310 if (offset > 0)
2312 bool skip_wb = false;
2314 if (frame_pointer_needed)
2316 skip_wb = true;
2318 if (fp_offset)
2320 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2321 GEN_INT (-offset)));
2322 RTX_FRAME_RELATED_P (insn) = 1;
2323 aarch64_set_frame_expr (gen_rtx_SET
2324 (Pmode, stack_pointer_rtx,
2325 gen_rtx_MINUS (Pmode, stack_pointer_rtx,
2326 GEN_INT (offset))));
2328 aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM,
2329 R30_REGNUM, false);
2331 else
2332 aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset);
2334 /* Set up frame pointer to point to the location of the
2335 previous frame pointer on the stack. */
2336 insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
2337 stack_pointer_rtx,
2338 GEN_INT (fp_offset)));
2339 aarch64_set_frame_expr (gen_rtx_SET
2340 (Pmode, hard_frame_pointer_rtx,
2341 plus_constant (Pmode,
2342 stack_pointer_rtx,
2343 fp_offset)));
2344 RTX_FRAME_RELATED_P (insn) = 1;
2345 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
2346 hard_frame_pointer_rtx));
2348 else
2350 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2351 unsigned reg2 = cfun->machine->frame.wb_candidate2;
2353 if (fp_offset
2354 || reg1 == FIRST_PSEUDO_REGISTER
2355 || (reg2 == FIRST_PSEUDO_REGISTER
2356 && offset >= 256))
2358 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2359 GEN_INT (-offset)));
2360 RTX_FRAME_RELATED_P (insn) = 1;
2362 else
2364 enum machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
2366 skip_wb = true;
2368 if (reg2 == FIRST_PSEUDO_REGISTER)
2369 aarch64_pushwb_single_reg (mode1, reg1, offset);
2370 else
2371 aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset);
2375 aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
2376 skip_wb);
2377 aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
2378 skip_wb);
2381 /* When offset >= 512,
2382 sub sp, sp, #<outgoing_args_size> */
2383 if (frame_size > -1)
2385 if (crtl->outgoing_args_size > 0)
2387 insn = emit_insn (gen_add2_insn
2388 (stack_pointer_rtx,
2389 GEN_INT (- crtl->outgoing_args_size)));
2390 RTX_FRAME_RELATED_P (insn) = 1;
2395 /* Generate the epilogue instructions for returning from a function. */
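/* Illustrative sketch (not from the original source): mirroring the prologue
   comment above, a small frame is typically torn down with something like

       ldp  x19, x20, [sp, #16]        // restore remaining callee-saves
       ldp  x29, x30, [sp], #<frame_size>
       ret

   while large frames first restore SP from the frame pointer and then add
   the residual frame size back with one or two immediates.  */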
2396 void
2397 aarch64_expand_epilogue (bool for_sibcall)
2399 HOST_WIDE_INT frame_size, offset;
2400 HOST_WIDE_INT fp_offset;
2401 rtx insn;
2402 rtx cfa_reg;
2404 aarch64_layout_frame ();
2406 offset = frame_size = cfun->machine->frame.frame_size;
2407 fp_offset = cfun->machine->frame.frame_size
2408 - cfun->machine->frame.hard_fp_offset;
2410 cfa_reg = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx;
2412 /* Store pairs and load pairs have an offset range of only -512 to 504. */
2413 if (offset >= 512)
2415 offset = cfun->machine->frame.hard_fp_offset;
2416 if (offset >= 512)
2417 offset = cfun->machine->frame.saved_regs_size;
2419 frame_size -= (offset + crtl->outgoing_args_size);
2420 fp_offset = 0;
2421 if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
2423 insn = emit_insn (gen_add2_insn
2424 (stack_pointer_rtx,
2425 GEN_INT (crtl->outgoing_args_size)));
2426 RTX_FRAME_RELATED_P (insn) = 1;
2429 else
2430 frame_size = -1;
2432 /* If there were outgoing arguments or we've done dynamic stack
2433 allocation, then restore the stack pointer from the frame
2434 pointer. This is at most one insn and more efficient than using
2435 GCC's internal mechanism. */
2436 if (frame_pointer_needed
2437 && (crtl->outgoing_args_size || cfun->calls_alloca))
2439 insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
2440 hard_frame_pointer_rtx,
2441 GEN_INT (0)));
2442 offset = offset - fp_offset;
2443 RTX_FRAME_RELATED_P (insn) = 1;
2444 /* As SP is set to (FP - fp_offset), according to the rules in
2445 dwarf2cfi.c:dwarf2out_frame_debug_expr, CFA should be calculated
2446 from the value of SP from now on. */
2447 cfa_reg = stack_pointer_rtx;
2450 if (offset > 0)
2452 unsigned reg1 = cfun->machine->frame.wb_candidate1;
2453 unsigned reg2 = cfun->machine->frame.wb_candidate2;
2454 bool skip_wb = true;
2456 if (frame_pointer_needed)
2457 fp_offset = 0;
2458 else if (fp_offset
2459 || reg1 == FIRST_PSEUDO_REGISTER
2460 || (reg2 == FIRST_PSEUDO_REGISTER
2461 && offset >= 256))
2462 skip_wb = false;
2464 aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
2465 skip_wb);
2466 aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
2467 skip_wb);
2469 if (skip_wb)
2471 enum machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
2473 if (reg2 == FIRST_PSEUDO_REGISTER)
2474 aarch64_popwb_single_reg (mode1, reg1, offset);
2475 else
2477 if (reg1 != HARD_FRAME_POINTER_REGNUM)
2478 cfa_reg = NULL;
2480 aarch64_popwb_pair_reg (mode1, reg1, reg2, offset, cfa_reg);
2483 else
2485 insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
2486 GEN_INT (offset)));
2487 RTX_FRAME_RELATED_P (insn) = 1;
2491 /* Stack adjustment for exception handler. */
2492 if (crtl->calls_eh_return)
2494 /* We need to unwind the stack by the offset computed by
2495 EH_RETURN_STACKADJ_RTX. However, at this point the CFA is
2496 based on SP. Ideally we would update the SP and define the
2497 CFA along the lines of:
2499 SP = SP + EH_RETURN_STACKADJ_RTX
2500 (regnote CFA = SP - EH_RETURN_STACKADJ_RTX)
2502 However the dwarf emitter only understands a constant
2503 register offset.
2505 The solution chosen here is to use the otherwise unused IP0
2506 as a temporary register to hold the current SP value. The
2507 CFA is described using IP0 then SP is modified. */
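/* For illustration only (not emitted verbatim here), the sequence below is
   roughly

       mov  x16, sp                       // IP0 copy of SP; CFA now via x16
       add  sp, sp, <EH_RETURN_STACKADJ_RTX>

   followed by a USE of x16 so that the copy is not deleted.  */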
2509 rtx ip0 = gen_rtx_REG (DImode, IP0_REGNUM);
2511 insn = emit_move_insn (ip0, stack_pointer_rtx);
2512 add_reg_note (insn, REG_CFA_DEF_CFA, ip0);
2513 RTX_FRAME_RELATED_P (insn) = 1;
2515 emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
2517 /* Ensure the assignment to IP0 does not get optimized away. */
2518 emit_use (ip0);
2521 if (frame_size > -1)
2523 if (frame_size >= 0x1000000)
2525 rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2526 emit_move_insn (op0, GEN_INT (frame_size));
2527 emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
2528 aarch64_set_frame_expr (gen_rtx_SET
2529 (Pmode, stack_pointer_rtx,
2530 plus_constant (Pmode,
2531 stack_pointer_rtx,
2532 frame_size)));
2534 else if (frame_size > 0)
2536 if ((frame_size & 0xfff) != 0)
2538 insn = emit_insn (gen_add2_insn
2539 (stack_pointer_rtx,
2540 GEN_INT ((frame_size
2541 & (HOST_WIDE_INT) 0xfff))));
2542 RTX_FRAME_RELATED_P (insn) = 1;
2544 if ((frame_size & 0xfff) != frame_size)
2546 insn = emit_insn (gen_add2_insn
2547 (stack_pointer_rtx,
2548 GEN_INT ((frame_size
2549 & ~ (HOST_WIDE_INT) 0xfff))));
2550 RTX_FRAME_RELATED_P (insn) = 1;
2554 aarch64_set_frame_expr (gen_rtx_SET (Pmode, stack_pointer_rtx,
2555 plus_constant (Pmode,
2556 stack_pointer_rtx,
2557 offset)));
2560 emit_use (gen_rtx_REG (DImode, LR_REGNUM));
2561 if (!for_sibcall)
2562 emit_jump_insn (ret_rtx);
2565 /* Return the place to copy the exception unwinding return address to.
2566 This will probably be a stack slot, but could (in theory) be the
2567 return register. */
2568 rtx
2569 aarch64_final_eh_return_addr (void)
2571 HOST_WIDE_INT fp_offset;
2573 aarch64_layout_frame ();
2575 fp_offset = cfun->machine->frame.frame_size
2576 - cfun->machine->frame.hard_fp_offset;
2578 if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
2579 return gen_rtx_REG (DImode, LR_REGNUM);
2581 /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can
2582 result in a store to save LR introduced by builtin_eh_return () being
2583 incorrectly deleted because the alias is not detected.
2584 So in the calculation of the address to copy the exception unwinding
2585 return address to, we note 2 cases.
2586 If FP is needed and the fp_offset is 0, it means that SP = FP and hence
2587 we return a SP-relative location since all the addresses are SP-relative
2588 in this case. This prevents the store from being optimized away.
2589 If the fp_offset is not 0, then the addresses will be FP-relative and
2590 therefore we return a FP-relative location. */
2592 if (frame_pointer_needed)
2594 if (fp_offset)
2595 return gen_frame_mem (DImode,
2596 plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
2597 else
2598 return gen_frame_mem (DImode,
2599 plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
2602 /* If FP is not needed, we calculate the location of LR, which would be
2603 at the top of the saved registers block. */
2605 return gen_frame_mem (DImode,
2606 plus_constant (Pmode,
2607 stack_pointer_rtx,
2608 fp_offset
2609 + cfun->machine->frame.saved_regs_size
2610 - 2 * UNITS_PER_WORD));
2613 /* Possibly output code to build up a constant in a register. For
2614 the benefit of the costs infrastructure, returns the number of
2615 instructions which would be emitted. GENERATE inhibits or
2616 enables code generation. */
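/* Worked example (illustrative, not part of the original comment): for
   VAL == 0xffffffff00001234 the upper 16-bit chunks are 0x0000, 0xffff and
   0xffff, so ncount (1) < zcount (2) and the MOVN-based sequence wins:

       movn  xN, #0xedcb            // xN = 0xffffffffffff1234
       movk  xN, #0x0000, lsl #16   // clear the second chunk

   giving a return value of 2 instructions.  */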
2618 static int
2619 aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
2621 int insns = 0;
2623 if (aarch64_bitmask_imm (val, DImode))
2625 if (generate)
2626 emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
2627 insns = 1;
2629 else
2631 int i;
2632 int ncount = 0;
2633 int zcount = 0;
2634 HOST_WIDE_INT valp = val >> 16;
2635 HOST_WIDE_INT valm;
2636 HOST_WIDE_INT tval;
2638 for (i = 16; i < 64; i += 16)
2640 valm = (valp & 0xffff);
2642 if (valm != 0)
2643 ++ zcount;
2645 if (valm != 0xffff)
2646 ++ ncount;
2648 valp >>= 16;
2651 /* zcount contains the number of additional MOVK instructions
2652 required if the constant is built up with an initial MOVZ instruction,
2653 while ncount is the number of MOVK instructions required if starting
2654 with a MOVN instruction. Choose the sequence that yields the fewest
2655 number of instructions, preferring MOVZ instructions when they are both
2656 the same. */
2657 if (ncount < zcount)
2659 if (generate)
2660 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2661 GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
2662 tval = 0xffff;
2663 insns++;
2665 else
2667 if (generate)
2668 emit_move_insn (gen_rtx_REG (Pmode, regnum),
2669 GEN_INT (val & 0xffff));
2670 tval = 0;
2671 insns++;
2674 val >>= 16;
2676 for (i = 16; i < 64; i += 16)
2678 if ((val & 0xffff) != tval)
2680 if (generate)
2681 emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
2682 GEN_INT (i),
2683 GEN_INT (val & 0xffff)));
2684 insns++;
2686 val >>= 16;
2689 return insns;
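/* Add DELTA to the register REGNUM, using SCRATCHREG as a temporary when the
   constant cannot be expressed with (shifted) 12-bit immediates.  Worked
   example (illustrative, hypothetical register names): for DELTA == 9029
   the code below emits roughly

       mov  x<scratch>, #2
       add  x<regnum>, x<regnum>, x<scratch>, lsl #12   // + 8192
       add  x<regnum>, x<regnum>, #837                  // + 837 = 9029  */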
2692 static void
2693 aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
2695 HOST_WIDE_INT mdelta = delta;
2696 rtx this_rtx = gen_rtx_REG (Pmode, regnum);
2697 rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
2699 if (mdelta < 0)
2700 mdelta = -mdelta;
2702 if (mdelta >= 4096 * 4096)
2704 (void) aarch64_build_constant (scratchreg, delta, true);
2705 emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
2707 else if (mdelta > 0)
2709 if (mdelta >= 4096)
2711 emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
2712 rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
2713 if (delta < 0)
2714 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2715 gen_rtx_MINUS (Pmode, this_rtx, shift)));
2716 else
2717 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2718 gen_rtx_PLUS (Pmode, this_rtx, shift)));
2720 if (mdelta % 4096 != 0)
2722 scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
2723 emit_insn (gen_rtx_SET (Pmode, this_rtx,
2724 gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
2729 /* Output code to add DELTA to the first argument, and then jump
2730 to FUNCTION. Used for C++ multiple inheritance. */
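/* Illustrative example (hypothetical): for a thunk with DELTA == 8 and
   VCALL_OFFSET == 0 the code below produces roughly

       add  x0, x0, #8
       b    <function>

   i.e. the this pointer is adjusted in place and control tail-calls the
   target.  */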
2731 static void
2732 aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
2733 HOST_WIDE_INT delta,
2734 HOST_WIDE_INT vcall_offset,
2735 tree function)
2737 /* The this pointer is always in x0. Note that this differs from
2738 Arm where the this pointer may be bumped to r1 if r0 is required
2739 to return a pointer to an aggregate. On AArch64 a result value
2740 pointer will be in x8. */
2741 int this_regno = R0_REGNUM;
2742 rtx this_rtx, temp0, temp1, addr, insn, funexp;
2744 reload_completed = 1;
2745 emit_note (NOTE_INSN_PROLOGUE_END);
2747 if (vcall_offset == 0)
2748 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2749 else
2751 gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
2753 this_rtx = gen_rtx_REG (Pmode, this_regno);
2754 temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
2755 temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
2757 addr = this_rtx;
2758 if (delta != 0)
2760 if (delta >= -256 && delta < 256)
2761 addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
2762 plus_constant (Pmode, this_rtx, delta));
2763 else
2764 aarch64_add_constant (this_regno, IP1_REGNUM, delta);
2767 if (Pmode == ptr_mode)
2768 aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
2769 else
2770 aarch64_emit_move (temp0,
2771 gen_rtx_ZERO_EXTEND (Pmode,
2772 gen_rtx_MEM (ptr_mode, addr)));
2774 if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
2775 addr = plus_constant (Pmode, temp0, vcall_offset);
2776 else
2778 (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
2779 addr = gen_rtx_PLUS (Pmode, temp0, temp1);
2782 if (Pmode == ptr_mode)
2783 aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode,addr));
2784 else
2785 aarch64_emit_move (temp1,
2786 gen_rtx_SIGN_EXTEND (Pmode,
2787 gen_rtx_MEM (ptr_mode, addr)));
2789 emit_insn (gen_add2_insn (this_rtx, temp1));
2792 /* Generate a tail call to the target function. */
2793 if (!TREE_USED (function))
2795 assemble_external (function);
2796 TREE_USED (function) = 1;
2798 funexp = XEXP (DECL_RTL (function), 0);
2799 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
2800 insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
2801 SIBLING_CALL_P (insn) = 1;
2803 insn = get_insns ();
2804 shorten_branches (insn);
2805 final_start_function (insn, file, 1);
2806 final (insn, file, 1);
2807 final_end_function ();
2809 /* Stop pretending to be a post-reload pass. */
2810 reload_completed = 0;
2813 static int
2814 aarch64_tls_operand_p_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
2816 if (GET_CODE (*x) == SYMBOL_REF)
2817 return SYMBOL_REF_TLS_MODEL (*x) != 0;
2819 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
2820 TLS offsets, not real symbol references. */
2821 if (GET_CODE (*x) == UNSPEC
2822 && XINT (*x, 1) == UNSPEC_TLS)
2823 return -1;
2825 return 0;
2828 static bool
2829 aarch64_tls_referenced_p (rtx x)
2831 if (!TARGET_HAVE_TLS)
2832 return false;
2834 return for_each_rtx (&x, aarch64_tls_operand_p_1, NULL);
2838 static int
2839 aarch64_bitmasks_cmp (const void *i1, const void *i2)
2841 const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
2842 const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
2844 if (*imm1 < *imm2)
2845 return -1;
2846 if (*imm1 > *imm2)
2847 return +1;
2848 return 0;
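/* Build the table of valid "bitmask" (logical) immediates.  A valid value is
   a run of S consecutive set bits, rotated by R, replicated across the
   register in elements of size E (2, 4, ..., 64 bits).  Worked example
   (illustrative): 0x00ff00ff00ff00ff is eight set bits in a 16-bit element
   replicated four times, so it appears in the table; 0x0000000000001234 has
   no such structure and does not.  */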
2852 static void
2853 aarch64_build_bitmask_table (void)
2855 unsigned HOST_WIDE_INT mask, imm;
2856 unsigned int log_e, e, s, r;
2857 unsigned int nimms = 0;
2859 for (log_e = 1; log_e <= 6; log_e++)
2861 e = 1 << log_e;
2862 if (e == 64)
2863 mask = ~(HOST_WIDE_INT) 0;
2864 else
2865 mask = ((HOST_WIDE_INT) 1 << e) - 1;
2866 for (s = 1; s < e; s++)
2868 for (r = 0; r < e; r++)
2870 /* set s consecutive bits to 1 (s < 64) */
2871 imm = ((unsigned HOST_WIDE_INT)1 << s) - 1;
2872 /* rotate right by r */
2873 if (r != 0)
2874 imm = ((imm >> r) | (imm << (e - r))) & mask;
2875 /* replicate the constant depending on SIMD size */
2876 switch (log_e) {
2877 case 1: imm |= (imm << 2);
2878 case 2: imm |= (imm << 4);
2879 case 3: imm |= (imm << 8);
2880 case 4: imm |= (imm << 16);
2881 case 5: imm |= (imm << 32);
2882 case 6:
2883 break;
2884 default:
2885 gcc_unreachable ();
2887 gcc_assert (nimms < AARCH64_NUM_BITMASKS);
2888 aarch64_bitmasks[nimms++] = imm;
2893 gcc_assert (nimms == AARCH64_NUM_BITMASKS);
2894 qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
2895 aarch64_bitmasks_cmp);
2899 /* Return true if val can be encoded as a 12-bit unsigned immediate with
2900 a left shift of 0 or 12 bits. */
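/* For example (illustrative): 0xabc and 0xabc000 both satisfy this test,
   while 0x1001 does not, because its set bits straddle the two 12-bit
   windows.  */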
2901 bool
2902 aarch64_uimm12_shift (HOST_WIDE_INT val)
2904 return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
2905 || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
2906 );
2910 /* Return true if val is an immediate that can be loaded into a
2911 register by a MOVZ instruction. */
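/* Illustrative examples: for DImode, 0x12340000 (a 16-bit chunk at bit 16)
   and 0xffff000000000000 (a chunk at bit 48) are accepted, whereas
   0x12340001 is not, since it needs more than one 16-bit chunk.  */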
2912 static bool
2913 aarch64_movw_imm (HOST_WIDE_INT val, enum machine_mode mode)
2915 if (GET_MODE_SIZE (mode) > 4)
2917 if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
2918 || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
2919 return 1;
2921 else
2923 /* Ignore sign extension. */
2924 val &= (HOST_WIDE_INT) 0xffffffff;
2926 return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
2927 || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
2931 /* Return true if val is a valid bitmask immediate. */
2932 bool
2933 aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode mode)
2935 if (GET_MODE_SIZE (mode) < 8)
2937 /* Replicate bit pattern. */
2938 val &= (HOST_WIDE_INT) 0xffffffff;
2939 val |= val << 32;
2941 return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
2942 sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
2946 /* Return true if val is an immediate that can be loaded into a
2947 register in a single instruction. */
2948 bool
2949 aarch64_move_imm (HOST_WIDE_INT val, enum machine_mode mode)
2951 if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
2952 return 1;
2953 return aarch64_bitmask_imm (val, mode);
2956 static bool
2957 aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2959 rtx base, offset;
2961 if (GET_CODE (x) == HIGH)
2962 return true;
2964 split_const (x, &base, &offset);
2965 if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
2967 if (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
2968 != SYMBOL_FORCE_TO_MEM)
2969 return true;
2970 else
2971 /* Avoid generating a 64-bit relocation in ILP32; leave
2972 to aarch64_expand_mov_immediate to handle it properly. */
2973 return mode != ptr_mode;
2976 return aarch64_tls_referenced_p (x);
2979 /* Return true if register REGNO is a valid index register.
2980 STRICT_P is true if REG_OK_STRICT is in effect. */
2982 bool
2983 aarch64_regno_ok_for_index_p (int regno, bool strict_p)
2985 if (!HARD_REGISTER_NUM_P (regno))
2987 if (!strict_p)
2988 return true;
2990 if (!reg_renumber)
2991 return false;
2993 regno = reg_renumber[regno];
2995 return GP_REGNUM_P (regno);
2998 /* Return true if register REGNO is a valid base register.
2999 STRICT_P is true if REG_OK_STRICT is in effect. */
3001 bool
3002 aarch64_regno_ok_for_base_p (int regno, bool strict_p)
3004 if (!HARD_REGISTER_NUM_P (regno))
3006 if (!strict_p)
3007 return true;
3009 if (!reg_renumber)
3010 return false;
3012 regno = reg_renumber[regno];
3015 /* The fake registers will be eliminated to either the stack or
3016 hard frame pointer, both of which are usually valid base registers.
3017 Reload deals with the cases where the eliminated form isn't valid. */
3018 return (GP_REGNUM_P (regno)
3019 || regno == SP_REGNUM
3020 || regno == FRAME_POINTER_REGNUM
3021 || regno == ARG_POINTER_REGNUM);
3024 /* Return true if X is a valid base register.
3025 STRICT_P is true if REG_OK_STRICT is in effect. */
3027 static bool
3028 aarch64_base_register_rtx_p (rtx x, bool strict_p)
3030 if (!strict_p && GET_CODE (x) == SUBREG)
3031 x = SUBREG_REG (x);
3033 return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
3036 /* Return true if address offset is a valid index. If it is, fill in INFO
3037 appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */
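/* The recognised forms correspond (illustratively) to index operands such as
   [base, x1], [base, x1, lsl #3], [base, w1, sxtw] and [base, w1, uxtw #2];
   the shift amount, if any, must match the access size of MODE.  */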
3039 static bool
3040 aarch64_classify_index (struct aarch64_address_info *info, rtx x,
3041 enum machine_mode mode, bool strict_p)
3043 enum aarch64_address_type type;
3044 rtx index;
3045 int shift;
3047 /* (reg:P) */
3048 if ((REG_P (x) || GET_CODE (x) == SUBREG)
3049 && GET_MODE (x) == Pmode)
3051 type = ADDRESS_REG_REG;
3052 index = x;
3053 shift = 0;
3055 /* (sign_extend:DI (reg:SI)) */
3056 else if ((GET_CODE (x) == SIGN_EXTEND
3057 || GET_CODE (x) == ZERO_EXTEND)
3058 && GET_MODE (x) == DImode
3059 && GET_MODE (XEXP (x, 0)) == SImode)
3061 type = (GET_CODE (x) == SIGN_EXTEND)
3062 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3063 index = XEXP (x, 0);
3064 shift = 0;
3066 /* (mult:DI (sign_extend:DI (reg:SI)) (const_int scale)) */
3067 else if (GET_CODE (x) == MULT
3068 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3069 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3070 && GET_MODE (XEXP (x, 0)) == DImode
3071 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3072 && CONST_INT_P (XEXP (x, 1)))
3074 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3075 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3076 index = XEXP (XEXP (x, 0), 0);
3077 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3079 /* (ashift:DI (sign_extend:DI (reg:SI)) (const_int shift)) */
3080 else if (GET_CODE (x) == ASHIFT
3081 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
3082 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
3083 && GET_MODE (XEXP (x, 0)) == DImode
3084 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode
3085 && CONST_INT_P (XEXP (x, 1)))
3087 type = (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
3088 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3089 index = XEXP (XEXP (x, 0), 0);
3090 shift = INTVAL (XEXP (x, 1));
3092 /* (sign_extract:DI (mult:DI (reg:DI) (const_int scale)) 32+shift 0) */
3093 else if ((GET_CODE (x) == SIGN_EXTRACT
3094 || GET_CODE (x) == ZERO_EXTRACT)
3095 && GET_MODE (x) == DImode
3096 && GET_CODE (XEXP (x, 0)) == MULT
3097 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3098 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3100 type = (GET_CODE (x) == SIGN_EXTRACT)
3101 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3102 index = XEXP (XEXP (x, 0), 0);
3103 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3104 if (INTVAL (XEXP (x, 1)) != 32 + shift
3105 || INTVAL (XEXP (x, 2)) != 0)
3106 shift = -1;
3108 /* (and:DI (mult:DI (reg:DI) (const_int scale))
3109 (const_int 0xffffffff<<shift)) */
3110 else if (GET_CODE (x) == AND
3111 && GET_MODE (x) == DImode
3112 && GET_CODE (XEXP (x, 0)) == MULT
3113 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3114 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3115 && CONST_INT_P (XEXP (x, 1)))
3117 type = ADDRESS_REG_UXTW;
3118 index = XEXP (XEXP (x, 0), 0);
3119 shift = exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)));
3120 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3121 shift = -1;
3123 /* (sign_extract:DI (ashift:DI (reg:DI) (const_int shift)) 32+shift 0) */
3124 else if ((GET_CODE (x) == SIGN_EXTRACT
3125 || GET_CODE (x) == ZERO_EXTRACT)
3126 && GET_MODE (x) == DImode
3127 && GET_CODE (XEXP (x, 0)) == ASHIFT
3128 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3129 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3131 type = (GET_CODE (x) == SIGN_EXTRACT)
3132 ? ADDRESS_REG_SXTW : ADDRESS_REG_UXTW;
3133 index = XEXP (XEXP (x, 0), 0);
3134 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3135 if (INTVAL (XEXP (x, 1)) != 32 + shift
3136 || INTVAL (XEXP (x, 2)) != 0)
3137 shift = -1;
3139 /* (and:DI (ashift:DI (reg:DI) (const_int shift))
3140 (const_int 0xffffffff<<shift)) */
3141 else if (GET_CODE (x) == AND
3142 && GET_MODE (x) == DImode
3143 && GET_CODE (XEXP (x, 0)) == ASHIFT
3144 && GET_MODE (XEXP (XEXP (x, 0), 0)) == DImode
3145 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3146 && CONST_INT_P (XEXP (x, 1)))
3148 type = ADDRESS_REG_UXTW;
3149 index = XEXP (XEXP (x, 0), 0);
3150 shift = INTVAL (XEXP (XEXP (x, 0), 1));
3151 if (INTVAL (XEXP (x, 1)) != (HOST_WIDE_INT)0xffffffff << shift)
3152 shift = -1;
3154 /* (mult:P (reg:P) (const_int scale)) */
3155 else if (GET_CODE (x) == MULT
3156 && GET_MODE (x) == Pmode
3157 && GET_MODE (XEXP (x, 0)) == Pmode
3158 && CONST_INT_P (XEXP (x, 1)))
3160 type = ADDRESS_REG_REG;
3161 index = XEXP (x, 0);
3162 shift = exact_log2 (INTVAL (XEXP (x, 1)));
3164 /* (ashift:P (reg:P) (const_int shift)) */
3165 else if (GET_CODE (x) == ASHIFT
3166 && GET_MODE (x) == Pmode
3167 && GET_MODE (XEXP (x, 0)) == Pmode
3168 && CONST_INT_P (XEXP (x, 1)))
3170 type = ADDRESS_REG_REG;
3171 index = XEXP (x, 0);
3172 shift = INTVAL (XEXP (x, 1));
3174 else
3175 return false;
3177 if (GET_CODE (index) == SUBREG)
3178 index = SUBREG_REG (index);
3180 if ((shift == 0 ||
3181 (shift > 0 && shift <= 3
3182 && (1 << shift) == GET_MODE_SIZE (mode)))
3183 && REG_P (index)
3184 && aarch64_regno_ok_for_index_p (REGNO (index), strict_p))
3186 info->type = type;
3187 info->offset = index;
3188 info->shift = shift;
3189 return true;
3192 return false;
3195 static inline bool
3196 offset_7bit_signed_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3198 return (offset >= -64 * GET_MODE_SIZE (mode)
3199 && offset < 64 * GET_MODE_SIZE (mode)
3200 && offset % GET_MODE_SIZE (mode) == 0);
3203 static inline bool
3204 offset_9bit_signed_unscaled_p (enum machine_mode mode ATTRIBUTE_UNUSED,
3205 HOST_WIDE_INT offset)
3207 return offset >= -256 && offset < 256;
3210 static inline bool
3211 offset_12bit_unsigned_scaled_p (enum machine_mode mode, HOST_WIDE_INT offset)
3213 return (offset >= 0
3214 && offset < 4096 * GET_MODE_SIZE (mode)
3215 && offset % GET_MODE_SIZE (mode) == 0);
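/* For DImode accesses (8 bytes) these predicates correspond, illustratively,
   to offsets of -512..504 in steps of 8 (7-bit signed, scaled, as used by
   LDP/STP), -256..255 (9-bit signed, unscaled) and 0..32760 in steps of 8
   (12-bit unsigned, scaled).  */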
3218 /* Return true if X is a valid address for machine mode MODE. If it is,
3219 fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in
3220 effect. OUTER_CODE is PARALLEL for a load/store pair. */
3222 static bool
3223 aarch64_classify_address (struct aarch64_address_info *info,
3224 rtx x, enum machine_mode mode,
3225 RTX_CODE outer_code, bool strict_p)
3227 enum rtx_code code = GET_CODE (x);
3228 rtx op0, op1;
3229 bool allow_reg_index_p =
3230 outer_code != PARALLEL && (GET_MODE_SIZE (mode) != 16
3231 || aarch64_vector_mode_supported_p (mode));
3232 /* Don't support anything other than POST_INC or REG addressing for
3233 AdvSIMD. */
3234 if (aarch64_vect_struct_mode_p (mode)
3235 && (code != POST_INC && code != REG))
3236 return false;
3238 switch (code)
3240 case REG:
3241 case SUBREG:
3242 info->type = ADDRESS_REG_IMM;
3243 info->base = x;
3244 info->offset = const0_rtx;
3245 return aarch64_base_register_rtx_p (x, strict_p);
3247 case PLUS:
3248 op0 = XEXP (x, 0);
3249 op1 = XEXP (x, 1);
3250 if (GET_MODE_SIZE (mode) != 0
3251 && CONST_INT_P (op1)
3252 && aarch64_base_register_rtx_p (op0, strict_p))
3254 HOST_WIDE_INT offset = INTVAL (op1);
3256 info->type = ADDRESS_REG_IMM;
3257 info->base = op0;
3258 info->offset = op1;
3260 /* TImode and TFmode values are allowed in both pairs of X
3261 registers and individual Q registers. The available
3262 address modes are:
3263 X,X: 7-bit signed scaled offset
3264 Q: 9-bit signed offset
3265 We conservatively require an offset representable in either mode.
3266 */
3267 if (mode == TImode || mode == TFmode)
3268 return (offset_7bit_signed_scaled_p (mode, offset)
3269 && offset_9bit_signed_unscaled_p (mode, offset));
3271 if (outer_code == PARALLEL)
3272 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3273 && offset_7bit_signed_scaled_p (mode, offset));
3274 else
3275 return (offset_9bit_signed_unscaled_p (mode, offset)
3276 || offset_12bit_unsigned_scaled_p (mode, offset));
3279 if (allow_reg_index_p)
3281 /* Look for base + (scaled/extended) index register. */
3282 if (aarch64_base_register_rtx_p (op0, strict_p)
3283 && aarch64_classify_index (info, op1, mode, strict_p))
3285 info->base = op0;
3286 return true;
3288 if (aarch64_base_register_rtx_p (op1, strict_p)
3289 && aarch64_classify_index (info, op0, mode, strict_p))
3291 info->base = op1;
3292 return true;
3296 return false;
3298 case POST_INC:
3299 case POST_DEC:
3300 case PRE_INC:
3301 case PRE_DEC:
3302 info->type = ADDRESS_REG_WB;
3303 info->base = XEXP (x, 0);
3304 info->offset = NULL_RTX;
3305 return aarch64_base_register_rtx_p (info->base, strict_p);
3307 case POST_MODIFY:
3308 case PRE_MODIFY:
3309 info->type = ADDRESS_REG_WB;
3310 info->base = XEXP (x, 0);
3311 if (GET_CODE (XEXP (x, 1)) == PLUS
3312 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3313 && rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
3314 && aarch64_base_register_rtx_p (info->base, strict_p))
3316 HOST_WIDE_INT offset;
3317 info->offset = XEXP (XEXP (x, 1), 1);
3318 offset = INTVAL (info->offset);
3320 /* TImode and TFmode values are allowed in both pairs of X
3321 registers and individual Q registers. The available
3322 address modes are:
3323 X,X: 7-bit signed scaled offset
3324 Q: 9-bit signed offset
3325 We conservatively require an offset representable in either mode.
3326 */
3327 if (mode == TImode || mode == TFmode)
3328 return (offset_7bit_signed_scaled_p (mode, offset)
3329 && offset_9bit_signed_unscaled_p (mode, offset));
3331 if (outer_code == PARALLEL)
3332 return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
3333 && offset_7bit_signed_scaled_p (mode, offset));
3334 else
3335 return offset_9bit_signed_unscaled_p (mode, offset);
3337 return false;
3339 case CONST:
3340 case SYMBOL_REF:
3341 case LABEL_REF:
3342 /* load literal: pc-relative constant pool entry. Only supported
3343 for SI mode or larger. */
3344 info->type = ADDRESS_SYMBOLIC;
3345 if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
3347 rtx sym, addend;
3349 split_const (x, &sym, &addend);
3350 return (GET_CODE (sym) == LABEL_REF
3351 || (GET_CODE (sym) == SYMBOL_REF
3352 && CONSTANT_POOL_ADDRESS_P (sym)));
3354 return false;
3356 case LO_SUM:
3357 info->type = ADDRESS_LO_SUM;
3358 info->base = XEXP (x, 0);
3359 info->offset = XEXP (x, 1);
3360 if (allow_reg_index_p
3361 && aarch64_base_register_rtx_p (info->base, strict_p))
3363 rtx sym, offs;
3364 split_const (info->offset, &sym, &offs);
3365 if (GET_CODE (sym) == SYMBOL_REF
3366 && (aarch64_classify_symbol (sym, SYMBOL_CONTEXT_MEM)
3367 == SYMBOL_SMALL_ABSOLUTE))
3369 /* The symbol and offset must be aligned to the access size. */
3370 unsigned int align;
3371 unsigned int ref_size;
3373 if (CONSTANT_POOL_ADDRESS_P (sym))
3374 align = GET_MODE_ALIGNMENT (get_pool_mode (sym));
3375 else if (TREE_CONSTANT_POOL_ADDRESS_P (sym))
3377 tree exp = SYMBOL_REF_DECL (sym);
3378 align = TYPE_ALIGN (TREE_TYPE (exp));
3379 align = CONSTANT_ALIGNMENT (exp, align);
3381 else if (SYMBOL_REF_DECL (sym))
3382 align = DECL_ALIGN (SYMBOL_REF_DECL (sym));
3383 else if (SYMBOL_REF_HAS_BLOCK_INFO_P (sym)
3384 && SYMBOL_REF_BLOCK (sym) != NULL)
3385 align = SYMBOL_REF_BLOCK (sym)->alignment;
3386 else
3387 align = BITS_PER_UNIT;
3389 ref_size = GET_MODE_SIZE (mode);
3390 if (ref_size == 0)
3391 ref_size = GET_MODE_SIZE (DImode);
3393 return ((INTVAL (offs) & (ref_size - 1)) == 0
3394 && ((align / BITS_PER_UNIT) & (ref_size - 1)) == 0);
3397 return false;
3399 default:
3400 return false;
3404 bool
3405 aarch64_symbolic_address_p (rtx x)
3407 rtx offset;
3409 split_const (x, &x, &offset);
3410 return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
3413 /* Classify the base of symbolic expression X, given that X appears in
3414 context CONTEXT. */
3416 enum aarch64_symbol_type
3417 aarch64_classify_symbolic_expression (rtx x,
3418 enum aarch64_symbol_context context)
3420 rtx offset;
3422 split_const (x, &x, &offset);
3423 return aarch64_classify_symbol (x, context);
3427 /* Return TRUE if X is a legitimate address for accessing memory in
3428 mode MODE. */
3429 static bool
3430 aarch64_legitimate_address_hook_p (enum machine_mode mode, rtx x, bool strict_p)
3432 struct aarch64_address_info addr;
3434 return aarch64_classify_address (&addr, x, mode, MEM, strict_p);
3437 /* Return TRUE if X is a legitimate address for accessing memory in
3438 mode MODE. OUTER_CODE will be PARALLEL if this is a load/store
3439 pair operation. */
3440 bool
3441 aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
3442 RTX_CODE outer_code, bool strict_p)
3444 struct aarch64_address_info addr;
3446 return aarch64_classify_address (&addr, x, mode, outer_code, strict_p);
3449 /* Return TRUE if rtx X is immediate constant 0.0 */
3450 bool
3451 aarch64_float_const_zero_rtx_p (rtx x)
3453 REAL_VALUE_TYPE r;
3455 if (GET_MODE (x) == VOIDmode)
3456 return false;
3458 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3459 if (REAL_VALUE_MINUS_ZERO (r))
3460 return !HONOR_SIGNED_ZEROS (GET_MODE (x));
3461 return REAL_VALUES_EQUAL (r, dconst0);
3464 /* Return the fixed registers used for condition codes. */
3466 static bool
3467 aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3469 *p1 = CC_REGNUM;
3470 *p2 = INVALID_REGNUM;
3471 return true;
3474 enum machine_mode
3475 aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
3477 /* Floating point comparisons return CCFPmode, except for the ordered
3478 comparisons LT, LE, GT and GE, which return CCFPEmode. */
3479 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3481 switch (code)
3483 case EQ:
3484 case NE:
3485 case UNORDERED:
3486 case ORDERED:
3487 case UNLT:
3488 case UNLE:
3489 case UNGT:
3490 case UNGE:
3491 case UNEQ:
3492 case LTGT:
3493 return CCFPmode;
3495 case LT:
3496 case LE:
3497 case GT:
3498 case GE:
3499 return CCFPEmode;
3501 default:
3502 gcc_unreachable ();
3506 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3507 && y == const0_rtx
3508 && (code == EQ || code == NE || code == LT || code == GE)
3509 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
3510 || GET_CODE (x) == NEG))
3511 return CC_NZmode;
3513 /* A compare with a shifted operand. Because of canonicalization,
3514 the comparison will have to be swapped when we emit the assembly
3515 code. */
3516 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3517 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3518 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
3519 || GET_CODE (x) == LSHIFTRT
3520 || GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND))
3521 return CC_SWPmode;
3523 /* Similarly for a negated operand, but we can only do this for
3524 equalities. */
3525 if ((GET_MODE (x) == SImode || GET_MODE (x) == DImode)
3526 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)
3527 && (code == EQ || code == NE)
3528 && GET_CODE (x) == NEG)
3529 return CC_Zmode;
3531 /* A compare of a mode narrower than SI mode against zero can be done
3532 by extending the value in the comparison. */
3533 if ((GET_MODE (x) == QImode || GET_MODE (x) == HImode)
3534 && y == const0_rtx)
3535 /* Only use sign-extension if we really need it. */
3536 return ((code == GT || code == GE || code == LE || code == LT)
3537 ? CC_SESWPmode : CC_ZESWPmode);
3539 /* For everything else, return CCmode. */
3540 return CCmode;
3543 static unsigned
3544 aarch64_get_condition_code (rtx x)
3546 enum machine_mode mode = GET_MODE (XEXP (x, 0));
3547 enum rtx_code comp_code = GET_CODE (x);
3549 if (GET_MODE_CLASS (mode) != MODE_CC)
3550 mode = SELECT_CC_MODE (comp_code, XEXP (x, 0), XEXP (x, 1));
3552 switch (mode)
3554 case CCFPmode:
3555 case CCFPEmode:
3556 switch (comp_code)
3558 case GE: return AARCH64_GE;
3559 case GT: return AARCH64_GT;
3560 case LE: return AARCH64_LS;
3561 case LT: return AARCH64_MI;
3562 case NE: return AARCH64_NE;
3563 case EQ: return AARCH64_EQ;
3564 case ORDERED: return AARCH64_VC;
3565 case UNORDERED: return AARCH64_VS;
3566 case UNLT: return AARCH64_LT;
3567 case UNLE: return AARCH64_LE;
3568 case UNGT: return AARCH64_HI;
3569 case UNGE: return AARCH64_PL;
3570 default: gcc_unreachable ();
3572 break;
3574 case CCmode:
3575 switch (comp_code)
3577 case NE: return AARCH64_NE;
3578 case EQ: return AARCH64_EQ;
3579 case GE: return AARCH64_GE;
3580 case GT: return AARCH64_GT;
3581 case LE: return AARCH64_LE;
3582 case LT: return AARCH64_LT;
3583 case GEU: return AARCH64_CS;
3584 case GTU: return AARCH64_HI;
3585 case LEU: return AARCH64_LS;
3586 case LTU: return AARCH64_CC;
3587 default: gcc_unreachable ();
3589 break;
3591 case CC_SWPmode:
3592 case CC_ZESWPmode:
3593 case CC_SESWPmode:
3594 switch (comp_code)
3596 case NE: return AARCH64_NE;
3597 case EQ: return AARCH64_EQ;
3598 case GE: return AARCH64_LE;
3599 case GT: return AARCH64_LT;
3600 case LE: return AARCH64_GE;
3601 case LT: return AARCH64_GT;
3602 case GEU: return AARCH64_LS;
3603 case GTU: return AARCH64_CC;
3604 case LEU: return AARCH64_CS;
3605 case LTU: return AARCH64_HI;
3606 default: gcc_unreachable ();
3608 break;
3610 case CC_NZmode:
3611 switch (comp_code)
3613 case NE: return AARCH64_NE;
3614 case EQ: return AARCH64_EQ;
3615 case GE: return AARCH64_PL;
3616 case LT: return AARCH64_MI;
3617 default: gcc_unreachable ();
3619 break;
3621 case CC_Zmode:
3622 switch (comp_code)
3624 case NE: return AARCH64_NE;
3625 case EQ: return AARCH64_EQ;
3626 default: gcc_unreachable ();
3628 break;
3630 default:
3631 gcc_unreachable ();
3632 break;
3636 static unsigned
3637 bit_count (unsigned HOST_WIDE_INT value)
3639 unsigned count = 0;
3641 while (value)
3643 count++;
3644 value &= value - 1;
3647 return count;
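/* Print operand X to file F under the control of the output modifier CODE.
   Summarising the cases handled below (for reference only): 'c' prints an
   integer or symbolic address without a leading '#'; 'w'/'x' print a 32- or
   64-bit general register (or wzr/xzr for zero); 'b', 'h', 's', 'd', 'q'
   print a scalar FP/SIMD register; 'm'/'M' print a condition or its inverse;
   'A', 'L' and 'G' print relocation prefixes such as :got: or :lo12:.  */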
3650 void
3651 aarch64_print_operand (FILE *f, rtx x, char code)
3653 switch (code)
3655 /* An integer or symbol address without a preceding # sign. */
3656 case 'c':
3657 switch (GET_CODE (x))
3659 case CONST_INT:
3660 fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
3661 break;
3663 case SYMBOL_REF:
3664 output_addr_const (f, x);
3665 break;
3667 case CONST:
3668 if (GET_CODE (XEXP (x, 0)) == PLUS
3669 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
3671 output_addr_const (f, x);
3672 break;
3674 /* Fall through. */
3676 default:
3677 output_operand_lossage ("Unsupported operand for code '%c'", code);
3679 break;
3681 case 'e':
3682 /* Print the sign/zero-extend size as a character 8->b, 16->h, 32->w. */
3684 int n;
3686 if (GET_CODE (x) != CONST_INT
3687 || (n = exact_log2 (INTVAL (x) & ~7)) <= 0)
3689 output_operand_lossage ("invalid operand for '%%%c'", code);
3690 return;
3693 switch (n)
3695 case 3:
3696 fputc ('b', f);
3697 break;
3698 case 4:
3699 fputc ('h', f);
3700 break;
3701 case 5:
3702 fputc ('w', f);
3703 break;
3704 default:
3705 output_operand_lossage ("invalid operand for '%%%c'", code);
3706 return;
3709 break;
3711 case 'p':
3713 int n;
3715 /* Print N such that 2^N == X. */
3716 if (GET_CODE (x) != CONST_INT || (n = exact_log2 (INTVAL (x))) < 0)
3718 output_operand_lossage ("invalid operand for '%%%c'", code);
3719 return;
3722 asm_fprintf (f, "%d", n);
3724 break;
3726 case 'P':
3727 /* Print the number of non-zero bits in X (a const_int). */
3728 if (GET_CODE (x) != CONST_INT)
3730 output_operand_lossage ("invalid operand for '%%%c'", code);
3731 return;
3734 asm_fprintf (f, "%u", bit_count (INTVAL (x)));
3735 break;
3737 case 'H':
3738 /* Print the higher numbered register of a pair (TImode) of regs. */
3739 if (GET_CODE (x) != REG || !GP_REGNUM_P (REGNO (x) + 1))
3741 output_operand_lossage ("invalid operand for '%%%c'", code);
3742 return;
3745 asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
3746 break;
3748 case 'm':
3749 /* Print a condition (eq, ne, etc). */
3751 /* CONST_TRUE_RTX means always -- that's the default. */
3752 if (x == const_true_rtx)
3753 return;
3755 if (!COMPARISON_P (x))
3757 output_operand_lossage ("invalid operand for '%%%c'", code);
3758 return;
3761 fputs (aarch64_condition_codes[aarch64_get_condition_code (x)], f);
3762 break;
3764 case 'M':
3765 /* Print the inverse of a condition (eq <-> ne, etc). */
3767 /* CONST_TRUE_RTX means never -- that's the default. */
3768 if (x == const_true_rtx)
3770 fputs ("nv", f);
3771 return;
3774 if (!COMPARISON_P (x))
3776 output_operand_lossage ("invalid operand for '%%%c'", code);
3777 return;
3780 fputs (aarch64_condition_codes[AARCH64_INVERSE_CONDITION_CODE
3781 (aarch64_get_condition_code (x))], f);
3782 break;
3784 case 'b':
3785 case 'h':
3786 case 's':
3787 case 'd':
3788 case 'q':
3789 /* Print a scalar FP/SIMD register name. */
3790 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3792 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3793 return;
3795 asm_fprintf (f, "%c%d", code, REGNO (x) - V0_REGNUM);
3796 break;
3798 case 'S':
3799 case 'T':
3800 case 'U':
3801 case 'V':
3802 /* Print the first FP/SIMD register name in a list. */
3803 if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
3805 output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
3806 return;
3808 asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
3809 break;
3811 case 'X':
3812 /* Print bottom 16 bits of integer constant in hex. */
3813 if (GET_CODE (x) != CONST_INT)
3815 output_operand_lossage ("invalid operand for '%%%c'", code);
3816 return;
3818 asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
3819 break;
3821 case 'w':
3822 case 'x':
3823 /* Print a general register name or the zero register (32-bit or
3824 64-bit). */
3825 if (x == const0_rtx
3826 || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
3828 asm_fprintf (f, "%czr", code);
3829 break;
3832 if (REG_P (x) && GP_REGNUM_P (REGNO (x)))
3834 asm_fprintf (f, "%c%d", code, REGNO (x) - R0_REGNUM);
3835 break;
3838 if (REG_P (x) && REGNO (x) == SP_REGNUM)
3840 asm_fprintf (f, "%ssp", code == 'w' ? "w" : "");
3841 break;
3844 /* Fall through */
3846 case 0:
3847 /* Print a normal operand, if it's a general register, then we
3848 assume DImode. */
3849 if (x == NULL)
3851 output_operand_lossage ("missing operand");
3852 return;
3855 switch (GET_CODE (x))
3857 case REG:
3858 asm_fprintf (f, "%s", reg_names [REGNO (x)]);
3859 break;
3861 case MEM:
3862 aarch64_memory_reference_mode = GET_MODE (x);
3863 output_address (XEXP (x, 0));
3864 break;
3866 case LABEL_REF:
3867 case SYMBOL_REF:
3868 output_addr_const (asm_out_file, x);
3869 break;
3871 case CONST_INT:
3872 asm_fprintf (f, "%wd", INTVAL (x));
3873 break;
3875 case CONST_VECTOR:
3876 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
3878 gcc_assert (aarch64_const_vec_all_same_int_p (x,
3879 HOST_WIDE_INT_MIN,
3880 HOST_WIDE_INT_MAX));
3881 asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
3883 else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
3885 fputc ('0', f);
3887 else
3888 gcc_unreachable ();
3889 break;
3891 case CONST_DOUBLE:
3892 /* CONST_DOUBLE can represent a double-width integer.
3893 In this case, the mode of x is VOIDmode. */
3894 if (GET_MODE (x) == VOIDmode)
3895 ; /* Do Nothing. */
3896 else if (aarch64_float_const_zero_rtx_p (x))
3898 fputc ('0', f);
3899 break;
3901 else if (aarch64_float_const_representable_p (x))
3903 #define buf_size 20
3904 char float_buf[buf_size] = {'\0'};
3905 REAL_VALUE_TYPE r;
3906 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
3907 real_to_decimal_for_mode (float_buf, &r,
3908 buf_size, buf_size,
3909 1, GET_MODE (x));
3910 asm_fprintf (asm_out_file, "%s", float_buf);
3911 break;
3912 #undef buf_size
3914 output_operand_lossage ("invalid constant");
3915 return;
3916 default:
3917 output_operand_lossage ("invalid operand");
3918 return;
3920 break;
3922 case 'A':
3923 if (GET_CODE (x) == HIGH)
3924 x = XEXP (x, 0);
3926 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3928 case SYMBOL_SMALL_GOT:
3929 asm_fprintf (asm_out_file, ":got:");
3930 break;
3932 case SYMBOL_SMALL_TLSGD:
3933 asm_fprintf (asm_out_file, ":tlsgd:");
3934 break;
3936 case SYMBOL_SMALL_TLSDESC:
3937 asm_fprintf (asm_out_file, ":tlsdesc:");
3938 break;
3940 case SYMBOL_SMALL_GOTTPREL:
3941 asm_fprintf (asm_out_file, ":gottprel:");
3942 break;
3944 case SYMBOL_SMALL_TPREL:
3945 asm_fprintf (asm_out_file, ":tprel:");
3946 break;
3948 case SYMBOL_TINY_GOT:
3949 gcc_unreachable ();
3950 break;
3952 default:
3953 break;
3955 output_addr_const (asm_out_file, x);
3956 break;
3958 case 'L':
3959 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3961 case SYMBOL_SMALL_GOT:
3962 asm_fprintf (asm_out_file, ":lo12:");
3963 break;
3965 case SYMBOL_SMALL_TLSGD:
3966 asm_fprintf (asm_out_file, ":tlsgd_lo12:");
3967 break;
3969 case SYMBOL_SMALL_TLSDESC:
3970 asm_fprintf (asm_out_file, ":tlsdesc_lo12:");
3971 break;
3973 case SYMBOL_SMALL_GOTTPREL:
3974 asm_fprintf (asm_out_file, ":gottprel_lo12:");
3975 break;
3977 case SYMBOL_SMALL_TPREL:
3978 asm_fprintf (asm_out_file, ":tprel_lo12_nc:");
3979 break;
3981 case SYMBOL_TINY_GOT:
3982 asm_fprintf (asm_out_file, ":got:");
3983 break;
3985 default:
3986 break;
3988 output_addr_const (asm_out_file, x);
3989 break;
3991 case 'G':
3993 switch (aarch64_classify_symbolic_expression (x, SYMBOL_CONTEXT_ADR))
3995 case SYMBOL_SMALL_TPREL:
3996 asm_fprintf (asm_out_file, ":tprel_hi12:");
3997 break;
3998 default:
3999 break;
4001 output_addr_const (asm_out_file, x);
4002 break;
4004 default:
4005 output_operand_lossage ("invalid operand prefix '%%%c'", code);
4006 return;
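/* Print the address X to F.  Typical outputs (illustrative) are "[x0]",
   "[x0, 16]", "[x0, x1, lsl 3]", "[x0, w1, sxtw 2]", "[x0, 16]!",
   "[x0], 16" and "[x0, #:lo12:sym]", matching the address kinds classified
   above.  */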
4010 void
4011 aarch64_print_operand_address (FILE *f, rtx x)
4013 struct aarch64_address_info addr;
4015 if (aarch64_classify_address (&addr, x, aarch64_memory_reference_mode,
4016 MEM, true))
4017 switch (addr.type)
4019 case ADDRESS_REG_IMM:
4020 if (addr.offset == const0_rtx)
4021 asm_fprintf (f, "[%s]", reg_names [REGNO (addr.base)]);
4022 else
4023 asm_fprintf (f, "[%s, %wd]", reg_names [REGNO (addr.base)],
4024 INTVAL (addr.offset));
4025 return;
4027 case ADDRESS_REG_REG:
4028 if (addr.shift == 0)
4029 asm_fprintf (f, "[%s, %s]", reg_names [REGNO (addr.base)],
4030 reg_names [REGNO (addr.offset)]);
4031 else
4032 asm_fprintf (f, "[%s, %s, lsl %u]", reg_names [REGNO (addr.base)],
4033 reg_names [REGNO (addr.offset)], addr.shift);
4034 return;
4036 case ADDRESS_REG_UXTW:
4037 if (addr.shift == 0)
4038 asm_fprintf (f, "[%s, w%d, uxtw]", reg_names [REGNO (addr.base)],
4039 REGNO (addr.offset) - R0_REGNUM);
4040 else
4041 asm_fprintf (f, "[%s, w%d, uxtw %u]", reg_names [REGNO (addr.base)],
4042 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4043 return;
4045 case ADDRESS_REG_SXTW:
4046 if (addr.shift == 0)
4047 asm_fprintf (f, "[%s, w%d, sxtw]", reg_names [REGNO (addr.base)],
4048 REGNO (addr.offset) - R0_REGNUM);
4049 else
4050 asm_fprintf (f, "[%s, w%d, sxtw %u]", reg_names [REGNO (addr.base)],
4051 REGNO (addr.offset) - R0_REGNUM, addr.shift);
4052 return;
4054 case ADDRESS_REG_WB:
4055 switch (GET_CODE (x))
4057 case PRE_INC:
4058 asm_fprintf (f, "[%s, %d]!", reg_names [REGNO (addr.base)],
4059 GET_MODE_SIZE (aarch64_memory_reference_mode));
4060 return;
4061 case POST_INC:
4062 asm_fprintf (f, "[%s], %d", reg_names [REGNO (addr.base)],
4063 GET_MODE_SIZE (aarch64_memory_reference_mode));
4064 return;
4065 case PRE_DEC:
4066 asm_fprintf (f, "[%s, -%d]!", reg_names [REGNO (addr.base)],
4067 GET_MODE_SIZE (aarch64_memory_reference_mode));
4068 return;
4069 case POST_DEC:
4070 asm_fprintf (f, "[%s], -%d", reg_names [REGNO (addr.base)],
4071 GET_MODE_SIZE (aarch64_memory_reference_mode));
4072 return;
4073 case PRE_MODIFY:
4074 asm_fprintf (f, "[%s, %wd]!", reg_names [REGNO (addr.base)],
4075 INTVAL (addr.offset));
4076 return;
4077 case POST_MODIFY:
4078 asm_fprintf (f, "[%s], %wd", reg_names [REGNO (addr.base)],
4079 INTVAL (addr.offset));
4080 return;
4081 default:
4082 break;
4084 break;
4086 case ADDRESS_LO_SUM:
4087 asm_fprintf (f, "[%s, #:lo12:", reg_names [REGNO (addr.base)]);
4088 output_addr_const (f, addr.offset);
4089 asm_fprintf (f, "]");
4090 return;
4092 case ADDRESS_SYMBOLIC:
4093 break;
4096 output_addr_const (f, x);
4099 bool
4100 aarch64_label_mentioned_p (rtx x)
4102 const char *fmt;
4103 int i;
4105 if (GET_CODE (x) == LABEL_REF)
4106 return true;
4108 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the
4109 referencing instruction, but they are constant offsets, not
4110 symbols. */
4111 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
4112 return false;
4114 fmt = GET_RTX_FORMAT (GET_CODE (x));
4115 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
4117 if (fmt[i] == 'E')
4119 int j;
4121 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4122 if (aarch64_label_mentioned_p (XVECEXP (x, i, j)))
4123 return 1;
4125 else if (fmt[i] == 'e' && aarch64_label_mentioned_p (XEXP (x, i)))
4126 return 1;
4129 return 0;
4132 /* Implement REGNO_REG_CLASS. */
4134 enum reg_class
4135 aarch64_regno_regclass (unsigned regno)
4137 if (GP_REGNUM_P (regno))
4138 return CORE_REGS;
4140 if (regno == SP_REGNUM)
4141 return STACK_REG;
4143 if (regno == FRAME_POINTER_REGNUM
4144 || regno == ARG_POINTER_REGNUM)
4145 return POINTER_REGS;
4147 if (FP_REGNUM_P (regno))
4148 return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
4150 return NO_REGS;
4153 /* Try a machine-dependent way of reloading an illegitimate address
4154 operand. If we find one, push the reload and return the new rtx. */
4156 rtx
4157 aarch64_legitimize_reload_address (rtx *x_p,
4158 enum machine_mode mode,
4159 int opnum, int type,
4160 int ind_levels ATTRIBUTE_UNUSED)
4162 rtx x = *x_p;
4164 /* Do not allow mem (plus (reg, const)) if vector struct mode. */
4165 if (aarch64_vect_struct_mode_p (mode)
4166 && GET_CODE (x) == PLUS
4167 && REG_P (XEXP (x, 0))
4168 && CONST_INT_P (XEXP (x, 1)))
4170 rtx orig_rtx = x;
4171 x = copy_rtx (x);
4172 push_reload (orig_rtx, NULL_RTX, x_p, NULL,
4173 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4174 opnum, (enum reload_type) type);
4175 return x;
4178 /* We must recognize output that we have already generated ourselves. */
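/* That is, the (plus (plus reg c1) c2) form produced by the offset-splitting
code further down; the inner PLUS is reloaded back into a base register. */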
4179 if (GET_CODE (x) == PLUS
4180 && GET_CODE (XEXP (x, 0)) == PLUS
4181 && REG_P (XEXP (XEXP (x, 0), 0))
4182 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
4183 && CONST_INT_P (XEXP (x, 1)))
4185 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4186 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4187 opnum, (enum reload_type) type);
4188 return x;
4191 /* We wish to handle large displacements off a base register by splitting
4192 the addend across an add and the mem insn. This can cut the number of
4193 extra insns needed from 3 to 1. It is only useful for load/store of a
4194 single register with 12 bit offset field. */
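/* For example, a DImode access at offset 0x12008 from a base register is
split into high = 0x12000, moved into the base with a single ADD, and
low = 0x8, which fits in the LDR/STR 12-bit scaled offset field. */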
4195 if (GET_CODE (x) == PLUS
4196 && REG_P (XEXP (x, 0))
4197 && CONST_INT_P (XEXP (x, 1))
4198 && HARD_REGISTER_P (XEXP (x, 0))
4199 && mode != TImode
4200 && mode != TFmode
4201 && aarch64_regno_ok_for_base_p (REGNO (XEXP (x, 0)), true))
4203 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4204 HOST_WIDE_INT low = val & 0xfff;
4205 HOST_WIDE_INT high = val - low;
4206 HOST_WIDE_INT offs;
4207 rtx cst;
4208 enum machine_mode xmode = GET_MODE (x);
4210 /* In ILP32, xmode can be either DImode or SImode. */
4211 gcc_assert (xmode == DImode || xmode == SImode);
4213 /* Leave BLKmode offsets to the generic reload code, since we cannot
4214 ascertain BLKmode alignment here. */
4215 if (GET_MODE_SIZE (mode) == 0)
4216 return NULL_RTX;
4218 offs = low % GET_MODE_SIZE (mode);
4220 /* Align misaligned offset by adjusting high part to compensate. */
4221 if (offs != 0)
4223 if (aarch64_uimm12_shift (high + offs))
4225 /* Align down. */
4226 low = low - offs;
4227 high = high + offs;
4229 else
4231 /* Align up. */
4232 offs = GET_MODE_SIZE (mode) - offs;
4233 low = low + offs;
4234 high = high + (low & 0x1000) - offs;
4235 low &= 0xfff;
4239 /* Check for overflow. */
4240 if (high + low != val)
4241 return NULL_RTX;
4243 cst = GEN_INT (high);
4244 if (!aarch64_uimm12_shift (high))
4245 cst = force_const_mem (xmode, cst);
4247 /* Reload high part into base reg, leaving the low part
4248 in the mem instruction.
4249 Note that replacing this gen_rtx_PLUS with plus_constant is
4250 wrong in this case because we rely on the
4251 (plus (plus reg c1) c2) structure being preserved so that
4252 XEXP (*p, 0) in push_reload below uses the correct term. */
4253 x = gen_rtx_PLUS (xmode,
4254 gen_rtx_PLUS (xmode, XEXP (x, 0), cst),
4255 GEN_INT (low));
4257 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4258 BASE_REG_CLASS, xmode, VOIDmode, 0, 0,
4259 opnum, (enum reload_type) type);
4260 return x;
4263 return NULL_RTX;
4267 static reg_class_t
4268 aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
4269 reg_class_t rclass,
4270 enum machine_mode mode,
4271 secondary_reload_info *sri)
4273 /* Without the TARGET_SIMD instructions we cannot move a Q register
4274 to a Q register directly. We need a scratch. */
4275 if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
4276 && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
4277 && reg_class_subset_p (rclass, FP_REGS))
4279 if (mode == TFmode)
4280 sri->icode = CODE_FOR_aarch64_reload_movtf;
4281 else if (mode == TImode)
4282 sri->icode = CODE_FOR_aarch64_reload_movti;
4283 return NO_REGS;
4286 /* A TFmode or TImode memory access should be handled via FP_REGS
4287 because AArch64 has richer addressing modes for LDR/STR instructions
4288 than for LDP/STP instructions. */
4289 if (!TARGET_GENERAL_REGS_ONLY && rclass == CORE_REGS
4290 && GET_MODE_SIZE (mode) == 16 && MEM_P (x))
4291 return FP_REGS;
4293 if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
4294 return CORE_REGS;
4296 return NO_REGS;
4299 static bool
4300 aarch64_can_eliminate (const int from, const int to)
4302 /* If we need a frame pointer, we must eliminate FRAME_POINTER_REGNUM into
4303 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
4305 if (frame_pointer_needed)
4307 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4308 return true;
4309 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
4310 return false;
4311 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM
4312 && !cfun->calls_alloca)
4313 return true;
4314 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4315 return true;
4317 return false;
4320 return true;
4323 HOST_WIDE_INT
4324 aarch64_initial_elimination_offset (unsigned from, unsigned to)
4326 aarch64_layout_frame ();
4328 if (to == HARD_FRAME_POINTER_REGNUM)
4330 if (from == ARG_POINTER_REGNUM)
4331 return cfun->machine->frame.frame_size - crtl->outgoing_args_size;
4333 if (from == FRAME_POINTER_REGNUM)
4334 return (cfun->machine->frame.hard_fp_offset
4335 - cfun->machine->frame.saved_varargs_size);
4338 if (to == STACK_POINTER_REGNUM)
4340 if (from == FRAME_POINTER_REGNUM)
4341 return (cfun->machine->frame.frame_size
4342 - cfun->machine->frame.saved_varargs_size);
4345 return cfun->machine->frame.frame_size;
4348 /* Implement RETURN_ADDR_RTX. We do not support moving back to a
4349 previous frame. */
4352 aarch64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
4354 if (count != 0)
4355 return const0_rtx;
4356 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
4360 static void
4361 aarch64_asm_trampoline_template (FILE *f)
4363 if (TARGET_ILP32)
4365 asm_fprintf (f, "\tldr\tw%d, .+16\n", IP1_REGNUM - R0_REGNUM);
4366 asm_fprintf (f, "\tldr\tw%d, .+16\n", STATIC_CHAIN_REGNUM - R0_REGNUM);
4368 else
4370 asm_fprintf (f, "\tldr\t%s, .+16\n", reg_names [IP1_REGNUM]);
4371 asm_fprintf (f, "\tldr\t%s, .+20\n", reg_names [STATIC_CHAIN_REGNUM]);
4373 asm_fprintf (f, "\tbr\t%s\n", reg_names [IP1_REGNUM]);
4374 assemble_aligned_integer (4, const0_rtx);
4375 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4376 assemble_aligned_integer (POINTER_BYTES, const0_rtx);
4379 static void
4380 aarch64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
4382 rtx fnaddr, mem, a_tramp;
4383 const int tramp_code_sz = 16;
4385 /* Don't need to copy the trailing D-words; we fill those in below. */
4386 emit_block_move (m_tramp, assemble_trampoline_template (),
4387 GEN_INT (tramp_code_sz), BLOCK_OP_NORMAL);
4388 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz);
4389 fnaddr = XEXP (DECL_RTL (fndecl), 0);
4390 if (GET_MODE (fnaddr) != ptr_mode)
4391 fnaddr = convert_memory_address (ptr_mode, fnaddr);
4392 emit_move_insn (mem, fnaddr);
4394 mem = adjust_address (m_tramp, ptr_mode, tramp_code_sz + POINTER_BYTES);
4395 emit_move_insn (mem, chain_value);
4397 /* XXX We should really define a "clear_cache" pattern and use
4398 gen_clear_cache(). */
4399 a_tramp = XEXP (m_tramp, 0);
4400 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
4401 LCT_NORMAL, VOIDmode, 2, a_tramp, ptr_mode,
4402 plus_constant (ptr_mode, a_tramp, TRAMPOLINE_SIZE),
4403 ptr_mode);
4406 static unsigned char
4407 aarch64_class_max_nregs (reg_class_t regclass, enum machine_mode mode)
4409 switch (regclass)
4411 case CALLER_SAVE_REGS:
4412 case CORE_REGS:
4413 case POINTER_REGS:
4414 case GENERAL_REGS:
4415 case ALL_REGS:
4416 case FP_REGS:
4417 case FP_LO_REGS:
4418 return
4419 aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + 15) / 16 :
4420 (GET_MODE_SIZE (mode) + 7) / 8;
4421 case STACK_REG:
4422 return 1;
4424 case NO_REGS:
4425 return 0;
4427 default:
4428 break;
4430 gcc_unreachable ();
4433 static reg_class_t
4434 aarch64_preferred_reload_class (rtx x, reg_class_t regclass)
4436 if (regclass == POINTER_REGS)
4437 return GENERAL_REGS;
4439 if (regclass == STACK_REG)
4441 if (REG_P(x)
4442 && reg_class_subset_p (REGNO_REG_CLASS (REGNO (x)), POINTER_REGS))
4443 return regclass;
4445 return NO_REGS;
4448 /* If it's an integer immediate that MOVI can't handle, then
4449 FP_REGS is not an option, so we return NO_REGS instead. */
4450 if (CONST_INT_P (x) && reg_class_subset_p (regclass, FP_REGS)
4451 && !aarch64_simd_imm_scalar_p (x, GET_MODE (x)))
4452 return NO_REGS;
4454 /* Register elimination can result in a request for
4455 SP+constant->FP_REGS. We cannot support such operations, which
4456 use SP as source and an FP_REG as destination, so reject them
4457 outright now. */
4458 if (! reg_class_subset_p (regclass, GENERAL_REGS) && GET_CODE (x) == PLUS)
4460 rtx lhs = XEXP (x, 0);
4462 /* Look through a possible SUBREG introduced by ILP32. */
4463 if (GET_CODE (lhs) == SUBREG)
4464 lhs = SUBREG_REG (lhs);
4466 gcc_assert (REG_P (lhs));
4467 gcc_assert (reg_class_subset_p (REGNO_REG_CLASS (REGNO (lhs)),
4468 POINTER_REGS));
4469 return NO_REGS;
4472 return regclass;
4475 void
4476 aarch64_asm_output_labelref (FILE* f, const char *name)
4478 asm_fprintf (f, "%U%s", name);
4481 static void
4482 aarch64_elf_asm_constructor (rtx symbol, int priority)
4484 if (priority == DEFAULT_INIT_PRIORITY)
4485 default_ctor_section_asm_out_constructor (symbol, priority);
4486 else
4488 section *s;
4489 char buf[18];
4490 snprintf (buf, sizeof (buf), ".init_array.%.5u", priority);
4491 s = get_section (buf, SECTION_WRITE, NULL);
4492 switch_to_section (s);
4493 assemble_align (POINTER_SIZE);
4494 assemble_aligned_integer (POINTER_BYTES, symbol);
4498 static void
4499 aarch64_elf_asm_destructor (rtx symbol, int priority)
4501 if (priority == DEFAULT_INIT_PRIORITY)
4502 default_dtor_section_asm_out_destructor (symbol, priority);
4503 else
4505 section *s;
4506 char buf[18];
4507 snprintf (buf, sizeof (buf), ".fini_array.%.5u", priority);
4508 s = get_section (buf, SECTION_WRITE, NULL);
4509 switch_to_section (s);
4510 assemble_align (POINTER_SIZE);
4511 assemble_aligned_integer (POINTER_BYTES, symbol);
4515 const char*
4516 aarch64_output_casesi (rtx *operands)
4518 char buf[100];
4519 char label[100];
4520 rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
4521 int index;
4522 static const char *const patterns[4][2] =
4525 "ldrb\t%w3, [%0,%w1,uxtw]",
4526 "add\t%3, %4, %w3, sxtb #2"
4529 "ldrh\t%w3, [%0,%w1,uxtw #1]",
4530 "add\t%3, %4, %w3, sxth #2"
4533 "ldr\t%w3, [%0,%w1,uxtw #2]",
4534 "add\t%3, %4, %w3, sxtw #2"
4536 /* We assume that DImode is only generated when not optimizing and
4537 that we don't really need 64-bit address offsets. That would
4538 imply an object file with 8GB of code in a single function! */
4540 "ldr\t%w3, [%0,%w1,uxtw #2]",
4541 "add\t%3, %4, %w3, sxtw #2"
4545 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
4547 index = exact_log2 (GET_MODE_SIZE (GET_MODE (diff_vec)));
4549 gcc_assert (index >= 0 && index <= 3);
4551 /* Need to implement table size reduction by changing the code below. */
4552 output_asm_insn (patterns[index][0], operands);
4553 ASM_GENERATE_INTERNAL_LABEL (label, "Lrtx", CODE_LABEL_NUMBER (operands[2]));
4554 snprintf (buf, sizeof (buf),
4555 "adr\t%%4, %s", targetm.strip_name_encoding (label));
4556 output_asm_insn (buf, operands);
4557 output_asm_insn (patterns[index][1], operands);
4558 output_asm_insn ("br\t%3", operands);
4559 assemble_label (asm_out_file, label);
4560 return "";
4564 /* Return size in bits of an arithmetic operand which is shifted/scaled and
4565 masked such that it is suitable for a UXTB, UXTH, or UXTW extend
4566 operator. */
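/* For example, shift = 1 with mask = 0x1fe (0xff << 1) describes a byte
value scaled by 2 and yields 8 (UXTB); combinations that do not match a
UXTB/UXTH/UXTW pattern yield 0. */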
4569 aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
4571 if (shift >= 0 && shift <= 3)
4573 int size;
4574 for (size = 8; size <= 32; size *= 2)
4576 HOST_WIDE_INT bits = ((HOST_WIDE_INT)1U << size) - 1;
4577 if (mask == bits << shift)
4578 return size;
4581 return 0;
4584 static bool
4585 aarch64_use_blocks_for_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED,
4586 const_rtx x ATTRIBUTE_UNUSED)
4588 /* We can't use blocks for constants when we're using a per-function
4589 constant pool. */
4590 return false;
4593 static section *
4594 aarch64_select_rtx_section (enum machine_mode mode ATTRIBUTE_UNUSED,
4595 rtx x ATTRIBUTE_UNUSED,
4596 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
4598 /* Force all constant pool entries into the current function section. */
4599 return function_section (current_function_decl);
4603 /* Costs. */
4605 /* Helper function for rtx cost calculation. Strip a shift expression
4606 from X. Returns the inner operand if successful, or the original
4607 expression on failure. */
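/* For example, (ashift (reg) (const_int 3)) strips to (reg), as does
(mult (reg) (const_int 8)), since a multiply by a power of two is the
canonical form of a shift in some contexts. */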
4608 static rtx
4609 aarch64_strip_shift (rtx x)
4611 rtx op = x;
4613 /* We accept both ROTATERT and ROTATE: since the RHS must be a constant
4614 we can convert both to ROR during final output. */
4615 if ((GET_CODE (op) == ASHIFT
4616 || GET_CODE (op) == ASHIFTRT
4617 || GET_CODE (op) == LSHIFTRT
4618 || GET_CODE (op) == ROTATERT
4619 || GET_CODE (op) == ROTATE)
4620 && CONST_INT_P (XEXP (op, 1)))
4621 return XEXP (op, 0);
4623 if (GET_CODE (op) == MULT
4624 && CONST_INT_P (XEXP (op, 1))
4625 && ((unsigned) exact_log2 (INTVAL (XEXP (op, 1)))) < 64)
4626 return XEXP (op, 0);
4628 return x;
4631 /* Helper function for rtx cost calculation. Strip an extend
4632 expression from X. Returns the inner operand if successful, or the
4633 original expression on failure. We deal with a number of possible
4634 canonicalization variations here. */
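/* For example, (zero_extend (reg)) strips to (reg), and so does an
extended register with a left shift of 1..4, such as
(ashift (sign_extend (reg)) (const_int 2)). */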
4635 static rtx
4636 aarch64_strip_extend (rtx x)
4638 rtx op = x;
4640 /* Zero and sign extraction of a widened value. */
4641 if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
4642 && XEXP (op, 2) == const0_rtx
4643 && GET_CODE (XEXP (op, 0)) == MULT
4644 && aarch64_is_extend_from_extract (GET_MODE (op), XEXP (XEXP (op, 0), 1),
4645 XEXP (op, 1)))
4646 return XEXP (XEXP (op, 0), 0);
4648 /* It can also be represented (for zero-extend) as an AND with an
4649 immediate. */
4650 if (GET_CODE (op) == AND
4651 && GET_CODE (XEXP (op, 0)) == MULT
4652 && CONST_INT_P (XEXP (XEXP (op, 0), 1))
4653 && CONST_INT_P (XEXP (op, 1))
4654 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (XEXP (op, 0), 1))),
4655 INTVAL (XEXP (op, 1))) != 0)
4656 return XEXP (XEXP (op, 0), 0);
4658 /* Now handle extended register, as this may also have an optional
4659 left shift by 1..4. */
4660 if (GET_CODE (op) == ASHIFT
4661 && CONST_INT_P (XEXP (op, 1))
4662 && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
4663 op = XEXP (op, 0);
4665 if (GET_CODE (op) == ZERO_EXTEND
4666 || GET_CODE (op) == SIGN_EXTEND)
4667 op = XEXP (op, 0);
4669 if (op != x)
4670 return op;
4672 return x;
4675 /* Helper function for rtx cost calculation. Calculate the cost of
4676 a MULT, which may be part of a multiply-accumulate rtx. Return
4677 the calculated cost of the expression, recursing manually into
4678 operands where needed. */
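/* For example, (plus (mult (reg) (reg)) (reg)) reaches here with OUTER set
to PLUS and is costed as a single MADD rather than a separate MUL and ADD. */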
4680 static int
4681 aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
4683 rtx op0, op1;
4684 const struct cpu_cost_table *extra_cost
4685 = aarch64_tune_params->insn_extra_cost;
4686 int cost = 0;
4687 bool maybe_fma = (outer == PLUS || outer == MINUS);
4688 enum machine_mode mode = GET_MODE (x);
4690 gcc_checking_assert (code == MULT);
4692 op0 = XEXP (x, 0);
4693 op1 = XEXP (x, 1);
4695 if (VECTOR_MODE_P (mode))
4696 mode = GET_MODE_INNER (mode);
4698 /* Integer multiply/fma. */
4699 if (GET_MODE_CLASS (mode) == MODE_INT)
5701 /* The multiply will be canonicalized as a shift; cost it as such. */
4702 if (CONST_INT_P (op1)
4703 && exact_log2 (INTVAL (op1)) > 0)
4705 if (speed)
4707 if (maybe_fma)
4708 /* ADD (shifted register). */
4709 cost += extra_cost->alu.arith_shift;
4710 else
4711 /* LSL (immediate). */
4712 cost += extra_cost->alu.shift;
4715 cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
4717 return cost;
4720 /* Integer multiplies or FMAs have zero/sign extending variants. */
4721 if ((GET_CODE (op0) == ZERO_EXTEND
4722 && GET_CODE (op1) == ZERO_EXTEND)
4723 || (GET_CODE (op0) == SIGN_EXTEND
4724 && GET_CODE (op1) == SIGN_EXTEND))
4726 cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
4727 + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
4729 if (speed)
4731 if (maybe_fma)
4732 /* MADD/SMADDL/UMADDL. */
4733 cost += extra_cost->mult[0].extend_add;
4734 else
4735 /* MUL/SMULL/UMULL. */
4736 cost += extra_cost->mult[0].extend;
4739 return cost;
4742 /* This is either an integer multiply or an FMA. In both cases
4743 we want to recurse and cost the operands. */
4744 cost += rtx_cost (op0, MULT, 0, speed)
4745 + rtx_cost (op1, MULT, 1, speed);
4747 if (speed)
4749 if (maybe_fma)
4750 /* MADD. */
4751 cost += extra_cost->mult[mode == DImode].add;
4752 else
4753 /* MUL. */
4754 cost += extra_cost->mult[mode == DImode].simple;
4757 return cost;
4759 else
4761 if (speed)
4763 /* Floating-point FMA/FMUL can also support negations of the
4764 operands. */
4765 if (GET_CODE (op0) == NEG)
4766 op0 = XEXP (op0, 0);
4767 if (GET_CODE (op1) == NEG)
4768 op1 = XEXP (op1, 0);
4770 if (maybe_fma)
4771 /* FMADD/FNMADD/FNMSUB/FMSUB. */
4772 cost += extra_cost->fp[mode == DFmode].fma;
4773 else
4774 /* FMUL/FNMUL. */
4775 cost += extra_cost->fp[mode == DFmode].mult;
4778 cost += rtx_cost (op0, MULT, 0, speed)
4779 + rtx_cost (op1, MULT, 1, speed);
4780 return cost;
4784 static int
4785 aarch64_address_cost (rtx x,
4786 enum machine_mode mode,
4787 addr_space_t as ATTRIBUTE_UNUSED,
4788 bool speed)
4790 enum rtx_code c = GET_CODE (x);
4791 const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
4792 struct aarch64_address_info info;
4793 int cost = 0;
4794 info.shift = 0;
4796 if (!aarch64_classify_address (&info, x, mode, c, false))
4798 if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
4800 /* This is a CONST or SYMBOL ref which will be split
4801 in a different way depending on the code model in use.
4802 Cost it through the generic infrastructure. */
4803 int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
4804 /* Divide through by the cost of one instruction to
4805 bring it to the same units as the address costs. */
4806 cost_symbol_ref /= COSTS_N_INSNS (1);
4807 /* The cost is then the cost of preparing the address,
4808 followed by an immediate (possibly 0) offset. */
4809 return cost_symbol_ref + addr_cost->imm_offset;
4811 else
4813 /* This is most likely a jump table from a case
4814 statement. */
4815 return addr_cost->register_offset;
4819 switch (info.type)
4821 case ADDRESS_LO_SUM:
4822 case ADDRESS_SYMBOLIC:
4823 case ADDRESS_REG_IMM:
4824 cost += addr_cost->imm_offset;
4825 break;
4827 case ADDRESS_REG_WB:
4828 if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
4829 cost += addr_cost->pre_modify;
4830 else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
4831 cost += addr_cost->post_modify;
4832 else
4833 gcc_unreachable ();
4835 break;
4837 case ADDRESS_REG_REG:
4838 cost += addr_cost->register_offset;
4839 break;
4841 case ADDRESS_REG_UXTW:
4842 case ADDRESS_REG_SXTW:
4843 cost += addr_cost->register_extend;
4844 break;
4846 default:
4847 gcc_unreachable ();
4851 if (info.shift > 0)
4853 /* For the sake of calculating the cost of the shifted register
4854 component, we can treat same sized modes in the same way. */
4855 switch (GET_MODE_BITSIZE (mode))
4857 case 16:
4858 cost += addr_cost->addr_scale_costs.hi;
4859 break;
4861 case 32:
4862 cost += addr_cost->addr_scale_costs.si;
4863 break;
4865 case 64:
4866 cost += addr_cost->addr_scale_costs.di;
4867 break;
4869 /* We can't tell, or this is a 128-bit vector. */
4870 default:
4871 cost += addr_cost->addr_scale_costs.ti;
4872 break;
4876 return cost;
4879 /* Return true if the RTX X in mode MODE is a zero or sign extract
4880 usable in an ADD or SUB (extended register) instruction. */
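/* For example, (sign_extract:DI (mult (reg) (const_int 4)) (const_int 34)
(const_int 0)) is the form used for a sign-extending SXTW #2 operand of an
extended-register ADD/SUB. */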
4881 static bool
4882 aarch64_rtx_arith_op_extract_p (rtx x, enum machine_mode mode)
4884 /* Catch add with a sign extract.
4885 This is add_<optab><mode>_multp2. */
4886 if (GET_CODE (x) == SIGN_EXTRACT
4887 || GET_CODE (x) == ZERO_EXTRACT)
4889 rtx op0 = XEXP (x, 0);
4890 rtx op1 = XEXP (x, 1);
4891 rtx op2 = XEXP (x, 2);
4893 if (GET_CODE (op0) == MULT
4894 && CONST_INT_P (op1)
4895 && op2 == const0_rtx
4896 && CONST_INT_P (XEXP (op0, 1))
4897 && aarch64_is_extend_from_extract (mode,
4898 XEXP (op0, 1),
4899 op1))
4901 return true;
4905 return false;
4908 static bool
4909 aarch64_frint_unspec_p (unsigned int u)
4911 switch (u)
4913 case UNSPEC_FRINTZ:
4914 case UNSPEC_FRINTP:
4915 case UNSPEC_FRINTM:
4916 case UNSPEC_FRINTA:
4917 case UNSPEC_FRINTN:
4918 case UNSPEC_FRINTX:
4919 case UNSPEC_FRINTI:
4920 return true;
4922 default:
4923 return false;
4927 /* Calculate the cost of calculating (if_then_else (OP0) (OP1) (OP2)),
4928 storing it in *COST. Result is true if the total cost of the operation
4929 has now been calculated. */
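/* This covers both conditional branches, where OP1 or OP2 is (pc) and the
comparison may fold into a CBZ/CBNZ or TBZ/TBNZ, and conditional selects
on the status flags (CSEL and friends). */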
4930 static bool
4931 aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
4933 rtx inner;
4934 rtx comparator;
4935 enum rtx_code cmpcode;
4937 if (COMPARISON_P (op0))
4939 inner = XEXP (op0, 0);
4940 comparator = XEXP (op0, 1);
4941 cmpcode = GET_CODE (op0);
4943 else
4945 inner = op0;
4946 comparator = const0_rtx;
4947 cmpcode = NE;
4950 if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
4952 /* Conditional branch. */
4953 if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
4954 return true;
4955 else
4957 if (cmpcode == NE || cmpcode == EQ)
4959 if (comparator == const0_rtx)
4961 /* TBZ/TBNZ/CBZ/CBNZ. */
4962 if (GET_CODE (inner) == ZERO_EXTRACT)
4963 /* TBZ/TBNZ. */
4964 *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT,
4965 0, speed);
4966 else
4967 /* CBZ/CBNZ. */
4968 *cost += rtx_cost (inner, cmpcode, 0, speed);
4970 return true;
4973 else if (cmpcode == LT || cmpcode == GE)
4975 /* TBZ/TBNZ. */
4976 if (comparator == const0_rtx)
4977 return true;
4981 else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC)
4983 /* It's a conditional operation based on the status flags,
4984 so it must be some flavor of CSEL. */
4986 /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
4987 if (GET_CODE (op1) == NEG
4988 || GET_CODE (op1) == NOT
4989 || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
4990 op1 = XEXP (op1, 0);
4992 *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed);
4993 *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed);
4994 return true;
4997 /* We don't know what this is, cost all operands. */
4998 return false;
5001 /* Calculate the cost of calculating X, storing it in *COST. Result
5002 is true if the total cost of the operation has now been calculated. */
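/* Returning false asks the generic rtx_cost machinery to recurse into the
operands of X and add their costs to *COST itself. */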
5003 static bool
5004 aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
5005 int param ATTRIBUTE_UNUSED, int *cost, bool speed)
5007 rtx op0, op1, op2;
5008 const struct cpu_cost_table *extra_cost
5009 = aarch64_tune_params->insn_extra_cost;
5010 enum machine_mode mode = GET_MODE (x);
5012 /* By default, assume that everything has equivalent cost to the
5013 cheapest instruction. Any additional costs are applied as a delta
5014 above this default. */
5015 *cost = COSTS_N_INSNS (1);
5017 /* TODO: The cost infrastructure currently does not handle
5018 vector operations. Assume that all vector operations
5019 are equally expensive. */
5020 if (VECTOR_MODE_P (mode))
5022 if (speed)
5023 *cost += extra_cost->vect.alu;
5024 return true;
5027 switch (code)
5029 case SET:
5030 /* The cost depends entirely on the operands to SET. */
5031 *cost = 0;
5032 op0 = SET_DEST (x);
5033 op1 = SET_SRC (x);
5035 switch (GET_CODE (op0))
5037 case MEM:
5038 if (speed)
5040 rtx address = XEXP (op0, 0);
5041 if (GET_MODE_CLASS (mode) == MODE_INT)
5042 *cost += extra_cost->ldst.store;
5043 else if (mode == SFmode)
5044 *cost += extra_cost->ldst.storef;
5045 else if (mode == DFmode)
5046 *cost += extra_cost->ldst.stored;
5048 *cost +=
5049 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5050 0, speed));
5053 *cost += rtx_cost (op1, SET, 1, speed);
5054 return true;
5056 case SUBREG:
5057 if (! REG_P (SUBREG_REG (op0)))
5058 *cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
5060 /* Fall through. */
5061 case REG:
5062 /* const0_rtx is in general free, but we will use an
5063 instruction to set a register to 0. */
5064 if (REG_P (op1) || op1 == const0_rtx)
5066 /* The cost is 1 per register copied. */
5067 int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
5068 / UNITS_PER_WORD;
5069 *cost = COSTS_N_INSNS (n_minus_1 + 1);
5071 else
5072 /* Cost is just the cost of the RHS of the set. */
5073 *cost += rtx_cost (op1, SET, 1, speed);
5074 return true;
5076 case ZERO_EXTRACT:
5077 case SIGN_EXTRACT:
5078 /* Bit-field insertion. Strip any redundant widening of
5079 the RHS to meet the width of the target. */
5080 if (GET_CODE (op1) == SUBREG)
5081 op1 = SUBREG_REG (op1);
5082 if ((GET_CODE (op1) == ZERO_EXTEND
5083 || GET_CODE (op1) == SIGN_EXTEND)
5084 && GET_CODE (XEXP (op0, 1)) == CONST_INT
5085 && (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
5086 >= INTVAL (XEXP (op0, 1))))
5087 op1 = XEXP (op1, 0);
5089 if (CONST_INT_P (op1))
5091 /* MOV immediate is assumed to always be cheap. */
5092 *cost = COSTS_N_INSNS (1);
5094 else
5096 /* BFM. */
5097 if (speed)
5098 *cost += extra_cost->alu.bfi;
5099 *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
5102 return true;
5104 default:
5105 /* We can't make sense of this, assume default cost. */
5106 *cost = COSTS_N_INSNS (1);
5107 return false;
5109 return false;
5111 case CONST_INT:
5112 /* If an instruction can incorporate a constant within the
5113 instruction, the instruction's expression avoids calling
5114 rtx_cost() on the constant. If rtx_cost() is called on a
5115 constant, then it is usually because the constant must be
5116 moved into a register by one or more instructions.
5118 The exception is constant 0, which can be expressed
5119 as XZR/WZR and is therefore free. The exception to this is
5120 if we have (set (reg) (const0_rtx)) in which case we must cost
5121 the move. However, we can catch that when we cost the SET, so
5122 we don't need to consider that here. */
5123 if (x == const0_rtx)
5124 *cost = 0;
5125 else
5127 /* To an approximation, building any other constant is
5128 proportionally expensive to the number of instructions
5129 required to build that constant. This is true whether we
5130 are compiling for SPEED or otherwise. */
5131 *cost = COSTS_N_INSNS (aarch64_build_constant (0,
5132 INTVAL (x),
5133 false));
5135 return true;
5137 case CONST_DOUBLE:
5138 if (speed)
5140 /* mov[df,sf]_aarch64. */
5141 if (aarch64_float_const_representable_p (x))
5142 /* FMOV (scalar immediate). */
5143 *cost += extra_cost->fp[mode == DFmode].fpconst;
5144 else if (!aarch64_float_const_zero_rtx_p (x))
5146 /* This will be a load from memory. */
5147 if (mode == DFmode)
5148 *cost += extra_cost->ldst.loadd;
5149 else
5150 *cost += extra_cost->ldst.loadf;
5152 else
5153 /* Otherwise this is +0.0. We get this using MOVI d0, #0
5154 or MOV v0.s[0], wzr - neither of which is modeled by the
5155 cost tables. Just use the default cost. */
5160 return true;
5162 case MEM:
5163 if (speed)
5165 /* For loads we want the base cost of a load, plus an
5166 approximation for the additional cost of the addressing
5167 mode. */
5168 rtx address = XEXP (x, 0);
5169 if (GET_MODE_CLASS (mode) == MODE_INT)
5170 *cost += extra_cost->ldst.load;
5171 else if (mode == SFmode)
5172 *cost += extra_cost->ldst.loadf;
5173 else if (mode == DFmode)
5174 *cost += extra_cost->ldst.loadd;
5176 *cost +=
5177 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5178 0, speed));
5181 return true;
5183 case NEG:
5184 op0 = XEXP (x, 0);
5186 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5188 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5189 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5191 /* CSETM. */
5192 *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
5193 return true;
5196 /* Cost this as SUB wzr, X. */
5197 op0 = CONST0_RTX (GET_MODE (x));
5198 op1 = XEXP (x, 0);
5199 goto cost_minus;
5202 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
5204 /* Support (neg(fma...)) as a single instruction only if
5205 sign of zeros is unimportant. This matches the decision
5206 making in aarch64.md. */
5207 if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
5209 /* FNMADD. */
5210 *cost = rtx_cost (op0, NEG, 0, speed);
5211 return true;
5213 if (speed)
5214 /* FNEG. */
5215 *cost += extra_cost->fp[mode == DFmode].neg;
5216 return false;
5219 return false;
5221 case CLRSB:
5222 case CLZ:
5223 if (speed)
5224 *cost += extra_cost->alu.clz;
5226 return false;
5228 case COMPARE:
5229 op0 = XEXP (x, 0);
5230 op1 = XEXP (x, 1);
5232 if (op1 == const0_rtx
5233 && GET_CODE (op0) == AND)
5235 x = op0;
5236 goto cost_logic;
5239 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
5241 /* TODO: A write to the CC flags possibly costs extra; this
5242 needs encoding in the cost tables. */
5244 /* CC_ZESWPmode supports zero extend for free. */
5245 if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
5246 op0 = XEXP (op0, 0);
5248 /* ANDS. */
5249 if (GET_CODE (op0) == AND)
5251 x = op0;
5252 goto cost_logic;
5255 if (GET_CODE (op0) == PLUS)
5257 /* ADDS (and CMN alias). */
5258 x = op0;
5259 goto cost_plus;
5262 if (GET_CODE (op0) == MINUS)
5264 /* SUBS. */
5265 x = op0;
5266 goto cost_minus;
5269 if (GET_CODE (op1) == NEG)
5271 /* CMN. */
5272 if (speed)
5273 *cost += extra_cost->alu.arith;
5275 *cost += rtx_cost (op0, COMPARE, 0, speed);
5276 *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed);
5277 return true;
5280 /* CMP.
5282 Compare can freely swap the order of operands, and
5283 canonicalization puts the more complex operation first.
5284 But the integer MINUS logic expects the shift/extend
5285 operation in op1. */
5286 if (! (REG_P (op0)
5287 || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
5289 op0 = XEXP (x, 1);
5290 op1 = XEXP (x, 0);
5292 goto cost_minus;
5295 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
5297 /* FCMP. */
5298 if (speed)
5299 *cost += extra_cost->fp[mode == DFmode].compare;
5301 if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
5303 /* FCMP supports constant 0.0 for no extra cost. */
5304 return true;
5306 return false;
5309 return false;
5311 case MINUS:
5313 op0 = XEXP (x, 0);
5314 op1 = XEXP (x, 1);
5316 cost_minus:
5317 /* Detect valid immediates. */
5318 if ((GET_MODE_CLASS (mode) == MODE_INT
5319 || (GET_MODE_CLASS (mode) == MODE_CC
5320 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
5321 && CONST_INT_P (op1)
5322 && aarch64_uimm12_shift (INTVAL (op1)))
5324 *cost += rtx_cost (op0, MINUS, 0, speed);
5326 if (speed)
5327 /* SUB(S) (immediate). */
5328 *cost += extra_cost->alu.arith;
5329 return true;
5333 /* Look for SUB (extended register). */
5334 if (aarch64_rtx_arith_op_extract_p (op1, mode))
5336 if (speed)
5337 *cost += extra_cost->alu.arith_shift;
5339 *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
5340 (enum rtx_code) GET_CODE (op1),
5341 0, speed);
5342 return true;
5345 rtx new_op1 = aarch64_strip_extend (op1);
5347 /* Cost this as an FMA-alike operation. */
5348 if ((GET_CODE (new_op1) == MULT
5349 || GET_CODE (new_op1) == ASHIFT)
5350 && code != COMPARE)
5352 *cost += aarch64_rtx_mult_cost (new_op1, MULT,
5353 (enum rtx_code) code,
5354 speed);
5355 *cost += rtx_cost (op0, MINUS, 0, speed);
5356 return true;
5359 *cost += rtx_cost (new_op1, MINUS, 1, speed);
5361 if (speed)
5363 if (GET_MODE_CLASS (mode) == MODE_INT)
5364 /* SUB(S). */
5365 *cost += extra_cost->alu.arith;
5366 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5367 /* FSUB. */
5368 *cost += extra_cost->fp[mode == DFmode].addsub;
5370 return true;
5373 case PLUS:
5375 rtx new_op0;
5377 op0 = XEXP (x, 0);
5378 op1 = XEXP (x, 1);
5380 cost_plus:
5381 if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
5382 || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
5384 /* CSINC. */
5385 *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
5386 *cost += rtx_cost (op1, PLUS, 1, speed);
5387 return true;
5390 if (GET_MODE_CLASS (mode) == MODE_INT
5391 && CONST_INT_P (op1)
5392 && aarch64_uimm12_shift (INTVAL (op1)))
5394 *cost += rtx_cost (op0, PLUS, 0, speed);
5396 if (speed)
5397 /* ADD (immediate). */
5398 *cost += extra_cost->alu.arith;
5399 return true;
5402 /* Look for ADD (extended register). */
5403 if (aarch64_rtx_arith_op_extract_p (op0, mode))
5405 if (speed)
5406 *cost += extra_cost->alu.arith_shift;
5408 *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
5409 (enum rtx_code) GET_CODE (op0),
5410 0, speed);
5411 return true;
5414 /* Strip any extend, leave shifts behind as we will
5415 cost them through mult_cost. */
5416 new_op0 = aarch64_strip_extend (op0);
5418 if (GET_CODE (new_op0) == MULT
5419 || GET_CODE (new_op0) == ASHIFT)
5421 *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
5422 speed);
5423 *cost += rtx_cost (op1, PLUS, 1, speed);
5424 return true;
5427 *cost += (rtx_cost (new_op0, PLUS, 0, speed)
5428 + rtx_cost (op1, PLUS, 1, speed));
5430 if (speed)
5432 if (GET_MODE_CLASS (mode) == MODE_INT)
5433 /* ADD. */
5434 *cost += extra_cost->alu.arith;
5435 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5436 /* FADD. */
5437 *cost += extra_cost->fp[mode == DFmode].addsub;
5439 return true;
5442 case BSWAP:
5443 *cost = COSTS_N_INSNS (1);
5445 if (speed)
5446 *cost += extra_cost->alu.rev;
5448 return false;
5450 case IOR:
5451 if (aarch_rev16_p (x))
5453 *cost = COSTS_N_INSNS (1);
5455 if (speed)
5456 *cost += extra_cost->alu.rev;
5458 return true;
5460 /* Fall through. */
5461 case XOR:
5462 case AND:
5463 cost_logic:
5464 op0 = XEXP (x, 0);
5465 op1 = XEXP (x, 1);
5467 if (code == AND
5468 && GET_CODE (op0) == MULT
5469 && CONST_INT_P (XEXP (op0, 1))
5470 && CONST_INT_P (op1)
5471 && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
5472 INTVAL (op1)) != 0)
5474 /* This is a UBFM/SBFM. */
5475 *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
5476 if (speed)
5477 *cost += extra_cost->alu.bfx;
5478 return true;
5481 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5483 /* We possibly get the immediate for free; this is not
5484 modelled. */
5485 if (CONST_INT_P (op1)
5486 && aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
5488 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5490 if (speed)
5491 *cost += extra_cost->alu.logical;
5493 return true;
5495 else
5497 rtx new_op0 = op0;
5499 /* Handle ORN, EON, or BIC. */
5500 if (GET_CODE (op0) == NOT)
5501 op0 = XEXP (op0, 0);
5503 new_op0 = aarch64_strip_shift (op0);
5505 /* If we had a shift on op0 then this is a logical-shift-
5506 by-register/immediate operation. Otherwise, this is just
5507 a logical operation. */
5508 if (speed)
5510 if (new_op0 != op0)
5512 /* Shift by immediate. */
5513 if (CONST_INT_P (XEXP (op0, 1)))
5514 *cost += extra_cost->alu.log_shift;
5515 else
5516 *cost += extra_cost->alu.log_shift_reg;
5518 else
5519 *cost += extra_cost->alu.logical;
5522 /* In both cases we want to cost both operands. */
5523 *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
5524 + rtx_cost (op1, (enum rtx_code) code, 1, speed);
5526 return true;
5529 return false;
5531 case NOT:
5532 /* MVN. */
5533 if (speed)
5534 *cost += extra_cost->alu.logical;
5536 /* The logical instruction could have the shifted register form,
5537 but the cost is the same if the shift is processed as a separate
5538 instruction, so we don't bother with it here. */
5539 return false;
5541 case ZERO_EXTEND:
5543 op0 = XEXP (x, 0);
5544 /* If a value is written in SI mode, then zero extended to DI
5545 mode, the operation will in general be free as a write to
5546 a 'w' register implicitly zeroes the upper bits of an 'x'
5547 register. However, if this is
5549 (set (reg) (zero_extend (reg)))
5551 we must cost the explicit register move. */
5552 if (mode == DImode
5553 && GET_MODE (op0) == SImode
5554 && outer == SET)
5556 int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
5558 if (!op_cost && speed)
5559 /* MOV. */
5560 *cost += extra_cost->alu.extend;
5561 else
5562 /* Free, the cost is that of the SI mode operation. */
5563 *cost = op_cost;
5565 return true;
5567 else if (MEM_P (XEXP (x, 0)))
5569 /* All loads can zero extend to any size for free. */
5570 *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
5571 return true;
5574 /* UXTB/UXTH. */
5575 if (speed)
5576 *cost += extra_cost->alu.extend;
5578 return false;
5580 case SIGN_EXTEND:
5581 if (MEM_P (XEXP (x, 0)))
5583 /* LDRSH. */
5584 if (speed)
5586 rtx address = XEXP (XEXP (x, 0), 0);
5587 *cost += extra_cost->ldst.load_sign_extend;
5589 *cost +=
5590 COSTS_N_INSNS (aarch64_address_cost (address, mode,
5591 0, speed));
5593 return true;
5596 if (speed)
5597 *cost += extra_cost->alu.extend;
5598 return false;
5600 case ASHIFT:
5601 op0 = XEXP (x, 0);
5602 op1 = XEXP (x, 1);
5604 if (CONST_INT_P (op1))
5606 /* LSL (immediate), UBFM, UBFIZ and friends. These are all
5607 aliases. */
5608 if (speed)
5609 *cost += extra_cost->alu.shift;
5611 /* We can incorporate zero/sign extend for free. */
5612 if (GET_CODE (op0) == ZERO_EXTEND
5613 || GET_CODE (op0) == SIGN_EXTEND)
5614 op0 = XEXP (op0, 0);
5616 *cost += rtx_cost (op0, ASHIFT, 0, speed);
5617 return true;
5619 else
5621 /* LSLV. */
5622 if (speed)
5623 *cost += extra_cost->alu.shift_reg;
5625 return false; /* All arguments need to be in registers. */
5628 case ROTATE:
5629 case ROTATERT:
5630 case LSHIFTRT:
5631 case ASHIFTRT:
5632 op0 = XEXP (x, 0);
5633 op1 = XEXP (x, 1);
5635 if (CONST_INT_P (op1))
5637 /* ASR (immediate) and friends. */
5638 if (speed)
5639 *cost += extra_cost->alu.shift;
5641 *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
5642 return true;
5644 else
5647 /* ASR (register) and friends. */
5648 if (speed)
5649 *cost += extra_cost->alu.shift_reg;
5651 return false; /* All arguments need to be in registers. */
5654 case SYMBOL_REF:
5656 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
5658 /* LDR. */
5659 if (speed)
5660 *cost += extra_cost->ldst.load;
5662 else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
5663 || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
5665 /* ADRP, followed by ADD. */
5666 *cost += COSTS_N_INSNS (1);
5667 if (speed)
5668 *cost += 2 * extra_cost->alu.arith;
5670 else if (aarch64_cmodel == AARCH64_CMODEL_TINY
5671 || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
5673 /* ADR. */
5674 if (speed)
5675 *cost += extra_cost->alu.arith;
5678 if (flag_pic)
5680 /* One extra load instruction, after accessing the GOT. */
5681 *cost += COSTS_N_INSNS (1);
5682 if (speed)
5683 *cost += extra_cost->ldst.load;
5685 return true;
5687 case HIGH:
5688 case LO_SUM:
5689 /* ADRP/ADD (immediate). */
5690 if (speed)
5691 *cost += extra_cost->alu.arith;
5692 return true;
5694 case ZERO_EXTRACT:
5695 case SIGN_EXTRACT:
5696 /* UBFX/SBFX. */
5697 if (speed)
5698 *cost += extra_cost->alu.bfx;
5700 /* We can trust that the immediates used will be correct (there
5701 are no by-register forms), so we need only cost op0. */
5702 *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed);
5703 return true;
5705 case MULT:
5706 *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
5707 /* aarch64_rtx_mult_cost always handles recursion to its
5708 operands. */
5709 return true;
5711 case MOD:
5712 case UMOD:
5713 if (speed)
5715 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
5716 *cost += (extra_cost->mult[GET_MODE (x) == DImode].add
5717 + extra_cost->mult[GET_MODE (x) == DImode].idiv);
5718 else if (GET_MODE (x) == DFmode)
5719 *cost += (extra_cost->fp[1].mult
5720 + extra_cost->fp[1].div);
5721 else if (GET_MODE (x) == SFmode)
5722 *cost += (extra_cost->fp[0].mult
5723 + extra_cost->fp[0].div);
5725 return false; /* All arguments need to be in registers. */
5727 case DIV:
5728 case UDIV:
5729 case SQRT:
5730 if (speed)
5732 if (GET_MODE_CLASS (mode) == MODE_INT)
5733 /* There is no integer SQRT, so only DIV and UDIV can get
5734 here. */
5735 *cost += extra_cost->mult[mode == DImode].idiv;
5736 else
5737 *cost += extra_cost->fp[mode == DFmode].div;
5739 return false; /* All arguments need to be in registers. */
5741 case IF_THEN_ELSE:
5742 return aarch64_if_then_else_costs (XEXP (x, 0), XEXP (x, 1),
5743 XEXP (x, 2), cost, speed);
5745 case EQ:
5746 case NE:
5747 case GT:
5748 case GTU:
5749 case LT:
5750 case LTU:
5751 case GE:
5752 case GEU:
5753 case LE:
5754 case LEU:
5756 return false; /* All arguments must be in registers. */
5758 case FMA:
5759 op0 = XEXP (x, 0);
5760 op1 = XEXP (x, 1);
5761 op2 = XEXP (x, 2);
5763 if (speed)
5764 *cost += extra_cost->fp[mode == DFmode].fma;
5766 /* FMSUB, FNMADD, and FNMSUB are free. */
5767 if (GET_CODE (op0) == NEG)
5768 op0 = XEXP (op0, 0);
5770 if (GET_CODE (op2) == NEG)
5771 op2 = XEXP (op2, 0);
5773 /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
5774 and the by-element operand as operand 0. */
5775 if (GET_CODE (op1) == NEG)
5776 op1 = XEXP (op1, 0);
5778 /* Catch vector-by-element operations. The by-element operand can
5779 either be (vec_duplicate (vec_select (x))) or just
5780 (vec_select (x)), depending on whether we are multiplying by
5781 a vector or a scalar.
5783 Canonicalization is not very good in these cases: FMA4 will put the
5784 by-element operand as operand 0, while FNMA4 will have it as operand 1. */
5785 if (GET_CODE (op0) == VEC_DUPLICATE)
5786 op0 = XEXP (op0, 0);
5787 else if (GET_CODE (op1) == VEC_DUPLICATE)
5788 op1 = XEXP (op1, 0);
5790 if (GET_CODE (op0) == VEC_SELECT)
5791 op0 = XEXP (op0, 0);
5792 else if (GET_CODE (op1) == VEC_SELECT)
5793 op1 = XEXP (op1, 0);
5795 /* If the remaining parameters are not registers,
5796 get the cost to put them into registers. */
5797 *cost += rtx_cost (op0, FMA, 0, speed);
5798 *cost += rtx_cost (op1, FMA, 1, speed);
5799 *cost += rtx_cost (op2, FMA, 2, speed);
5800 return true;
5802 case FLOAT_EXTEND:
5803 if (speed)
5804 *cost += extra_cost->fp[mode == DFmode].widen;
5805 return false;
5807 case FLOAT_TRUNCATE:
5808 if (speed)
5809 *cost += extra_cost->fp[mode == DFmode].narrow;
5810 return false;
5812 case FIX:
5813 case UNSIGNED_FIX:
5814 x = XEXP (x, 0);
5815 /* Strip the rounding part. They will all be implemented
5816 by the fcvt* family of instructions anyway. */
5817 if (GET_CODE (x) == UNSPEC)
5819 unsigned int uns_code = XINT (x, 1);
5821 if (uns_code == UNSPEC_FRINTA
5822 || uns_code == UNSPEC_FRINTM
5823 || uns_code == UNSPEC_FRINTN
5824 || uns_code == UNSPEC_FRINTP
5825 || uns_code == UNSPEC_FRINTZ)
5826 x = XVECEXP (x, 0, 0);
5829 if (speed)
5830 *cost += extra_cost->fp[GET_MODE (x) == DFmode].toint;
5832 *cost += rtx_cost (x, (enum rtx_code) code, 0, speed);
5833 return true;
5835 case ABS:
5836 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5838 /* FABS and FNEG are analogous. */
5839 if (speed)
5840 *cost += extra_cost->fp[mode == DFmode].neg;
5842 else
5844 /* Integer ABS will either be split into
5845 two arithmetic instructions, or will be an ABS
5846 (scalar), which we don't model. */
5847 *cost = COSTS_N_INSNS (2);
5848 if (speed)
5849 *cost += 2 * extra_cost->alu.arith;
5851 return false;
5853 case SMAX:
5854 case SMIN:
5855 if (speed)
5857 /* FMAXNM/FMINNM/FMAX/FMIN.
5858 TODO: This may not be accurate for all implementations, but
5859 we do not model this in the cost tables. */
5860 *cost += extra_cost->fp[mode == DFmode].addsub;
5862 return false;
5864 case UNSPEC:
5865 /* The floating point round to integer frint* instructions. */
5866 if (aarch64_frint_unspec_p (XINT (x, 1)))
5868 if (speed)
5869 *cost += extra_cost->fp[mode == DFmode].roundint;
5871 return false;
5874 if (XINT (x, 1) == UNSPEC_RBIT)
5876 if (speed)
5877 *cost += extra_cost->alu.rev;
5879 return false;
5881 break;
5883 case TRUNCATE:
5885 /* Decompose <su>muldi3_highpart. */
5886 if (/* (truncate:DI */
5887 mode == DImode
5888 /* (lshiftrt:TI */
5889 && GET_MODE (XEXP (x, 0)) == TImode
5890 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
5891 /* (mult:TI */
5892 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5893 /* (ANY_EXTEND:TI (reg:DI))
5894 (ANY_EXTEND:TI (reg:DI))) */
5895 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
5896 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
5897 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
5898 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
5899 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
5900 && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
5901 /* (const_int 64) */
5902 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
5903 && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
5905 /* UMULH/SMULH. */
5906 if (speed)
5907 *cost += extra_cost->mult[mode == DImode].extend;
5908 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
5909 MULT, 0, speed);
5910 *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
5911 MULT, 1, speed);
5912 return true;
5915 /* Fall through. */
5916 default:
5917 break;
5920 if (dump_file && (dump_flags & TDF_DETAILS))
5921 fprintf (dump_file,
5922 "\nFailed to cost RTX. Assuming default cost.\n");
5924 return true;
5927 /* Wrapper around aarch64_rtx_costs, dumps the partial, or total cost
5928 calculated for X. This cost is stored in *COST. Returns true
5929 if the total cost of X was calculated. */
5930 static bool
5931 aarch64_rtx_costs_wrapper (rtx x, int code, int outer,
5932 int param, int *cost, bool speed)
5934 bool result = aarch64_rtx_costs (x, code, outer, param, cost, speed);
5936 if (dump_file && (dump_flags & TDF_DETAILS))
5938 print_rtl_single (dump_file, x);
5939 fprintf (dump_file, "\n%s cost: %d (%s)\n",
5940 speed ? "Hot" : "Cold",
5941 *cost, result ? "final" : "partial");
5944 return result;
5947 static int
5948 aarch64_register_move_cost (enum machine_mode mode,
5949 reg_class_t from_i, reg_class_t to_i)
5951 enum reg_class from = (enum reg_class) from_i;
5952 enum reg_class to = (enum reg_class) to_i;
5953 const struct cpu_regmove_cost *regmove_cost
5954 = aarch64_tune_params->regmove_cost;
5956 /* Moving between a GPR and the stack register costs the same as GP2GP. */
5957 if ((from == GENERAL_REGS && to == STACK_REG)
5958 || (to == GENERAL_REGS && from == STACK_REG))
5959 return regmove_cost->GP2GP;
5961 /* To/From the stack register, we move via the gprs. */
5962 if (to == STACK_REG || from == STACK_REG)
5963 return aarch64_register_move_cost (mode, from, GENERAL_REGS)
5964 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
5966 if (from == GENERAL_REGS && to == GENERAL_REGS)
5967 return regmove_cost->GP2GP;
5968 else if (from == GENERAL_REGS)
5969 return regmove_cost->GP2FP;
5970 else if (to == GENERAL_REGS)
5971 return regmove_cost->FP2GP;
5973 /* When AdvSIMD instructions are disabled it is not possible to move
5974 a 128-bit value directly between Q registers. This is handled in
5975 secondary reload. A general register is used as a scratch to move
5976 the upper DI value and the lower DI value is moved directly,
5977 hence the cost is the sum of three moves. */
5978 if (! TARGET_SIMD && GET_MODE_SIZE (mode) == 16)
5979 return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
5981 return regmove_cost->FP2FP;
5984 static int
5985 aarch64_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
5986 reg_class_t rclass ATTRIBUTE_UNUSED,
5987 bool in ATTRIBUTE_UNUSED)
5989 return aarch64_tune_params->memmov_cost;
5992 /* Return the number of instructions that can be issued per cycle. */
5993 static int
5994 aarch64_sched_issue_rate (void)
5996 return aarch64_tune_params->issue_rate;
5999 /* Vectorizer cost model target hooks. */
6001 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6002 static int
6003 aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6004 tree vectype,
6005 int misalign ATTRIBUTE_UNUSED)
6007 unsigned elements;
6009 switch (type_of_cost)
6011 case scalar_stmt:
6012 return aarch64_tune_params->vec_costs->scalar_stmt_cost;
6014 case scalar_load:
6015 return aarch64_tune_params->vec_costs->scalar_load_cost;
6017 case scalar_store:
6018 return aarch64_tune_params->vec_costs->scalar_store_cost;
6020 case vector_stmt:
6021 return aarch64_tune_params->vec_costs->vec_stmt_cost;
6023 case vector_load:
6024 return aarch64_tune_params->vec_costs->vec_align_load_cost;
6026 case vector_store:
6027 return aarch64_tune_params->vec_costs->vec_store_cost;
6029 case vec_to_scalar:
6030 return aarch64_tune_params->vec_costs->vec_to_scalar_cost;
6032 case scalar_to_vec:
6033 return aarch64_tune_params->vec_costs->scalar_to_vec_cost;
6035 case unaligned_load:
6036 return aarch64_tune_params->vec_costs->vec_unalign_load_cost;
6038 case unaligned_store:
6039 return aarch64_tune_params->vec_costs->vec_unalign_store_cost;
6041 case cond_branch_taken:
6042 return aarch64_tune_params->vec_costs->cond_taken_branch_cost;
6044 case cond_branch_not_taken:
6045 return aarch64_tune_params->vec_costs->cond_not_taken_branch_cost;
6047 case vec_perm:
6048 case vec_promote_demote:
6049 return aarch64_tune_params->vec_costs->vec_stmt_cost;
6051 case vec_construct:
6052 elements = TYPE_VECTOR_SUBPARTS (vectype);
6053 return elements / 2 + 1;
6055 default:
6056 gcc_unreachable ();
6060 /* Implement targetm.vectorize.add_stmt_cost. */
6061 static unsigned
6062 aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6063 struct _stmt_vec_info *stmt_info, int misalign,
6064 enum vect_cost_model_location where)
6066 unsigned *cost = (unsigned *) data;
6067 unsigned retval = 0;
6069 if (flag_vect_cost_model)
6071 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6072 int stmt_cost =
6073 aarch64_builtin_vectorization_cost (kind, vectype, misalign);
6075 /* Statements in an inner loop relative to the loop being
6076 vectorized are weighted more heavily. The value here is
6077 a function (linear for now) of the loop nest level. */
6078 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6080 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
6081 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
6082 unsigned nest_level = loop_depth (loop);
6084 count *= nest_level;
6087 retval = (unsigned) (count * stmt_cost);
6088 cost[where] += retval;
6091 return retval;
6094 static void initialize_aarch64_code_model (void);
6096 /* Parse the architecture extension string. */
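/* For example, an extension string such as "+crypto+nofp" is walked left to
right: "crypto" ORs that entry's flags_on bits into aarch64_isa_flags, while
the "no" prefix on "fp" clears that entry's flags_off bits. */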
6098 static void
6099 aarch64_parse_extension (char *str)
6101 /* The extension string is parsed left to right. */
6102 const struct aarch64_option_extension *opt = NULL;
6104 /* Flag to say whether we are adding or removing an extension. */
6105 int adding_ext = -1;
6107 while (str != NULL && *str != 0)
6109 char *ext;
6110 size_t len;
6112 str++;
6113 ext = strchr (str, '+');
6115 if (ext != NULL)
6116 len = ext - str;
6117 else
6118 len = strlen (str);
6120 if (len >= 2 && strncmp (str, "no", 2) == 0)
6122 adding_ext = 0;
6123 len -= 2;
6124 str += 2;
6126 else if (len > 0)
6127 adding_ext = 1;
6129 if (len == 0)
6131 error ("missing feature modifier after %qs", "+no");
6132 return;
6135 /* Scan over the extensions table trying to find an exact match. */
6136 for (opt = all_extensions; opt->name != NULL; opt++)
6138 if (strlen (opt->name) == len && strncmp (opt->name, str, len) == 0)
6140 /* Add or remove the extension. */
6141 if (adding_ext)
6142 aarch64_isa_flags |= opt->flags_on;
6143 else
6144 aarch64_isa_flags &= ~(opt->flags_off);
6145 break;
6149 if (opt->name == NULL)
6151 /* Extension not found in list. */
6152 error ("unknown feature modifier %qs", str);
6153 return;
6156 str = ext;
6159 return;
6162 /* Parse the ARCH string. */
6164 static void
6165 aarch64_parse_arch (void)
6167 char *ext;
6168 const struct processor *arch;
6169 char *str = (char *) alloca (strlen (aarch64_arch_string) + 1);
6170 size_t len;
6172 strcpy (str, aarch64_arch_string);
6174 ext = strchr (str, '+');
6176 if (ext != NULL)
6177 len = ext - str;
6178 else
6179 len = strlen (str);
6181 if (len == 0)
6183 error ("missing arch name in -march=%qs", str);
6184 return;
6187 /* Loop through the list of supported ARCHs to find a match. */
6188 for (arch = all_architectures; arch->name != NULL; arch++)
6190 if (strlen (arch->name) == len && strncmp (arch->name, str, len) == 0)
6192 selected_arch = arch;
6193 aarch64_isa_flags = selected_arch->flags;
6195 if (!selected_cpu)
6196 selected_cpu = &all_cores[selected_arch->core];
6198 if (ext != NULL)
6200 /* ARCH string contains at least one extension. */
6201 aarch64_parse_extension (ext);
6204 if (strcmp (selected_arch->arch, selected_cpu->arch))
6206 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
6207 selected_cpu->name, selected_arch->name);
6210 return;
6214 /* ARCH name not found in list. */
6215 error ("unknown value %qs for -march", str);
6216 return;
6219 /* Parse the CPU string. */
6221 static void
6222 aarch64_parse_cpu (void)
6224 char *ext;
6225 const struct processor *cpu;
6226 char *str = (char *) alloca (strlen (aarch64_cpu_string) + 1);
6227 size_t len;
6229 strcpy (str, aarch64_cpu_string);
6231 ext = strchr (str, '+');
6233 if (ext != NULL)
6234 len = ext - str;
6235 else
6236 len = strlen (str);
6238 if (len == 0)
6240 error ("missing cpu name in -mcpu=%qs", str);
6241 return;
6244 /* Loop through the list of supported CPUs to find a match. */
6245 for (cpu = all_cores; cpu->name != NULL; cpu++)
6247 if (strlen (cpu->name) == len && strncmp (cpu->name, str, len) == 0)
6249 selected_cpu = cpu;
6250 selected_tune = cpu;
6251 aarch64_isa_flags = selected_cpu->flags;
6253 if (ext != NULL)
6255 /* CPU string contains at least one extension. */
6256 aarch64_parse_extension (ext);
6259 return;
6263 /* CPU name not found in list. */
6264 error ("unknown value %qs for -mcpu", str);
6265 return;
6268 /* Parse the TUNE string. */
6270 static void
6271 aarch64_parse_tune (void)
6273 const struct processor *cpu;
6274 char *str = (char *) alloca (strlen (aarch64_tune_string) + 1);
6275 strcpy (str, aarch64_tune_string);
6277 /* Loop through the list of supported CPUs to find a match. */
6278 for (cpu = all_cores; cpu->name != NULL; cpu++)
6280 if (strcmp (cpu->name, str) == 0)
6282 selected_tune = cpu;
6283 return;
6287 /* CPU name not found in list. */
6288 error ("unknown value %qs for -mtune", str);
6289 return;
6293 /* Implement TARGET_OPTION_OVERRIDE. */
6295 static void
6296 aarch64_override_options (void)
6298 /* -mcpu=CPU is shorthand for -march=ARCH_FOR_CPU, -mtune=CPU.
6299 If either of -march or -mtune is given, they override their
6300 respective component of -mcpu.
6302 So, first parse AARCH64_CPU_STRING, then the others. Be careful
6303 with -march: if -mcpu is not present on the command line, -march
6304 must set a sensible default CPU. */
6305 if (aarch64_cpu_string)
6307 aarch64_parse_cpu ();
6310 if (aarch64_arch_string)
6312 aarch64_parse_arch ();
6315 if (aarch64_tune_string)
6317 aarch64_parse_tune ();
6320 #ifndef HAVE_AS_MABI_OPTION
6321 /* The compiler may have been configured with 2.23.* binutils, which does
6322 not have support for ILP32. */
6323 if (TARGET_ILP32)
6324 error ("Assembler does not support -mabi=ilp32");
6325 #endif
6327 initialize_aarch64_code_model ();
6329 aarch64_build_bitmask_table ();
6331 /* This target defaults to strict volatile bitfields. */
6332 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
6333 flag_strict_volatile_bitfields = 1;
6335 /* If the user did not specify a processor, choose the default
6336 one for them. This will be the CPU set during configuration using
6337 --with-cpu; otherwise it is "generic". */
6338 if (!selected_cpu)
6340 selected_cpu = &all_cores[TARGET_CPU_DEFAULT & 0x3f];
6341 aarch64_isa_flags = TARGET_CPU_DEFAULT >> 6;
6344 gcc_assert (selected_cpu);
6346 /* The selected cpu may be an architecture, so look up tuning by core ID. */
6347 if (!selected_tune)
6348 selected_tune = &all_cores[selected_cpu->core];
6350 aarch64_tune_flags = selected_tune->flags;
6351 aarch64_tune = selected_tune->core;
6352 aarch64_tune_params = selected_tune->tune;
6354 aarch64_override_options_after_change ();
6357 /* Implement targetm.override_options_after_change. */
6359 static void
6360 aarch64_override_options_after_change (void)
6362 if (flag_omit_frame_pointer)
6363 flag_omit_leaf_frame_pointer = false;
6364 else if (flag_omit_leaf_frame_pointer)
6365 flag_omit_frame_pointer = true;
6368 static struct machine_function *
6369 aarch64_init_machine_status (void)
6371 struct machine_function *machine;
6372 machine = ggc_alloc_cleared_machine_function ();
6373 return machine;
6376 void
6377 aarch64_init_expanders (void)
6379 init_machine_status = aarch64_init_machine_status;
6382 /* A checking mechanism for the implementation of the various code models. */
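/* For example, -mcmodel=small together with -fpic or -fPIC selects
AARCH64_CMODEL_SMALL_PIC, while the large code model is reported as
unsupported when PIC is requested. */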
6383 static void
6384 initialize_aarch64_code_model (void)
6386 if (flag_pic)
6388 switch (aarch64_cmodel_var)
6390 case AARCH64_CMODEL_TINY:
6391 aarch64_cmodel = AARCH64_CMODEL_TINY_PIC;
6392 break;
6393 case AARCH64_CMODEL_SMALL:
6394 aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC;
6395 break;
6396 case AARCH64_CMODEL_LARGE:
6397 sorry ("code model %qs with -f%s", "large",
6398 flag_pic > 1 ? "PIC" : "pic");
6399 default:
6400 gcc_unreachable ();
6403 else
6404 aarch64_cmodel = aarch64_cmodel_var;
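/* A sketch of the mapping implemented above:

     -mcmodel=tiny  -fpic          ->  AARCH64_CMODEL_TINY_PIC
     -mcmodel=small -fPIC          ->  AARCH64_CMODEL_SMALL_PIC
     -mcmodel=large -fpic/-fPIC    ->  rejected via sorry ()
     -mcmodel=small, no -fpic      ->  AARCH64_CMODEL_SMALL  */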
6407 /* Return true if SYMBOL_REF X binds locally. */
6409 static bool
6410 aarch64_symbol_binds_local_p (const_rtx x)
6412 return (SYMBOL_REF_DECL (x)
6413 ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
6414 : SYMBOL_REF_LOCAL_P (x));
6417 /* Return true if SYMBOL_REF X is thread-local. */
6418 static bool
6419 aarch64_tls_symbol_p (rtx x)
6421 if (! TARGET_HAVE_TLS)
6422 return false;
6424 if (GET_CODE (x) != SYMBOL_REF)
6425 return false;
6427 return SYMBOL_REF_TLS_MODEL (x) != 0;
6430 /* Classify a TLS symbol into one of the TLS kinds. */
6431 enum aarch64_symbol_type
6432 aarch64_classify_tls_symbol (rtx x)
6434 enum tls_model tls_kind = tls_symbolic_operand_type (x);
6436 switch (tls_kind)
6438 case TLS_MODEL_GLOBAL_DYNAMIC:
6439 case TLS_MODEL_LOCAL_DYNAMIC:
6440 return TARGET_TLS_DESC ? SYMBOL_SMALL_TLSDESC : SYMBOL_SMALL_TLSGD;
6442 case TLS_MODEL_INITIAL_EXEC:
6443 return SYMBOL_SMALL_GOTTPREL;
6445 case TLS_MODEL_LOCAL_EXEC:
6446 return SYMBOL_SMALL_TPREL;
6448 case TLS_MODEL_EMULATED:
6449 case TLS_MODEL_NONE:
6450 return SYMBOL_FORCE_TO_MEM;
6452 default:
6453 gcc_unreachable ();
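/* For instance, a TLS access such as

     __thread int counter;
     int get (void) { return counter; }

   compiled with -fPIC uses the global-/local-dynamic model and is
   classified as SYMBOL_SMALL_TLSDESC when TLS descriptors are enabled
   (SYMBOL_SMALL_TLSGD otherwise), while -ftls-model=initial-exec yields
   SYMBOL_SMALL_GOTTPREL.  This is only a sketch; the model ultimately
   comes from tls_symbolic_operand_type above.  */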
6457 /* Return the method that should be used to access SYMBOL_REF or
6458 LABEL_REF X in context CONTEXT. */
6460 enum aarch64_symbol_type
6461 aarch64_classify_symbol (rtx x,
6462 enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
6464 if (GET_CODE (x) == LABEL_REF)
6466 switch (aarch64_cmodel)
6468 case AARCH64_CMODEL_LARGE:
6469 return SYMBOL_FORCE_TO_MEM;
6471 case AARCH64_CMODEL_TINY_PIC:
6472 case AARCH64_CMODEL_TINY:
6473 return SYMBOL_TINY_ABSOLUTE;
6475 case AARCH64_CMODEL_SMALL_PIC:
6476 case AARCH64_CMODEL_SMALL:
6477 return SYMBOL_SMALL_ABSOLUTE;
6479 default:
6480 gcc_unreachable ();
6484 if (GET_CODE (x) == SYMBOL_REF)
6486 if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
6487 return SYMBOL_FORCE_TO_MEM;
6489 if (aarch64_tls_symbol_p (x))
6490 return aarch64_classify_tls_symbol (x);
6492 switch (aarch64_cmodel)
6494 case AARCH64_CMODEL_TINY:
6495 if (SYMBOL_REF_WEAK (x))
6496 return SYMBOL_FORCE_TO_MEM;
6497 return SYMBOL_TINY_ABSOLUTE;
6499 case AARCH64_CMODEL_SMALL:
6500 if (SYMBOL_REF_WEAK (x))
6501 return SYMBOL_FORCE_TO_MEM;
6502 return SYMBOL_SMALL_ABSOLUTE;
6504 case AARCH64_CMODEL_TINY_PIC:
6505 if (!aarch64_symbol_binds_local_p (x))
6506 return SYMBOL_TINY_GOT;
6507 return SYMBOL_TINY_ABSOLUTE;
6509 case AARCH64_CMODEL_SMALL_PIC:
6510 if (!aarch64_symbol_binds_local_p (x))
6511 return SYMBOL_SMALL_GOT;
6512 return SYMBOL_SMALL_ABSOLUTE;
6514 default:
6515 gcc_unreachable ();
6519 /* By default push everything into the constant pool. */
6520 return SYMBOL_FORCE_TO_MEM;
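/* A rough example of the classification above: under the default
   -mcmodel=small a locally bound symbol is SYMBOL_SMALL_ABSOLUTE
   (adrp/add addressing), the same symbol becomes SYMBOL_SMALL_GOT under
   -fPIC when it does not bind locally, and under -mcmodel=large every
   symbol is SYMBOL_FORCE_TO_MEM (literal pool).  */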
6523 bool
6524 aarch64_constant_address_p (rtx x)
6526 return (CONSTANT_P (x) && memory_address_p (DImode, x));
6529 bool
6530 aarch64_legitimate_pic_operand_p (rtx x)
6532 if (GET_CODE (x) == SYMBOL_REF
6533 || (GET_CODE (x) == CONST
6534 && GET_CODE (XEXP (x, 0)) == PLUS
6535 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6536 return false;
6538 return true;
6541 /* Return true if X holds a floating-point constant that is either
6542 +0.0 or representable as a quarter-precision (FMOV) immediate. */
6543 static bool
6544 aarch64_valid_floating_const (enum machine_mode mode, rtx x)
6546 if (!CONST_DOUBLE_P (x))
6547 return false;
6549 /* TODO: We could handle moving 0.0 to a TFmode register,
6550 but first we would like to refactor the movtf_aarch64
6551 pattern to be more amenable to splitting moves properly and
6552 to gate correctly on TARGET_SIMD.  For now, reject all
6553 constants that are not destined for SFmode or DFmode registers. */
6554 if (!(mode == SFmode || mode == DFmode))
6555 return false;
6557 if (aarch64_float_const_zero_rtx_p (x))
6558 return true;
6559 return aarch64_float_const_representable_p (x);
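/* A few concrete cases, as a sketch only (aarch64_float_const_representable_p
   is the authority): 0.0, 0.5, 1.0 and 31.0 are all accepted for SFmode or
   DFmode, either as +0.0 or as an FMOV immediate of the form n/16 * 2^r with
   16 <= n <= 31 and -3 <= r <= 4, whereas 0.1 is rejected, as is any TFmode
   constant for now.  */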
6562 static bool
6563 aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
6565 /* Do not allow vector struct mode constants. We could support
6566 0 and -1 easily, but they need support in aarch64-simd.md. */
6567 if (TARGET_SIMD && aarch64_vect_struct_mode_p (mode))
6568 return false;
6570 /* This could probably go away because
6571 we now decompose CONST_INTs according to expand_mov_immediate. */
6572 if ((GET_CODE (x) == CONST_VECTOR
6573 && aarch64_simd_valid_immediate (x, mode, false, NULL))
6574 || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
6575 return !targetm.cannot_force_const_mem (mode, x);
6577 if (GET_CODE (x) == HIGH
6578 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
6579 return true;
6581 return aarch64_constant_address_p (x);
6584 static rtx
6585 aarch64_load_tp (rtx target)
6587 if (!target
6588 || GET_MODE (target) != Pmode
6589 || !register_operand (target, Pmode))
6590 target = gen_reg_rtx (Pmode);
6592 /* Can return in any reg. */
6593 emit_insn (gen_aarch64_load_tp_hard (target));
6594 return target;
6597 /* On AAPCS systems, this is the "struct __va_list". */
6598 static GTY(()) tree va_list_type;
6600 /* Implement TARGET_BUILD_BUILTIN_VA_LIST.
6601 Return the type to use as __builtin_va_list.
6603 AAPCS64 \S 7.1.4 requires that va_list be a typedef for a type defined as:
6605 struct __va_list
6607 void *__stack;
6608 void *__gr_top;
6609 void *__vr_top;
6610 int __gr_offs;
6611 int __vr_offs;
6612 }; */
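/* A minimal illustration of how that record is consumed (hypothetical user
   code, not part of the compiler; the va_list below is lowered to the
   five-field __va_list record):

     #include <stdarg.h>

     int
     sum (int count, ...)
     {
       va_list ap;
       int i, total = 0;

       va_start (ap, count);
       for (i = 0; i < count; i++)
         total += va_arg (ap, int);
       va_end (ap);
       return total;
     }

   Integer arguments are fetched through __gr_top/__gr_offs, FP/SIMD
   arguments through __vr_top/__vr_offs, and any overflow arguments
   through __stack.  */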
6614 static tree
6615 aarch64_build_builtin_va_list (void)
6617 tree va_list_name;
6618 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6620 /* Create the type. */
6621 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
6622 /* Give it the required name. */
6623 va_list_name = build_decl (BUILTINS_LOCATION,
6624 TYPE_DECL,
6625 get_identifier ("__va_list"),
6626 va_list_type);
6627 DECL_ARTIFICIAL (va_list_name) = 1;
6628 TYPE_NAME (va_list_type) = va_list_name;
6629 TYPE_STUB_DECL (va_list_type) = va_list_name;
6631 /* Create the fields. */
6632 f_stack = build_decl (BUILTINS_LOCATION,
6633 FIELD_DECL, get_identifier ("__stack"),
6634 ptr_type_node);
6635 f_grtop = build_decl (BUILTINS_LOCATION,
6636 FIELD_DECL, get_identifier ("__gr_top"),
6637 ptr_type_node);
6638 f_vrtop = build_decl (BUILTINS_LOCATION,
6639 FIELD_DECL, get_identifier ("__vr_top"),
6640 ptr_type_node);
6641 f_groff = build_decl (BUILTINS_LOCATION,
6642 FIELD_DECL, get_identifier ("__gr_offs"),
6643 integer_type_node);
6644 f_vroff = build_decl (BUILTINS_LOCATION,
6645 FIELD_DECL, get_identifier ("__vr_offs"),
6646 integer_type_node);
6648 DECL_ARTIFICIAL (f_stack) = 1;
6649 DECL_ARTIFICIAL (f_grtop) = 1;
6650 DECL_ARTIFICIAL (f_vrtop) = 1;
6651 DECL_ARTIFICIAL (f_groff) = 1;
6652 DECL_ARTIFICIAL (f_vroff) = 1;
6654 DECL_FIELD_CONTEXT (f_stack) = va_list_type;
6655 DECL_FIELD_CONTEXT (f_grtop) = va_list_type;
6656 DECL_FIELD_CONTEXT (f_vrtop) = va_list_type;
6657 DECL_FIELD_CONTEXT (f_groff) = va_list_type;
6658 DECL_FIELD_CONTEXT (f_vroff) = va_list_type;
6660 TYPE_FIELDS (va_list_type) = f_stack;
6661 DECL_CHAIN (f_stack) = f_grtop;
6662 DECL_CHAIN (f_grtop) = f_vrtop;
6663 DECL_CHAIN (f_vrtop) = f_groff;
6664 DECL_CHAIN (f_groff) = f_vroff;
6666 /* Compute its layout. */
6667 layout_type (va_list_type);
6669 return va_list_type;
6672 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
6673 static void
6674 aarch64_expand_builtin_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
6676 const CUMULATIVE_ARGS *cum;
6677 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6678 tree stack, grtop, vrtop, groff, vroff;
6679 tree t;
6680 int gr_save_area_size;
6681 int vr_save_area_size;
6682 int vr_offset;
6684 cum = &crtl->args.info;
6685 gr_save_area_size
6686 = (NUM_ARG_REGS - cum->aapcs_ncrn) * UNITS_PER_WORD;
6687 vr_save_area_size
6688 = (NUM_FP_ARG_REGS - cum->aapcs_nvrn) * UNITS_PER_VREG;
6690 if (TARGET_GENERAL_REGS_ONLY)
6692 if (cum->aapcs_nvrn > 0)
6693 sorry ("%qs and floating point or vector arguments",
6694 "-mgeneral-regs-only");
6695 vr_save_area_size = 0;
6698 f_stack = TYPE_FIELDS (va_list_type_node);
6699 f_grtop = DECL_CHAIN (f_stack);
6700 f_vrtop = DECL_CHAIN (f_grtop);
6701 f_groff = DECL_CHAIN (f_vrtop);
6702 f_vroff = DECL_CHAIN (f_groff);
6704 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), valist, f_stack,
6705 NULL_TREE);
6706 grtop = build3 (COMPONENT_REF, TREE_TYPE (f_grtop), valist, f_grtop,
6707 NULL_TREE);
6708 vrtop = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop), valist, f_vrtop,
6709 NULL_TREE);
6710 groff = build3 (COMPONENT_REF, TREE_TYPE (f_groff), valist, f_groff,
6711 NULL_TREE);
6712 vroff = build3 (COMPONENT_REF, TREE_TYPE (f_vroff), valist, f_vroff,
6713 NULL_TREE);
6715 /* Emit code to initialize STACK, which points to the next varargs stack
6716 argument. CUM->AAPCS_STACK_SIZE gives the number of stack words used
6717 by named arguments. STACK is 8-byte aligned. */
6718 t = make_tree (TREE_TYPE (stack), virtual_incoming_args_rtx);
6719 if (cum->aapcs_stack_size > 0)
6720 t = fold_build_pointer_plus_hwi (t, cum->aapcs_stack_size * UNITS_PER_WORD);
6721 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), stack, t);
6722 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6724 /* Emit code to initialize GRTOP, the top of the GR save area.
6725 virtual_incoming_args_rtx should have been 16 byte aligned. */
6726 t = make_tree (TREE_TYPE (grtop), virtual_incoming_args_rtx);
6727 t = build2 (MODIFY_EXPR, TREE_TYPE (grtop), grtop, t);
6728 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6730 /* Emit code to initialize VRTOP, the top of the VR save area.
6731 This address is gr_save_area_bytes below GRTOP, rounded
6732 down to the next 16-byte boundary. */
6733 t = make_tree (TREE_TYPE (vrtop), virtual_incoming_args_rtx);
6734 vr_offset = AARCH64_ROUND_UP (gr_save_area_size,
6735 STACK_BOUNDARY / BITS_PER_UNIT);
6737 if (vr_offset)
6738 t = fold_build_pointer_plus_hwi (t, -vr_offset);
6739 t = build2 (MODIFY_EXPR, TREE_TYPE (vrtop), vrtop, t);
6740 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6742 /* Emit code to initialize GROFF, the offset from GRTOP of the
6743 next GPR argument. */
6744 t = build2 (MODIFY_EXPR, TREE_TYPE (groff), groff,
6745 build_int_cst (TREE_TYPE (groff), -gr_save_area_size));
6746 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6748 /* Likewise emit code to initialize VROFF, the offset from VRTOP
6749 of the next VR argument. */
6750 t = build2 (MODIFY_EXPR, TREE_TYPE (vroff), vroff,
6751 build_int_cst (TREE_TYPE (vroff), -vr_save_area_size));
6752 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6755 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
6757 static tree
6758 aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
6759 gimple_seq *post_p ATTRIBUTE_UNUSED)
6761 tree addr;
6762 bool indirect_p;
6763 bool is_ha; /* is HFA or HVA. */
6764 bool dw_align; /* double-word align. */
6765 enum machine_mode ag_mode = VOIDmode;
6766 int nregs;
6767 enum machine_mode mode;
6769 tree f_stack, f_grtop, f_vrtop, f_groff, f_vroff;
6770 tree stack, f_top, f_off, off, arg, roundup, on_stack;
6771 HOST_WIDE_INT size, rsize, adjust, align;
6772 tree t, u, cond1, cond2;
6774 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6775 if (indirect_p)
6776 type = build_pointer_type (type);
6778 mode = TYPE_MODE (type);
6780 f_stack = TYPE_FIELDS (va_list_type_node);
6781 f_grtop = DECL_CHAIN (f_stack);
6782 f_vrtop = DECL_CHAIN (f_grtop);
6783 f_groff = DECL_CHAIN (f_vrtop);
6784 f_vroff = DECL_CHAIN (f_groff);
6786 stack = build3 (COMPONENT_REF, TREE_TYPE (f_stack), unshare_expr (valist),
6787 f_stack, NULL_TREE);
6788 size = int_size_in_bytes (type);
6789 align = aarch64_function_arg_alignment (mode, type) / BITS_PER_UNIT;
6791 dw_align = false;
6792 adjust = 0;
6793 if (aarch64_vfp_is_call_or_return_candidate (mode,
6794 type,
6795 &ag_mode,
6796 &nregs,
6797 &is_ha))
6799 /* TYPE passed in fp/simd registers. */
6800 if (TARGET_GENERAL_REGS_ONLY)
6801 sorry ("%qs and floating point or vector arguments",
6802 "-mgeneral-regs-only");
6804 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_vrtop),
6805 unshare_expr (valist), f_vrtop, NULL_TREE);
6806 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_vroff),
6807 unshare_expr (valist), f_vroff, NULL_TREE);
6809 rsize = nregs * UNITS_PER_VREG;
6811 if (is_ha)
6813 if (BYTES_BIG_ENDIAN && GET_MODE_SIZE (ag_mode) < UNITS_PER_VREG)
6814 adjust = UNITS_PER_VREG - GET_MODE_SIZE (ag_mode);
6816 else if (BLOCK_REG_PADDING (mode, type, 1) == downward
6817 && size < UNITS_PER_VREG)
6819 adjust = UNITS_PER_VREG - size;
6822 else
6824 /* TYPE passed in general registers. */
6825 f_top = build3 (COMPONENT_REF, TREE_TYPE (f_grtop),
6826 unshare_expr (valist), f_grtop, NULL_TREE);
6827 f_off = build3 (COMPONENT_REF, TREE_TYPE (f_groff),
6828 unshare_expr (valist), f_groff, NULL_TREE);
6829 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6830 nregs = rsize / UNITS_PER_WORD;
6832 if (align > 8)
6833 dw_align = true;
6835 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6836 && size < UNITS_PER_WORD)
6838 adjust = UNITS_PER_WORD - size;
6842 /* Get a local temporary for the field value. */
6843 off = get_initialized_tmp_var (f_off, pre_p, NULL);
6845 /* Emit code to branch if off >= 0. */
6846 t = build2 (GE_EXPR, boolean_type_node, off,
6847 build_int_cst (TREE_TYPE (off), 0));
6848 cond1 = build3 (COND_EXPR, ptr_type_node, t, NULL_TREE, NULL_TREE);
6850 if (dw_align)
6852 /* Emit: offs = (offs + 15) & -16. */
6853 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6854 build_int_cst (TREE_TYPE (off), 15));
6855 t = build2 (BIT_AND_EXPR, TREE_TYPE (off), t,
6856 build_int_cst (TREE_TYPE (off), -16));
6857 roundup = build2 (MODIFY_EXPR, TREE_TYPE (off), off, t);
6859 else
6860 roundup = NULL;
6862 /* Update ap.__[g|v]r_offs */
6863 t = build2 (PLUS_EXPR, TREE_TYPE (off), off,
6864 build_int_cst (TREE_TYPE (off), rsize));
6865 t = build2 (MODIFY_EXPR, TREE_TYPE (f_off), unshare_expr (f_off), t);
6867 /* String up. */
6868 if (roundup)
6869 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6871 /* [cond2] if (ap.__[g|v]r_offs > 0) */
6872 u = build2 (GT_EXPR, boolean_type_node, unshare_expr (f_off),
6873 build_int_cst (TREE_TYPE (f_off), 0));
6874 cond2 = build3 (COND_EXPR, ptr_type_node, u, NULL_TREE, NULL_TREE);
6876 /* String up: make sure the assignment happens before the use. */
6877 t = build2 (COMPOUND_EXPR, TREE_TYPE (cond2), t, cond2);
6878 COND_EXPR_ELSE (cond1) = t;
6880 /* Prepare the trees handling the argument that is passed on the stack;
6881 the top-level node will be stored in ON_STACK. */
6882 arg = get_initialized_tmp_var (stack, pre_p, NULL);
6883 if (align > 8)
6885 /* if (alignof(type) > 8) (arg = arg + 15) & -16; */
6886 t = fold_convert (intDI_type_node, arg);
6887 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6888 build_int_cst (TREE_TYPE (t), 15));
6889 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6890 build_int_cst (TREE_TYPE (t), -16));
6891 t = fold_convert (TREE_TYPE (arg), t);
6892 roundup = build2 (MODIFY_EXPR, TREE_TYPE (arg), arg, t);
6894 else
6895 roundup = NULL;
6896 /* Advance ap.__stack */
6897 t = fold_convert (intDI_type_node, arg);
6898 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
6899 build_int_cst (TREE_TYPE (t), size + 7));
6900 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
6901 build_int_cst (TREE_TYPE (t), -8));
6902 t = fold_convert (TREE_TYPE (arg), t);
6903 t = build2 (MODIFY_EXPR, TREE_TYPE (stack), unshare_expr (stack), t);
6904 /* String up roundup and advance. */
6905 if (roundup)
6906 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), roundup, t);
6907 /* String up with arg */
6908 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), t, arg);
6909 /* Big-endianness related address adjustment. */
6910 if (BLOCK_REG_PADDING (mode, type, 1) == downward
6911 && size < UNITS_PER_WORD)
6913 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (arg), arg,
6914 size_int (UNITS_PER_WORD - size));
6915 on_stack = build2 (COMPOUND_EXPR, TREE_TYPE (arg), on_stack, t);
6918 COND_EXPR_THEN (cond1) = unshare_expr (on_stack);
6919 COND_EXPR_THEN (cond2) = unshare_expr (on_stack);
6921 /* Adjustment to OFFSET in the case of BIG_ENDIAN. */
6922 t = off;
6923 if (adjust)
6924 t = build2 (PREINCREMENT_EXPR, TREE_TYPE (off), off,
6925 build_int_cst (TREE_TYPE (off), adjust));
6927 t = fold_convert (sizetype, t);
6928 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (f_top), f_top, t);
6930 if (is_ha)
6932 /* type ha; // treat as "struct {ftype field[n];}"
6933 ... [computing offs]
6934 for (i = 0; i < nregs; ++i, offs += 16)
6935 ha.field[i] = *((ftype *)(ap.__vr_top + offs));
6936 return ha; */
6937 int i;
6938 tree tmp_ha, field_t, field_ptr_t;
6940 /* Declare a local variable. */
6941 tmp_ha = create_tmp_var_raw (type, "ha");
6942 gimple_add_tmp_var (tmp_ha);
6944 /* Establish the base type. */
6945 switch (ag_mode)
6947 case SFmode:
6948 field_t = float_type_node;
6949 field_ptr_t = float_ptr_type_node;
6950 break;
6951 case DFmode:
6952 field_t = double_type_node;
6953 field_ptr_t = double_ptr_type_node;
6954 break;
6955 case TFmode:
6956 field_t = long_double_type_node;
6957 field_ptr_t = long_double_ptr_type_node;
6958 break;
6959 /* Half-precision and quad-precision types are not fully supported yet.
6960 Enable the following code once support is complete; the correct type
6961 node for __fp16 * still needs to be found. */
6962 #if 0
6963 case HFmode:
6964 field_t = float_type_node;
6965 field_ptr_t = float_ptr_type_node;
6966 break;
6967 #endif
6968 case V2SImode:
6969 case V4SImode:
6971 tree innertype = make_signed_type (GET_MODE_PRECISION (SImode));
6972 field_t = build_vector_type_for_mode (innertype, ag_mode);
6973 field_ptr_t = build_pointer_type (field_t);
6975 break;
6976 default:
6977 gcc_assert (0);
6980 /* *(field_ptr_t)&ha = *((field_ptr_t)vr_saved_area) */
6981 tmp_ha = build1 (ADDR_EXPR, field_ptr_t, tmp_ha);
6982 addr = t;
6983 t = fold_convert (field_ptr_t, addr);
6984 t = build2 (MODIFY_EXPR, field_t,
6985 build1 (INDIRECT_REF, field_t, tmp_ha),
6986 build1 (INDIRECT_REF, field_t, t));
6988 /* ha.field[i] = *((field_ptr_t)vr_saved_area + i) */
6989 for (i = 1; i < nregs; ++i)
6991 addr = fold_build_pointer_plus_hwi (addr, UNITS_PER_VREG);
6992 u = fold_convert (field_ptr_t, addr);
6993 u = build2 (MODIFY_EXPR, field_t,
6994 build2 (MEM_REF, field_t, tmp_ha,
6995 build_int_cst (field_ptr_t,
6996 (i *
6997 int_size_in_bytes (field_t)))),
6998 build1 (INDIRECT_REF, field_t, u));
6999 t = build2 (COMPOUND_EXPR, TREE_TYPE (t), t, u);
7002 u = fold_convert (TREE_TYPE (f_top), tmp_ha);
7003 t = build2 (COMPOUND_EXPR, TREE_TYPE (f_top), t, u);
7006 COND_EXPR_ELSE (cond2) = t;
7007 addr = fold_convert (build_pointer_type (type), cond1);
7008 addr = build_va_arg_indirect_ref (addr);
7010 if (indirect_p)
7011 addr = build_va_arg_indirect_ref (addr);
7013 return addr;
7016 /* Implement TARGET_SETUP_INCOMING_VARARGS. */
7018 static void
7019 aarch64_setup_incoming_varargs (cumulative_args_t cum_v, enum machine_mode mode,
7020 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7021 int no_rtl)
7023 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7024 CUMULATIVE_ARGS local_cum;
7025 int gr_saved, vr_saved;
7027 /* The caller has advanced CUM up to, but not beyond, the last named
7028 argument. Advance a local copy of CUM past the last "real" named
7029 argument, to find out how many registers are left over. */
7030 local_cum = *cum;
7031 aarch64_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, true);
7033 /* Found out how many registers we need to save. */
7034 gr_saved = NUM_ARG_REGS - local_cum.aapcs_ncrn;
7035 vr_saved = NUM_FP_ARG_REGS - local_cum.aapcs_nvrn;
7037 if (TARGET_GENERAL_REGS_ONLY)
7039 if (local_cum.aapcs_nvrn > 0)
7040 sorry ("%qs and floating point or vector arguments",
7041 "-mgeneral-regs-only");
7042 vr_saved = 0;
7045 if (!no_rtl)
7047 if (gr_saved > 0)
7049 rtx ptr, mem;
7051 /* virtual_incoming_args_rtx should have been 16-byte aligned. */
7052 ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
7053 - gr_saved * UNITS_PER_WORD);
7054 mem = gen_frame_mem (BLKmode, ptr);
7055 set_mem_alias_set (mem, get_varargs_alias_set ());
7057 move_block_from_reg (local_cum.aapcs_ncrn + R0_REGNUM,
7058 mem, gr_saved);
7060 if (vr_saved > 0)
7062 /* We can't use move_block_from_reg, because it will use
7063 the wrong mode, storing D regs only. */
7064 enum machine_mode mode = TImode;
7065 int off, i;
7067 /* Set OFF to the offset from virtual_incoming_args_rtx of
7068 the first vector register. The VR save area lies below
7069 the GR one, and is aligned to 16 bytes. */
7070 off = -AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7071 STACK_BOUNDARY / BITS_PER_UNIT);
7072 off -= vr_saved * UNITS_PER_VREG;
7074 for (i = local_cum.aapcs_nvrn; i < NUM_FP_ARG_REGS; ++i)
7076 rtx ptr, mem;
7078 ptr = plus_constant (Pmode, virtual_incoming_args_rtx, off);
7079 mem = gen_frame_mem (mode, ptr);
7080 set_mem_alias_set (mem, get_varargs_alias_set ());
7081 aarch64_emit_move (mem, gen_rtx_REG (mode, V0_REGNUM + i));
7082 off += UNITS_PER_VREG;
7087 /* We don't save the size into *PRETEND_SIZE because we want to avoid
7088 any complication of having crtl->args.pretend_args_size changed. */
7089 cfun->machine->frame.saved_varargs_size
7090 = (AARCH64_ROUND_UP (gr_saved * UNITS_PER_WORD,
7091 STACK_BOUNDARY / BITS_PER_UNIT)
7092 + vr_saved * UNITS_PER_VREG);
7095 static void
7096 aarch64_conditional_register_usage (void)
7098 int i;
7099 if (!TARGET_FLOAT)
7101 for (i = V0_REGNUM; i <= V31_REGNUM; i++)
7103 fixed_regs[i] = 1;
7104 call_used_regs[i] = 1;
7109 /* Walk down the type tree of TYPE counting consecutive base elements.
7110 If *MODEP is VOIDmode, then set it to the first valid floating point
7111 type. If a non-floating point type is found, or if a floating point
7112 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
7113 otherwise return the count in the sub-tree. */
7114 static int
7115 aapcs_vfp_sub_candidate (const_tree type, enum machine_mode *modep)
7117 enum machine_mode mode;
7118 HOST_WIDE_INT size;
7120 switch (TREE_CODE (type))
7122 case REAL_TYPE:
7123 mode = TYPE_MODE (type);
7124 if (mode != DFmode && mode != SFmode && mode != TFmode)
7125 return -1;
7127 if (*modep == VOIDmode)
7128 *modep = mode;
7130 if (*modep == mode)
7131 return 1;
7133 break;
7135 case COMPLEX_TYPE:
7136 mode = TYPE_MODE (TREE_TYPE (type));
7137 if (mode != DFmode && mode != SFmode && mode != TFmode)
7138 return -1;
7140 if (*modep == VOIDmode)
7141 *modep = mode;
7143 if (*modep == mode)
7144 return 2;
7146 break;
7148 case VECTOR_TYPE:
7149 /* Use V2SImode and V4SImode as representatives of all 64-bit
7150 and 128-bit vector types. */
7151 size = int_size_in_bytes (type);
7152 switch (size)
7154 case 8:
7155 mode = V2SImode;
7156 break;
7157 case 16:
7158 mode = V4SImode;
7159 break;
7160 default:
7161 return -1;
7164 if (*modep == VOIDmode)
7165 *modep = mode;
7167 /* Vector modes are considered to be opaque: two vectors are
7168 equivalent for the purposes of being homogeneous aggregates
7169 if they are the same size. */
7170 if (*modep == mode)
7171 return 1;
7173 break;
7175 case ARRAY_TYPE:
7177 int count;
7178 tree index = TYPE_DOMAIN (type);
7180 /* Can't handle incomplete types. */
7181 if (!COMPLETE_TYPE_P (type))
7182 return -1;
7184 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
7185 if (count == -1
7186 || !index
7187 || !TYPE_MAX_VALUE (index)
7188 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
7189 || !TYPE_MIN_VALUE (index)
7190 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
7191 || count < 0)
7192 return -1;
7194 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
7195 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
7197 /* There must be no padding. */
7198 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
7199 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
7200 != count * GET_MODE_BITSIZE (*modep)))
7201 return -1;
7203 return count;
7206 case RECORD_TYPE:
7208 int count = 0;
7209 int sub_count;
7210 tree field;
7212 /* Can't handle incomplete types. */
7213 if (!COMPLETE_TYPE_P (type))
7214 return -1;
7216 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7218 if (TREE_CODE (field) != FIELD_DECL)
7219 continue;
7221 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7222 if (sub_count < 0)
7223 return -1;
7224 count += sub_count;
7227 /* There must be no padding. */
7228 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
7229 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
7230 != count * GET_MODE_BITSIZE (*modep)))
7231 return -1;
7233 return count;
7236 case UNION_TYPE:
7237 case QUAL_UNION_TYPE:
7239 /* These aren't very interesting except in a degenerate case. */
7240 int count = 0;
7241 int sub_count;
7242 tree field;
7244 /* Can't handle incomplete types. */
7245 if (!COMPLETE_TYPE_P (type))
7246 return -1;
7248 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
7250 if (TREE_CODE (field) != FIELD_DECL)
7251 continue;
7253 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
7254 if (sub_count < 0)
7255 return -1;
7256 count = count > sub_count ? count : sub_count;
7259 /* There must be no padding. */
7260 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
7261 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
7262 != count * GET_MODE_BITSIZE (*modep)))
7263 return -1;
7265 return count;
7268 default:
7269 break;
7272 return -1;
7275 /* Return true if we use LRA instead of reload pass. */
7276 static bool
7277 aarch64_lra_p (void)
7279 return aarch64_lra_flag;
7282 /* Return TRUE if the type, as described by TYPE and MODE, is a composite
7283 type as described in AAPCS64 \S 4.3. This includes aggregate, union and
7284 array types. The C99 floating-point complex types are also considered
7285 as composite types, according to AAPCS64 \S 7.1.1. The complex integer
7286 types, which are GCC extensions and out of the scope of AAPCS64, are
7287 treated as composite types here as well.
7289 Note that MODE itself is not sufficient in determining whether a type
7290 is such a composite type or not. This is because
7291 stor-layout.c:compute_record_mode may have already changed the MODE
7292 (BLKmode) of a RECORD_TYPE TYPE to some other mode. For example, a
7293 structure with only one field may have its MODE set to the mode of the
7294 field. Also an integer mode whose size matches the size of the
7295 RECORD_TYPE type may be used to substitute the original mode
7296 (i.e. BLKmode) in certain circumstances. In other words, MODE cannot be
7297 solely relied on. */
7299 static bool
7300 aarch64_composite_type_p (const_tree type,
7301 enum machine_mode mode)
7303 if (type && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE))
7304 return true;
7306 if (mode == BLKmode
7307 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7308 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
7309 return true;
7311 return false;
7314 /* Return TRUE if the type, as described by TYPE and MODE, is a short vector
7315 type as described in AAPCS64 \S 4.1.2.
7317 See the comment above aarch64_composite_type_p for the notes on MODE. */
7319 static bool
7320 aarch64_short_vector_p (const_tree type,
7321 enum machine_mode mode)
7323 HOST_WIDE_INT size = -1;
7325 if (type && TREE_CODE (type) == VECTOR_TYPE)
7326 size = int_size_in_bytes (type);
7327 else if (!aarch64_composite_type_p (type, mode)
7328 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
7329 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT))
7330 size = GET_MODE_SIZE (mode);
7332 return size == 8 || size == 16;
7335 /* Return TRUE if an argument, whose type is described by TYPE and MODE,
7336 shall be passed or returned in simd/fp register(s) (providing these
7337 parameter passing registers are available).
7339 Upon successful return, *COUNT returns the number of needed registers,
7340 *BASE_MODE returns the mode of the individual register and when IS_HA
7341 is not NULL, *IS_HA indicates whether or not the argument is a homogeneous
7342 floating-point aggregate or a homogeneous short-vector aggregate. */
7344 static bool
7345 aarch64_vfp_is_call_or_return_candidate (enum machine_mode mode,
7346 const_tree type,
7347 enum machine_mode *base_mode,
7348 int *count,
7349 bool *is_ha)
7351 enum machine_mode new_mode = VOIDmode;
7352 bool composite_p = aarch64_composite_type_p (type, mode);
7354 if (is_ha != NULL) *is_ha = false;
7356 if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
7357 || aarch64_short_vector_p (type, mode))
7359 *count = 1;
7360 new_mode = mode;
7362 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7364 if (is_ha != NULL) *is_ha = true;
7365 *count = 2;
7366 new_mode = GET_MODE_INNER (mode);
7368 else if (type && composite_p)
7370 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
7372 if (ag_count > 0 && ag_count <= HA_MAX_NUM_FLDS)
7374 if (is_ha != NULL) *is_ha = true;
7375 *count = ag_count;
7377 else
7378 return false;
7380 else
7381 return false;
7383 *base_mode = new_mode;
7384 return true;
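/* Some illustrative classifications, sketching the AAPCS64 rules the two
   functions above implement:

     struct { float x, y, z; }        HFA, 3 x SFmode registers
     _Complex double                  HFA, 2 x DFmode registers
     struct { float32x4_t a, b; }     HVA, 2 x 128-bit (V4SI) registers
     struct { float x; double y; }    mixed base types, not a candidate
     struct { float f[5]; }           5 > HA_MAX_NUM_FLDS, not a candidate  */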
7387 /* Implement TARGET_STRUCT_VALUE_RTX. */
7389 static rtx
7390 aarch64_struct_value_rtx (tree fndecl ATTRIBUTE_UNUSED,
7391 int incoming ATTRIBUTE_UNUSED)
7393 return gen_rtx_REG (Pmode, AARCH64_STRUCT_VALUE_REGNUM);
7396 /* Implements target hook vector_mode_supported_p. */
7397 static bool
7398 aarch64_vector_mode_supported_p (enum machine_mode mode)
7400 if (TARGET_SIMD
7401 && (mode == V4SImode || mode == V8HImode
7402 || mode == V16QImode || mode == V2DImode
7403 || mode == V2SImode || mode == V4HImode
7404 || mode == V8QImode || mode == V2SFmode
7405 || mode == V4SFmode || mode == V2DFmode
7406 || mode == V1DFmode))
7407 return true;
7409 return false;
7412 /* Return appropriate SIMD container
7413 for MODE within a vector of WIDTH bits. */
7414 static enum machine_mode
7415 aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
7417 gcc_assert (width == 64 || width == 128);
7418 if (TARGET_SIMD)
7420 if (width == 128)
7421 switch (mode)
7423 case DFmode:
7424 return V2DFmode;
7425 case SFmode:
7426 return V4SFmode;
7427 case SImode:
7428 return V4SImode;
7429 case HImode:
7430 return V8HImode;
7431 case QImode:
7432 return V16QImode;
7433 case DImode:
7434 return V2DImode;
7435 default:
7436 break;
7438 else
7439 switch (mode)
7441 case SFmode:
7442 return V2SFmode;
7443 case SImode:
7444 return V2SImode;
7445 case HImode:
7446 return V4HImode;
7447 case QImode:
7448 return V8QImode;
7449 default:
7450 break;
7453 return word_mode;
7456 /* Return 128-bit container as the preferred SIMD mode for MODE. */
7457 static enum machine_mode
7458 aarch64_preferred_simd_mode (enum machine_mode mode)
7460 return aarch64_simd_container_mode (mode, 128);
7463 /* Return the bitmask of possible vector sizes for the vectorizer
7464 to iterate over. */
7465 static unsigned int
7466 aarch64_autovectorize_vector_sizes (void)
7468 return (16 | 8);
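/* For example, with TARGET_SIMD the vectorizer is first offered the
   preferred 128-bit containers (SFmode -> V4SFmode, SImode -> V4SImode,
   QImode -> V16QImode, ...) and, because the size bitmask above is 16 | 8,
   can fall back to the 64-bit containers (V2SFmode, V2SImode, V8QImode,
   ...).  Without TARGET_SIMD only word_mode is returned and nothing is
   vectorized this way.  */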
7471 /* A table to help perform AArch64-specific name mangling for AdvSIMD
7472 vector types in order to conform to the AAPCS64 (see "Procedure
7473 Call Standard for the ARM 64-bit Architecture", Appendix A). To
7474 qualify for emission with the mangled names defined in that document,
7475 a vector type must not only be of the correct mode but also be
7476 composed of AdvSIMD vector element types (e.g.
7477 __builtin_aarch64_simd_qi); these types are registered by
7478 aarch64_init_simd_builtins (). In other words, vector types defined
7479 in other ways e.g. via vector_size attribute will get default
7480 mangled names. */
7481 typedef struct
7483 enum machine_mode mode;
7484 const char *element_type_name;
7485 const char *mangled_name;
7486 } aarch64_simd_mangle_map_entry;
7488 static aarch64_simd_mangle_map_entry aarch64_simd_mangle_map[] = {
7489 /* 64-bit containerized types. */
7490 { V8QImode, "__builtin_aarch64_simd_qi", "10__Int8x8_t" },
7491 { V8QImode, "__builtin_aarch64_simd_uqi", "11__Uint8x8_t" },
7492 { V4HImode, "__builtin_aarch64_simd_hi", "11__Int16x4_t" },
7493 { V4HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x4_t" },
7494 { V2SImode, "__builtin_aarch64_simd_si", "11__Int32x2_t" },
7495 { V2SImode, "__builtin_aarch64_simd_usi", "12__Uint32x2_t" },
7496 { V2SFmode, "__builtin_aarch64_simd_sf", "13__Float32x2_t" },
7497 { V8QImode, "__builtin_aarch64_simd_poly8", "11__Poly8x8_t" },
7498 { V4HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x4_t" },
7499 /* 128-bit containerized types. */
7500 { V16QImode, "__builtin_aarch64_simd_qi", "11__Int8x16_t" },
7501 { V16QImode, "__builtin_aarch64_simd_uqi", "12__Uint8x16_t" },
7502 { V8HImode, "__builtin_aarch64_simd_hi", "11__Int16x8_t" },
7503 { V8HImode, "__builtin_aarch64_simd_uhi", "12__Uint16x8_t" },
7504 { V4SImode, "__builtin_aarch64_simd_si", "11__Int32x4_t" },
7505 { V4SImode, "__builtin_aarch64_simd_usi", "12__Uint32x4_t" },
7506 { V2DImode, "__builtin_aarch64_simd_di", "11__Int64x2_t" },
7507 { V2DImode, "__builtin_aarch64_simd_udi", "12__Uint64x2_t" },
7508 { V4SFmode, "__builtin_aarch64_simd_sf", "13__Float32x4_t" },
7509 { V2DFmode, "__builtin_aarch64_simd_df", "13__Float64x2_t" },
7510 { V16QImode, "__builtin_aarch64_simd_poly8", "12__Poly8x16_t" },
7511 { V8HImode, "__builtin_aarch64_simd_poly16", "12__Poly16x8_t" },
7512 { V2DImode, "__builtin_aarch64_simd_poly64", "12__Poly64x2_t" },
7513 { VOIDmode, NULL, NULL }
7516 /* Implement TARGET_MANGLE_TYPE. */
7518 static const char *
7519 aarch64_mangle_type (const_tree type)
7521 /* The AArch64 ABI documents say that "__va_list" has to be
7522 mangled as if it is in the "std" namespace. */
7523 if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
7524 return "St9__va_list";
7526 /* Check the mode of the vector type, and the name of the vector
7527 element type, against the table. */
7528 if (TREE_CODE (type) == VECTOR_TYPE)
7530 aarch64_simd_mangle_map_entry *pos = aarch64_simd_mangle_map;
7532 while (pos->mode != VOIDmode)
7534 tree elt_type = TREE_TYPE (type);
7536 if (pos->mode == TYPE_MODE (type)
7537 && TREE_CODE (TYPE_NAME (elt_type)) == TYPE_DECL
7538 && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (elt_type))),
7539 pos->element_type_name))
7540 return pos->mangled_name;
7542 pos++;
7546 /* Use the default mangling. */
7547 return NULL;
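/* A mangling sketch, assuming the arm_neon.h typedefs built on top of the
   AdvSIMD element types registered by aarch64_init_simd_builtins:

     void f (int32x4_t);      mangles as  _Z1f11__Int32x4_t
     void g (float64x2_t);    mangles as  _Z1g13__Float64x2_t

   whereas  typedef int v4si __attribute__ ((vector_size (16)));  does not
   match any element_type_name in the table and gets the default vector
   mangling.  */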
7550 /* Return the equivalent letter for size. */
7551 static char
7552 sizetochar (int size)
7554 switch (size)
7556 case 64: return 'd';
7557 case 32: return 's';
7558 case 16: return 'h';
7559 case 8 : return 'b';
7560 default: gcc_unreachable ();
7564 /* Return true iff x is a uniform vector of floating-point
7565 constants, and the constant can be represented in
7566 quarter-precision form.  Note that, as aarch64_float_const_representable_p
7567 rejects both +0.0 and -0.0, this function also rejects +0.0 and -0.0. */
7568 static bool
7569 aarch64_vect_float_const_representable_p (rtx x)
7571 int i = 0;
7572 REAL_VALUE_TYPE r0, ri;
7573 rtx x0, xi;
7575 if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
7576 return false;
7578 x0 = CONST_VECTOR_ELT (x, 0);
7579 if (!CONST_DOUBLE_P (x0))
7580 return false;
7582 REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
7584 for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
7586 xi = CONST_VECTOR_ELT (x, i);
7587 if (!CONST_DOUBLE_P (xi))
7588 return false;
7590 REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
7591 if (!REAL_VALUES_EQUAL (r0, ri))
7592 return false;
7595 return aarch64_float_const_representable_p (x0);
7598 /* Return true for valid and false for invalid. */
7599 bool
7600 aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
7601 struct simd_immediate_info *info)
7603 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
7604 matches = 1; \
7605 for (i = 0; i < idx; i += (STRIDE)) \
7606 if (!(TEST)) \
7607 matches = 0; \
7608 if (matches) \
7610 immtype = (CLASS); \
7611 elsize = (ELSIZE); \
7612 eshift = (SHIFT); \
7613 emvn = (NEG); \
7614 break; \
7617 unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
7618 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
7619 unsigned char bytes[16];
7620 int immtype = -1, matches;
7621 unsigned int invmask = inverse ? 0xff : 0;
7622 int eshift, emvn;
7624 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
7626 if (! (aarch64_simd_imm_zero_p (op, mode)
7627 || aarch64_vect_float_const_representable_p (op)))
7628 return false;
7630 if (info)
7632 info->value = CONST_VECTOR_ELT (op, 0);
7633 info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
7634 info->mvn = false;
7635 info->shift = 0;
7638 return true;
7641 /* Splat vector constant out into a byte vector. */
7642 for (i = 0; i < n_elts; i++)
7644 /* The vector is provided in gcc endian-neutral fashion. For aarch64_be,
7645 it must be laid out in the vector register in reverse order. */
7646 rtx el = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? (n_elts - 1 - i) : i);
7647 unsigned HOST_WIDE_INT elpart;
7648 unsigned int part, parts;
7650 if (GET_CODE (el) == CONST_INT)
7652 elpart = INTVAL (el);
7653 parts = 1;
7655 else if (GET_CODE (el) == CONST_DOUBLE)
7657 elpart = CONST_DOUBLE_LOW (el);
7658 parts = 2;
7660 else
7661 gcc_unreachable ();
7663 for (part = 0; part < parts; part++)
7665 unsigned int byte;
7666 for (byte = 0; byte < innersize; byte++)
7668 bytes[idx++] = (elpart & 0xff) ^ invmask;
7669 elpart >>= BITS_PER_UNIT;
7671 if (GET_CODE (el) == CONST_DOUBLE)
7672 elpart = CONST_DOUBLE_HIGH (el);
7676 /* Sanity check. */
7677 gcc_assert (idx == GET_MODE_SIZE (mode));
7681 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
7682 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
7684 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7685 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7687 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
7688 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7690 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
7691 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
7693 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
7695 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
7697 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
7698 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
7700 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7701 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7703 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
7704 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7706 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
7707 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
7709 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
7711 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
7713 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
7714 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
7716 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
7717 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
7719 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
7720 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
7722 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
7723 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
7725 CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
7727 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
7728 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
7730 while (0);
7732 if (immtype == -1)
7733 return false;
7735 if (info)
7737 info->element_width = elsize;
7738 info->mvn = emvn != 0;
7739 info->shift = eshift;
7741 unsigned HOST_WIDE_INT imm = 0;
7743 if (immtype >= 12 && immtype <= 15)
7744 info->msl = true;
7746 /* Un-invert bytes of recognized vector, if necessary. */
7747 if (invmask != 0)
7748 for (i = 0; i < idx; i++)
7749 bytes[i] ^= invmask;
7751 if (immtype == 17)
7753 /* FIXME: Broken on 32-bit H_W_I hosts. */
7754 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
7756 for (i = 0; i < 8; i++)
7757 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
7758 << (i * BITS_PER_UNIT);
7761 info->value = GEN_INT (imm);
7763 else
7765 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
7766 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
7768 /* Construct 'abcdefgh' because the assembler cannot handle
7769 generic constants. */
7770 if (info->mvn)
7771 imm = ~imm;
7772 imm = (imm >> info->shift) & 0xff;
7773 info->value = GEN_INT (imm);
7777 return true;
7778 #undef CHECK
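/* Two concrete examples of the matching above (a sketch; the CHECK table
   is the authority):

     V4SImode { 0x00ff0000, 0x00ff0000, 0x00ff0000, 0x00ff0000 }
       matches immtype 2: element_width 32, shift 16, value 0xff,
       roughly "movi v0.4s, #0xff, lsl #16".

     V4SImode { 0xffffff00, 0xffffff00, 0xffffff00, 0xffffff00 }
       matches immtype 6, an inverted form, so mvn is set and it is
       emitted roughly as "mvni v0.4s, #0xff".

   A replicated 0x00123456 matches none of the patterns and is rejected.  */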
7781 static bool
7782 aarch64_const_vec_all_same_int_p (rtx x,
7783 HOST_WIDE_INT minval,
7784 HOST_WIDE_INT maxval)
7786 HOST_WIDE_INT firstval;
7787 int count, i;
7789 if (GET_CODE (x) != CONST_VECTOR
7790 || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
7791 return false;
7793 firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
7794 if (firstval < minval || firstval > maxval)
7795 return false;
7797 count = CONST_VECTOR_NUNITS (x);
7798 for (i = 1; i < count; i++)
7799 if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
7800 return false;
7802 return true;
7805 /* Check that immediate shift constants are within range. */
7806 bool
7807 aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
7809 int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
7810 if (left)
7811 return aarch64_const_vec_all_same_int_p (x, 0, bit_width - 1);
7812 else
7813 return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
7816 /* Return true if X is a uniform vector where all elements
7817 are either the floating-point constant 0.0 or the
7818 integer constant 0. */
7819 bool
7820 aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
7822 return x == CONST0_RTX (mode);
7825 bool
7826 aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
7828 HOST_WIDE_INT imm = INTVAL (x);
7829 int i;
7831 for (i = 0; i < 8; i++)
7833 unsigned int byte = imm & 0xff;
7834 if (byte != 0xff && byte != 0)
7835 return false;
7836 imm >>= 8;
7839 return true;
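/* E.g. 0xff00ffff0000ff00 is accepted (every byte is 0x00 or 0xff, the
   64-bit MOVI "byte mask" immediate form), while 0x0123456789abcdef is
   rejected.  A sketch of the loop above only.  */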
7842 bool
7843 aarch64_mov_operand_p (rtx x,
7844 enum aarch64_symbol_context context,
7845 enum machine_mode mode)
7847 if (GET_CODE (x) == HIGH
7848 && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
7849 return true;
7851 if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
7852 return true;
7854 if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
7855 return true;
7857 return aarch64_classify_symbolic_expression (x, context)
7858 == SYMBOL_TINY_ABSOLUTE;
7861 /* Return a const_int vector of VAL. */
7862 rtx
7863 aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
7865 int nunits = GET_MODE_NUNITS (mode);
7866 rtvec v = rtvec_alloc (nunits);
7867 int i;
7869 for (i = 0; i < nunits; i++)
7870 RTVEC_ELT (v, i) = GEN_INT (val);
7872 return gen_rtx_CONST_VECTOR (mode, v);
7875 /* Check OP is a legal scalar immediate for the MOVI instruction. */
7877 bool
7878 aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
7880 enum machine_mode vmode;
7882 gcc_assert (!VECTOR_MODE_P (mode));
7883 vmode = aarch64_preferred_simd_mode (mode);
7884 rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
7885 return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
7888 /* Construct and return a PARALLEL RTX vector. */
7889 rtx
7890 aarch64_simd_vect_par_cnst_half (enum machine_mode mode, bool high)
7892 int nunits = GET_MODE_NUNITS (mode);
7893 rtvec v = rtvec_alloc (nunits / 2);
7894 int base = high ? nunits / 2 : 0;
7895 rtx t1;
7896 int i;
7898 for (i = 0; i < nunits / 2; i++)
7899 RTVEC_ELT (v, i) = GEN_INT (base + i);
7901 t1 = gen_rtx_PARALLEL (mode, v);
7902 return t1;
7905 /* Bounds-check lanes. Ensure OPERAND lies between LOW (inclusive) and
7906 HIGH (exclusive). */
7907 void
7908 aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7910 HOST_WIDE_INT lane;
7911 gcc_assert (GET_CODE (operand) == CONST_INT);
7912 lane = INTVAL (operand);
7914 if (lane < low || lane >= high)
7915 error ("lane out of range");
7918 void
7919 aarch64_simd_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
7921 gcc_assert (GET_CODE (operand) == CONST_INT);
7922 HOST_WIDE_INT lane = INTVAL (operand);
7924 if (lane < low || lane >= high)
7925 error ("constant out of range");
7928 /* Emit code to reinterpret one AdvSIMD type as another,
7929 without altering bits. */
7930 void
7931 aarch64_simd_reinterpret (rtx dest, rtx src)
7933 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
7936 /* Emit code to place an AdvSIMD pair result in memory locations (with equal
7937 registers). */
7938 void
7939 aarch64_simd_emit_pair_result_insn (enum machine_mode mode,
7940 rtx (*intfn) (rtx, rtx, rtx), rtx destaddr,
7941 rtx op1)
7943 rtx mem = gen_rtx_MEM (mode, destaddr);
7944 rtx tmp1 = gen_reg_rtx (mode);
7945 rtx tmp2 = gen_reg_rtx (mode);
7947 emit_insn (intfn (tmp1, op1, tmp2));
7949 emit_move_insn (mem, tmp1);
7950 mem = adjust_address (mem, mode, GET_MODE_SIZE (mode));
7951 emit_move_insn (mem, tmp2);
7954 /* Return TRUE if OP is a valid vector addressing mode. */
7955 bool
7956 aarch64_simd_mem_operand_p (rtx op)
7958 return MEM_P (op) && (GET_CODE (XEXP (op, 0)) == POST_INC
7959 || GET_CODE (XEXP (op, 0)) == REG);
7962 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
7963 not to early-clobber SRC registers in the process.
7965 We assume that the operands described by SRC and DEST represent a
7966 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
7967 number of components into which the copy has been decomposed. */
7968 void
7969 aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
7970 rtx *src, unsigned int count)
7972 unsigned int i;
7974 if (!reg_overlap_mentioned_p (operands[0], operands[1])
7975 || REGNO (operands[0]) < REGNO (operands[1]))
7977 for (i = 0; i < count; i++)
7979 operands[2 * i] = dest[i];
7980 operands[2 * i + 1] = src[i];
7983 else
7985 for (i = 0; i < count; i++)
7987 operands[2 * i] = dest[count - i - 1];
7988 operands[2 * i + 1] = src[count - i - 1];
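/* For instance, when a CImode value living in q1-q3 is copied to q2-q4 the
   destination overlaps the source and has the higher register numbers, so
   the component moves are emitted in reverse (q4 from q3, q3 from q2,
   q2 from q1); copying q2-q4 to q1-q3 keeps the forward order.  A sketch
   only; the operand arrays are prepared by the callers in the machine
   description.  */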
7993 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
7994 one of VSTRUCT modes: OI, CI or XI. */
7995 int
7996 aarch64_simd_attr_length_move (rtx insn)
7998 enum machine_mode mode;
8000 extract_insn_cached (insn);
8002 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
8004 mode = GET_MODE (recog_data.operand[0]);
8005 switch (mode)
8007 case OImode:
8008 return 8;
8009 case CImode:
8010 return 12;
8011 case XImode:
8012 return 16;
8013 default:
8014 gcc_unreachable ();
8017 return 4;
8020 /* Implement target hook TARGET_VECTOR_ALIGNMENT. The AAPCS64 sets the maximum
8021 alignment of a vector to 128 bits. */
8022 static HOST_WIDE_INT
8023 aarch64_simd_vector_alignment (const_tree type)
8025 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
8026 return MIN (align, 128);
8029 /* Implement target hook TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE. */
8030 static bool
8031 aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
8033 if (is_packed)
8034 return false;
8036 /* We guarantee alignment for vectors up to 128-bits. */
8037 if (tree_int_cst_compare (TYPE_SIZE (type),
8038 bitsize_int (BIGGEST_ALIGNMENT)) > 0)
8039 return false;
8041 /* Vectors whose size is <= BIGGEST_ALIGNMENT are naturally aligned. */
8042 return true;
8045 /* If VALS is a vector constant that can be loaded into a register
8046 using DUP, generate instructions to do so and return an RTX to
8047 assign to the register. Otherwise return NULL_RTX. */
8048 static rtx
8049 aarch64_simd_dup_constant (rtx vals)
8051 enum machine_mode mode = GET_MODE (vals);
8052 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8053 int n_elts = GET_MODE_NUNITS (mode);
8054 bool all_same = true;
8055 rtx x;
8056 int i;
8058 if (GET_CODE (vals) != CONST_VECTOR)
8059 return NULL_RTX;
8061 for (i = 1; i < n_elts; ++i)
8063 x = CONST_VECTOR_ELT (vals, i);
8064 if (!rtx_equal_p (x, CONST_VECTOR_ELT (vals, 0)))
8065 all_same = false;
8068 if (!all_same)
8069 return NULL_RTX;
8071 /* We can load this constant by using DUP and a constant in a
8072 single ARM register. This will be cheaper than a vector
8073 load. */
8074 x = copy_to_mode_reg (inner_mode, CONST_VECTOR_ELT (vals, 0));
8075 return gen_rtx_VEC_DUPLICATE (mode, x);
8079 /* Generate code to load VALS, which is a PARALLEL containing only
8080 constants (for vec_init) or CONST_VECTOR, efficiently into a
8081 register. Returns an RTX to copy into the register, or NULL_RTX
8082 for a PARALLEL that can not be converted into a CONST_VECTOR. */
8083 static rtx
8084 aarch64_simd_make_constant (rtx vals)
8086 enum machine_mode mode = GET_MODE (vals);
8087 rtx const_dup;
8088 rtx const_vec = NULL_RTX;
8089 int n_elts = GET_MODE_NUNITS (mode);
8090 int n_const = 0;
8091 int i;
8093 if (GET_CODE (vals) == CONST_VECTOR)
8094 const_vec = vals;
8095 else if (GET_CODE (vals) == PARALLEL)
8097 /* A CONST_VECTOR must contain only CONST_INTs and
8098 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
8099 Only store valid constants in a CONST_VECTOR. */
8100 for (i = 0; i < n_elts; ++i)
8102 rtx x = XVECEXP (vals, 0, i);
8103 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
8104 n_const++;
8106 if (n_const == n_elts)
8107 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
8109 else
8110 gcc_unreachable ();
8112 if (const_vec != NULL_RTX
8113 && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
8114 /* Load using MOVI/MVNI. */
8115 return const_vec;
8116 else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
8117 /* Loaded using DUP. */
8118 return const_dup;
8119 else if (const_vec != NULL_RTX)
8120 /* Load from constant pool. We can not take advantage of single-cycle
8121 LD1 because we need a PC-relative addressing mode. */
8122 return const_vec;
8123 else
8124 /* A PARALLEL containing something not valid inside CONST_VECTOR.
8125 We can not construct an initializer. */
8126 return NULL_RTX;
8129 void
8130 aarch64_expand_vector_init (rtx target, rtx vals)
8132 enum machine_mode mode = GET_MODE (target);
8133 enum machine_mode inner_mode = GET_MODE_INNER (mode);
8134 int n_elts = GET_MODE_NUNITS (mode);
8135 int n_var = 0, one_var = -1;
8136 bool all_same = true;
8137 rtx x, mem;
8138 int i;
8140 x = XVECEXP (vals, 0, 0);
8141 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8142 n_var = 1, one_var = 0;
8144 for (i = 1; i < n_elts; ++i)
8146 x = XVECEXP (vals, 0, i);
8147 if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
8148 ++n_var, one_var = i;
8150 if (!rtx_equal_p (x, XVECEXP (vals, 0, 0)))
8151 all_same = false;
8154 if (n_var == 0)
8156 rtx constant = aarch64_simd_make_constant (vals);
8157 if (constant != NULL_RTX)
8159 emit_move_insn (target, constant);
8160 return;
8164 /* Splat a single non-constant element if we can. */
8165 if (all_same)
8167 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8168 aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
8169 return;
8172 /* One field is non-constant. Load constant then overwrite varying
8173 field. This is more efficient than using the stack. */
8174 if (n_var == 1)
8176 rtx copy = copy_rtx (vals);
8177 rtx index = GEN_INT (one_var);
8178 enum insn_code icode;
8180 /* Load constant part of vector, substitute neighboring value for
8181 varying element. */
8182 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1);
8183 aarch64_expand_vector_init (target, copy);
8185 /* Insert variable. */
8186 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8187 icode = optab_handler (vec_set_optab, mode);
8188 gcc_assert (icode != CODE_FOR_nothing);
8189 emit_insn (GEN_FCN (icode) (target, x, index));
8190 return;
8193 /* Construct the vector in memory one field at a time
8194 and load the whole vector. */
8195 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
8196 for (i = 0; i < n_elts; i++)
8197 emit_move_insn (adjust_address_nv (mem, inner_mode,
8198 i * GET_MODE_SIZE (inner_mode)),
8199 XVECEXP (vals, 0, i));
8200 emit_move_insn (target, mem);
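/* Roughly, for user code such as the following hypothetical initializers:

     int32x4_t a = { 7, 7, 7, 7 };     all constant: movi/dup/literal pool
     int32x4_t b = { x, x, x, x };     all the same variable: dup from a GPR
     int32x4_t c = { 1, 2, 3, x };     one variable: load the constant part,
                                       then insert the varying lane
     int32x4_t d = { w, x, y, z };     general case: built in a stack
                                       temporary and loaded back  */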
8204 static unsigned HOST_WIDE_INT
8205 aarch64_shift_truncation_mask (enum machine_mode mode)
8207 return
8208 (aarch64_vector_mode_supported_p (mode)
8209 || aarch64_vect_struct_mode_p (mode)) ? 0 : (GET_MODE_BITSIZE (mode) - 1);
8212 #ifndef TLS_SECTION_ASM_FLAG
8213 #define TLS_SECTION_ASM_FLAG 'T'
8214 #endif
8216 void
8217 aarch64_elf_asm_named_section (const char *name, unsigned int flags,
8218 tree decl ATTRIBUTE_UNUSED)
8220 char flagchars[10], *f = flagchars;
8222 /* If we have already declared this section, we can use an
8223 abbreviated form to switch back to it -- unless this section is
8224 part of a COMDAT group, in which case GAS requires the full
8225 declaration every time. */
8226 if (!(HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8227 && (flags & SECTION_DECLARED))
8229 fprintf (asm_out_file, "\t.section\t%s\n", name);
8230 return;
8233 if (!(flags & SECTION_DEBUG))
8234 *f++ = 'a';
8235 if (flags & SECTION_WRITE)
8236 *f++ = 'w';
8237 if (flags & SECTION_CODE)
8238 *f++ = 'x';
8239 if (flags & SECTION_SMALL)
8240 *f++ = 's';
8241 if (flags & SECTION_MERGE)
8242 *f++ = 'M';
8243 if (flags & SECTION_STRINGS)
8244 *f++ = 'S';
8245 if (flags & SECTION_TLS)
8246 *f++ = TLS_SECTION_ASM_FLAG;
8247 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8248 *f++ = 'G';
8249 *f = '\0';
8251 fprintf (asm_out_file, "\t.section\t%s,\"%s\"", name, flagchars);
8253 if (!(flags & SECTION_NOTYPE))
8255 const char *type;
8256 const char *format;
8258 if (flags & SECTION_BSS)
8259 type = "nobits";
8260 else
8261 type = "progbits";
8263 #ifdef TYPE_OPERAND_FMT
8264 format = "," TYPE_OPERAND_FMT;
8265 #else
8266 format = ",@%s";
8267 #endif
8269 fprintf (asm_out_file, format, type);
8271 if (flags & SECTION_ENTSIZE)
8272 fprintf (asm_out_file, ",%d", flags & SECTION_ENTSIZE);
8273 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
8275 if (TREE_CODE (decl) == IDENTIFIER_NODE)
8276 fprintf (asm_out_file, ",%s,comdat", IDENTIFIER_POINTER (decl));
8277 else
8278 fprintf (asm_out_file, ",%s,comdat",
8279 IDENTIFIER_POINTER (DECL_COMDAT_GROUP (decl)));
8283 putc ('\n', asm_out_file);
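/* Illustrative output only (the exact flags depend on the SECTION_* bits
   and on TYPE_OPERAND_FMT): a mergeable string section may be emitted as

     .section .rodata.str1.1,"aMS",@progbits,1

   and a COMDAT text section as

     .section .text._ZN1A3fooEv,"axG",@progbits,_ZN1A3fooEv,comdat  */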
8286 /* Select a format to encode pointers in exception handling data. */
8287 int
8288 aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
8290 int type;
8291 switch (aarch64_cmodel)
8293 case AARCH64_CMODEL_TINY:
8294 case AARCH64_CMODEL_TINY_PIC:
8295 case AARCH64_CMODEL_SMALL:
8296 case AARCH64_CMODEL_SMALL_PIC:
8297 /* text+got+data < 4Gb. 4-byte signed relocs are sufficient
8298 for everything. */
8299 type = DW_EH_PE_sdata4;
8300 break;
8301 default:
8302 /* No assumptions here. 8-byte relocs required. */
8303 type = DW_EH_PE_sdata8;
8304 break;
8306 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
8309 /* Emit load exclusive. */
8311 static void
8312 aarch64_emit_load_exclusive (enum machine_mode mode, rtx rval,
8313 rtx mem, rtx model_rtx)
8315 rtx (*gen) (rtx, rtx, rtx);
8317 switch (mode)
8319 case QImode: gen = gen_aarch64_load_exclusiveqi; break;
8320 case HImode: gen = gen_aarch64_load_exclusivehi; break;
8321 case SImode: gen = gen_aarch64_load_exclusivesi; break;
8322 case DImode: gen = gen_aarch64_load_exclusivedi; break;
8323 default:
8324 gcc_unreachable ();
8327 emit_insn (gen (rval, mem, model_rtx));
8330 /* Emit store exclusive. */
8332 static void
8333 aarch64_emit_store_exclusive (enum machine_mode mode, rtx bval,
8334 rtx rval, rtx mem, rtx model_rtx)
8336 rtx (*gen) (rtx, rtx, rtx, rtx);
8338 switch (mode)
8340 case QImode: gen = gen_aarch64_store_exclusiveqi; break;
8341 case HImode: gen = gen_aarch64_store_exclusivehi; break;
8342 case SImode: gen = gen_aarch64_store_exclusivesi; break;
8343 case DImode: gen = gen_aarch64_store_exclusivedi; break;
8344 default:
8345 gcc_unreachable ();
8348 emit_insn (gen (bval, rval, mem, model_rtx));
8351 /* Mark the previous jump instruction as unlikely. */
8353 static void
8354 aarch64_emit_unlikely_jump (rtx insn)
8356 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
8358 insn = emit_jump_insn (insn);
8359 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
8362 /* Expand a compare and swap pattern. */
8364 void
8365 aarch64_expand_compare_and_swap (rtx operands[])
8367 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
8368 enum machine_mode mode, cmp_mode;
8369 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
8371 bval = operands[0];
8372 rval = operands[1];
8373 mem = operands[2];
8374 oldval = operands[3];
8375 newval = operands[4];
8376 is_weak = operands[5];
8377 mod_s = operands[6];
8378 mod_f = operands[7];
8379 mode = GET_MODE (mem);
8380 cmp_mode = mode;
8382 /* Normally the succ memory model must be stronger than fail, but in the
8383 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
8384 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
8386 if (INTVAL (mod_f) == MEMMODEL_ACQUIRE
8387 && INTVAL (mod_s) == MEMMODEL_RELEASE)
8388 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
8390 switch (mode)
8392 case QImode:
8393 case HImode:
8394 /* For short modes, we're going to perform the comparison in SImode,
8395 so do the zero-extension now. */
8396 cmp_mode = SImode;
8397 rval = gen_reg_rtx (SImode);
8398 oldval = convert_modes (SImode, mode, oldval, true);
8399 /* Fall through. */
8401 case SImode:
8402 case DImode:
8403 /* Force the value into a register if needed. */
8404 if (!aarch64_plus_operand (oldval, mode))
8405 oldval = force_reg (cmp_mode, oldval);
8406 break;
8408 default:
8409 gcc_unreachable ();
8412 switch (mode)
8414 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
8415 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
8416 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
8417 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
8418 default:
8419 gcc_unreachable ();
8422 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
8424 if (mode == QImode || mode == HImode)
8425 emit_move_insn (operands[1], gen_lowpart (mode, rval));
8427 x = gen_rtx_REG (CCmode, CC_REGNUM);
8428 x = gen_rtx_EQ (SImode, x, const0_rtx);
8429 emit_insn (gen_rtx_SET (VOIDmode, bval, x));
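/* Illustrative source-level view (an assumption about typical usage, not
   something defined in this file): a call such as

       int expected = old;
       bool ok = __atomic_compare_exchange_n (&v, &expected, new_val,
                                              false,            // weak
                                              __ATOMIC_SEQ_CST,
                                              __ATOMIC_SEQ_CST);

   reaches this expander with IS_WEAK == const0_rtx, so the companion split
   below wraps the exclusive pair in a retry loop; a nonzero weak argument
   instead permits a single attempt that may fail spuriously.  */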
8432 /* Split a compare and swap pattern. */
8434 void
8435 aarch64_split_compare_and_swap (rtx operands[])
8437 rtx rval, mem, oldval, newval, scratch;
8438 enum machine_mode mode;
8439 bool is_weak;
8440 rtx label1, label2, x, cond;
8442 rval = operands[0];
8443 mem = operands[1];
8444 oldval = operands[2];
8445 newval = operands[3];
8446 is_weak = (operands[4] != const0_rtx);
8447 scratch = operands[7];
8448 mode = GET_MODE (mem);
8450 label1 = NULL_RTX;
8451 if (!is_weak)
8453 label1 = gen_label_rtx ();
8454 emit_label (label1);
8456 label2 = gen_label_rtx ();
8458 aarch64_emit_load_exclusive (mode, rval, mem, operands[5]);
8460 cond = aarch64_gen_compare_reg (NE, rval, oldval);
8461 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8462 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8463 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
8464 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8466 aarch64_emit_store_exclusive (mode, scratch, mem, newval, operands[5]);
8468 if (!is_weak)
8470 x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
8471 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8472 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
8473 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
8475 else
8477 cond = gen_rtx_REG (CCmode, CC_REGNUM);
8478 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
8479 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
8482 emit_label (label2);
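/* A rough sketch (not verbatim compiler output, register names invented,
   and ignoring the acquire/release variants chosen by the memory model) of
   the strong SImode sequence this split produces:

       .Lretry:
           ldxr  w0, [x1]        // load-exclusive RVAL
           cmp   w0, w2          // compare against OLDVAL
           b.ne  .Lfail          // unlikely: values differ, give up
           stxr  w3, w4, [x1]    // try to store NEWVAL; status in SCRATCH
           cbnz  w3, .Lretry     // store-exclusive lost the reservation
       .Lfail:

   The weak form drops the retry loop and instead leaves the
   store-exclusive status in the condition flags.  */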
8485 /* Split an atomic operation. */
8487 void
8488 aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
8489 rtx value, rtx model_rtx, rtx cond)
8491 enum machine_mode mode = GET_MODE (mem);
8492 enum machine_mode wmode = (mode == DImode ? DImode : SImode);
8493 rtx label, x;
8495 label = gen_label_rtx ();
8496 emit_label (label);
8498 if (new_out)
8499 new_out = gen_lowpart (wmode, new_out);
8500 if (old_out)
8501 old_out = gen_lowpart (wmode, old_out);
8502 else
8503 old_out = new_out;
8504 value = simplify_gen_subreg (wmode, value, mode, 0);
8506 aarch64_emit_load_exclusive (mode, old_out, mem, model_rtx);
8508 switch (code)
8510 case SET:
8511 new_out = value;
8512 break;
8514 case NOT:
8515 x = gen_rtx_AND (wmode, old_out, value);
8516 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8517 x = gen_rtx_NOT (wmode, new_out);
8518 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8519 break;
8521 case MINUS:
8522 if (CONST_INT_P (value))
8524 value = GEN_INT (-INTVAL (value));
8525 code = PLUS;
8527 /* Fall through. */
8529 default:
8530 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
8531 emit_insn (gen_rtx_SET (VOIDmode, new_out, x));
8532 break;
8535 aarch64_emit_store_exclusive (mode, cond, mem,
8536 gen_lowpart (mode, new_out), model_rtx);
8538 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
8539 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
8540 gen_rtx_LABEL_REF (Pmode, label), pc_rtx);
8541 aarch64_emit_unlikely_jump (gen_rtx_SET (VOIDmode, pc_rtx, x));
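/* Illustrative shape (hedged, register names invented) of the loop this
   split produces for a 32-bit __atomic_fetch_add with a relaxed model:

       .Lretry:
           ldxr  w0, [x2]        // OLD_OUT
           add   w1, w0, w3      // NEW_OUT = OLD_OUT + VALUE
           stxr  w4, w1, [x2]    // COND = store-exclusive status
           cbnz  w4, .Lretry

   Note that the NOT case above computes ~(OLD_OUT & VALUE), matching the
   semantics of the __atomic_fetch_nand family.  */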
8544 static void
8545 aarch64_print_extension (void)
8547 const struct aarch64_option_extension *opt = NULL;
8549 for (opt = all_extensions; opt->name != NULL; opt++)
8550 if ((aarch64_isa_flags & opt->flags_on) == opt->flags_on)
8551 asm_fprintf (asm_out_file, "+%s", opt->name);
8553 asm_fprintf (asm_out_file, "\n");
8556 static void
8557 aarch64_start_file (void)
8559 if (selected_arch)
8561 asm_fprintf (asm_out_file, "\t.arch %s", selected_arch->name);
8562 aarch64_print_extension ();
8564 else if (selected_cpu)
8566 const char *truncated_name
8567 = aarch64_rewrite_selected_cpu (selected_cpu->name);
8568 asm_fprintf (asm_out_file, "\t.cpu %s", truncated_name);
8569 aarch64_print_extension ();
8571 default_file_start();
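/* Example (illustrative, exact spelling depends on the selected CPU and
   enabled extensions): compiling with -mcpu=cortex-a57+crypto typically
   starts the assembly file with something like

       .cpu cortex-a57+fp+simd+crypto

   where the "+ext" suffixes come from aarch64_print_extension and reflect
   whichever extensions are set in aarch64_isa_flags.  */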
8574 /* Target hook for c_mode_for_suffix. */
8575 static enum machine_mode
8576 aarch64_c_mode_for_suffix (char suffix)
8578 if (suffix == 'q')
8579 return TFmode;
8581 return VOIDmode;
8584 /* We can only represent floating point constants which will fit in
8585 "quarter-precision" values. These values are characterised by
8586 a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given by:
8589 (-1)^s * (n/16) * 2^r
8591 Where:
8592 's' is the sign bit.
8593 'n' is an integer in the range 16 <= n <= 31.
8594 'r' is an integer in the range -3 <= r <= 4. */
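/* Worked examples (derived from the formula above): 1.0 = (16/16) * 2^0,
   0.5 = (16/16) * 2^-1, 1.0625 = (17/16) * 2^0 and 31.0 = (31/16) * 2^4
   are all representable, so the representable magnitudes span 0.125 to
   31.0; 0.0 and values needing more mantissa bits (e.g. 1.03125 = 33/32)
   are not, and must be materialised some other way.  */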
8596 /* Return true iff X can be represented by a quarter-precision
8597 floating point immediate operand. Note, we cannot represent 0.0. */
8598 bool
8599 aarch64_float_const_representable_p (rtx x)
8601 /* This represents our current view of how many bits
8602 make up the mantissa. */
8603 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
8604 int exponent;
8605 unsigned HOST_WIDE_INT mantissa, mask;
8606 HOST_WIDE_INT m1, m2;
8607 REAL_VALUE_TYPE r, m;
8609 if (!CONST_DOUBLE_P (x))
8610 return false;
8612 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8614 /* We cannot represent infinities, NaNs or +/-zero. We won't
8615 know if we have +zero until we analyse the mantissa, but we
8616 can reject the other invalid values. */
8617 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
8618 || REAL_VALUE_MINUS_ZERO (r))
8619 return false;
8621 /* Extract exponent. */
8622 r = real_value_abs (&r);
8623 exponent = REAL_EXP (&r);
8625 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
8626 highest (sign) bit, with a fixed binary point at bit point_pos.
8627 m1 holds the low part of the mantissa, m2 the high part.
8628 WARNING: If we ever have a representation using more than 2 * H_W_I - 1
8629 bits for the mantissa, this can fail (low bits will be lost). */
8630 real_ldexp (&m, &r, point_pos - exponent);
8631 REAL_VALUE_TO_INT (&m1, &m2, m);
8633 /* If the low part of the mantissa has bits set we cannot represent
8634 the value. */
8635 if (m1 != 0)
8636 return false;
8637 /* We have rejected the lower HOST_WIDE_INT, so update our
8638 understanding of how many bits lie in the mantissa and
8639 look only at the high HOST_WIDE_INT. */
8640 mantissa = m2;
8641 point_pos -= HOST_BITS_PER_WIDE_INT;
8643 /* We can only represent values with a mantissa of the form 1.xxxx. */
8644 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
8645 if ((mantissa & mask) != 0)
8646 return false;
8648 /* Having filtered unrepresentable values, we may now remove all
8649 but the highest 5 bits. */
8650 mantissa >>= point_pos - 5;
8652 /* We cannot represent the value 0.0, so reject it. This is handled
8653 elsewhere. */
8654 if (mantissa == 0)
8655 return false;
8657 /* Then, as bit 4 is always set, we can mask it off, leaving
8658 the mantissa in the range [0, 15]. */
8659 mantissa &= ~(1 << 4);
8660 gcc_assert (mantissa <= 15);
8662 /* GCC internally does not use IEEE754-like encoding (where normalized
8663 significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
8664 Our mantissa values are shifted 4 places to the left relative to
8665 normalized IEEE754 so we must modify the exponent returned by REAL_EXP
8666 by 5 places to correct for GCC's representation. */
8667 exponent = 5 - exponent;
8669 return (exponent >= 0 && exponent <= 7);
8672 char*
8673 aarch64_output_simd_mov_immediate (rtx const_vector,
8674 enum machine_mode mode,
8675 unsigned width)
8677 bool is_valid;
8678 static char templ[40];
8679 const char *mnemonic;
8680 const char *shift_op;
8681 unsigned int lane_count = 0;
8682 char element_char;
8684 struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
8686 /* This will return true to show const_vector is legal for use as either
8687 an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
8688 also update INFO to show how the immediate should be generated. */
8689 is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
8690 gcc_assert (is_valid);
8692 element_char = sizetochar (info.element_width);
8693 lane_count = width / info.element_width;
8695 mode = GET_MODE_INNER (mode);
8696 if (mode == SFmode || mode == DFmode)
8698 gcc_assert (info.shift == 0 && ! info.mvn);
8699 if (aarch64_float_const_zero_rtx_p (info.value))
8700 info.value = GEN_INT (0);
8701 else
8703 #define buf_size 20
8704 REAL_VALUE_TYPE r;
8705 REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
8706 char float_buf[buf_size] = {'\0'};
8707 real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
8708 #undef buf_size
8710 if (lane_count == 1)
8711 snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
8712 else
8713 snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
8714 lane_count, element_char, float_buf);
8715 return templ;
8719 mnemonic = info.mvn ? "mvni" : "movi";
8720 shift_op = info.msl ? "msl" : "lsl";
8722 if (lane_count == 1)
8723 snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
8724 mnemonic, UINTVAL (info.value));
8725 else if (info.shift)
8726 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
8727 ", %s %d", mnemonic, lane_count, element_char,
8728 UINTVAL (info.value), shift_op, info.shift);
8729 else
8730 snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
8731 mnemonic, lane_count, element_char, UINTVAL (info.value));
8732 return templ;
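/* Example outputs (illustrative; the exact operand text comes from the
   templates above): a V4SImode vector with every lane equal to 1 yields
   "movi v0.4s, 0x1", every lane equal to 0x00010000 yields
   "movi v0.4s, 0x1, lsl 16", and every lane equal to 0xffffff00 yields
   "mvni v0.4s, 0xff".  */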
8735 char*
8736 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
8737 enum machine_mode mode)
8739 enum machine_mode vmode;
8741 gcc_assert (!VECTOR_MODE_P (mode));
8742 vmode = aarch64_simd_container_mode (mode, 64);
8743 rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
8744 return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
8747 /* Split operands into moves from op[1] + op[2] into op[0]. */
8749 void
8750 aarch64_split_combinev16qi (rtx operands[3])
8752 unsigned int dest = REGNO (operands[0]);
8753 unsigned int src1 = REGNO (operands[1]);
8754 unsigned int src2 = REGNO (operands[2]);
8755 enum machine_mode halfmode = GET_MODE (operands[1]);
8756 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
8757 rtx destlo, desthi;
8759 gcc_assert (halfmode == V16QImode);
8761 if (src1 == dest && src2 == dest + halfregs)
8763 /* No-op move. Can't split to nothing; emit something. */
8764 emit_note (NOTE_INSN_DELETED);
8765 return;
8768 /* Preserve register attributes for variable tracking. */
8769 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
8770 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
8771 GET_MODE_SIZE (halfmode));
8773 /* Special case of reversed high/low parts. */
8774 if (reg_overlap_mentioned_p (operands[2], destlo)
8775 && reg_overlap_mentioned_p (operands[1], desthi))
8777 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8778 emit_insn (gen_xorv16qi3 (operands[2], operands[1], operands[2]));
8779 emit_insn (gen_xorv16qi3 (operands[1], operands[1], operands[2]));
8781 else if (!reg_overlap_mentioned_p (operands[2], destlo))
8783 /* Try to avoid unnecessary moves if part of the result
8784 is in the right place already. */
8785 if (src1 != dest)
8786 emit_move_insn (destlo, operands[1]);
8787 if (src2 != dest + halfregs)
8788 emit_move_insn (desthi, operands[2]);
8790 else
8792 if (src2 != dest + halfregs)
8793 emit_move_insn (desthi, operands[2]);
8794 if (src1 != dest)
8795 emit_move_insn (destlo, operands[1]);
8799 /* vec_perm support. */
8801 #define MAX_VECT_LEN 16
8803 struct expand_vec_perm_d
8805 rtx target, op0, op1;
8806 unsigned char perm[MAX_VECT_LEN];
8807 enum machine_mode vmode;
8808 unsigned char nelt;
8809 bool one_vector_p;
8810 bool testing_p;
8813 /* Generate a variable permutation. */
8815 static void
8816 aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
8818 enum machine_mode vmode = GET_MODE (target);
8819 bool one_vector_p = rtx_equal_p (op0, op1);
8821 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
8822 gcc_checking_assert (GET_MODE (op0) == vmode);
8823 gcc_checking_assert (GET_MODE (op1) == vmode);
8824 gcc_checking_assert (GET_MODE (sel) == vmode);
8825 gcc_checking_assert (TARGET_SIMD);
8827 if (one_vector_p)
8829 if (vmode == V8QImode)
8831 /* Expand the argument to a V16QI mode by duplicating it. */
8832 rtx pair = gen_reg_rtx (V16QImode);
8833 emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
8834 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8836 else
8838 emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
8841 else
8843 rtx pair;
8845 if (vmode == V8QImode)
8847 pair = gen_reg_rtx (V16QImode);
8848 emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
8849 emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
8851 else
8853 pair = gen_reg_rtx (OImode);
8854 emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
8855 emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
8860 void
8861 aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
8863 enum machine_mode vmode = GET_MODE (target);
8864 unsigned int nelt = GET_MODE_NUNITS (vmode);
8865 bool one_vector_p = rtx_equal_p (op0, op1);
8866 rtx mask;
8868 /* The TBL instruction does not use a modulo index, so we must take care
8869 of that ourselves. */
8870 mask = aarch64_simd_gen_const_vector_dup (vmode,
8871 one_vector_p ? nelt - 1 : 2 * nelt - 1);
8872 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
8874 /* For big-endian, we also need to reverse the index within the vector
8875 (but not which vector). */
8876 if (BYTES_BIG_ENDIAN)
8878 /* If one_vector_p, mask is a vector of (nelt - 1)'s already. */
8879 if (!one_vector_p)
8880 mask = aarch64_simd_gen_const_vector_dup (vmode, nelt - 1);
8881 sel = expand_simple_binop (vmode, XOR, sel, mask,
8882 NULL, 0, OPTAB_LIB_WIDEN);
8884 aarch64_expand_vec_perm_1 (target, op0, op1, sel);
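/* Example (derived from the masking above): for a two-vector V8QImode
   permute the mask is 15, so a selector element holding 17 is reduced to 1
   and picks element 1 of op0.  This gives the modulo semantics vec_perm
   requires; a raw TBL would instead write zero for any out-of-range
   index.  */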
8887 /* Recognize patterns suitable for the TRN instructions. */
8888 static bool
8889 aarch64_evpc_trn (struct expand_vec_perm_d *d)
8891 unsigned int i, odd, mask, nelt = d->nelt;
8892 rtx out, in0, in1, x;
8893 rtx (*gen) (rtx, rtx, rtx);
8894 enum machine_mode vmode = d->vmode;
8896 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8897 return false;
8899 /* Note that these are little-endian tests.
8900 We correct for big-endian later. */
8901 if (d->perm[0] == 0)
8902 odd = 0;
8903 else if (d->perm[0] == 1)
8904 odd = 1;
8905 else
8906 return false;
8907 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8909 for (i = 0; i < nelt; i += 2)
8911 if (d->perm[i] != i + odd)
8912 return false;
8913 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
8914 return false;
8917 /* Success! */
8918 if (d->testing_p)
8919 return true;
8921 in0 = d->op0;
8922 in1 = d->op1;
8923 if (BYTES_BIG_ENDIAN)
8925 x = in0, in0 = in1, in1 = x;
8926 odd = !odd;
8928 out = d->target;
8930 if (odd)
8932 switch (vmode)
8934 case V16QImode: gen = gen_aarch64_trn2v16qi; break;
8935 case V8QImode: gen = gen_aarch64_trn2v8qi; break;
8936 case V8HImode: gen = gen_aarch64_trn2v8hi; break;
8937 case V4HImode: gen = gen_aarch64_trn2v4hi; break;
8938 case V4SImode: gen = gen_aarch64_trn2v4si; break;
8939 case V2SImode: gen = gen_aarch64_trn2v2si; break;
8940 case V2DImode: gen = gen_aarch64_trn2v2di; break;
8941 case V4SFmode: gen = gen_aarch64_trn2v4sf; break;
8942 case V2SFmode: gen = gen_aarch64_trn2v2sf; break;
8943 case V2DFmode: gen = gen_aarch64_trn2v2df; break;
8944 default:
8945 return false;
8948 else
8950 switch (vmode)
8952 case V16QImode: gen = gen_aarch64_trn1v16qi; break;
8953 case V8QImode: gen = gen_aarch64_trn1v8qi; break;
8954 case V8HImode: gen = gen_aarch64_trn1v8hi; break;
8955 case V4HImode: gen = gen_aarch64_trn1v4hi; break;
8956 case V4SImode: gen = gen_aarch64_trn1v4si; break;
8957 case V2SImode: gen = gen_aarch64_trn1v2si; break;
8958 case V2DImode: gen = gen_aarch64_trn1v2di; break;
8959 case V4SFmode: gen = gen_aarch64_trn1v4sf; break;
8960 case V2SFmode: gen = gen_aarch64_trn1v2sf; break;
8961 case V2DFmode: gen = gen_aarch64_trn1v2df; break;
8962 default:
8963 return false;
8967 emit_insn (gen (out, in0, in1));
8968 return true;
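/* Example of what the checks above accept: for V4SImode with two input
   vectors, perm = {0, 4, 2, 6} selects TRN1 and perm = {1, 5, 3, 7}
   selects TRN2 (operands and odd/even are swapped for big-endian).  */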
8971 /* Recognize patterns suitable for the UZP instructions. */
8972 static bool
8973 aarch64_evpc_uzp (struct expand_vec_perm_d *d)
8975 unsigned int i, odd, mask, nelt = d->nelt;
8976 rtx out, in0, in1, x;
8977 rtx (*gen) (rtx, rtx, rtx);
8978 enum machine_mode vmode = d->vmode;
8980 if (GET_MODE_UNIT_SIZE (vmode) > 8)
8981 return false;
8983 /* Note that these are little-endian tests.
8984 We correct for big-endian later. */
8985 if (d->perm[0] == 0)
8986 odd = 0;
8987 else if (d->perm[0] == 1)
8988 odd = 1;
8989 else
8990 return false;
8991 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
8993 for (i = 0; i < nelt; i++)
8995 unsigned elt = (i * 2 + odd) & mask;
8996 if (d->perm[i] != elt)
8997 return false;
9000 /* Success! */
9001 if (d->testing_p)
9002 return true;
9004 in0 = d->op0;
9005 in1 = d->op1;
9006 if (BYTES_BIG_ENDIAN)
9008 x = in0, in0 = in1, in1 = x;
9009 odd = !odd;
9011 out = d->target;
9013 if (odd)
9015 switch (vmode)
9017 case V16QImode: gen = gen_aarch64_uzp2v16qi; break;
9018 case V8QImode: gen = gen_aarch64_uzp2v8qi; break;
9019 case V8HImode: gen = gen_aarch64_uzp2v8hi; break;
9020 case V4HImode: gen = gen_aarch64_uzp2v4hi; break;
9021 case V4SImode: gen = gen_aarch64_uzp2v4si; break;
9022 case V2SImode: gen = gen_aarch64_uzp2v2si; break;
9023 case V2DImode: gen = gen_aarch64_uzp2v2di; break;
9024 case V4SFmode: gen = gen_aarch64_uzp2v4sf; break;
9025 case V2SFmode: gen = gen_aarch64_uzp2v2sf; break;
9026 case V2DFmode: gen = gen_aarch64_uzp2v2df; break;
9027 default:
9028 return false;
9031 else
9033 switch (vmode)
9035 case V16QImode: gen = gen_aarch64_uzp1v16qi; break;
9036 case V8QImode: gen = gen_aarch64_uzp1v8qi; break;
9037 case V8HImode: gen = gen_aarch64_uzp1v8hi; break;
9038 case V4HImode: gen = gen_aarch64_uzp1v4hi; break;
9039 case V4SImode: gen = gen_aarch64_uzp1v4si; break;
9040 case V2SImode: gen = gen_aarch64_uzp1v2si; break;
9041 case V2DImode: gen = gen_aarch64_uzp1v2di; break;
9042 case V4SFmode: gen = gen_aarch64_uzp1v4sf; break;
9043 case V2SFmode: gen = gen_aarch64_uzp1v2sf; break;
9044 case V2DFmode: gen = gen_aarch64_uzp1v2df; break;
9045 default:
9046 return false;
9050 emit_insn (gen (out, in0, in1));
9051 return true;
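/* Example: for V4SImode with two input vectors, perm = {0, 2, 4, 6}
   selects UZP1 and perm = {1, 3, 5, 7} selects UZP2.  */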
9054 /* Recognize patterns suitable for the ZIP instructions. */
9055 static bool
9056 aarch64_evpc_zip (struct expand_vec_perm_d *d)
9058 unsigned int i, high, mask, nelt = d->nelt;
9059 rtx out, in0, in1, x;
9060 rtx (*gen) (rtx, rtx, rtx);
9061 enum machine_mode vmode = d->vmode;
9063 if (GET_MODE_UNIT_SIZE (vmode) > 8)
9064 return false;
9066 /* Note that these are little-endian tests.
9067 We correct for big-endian later. */
9068 high = nelt / 2;
9069 if (d->perm[0] == high)
9070 /* Do Nothing. */
9072 else if (d->perm[0] == 0)
9073 high = 0;
9074 else
9075 return false;
9076 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
9078 for (i = 0; i < nelt / 2; i++)
9080 unsigned elt = (i + high) & mask;
9081 if (d->perm[i * 2] != elt)
9082 return false;
9083 elt = (elt + nelt) & mask;
9084 if (d->perm[i * 2 + 1] != elt)
9085 return false;
9088 /* Success! */
9089 if (d->testing_p)
9090 return true;
9092 in0 = d->op0;
9093 in1 = d->op1;
9094 if (BYTES_BIG_ENDIAN)
9096 x = in0, in0 = in1, in1 = x;
9097 high = !high;
9099 out = d->target;
9101 if (high)
9103 switch (vmode)
9105 case V16QImode: gen = gen_aarch64_zip2v16qi; break;
9106 case V8QImode: gen = gen_aarch64_zip2v8qi; break;
9107 case V8HImode: gen = gen_aarch64_zip2v8hi; break;
9108 case V4HImode: gen = gen_aarch64_zip2v4hi; break;
9109 case V4SImode: gen = gen_aarch64_zip2v4si; break;
9110 case V2SImode: gen = gen_aarch64_zip2v2si; break;
9111 case V2DImode: gen = gen_aarch64_zip2v2di; break;
9112 case V4SFmode: gen = gen_aarch64_zip2v4sf; break;
9113 case V2SFmode: gen = gen_aarch64_zip2v2sf; break;
9114 case V2DFmode: gen = gen_aarch64_zip2v2df; break;
9115 default:
9116 return false;
9119 else
9121 switch (vmode)
9123 case V16QImode: gen = gen_aarch64_zip1v16qi; break;
9124 case V8QImode: gen = gen_aarch64_zip1v8qi; break;
9125 case V8HImode: gen = gen_aarch64_zip1v8hi; break;
9126 case V4HImode: gen = gen_aarch64_zip1v4hi; break;
9127 case V4SImode: gen = gen_aarch64_zip1v4si; break;
9128 case V2SImode: gen = gen_aarch64_zip1v2si; break;
9129 case V2DImode: gen = gen_aarch64_zip1v2di; break;
9130 case V4SFmode: gen = gen_aarch64_zip1v4sf; break;
9131 case V2SFmode: gen = gen_aarch64_zip1v2sf; break;
9132 case V2DFmode: gen = gen_aarch64_zip1v2df; break;
9133 default:
9134 return false;
9138 emit_insn (gen (out, in0, in1));
9139 return true;
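/* Example: for V4SImode with two input vectors, perm = {0, 4, 1, 5}
   selects ZIP1 and perm = {2, 6, 3, 7} selects ZIP2.  */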
9142 /* Recognize patterns for the EXT insn. */
9144 static bool
9145 aarch64_evpc_ext (struct expand_vec_perm_d *d)
9147 unsigned int i, nelt = d->nelt;
9148 rtx (*gen) (rtx, rtx, rtx, rtx);
9149 rtx offset;
9151 unsigned int location = d->perm[0]; /* Always < nelt. */
9153 /* Check if the extracted indices are increasing by one. */
9154 for (i = 1; i < nelt; i++)
9156 unsigned int required = location + i;
9157 if (d->one_vector_p)
9159 /* We'll pass the same vector in twice, so allow indices to wrap. */
9160 required &= (nelt - 1);
9162 if (d->perm[i] != required)
9163 return false;
9166 switch (d->vmode)
9168 case V16QImode: gen = gen_aarch64_extv16qi; break;
9169 case V8QImode: gen = gen_aarch64_extv8qi; break;
9170 case V4HImode: gen = gen_aarch64_extv4hi; break;
9171 case V8HImode: gen = gen_aarch64_extv8hi; break;
9172 case V2SImode: gen = gen_aarch64_extv2si; break;
9173 case V4SImode: gen = gen_aarch64_extv4si; break;
9174 case V2SFmode: gen = gen_aarch64_extv2sf; break;
9175 case V4SFmode: gen = gen_aarch64_extv4sf; break;
9176 case V2DImode: gen = gen_aarch64_extv2di; break;
9177 case V2DFmode: gen = gen_aarch64_extv2df; break;
9178 default:
9179 return false;
9182 /* Success! */
9183 if (d->testing_p)
9184 return true;
9186 /* The case where (location == 0) is a no-op for both big- and little-endian,
9187 and is removed by the mid-end at optimization levels -O1 and higher. */
9189 if (BYTES_BIG_ENDIAN && (location != 0))
9191 /* After setup, we want the high elements of the first vector (stored
9192 at the LSB end of the register), and the low elements of the second
9193 vector (stored at the MSB end of the register). So swap. */
9194 rtx temp = d->op0;
9195 d->op0 = d->op1;
9196 d->op1 = temp;
9197 /* location != 0 (above), so safe to assume (nelt - location) < nelt. */
9198 location = nelt - location;
9201 offset = GEN_INT (location);
9202 emit_insn (gen (d->target, d->op0, d->op1, offset));
9203 return true;
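/* Example: a V4SImode permutation of {1, 2, 3, 4} across two vectors is
   matched here as an EXT with element offset 1 (on big-endian the operands
   are swapped and the offset becomes nelt - location).  */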
9206 /* Recognize patterns for the REV insns. */
9208 static bool
9209 aarch64_evpc_rev (struct expand_vec_perm_d *d)
9211 unsigned int i, j, diff, nelt = d->nelt;
9212 rtx (*gen) (rtx, rtx);
9214 if (!d->one_vector_p)
9215 return false;
9217 diff = d->perm[0];
9218 switch (diff)
9220 case 7:
9221 switch (d->vmode)
9223 case V16QImode: gen = gen_aarch64_rev64v16qi; break;
9224 case V8QImode: gen = gen_aarch64_rev64v8qi; break;
9225 default:
9226 return false;
9228 break;
9229 case 3:
9230 switch (d->vmode)
9232 case V16QImode: gen = gen_aarch64_rev32v16qi; break;
9233 case V8QImode: gen = gen_aarch64_rev32v8qi; break;
9234 case V8HImode: gen = gen_aarch64_rev64v8hi; break;
9235 case V4HImode: gen = gen_aarch64_rev64v4hi; break;
9236 default:
9237 return false;
9239 break;
9240 case 1:
9241 switch (d->vmode)
9243 case V16QImode: gen = gen_aarch64_rev16v16qi; break;
9244 case V8QImode: gen = gen_aarch64_rev16v8qi; break;
9245 case V8HImode: gen = gen_aarch64_rev32v8hi; break;
9246 case V4HImode: gen = gen_aarch64_rev32v4hi; break;
9247 case V4SImode: gen = gen_aarch64_rev64v4si; break;
9248 case V2SImode: gen = gen_aarch64_rev64v2si; break;
9249 case V4SFmode: gen = gen_aarch64_rev64v4sf; break;
9250 case V2SFmode: gen = gen_aarch64_rev64v2sf; break;
9251 default:
9252 return false;
9254 break;
9255 default:
9256 return false;
9259 for (i = 0; i < nelt ; i += diff + 1)
9260 for (j = 0; j <= diff; j += 1)
9262 /* This is guaranteed to be true as the value of diff
9263 is 7, 3 or 1, and we should have enough elements in the
9264 queue to generate this. Getting a vector mask with a
9265 value of diff other than these values implies that
9266 something is wrong by the time we get here. */
9267 gcc_assert (i + j < nelt);
9268 if (d->perm[i + j] != i + diff - j)
9269 return false;
9272 /* Success! */
9273 if (d->testing_p)
9274 return true;
9276 emit_insn (gen (d->target, d->op0));
9277 return true;
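/* Examples of masks matched above for V8QImode: {3,2,1,0,7,6,5,4}
   (diff == 3) maps to REV32, and {1,0,3,2,5,4,7,6} (diff == 1) maps to
   REV16.  */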
9280 static bool
9281 aarch64_evpc_dup (struct expand_vec_perm_d *d)
9283 rtx (*gen) (rtx, rtx, rtx);
9284 rtx out = d->target;
9285 rtx in0;
9286 enum machine_mode vmode = d->vmode;
9287 unsigned int i, elt, nelt = d->nelt;
9288 rtx lane;
9290 /* TODO: This may not be big-endian safe. */
9291 if (BYTES_BIG_ENDIAN)
9292 return false;
9294 elt = d->perm[0];
9295 for (i = 1; i < nelt; i++)
9297 if (elt != d->perm[i])
9298 return false;
9301 /* The generic preparation in aarch64_expand_vec_perm_const_1
9302 swaps the operand order and the permute indices if it finds
9303 d->perm[0] to be in the second operand. Thus, we can always
9304 use d->op0 and need not do any extra arithmetic to get the
9305 correct lane number. */
9306 in0 = d->op0;
9307 lane = GEN_INT (elt);
9309 switch (vmode)
9311 case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
9312 case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
9313 case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
9314 case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
9315 case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
9316 case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
9317 case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
9318 case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
9319 case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
9320 case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
9321 default:
9322 return false;
9325 emit_insn (gen (out, in0, lane));
9326 return true;
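/* Example: a permutation in which every index is 2 (e.g. {2, 2, 2, 2} for
   V4SImode) is matched here as a DUP of lane 2 of op0.  */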
9329 static bool
9330 aarch64_evpc_tbl (struct expand_vec_perm_d *d)
9332 rtx rperm[MAX_VECT_LEN], sel;
9333 enum machine_mode vmode = d->vmode;
9334 unsigned int i, nelt = d->nelt;
9336 if (d->testing_p)
9337 return true;
9339 /* Generic code will try constant permutation twice. Once with the
9340 original mode and again with the elements lowered to QImode.
9341 So wait and don't do the selector expansion ourselves. */
9342 if (vmode != V8QImode && vmode != V16QImode)
9343 return false;
9345 for (i = 0; i < nelt; ++i)
9347 int nunits = GET_MODE_NUNITS (vmode);
9349 /* If big-endian and two vectors we end up with a weird mixed-endian
9350 mode on NEON. Reverse the index within each word but not the word
9351 itself. */
9352 rperm[i] = GEN_INT (BYTES_BIG_ENDIAN ? d->perm[i] ^ (nunits - 1)
9353 : d->perm[i]);
9355 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
9356 sel = force_reg (vmode, sel);
9358 aarch64_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
9359 return true;
9362 static bool
9363 aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
9365 /* The pattern matching functions above are written to look for a small
9366 number to begin the sequence (0, 1, N/2). If we begin with an index
9367 from the second operand, we can swap the operands. */
9368 if (d->perm[0] >= d->nelt)
9370 unsigned i, nelt = d->nelt;
9371 rtx x;
9373 for (i = 0; i < nelt; ++i)
9374 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
9376 x = d->op0;
9377 d->op0 = d->op1;
9378 d->op1 = x;
9381 if (TARGET_SIMD)
9383 if (aarch64_evpc_rev (d))
9384 return true;
9385 else if (aarch64_evpc_ext (d))
9386 return true;
9387 else if (aarch64_evpc_zip (d))
9388 return true;
9389 else if (aarch64_evpc_uzp (d))
9390 return true;
9391 else if (aarch64_evpc_trn (d))
9392 return true;
9393 else if (aarch64_evpc_dup (d))
9394 return true;
9395 return aarch64_evpc_tbl (d);
9397 return false;
9400 /* Expand a vec_perm_const pattern. */
9402 bool
9403 aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
9405 struct expand_vec_perm_d d;
9406 int i, nelt, which;
9408 d.target = target;
9409 d.op0 = op0;
9410 d.op1 = op1;
9412 d.vmode = GET_MODE (target);
9413 gcc_assert (VECTOR_MODE_P (d.vmode));
9414 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
9415 d.testing_p = false;
9417 for (i = which = 0; i < nelt; ++i)
9419 rtx e = XVECEXP (sel, 0, i);
9420 int ei = INTVAL (e) & (2 * nelt - 1);
9421 which |= (ei < nelt ? 1 : 2);
9422 d.perm[i] = ei;
9425 switch (which)
9427 default:
9428 gcc_unreachable ();
9430 case 3:
9431 d.one_vector_p = false;
9432 if (!rtx_equal_p (op0, op1))
9433 break;
9435 /* The elements of PERM do not suggest that only the first operand
9436 is used, but both operands are identical. Allow easier matching
9437 of the permutation by folding the permutation into the single
9438 input vector. */
9439 /* Fall Through. */
9440 case 2:
9441 for (i = 0; i < nelt; ++i)
9442 d.perm[i] &= nelt - 1;
9443 d.op0 = op1;
9444 d.one_vector_p = true;
9445 break;
9447 case 1:
9448 d.op1 = op0;
9449 d.one_vector_p = true;
9450 break;
9453 return aarch64_expand_vec_perm_const_1 (&d);
9456 static bool
9457 aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
9458 const unsigned char *sel)
9460 struct expand_vec_perm_d d;
9461 unsigned int i, nelt, which;
9462 bool ret;
9464 d.vmode = vmode;
9465 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
9466 d.testing_p = true;
9467 memcpy (d.perm, sel, nelt);
9469 /* Calculate whether all elements are in one vector. */
9470 for (i = which = 0; i < nelt; ++i)
9472 unsigned char e = d.perm[i];
9473 gcc_assert (e < 2 * nelt);
9474 which |= (e < nelt ? 1 : 2);
9477 /* If all elements are from the second vector, reindex as if from the
9478 first vector. */
9479 if (which == 2)
9480 for (i = 0; i < nelt; ++i)
9481 d.perm[i] -= nelt;
9483 /* Check whether the mask can be applied to a single vector. */
9484 d.one_vector_p = (which != 3);
9486 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
9487 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
9488 if (!d.one_vector_p)
9489 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
9491 start_sequence ();
9492 ret = aarch64_expand_vec_perm_const_1 (&d);
9493 end_sequence ();
9495 return ret;
9498 /* Implement target hook CANNOT_CHANGE_MODE_CLASS. */
9499 bool
9500 aarch64_cannot_change_mode_class (enum machine_mode from,
9501 enum machine_mode to,
9502 enum reg_class rclass)
9504 /* Full-reg subregs are allowed on general regs or any class if they are
9505 the same size. */
9506 if (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
9507 || !reg_classes_intersect_p (FP_REGS, rclass))
9508 return false;
9510 /* Limited combinations of subregs are safe on FPREGs. Particularly,
9511 1. Vector Mode to Scalar mode where 1 unit of the vector is accessed.
9512 2. Scalar to Scalar for integer modes or same size float modes.
9513 3. Vector to Vector modes.
9514 4. On little-endian only, Vector-Structure to Vector modes. */
9515 if (GET_MODE_SIZE (from) > GET_MODE_SIZE (to))
9517 if (aarch64_vector_mode_supported_p (from)
9518 && GET_MODE_SIZE (GET_MODE_INNER (from)) == GET_MODE_SIZE (to))
9519 return false;
9521 if (GET_MODE_NUNITS (from) == 1
9522 && GET_MODE_NUNITS (to) == 1
9523 && (GET_MODE_CLASS (from) == MODE_INT
9524 || from == to))
9525 return false;
9527 if (aarch64_vector_mode_supported_p (from)
9528 && aarch64_vector_mode_supported_p (to))
9529 return false;
9531 /* Within a vector structure straddling multiple vector registers
9532 we are in a mixed-endian representation. As such, we can't
9533 easily change modes for BYTES_BIG_ENDIAN. Otherwise, we can
9534 switch between vectors and vector structures cheaply. */
9535 if (!BYTES_BIG_ENDIAN)
9536 if ((aarch64_vector_mode_supported_p (from)
9537 && aarch64_vect_struct_mode_p (to))
9538 || (aarch64_vector_mode_supported_p (to)
9539 && aarch64_vect_struct_mode_p (from)))
9540 return false;
9543 return true;
9546 /* Implement MODES_TIEABLE_P. */
9548 bool
9549 aarch64_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
9551 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
9552 return true;
9554 /* We specifically want to allow elements of "structure" modes to
9555 be tieable to the structure. This more general condition allows
9556 other rarer situations too. */
9557 if (TARGET_SIMD
9558 && aarch64_vector_mode_p (mode1)
9559 && aarch64_vector_mode_p (mode2))
9560 return true;
9562 return false;
9565 /* Return a new RTX holding the result of moving POINTER forward by
9566 AMOUNT bytes. */
9568 static rtx
9569 aarch64_move_pointer (rtx pointer, int amount)
9571 rtx next = plus_constant (Pmode, XEXP (pointer, 0), amount);
9573 return adjust_automodify_address (pointer, GET_MODE (pointer),
9574 next, amount);
9577 /* Return a new RTX holding the result of moving POINTER forward by the
9578 size of the mode it points to. */
9580 static rtx
9581 aarch64_progress_pointer (rtx pointer)
9583 HOST_WIDE_INT amount = GET_MODE_SIZE (GET_MODE (pointer));
9585 return aarch64_move_pointer (pointer, amount);
9588 /* Copy one MODE sized block from SRC to DST, then progress SRC and DST by
9589 the size of MODE. */
9591 static void
9592 aarch64_copy_one_block_and_progress_pointers (rtx *src, rtx *dst,
9593 enum machine_mode mode)
9595 rtx reg = gen_reg_rtx (mode);
9597 /* "Cast" the pointers to the correct mode. */
9598 *src = adjust_address (*src, mode, 0);
9599 *dst = adjust_address (*dst, mode, 0);
9600 /* Emit the memcpy. */
9601 emit_move_insn (reg, *src);
9602 emit_move_insn (*dst, reg);
9603 /* Move the pointers forward. */
9604 *src = aarch64_progress_pointer (*src);
9605 *dst = aarch64_progress_pointer (*dst);
9608 /* Expand movmem, as if from a __builtin_memcpy. Return true if
9609 we succeed, otherwise return false. */
9611 bool
9612 aarch64_expand_movmem (rtx *operands)
9614 unsigned int n;
9615 rtx dst = operands[0];
9616 rtx src = operands[1];
9617 rtx base;
9618 bool speed_p = !optimize_function_for_size_p (cfun);
9620 /* When optimizing for size, give a better estimate of the length of a
9621 memcpy call, but use the default otherwise. */
9622 unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;
9624 /* We can't do anything smart if the amount to copy is not constant. */
9625 if (!CONST_INT_P (operands[2]))
9626 return false;
9628 n = UINTVAL (operands[2]);
9630 /* Try to keep the number of instructions low. For cases below 16 bytes we
9631 need to make at most two moves. For cases above 16 bytes it will be one
9632 move for each 16 byte chunk, then at most two additional moves. */
9633 if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
9634 return false;
9636 base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
9637 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
9639 base = copy_to_mode_reg (Pmode, XEXP (src, 0));
9640 src = adjust_automodify_address (src, VOIDmode, base, 0);
9642 /* Simple cases. Copy 0-3 bytes, as (if applicable) a 2-byte, then a
9643 1-byte chunk. */
9644 if (n < 4)
9646 if (n >= 2)
9648 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
9649 n -= 2;
9652 if (n == 1)
9653 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
9655 return true;
9658 /* Copy 4-8 bytes. First a 4-byte chunk, then (if applicable) a second
9659 4-byte chunk, partially overlapping with the previously copied chunk. */
9660 if (n < 8)
9662 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9663 n -= 4;
9664 if (n > 0)
9666 int move = n - 4;
9668 src = aarch64_move_pointer (src, move);
9669 dst = aarch64_move_pointer (dst, move);
9670 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9672 return true;
9675 /* Copy more than 8 bytes. Copy chunks of 16 bytes until we run out of
9676 them, then (if applicable) an 8-byte chunk. */
9677 while (n >= 8)
9679 if (n / 16)
9681 aarch64_copy_one_block_and_progress_pointers (&src, &dst, TImode);
9682 n -= 16;
9684 else
9686 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
9687 n -= 8;
9691 /* Finish the final bytes of the copy. We can always do this in one
9692 instruction. We either copy the exact amount we need, or partially
9693 overlap with the previous chunk we copied and copy 8 bytes. */
9694 if (n == 0)
9695 return true;
9696 else if (n == 1)
9697 aarch64_copy_one_block_and_progress_pointers (&src, &dst, QImode);
9698 else if (n == 2)
9699 aarch64_copy_one_block_and_progress_pointers (&src, &dst, HImode);
9700 else if (n == 4)
9701 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9702 else
9704 if (n == 3)
9706 src = aarch64_move_pointer (src, -1);
9707 dst = aarch64_move_pointer (dst, -1);
9708 aarch64_copy_one_block_and_progress_pointers (&src, &dst, SImode);
9710 else
9712 int move = n - 8;
9714 src = aarch64_move_pointer (src, move);
9715 dst = aarch64_move_pointer (dst, move);
9716 aarch64_copy_one_block_and_progress_pointers (&src, &dst, DImode);
9720 return true;
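/* Worked examples of the strategy above: a 25-byte copy becomes one
   16-byte (TImode) move, one 8-byte (DImode) move and one 1-byte (QImode)
   move; a 7-byte copy becomes two overlapping 4-byte (SImode) moves, the
   second shifted back by one byte so that no separate 2-byte and 1-byte
   tail copies are needed.  */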
9723 #undef TARGET_ADDRESS_COST
9724 #define TARGET_ADDRESS_COST aarch64_address_cost
9726 /* This hook determines whether unnamed bitfields affect the alignment
9727 of the containing structure. The hook returns true if the structure
9728 should inherit the alignment requirements of an unnamed bitfield's
9729 type. */
9730 #undef TARGET_ALIGN_ANON_BITFIELD
9731 #define TARGET_ALIGN_ANON_BITFIELD hook_bool_void_true
9733 #undef TARGET_ASM_ALIGNED_DI_OP
9734 #define TARGET_ASM_ALIGNED_DI_OP "\t.xword\t"
9736 #undef TARGET_ASM_ALIGNED_HI_OP
9737 #define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
9739 #undef TARGET_ASM_ALIGNED_SI_OP
9740 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
9742 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
9743 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
9744 hook_bool_const_tree_hwi_hwi_const_tree_true
9746 #undef TARGET_ASM_FILE_START
9747 #define TARGET_ASM_FILE_START aarch64_start_file
9749 #undef TARGET_ASM_OUTPUT_MI_THUNK
9750 #define TARGET_ASM_OUTPUT_MI_THUNK aarch64_output_mi_thunk
9752 #undef TARGET_ASM_SELECT_RTX_SECTION
9753 #define TARGET_ASM_SELECT_RTX_SECTION aarch64_select_rtx_section
9755 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
9756 #define TARGET_ASM_TRAMPOLINE_TEMPLATE aarch64_asm_trampoline_template
9758 #undef TARGET_BUILD_BUILTIN_VA_LIST
9759 #define TARGET_BUILD_BUILTIN_VA_LIST aarch64_build_builtin_va_list
9761 #undef TARGET_CALLEE_COPIES
9762 #define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_false
9764 #undef TARGET_CAN_ELIMINATE
9765 #define TARGET_CAN_ELIMINATE aarch64_can_eliminate
9767 #undef TARGET_CANNOT_FORCE_CONST_MEM
9768 #define TARGET_CANNOT_FORCE_CONST_MEM aarch64_cannot_force_const_mem
9770 #undef TARGET_CONDITIONAL_REGISTER_USAGE
9771 #define TARGET_CONDITIONAL_REGISTER_USAGE aarch64_conditional_register_usage
9773 /* Only the least significant bit is used for initialization guard
9774 variables. */
9775 #undef TARGET_CXX_GUARD_MASK_BIT
9776 #define TARGET_CXX_GUARD_MASK_BIT hook_bool_void_true
9778 #undef TARGET_C_MODE_FOR_SUFFIX
9779 #define TARGET_C_MODE_FOR_SUFFIX aarch64_c_mode_for_suffix
9781 #ifdef TARGET_BIG_ENDIAN_DEFAULT
9782 #undef TARGET_DEFAULT_TARGET_FLAGS
9783 #define TARGET_DEFAULT_TARGET_FLAGS (MASK_BIG_END)
9784 #endif
9786 #undef TARGET_CLASS_MAX_NREGS
9787 #define TARGET_CLASS_MAX_NREGS aarch64_class_max_nregs
9789 #undef TARGET_BUILTIN_DECL
9790 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
9792 #undef TARGET_EXPAND_BUILTIN
9793 #define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
9795 #undef TARGET_EXPAND_BUILTIN_VA_START
9796 #define TARGET_EXPAND_BUILTIN_VA_START aarch64_expand_builtin_va_start
9798 #undef TARGET_FOLD_BUILTIN
9799 #define TARGET_FOLD_BUILTIN aarch64_fold_builtin
9801 #undef TARGET_FUNCTION_ARG
9802 #define TARGET_FUNCTION_ARG aarch64_function_arg
9804 #undef TARGET_FUNCTION_ARG_ADVANCE
9805 #define TARGET_FUNCTION_ARG_ADVANCE aarch64_function_arg_advance
9807 #undef TARGET_FUNCTION_ARG_BOUNDARY
9808 #define TARGET_FUNCTION_ARG_BOUNDARY aarch64_function_arg_boundary
9810 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
9811 #define TARGET_FUNCTION_OK_FOR_SIBCALL aarch64_function_ok_for_sibcall
9813 #undef TARGET_FUNCTION_VALUE
9814 #define TARGET_FUNCTION_VALUE aarch64_function_value
9816 #undef TARGET_FUNCTION_VALUE_REGNO_P
9817 #define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
9819 #undef TARGET_FRAME_POINTER_REQUIRED
9820 #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
9822 #undef TARGET_GIMPLE_FOLD_BUILTIN
9823 #define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
9825 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
9826 #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
9828 #undef TARGET_INIT_BUILTINS
9829 #define TARGET_INIT_BUILTINS aarch64_init_builtins
9831 #undef TARGET_LEGITIMATE_ADDRESS_P
9832 #define TARGET_LEGITIMATE_ADDRESS_P aarch64_legitimate_address_hook_p
9834 #undef TARGET_LEGITIMATE_CONSTANT_P
9835 #define TARGET_LEGITIMATE_CONSTANT_P aarch64_legitimate_constant_p
9837 #undef TARGET_LIBGCC_CMP_RETURN_MODE
9838 #define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
9840 #undef TARGET_LRA_P
9841 #define TARGET_LRA_P aarch64_lra_p
9843 #undef TARGET_MANGLE_TYPE
9844 #define TARGET_MANGLE_TYPE aarch64_mangle_type
9846 #undef TARGET_MEMORY_MOVE_COST
9847 #define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
9849 #undef TARGET_MUST_PASS_IN_STACK
9850 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
9852 /* This target hook should return true if accesses to volatile bitfields
9853 should use the narrowest mode possible. It should return false if these
9854 accesses should use the bitfield container type. */
9855 #undef TARGET_NARROW_VOLATILE_BITFIELD
9856 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
9858 #undef TARGET_OPTION_OVERRIDE
9859 #define TARGET_OPTION_OVERRIDE aarch64_override_options
9861 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
9862 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
9863 aarch64_override_options_after_change
9865 #undef TARGET_PASS_BY_REFERENCE
9866 #define TARGET_PASS_BY_REFERENCE aarch64_pass_by_reference
9868 #undef TARGET_PREFERRED_RELOAD_CLASS
9869 #define TARGET_PREFERRED_RELOAD_CLASS aarch64_preferred_reload_class
9871 #undef TARGET_SECONDARY_RELOAD
9872 #define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
9874 #undef TARGET_SHIFT_TRUNCATION_MASK
9875 #define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
9877 #undef TARGET_SETUP_INCOMING_VARARGS
9878 #define TARGET_SETUP_INCOMING_VARARGS aarch64_setup_incoming_varargs
9880 #undef TARGET_STRUCT_VALUE_RTX
9881 #define TARGET_STRUCT_VALUE_RTX aarch64_struct_value_rtx
9883 #undef TARGET_REGISTER_MOVE_COST
9884 #define TARGET_REGISTER_MOVE_COST aarch64_register_move_cost
9886 #undef TARGET_RETURN_IN_MEMORY
9887 #define TARGET_RETURN_IN_MEMORY aarch64_return_in_memory
9889 #undef TARGET_RETURN_IN_MSB
9890 #define TARGET_RETURN_IN_MSB aarch64_return_in_msb
9892 #undef TARGET_RTX_COSTS
9893 #define TARGET_RTX_COSTS aarch64_rtx_costs_wrapper
9895 #undef TARGET_SCHED_ISSUE_RATE
9896 #define TARGET_SCHED_ISSUE_RATE aarch64_sched_issue_rate
9898 #undef TARGET_TRAMPOLINE_INIT
9899 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
9901 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
9902 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P aarch64_use_blocks_for_constant_p
9904 #undef TARGET_VECTOR_MODE_SUPPORTED_P
9905 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
9907 #undef TARGET_ARRAY_MODE_SUPPORTED_P
9908 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
9910 #undef TARGET_VECTORIZE_ADD_STMT_COST
9911 #define TARGET_VECTORIZE_ADD_STMT_COST aarch64_add_stmt_cost
9913 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
9914 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
9915 aarch64_builtin_vectorization_cost
9917 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
9918 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE aarch64_preferred_simd_mode
9920 #undef TARGET_VECTORIZE_BUILTINS
9921 #define TARGET_VECTORIZE_BUILTINS
9923 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
9924 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
9925 aarch64_builtin_vectorized_function
9927 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
9928 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
9929 aarch64_autovectorize_vector_sizes
9931 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
9932 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \
9933 aarch64_atomic_assign_expand_fenv
9935 /* Section anchor support. */
9937 #undef TARGET_MIN_ANCHOR_OFFSET
9938 #define TARGET_MIN_ANCHOR_OFFSET -256
9940 /* Limit the maximum anchor offset to 4k-1, since that's the limit for a
9941 byte offset; we can do much more for larger data types, but have no way
9942 to determine the size of the access. We assume accesses are aligned. */
9943 #undef TARGET_MAX_ANCHOR_OFFSET
9944 #define TARGET_MAX_ANCHOR_OFFSET 4095
9946 #undef TARGET_VECTOR_ALIGNMENT
9947 #define TARGET_VECTOR_ALIGNMENT aarch64_simd_vector_alignment
9949 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
9950 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
9951 aarch64_simd_vector_alignment_reachable
9953 /* vec_perm support. */
9955 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
9956 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
9957 aarch64_vectorize_vec_perm_const_ok
9960 #undef TARGET_FIXED_CONDITION_CODE_REGS
9961 #define TARGET_FIXED_CONDITION_CODE_REGS aarch64_fixed_condition_code_regs
9963 struct gcc_target targetm = TARGET_INITIALIZER;
9965 #include "gt-aarch64.h"